diff options
| -rw-r--r-- | youtube_dl/extractor/youtube.py | 29 | 
1 files changed, 20 insertions, 9 deletions
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 2774ec30b..791e1fe62 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1370,10 +1370,18 @@ class YoutubeChannelIE(InfoExtractor):
 
     def extract_videos_from_page(self, page):
         ids_in_page = []
-        for mobj in re.finditer(r'href="/watch\?v=([0-9A-Za-z_-]+)&?', page):
-            if mobj.group(1) not in ids_in_page:
-                ids_in_page.append(mobj.group(1))
-        return ids_in_page
+        titles_in_page = []
+        for mobj in re.finditer(r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?', page):
+            video_id = mobj.group('id')
+            video_title = unescapeHTML(mobj.group('title'))
+            try:
+                idx = ids_in_page.index(video_id)
+                if video_title and not titles_in_page[idx]:
+                    titles_in_page[idx] = video_title
+            except ValueError:
+                ids_in_page.append(video_id)
+                titles_in_page.append(video_title)
+        return zip(ids_in_page, titles_in_page)
 
     def _real_extract(self, url):
         channel_id = self._match_id(url)
@@ -1390,10 +1398,12 @@ class YoutubeChannelIE(InfoExtractor):
         if autogenerated:
             # The videos are contained in a single page
             # the ajax pages can't be used, they are empty
-            video_ids = self.extract_videos_from_page(channel_page)
+            videos = self.extract_videos_from_page(channel_page)
             entries = [
-                self.url_result(video_id, 'Youtube', video_id=video_id)
-                for video_id in video_ids]
+                self.url_result(
+                    video_id, 'Youtube', video_id=video_id,
+                    video_title=video_title)
+                for video_id, video_title in videos]
             return self.playlist_result(entries, channel_id)
 
         def _entries():
@@ -1401,9 +1411,10 @@ class YoutubeChannelIE(InfoExtractor):
             for pagenum in itertools.count(1):
 
                 ids_in_page = self.extract_videos_from_page(content_html)
-                for video_id in ids_in_page:
+                for video_id, video_title in ids_in_page:
                     yield self.url_result(
-                        video_id, 'Youtube', video_id=video_id)
+                        video_id, 'Youtube', video_id=video_id,
+                        video_title=video_title)
 
                 mobj = re.search(
                     r'data-uix-load-more-href="/?(?P<more>[^"]+)"',
