diff options
author | Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com> | 2015-05-14 23:41:27 +0200 |
---|---|---|
committer | Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com> | 2015-05-14 23:41:27 +0200 |
commit | 2bc43303031215436b201e656094b60ab3ec7e9e (patch) | |
tree | 3d1f5c5c2a7acbe4cc059eb6762a4c116a954cdd /youtube_dl/extractor/youtube.py | |
parent | 12675275a1d2158fbe409361888569e4cb52ef07 (diff) |
[youtube:history] Fix extraction (fixes #5702)
It uses the same method as YoutubeSubscriptionsIE; if another feed starts using it, we should consider using a base class.
Diffstat (limited to 'youtube_dl/extractor/youtube.py')
-rw-r--r-- | youtube_dl/extractor/youtube.py | 37 |
1 files changed, 33 insertions, 4 deletions
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 0869c9fd4..e58184adc 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1667,13 +1667,42 @@ class YoutubeWatchLaterIE(YoutubePlaylistIE):
         return self._extract_playlist('WL')
 
 
-class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
+class YoutubeHistoryIE(YoutubePlaylistIE):
     IE_NAME = 'youtube:history'
     IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
     _VALID_URL = 'https?://www\.youtube\.com/feed/history|:ythistory'
-    _FEED_NAME = 'history'
-    _PERSONAL_FEED = True
-    _PLAYLIST_TITLE = 'Youtube Watch History'
+    _TESTS = []
+
+    def _real_extract(self, url):
+        title = 'Youtube History'
+        page = self._download_webpage('https://www.youtube.com/feed/history', title)
+
+        # The extraction process is the same as for playlists, but the regex
+        # for the video ids doesn't contain an index
+        ids = []
+        more_widget_html = content_html = page
+
+        for page_num in itertools.count(1):
+            matches = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)
+            new_ids = orderedSet(matches)
+            ids.extend(new_ids)
+
+            mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
+            if not mobj:
+                break
+
+            more = self._download_json(
+                'https://youtube.com/%s' % mobj.group('more'), title,
+                'Downloading page #%s' % page_num,
+                transform_source=uppercase_escape)
+            content_html = more['content_html']
+            more_widget_html = more['load_more_widget_html']
+
+        return {
+            '_type': 'playlist',
+            'title': title,
+            'entries': self._ids_to_results(ids),
+        }
 
 
 class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):