diff options
| author | Philipp Hagemeister <phihag@phihag.de> | 2014-09-01 00:01:48 +0200 | 
|---|---|---|
| committer | Philipp Hagemeister <phihag@phihag.de> | 2014-09-01 00:01:48 +0200 | 
| commit | 7d48c06f27fba7287c6c548e930f90f3aebac07a (patch) | |
| tree | 91351ba2ce7b52ab6ea877216e9eb8327a907839 | |
| parent | d169e36f5c4dc50a6c19c2155ea7ea7a89dd5c31 (diff) | |
| parent | 9ea9b614489038cc7e51f71c0f8ecc7d84534a30 (diff) | |
Merge remote-tracking branch 'origin/master'
| -rw-r--r-- | youtube_dl/extractor/youtube.py | 43 | 
1 file changed, 37 insertions, 6 deletions
| diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 3417c1275..78f3b7e7b 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1434,12 +1434,6 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):              paging = mobj.group('paging')          return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE) -class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor): -    IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)' -    _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?' -    _FEED_NAME = 'subscriptions' -    _PLAYLIST_TITLE = u'Youtube Subscriptions' -  class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):      IE_DESC = u'YouTube.com recommended videos, "ytrec" keyword (requires authentication)'      _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?' @@ -1472,6 +1466,43 @@ class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):          return self.url_result(playlist_id, 'YoutubePlaylist') +class YoutubeSubscriptionsIE(YoutubePlaylistIE): +    IE_NAME = u'youtube:subscriptions' +    IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)' +    _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?' 
+ +    def _real_extract(self, url): +        title = u'Youtube Subscriptions' +        page = self._download_webpage('https://www.youtube.com/feed/subscriptions', title) + +        # The extraction process is the same as for playlists, but the regex +        # for the video ids doesn't contain an index +        ids = [] +        more_widget_html = content_html = page + +        for page_num in itertools.count(1): +            matches = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html) +            new_ids = orderedSet(matches) +            ids.extend(new_ids) + +            mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html) +            if not mobj: +                break + +            more = self._download_json( +                'https://youtube.com/%s' % mobj.group('more'), title, +                'Downloading page #%s' % page_num, +                transform_source=uppercase_escape) +            content_html = more['content_html'] +            more_widget_html = more['load_more_widget_html'] + +        return { +            '_type': 'playlist', +            'title': title, +            'entries': self._ids_to_results(ids), +        } + +  class YoutubeTruncatedURLIE(InfoExtractor):      IE_NAME = 'youtube:truncated_url'      IE_DESC = False  # Do not list | 
