aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>2013-07-07 13:58:23 +0200
committerJaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>2013-07-07 13:58:23 +0200
commit04cc96173c6dc6bb92ad8ed90be01e8257f9e850 (patch)
treebe1eec586d8c869e3d2934321f27c9e394cf9c92
parentfbaaad49d7d6683b620929233ae661de64df1101 (diff)
downloadyoutube-dl-04cc96173c6dc6bb92ad8ed90be01e8257f9e850.tar.xz
[youtube] Add an extractor for the subscriptions feed (closes #498)
It can be downloaded using the ytsubscriptions keyword. It needs the login information.
-rw-r--r--youtube_dl/extractor/__init__.py10
-rw-r--r--youtube_dl/extractor/youtube.py35
2 files changed, 43 insertions, 2 deletions
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index ff5cbf4c9..1a5f68e15 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -69,7 +69,15 @@ from .yahoo import YahooIE, YahooSearchIE
from .youjizz import YouJizzIE
from .youku import YoukuIE
from .youporn import YouPornIE
-from .youtube import YoutubeIE, YoutubePlaylistIE, YoutubeSearchIE, YoutubeUserIE, YoutubeChannelIE, YoutubeShowIE
+from .youtube import (
+ YoutubeIE,
+ YoutubePlaylistIE,
+ YoutubeSearchIE,
+ YoutubeUserIE,
+ YoutubeChannelIE,
+ YoutubeShowIE,
+ YoutubeSubscriptionsIE,
+)
from .zdf import ZDFIE
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 14a8bd6ea..7ca6244e9 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -4,6 +4,7 @@ import json
import netrc
import re
import socket
+import itertools
from .common import InfoExtractor, SearchInfoExtractor
from ..utils import (
@@ -19,6 +20,7 @@ from ..utils import (
ExtractorError,
unescapeHTML,
unified_strdate,
+ orderedSet,
)
@@ -122,7 +124,7 @@ class YoutubeIE(InfoExtractor):
@classmethod
def suitable(cls, url):
"""Receives a URL and returns True if suitable for this IE."""
- if YoutubePlaylistIE.suitable(url): return False
+ if YoutubePlaylistIE.suitable(url) or YoutubeSubscriptionsIE.suitable(url): return False
return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
def report_lang(self):
@@ -864,3 +866,34 @@ class YoutubeShowIE(InfoExtractor):
m_seasons = list(re.finditer(r'href="(/playlist\?list=.*?)"', webpage))
self.to_screen(u'%s: Found %s seasons' % (show_name, len(m_seasons)))
return [self.url_result('https://www.youtube.com' + season.group(1), 'YoutubePlaylist') for season in m_seasons]
+
+
+class YoutubeSubscriptionsIE(YoutubeIE):
+ """It's a subclass of YoutubeIE because we need to login"""
+ IE_DESC = u'YouTube.com subscriptions feed, "ytsubscriptions" keyword(requires authentication)'
+ _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|ytsubscriptions'
+ IE_NAME = u'youtube:subscriptions'
+ _FEED_TEMPLATE = 'http://www.youtube.com/feed_ajax?action_load_system_feed=1&feed_name=subscriptions&paging=%s'
+ _PAGING_STEP = 30
+
+ _TESTS = []
+
+ @classmethod
+ def suitable(cls, url):
+ return re.match(cls._VALID_URL, url) is not None
+
+ def _real_extract(self, url):
+ feed_entries = []
+ # The step argument is available only in 2.7 or higher
+ for i in itertools.count(0):
+ paging = i*self._PAGING_STEP
+ info = self._download_webpage(self._FEED_TEMPLATE % paging, 'feed',
+ u'Downloading page %s' % i)
+ info = json.loads(info)
+ feed_html = info['feed_html']
+ m_ids = re.finditer(r'"/watch\?v=(.*?)"', feed_html)
+ ids = orderedSet(m.group(1) for m in m_ids)
+ feed_entries.extend(self.url_result(id, 'Youtube') for id in ids)
+ if info['paging'] is None:
+ break
+ return self.playlist_result(feed_entries, playlist_title='Youtube Subscriptions')