about | summary | refs | log | tree | commit | diff
path: root/youtube_dl/extractor
diff options
context:
space:
mode:
author Philipp Hagemeister <phihag@phihag.de> 2013-11-28 04:37:02 +0100
committer Philipp Hagemeister <phihag@phihag.de> 2013-11-28 04:37:02 +0100
commit 9d93e7da6c23151272a91fbc2889c098e3b5f139 (patch)
tree fa5ff870328b974afa27a402e44d9eb12aea8981 /youtube_dl/extractor
parent dcca796ce431da0d8b6927609c08938f22ba44cf (diff)
parent 0e44d8381a439c84dd23477d32f7da4bb0a06293 (diff)
Merge branch 'master' of github.com:rg3/youtube-dl
Diffstat (limited to 'youtube_dl/extractor')
-rw-r--r-- youtube_dl/extractor/yahoo.py 8
-rw-r--r-- youtube_dl/extractor/youtube.py 19
2 files changed, 8 insertions, 19 deletions
diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py
index 34e6afb20..617e3bb06 100644
--- a/youtube_dl/extractor/yahoo.py
+++ b/youtube_dl/extractor/yahoo.py
@@ -46,7 +46,7 @@ class YahooIE(InfoExtractor):
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
- items_json = self._search_regex(r'YVIDEO_INIT_ITEMS = ({.*?});$',
+ items_json = self._search_regex(r'mediaItems: ({.*?})$',
webpage, u'items', flags=re.MULTILINE)
items = json.loads(items_json)
info = items['mediaItems']['query']['results']['mediaObj'][0]
@@ -91,17 +91,13 @@ class YahooIE(InfoExtractor):
formats.append(format_info)
formats = sorted(formats, key=lambda f:(f['height'], f['width']))
- info = {
+ return {
'id': video_id,
'title': meta['title'],
'formats': formats,
'description': clean_html(meta['description']),
'thumbnail': meta['thumbnail'],
}
- # TODO: Remove when #980 has been merged
- info.update(formats[-1])
-
- return info
class YahooSearchIE(SearchInfoExtractor):
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 9ef5fecce..765b4a9bf 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1547,7 +1547,9 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
# the id of the playlist is just 'RD' + video_id
url = 'https://youtube.com/watch?v=%s&list=%s' % (playlist_id[2:], playlist_id)
webpage = self._download_webpage(url, playlist_id, u'Downloading Youtube mix')
- title = clean_html(get_element_by_attribute('class', 'title long-title', webpage))
+ title_span = (get_element_by_attribute('class', 'title long-title', webpage) or
+ get_element_by_attribute('class', 'title ', webpage))
+ title = clean_html(title_span)
video_re = r'data-index="\d+".*?href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s' % re.escape(playlist_id)
ids = orderedSet(re.findall(video_re, webpage))
url_results = self._ids_to_results(ids)
@@ -1789,7 +1791,6 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
"""
_LOGIN_REQUIRED = True
- _PAGING_STEP = 30
# use action_load_personal_feed instead of action_load_system_feed
_PERSONAL_FEED = False
@@ -1809,9 +1810,8 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
def _real_extract(self, url):
feed_entries = []
- # The step argument is available only in 2.7 or higher
- for i in itertools.count(0):
- paging = i*self._PAGING_STEP
+ paging = 0
+ for i in itertools.count(1):
info = self._download_webpage(self._FEED_TEMPLATE % paging,
u'%s feed' % self._FEED_NAME,
u'Downloading page %s' % i)
@@ -1824,6 +1824,7 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
for video_id in ids)
if info['paging'] is None:
break
+ paging = info['paging']
return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
@@ -1843,7 +1844,6 @@ class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
_VALID_URL = r'https?://www\.youtube\.com/feed/watch_later|:ytwatchlater'
_FEED_NAME = 'watch_later'
_PLAYLIST_TITLE = u'Youtube Watch Later'
- _PAGING_STEP = 100
_PERSONAL_FEED = True
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
@@ -1853,13 +1853,6 @@ class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
_PERSONAL_FEED = True
_PLAYLIST_TITLE = u'Youtube Watch History'
- def _real_extract(self, url):
- webpage = self._download_webpage('https://www.youtube.com/feed/history', u'History')
- data_paging = self._search_regex(r'data-paging="(\d+)"', webpage, u'data-paging')
- # The step is actually a ridiculously big number (like 1374343569725646)
- self._PAGING_STEP = int(data_paging)
- return super(YoutubeHistoryIE, self)._real_extract(url)
-
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
IE_NAME = u'youtube:favorites'
IE_DESC = u'YouTube.com favourite videos, "ytfav" keyword (requires authentication)'