diff options
author | Philipp Hagemeister <phihag@phihag.de> | 2015-01-09 21:33:07 +0100 |
---|---|---|
committer | Philipp Hagemeister <phihag@phihag.de> | 2015-01-09 21:33:07 +0100 |
commit | 176cf9e0c3ef9efd4864b940ac1c2a1c0236593a (patch) | |
tree | 0786350f1a1756d05e345b82a174aa0913170ad9 /youtube_dl | |
parent | 7b6faddfc808ba798ab654312e98d01005cda9ee (diff) |
[wdr] Support overviews (Fixes #4651)
Diffstat (limited to 'youtube_dl')
-rw-r--r-- | youtube_dl/extractor/wdr.py | 26 |
1 file changed, 26 insertions, 0 deletions
diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py index d1c46ccb3..45466e31b 100644 --- a/youtube_dl/extractor/wdr.py +++ b/youtube_dl/extractor/wdr.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- from __future__ import unicode_literals +import itertools import re from .common import InfoExtractor @@ -67,6 +68,10 @@ class WDRIE(InfoExtractor): 'upload_date': '20140717', }, }, + { + 'url': 'http://www1.wdr.de/mediathek/video/sendungen/quarks_und_co/filterseite-quarks-und-co100.html', + 'playlist_mincount': 146, + } ] def _real_extract(self, url): @@ -81,6 +86,27 @@ class WDRIE(InfoExtractor): self.url_result(page_url + href, 'WDR') for href in re.findall(r'<a href="/?(.+?%s\.html)" rel="nofollow"' % self._PLAYER_REGEX, webpage) ] + + if entries: # Playlist page + return self.playlist_result(entries, page_id) + + # Overview page + entries = [] + for page_num in itertools.count(2): + hrefs = re.findall( + r'<li class="mediathekvideo"\s*>\s*<img[^>]*>\s*<a href="(/mediathek/video/[^"]+)"', + webpage) + entries.extend( + self.url_result(page_url + href, 'WDR') + for href in hrefs) + next_url_m = re.search( + r'<li class="nextToLast">\s*<a href="([^"]+)"', webpage) + if not next_url_m: + break + next_url = page_url + next_url_m.group(1) + webpage = self._download_webpage( + next_url, page_id, + note='Downloading playlist page %d' % page_num) return self.playlist_result(entries, page_id) flashvars = compat_parse_qs( |