aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Philipp Hagemeister <phihag@phihag.de> 2015-01-09 21:33:07 +0100
committer: Philipp Hagemeister <phihag@phihag.de> 2015-01-09 21:33:07 +0100
commit: 176cf9e0c3ef9efd4864b940ac1c2a1c0236593a (patch)
tree: 0786350f1a1756d05e345b82a174aa0913170ad9
parent: 7b6faddfc808ba798ab654312e98d01005cda9ee (diff)
download: youtube-dl-176cf9e0c3ef9efd4864b940ac1c2a1c0236593a.tar.xz
[wdr] Support overviews (Fixes #4651)
-rw-r--r-- youtube_dl/extractor/wdr.py 26
1 files changed, 26 insertions, 0 deletions
diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py
index d1c46ccb3..45466e31b 100644
--- a/youtube_dl/extractor/wdr.py
+++ b/youtube_dl/extractor/wdr.py
@@ -1,6 +1,7 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
+import itertools
import re
from .common import InfoExtractor
@@ -67,6 +68,10 @@ class WDRIE(InfoExtractor):
'upload_date': '20140717',
},
},
+ {
+ 'url': 'http://www1.wdr.de/mediathek/video/sendungen/quarks_und_co/filterseite-quarks-und-co100.html',
+ 'playlist_mincount': 146,
+ }
]
def _real_extract(self, url):
@@ -81,6 +86,27 @@ class WDRIE(InfoExtractor):
self.url_result(page_url + href, 'WDR')
for href in re.findall(r'<a href="/?(.+?%s\.html)" rel="nofollow"' % self._PLAYER_REGEX, webpage)
]
+
+ if entries: # Playlist page
+ return self.playlist_result(entries, page_id)
+
+ # Overview page
+ entries = []
+ for page_num in itertools.count(2):
+ hrefs = re.findall(
+ r'<li class="mediathekvideo"\s*>\s*<img[^>]*>\s*<a href="(/mediathek/video/[^"]+)"',
+ webpage)
+ entries.extend(
+ self.url_result(page_url + href, 'WDR')
+ for href in hrefs)
+ next_url_m = re.search(
+ r'<li class="nextToLast">\s*<a href="([^"]+)"', webpage)
+ if not next_url_m:
+ break
+ next_url = page_url + next_url_m.group(1)
+ webpage = self._download_webpage(
+ next_url, page_id,
+ note='Downloading playlist page %d' % page_num)
return self.playlist_result(entries, page_id)
flashvars = compat_parse_qs(