diff options
Diffstat (limited to 'youtube_dl/extractor/wdr.py')
| -rw-r--r-- | youtube_dl/extractor/wdr.py | 17 | 
1 files changed, 12 insertions, 5 deletions
| diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py index fddcbf190..dd107ef8a 100644 --- a/youtube_dl/extractor/wdr.py +++ b/youtube_dl/extractor/wdr.py @@ -10,6 +10,7 @@ from ..compat import (  )  from ..utils import (      determine_ext, +    js_to_json,      strip_jsonp,      unified_strdate,      ExtractorError, @@ -21,8 +22,6 @@ class WDRIE(InfoExtractor):      _PAGE_REGEX = r'/mediathek/(?P<media_type>[^/]+)/(?P<type>[^/]+)/(?P<display_id>.+)\.html'      _VALID_URL = r'(?P<page_url>https?://(?:www\d\.)?wdr\d?\.de)' + _PAGE_REGEX + '|' + _CURRENT_MAUS_URL -    _JS_URL_REGEX = r'(https?://deviceids-medp.wdr.de/ondemand/\d+/\d+\.js)' -      _TESTS = [          {              'url': 'http://www1.wdr.de/mediathek/video/sendungen/doku-am-freitag/video-geheimnis-aachener-dom-100.html', @@ -102,9 +101,13 @@ class WDRIE(InfoExtractor):          display_id = mobj.group('display_id')          webpage = self._download_webpage(url, display_id) -        js_url = self._search_regex(self._JS_URL_REGEX, webpage, 'js_url', default=None) +        # for wdr.de the data-extension is in a tag with the class "mediaLink" +        # for wdrmaus its in a link to the page in a multiline "videoLink"-tag +        json_metadata = self._html_search_regex( +            r'class=(?:"mediaLink\b[^"]*"[^>]+|"videoLink\b[^"]*"[\s]*>\n[^\n]*)data-extension="([^"]+)"', +            webpage, 'media link', default=None, flags=re.MULTILINE) -        if not js_url: +        if not json_metadata:              entries = [                  self.url_result(page_url + href[0], 'WDR')                  for href in re.findall( @@ -117,8 +120,12 @@ class WDRIE(InfoExtractor):              raise ExtractorError('No downloadable streams found', expected=True) +        media_link_obj = self._parse_json(json_metadata, display_id, +                                          transform_source=js_to_json) +        jsonp_url = media_link_obj['mediaObj']['url'] +          metadata = self._download_json( -            js_url, 'metadata', transform_source=strip_jsonp) +            jsonp_url, 'metadata', transform_source=strip_jsonp)          metadata_tracker_data = metadata['trackerData']          metadata_media_resource = metadata['mediaResource'] | 
