diff options
| author | Sergey M․ <dstftw@gmail.com> | 2015-05-01 03:59:13 +0600 | 
|---|---|---|
| committer | Sergey M․ <dstftw@gmail.com> | 2015-05-01 03:59:13 +0600 | 
| commit | 8683b4d8d91a7c6b72ca4a12bf6b538cbb4b2a68 (patch) | |
| tree | d2b98b614566d73febc42cfb8a51ca9d05e10af2 | |
| parent | 1dbd717eb49d075fa1efabc674e8074fd165eb0a (diff) | |
[bbccouk] Improve extraction (Closes #5530)
| -rw-r--r-- | youtube_dl/extractor/bbccouk.py | 35 | 
1 file changed, 26 insertions, 9 deletions
```diff
diff --git a/youtube_dl/extractor/bbccouk.py b/youtube_dl/extractor/bbccouk.py
index abc34a576..22c2843be 100644
--- a/youtube_dl/extractor/bbccouk.py
+++ b/youtube_dl/extractor/bbccouk.py
@@ -3,7 +3,10 @@ from __future__ import unicode_literals
 import xml.etree.ElementTree
 
 from .common import InfoExtractor
-from ..utils import ExtractorError
+from ..utils import (
+    ExtractorError,
+    int_or_none,
+)
 from ..compat import compat_HTTPError
 
 
@@ -326,16 +329,29 @@ class BBCCoUkIE(InfoExtractor):
 
         webpage = self._download_webpage(url, group_id, 'Downloading video page')
 
-        programme_id = self._search_regex(
-            r'"vpid"\s*:\s*"([\da-z]{8})"', webpage, 'vpid', fatal=False, default=None)
+        thumbnail = self._og_search_thumbnail(webpage)
+
+        programme_id = None
+
+        tviplayer = self._search_regex(
+            r'mediator\.bind\(({.+?})\s*,\s*document\.getElementById',
+            webpage, 'player', default=None)
+
+        if tviplayer:
+            player = self._parse_json(tviplayer, group_id).get('player', {})
+            duration = int_or_none(player.get('duration'))
+            programme_id = player.get('vpid')
+
+        if not programme_id:
+            programme_id = self._search_regex(
+                r'"vpid"\s*:\s*"([\da-z]{8})"', webpage, 'vpid', fatal=False, default=None)
+
         if programme_id:
-            player = self._download_json(
-                'http://www.bbc.co.uk/iplayer/episode/%s.json' % group_id,
-                group_id)['jsConf']['player']
-            title = player['title']
-            description = player['subtitle']
-            duration = player['duration']
             formats, subtitles = self._download_media_selector(programme_id)
+            title = self._og_search_title(webpage)
+            description = self._search_regex(
+                r'<p class="medium-description">([^<]+)</p>',
+                webpage, 'description', fatal=False)
         else:
             programme_id, title, description, duration, formats, subtitles = self._download_playlist(group_id)
 
@@ -345,6 +361,7 @@ class BBCCoUkIE(InfoExtractor):
             'id': programme_id,
             'title': title,
             'description': description,
+            'thumbnail': thumbnail,
             'duration': duration,
             'formats': formats,
             'subtitles': subtitles,
```
