diff options
Diffstat (limited to 'youtube_dl/extractor/bbc.py')
| -rw-r--r-- | youtube_dl/extractor/bbc.py | 62 | 
1 files changed, 57 insertions, 5 deletions
| diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py index dd65b8d86..8b20c03d6 100644 --- a/youtube_dl/extractor/bbc.py +++ b/youtube_dl/extractor/bbc.py @@ -6,14 +6,18 @@ import itertools  from .common import InfoExtractor  from ..utils import ( +    clean_html,      dict_get,      ExtractorError,      float_or_none, +    get_element_by_class,      int_or_none,      parse_duration,      parse_iso8601,      try_get,      unescapeHTML, +    urlencode_postdata, +    urljoin,  )  from ..compat import (      compat_etree_fromstring, @@ -25,19 +29,23 @@ from ..compat import (  class BBCCoUkIE(InfoExtractor):      IE_NAME = 'bbc.co.uk'      IE_DESC = 'BBC iPlayer' -    _ID_REGEX = r'[pb][\da-z]{7}' +    _ID_REGEX = r'[pbw][\da-z]{7}'      _VALID_URL = r'''(?x)                      https?://                          (?:www\.)?bbc\.co\.uk/                          (?:                              programmes/(?!articles/)|                              iplayer(?:/[^/]+)?/(?:episode/|playlist/)| -                            music/clips[/#]| -                            radio/player/ +                            music/(?:clips|audiovideo/popular)[/#]| +                            radio/player/| +                            events/[^/]+/play/[^/]+/                          )                          (?P<id>%s)(?!/(?:episodes|broadcasts|clips))                      ''' % _ID_REGEX +    _LOGIN_URL = 'https://account.bbc.com/signin' +    _NETRC_MACHINE = 'bbc' +      _MEDIASELECTOR_URLS = [          # Provides HQ HLS streams with even better quality that pc mediaset but fails          # with geolocation in some cases when it's even not geo restricted at all (e.g. @@ -222,11 +230,49 @@ class BBCCoUkIE(InfoExtractor):          }, {              'url': 'http://www.bbc.co.uk/radio/player/p03cchwf',              'only_matching': True, -        } -    ] +        }, { +            'url': 'https://www.bbc.co.uk/music/audiovideo/popular#p055bc55', +            'only_matching': True, +        }, { +            'url': 'http://www.bbc.co.uk/programmes/w3csv1y9', +            'only_matching': True, +        }]      _USP_RE = r'/([^/]+?)\.ism(?:\.hlsv2\.ism)?/[^/]+\.m3u8' +    def _login(self): +        username, password = self._get_login_info() +        if username is None: +            return + +        login_page = self._download_webpage( +            self._LOGIN_URL, None, 'Downloading signin page') + +        login_form = self._hidden_inputs(login_page) + +        login_form.update({ +            'username': username, +            'password': password, +        }) + +        post_url = urljoin(self._LOGIN_URL, self._search_regex( +            r'<form[^>]+action=(["\'])(?P<url>.+?)\1', login_page, +            'post url', default=self._LOGIN_URL, group='url')) + +        response, urlh = self._download_webpage_handle( +            post_url, None, 'Logging in', data=urlencode_postdata(login_form), +            headers={'Referer': self._LOGIN_URL}) + +        if self._LOGIN_URL in urlh.geturl(): +            error = clean_html(get_element_by_class('form-message', response)) +            if error: +                raise ExtractorError( +                    'Unable to login: %s' % error, expected=True) +            raise ExtractorError('Unable to log in') + +    def _real_initialize(self): +        self._login() +      class MediaSelectionError(Exception):          def __init__(self, id):              self.id = id @@ -483,6 +529,12 @@ class BBCCoUkIE(InfoExtractor):          webpage = self._download_webpage(url, group_id, 'Downloading video page') +        error = self._search_regex( +            r'<div\b[^>]+\bclass=["\']smp__message delta["\'][^>]*>([^<]+)<', +            webpage, 'error', default=None) +        if error: +            raise ExtractorError(error, expected=True) +          programme_id = None          duration = None | 
