diff options
author | Sergey M․ <dstftw@gmail.com> | 2017-05-26 22:12:24 +0700 |
---|---|---|
committer | Sergey M․ <dstftw@gmail.com> | 2017-05-26 22:12:24 +0700 |
commit | 97067db2ae32b18ef573af99b91d36026b4b1d05 (patch) | |
tree | 71953bc2f50d127219d3374ffcad5d94127086c4 /youtube_dl/extractor/bbc.py | |
parent | c130f0a37b5c31fa559373ec319f79830c591c22 (diff) |
[bbc] Add support for authentication
Diffstat (limited to 'youtube_dl/extractor/bbc.py')
-rw-r--r-- | youtube_dl/extractor/bbc.py | 40 |
1 files changed, 40 insertions, 0 deletions
```diff
diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py
index dd65b8d86..7cd4446b4 100644
--- a/youtube_dl/extractor/bbc.py
+++ b/youtube_dl/extractor/bbc.py
@@ -6,14 +6,18 @@ import itertools
 
 from .common import InfoExtractor
 from ..utils import (
+    clean_html,
     dict_get,
     ExtractorError,
     float_or_none,
+    get_element_by_class,
     int_or_none,
     parse_duration,
     parse_iso8601,
     try_get,
     unescapeHTML,
+    urlencode_postdata,
+    urljoin,
 )
 from ..compat import (
     compat_etree_fromstring,
@@ -38,6 +42,9 @@ class BBCCoUkIE(InfoExtractor):
                         (?P<id>%s)(?!/(?:episodes|broadcasts|clips))
                     ''' % _ID_REGEX
 
+    _LOGIN_URL = 'https://account.bbc.com/signin'
+    _NETRC_MACHINE = 'bbc'
+
     _MEDIASELECTOR_URLS = [
         # Provides HQ HLS streams with even better quality that pc mediaset but fails
         # with geolocation in some cases when it's even not geo restricted at all (e.g.
@@ -227,6 +234,39 @@ class BBCCoUkIE(InfoExtractor):
 
     _USP_RE = r'/([^/]+?)\.ism(?:\.hlsv2\.ism)?/[^/]+\.m3u8'
 
+    def _login(self):
+        username, password = self._get_login_info()
+        if username is None:
+            return
+
+        login_page = self._download_webpage(
+            self._LOGIN_URL, None, 'Downloading signin page')
+
+        login_form = self._hidden_inputs(login_page)
+
+        login_form.update({
+            'username': username,
+            'password': password,
+        })
+
+        post_url = urljoin(self._LOGIN_URL, self._search_regex(
+            r'<form[^>]+action=(["\'])(?P<url>.+?)\1', login_page,
+            'post url', default=self._LOGIN_URL, group='url'))
+
+        response, urlh = self._download_webpage_handle(
+            post_url, None, 'Logging in', data=urlencode_postdata(login_form),
+            headers={'Referer': self._LOGIN_URL})
+
+        if self._LOGIN_URL in urlh.geturl():
+            error = clean_html(get_element_by_class('form-message', response))
+            if error:
+                raise ExtractorError(
+                    'Unable to login: %s' % error, expected=True)
+            raise ExtractorError('Unable to log in')
+
+    def _real_initialize(self):
+        self._login()
+
     class MediaSelectionError(Exception):
         def __init__(self, id):
             self.id = id
```