diff options
author | Yen Chi Hsuan <yan12125@gmail.com> | 2017-05-01 23:09:18 +0800 |
---|---|---|
committer | Yen Chi Hsuan <yan12125@gmail.com> | 2017-05-04 16:26:17 +0800 |
commit | 0c265486016b06342fb257966474ce591667aaff (patch) | |
tree | 535a5d3331dd5be08e818174e630389485ad30ac /youtube_dl/extractor | |
parent | 5401bea27fd6bcdd030e50f3af85145807eeab27 (diff) |
[cda] Implement birthday verification (closes #12789)
Diffstat (limited to 'youtube_dl/extractor')
-rwxr-xr-x | youtube_dl/extractor/cda.py | 52 | ||||
-rw-r--r-- | youtube_dl/extractor/videopress.py | 9 |
2 files changed, 52 insertions, 9 deletions
diff --git a/youtube_dl/extractor/cda.py b/youtube_dl/extractor/cda.py
index 1ee35b501..78b7a923c 100755
--- a/youtube_dl/extractor/cda.py
+++ b/youtube_dl/extractor/cda.py
@@ -9,7 +9,10 @@ from ..utils import (
     ExtractorError,
     float_or_none,
     int_or_none,
+    multipart_encode,
     parse_duration,
+    random_birthday,
+    urljoin,
 )
 
 
@@ -27,7 +30,8 @@ class CDAIE(InfoExtractor):
             'description': 'md5:269ccd135d550da90d1662651fcb9772',
             'thumbnail': r're:^https?://.*\.jpg$',
             'average_rating': float,
-            'duration': 39
+            'duration': 39,
+            'age_limit': 0,
         }
     }, {
         'url': 'http://www.cda.pl/video/57413289',
@@ -41,13 +45,41 @@ class CDAIE(InfoExtractor):
             'uploader': 'crash404',
             'view_count': int,
             'average_rating': float,
-            'duration': 137
+            'duration': 137,
+            'age_limit': 0,
         }
     }, {
+        # Age-restricted
+        'url': 'http://www.cda.pl/video/1273454c4',
+        'info_dict': {
+            'id': '1273454c4',
+            'ext': 'mp4',
+            'title': 'Bronson (2008) napisy HD 1080p',
+            'description': 'md5:1b6cb18508daf2dc4e0fa4db77fec24c',
+            'height': 1080,
+            'uploader': 'boniek61',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'duration': 5554,
+            'age_limit': 18,
+            'view_count': int,
+            'average_rating': float,
+        },
+    }, {
         'url': 'http://ebd.cda.pl/0x0/5749950c',
         'only_matching': True,
     }]
 
+    def _download_age_confirm_page(self, url, video_id, *args, **kwargs):
+        form_data = random_birthday('rok', 'miesiac', 'dzien')
+        form_data.update({'return': url, 'module': 'video', 'module_id': video_id})
+        data, content_type = multipart_encode(form_data)
+        return self._download_webpage(
+            urljoin(url, '/a/validatebirth'), video_id, *args,
+            data=data, headers={
+                'Referer': url,
+                'Content-Type': content_type,
+            }, **kwargs)
+
     def _real_extract(self, url):
         video_id = self._match_id(url)
         self._set_cookie('cda.pl', 'cda.player', 'html5')
@@ -57,6 +89,13 @@ class CDAIE(InfoExtractor):
         if 'Ten film jest dostępny dla użytkowników premium' in webpage:
             raise ExtractorError('This video is only available for premium users.', expected=True)
 
+        need_confirm_age = False
+        if self._html_search_regex(r'(<form[^>]+action="/a/validatebirth")',
+                                   webpage, 'birthday validate form', default=None):
+            webpage = self._download_age_confirm_page(
+                url, video_id, note='Confirming age')
+            need_confirm_age = True
+
         formats = []
 
         uploader = self._search_regex(r'''(?x)
@@ -81,6 +120,7 @@ class CDAIE(InfoExtractor):
             'thumbnail': self._og_search_thumbnail(webpage),
             'formats': formats,
             'duration': None,
+            'age_limit': 18 if need_confirm_age else 0,
         }
 
         def extract_format(page, version):
@@ -121,7 +161,12 @@ class CDAIE(InfoExtractor):
         for href, resolution in re.findall(
                 r'<a[^>]+data-quality="[^"]+"[^>]+href="([^"]+)"[^>]+class="quality-btn"[^>]*>([0-9]+p)',
                 webpage):
-            webpage = self._download_webpage(
+            if need_confirm_age:
+                handler = self._download_age_confirm_page
+            else:
+                handler = self._download_webpage
+
+            webpage = handler(
                 self._BASE_URL + href, video_id,
                 'Downloading %s version information' % resolution, fatal=False)
             if not webpage:
@@ -129,6 +174,7 @@ class CDAIE(InfoExtractor):
                 # invalid version is requested.
                 self.report_warning('Unable to download %s version information' % resolution)
                 continue
+
             extract_format(webpage, resolution)
 
         self._sort_formats(formats)
diff --git a/youtube_dl/extractor/videopress.py b/youtube_dl/extractor/videopress.py
index 049db25a5..e5f964d39 100644
--- a/youtube_dl/extractor/videopress.py
+++ b/youtube_dl/extractor/videopress.py
@@ -1,7 +1,6 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
-import random
 import re
 
 from .common import InfoExtractor
@@ -11,6 +10,7 @@ from ..utils import (
     float_or_none,
     parse_age_limit,
     qualities,
+    random_birthday,
     try_get,
     unified_timestamp,
     urljoin,
@@ -47,13 +47,10 @@ class VideoPressIE(InfoExtractor):
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
+        query = random_birthday('birth_year', 'birth_month', 'birth_day')
         video = self._download_json(
             'https://public-api.wordpress.com/rest/v1.1/videos/%s' % video_id,
-            video_id, query={
-                'birth_month': random.randint(1, 12),
-                'birth_day': random.randint(1, 31),
-                'birth_year': random.randint(1950, 1995),
-            })
+            video_id, query=query)
 
         title = video['title']
 
|