diff options
Diffstat (limited to 'youtube_dl/extractor/cda.py')
| -rwxr-xr-x | youtube_dl/extractor/cda.py | 52 | 
1 files changed, 49 insertions, 3 deletions
diff --git a/youtube_dl/extractor/cda.py b/youtube_dl/extractor/cda.py
index 1ee35b501..78b7a923c 100755
--- a/youtube_dl/extractor/cda.py
+++ b/youtube_dl/extractor/cda.py
@@ -9,7 +9,10 @@ from ..utils import (
     ExtractorError,
     float_or_none,
     int_or_none,
+    multipart_encode,
     parse_duration,
+    random_birthday,
+    urljoin,
 )
 
 
@@ -27,7 +30,8 @@ class CDAIE(InfoExtractor):
             'description': 'md5:269ccd135d550da90d1662651fcb9772',
             'thumbnail': r're:^https?://.*\.jpg$',
             'average_rating': float,
-            'duration': 39
+            'duration': 39,
+            'age_limit': 0,
         }
     }, {
         'url': 'http://www.cda.pl/video/57413289',
@@ -41,13 +45,41 @@ class CDAIE(InfoExtractor):
             'uploader': 'crash404',
             'view_count': int,
             'average_rating': float,
-            'duration': 137
+            'duration': 137,
+            'age_limit': 0,
         }
     }, {
+        # Age-restricted
+        'url': 'http://www.cda.pl/video/1273454c4',
+        'info_dict': {
+            'id': '1273454c4',
+            'ext': 'mp4',
+            'title': 'Bronson (2008) napisy HD 1080p',
+            'description': 'md5:1b6cb18508daf2dc4e0fa4db77fec24c',
+            'height': 1080,
+            'uploader': 'boniek61',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'duration': 5554,
+            'age_limit': 18,
+            'view_count': int,
+            'average_rating': float,
+        },
+    }, {
         'url': 'http://ebd.cda.pl/0x0/5749950c',
         'only_matching': True,
     }]
 
+    def _download_age_confirm_page(self, url, video_id, *args, **kwargs):
+        form_data = random_birthday('rok', 'miesiac', 'dzien')
+        form_data.update({'return': url, 'module': 'video', 'module_id': video_id})
+        data, content_type = multipart_encode(form_data)
+        return self._download_webpage(
+            urljoin(url, '/a/validatebirth'), video_id, *args,
+            data=data, headers={
+                'Referer': url,
+                'Content-Type': content_type,
+            }, **kwargs)
+
     def _real_extract(self, url):
         video_id = self._match_id(url)
         self._set_cookie('cda.pl', 'cda.player', 'html5')
@@ -57,6 +89,13 @@ class CDAIE(InfoExtractor):
         if 'Ten film jest dostępny dla użytkowników premium' in webpage:
             raise ExtractorError('This video is only available for premium users.', expected=True)
 
+        need_confirm_age = False
+        if self._html_search_regex(r'(<form[^>]+action="/a/validatebirth")',
+                                   webpage, 'birthday validate form', default=None):
+            webpage = self._download_age_confirm_page(
+                url, video_id, note='Confirming age')
+            need_confirm_age = True
+
         formats = []
 
         uploader = self._search_regex(r'''(?x)
@@ -81,6 +120,7 @@ class CDAIE(InfoExtractor):
             'thumbnail': self._og_search_thumbnail(webpage),
             'formats': formats,
             'duration': None,
+            'age_limit': 18 if need_confirm_age else 0,
         }
 
         def extract_format(page, version):
@@ -121,7 +161,12 @@ class CDAIE(InfoExtractor):
         for href, resolution in re.findall(
                 r'<a[^>]+data-quality="[^"]+"[^>]+href="([^"]+)"[^>]+class="quality-btn"[^>]*>([0-9]+p)',
                 webpage):
-            webpage = self._download_webpage(
+            if need_confirm_age:
+                handler = self._download_age_confirm_page
+            else:
+                handler = self._download_webpage
+
+            webpage = handler(
                 self._BASE_URL + href, video_id,
                 'Downloading %s version information' % resolution, fatal=False)
             if not webpage:
@@ -129,6 +174,7 @@ class CDAIE(InfoExtractor):
                 # invalid version is requested.
                 self.report_warning('Unable to download %s version information' % resolution)
                 continue
+
             extract_format(webpage, resolution)
 
         self._sort_formats(formats)
