diff options
Diffstat (limited to 'youtube_dl/extractor/smotri.py')
| -rw-r--r-- | youtube_dl/extractor/smotri.py | 59 | 
1 files changed, 38 insertions, 21 deletions
| diff --git a/youtube_dl/extractor/smotri.py b/youtube_dl/extractor/smotri.py index 4ea89bf85..beea58d63 100644 --- a/youtube_dl/extractor/smotri.py +++ b/youtube_dl/extractor/smotri.py @@ -1,5 +1,6 @@  # encoding: utf-8 +import os.path  import re  import json  import hashlib @@ -10,6 +11,7 @@ from ..utils import (      compat_urllib_parse,      compat_urllib_request,      ExtractorError, +    url_basename,  ) @@ -132,7 +134,16 @@ class SmotriIE(InfoExtractor):          # We will extract some from the video web page instead          video_page_url = 'http://' + mobj.group('url')          video_page = self._download_webpage(video_page_url, video_id, u'Downloading video page') -         + +        # Warning if video is unavailable +        warning = self._html_search_regex( +            r'<div class="videoUnModer">(.*?)</div>', video_page, +            u'warning messagef', default=None) +        if warning is not None: +            self._downloader.report_warning( +                u'Video %s may not be available; smotri said: %s ' % +                (video_id, warning)) +          # Adult content          if re.search(u'EroConfirmText">', video_page) is not None:              self.report_age_confirmation() @@ -148,38 +159,44 @@ class SmotriIE(InfoExtractor):          # Extract the rest of meta data          video_title = self._search_meta(u'name', video_page, u'title')          if not video_title: -            video_title = video_url.rsplit('/', 1)[-1] +            video_title = os.path.splitext(url_basename(video_url))[0]          video_description = self._search_meta(u'description', video_page)          END_TEXT = u' на сайте Smotri.com' -        if video_description.endswith(END_TEXT): +        if video_description and video_description.endswith(END_TEXT):              video_description = video_description[:-len(END_TEXT)]          START_TEXT = u'Смотреть онлайн ролик ' -        if video_description.startswith(START_TEXT): +        if video_description and video_description.startswith(START_TEXT):              video_description = video_description[len(START_TEXT):]          video_thumbnail = self._search_meta(u'thumbnail', video_page)          upload_date_str = self._search_meta(u'uploadDate', video_page, u'upload date') -        upload_date_m = re.search(r'(?P<year>\d{4})\.(?P<month>\d{2})\.(?P<day>\d{2})T', upload_date_str) -        video_upload_date = ( -            ( -                upload_date_m.group('year') + -                upload_date_m.group('month') + -                upload_date_m.group('day') +        if upload_date_str: +            upload_date_m = re.search(r'(?P<year>\d{4})\.(?P<month>\d{2})\.(?P<day>\d{2})T', upload_date_str) +            video_upload_date = ( +                ( +                    upload_date_m.group('year') + +                    upload_date_m.group('month') + +                    upload_date_m.group('day') +                ) +                if upload_date_m else None              ) -            if upload_date_m else None -        ) +        else: +            video_upload_date = None          duration_str = self._search_meta(u'duration', video_page) -        duration_m = re.search(r'T(?P<hours>[0-9]{2})H(?P<minutes>[0-9]{2})M(?P<seconds>[0-9]{2})S', duration_str) -        video_duration = ( -            ( -                (int(duration_m.group('hours')) * 60 * 60) + -                (int(duration_m.group('minutes')) * 60) + -                int(duration_m.group('seconds')) +        if duration_str: +            duration_m = re.search(r'T(?P<hours>[0-9]{2})H(?P<minutes>[0-9]{2})M(?P<seconds>[0-9]{2})S', duration_str) +            video_duration = ( +                ( +                    (int(duration_m.group('hours')) * 60 * 60) + +                    (int(duration_m.group('minutes')) * 60) + +                    int(duration_m.group('seconds')) +                ) +                if duration_m else None              ) -            if duration_m else None -        ) +        else: +            video_duration = None          video_uploader = self._html_search_regex(              u'<div class="DescrUser"><div>Автор.*?onmouseover="popup_user_info[^"]+">(.*?)</a>', @@ -202,7 +219,7 @@ class SmotriIE(InfoExtractor):              'uploader': video_uploader,              'upload_date': video_upload_date,              'uploader_id': video_uploader_id, -            'video_duration': video_duration, +            'duration': video_duration,              'view_count': video_view_count,              'age_limit': 18 if adult_content else 0,              'video_page_url': video_page_url | 
