diff options
Diffstat (limited to 'youtube_dl/extractor/ivi.py')
| -rw-r--r-- | youtube_dl/extractor/ivi.py | 69 | 
1 files changed, 33 insertions, 36 deletions
| diff --git a/youtube_dl/extractor/ivi.py b/youtube_dl/extractor/ivi.py index 029878d24..216c534b5 100644 --- a/youtube_dl/extractor/ivi.py +++ b/youtube_dl/extractor/ivi.py @@ -7,6 +7,7 @@ import json  from .common import InfoExtractor  from ..utils import (      ExtractorError, +    int_or_none,      sanitized_Request,  ) @@ -27,7 +28,7 @@ class IviIE(InfoExtractor):                  'title': 'Иван Васильевич меняет профессию',                  'description': 'md5:b924063ea1677c8fe343d8a72ac2195f',                  'duration': 5498, -                'thumbnail': 'http://thumbs.ivi.ru/f20.vcp.digitalaccess.ru/contents/d/1/c3c885163a082c29bceeb7b5a267a6.jpg', +                'thumbnail': 're:^https?://.*\.jpg$',              },              'skip': 'Only works from Russia',          }, @@ -38,33 +39,23 @@ class IviIE(InfoExtractor):              'info_dict': {                  'id': '9549',                  'ext': 'mp4', -                'title': 'Двое из ларца - Серия 1', +                'title': 'Двое из ларца - Дело Гольдберга (1 часть)', +                'series': 'Двое из ларца', +                'episode': 'Дело Гольдберга (1 часть)', +                'episode_number': 1,                  'duration': 2655, -                'thumbnail': 'http://thumbs.ivi.ru/f15.vcp.digitalaccess.ru/contents/8/4/0068dc0677041f3336b7c2baad8fc0.jpg', +                'thumbnail': 're:^https?://.*\.jpg$',              },              'skip': 'Only works from Russia',          }      ]      # Sorted by quality -    _known_formats = ['MP4-low-mobile', 'MP4-mobile', 'FLV-lo', 'MP4-lo', 'FLV-hi', 'MP4-hi', 'MP4-SHQ'] - -    # Sorted by size -    _known_thumbnails = ['Thumb-120x90', 'Thumb-160', 'Thumb-640x480'] - -    def _extract_description(self, html): -        m = re.search(r'<meta name="description" content="(?P<description>[^"]+)"/>', html) -        return m.group('description') if m is not None else None - -    def _extract_comment_count(self, html): -        m = re.search('(?s)<a href="#" id="view-comments" class="action-button dim gradient">\s*Комментарии:\s*(?P<commentcount>\d+)\s*</a>', html) -        return int(m.group('commentcount')) if m is not None else 0 +    _KNOWN_FORMATS = ['MP4-low-mobile', 'MP4-mobile', 'FLV-lo', 'MP4-lo', 'FLV-hi', 'MP4-hi', 'MP4-SHQ']      def _real_extract(self, url):          video_id = self._match_id(url) -        api_url = 'http://api.digitalaccess.ru/api/json/' -          data = {              'method': 'da.content.get',              'params': [ @@ -76,11 +67,10 @@ class IviIE(InfoExtractor):              ]          } -        request = sanitized_Request(api_url, json.dumps(data)) - -        video_json_page = self._download_webpage( +        request = sanitized_Request( +            'http://api.digitalaccess.ru/api/json/', json.dumps(data)) +        video_json = self._download_json(              request, video_id, 'Downloading video JSON') -        video_json = json.loads(video_json_page)          if 'error' in video_json:              error = video_json['error'] @@ -95,35 +85,42 @@ class IviIE(InfoExtractor):          formats = [{              'url': x['url'],              'format_id': x['content_format'], -            'preference': self._known_formats.index(x['content_format']), -        } for x in result['files'] if x['content_format'] in self._known_formats] +            'preference': self._KNOWN_FORMATS.index(x['content_format']), +        } for x in result['files'] if x['content_format'] in self._KNOWN_FORMATS]          self._sort_formats(formats) -        if not formats: -            raise ExtractorError('No media links available for %s' % video_id) - -        duration = result['duration'] -        compilation = result['compilation']          title = result['title'] +        duration = int_or_none(result.get('duration')) +        compilation = result.get('compilation') +        episode = title if compilation else None +          title = '%s - %s' % (compilation, title) if compilation is not None else title -        previews = result['preview'] -        previews.sort(key=lambda fmt: self._known_thumbnails.index(fmt['content_format'])) -        thumbnail = previews[-1]['url'] if len(previews) > 0 else None +        thumbnails = [{ +            'url': preview['url'], +            'id': preview.get('content_format'), +        } for preview in result.get('preview', []) if preview.get('url')] + +        webpage = self._download_webpage(url, video_id) + +        episode_number = int_or_none(self._search_regex( +            r'<meta[^>]+itemprop="episode"[^>]*>\s*<meta[^>]+itemprop="episodeNumber"[^>]+content="(\d+)', +            webpage, 'episode number', default=None)) -        video_page = self._download_webpage(url, video_id, 'Downloading video page') -        description = self._extract_description(video_page) -        comment_count = self._extract_comment_count(video_page) +        description = self._og_search_description(webpage, default=None) or self._html_search_meta( +            'description', webpage, 'description', default=None)          return {              'id': video_id,              'title': title, -            'thumbnail': thumbnail, +            'series': compilation, +            'episode': episode, +            'episode_number': episode_number, +            'thumbnails': thumbnails,              'description': description,              'duration': duration, -            'comment_count': comment_count,              'formats': formats,          } | 
