diff options
| -rw-r--r-- | youtube_dl/extractor/__init__.py | 1 | ||||
| -rw-r--r-- | youtube_dl/extractor/channel9.py | 12 | ||||
| -rw-r--r-- | youtube_dl/extractor/criterion.py | 4 | ||||
| -rw-r--r-- | youtube_dl/extractor/fczenit.py | 41 | ||||
| -rw-r--r-- | youtube_dl/extractor/fivemin.py | 84 | ||||
| -rw-r--r-- | youtube_dl/extractor/vimeo.py | 2 | 
6 files changed, 121 insertions, 23 deletions
| diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 75720843c..f6d185818 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -167,6 +167,7 @@ from .extremetube import ExtremeTubeIE  from .facebook import FacebookIE  from .faz import FazIE  from .fc2 import FC2IE +from .fczenit import FczenitIE  from .firstpost import FirstpostIE  from .firsttv import FirstTVIE  from .fivemin import FiveMinIE diff --git a/youtube_dl/extractor/channel9.py b/youtube_dl/extractor/channel9.py index 3dfc24f5b..1ce004932 100644 --- a/youtube_dl/extractor/channel9.py +++ b/youtube_dl/extractor/channel9.py @@ -158,7 +158,7 @@ class Channel9IE(InfoExtractor):      def _extract_session_day(self, html):          m = re.search(r'<li class="day">\s*<a href="/Events/[^"]+">(?P<day>[^<]+)</a>\s*</li>', html) -        return m.group('day') if m is not None else None +        return m.group('day').strip() if m is not None else None      def _extract_session_room(self, html):          m = re.search(r'<li class="room">\s*(?P<room>.+?)\s*</li>', html) @@ -224,12 +224,12 @@ class Channel9IE(InfoExtractor):          if contents is None:              return contents -        authors = self._extract_authors(html) +        if len(contents) > 1: +            raise ExtractorError('Got more than one entry') +        result = contents[0] +        result['authors'] = self._extract_authors(html) -        for content in contents: -            content['authors'] = authors - -        return contents +        return result      def _extract_session(self, html, content_path):          contents = self._extract_content(html, content_path) diff --git a/youtube_dl/extractor/criterion.py b/youtube_dl/extractor/criterion.py index 4fb178165..dedb810a0 100644 --- a/youtube_dl/extractor/criterion.py +++ b/youtube_dl/extractor/criterion.py @@ -27,9 +27,7 @@ class CriterionIE(InfoExtractor):          final_url = self._search_regex(              r'so.addVariable\("videoURL", "(.+?)"\)\;', webpage, 'video url')          title = self._og_search_title(webpage) -        description = self._html_search_regex( -            r'<meta name="description" content="(.+?)" />', -            webpage, 'video description') +        description = self._html_search_meta('description', webpage)          thumbnail = self._search_regex(              r'so.addVariable\("thumbnailURL", "(.+?)"\)\;',              webpage, 'thumbnail url') diff --git a/youtube_dl/extractor/fczenit.py b/youtube_dl/extractor/fczenit.py new file mode 100644 index 000000000..f1f150ef2 --- /dev/null +++ b/youtube_dl/extractor/fczenit.py @@ -0,0 +1,41 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + + +class FczenitIE(InfoExtractor): +    _VALID_URL = r'https?://(?:www\.)?fc-zenit\.ru/video/gl(?P<id>[0-9]+)' +    _TEST = { +        'url': 'http://fc-zenit.ru/video/gl6785/', +        'md5': '458bacc24549173fe5a5aa29174a5606', +        'info_dict': { +            'id': '6785', +            'ext': 'mp4', +            'title': '«Зенит-ТВ»: как Олег Шатов играл против «Урала»', +        }, +    } + +    def _real_extract(self, url): +        video_id = self._match_id(url) +        webpage = self._download_webpage(url, video_id) + +        video_title = self._html_search_regex(r'<div class=\"photoalbum__title\">([^<]+)', webpage, 'title') + +        bitrates_raw = self._html_search_regex(r'bitrates:.*\n(.*)\]', webpage, 'video URL') +        bitrates = re.findall(r'url:.?\'(.+?)\'.*?bitrate:.?([0-9]{3}?)', bitrates_raw) + +        formats = [{ +            'url': furl, +            'tbr': tbr, +        } for furl, tbr in bitrates] + +        self._sort_formats(formats) + +        return { +            'id': video_id, +            'title': video_title, +            'formats': formats, +        } diff --git a/youtube_dl/extractor/fivemin.py b/youtube_dl/extractor/fivemin.py index 157094e8c..2955965d9 100644 --- a/youtube_dl/extractor/fivemin.py +++ b/youtube_dl/extractor/fivemin.py @@ -2,11 +2,15 @@ from __future__ import unicode_literals  from .common import InfoExtractor  from ..compat import ( -    compat_str,      compat_urllib_parse, +    compat_parse_qs, +    compat_urllib_parse_urlparse, +    compat_urlparse,  )  from ..utils import (      ExtractorError, +    parse_duration, +    replace_extension,  ) @@ -28,6 +32,7 @@ class FiveMinIE(InfoExtractor):                  'id': '518013791',                  'ext': 'mp4',                  'title': 'iPad Mini with Retina Display Review', +                'duration': 177,              },          },          { @@ -38,9 +43,52 @@ class FiveMinIE(InfoExtractor):                  'id': '518086247',                  'ext': 'mp4',                  'title': 'How to Make a Next-Level Fruit Salad', +                'duration': 184,              },          },      ] +    _ERRORS = { +        'ErrorVideoNotExist': 'We\'re sorry, but the video you are trying to watch does not exist.', +        'ErrorVideoNoLongerAvailable': 'We\'re sorry, but the video you are trying to watch is no longer available.', +        'ErrorVideoRejected': 'We\'re sorry, but the video you are trying to watch has been removed.', +        'ErrorVideoUserNotGeo': 'We\'re sorry, but the video you are trying to watch cannot be viewed from your current location.', +        'ErrorVideoLibraryRestriction': 'We\'re sorry, but the video you are trying to watch is currently unavailable for viewing at this domain.', +        'ErrorExposurePermission': 'We\'re sorry, but the video you are trying to watch is currently unavailable for viewing at this domain.', +    } +    _QUALITIES = { +        1: { +            'width': 640, +            'height': 360, +        }, +        2: { +            'width': 854, +            'height': 480, +        }, +        4: { +            'width': 1280, +            'height': 720, +        }, +        8: { +            'width': 1920, +            'height': 1080, +        }, +        16: { +            'width': 640, +            'height': 360, +        }, +        32: { +            'width': 854, +            'height': 480, +        }, +        64: { +            'width': 1280, +            'height': 720, +        }, +        128: { +            'width': 640, +            'height': 360, +        }, +    }      def _real_extract(self, url):          video_id = self._match_id(url) @@ -59,26 +107,36 @@ class FiveMinIE(InfoExtractor):              'https://syn.5min.com/handlers/SenseHandler.ashx?' + query,              video_id)          if not response['success']: -            err_msg = response['errorMessage'] -            if err_msg == 'ErrorVideoUserNotGeo': -                msg = 'Video not available from your location' -            else: -                msg = 'Aol said: %s' % err_msg -            raise ExtractorError(msg, expected=True, video_id=video_id) +            raise ExtractorError( +                '%s said: %s' % ( +                    self.IE_NAME, +                    self._ERRORS.get(response['errorMessage'], response['errorMessage'])), +                expected=True)          info = response['binding'][0] -        second_id = compat_str(int(video_id[:-2]) + 1)          formats = [] -        for quality, height in [(1, 320), (2, 480), (4, 720), (8, 1080)]: -            if any(r['ID'] == quality for r in info['Renditions']): +        parsed_video_url = compat_urllib_parse_urlparse(compat_parse_qs( +            compat_urllib_parse_urlparse(info['EmbededURL']).query)['videoUrl'][0]) +        for rendition in info['Renditions']: +            if rendition['RenditionType'] == 'm3u8': +                formats.extend(self._extract_m3u8_formats(rendition['Url'], video_id, m3u8_id='hls')) +            elif rendition['RenditionType'] == 'aac': +                continue +            else: +                rendition_url = compat_urlparse.urlunparse(parsed_video_url._replace(path=replace_extension(parsed_video_url.path.replace('//', '/%s/' % rendition['ID']), rendition['RenditionType']))) +                quality = self._QUALITIES.get(rendition['ID'], {})                  formats.append({ -                    'format_id': compat_str(quality), -                    'url': 'http://avideos.5min.com/%s/%s/%s_%s.mp4' % (second_id[-3:], second_id, video_id, quality), -                    'height': height, +                    'format_id': '%s-%d' % (rendition['RenditionType'], rendition['ID']), +                    'url': rendition_url, +                    'width': quality.get('width'), +                    'height': quality.get('height'),                  }) +        self._sort_formats(formats)          return {              'id': video_id,              'title': info['Title'], +            'thumbnail': info.get('ThumbURL'), +            'duration': parse_duration(info.get('Duration')),              'formats': formats,          } diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 027f47ee3..fa1b22049 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -212,7 +212,7 @@ class VimeoIE(VimeoBaseInfoExtractor):              url = url.replace('http://', 'https://')          password_request = compat_urllib_request.Request(url + '/password', data)          password_request.add_header('Content-Type', 'application/x-www-form-urlencoded') -        password_request.add_header('Cookie', 'clip_v=1; vuid=%s' % vuid) +        password_request.add_header('Cookie', 'clip_test2=1; vuid=%s' % vuid)          password_request.add_header('Referer', url)          return self._download_webpage(              password_request, video_id, | 
