diff options
| -rw-r--r-- | youtube_dl/extractor/firsttv.py | 57 | 
1 files changed, 39 insertions, 18 deletions
diff --git a/youtube_dl/extractor/firsttv.py b/youtube_dl/extractor/firsttv.py
index 08ceee4ed..6e015ca16 100644
--- a/youtube_dl/extractor/firsttv.py
+++ b/youtube_dl/extractor/firsttv.py
@@ -8,45 +8,66 @@ from ..utils import int_or_none
 
 
 class FirstTVIE(InfoExtractor):
-    IE_NAME = 'firsttv'
-    IE_DESC = 'Видеоархив - Первый канал'
-    _VALID_URL = r'http://(?:www\.)?1tv\.ru/videoarchive/(?P<id>\d+)'
+    IE_NAME = '1tv'
+    IE_DESC = 'Первый канал'
+    _VALID_URL = r'http://(?:www\.)?1tv\.ru/(?:[^/]+/)+(?P<id>.+)'
 
-    _TEST = {
+    _TESTS = [{
         'url': 'http://www.1tv.ru/videoarchive/73390',
-        'md5': '3de6390cf0cca4a5eae1d1d83895e5ad',
+        'md5': '777f525feeec4806130f4f764bc18a4f',
         'info_dict': {
             'id': '73390',
             'ext': 'mp4',
             'title': 'Олимпийские канатные дороги',
-            'description': 'md5:cc730d2bf4215463e37fff6a1e277b13',
-            'thumbnail': 'http://img1.1tv.ru/imgsize640x360/PR20140210114657.JPG',
+            'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
+            'thumbnail': 're:^https?://.*\.(?:jpg|JPG)$',
             'duration': 149,
+            'like_count': int,
+            'dislike_count': int,
         },
         'skip': 'Only works from Russia',
-    }
+    }, {
+        'url': 'http://www.1tv.ru/prj/inprivate/vypusk/35930',
+        'md5': 'a1b6b60d530ebcf8daacf4565762bbaf',
+        'info_dict': {
+            'id': '35930',
+            'ext': 'mp4',
+            'title': 'Наедине со всеми. Людмила Сенчина',
+            'description': 'md5:89553aed1d641416001fe8d450f06cb9',
+            'thumbnail': 're:^https?://.*\.(?:jpg|JPG)$',
+            'duration': 2694,
+        },
+        'skip': 'Only works from Russia',
+    }]
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
 
         webpage = self._download_webpage(url, video_id, 'Downloading page')
 
         video_url = self._html_search_regex(
-            r'''(?s)jwplayer\('flashvideoportal_1'\)\.setup\({.*?'file': '([^']+)'.*?}\);''', webpage, 'video URL')
+            r'''(?s)(?:jwplayer\('flashvideoportal_1'\)\.setup\({|var\s+playlistObj\s*=).*?'file'\s*:\s*'([^']+)'.*?}\);''',
+            webpage, 'video URL')
 
         title = self._html_search_regex(
-            r'<div class="tv_translation">\s*<h1><a href="[^"]+">([^<]*)</a>', webpage, 'title')
+            [r'<div class="tv_translation">\s*<h1><a href="[^"]+">([^<]*)</a>',
+             r"'title'\s*:\s*'([^']+)'"], webpage, 'title')
         description = self._html_search_regex(
-            r'<div class="descr">\s*<div> </div>\s*<p>([^<]*)</p></div>', webpage, 'description', fatal=False)
+            r'<div class="descr">\s*<div> </div>\s*<p>([^<]*)</p></div>',
+            webpage, 'description', default=None) or self._html_search_meta(
+                'description', webpage, 'description')
 
         thumbnail = self._og_search_thumbnail(webpage)
-        duration = self._og_search_property('video:duration', webpage, 'video duration', fatal=False)
+        duration = self._og_search_property(
+            'video:duration', webpage,
+            'video duration', fatal=False)
 
-        like_count = self._html_search_regex(r'title="Понравилось".*?/></label> \[(\d+)\]',
-                                             webpage, 'like count', fatal=False)
-        dislike_count = self._html_search_regex(r'title="Не понравилось".*?/></label> \[(\d+)\]',
-                                                webpage, 'dislike count', fatal=False)
+        like_count = self._html_search_regex(
+            r'title="Понравилось".*?/></label> \[(\d+)\]',
+            webpage, 'like count', default=None)
+        dislike_count = self._html_search_regex(
+            r'title="Не понравилось".*?/></label> \[(\d+)\]',
+            webpage, 'dislike count', default=None)
 
         return {
             'id': video_id,
