diff options
| author | Sergey M․ <dstftw@gmail.com> | 2019-08-02 05:25:01 +0700 | 
|---|---|---|
| committer | Sergey M․ <dstftw@gmail.com> | 2019-08-02 05:25:01 +0700 | 
| commit | be306d6a313903a3ebdb8a8ff055bb6b58c9f818 (patch) | |
| tree | 6f7b3c0ca5645b53963612ac5ff9a44066c59ee8 | |
| parent | 33b529fabd282a371d3a4c21ee861badd20dae28 (diff) | |
[tvigle] Fix extraction and add support for HLS and DASH formats (closes #21967)
| -rw-r--r-- | youtube_dl/extractor/tvigle.py | 53 | 
1 file changed, 36 insertions, 17 deletions
```diff
diff --git a/youtube_dl/extractor/tvigle.py b/youtube_dl/extractor/tvigle.py
index 3475ef4c3..180259aba 100644
--- a/youtube_dl/extractor/tvigle.py
+++ b/youtube_dl/extractor/tvigle.py
@@ -9,6 +9,8 @@ from ..utils import (
     float_or_none,
     int_or_none,
     parse_age_limit,
+    try_get,
+    url_or_none,
 )
 
 
@@ -23,11 +25,10 @@ class TvigleIE(InfoExtractor):
     _TESTS = [
         {
             'url': 'http://www.tvigle.ru/video/sokrat/',
-            'md5': '36514aed3657d4f70b4b2cef8eb520cd',
             'info_dict': {
                 'id': '1848932',
                 'display_id': 'sokrat',
-                'ext': 'flv',
+                'ext': 'mp4',
                 'title': 'Сократ',
                 'description': 'md5:d6b92ffb7217b4b8ebad2e7665253c17',
                 'duration': 6586,
@@ -37,7 +38,6 @@ class TvigleIE(InfoExtractor):
         },
         {
             'url': 'http://www.tvigle.ru/video/vladimir-vysotskii/vedushchii-teleprogrammy-60-minut-ssha-o-vladimire-vysotskom/',
-            'md5': 'e7efe5350dd5011d0de6550b53c3ba7b',
             'info_dict': {
                 'id': '5142516',
                 'ext': 'flv',
@@ -62,7 +62,7 @@ class TvigleIE(InfoExtractor):
             webpage = self._download_webpage(url, display_id)
             video_id = self._html_search_regex(
                 (r'<div[^>]+class=["\']player["\'][^>]+id=["\'](\d+)',
-                 r'var\s+cloudId\s*=\s*["\'](\d+)',
+                 r'cloudId\s*=\s*["\'](\d+)',
                  r'class="video-preview current_playing" id="(\d+)"'),
                 webpage, 'video id')
 
@@ -90,21 +90,40 @@ class TvigleIE(InfoExtractor):
         age_limit = parse_age_limit(item.get('ageRestrictions'))
 
         formats = []
-        for vcodec, fmts in item['videos'].items():
+        for vcodec, url_or_fmts in item['videos'].items():
             if vcodec == 'hls':
-                continue
-            for format_id, video_url in fmts.items():
-                if format_id == 'm3u8':
+                m3u8_url = url_or_none(url_or_fmts)
+                if not m3u8_url:
+                    continue
+                formats.extend(self._extract_m3u8_formats(
+                    m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native',
+                    m3u8_id='hls', fatal=False))
+            elif vcodec == 'dash':
+                mpd_url = url_or_none(url_or_fmts)
+                if not mpd_url:
+                    continue
+                formats.extend(self._extract_mpd_formats(
+                    mpd_url, video_id, mpd_id='dash', fatal=False))
+            else:
+                if not isinstance(url_or_fmts, dict):
                     continue
-                height = self._search_regex(
-                    r'^(\d+)[pP]$', format_id, 'height', default=None)
-                formats.append({
-                    'url': video_url,
-                    'format_id': '%s-%s' % (vcodec, format_id),
-                    'vcodec': vcodec,
-                    'height': int_or_none(height),
-                    'filesize': int_or_none(item.get('video_files_size', {}).get(vcodec, {}).get(format_id)),
-                })
+                for format_id, video_url in url_or_fmts.items():
+                    if format_id == 'm3u8':
+                        continue
+                    video_url = url_or_none(video_url)
+                    if not video_url:
+                        continue
+                    height = self._search_regex(
+                        r'^(\d+)[pP]$', format_id, 'height', default=None)
+                    filesize = int_or_none(try_get(
+                        item, lambda x: x['video_files_size'][vcodec][format_id]))
+                    formats.append({
+                        'url': video_url,
+                        'format_id': '%s-%s' % (vcodec, format_id),
+                        'vcodec': vcodec,
+                        'height': int_or_none(height),
+                        'filesize': filesize,
+                    })
         self._sort_formats(formats)
 
         return {
```
