diff options
Diffstat (limited to 'youtube_dl/extractor')
| -rw-r--r-- | youtube_dl/extractor/vbox7.py | 80 | 
1 files changed, 53 insertions, 27 deletions
| diff --git a/youtube_dl/extractor/vbox7.py b/youtube_dl/extractor/vbox7.py index d114ecb07..1c0b77074 100644 --- a/youtube_dl/extractor/vbox7.py +++ b/youtube_dl/extractor/vbox7.py @@ -7,6 +7,7 @@ import time  from .common import InfoExtractor  from ..compat import compat_kwargs  from ..utils import ( +    base_url,      determine_ext,      ExtractorError,      float_or_none, @@ -14,6 +15,7 @@ from ..utils import (      T,      traverse_obj,      txt_or_none, +    url_basename,      url_or_none,  ) @@ -33,8 +35,8 @@ class Vbox7IE(InfoExtractor):                      '''      _EMBED_REGEX = [r'<iframe[^>]+src=(?P<q>["\'])(?P<url>(?:https?:)?//vbox7\.com/emb/external\.php.+?)(?P=q)']      _GEO_COUNTRIES = ['BG'] -    _GEO_BYPASS = False      _TESTS = [{ +        # the http: URL just redirects here          'url': 'https://vbox7.com/play:0946fff23c',          'md5': '50ca1f78345a9c15391af47d8062d074',          'info_dict': { @@ -42,17 +44,19 @@ class Vbox7IE(InfoExtractor):              'ext': 'mp4',              'title': 'Борисов: Притеснен съм за бъдещето на България',              'description': 'По думите му е опасно страната ни да бъде обявена за "сигурна"', -            'thumbnail': r're:^https?://.*\.jpg$',              'timestamp': 1470982814,              'upload_date': '20160812',              'uploader': 'zdraveibulgaria', +            'thumbnail': r're:^https?://.*\.jpg$', +            'view_count': int, +            'duration': 2640,          },          'expected_warnings': [              'Unable to download webpage',          ],      }, {          'url': 'http://vbox7.com/play:249bb972c2', -        'md5': 'aaf19465e37ec0b30b918df83ec32c50', +        'md5': '99f65c0c9ef9b682b97313e052734c3f',          'info_dict': {              'id': '249bb972c2',              'ext': 'mp4', @@ -61,7 +65,11 @@ class Vbox7IE(InfoExtractor):              'timestamp': 1360215023,              'upload_date': '20130207',              'uploader': 'svideteliat_ot_varshava', +            'thumbnail': 'https://i49.vbox7.com/o/249/249bb972c20.jpg', +            'view_count': int, +            'duration': 83,          }, +        'expected_warnings': ['Failed to download m3u8 information'],      }, {          'url': 'http://vbox7.com/emb/external.php?vid=a240d20f9c&autoplay=1',          'only_matching': True, @@ -76,6 +84,9 @@ class Vbox7IE(InfoExtractor):          if mobj:              return mobj.group('url') +    # specialisation to transform what looks like ld+json that +    # may contain invalid character combinations +      # transform_source=None, fatal=True      def _parse_json(self, json_string, video_id, *args, **kwargs):          if '"@context"' in json_string[:30]: @@ -103,49 +114,64 @@ class Vbox7IE(InfoExtractor):          now = time.time()          response = self._download_json( -            'https://www.vbox7.com/aj/player/item/options?vid=%s' % (video_id,), -            video_id, headers={'Referer': url}) +            'https://www.vbox7.com/aj/player/item/options', video_id, +            query={'vid': video_id}, headers={'Referer': url})          # estimate time to which possible `ago` member is relative          now = now + 0.5 * (time.time() - now) -        if 'error' in response: +        if traverse_obj(response, 'error'):              raise ExtractorError(                  '%s said: %s' % (self.IE_NAME, response['error']), expected=True) -        video_url = traverse_obj(response, ('options', 'src', T(url_or_none))) +        src_url = traverse_obj(response, ('options', 'src', T(url_or_none))) or '' -        if '/na.mp4' in video_url or '': +        fmt_base = url_basename(src_url).rsplit('.', 1)[0].rsplit('_', 1)[0] +        if fmt_base in ('na', 'vn'):              self.raise_geo_restricted(countries=self._GEO_COUNTRIES) -        ext = determine_ext(video_url) +        ext = determine_ext(src_url)          if ext == 'mpd': -            # In case MPD cannot be parsed, or anyway, get mp4 combined -            # formats usually provided to Safari, iOS, and old Windows +            # extract MPD              try:                  formats, subtitles = self._extract_mpd_formats_and_subtitles( -                    video_url, video_id, 'dash', fatal=False) -            except KeyError: +                    src_url, video_id, 'dash', fatal=False) +            except KeyError:  # fatal doesn't catch this                  self.report_warning('Failed to parse MPD manifest')                  formats, subtitles = [], {} +        elif ext != 'm3u8': +            formats = [{ +                'url': src_url, +            }] if src_url else [] +            subtitles = {} +        if src_url: +            # possibly extract HLS, based on https://github.com/yt-dlp/yt-dlp/pull/9100 +            fmt_base = base_url(src_url) + fmt_base +            # prepare for _extract_m3u8_formats_and_subtitles() +            # hls_formats, hls_subs = self._extract_m3u8_formats_and_subtitles( +            hls_formats = self._extract_m3u8_formats( +                '{0}.m3u8'.format(fmt_base), video_id, m3u8_id='hls', fatal=False) +            formats.extend(hls_formats) +            # self._merge_subtitles(hls_subs, target=subtitles) + +            # In case MPD/HLS cannot be parsed, or anyway, get mp4 combined +            # formats usually provided to Safari, iOS, and old Windows              video = response['options']              resolutions = (1080, 720, 480, 240, 144) -            highest_res = traverse_obj(video, ('highestRes', T(int))) or resolutions[0] -            for res in traverse_obj(video, ('resolutions', lambda _, r: int(r) > 0)) or resolutions: -                if res > highest_res: -                    continue -                formats.append({ -                    'url': video_url.replace('.mpd', '_%d.mp4' % res), -                    'format_id': '%dp' % res, +            highest_res = traverse_obj(video, ( +                'highestRes', T(int))) or resolutions[0] +            resolutions = traverse_obj(video, ( +                'resolutions', lambda _, r: highest_res >= int(r) > 0)) or resolutions +            mp4_formats = traverse_obj(resolutions, ( +                Ellipsis, T(lambda res: { +                    'url': '{0}_{1}.mp4'.format(fmt_base, res), +                    'format_id': 'http-{0}'.format(res),                      'height': res, -                }) +                })))              # if above formats are flaky, enable the line below -            # self._check_formats(formats, video_id) -        else: -            formats = [{ -                'url': video_url, -            }] -            subtitles = {} +            # self._check_formats(mp4_formats, video_id) +            formats.extend(mp4_formats) +          self._sort_formats(formats)          webpage = self._download_webpage(url, video_id, fatal=False) or '' | 
