diff options
Diffstat (limited to 'youtube_dl/extractor/arte.py')
| -rw-r--r-- | youtube_dl/extractor/arte.py | 114 | 
1 files changed, 44 insertions, 70 deletions
diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py index b9a9440c0..3a57ce527 100644 --- a/youtube_dl/extractor/arte.py +++ b/youtube_dl/extractor/arte.py @@ -5,13 +5,12 @@ import re  from .common import InfoExtractor  from ..utils import ( -    ExtractorError,      find_xpath_attr,      unified_strdate, -    determine_ext,      get_element_by_id,      get_element_by_attribute,      int_or_none, +    qualities,  )  # There are different sources of video in arte.tv, the extraction process  @@ -102,79 +101,54 @@ class ArteTVPlus7IE(InfoExtractor):              'upload_date': unified_strdate(upload_date_str),              'thumbnail': player_info.get('programImage') or player_info.get('VTU', {}).get('IUR'),          } +        qfunc = qualities(['HQ', 'MQ', 'EQ', 'SQ']) -        all_formats = [] +        formats = []          for format_id, format_dict in player_info['VSR'].items(): -            fmt = dict(format_dict) -            fmt['format_id'] = format_id -            all_formats.append(fmt) -        # Some formats use the m3u8 protocol -        all_formats = list(filter(lambda f: f.get('videoFormat') != 'M3U8', all_formats)) -        def _match_lang(f): -            if f.get('versionCode') is None: -                return True -            # Return true if that format is in the language of the url -            if lang == 'fr': -                l = 'F' -            elif lang == 'de': -                l = 'A' -            else: -                l = lang -            regexes = [r'VO?%s' % l, r'VO?.-ST%s' % l] -            return any(re.match(r, f['versionCode']) for r in regexes) -        # Some formats may not be in the same language as the url -        # TODO: Might want not to drop videos that does not match requested language -        # but to process those formats with lower precedence -        formats = filter(_match_lang, all_formats) -        formats = list(formats)  # in python3 filter returns an iterator -        if not formats: -            # Some videos are only available in the 'Originalversion' -            # they aren't tagged as being in French or German -            # Sometimes there are neither videos of requested lang code -            # nor original version videos available -            # For such cases we just take all_formats as is -            formats = all_formats -            if not formats: -                raise ExtractorError('The formats list is empty') - -        if re.match(r'[A-Z]Q', formats[0]['quality']) is not None: -            def sort_key(f): -                return ['HQ', 'MQ', 'EQ', 'SQ'].index(f['quality']) -        else: -            def sort_key(f): -                versionCode = f.get('versionCode') -                if versionCode is None: -                    versionCode = '' -                return ( -                    # Sort first by quality -                    int(f.get('height', -1)), -                    int(f.get('bitrate', -1)), -                    # The original version with subtitles has lower relevance -                    re.match(r'VO-ST(F|A)', versionCode) is None, -                    # The version with sourds/mal subtitles has also lower relevance -                    re.match(r'VO?(F|A)-STM\1', versionCode) is None, -                    # Prefer http downloads over m3u8 -                    0 if f['url'].endswith('m3u8') else 1, -                ) -        formats = sorted(formats, key=sort_key) -        def _format(format_info): -            info = { -                'format_id': format_info['format_id'], -                'format_note': '%s, %s' % (format_info.get('versionCode'), format_info.get('versionLibelle')), -                'width': int_or_none(format_info.get('width')), -                'height': int_or_none(format_info.get('height')), -                'tbr': int_or_none(format_info.get('bitrate')), +            f = dict(format_dict) +            versionCode = f.get('versionCode') + +            langcode = { +                'fr': 'F', +                'de': 'A', +            }.get(lang, lang) +            lang_rexs = [r'VO?%s' % langcode, r'VO?.-ST%s' % langcode] +            lang_pref = ( +                None if versionCode is None else ( +                    10 if any(re.match(r, versionCode) for r in lang_rexs) +                    else -10)) +            source_pref = 0 +            if versionCode is not None: +                # The original version with subtitles has lower relevance +                if re.match(r'VO-ST(F|A)', versionCode): +                    source_pref -= 10 +                # The version with sourds/mal subtitles has also lower relevance +                elif re.match(r'VO?(F|A)-STM\1', versionCode): +                    source_pref -= 9 +            format = { +                'format_id': format_id, +                'preference': -10 if f.get('videoFormat') == 'M3U8' else None, +                'language_preference': lang_pref, +                'format_note': '%s, %s' % (f.get('versionCode'), f.get('versionLibelle')), +                'width': int_or_none(f.get('width')), +                'height': int_or_none(f.get('height')), +                'tbr': int_or_none(f.get('bitrate')), +                'quality': qfunc(f['quality']), +                'source_preference': source_pref,              } -            if format_info['mediaType'] == 'rtmp': -                info['url'] = format_info['streamer'] -                info['play_path'] = 'mp4:' + format_info['url'] -                info['ext'] = 'flv' + +            if f.get('mediaType') == 'rtmp': +                format['url'] = f['streamer'] +                format['play_path'] = 'mp4:' + f['url'] +                format['ext'] = 'flv'              else: -                info['url'] = format_info['url'] -                info['ext'] = determine_ext(info['url']) -            return info -        info_dict['formats'] = [_format(f) for f in formats] +                format['url'] = f['url'] + +            formats.append(format) + +        self._sort_formats(formats) +        info_dict['formats'] = formats          return info_dict  | 
