diff options
| author | nixxo <nixxo@protonmail.com> | 2021-01-07 14:48:45 +0100 | 
|---|---|---|
| committer | GitHub <noreply@github.com> | 2021-01-07 13:48:45 +0000 | 
| commit | 3a61e6d36003b6043dd53dec5ae714a839225044 (patch) | |
| tree | e2d07586424a9ad9ea386f5d47edf396ca4b3ec1 /youtube_dl/extractor/rai.py | |
| parent | 3d8e32dcc0bc6014fe05f445d755907ae046d5b5 (diff) | |
[rai] improve subtitles extraction (#27705)
closes #27698
Diffstat (limited to 'youtube_dl/extractor/rai.py')
| -rw-r--r-- | youtube_dl/extractor/rai.py | 50 | 
1 files changed, 33 insertions, 17 deletions
```diff
diff --git a/youtube_dl/extractor/rai.py b/youtube_dl/extractor/rai.py
index 0a68d16b0..67b86fc72 100644
--- a/youtube_dl/extractor/rai.py
+++ b/youtube_dl/extractor/rai.py
@@ -103,22 +103,28 @@ class RaiBaseIE(InfoExtractor):
         }.items() if v is not None)
 
     @staticmethod
-    def _extract_subtitles(url, subtitle_url):
+    def _extract_subtitles(url, video_data):
+        STL_EXT = 'stl'
+        SRT_EXT = 'srt'
         subtitles = {}
-        if subtitle_url and isinstance(subtitle_url, compat_str):
-            subtitle_url = urljoin(url, subtitle_url)
-            STL_EXT = '.stl'
-            SRT_EXT = '.srt'
-            subtitles['it'] = [{
-                'ext': 'stl',
-                'url': subtitle_url,
-            }]
-            if subtitle_url.endswith(STL_EXT):
-                srt_url = subtitle_url[:-len(STL_EXT)] + SRT_EXT
-                subtitles['it'].append({
-                    'ext': 'srt',
-                    'url': srt_url,
+        subtitles_array = video_data.get('subtitlesArray') or []
+        for k in ('subtitles', 'subtitlesUrl'):
+            subtitles_array.append({'url': video_data.get(k)})
+        for subtitle in subtitles_array:
+            sub_url = subtitle.get('url')
+            if sub_url and isinstance(sub_url, compat_str):
+                sub_lang = subtitle.get('language') or 'it'
+                sub_url = urljoin(url, sub_url)
+                sub_ext = determine_ext(sub_url, SRT_EXT)
+                subtitles.setdefault(sub_lang, []).append({
+                    'ext': sub_ext,
+                    'url': sub_url,
                 })
+                if STL_EXT == sub_ext:
+                    subtitles[sub_lang].append({
+                        'ext': SRT_EXT,
+                        'url': sub_url[:-len(STL_EXT)] + SRT_EXT,
+                    })
         return subtitles
 
 
@@ -138,6 +144,9 @@ class RaiPlayIE(RaiBaseIE):
             'duration': 6160,
             'series': 'Report',
             'season': '2013/14',
+            'subtitles': {
+                'it': 'count:2',
+            },
         },
         'params': {
             'skip_download': True,
@@ -145,6 +154,10 @@ class RaiPlayIE(RaiBaseIE):
     }, {
         'url': 'http://www.raiplay.it/video/2016/11/gazebotraindesi-efebe701-969c-4593-92f3-285f0d1ce750.html?',
         'only_matching': True,
+    }, {
+        # subtitles at 'subtitlesArray' key (see #27698)
+        'url': 'https://www.raiplay.it/video/2020/12/Report---04-01-2021-2e90f1de-8eee-4de4-ac0e-78d21db5b600.html',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
@@ -172,7 +185,7 @@ class RaiPlayIE(RaiBaseIE):
         if date_published and time_published:
             date_published += ' ' + time_published
 
-        subtitles = self._extract_subtitles(url, video.get('subtitles'))
+        subtitles = self._extract_subtitles(url, video)
 
         program_info = media.get('program_info') or {}
         season = media.get('season')
@@ -327,7 +340,7 @@ class RaiIE(RaiBaseIE):
             'skip_download': True,
         },
     }, {
-        # ContentItem in iframe (see #12652)
+        # ContentItem in iframe (see #12652) and subtitle at 'subtitlesUrl' key
         'url': 'http://www.presadiretta.rai.it/dl/portali/site/puntata/ContentItem-3ed19d13-26c2-46ff-a551-b10828262f1b.html',
         'info_dict': {
             'id': '1ad6dc64-444a-42a4-9bea-e5419ad2f5fd',
@@ -335,6 +348,9 @@ class RaiIE(RaiBaseIE):
             'title': 'Partiti acchiappavoti - Presa diretta del 13/09/2015',
             'description': 'md5:d291b03407ec505f95f27970c0b025f4',
             'upload_date': '20150913',
+            'subtitles': {
+                'it': 'count:2',
+            },
         },
         'params': {
             'skip_download': True,
@@ -379,7 +395,7 @@ class RaiIE(RaiBaseIE):
                     'url': compat_urlparse.urljoin(url, thumbnail_url),
                 })
 
-        subtitles = self._extract_subtitles(url, media.get('subtitlesUrl'))
+        subtitles = self._extract_subtitles(url, media)
 
         info = {
             'id': content_id,
```
