diff options
| author | Joost Verdoorn <jpverdoorn@gmail.com> | 2020-11-19 17:26:53 +0100 | 
|---|---|---|
| committer | GitHub <noreply@github.com> | 2020-11-19 17:26:53 +0100 | 
| commit | cf1a8668e8e47a56c834fb567d227787d7480d08 (patch) | |
| tree | 469e7625ce86a218a80d1bb3c2f28bbfdedeae75 /youtube_dl/extractor | |
| parent | 2dbb45ae82836699486d434cfb6d902920bab66e (diff) | |
[Amara] Add new extractor (#20618)
* [Amara] Add new extractor
Diffstat (limited to 'youtube_dl/extractor')
| -rw-r--r-- | youtube_dl/extractor/amara.py | 76 | ||||
| -rw-r--r-- | youtube_dl/extractor/extractors.py | 1 | 
2 files changed, 77 insertions, 0 deletions
# coding: utf-8
# File: youtube_dl/extractor/amara.py (new file introduced by this commit)
from __future__ import unicode_literals

from .common import InfoExtractor


class AmaraIE(InfoExtractor):
    """Extractor for video pages on amara.org.

    The video metadata is fetched from ``https://amara.org/api/videos/<id>/``.
    The first entry of its ``all_urls`` list is returned as a
    ``url_transparent`` result, so the media itself is resolved from that URL,
    while the title, description, thumbnail and published subtitles come from
    the Amara metadata.
    """

    _VALID_URL = r'https?://(?:www\.)?amara\.org/(?:\w+/)?videos/(?P<id>\w+)'
    _TESTS = [
        {
            'url': 'https://amara.org/en/videos/jVx79ZKGK1ky/info/why-jury-trials-are-becoming-less-common/?tab=video',
            'md5': 'ea10daf2b6154b8c1ecf9922aca5e8ae',
            'info_dict': {
                'id': 'h6ZuVdvYnfE',
                'ext': 'mp4',
                'title': 'Why jury trials are becoming less common',
                'description': 'md5:a61811c319943960b6ab1c23e0cbc2c1',
                'thumbnail': r're:^https?://.*\.jpg$',
                'subtitles': dict,
                'upload_date': '20160813',
                'uploader': 'PBS NewsHour',
                'uploader_id': 'PBSNewsHour'
            }
        },
        {
            'url': 'https://amara.org/en/videos/kYkK1VUTWW5I/info/vimeo-at-ces-2011',
            'md5': '99392c75fa05d432a8f11df03612195e',
            'info_dict': {
                'id': '18622084',
                'ext': 'mov',
                'title': 'Vimeo at CES 2011!',
                'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
                'thumbnail': r're:^https?://.*\.jpg$',
                'subtitles': dict,
                'timestamp': 1294649110,
                'upload_date': '20110110',
                'uploader': 'Sam Morrill',
                'uploader_id': 'sammorrill'
            }
        },
        {
            'url': 'https://amara.org/en/videos/s8KL7I3jLmh6/info/the-danger-of-a-single-story/',
            'md5': 'd3970f08512738ee60c5807311ff5d3f',
            'info_dict': {
                'id': 'ChimamandaAdichie_2009G-transcript',
                'ext': 'mp4',
                'title': 'The danger of a single story',
                'description': 'md5:d769b31139c3b8bb5be9177f62ea3f23',
                'thumbnail': r're:^https?://.*\.jpg$',
                'subtitles': dict,
                'upload_date': '20131206'
            }
        }
    ]

    # Subtitle formats offered for every published language, in the order
    # they are listed in the result.
    _SUBTITLE_FORMATS = ('vtt', 'srt', 'json')

    def get_subtitles_for_language(self, language):
        """Build the subtitle entries for one language record.

        ``language`` is one item of the ``languages`` list in the API
        response. Returns a list of ``{'ext': ..., 'url': ...}`` dicts, one
        per format in ``_SUBTITLE_FORMATS``, or an empty list when the record
        has no ``subtitles_uri``.
        """
        subtitles_uri = language.get('subtitles_uri')
        if not subtitles_uri:
            return []
        # NOTE(review): this assumes the URI carries a ``format=json`` query
        # parameter; if it does not, all entries share the same URL - confirm
        # against the Amara API.
        return [{
            'ext': ext,
            'url': subtitles_uri.replace('format=json', 'format=' + ext),
        } for ext in self._SUBTITLE_FORMATS]

    def _real_extract(self, url):
        """Return a ``url_transparent`` info dict for the Amara page at ``url``.

        Raises ``KeyError`` when the mandatory ``title`` or ``all_urls``
        fields are missing from the API response.
        """
        video_id = self._match_id(url)
        meta = self._download_json(
            'https://amara.org/api/videos/%s/' % video_id,
            video_id, query={'format': 'json'})

        # Mandatory fields are indexed directly so a missing key fails with a
        # KeyError naming the field, not a TypeError from subscripting None.
        title = meta['title']
        video_url = meta['all_urls'][0]

        # Subtitles are optional: tolerate a missing or null ``languages``
        # list and skip incomplete records instead of aborting extraction.
        subtitles = {}
        for language in meta.get('languages') or []:
            if not language.get('published'):
                continue
            code = language.get('code')
            tracks = self.get_subtitles_for_language(language)
            if code and tracks:
                subtitles[code] = tracks

        return {
            '_type': 'url_transparent',
            'url': video_url,
            'id': video_id,
            'subtitles': subtitles,
            'title': title,
            'description': meta.get('description'),
            'thumbnail': meta.get('thumbnail')
        }


# File: youtube_dl/extractor/extractors.py (existing file, one line added)
# The commit registers the extractor by inserting
#     from .amara import AmaraIE
# between `from .alphaporno import AlphaPornoIE` and
# `from .amcnetworks import AMCNetworksIE`.
