diff options
| author | Sergey M․ <dstftw@gmail.com> | 2020-11-26 02:58:48 +0700 | 
|---|---|---|
| committer | Sergey M․ <dstftw@gmail.com> | 2020-11-26 02:58:48 +0700 | 
| commit | 686e898fde48de9981c170f21d631d15e6f419ad (patch) | |
| tree | 80a3f5c62ae9e280baa6bda77753ade1f5781119 | |
| parent | 3a78198a96ffcb989d1302294f6a747c6147b418 (diff) | |
[spreaker] Add extractor (closes #13480, closes #13877)
| -rw-r--r-- | youtube_dl/extractor/extractors.py | 6 | ||||
| -rw-r--r-- | youtube_dl/extractor/spreaker.py | 176 | 
2 files changed, 182 insertions, 0 deletions
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index f7757b4f4..fd19f0f0a 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1082,6 +1082,12 @@ from .stitcher import StitcherIE  from .sport5 import Sport5IE  from .sportbox import SportBoxIE  from .sportdeutschland import SportDeutschlandIE +from .spreaker import ( +    SpreakerIE, +    SpreakerPageIE, +    SpreakerShowIE, +    SpreakerShowPageIE, +)  from .springboardplatform import SpringboardPlatformIE  from .sprout import SproutIE  from .srgssr import ( diff --git a/youtube_dl/extractor/spreaker.py b/youtube_dl/extractor/spreaker.py new file mode 100644 index 000000000..beee6670c --- /dev/null +++ b/youtube_dl/extractor/spreaker.py @@ -0,0 +1,176 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import itertools + +from .common import InfoExtractor +from ..compat import compat_str +from ..utils import ( +    float_or_none, +    int_or_none, +    str_or_none, +    try_get, +    unified_timestamp, +    url_or_none, +) + + +def _extract_episode(data, episode_id=None): +    title = data['title'] +    download_url = data['download_url'] + +    series = try_get(data, lambda x: x['show']['title'], compat_str) +    uploader = try_get(data, lambda x: x['author']['fullname'], compat_str) + +    thumbnails = [] +    for image in ('image_original', 'image_medium', 'image'): +        image_url = url_or_none(data.get('%s_url' % image)) +        if image_url: +            thumbnails.append({'url': image_url}) + +    def stats(key): +        return int_or_none(try_get( +            data, +            (lambda x: x['%ss_count' % key], +             lambda x: x['stats']['%ss' % key]))) + +    def duration(key): +        return float_or_none(data.get(key), scale=1000) + +    return { +        'id': compat_str(episode_id or data['episode_id']), +        'url': download_url, +        'display_id': data.get('permalink'), +        'title': title, +        'description': data.get('description'), +        'timestamp': unified_timestamp(data.get('published_at')), +        'uploader': uploader, +        'uploader_id': str_or_none(data.get('author_id')), +        'creator': uploader, +        'duration': duration('duration') or duration('length'), +        'view_count': stats('play'), +        'like_count': stats('like'), +        'comment_count': stats('message'), +        'format': 'MPEG Layer 3', +        'format_id': 'mp3', +        'container': 'mp3', +        'ext': 'mp3', +        'thumbnails': thumbnails, +        'series': series, +        'extractor_key': SpreakerIE.ie_key(), +    } + + +class SpreakerIE(InfoExtractor): +    _VALID_URL = r'''(?x) +                    https?:// +                        api\.spreaker\.com/ +                        (?: +                            (?:download/)?episode| +                            v2/episodes +                        )/ +                        (?P<id>\d+) +                    ''' +    _TESTS = [{ +        'url': 'https://api.spreaker.com/episode/12534508', +        'info_dict': { +            'id': '12534508', +            'display_id': 'swm-ep15-how-to-market-your-music-part-2', +            'ext': 'mp3', +            'title': 'EP:15 | Music Marketing (Likes) - Part 2', +            'description': 'md5:0588c43e27be46423e183076fa071177', +            'timestamp': 1502250336, +            'upload_date': '20170809', +            'uploader': 'SWM', +            'uploader_id': '9780658', +            'duration': 1063.42, +            'view_count': int, +            'like_count': int, +            'comment_count': int, +            'series': 'Success With Music (SWM)', +        }, +    }, { +        'url': 'https://api.spreaker.com/download/episode/12534508/swm_ep15_how_to_market_your_music_part_2.mp3', +        'only_matching': True, +    }, { +        'url': 'https://api.spreaker.com/v2/episodes/12534508?export=episode_segments', +        'only_matching': True, +    }] + +    def _real_extract(self, url): +        episode_id = self._match_id(url) +        data = self._download_json( +            'https://api.spreaker.com/v2/episodes/%s' % episode_id, +            episode_id)['response']['episode'] +        return _extract_episode(data, episode_id) + + +class SpreakerPageIE(InfoExtractor): +    _VALID_URL = r'https?://(?:www\.)?spreaker\.com/user/[^/]+/(?P<id>[^/?#&]+)' +    _TESTS = [{ +        'url': 'https://www.spreaker.com/user/9780658/swm-ep15-how-to-market-your-music-part-2', +        'only_matching': True, +    }] + +    def _real_extract(self, url): +        display_id = self._match_id(url) +        webpage = self._download_webpage(url, display_id) +        episode_id = self._search_regex( +            (r'data-episode_id=["\'](?P<id>\d+)', +             r'episode_id\s*:\s*(?P<id>\d+)'), webpage, 'episode id') +        return self.url_result( +            'https://api.spreaker.com/episode/%s' % episode_id, +            ie=SpreakerIE.ie_key(), video_id=episode_id) + + +class SpreakerShowIE(InfoExtractor): +    _VALID_URL = r'https?://api\.spreaker\.com/show/(?P<id>\d+)' +    _TESTS = [{ +        'url': 'https://www.spreaker.com/show/3-ninjas-podcast', +        'info_dict': { +            'id': '4652058', +        }, +        'playlist_mincount': 118, +    }] + +    def _entries(self, show_id): +        for page_num in itertools.count(1): +            episodes = self._download_json( +                'https://api.spreaker.com/show/%s/episodes' % show_id, +                show_id, note='Downloading JSON page %d' % page_num, query={ +                    'page': page_num, +                    'max_per_page': 100, +                }) +            pager = try_get(episodes, lambda x: x['response']['pager'], dict) +            if not pager: +                break +            results = pager.get('results') +            if not results or not isinstance(results, list): +                break +            for result in results: +                if not isinstance(result, dict): +                    continue +                yield _extract_episode(result) +            if page_num == pager.get('last_page'): +                break + +    def _real_extract(self, url): +        show_id = self._match_id(url) +        return self.playlist_result(self._entries(show_id), playlist_id=show_id) + + +class SpreakerShowPageIE(InfoExtractor): +    _VALID_URL = r'https?://(?:www\.)?spreaker\.com/show/(?P<id>[^/?#&]+)' +    _TESTS = [{ +        'url': 'https://www.spreaker.com/show/success-with-music', +        'only_matching': True, +    }] + +    def _real_extract(self, url): +        display_id = self._match_id(url) +        webpage = self._download_webpage(url, display_id) +        show_id = self._search_regex( +            r'show_id\s*:\s*(?P<id>\d+)', webpage, 'show id') +        return self.url_result( +            'https://api.spreaker.com/show/%s' % show_id, +            ie=SpreakerShowIE.ie_key(), video_id=show_id)  | 
