diff options
| author | Sergey M․ <dstftw@gmail.com> | 2017-08-12 23:24:46 +0700 | 
|---|---|---|
| committer | Sergey M․ <dstftw@gmail.com> | 2017-08-12 23:43:51 +0700 | 
| commit | 0c43a481b91c657643eb42f72d293f245a410c52 (patch) | |
| tree | b572fdb47713c986ca2e23acbb072c2086137f98 /youtube_dl/extractor/reddit.py | |
| parent | 868f79db41a4d81a87ef12c8bd5ef73205c9c029 (diff) | |
[reddit] Add extractors (closes #13847)
Diffstat (limited to 'youtube_dl/extractor/reddit.py')
| -rw-r--r-- | youtube_dl/extractor/reddit.py | 114 | 
1 file changed, 114 insertions, 0 deletions
| diff --git a/youtube_dl/extractor/reddit.py b/youtube_dl/extractor/reddit.py new file mode 100644 index 000000000..01c85ee01 --- /dev/null +++ b/youtube_dl/extractor/reddit.py @@ -0,0 +1,114 @@ +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( +    ExtractorError, +    int_or_none, +    float_or_none, +) + + +class RedditIE(InfoExtractor): +    _VALID_URL = r'https?://v\.redd\.it/(?P<id>[^/?#&]+)' +    _TEST = { +        # from https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/ +        'url': 'https://v.redd.it/zv89llsvexdz', +        'md5': '655d06ace653ea3b87bccfb1b27ec99d', +        'info_dict': { +            'id': 'zv89llsvexdz', +            'ext': 'mp4', +            'title': 'zv89llsvexdz', +        }, +        'params': { +            'format': 'bestvideo', +        }, +    } + +    def _real_extract(self, url): +        video_id = self._match_id(url) + +        formats = self._extract_m3u8_formats( +            'https://v.redd.it/%s/HLSPlaylist.m3u8' % video_id, video_id, +            'mp4', entry_protocol='m3u8_native', m3u8_id='hls', fatal=False) + +        formats.extend(self._extract_mpd_formats( +            'https://v.redd.it/%s/DASHPlaylist.mpd' % video_id, video_id, +            mpd_id='dash', fatal=False)) + +        return { +            'id': video_id, +            'title': video_id, +            'formats': formats, +        } + + +class RedditRIE(InfoExtractor): +    _VALID_URL = r'https?://(?:www\.)?reddit\.com/r/[^/]+/comments/(?P<id>[^/]+)' +    _TESTS = [{ +        'url': 'https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/', +        'info_dict': { +            'id': 'zv89llsvexdz', +            'ext': 'mp4', +            'title': 'That small heart attack.', +            'thumbnail': r're:^https?://.*\.jpg$', +            'timestamp': 1501941939, +            'upload_date': '20170805', +            'uploader': 'Antw87', +            
'like_count': int, +            'dislike_count': int, +            'comment_count': int, +            'age_limit': 0, +        }, +        'params': { +            'format': 'bestvideo', +            'skip_download': True, +        }, +    }, { +        'url': 'https://www.reddit.com/r/videos/comments/6rrwyj', +        'only_matching': True, +    }, { +        # imgur +        'url': 'https://www.reddit.com/r/MadeMeSmile/comments/6t7wi5/wait_for_it/', +        'only_matching': True, +    }, { +        # streamable +        'url': 'https://www.reddit.com/r/videos/comments/6t7sg9/comedians_hilarious_joke_about_the_guam_flag/', +        'only_matching': True, +    }, { +        # youtube +        'url': 'https://www.reddit.com/r/videos/comments/6t75wq/southern_man_tries_to_speak_without_an_accent/', +        'only_matching': True, +    }] + +    def _real_extract(self, url): +        video_id = self._match_id(url) + +        data = self._download_json( +            url + '.json', video_id)[0]['data']['children'][0]['data'] + +        video_url = data['url'] + +        # Avoid recursing into the same reddit URL +        if 'reddit.com/' in video_url and '/%s/' % video_id in video_url: +            raise ExtractorError('No media found', expected=True) + +        over_18 = data.get('over_18') +        if over_18 is True: +            age_limit = 18 +        elif over_18 is False: +            age_limit = 0 +        else: +            age_limit = None + +        return { +            '_type': 'url_transparent', +            'url': video_url, +            'title': data.get('title'), +            'thumbnail': data.get('thumbnail'), +            'timestamp': float_or_none(data.get('created_utc')), +            'uploader': data.get('author'), +            'like_count': int_or_none(data.get('ups')), +            'dislike_count': int_or_none(data.get('downs')), +            'comment_count': int_or_none(data.get('num_comments')), +            'age_limit': age_limit, +        } | 
