diff options
author | Philipp Hagemeister <phihag@phihag.de> | 2015-02-19 01:43:20 +0100 |
---|---|---|
committer | Philipp Hagemeister <phihag@phihag.de> | 2015-02-19 01:43:20 +0100 |
commit | 3bf5705316adb4e486ec76a9308198b499787947 (patch) | |
tree | 63d6eb9b09e2baa9ae0bf83e9a45abc91f40e677 /youtube_dl | |
parent | 1c2528c8a3c599740558d5d752dfb368c8714a89 (diff) |
[imgur] Add new extractor
Diffstat (limited to 'youtube_dl')
-rw-r--r-- | youtube_dl/extractor/__init__.py | 1 | ||||
-rw-r--r-- | youtube_dl/extractor/imgur.py | 84 |
2 files changed, 85 insertions, 0 deletions
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 84a7edffc..1d1f07ff5 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -205,6 +205,7 @@ from .imdb import (
     ImdbIE,
     ImdbListIE
 )
+from .imgur import ImgurIE
 from .ina import InaIE
 from .infoq import InfoQIE
 from .instagram import InstagramIE, InstagramUserIE
diff --git a/youtube_dl/extractor/imgur.py b/youtube_dl/extractor/imgur.py
new file mode 100644
index 000000000..16488e0c4
--- /dev/null
+++ b/youtube_dl/extractor/imgur.py
@@ -0,0 +1,84 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    int_or_none,
+    js_to_json,
+    mimetype2ext,
+)
+
+
+class ImgurIE(InfoExtractor):
+    _VALID_URL = r'https?://i\.imgur\.com/(?P<id>[a-zA-Z0-9]+)\.(?:mp4|gifv)'
+
+    _TESTS = [{
+        'url': 'https://i.imgur.com/A61SaA1.gifv',
+        'info_dict': {
+            'id': 'A61SaA1',
+            'ext': 'mp4',
+            'title': 'MRW gifv is up and running without any bugs',
+            'description': 'The Internet\'s visual storytelling community. Explore, share, and discuss the best visual stories the Internet has to offer.',
+        },
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        width = int_or_none(self._search_regex(
+            r'<param name="width" value="([0-9]+)"',
+            webpage, 'width', fatal=False))
+        height = int_or_none(self._search_regex(
+            r'<param name="height" value="([0-9]+)"',
+            webpage, 'height', fatal=False))
+
+        formats = []
+        video_elements = self._search_regex(
+            r'(?s)<div class="video-elements">(.*?)</div>',
+            webpage, 'video elements')
+        formats = []
+        for m in re.finditer(r'<source\s+src="(?P<src>[^"]+)"\s+type="(?P<type>[^"]+)"', video_elements):
+            formats.append({
+                'format_id': m.group('type').partition('/')[2],
+                'url': self._proto_relative_url(m.group('src')),
+                'ext': mimetype2ext(m.group('type')),
+                'acodec': 'none',
+                'width': width,
+                'height': height,
+                'http_headers': {
+                    'User-Agent': 'youtube-dl (like wget)',
+                },
+            })
+
+        gif_json = self._search_regex(
+            r'(?s)var\s+videoItem\s*=\s*(\{.*?\})',
+            webpage, 'GIF code', fatal=False)
+        if gif_json:
+            gifd = self._parse_json(
+                gif_json, video_id, transform_source=js_to_json)
+            formats.append({
+                'format_id': 'gif',
+                'preference': -10,
+                'width': width,
+                'height': height,
+                'ext': 'gif',
+                'acodec': 'none',
+                'vcodec': 'gif',
+                'container': 'gif',
+                'url': self._proto_relative_url(gifd['gifUrl']),
+                'filesize': gifd.get('size'),
+                'http_headers': {
+                    'User-Agent': 'youtube-dl (like wget)',
+                },
+            })
+
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'formats': formats,
+            'description': self._og_search_description(webpage),
+            'title': self._og_search_title(webpage),
+        }