aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPhilipp Hagemeister <phihag@phihag.de>2013-12-05 14:29:08 +0100
committerPhilipp Hagemeister <phihag@phihag.de>2013-12-05 14:29:08 +0100
commit7fc3fa0545f8a07414e8c97be9862a3c2f79bb98 (patch)
tree85de96d6f0276e9663ea96768a95991b3f8d6a28
parent29030c0a4c2f4dded5a310add940aae0791f9d73 (diff)
downloadyoutube-dl-7fc3fa0545f8a07414e8c97be9862a3c2f79bb98.tar.xz
[9gag] Add extractor
-rw-r--r--youtube_dl/YoutubeDL.py34
-rw-r--r--youtube_dl/extractor/__init__.py1
-rw-r--r--youtube_dl/extractor/ninegag.py41
3 files changed, 73 insertions, 3 deletions
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index b68b110a4..8ad7bd1da 100644
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -405,7 +405,8 @@ class YoutubeDL(object):
for key, value in extra_info.items():
info_dict.setdefault(key, value)
- def extract_info(self, url, download=True, ie_key=None, extra_info={}):
+ def extract_info(self, url, download=True, ie_key=None, extra_info={},
+ process=True):
'''
Returns a list with a dictionary for each video we find.
If 'download', also downloads the videos.
@@ -441,7 +442,10 @@ class YoutubeDL(object):
'webpage_url': url,
'extractor_key': ie.ie_key(),
})
- return self.process_ie_result(ie_result, download, extra_info)
+ if process:
+ return self.process_ie_result(ie_result, download, extra_info)
+ else:
+ return ie_result
except ExtractorError as de: # An error we somewhat expected
self.report_error(compat_str(de), de.format_traceback())
break
@@ -474,8 +478,32 @@ class YoutubeDL(object):
download,
ie_key=ie_result.get('ie_key'),
extra_info=extra_info)
+ elif result_type == 'url_transparent':
+ # Use the information from the embedding page
+ info = self.extract_info(
+ ie_result['url'], ie_key=ie_result.get('ie_key'),
+ extra_info=extra_info, download=False, process=False)
+
+ def make_result(embedded_info):
+ new_result = ie_result.copy()
+ for f in ('_type', 'url', 'ext', 'player_url', 'formats',
+ 'entries', 'urlhandle', 'ie_key', 'duration',
+ 'subtitles', 'annotations', 'format'):
+ if f in new_result:
+ del new_result[f]
+ if f in embedded_info:
+ new_result[f] = embedded_info[f]
+ return new_result
+ new_result = make_result(info)
+
+ assert new_result.get('_type') != 'url_transparent'
+ if new_result.get('_type') == 'compat_list':
+ new_result['entries'] = [
+ make_result(e) for e in new_result['entries']]
+
+ return self.process_ie_result(
+ new_result, download=download, extra_info=extra_info)
elif result_type == 'playlist':
-
# We process each entry in the playlist
playlist = ie_result.get('title', None) or ie_result.get('id', None)
self.to_screen(u'[download] Downloading playlist: %s' % playlist)
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index bd996483b..a77e98d49 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -102,6 +102,7 @@ from .nbc import NBCNewsIE
from .newgrounds import NewgroundsIE
from .nhl import NHLIE, NHLVideocenterIE
from .niconico import NiconicoIE
+from .ninegag import NineGagIE
from .nowvideo import NowVideoIE
from .ooyala import OoyalaIE
from .orf import ORFIE
diff --git a/youtube_dl/extractor/ninegag.py b/youtube_dl/extractor/ninegag.py
new file mode 100644
index 000000000..cc00ffbcc
--- /dev/null
+++ b/youtube_dl/extractor/ninegag.py
@@ -0,0 +1,41 @@
+import json
+import re
+
+from .common import InfoExtractor
+
+
+class NineGagIE(InfoExtractor):
+ IE_NAME = '9gag'
+ _VALID_URL = r'^https?://(?:www\.)?9gag\.tv/v/(?P<id>[0-9]+)'
+
+ _TEST = {
+ u"url": u"http://9gag.tv/v/1912",
+ u"file": u"1912.mp4",
+ u"info_dict": {
+ u"description": u"This 3-minute video will make you smile and then make you feel untalented and insignificant. Anyway, you should share this awesomeness. (Thanks, Dino!)",
+ u"title": u"\"People Are Awesome 2013\" Is Absolutely Awesome"
+ },
+ u'add_ie': [u'Youtube']
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+
+ webpage = self._download_webpage(url, video_id)
+ data_json = self._html_search_regex(r'''(?x)
+ <div\s*id="tv-video"\s*data-video-source="youtube"\s*
+ data-video-meta="([^"]+)"''', webpage, u'video metadata')
+
+ data = json.loads(data_json)
+
+ return {
+ '_type': 'url_transparent',
+ 'url': data['youtubeVideoId'],
+ 'ie_key': 'Youtube',
+ 'id': video_id,
+ 'title': data['title'],
+ 'description': data['description'],
+ 'view_count': int(data['view_count']),
+ 'thumbnail': data['thumbnail_url'],
+ }