diff options
author | Philipp Hagemeister <phihag@phihag.de> | 2014-03-09 18:08:16 +0100 |
---|---|---|
committer | Philipp Hagemeister <phihag@phihag.de> | 2014-03-09 18:08:16 +0100 |
commit | 9e55e37a2e0e3a4e7d3fac2efd4ea13efe689b0e (patch) | |
tree | d77de1b25da3c63f93142fc3fa781e2872965b72 /youtube_dl/extractor | |
parent | a4ff6c4762fd01b606d54a2962530c753d4c52ec (diff) | |
parent | 14719565733c114320e99228cf2dc570f8c3b6dc (diff) |
Merge remote-tracking branch 'origin/master'
Diffstat (limited to 'youtube_dl/extractor')
-rw-r--r-- | youtube_dl/extractor/collegehumor.py | 14 | ||||
-rw-r--r-- | youtube_dl/extractor/gamekings.py | 13 | ||||
-rw-r--r-- | youtube_dl/extractor/mtv.py | 39 | ||||
-rw-r--r-- | youtube_dl/extractor/soundcloud.py | 1 | ||||
-rw-r--r-- | youtube_dl/extractor/spike.py | 15 | ||||
-rw-r--r-- | youtube_dl/extractor/videodetective.py | 22 | ||||
-rw-r--r-- | youtube_dl/extractor/vube.py | 4 |
7 files changed, 78 insertions, 30 deletions
diff --git a/youtube_dl/extractor/collegehumor.py b/youtube_dl/extractor/collegehumor.py index 6b9fa4209..45d81aae5 100644 --- a/youtube_dl/extractor/collegehumor.py +++ b/youtube_dl/extractor/collegehumor.py @@ -35,15 +35,15 @@ class CollegeHumorIE(InfoExtractor): }, # embedded youtube video { - 'url': 'http://www.collegehumor.com/embed/6950457', + 'url': 'http://www.collegehumor.com/embed/6950306', 'info_dict': { - 'id': 'W5gMp3ZjYg4', + 'id': 'Z-bao9fg6Yc', 'ext': 'mp4', - 'title': 'Funny Dogs Protecting Babies Compilation 2014 [NEW HD]', - 'uploader': 'FunnyPlox TV', - 'uploader_id': 'funnyploxtv', - 'description': 'md5:7ded37421526d54afdf005e25bc2b7a3', - 'upload_date': '20140128', + 'title': 'Young Americans Think President John F. Kennedy Died THIS MORNING IN A CAR ACCIDENT!!!', + 'uploader': 'Mark Dice', + 'uploader_id': 'MarkDice', + 'description': 'md5:62c3dab9351fac7bb44b53b69511d87f', + 'upload_date': '20140127', }, 'params': { 'skip_download': True, diff --git a/youtube_dl/extractor/gamekings.py b/youtube_dl/extractor/gamekings.py index a3a5251fe..233398966 100644 --- a/youtube_dl/extractor/gamekings.py +++ b/youtube_dl/extractor/gamekings.py @@ -1,3 +1,5 @@ +from __future__ import unicode_literals + import re from .common import InfoExtractor @@ -6,13 +8,14 @@ from .common import InfoExtractor class GamekingsIE(InfoExtractor): _VALID_URL = r'http://www\.gamekings\.tv/videos/(?P<name>[0-9a-z\-]+)' _TEST = { - u"url": u"http://www.gamekings.tv/videos/phoenix-wright-ace-attorney-dual-destinies-review/", - u'file': u'20130811.mp4', + 'url': 'http://www.gamekings.tv/videos/phoenix-wright-ace-attorney-dual-destinies-review/', # MD5 is flaky, seems to change regularly - #u'md5': u'2f32b1f7b80fdc5cb616efb4f387f8a3', + # 'md5': '2f32b1f7b80fdc5cb616efb4f387f8a3', u'info_dict': { - u"title": u"Phoenix Wright: Ace Attorney \u2013 Dual Destinies Review", - u"description": u"Melle en Steven hebben voor de review een week in de rechtbank doorbracht met Phoenix Wright: Ace Attorney - Dual Destinies.", + 'id': '20130811', + 'ext': 'mp4', + 'title': 'Phoenix Wright: Ace Attorney \u2013 Dual Destinies Review', + 'description': 'md5:632e61a9f97d700e83f43d77ddafb6a4', } } diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index 5447b6c0c..d75241d3f 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -5,9 +5,12 @@ import re from .common import InfoExtractor from ..utils import ( compat_urllib_parse, + compat_urllib_request, ExtractorError, find_xpath_attr, fix_xml_ampersands, + HEADRequest, + unescapeHTML, url_basename, RegexNotFoundError, ) @@ -18,6 +21,7 @@ def _media_xml_tag(tag): class MTVServicesInfoExtractor(InfoExtractor): + _MOBILE_TEMPLATE = None @staticmethod def _id_from_uri(uri): return uri.split(':')[-1] @@ -39,9 +43,29 @@ class MTVServicesInfoExtractor(InfoExtractor): else: return thumb_node.attrib['url'] - def _extract_video_formats(self, mdoc): - if re.match(r'.*/error_country_block\.swf$', mdoc.find('.//src').text) is not None: - raise ExtractorError('This video is not available from your country.', expected=True) + def _extract_mobile_video_formats(self, mtvn_id): + webpage_url = self._MOBILE_TEMPLATE % mtvn_id + req = compat_urllib_request.Request(webpage_url) + # Otherwise we get a webpage that would execute some javascript + req.add_header('Youtubedl-user-agent', 'curl/7') + webpage = self._download_webpage(req, mtvn_id, + 'Downloading mobile page') + metrics_url = unescapeHTML(self._search_regex(r'<a href="(http://metrics.+?)"', webpage, 'url')) + req = HEADRequest(metrics_url) + response = self._request_webpage(req, mtvn_id, 'Resolving url') + url = response.geturl() + # Transform the url to get the best quality: + url = re.sub(r'.+pxE=mp4', 'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=0+_pxK=18639+_pxE=mp4', url, 1) + return [{'url': url,'ext': 'mp4'}] + + def _extract_video_formats(self, mdoc, mtvn_id): + if re.match(r'.*/(error_country_block\.swf|geoblock\.mp4)$', mdoc.find('.//src').text) is not None: + if mtvn_id is not None and self._MOBILE_TEMPLATE is not None: + self.to_screen('The normal version is not available from your ' + 'country, trying with the mobile version') + return self._extract_mobile_video_formats(mtvn_id) + raise ExtractorError('This video is not available from your country.', + expected=True) formats = [] for rendition in mdoc.findall('.//rendition'): @@ -94,9 +118,16 @@ class MTVServicesInfoExtractor(InfoExtractor): raise ExtractorError('Could not find video title') title = title.strip() + # This a short id that's used in the webpage urls + mtvn_id = None + mtvn_id_node = find_xpath_attr(itemdoc, './/{http://search.yahoo.com/mrss/}category', + 'scheme', 'urn:mtvn:id') + if mtvn_id_node is not None: + mtvn_id = mtvn_id_node.text + return { 'title': title, - 'formats': self._extract_video_formats(mediagen_doc), + 'formats': self._extract_video_formats(mediagen_doc, mtvn_id), 'id': video_id, 'thumbnail': self._get_thumbnail_url(uri, itemdoc), 'description': description, diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 1cc0dcb15..2f254f023 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -54,6 +54,7 @@ class SoundcloudIE(InfoExtractor): 'id': '47127627', 'ext': 'mp3', 'title': 'Goldrushed', + 'description': 'From Stockholm Sweden\r\nPovel / Magnus / Filip / David\r\nwww.theroyalconcept.com', 'uploader': 'The Royal Concept', 'upload_date': '20120521', }, diff --git a/youtube_dl/extractor/spike.py b/youtube_dl/extractor/spike.py index 56682ac45..a3adf54e3 100644 --- a/youtube_dl/extractor/spike.py +++ b/youtube_dl/extractor/spike.py @@ -1,10 +1,15 @@ from __future__ import unicode_literals +import re + from .mtv import MTVServicesInfoExtractor class SpikeIE(MTVServicesInfoExtractor): - _VALID_URL = r'https?://www\.spike\.com/(video-clips|episodes)/.+' + _VALID_URL = r'''(?x)https?:// + (www\.spike\.com/(video-clips|episodes)/.+| + m\.spike\.com/videos/video.rbml\?id=(?P<mobile_id>[^&]+)) + ''' _TEST = { 'url': 'http://www.spike.com/video-clips/lhtu8m/auction-hunters-can-allen-ride-a-hundred-year-old-motorcycle', 'md5': '1a9265f32b0c375793d6c4ce45255256', @@ -17,3 +22,11 @@ class SpikeIE(MTVServicesInfoExtractor): } _FEED_URL = 'http://www.spike.com/feeds/mrss/' + _MOBILE_TEMPLATE = 'http://m.spike.com/videos/video.rbml?id=%s' + + def _real_extract(self, url): + mobj = re.search(self._VALID_URL, url) + mobile_id = mobj.group('mobile_id') + if mobile_id is not None: + url = 'http://www.spike.com/video-clips/%s' % mobile_id + return super(SpikeIE, self)._real_extract(url) diff --git a/youtube_dl/extractor/videodetective.py b/youtube_dl/extractor/videodetective.py index 265dd5b91..ac6c25537 100644 --- a/youtube_dl/extractor/videodetective.py +++ b/youtube_dl/extractor/videodetective.py @@ -1,22 +1,23 @@ +from __future__ import unicode_literals + import re from .common import InfoExtractor from .internetvideoarchive import InternetVideoArchiveIE -from ..utils import ( - compat_urlparse, -) +from ..utils import compat_urlparse class VideoDetectiveIE(InfoExtractor): _VALID_URL = r'https?://www\.videodetective\.com/[^/]+/[^/]+/(?P<id>\d+)' _TEST = { - u'url': u'http://www.videodetective.com/movies/kick-ass-2/194487', - u'file': u'194487.mp4', - u'info_dict': { - u'title': u'KICK-ASS 2', - u'description': u'md5:65ba37ad619165afac7d432eaded6013', - u'duration': 135, + 'url': 'http://www.videodetective.com/movies/kick-ass-2/194487', + 'info_dict': { + 'id': '194487', + 'ext': 'mp4', + 'title': 'KICK-ASS 2', + 'description': 'md5:65ba37ad619165afac7d432eaded6013', + 'duration': 135, }, } @@ -26,5 +27,4 @@ class VideoDetectiveIE(InfoExtractor): webpage = self._download_webpage(url, video_id) og_video = self._og_search_video_url(webpage) query = compat_urlparse.urlparse(og_video).query - return self.url_result(InternetVideoArchiveIE._build_url(query), - ie=InternetVideoArchiveIE.ie_key()) + return self.url_result(InternetVideoArchiveIE._build_url(query), ie=InternetVideoArchiveIE.ie_key()) diff --git a/youtube_dl/extractor/vube.py b/youtube_dl/extractor/vube.py index fbdff471a..935c97ae9 100644 --- a/youtube_dl/extractor/vube.py +++ b/youtube_dl/extractor/vube.py @@ -13,7 +13,7 @@ class VubeIE(InfoExtractor): _TEST = { 'url': 'http://vube.com/Chiara+Grispo+Video+Channel/YL2qNPkqon', - 'md5': 'f81dcf6d0448e3291f54380181695821', + 'md5': 'db7aba89d4603dadd627e9d1973946fe', 'info_dict': { 'id': 'YL2qNPkqon', 'ext': 'mp4', @@ -77,4 +77,4 @@ class VubeIE(InfoExtractor): 'like_count': like_count, 'dislike_count': dislike_count, 'comment_count': comment_count, - }
\ No newline at end of file + } |