diff options
Diffstat (limited to 'youtube_dl/extractor')
-rw-r--r-- | youtube_dl/extractor/__init__.py | 2 | ||||
-rw-r--r-- | youtube_dl/extractor/facebook.py | 4 | ||||
-rw-r--r-- | youtube_dl/extractor/francetv.py | 50 | ||||
-rw-r--r-- | youtube_dl/extractor/youku.py | 7 | ||||
-rw-r--r-- | youtube_dl/extractor/youtube.py | 4 |
5 files changed, 63 insertions, 4 deletions
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 949f59a44..d1b7e5f99 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -42,6 +42,8 @@ from .flickr import FlickrIE from .francetv import ( PluzzIE, FranceTvInfoIE, + France2IE, + GenerationQuoiIE ) from .freesound import FreesoundIE from .funnyordie import FunnyOrDieIE diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index beaa5b4bd..9d1bc0751 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -106,8 +106,8 @@ class FacebookIE(InfoExtractor): video_duration = int(video_data['video_duration']) thumbnail = video_data['thumbnail_src'] - video_title = self._html_search_regex('<h2 class="uiHeaderTitle">([^<]+)</h2>', - webpage, u'title') + video_title = self._html_search_regex( + r'<h2 class="uiHeaderTitle">([^<]*)</h2>', webpage, u'title') info = { 'id': video_id, diff --git a/youtube_dl/extractor/francetv.py b/youtube_dl/extractor/francetv.py index b8fe82e47..b1530e549 100644 --- a/youtube_dl/extractor/francetv.py +++ b/youtube_dl/extractor/francetv.py @@ -1,6 +1,7 @@ # encoding: utf-8 import re import xml.etree.ElementTree +import json from .common import InfoExtractor from ..utils import ( @@ -65,3 +66,52 @@ class FranceTvInfoIE(FranceTVBaseInfoExtractor): webpage = self._download_webpage(url, page_title) video_id = self._search_regex(r'id-video=(\d+?)"', webpage, u'video id') return self._extract_video(video_id) + + +class France2IE(FranceTVBaseInfoExtractor): + IE_NAME = u'france2.fr' + _VALID_URL = r'https?://www\.france2\.fr/emissions/.*?/videos/(?P<id>\d+)' + + _TEST = { + u'url': u'http://www.france2.fr/emissions/13h15-le-samedi-le-dimanche/videos/75540104', + u'file': u'75540104.mp4', + u'info_dict': { + u'title': u'13h15, le samedi...', + u'description': u'md5:2e5b58ba7a2d3692b35c792be081a03d', + }, + u'params': { + u'skip_download': True, + }, + } + + def 
_real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + return self._extract_video(video_id) + + +class GenerationQuoiIE(InfoExtractor): + IE_NAME = u'http://generation-quoi.france2.fr' + _VALID_URL = r'https?://generation-quoi\.france2\.fr/portrait/(?P<name>.*)(\?|$)' + + _TEST = { + u'url': u'http://generation-quoi.france2.fr/portrait/garde-a-vous', + u'file': u'k7FJX8VBcvvLmX4wA5Q.mp4', + u'info_dict': { + u'title': u'Génération Quoi - Garde à Vous', + u'uploader': u'Génération Quoi', + }, + u'params': { + # It uses Dailymotion + u'skip_download': True, + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + name = mobj.group('name') + info_url = compat_urlparse.urljoin(url, '/medias/video/%s.json' % name) + info_json = self._download_webpage(info_url, name) + info = json.loads(info_json) + return self.url_result('http://www.dailymotion.com/video/%s' % info['id'], + ie='Dailymotion') diff --git a/youtube_dl/extractor/youku.py b/youtube_dl/extractor/youku.py index 996d38478..00fa2ccb5 100644 --- a/youtube_dl/extractor/youku.py +++ b/youtube_dl/extractor/youku.py @@ -66,6 +66,12 @@ class YoukuIE(InfoExtractor): self.report_extraction(video_id) try: config = json.loads(jsondata) + error_code = config['data'][0].get('error_code') + if error_code: + # -8 means blocked outside China. + error = config['data'][0].get('error') # Chinese and English, separated by newline. + raise ExtractorError(error or u'Server reported error %i' % error_code, + expected=True) video_title = config['data'][0]['title'] seed = config['data'][0]['seed'] @@ -89,6 +95,7 @@ class YoukuIE(InfoExtractor): fileid = config['data'][0]['streamfileids'][format] keys = [s['k'] for s in config['data'][0]['segs'][format]] + # segs is usually a dictionary, but an empty *list* if an error occurred. 
except (UnicodeDecodeError, ValueError, KeyError): raise ExtractorError(u'Unable to extract info section') diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index e883a2c54..6beda8f3b 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -361,7 +361,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): u"info_dict": { u"upload_date": u"20120506", u"title": u"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]", - u"description": u"md5:3e2666e0a55044490499ea45fe9037b7", + u"description": u"md5:bdac09887d209a4ed54b8f76b2bdaa8b", u"uploader": u"Icona Pop", u"uploader_id": u"IconaPop" } @@ -1096,7 +1096,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): elif len(s) == 85: return s[3:11] + s[0] + s[12:55] + s[84] + s[56:84] elif len(s) == 84: - return s[81:36:-1] + s[0] + s[35:2:-1] + return s[78:70:-1] + s[14] + s[69:37:-1] + s[70] + s[36:14:-1] + s[80] + s[:14][::-1] elif len(s) == 83: return s[81:64:-1] + s[82] + s[63:52:-1] + s[45] + s[51:45:-1] + s[1] + s[44:1:-1] + s[0] elif len(s) == 82: |