diff options
Diffstat (limited to 'youtube_dl')
-rw-r--r-- | youtube_dl/__init__.py | 2 | ||||
-rw-r--r-- | youtube_dl/extractor/__init__.py | 3 | ||||
-rw-r--r-- | youtube_dl/extractor/c56.py | 36 | ||||
-rw-r--r-- | youtube_dl/extractor/instagram.py | 22 | ||||
-rw-r--r-- | youtube_dl/extractor/sina.py | 67 | ||||
-rw-r--r-- | youtube_dl/extractor/ted.py | 2 | ||||
-rw-r--r-- | youtube_dl/extractor/weibo.py | 48 | ||||
-rw-r--r-- | youtube_dl/extractor/youku.py | 2 | ||||
-rw-r--r-- | youtube_dl/extractor/youtube.py | 10 | ||||
-rw-r--r-- | youtube_dl/version.py | 2 |
10 files changed, 174 insertions, 20 deletions
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 31427f7c5..eb23c53a5 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -129,7 +129,7 @@ def parseOpts(overrideArguments=None): general.add_option('-v', '--version', action='version', help='print program version and exit') general.add_option('-U', '--update', - action='store_true', dest='update_self', help='update this program to latest version') + action='store_true', dest='update_self', help='update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed)') general.add_option('-i', '--ignore-errors', action='store_true', dest='ignoreerrors', help='continue on download errors', default=False) general.add_option('--dump-user-agent', diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 70a2363e4..b4a1c20e9 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -52,6 +52,7 @@ from .pornotube import PornotubeIE from .rbmaradio import RBMARadioIE from .redtube import RedTubeIE from .ringtv import RingTVIE +from .sina import SinaIE from .soundcloud import SoundcloudIE, SoundcloudSetIE from .spiegel import SpiegelIE from .stanfordoc import StanfordOpenClassroomIE @@ -71,7 +72,9 @@ from .veoh import VeohIE from .vevo import VevoIE from .vimeo import VimeoIE from .vine import VineIE +from .c56 import C56IE from .wat import WatIE +from .weibo import WeiboIE from .wimp import WimpIE from .worldstarhiphop import WorldStarHipHopIE from .xhamster import XHamsterIE diff --git a/youtube_dl/extractor/c56.py b/youtube_dl/extractor/c56.py new file mode 100644 index 000000000..4c8a8af09 --- /dev/null +++ b/youtube_dl/extractor/c56.py @@ -0,0 +1,36 @@ +# coding: utf-8 + +import re +import json + +from .common import InfoExtractor +from ..utils import determine_ext + +class C56IE(InfoExtractor): + _VALID_URL = r'https?://((www|player)\.)?56\.com/(.+?/)?(v_|(play_album.+-))(?P<textid>.+?)\.(html|swf)' + IE_NAME = u'56.com' + + _TEST ={ + u'url': u'http://www.56.com/u39/v_OTM0NDA3MTY.html', + u'file': u'93440716.mp4', + u'md5': u'9dc07b5c8e978112a6441f9e75d2b59e', + u'info_dict': { + u'title': u'网事知多少 第32期:车怒', + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE) + text_id = mobj.group('textid') + info_page = self._download_webpage('http://vxml.56.com/json/%s/' % text_id, + text_id, u'Downloading video info') + info = json.loads(info_page)['info'] + best_format = sorted(info['rfiles'], key=lambda f: int(f['filesize']))[-1] + video_url = best_format['url'] + + return {'id': info['vid'], + 'title': info['Subject'], + 'url': video_url, + 'ext': determine_ext(video_url), + 'thumbnail': info.get('bimg') or info.get('img'), + } diff --git a/youtube_dl/extractor/instagram.py b/youtube_dl/extractor/instagram.py index f9ac8d5b4..ddc42882a 100644 --- a/youtube_dl/extractor/instagram.py +++ b/youtube_dl/extractor/instagram.py @@ -10,7 +10,8 @@ class InstagramIE(InfoExtractor): u'md5': u'0d2da106a9d2631273e192b372806516', u'info_dict': { u"uploader_id": u"naomipq", - u"title": u"Video by naomipq" + u"title": u"Video by naomipq", + u'description': u'md5:1f17f0ab29bd6fe2bfad705f58de3cb8', } } @@ -18,20 +19,17 @@ class InstagramIE(InfoExtractor): mobj = re.match(self._VALID_URL, url) video_id = mobj.group(1) webpage = self._download_webpage(url, video_id) - html_title = self._html_search_regex( - r'<title>(.+?)</title>', - webpage, u'title', flags=re.DOTALL) - title = re.sub(u'(?: *\(Videos?\))? \u2022 Instagram$', '', html_title).strip() - uploader_id = self._html_search_regex( - r'<div class="media-user" id="media_user">.*?<h2><a href="[^"]*">([^<]*)</a></h2>', - webpage, u'uploader id', fatal=False, flags=re.DOTALL) - ext = 'mp4' + uploader_id = self._search_regex(r'"owner":{"username":"(.+?)"', + webpage, u'uploader id', fatal=False) + desc = self._search_regex(r'"caption":"(.*?)"', webpage, u'description', + fatal=False) return [{ 'id': video_id, 'url': self._og_search_video_url(webpage), - 'ext': ext, - 'title': title, + 'ext': 'mp4', + 'title': u'Video by %s' % uploader_id, 'thumbnail': self._og_search_thumbnail(webpage), - 'uploader_id' : uploader_id + 'uploader_id' : uploader_id, + 'description': desc, }] diff --git a/youtube_dl/extractor/sina.py b/youtube_dl/extractor/sina.py new file mode 100644 index 000000000..14b1c656c --- /dev/null +++ b/youtube_dl/extractor/sina.py @@ -0,0 +1,67 @@ +# coding: utf-8 + +import re +import xml.etree.ElementTree + +from .common import InfoExtractor +from ..utils import ( + compat_urllib_request, + compat_urllib_parse, +) + + +class SinaIE(InfoExtractor): + _VALID_URL = r'''https?://(.*?\.)?video\.sina\.com\.cn/ + ( + (.+?/(((?P<pseudo_id>\d+).html)|(.*?(\#|(vid=))(?P<id>\d+?)($|&)))) + | + # This is used by external sites like Weibo + (api/sinawebApi/outplay.php/(?P<token>.+?)\.swf) + ) + ''' + + _TEST = { + u'url': u'http://video.sina.com.cn/news/vlist/zt/chczlj2013/?opsubject_id=top12#110028898', + u'file': u'110028898.flv', + u'md5': u'd65dd22ddcf44e38ce2bf58a10c3e71f', + u'info_dict': { + u'title': u'《中国新闻》 朝鲜要求巴拿马立即释放被扣船员', + } + } + + @classmethod + def suitable(cls, url): + return re.match(cls._VALID_URL, url, flags=re.VERBOSE) is not None + + def _extract_video(self, video_id): + data = compat_urllib_parse.urlencode({'vid': video_id}) + url_page = self._download_webpage('http://v.iask.com/v_play.php?%s' % data, + video_id, u'Downloading video url') + image_page = self._download_webpage( + 'http://interface.video.sina.com.cn/interface/common/getVideoImage.php?%s' % data, + video_id, u'Downloading thumbnail info') + url_doc = xml.etree.ElementTree.fromstring(url_page.encode('utf-8')) + + return {'id': video_id, + 'url': url_doc.find('./durl/url').text, + 'ext': 'flv', + 'title': url_doc.find('./vname').text, + 'thumbnail': image_page.split('=')[1], + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE) + video_id = mobj.group('id') + if mobj.group('token') is not None: + # The video id is in the redirected url + self.to_screen(u'Getting video id') + request = compat_urllib_request.Request(url) + request.get_method = lambda: 'HEAD' + (_, urlh) = self._download_webpage_handle(request, 'NA', False) + return self._real_extract(urlh.geturl()) + elif video_id is None: + pseudo_id = mobj.group('pseudo_id') + webpage = self._download_webpage(url, pseudo_id) + video_id = self._search_regex(r'vid:\'(\d+?)\'', webpage, u'video id') + + return self._extract_video(video_id) diff --git a/youtube_dl/extractor/ted.py b/youtube_dl/extractor/ted.py index 8b73b8340..4c11f7a03 100644 --- a/youtube_dl/extractor/ted.py +++ b/youtube_dl/extractor/ted.py @@ -67,7 +67,7 @@ class TEDIE(InfoExtractor): webpage = self._download_webpage(url, video_id, 'Downloading \"%s\" page' % video_name) self.report_extraction(video_name) # If the url includes the language we get the title translated - title = self._html_search_regex(r'<span id="altHeadline" >(?P<title>.*)</span>', + title = self._html_search_regex(r'<span .*?id="altHeadline".+?>(?P<title>.*)</span>', webpage, 'title') json_data = self._search_regex(r'<script.*?>var talkDetails = ({.*?})</script>', webpage, 'json data') diff --git a/youtube_dl/extractor/weibo.py b/youtube_dl/extractor/weibo.py new file mode 100644 index 000000000..0757495bd --- /dev/null +++ b/youtube_dl/extractor/weibo.py @@ -0,0 +1,48 @@ +# coding: utf-8 + +import re +import json + +from .common import InfoExtractor + +class WeiboIE(InfoExtractor): + """ + The videos in Weibo come from different sites, this IE just finds the link + to the external video and returns it. + """ + _VALID_URL = r'https?://video\.weibo\.com/v/weishipin/t_(?P<id>.+?)\.htm' + + _TEST = { + u'url': u'http://video.weibo.com/v/weishipin/t_zjUw2kZ.htm', + u'file': u'98322879.flv', + u'info_dict': { + u'title': u'魔声耳机最新广告“All Eyes On Us”', + }, + u'note': u'Sina video', + u'params': { + u'skip_download': True, + }, + } + + # Additional example videos from different sites + # Youku: http://video.weibo.com/v/weishipin/t_zQGDWQ8.htm + # 56.com: http://video.weibo.com/v/weishipin/t_zQ44HxN.htm + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE) + video_id = mobj.group('id') + info_url = 'http://video.weibo.com/?s=v&a=play_list&format=json&mix_video_id=t_%s' % video_id + info_page = self._download_webpage(info_url, video_id) + info = json.loads(info_page) + + videos_urls = map(lambda v: v['play_page_url'], info['result']['data']) + #Prefer sina video since they have thumbnails + videos_urls = sorted(videos_urls, key=lambda u: u'video.sina.com' in u) + player_url = videos_urls[-1] + m_sina = re.match(r'https?://video.sina.com.cn/v/b/(\d+)-\d+.html', player_url) + if m_sina is not None: + self.to_screen('Sina video detected') + sina_id = m_sina.group(1) + player_url = 'http://you.video.sina.com.cn/swf/quotePlayer.swf?vid=%s' % sina_id + return self.url_result(player_url) + diff --git a/youtube_dl/extractor/youku.py b/youtube_dl/extractor/youku.py index eb9829801..996d38478 100644 --- a/youtube_dl/extractor/youku.py +++ b/youtube_dl/extractor/youku.py @@ -13,7 +13,7 @@ from ..utils import ( class YoukuIE(InfoExtractor): - _VALID_URL = r'(?:http://)?v\.youku\.com/v_show/id_(?P<ID>[A-Za-z0-9]+)\.html' + _VALID_URL = r'(?:http://)?(v|player)\.youku\.com/(v_show/id_|player\.php/sid/)(?P<ID>[A-Za-z0-9]+)(\.html|/v.swf)' _TEST = { u"url": u"http://v.youku.com/v_show/id_XNDgyMDQ2NTQw.html", u"file": u"XNDgyMDQ2NTQw_part00.flv", diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 580f39ee8..12e8fc25d 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -179,14 +179,18 @@ class YoutubeIE(InfoExtractor): def _decrypt_signature(self, s): """Turn the encrypted s field into a working signature""" - if len(s) == 88: + if len(s) == 92: + return s[25] + s[3:25] + s[0] + s[26:42] + s[79] + s[43:79] + s[91] + s[80:83] + elif len(s) == 90: + return s[25] + s[3:25] + s[2] + s[26:40] + s[77] + s[41:77] + s[89] + s[78:81] + elif len(s) == 88: return s[48] + s[81:67:-1] + s[82] + s[66:62:-1] + s[85] + s[61:48:-1] + s[67] + s[47:12:-1] + s[3] + s[11:3:-1] + s[2] + s[12] elif len(s) == 87: return s[62] + s[82:62:-1] + s[83] + s[61:52:-1] + s[0] + s[51:2:-1] elif len(s) == 86: return s[2:63] + s[82] + s[64:82] + s[63] elif len(s) == 85: - return s[76] + s[82:76:-1] + s[83] + s[75:60:-1] + s[0] + s[59:50:-1] + s[1] + s[49:2:-1] + return s[2:8] + s[0] + s[9:21] + s[65] + s[22:65] + s[84] + s[66:82] + s[21] elif len(s) == 84: return s[83:36:-1] + s[2] + s[35:26:-1] + s[3] + s[25:3:-1] + s[26] elif len(s) == 83: @@ -195,8 +199,6 @@ class YoutubeIE(InfoExtractor): return s[36] + s[79:67:-1] + s[81] + s[66:40:-1] + s[33] + s[39:36:-1] + s[40] + s[35] + s[0] + s[67] + s[32:0:-1] + s[34] elif len(s) == 81: return s[6] + s[3:6] + s[33] + s[7:24] + s[0] + s[25:33] + s[2] + s[34:53] + s[24] + s[54:81] - elif len(s) == 92: - return s[25] + s[3:25] + s[0] + s[26:42] + s[79] + s[43:79] + s[91] + s[80:83]; else: raise ExtractorError(u'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s))) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index cba83a488..32eb27dad 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2013.07.17.1' +__version__ = '2013.07.19' |