about | summary | refs | log | tree | commit | diff
path: root/youtube_dl
diff options
context:
space:
mode:
Diffstat (limited to 'youtube_dl')
-rw-r--r-- youtube_dl/__init__.py 2
-rw-r--r-- youtube_dl/extractor/__init__.py 3
-rw-r--r-- youtube_dl/extractor/c56.py 36
-rw-r--r-- youtube_dl/extractor/instagram.py 22
-rw-r--r-- youtube_dl/extractor/sina.py 67
-rw-r--r-- youtube_dl/extractor/ted.py 2
-rw-r--r-- youtube_dl/extractor/weibo.py 48
-rw-r--r-- youtube_dl/extractor/youku.py 2
-rw-r--r-- youtube_dl/extractor/youtube.py 10
-rw-r--r-- youtube_dl/version.py 2
10 files changed, 174 insertions, 20 deletions
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py
index 31427f7c5..eb23c53a5 100644
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -129,7 +129,7 @@ def parseOpts(overrideArguments=None):
general.add_option('-v', '--version',
action='version', help='print program version and exit')
general.add_option('-U', '--update',
- action='store_true', dest='update_self', help='update this program to latest version')
+ action='store_true', dest='update_self', help='update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed)')
general.add_option('-i', '--ignore-errors',
action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
general.add_option('--dump-user-agent',
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 70a2363e4..b4a1c20e9 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -52,6 +52,7 @@ from .pornotube import PornotubeIE
from .rbmaradio import RBMARadioIE
from .redtube import RedTubeIE
from .ringtv import RingTVIE
+from .sina import SinaIE
from .soundcloud import SoundcloudIE, SoundcloudSetIE
from .spiegel import SpiegelIE
from .stanfordoc import StanfordOpenClassroomIE
@@ -71,7 +72,9 @@ from .veoh import VeohIE
from .vevo import VevoIE
from .vimeo import VimeoIE
from .vine import VineIE
+from .c56 import C56IE
from .wat import WatIE
+from .weibo import WeiboIE
from .wimp import WimpIE
from .worldstarhiphop import WorldStarHipHopIE
from .xhamster import XHamsterIE
diff --git a/youtube_dl/extractor/c56.py b/youtube_dl/extractor/c56.py
new file mode 100644
index 000000000..4c8a8af09
--- /dev/null
+++ b/youtube_dl/extractor/c56.py
@@ -0,0 +1,36 @@
+# coding: utf-8
+
+import re
+import json
+
+from .common import InfoExtractor
+from ..utils import determine_ext
+
+class C56IE(InfoExtractor):
+ _VALID_URL = r'https?://((www|player)\.)?56\.com/(.+?/)?(v_|(play_album.+-))(?P<textid>.+?)\.(html|swf)'
+ IE_NAME = u'56.com'
+
+ _TEST ={
+ u'url': u'http://www.56.com/u39/v_OTM0NDA3MTY.html',
+ u'file': u'93440716.mp4',
+ u'md5': u'9dc07b5c8e978112a6441f9e75d2b59e',
+ u'info_dict': {
+ u'title': u'网事知多少 第32期:车怒',
+ },
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE)
+ text_id = mobj.group('textid')
+ info_page = self._download_webpage('http://vxml.56.com/json/%s/' % text_id,
+ text_id, u'Downloading video info')
+ info = json.loads(info_page)['info']
+ best_format = sorted(info['rfiles'], key=lambda f: int(f['filesize']))[-1]
+ video_url = best_format['url']
+
+ return {'id': info['vid'],
+ 'title': info['Subject'],
+ 'url': video_url,
+ 'ext': determine_ext(video_url),
+ 'thumbnail': info.get('bimg') or info.get('img'),
+ }
diff --git a/youtube_dl/extractor/instagram.py b/youtube_dl/extractor/instagram.py
index f9ac8d5b4..ddc42882a 100644
--- a/youtube_dl/extractor/instagram.py
+++ b/youtube_dl/extractor/instagram.py
@@ -10,7 +10,8 @@ class InstagramIE(InfoExtractor):
u'md5': u'0d2da106a9d2631273e192b372806516',
u'info_dict': {
u"uploader_id": u"naomipq",
- u"title": u"Video by naomipq"
+ u"title": u"Video by naomipq",
+ u'description': u'md5:1f17f0ab29bd6fe2bfad705f58de3cb8',
}
}
@@ -18,20 +19,17 @@ class InstagramIE(InfoExtractor):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group(1)
webpage = self._download_webpage(url, video_id)
- html_title = self._html_search_regex(
- r'<title>(.+?)</title>',
- webpage, u'title', flags=re.DOTALL)
- title = re.sub(u'(?: *\(Videos?\))? \u2022 Instagram$', '', html_title).strip()
- uploader_id = self._html_search_regex(
- r'<div class="media-user" id="media_user">.*?<h2><a href="[^"]*">([^<]*)</a></h2>',
- webpage, u'uploader id', fatal=False, flags=re.DOTALL)
- ext = 'mp4'
+ uploader_id = self._search_regex(r'"owner":{"username":"(.+?)"',
+ webpage, u'uploader id', fatal=False)
+ desc = self._search_regex(r'"caption":"(.*?)"', webpage, u'description',
+ fatal=False)
return [{
'id': video_id,
'url': self._og_search_video_url(webpage),
- 'ext': ext,
- 'title': title,
+ 'ext': 'mp4',
+ 'title': u'Video by %s' % uploader_id,
'thumbnail': self._og_search_thumbnail(webpage),
- 'uploader_id' : uploader_id
+ 'uploader_id' : uploader_id,
+ 'description': desc,
}]
diff --git a/youtube_dl/extractor/sina.py b/youtube_dl/extractor/sina.py
new file mode 100644
index 000000000..14b1c656c
--- /dev/null
+++ b/youtube_dl/extractor/sina.py
@@ -0,0 +1,67 @@
+# coding: utf-8
+
+import re
+import xml.etree.ElementTree
+
+from .common import InfoExtractor
+from ..utils import (
+ compat_urllib_request,
+ compat_urllib_parse,
+)
+
+
+class SinaIE(InfoExtractor):
+ _VALID_URL = r'''https?://(.*?\.)?video\.sina\.com\.cn/
+ (
+ (.+?/(((?P<pseudo_id>\d+).html)|(.*?(\#|(vid=))(?P<id>\d+?)($|&))))
+ |
+ # This is used by external sites like Weibo
+ (api/sinawebApi/outplay.php/(?P<token>.+?)\.swf)
+ )
+ '''
+
+ _TEST = {
+ u'url': u'http://video.sina.com.cn/news/vlist/zt/chczlj2013/?opsubject_id=top12#110028898',
+ u'file': u'110028898.flv',
+ u'md5': u'd65dd22ddcf44e38ce2bf58a10c3e71f',
+ u'info_dict': {
+ u'title': u'《中国新闻》 朝鲜要求巴拿马立即释放被扣船员',
+ }
+ }
+
+ @classmethod
+ def suitable(cls, url):
+ return re.match(cls._VALID_URL, url, flags=re.VERBOSE) is not None
+
+ def _extract_video(self, video_id):
+ data = compat_urllib_parse.urlencode({'vid': video_id})
+ url_page = self._download_webpage('http://v.iask.com/v_play.php?%s' % data,
+ video_id, u'Downloading video url')
+ image_page = self._download_webpage(
+ 'http://interface.video.sina.com.cn/interface/common/getVideoImage.php?%s' % data,
+ video_id, u'Downloading thumbnail info')
+ url_doc = xml.etree.ElementTree.fromstring(url_page.encode('utf-8'))
+
+ return {'id': video_id,
+ 'url': url_doc.find('./durl/url').text,
+ 'ext': 'flv',
+ 'title': url_doc.find('./vname').text,
+ 'thumbnail': image_page.split('=')[1],
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE)
+ video_id = mobj.group('id')
+ if mobj.group('token') is not None:
+ # The video id is in the redirected url
+ self.to_screen(u'Getting video id')
+ request = compat_urllib_request.Request(url)
+ request.get_method = lambda: 'HEAD'
+ (_, urlh) = self._download_webpage_handle(request, 'NA', False)
+ return self._real_extract(urlh.geturl())
+ elif video_id is None:
+ pseudo_id = mobj.group('pseudo_id')
+ webpage = self._download_webpage(url, pseudo_id)
+ video_id = self._search_regex(r'vid:\'(\d+?)\'', webpage, u'video id')
+
+ return self._extract_video(video_id)
diff --git a/youtube_dl/extractor/ted.py b/youtube_dl/extractor/ted.py
index 8b73b8340..4c11f7a03 100644
--- a/youtube_dl/extractor/ted.py
+++ b/youtube_dl/extractor/ted.py
@@ -67,7 +67,7 @@ class TEDIE(InfoExtractor):
webpage = self._download_webpage(url, video_id, 'Downloading \"%s\" page' % video_name)
self.report_extraction(video_name)
# If the url includes the language we get the title translated
- title = self._html_search_regex(r'<span id="altHeadline" >(?P<title>.*)</span>',
+ title = self._html_search_regex(r'<span .*?id="altHeadline".+?>(?P<title>.*)</span>',
webpage, 'title')
json_data = self._search_regex(r'<script.*?>var talkDetails = ({.*?})</script>',
webpage, 'json data')
diff --git a/youtube_dl/extractor/weibo.py b/youtube_dl/extractor/weibo.py
new file mode 100644
index 000000000..0757495bd
--- /dev/null
+++ b/youtube_dl/extractor/weibo.py
@@ -0,0 +1,48 @@
+# coding: utf-8
+
+import re
+import json
+
+from .common import InfoExtractor
+
+class WeiboIE(InfoExtractor):
+ """
+ The videos in Weibo come from different sites, this IE just finds the link
+ to the external video and returns it.
+ """
+ _VALID_URL = r'https?://video\.weibo\.com/v/weishipin/t_(?P<id>.+?)\.htm'
+
+ _TEST = {
+ u'url': u'http://video.weibo.com/v/weishipin/t_zjUw2kZ.htm',
+ u'file': u'98322879.flv',
+ u'info_dict': {
+ u'title': u'魔声耳机最新广告“All Eyes On Us”',
+ },
+ u'note': u'Sina video',
+ u'params': {
+ u'skip_download': True,
+ },
+ }
+
+ # Additional example videos from different sites
+ # Youku: http://video.weibo.com/v/weishipin/t_zQGDWQ8.htm
+ # 56.com: http://video.weibo.com/v/weishipin/t_zQ44HxN.htm
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE)
+ video_id = mobj.group('id')
+ info_url = 'http://video.weibo.com/?s=v&a=play_list&format=json&mix_video_id=t_%s' % video_id
+ info_page = self._download_webpage(info_url, video_id)
+ info = json.loads(info_page)
+
+ videos_urls = map(lambda v: v['play_page_url'], info['result']['data'])
+ #Prefer sina video since they have thumbnails
+ videos_urls = sorted(videos_urls, key=lambda u: u'video.sina.com' in u)
+ player_url = videos_urls[-1]
+ m_sina = re.match(r'https?://video.sina.com.cn/v/b/(\d+)-\d+.html', player_url)
+ if m_sina is not None:
+ self.to_screen('Sina video detected')
+ sina_id = m_sina.group(1)
+ player_url = 'http://you.video.sina.com.cn/swf/quotePlayer.swf?vid=%s' % sina_id
+ return self.url_result(player_url)
+
diff --git a/youtube_dl/extractor/youku.py b/youtube_dl/extractor/youku.py
index eb9829801..996d38478 100644
--- a/youtube_dl/extractor/youku.py
+++ b/youtube_dl/extractor/youku.py
@@ -13,7 +13,7 @@ from ..utils import (
class YoukuIE(InfoExtractor):
- _VALID_URL = r'(?:http://)?v\.youku\.com/v_show/id_(?P<ID>[A-Za-z0-9]+)\.html'
+ _VALID_URL = r'(?:http://)?(v|player)\.youku\.com/(v_show/id_|player\.php/sid/)(?P<ID>[A-Za-z0-9]+)(\.html|/v.swf)'
_TEST = {
u"url": u"http://v.youku.com/v_show/id_XNDgyMDQ2NTQw.html",
u"file": u"XNDgyMDQ2NTQw_part00.flv",
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 580f39ee8..12e8fc25d 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -179,14 +179,18 @@ class YoutubeIE(InfoExtractor):
def _decrypt_signature(self, s):
"""Turn the encrypted s field into a working signature"""
- if len(s) == 88:
+ if len(s) == 92:
+ return s[25] + s[3:25] + s[0] + s[26:42] + s[79] + s[43:79] + s[91] + s[80:83]
+ elif len(s) == 90:
+ return s[25] + s[3:25] + s[2] + s[26:40] + s[77] + s[41:77] + s[89] + s[78:81]
+ elif len(s) == 88:
return s[48] + s[81:67:-1] + s[82] + s[66:62:-1] + s[85] + s[61:48:-1] + s[67] + s[47:12:-1] + s[3] + s[11:3:-1] + s[2] + s[12]
elif len(s) == 87:
return s[62] + s[82:62:-1] + s[83] + s[61:52:-1] + s[0] + s[51:2:-1]
elif len(s) == 86:
return s[2:63] + s[82] + s[64:82] + s[63]
elif len(s) == 85:
- return s[76] + s[82:76:-1] + s[83] + s[75:60:-1] + s[0] + s[59:50:-1] + s[1] + s[49:2:-1]
+ return s[2:8] + s[0] + s[9:21] + s[65] + s[22:65] + s[84] + s[66:82] + s[21]
elif len(s) == 84:
return s[83:36:-1] + s[2] + s[35:26:-1] + s[3] + s[25:3:-1] + s[26]
elif len(s) == 83:
@@ -195,8 +199,6 @@ class YoutubeIE(InfoExtractor):
return s[36] + s[79:67:-1] + s[81] + s[66:40:-1] + s[33] + s[39:36:-1] + s[40] + s[35] + s[0] + s[67] + s[32:0:-1] + s[34]
elif len(s) == 81:
return s[6] + s[3:6] + s[33] + s[7:24] + s[0] + s[25:33] + s[2] + s[34:53] + s[24] + s[54:81]
- elif len(s) == 92:
- return s[25] + s[3:25] + s[0] + s[26:42] + s[79] + s[43:79] + s[91] + s[80:83];
else:
raise ExtractorError(u'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s)))
diff --git a/youtube_dl/version.py b/youtube_dl/version.py
index cba83a488..32eb27dad 100644
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,2 +1,2 @@
-__version__ = '2013.07.17.1'
+__version__ = '2013.07.19'