diff options
-rw-r--r-- | docs/supportedsites.md | 1 | ||||
-rw-r--r-- | youtube_dl/extractor/__init__.py | 1 | ||||
-rw-r--r-- | youtube_dl/extractor/clipsyndicate.py | 14 | ||||
-rw-r--r-- | youtube_dl/extractor/gfycat.py | 28 | ||||
-rw-r--r-- | youtube_dl/extractor/qqmusic.py | 97 | ||||
-rw-r--r-- | youtube_dl/extractor/vk.py | 48 | ||||
-rw-r--r-- | youtube_dl/version.py | 2 |
7 files changed, 158 insertions, 33 deletions
diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 687936103..0ca06c71d 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -384,6 +384,7 @@ - **Pyvideo** - **qqmusic** - **qqmusic:album** + - **qqmusic:playlist** - **qqmusic:singer** - **qqmusic:toplist** - **QuickVid** diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 8665855eb..a0e3b333d 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -432,6 +432,7 @@ from .qqmusic import ( QQMusicSingerIE, QQMusicAlbumIE, QQMusicToplistIE, + QQMusicPlaylistIE, ) from .quickvid import QuickVidIE from .r7 import R7IE diff --git a/youtube_dl/extractor/clipsyndicate.py b/youtube_dl/extractor/clipsyndicate.py index d07d544ea..8306d6fb7 100644 --- a/youtube_dl/extractor/clipsyndicate.py +++ b/youtube_dl/extractor/clipsyndicate.py @@ -1,7 +1,5 @@ from __future__ import unicode_literals -import re - from .common import InfoExtractor from ..utils import ( find_xpath_attr, @@ -10,9 +8,9 @@ from ..utils import ( class ClipsyndicateIE(InfoExtractor): - _VALID_URL = r'http://www\.clipsyndicate\.com/video/play(list/\d+)?/(?P<id>\d+)' + _VALID_URL = r'http://(?:chic|www)\.clipsyndicate\.com/video/play(list/\d+)?/(?P<id>\d+)' - _TEST = { + _TESTS = [{ 'url': 'http://www.clipsyndicate.com/video/play/4629301/brick_briscoe', 'md5': '4d7d549451bad625e0ff3d7bd56d776c', 'info_dict': { @@ -22,11 +20,13 @@ class ClipsyndicateIE(InfoExtractor): 'duration': 612, 'thumbnail': 're:^https?://.+\.jpg', }, - } + }, { + 'url': 'http://chic.clipsyndicate.com/video/play/5844117/shark_attack', + 'only_matching': True, + }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id = self._match_id(url) js_player = self._download_webpage( 'http://eplayer.clipsyndicate.com/embed/player.js?va_id=%s' % video_id, video_id, 'Downlaoding player') diff --git a/youtube_dl/extractor/gfycat.py b/youtube_dl/extractor/gfycat.py index 397f1d42e..884700c52 100644 --- a/youtube_dl/extractor/gfycat.py +++ b/youtube_dl/extractor/gfycat.py @@ -6,12 +6,13 @@ from ..utils import ( int_or_none, float_or_none, qualities, + ExtractorError, ) class GfycatIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?gfycat\.com/(?P<id>[^/?#]+)' - _TEST = { + _VALID_URL = r'https?://(?:www\.)?gfycat\.com/(?:ifr/)?(?P<id>[^/?#]+)' + _TESTS = [{ 'url': 'http://gfycat.com/DeadlyDecisiveGermanpinscher', 'info_dict': { 'id': 'DeadlyDecisiveGermanpinscher', @@ -27,14 +28,33 @@ class GfycatIE(InfoExtractor): 'categories': list, 'age_limit': 0, } - } + }, { + 'url': 'http://gfycat.com/ifr/JauntyTimelyAmazontreeboa', + 'info_dict': { + 'id': 'JauntyTimelyAmazontreeboa', + 'ext': 'mp4', + 'title': 'JauntyTimelyAmazontreeboa', + 'timestamp': 1411720126, + 'upload_date': '20140926', + 'uploader': 'anonymous', + 'duration': 3.52, + 'view_count': int, + 'like_count': int, + 'dislike_count': int, + 'categories': list, + 'age_limit': 0, + } + }] def _real_extract(self, url): video_id = self._match_id(url) gfy = self._download_json( 'http://gfycat.com/cajax/get/%s' % video_id, - video_id, 'Downloading video info')['gfyItem'] + video_id, 'Downloading video info') + if 'error' in gfy: + raise ExtractorError('Gfycat said: ' + gfy['error'], expected=True) + gfy = gfy['gfyItem'] title = gfy.get('title') or gfy['gfyName'] description = gfy.get('description') diff --git a/youtube_dl/extractor/qqmusic.py b/youtube_dl/extractor/qqmusic.py index bafa81c21..476432330 100644 --- a/youtube_dl/extractor/qqmusic.py +++ b/youtube_dl/extractor/qqmusic.py @@ -9,6 +9,7 @@ from .common import InfoExtractor from ..utils import ( strip_jsonp, unescapeHTML, + clean_html, ) from ..compat import compat_urllib_request @@ -26,6 +27,20 @@ class QQMusicIE(InfoExtractor): 'upload_date': '20141227', 'creator': '林俊杰', 'description': 'md5:d327722d0361576fde558f1ac68a7065', + 'thumbnail': 're:^https?://.*\.jpg$', + } + }, { + 'note': 'There is no mp3-320 version of this song.', + 'url': 'http://y.qq.com/#type=song&mid=004MsGEo3DdNxV', + 'md5': 'fa3926f0c585cda0af8fa4f796482e3e', + 'info_dict': { + 'id': '004MsGEo3DdNxV', + 'ext': 'mp3', + 'title': '如果', + 'upload_date': '20050626', + 'creator': '李季美', + 'description': 'md5:46857d5ed62bc4ba84607a805dccf437', + 'thumbnail': 're:^https?://.*\.jpg$', } }] @@ -68,6 +83,14 @@ class QQMusicIE(InfoExtractor): if lrc_content: lrc_content = lrc_content.replace('\\n', '\n') + thumbnail_url = None + albummid = self._search_regex( + [r'albummid:\'([0-9a-zA-Z]+)\'', r'"albummid":"([0-9a-zA-Z]+)"'], + detail_info_page, 'album mid', default=None) + if albummid: + thumbnail_url = "http://i.gtimg.cn/music/photo/mid_album_500/%s/%s/%s.jpg" \ + % (albummid[-2:-1], albummid[-1], albummid) + guid = self.m_r_get_ruin() vkey = self._download_json( @@ -85,6 +108,7 @@ class QQMusicIE(InfoExtractor): 'preference': details['preference'], 'abr': details.get('abr'), }) + self._check_formats(formats, mid) self._sort_formats(formats) return { @@ -94,6 +118,7 @@ class QQMusicIE(InfoExtractor): 'upload_date': publish_time, 'creator': singer, 'description': lrc_content, + 'thumbnail': thumbnail_url, } @@ -163,31 +188,40 @@ class QQMusicAlbumIE(QQPlaylistBaseIE): IE_NAME = 'qqmusic:album' _VALID_URL = r'http://y.qq.com/#type=album&mid=(?P<id>[0-9A-Za-z]+)' - _TEST = { - 'url': 'http://y.qq.com/#type=album&mid=000gXCTb2AhRR1&play=0', + _TESTS = [{ + 'url': 'http://y.qq.com/#type=album&mid=000gXCTb2AhRR1', 'info_dict': { 'id': '000gXCTb2AhRR1', 'title': '我们都是这样长大的', - 'description': 'md5:d216c55a2d4b3537fe4415b8767d74d6', + 'description': 'md5:179c5dce203a5931970d306aa9607ea6', }, 'playlist_count': 4, - } + }, { + 'url': 'http://y.qq.com/#type=album&mid=002Y5a3b3AlCu3', + 'info_dict': { + 'id': '002Y5a3b3AlCu3', + 'title': '그리고...', + 'description': 'md5:a48823755615508a95080e81b51ba729', + }, + 'playlist_count': 8, + }] def _real_extract(self, url): mid = self._match_id(url) - album_page = self._download_webpage( - self.qq_static_url('album', mid), mid, 'Download album page') + album = self._download_json( + 'http://i.y.qq.com/v8/fcg-bin/fcg_v8_album_info_cp.fcg?albummid=%s&format=json' % mid, + mid, 'Download album page')['data'] - entries = self.get_entries_from_page(album_page) - - album_name = self._html_search_regex( - r"albumname\s*:\s*'([^']+)',", album_page, 'album name', - default=None) - - album_detail = self._html_search_regex( - r'<div class="album_detail close_detail">\s*<p>((?:[^<>]+(?:<br />)?)+)</p>', - album_page, 'album details', default=None) + entries = [ + self.url_result( + 'http://y.qq.com/#type=song&mid=' + song['songmid'], 'QQMusic', song['songmid'] + ) for song in album['list'] + ] + album_name = album.get('name') + album_detail = album.get('desc') + if album_detail is not None: + album_detail = album_detail.strip() return self.playlist_result(entries, mid, album_name, album_detail) @@ -243,3 +277,36 @@ class QQMusicToplistIE(QQPlaylistBaseIE): list_name = topinfo.get('ListName') list_description = topinfo.get('info') return self.playlist_result(entries, list_id, list_name, list_description) + + +class QQMusicPlaylistIE(QQPlaylistBaseIE): + IE_NAME = 'qqmusic:playlist' + _VALID_URL = r'http://y\.qq\.com/#type=taoge&id=(?P<id>[0-9]+)' + + _TEST = { + 'url': 'http://y.qq.com/#type=taoge&id=3462654915', + 'info_dict': { + 'id': '3462654915', + 'title': '韩国5月新歌精选下旬', + 'description': 'md5:d2c9d758a96b9888cf4fe82f603121d4', + }, + 'playlist_count': 40, + } + + def _real_extract(self, url): + list_id = self._match_id(url) + + list_json = self._download_json( + 'http://i.y.qq.com/qzone-music/fcg-bin/fcg_ucc_getcdinfo_byids_cp.fcg?type=1&json=1&utf8=1&onlysong=0&disstid=%s' + % list_id, list_id, 'Download list page', + transform_source=strip_jsonp)['cdlist'][0] + + entries = [ + self.url_result( + 'http://y.qq.com/#type=song&mid=' + song['songmid'], 'QQMusic', song['songmid'] + ) for song in list_json['songlist'] + ] + + list_name = list_json.get('dissname') + list_description = clean_html(unescapeHTML(list_json.get('desc'))) + return self.playlist_result(entries, list_id, list_name, list_description) diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index f2ae109f9..23d153031 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -21,7 +21,17 @@ from ..utils import ( class VKIE(InfoExtractor): IE_NAME = 'vk.com' - _VALID_URL = r'https?://(?:m\.)?vk\.com/(?:video_ext\.php\?.*?\boid=(?P<oid>-?\d+).*?\bid=(?P<id>\d+)|(?:.+?\?.*?z=)?video(?P<videoid>[^s].*?)(?:\?|%2F|$))' + _VALID_URL = r'''(?x) + https?:// + (?: + (?:m\.)?vk\.com/video_ext\.php\?.*?\boid=(?P<oid>-?\d+).*?\bid=(?P<id>\d+)| + (?: + (?:m\.)?vk\.com/(?:.+?\?.*?z=)?video| + (?:www\.)?biqle\.ru/watch/ + ) + (?P<videoid>[^s].*?)(?:\?|%2F|$) + ) + ''' _NETRC_MACHINE = 'vk' _TESTS = [ @@ -110,10 +120,30 @@ class VKIE(InfoExtractor): 'skip': 'Only works from Russia', }, { + # youtube embed + 'url': 'https://vk.com/video276849682_170681728', + 'info_dict': { + 'id': 'V3K4mi0SYkc', + 'ext': 'mp4', + 'title': "DSWD Awards 'Children's Joy Foundation, Inc.' Certificate of Registration and License to Operate", + 'description': 'md5:bf9c26cfa4acdfb146362682edd3827a', + 'duration': 179, + 'upload_date': '20130116', + 'uploader': "Children's Joy Foundation", + 'uploader_id': 'thecjf', + 'view_count': int, + }, + }, + { # removed video, just testing that we match the pattern 'url': 'http://vk.com/feed?z=video-43215063_166094326%2Fbb50cacd3177146d7a', 'only_matching': True, }, + { + # vk wrapper + 'url': 'http://www.biqle.ru/watch/847655_160197695', + 'only_matching': True, + } ] def _login(self): @@ -153,9 +183,14 @@ class VKIE(InfoExtractor): if not video_id: video_id = '%s_%s' % (mobj.group('oid'), mobj.group('id')) - info_url = 'http://vk.com/al_video.php?act=show&al=1&module=video&video=%s' % video_id + info_url = 'https://vk.com/al_video.php?act=show&al=1&module=video&video=%s' % video_id info_page = self._download_webpage(info_url, video_id) + if re.search(r'<!>/login\.php\?.*\bact=security_check', info_page): + raise ExtractorError( + 'You are trying to log in from an unusual location. You should confirm ownership at vk.com to log in with this IP.', + expected=True) + ERRORS = { r'>Видеозапись .*? была изъята из публичного доступа в связи с обращением правообладателя.<': 'Video %s has been removed from public access due to rightholder complaint.', @@ -175,10 +210,11 @@ class VKIE(InfoExtractor): if re.search(error_re, info_page): raise ExtractorError(error_msg % video_id, expected=True) - m_yt = re.search(r'src="(http://www.youtube.com/.*?)"', info_page) - if m_yt is not None: - self.to_screen('Youtube video detected') - return self.url_result(m_yt.group(1), 'Youtube') + youtube_url = self._search_regex( + r'<iframe[^>]+src="((?:https?:)?//www.youtube.com/embed/[^"]+)"', + info_page, 'youtube iframe', default=None) + if youtube_url: + return self.url_result(youtube_url, 'Youtube') m_rutube = re.search( r'\ssrc="((?:https?:)?//rutube\.ru\\?/video\\?/embed(?:.*?))\\?"', info_page) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index eff4aebeb..3364647ed 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2015.07.04' +__version__ = '2015.07.07' |