diff options
Diffstat (limited to 'youtube_dl/extractor')
-rw-r--r-- | youtube_dl/extractor/empflix.py | 2 | ||||
-rw-r--r-- | youtube_dl/extractor/qqmusic.py | 60 | ||||
-rw-r--r-- | youtube_dl/extractor/teamcoco.py | 29 | ||||
-rw-r--r-- | youtube_dl/extractor/tnaflix.py | 12 |
4 files changed, 58 insertions, 45 deletions
diff --git a/youtube_dl/extractor/empflix.py b/youtube_dl/extractor/empflix.py index 9a5a8f4bb..4827022e0 100644 --- a/youtube_dl/extractor/empflix.py +++ b/youtube_dl/extractor/empflix.py @@ -26,6 +26,6 @@ class EMPFlixIE(TNAFlixIE): }, { 'url': 'http://www.empflix.com/videos/[AROMA][ARMD-718]-Aoi-Yoshino-Sawa-25826.html', - 'matching_only': True, + 'only_matching': True, } ] diff --git a/youtube_dl/extractor/qqmusic.py b/youtube_dl/extractor/qqmusic.py index dc300e189..e24ddaefe 100644 --- a/youtube_dl/extractor/qqmusic.py +++ b/youtube_dl/extractor/qqmusic.py @@ -9,7 +9,6 @@ from .common import InfoExtractor from ..utils import ( strip_jsonp, unescapeHTML, - js_to_json, ) from ..compat import compat_urllib_request @@ -196,60 +195,49 @@ class QQMusicToplistIE(QQPlaylistBaseIE): _VALID_URL = r'http://y\.qq\.com/#type=toplist&p=(?P<id>(top|global)_[0-9]+)' _TESTS = [{ - 'url': 'http://y.qq.com/#type=toplist&p=global_12', + 'url': 'http://y.qq.com/#type=toplist&p=global_123', 'info_dict': { - 'id': 'global_12', - 'title': 'itunes榜', + 'id': 'global_123', + 'title': '美国iTunes榜', }, 'playlist_count': 10, }, { - 'url': 'http://y.qq.com/#type=toplist&p=top_6', + 'url': 'http://y.qq.com/#type=toplist&p=top_3', 'info_dict': { - 'id': 'top_6', + 'id': 'top_3', 'title': 'QQ音乐巅峰榜·欧美', + 'description': 'QQ音乐巅峰榜·欧美根据用户收听行为自动生成,集结当下最流行的欧美新歌!:更新时间:每周四22点|统' + '计周期:一周(上周四至本周三)|统计对象:三个月内发行的欧美歌曲|统计数量:100首|统计算法:根据' + '歌曲在一周内的有效播放次数,由高到低取前100名(同一歌手最多允许5首歌曲同时上榜)|有效播放次数:' + '登录用户完整播放一首歌曲,记为一次有效播放;同一用户收听同一首歌曲,每天记录为1次有效播放' }, 'playlist_count': 100, }, { - 'url': 'http://y.qq.com/#type=toplist&p=global_5', + 'url': 'http://y.qq.com/#type=toplist&p=global_106', 'info_dict': { - 'id': 'global_5', - 'title': '韩国mnet排行榜', + 'id': 'global_106', + 'title': '韩国Mnet榜', }, 'playlist_count': 50, }] - @staticmethod - def strip_qq_jsonp(code): - return js_to_json(re.sub(r'^MusicJsonCallback\((.*?)\)/\*.+?\*/$', r'\1', code)) - def _real_extract(self, url): list_id = self._match_id(url) list_type, num_id = list_id.split("_") - list_page = self._download_webpage( - "http://y.qq.com/y/static/toplist/index/%s.html" % list_id, - list_id, 'Download toplist page') - - entries = [] - if list_type == 'top': - jsonp_url = "http://y.qq.com/y/static/toplist/json/top/%s/1.js" % num_id - else: - jsonp_url = "http://y.qq.com/y/static/toplist/json/global/%s/1_1.js" % num_id - toplist_json = self._download_json( - jsonp_url, list_id, note='Retrieve toplist json', - errnote='Unable to get toplist json', transform_source=self.strip_qq_jsonp) - - for song in toplist_json['l']: - s = song['s'] - song_mid = s.split("|")[20] - entries.append(self.url_result( - 'http://y.qq.com/#type=song&mid=' + song_mid, 'QQMusic', - song_mid)) + 'http://i.y.qq.com/v8/fcg-bin/fcg_v8_toplist_cp.fcg?type=%s&topid=%s&format=json' + % (list_type, num_id), + list_id, 'Download toplist page') - list_name = self._html_search_regex( - r'<h2 id="top_name">([^\']+)</h2>', list_page, 'top list name', - default=None) + entries = [ + self.url_result( + 'http://y.qq.com/#type=song&mid=' + song['data']['songmid'], 'QQMusic', song['data']['songmid'] + ) for song in toplist_json['songlist'] + ] - return self.playlist_result(entries, list_id, list_name) + topinfo = toplist_json.get('topinfo', {}) + list_name = topinfo.get('ListName') + list_description = topinfo.get('info') + return self.playlist_result(entries, list_id, list_name, list_description) diff --git a/youtube_dl/extractor/teamcoco.py b/youtube_dl/extractor/teamcoco.py index b2a4b1fc0..d1b7264b4 100644 --- a/youtube_dl/extractor/teamcoco.py +++ b/youtube_dl/extractor/teamcoco.py @@ -51,6 +51,17 @@ class TeamcocoIE(InfoExtractor): 'params': { 'skip_download': True, # m3u8 downloads } + }, { + 'url': 'http://teamcoco.com/video/full-episode-mon-6-1-joel-mchale-jake-tapper-and-musical-guest-courtney-barnett?playlist=x;eyJ0eXBlIjoidGFnIiwiaWQiOjl9', + 'info_dict': { + 'id': '89341', + 'ext': 'mp4', + 'title': 'Full Episode - Mon. 6/1 - Joel McHale, Jake Tapper, And Musical Guest Courtney Barnett', + 'description': 'Guests: Joel McHale, Jake Tapper, And Musical Guest Courtney Barnett', + }, + 'params': { + 'skip_download': True, # m3u8 downloads + } } ] _VIDEO_ID_REGEXES = ( @@ -110,9 +121,23 @@ class TeamcocoIE(InfoExtractor): get_quality = qualities(['500k', '480p', '1000k', '720p', '1080p']) for filed in data['files']: if determine_ext(filed['url']) == 'm3u8': - formats.extend(self._extract_m3u8_formats( - filed['url'], video_id, ext='mp4')) + # compat_urllib_parse.urljoin does not work here + if filed['url'].startswith('/'): + m3u8_url = 'http://ht.cdn.turner.com/tbs/big/teamcoco' + filed['url'] + else: + m3u8_url = filed['url'] + m3u8_formats = self._extract_m3u8_formats( + m3u8_url, video_id, ext='mp4') + for m3u8_format in m3u8_formats: + if m3u8_format not in formats: + formats.append(m3u8_format) + elif determine_ext(filed['url']) == 'f4m': + # TODO Correct f4m extraction + continue else: + if filed['url'].startswith('/mp4:protected/'): + # TODO Correct extraction for these files + continue m_format = re.search(r'(\d+(k|p))\.mp4', filed['url']) if m_format is not None: format_id = m_format.group(1) diff --git a/youtube_dl/extractor/tnaflix.py b/youtube_dl/extractor/tnaflix.py index 59af9aba0..c282865b2 100644 --- a/youtube_dl/extractor/tnaflix.py +++ b/youtube_dl/extractor/tnaflix.py @@ -33,7 +33,7 @@ class TNAFlixIE(InfoExtractor): }, { 'url': 'https://www.tnaflix.com/amateur-porn/bunzHD-Ms.Donk/video358632', - 'matching_only': True, + 'only_matching': True, } ] @@ -51,9 +51,8 @@ class TNAFlixIE(InfoExtractor): age_limit = self._rta_search(webpage) - duration = self._html_search_meta('duration', webpage, 'duration', default=None) - if duration: - duration = parse_duration(duration[1:]) + duration = parse_duration(self._html_search_meta( + 'duration', webpage, 'duration', default=None)) cfg_url = self._proto_relative_url(self._html_search_regex( self._CONFIG_REGEX, webpage, 'flashvars.config'), 'http:') @@ -62,14 +61,15 @@ class TNAFlixIE(InfoExtractor): cfg_url, display_id, note='Downloading metadata', transform_source=fix_xml_ampersands) - thumbnail = cfg_xml.find('./startThumb').text + thumbnail = self._proto_relative_url( + cfg_xml.find('./startThumb').text, 'http:') formats = [] for item in cfg_xml.findall('./quality/item'): video_url = re.sub('speed=\d+', 'speed=', item.find('videoLink').text) format_id = item.find('res').text fmt = { - 'url': video_url, + 'url': self._proto_relative_url(video_url, 'http:'), 'format_id': format_id, } m = re.search(r'^(\d+)', format_id) |