diff options
Diffstat (limited to 'youtube_dl')
| -rw-r--r-- | youtube_dl/extractor/__init__.py | 1 | ||||
| -rw-r--r-- | youtube_dl/extractor/clipsyndicate.py | 14 | ||||
| -rw-r--r-- | youtube_dl/extractor/gfycat.py | 28 | ||||
| -rw-r--r-- | youtube_dl/extractor/qqmusic.py | 97 | ||||
| -rw-r--r-- | youtube_dl/extractor/vk.py | 48 | ||||
| -rw-r--r-- | youtube_dl/version.py | 2 | 
6 files changed, 157 insertions, 33 deletions
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 8665855eb..a0e3b333d 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -432,6 +432,7 @@ from .qqmusic import (      QQMusicSingerIE,      QQMusicAlbumIE,      QQMusicToplistIE, +    QQMusicPlaylistIE,  )  from .quickvid import QuickVidIE  from .r7 import R7IE diff --git a/youtube_dl/extractor/clipsyndicate.py b/youtube_dl/extractor/clipsyndicate.py index d07d544ea..8306d6fb7 100644 --- a/youtube_dl/extractor/clipsyndicate.py +++ b/youtube_dl/extractor/clipsyndicate.py @@ -1,7 +1,5 @@  from __future__ import unicode_literals -import re -  from .common import InfoExtractor  from ..utils import (      find_xpath_attr, @@ -10,9 +8,9 @@ from ..utils import (  class ClipsyndicateIE(InfoExtractor): -    _VALID_URL = r'http://www\.clipsyndicate\.com/video/play(list/\d+)?/(?P<id>\d+)' +    _VALID_URL = r'http://(?:chic|www)\.clipsyndicate\.com/video/play(list/\d+)?/(?P<id>\d+)' -    _TEST = { +    _TESTS = [{          'url': 'http://www.clipsyndicate.com/video/play/4629301/brick_briscoe',          'md5': '4d7d549451bad625e0ff3d7bd56d776c',          'info_dict': { @@ -22,11 +20,13 @@ class ClipsyndicateIE(InfoExtractor):              'duration': 612,              'thumbnail': 're:^https?://.+\.jpg',          }, -    } +    }, { +        'url': 'http://chic.clipsyndicate.com/video/play/5844117/shark_attack', +        'only_matching': True, +    }]      def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) -        video_id = mobj.group('id') +        video_id = self._match_id(url)          js_player = self._download_webpage(              'http://eplayer.clipsyndicate.com/embed/player.js?va_id=%s' % video_id,              video_id, 'Downlaoding player') diff --git a/youtube_dl/extractor/gfycat.py b/youtube_dl/extractor/gfycat.py index 397f1d42e..884700c52 100644 --- a/youtube_dl/extractor/gfycat.py +++ b/youtube_dl/extractor/gfycat.py @@ -6,12 +6,13 @@ from ..utils import (      int_or_none,      float_or_none,      qualities, +    ExtractorError,  )  class GfycatIE(InfoExtractor): -    _VALID_URL = r'https?://(?:www\.)?gfycat\.com/(?P<id>[^/?#]+)' -    _TEST = { +    _VALID_URL = r'https?://(?:www\.)?gfycat\.com/(?:ifr/)?(?P<id>[^/?#]+)' +    _TESTS = [{          'url': 'http://gfycat.com/DeadlyDecisiveGermanpinscher',          'info_dict': {              'id': 'DeadlyDecisiveGermanpinscher', @@ -27,14 +28,33 @@ class GfycatIE(InfoExtractor):              'categories': list,              'age_limit': 0,          } -    } +    }, { +        'url': 'http://gfycat.com/ifr/JauntyTimelyAmazontreeboa', +        'info_dict': { +            'id': 'JauntyTimelyAmazontreeboa', +            'ext': 'mp4', +            'title': 'JauntyTimelyAmazontreeboa', +            'timestamp': 1411720126, +            'upload_date': '20140926', +            'uploader': 'anonymous', +            'duration': 3.52, +            'view_count': int, +            'like_count': int, +            'dislike_count': int, +            'categories': list, +            'age_limit': 0, +        } +    }]      def _real_extract(self, url):          video_id = self._match_id(url)          gfy = self._download_json(              'http://gfycat.com/cajax/get/%s' % video_id, -            video_id, 'Downloading video info')['gfyItem'] +            video_id, 'Downloading video info') +        if 'error' in gfy: +            raise ExtractorError('Gfycat said: ' + gfy['error'], expected=True) +        gfy = gfy['gfyItem']          title = gfy.get('title') or gfy['gfyName']          description = gfy.get('description') diff --git a/youtube_dl/extractor/qqmusic.py b/youtube_dl/extractor/qqmusic.py index bafa81c21..476432330 100644 --- a/youtube_dl/extractor/qqmusic.py +++ b/youtube_dl/extractor/qqmusic.py @@ -9,6 +9,7 @@ from .common import InfoExtractor  from ..utils import (      strip_jsonp,      unescapeHTML, +    clean_html,  )  from ..compat import compat_urllib_request @@ -26,6 +27,20 @@ class QQMusicIE(InfoExtractor):              'upload_date': '20141227',              'creator': '林俊杰',              'description': 'md5:d327722d0361576fde558f1ac68a7065', +            'thumbnail': 're:^https?://.*\.jpg$', +        } +    }, { +        'note': 'There is no mp3-320 version of this song.', +        'url': 'http://y.qq.com/#type=song&mid=004MsGEo3DdNxV', +        'md5': 'fa3926f0c585cda0af8fa4f796482e3e', +        'info_dict': { +            'id': '004MsGEo3DdNxV', +            'ext': 'mp3', +            'title': '如果', +            'upload_date': '20050626', +            'creator': '李季美', +            'description': 'md5:46857d5ed62bc4ba84607a805dccf437', +            'thumbnail': 're:^https?://.*\.jpg$',          }      }] @@ -68,6 +83,14 @@ class QQMusicIE(InfoExtractor):          if lrc_content:              lrc_content = lrc_content.replace('\\n', '\n') +        thumbnail_url = None +        albummid = self._search_regex( +            [r'albummid:\'([0-9a-zA-Z]+)\'', r'"albummid":"([0-9a-zA-Z]+)"'], +            detail_info_page, 'album mid', default=None) +        if albummid: +            thumbnail_url = "http://i.gtimg.cn/music/photo/mid_album_500/%s/%s/%s.jpg" \ +                            % (albummid[-2:-1], albummid[-1], albummid) +          guid = self.m_r_get_ruin()          vkey = self._download_json( @@ -85,6 +108,7 @@ class QQMusicIE(InfoExtractor):                  'preference': details['preference'],                  'abr': details.get('abr'),              }) +        self._check_formats(formats, mid)          self._sort_formats(formats)          return { @@ -94,6 +118,7 @@ class QQMusicIE(InfoExtractor):              'upload_date': publish_time,              'creator': singer,              'description': lrc_content, +            'thumbnail': thumbnail_url,          } @@ -163,31 +188,40 @@ class QQMusicAlbumIE(QQPlaylistBaseIE):      IE_NAME = 'qqmusic:album'      _VALID_URL = r'http://y.qq.com/#type=album&mid=(?P<id>[0-9A-Za-z]+)' -    _TEST = { -        'url': 'http://y.qq.com/#type=album&mid=000gXCTb2AhRR1&play=0', +    _TESTS = [{ +        'url': 'http://y.qq.com/#type=album&mid=000gXCTb2AhRR1',          'info_dict': {              'id': '000gXCTb2AhRR1',              'title': '我们都是这样长大的', -            'description': 'md5:d216c55a2d4b3537fe4415b8767d74d6', +            'description': 'md5:179c5dce203a5931970d306aa9607ea6',          },          'playlist_count': 4, -    } +    }, { +        'url': 'http://y.qq.com/#type=album&mid=002Y5a3b3AlCu3', +        'info_dict': { +            'id': '002Y5a3b3AlCu3', +            'title': '그리고...', +            'description': 'md5:a48823755615508a95080e81b51ba729', +        }, +        'playlist_count': 8, +    }]      def _real_extract(self, url):          mid = self._match_id(url) -        album_page = self._download_webpage( -            self.qq_static_url('album', mid), mid, 'Download album page') +        album = self._download_json( +            'http://i.y.qq.com/v8/fcg-bin/fcg_v8_album_info_cp.fcg?albummid=%s&format=json' % mid, +            mid, 'Download album page')['data'] -        entries = self.get_entries_from_page(album_page) - -        album_name = self._html_search_regex( -            r"albumname\s*:\s*'([^']+)',", album_page, 'album name', -            default=None) - -        album_detail = self._html_search_regex( -            r'<div class="album_detail close_detail">\s*<p>((?:[^<>]+(?:<br />)?)+)</p>', -            album_page, 'album details', default=None) +        entries = [ +            self.url_result( +                'http://y.qq.com/#type=song&mid=' + song['songmid'], 'QQMusic', song['songmid'] +            ) for song in album['list'] +        ] +        album_name = album.get('name') +        album_detail = album.get('desc') +        if album_detail is not None: +            album_detail = album_detail.strip()          return self.playlist_result(entries, mid, album_name, album_detail) @@ -243,3 +277,36 @@ class QQMusicToplistIE(QQPlaylistBaseIE):          list_name = topinfo.get('ListName')          list_description = topinfo.get('info')          return self.playlist_result(entries, list_id, list_name, list_description) + + +class QQMusicPlaylistIE(QQPlaylistBaseIE): +    IE_NAME = 'qqmusic:playlist' +    _VALID_URL = r'http://y\.qq\.com/#type=taoge&id=(?P<id>[0-9]+)' + +    _TEST = { +        'url': 'http://y.qq.com/#type=taoge&id=3462654915', +        'info_dict': { +            'id': '3462654915', +            'title': '韩国5月新歌精选下旬', +            'description': 'md5:d2c9d758a96b9888cf4fe82f603121d4', +        }, +        'playlist_count': 40, +    } + +    def _real_extract(self, url): +        list_id = self._match_id(url) + +        list_json = self._download_json( +            'http://i.y.qq.com/qzone-music/fcg-bin/fcg_ucc_getcdinfo_byids_cp.fcg?type=1&json=1&utf8=1&onlysong=0&disstid=%s' +            % list_id, list_id, 'Download list page', +            transform_source=strip_jsonp)['cdlist'][0] + +        entries = [ +            self.url_result( +                'http://y.qq.com/#type=song&mid=' + song['songmid'], 'QQMusic', song['songmid'] +            ) for song in list_json['songlist'] +        ] + +        list_name = list_json.get('dissname') +        list_description = clean_html(unescapeHTML(list_json.get('desc'))) +        return self.playlist_result(entries, list_id, list_name, list_description) diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index f2ae109f9..23d153031 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -21,7 +21,17 @@ from ..utils import (  class VKIE(InfoExtractor):      IE_NAME = 'vk.com' -    _VALID_URL = r'https?://(?:m\.)?vk\.com/(?:video_ext\.php\?.*?\boid=(?P<oid>-?\d+).*?\bid=(?P<id>\d+)|(?:.+?\?.*?z=)?video(?P<videoid>[^s].*?)(?:\?|%2F|$))' +    _VALID_URL = r'''(?x) +                    https?:// +                        (?: +                            (?:m\.)?vk\.com/video_ext\.php\?.*?\boid=(?P<oid>-?\d+).*?\bid=(?P<id>\d+)| +                            (?: +                                (?:m\.)?vk\.com/(?:.+?\?.*?z=)?video| +                                (?:www\.)?biqle\.ru/watch/ +                            ) +                            (?P<videoid>[^s].*?)(?:\?|%2F|$) +                        ) +                    '''      _NETRC_MACHINE = 'vk'      _TESTS = [ @@ -110,10 +120,30 @@ class VKIE(InfoExtractor):              'skip': 'Only works from Russia',          },          { +            # youtube embed +            'url': 'https://vk.com/video276849682_170681728', +            'info_dict': { +                'id': 'V3K4mi0SYkc', +                'ext': 'mp4', +                'title': "DSWD Awards 'Children's Joy Foundation, Inc.' Certificate of Registration and License to Operate", +                'description': 'md5:bf9c26cfa4acdfb146362682edd3827a', +                'duration': 179, +                'upload_date': '20130116', +                'uploader': "Children's Joy Foundation", +                'uploader_id': 'thecjf', +                'view_count': int, +            }, +        }, +        {              # removed video, just testing that we match the pattern              'url': 'http://vk.com/feed?z=video-43215063_166094326%2Fbb50cacd3177146d7a',              'only_matching': True,          }, +        { +            # vk wrapper +            'url': 'http://www.biqle.ru/watch/847655_160197695', +            'only_matching': True, +        }      ]      def _login(self): @@ -153,9 +183,14 @@ class VKIE(InfoExtractor):          if not video_id:              video_id = '%s_%s' % (mobj.group('oid'), mobj.group('id')) -        info_url = 'http://vk.com/al_video.php?act=show&al=1&module=video&video=%s' % video_id +        info_url = 'https://vk.com/al_video.php?act=show&al=1&module=video&video=%s' % video_id          info_page = self._download_webpage(info_url, video_id) +        if re.search(r'<!>/login\.php\?.*\bact=security_check', info_page): +            raise ExtractorError( +                'You are trying to log in from an unusual location. You should confirm ownership at vk.com to log in with this IP.', +                expected=True) +          ERRORS = {              r'>Видеозапись .*? была изъята из публичного доступа в связи с обращением правообладателя.<':              'Video %s has been removed from public access due to rightholder complaint.', @@ -175,10 +210,11 @@ class VKIE(InfoExtractor):              if re.search(error_re, info_page):                  raise ExtractorError(error_msg % video_id, expected=True) -        m_yt = re.search(r'src="(http://www.youtube.com/.*?)"', info_page) -        if m_yt is not None: -            self.to_screen('Youtube video detected') -            return self.url_result(m_yt.group(1), 'Youtube') +        youtube_url = self._search_regex( +            r'<iframe[^>]+src="((?:https?:)?//www.youtube.com/embed/[^"]+)"', +            info_page, 'youtube iframe', default=None) +        if youtube_url: +            return self.url_result(youtube_url, 'Youtube')          m_rutube = re.search(              r'\ssrc="((?:https?:)?//rutube\.ru\\?/video\\?/embed(?:.*?))\\?"', info_page) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index eff4aebeb..3364647ed 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@  from __future__ import unicode_literals -__version__ = '2015.07.04' +__version__ = '2015.07.07'  | 
