diff options
| -rw-r--r-- | youtube_dl/extractor/extractors.py | 6 | ||||
| -rw-r--r-- | youtube_dl/extractor/xiami.py | 161 | 
2 files changed, 167 insertions, 0 deletions
# -*- coding: utf-8 -*-

# Registration of these extractors lives in
# youtube_dl/extractor/extractors.py, between the xhamster and xminus
# imports:
#
#     from .xiami import (
#         XiamiIE,
#         XiamiAlbumIE,
#         XiamiArtistIE,
#         XiamiCollectionIE
#     )

from __future__ import unicode_literals

from .common import InfoExtractor
from ..utils import (
    xpath_element,
    xpath_text,
    xpath_with_ns,
    int_or_none,
    ExtractorError
)
from ..compat import compat_urllib_parse_unquote


class XiamiBaseIE(InfoExtractor):
    """Shared logic for the xiami.com extractors.

    Every xiami resource (song, album, artist, collection) is resolved
    through the same playlist XML endpoint; subclasses only differ in the
    ``/type/N`` suffix they append to the request.
    """

    _XML_BASE_URL = 'http://www.xiami.com/song/playlist/id'
    _NS_MAP = {'xm': 'http://xspf.org/ns/0/'}

    def _extract_track(self, track):
        """Build an info dict from a single ``track`` XML element.

        Raises ExtractorError (via ``_decrypt``) when the track carries no
        decodable location.
        """
        def field(tag, **kwargs):
            # All fields live in the same namespace; keep the lookups short.
            return xpath_text(
                track, xpath_with_ns('xm:%s' % tag, self._NS_MAP), **kwargs)

        # ''.split(';') yields [''], which is truthy, so an absent artist
        # must be detected on the raw text rather than on the split result.
        artist_text = field('artist', default='')
        artists = artist_text.split(';') if artist_text else []

        info = {
            'id': field('song_id'),
            'title': field('title'),
            'album': field('album_name'),
            'artist': ';'.join(artists) if artists else None,
            'creator': artists[0] if artists else None,
            'url': self._decrypt(field('location')),
            'thumbnail': field('pic', default=None),
            'duration': int_or_none(field('length')),
        }

        lyrics_url = field('lyric')
        if lyrics_url and lyrics_url.endswith('.lrc'):
            info['description'] = self._download_webpage(lyrics_url, info['id'])
        return info

    def _extract_xml(self, _id, typ=''):
        """Download the playlist XML for ``_id`` and return its tracks.

        ``typ`` is the optional ``/type/N`` suffix appended to the request
        URL.  Returns a list of info dicts, one per track.

        Raises ExtractorError when the response has no track list or the
        track list is empty.
        """
        playlist = self._download_xml(
            '%s/%s%s' % (self._XML_BASE_URL, _id, typ), _id)
        tracklist = xpath_element(
            playlist, xpath_with_ns('./xm:trackList', self._NS_MAP))

        # xpath_element may find nothing; check for None explicitly before
        # counting children so the failure surfaces as ExtractorError.
        if tracklist is None or not len(tracklist):
            raise ExtractorError('No track found')
        return [self._extract_track(track) for track in tracklist]

    @staticmethod
    def _decrypt(origin):
        """Decode an obfuscated track location into a plain URL.

        The first character is a digit giving the number of rows; the rest
        of the string is those rows laid end to end, with the leading rows
        one character longer when the length does not divide evenly.  The
        plain text is recovered by reading the rows column by column, then
        URL-unquoting the result and replacing every '^' with '0'.

        Raises ExtractorError when ``origin`` is empty or does not start
        with a digit from 1 to 9.
        """
        if not origin or origin[0] not in '123456789':
            raise ExtractorError('Unable to decode track location')

        row_count = int(origin[0])
        payload = origin[1:]
        short_length, long_rows = divmod(len(payload), row_count)

        rows = []
        offset = 0
        for index in range(row_count):
            # The first `long_rows` rows absorb the remainder characters.
            length = short_length + 1 if index < long_rows else short_length
            rows.append(payload[offset:offset + length])
            offset += length

        chars = []
        for column in range(short_length + 1):
            for row in rows:
                if column < len(row):
                    chars.append(row[column])
        return compat_urllib_parse_unquote(''.join(chars)).replace('^', '0')


class XiamiIE(XiamiBaseIE):
    """Extractor for a single xiami.com song page."""

    IE_NAME = 'xiami:song'
    IE_DESC = '虾米音乐'
    _VALID_URL = r'https?://www\.xiami\.com/song/(?P<id>[0-9]+)'
    _TESTS = [
        {
            'url': 'http://www.xiami.com/song/1775610518',
            'md5': '521dd6bea40fd5c9c69f913c232cb57e',
            'info_dict': {
                'id': '1775610518',
                'ext': 'mp3',
                'title': 'Woman',
                'creator': 'HONNE',
                'album': 'Woman',
                'thumbnail': r're:http://img\.xiami\.net/images/album/.*\.jpg',
                'description': 'md5:052ec7de41ca19f67e7fd70a1bfc4e0b',
            }
        },
        {
            'url': 'http://www.xiami.com/song/1775256504',
            'md5': '932a3abd45c6aa2b1fdbe028fcb4c4fc',
            'info_dict': {
                'id': '1775256504',
                'ext': 'mp3',
                'title': '悟空',
                'creator': '戴荃',
                'album': '悟空',
                'description': 'md5:206e67e84f9bed1d473d04196a00b990',
            }
        },
    ]

    def _real_extract(self, url):
        _id = self._match_id(url)
        # The song endpoint returns a one-track playlist; use its only entry.
        return self._extract_xml(_id)[0]


class XiamiAlbumIE(XiamiBaseIE):
    """Extractor for a xiami.com album page (playlist of its tracks)."""

    IE_NAME = 'xiami:album'
    IE_DESC = '虾米音乐 - 专辑'
    _VALID_URL = r'https?://www\.xiami\.com/album/(?P<id>[0-9]+)'
    _TESTS = [
        {
            'url': 'http://www.xiami.com/album/2100300444',
            'info_dict': {
                'id': '2100300444',
            },
            'playlist_count': 10,
        },
        {
            'url': 'http://www.xiami.com/album/512288?spm=a1z1s.6843761.1110925389.6.hhE9p9',
            'only_matching': True,
        }
    ]

    def _real_extract(self, url):
        _id = self._match_id(url)
        return self.playlist_result(self._extract_xml(_id, '/type/1'), _id)


class XiamiArtistIE(XiamiBaseIE):
    """Extractor for a xiami.com artist page (playlist of tracks)."""

    IE_NAME = 'xiami:artist'
    IE_DESC = '虾米音乐 - 歌手'
    _VALID_URL = r'https?://www\.xiami\.com/artist/(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://www.xiami.com/artist/2132?spm=0.0.0.0.dKaScp',
        'info_dict': {
            'id': '2132',
        },
        'playlist_count': 20,
    }

    def _real_extract(self, url):
        _id = self._match_id(url)
        return self.playlist_result(self._extract_xml(_id, '/type/2'), _id)


class XiamiCollectionIE(XiamiBaseIE):
    """Extractor for a xiami.com collection page (playlist of tracks)."""

    IE_NAME = 'xiami:collection'
    IE_DESC = '虾米音乐 - 精选集'
    _VALID_URL = r'https?://www\.xiami\.com/collect/(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://www.xiami.com/collect/156527391?spm=a1z1s.2943601.6856193.12.4jpBnr',
        'info_dict': {
            'id': '156527391',
        },
        'playlist_count': 26,
    }

    def _real_extract(self, url):
        _id = self._match_id(url)
        return self.playlist_result(self._extract_xml(_id, '/type/3'), _id)
