diff options
| author | aerworker <aerworker@users.noreply.github.com> | 2019-07-13 22:38:47 +0300 | 
|---|---|---|
| committer | Sergey M <dstftw@gmail.com> | 2019-07-14 02:38:47 +0700 | 
| commit | c452790a796730113dd62db0e743b11045606e27 (patch) | |
| tree | 2307faafcdd57e9f60c8f3fada221be906c63c3f | |
| parent | d89a0a8026e0010a96a1309d70f8fcc2164dd5a6 (diff) | |
[yandexmusic] Add support for multi-disc albums and extract track number and disc number (closes #21420) (#21421)
* [yandexmusic] extract tracks from all volumes of an album (closes #21420)
* [yandexmusic] extract genre, disc_number and track_number
* [yandexmusic] extract decomposed artist names
* Update yandexmusic.py
* Update yandexmusic.py
* Update yandexmusic.py
| -rw-r--r-- | youtube_dl/extractor/yandexmusic.py | 63 | 
1 files changed, 55 insertions, 8 deletions
| diff --git a/youtube_dl/extractor/yandexmusic.py b/youtube_dl/extractor/yandexmusic.py index 1dfee59e9..fea817419 100644 --- a/youtube_dl/extractor/yandexmusic.py +++ b/youtube_dl/extractor/yandexmusic.py @@ -51,23 +51,43 @@ class YandexMusicTrackIE(YandexMusicBaseIE):      IE_DESC = 'Яндекс.Музыка - Трек'      _VALID_URL = r'https?://music\.yandex\.(?:ru|kz|ua|by)/album/(?P<album_id>\d+)/track/(?P<id>\d+)' -    _TEST = { +    _TESTS = [{          'url': 'http://music.yandex.ru/album/540508/track/4878838',          'md5': 'f496818aa2f60b6c0062980d2e00dc20',          'info_dict': {              'id': '4878838',              'ext': 'mp3', -            'title': 'Carlo Ambrosio, Carlo Ambrosio & Fabio Di Bari - Gypsy Eyes 1', +            'title': 'Carlo Ambrosio & Fabio Di Bari - Gypsy Eyes 1',              'filesize': 4628061,              'duration': 193.04,              'track': 'Gypsy Eyes 1',              'album': 'Gypsy Soul',              'album_artist': 'Carlo Ambrosio', -            'artist': 'Carlo Ambrosio, Carlo Ambrosio & Fabio Di Bari', +            'artist': 'Carlo Ambrosio & Fabio Di Bari',              'release_year': 2009,          },          'skip': 'Travis CI servers blocked by YandexMusic', -    } +    }, { +        # multiple disks +        'url': 'http://music.yandex.ru/album/3840501/track/705105', +        'md5': 'ebe7b4e2ac7ac03fe11c19727ca6153e', +        'info_dict': { +            'id': '705105', +            'ext': 'mp3', +            'title': 'Hooverphonic - Sometimes', +            'filesize': 5743386, +            'duration': 239.27, +            'track': 'Sometimes', +            'album': 'The Best of Hooverphonic', +            'album_artist': 'Hooverphonic', +            'artist': 'Hooverphonic', +            'release_year': 2016, +            'genre': 'pop', +            'disc_number': 2, +            'track_number': 9, +        }, +        'skip': 'Travis CI servers blocked by YandexMusic', +    }]      def _real_extract(self, 
url):          mobj = re.match(self._VALID_URL, url) @@ -110,9 +130,21 @@ class YandexMusicTrackIE(YandexMusicBaseIE):              'abr': int_or_none(download_data.get('bitrate')),          } +        def extract_artist_name(artist): +            decomposed = artist.get('decomposed') +            if not isinstance(decomposed, list): +                return artist['name'] +            parts = [artist['name']] +            for element in decomposed: +                if isinstance(element, dict) and element.get('name'): +                    parts.append(element['name']) +                elif isinstance(element, compat_str): +                    parts.append(element) +            return ''.join(parts) +          def extract_artist(artist_list):              if artist_list and isinstance(artist_list, list): -                artists_names = [a['name'] for a in artist_list if a.get('name')] +                artists_names = [extract_artist_name(a) for a in artist_list if a.get('name')]                  if artists_names:                      return ', '.join(artists_names) @@ -121,10 +153,17 @@ class YandexMusicTrackIE(YandexMusicBaseIE):              album = albums[0]              if isinstance(album, dict):                  year = album.get('year') +                disc_number = int_or_none(try_get( +                    album, lambda x: x['trackPosition']['volume'])) +                track_number = int_or_none(try_get( +                    album, lambda x: x['trackPosition']['index']))                  track_info.update({                      'album': album.get('title'),                      'album_artist': extract_artist(album.get('artists')),                      'release_year': int_or_none(year), +                    'genre': album.get('genre'), +                    'disc_number': disc_number, +                    'track_number': track_number,                  })          track_artist = extract_artist(track.get('artists')) @@ -152,7 +191,7 @@ class 
YandexMusicAlbumIE(YandexMusicPlaylistBaseIE):      IE_DESC = 'Яндекс.Музыка - Альбом'      _VALID_URL = r'https?://music\.yandex\.(?:ru|kz|ua|by)/album/(?P<id>\d+)/?(\?|$)' -    _TEST = { +    _TESTS = [{          'url': 'http://music.yandex.ru/album/540508',          'info_dict': {              'id': '540508', @@ -160,7 +199,15 @@ class YandexMusicAlbumIE(YandexMusicPlaylistBaseIE):          },          'playlist_count': 50,          'skip': 'Travis CI servers blocked by YandexMusic', -    } +    }, { +        'url': 'https://music.yandex.ru/album/3840501', +        'info_dict': { +            'id': '3840501', +            'title': 'Hooverphonic - The Best of Hooverphonic (2016)', +        }, +        'playlist_count': 33, +        'skip': 'Travis CI servers blocked by YandexMusic', +    }]      def _real_extract(self, url):          album_id = self._match_id(url) @@ -169,7 +216,7 @@ class YandexMusicAlbumIE(YandexMusicPlaylistBaseIE):              'http://music.yandex.ru/handlers/album.jsx?album=%s' % album_id,              album_id, 'Downloading album JSON') -        entries = self._build_playlist(album['volumes'][0]) +        entries = self._build_playlist([track for volume in album['volumes'] for track in volume])          title = '%s - %s' % (album['artists'][0]['name'], album['title'])          year = album.get('year') | 
