diff options
| -rw-r--r-- | Makefile | 2 | ||||
| -rw-r--r-- | README.md | 2 | ||||
| -rwxr-xr-x | youtube_dl/YoutubeDL.py | 4 | ||||
| -rw-r--r-- | youtube_dl/extractor/aol.py | 8 | ||||
| -rw-r--r-- | youtube_dl/extractor/common.py | 2 | ||||
| -rw-r--r-- | youtube_dl/extractor/dailymail.py | 61 | ||||
| -rw-r--r-- | youtube_dl/extractor/extractors.py | 1 | ||||
| -rw-r--r-- | youtube_dl/extractor/fczenit.py | 33 | ||||
| -rw-r--r-- | youtube_dl/extractor/kuwo.py | 2 | ||||
| -rw-r--r-- | youtube_dl/extractor/redtube.py | 58 | ||||
| -rw-r--r-- | youtube_dl/extractor/udemy.py | 28 | ||||
| -rw-r--r-- | youtube_dl/extractor/vevo.py | 17 | ||||
| -rw-r--r-- | youtube_dl/extractor/xfileshare.py | 40 | ||||
| -rw-r--r-- | youtube_dl/extractor/xiami.py | 14 | ||||
| -rw-r--r-- | youtube_dl/extractor/yandexmusic.py | 79 | 
15 files changed, 244 insertions, 107 deletions
| @@ -1,7 +1,7 @@  all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites  clean: -	rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part *.info.json *.mp4 *.flv *.mp3 *.avi CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe +	rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part *.info.json *.mp4 *.flv *.mp3 *.avi *.mkv *.webm CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe  	find . -name "*.pyc" -delete  	find . -name "*.class" -delete @@ -465,7 +465,7 @@ The basic usage is not to set any template arguments when downloading a single f   - `display_id`: An alternative identifier for the video   - `uploader`: Full name of the video uploader   - `license`: License name the video is licensed under - - `creator`: The main artist who created the video + - `creator`: The creator of the video   - `release_date`: The date (YYYYMMDD) when the video was released   - `timestamp`: UNIX timestamp of the moment the video became available   - `upload_date`: Video upload date (YYYYMMDD) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 055433362..2187dcc8f 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -580,7 +580,7 @@ class YoutubeDL(object):                  is_id=(k == 'id'))              template_dict = dict((k, sanitize(k, v))                                   for k, v in template_dict.items() -                                 if v is not None) +                                 if v is not None and not isinstance(v, (list, tuple, dict)))              template_dict = collections.defaultdict(lambda: 'NA', template_dict)              outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL) @@ -1639,7 +1639,7 @@ class YoutubeDL(object):                      # Just a single file                      success = dl(filename, info_dict)              except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: -                self.report_error('unable to download video data: %s' % str(err)) +                self.report_error('unable to download video data: %s' % error_to_compat_str(err))                  return              except (OSError, IOError) as err:                  raise UnavailableVideoError(err) diff --git a/youtube_dl/extractor/aol.py b/youtube_dl/extractor/aol.py index 24df8fe93..42c21bf41 100644 --- a/youtube_dl/extractor/aol.py +++ b/youtube_dl/extractor/aol.py @@ -12,7 +12,7 @@ from ..utils import (  class AolIE(InfoExtractor):      IE_NAME = 'on.aol.com' -    _VALID_URL = r'(?:aol-video:|https?://on\.aol\.com/.*-)(?P<id>[^/?-]+)' +    _VALID_URL = r'(?:aol-video:|https?://on\.aol\.com/(?:[^/]+/)*(?:[^/?#&]+-)?)(?P<id>[^/?#&]+)'      _TESTS = [{          # video with 5min ID @@ -53,6 +53,12 @@ class AolIE(InfoExtractor):      }, {          'url': 'http://on.aol.com/shows/park-bench-shw518173474-559a1b9be4b0c3bfad3357a7?context=SH:SHW518173474:PL4327:1460619712763',          'only_matching': True, +    }, { +        'url': 'http://on.aol.com/video/519442220', +        'only_matching': True, +    }, { +        'url': 'aol-video:5707d6b8e4b090497b04f706', +        'only_matching': True,      }]      def _real_extract(self, url): diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 61a5d124c..0843d89af 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -163,7 +163,7 @@ class InfoExtractor(object):      description:    Full video description.      uploader:       Full name of the video uploader.      license:        License name the video is licensed under. -    creator:        The main artist who created the video. +    creator:        The creator of the video.      release_date:   The date (YYYYMMDD) when the video was released.      timestamp:      UNIX timestamp of the moment the video became available.      upload_date:    Video upload date (YYYYMMDD). diff --git a/youtube_dl/extractor/dailymail.py b/youtube_dl/extractor/dailymail.py new file mode 100644 index 000000000..b60a1d813 --- /dev/null +++ b/youtube_dl/extractor/dailymail.py @@ -0,0 +1,61 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( +    int_or_none, +    determine_protocol, +) + + +class DailyMailIE(InfoExtractor): +    _VALID_URL = r'https?://(?:www\.)?dailymail\.co\.uk/video/[^/]+/video-(?P<id>[0-9]+)' +    _TEST = { +        'url': 'http://www.dailymail.co.uk/video/sciencetech/video-1288527/Turn-video-impressionist-masterpiece.html', +        'md5': '2f639d446394f53f3a33658b518b6615', +        'info_dict': { +            'id': '1288527', +            'ext': 'mp4', +            'title': 'Turn any video into an impressionist masterpiece', +            'description': 'md5:88ddbcb504367987b2708bb38677c9d2', +        } +    } + +    def _real_extract(self, url): +        video_id = self._match_id(url) +        webpage = self._download_webpage(url, video_id) +        video_data = self._parse_json(self._search_regex( +            r"data-opts='({.+?})'", webpage, 'video data'), video_id) +        title = video_data['title'] +        video_sources = self._download_json(video_data.get( +            'sources', {}).get('url') or 'http://www.dailymail.co.uk/api/player/%s/video-sources.json' % video_id, video_id) + +        formats = [] +        for rendition in video_sources['renditions']: +            rendition_url = rendition.get('url') +            if not rendition_url: +                continue +            tbr = int_or_none(rendition.get('encodingRate'), 1000) +            container = rendition.get('videoContainer') +            is_hls = container == 'M2TS' +            protocol = 'm3u8_native' if is_hls else determine_protocol({'url': rendition_url}) +            formats.append({ +                'format_id': ('hls' if is_hls else protocol) + ('-%d' % tbr if tbr else ''), +                'url': rendition_url, +                'width': int_or_none(rendition.get('frameWidth')), +                'height': int_or_none(rendition.get('frameHeight')), +                'tbr': tbr, +                'vcodec': rendition.get('videoCodec'), +                'container': container, +                'protocol': protocol, +                'ext': 'mp4' if is_hls else None, +            }) +        self._sort_formats(formats) + +        return { +            'id': video_id, +            'title': title, +            'description': video_data.get('descr'), +            'thumbnail': video_data.get('poster') or video_data.get('thumbnail'), +            'formats': formats, +        } diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index ef4431364..aac85066f 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -157,6 +157,7 @@ from .cspan import CSpanIE  from .ctsnews import CtsNewsIE  from .cultureunplugged import CultureUnpluggedIE  from .cwtv import CWTVIE +from .dailymail import DailyMailIE  from .dailymotion import (      DailymotionIE,      DailymotionPlaylistIE, diff --git a/youtube_dl/extractor/fczenit.py b/youtube_dl/extractor/fczenit.py index f1f150ef2..8d1010b88 100644 --- a/youtube_dl/extractor/fczenit.py +++ b/youtube_dl/extractor/fczenit.py @@ -1,20 +1,19 @@  # coding: utf-8  from __future__ import unicode_literals -import re -  from .common import InfoExtractor +from ..compat import compat_urlparse  class FczenitIE(InfoExtractor): -    _VALID_URL = r'https?://(?:www\.)?fc-zenit\.ru/video/gl(?P<id>[0-9]+)' +    _VALID_URL = r'https?://(?:www\.)?fc-zenit\.ru/video/(?P<id>[0-9]+)'      _TEST = { -        'url': 'http://fc-zenit.ru/video/gl6785/', -        'md5': '458bacc24549173fe5a5aa29174a5606', +        'url': 'http://fc-zenit.ru/video/41044/', +        'md5': '0e3fab421b455e970fa1aa3891e57df0',          'info_dict': { -            'id': '6785', +            'id': '41044',              'ext': 'mp4', -            'title': '«Зенит-ТВ»: как Олег Шатов играл против «Урала»', +            'title': 'Так пишется история: казанский разгром ЦСКА на «Зенит-ТВ»',          },      } @@ -22,15 +21,23 @@ class FczenitIE(InfoExtractor):          video_id = self._match_id(url)          webpage = self._download_webpage(url, video_id) -        video_title = self._html_search_regex(r'<div class=\"photoalbum__title\">([^<]+)', webpage, 'title') +        video_title = self._html_search_regex( +            r'<[^>]+class=\"photoalbum__title\">([^<]+)', webpage, 'title') + +        video_items = self._parse_json(self._search_regex( +            r'arrPath\s*=\s*JSON\.parse\(\'(.+)\'\)', webpage, 'video items'), +            video_id) -        bitrates_raw = self._html_search_regex(r'bitrates:.*\n(.*)\]', webpage, 'video URL') -        bitrates = re.findall(r'url:.?\'(.+?)\'.*?bitrate:.?([0-9]{3}?)', bitrates_raw) +        def merge_dicts(*dicts): +            ret = {} +            for a_dict in dicts: +                ret.update(a_dict) +            return ret          formats = [{ -            'url': furl, -            'tbr': tbr, -        } for furl, tbr in bitrates] +            'url': compat_urlparse.urljoin(url, video_url), +            'tbr': int(tbr), +        } for tbr, video_url in merge_dicts(*video_items).items()]          self._sort_formats(formats) diff --git a/youtube_dl/extractor/kuwo.py b/youtube_dl/extractor/kuwo.py index 3740869c7..11b31a699 100644 --- a/youtube_dl/extractor/kuwo.py +++ b/youtube_dl/extractor/kuwo.py @@ -283,6 +283,8 @@ class KuwoCategoryIE(InfoExtractor):          category_desc = remove_start(              get_element_by_id('intro', webpage).strip(),              '%s简介:' % category_name) +        if category_desc == '暂无': +            category_desc = None          jsonm = self._parse_json(self._html_search_regex(              r'var\s+jsonm\s*=\s*([^;]+);', webpage, 'category songs'), category_id) diff --git a/youtube_dl/extractor/redtube.py b/youtube_dl/extractor/redtube.py index 7ba41ba59..721fc3a9e 100644 --- a/youtube_dl/extractor/redtube.py +++ b/youtube_dl/extractor/redtube.py @@ -1,7 +1,12 @@  from __future__ import unicode_literals  from .common import InfoExtractor -from ..utils import ExtractorError +from ..utils import ( +    ExtractorError, +    int_or_none, +    str_to_int, +    unified_strdate, +)  class RedTubeIE(InfoExtractor): @@ -13,6 +18,9 @@ class RedTubeIE(InfoExtractor):              'id': '66418',              'ext': 'mp4',              'title': 'Sucked on a toilet', +            'upload_date': '20120831', +            'duration': 596, +            'view_count': int,              'age_limit': 18,          }      } @@ -24,12 +32,39 @@ class RedTubeIE(InfoExtractor):          if any(s in webpage for s in ['video-deleted-info', '>This video has been removed']):              raise ExtractorError('Video %s has been removed' % video_id, expected=True) -        video_url = self._html_search_regex( -            r'<source src="(.+?)" type="video/mp4">', webpage, 'video URL') -        video_title = self._html_search_regex( -            r'<h1 class="videoTitle[^"]*">(.+?)</h1>', -            webpage, 'title') -        video_thumbnail = self._og_search_thumbnail(webpage) +        title = self._html_search_regex( +            (r'<h1 class="videoTitle[^"]*">(?P<title>.+?)</h1>', +             r'videoTitle\s*:\s*(["\'])(?P<title>)\1'), +            webpage, 'title', group='title') + +        formats = [] +        sources = self._parse_json( +            self._search_regex( +                r'sources\s*:\s*({.+?})', webpage, 'source', default='{}'), +            video_id, fatal=False) +        if sources and isinstance(sources, dict): +            for format_id, format_url in sources.items(): +                if format_url: +                    formats.append({ +                        'url': format_url, +                        'format_id': format_id, +                        'height': int_or_none(format_id), +                    }) +        else: +            video_url = self._html_search_regex( +                r'<source src="(.+?)" type="video/mp4">', webpage, 'video URL') +            formats.append({'url': video_url}) +        self._sort_formats(formats) + +        thumbnail = self._og_search_thumbnail(webpage) +        upload_date = unified_strdate(self._search_regex( +            r'<span[^>]+class="added-time"[^>]*>ADDED ([^<]+)<', +            webpage, 'upload date', fatal=False)) +        duration = int_or_none(self._search_regex( +            r'videoDuration\s*:\s*(\d+)', webpage, 'duration', fatal=False)) +        view_count = str_to_int(self._search_regex( +            r'<span[^>]*>VIEWS</span></td>\s*<td>([\d,.]+)', +            webpage, 'view count', fatal=False))          # No self-labeling, but they describe themselves as          # "Home of Videos Porno" @@ -37,9 +72,12 @@ class RedTubeIE(InfoExtractor):          return {              'id': video_id, -            'url': video_url,              'ext': 'mp4', -            'title': video_title, -            'thumbnail': video_thumbnail, +            'title': title, +            'thumbnail': thumbnail, +            'upload_date': upload_date, +            'duration': duration, +            'view_count': view_count,              'age_limit': age_limit, +            'formats': formats,          } diff --git a/youtube_dl/extractor/udemy.py b/youtube_dl/extractor/udemy.py index d1e6f2703..13e0cd237 100644 --- a/youtube_dl/extractor/udemy.py +++ b/youtube_dl/extractor/udemy.py @@ -5,7 +5,6 @@ import re  from .common import InfoExtractor  from ..compat import (      compat_HTTPError, -    compat_urllib_parse_urlencode,      compat_urllib_request,      compat_urlparse,  ) @@ -84,18 +83,19 @@ class UdemyIE(InfoExtractor):          if enroll_url:              webpage = self._download_webpage(                  combine_url(base_url, enroll_url), -                course_id, 'Enrolling in the course') +                course_id, 'Enrolling in the course', +                headers={'Referer': base_url})              if '>You have enrolled in' in webpage:                  self.to_screen('%s: Successfully enrolled in the course' % course_id)      def _download_lecture(self, course_id, lecture_id):          return self._download_json( -            'https://www.udemy.com/api-2.0/users/me/subscribed-courses/%s/lectures/%s?%s' % ( -                course_id, lecture_id, compat_urllib_parse_urlencode({ -                    'fields[lecture]': 'title,description,view_html,asset', -                    'fields[asset]': 'asset_type,stream_url,thumbnail_url,download_urls,data', -                })), -            lecture_id, 'Downloading lecture JSON') +            'https://www.udemy.com/api-2.0/users/me/subscribed-courses/%s/lectures/%s?' +            % (course_id, lecture_id), +            lecture_id, 'Downloading lecture JSON', query={ +                'fields[lecture]': 'title,description,view_html,asset', +                'fields[asset]': 'asset_type,stream_url,thumbnail_url,download_urls,data', +            })      def _handle_error(self, response):          if not isinstance(response, dict): @@ -155,13 +155,13 @@ class UdemyIE(InfoExtractor):              'password': password,          }) -        request = sanitized_Request( -            self._LOGIN_URL, urlencode_postdata(login_form)) -        request.add_header('Referer', self._ORIGIN_URL) -        request.add_header('Origin', self._ORIGIN_URL) -          response = self._download_webpage( -            request, None, 'Logging in as %s' % username) +            self._LOGIN_URL, None, 'Logging in as %s' % username, +            data=urlencode_postdata(login_form), +            headers={ +                'Referer': self._ORIGIN_URL, +                'Origin': self._ORIGIN_URL, +            })          if not is_logged(response):              error = self._html_search_regex( diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py index 63eab4148..c0ef08c02 100644 --- a/youtube_dl/extractor/vevo.py +++ b/youtube_dl/extractor/vevo.py @@ -5,6 +5,7 @@ import re  from .common import InfoExtractor  from ..compat import (      compat_etree_fromstring, +    compat_str,      compat_urlparse,  )  from ..utils import ( @@ -116,6 +117,10 @@ class VevoIE(VevoBaseIE):              'genre': 'Pop',          },          'expected_warnings': ['Failed to download video versions info'], +    }, { +        # no genres available +        'url': 'http://www.vevo.com/watch/INS171400764', +        'only_matching': True,      }]      _SMIL_BASE_URL = 'http://smil.lvl3.vevo.com'      _SOURCE_TYPES = { @@ -184,8 +189,8 @@ class VevoIE(VevoBaseIE):              errnote='Unable to retrieve oauth token')          if 'THIS PAGE IS CURRENTLY UNAVAILABLE IN YOUR REGION' in webpage: -            raise ExtractorError( -                '%s said: This page is currently unavailable in your region.' % self.IE_NAME, expected=True) +            self.raise_geo_restricted( +                '%s said: This page is currently unavailable in your region' % self.IE_NAME)          auth_info = self._parse_json(webpage, video_id)          self._api_url_template = self.http_scheme() + '//apiv2.vevo.com/%s?token=' + auth_info['access_token'] @@ -200,12 +205,10 @@ class VevoIE(VevoBaseIE):          response = self._download_json(              json_url, video_id, 'Downloading video info', 'Unable to download info')          video_info = response.get('video') or {} -        video_versions = video_info.get('videoVersions')          artist = None          featured_artist = None          uploader = None          view_count = None -        timestamp = None          formats = []          if not video_info: @@ -339,7 +342,11 @@ class VevoIE(VevoBaseIE):          if featured_artist:              artist = '%s ft. %s' % (artist, featured_artist)          title = '%s - %s' % (artist, track) if artist else track -        genre = video_info.get('genres', [None])[0] + +        genres = video_info.get('genres') +        genre = ( +            genres[0] if genres and isinstance(genres, list) and +            isinstance(genres[0], compat_str) else None)          is_explicit = video_info.get('isExplicit')          if is_explicit is True: diff --git a/youtube_dl/extractor/xfileshare.py b/youtube_dl/extractor/xfileshare.py index 2d1504eaa..769003735 100644 --- a/youtube_dl/extractor/xfileshare.py +++ b/youtube_dl/extractor/xfileshare.py @@ -13,12 +13,21 @@ from ..utils import (  class XFileShareIE(InfoExtractor): -    IE_DESC = 'XFileShare based sites: GorillaVid.in, daclips.in, movpod.in, fastvideo.in, realvid.net, filehoot.com and vidto.me' -    _VALID_URL = r'''(?x) -        https?://(?P<host>(?:www\.)? -            (?:daclips\.in|gorillavid\.in|movpod\.in|fastvideo\.in|realvid\.net|filehoot\.com|vidto\.me|powerwatch\.pw))/ -        (?:embed-)?(?P<id>[0-9a-zA-Z]+)(?:-[0-9]+x[0-9]+\.html)? -    ''' +    _SITES = ( +        ('daclips.in', 'DaClips'), +        ('filehoot.com', 'FileHoot'), +        ('gorillavid.in', 'GorillaVid'), +        ('movpod.in', 'MovPod'), +        ('powerwatch.pw', 'PowerWatch'), +        ('rapidvideo.ws', 'Rapidvideo.ws'), +        ('thevideobee.to', 'TheVideoBee'), +        ('vidto.me', 'Vidto'), +        ('streamin.to', 'Streamin.To'), +    ) + +    IE_DESC = 'XFileShare based sites: %s' % ', '.join(list(zip(*_SITES))[1]) +    _VALID_URL = (r'https?://(?P<host>(?:www\.)?(?:%s))/(?:embed-)?(?P<id>[0-9a-zA-Z]+)' +                  % '|'.join(re.escape(site) for site in list(zip(*_SITES))[0]))      _FILE_NOT_FOUND_REGEX = r'>(?:404 - )?File Not Found<' @@ -44,25 +53,6 @@ class XFileShareIE(InfoExtractor):              'thumbnail': 're:http://.*\.jpg',          }      }, { -        # video with countdown timeout -        'url': 'http://fastvideo.in/1qmdn1lmsmbw', -        'md5': '8b87ec3f6564a3108a0e8e66594842ba', -        'info_dict': { -            'id': '1qmdn1lmsmbw', -            'ext': 'mp4', -            'title': 'Man of Steel - Trailer', -            'thumbnail': 're:http://.*\.jpg', -        }, -    }, { -        'url': 'http://realvid.net/ctn2y6p2eviw', -        'md5': 'b2166d2cf192efd6b6d764c18fd3710e', -        'info_dict': { -            'id': 'ctn2y6p2eviw', -            'ext': 'flv', -            'title': 'rdx 1955', -            'thumbnail': 're:http://.*\.jpg', -        }, -    }, {          'url': 'http://movpod.in/0wguyyxi1yca',          'only_matching': True,      }, { diff --git a/youtube_dl/extractor/xiami.py b/youtube_dl/extractor/xiami.py index e4ed306b4..a6dfc4af9 100644 --- a/youtube_dl/extractor/xiami.py +++ b/youtube_dl/extractor/xiami.py @@ -9,6 +9,11 @@ from ..utils import int_or_none  class XiamiBaseIE(InfoExtractor):      _API_BASE_URL = 'http://www.xiami.com/song/playlist/cat/json/id' +    def _download_webpage(self, *args, **kwargs): +        webpage = super(XiamiBaseIE, self)._download_webpage(*args, **kwargs) +        if '>Xiami is currently not available in your country.<' in webpage: +            self.raise_geo_restricted('Xiami is currently not available in your country') +      def _extract_track(self, track, track_id=None):          title = track['title']          track_url = self._decrypt(track['location']) @@ -81,7 +86,8 @@ class XiamiSongIE(XiamiBaseIE):                      'ext': 'lrc',                  }],              }, -        } +        }, +        'skip': 'Georestricted',      }, {          'url': 'http://www.xiami.com/song/1775256504',          'md5': '932a3abd45c6aa2b1fdbe028fcb4c4fc', @@ -100,7 +106,8 @@ class XiamiSongIE(XiamiBaseIE):                      'ext': 'lrc',                  }],              }, -        } +        }, +        'skip': 'Georestricted',      }]      def _real_extract(self, url): @@ -124,6 +131,7 @@ class XiamiAlbumIE(XiamiPlaylistBaseIE):              'id': '2100300444',          },          'playlist_count': 10, +        'skip': 'Georestricted',      }, {          'url': 'http://www.xiami.com/album/512288?spm=a1z1s.6843761.1110925389.6.hhE9p9',          'only_matching': True, @@ -141,6 +149,7 @@ class XiamiArtistIE(XiamiPlaylistBaseIE):              'id': '2132',          },          'playlist_count': 20, +        'skip': 'Georestricted',      } @@ -155,4 +164,5 @@ class XiamiCollectionIE(XiamiPlaylistBaseIE):              'id': '156527391',          },          'playlist_mincount': 29, +        'skip': 'Georestricted',      } diff --git a/youtube_dl/extractor/yandexmusic.py b/youtube_dl/extractor/yandexmusic.py index ce3723b55..0f78466e6 100644 --- a/youtube_dl/extractor/yandexmusic.py +++ b/youtube_dl/extractor/yandexmusic.py @@ -10,8 +10,6 @@ from ..utils import (      ExtractorError,      int_or_none,      float_or_none, -    sanitized_Request, -    urlencode_postdata,  ) @@ -177,7 +175,7 @@ class YandexMusicAlbumIE(YandexMusicPlaylistBaseIE):  class YandexMusicPlaylistIE(YandexMusicPlaylistBaseIE):      IE_NAME = 'yandexmusic:playlist'      IE_DESC = 'Яндекс.Музыка - Плейлист' -    _VALID_URL = r'https?://music\.yandex\.(?:ru|kz|ua|by)/users/[^/]+/playlists/(?P<id>\d+)' +    _VALID_URL = r'https?://music\.yandex\.(?P<tld>ru|kz|ua|by)/users/(?P<user>[^/]+)/playlists/(?P<id>\d+)'      _TESTS = [{          'url': 'http://music.yandex.ru/users/music.partners/playlists/1245', @@ -196,47 +194,64 @@ class YandexMusicPlaylistIE(YandexMusicPlaylistBaseIE):              'id': '1036',              'title': 'Музыка 90-х',          }, -        'playlist_count': 310, +        'playlist_mincount': 300,          'skip': 'Travis CI servers blocked by YandexMusic',      }]      def _real_extract(self, url): -        playlist_id = self._match_id(url) - -        webpage = self._download_webpage(url, playlist_id) - -        mu = self._parse_json( -            self._search_regex( -                r'var\s+Mu\s*=\s*({.+?});\s*</script>', webpage, 'player'), -            playlist_id) - -        playlist = mu['pageData']['playlist'] -        tracks, track_ids = playlist['tracks'], playlist['trackIds'] - -        # tracks dictionary shipped with webpage is limited to 150 tracks, +        mobj = re.match(self._VALID_URL, url) +        tld = mobj.group('tld') +        user = mobj.group('user') +        playlist_id = mobj.group('id') + +        playlist = self._download_json( +            'https://music.yandex.%s/handlers/playlist.jsx' % tld, +            playlist_id, 'Downloading missing tracks JSON', +            fatal=False, +            headers={ +                'Referer': url, +                'X-Requested-With': 'XMLHttpRequest', +                'X-Retpath-Y': url, +            }, +            query={ +                'owner': user, +                'kinds': playlist_id, +                'light': 'true', +                'lang': tld, +                'external-domain': 'music.yandex.%s' % tld, +                'overembed': 'false', +            })['playlist'] + +        tracks, track_ids = playlist['tracks'], map(compat_str, playlist['trackIds']) + +        # tracks dictionary shipped with playlist.jsx API is limited to 150 tracks,          # missing tracks should be retrieved manually.          if len(tracks) < len(track_ids): -            present_track_ids = set([compat_str(track['id']) for track in tracks if track.get('id')]) -            missing_track_ids = set(map(compat_str, track_ids)) - set(present_track_ids) -            request = sanitized_Request( -                'https://music.yandex.ru/handlers/track-entries.jsx', -                urlencode_postdata({ +            present_track_ids = set([ +                compat_str(track['id']) +                for track in tracks if track.get('id')]) +            missing_track_ids = [ +                track_id for track_id in track_ids +                if track_id not in present_track_ids] +            missing_tracks = self._download_json( +                'https://music.yandex.%s/handlers/track-entries.jsx' % tld, +                playlist_id, 'Downloading missing tracks JSON', +                fatal=False, +                headers={ +                    'Referer': url, +                    'X-Requested-With': 'XMLHttpRequest', +                }, +                query={                      'entries': ','.join(missing_track_ids), -                    'lang': mu.get('settings', {}).get('lang', 'en'), -                    'external-domain': 'music.yandex.ru', +                    'lang': tld, +                    'external-domain': 'music.yandex.%s' % tld,                      'overembed': 'false', -                    'sign': mu.get('authData', {}).get('user', {}).get('sign'),                      'strict': 'true', -                })) -            request.add_header('Referer', url) -            request.add_header('X-Requested-With', 'XMLHttpRequest') - -            missing_tracks = self._download_json( -                request, playlist_id, 'Downloading missing tracks JSON', fatal=False) +                })              if missing_tracks:                  tracks.extend(missing_tracks)          return self.playlist_result(              self._build_playlist(tracks),              compat_str(playlist_id), -            playlist['title'], playlist.get('description')) +            playlist.get('title'), playlist.get('description')) | 
