diff options
| author | Sergey M․ <dstftw@gmail.com> | 2020-05-02 07:18:08 +0700 | 
|---|---|---|
| committer | Sergey M․ <dstftw@gmail.com> | 2020-05-02 07:18:08 +0700 | 
| commit | e40c758c2a8f049cd254ad73f0e6ade00bd004d9 (patch) | |
| tree | 5a474bfd28a9bfad1025180a263f8e222154245b | |
| parent | 011e75e641ad2b7fa141a05fa35d3e5d6d0b736b (diff) | |
[youtube] Improve player id extraction and add tests
| -rw-r--r-- | test/test_youtube_signature.py | 22 | ||||
| -rw-r--r-- | youtube_dl/extractor/youtube.py | 38 | 
2 files changed, 39 insertions, 21 deletions
| diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py index f0c370eee..69df30eda 100644 --- a/test/test_youtube_signature.py +++ b/test/test_youtube_signature.py @@ -74,6 +74,28 @@ _TESTS = [  ] +class TestPlayerInfo(unittest.TestCase): +    def test_youtube_extract_player_info(self): +        PLAYER_URLS = ( +            ('https://www.youtube.com/s/player/64dddad9/player_ias.vflset/en_US/base.js', '64dddad9'), +            # obsolete +            ('https://www.youtube.com/yts/jsbin/player_ias-vfle4-e03/en_US/base.js', 'vfle4-e03'), +            ('https://www.youtube.com/yts/jsbin/player_ias-vfl49f_g4/en_US/base.js', 'vfl49f_g4'), +            ('https://www.youtube.com/yts/jsbin/player_ias-vflCPQUIL/en_US/base.js', 'vflCPQUIL'), +            ('https://www.youtube.com/yts/jsbin/player-vflzQZbt7/en_US/base.js', 'vflzQZbt7'), +            ('https://www.youtube.com/yts/jsbin/player-en_US-vflaxXRn1/base.js', 'vflaxXRn1'), +            ('https://s.ytimg.com/yts/jsbin/html5player-en_US-vflXGBaUN.js', 'vflXGBaUN'), +            ('https://s.ytimg.com/yts/jsbin/html5player-en_US-vflKjOTVq/html5player.js', 'vflKjOTVq'), +            ('http://s.ytimg.com/yt/swfbin/watch_as3-vflrEm9Nq.swf', 'vflrEm9Nq'), +            ('https://s.ytimg.com/yts/swfbin/player-vflenCdZL/watch_as3.swf', 'vflenCdZL'), +        ) +        for player_url, expected_player_id in PLAYER_URLS: +            expected_player_type = player_url.split('.')[-1] +            player_type, player_id = YoutubeIE._extract_player_info(player_url) +            self.assertEqual(player_type, expected_player_type) +            self.assertEqual(player_id, expected_player_id) + +  class TestSignature(unittest.TestCase):      def setUp(self):          TEST_DIR = os.path.dirname(os.path.abspath(__file__)) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 28886cff2..5ea66c962 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -426,6 +426,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):                       (?(1).+)?                                                # if we found the ID, everything can follow                       $""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}      _NEXT_URL_RE = r'[\?&]next_url=([^&]+)' +    _PLAYER_INFO_RE = ( +        r'/(?P<id>[a-zA-Z0-9_-]{8,})/player_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?/base\.(?P<ext>[a-z]+)$', +        r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.(?P<ext>[a-z]+)$', +    )      _formats = {          '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},          '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'}, @@ -1273,14 +1277,18 @@ class YoutubeIE(YoutubeBaseInfoExtractor):          """ Return a string representation of a signature """          return '.'.join(compat_str(len(part)) for part in example_sig.split('.')) -    def _extract_signature_function(self, video_id, player_url, example_sig): -        id_m = re.match( -            r'.*?[-.](?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|(?:/[a-z]{2,3}_[A-Z]{2})?/base)?\.(?P<ext>[a-z]+)$', -            player_url) -        if not id_m: +    @classmethod +    def _extract_player_info(cls, player_url): +        for player_re in cls._PLAYER_INFO_RE: +            id_m = re.search(player_re, player_url) +            if id_m: +                break +        else:              raise ExtractorError('Cannot identify player %r' % player_url) -        player_type = id_m.group('ext') -        player_id = id_m.group('id') +        return id_m.group('ext'), id_m.group('id') + +    def _extract_signature_function(self, video_id, player_url, example_sig): +        player_type, player_id = self._extract_player_info(player_url)          # Read from filesystem cache          func_id = '%s_%s_%s' % ( @@ -2009,22 +2017,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):                          if self._downloader.params.get('verbose'):                              if player_url is None: -                                player_version = 'unknown'                                  player_desc = 'unknown'                              else: -                                if player_url.endswith('swf'): -                                    player_version = self._search_regex( -                                        r'-(.+?)(?:/watch_as3)?\.swf$', player_url, -                                        'flash player', fatal=False) -                                    player_desc = 'flash player %s' % player_version -                                else: -                                    player_version = self._search_regex( -                                        [r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js', -                                         r'(?:www|player(?:_ias)?)[-.]([^/]+)(?:/[a-z]{2,3}_[A-Z]{2})?/base\.js'], -                                        player_url, -                                        'html5 player', fatal=False) -                                    player_desc = 'html5 player %s' % player_version - +                                player_type, player_version = self._extract_player_info(player_url) +                                player_desc = '%s player %s' % ('flash' if player_type == 'swf' else 'html5', player_version)                              parts_sizes = self._signature_cache_id(encrypted_sig)                              self.to_screen('{%s} signature length %s, %s' %                                             (format_id, parts_sizes, player_desc)) | 
