diff options
| -rw-r--r-- | test/test_InfoExtractor.py | 41 | ||||
| -rw-r--r-- | youtube_dl/extractor/common.py | 43 | ||||
| -rw-r--r-- | youtube_dl/extractor/generic.py | 4 | 
3 files changed, 52 insertions, 36 deletions
| diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py index a695ce64b..4833396a5 100644 --- a/test/test_InfoExtractor.py +++ b/test/test_InfoExtractor.py @@ -698,40 +698,47 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/          _TEST_CASES = [              (                  'foo_xspf', -                'https://example.org/src/', +                'https://example.org/src/foo_xspf.xspf',                  [{ +                    'id': 'foo_xspf', +                    'title': 'Pandemonium',                      'description': 'Visit http://bigbrother404.bandcamp.com',                      'duration': 202.416, -                    'formats': [{'url': 'https://example.org/src/cd1/track%201.mp3'}], +                    'formats': [{ +                        'manifest_url': 'https://example.org/src/foo_xspf.xspf', +                        'url': 'https://example.org/src/cd1/track%201.mp3', +                    }], +                }, {                      'id': 'foo_xspf', -                    'title': 'Pandemonium' -                }, -                { +                    'title': 'Final Cartridge (Nichico Twelve Remix)',                      'description': 'Visit http://bigbrother404.bandcamp.com',                      'duration': 255.857, -                    'formats': [{'url': 'https://example.org/%E3%83%88%E3%83%A9%E3%83%83%E3%82%AF%E3%80%80%EF%BC%92.mp3'}], +                    'formats': [{ +                        'manifest_url': 'https://example.org/src/foo_xspf.xspf', +                        'url': 'https://example.org/%E3%83%88%E3%83%A9%E3%83%83%E3%82%AF%E3%80%80%EF%BC%92.mp3', +                    }], +                }, {                      'id': 'foo_xspf', -                    'title': 'Final Cartridge (Nichico Twelve Remix)' -                }, -                { +                    'title': 'Rebuilding Nightingale',                      'description': 'Visit http://bigbrother404.bandcamp.com',                      'duration': 287.915, -                    'formats': [ -                        {'url': 'https://example.org/src/track3.mp3'}, -                        {'url': 'https://example.com/track3.mp3'} -                    ], -                    'id': 'foo_xspf', -                    'title': 'Rebuilding Nightingale' +                    'formats': [{ +                        'manifest_url': 'https://example.org/src/foo_xspf.xspf', +                        'url': 'https://example.org/src/track3.mp3', +                    }, { +                        'manifest_url': 'https://example.org/src/foo_xspf.xspf', +                        'url': 'https://example.com/track3.mp3', +                    }]                  }]              ),          ] -        for xspf_file, xspf_base_url, expected_entries in _TEST_CASES: +        for xspf_file, xspf_url, expected_entries in _TEST_CASES:              with io.open('./test/testdata/xspf/%s.xspf' % xspf_file,                           mode='r', encoding='utf-8') as f:                  entries = self.ie._parse_xspf(                      compat_etree_fromstring(f.read().encode('utf-8')), -                        xspf_file, xspf_base_url) +                    xspf_file, xspf_url=xspf_url, xspf_base_url=xspf_url)                  expect_value(self, entries, expected_entries, None)                  for i in range(len(entries)):                      expect_dict(self, entries[i], expected_entries[i]) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index a50778509..2e2a02948 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1706,22 +1706,24 @@ class InfoExtractor(object):              })          return subtitles -    def _extract_xspf_playlist(self, playlist_url, playlist_id, fatal=True): +    def _extract_xspf_playlist(self, xspf_url, playlist_id, fatal=True):          xspf = self._download_xml( -            playlist_url, playlist_id, 'Downloading xpsf playlist', +            xspf_url, playlist_id, 'Downloading xpsf playlist',              'Unable to download xspf manifest', fatal=fatal)          if xspf is False:              return [] -        return self._parse_xspf(xspf, playlist_id, base_url(playlist_url)) +        return self._parse_xspf( +            xspf, playlist_id, xspf_url=xspf_url, +            xspf_base_url=base_url(xspf_url)) -    def _parse_xspf(self, playlist, playlist_id, playlist_base_url=''): +    def _parse_xspf(self, xspf_doc, playlist_id, xspf_url=None, xspf_base_url=None):          NS_MAP = {              'xspf': 'http://xspf.org/ns/0/',              's1': 'http://static.streamone.nl/player/ns/0',          }          entries = [] -        for track in playlist.findall(xpath_with_ns('./xspf:trackList/xspf:track', NS_MAP)): +        for track in xspf_doc.findall(xpath_with_ns('./xspf:trackList/xspf:track', NS_MAP)):              title = xpath_text(                  track, xpath_with_ns('./xspf:title', NS_MAP), 'title', default=playlist_id)              description = xpath_text( @@ -1731,12 +1733,18 @@ class InfoExtractor(object):              duration = float_or_none(                  xpath_text(track, xpath_with_ns('./xspf:duration', NS_MAP), 'duration'), 1000) -            formats = [{ -                'url': urljoin(playlist_base_url, location.text), -                'format_id': location.get(xpath_with_ns('s1:label', NS_MAP)), -                'width': int_or_none(location.get(xpath_with_ns('s1:width', NS_MAP))), -                'height': int_or_none(location.get(xpath_with_ns('s1:height', NS_MAP))), -            } for location in track.findall(xpath_with_ns('./xspf:location', NS_MAP))] +            formats = [] +            for location in track.findall(xpath_with_ns('./xspf:location', NS_MAP)): +                format_url = urljoin(xspf_base_url, location.text) +                if not format_url: +                    continue +                formats.append({ +                    'url': format_url, +                    'manifest_url': xspf_url, +                    'format_id': location.get(xpath_with_ns('s1:label', NS_MAP)), +                    'width': int_or_none(location.get(xpath_with_ns('s1:width', NS_MAP))), +                    'height': int_or_none(location.get(xpath_with_ns('s1:height', NS_MAP))), +                })              self._sort_formats(formats)              entries.append({ @@ -1750,18 +1758,18 @@ class InfoExtractor(object):          return entries      def _extract_mpd_formats(self, mpd_url, video_id, mpd_id=None, note=None, errnote=None, fatal=True, formats_dict={}): -        res = self._download_webpage_handle( +        res = self._download_xml_handle(              mpd_url, video_id,              note=note or 'Downloading MPD manifest',              errnote=errnote or 'Failed to download MPD manifest',              fatal=fatal)          if res is False:              return [] -        mpd, urlh = res +        mpd_doc, urlh = res          mpd_base_url = base_url(urlh.geturl())          return self._parse_mpd_formats( -            compat_etree_fromstring(mpd.encode('utf-8')), mpd_id, mpd_base_url, +            mpd_doc, mpd_id=mpd_id, mpd_base_url=mpd_base_url,              formats_dict=formats_dict, mpd_url=mpd_url)      def _parse_mpd_formats(self, mpd_doc, mpd_id=None, mpd_base_url='', formats_dict={}, mpd_url=None): @@ -2035,17 +2043,16 @@ class InfoExtractor(object):          return formats      def _extract_ism_formats(self, ism_url, video_id, ism_id=None, note=None, errnote=None, fatal=True): -        res = self._download_webpage_handle( +        res = self._download_xml_handle(              ism_url, video_id,              note=note or 'Downloading ISM manifest',              errnote=errnote or 'Failed to download ISM manifest',              fatal=fatal)          if res is False:              return [] -        ism, urlh = res +        ism_doc, urlh = res -        return self._parse_ism_formats( -            compat_etree_fromstring(ism.encode('utf-8')), urlh.geturl(), ism_id) +        return self._parse_ism_formats(ism_doc, urlh.geturl(), ism_id)      def _parse_ism_formats(self, ism_doc, ism_url, ism_id=None):          """ diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 023ccbc9b..1cc491b19 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -2233,7 +2233,9 @@ class GenericIE(InfoExtractor):                  return smil              elif doc.tag == '{http://xspf.org/ns/0/}playlist':                  return self.playlist_result( -                    self._parse_xspf(doc, video_id, compat_str(full_response.geturl())), +                    self._parse_xspf( +                        doc, video_id, xspf_url=url, +                        xspf_base_url=compat_str(full_response.geturl())),                      video_id)              elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):                  info_dict['formats'] = self._parse_mpd_formats( | 
