diff options
| author | Ricardo Constantino <wiiaboo@gmail.com> | 2018-03-07 21:31:53 +0000 | 
|---|---|---|
| committer | Sergey M․ <dstftw@gmail.com> | 2018-03-18 02:48:44 +0700 | 
| commit | 96b8b9abaecb7518d901dc9d6a617f19c3161236 (patch) | |
| tree | aa320ed4589f11131c381e08777265229424ff9c | |
| parent | 178ee88319a384b66d9b2da27a819f32ba870425 (diff) | |
[extractor/generic] Support relative URIs in _parse_xspf
An XSPF <location> element can contain a relative URI, not just an absolute one, so each location is now resolved against the playlist's base URL.
| -rw-r--r-- | test/test_InfoExtractor.py | 42 | ||||
| -rw-r--r-- | test/testdata/xspf/foo_xspf.xspf | 34 | ||||
| -rw-r--r-- | youtube_dl/extractor/common.py | 6 | ||||
| -rw-r--r-- | youtube_dl/extractor/generic.py | 4 | 
4 files changed, 82 insertions, 4 deletions
| diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py index 7b31d5198..a695ce64b 100644 --- a/test/test_InfoExtractor.py +++ b/test/test_InfoExtractor.py @@ -694,6 +694,48 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/                  self.ie._sort_formats(formats)                  expect_value(self, formats, expected_formats, None) +    def test_parse_xspf(self): +        _TEST_CASES = [ +            ( +                'foo_xspf', +                'https://example.org/src/', +                [{ +                    'description': 'Visit http://bigbrother404.bandcamp.com', +                    'duration': 202.416, +                    'formats': [{'url': 'https://example.org/src/cd1/track%201.mp3'}], +                    'id': 'foo_xspf', +                    'title': 'Pandemonium' +                }, +                { +                    'description': 'Visit http://bigbrother404.bandcamp.com', +                    'duration': 255.857, +                    'formats': [{'url': 'https://example.org/%E3%83%88%E3%83%A9%E3%83%83%E3%82%AF%E3%80%80%EF%BC%92.mp3'}], +                    'id': 'foo_xspf', +                    'title': 'Final Cartridge (Nichico Twelve Remix)' +                }, +                { +                    'description': 'Visit http://bigbrother404.bandcamp.com', +                    'duration': 287.915, +                    'formats': [ +                        {'url': 'https://example.org/src/track3.mp3'}, +                        {'url': 'https://example.com/track3.mp3'} +                    ], +                    'id': 'foo_xspf', +                    'title': 'Rebuilding Nightingale' +                }] +            ), +        ] + +        for xspf_file, xspf_base_url, expected_entries in _TEST_CASES: +            with io.open('./test/testdata/xspf/%s.xspf' % xspf_file, +                         mode='r', encoding='utf-8') as f: +                entries = self.ie._parse_xspf( +      
              compat_etree_fromstring(f.read().encode('utf-8')), +                        xspf_file, xspf_base_url) +                expect_value(self, entries, expected_entries, None) +                for i in range(len(entries)): +                    expect_dict(self, entries[i], expected_entries[i]) +  if __name__ == '__main__':      unittest.main() diff --git a/test/testdata/xspf/foo_xspf.xspf b/test/testdata/xspf/foo_xspf.xspf new file mode 100644 index 000000000..b7f0086b3 --- /dev/null +++ b/test/testdata/xspf/foo_xspf.xspf @@ -0,0 +1,34 @@ +<?xml version="1.0" encoding="UTF-8"?> +<playlist version="1" xmlns="http://xspf.org/ns/0/"> +    <date>2018-03-09T18:01:43Z</date> +    <trackList> +        <track> +            <location>cd1/track%201.mp3</location> +            <title>Pandemonium</title> +            <creator>Foilverb</creator> +            <annotation>Visit http://bigbrother404.bandcamp.com</annotation> +            <album>Pandemonium EP</album> +            <trackNum>1</trackNum> +            <duration>202416</duration> +        </track> +        <track> +            <location>../%E3%83%88%E3%83%A9%E3%83%83%E3%82%AF%E3%80%80%EF%BC%92.mp3</location> +            <title>Final Cartridge (Nichico Twelve Remix)</title> +            <annotation>Visit http://bigbrother404.bandcamp.com</annotation> +            <creator>Foilverb</creator> +            <album>Pandemonium EP</album> +            <trackNum>2</trackNum> +            <duration>255857</duration> +        </track> +        <track> +            <location>track3.mp3</location> +            <location>https://example.com/track3.mp3</location> +            <title>Rebuilding Nightingale</title> +            <annotation>Visit http://bigbrother404.bandcamp.com</annotation> +            <creator>Foilverb</creator> +            <album>Pandemonium EP</album> +            <trackNum>3</trackNum> +            <duration>287915</duration> +        </track> +    </trackList> +</playlist> diff --git 
a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index fcdd0fd14..c1e1012e7 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1700,9 +1700,9 @@ class InfoExtractor(object):              'Unable to download xspf manifest', fatal=fatal)          if xspf is False:              return [] -        return self._parse_xspf(xspf, playlist_id) +        return self._parse_xspf(xspf, playlist_id, base_url(playlist_url)) -    def _parse_xspf(self, playlist, playlist_id): +    def _parse_xspf(self, playlist, playlist_id, playlist_base_url=''):          NS_MAP = {              'xspf': 'http://xspf.org/ns/0/',              's1': 'http://static.streamone.nl/player/ns/0', @@ -1720,7 +1720,7 @@ class InfoExtractor(object):                  xpath_text(track, xpath_with_ns('./xspf:duration', NS_MAP), 'duration'), 1000)              formats = [{ -                'url': location.text, +                'url': urljoin(playlist_base_url, location.text),                  'format_id': location.get(xpath_with_ns('s1:label', NS_MAP)),                  'width': int_or_none(location.get(xpath_with_ns('s1:width', NS_MAP))),                  'height': int_or_none(location.get(xpath_with_ns('s1:height', NS_MAP))), diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index dbd565066..023ccbc9b 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -2232,7 +2232,9 @@ class GenericIE(InfoExtractor):                  self._sort_formats(smil['formats'])                  return smil              elif doc.tag == '{http://xspf.org/ns/0/}playlist': -                return self.playlist_result(self._parse_xspf(doc, video_id), video_id) +                return self.playlist_result( +                    self._parse_xspf(doc, video_id, compat_str(full_response.geturl())), +                    video_id)              elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):                  info_dict['formats'] = 
self._parse_mpd_formats(                      doc, | 
