diff options
| -rw-r--r-- | test/helper.py | 3 | ||||
| -rw-r--r-- | test/test_utils.py | 5 | ||||
| -rw-r--r-- | youtube_dl/extractor/dailymotion.py | 2 | ||||
| -rw-r--r-- | youtube_dl/extractor/spiegel.py | 62 | ||||
| -rw-r--r-- | youtube_dl/extractor/youtube.py | 2 | ||||
| -rw-r--r-- | youtube_dl/version.py | 2 | 
6 files changed, 52 insertions, 24 deletions
| diff --git a/test/helper.py b/test/helper.py index 325f72f0a..8be37a183 100644 --- a/test/helper.py +++ b/test/helper.py @@ -145,7 +145,8 @@ def expect_info_dict(self, expected_dict, got_dict):          info_dict_str = ''.join(              '    %s: %s,\n' % (_repr(k), _repr(v))              for k, v in test_info_dict.items()) -        write_string('\n"info_dict": {\n' + info_dict_str + '}\n', out=sys.stderr) +        write_string( +            '\n\'info_dict\': {\n' + info_dict_str + '}\n', out=sys.stderr)          self.assertFalse(              missing_keys,              'Missing keys in test definition: %s' % ( diff --git a/test/test_utils.py b/test/test_utils.py index e59547784..0c11d0438 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -16,6 +16,7 @@ import json  import xml.etree.ElementTree  from youtube_dl.utils import ( +    clean_html,      DateRange,      encodeFilename,      find_xpath_attr, @@ -345,5 +346,9 @@ class TestUtil(unittest.TestCase):          on = js_to_json('{"abc": true}')          self.assertEqual(json.loads(on), {'abc': True}) +    def test_clean_html(self): +        self.assertEqual(clean_html('a:\nb'), 'a: b') +        self.assertEqual(clean_html('a:\n   "b"'), 'a:    "b"') +  if __name__ == '__main__':      unittest.main() diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index dbcf5d6a7..fd4bc75b2 100644 --- a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@ -94,7 +94,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):          # It may just embed a vevo video:          m_vevo = re.search( -            r'<link rel="video_src" href="[^"]*?vevo.com[^"]*?videoId=(?P<id>[\w]*)', +            r'<link rel="video_src" href="[^"]*?vevo.com[^"]*?video=(?P<id>[\w]*)',              webpage)          if m_vevo is not None:              vevo_id = m_vevo.group('id') diff --git a/youtube_dl/extractor/spiegel.py b/youtube_dl/extractor/spiegel.py index 9ed7d3b39..9586a7da2 100644 --- a/youtube_dl/extractor/spiegel.py +++ b/youtube_dl/extractor/spiegel.py @@ -4,11 +4,11 @@ from __future__ import unicode_literals  import re  from .common import InfoExtractor -from ..utils import compat_urlparse +from ..compat import compat_urlparse  class SpiegelIE(InfoExtractor): -    _VALID_URL = r'https?://(?:www\.)?spiegel\.de/video/[^/]*-(?P<videoID>[0-9]+)(?:\.html)?(?:#.*)?$' +    _VALID_URL = r'https?://(?:www\.)?spiegel\.de/video/[^/]*-(?P<id>[0-9]+)(?:-embed)?(?:\.html)?(?:#.*)?$'      _TESTS = [{          'url': 'http://www.spiegel.de/video/vulkan-tungurahua-in-ecuador-ist-wieder-aktiv-video-1259285.html',          'md5': '2c2754212136f35fb4b19767d242f66e', @@ -29,16 +29,24 @@ class SpiegelIE(InfoExtractor):              'description': 'md5:c2322b65e58f385a820c10fa03b2d088',              'duration': 983,          }, +    }, { +        'url': 'http://www.spiegel.de/video/astronaut-alexander-gerst-von-der-iss-station-beantwortet-fragen-video-1519126-embed.html', +        'md5': 'd8eeca6bfc8f1cd6f490eb1f44695d51', +        'info_dict': { +            'id': '1519126', +            'ext': 'mp4', +            'description': 'SPIEGEL ONLINE-Nutzer durften den deutschen Astronauten Alexander Gerst über sein Leben auf der ISS-Station befragen. Hier kommen seine Antworten auf die besten sechs Fragen.', +            'title': 'Fragen an Astronaut Alexander Gerst: "Bekommen Sie die Tageszeiten mit?"', +        }      }]      def _real_extract(self, url): -        m = re.match(self._VALID_URL, url) -        video_id = m.group('videoID') - +        video_id = self._match_id(url)          webpage = self._download_webpage(url, video_id) -        title = self._html_search_regex( -            r'<div class="module-title">(.*?)</div>', webpage, 'title') +        title = re.sub(r'\s+', ' ', self._html_search_regex( +            r'(?s)<(?:h1|div) class="module-title"[^>]*>(.*?)</(?:h1|div)>', +            webpage, 'title'))          description = self._html_search_meta('description', webpage, 'description')          base_url = self._search_regex( @@ -79,7 +87,7 @@ class SpiegelArticleIE(InfoExtractor):      _VALID_URL = 'https?://www\.spiegel\.de/(?!video/)[^?#]*?-(?P<id>[0-9]+)\.html'      IE_NAME = 'Spiegel:Article'      IE_DESC = 'Articles on spiegel.de' -    _TEST = { +    _TESTS = [{          'url': 'http://www.spiegel.de/sport/sonst/badminton-wm-die-randsportart-soll-populaerer-werden-a-987092.html',          'info_dict': {              'id': '1516455', @@ -87,20 +95,34 @@ class SpiegelArticleIE(InfoExtractor):              'title': 'Faszination Badminton: Nennt es bloß nicht Federball',              'description': 're:^Patrick Kämnitz gehört.{100,}',          }, -    } +    }, { +        'url': 'http://www.spiegel.de/wissenschaft/weltall/astronaut-alexander-gerst-antwortet-spiegel-online-lesern-a-989876.html', +        'info_dict': { -    def _real_extract(self, url): -        m = re.match(self._VALID_URL, url) -        video_id = m.group('id') +        }, +        'playlist_count': 6, +    }] +    def _real_extract(self, url): +        video_id = self._match_id(url)          webpage = self._download_webpage(url, video_id) + +        # Single video on top of the page          video_link = self._search_regex(              r'<a href="([^"]+)" onclick="return spOpenVideo\(this,', webpage, -            'video page URL') -        video_url = compat_urlparse.urljoin( -            self.http_scheme() + '//spiegel.de/', video_link) - -        return { -            '_type': 'url', -            'url': video_url, -        } +            'video page URL', default=None) +        if video_link: +            video_url = compat_urlparse.urljoin( +                self.http_scheme() + '//spiegel.de/', video_link) +            return self.url_result(video_url) + +        # Multiple embedded videos +        embeds = re.findall( +            r'<div class="vid_holder[0-9]+.*?</div>\s*.*?url\s*=\s*"([^"]+)"', +            webpage) +        entries = [ +            self.url_result(compat_urlparse.urljoin( +                self.http_scheme() + '//spiegel.de/', embed_path)) +            for embed_path in embeds +        ] +        return self.playlist_result(entries) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 5406de50b..cea3d5c95 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -510,7 +510,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):      def _parse_sig_js(self, jscode):          funcname = self._search_regex( -            r'signature=([$a-zA-Z]+)', jscode, +            r'\.sig\|\|([a-zA-Z0-9]+)\(', jscode,               'Initial JS player signature function name')          jsi = JSInterpreter(jscode) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 473916f7a..d0e215393 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2014.11.12.1' +__version__ = '2014.11.13' | 
