diff options
| -rw-r--r-- | test/test_InfoExtractor.py | 44 | ||||
| -rw-r--r-- | test/test_playlists.py | 2 | ||||
| -rw-r--r-- | youtube_dl/YoutubeDL.py | 13 | ||||
| -rw-r--r-- | youtube_dl/extractor/collegehumor.py | 14 | ||||
| -rw-r--r-- | youtube_dl/extractor/gamekings.py | 13 | ||||
| -rw-r--r-- | youtube_dl/extractor/mtv.py | 39 | ||||
| -rw-r--r-- | youtube_dl/extractor/soundcloud.py | 1 | ||||
| -rw-r--r-- | youtube_dl/extractor/spike.py | 15 | ||||
| -rw-r--r-- | youtube_dl/extractor/videodetective.py | 22 | ||||
| -rw-r--r-- | youtube_dl/extractor/vube.py | 4 | 
10 files changed, 131 insertions, 36 deletions
| diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py new file mode 100644 index 000000000..13c18ed95 --- /dev/null +++ b/test/test_InfoExtractor.py @@ -0,0 +1,44 @@ +#!/usr/bin/env python + +from __future__ import unicode_literals + +# Allow direct execution +import os +import sys +import unittest +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from test.helper import FakeYDL +from youtube_dl.extractor.common import InfoExtractor +from youtube_dl.extractor import YoutubeIE, get_info_extractor + + +class TestIE(InfoExtractor): +    pass + + +class TestInfoExtractor(unittest.TestCase): +    def setUp(self): +        self.ie = TestIE(FakeYDL()) + +    def test_ie_key(self): +        self.assertEqual(get_info_extractor(YoutubeIE.ie_key()), YoutubeIE) + +    def test_html_search_regex(self): +        html = '<p id="foo">Watch this <a href="http://www.youtube.com/watch?v=BaW_jenozKc">video</a></p>' +        search = lambda re, *args: self.ie._html_search_regex(re, html, *args) +        self.assertEqual(search(r'<p id="foo">(.+?)</p>', 'foo'), 'Watch this video') + +    def test_opengraph(self): +        ie = self.ie +        html = ''' +            <meta name="og:title" content='Foo'/> +            <meta content="Some video's description " name="og:description"/> +            <meta property='og:image' content='http://domain.com/pic.jpg?key1=val1&key2=val2'/> +            ''' +        self.assertEqual(ie._og_search_title(html), 'Foo') +        self.assertEqual(ie._og_search_description(html), 'Some video\'s description ') +        self.assertEqual(ie._og_search_thumbnail(html), 'http://domain.com/pic.jpg?key1=val1&key2=val2') + +if __name__ == '__main__': +    unittest.main() diff --git a/test/test_playlists.py b/test/test_playlists.py index 4bd815a0e..4785161f2 100644 --- a/test/test_playlists.py +++ b/test/test_playlists.py @@ -99,7 +99,7 @@ class TestPlaylists(unittest.TestCase):          result = ie.extract('http://www.ustream.tv/channel/young-americans-for-liberty')          self.assertIsPlaylist(result)          self.assertEqual(result['id'], '5124905') -        self.assertTrue(len(result['entries']) >= 11) +        self.assertTrue(len(result['entries']) >= 6)      def test_soundcloud_set(self):          dl = FakeYDL() diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 6fd0969b5..fcb8dd19c 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -370,12 +370,15 @@ class YoutubeDL(object):          Print the message to stderr, it will be prefixed with 'WARNING:'          If stderr is a tty file the 'WARNING:' will be colored          ''' -        if self._err_file.isatty() and os.name != 'nt': -            _msg_header = '\033[0;33mWARNING:\033[0m' +        if self.params.get('logger') is not None: +            self.params['logger'].warning(message)          else: -            _msg_header = 'WARNING:' -        warning_message = '%s %s' % (_msg_header, message) -        self.to_stderr(warning_message) +            if self._err_file.isatty() and os.name != 'nt': +                _msg_header = '\033[0;33mWARNING:\033[0m' +            else: +                _msg_header = 'WARNING:' +            warning_message = '%s %s' % (_msg_header, message) +            self.to_stderr(warning_message)      def report_error(self, message, tb=None):          ''' diff --git a/youtube_dl/extractor/collegehumor.py b/youtube_dl/extractor/collegehumor.py index 6b9fa4209..45d81aae5 100644 --- a/youtube_dl/extractor/collegehumor.py +++ b/youtube_dl/extractor/collegehumor.py @@ -35,15 +35,15 @@ class CollegeHumorIE(InfoExtractor):      },      # embedded youtube video      { -        'url': 'http://www.collegehumor.com/embed/6950457', +        'url': 'http://www.collegehumor.com/embed/6950306',          'info_dict': { -            'id': 'W5gMp3ZjYg4', +            'id': 'Z-bao9fg6Yc',              'ext': 'mp4', -            'title': 'Funny Dogs Protecting Babies Compilation 2014 [NEW HD]', -            'uploader': 'FunnyPlox TV', -            'uploader_id': 'funnyploxtv', -            'description': 'md5:7ded37421526d54afdf005e25bc2b7a3', -            'upload_date': '20140128', +            'title': 'Young Americans Think President John F. Kennedy Died THIS MORNING IN A CAR ACCIDENT!!!', +            'uploader': 'Mark Dice', +            'uploader_id': 'MarkDice', +            'description': 'md5:62c3dab9351fac7bb44b53b69511d87f', +            'upload_date': '20140127',          },          'params': {              'skip_download': True, diff --git a/youtube_dl/extractor/gamekings.py b/youtube_dl/extractor/gamekings.py index a3a5251fe..233398966 100644 --- a/youtube_dl/extractor/gamekings.py +++ b/youtube_dl/extractor/gamekings.py @@ -1,3 +1,5 @@ +from __future__ import unicode_literals +  import re  from .common import InfoExtractor @@ -6,13 +8,14 @@ from .common import InfoExtractor  class GamekingsIE(InfoExtractor):      _VALID_URL = r'http://www\.gamekings\.tv/videos/(?P<name>[0-9a-z\-]+)'      _TEST = { -        u"url": u"http://www.gamekings.tv/videos/phoenix-wright-ace-attorney-dual-destinies-review/", -        u'file': u'20130811.mp4', +        'url': 'http://www.gamekings.tv/videos/phoenix-wright-ace-attorney-dual-destinies-review/',          # MD5 is flaky, seems to change regularly -        #u'md5': u'2f32b1f7b80fdc5cb616efb4f387f8a3', +        # 'md5': '2f32b1f7b80fdc5cb616efb4f387f8a3',          u'info_dict': { -            u"title": u"Phoenix Wright: Ace Attorney \u2013 Dual Destinies Review", -            u"description": u"Melle en Steven hebben voor de review een week in de rechtbank doorbracht met Phoenix Wright: Ace Attorney - Dual Destinies.", +            'id': '20130811', +            'ext': 'mp4', +            'title': 'Phoenix Wright: Ace Attorney \u2013 Dual Destinies Review', +            'description': 'md5:632e61a9f97d700e83f43d77ddafb6a4',          }      } diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index 5447b6c0c..d75241d3f 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -5,9 +5,12 @@ import re  from .common import InfoExtractor  from ..utils import (      compat_urllib_parse, +    compat_urllib_request,      ExtractorError,      find_xpath_attr,      fix_xml_ampersands, +    HEADRequest, +    unescapeHTML,      url_basename,      RegexNotFoundError,  ) @@ -18,6 +21,7 @@ def _media_xml_tag(tag):  class MTVServicesInfoExtractor(InfoExtractor): +    _MOBILE_TEMPLATE = None      @staticmethod      def _id_from_uri(uri):          return uri.split(':')[-1] @@ -39,9 +43,29 @@ class MTVServicesInfoExtractor(InfoExtractor):          else:              return thumb_node.attrib['url'] -    def _extract_video_formats(self, mdoc): -        if re.match(r'.*/error_country_block\.swf$', mdoc.find('.//src').text) is not None: -            raise ExtractorError('This video is not available from your country.', expected=True) +    def _extract_mobile_video_formats(self, mtvn_id): +        webpage_url = self._MOBILE_TEMPLATE % mtvn_id +        req = compat_urllib_request.Request(webpage_url) +        # Otherwise we get a webpage that would execute some javascript +        req.add_header('Youtubedl-user-agent', 'curl/7') +        webpage = self._download_webpage(req, mtvn_id, +            'Downloading mobile page') +        metrics_url = unescapeHTML(self._search_regex(r'<a href="(http://metrics.+?)"', webpage, 'url')) +        req = HEADRequest(metrics_url) +        response = self._request_webpage(req, mtvn_id, 'Resolving url') +        url = response.geturl() +        # Transform the url to get the best quality: +        url = re.sub(r'.+pxE=mp4', 'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=0+_pxK=18639+_pxE=mp4', url, 1) +        return [{'url': url,'ext': 'mp4'}] + +    def _extract_video_formats(self, mdoc, mtvn_id): +        if re.match(r'.*/(error_country_block\.swf|geoblock\.mp4)$', mdoc.find('.//src').text) is not None: +            if mtvn_id is not None and self._MOBILE_TEMPLATE is not None: +                self.to_screen('The normal version is not available from your ' +                    'country, trying with the mobile version') +                return self._extract_mobile_video_formats(mtvn_id) +            raise ExtractorError('This video is not available from your country.', +                expected=True)          formats = []          for rendition in mdoc.findall('.//rendition'): @@ -94,9 +118,16 @@ class MTVServicesInfoExtractor(InfoExtractor):              raise ExtractorError('Could not find video title')          title = title.strip() +        # This a short id that's used in the webpage urls +        mtvn_id = None +        mtvn_id_node = find_xpath_attr(itemdoc, './/{http://search.yahoo.com/mrss/}category', +                'scheme', 'urn:mtvn:id') +        if mtvn_id_node is not None: +            mtvn_id = mtvn_id_node.text +          return {              'title': title, -            'formats': self._extract_video_formats(mediagen_doc), +            'formats': self._extract_video_formats(mediagen_doc, mtvn_id),              'id': video_id,              'thumbnail': self._get_thumbnail_url(uri, itemdoc),              'description': description, diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 1cc0dcb15..2f254f023 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -54,6 +54,7 @@ class SoundcloudIE(InfoExtractor):                  'id': '47127627',                  'ext': 'mp3',                  'title': 'Goldrushed', +                'description': 'From Stockholm Sweden\r\nPovel / Magnus / Filip / David\r\nwww.theroyalconcept.com',                  'uploader': 'The Royal Concept',                  'upload_date': '20120521',              }, diff --git a/youtube_dl/extractor/spike.py b/youtube_dl/extractor/spike.py index 56682ac45..a3adf54e3 100644 --- a/youtube_dl/extractor/spike.py +++ b/youtube_dl/extractor/spike.py @@ -1,10 +1,15 @@  from __future__ import unicode_literals +import re +  from .mtv import MTVServicesInfoExtractor  class SpikeIE(MTVServicesInfoExtractor): -    _VALID_URL = r'https?://www\.spike\.com/(video-clips|episodes)/.+' +    _VALID_URL = r'''(?x)https?:// +        (www\.spike\.com/(video-clips|episodes)/.+| +         m\.spike\.com/videos/video.rbml\?id=(?P<mobile_id>[^&]+)) +        '''      _TEST = {          'url': 'http://www.spike.com/video-clips/lhtu8m/auction-hunters-can-allen-ride-a-hundred-year-old-motorcycle',          'md5': '1a9265f32b0c375793d6c4ce45255256', @@ -17,3 +22,11 @@ class SpikeIE(MTVServicesInfoExtractor):      }      _FEED_URL = 'http://www.spike.com/feeds/mrss/' +    _MOBILE_TEMPLATE = 'http://m.spike.com/videos/video.rbml?id=%s' + +    def _real_extract(self, url): +        mobj = re.search(self._VALID_URL, url) +        mobile_id = mobj.group('mobile_id') +        if mobile_id is not None: +            url = 'http://www.spike.com/video-clips/%s' % mobile_id +        return super(SpikeIE, self)._real_extract(url) diff --git a/youtube_dl/extractor/videodetective.py b/youtube_dl/extractor/videodetective.py index 265dd5b91..ac6c25537 100644 --- a/youtube_dl/extractor/videodetective.py +++ b/youtube_dl/extractor/videodetective.py @@ -1,22 +1,23 @@ +from __future__ import unicode_literals +  import re  from .common import InfoExtractor  from .internetvideoarchive import InternetVideoArchiveIE -from ..utils import ( -    compat_urlparse, -) +from ..utils import compat_urlparse  class VideoDetectiveIE(InfoExtractor):      _VALID_URL = r'https?://www\.videodetective\.com/[^/]+/[^/]+/(?P<id>\d+)'      _TEST = { -        u'url': u'http://www.videodetective.com/movies/kick-ass-2/194487', -        u'file': u'194487.mp4', -        u'info_dict': { -            u'title': u'KICK-ASS 2', -            u'description': u'md5:65ba37ad619165afac7d432eaded6013', -            u'duration': 135, +        'url': 'http://www.videodetective.com/movies/kick-ass-2/194487', +        'info_dict': { +            'id': '194487', +            'ext': 'mp4', +            'title': 'KICK-ASS 2', +            'description': 'md5:65ba37ad619165afac7d432eaded6013', +            'duration': 135,          },      } @@ -26,5 +27,4 @@ class VideoDetectiveIE(InfoExtractor):          webpage = self._download_webpage(url, video_id)          og_video = self._og_search_video_url(webpage)          query = compat_urlparse.urlparse(og_video).query -        return self.url_result(InternetVideoArchiveIE._build_url(query), -            ie=InternetVideoArchiveIE.ie_key()) +        return self.url_result(InternetVideoArchiveIE._build_url(query), ie=InternetVideoArchiveIE.ie_key()) diff --git a/youtube_dl/extractor/vube.py b/youtube_dl/extractor/vube.py index fbdff471a..935c97ae9 100644 --- a/youtube_dl/extractor/vube.py +++ b/youtube_dl/extractor/vube.py @@ -13,7 +13,7 @@ class VubeIE(InfoExtractor):      _TEST = {          'url': 'http://vube.com/Chiara+Grispo+Video+Channel/YL2qNPkqon', -        'md5': 'f81dcf6d0448e3291f54380181695821', +        'md5': 'db7aba89d4603dadd627e9d1973946fe',          'info_dict': {              'id': 'YL2qNPkqon',              'ext': 'mp4', @@ -77,4 +77,4 @@ class VubeIE(InfoExtractor):              'like_count': like_count,              'dislike_count': dislike_count,              'comment_count': comment_count, -        }
\ No newline at end of file +        } | 
