diff options
| author | Allan Zhou <allanzp@gmail.com> | 2013-08-19 17:11:52 -0700 | 
|---|---|---|
| committer | Allan Zhou <allanzp@gmail.com> | 2013-08-19 17:11:52 -0700 | 
| commit | 90d3989b99c7a9446089ea635ca288216625d44c (patch) | |
| tree | 5ae78d1cc8de87eaf361aea5fd069b84d3154fd7 | |
| parent | 95fdc7d69cec1381382cea33f0e4e06c8045486d (diff) | |
| parent | d741e55a423a09c40b3c5e19551f432a050353d7 (diff) | |
Merge remote-tracking branch 'upstream/master'
| -rw-r--r-- | devscripts/youtube_genalgo.py | 4 | ||||
| -rw-r--r-- | test/test_all_urls.py | 1 | ||||
| -rw-r--r-- | test/test_youtube_sig.py | 79 | ||||
| -rw-r--r-- | youtube_dl/YoutubeDL.py | 4 | ||||
| -rw-r--r-- | youtube_dl/extractor/__init__.py | 4 | ||||
| -rw-r--r-- | youtube_dl/extractor/arte.py | 32 | ||||
| -rw-r--r-- | youtube_dl/extractor/collegehumor.py | 2 | ||||
| -rw-r--r-- | youtube_dl/extractor/muzu.py | 64 | ||||
| -rw-r--r-- | youtube_dl/extractor/myvideo.py | 18 | ||||
| -rw-r--r-- | youtube_dl/extractor/ooyala.py | 52 | ||||
| -rw-r--r-- | youtube_dl/extractor/rtlnow.py | 100 | ||||
| -rw-r--r-- | youtube_dl/extractor/vevo.py | 4 | ||||
| -rw-r--r-- | youtube_dl/extractor/videofyme.py | 49 | ||||
| -rw-r--r-- | youtube_dl/extractor/youtube.py | 28 | ||||
| -rw-r--r-- | youtube_dl/utils.py | 4 | ||||
| -rw-r--r-- | youtube_dl/version.py | 2 | 
16 files changed, 344 insertions, 103 deletions
| diff --git a/devscripts/youtube_genalgo.py b/devscripts/youtube_genalgo.py index fd0120650..31d6ec952 100644 --- a/devscripts/youtube_genalgo.py +++ b/devscripts/youtube_genalgo.py @@ -26,9 +26,9 @@ tests = [      # 84      ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<",       "<.>?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWe098765432rmnbvcxzasdfghjklpoiuyt1"), -    # 83 - vflcaqGO8 2013/07/11 +    # 83 - vflTWC9KW 2013/08/01      ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<", -     "urty8ioplkjhgfdsazxcvbqm1234567S90QWERTYUIOPLKJHGFDnAZXCVBNM!#$%^&*()_+={[};?/>.<"), +     "qwertyuioplkjhg>dsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/f"),      # 82      ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.<",       "Q>/?;}[{=+-(*<^%$#@!MNBVCXZASDFGHKLPOIUY8REWT0q&7654321mnbvcxzasdfghjklpoiuytrew9"), diff --git a/test/test_all_urls.py b/test/test_all_urls.py index c73d0e467..c54faa380 100644 --- a/test/test_all_urls.py +++ b/test/test_all_urls.py @@ -50,6 +50,7 @@ class TestAllURLsMatching(unittest.TestCase):          self.assertEqual(YoutubeIE()._extract_id('http://www.youtube.com/watch?&v=BaW_jenozKc'), 'BaW_jenozKc')          self.assertEqual(YoutubeIE()._extract_id('https://www.youtube.com/watch?&v=BaW_jenozKc'), 'BaW_jenozKc')          self.assertEqual(YoutubeIE()._extract_id('https://www.youtube.com/watch?feature=player_embedded&v=BaW_jenozKc'), 'BaW_jenozKc') +        self.assertEqual(YoutubeIE()._extract_id('https://www.youtube.com/watch_popup?v=BaW_jenozKc'), 'BaW_jenozKc')      def test_no_duplicates(self):          ies = gen_extractors() diff --git a/test/test_youtube_sig.py b/test/test_youtube_sig.py deleted file mode 100644 index d06f3c8aa..000000000 --- a/test/test_youtube_sig.py +++ /dev/null @@ -1,79 +0,0 @@ -#!/usr/bin/env python - -import unittest -import sys - -# Allow direct execution -import os -sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) - -from youtube_dl.extractor.youtube import YoutubeIE -from helper import FakeYDL - -ie = YoutubeIE(FakeYDL()) -sig = ie._decrypt_signature -sig_age_gate = ie._decrypt_signature_age_gate - -class TestYoutubeSig(unittest.TestCase): -    def test_92(self): -        wrong = "F9F9B6E6FD47029957AB911A964CC20D95A181A5D37A2DBEFD67D403DB0E8BE4F4910053E4E8A79.0B70B.0B80B8" -        right = "69B6E6FD47029957AB911A9F4CC20D95A181A5D3.A2DBEFD67D403DB0E8BE4F4910053E4E8A7980B7" -        self.assertEqual(sig(wrong), right) - -    def test_90(self): -        wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'`" -        right = "mrtyuioplkjhgfdsazxcvbne1234567890QWER[YUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={`]}|" -        self.assertEqual(sig(wrong), right) - -    def test_88(self): -        wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<" -        right = "J:|}][{=+-_)(*&;%$#@>MNBVCXZASDFGH^KLPOIUYTREWQ0987654321mnbvcxzasdfghrklpoiuytej" -        self.assertEqual(sig(wrong), right) - -    def test_87(self): -        wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>.<" -        right = "tyuioplkjhgfdsazxcv<nm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>" -        self.assertEqual(sig(wrong), right) - -    def test_86(self): -        wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<" -        right = ">.1}|[{=+-_)(*&^%$#@!MNBVCXZASDFGHJK<POIUYTREW509876L432/mnbvcxzasdfghjklpoiuytre" -        self.assertEqual(sig(wrong), right) - -    def test_85(self): -        wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<" -        right = "ertyuiqplkjhgfdsazx$vbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#<%^&*()_-+={[};?/c" -        self.assertEqual(sig(wrong), right) - -    def test_84(self): -        wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<" -        right = "<.>?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWe098765432rmnbvcxzasdfghjklpoiuyt1" -        self.assertEqual(sig(wrong), right) - -    def test_83(self): -        wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<" -        right = "urty8ioplkjhgfdsazxcvbqm1234567S90QWERTYUIOPLKJHGFDnAZXCVBNM!#$%^&*()_+={[};?/>.<" -        self.assertEqual(sig(wrong), right) - -    def test_82(self): -        wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.<" -        right = "Q>/?;}[{=+-(*<^%$#@!MNBVCXZASDFGHKLPOIUY8REWT0q&7654321mnbvcxzasdfghjklpoiuytrew9" -        self.assertEqual(sig(wrong), right) - -    def test_81(self): -        wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>." -        right = "C>/?;}[{=+-(*&^%$#@!MNBVYXZASDFGHKLPOIU.TREWQ0q87659321mnbvcxzasdfghjkl4oiuytrewp" -        self.assertEqual(sig(wrong), right) - -    def test_79(self): -        wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/" -        right = "Z?;}[{=+-(*&^%$#@!MNBVCXRASDFGHKLPOIUYT/EWQ0q87659321mnbvcxzasdfghjkl4oiuytrewp" -        self.assertEqual(sig(wrong), right) -     -    def test_86_age_gate(self): -        wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<" -        right = "ertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!/#$%^&*()_-+={[|};?@" -        self.assertEqual(sig_age_gate(wrong), right) - -if __name__ == '__main__': -    unittest.main() diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 496866900..d1618da79 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -264,7 +264,7 @@ class YoutubeDL(object):              self.report_error(u'Erroneous output template')              return None          except ValueError as err: -            self.report_error(u'Insufficient system charset ' + repr(preferredencoding())) +            self.report_error(u'Error in output template: ' + str(err) + u' (encoding: ' + repr(preferredencoding()) + ')')              return None      def _match_entry(self, info_dict): @@ -547,7 +547,7 @@ class YoutubeDL(object):                  try:                      success = self.fd._do_download(filename, info_dict)                  except (OSError, IOError) as err: -                    raise UnavailableVideoError() +                    raise UnavailableVideoError(err)                  except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:                      self.report_error(u'unable to download video data: %s' % str(err))                      return diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index c20172a53..5bb44e764 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -45,15 +45,18 @@ from .livestream import LivestreamIE  from .metacafe import MetacafeIE  from .mixcloud import MixcloudIE  from .mtv import MTVIE +from .muzu import MuzuTVIE  from .myspass import MySpassIE  from .myvideo import MyVideoIE  from .nba import NBAIE +from .ooyala import OoyalaIE  from .photobucket import PhotobucketIE  from .pornotube import PornotubeIE  from .rbmaradio import RBMARadioIE  from .redtube import RedTubeIE  from .ringtv import RingTVIE  from .roxwel import RoxwelIE +from .rtlnow import RTLnowIE  from .sina import SinaIE  from .soundcloud import SoundcloudIE, SoundcloudSetIE  from .spiegel import SpiegelIE @@ -72,6 +75,7 @@ from .ustream import UstreamIE  from .vbox7 import Vbox7IE  from .veoh import VeohIE  from .vevo import VevoIE +from .videofyme import VideofyMeIE  from .vimeo import VimeoIE, VimeoChannelIE  from .vine import VineIE  from .c56 import C56IE diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py index 18d591658..69b3b0ad7 100644 --- a/youtube_dl/extractor/arte.py +++ b/youtube_dl/extractor/arte.py @@ -17,13 +17,14 @@ class ArteTvIE(InfoExtractor):      """      _EMISSION_URL = r'(?:http://)?www\.arte.tv/guide/(?P<lang>fr|de)/(?:(?:sendungen|emissions)/)?(?P<id>.*?)/(?P<name>.*?)(\?.*)?'      _VIDEOS_URL = r'(?:http://)?videos.arte.tv/(?P<lang>fr|de)/.*-(?P<id>.*?).html' +    _LIVEWEB_URL = r'(?:http://)?liveweb.arte.tv/(?P<lang>fr|de)/(?P<subpage>.+?)/(?P<name>.+)'      _LIVE_URL = r'index-[0-9]+\.html$'      IE_NAME = u'arte.tv'      @classmethod      def suitable(cls, url): -        return any(re.match(regex, url) for regex in (cls._EMISSION_URL, cls._VIDEOS_URL)) +        return any(re.match(regex, url) for regex in (cls._EMISSION_URL, cls._VIDEOS_URL, cls._LIVEWEB_URL))      # TODO implement Live Stream      # from ..utils import compat_urllib_parse @@ -68,6 +69,12 @@ class ArteTvIE(InfoExtractor):              lang = mobj.group('lang')              return self._extract_video(url, id, lang) +        mobj = re.match(self._LIVEWEB_URL, url) +        if mobj is not None: +            name = mobj.group('name') +            lang = mobj.group('lang') +            return self._extract_liveweb(url, name, lang) +          if re.search(self._LIVE_URL, video_id) is not None:              raise ExtractorError(u'Arte live streams are not yet supported, sorry')              # self.extractLiveStream(url) @@ -85,7 +92,7 @@ class ArteTvIE(InfoExtractor):          info_dict = {'id': player_info['VID'],                       'title': player_info['VTI'], -                     'description': player_info['VDE'], +                     'description': player_info.get('VDE'),                       'upload_date': unified_strdate(player_info['VDA'].split(' ')[0]),                       'thumbnail': player_info['programImage'],                       'ext': 'flv', @@ -104,6 +111,8 @@ class ArteTvIE(InfoExtractor):          formats = filter(_match_lang, formats)          # We order the formats by quality          formats = sorted(formats, key=lambda f: int(f['height'])) +        # Prefer videos without subtitles in the same language +        formats = sorted(formats, key=lambda f: re.match(r'VO(F|A)-STM\1', f['versionCode']) is None)          # Pick the best quality          format_info = formats[-1]          if format_info['mediaType'] == u'rtmp': @@ -144,3 +153,22 @@ class ArteTvIE(InfoExtractor):                  'url': video_url,                  'ext': 'flv',                  } + +    def _extract_liveweb(self, url, name, lang): +        """Extract form http://liveweb.arte.tv/""" +        webpage = self._download_webpage(url, name) +        video_id = self._search_regex(r'eventId=(\d+?)("|&)', webpage, u'event id') +        config_xml = self._download_webpage('http://download.liveweb.arte.tv/o21/liveweb/events/event-%s.xml' % video_id, +                                            video_id, u'Downloading information') +        config_doc = xml.etree.ElementTree.fromstring(config_xml.encode('utf-8')) +        event_doc = config_doc.find('event') +        url_node = event_doc.find('video').find('urlHd') +        if url_node is None: +            url_node = video_doc.find('urlSd') + +        return {'id': video_id, +                'title': event_doc.find('name%s' % lang.capitalize()).text, +                'url': url_node.text.replace('MP4', 'mp4'), +                'ext': 'flv', +                'thumbnail': self._og_search_thumbnail(webpage), +                } diff --git a/youtube_dl/extractor/collegehumor.py b/youtube_dl/extractor/collegehumor.py index 5badde03a..30b9c7549 100644 --- a/youtube_dl/extractor/collegehumor.py +++ b/youtube_dl/extractor/collegehumor.py @@ -10,7 +10,7 @@ from ..utils import (  class CollegeHumorIE(InfoExtractor): -    _VALID_URL = r'^(?:https?://)?(?:www\.)?collegehumor\.com/(video|embed|e)/(?P<videoid>[0-9]+)/(?P<shorttitle>.*)$' +    _VALID_URL = r'^(?:https?://)?(?:www\.)?collegehumor\.com/(video|embed|e)/(?P<videoid>[0-9]+)/?(?P<shorttitle>.*)$'      _TEST = {          u'url': u'http://www.collegehumor.com/video/6902724/comic-con-cosplay-catastrophe', diff --git a/youtube_dl/extractor/muzu.py b/youtube_dl/extractor/muzu.py new file mode 100644 index 000000000..03e31ea1c --- /dev/null +++ b/youtube_dl/extractor/muzu.py @@ -0,0 +1,64 @@ +import re +import json + +from .common import InfoExtractor +from ..utils import ( +    compat_urllib_parse, +    determine_ext, +) + + +class MuzuTVIE(InfoExtractor): +    _VALID_URL = r'https?://www.muzu.tv/(.+?)/(.+?)/(?P<id>\d+)' +    IE_NAME = u'muzu.tv' + +    _TEST = { +        u'url': u'http://www.muzu.tv/defected/marcashken-featuring-sos-cat-walk-original-mix-music-video/1981454/', +        u'file': u'1981454.mp4', +        u'md5': u'98f8b2c7bc50578d6a0364fff2bfb000', +        u'info_dict': { +            u'title': u'Cat Walk (Original Mix)', +            u'description': u'md5:90e868994de201b2570e4e5854e19420', +            u'uploader': u'MarcAshken featuring SOS', +        }, +    } + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        video_id = mobj.group('id') + +        info_data = compat_urllib_parse.urlencode({'format': 'json', +                                                   'url': url, +                                                   }) +        video_info_page = self._download_webpage('http://www.muzu.tv/api/oembed/?%s' % info_data, +                                                 video_id, u'Downloading video info') +        info = json.loads(video_info_page) + +        player_info_page = self._download_webpage('http://player.muzu.tv/player/playerInit?ai=%s' % video_id, +                                                  video_id, u'Downloading player info') +        video_info = json.loads(player_info_page)['videos'][0] +        for quality in ['1080' , '720', '480', '360']: +            if video_info.get('v%s' % quality): +                break + +        data = compat_urllib_parse.urlencode({'ai': video_id, +                                              # Even if each time you watch a video the hash changes, +                                              # it seems to work for different videos, and it will work +                                              # even if you use any non empty string as a hash +                                              'viewhash': 'VBNff6djeV4HV5TRPW5kOHub2k', +                                              'device': 'web', +                                              'qv': quality, +                                              }) +        video_url_page = self._download_webpage('http://player.muzu.tv/player/requestVideo?%s' % data, +                                                video_id, u'Downloading video url') +        video_url_info = json.loads(video_url_page) +        video_url = video_url_info['url'] + +        return {'id': video_id, +                'title': info['title'], +                'url': video_url, +                'ext': determine_ext(video_url), +                'thumbnail': info['thumbnail_url'], +                'description': info['description'], +                'uploader': info['author_name'], +                } diff --git a/youtube_dl/extractor/myvideo.py b/youtube_dl/extractor/myvideo.py index b2a7b1df0..0404e6e43 100644 --- a/youtube_dl/extractor/myvideo.py +++ b/youtube_dl/extractor/myvideo.py @@ -2,11 +2,13 @@ import binascii  import base64  import hashlib  import re +import json  from .common import InfoExtractor  from ..utils import (      compat_ord,      compat_urllib_parse, +    compat_urllib_request,      ExtractorError,  ) @@ -16,7 +18,7 @@ from ..utils import (  class MyVideoIE(InfoExtractor):      """Information Extractor for myvideo.de.""" -    _VALID_URL = r'(?:http://)?(?:www\.)?myvideo\.de/watch/([0-9]+)/([^?/]+).*' +    _VALID_URL = r'(?:http://)?(?:www\.)?myvideo\.de/(?:[^/]+/)?watch/([0-9]+)/([^?/]+).*'      IE_NAME = u'myvideo'      _TEST = {          u'url': u'http://www.myvideo.de/watch/8229274/bowling_fail_or_win', @@ -85,6 +87,20 @@ class MyVideoIE(InfoExtractor):                  'ext':      video_ext,              }] +        mobj = re.search(r'data-video-service="/service/data/video/%s/config' % video_id, webpage) +        if mobj is not None: +            request = compat_urllib_request.Request('http://www.myvideo.de/service/data/video/%s/config' % video_id, '') +            response = self._download_webpage(request, video_id, +                                              u'Downloading video info') +            info = json.loads(base64.b64decode(response).decode('utf-8')) +            return {'id': video_id, +                    'title': info['title'], +                    'url': info['streaming_url'].replace('rtmpe', 'rtmpt'), +                    'play_path': info['filename'], +                    'ext': 'flv', +                    'thumbnail': info['thumbnail'][0]['url'], +                    } +          # try encxml          mobj = re.search('var flashvars={(.+?)}', webpage)          if mobj is None: diff --git a/youtube_dl/extractor/ooyala.py b/youtube_dl/extractor/ooyala.py new file mode 100644 index 000000000..b734722d0 --- /dev/null +++ b/youtube_dl/extractor/ooyala.py @@ -0,0 +1,52 @@ +import re +import json + +from .common import InfoExtractor +from ..utils import unescapeHTML + +class OoyalaIE(InfoExtractor): +    _VALID_URL = r'https?://.+?\.ooyala\.com/.*?embedCode=(?P<id>.+?)(&|$)' + +    _TEST = { +        # From http://it.slashdot.org/story/13/04/25/178216/recovering-data-from-broken-hard-drives-and-ssds-video +        u'url': u'http://player.ooyala.com/player.js?embedCode=pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8', +        u'file': u'pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8.mp4', +        u'md5': u'3f5cceb3a7bf461d6c29dc466cf8033c', +        u'info_dict': { +            u'title': u'Explaining Data Recovery from Hard Drives and SSDs', +            u'description': u'How badly damaged does a drive have to be to defeat Russell and his crew? Apparently, smashed to bits.', +        }, +    } + +    def _extract_result(self, info, more_info): +        return {'id': info['embedCode'], +                'ext': 'mp4', +                'title': unescapeHTML(info['title']), +                'url': info['url'], +                'description': unescapeHTML(more_info['description']), +                'thumbnail': more_info['promo'], +                } + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        embedCode = mobj.group('id') +        player_url = 'http://player.ooyala.com/player.js?embedCode=%s' % embedCode +        player = self._download_webpage(player_url, embedCode) +        mobile_url = self._search_regex(r'mobile_player_url="(.+?)&device="', +                                        player, u'mobile player url') +        mobile_player = self._download_webpage(mobile_url, embedCode) +        videos_info = self._search_regex(r'eval\("\((\[{.*?stream_redirect.*?}\])\)"\);', mobile_player, u'info').replace('\\"','"') +        videos_more_info = self._search_regex(r'eval\("\(({.*?\\"promo\\".*?})\)"', mobile_player, u'more info').replace('\\"','"') +        videos_info = json.loads(videos_info) +        videos_more_info =json.loads(videos_more_info) + +        if videos_more_info.get('lineup'): +            videos = [self._extract_result(info, more_info) for (info, more_info) in zip(videos_info, videos_more_info['lineup'])] +            return {'_type': 'playlist', +                    'id': embedCode, +                    'title': unescapeHTML(videos_more_info['title']), +                    'entries': videos, +                    } +        else: +            return self._extract_result(videos_info[0], videos_more_info) +         diff --git a/youtube_dl/extractor/rtlnow.py b/youtube_dl/extractor/rtlnow.py new file mode 100644 index 000000000..d993a990a --- /dev/null +++ b/youtube_dl/extractor/rtlnow.py @@ -0,0 +1,100 @@ +# encoding: utf-8 +import re + +from .common import InfoExtractor +from ..utils import ExtractorError + +class RTLnowIE(InfoExtractor): +    """Information Extractor for RTLnow, RTL2now and VOXnow""" +    _VALID_URL = r'(?:http://)?(?P<url>(?P<base_url>rtl(?:(?P<is_rtl2>2)|-)now\.rtl(?(is_rtl2)2|)\.de/|(?:www\.)?voxnow\.de/)[a-zA-Z0-9-]+/[a-zA-Z0-9-]+\.php\?(?:container_id|film_id)=(?P<video_id>[0-9]+)&player=1(?:&season=[0-9]+)?(?:&.*)?)' +    _TESTS = [{ +        u'url': u'http://rtl-now.rtl.de/ahornallee/folge-1.php?film_id=90419&player=1&season=1', +        u'file': u'90419.flv', +        u'info_dict': { +            u'upload_date': u'20070416',  +            u'title': u'Ahornallee - Folge 1 - Der Einzug', +            u'description': u'Folge 1 - Der Einzug', +        }, +        u'params': { +            u'skip_download': True, +        }, +    }, +    { +        u'url': u'http://rtl2now.rtl2.de/aerger-im-revier/episode-15-teil-1.php?film_id=69756&player=1&season=2&index=5', +        u'file': u'69756.flv', +        u'info_dict': { +            u'upload_date': u'20120519',  +            u'title': u'Ärger im Revier - Ein junger Ladendieb, ein handfester Streit...', +            u'description': u'Ärger im Revier - Ein junger Ladendieb, ein handfester Streit u.a.', +            u'thumbnail': u'http://autoimg.static-fra.de/rtl2now/219850/1500x1500/image2.jpg', +        }, +        u'params': { +            u'skip_download': True, +        }, +    }, +    { +        u'url': u'www.voxnow.de/voxtours/suedafrika-reporter-ii.php?film_id=13883&player=1&season=17', +        u'file': u'13883.flv', +        u'info_dict': { +            u'upload_date': u'20090627',  +            u'title': u'Voxtours - Südafrika-Reporter II', +            u'description': u'Südafrika-Reporter II', +        }, +        u'params': { +            u'skip_download': True, +        }, +    }] + +    def _real_extract(self,url): +        mobj = re.match(self._VALID_URL, url) + +        webpage_url = u'http://' + mobj.group('url') +        video_page_url = u'http://' + mobj.group('base_url') +        video_id = mobj.group(u'video_id') + +        webpage = self._download_webpage(webpage_url, video_id) +        video_title = self._html_search_regex(r'<title>(?P<title>[^<]+)</title>', +            webpage, u'title') +        playerdata_url = self._html_search_regex(r'\'playerdata\': \'(?P<playerdata_url>[^\']+)\'', +            webpage, u'playerdata_url') + +        playerdata = self._download_webpage(playerdata_url, video_id) +        mobj = re.search(r'<title><!\[CDATA\[(?P<description>.+?)\s+- (?:Sendung )?vom (?P<upload_date_d>[0-9]{2})\.(?P<upload_date_m>[0-9]{2})\.(?:(?P<upload_date_Y>[0-9]{4})|(?P<upload_date_y>[0-9]{2})) [0-9]{2}:[0-9]{2} Uhr\]\]></title>', playerdata) +        if mobj: +            video_description = mobj.group(u'description') +            if mobj.group('upload_date_Y'): +                video_upload_date = mobj.group('upload_date_Y') +            else: +                video_upload_date = u'20' + mobj.group('upload_date_y') +            video_upload_date += mobj.group('upload_date_m')+mobj.group('upload_date_d') +        else: +            video_description = None +            video_upload_date = None +            self._downloader.report_warning(u'Unable to extract description and upload date') + +        # Thumbnail: not every video has an thumbnail +        mobj = re.search(r'<meta property="og:image" content="(?P<thumbnail>[^"]+)">', webpage) +        if mobj: +            video_thumbnail = mobj.group(u'thumbnail') +        else: +            video_thumbnail = None + +        mobj = re.search(r'<filename [^>]+><!\[CDATA\[(?P<url>rtmpe://(?:[^/]+/){2})(?P<play_path>[^\]]+)\]\]></filename>', playerdata) +        if mobj is None: +            raise ExtractorError(u'Unable to extract media URL') +        video_url = mobj.group(u'url') +        video_play_path = u'mp4:' + mobj.group(u'play_path') +        video_player_url = video_page_url + u'includes/vodplayer.swf' + +        return [{ +            'id':          video_id, +            'url':         video_url, +            'play_path':   video_play_path, +            'page_url':    video_page_url, +            'player_url':  video_player_url, +            'ext':         'flv', +            'title':       video_title, +            'description': video_description, +            'upload_date': video_upload_date, +            'thumbnail':   video_thumbnail, +        }] diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py index 67537eae5..14abd58e8 100644 --- a/youtube_dl/extractor/vevo.py +++ b/youtube_dl/extractor/vevo.py @@ -8,7 +8,7 @@ from ..utils import (  class VevoIE(InfoExtractor):      """ -    Accecps urls from vevo.com or in the format 'vevo:{id}' +    Accepts urls from vevo.com or in the format 'vevo:{id}'      (currently used by MTVIE)      """      _VALID_URL = r'((http://www.vevo.com/watch/.*?/.*?/)|(vevo:))(?P<id>.*)$' @@ -19,7 +19,7 @@ class VevoIE(InfoExtractor):          u'info_dict': {              u"upload_date": u"20130624",               u"uploader": u"Hurts",  -            u"title": u"Somebody To Die For" +            u"title": u"Somebody to Die For"          }      } diff --git a/youtube_dl/extractor/videofyme.py b/youtube_dl/extractor/videofyme.py new file mode 100644 index 000000000..04106672b --- /dev/null +++ b/youtube_dl/extractor/videofyme.py @@ -0,0 +1,49 @@ +import re +import xml.etree.ElementTree + +from .common import InfoExtractor +from ..utils import ( +    find_xpath_attr, +    determine_ext, +) + +class VideofyMeIE(InfoExtractor): +    _VALID_URL = r'https?://(www.videofy.me/.+?|p.videofy.me/v)/(?P<id>\d+)(&|#|$)' +    IE_NAME = u'videofy.me' + +    _TEST = { +        u'url': u'http://www.videofy.me/thisisvideofyme/1100701', +        u'file':  u'1100701.mp4', +        u'md5': u'2046dd5758541d630bfa93e741e2fd79', +        u'info_dict': { +            u'title': u'This is VideofyMe', +            u'description': None, +            u'uploader': u'VideofyMe', +            u'uploader_id': u'thisisvideofyme', +        }, +         +    } + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        video_id = mobj.group('id') +        config_xml = self._download_webpage('http://sunshine.videofy.me/?videoId=%s' % video_id, +                                            video_id) +        config = xml.etree.ElementTree.fromstring(config_xml.encode('utf-8')) +        video = config.find('video') +        sources = video.find('sources') +        url_node = find_xpath_attr(sources, 'source', 'id', 'HQ on') +        if url_node is None: +            url_node = find_xpath_attr(sources, 'source', 'id', 'HQ off') +        video_url = url_node.find('url').text + +        return {'id': video_id, +                'title': video.find('title').text, +                'url': video_url, +                'ext': determine_ext(video_url), +                'thumbnail': video.find('thumb').text, +                'description': video.find('description').text, +                'uploader': config.find('blog/name').text, +                'uploader_id': video.find('identifier').text, +                'view_count': re.search(r'\d+', video.find('views').text).group(), +                } diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index c08387c27..843a973ca 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -141,7 +141,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):                           (?:                                                  # the various things that can precede the ID:                               (?:(?:v|embed|e)/)                               # v/ or embed/ or e/                               |(?:                                             # or the v= param in all its forms -                                 (?:watch|movie(?:_popup)?(?:\.php)?)?              # preceding watch(_popup|.php) or nothing (like /?v=xxxx) +                                 (?:(?:watch|movie)(?:_popup)?(?:\.php)?)?    # preceding watch(_popup|.php) or nothing (like /?v=xxxx)                                   (?:\?|\#!?)                                  # the params delimiter ? or # or #!                                   (?:.*?&)?                                    # any other preceding param (like /?s=tuff&v=xxxx)                                   v= @@ -221,6 +221,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):          '132': '240p',          '151': '72p',      } +    _3d_itags = ['85', '84', '102', '83', '101', '82', '100']      IE_NAME = u'youtube'      _TESTS = [          { @@ -334,18 +335,20 @@ class YoutubeIE(YoutubeBaseInfoExtractor):              return s[25] + s[3:25] + s[0] + s[26:42] + s[79] + s[43:79] + s[91] + s[80:83]          elif len(s) == 90:              return s[25] + s[3:25] + s[2] + s[26:40] + s[77] + s[41:77] + s[89] + s[78:81] +        elif len(s) == 89: +            return s[84:78:-1] + s[87] + s[77:60:-1] + s[0] + s[59:3:-1]          elif len(s) == 88:              return s[48] + s[81:67:-1] + s[82] + s[66:62:-1] + s[85] + s[61:48:-1] + s[67] + s[47:12:-1] + s[3] + s[11:3:-1] + s[2] + s[12]          elif len(s) == 87: -            return s[4:23] + s[86] + s[24:85] +            return s[6:27] + s[4] + s[28:39] + s[27] + s[40:59] + s[2] + s[60:]          elif len(s) == 86: -            return s[83:85] + s[26] + s[79:46:-1] + s[85] + s[45:36:-1] + s[30] + s[35:30:-1] + s[46] + s[29:26:-1] + s[82] + s[25:1:-1] +            return s[5:20] + s[2] + s[21:]          elif len(s) == 85: -            return s[2:8] + s[0] + s[9:21] + s[65] + s[22:65] + s[84] + s[66:82] + s[21] +            return s[83:34:-1] + s[0] + s[33:27:-1] + s[3] + s[26:19:-1] + s[34] + s[18:3:-1] + s[27]          elif len(s) == 84: -            return s[83:36:-1] + s[2] + s[35:26:-1] + s[3] + s[25:3:-1] + s[26] +            return s[83:27:-1] + s[0] + s[26:5:-1] + s[2:0:-1] + s[27]          elif len(s) == 83: -            return s[6] + s[3:6] + s[33] + s[7:24] + s[0] + s[25:33] + s[53] + s[34:53] + s[24] + s[54:] +            return s[81:64:-1] + s[82] + s[63:52:-1] + s[45] + s[51:45:-1] + s[1] + s[44:1:-1] + s[0]          elif len(s) == 82:              return s[36] + s[79:67:-1] + s[81] + s[66:40:-1] + s[33] + s[39:36:-1] + s[40] + s[35] + s[0] + s[67] + s[32:0:-1] + s[34]          elif len(s) == 81: @@ -467,7 +470,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):      def _print_formats(self, formats):          print('Available formats:')          for x in formats: -            print('%s\t:\t%s\t[%s]' %(x, self._video_extensions.get(x, 'flv'), self._video_dimensions.get(x, '???'))) +            print('%s\t:\t%s\t[%s]%s' %(x, self._video_extensions.get(x, 'flv'), +                                        self._video_dimensions.get(x, '???'), +                                        ' (3D)' if x in self._3d_itags else ''))      def _extract_id(self, url):          mobj = re.match(self._VALID_URL, url, re.VERBOSE) @@ -715,8 +720,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):                              s = url_data['s'][0]                              if age_gate:                                  player_version = self._search_regex(r'ad3-(.+?)\.swf', -                                    video_info['ad3_module'][0], 'flash player', -                                    fatal=False) +                                    video_info['ad3_module'][0] if 'ad3_module' in video_info else 'NOT FOUND', +                                    'flash player', fatal=False)                                  player = 'flash player %s' % player_version                              else:                                  player = u'html5 player %s' % self._search_regex(r'html5player-(.+?)\.js', video_webpage, @@ -751,8 +756,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):              # Extension              video_extension = self._video_extensions.get(format_param, 'flv') -            video_format = '{0} - {1}'.format(format_param if format_param else video_extension, -                                              self._video_dimensions.get(format_param, '???')) +            video_format = '{0} - {1}{2}'.format(format_param if format_param else video_extension, +                                              self._video_dimensions.get(format_param, '???'), +                                              ' (3D)' if format_param in self._3d_itags else '')              results.append({                  'id':       video_id, diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index cf2ea654e..5dd5b2923 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -207,7 +207,7 @@ if sys.version_info >= (2,7):      def find_xpath_attr(node, xpath, key, val):          """ Find the xpath xpath[@key=val] """          assert re.match(r'^[a-zA-Z]+$', key) -        assert re.match(r'^[a-zA-Z@]*$', val) +        assert re.match(r'^[a-zA-Z@\s]*$', val)          expr = xpath + u"[@%s='%s']" % (key, val)          return node.find(expr)  else: @@ -497,7 +497,7 @@ class ExtractorError(Exception):          if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):              expected = True          if not expected: -            msg = msg + u'; please report this issue on https://yt-dl.org/bug . Be sure to call youtube-dl with the --verbose flag and include its complete output.' +            msg = msg + u'; please report this issue on https://yt-dl.org/bug . Be sure to call youtube-dl with the --verbose flag and include its complete output. Make sure you are using the latest version; type  youtube-dl -U  to update.'          super(ExtractorError, self).__init__(msg)          self.traceback = tb diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 0be025ed8..8c93a275c 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2013.07.31' +__version__ = '2013.08.17' | 
