diff options
| -rwxr-xr-x | devscripts/release.sh | 4 | ||||
| -rw-r--r-- | test/test_download.py | 4 | ||||
| -rw-r--r-- | test/tests.json | 35 | ||||
| -rw-r--r-- | youtube_dl/__init__.py | 1 | ||||
| -rw-r--r-- | youtube_dl/extractor/__init__.py | 11 | ||||
| -rw-r--r-- | youtube_dl/extractor/common.py | 2 | ||||
| -rw-r--r-- | youtube_dl/extractor/googleplus.py | 10 | ||||
| -rw-r--r-- | youtube_dl/extractor/jukebox.py | 56 | ||||
| -rw-r--r-- | youtube_dl/extractor/tudou.py | 32 | ||||
| -rw-r--r-- | youtube_dl/extractor/worldstarhiphop.py | 2 | ||||
| -rw-r--r-- | youtube_dl/extractor/youtube.py | 9 | ||||
| -rw-r--r-- | youtube_dl/utils.py | 2 | ||||
| -rw-r--r-- | youtube_dl/version.py | 2 | 
13 files changed, 155 insertions, 15 deletions
| diff --git a/devscripts/release.sh b/devscripts/release.sh index d32ae47dd..46c31e437 100755 --- a/devscripts/release.sh +++ b/devscripts/release.sh @@ -69,7 +69,9 @@ git checkout HEAD -- youtube-dl youtube-dl.exe  /bin/echo -e "\n### Signing and uploading the new binaries to youtube-dl.org..."  for f in $RELEASE_FILES; do gpg --detach-sig "build/$version/$f"; done -scp -r "build/$version" ytdl@youtube-dl.org:html/downloads/ +scp -r "build/$version" ytdl@yt-dl.org:html/tmp/ +ssh ytdl@yt-dl.org "mv html/tmp/$version html/downloads/" +ssh ytdl@yt-dl.org "sh html/update_latest.sh $version"  /bin/echo -e "\n### Now switching to gh-pages..."  git clone --branch gh-pages --single-branch . build/gh-pages diff --git a/test/test_download.py b/test/test_download.py index 9af626dca..067bde4bb 100644 --- a/test/test_download.py +++ b/test/test_download.py @@ -153,9 +153,11 @@ def generator(test_case):      return test_template  ### And add them to TestDownload -for test_case in defs: +for n, test_case in enumerate(defs):      test_method = generator(test_case)      test_method.__name__ = "test_{0}".format(test_case["name"]) +    if getattr(TestDownload, test_method.__name__, False): +        test_method.__name__ = "test_{0}_{1}".format(test_case["name"], n)      setattr(TestDownload, test_method.__name__, test_method)      del test_method diff --git a/test/tests.json b/test/tests.json index fd037d818..5f4f642e8 100644 --- a/test/tests.json +++ b/test/tests.json @@ -12,6 +12,32 @@      }    },    { +    "name": "Youtube", +    "url":  "http://www.youtube.com/watch?v=1ltcDfZMA3U", +    "file":  "1ltcDfZMA3U.flv", +    "note": "Test VEVO video (#897)", +    "info_dict": { +      "upload_date": "20070518", +      "title": "Maps - It Will Find You", +      "description": "Music video by Maps performing It Will Find You.", +      "uploader": "MuteUSA", +      "uploader_id": "MuteUSA" +    } +  }, +  { +    "name": "Youtube", +    "url":  "http://www.youtube.com/watch?v=UxxajLWwzqY", +    "file":  "UxxajLWwzqY.mp4", +    "note": "Test generic use_cipher_signature video (#897)", +    "info_dict": { +      "upload_date": "20120506", +      "title": "Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]", +      "description": "md5:b085c9804f5ab69f4adea963a2dceb3c", +      "uploader": "IconaPop", +      "uploader_id": "IconaPop" +    } +  }, +  {      "name": "Dailymotion",      "md5":  "392c4b85a60a90dc4792da41ce3144eb",      "url":  "http://www.dailymotion.com/video/x33vw9_tutoriel-de-youtubeur-dl-des-video_tech", @@ -660,5 +686,14 @@          "upload_date": "20130624",          "uploader": "Hurts"      } +  }, +  { +    "name": "Tudou", +    "url": "http://www.tudou.com/listplay/zzdE77v6Mmo/2xN2duXMxmw.html", +    "file": "159447792.f4v", +    "md5": "ad7c358a01541e926a1e413612c6b10a", +    "info_dict": { +        "title": "卡马乔国足开大脚长传冲吊集锦" +    }    }  ] diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 70e7d53de..cdc4a6962 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -25,6 +25,7 @@ __authors__  = (      'M. Yasoob Ullah Khalid',      'Julien Fraichard',      'Johny Mo Swag', +    'Axel Noack',      )  __license__ = 'Public Domain' diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 9878ad942..0ea990860 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -4,8 +4,8 @@ from .arte import ArteTvIE  from .bandcamp import BandcampIE  from .bliptv import BlipTVIE, BlipTVUserIE  from .breakcom import BreakIE -from .comedycentral import ComedyCentralIE  from .collegehumor import CollegeHumorIE +from .comedycentral import ComedyCentralIE  from .dailymotion import DailymotionIE  from .depositfiles import DepositFilesIE  from .eighttracks import EightTracksIE @@ -21,6 +21,7 @@ from .howcast import HowcastIE  from .hypem import HypemIE  from .ina import InaIE  from .infoq import InfoQIE +from .jukebox import JukeboxIE  from .justintv import JustinTVIE  from .keek import KeekIE  from .liveleak import LiveLeakIE @@ -30,7 +31,6 @@ from .mtv import MTVIE  from .myspass import MySpassIE  from .myvideo import MyVideoIE  from .nba import NBAIE -from .statigram import StatigramIE  from .photobucket import PhotobucketIE  from .pornotube import PornotubeIE  from .rbmaradio import RBMARadioIE @@ -38,9 +38,11 @@ from .redtube import RedTubeIE  from .soundcloud import SoundcloudIE, SoundcloudSetIE  from .spiegel import SpiegelIE  from .stanfordoc import StanfordOpenClassroomIE +from .statigram import StatigramIE  from .steam import SteamIE  from .teamcoco import TeamcocoIE  from .ted import TEDIE +from .tudou import TudouIE  from .tumblr import TumblrIE  from .ustream import UstreamIE  from .vbox7 import Vbox7IE @@ -48,8 +50,8 @@ from .vevo import VevoIE  from .vimeo import VimeoIE  from .vine import VineIE  from .worldstarhiphop import WorldStarHipHopIE -from .xnxx import XNXXIE  from .xhamster import XHamsterIE +from .xnxx import XNXXIE  from .xvideos import XVideosIE  from .yahoo import YahooIE, YahooSearchIE  from .youjizz import YouJizzIE @@ -58,6 +60,7 @@ from .youporn import YouPornIE  from .youtube import YoutubeIE, YoutubePlaylistIE, YoutubeSearchIE, YoutubeUserIE, YoutubeChannelIE  from .zdf import ZDFIE +  def gen_extractors():      """ Return a list of an instance of every supported extractor.      The order does matter; the first extractor matched is the one handling the URL. @@ -127,6 +130,8 @@ def gen_extractors():          StatigramIE(),          BreakIE(),          VevoIE(), +        JukeboxIE(), +        TudouIE(),          GenericIE()      ] diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 062f4cf1e..64d63e109 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -211,7 +211,7 @@ class InfoExtractor(object):              raise ExtractorError(u'Unable to extract %s' % _name)          else:              self._downloader.report_warning(u'unable to extract %s; ' -                u'please report this issue on GitHub.' % _name) +                u'please report this issue on http://yt-dl.org/bug' % _name)              return None      def _html_search_regex(self, pattern, string, name, default=None, fatal=True, flags=0): diff --git a/youtube_dl/extractor/googleplus.py b/youtube_dl/extractor/googleplus.py index e922bd140..ff2cdeebb 100644 --- a/youtube_dl/extractor/googleplus.py +++ b/youtube_dl/extractor/googleplus.py @@ -46,14 +46,18 @@ class GooglePlusIE(InfoExtractor):          video_title = self._html_search_regex(r'<meta name\=\"Description\" content\=\"(.*?)[\n<"]',              webpage, 'title', default=u'NA') -        # Step 2, Stimulate clicking the image box to launch video -        video_page = self._search_regex('"(https\://plus\.google\.com/photos/.*?)",,"image/jpeg","video"\]', +        # Step 2, Simulate clicking the image box to launch video +        DOMAIN = 'https://plus.google.com' +        video_page = self._search_regex(r'<a href="((?:%s)?/photos/.*?)"' % re.escape(DOMAIN),              webpage, u'video page URL') +        if not video_page.startswith(DOMAIN): +            video_page = DOMAIN + video_page +          webpage = self._download_webpage(video_page, video_id, u'Downloading video page')          # Extract video links on video page          """Extract video links of all sizes""" -        pattern = '\d+,\d+,(\d+),"(http\://redirector\.googlevideo\.com.*?)"' +        pattern = r'\d+,\d+,(\d+),"(http\://redirector\.googlevideo\.com.*?)"'          mobj = re.findall(pattern, webpage)          if len(mobj) == 0:              raise ExtractorError(u'Unable to extract video links') diff --git a/youtube_dl/extractor/jukebox.py b/youtube_dl/extractor/jukebox.py new file mode 100644 index 000000000..c7bb234fe --- /dev/null +++ b/youtube_dl/extractor/jukebox.py @@ -0,0 +1,56 @@ +# coding: utf-8 +import re + +from .common import InfoExtractor +from ..utils import ( +    ExtractorError, +    unescapeHTML, +) + +class JukeboxIE(InfoExtractor): +    _VALID_URL = r'^http://www\.jukebox?\..+?\/.+[,](?P<video_id>[a-z0-9\-]+).html' +    _IFRAME = r'<iframe .*src="(?P<iframe>[^"]*)".*>' +    _VIDEO_URL = r'"config":{"file":"(?P<video_url>http:[^"]+[.](?P<video_ext>[^.?]+)[?]mdtk=[0-9]+)"' +    _TITLE = r'<h1 class="inline">(?P<title>[^<]+)</h1>.*<span id="infos_article_artist">(?P<artist>[^<]+)</span>' +    _IS_YOUTUBE = r'config":{"file":"(?P<youtube_url>http:[\\][/][\\][/]www[.]youtube[.]com[\\][/]watch[?]v=[^"]+)"' + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        video_id = mobj.group('video_id') + +        html = self._download_webpage(url, video_id) + +        mobj = re.search(self._IFRAME, html) +        if mobj is None: +            raise ExtractorError(u'Cannot extract iframe url') +        iframe_url = unescapeHTML(mobj.group('iframe')) + +        iframe_html = self._download_webpage(iframe_url, video_id, 'Downloading iframe') +        mobj = re.search(r'class="jkb_waiting"', iframe_html) +        if mobj is not None: +            raise ExtractorError(u'Video is not available(in your country?)!') + +        self.report_extraction(video_id) + +        mobj = re.search(self._VIDEO_URL, iframe_html) +        if mobj is None: +            mobj = re.search(self._IS_YOUTUBE, iframe_html) +            if mobj is None: +                raise ExtractorError(u'Cannot extract video url') +            youtube_url = unescapeHTML(mobj.group('youtube_url')).replace('\/','/') +            self.to_screen(u'Youtube video detected') +            return self.url_result(youtube_url,ie='Youtube') +        video_url = unescapeHTML(mobj.group('video_url')).replace('\/','/') +        video_ext = unescapeHTML(mobj.group('video_ext')) + +        mobj = re.search(self._TITLE, html) +        if mobj is None: +            raise ExtractorError(u'Cannot extract title') +        title = unescapeHTML(mobj.group('title')) +        artist = unescapeHTML(mobj.group('artist')) + +        return [{'id': video_id, +                 'url': video_url, +                 'title': artist + '-' + title, +                 'ext': video_ext +                 }] diff --git a/youtube_dl/extractor/tudou.py b/youtube_dl/extractor/tudou.py new file mode 100644 index 000000000..9ca860ab0 --- /dev/null +++ b/youtube_dl/extractor/tudou.py @@ -0,0 +1,32 @@ +import re + +from .common import InfoExtractor + + +class TudouIE(InfoExtractor): +    _VALID_URL = r'(?:http://)?(?:www\.)?tudou\.com/(?:listplay|programs)/(?:view|(.+?))/(?:([^/]+)|([^/]+)\.html)' + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        video_id = mobj.group(2).replace('.html','') +        webpage = self._download_webpage(url, video_id) +        video_id = re.search('"k":(.+?),',webpage).group(1) +        title = re.search(",kw:\"(.+)\"",webpage) +        if title is None: +            title = re.search(",kw: \'(.+)\'",webpage) +        title = title.group(1) +        thumbnail_url = re.search(",pic: \'(.+?)\'",webpage) +        if thumbnail_url is None: +            thumbnail_url = re.search(",pic:\"(.+?)\"",webpage) +        thumbnail_url = thumbnail_url.group(1) +        info_url = "http://v2.tudou.com/f?id="+str(video_id) +        webpage = self._download_webpage(info_url, video_id, "Opening the info webpage") +        final_url = re.search('\>(.+?)\<\/f\>',webpage).group(1) +        ext = (final_url.split('?')[0]).split('.')[-1] +        return [{ +            'id':        video_id, +            'url':       final_url, +            'ext':       ext, +            'title':     title, +            'thumbnail': thumbnail_url, +        }] diff --git a/youtube_dl/extractor/worldstarhiphop.py b/youtube_dl/extractor/worldstarhiphop.py index f628e4fb1..531d0889f 100644 --- a/youtube_dl/extractor/worldstarhiphop.py +++ b/youtube_dl/extractor/worldstarhiphop.py @@ -18,7 +18,7 @@ class WorldStarHipHopIE(InfoExtractor):          if 'youtube' in video_url:              self.to_screen(u'Youtube video detected:') -            return self.url_result('%s' % video_url, ie='Youtube') +            return self.url_result(video_url, ie='Youtube')          if 'mp4' in video_url:              ext = 'mp4' diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index b34c1a7b9..de653cb3d 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -129,12 +129,13 @@ class YoutubeIE(InfoExtractor):          """Indicate the download will use the RTMP protocol."""          self.to_screen(u'RTMP download detected') -    @staticmethod -    def _decrypt_signature(s): +    def _decrypt_signature(self, s):          """Decrypt the key the two subkeys must have a length of 43"""          (a,b) = s.split('.')          if len(a) != 43 or len(b) != 43: -            raise ExtractorError(u'Unable to decrypt signature, subkeys lengths not valid') +            raise ExtractorError(u'Unable to decrypt signature, subkeys lengths %d.%d not supported; retrying might work' % (len(a), len(b))) +        if self._downloader.params.get('verbose'): +            self.to_screen('encrypted signature length %d.%d' % (len(a), len(b)))          b = ''.join([b[:8],a[0],b[9:18],b[-4],b[19:39], b[18]])[0:40]          a = a[-40:]          s_dec = '.'.join((a,b))[::-1] @@ -484,6 +485,8 @@ class YoutubeIE(InfoExtractor):          try:              mobj = re.search(r';ytplayer.config = ({.*?});', video_webpage) +            if not mobj: +                raise ValueError('Could not find vevo ID')              info = json.loads(mobj.group(1))              args = info['args']              # Easy way to know if the 's' value is in url_encoded_fmt_stream_map diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 66ae41e31..4d415bd61 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -474,7 +474,7 @@ class ExtractorError(Exception):          """ tb, if given, is the original traceback (so that it can be printed out). """          if not sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError): -            msg = msg + u'; please report this issue on GitHub.' +            msg = msg + u'; please report this issue on http://yt-dl.org/bug'          super(ExtractorError, self).__init__(msg)          self.traceback = tb diff --git a/youtube_dl/version.py b/youtube_dl/version.py index ba5d5b919..15003cccd 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2013.06.29' +__version__ = '2013.06.32' | 
