diff options
-rw-r--r-- | test/test_download.py | 8 | ||||
-rw-r--r-- | test/test_playlists.py | 9 | ||||
-rw-r--r-- | youtube_dl/FileDownloader.py | 33 | ||||
-rw-r--r-- | youtube_dl/YoutubeDL.py | 2 | ||||
-rw-r--r-- | youtube_dl/__init__.py | 2 | ||||
-rw-r--r-- | youtube_dl/extractor/__init__.py | 2 | ||||
-rw-r--r-- | youtube_dl/extractor/arte.py | 33 | ||||
-rw-r--r-- | youtube_dl/extractor/brightcove.py | 57 | ||||
-rw-r--r-- | youtube_dl/extractor/cnn.py | 2 | ||||
-rw-r--r-- | youtube_dl/extractor/eitb.py | 37 | ||||
-rw-r--r-- | youtube_dl/extractor/generic.py | 24 | ||||
-rw-r--r-- | youtube_dl/extractor/kankan.py | 7 | ||||
-rw-r--r-- | youtube_dl/extractor/mtv.py | 1 | ||||
-rw-r--r-- | youtube_dl/extractor/slashdot.py | 1 | ||||
-rw-r--r-- | youtube_dl/extractor/soundcloud.py | 130 | ||||
-rw-r--r-- | youtube_dl/extractor/space.py | 35 | ||||
-rw-r--r-- | youtube_dl/extractor/weibo.py | 1 | ||||
-rw-r--r-- | youtube_dl/extractor/xnxx.py | 2 | ||||
-rw-r--r-- | youtube_dl/extractor/youtube.py | 31 | ||||
-rw-r--r-- | youtube_dl/version.py | 2 |
20 files changed, 278 insertions, 141 deletions
diff --git a/test/test_download.py b/test/test_download.py index 73379beb1..16f200809 100644 --- a/test/test_download.py +++ b/test/test_download.py @@ -31,6 +31,7 @@ from youtube_dl.utils import ( ExtractorError, UnavailableVideoError, ) +from youtube_dl.extractor import get_info_extractor RETRIES = 3 @@ -63,9 +64,10 @@ def generator(test_case): def test_template(self): ie = youtube_dl.extractor.get_info_extractor(test_case['name']) + other_ies = [get_info_extractor(ie_key) for ie_key in test_case.get('add_ie', [])] def print_skipping(reason): print('Skipping %s: %s' % (test_case['name'], reason)) - if not ie._WORKING: + if not ie.working(): print_skipping('IE marked as not _WORKING') return if 'playlist' not in test_case: @@ -77,6 +79,10 @@ def generator(test_case): if 'skip' in test_case: print_skipping(test_case['skip']) return + for other_ie in other_ies: + if not other_ie.working(): + print_skipping(u'test depends on %sIE, marked as not WORKING' % other_ie.ie_key()) + return params = get_params(test_case.get('params', {})) diff --git a/test/test_playlists.py b/test/test_playlists.py index de1e8d88e..706b6bdca 100644 --- a/test/test_playlists.py +++ b/test/test_playlists.py @@ -17,6 +17,7 @@ from youtube_dl.extractor import ( DailymotionUserIE, VimeoChannelIE, UstreamChannelIE, + SoundcloudSetIE, SoundcloudUserIE, LivestreamIE, NHLVideocenterIE, @@ -61,6 +62,14 @@ class TestPlaylists(unittest.TestCase): self.assertEqual(result['id'], u'5124905') self.assertTrue(len(result['entries']) >= 11) + def test_soundcloud_set(self): + dl = FakeYDL() + ie = SoundcloudSetIE(dl) + result = ie.extract('https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep') + self.assertIsPlaylist(result) + self.assertEqual(result['title'], u'The Royal Concept EP') + self.assertTrue(len(result['entries']) >= 6) + def test_soundcloud_user(self): dl = FakeYDL() ie = SoundcloudUserIE(dl) diff --git a/youtube_dl/FileDownloader.py b/youtube_dl/FileDownloader.py index 8ecabab1a..35fa3ca61 100644 --- a/youtube_dl/FileDownloader.py +++ b/youtube_dl/FileDownloader.py @@ -4,12 +4,19 @@ import re import subprocess import sys import time -import traceback if os.name == 'nt': import ctypes -from .utils import * +from .utils import ( + compat_urllib_error, + compat_urllib_request, + ContentTooShortError, + determine_ext, + encodeFilename, + sanitize_open, + timeconvert, +) class FileDownloader(object): @@ -194,7 +201,7 @@ class FileDownloader(object): if old_filename == new_filename: return os.rename(encodeFilename(old_filename), encodeFilename(new_filename)) - except (IOError, OSError) as err: + except (IOError, OSError): self.report_error(u'unable to rename file') def try_utime(self, filename, last_modified_hdr): @@ -227,8 +234,14 @@ class FileDownloader(object): if self.params.get('noprogress', False): return clear_line = (u'\x1b[K' if sys.stderr.isatty() and os.name != 'nt' else u'') - eta_str = self.format_eta(eta) - percent_str = self.format_percent(percent) + if eta is not None: + eta_str = self.format_eta(eta) + else: + eta_str = 'Unknown ETA' + if percent is not None: + percent_str = self.format_percent(percent) + else: + percent_str = 'Unknown %' speed_str = self.format_speed(speed) if self.params.get('progress_with_newline', False): self.to_screen(u'[download] %s of %s at %s ETA %s' % @@ -251,7 +264,7 @@ class FileDownloader(object): """Report file has already been fully downloaded.""" try: self.to_screen(u'[download] %s has already been downloaded' % file_name) - except (UnicodeEncodeError) as err: + except UnicodeEncodeError: self.to_screen(u'[download] The file has already been downloaded') def report_unable_to_resume(self): @@ -366,7 +379,8 @@ class FileDownloader(object): self.report_destination(filename) tmpfilename = self.temp_name(filename) - args = ['ffmpeg', '-y', '-i', url, '-f', 'mp4', tmpfilename] + args = ['ffmpeg', '-y', '-i', url, '-f', 'mp4', '-c', 'copy', + '-absf', 'aac_adtstoasc', tmpfilename] # Check for ffmpeg first try: subprocess.call(['ffmpeg', '-h'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT) @@ -550,12 +564,11 @@ class FileDownloader(object): # Progress message speed = self.calc_speed(start, time.time(), byte_counter - resume_len) if data_len is None: - self.report_progress('Unknown %', data_len_str, speed_str, 'Unknown ETA') - eta = None + eta = percent = None else: percent = self.calc_percent(byte_counter, data_len) eta = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len) - self.report_progress(percent, data_len_str, speed, eta) + self.report_progress(percent, data_len_str, speed, eta) self._hook_progress({ 'downloaded_bytes': byte_counter, diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 86a6fd043..5253c39e1 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -782,7 +782,7 @@ class YoutubeDL(object): def list_formats(self, info_dict): def line(format): - return (u'%-15s%-10s%-12s%s' % ( + return (u'%-20s%-10s%-12s%s' % ( format['format_id'], format['ext'], self.format_resolution(format), diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 48ffcbf8e..ab7879c5d 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -349,7 +349,7 @@ def parseOpts(overrideArguments=None): 'for example with -o \'/my/downloads/%(uploader)s/%(title)s-%(id)s.%(ext)s\' .')) filesystem.add_option('--autonumber-size', dest='autonumber_size', metavar='NUMBER', - help='Specifies the number of digits in %(autonumber)s when it is present in output filename template or --autonumber option is given') + help='Specifies the number of digits in %(autonumber)s when it is present in output filename template or --auto-number option is given') filesystem.add_option('--restrict-filenames', action='store_true', dest='restrictfilenames', help='Restrict filenames to only ASCII characters, and avoid "&" and spaces in filenames', default=False) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 888a91cce..f9caca4ef 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -38,6 +38,7 @@ from .defense import DefenseGouvFrIE from .ebaumsworld import EbaumsWorldIE from .ehow import EHowIE from .eighttracks import EightTracksIE +from .eitb import EitbIE from .escapist import EscapistIE from .exfm import ExfmIE from .extremetube import ExtremeTubeIE @@ -115,6 +116,7 @@ from .slideshare import SlideshareIE from .sohu import SohuIE from .soundcloud import SoundcloudIE, SoundcloudSetIE, SoundcloudUserIE from .southparkstudios import SouthParkStudiosIE +from .space import SpaceIE from .spankwire import SpankwireIE from .spiegel import SpiegelIE from .stanfordoc import StanfordOpenClassroomIE diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py index e10c74c11..b35a679e3 100644 --- a/youtube_dl/extractor/arte.py +++ b/youtube_dl/extractor/arte.py @@ -10,6 +10,7 @@ from ..utils import ( unified_strdate, determine_ext, get_element_by_id, + compat_str, ) # There are different sources of video in arte.tv, the extraction process @@ -181,20 +182,30 @@ class ArteTVPlus7IE(InfoExtractor): formats = all_formats else: raise ExtractorError(u'The formats list is empty') - # We order the formats by quality + if re.match(r'[A-Z]Q', formats[0]['quality']) is not None: - sort_key = lambda f: ['HQ', 'MQ', 'EQ', 'SQ'].index(f['quality']) + def sort_key(f): + return ['HQ', 'MQ', 'EQ', 'SQ'].index(f['quality']) else: - sort_key = lambda f: int(f.get('height',-1)) + def sort_key(f): + return ( + # Sort first by quality + int(f.get('height',-1)), + int(f.get('bitrate',-1)), + # The original version with subtitles has lower relevance + re.match(r'VO-ST(F|A)', f.get('versionCode', '')) is None, + # The version with sourds/mal subtitles has also lower relevance + re.match(r'VO?(F|A)-STM\1', f.get('versionCode', '')) is None, + ) formats = sorted(formats, key=sort_key) - # Prefer videos without subtitles in the same language - formats = sorted(formats, key=lambda f: re.match(r'VO(F|A)-STM\1', f.get('versionCode', '')) is None) - # Pick the best quality def _format(format_info): - quality = format_info['quality'] - m_quality = re.match(r'\w*? - (\d*)p', quality) - if m_quality is not None: - quality = m_quality.group(1) + quality = '' + height = format_info.get('height') + if height is not None: + quality = compat_str(height) + bitrate = format_info.get('bitrate') + if bitrate is not None: + quality += '-%d' % bitrate if format_info.get('versionCode') is not None: format_id = u'%s-%s' % (quality, format_info['versionCode']) else: @@ -203,7 +214,7 @@ class ArteTVPlus7IE(InfoExtractor): 'format_id': format_id, 'format_note': format_info.get('versionLibelle'), 'width': format_info.get('width'), - 'height': format_info.get('height'), + 'height': height, } if format_info['mediaType'] == u'rtmp': info['url'] = format_info['streamer'] diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index 0d9b87a34..d8c35465a 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -9,10 +9,13 @@ from ..utils import ( compat_urllib_parse, find_xpath_attr, compat_urlparse, + compat_str, + compat_urllib_request, ExtractorError, ) + class BrightcoveIE(InfoExtractor): _VALID_URL = r'https?://.*brightcove\.com/(services|viewer).*\?(?P<query>.*)' _FEDERATED_URL_TEMPLATE = 'http://c.brightcove.com/services/viewer/htmlFederated?%s' @@ -41,6 +44,17 @@ class BrightcoveIE(InfoExtractor): u'uploader': u'Oracle', }, }, + { + # From http://mashable.com/2013/10/26/thermoelectric-bracelet-lets-you-control-your-body-temperature/ + u'url': u'http://c.brightcove.com/services/viewer/federated_f9?&playerID=1265504713001&publisherID=AQ%7E%7E%2CAAABBzUwv1E%7E%2CxP-xFHVUstiMFlNYfvF4G9yFnNaqCw_9&videoID=2750934548001', + u'info_dict': { + u'id': u'2750934548001', + u'ext': u'mp4', + u'title': u'This Bracelet Acts as a Personal Thermostat', + u'description': u'md5:547b78c64f4112766ccf4e151c20b6a0', + u'uploader': u'Mashable', + }, + }, ] @classmethod @@ -68,24 +82,48 @@ class BrightcoveIE(InfoExtractor): videoPlayer = find_xpath_attr(object_doc, './param', 'name', '@videoPlayer') if videoPlayer is not None: params['@videoPlayer'] = videoPlayer.attrib['value'] + linkBase = find_xpath_attr(object_doc, './param', 'name', 'linkBaseURL') + if linkBase is not None: + params['linkBaseURL'] = linkBase.attrib['value'] data = compat_urllib_parse.urlencode(params) return cls._FEDERATED_URL_TEMPLATE % data + @classmethod + def _extract_brightcove_url(cls, webpage): + """Try to extract the brightcove url from the wepbage, returns None + if it can't be found + """ + m_brightcove = re.search( + r'<object[^>]+?class=([\'"])[^>]*?BrightcoveExperience.*?\1.+?</object>', + webpage, re.DOTALL) + if m_brightcove is not None: + return cls._build_brighcove_url(m_brightcove.group()) + else: + return None + def _real_extract(self, url): + # Change the 'videoId' and others field to '@videoPlayer' + url = re.sub(r'(?<=[?&])(videoI(d|D)|bctid)', '%40videoPlayer', url) + # Change bckey (used by bcove.me urls) to playerKey + url = re.sub(r'(?<=[?&])bckey', 'playerKey', url) mobj = re.match(self._VALID_URL, url) query_str = mobj.group('query') query = compat_urlparse.parse_qs(query_str) videoPlayer = query.get('@videoPlayer') if videoPlayer: - return self._get_video_info(videoPlayer[0], query_str) + return self._get_video_info(videoPlayer[0], query_str, query) else: player_key = query['playerKey'] return self._get_playlist_info(player_key[0]) - def _get_video_info(self, video_id, query): - request_url = self._FEDERATED_URL_TEMPLATE % query - webpage = self._download_webpage(request_url, video_id) + def _get_video_info(self, video_id, query_str, query): + request_url = self._FEDERATED_URL_TEMPLATE % query_str + req = compat_urllib_request.Request(request_url) + linkBase = query.get('linkBaseURL') + if linkBase is not None: + req.add_header('Referer', linkBase[0]) + webpage = self._download_webpage(req, video_id) self.report_extraction(video_id) info = self._search_regex(r'var experienceJSON = ({.*?});', webpage, 'json') @@ -109,7 +147,7 @@ class BrightcoveIE(InfoExtractor): def _extract_video_info(self, video_info): info = { - 'id': video_info['id'], + 'id': compat_str(video_info['id']), 'title': video_info['displayName'], 'description': video_info.get('shortDescription'), 'thumbnail': video_info.get('videoStillURL') or video_info.get('thumbnailURL'), @@ -119,10 +157,11 @@ class BrightcoveIE(InfoExtractor): renditions = video_info.get('renditions') if renditions: renditions = sorted(renditions, key=lambda r: r['size']) - best_format = renditions[-1] - info.update({ - 'url': best_format['defaultURL'], - }) + info['formats'] = [{ + 'url': rend['defaultURL'], + 'height': rend.get('frameHeight'), + 'width': rend.get('frameWidth'), + } for rend in renditions] elif video_info.get('FLVFullLengthURL') is not None: info.update({ 'url': video_info['FLVFullLengthURL'], diff --git a/youtube_dl/extractor/cnn.py b/youtube_dl/extractor/cnn.py index a79f881cd..34adf6dda 100644 --- a/youtube_dl/extractor/cnn.py +++ b/youtube_dl/extractor/cnn.py @@ -6,7 +6,7 @@ from ..utils import determine_ext class CNNIE(InfoExtractor): - _VALID_URL = r'''(?x)https?://(edition\.)?cnn\.com/video/(data/.+?|\?)/ + _VALID_URL = r'''(?x)https?://((edition|www)\.)?cnn\.com/video/(data/.+?|\?)/ (?P<path>.+?/(?P<title>[^/]+?)(?:\.cnn|(?=&)))''' _TESTS = [{ diff --git a/youtube_dl/extractor/eitb.py b/youtube_dl/extractor/eitb.py new file mode 100644 index 000000000..4ba323148 --- /dev/null +++ b/youtube_dl/extractor/eitb.py @@ -0,0 +1,37 @@ +# encoding: utf-8 +import re + +from .common import InfoExtractor +from .brightcove import BrightcoveIE +from ..utils import ExtractorError + + +class EitbIE(InfoExtractor): + IE_NAME = u'eitb.tv' + _VALID_URL = r'https?://www\.eitb\.tv/(eu/bideoa|es/video)/[^/]+/(?P<playlist_id>\d+)/(?P<chapter_id>\d+)' + + _TEST = { + u'add_ie': ['Brightcove'], + u'url': u'http://www.eitb.tv/es/video/60-minutos-60-minutos-2013-2014/2677100210001/2743577154001/lasa-y-zabala-30-anos/', + u'md5': u'edf4436247185adee3ea18ce64c47998', + u'info_dict': { + u'id': u'2743577154001', + u'ext': u'mp4', + u'title': u'60 minutos (Lasa y Zabala, 30 años)', + # All videos from eitb has this description in the brightcove info + u'description': u'.', + u'uploader': u'Euskal Telebista', + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + chapter_id = mobj.group('chapter_id') + webpage = self._download_webpage(url, chapter_id) + bc_url = BrightcoveIE._extract_brightcove_url(webpage) + if bc_url is None: + raise ExtractorError(u'Could not extract the Brightcove url') + # The BrightcoveExperience object doesn't contain the video id, we set + # it manually + bc_url += '&%40videoPlayer={0}'.format(chapter_id) + return self.url_result(bc_url, BrightcoveIE.ie_key()) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index b3fec8e86..c7552fddb 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -33,6 +33,7 @@ class GenericIE(InfoExtractor): }, # embedded vimeo video { + u'add_ie': ['Vimeo'], u'url': u'http://skillsmatter.com/podcast/home/move-semanticsperfect-forwarding-and-rvalue-references', u'file': u'22444065.mp4', u'md5': u'2903896e23df39722c33f015af0666e2', @@ -44,6 +45,7 @@ class GenericIE(InfoExtractor): }, # bandcamp page with custom domain { + u'add_ie': ['Bandcamp'], u'url': u'http://bronyrock.com/track/the-pony-mash', u'file': u'3235767654.mp3', u'info_dict': { @@ -52,6 +54,23 @@ class GenericIE(InfoExtractor): }, u'skip': u'There is a limit of 200 free downloads / month for the test song', }, + # embedded brightcove video + # it also tests brightcove videos that need to set the 'Referer' in the + # http requests + { + u'add_ie': ['Brightcove'], + u'url': u'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/', + u'info_dict': { + u'id': u'2765128793001', + u'ext': u'mp4', + u'title': u'Le cours de bourse : l’analyse technique', + u'description': u'md5:7e9ad046e968cb2d1114004aba466fd9', + u'uploader': u'BFM BUSINESS', + }, + u'params': { + u'skip_download': True, + }, + }, ] def report_download_webpage(self, video_id): @@ -144,10 +163,9 @@ class GenericIE(InfoExtractor): self.report_extraction(video_id) # Look for BrightCove: - m_brightcove = re.search(r'<object[^>]+?class=([\'"])[^>]*?BrightcoveExperience.*?\1.+?</object>', webpage, re.DOTALL) - if m_brightcove is not None: + bc_url = BrightcoveIE._extract_brightcove_url(webpage) + if bc_url is not None: self.to_screen(u'Brightcove video detected.') - bc_url = BrightcoveIE._build_brighcove_url(m_brightcove.group()) return self.url_result(bc_url, 'Brightcove') # Look for embedded Vimeo player diff --git a/youtube_dl/extractor/kankan.py b/youtube_dl/extractor/kankan.py index 445d46501..50916f4a6 100644 --- a/youtube_dl/extractor/kankan.py +++ b/youtube_dl/extractor/kankan.py @@ -1,8 +1,10 @@ import re +import hashlib from .common import InfoExtractor from ..utils import determine_ext +_md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest() class KankanIE(InfoExtractor): _VALID_URL = r'https?://(?:.*?\.)?kankan\.com/.+?/(?P<id>\d+)\.shtml' @@ -30,7 +32,10 @@ class KankanIE(InfoExtractor): video_id, u'Downloading video url info') ip = self._search_regex(r'ip:"(.+?)"', video_info_page, u'video url ip') path = self._search_regex(r'path:"(.+?)"', video_info_page, u'video url path') - video_url = 'http://%s%s' % (ip, path) + param1 = self._search_regex(r'param1:(\d+)', video_info_page, u'param1') + param2 = self._search_regex(r'param2:(\d+)', video_info_page, u'param2') + key = _md5('xl_mp43651' + param1 + param2) + video_url = 'http://%s%s?key=%s&key1=%s' % (ip, path, key, param2) return {'id': video_id, 'title': title, diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index e96d3952c..24a79ae13 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -26,6 +26,7 @@ class MTVIE(InfoExtractor): }, }, { + u'add_ie': ['Vevo'], u'url': u'http://www.mtv.com/videos/taylor-swift/916187/everything-has-changed-ft-ed-sheeran.jhtml', u'file': u'USCJY1331283.mp4', u'md5': u'73b4e7fcadd88929292fe52c3ced8caf', diff --git a/youtube_dl/extractor/slashdot.py b/youtube_dl/extractor/slashdot.py index 2cba53076..f5003c7f9 100644 --- a/youtube_dl/extractor/slashdot.py +++ b/youtube_dl/extractor/slashdot.py @@ -7,6 +7,7 @@ class SlashdotIE(InfoExtractor): _VALID_URL = r'https?://tv.slashdot.org/video/\?embed=(?P<id>.*?)(&|$)' _TEST = { + u'add_ie': ['Ooyala'], u'url': u'http://tv.slashdot.org/video/?embed=JscHMzZDplD0p-yNLOzTfzC3Q3xzJaUz', u'file': u'JscHMzZDplD0p-yNLOzTfzC3Q3xzJaUz.mp4', u'md5': u'd2222e7a4a4c1541b3e0cf732fb26735', diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 29cd5617c..4717fbb77 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -29,17 +29,34 @@ class SoundcloudIE(InfoExtractor): ) ''' IE_NAME = u'soundcloud' - _TEST = { - u'url': u'http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy', - u'file': u'62986583.mp3', - u'md5': u'ebef0a451b909710ed1d7787dddbf0d7', - u'info_dict': { - u"upload_date": u"20121011", - u"description": u"No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o'd", - u"uploader": u"E.T. ExTerrestrial Music", - u"title": u"Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1" - } - } + _TESTS = [ + { + u'url': u'http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy', + u'file': u'62986583.mp3', + u'md5': u'ebef0a451b909710ed1d7787dddbf0d7', + u'info_dict': { + u"upload_date": u"20121011", + u"description": u"No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o'd", + u"uploader": u"E.T. ExTerrestrial Music", + u"title": u"Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1" + } + }, + # not streamable song + { + u'url': u'https://soundcloud.com/the-concept-band/goldrushed-mastered?in=the-concept-band/sets/the-royal-concept-ep', + u'info_dict': { + u'id': u'47127627', + u'ext': u'mp3', + u'title': u'Goldrushed', + u'uploader': u'The Royal Concept', + u'upload_date': u'20120521', + }, + u'params': { + # rtmp + u'skip_download': True, + }, + }, + ] _CLIENT_ID = 'b45b1aa10f1ac2941910a7f0d10f8e28' @@ -56,16 +73,16 @@ class SoundcloudIE(InfoExtractor): return 'http://api.soundcloud.com/resolve.json?url=' + url + '&client_id=' + cls._CLIENT_ID def _extract_info_dict(self, info, full_title=None, quiet=False): - video_id = info['id'] - name = full_title or video_id + track_id = compat_str(info['id']) + name = full_title or track_id if quiet == False: self.report_extraction(name) thumbnail = info['artwork_url'] if thumbnail is not None: thumbnail = thumbnail.replace('-large', '-t500x500') - return { - 'id': info['id'], + result = { + 'id': track_id, 'url': info['stream_url'] + '?client_id=' + self._CLIENT_ID, 'uploader': info['user']['username'], 'upload_date': unified_strdate(info['created_at']), @@ -74,6 +91,21 @@ class SoundcloudIE(InfoExtractor): 'description': info['description'], 'thumbnail': thumbnail, } + if info.get('downloadable', False): + result['url'] = 'https://api.soundcloud.com/tracks/{0}/download?client_id={1}'.format(track_id, self._CLIENT_ID) + if not info.get('streamable', False): + # We have to get the rtmp url + stream_json = self._download_webpage( + 'http://api.soundcloud.com/i1/tracks/{0}/streams?client_id={1}'.format(track_id, self._CLIENT_ID), + track_id, u'Downloading track url') + rtmp_url = json.loads(stream_json)['rtmp_mp3_128_url'] + # The url doesn't have an rtmp app, we have to extract the playpath + url, path = rtmp_url.split('mp3:', 1) + result.update({ + 'url': url, + 'play_path': 'mp3:' + path, + }) + return result def _real_extract(self, url): mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE) @@ -106,70 +138,8 @@ class SoundcloudIE(InfoExtractor): class SoundcloudSetIE(SoundcloudIE): _VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/sets/([\w\d-]+)(?:[?].*)?$' IE_NAME = u'soundcloud:set' - _TEST = { - u"url":"https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep", - u"playlist": [ - { - u"file":"30510138.mp3", - u"md5":"f9136bf103901728f29e419d2c70f55d", - u"info_dict": { - u"upload_date": u"20111213", - u"description": u"The Royal Concept from Stockholm\r\nFilip / Povel / David / Magnus\r\nwww.royalconceptband.com", - u"uploader": u"The Royal Concept", - u"title": u"D-D-Dance" - } - }, - { - u"file":"47127625.mp3", - u"md5":"09b6758a018470570f8fd423c9453dd8", - u"info_dict": { - u"upload_date": u"20120521", - u"description": u"The Royal Concept from Stockholm\r\nFilip / Povel / David / Magnus\r\nwww.royalconceptband.com", - u"uploader": u"The Royal Concept", - u"title": u"The Royal Concept - Gimme Twice" - } - }, - { - u"file":"47127627.mp3", - u"md5":"154abd4e418cea19c3b901f1e1306d9c", - u"info_dict": { - u"upload_date": u"20120521", - u"uploader": u"The Royal Concept", - u"title": u"Goldrushed" - } - }, - { - u"file":"47127629.mp3", - u"md5":"2f5471edc79ad3f33a683153e96a79c1", - u"info_dict": { - u"upload_date": u"20120521", - u"description": u"The Royal Concept from Stockholm\r\nFilip / Povel / David / Magnus\r\nwww.royalconceptband.com", - u"uploader": u"The Royal Concept", - u"title": u"In the End" - } - }, - { - u"file":"47127631.mp3", - u"md5":"f9ba87aa940af7213f98949254f1c6e2", - u"info_dict": { - u"upload_date": u"20120521", - u"description": u"The Royal Concept from Stockholm\r\nFilip / David / Povel / Magnus\r\nwww.theroyalconceptband.com", - u"uploader": u"The Royal Concept", - u"title": u"Knocked Up" - } - }, - { - u"file":"75206121.mp3", - u"md5":"f9d1fe9406717e302980c30de4af9353", - u"info_dict": { - u"upload_date": u"20130116", - u"description": u"The unreleased track World on Fire premiered on the CW's hit show Arrow (8pm/7pm central). \r\nAs a gift to our fans we would like to offer you a free download of the track! ", - u"uploader": u"The Royal Concept", - u"title": u"World On Fire" - } - } - ] - } + # it's in tests/test_playlists.py + _TESTS = [] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) @@ -208,7 +178,7 @@ class SoundcloudUserIE(SoundcloudIE): IE_NAME = u'soundcloud:user' # it's in tests/test_playlists.py - _TEST = None + _TESTS = [] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) diff --git a/youtube_dl/extractor/space.py b/youtube_dl/extractor/space.py new file mode 100644 index 000000000..0d32a0688 --- /dev/null +++ b/youtube_dl/extractor/space.py @@ -0,0 +1,35 @@ +import re + +from .common import InfoExtractor +from .brightcove import BrightcoveIE +from ..utils import RegexNotFoundError, ExtractorError + + +class SpaceIE(InfoExtractor): + _VALID_URL = r'https?://www\.space\.com/\d+-(?P<title>[^/\.\?]*?)-video.html' + _TEST = { + u'add_ie': ['Brightcove'], + u'url': u'http://www.space.com/23373-huge-martian-landforms-detail-revealed-by-european-probe-video.html', + u'info_dict': { + u'id': u'2780937028001', + u'ext': u'mp4', + u'title': u'Huge Martian Landforms\' Detail Revealed By European Probe | Video', + u'description': u'md5:db81cf7f3122f95ed234b631a6ea1e61', + u'uploader': u'TechMedia Networks', + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + title = mobj.group('title') + webpage = self._download_webpage(url, title) + try: + # Some videos require the playerKey field, which isn't define in + # the BrightcoveExperience object + brightcove_url = self._og_search_video_url(webpage) + except RegexNotFoundError: + # Other videos works fine with the info from the object + brightcove_url = BrightcoveIE._extract_brightcove_url(webpage) + if brightcove_url is None: + raise ExtractorError(u'The webpage does not contain a video', expected=True) + return self.url_result(brightcove_url, BrightcoveIE.ie_key()) diff --git a/youtube_dl/extractor/weibo.py b/youtube_dl/extractor/weibo.py index 0757495bd..fa784ab99 100644 --- a/youtube_dl/extractor/weibo.py +++ b/youtube_dl/extractor/weibo.py @@ -13,6 +13,7 @@ class WeiboIE(InfoExtractor): _VALID_URL = r'https?://video\.weibo\.com/v/weishipin/t_(?P<id>.+?)\.htm' _TEST = { + u'add_ie': ['Sina'], u'url': u'http://video.weibo.com/v/weishipin/t_zjUw2kZ.htm', u'file': u'98322879.flv', u'info_dict': { diff --git a/youtube_dl/extractor/xnxx.py b/youtube_dl/extractor/xnxx.py index 8a0eb1afd..1177a4b14 100644 --- a/youtube_dl/extractor/xnxx.py +++ b/youtube_dl/extractor/xnxx.py @@ -9,7 +9,7 @@ from ..utils import ( class XNXXIE(InfoExtractor): - _VALID_URL = r'^(?:https?://)?video\.xnxx\.com/video([0-9]+)/(.*)' + _VALID_URL = r'^(?:https?://)?(?:video|www)\.xnxx\.com/video([0-9]+)/(.*)' VIDEO_URL_RE = r'flv_url=(.*?)&' VIDEO_TITLE_RE = r'<title>(.*?)\s+-\s+XNXX.COM' VIDEO_THUMB_RE = r'url_bigthumb=(.*?)&' diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 74a381fe2..f745b8b14 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1572,7 +1572,6 @@ class YoutubePlaylistIE(InfoExtractor): class YoutubeChannelIE(InfoExtractor): IE_DESC = u'YouTube.com channels' _VALID_URL = r"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)" - _TEMPLATE_URL = 'http://www.youtube.com/channel/%s/videos?sort=da&flow=list&view=0&page=%s&gl=US&hl=en' _MORE_PAGES_INDICATOR = 'yt-uix-load-more' _MORE_PAGES_URL = 'http://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s' IE_NAME = u'youtube:channel' @@ -1593,30 +1592,20 @@ class YoutubeChannelIE(InfoExtractor): # Download channel page channel_id = mobj.group(1) video_ids = [] - pagenum = 1 - url = self._TEMPLATE_URL % (channel_id, pagenum) - page = self._download_webpage(url, channel_id, - u'Downloading page #%s' % pagenum) + # Download all channel pages using the json-based channel_ajax query + for pagenum in itertools.count(1): + url = self._MORE_PAGES_URL % (pagenum, channel_id) + page = self._download_webpage(url, channel_id, + u'Downloading page #%s' % pagenum) - # Extract video identifiers - ids_in_page = self.extract_videos_from_page(page) - video_ids.extend(ids_in_page) + page = json.loads(page) - # Download any subsequent channel pages using the json-based channel_ajax query - if self._MORE_PAGES_INDICATOR in page: - for pagenum in itertools.count(1): - url = self._MORE_PAGES_URL % (pagenum, channel_id) - page = self._download_webpage(url, channel_id, - u'Downloading page #%s' % pagenum) - - page = json.loads(page) - - ids_in_page = self.extract_videos_from_page(page['content_html']) - video_ids.extend(ids_in_page) + ids_in_page = self.extract_videos_from_page(page['content_html']) + video_ids.extend(ids_in_page) - if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']: - break + if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']: + break self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids))) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index cc0f9cb4e..84bf0f35c 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2013.11.03' +__version__ = '2013.11.07' |