aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--test/test_download.py8
-rw-r--r--test/test_playlists.py9
-rw-r--r--youtube_dl/FileDownloader.py33
-rw-r--r--youtube_dl/YoutubeDL.py2
-rw-r--r--youtube_dl/__init__.py2
-rw-r--r--youtube_dl/extractor/__init__.py2
-rw-r--r--youtube_dl/extractor/arte.py33
-rw-r--r--youtube_dl/extractor/brightcove.py57
-rw-r--r--youtube_dl/extractor/cnn.py2
-rw-r--r--youtube_dl/extractor/eitb.py37
-rw-r--r--youtube_dl/extractor/generic.py24
-rw-r--r--youtube_dl/extractor/kankan.py7
-rw-r--r--youtube_dl/extractor/mtv.py1
-rw-r--r--youtube_dl/extractor/slashdot.py1
-rw-r--r--youtube_dl/extractor/soundcloud.py130
-rw-r--r--youtube_dl/extractor/space.py35
-rw-r--r--youtube_dl/extractor/weibo.py1
-rw-r--r--youtube_dl/extractor/xnxx.py2
-rw-r--r--youtube_dl/extractor/youtube.py31
-rw-r--r--youtube_dl/version.py2
20 files changed, 278 insertions, 141 deletions
diff --git a/test/test_download.py b/test/test_download.py
index 73379beb1..16f200809 100644
--- a/test/test_download.py
+++ b/test/test_download.py
@@ -31,6 +31,7 @@ from youtube_dl.utils import (
ExtractorError,
UnavailableVideoError,
)
+from youtube_dl.extractor import get_info_extractor
RETRIES = 3
@@ -63,9 +64,10 @@ def generator(test_case):
def test_template(self):
ie = youtube_dl.extractor.get_info_extractor(test_case['name'])
+ other_ies = [get_info_extractor(ie_key) for ie_key in test_case.get('add_ie', [])]
def print_skipping(reason):
print('Skipping %s: %s' % (test_case['name'], reason))
- if not ie._WORKING:
+ if not ie.working():
print_skipping('IE marked as not _WORKING')
return
if 'playlist' not in test_case:
@@ -77,6 +79,10 @@ def generator(test_case):
if 'skip' in test_case:
print_skipping(test_case['skip'])
return
+ for other_ie in other_ies:
+ if not other_ie.working():
+ print_skipping(u'test depends on %sIE, marked as not WORKING' % other_ie.ie_key())
+ return
params = get_params(test_case.get('params', {}))
diff --git a/test/test_playlists.py b/test/test_playlists.py
index de1e8d88e..706b6bdca 100644
--- a/test/test_playlists.py
+++ b/test/test_playlists.py
@@ -17,6 +17,7 @@ from youtube_dl.extractor import (
DailymotionUserIE,
VimeoChannelIE,
UstreamChannelIE,
+ SoundcloudSetIE,
SoundcloudUserIE,
LivestreamIE,
NHLVideocenterIE,
@@ -61,6 +62,14 @@ class TestPlaylists(unittest.TestCase):
self.assertEqual(result['id'], u'5124905')
self.assertTrue(len(result['entries']) >= 11)
+ def test_soundcloud_set(self):
+ dl = FakeYDL()
+ ie = SoundcloudSetIE(dl)
+ result = ie.extract('https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep')
+ self.assertIsPlaylist(result)
+ self.assertEqual(result['title'], u'The Royal Concept EP')
+ self.assertTrue(len(result['entries']) >= 6)
+
def test_soundcloud_user(self):
dl = FakeYDL()
ie = SoundcloudUserIE(dl)
diff --git a/youtube_dl/FileDownloader.py b/youtube_dl/FileDownloader.py
index 8ecabab1a..35fa3ca61 100644
--- a/youtube_dl/FileDownloader.py
+++ b/youtube_dl/FileDownloader.py
@@ -4,12 +4,19 @@ import re
import subprocess
import sys
import time
-import traceback
if os.name == 'nt':
import ctypes
-from .utils import *
+from .utils import (
+ compat_urllib_error,
+ compat_urllib_request,
+ ContentTooShortError,
+ determine_ext,
+ encodeFilename,
+ sanitize_open,
+ timeconvert,
+)
class FileDownloader(object):
@@ -194,7 +201,7 @@ class FileDownloader(object):
if old_filename == new_filename:
return
os.rename(encodeFilename(old_filename), encodeFilename(new_filename))
- except (IOError, OSError) as err:
+ except (IOError, OSError):
self.report_error(u'unable to rename file')
def try_utime(self, filename, last_modified_hdr):
@@ -227,8 +234,14 @@ class FileDownloader(object):
if self.params.get('noprogress', False):
return
clear_line = (u'\x1b[K' if sys.stderr.isatty() and os.name != 'nt' else u'')
- eta_str = self.format_eta(eta)
- percent_str = self.format_percent(percent)
+ if eta is not None:
+ eta_str = self.format_eta(eta)
+ else:
+ eta_str = 'Unknown ETA'
+ if percent is not None:
+ percent_str = self.format_percent(percent)
+ else:
+ percent_str = 'Unknown %'
speed_str = self.format_speed(speed)
if self.params.get('progress_with_newline', False):
self.to_screen(u'[download] %s of %s at %s ETA %s' %
@@ -251,7 +264,7 @@ class FileDownloader(object):
"""Report file has already been fully downloaded."""
try:
self.to_screen(u'[download] %s has already been downloaded' % file_name)
- except (UnicodeEncodeError) as err:
+ except UnicodeEncodeError:
self.to_screen(u'[download] The file has already been downloaded')
def report_unable_to_resume(self):
@@ -366,7 +379,8 @@ class FileDownloader(object):
self.report_destination(filename)
tmpfilename = self.temp_name(filename)
- args = ['ffmpeg', '-y', '-i', url, '-f', 'mp4', tmpfilename]
+ args = ['ffmpeg', '-y', '-i', url, '-f', 'mp4', '-c', 'copy',
+ '-absf', 'aac_adtstoasc', tmpfilename]
# Check for ffmpeg first
try:
subprocess.call(['ffmpeg', '-h'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
@@ -550,12 +564,11 @@ class FileDownloader(object):
# Progress message
speed = self.calc_speed(start, time.time(), byte_counter - resume_len)
if data_len is None:
- self.report_progress('Unknown %', data_len_str, speed_str, 'Unknown ETA')
- eta = None
+ eta = percent = None
else:
percent = self.calc_percent(byte_counter, data_len)
eta = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
- self.report_progress(percent, data_len_str, speed, eta)
+ self.report_progress(percent, data_len_str, speed, eta)
self._hook_progress({
'downloaded_bytes': byte_counter,
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index 86a6fd043..5253c39e1 100644
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -782,7 +782,7 @@ class YoutubeDL(object):
def list_formats(self, info_dict):
def line(format):
- return (u'%-15s%-10s%-12s%s' % (
+ return (u'%-20s%-10s%-12s%s' % (
format['format_id'],
format['ext'],
self.format_resolution(format),
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py
index 48ffcbf8e..ab7879c5d 100644
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -349,7 +349,7 @@ def parseOpts(overrideArguments=None):
'for example with -o \'/my/downloads/%(uploader)s/%(title)s-%(id)s.%(ext)s\' .'))
filesystem.add_option('--autonumber-size',
dest='autonumber_size', metavar='NUMBER',
- help='Specifies the number of digits in %(autonumber)s when it is present in output filename template or --autonumber option is given')
+ help='Specifies the number of digits in %(autonumber)s when it is present in output filename template or --auto-number option is given')
filesystem.add_option('--restrict-filenames',
action='store_true', dest='restrictfilenames',
help='Restrict filenames to only ASCII characters, and avoid "&" and spaces in filenames', default=False)
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 888a91cce..f9caca4ef 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -38,6 +38,7 @@ from .defense import DefenseGouvFrIE
from .ebaumsworld import EbaumsWorldIE
from .ehow import EHowIE
from .eighttracks import EightTracksIE
+from .eitb import EitbIE
from .escapist import EscapistIE
from .exfm import ExfmIE
from .extremetube import ExtremeTubeIE
@@ -115,6 +116,7 @@ from .slideshare import SlideshareIE
from .sohu import SohuIE
from .soundcloud import SoundcloudIE, SoundcloudSetIE, SoundcloudUserIE
from .southparkstudios import SouthParkStudiosIE
+from .space import SpaceIE
from .spankwire import SpankwireIE
from .spiegel import SpiegelIE
from .stanfordoc import StanfordOpenClassroomIE
diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py
index e10c74c11..b35a679e3 100644
--- a/youtube_dl/extractor/arte.py
+++ b/youtube_dl/extractor/arte.py
@@ -10,6 +10,7 @@ from ..utils import (
unified_strdate,
determine_ext,
get_element_by_id,
+ compat_str,
)
# There are different sources of video in arte.tv, the extraction process
@@ -181,20 +182,30 @@ class ArteTVPlus7IE(InfoExtractor):
formats = all_formats
else:
raise ExtractorError(u'The formats list is empty')
- # We order the formats by quality
+
if re.match(r'[A-Z]Q', formats[0]['quality']) is not None:
- sort_key = lambda f: ['HQ', 'MQ', 'EQ', 'SQ'].index(f['quality'])
+ def sort_key(f):
+ return ['HQ', 'MQ', 'EQ', 'SQ'].index(f['quality'])
else:
- sort_key = lambda f: int(f.get('height',-1))
+ def sort_key(f):
+ return (
+ # Sort first by quality
+ int(f.get('height',-1)),
+ int(f.get('bitrate',-1)),
+ # The original version with subtitles has lower relevance
+ re.match(r'VO-ST(F|A)', f.get('versionCode', '')) is None,
+ # The version with subtitles for the deaf and hard of hearing ("sourds/malentendants") also has lower relevance
+ re.match(r'VO?(F|A)-STM\1', f.get('versionCode', '')) is None,
+ )
formats = sorted(formats, key=sort_key)
- # Prefer videos without subtitles in the same language
- formats = sorted(formats, key=lambda f: re.match(r'VO(F|A)-STM\1', f.get('versionCode', '')) is None)
- # Pick the best quality
def _format(format_info):
- quality = format_info['quality']
- m_quality = re.match(r'\w*? - (\d*)p', quality)
- if m_quality is not None:
- quality = m_quality.group(1)
+ quality = ''
+ height = format_info.get('height')
+ if height is not None:
+ quality = compat_str(height)
+ bitrate = format_info.get('bitrate')
+ if bitrate is not None:
+ quality += '-%d' % bitrate
if format_info.get('versionCode') is not None:
format_id = u'%s-%s' % (quality, format_info['versionCode'])
else:
@@ -203,7 +214,7 @@ class ArteTVPlus7IE(InfoExtractor):
'format_id': format_id,
'format_note': format_info.get('versionLibelle'),
'width': format_info.get('width'),
- 'height': format_info.get('height'),
+ 'height': height,
}
if format_info['mediaType'] == u'rtmp':
info['url'] = format_info['streamer']
diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py
index 0d9b87a34..d8c35465a 100644
--- a/youtube_dl/extractor/brightcove.py
+++ b/youtube_dl/extractor/brightcove.py
@@ -9,10 +9,13 @@ from ..utils import (
compat_urllib_parse,
find_xpath_attr,
compat_urlparse,
+ compat_str,
+ compat_urllib_request,
ExtractorError,
)
+
class BrightcoveIE(InfoExtractor):
_VALID_URL = r'https?://.*brightcove\.com/(services|viewer).*\?(?P<query>.*)'
_FEDERATED_URL_TEMPLATE = 'http://c.brightcove.com/services/viewer/htmlFederated?%s'
@@ -41,6 +44,17 @@ class BrightcoveIE(InfoExtractor):
u'uploader': u'Oracle',
},
},
+ {
+ # From http://mashable.com/2013/10/26/thermoelectric-bracelet-lets-you-control-your-body-temperature/
+ u'url': u'http://c.brightcove.com/services/viewer/federated_f9?&playerID=1265504713001&publisherID=AQ%7E%7E%2CAAABBzUwv1E%7E%2CxP-xFHVUstiMFlNYfvF4G9yFnNaqCw_9&videoID=2750934548001',
+ u'info_dict': {
+ u'id': u'2750934548001',
+ u'ext': u'mp4',
+ u'title': u'This Bracelet Acts as a Personal Thermostat',
+ u'description': u'md5:547b78c64f4112766ccf4e151c20b6a0',
+ u'uploader': u'Mashable',
+ },
+ },
]
@classmethod
@@ -68,24 +82,48 @@ class BrightcoveIE(InfoExtractor):
videoPlayer = find_xpath_attr(object_doc, './param', 'name', '@videoPlayer')
if videoPlayer is not None:
params['@videoPlayer'] = videoPlayer.attrib['value']
+ linkBase = find_xpath_attr(object_doc, './param', 'name', 'linkBaseURL')
+ if linkBase is not None:
+ params['linkBaseURL'] = linkBase.attrib['value']
data = compat_urllib_parse.urlencode(params)
return cls._FEDERATED_URL_TEMPLATE % data
+ @classmethod
+ def _extract_brightcove_url(cls, webpage):
+ """Try to extract the brightcove url from the webpage, returns None
+ if it can't be found
+ """
+ m_brightcove = re.search(
+ r'<object[^>]+?class=([\'"])[^>]*?BrightcoveExperience.*?\1.+?</object>',
+ webpage, re.DOTALL)
+ if m_brightcove is not None:
+ return cls._build_brighcove_url(m_brightcove.group())
+ else:
+ return None
+
def _real_extract(self, url):
+ # Change the 'videoId' and other fields to '@videoPlayer'
+ url = re.sub(r'(?<=[?&])(videoI(d|D)|bctid)', '%40videoPlayer', url)
+ # Change bckey (used by bcove.me urls) to playerKey
+ url = re.sub(r'(?<=[?&])bckey', 'playerKey', url)
mobj = re.match(self._VALID_URL, url)
query_str = mobj.group('query')
query = compat_urlparse.parse_qs(query_str)
videoPlayer = query.get('@videoPlayer')
if videoPlayer:
- return self._get_video_info(videoPlayer[0], query_str)
+ return self._get_video_info(videoPlayer[0], query_str, query)
else:
player_key = query['playerKey']
return self._get_playlist_info(player_key[0])
- def _get_video_info(self, video_id, query):
- request_url = self._FEDERATED_URL_TEMPLATE % query
- webpage = self._download_webpage(request_url, video_id)
+ def _get_video_info(self, video_id, query_str, query):
+ request_url = self._FEDERATED_URL_TEMPLATE % query_str
+ req = compat_urllib_request.Request(request_url)
+ linkBase = query.get('linkBaseURL')
+ if linkBase is not None:
+ req.add_header('Referer', linkBase[0])
+ webpage = self._download_webpage(req, video_id)
self.report_extraction(video_id)
info = self._search_regex(r'var experienceJSON = ({.*?});', webpage, 'json')
@@ -109,7 +147,7 @@ class BrightcoveIE(InfoExtractor):
def _extract_video_info(self, video_info):
info = {
- 'id': video_info['id'],
+ 'id': compat_str(video_info['id']),
'title': video_info['displayName'],
'description': video_info.get('shortDescription'),
'thumbnail': video_info.get('videoStillURL') or video_info.get('thumbnailURL'),
@@ -119,10 +157,11 @@ class BrightcoveIE(InfoExtractor):
renditions = video_info.get('renditions')
if renditions:
renditions = sorted(renditions, key=lambda r: r['size'])
- best_format = renditions[-1]
- info.update({
- 'url': best_format['defaultURL'],
- })
+ info['formats'] = [{
+ 'url': rend['defaultURL'],
+ 'height': rend.get('frameHeight'),
+ 'width': rend.get('frameWidth'),
+ } for rend in renditions]
elif video_info.get('FLVFullLengthURL') is not None:
info.update({
'url': video_info['FLVFullLengthURL'],
diff --git a/youtube_dl/extractor/cnn.py b/youtube_dl/extractor/cnn.py
index a79f881cd..34adf6dda 100644
--- a/youtube_dl/extractor/cnn.py
+++ b/youtube_dl/extractor/cnn.py
@@ -6,7 +6,7 @@ from ..utils import determine_ext
class CNNIE(InfoExtractor):
- _VALID_URL = r'''(?x)https?://(edition\.)?cnn\.com/video/(data/.+?|\?)/
+ _VALID_URL = r'''(?x)https?://((edition|www)\.)?cnn\.com/video/(data/.+?|\?)/
(?P<path>.+?/(?P<title>[^/]+?)(?:\.cnn|(?=&)))'''
_TESTS = [{
diff --git a/youtube_dl/extractor/eitb.py b/youtube_dl/extractor/eitb.py
new file mode 100644
index 000000000..4ba323148
--- /dev/null
+++ b/youtube_dl/extractor/eitb.py
@@ -0,0 +1,37 @@
+# encoding: utf-8
+import re
+
+from .common import InfoExtractor
+from .brightcove import BrightcoveIE
+from ..utils import ExtractorError
+
+
+class EitbIE(InfoExtractor):
+ IE_NAME = u'eitb.tv'
+ _VALID_URL = r'https?://www\.eitb\.tv/(eu/bideoa|es/video)/[^/]+/(?P<playlist_id>\d+)/(?P<chapter_id>\d+)'
+
+ _TEST = {
+ u'add_ie': ['Brightcove'],
+ u'url': u'http://www.eitb.tv/es/video/60-minutos-60-minutos-2013-2014/2677100210001/2743577154001/lasa-y-zabala-30-anos/',
+ u'md5': u'edf4436247185adee3ea18ce64c47998',
+ u'info_dict': {
+ u'id': u'2743577154001',
+ u'ext': u'mp4',
+ u'title': u'60 minutos (Lasa y Zabala, 30 años)',
+ # All videos from eitb have this description in the brightcove info
+ u'description': u'.',
+ u'uploader': u'Euskal Telebista',
+ },
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ chapter_id = mobj.group('chapter_id')
+ webpage = self._download_webpage(url, chapter_id)
+ bc_url = BrightcoveIE._extract_brightcove_url(webpage)
+ if bc_url is None:
+ raise ExtractorError(u'Could not extract the Brightcove url')
+ # The BrightcoveExperience object doesn't contain the video id, we set
+ # it manually
+ bc_url += '&%40videoPlayer={0}'.format(chapter_id)
+ return self.url_result(bc_url, BrightcoveIE.ie_key())
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index b3fec8e86..c7552fddb 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -33,6 +33,7 @@ class GenericIE(InfoExtractor):
},
# embedded vimeo video
{
+ u'add_ie': ['Vimeo'],
u'url': u'http://skillsmatter.com/podcast/home/move-semanticsperfect-forwarding-and-rvalue-references',
u'file': u'22444065.mp4',
u'md5': u'2903896e23df39722c33f015af0666e2',
@@ -44,6 +45,7 @@ class GenericIE(InfoExtractor):
},
# bandcamp page with custom domain
{
+ u'add_ie': ['Bandcamp'],
u'url': u'http://bronyrock.com/track/the-pony-mash',
u'file': u'3235767654.mp3',
u'info_dict': {
@@ -52,6 +54,23 @@ class GenericIE(InfoExtractor):
},
u'skip': u'There is a limit of 200 free downloads / month for the test song',
},
+ # embedded brightcove video
+ # it also tests brightcove videos that need to set the 'Referer' in the
+ # http requests
+ {
+ u'add_ie': ['Brightcove'],
+ u'url': u'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
+ u'info_dict': {
+ u'id': u'2765128793001',
+ u'ext': u'mp4',
+ u'title': u'Le cours de bourse : l’analyse technique',
+ u'description': u'md5:7e9ad046e968cb2d1114004aba466fd9',
+ u'uploader': u'BFM BUSINESS',
+ },
+ u'params': {
+ u'skip_download': True,
+ },
+ },
]
def report_download_webpage(self, video_id):
@@ -144,10 +163,9 @@ class GenericIE(InfoExtractor):
self.report_extraction(video_id)
# Look for BrightCove:
- m_brightcove = re.search(r'<object[^>]+?class=([\'"])[^>]*?BrightcoveExperience.*?\1.+?</object>', webpage, re.DOTALL)
- if m_brightcove is not None:
+ bc_url = BrightcoveIE._extract_brightcove_url(webpage)
+ if bc_url is not None:
self.to_screen(u'Brightcove video detected.')
- bc_url = BrightcoveIE._build_brighcove_url(m_brightcove.group())
return self.url_result(bc_url, 'Brightcove')
# Look for embedded Vimeo player
diff --git a/youtube_dl/extractor/kankan.py b/youtube_dl/extractor/kankan.py
index 445d46501..50916f4a6 100644
--- a/youtube_dl/extractor/kankan.py
+++ b/youtube_dl/extractor/kankan.py
@@ -1,8 +1,10 @@
import re
+import hashlib
from .common import InfoExtractor
from ..utils import determine_ext
+_md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest()
class KankanIE(InfoExtractor):
_VALID_URL = r'https?://(?:.*?\.)?kankan\.com/.+?/(?P<id>\d+)\.shtml'
@@ -30,7 +32,10 @@ class KankanIE(InfoExtractor):
video_id, u'Downloading video url info')
ip = self._search_regex(r'ip:"(.+?)"', video_info_page, u'video url ip')
path = self._search_regex(r'path:"(.+?)"', video_info_page, u'video url path')
- video_url = 'http://%s%s' % (ip, path)
+ param1 = self._search_regex(r'param1:(\d+)', video_info_page, u'param1')
+ param2 = self._search_regex(r'param2:(\d+)', video_info_page, u'param2')
+ key = _md5('xl_mp43651' + param1 + param2)
+ video_url = 'http://%s%s?key=%s&key1=%s' % (ip, path, key, param2)
return {'id': video_id,
'title': title,
diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py
index e96d3952c..24a79ae13 100644
--- a/youtube_dl/extractor/mtv.py
+++ b/youtube_dl/extractor/mtv.py
@@ -26,6 +26,7 @@ class MTVIE(InfoExtractor):
},
},
{
+ u'add_ie': ['Vevo'],
u'url': u'http://www.mtv.com/videos/taylor-swift/916187/everything-has-changed-ft-ed-sheeran.jhtml',
u'file': u'USCJY1331283.mp4',
u'md5': u'73b4e7fcadd88929292fe52c3ced8caf',
diff --git a/youtube_dl/extractor/slashdot.py b/youtube_dl/extractor/slashdot.py
index 2cba53076..f5003c7f9 100644
--- a/youtube_dl/extractor/slashdot.py
+++ b/youtube_dl/extractor/slashdot.py
@@ -7,6 +7,7 @@ class SlashdotIE(InfoExtractor):
_VALID_URL = r'https?://tv.slashdot.org/video/\?embed=(?P<id>.*?)(&|$)'
_TEST = {
+ u'add_ie': ['Ooyala'],
u'url': u'http://tv.slashdot.org/video/?embed=JscHMzZDplD0p-yNLOzTfzC3Q3xzJaUz',
u'file': u'JscHMzZDplD0p-yNLOzTfzC3Q3xzJaUz.mp4',
u'md5': u'd2222e7a4a4c1541b3e0cf732fb26735',
diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py
index 29cd5617c..4717fbb77 100644
--- a/youtube_dl/extractor/soundcloud.py
+++ b/youtube_dl/extractor/soundcloud.py
@@ -29,17 +29,34 @@ class SoundcloudIE(InfoExtractor):
)
'''
IE_NAME = u'soundcloud'
- _TEST = {
- u'url': u'http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy',
- u'file': u'62986583.mp3',
- u'md5': u'ebef0a451b909710ed1d7787dddbf0d7',
- u'info_dict': {
- u"upload_date": u"20121011",
- u"description": u"No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o'd",
- u"uploader": u"E.T. ExTerrestrial Music",
- u"title": u"Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1"
- }
- }
+ _TESTS = [
+ {
+ u'url': u'http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy',
+ u'file': u'62986583.mp3',
+ u'md5': u'ebef0a451b909710ed1d7787dddbf0d7',
+ u'info_dict': {
+ u"upload_date": u"20121011",
+ u"description": u"No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o'd",
+ u"uploader": u"E.T. ExTerrestrial Music",
+ u"title": u"Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1"
+ }
+ },
+ # not streamable song
+ {
+ u'url': u'https://soundcloud.com/the-concept-band/goldrushed-mastered?in=the-concept-band/sets/the-royal-concept-ep',
+ u'info_dict': {
+ u'id': u'47127627',
+ u'ext': u'mp3',
+ u'title': u'Goldrushed',
+ u'uploader': u'The Royal Concept',
+ u'upload_date': u'20120521',
+ },
+ u'params': {
+ # rtmp
+ u'skip_download': True,
+ },
+ },
+ ]
_CLIENT_ID = 'b45b1aa10f1ac2941910a7f0d10f8e28'
@@ -56,16 +73,16 @@ class SoundcloudIE(InfoExtractor):
return 'http://api.soundcloud.com/resolve.json?url=' + url + '&client_id=' + cls._CLIENT_ID
def _extract_info_dict(self, info, full_title=None, quiet=False):
- video_id = info['id']
- name = full_title or video_id
+ track_id = compat_str(info['id'])
+ name = full_title or track_id
if quiet == False:
self.report_extraction(name)
thumbnail = info['artwork_url']
if thumbnail is not None:
thumbnail = thumbnail.replace('-large', '-t500x500')
- return {
- 'id': info['id'],
+ result = {
+ 'id': track_id,
'url': info['stream_url'] + '?client_id=' + self._CLIENT_ID,
'uploader': info['user']['username'],
'upload_date': unified_strdate(info['created_at']),
@@ -74,6 +91,21 @@ class SoundcloudIE(InfoExtractor):
'description': info['description'],
'thumbnail': thumbnail,
}
+ if info.get('downloadable', False):
+ result['url'] = 'https://api.soundcloud.com/tracks/{0}/download?client_id={1}'.format(track_id, self._CLIENT_ID)
+ if not info.get('streamable', False):
+ # We have to get the rtmp url
+ stream_json = self._download_webpage(
+ 'http://api.soundcloud.com/i1/tracks/{0}/streams?client_id={1}'.format(track_id, self._CLIENT_ID),
+ track_id, u'Downloading track url')
+ rtmp_url = json.loads(stream_json)['rtmp_mp3_128_url']
+ # The url doesn't have an rtmp app, we have to extract the playpath
+ url, path = rtmp_url.split('mp3:', 1)
+ result.update({
+ 'url': url,
+ 'play_path': 'mp3:' + path,
+ })
+ return result
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE)
@@ -106,70 +138,8 @@ class SoundcloudIE(InfoExtractor):
class SoundcloudSetIE(SoundcloudIE):
_VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/sets/([\w\d-]+)(?:[?].*)?$'
IE_NAME = u'soundcloud:set'
- _TEST = {
- u"url":"https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep",
- u"playlist": [
- {
- u"file":"30510138.mp3",
- u"md5":"f9136bf103901728f29e419d2c70f55d",
- u"info_dict": {
- u"upload_date": u"20111213",
- u"description": u"The Royal Concept from Stockholm\r\nFilip / Povel / David / Magnus\r\nwww.royalconceptband.com",
- u"uploader": u"The Royal Concept",
- u"title": u"D-D-Dance"
- }
- },
- {
- u"file":"47127625.mp3",
- u"md5":"09b6758a018470570f8fd423c9453dd8",
- u"info_dict": {
- u"upload_date": u"20120521",
- u"description": u"The Royal Concept from Stockholm\r\nFilip / Povel / David / Magnus\r\nwww.royalconceptband.com",
- u"uploader": u"The Royal Concept",
- u"title": u"The Royal Concept - Gimme Twice"
- }
- },
- {
- u"file":"47127627.mp3",
- u"md5":"154abd4e418cea19c3b901f1e1306d9c",
- u"info_dict": {
- u"upload_date": u"20120521",
- u"uploader": u"The Royal Concept",
- u"title": u"Goldrushed"
- }
- },
- {
- u"file":"47127629.mp3",
- u"md5":"2f5471edc79ad3f33a683153e96a79c1",
- u"info_dict": {
- u"upload_date": u"20120521",
- u"description": u"The Royal Concept from Stockholm\r\nFilip / Povel / David / Magnus\r\nwww.royalconceptband.com",
- u"uploader": u"The Royal Concept",
- u"title": u"In the End"
- }
- },
- {
- u"file":"47127631.mp3",
- u"md5":"f9ba87aa940af7213f98949254f1c6e2",
- u"info_dict": {
- u"upload_date": u"20120521",
- u"description": u"The Royal Concept from Stockholm\r\nFilip / David / Povel / Magnus\r\nwww.theroyalconceptband.com",
- u"uploader": u"The Royal Concept",
- u"title": u"Knocked Up"
- }
- },
- {
- u"file":"75206121.mp3",
- u"md5":"f9d1fe9406717e302980c30de4af9353",
- u"info_dict": {
- u"upload_date": u"20130116",
- u"description": u"The unreleased track World on Fire premiered on the CW's hit show Arrow (8pm/7pm central). \r\nAs a gift to our fans we would like to offer you a free download of the track! ",
- u"uploader": u"The Royal Concept",
- u"title": u"World On Fire"
- }
- }
- ]
- }
+ # it's in test/test_playlists.py
+ _TESTS = []
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
@@ -208,7 +178,7 @@ class SoundcloudUserIE(SoundcloudIE):
IE_NAME = u'soundcloud:user'
# it's in tests/test_playlists.py
- _TEST = None
+ _TESTS = []
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
diff --git a/youtube_dl/extractor/space.py b/youtube_dl/extractor/space.py
new file mode 100644
index 000000000..0d32a0688
--- /dev/null
+++ b/youtube_dl/extractor/space.py
@@ -0,0 +1,35 @@
+import re
+
+from .common import InfoExtractor
+from .brightcove import BrightcoveIE
+from ..utils import RegexNotFoundError, ExtractorError
+
+
+class SpaceIE(InfoExtractor):
+ _VALID_URL = r'https?://www\.space\.com/\d+-(?P<title>[^/\.\?]*?)-video.html'
+ _TEST = {
+ u'add_ie': ['Brightcove'],
+ u'url': u'http://www.space.com/23373-huge-martian-landforms-detail-revealed-by-european-probe-video.html',
+ u'info_dict': {
+ u'id': u'2780937028001',
+ u'ext': u'mp4',
+ u'title': u'Huge Martian Landforms\' Detail Revealed By European Probe | Video',
+ u'description': u'md5:db81cf7f3122f95ed234b631a6ea1e61',
+ u'uploader': u'TechMedia Networks',
+ },
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ title = mobj.group('title')
+ webpage = self._download_webpage(url, title)
+ try:
+ # Some videos require the playerKey field, which isn't defined in
+ # the BrightcoveExperience object
+ brightcove_url = self._og_search_video_url(webpage)
+ except RegexNotFoundError:
+ # Other videos work fine with the info from the object
+ brightcove_url = BrightcoveIE._extract_brightcove_url(webpage)
+ if brightcove_url is None:
+ raise ExtractorError(u'The webpage does not contain a video', expected=True)
+ return self.url_result(brightcove_url, BrightcoveIE.ie_key())
diff --git a/youtube_dl/extractor/weibo.py b/youtube_dl/extractor/weibo.py
index 0757495bd..fa784ab99 100644
--- a/youtube_dl/extractor/weibo.py
+++ b/youtube_dl/extractor/weibo.py
@@ -13,6 +13,7 @@ class WeiboIE(InfoExtractor):
_VALID_URL = r'https?://video\.weibo\.com/v/weishipin/t_(?P<id>.+?)\.htm'
_TEST = {
+ u'add_ie': ['Sina'],
u'url': u'http://video.weibo.com/v/weishipin/t_zjUw2kZ.htm',
u'file': u'98322879.flv',
u'info_dict': {
diff --git a/youtube_dl/extractor/xnxx.py b/youtube_dl/extractor/xnxx.py
index 8a0eb1afd..1177a4b14 100644
--- a/youtube_dl/extractor/xnxx.py
+++ b/youtube_dl/extractor/xnxx.py
@@ -9,7 +9,7 @@ from ..utils import (
class XNXXIE(InfoExtractor):
- _VALID_URL = r'^(?:https?://)?video\.xnxx\.com/video([0-9]+)/(.*)'
+ _VALID_URL = r'^(?:https?://)?(?:video|www)\.xnxx\.com/video([0-9]+)/(.*)'
VIDEO_URL_RE = r'flv_url=(.*?)&amp;'
VIDEO_TITLE_RE = r'<title>(.*?)\s+-\s+XNXX.COM'
VIDEO_THUMB_RE = r'url_bigthumb=(.*?)&amp;'
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 74a381fe2..f745b8b14 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1572,7 +1572,6 @@ class YoutubePlaylistIE(InfoExtractor):
class YoutubeChannelIE(InfoExtractor):
IE_DESC = u'YouTube.com channels'
_VALID_URL = r"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)"
- _TEMPLATE_URL = 'http://www.youtube.com/channel/%s/videos?sort=da&flow=list&view=0&page=%s&gl=US&hl=en'
_MORE_PAGES_INDICATOR = 'yt-uix-load-more'
_MORE_PAGES_URL = 'http://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s'
IE_NAME = u'youtube:channel'
@@ -1593,30 +1592,20 @@ class YoutubeChannelIE(InfoExtractor):
# Download channel page
channel_id = mobj.group(1)
video_ids = []
- pagenum = 1
- url = self._TEMPLATE_URL % (channel_id, pagenum)
- page = self._download_webpage(url, channel_id,
- u'Downloading page #%s' % pagenum)
+ # Download all channel pages using the json-based channel_ajax query
+ for pagenum in itertools.count(1):
+ url = self._MORE_PAGES_URL % (pagenum, channel_id)
+ page = self._download_webpage(url, channel_id,
+ u'Downloading page #%s' % pagenum)
- # Extract video identifiers
- ids_in_page = self.extract_videos_from_page(page)
- video_ids.extend(ids_in_page)
+ page = json.loads(page)
- # Download any subsequent channel pages using the json-based channel_ajax query
- if self._MORE_PAGES_INDICATOR in page:
- for pagenum in itertools.count(1):
- url = self._MORE_PAGES_URL % (pagenum, channel_id)
- page = self._download_webpage(url, channel_id,
- u'Downloading page #%s' % pagenum)
-
- page = json.loads(page)
-
- ids_in_page = self.extract_videos_from_page(page['content_html'])
- video_ids.extend(ids_in_page)
+ ids_in_page = self.extract_videos_from_page(page['content_html'])
+ video_ids.extend(ids_in_page)
- if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']:
- break
+ if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']:
+ break
self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))
diff --git a/youtube_dl/version.py b/youtube_dl/version.py
index cc0f9cb4e..84bf0f35c 100644
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,2 +1,2 @@
-__version__ = '2013.11.03'
+__version__ = '2013.11.07'