aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--README.md21
-rw-r--r--test/test_download.py2
-rw-r--r--test/test_playlists.py10
-rw-r--r--test/test_subtitles.py4
-rw-r--r--test/test_youtube_signature.py6
-rw-r--r--youtube_dl/__init__.py1
-rw-r--r--youtube_dl/extractor/__init__.py2
-rw-r--r--youtube_dl/extractor/cnn.py13
-rw-r--r--youtube_dl/extractor/common.py4
-rw-r--r--youtube_dl/extractor/condenast.py74
-rw-r--r--youtube_dl/extractor/everyonesmixtape.py69
-rw-r--r--youtube_dl/extractor/flickr.py22
-rw-r--r--youtube_dl/extractor/franceinter.py38
-rw-r--r--youtube_dl/extractor/gamespot.py14
-rw-r--r--youtube_dl/extractor/generic.py2
-rw-r--r--youtube_dl/extractor/kankan.py40
-rw-r--r--youtube_dl/extractor/mixcloud.py34
-rw-r--r--youtube_dl/extractor/mpora.py2
-rw-r--r--youtube_dl/extractor/redtube.py2
-rw-r--r--youtube_dl/extractor/soundcloud.py103
-rw-r--r--youtube_dl/extractor/spankwire.py38
-rw-r--r--youtube_dl/extractor/teamcoco.py57
-rw-r--r--youtube_dl/extractor/ted.py17
-rw-r--r--youtube_dl/extractor/youtube.py2
-rw-r--r--youtube_dl/postprocessor/ffmpeg.py1
-rw-r--r--youtube_dl/version.py2
26 files changed, 368 insertions, 212 deletions
diff --git a/README.md b/README.md
index bc7dfac69..cf0bb7b65 100644
--- a/README.md
+++ b/README.md
@@ -93,13 +93,13 @@ which means you can modify it, redistribute it or use it however you like.
different, %(autonumber)s to get an automatically
incremented number, %(ext)s for the filename
extension, %(format)s for the format description
- (like "22 - 1280x720" or "HD"),%(format_id)s for
+ (like "22 - 1280x720" or "HD"), %(format_id)s for
the unique id of the format (like Youtube's
- itags: "137"),%(upload_date)s for the upload date
- (YYYYMMDD), %(extractor)s for the provider
- (youtube, metacafe, etc), %(id)s for the video id
- , %(playlist)s for the playlist the video is in,
- %(playlist_index)s for the position in the
+ itags: "137"), %(upload_date)s for the upload
+ date (YYYYMMDD), %(extractor)s for the provider
+ (youtube, metacafe, etc), %(id)s for the video
+ id, %(playlist)s for the playlist the video is
+ in, %(playlist_index)s for the position in the
playlist and %% for a literal percent. Use - to
output to stdout. Can also be used to download to
a different directory, for example with -o '/my/d
@@ -111,7 +111,7 @@ which means you can modify it, redistribute it or use it however you like.
avoid "&" and spaces in filenames
-a, --batch-file FILE file containing URLs to download ('-' for stdin)
--load-info FILE json file containing the video information
- (created with the "--write-json" option
+ (created with the "--write-json" option)
-w, --no-overwrites do not overwrite files
-c, --continue force resume of partially downloaded files. By
default, youtube-dl will resume downloads if
@@ -145,7 +145,7 @@ which means you can modify it, redistribute it or use it however you like.
--no-progress do not print progress bar
--console-title display progress in console titlebar
-v, --verbose print various debugging information
- --dump-intermediate-pages print downloaded pages to debug problems(very
+ --dump-intermediate-pages print downloaded pages to debug problems (very
verbose)
--write-pages Write downloaded intermediary pages to files in
the current directory to debug problems
@@ -158,8 +158,7 @@ which means you can modify it, redistribute it or use it however you like.
--prefer-free-formats prefer free video formats unless a specific one
is requested
--max-quality FORMAT highest quality format to download
- -F, --list-formats list all available formats (currently youtube
- only)
+ -F, --list-formats list all available formats
## Subtitle Options:
--write-sub write subtitle file
@@ -177,7 +176,7 @@ which means you can modify it, redistribute it or use it however you like.
-u, --username USERNAME account username
-p, --password PASSWORD account password
-n, --netrc use .netrc authentication data
- --video-password PASSWORD video password (vimeo only)
+ --video-password PASSWORD video password (vimeo, smotri)
## Post-processing Options:
-x, --extract-audio convert video files to audio-only files (requires
diff --git a/test/test_download.py b/test/test_download.py
index d0be8d27c..0d925ae69 100644
--- a/test/test_download.py
+++ b/test/test_download.py
@@ -148,7 +148,7 @@ def generator(test_case):
for key, value in info_dict.items()
if value and key in ('title', 'description', 'uploader', 'upload_date', 'uploader_id', 'location'))
if not all(key in tc.get('info_dict', {}).keys() for key in test_info_dict.keys()):
- sys.stderr.write(u'\n"info_dict": ' + json.dumps(test_info_dict, ensure_ascii=False, indent=2) + u'\n')
+ sys.stderr.write(u'\n"info_dict": ' + json.dumps(test_info_dict, ensure_ascii=False, indent=4) + u'\n')
# Check for the presence of mandatory fields
for key in ('id', 'url', 'title', 'ext'):
diff --git a/test/test_playlists.py b/test/test_playlists.py
index b3bfbd923..5eeba091e 100644
--- a/test/test_playlists.py
+++ b/test/test_playlists.py
@@ -32,6 +32,7 @@ from youtube_dl.extractor import (
IviCompilationIE,
ImdbListIE,
KhanAcademyIE,
+ EveryonesMixtapeIE,
)
@@ -210,6 +211,15 @@ class TestPlaylists(unittest.TestCase):
self.assertEqual(result['description'], 'How have humans protected their secret messages through history? What has changed today?')
self.assertTrue(len(result['entries']) >= 3)
+ def test_EveryonesMixtape(self):
+ dl = FakeYDL()
+ ie = EveryonesMixtapeIE(dl)
+ result = ie.extract('http://everyonesmixtape.com/#/mix/m7m0jJAbMQi')
+ self.assertIsPlaylist(result)
+ self.assertEqual(result['id'], 'm7m0jJAbMQi')
+ self.assertEqual(result['title'], 'Driving')
+ self.assertEqual(len(result['entries']), 24)
+
if __name__ == '__main__':
unittest.main()
diff --git a/test/test_subtitles.py b/test/test_subtitles.py
index 263b5ac69..1e4e62faa 100644
--- a/test/test_subtitles.py
+++ b/test/test_subtitles.py
@@ -167,13 +167,13 @@ class TestTedSubtitles(BaseTestSubtitles):
def test_subtitles(self):
self.DL.params['writesubtitles'] = True
subtitles = self.getSubtitles()
- self.assertEqual(md5(subtitles['en']), '2154f31ff9b9f89a0aa671537559c21d')
+ self.assertEqual(md5(subtitles['en']), '4262c1665ff928a2dada178f62cb8d14')
def test_subtitles_lang(self):
self.DL.params['writesubtitles'] = True
self.DL.params['subtitleslangs'] = ['fr']
subtitles = self.getSubtitles()
- self.assertEqual(md5(subtitles['fr']), '7616cbc6df20ec2c1204083c83871cf6')
+ self.assertEqual(md5(subtitles['fr']), '66a63f7f42c97a50f8c0e90bc7797bb5')
def test_allsubtitles(self):
self.DL.params['writesubtitles'] = True
diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py
index 056700614..a3fc53047 100644
--- a/test/test_youtube_signature.py
+++ b/test/test_youtube_signature.py
@@ -27,12 +27,6 @@ _TESTS = [
85,
u'3456789a0cdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRS[UVWXYZ!"#$%&\'()*+,-./:;<=>?@',
),
- (
- u'https://s.ytimg.com/yts/swfbin/watch_as3-vflg5GhxU.swf',
- u'swf',
- 82,
- u':/.-,+*)=\'&%$#"!ZYX0VUTSRQPONMLKJIHGFEDCBAzyxw>utsrqponmlkjihgfedcba987654321'
- ),
]
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py
index 8f783a86c..82b1ff4f4 100644
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -39,6 +39,7 @@ __authors__ = (
'Sergey M.',
'Michael Orlitzky',
'Chris Gahan',
+ 'Saimadhav Heblikar',
)
__license__ = 'Public Domain'
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index b887c7f10..d66f7b026 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -52,6 +52,7 @@ from .ehow import EHowIE
from .eighttracks import EightTracksIE
from .eitb import EitbIE
from .escapist import EscapistIE
+from .everyonesmixtape import EveryonesMixtapeIE
from .exfm import ExfmIE
from .extremetube import ExtremeTubeIE
from .facebook import FacebookIE
@@ -61,6 +62,7 @@ from .fktv import (
FKTVPosteckeIE,
)
from .flickr import FlickrIE
+from .franceinter import FranceInterIE
from .francetv import (
PluzzIE,
FranceTvInfoIE,
diff --git a/youtube_dl/extractor/cnn.py b/youtube_dl/extractor/cnn.py
index c9e7cc561..80bf59ade 100644
--- a/youtube_dl/extractor/cnn.py
+++ b/youtube_dl/extractor/cnn.py
@@ -25,12 +25,13 @@ class CNNIE(InfoExtractor):
},
},
{
- u"url": u"http://edition.cnn.com/video/?/video/us/2013/08/21/sot-student-gives-epic-speech.georgia-institute-of-technology&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+rss%2Fcnn_topstories+%28RSS%3A+Top+Stories%29",
- u"file": u"us_2013_08_21_sot-student-gives-epic-speech.georgia-institute-of-technology.mp4",
- u"md5": u"b5cc60c60a3477d185af8f19a2a26f4e",
- u"info_dict": {
- u"title": "Student's epic speech stuns new freshmen",
- u"description": "A Georgia Tech student welcomes the incoming freshmen with an epic speech backed by music from \"2001: A Space Odyssey.\""
+ "url": "http://edition.cnn.com/video/?/video/us/2013/08/21/sot-student-gives-epic-speech.georgia-institute-of-technology&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+rss%2Fcnn_topstories+%28RSS%3A+Top+Stories%29",
+ "file": "us_2013_08_21_sot-student-gives-epic-speech.georgia-institute-of-technology.mp4",
+ "md5": "b5cc60c60a3477d185af8f19a2a26f4e",
+ "info_dict": {
+ "title": "Student's epic speech stuns new freshmen",
+ "description": "A Georgia Tech student welcomes the incoming freshmen with an epic speech backed by music from \"2001: A Space Odyssey.\"",
+ "upload_date": "20130821",
}
}]
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index ce3d16903..692d828da 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -1,4 +1,5 @@
import base64
+import hashlib
import json
import os
import re
@@ -234,6 +235,9 @@ class InfoExtractor(object):
url = url_or_request.get_full_url()
except AttributeError:
url = url_or_request
+ if len(url) > 200:
+ h = hashlib.md5(url).hexdigest()
+ url = url[:200 - len(h)] + h
raw_filename = ('%s_%s.dump' % (video_id, url))
filename = sanitize_filename(raw_filename, restricted=True)
self.to_screen(u'Saving request to ' + filename)
diff --git a/youtube_dl/extractor/condenast.py b/youtube_dl/extractor/condenast.py
index f336a3c62..03b75b80d 100644
--- a/youtube_dl/extractor/condenast.py
+++ b/youtube_dl/extractor/condenast.py
@@ -1,4 +1,5 @@
# coding: utf-8
+from __future__ import unicode_literals
import re
import json
@@ -20,30 +21,31 @@ class CondeNastIE(InfoExtractor):
# The keys are the supported sites and the values are the name to be shown
# to the user and in the extractor description.
- _SITES = {'wired': u'WIRED',
- 'gq': u'GQ',
- 'vogue': u'Vogue',
- 'glamour': u'Glamour',
- 'wmagazine': u'W Magazine',
- 'vanityfair': u'Vanity Fair',
- }
+ _SITES = {
+ 'wired': 'WIRED',
+ 'gq': 'GQ',
+ 'vogue': 'Vogue',
+ 'glamour': 'Glamour',
+ 'wmagazine': 'W Magazine',
+ 'vanityfair': 'Vanity Fair',
+ }
_VALID_URL = r'http://(video|www).(?P<site>%s).com/(?P<type>watch|series|video)/(?P<id>.+)' % '|'.join(_SITES.keys())
- IE_DESC = u'Condé Nast media group: %s' % ', '.join(sorted(_SITES.values()))
+ IE_DESC = 'Condé Nast media group: %s' % ', '.join(sorted(_SITES.values()))
_TEST = {
- u'url': u'http://video.wired.com/watch/3d-printed-speakers-lit-with-led',
- u'file': u'5171b343c2b4c00dd0c1ccb3.mp4',
- u'md5': u'1921f713ed48aabd715691f774c451f7',
- u'info_dict': {
- u'title': u'3D Printed Speakers Lit With LED',
- u'description': u'Check out these beautiful 3D printed LED speakers. You can\'t actually buy them, but LumiGeek is working on a board that will let you make you\'re own.',
+ 'url': 'http://video.wired.com/watch/3d-printed-speakers-lit-with-led',
+ 'file': '5171b343c2b4c00dd0c1ccb3.mp4',
+ 'md5': '1921f713ed48aabd715691f774c451f7',
+ 'info_dict': {
+ 'title': '3D Printed Speakers Lit With LED',
+ 'description': 'Check out these beautiful 3D printed LED speakers. You can\'t actually buy them, but LumiGeek is working on a board that will let you make you\'re own.',
}
}
def _extract_series(self, url, webpage):
title = self._html_search_regex(r'<div class="cne-series-info">.*?<h1>(.+?)</h1>',
- webpage, u'series title', flags=re.DOTALL)
+ webpage, 'series title', flags=re.DOTALL)
url_object = compat_urllib_parse_urlparse(url)
base_url = '%s://%s' % (url_object.scheme, url_object.netloc)
m_paths = re.finditer(r'<p class="cne-thumb-title">.*?<a href="(/watch/.+?)["\?]',
@@ -57,39 +59,41 @@ class CondeNastIE(InfoExtractor):
description = self._html_search_regex([r'<div class="cne-video-description">(.+?)</div>',
r'<div class="video-post-content">(.+?)</div>',
],
- webpage, u'description',
+ webpage, 'description',
fatal=False, flags=re.DOTALL)
params = self._search_regex(r'var params = {(.+?)}[;,]', webpage,
- u'player params', flags=re.DOTALL)
- video_id = self._search_regex(r'videoId: [\'"](.+?)[\'"]', params, u'video id')
- player_id = self._search_regex(r'playerId: [\'"](.+?)[\'"]', params, u'player id')
- target = self._search_regex(r'target: [\'"](.+?)[\'"]', params, u'target')
+ 'player params', flags=re.DOTALL)
+ video_id = self._search_regex(r'videoId: [\'"](.+?)[\'"]', params, 'video id')
+ player_id = self._search_regex(r'playerId: [\'"](.+?)[\'"]', params, 'player id')
+ target = self._search_regex(r'target: [\'"](.+?)[\'"]', params, 'target')
data = compat_urllib_parse.urlencode({'videoId': video_id,
'playerId': player_id,
'target': target,
})
base_info_url = self._search_regex(r'url = [\'"](.+?)[\'"][,;]',
- webpage, u'base info url',
+ webpage, 'base info url',
default='http://player.cnevids.com/player/loader.js?')
info_url = base_info_url + data
info_page = self._download_webpage(info_url, video_id,
- u'Downloading video info')
- video_info = self._search_regex(r'var video = ({.+?});', info_page, u'video info')
+ 'Downloading video info')
+ video_info = self._search_regex(r'var video = ({.+?});', info_page, 'video info')
video_info = json.loads(video_info)
- def _formats_sort_key(f):
- type_ord = 1 if f['type'] == 'video/mp4' else 0
- quality_ord = 1 if f['quality'] == 'high' else 0
- return (quality_ord, type_ord)
- best_format = sorted(video_info['sources'][0], key=_formats_sort_key)[-1]
+ formats = [{
+ 'format_id': '%s-%s' % (fdata['type'].split('/')[-1], fdata['quality']),
+ 'url': fdata['src'],
+ 'ext': fdata['type'].split('/')[-1],
+ 'quality': 1 if fdata['quality'] == 'high' else 0,
+ } for fdata in video_info['sources'][0]]
+ self._sort_formats(formats)
- return {'id': video_id,
- 'url': best_format['src'],
- 'ext': best_format['type'].split('/')[-1],
- 'title': video_info['title'],
- 'thumbnail': video_info['poster_frame'],
- 'description': description,
- }
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ 'title': video_info['title'],
+ 'thumbnail': video_info['poster_frame'],
+ 'description': description,
+ }
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
diff --git a/youtube_dl/extractor/everyonesmixtape.py b/youtube_dl/extractor/everyonesmixtape.py
new file mode 100644
index 000000000..12829cbcc
--- /dev/null
+++ b/youtube_dl/extractor/everyonesmixtape.py
@@ -0,0 +1,69 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ compat_urllib_request,
+ ExtractorError,
+)
+
+
+class EveryonesMixtapeIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?everyonesmixtape\.com/#/mix/(?P<id>[0-9a-zA-Z]+)(?:/(?P<songnr>[0-9]))?$'
+
+ _TEST = {
+ 'url': 'http://everyonesmixtape.com/#/mix/m7m0jJAbMQi/5',
+ 'file': '5bfseWNmlds.mp4',
+ "info_dict": {
+ "title": "Passion Pit - \"Sleepyhead\" (Official Music Video)",
+ "uploader": "FKR.TV",
+ "uploader_id": "frenchkissrecords",
+ "description": "Music video for \"Sleepyhead\" from Passion Pit's debut EP Chunk Of Change.\nBuy on iTunes: https://itunes.apple.com/us/album/chunk-of-change-ep/id300087641\n\nDirected by The Wilderness.\n\nhttp://www.passionpitmusic.com\nhttp://www.frenchkissrecords.com",
+ "upload_date": "20081015"
+ },
+ 'params': {
+ 'skip_download': True, # This is simply YouTube
+ }
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ playlist_id = mobj.group('id')
+
+ pllist_url = 'http://everyonesmixtape.com/mixtape.php?a=getMixes&u=-1&linked=%s&explore=' % playlist_id
+ pllist_req = compat_urllib_request.Request(pllist_url)
+ pllist_req.add_header('X-Requested-With', 'XMLHttpRequest')
+
+ playlist_list = self._download_json(
+ pllist_req, playlist_id, note='Downloading playlist metadata')
+ try:
+ playlist_no = next(playlist['id']
+ for playlist in playlist_list
+ if playlist['code'] == playlist_id)
+ except StopIteration:
+ raise ExtractorError('Playlist id not found')
+
+ pl_url = 'http://everyonesmixtape.com/mixtape.php?a=getMix&id=%s&userId=null&code=' % playlist_no
+ pl_req = compat_urllib_request.Request(pl_url)
+ pl_req.add_header('X-Requested-With', 'XMLHttpRequest')
+ playlist = self._download_json(
+ pl_req, playlist_id, note='Downloading playlist info')
+
+ entries = [{
+ '_type': 'url',
+ 'url': t['url'],
+ 'title': t['title'],
+ } for t in playlist['tracks']]
+
+ if mobj.group('songnr'):
+ songnr = int(mobj.group('songnr')) - 1
+ return entries[songnr]
+
+ playlist_title = playlist['mixData']['name']
+ return {
+ '_type': 'playlist',
+ 'id': playlist_id,
+ 'title': playlist_title,
+ 'entries': entries,
+ }
diff --git a/youtube_dl/extractor/flickr.py b/youtube_dl/extractor/flickr.py
index e1d2f0526..21ea5ec2b 100644
--- a/youtube_dl/extractor/flickr.py
+++ b/youtube_dl/extractor/flickr.py
@@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
import re
from .common import InfoExtractor
@@ -11,13 +13,13 @@ class FlickrIE(InfoExtractor):
"""Information Extractor for Flickr videos"""
_VALID_URL = r'(?:https?://)?(?:www\.|secure\.)?flickr\.com/photos/(?P<uploader_id>[\w\-_@]+)/(?P<id>\d+).*'
_TEST = {
- u'url': u'http://www.flickr.com/photos/forestwander-nature-pictures/5645318632/in/photostream/',
- u'file': u'5645318632.mp4',
- u'md5': u'6fdc01adbc89d72fc9c4f15b4a4ba87b',
- u'info_dict': {
- u"description": u"Waterfalls in the Springtime at Dark Hollow Waterfalls. These are located just off of Skyline Drive in Virginia. They are only about 6/10 of a mile hike but it is a pretty steep hill and a good climb back up.",
- u"uploader_id": u"forestwander-nature-pictures",
- u"title": u"Dark Hollow Waterfalls"
+ 'url': 'http://www.flickr.com/photos/forestwander-nature-pictures/5645318632/in/photostream/',
+ 'file': '5645318632.mp4',
+ 'md5': '6fdc01adbc89d72fc9c4f15b4a4ba87b',
+ 'info_dict': {
+ "description": "Waterfalls in the Springtime at Dark Hollow Waterfalls. These are located just off of Skyline Drive in Virginia. They are only about 6/10 of a mile hike but it is a pretty steep hill and a good climb back up.",
+ "uploader_id": "forestwander-nature-pictures",
+ "title": "Dark Hollow Waterfalls"
}
}
@@ -29,13 +31,13 @@ class FlickrIE(InfoExtractor):
webpage_url = 'http://www.flickr.com/photos/' + video_uploader_id + '/' + video_id
webpage = self._download_webpage(webpage_url, video_id)
- secret = self._search_regex(r"photo_secret: '(\w+)'", webpage, u'secret')
+ secret = self._search_regex(r"photo_secret: '(\w+)'", webpage, 'secret')
first_url = 'https://secure.flickr.com/apps/video/video_mtl_xml.gne?v=x&photo_id=' + video_id + '&secret=' + secret + '&bitrate=700&target=_self'
first_xml = self._download_webpage(first_url, video_id, 'Downloading first data webpage')
node_id = self._html_search_regex(r'<Item id="id">(\d+-\d+)</Item>',
- first_xml, u'node_id')
+ first_xml, 'node_id')
second_url = 'https://secure.flickr.com/video_playlist.gne?node_id=' + node_id + '&tech=flash&mode=playlist&bitrate=700&secret=' + secret + '&rd=video.yahoo.com&noad=1'
second_xml = self._download_webpage(second_url, video_id, 'Downloading second data webpage')
@@ -44,7 +46,7 @@ class FlickrIE(InfoExtractor):
mobj = re.search(r'<STREAM APP="(.+?)" FULLPATH="(.+?)"', second_xml)
if mobj is None:
- raise ExtractorError(u'Unable to extract video url')
+ raise ExtractorError('Unable to extract video url')
video_url = mobj.group(1) + unescapeHTML(mobj.group(2))
return [{
diff --git a/youtube_dl/extractor/franceinter.py b/youtube_dl/extractor/franceinter.py
new file mode 100644
index 000000000..deb1b0b9d
--- /dev/null
+++ b/youtube_dl/extractor/franceinter.py
@@ -0,0 +1,38 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class FranceInterIE(InfoExtractor):
+ _VALID_URL = r'http://(?:www\.)?franceinter\.fr/player/reecouter\?play=(?P<id>[0-9]{6})'
+ _TEST = {
+ 'url': 'http://www.franceinter.fr/player/reecouter?play=793962',
+ 'file': '793962.mp3',
+ 'md5': '4764932e466e6f6c79c317d2e74f6884',
+ "info_dict": {
+ "title": "L’Histoire dans les jeux vidéo",
+ },
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+
+ webpage = self._download_webpage(url, video_id)
+ title = self._html_search_regex(
+ r'<span class="roll_overflow">(.*?)</span></h1>', webpage, 'title')
+ path = self._search_regex(
+ r'&urlAOD=(.*?)&startTime', webpage, 'video url')
+ video_url = 'http://www.franceinter.fr/' + path
+
+ return {
+ 'id': video_id,
+ 'formats': [{
+ 'url': video_url,
+ 'vcodec': 'none',
+ }],
+ 'title': title,
+ }
diff --git a/youtube_dl/extractor/gamespot.py b/youtube_dl/extractor/gamespot.py
index 26b7d2ae5..380ebbe55 100644
--- a/youtube_dl/extractor/gamespot.py
+++ b/youtube_dl/extractor/gamespot.py
@@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
import re
import json
@@ -13,12 +15,12 @@ from ..utils import (
class GameSpotIE(InfoExtractor):
_VALID_URL = r'(?:http://)?(?:www\.)?gamespot\.com/.*-(?P<page_id>\d+)/?'
_TEST = {
- u"url": u"http://www.gamespot.com/arma-iii/videos/arma-iii-community-guide-sitrep-i-6410818/",
- u"file": u"gs-2300-6410818.mp4",
- u"md5": u"b2a30deaa8654fcccd43713a6b6a4825",
- u"info_dict": {
- u"title": u"Arma 3 - Community Guide: SITREP I",
- u'description': u'Check out this video where some of the basics of Arma 3 is explained.',
+ "url": "http://www.gamespot.com/arma-iii/videos/arma-iii-community-guide-sitrep-i-6410818/",
+ "file": "gs-2300-6410818.mp4",
+ "md5": "b2a30deaa8654fcccd43713a6b6a4825",
+ "info_dict": {
+ "title": "Arma 3 - Community Guide: SITREP I",
+ 'description': 'Check out this video where some of the basics of Arma 3 is explained.',
}
}
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index a9023f38d..839530982 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -328,7 +328,7 @@ class GenericIE(InfoExtractor):
mobj = re.search(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)
if mobj is None:
# Broaden the search a little bit: JWPlayer JS loader
- mobj = re.search(r'[^A-Za-z0-9]?file["\']?:\s*["\'](http[^\'"]*)', webpage)
+ mobj = re.search(r'[^A-Za-z0-9]?file["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage)
if mobj is None:
# Try to find twitter cards info
mobj = re.search(r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage)
diff --git a/youtube_dl/extractor/kankan.py b/youtube_dl/extractor/kankan.py
index 50916f4a6..23103b163 100644
--- a/youtube_dl/extractor/kankan.py
+++ b/youtube_dl/extractor/kankan.py
@@ -1,21 +1,24 @@
+from __future__ import unicode_literals
+
import re
import hashlib
from .common import InfoExtractor
-from ..utils import determine_ext
_md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest()
+
class KankanIE(InfoExtractor):
_VALID_URL = r'https?://(?:.*?\.)?kankan\.com/.+?/(?P<id>\d+)\.shtml'
_TEST = {
- u'url': u'http://yinyue.kankan.com/vod/48/48863.shtml',
- u'file': u'48863.flv',
- u'md5': u'29aca1e47ae68fc28804aca89f29507e',
- u'info_dict': {
- u'title': u'Ready To Go',
+ 'url': 'http://yinyue.kankan.com/vod/48/48863.shtml',
+ 'file': '48863.flv',
+ 'md5': '29aca1e47ae68fc28804aca89f29507e',
+ 'info_dict': {
+ 'title': 'Ready To Go',
},
+ 'skip': 'Only available from China',
}
def _real_extract(self, url):
@@ -23,22 +26,23 @@ class KankanIE(InfoExtractor):
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
- title = self._search_regex(r'(?:G_TITLE=|G_MOVIE_TITLE = )[\'"](.+?)[\'"]', webpage, u'video title')
+ title = self._search_regex(r'(?:G_TITLE=|G_MOVIE_TITLE = )[\'"](.+?)[\'"]', webpage, 'video title')
surls = re.search(r'surls:\[\'.+?\'\]|lurl:\'.+?\.flv\'', webpage).group(0)
gcids = re.findall(r"http://.+?/.+?/(.+?)/", surls)
gcid = gcids[-1]
- video_info_page = self._download_webpage('http://p2s.cl.kankan.com/getCdnresource_flv?gcid=%s' % gcid,
- video_id, u'Downloading video url info')
- ip = self._search_regex(r'ip:"(.+?)"', video_info_page, u'video url ip')
- path = self._search_regex(r'path:"(.+?)"', video_info_page, u'video url path')
- param1 = self._search_regex(r'param1:(\d+)', video_info_page, u'param1')
- param2 = self._search_regex(r'param2:(\d+)', video_info_page, u'param2')
+ info_url = 'http://p2s.cl.kankan.com/getCdnresource_flv?gcid=%s' % gcid
+ video_info_page = self._download_webpage(
+ info_url, video_id, 'Downloading video url info')
+ ip = self._search_regex(r'ip:"(.+?)"', video_info_page, 'video url ip')
+ path = self._search_regex(r'path:"(.+?)"', video_info_page, 'video url path')
+ param1 = self._search_regex(r'param1:(\d+)', video_info_page, 'param1')
+ param2 = self._search_regex(r'param2:(\d+)', video_info_page, 'param2')
key = _md5('xl_mp43651' + param1 + param2)
video_url = 'http://%s%s?key=%s&key1=%s' % (ip, path, key, param2)
- return {'id': video_id,
- 'title': title,
- 'url': video_url,
- 'ext': determine_ext(video_url),
- }
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'url': video_url,
+ }
diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py
index 7c54ea0f4..f3356db50 100644
--- a/youtube_dl/extractor/mixcloud.py
+++ b/youtube_dl/extractor/mixcloud.py
@@ -1,4 +1,5 @@
-import json
+from __future__ import unicode_literals
+
import re
from .common import InfoExtractor
@@ -10,17 +11,17 @@ from ..utils import (
class MixcloudIE(InfoExtractor):
_VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/([\w\d-]+)/([\w\d-]+)'
- IE_NAME = u'mixcloud'
+ IE_NAME = 'mixcloud'
_TEST = {
- u'url': u'http://www.mixcloud.com/dholbach/cryptkeeper/',
- u'file': u'dholbach-cryptkeeper.mp3',
- u'info_dict': {
- u'title': u'Cryptkeeper',
- u'description': u'After quite a long silence from myself, finally another Drum\'n\'Bass mix with my favourite current dance floor bangers.',
- u'uploader': u'Daniel Holbach',
- u'uploader_id': u'dholbach',
- u'upload_date': u'20111115',
+ 'url': 'http://www.mixcloud.com/dholbach/cryptkeeper/',
+ 'file': 'dholbach-cryptkeeper.mp3',
+ 'info_dict': {
+ 'title': 'Cryptkeeper',
+ 'description': 'After quite a long silence from myself, finally another Drum\'n\'Bass mix with my favourite current dance floor bangers.',
+ 'uploader': 'Daniel Holbach',
+ 'uploader_id': 'dholbach',
+ 'upload_date': '20111115',
},
}
@@ -42,17 +43,18 @@ class MixcloudIE(InfoExtractor):
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
-
uploader = mobj.group(1)
cloudcast_name = mobj.group(2)
track_id = '-'.join((uploader, cloudcast_name))
- api_url = 'http://api.mixcloud.com/%s/%s/' % (uploader, cloudcast_name)
+
webpage = self._download_webpage(url, track_id)
- json_data = self._download_webpage(api_url, track_id,
- u'Downloading cloudcast info')
- info = json.loads(json_data)
- preview_url = self._search_regex(r'data-preview-url="(.+?)"', webpage, u'preview url')
+ api_url = 'http://api.mixcloud.com/%s/%s/' % (uploader, cloudcast_name)
+ info = self._download_json(
+ api_url, track_id, 'Downloading cloudcast info')
+
+ preview_url = self._search_regex(
+ r'\s(?:data-preview-url|m-preview)="(.+?)"', webpage, 'preview url')
song_url = preview_url.replace('/previews/', '/c/originals/')
template_url = re.sub(r'(stream\d*)', 'stream%d', song_url)
final_song_url = self._get_url(template_url)
diff --git a/youtube_dl/extractor/mpora.py b/youtube_dl/extractor/mpora.py
index 0836243ea..6a8e2cc44 100644
--- a/youtube_dl/extractor/mpora.py
+++ b/youtube_dl/extractor/mpora.py
@@ -34,7 +34,7 @@ class MporaIE(InfoExtractor):
data = json.loads(data_json)
- uploader = data['info_overlay']['name']
+ uploader = data['info_overlay'].get('username')
duration = data['video']['duration'] // 1000
thumbnail = data['video']['encodings']['sd']['poster']
title = data['info_overlay']['title']
diff --git a/youtube_dl/extractor/redtube.py b/youtube_dl/extractor/redtube.py
index c2254ae8a..5c4cd2068 100644
--- a/youtube_dl/extractor/redtube.py
+++ b/youtube_dl/extractor/redtube.py
@@ -4,7 +4,7 @@ from .common import InfoExtractor
class RedTubeIE(InfoExtractor):
- _VALID_URL = r'(?:http://)?(?:www\.)?redtube\.com/(?P<id>[0-9]+)'
+ _VALID_URL = r'http://(?:www\.)?redtube\.com/(?P<id>[0-9]+)'
_TEST = {
u'url': u'http://www.redtube.com/66418',
u'file': u'66418.mp4',
diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py
index 951e977bd..393b5f17c 100644
--- a/youtube_dl/extractor/soundcloud.py
+++ b/youtube_dl/extractor/soundcloud.py
@@ -1,4 +1,6 @@
# encoding: utf-8
+from __future__ import unicode_literals
+
import json
import re
import itertools
@@ -32,58 +34,58 @@ class SoundcloudIE(InfoExtractor):
|(?P<player>(?:w|player|p.)\.soundcloud\.com/player/?.*?url=.*)
)
'''
- IE_NAME = u'soundcloud'
+ IE_NAME = 'soundcloud'
_TESTS = [
{
- u'url': u'http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy',
- u'file': u'62986583.mp3',
- u'md5': u'ebef0a451b909710ed1d7787dddbf0d7',
- u'info_dict': {
- u"upload_date": u"20121011",
- u"description": u"No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o'd",
- u"uploader": u"E.T. ExTerrestrial Music",
- u"title": u"Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1"
+ 'url': 'http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy',
+ 'file': '62986583.mp3',
+ 'md5': 'ebef0a451b909710ed1d7787dddbf0d7',
+ 'info_dict': {
+ "upload_date": "20121011",
+ "description": "No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o'd",
+ "uploader": "E.T. ExTerrestrial Music",
+ "title": "Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1"
}
},
# not streamable song
{
- u'url': u'https://soundcloud.com/the-concept-band/goldrushed-mastered?in=the-concept-band/sets/the-royal-concept-ep',
- u'info_dict': {
- u'id': u'47127627',
- u'ext': u'mp3',
- u'title': u'Goldrushed',
- u'uploader': u'The Royal Concept',
- u'upload_date': u'20120521',
+ 'url': 'https://soundcloud.com/the-concept-band/goldrushed-mastered?in=the-concept-band/sets/the-royal-concept-ep',
+ 'info_dict': {
+ 'id': '47127627',
+ 'ext': 'mp3',
+ 'title': 'Goldrushed',
+ 'uploader': 'The Royal Concept',
+ 'upload_date': '20120521',
},
- u'params': {
+ 'params': {
# rtmp
- u'skip_download': True,
+ 'skip_download': True,
},
},
# private link
{
- u'url': u'https://soundcloud.com/jaimemf/youtube-dl-test-video-a-y-baw/s-8Pjrp',
- u'md5': u'aa0dd32bfea9b0c5ef4f02aacd080604',
- u'info_dict': {
- u'id': u'123998367',
- u'ext': u'mp3',
- u'title': u'Youtube - Dl Test Video \'\' Ä↭',
- u'uploader': u'jaimeMF',
- u'description': u'test chars: \"\'/\\ä↭',
- u'upload_date': u'20131209',
+ 'url': 'https://soundcloud.com/jaimemf/youtube-dl-test-video-a-y-baw/s-8Pjrp',
+ 'md5': 'aa0dd32bfea9b0c5ef4f02aacd080604',
+ 'info_dict': {
+ 'id': '123998367',
+ 'ext': 'mp3',
+ 'title': 'Youtube - Dl Test Video \'\' Ä↭',
+ 'uploader': 'jaimeMF',
+ 'description': 'test chars: \"\'/\\ä↭',
+ 'upload_date': '20131209',
},
},
# downloadable song
{
- u'url': u'https://soundcloud.com/simgretina/just-your-problem-baby-1',
- u'md5': u'56a8b69568acaa967b4c49f9d1d52d19',
- u'info_dict': {
- u'id': u'105614606',
- u'ext': u'wav',
- u'title': u'Just Your Problem Baby (Acapella)',
- u'description': u'Vocals',
- u'uploader': u'Sim Gretina',
- u'upload_date': u'20130815',
+ 'url': 'https://soundcloud.com/simgretina/just-your-problem-baby-1',
+ 'md5': '56a8b69568acaa967b4c49f9d1d52d19',
+ 'info_dict': {
+ 'id': '105614606',
+ 'ext': 'wav',
+ 'title': 'Just Your Problem Baby (Acapella)',
+ 'description': 'Vocals',
+ 'uploader': 'Sim Gretina',
+ 'upload_date': '20130815',
},
},
]
@@ -112,7 +114,7 @@ class SoundcloudIE(InfoExtractor):
thumbnail = info['artwork_url']
if thumbnail is not None:
thumbnail = thumbnail.replace('-large', '-t500x500')
- ext = u'mp3'
+ ext = 'mp3'
result = {
'id': track_id,
'uploader': info['user']['username'],
@@ -124,11 +126,11 @@ class SoundcloudIE(InfoExtractor):
if info.get('downloadable', False):
# We can build a direct link to the song
format_url = (
- u'https://api.soundcloud.com/tracks/{0}/download?client_id={1}'.format(
+ 'https://api.soundcloud.com/tracks/{0}/download?client_id={1}'.format(
track_id, self._CLIENT_ID))
result['formats'] = [{
'format_id': 'download',
- 'ext': info.get('original_format', u'mp3'),
+ 'ext': info.get('original_format', 'mp3'),
'url': format_url,
'vcodec': 'none',
}]
@@ -138,7 +140,7 @@ class SoundcloudIE(InfoExtractor):
'client_id={1}&secret_token={2}'.format(track_id, self._IPHONE_CLIENT_ID, secret_token))
stream_json = self._download_webpage(
streams_url,
- track_id, u'Downloading track url')
+ track_id, 'Downloading track url')
formats = []
format_dict = json.loads(stream_json)
@@ -165,20 +167,19 @@ class SoundcloudIE(InfoExtractor):
# We fallback to the stream_url in the original info, this
# cannot be always used, sometimes it can give an HTTP 404 error
formats.append({
- 'format_id': u'fallback',
+ 'format_id': 'fallback',
'url': info['stream_url'] + '?client_id=' + self._CLIENT_ID,
'ext': ext,
'vcodec': 'none',
})
- def format_pref(f):
+ for f in formats:
if f['format_id'].startswith('http'):
- return 2
+ f['protocol'] = 'http'
if f['format_id'].startswith('rtmp'):
- return 1
- return 0
+ f['protocol'] = 'rtmp'
- formats.sort(key=format_pref)
+ self._sort_formats(formats)
result['formats'] = formats
return result
@@ -210,14 +211,14 @@ class SoundcloudIE(InfoExtractor):
url = 'http://soundcloud.com/%s' % resolve_title
info_json_url = self._resolv_url(url)
- info_json = self._download_webpage(info_json_url, full_title, u'Downloading info JSON')
+ info_json = self._download_webpage(info_json_url, full_title, 'Downloading info JSON')
info = json.loads(info_json)
return self._extract_info_dict(info, full_title, secret_token=token)
class SoundcloudSetIE(SoundcloudIE):
_VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/sets/([\w\d-]+)(?:[?].*)?$'
- IE_NAME = u'soundcloud:set'
+ IE_NAME = 'soundcloud:set'
# it's in tests/test_playlists.py
_TESTS = []
@@ -254,7 +255,7 @@ class SoundcloudSetIE(SoundcloudIE):
class SoundcloudUserIE(SoundcloudIE):
_VALID_URL = r'https?://(www\.)?soundcloud\.com/(?P<user>[^/]+)(/?(tracks/)?)?(\?.*)?$'
- IE_NAME = u'soundcloud:user'
+ IE_NAME = 'soundcloud:user'
# it's in tests/test_playlists.py
_TESTS = []
@@ -266,7 +267,7 @@ class SoundcloudUserIE(SoundcloudIE):
url = 'http://soundcloud.com/%s/' % uploader
resolv_url = self._resolv_url(url)
user_json = self._download_webpage(resolv_url, uploader,
- u'Downloading user info')
+ 'Downloading user info')
user = json.loads(user_json)
tracks = []
@@ -276,7 +277,7 @@ class SoundcloudUserIE(SoundcloudIE):
})
tracks_url = 'http://api.soundcloud.com/users/%s/tracks.json?' % user['id'] + data
response = self._download_webpage(tracks_url, uploader,
- u'Downloading tracks page %s' % (i+1))
+ 'Downloading tracks page %s' % (i+1))
new_tracks = json.loads(response)
tracks.extend(self._extract_info_dict(track, quiet=True) for track in new_tracks)
if len(new_tracks) < 50:
diff --git a/youtube_dl/extractor/spankwire.py b/youtube_dl/extractor/spankwire.py
index 9e2ad0d99..3362b3db8 100644
--- a/youtube_dl/extractor/spankwire.py
+++ b/youtube_dl/extractor/spankwire.py
@@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
import os
import re
@@ -11,17 +13,18 @@ from ..aes import (
aes_decrypt_text
)
+
class SpankwireIE(InfoExtractor):
_VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>spankwire\.com/[^/]*/video(?P<videoid>[0-9]+)/?)'
_TEST = {
- u'url': u'http://www.spankwire.com/Buckcherry-s-X-Rated-Music-Video-Crazy-Bitch/video103545/',
- u'file': u'103545.mp4',
- u'md5': u'1b3f55e345500552dbc252a3e9c1af43',
- u'info_dict': {
- u"uploader": u"oreusz",
- u"title": u"Buckcherry`s X Rated Music Video Crazy Bitch",
- u"description": u"Crazy Bitch X rated music video.",
- u"age_limit": 18,
+ 'url': 'http://www.spankwire.com/Buckcherry-s-X-Rated-Music-Video-Crazy-Bitch/video103545/',
+ 'file': '103545.mp4',
+ 'md5': '1b3f55e345500552dbc252a3e9c1af43',
+ 'info_dict': {
+ "uploader": "oreusz",
+ "title": "Buckcherry`s X Rated Music Video Crazy Bitch",
+ "description": "Crazy Bitch X rated music video.",
+ "age_limit": 18,
}
}
@@ -34,17 +37,17 @@ class SpankwireIE(InfoExtractor):
req.add_header('Cookie', 'age_verified=1')
webpage = self._download_webpage(req, video_id)
- video_title = self._html_search_regex(r'<h1>([^<]+)', webpage, u'title')
+ video_title = self._html_search_regex(r'<h1>([^<]+)', webpage, 'title')
video_uploader = self._html_search_regex(
- r'by:\s*<a [^>]*>(.+?)</a>', webpage, u'uploader', fatal=False)
+ r'by:\s*<a [^>]*>(.+?)</a>', webpage, 'uploader', fatal=False)
thumbnail = self._html_search_regex(
- r'flashvars\.image_url = "([^"]+)', webpage, u'thumbnail', fatal=False)
+ r'flashvars\.image_url = "([^"]+)', webpage, 'thumbnail', fatal=False)
description = self._html_search_regex(
- r'<div\s+id="descriptionContent">([^<]+)<', webpage, u'description', fatal=False)
+ r'<div\s+id="descriptionContent">([^<]+)<', webpage, 'description', fatal=False)
video_urls = list(map(compat_urllib_parse.unquote , re.findall(r'flashvars\.quality_[0-9]{3}p = "([^"]+)', webpage)))
if webpage.find('flashvars\.encrypted = "true"') != -1:
- password = self._html_search_regex(r'flashvars\.video_title = "([^"]+)', webpage, u'password').replace('+', ' ')
+ password = self._html_search_regex(r'flashvars\.video_title = "([^"]+)', webpage, 'password').replace('+', ' ')
video_urls = list(map(lambda s: aes_decrypt_text(s, password, 32).decode('utf-8'), video_urls))
formats = []
@@ -52,14 +55,21 @@ class SpankwireIE(InfoExtractor):
path = compat_urllib_parse_urlparse(video_url).path
extension = os.path.splitext(path)[1][1:]
format = path.split('/')[4].split('_')[:2]
+ resolution, bitrate_str = format
format = "-".join(format)
+ height = int(resolution.rstrip('P'))
+ tbr = int(bitrate_str.rstrip('K'))
+
formats.append({
'url': video_url,
'ext': extension,
+ 'resolution': resolution,
'format': format,
+ 'tbr': tbr,
+ 'height': height,
'format_id': format,
})
- formats.sort(key=lambda format: list(map(lambda s: s.zfill(6), format['format'].split('-'))))
+ self._sort_formats(formats)
age_limit = self._rta_search(webpage)
diff --git a/youtube_dl/extractor/teamcoco.py b/youtube_dl/extractor/teamcoco.py
index 2bf26d056..9dcffead0 100644
--- a/youtube_dl/extractor/teamcoco.py
+++ b/youtube_dl/extractor/teamcoco.py
@@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
import re
from .common import InfoExtractor
@@ -9,61 +11,66 @@ from ..utils import (
class TeamcocoIE(InfoExtractor):
_VALID_URL = r'http://teamcoco\.com/video/(?P<url_title>.*)'
_TEST = {
- u'url': u'http://teamcoco.com/video/louis-ck-interview-george-w-bush',
- u'file': u'19705.mp4',
- u'md5': u'cde9ba0fa3506f5f017ce11ead928f9a',
- u'info_dict': {
- u"description": u"Louis C.K. got starstruck by George W. Bush, so what? Part one.",
- u"title": u"Louis C.K. Interview Pt. 1 11/3/11"
+ 'url': 'http://teamcoco.com/video/louis-ck-interview-george-w-bush',
+ 'file': '19705.mp4',
+ 'md5': 'cde9ba0fa3506f5f017ce11ead928f9a',
+ 'info_dict': {
+ "description": "Louis C.K. got starstruck by George W. Bush, so what? Part one.",
+ "title": "Louis C.K. Interview Pt. 1 11/3/11"
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
if mobj is None:
- raise ExtractorError(u'Invalid URL: %s' % url)
+ raise ExtractorError('Invalid URL: %s' % url)
url_title = mobj.group('url_title')
webpage = self._download_webpage(url, url_title)
- video_id = self._html_search_regex(r'<article class="video" data-id="(\d+?)"',
- webpage, u'video id')
+ video_id = self._html_search_regex(
+ r'<article class="video" data-id="(\d+?)"',
+ webpage, 'video id')
self.report_extraction(video_id)
data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id
data = self._download_xml(data_url, video_id, 'Downloading data webpage')
-
qualities = ['500k', '480p', '1000k', '720p', '1080p']
formats = []
- for file in data.findall('files/file'):
- if file.attrib.get('playmode') == 'all':
+ for filed in data.findall('files/file'):
+ if filed.attrib.get('playmode') == 'all':
# it just duplicates one of the entries
break
- file_url = file.text
+ file_url = filed.text
m_format = re.search(r'(\d+(k|p))\.mp4', file_url)
if m_format is not None:
format_id = m_format.group(1)
else:
- format_id = file.attrib['bitrate']
+ format_id = filed.attrib['bitrate']
+ tbr = (
+ int(filed.attrib['bitrate'])
+ if filed.attrib['bitrate'].isdigit()
+ else None)
+
+ try:
+ quality = qualities.index(format_id)
+ except ValueError:
+ quality = -1
formats.append({
'url': file_url,
'ext': 'mp4',
+ 'tbr': tbr,
'format_id': format_id,
+ 'quality': quality,
})
- def sort_key(f):
- try:
- return qualities.index(f['format_id'])
- except ValueError:
- return -1
- formats.sort(key=sort_key)
- if not formats:
- raise ExtractorError(u'Unable to extract video URL')
+
+ self._sort_formats(formats)
return {
- 'id': video_id,
+ 'id': video_id,
'formats': formats,
- 'title': self._og_search_title(webpage),
- 'thumbnail': self._og_search_thumbnail(webpage),
+ 'title': self._og_search_title(webpage),
+ 'thumbnail': self._og_search_thumbnail(webpage),
'description': self._og_search_description(webpage),
}
diff --git a/youtube_dl/extractor/ted.py b/youtube_dl/extractor/ted.py
index 4bca62ba0..8b31caa92 100644
--- a/youtube_dl/extractor/ted.py
+++ b/youtube_dl/extractor/ted.py
@@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
import json
import re
@@ -7,6 +9,7 @@ from ..utils import (
RegexNotFoundError,
)
+
class TEDIE(SubtitlesInfoExtractor):
_VALID_URL=r'''http://www\.ted\.com/
(
@@ -18,12 +21,12 @@ class TEDIE(SubtitlesInfoExtractor):
/(?P<name>\w+) # Here goes the name and then ".html"
'''
_TEST = {
- u'url': u'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html',
- u'file': u'102.mp4',
- u'md5': u'2d76ee1576672e0bd8f187513267adf6',
- u'info_dict': {
- u"description": u"md5:c6fa72e6eedbd938c9caf6b2702f5922",
- u"title": u"Dan Dennett: The illusion of consciousness"
+ 'url': 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html',
+ 'file': '102.mp4',
+ 'md5': '4ea1dada91e4174b53dac2bb8ace429d',
+ 'info_dict': {
+ "description": "md5:c6fa72e6eedbd938c9caf6b2702f5922",
+ "title": "Dan Dennett: The illusion of consciousness"
}
}
@@ -47,7 +50,7 @@ class TEDIE(SubtitlesInfoExtractor):
'''Returns the videos of the playlist'''
webpage = self._download_webpage(
- url, playlist_id, u'Downloading playlist webpage')
+ url, playlist_id, 'Downloading playlist webpage')
matches = re.finditer(
r'<p\s+class="talk-title[^"]*"><a\s+href="(?P<talk_url>/talks/[^"]+\.html)">[^<]*</a></p>',
webpage)
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 28c88ffc7..bf3fde610 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -131,6 +131,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
(
(?:https?://|//)? # http(s):// or protocol-independent URL (optional)
(?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
+ (?:www\.)?deturl\.com/www\.youtube\.com/|
+ (?:www\.)?pwnyoutube\.com|
tube\.majestyc\.net/|
youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
(?:.*?\#/)? # handle anchor (#/) redirect urls
diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py
index 8c19ed7fa..c22f2cdc6 100644
--- a/youtube_dl/postprocessor/ffmpeg.py
+++ b/youtube_dl/postprocessor/ffmpeg.py
@@ -479,6 +479,7 @@ class FFmpegMergerPP(FFmpegPostProcessor):
def run(self, info):
filename = info['filepath']
args = ['-c', 'copy']
+ self._downloader.to_screen(u'[ffmpeg] Merging formats into "%s"' % filename)
self.run_ffmpeg_multiple_files(info['__files_to_merge'], filename, args)
return True, info
diff --git a/youtube_dl/version.py b/youtube_dl/version.py
index d1233be65..b9c25c4a9 100644
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,2 +1,2 @@
-__version__ = '2014.01.08'
+__version__ = '2014.01.17.2'