diff options
25 files changed, 423 insertions, 131 deletions
@@ -77,6 +77,6 @@ youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash- --exclude 'docs/_build' \ -- \ bin devscripts test youtube_dl docs \ - CHANGELOG LICENSE README.md README.txt \ + LICENSE README.md README.txt \ Makefile MANIFEST.in youtube-dl.1 youtube-dl.bash-completion setup.py \ youtube-dl diff --git a/devscripts/release.sh b/devscripts/release.sh index 2974a7c3e..453087e5f 100755 --- a/devscripts/release.sh +++ b/devscripts/release.sh @@ -45,9 +45,9 @@ fi /bin/echo -e "\n### Changing version in version.py..." sed -i "s/__version__ = '.*'/__version__ = '$version'/" youtube_dl/version.py -/bin/echo -e "\n### Committing CHANGELOG README.md and youtube_dl/version.py..." +/bin/echo -e "\n### Committing README.md and youtube_dl/version.py..." make README.md -git add CHANGELOG README.md youtube_dl/version.py +git add README.md youtube_dl/version.py git commit -m "release $version" /bin/echo -e "\n### Now tagging, signing and pushing..." diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index 8735013f7..e794cc97f 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -67,7 +67,7 @@ class TestFormatSelection(unittest.TestCase): downloaded = ydl.downloaded_info_dicts[0] self.assertEqual(downloaded['ext'], 'mp4') - # No prefer_free_formats => prefer mp4 and flv for greater compatibilty + # No prefer_free_formats => prefer mp4 and flv for greater compatibility ydl = YDL() ydl.params['prefer_free_formats'] = False formats = [ @@ -279,7 +279,7 @@ class TestFormatSelection(unittest.TestCase): self.assertEqual(ydl._format_note({}), '') assertRegexpMatches(self, ydl._format_note({ 'vbr': 10, - }), '^x\s*10k$') + }), '^\s*10k$') if __name__ == '__main__': unittest.main() diff --git a/test/test_playlists.py b/test/test_playlists.py index cc871698a..057ce43f0 100644 --- a/test/test_playlists.py +++ b/test/test_playlists.py @@ -209,20 +209,20 @@ class TestPlaylists(unittest.TestCase): def test_ivi_compilation(self): dl = FakeYDL() ie = IviCompilationIE(dl) - result = ie.extract('http://www.ivi.ru/watch/dezhurnyi_angel') + result = ie.extract('http://www.ivi.ru/watch/dvoe_iz_lartsa') self.assertIsPlaylist(result) - self.assertEqual(result['id'], 'dezhurnyi_angel') - self.assertEqual(result['title'], 'Дежурный ангел (2010 - 2012)') - self.assertTrue(len(result['entries']) >= 23) + self.assertEqual(result['id'], 'dvoe_iz_lartsa') + self.assertEqual(result['title'], 'Двое из ларца (2006 - 2008)') + self.assertTrue(len(result['entries']) >= 24) def test_ivi_compilation_season(self): dl = FakeYDL() ie = IviCompilationIE(dl) - result = ie.extract('http://www.ivi.ru/watch/dezhurnyi_angel/season2') + result = ie.extract('http://www.ivi.ru/watch/dvoe_iz_lartsa/season1') self.assertIsPlaylist(result) - self.assertEqual(result['id'], 'dezhurnyi_angel/season2') - self.assertEqual(result['title'], 'Дежурный ангел (2010 - 2012) 2 сезон') - self.assertTrue(len(result['entries']) >= 7) + self.assertEqual(result['id'], 'dvoe_iz_lartsa/season1') + self.assertEqual(result['title'], 'Двое из ларца (2006 - 2008) 1 сезон') + self.assertTrue(len(result['entries']) >= 12) def test_imdb_list(self): dl = FakeYDL() diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 3503c76b7..def58f1d6 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -194,7 +194,10 @@ from .normalboots import NormalbootsIE from .novamov import NovaMovIE from .nowness import NownessIE from .nowvideo import NowVideoIE -from .nrk import NRKIE +from .nrk import ( + NRKIE, + NRKTVIE, +) from .ntv import NTVIE from .nytimes import NYTimesIE from .nuvid import NuvidIE @@ -260,6 +263,7 @@ from .stanfordoc import StanfordOpenClassroomIE from .steam import SteamIE from .streamcloud import StreamcloudIE from .streamcz import StreamCZIE +from .swrmediathek import SWRMediathekIE from .syfy import SyfyIE from .sztvhu import SztvHuIE from .teamcoco import TeamcocoIE diff --git a/youtube_dl/extractor/ard.py b/youtube_dl/extractor/ard.py index b88f71bc4..a87b32b22 100644 --- a/youtube_dl/extractor/ard.py +++ b/youtube_dl/extractor/ard.py @@ -38,7 +38,9 @@ class ARDIE(InfoExtractor): webpage = self._download_webpage(url, video_id) title = self._html_search_regex( - r'<h1(?:\s+class="boxTopHeadline")?>(.*?)</h1>', webpage, 'title') + [r'<h1(?:\s+class="boxTopHeadline")?>(.*?)</h1>', + r'<h4 class="headline">(.*?)</h4>'], + webpage, 'title') description = self._html_search_meta( 'dcterms.abstract', webpage, 'description') thumbnail = self._og_search_thumbnail(webpage) diff --git a/youtube_dl/extractor/cinemassacre.py b/youtube_dl/extractor/cinemassacre.py index 2301f61b6..496271be4 100644 --- a/youtube_dl/extractor/cinemassacre.py +++ b/youtube_dl/extractor/cinemassacre.py @@ -1,10 +1,12 @@ # encoding: utf-8 from __future__ import unicode_literals + import re from .common import InfoExtractor from ..utils import ( ExtractorError, + int_or_none, ) @@ -13,9 +15,10 @@ class CinemassacreIE(InfoExtractor): _TESTS = [ { 'url': 'http://cinemassacre.com/2012/11/10/avgn-the-movie-trailer/', - 'file': '19911.mp4', - 'md5': '782f8504ca95a0eba8fc9177c373eec7', + 'md5': 'fde81fbafaee331785f58cd6c0d46190', 'info_dict': { + 'id': '19911', + 'ext': 'mp4', 'upload_date': '20121110', 'title': '“Angry Video Game Nerd: The Movie” – Trailer', 'description': 'md5:fb87405fcb42a331742a0dce2708560b', @@ -23,9 +26,10 @@ class CinemassacreIE(InfoExtractor): }, { 'url': 'http://cinemassacre.com/2013/10/02/the-mummys-hand-1940', - 'file': '521be8ef82b16.mp4', - 'md5': 'dec39ee5118f8d9cc067f45f9cbe3a35', + 'md5': 'd72f10cd39eac4215048f62ab477a511', 'info_dict': { + 'id': '521be8ef82b16', + 'ext': 'mp4', 'upload_date': '20131002', 'title': 'The Mummy’s Hand (1940)', }, @@ -50,29 +54,40 @@ class CinemassacreIE(InfoExtractor): r'<div class="entry-content">(?P<description>.+?)</div>', webpage, 'description', flags=re.DOTALL, fatal=False) - playerdata = self._download_webpage(playerdata_url, video_id) + playerdata = self._download_webpage(playerdata_url, video_id, 'Downloading player webpage') + video_thumbnail = self._search_regex( + r'image: \'(?P<thumbnail>[^\']+)\'', playerdata, 'thumbnail', fatal=False) + sd_url = self._search_regex(r'file: \'([^\']+)\', label: \'SD\'', playerdata, 'sd_file') + videolist_url = self._search_regex(r'file: \'([^\']+\.smil)\'}', playerdata, 'videolist_url') - sd_url = self._html_search_regex(r'file: \'([^\']+)\', label: \'SD\'', playerdata, 'sd_file') - hd_url = self._html_search_regex( - r'file: \'([^\']+)\', label: \'HD\'', playerdata, 'hd_file', - default=None) - video_thumbnail = self._html_search_regex(r'image: \'(?P<thumbnail>[^\']+)\'', playerdata, 'thumbnail', fatal=False) + videolist = self._download_xml(videolist_url, video_id, 'Downloading videolist XML') - formats = [{ - 'url': sd_url, - 'ext': 'mp4', - 'format': 'sd', - 'format_id': 'sd', - 'quality': 1, - }] - if hd_url: - formats.append({ - 'url': hd_url, - 'ext': 'mp4', - 'format': 'hd', - 'format_id': 'hd', - 'quality': 2, - }) + formats = [] + baseurl = sd_url[:sd_url.rfind('/')+1] + for video in videolist.findall('.//video'): + src = video.get('src') + if not src: + continue + file_ = src.partition(':')[-1] + width = int_or_none(video.get('width')) + height = int_or_none(video.get('height')) + bitrate = int_or_none(video.get('system-bitrate')) + format = { + 'url': baseurl + file_, + 'format_id': src.rpartition('.')[0].rpartition('_')[-1], + } + if width or height: + format.update({ + 'tbr': bitrate // 1000 if bitrate else None, + 'width': width, + 'height': height, + }) + else: + format.update({ + 'abr': bitrate // 1000 if bitrate else None, + 'vcodec': 'none', + }) + formats.append(format) self._sort_formats(formats) return { diff --git a/youtube_dl/extractor/comedycentral.py b/youtube_dl/extractor/comedycentral.py index 6e3a316c6..ba4d73ab8 100644 --- a/youtube_dl/extractor/comedycentral.py +++ b/youtube_dl/extractor/comedycentral.py @@ -188,7 +188,7 @@ class ComedyCentralShowsIE(InfoExtractor): }) formats.append({ 'format_id': 'rtmp-%s' % format, - 'url': rtmp_video_url, + 'url': rtmp_video_url.replace('viacomccstrm', 'viacommtvstrm'), 'ext': self._video_extensions.get(format, 'mp4'), 'height': h, 'width': w, diff --git a/youtube_dl/extractor/empflix.py b/youtube_dl/extractor/empflix.py index eaeee5a51..e6952588f 100644 --- a/youtube_dl/extractor/empflix.py +++ b/youtube_dl/extractor/empflix.py @@ -3,20 +3,18 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import ( - ExtractorError, -) class EmpflixIE(InfoExtractor): _VALID_URL = r'^https?://www\.empflix\.com/videos/.*?-(?P<id>[0-9]+)\.html' _TEST = { 'url': 'http://www.empflix.com/videos/Amateur-Finger-Fuck-33051.html', - 'md5': '5e5cc160f38ca9857f318eb97146e13e', + 'md5': 'b1bc15b6412d33902d6e5952035fcabc', 'info_dict': { 'id': '33051', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Amateur Finger Fuck', + 'description': 'Amateur solo finger fucking.', 'age_limit': 18, } } @@ -30,6 +28,8 @@ class EmpflixIE(InfoExtractor): video_title = self._html_search_regex( r'name="title" value="(?P<title>[^"]*)"', webpage, 'title') + video_description = self._html_search_regex( + r'name="description" value="([^"]*)"', webpage, 'description', fatal=False) cfg_url = self._html_search_regex( r'flashvars\.config = escape\("([^"]+)"', @@ -37,12 +37,18 @@ class EmpflixIE(InfoExtractor): cfg_xml = self._download_xml( cfg_url, video_id, note='Downloading metadata') - video_url = cfg_xml.find('videoLink').text + + formats = [ + { + 'url': item.find('videoLink').text, + 'format_id': item.find('res').text, + } for item in cfg_xml.findall('./quality/item') + ] return { 'id': video_id, - 'url': video_url, - 'ext': 'flv', 'title': video_title, + 'description': video_description, + 'formats': formats, 'age_limit': age_limit, } diff --git a/youtube_dl/extractor/extremetube.py b/youtube_dl/extractor/extremetube.py index ff7c0cd3e..14a196ffc 100644 --- a/youtube_dl/extractor/extremetube.py +++ b/youtube_dl/extractor/extremetube.py @@ -37,7 +37,7 @@ class ExtremeTubeIE(InfoExtractor): webpage = self._download_webpage(req, video_id) video_title = self._html_search_regex( - r'<h1 [^>]*?title="([^"]+)"[^>]*>\1<', webpage, 'title') + r'<h1 [^>]*?title="([^"]+)"[^>]*>', webpage, 'title') uploader = self._html_search_regex( r'>Posted by:(?=<)(?:\s|<[^>]*>)*(.+?)\|', webpage, 'uploader', fatal=False) diff --git a/youtube_dl/extractor/fc2.py b/youtube_dl/extractor/fc2.py index ca8993241..18f91efac 100644 --- a/youtube_dl/extractor/fc2.py +++ b/youtube_dl/extractor/fc2.py @@ -13,7 +13,7 @@ from ..utils import ( class FC2IE(InfoExtractor): - _VALID_URL = r'^http://video\.fc2\.com/(?P<lang>[^/]+)/content/(?P<id>[^/]+)' + _VALID_URL = r'^http://video\.fc2\.com/((?P<lang>[^/]+)/)?content/(?P<id>[^/]+)' IE_NAME = 'fc2' _TEST = { 'url': 'http://video.fc2.com/en/content/20121103kUan1KHs', @@ -36,7 +36,7 @@ class FC2IE(InfoExtractor): thumbnail = self._og_search_thumbnail(webpage) refer = url.replace('/content/', '/a/content/') - mimi = hashlib.md5(video_id + '_gGddgPfeaf_gzyr').hexdigest() + mimi = hashlib.md5((video_id + '_gGddgPfeaf_gzyr').encode('utf-8')).hexdigest() info_url = ( "http://video.fc2.com/ginfo.php?mimi={1:s}&href={2:s}&v={0:s}&fversion=WIN%2011%2C6%2C602%2C180&from=2&otag=0&upid={0:s}&tk=null&". diff --git a/youtube_dl/extractor/ivi.py b/youtube_dl/extractor/ivi.py index 1ba4966c7..528be1524 100644 --- a/youtube_dl/extractor/ivi.py +++ b/youtube_dl/extractor/ivi.py @@ -33,14 +33,14 @@ class IviIE(InfoExtractor): }, # Serial's serie { - 'url': 'http://www.ivi.ru/watch/dezhurnyi_angel/74791', - 'md5': '3e6cc9a848c1d2ebcc6476444967baa9', + 'url': 'http://www.ivi.ru/watch/dvoe_iz_lartsa/9549', + 'md5': '221f56b35e3ed815fde2df71032f4b3e', 'info_dict': { - 'id': '74791', + 'id': '9549', 'ext': 'mp4', - 'title': 'Дежурный ангел - 1 серия', - 'duration': 2490, - 'thumbnail': 'http://thumbs.ivi.ru/f7.vcp.digitalaccess.ru/contents/8/e/bc2f6c2b6e5d291152fdd32c059141.jpg', + 'title': 'Двое из ларца - Серия 1', + 'duration': 2655, + 'thumbnail': 'http://thumbs.ivi.ru/f15.vcp.digitalaccess.ru/contents/8/4/0068dc0677041f3336b7c2baad8fc0.jpg', }, 'skip': 'Only works from Russia', } diff --git a/youtube_dl/extractor/mailru.py b/youtube_dl/extractor/mailru.py index 5016989cc..7460d81cd 100644 --- a/youtube_dl/extractor/mailru.py +++ b/youtube_dl/extractor/mailru.py @@ -9,29 +9,48 @@ from .common import InfoExtractor class MailRuIE(InfoExtractor): IE_NAME = 'mailru' IE_DESC = 'Видео@Mail.Ru' - _VALID_URL = r'http://(?:www\.)?my\.mail\.ru/video/.*#video=/?(?P<id>[^/]+/[^/]+/[^/]+/\d+)' + _VALID_URL = r'http://(?:www\.)?my\.mail\.ru/(?:video/.*#video=/?(?P<idv1>(?:[^/]+/){3}\d+)|(?:(?P<idv2prefix>(?:[^/]+/){2})video/(?P<idv2suffix>[^/]+/\d+))\.html)' - _TEST = { - 'url': 'http://my.mail.ru/video/top#video=/mail/sonypicturesrus/75/76', - 'md5': 'dea205f03120046894db4ebb6159879a', - 'info_dict': { - 'id': '46301138', - 'ext': 'mp4', - 'title': 'Новый Человек-Паук. Высокое напряжение. Восстание Электро', - 'timestamp': 1393232740, - 'upload_date': '20140224', - 'uploader': 'sonypicturesrus', - 'uploader_id': 'sonypicturesrus@mail.ru', - 'duration': 184, - } - } + _TESTS = [ + { + 'url': 'http://my.mail.ru/video/top#video=/mail/sonypicturesrus/75/76', + 'md5': 'dea205f03120046894db4ebb6159879a', + 'info_dict': { + 'id': '46301138', + 'ext': 'mp4', + 'title': 'Новый Человек-Паук. Высокое напряжение. Восстание Электро', + 'timestamp': 1393232740, + 'upload_date': '20140224', + 'uploader': 'sonypicturesrus', + 'uploader_id': 'sonypicturesrus@mail.ru', + 'duration': 184, + }, + }, + { + 'url': 'http://my.mail.ru/corp/hitech/video/news_hi-tech_mail_ru/1263.html', + 'md5': '00a91a58c3402204dcced523777b475f', + 'info_dict': { + 'id': '46843144', + 'ext': 'mp4', + 'title': 'Samsung Galaxy S5 Hammer Smash Fail Battery Explosion', + 'timestamp': 1397217632, + 'upload_date': '20140411', + 'uploader': 'hitech', + 'uploader_id': 'hitech@corp.mail.ru', + 'duration': 245, + }, + }, + ] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id = mobj.group('idv1') + + if not video_id: + video_id = mobj.group('idv2prefix') + mobj.group('idv2suffix') video_data = self._download_json( - 'http://videoapi.my.mail.ru/videos/%s.json?new=1' % video_id, video_id, 'Downloading video JSON') + 'http://api.video.mail.ru/videos/%s.json?new=1' % video_id, video_id, 'Downloading video JSON') author = video_data['author'] uploader = author['name'] @@ -40,6 +59,8 @@ class MailRuIE(InfoExtractor): movie = video_data['movie'] content_id = str(movie['contentId']) title = movie['title'] + if title.endswith('.mp4'): + title = title[:-4] thumbnail = movie['poster'] duration = movie['duration'] diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py index 1a63ab56a..aa34665d1 100644 --- a/youtube_dl/extractor/nbc.py +++ b/youtube_dl/extractor/nbc.py @@ -1,6 +1,7 @@ from __future__ import unicode_literals import re +import json from .common import InfoExtractor from ..utils import find_xpath_attr, compat_str @@ -31,30 +32,68 @@ class NBCIE(InfoExtractor): class NBCNewsIE(InfoExtractor): - _VALID_URL = r'https?://www\.nbcnews\.com/video/.+?/(?P<id>\d+)' + _VALID_URL = r'''(?x)https?://www\.nbcnews\.com/ + ((video/.+?/(?P<id>\d+))| + (feature/[^/]+/(?P<title>.+))) + ''' - _TEST = { - 'url': 'http://www.nbcnews.com/video/nbc-news/52753292', - 'md5': '47abaac93c6eaf9ad37ee6c4463a5179', - 'info_dict': { - 'id': '52753292', - 'ext': 'flv', - 'title': 'Crew emerges after four-month Mars food study', - 'description': 'md5:24e632ffac72b35f8b67a12d1b6ddfc1', + _TESTS = [ + { + 'url': 'http://www.nbcnews.com/video/nbc-news/52753292', + 'md5': '47abaac93c6eaf9ad37ee6c4463a5179', + 'info_dict': { + 'id': '52753292', + 'ext': 'flv', + 'title': 'Crew emerges after four-month Mars food study', + 'description': 'md5:24e632ffac72b35f8b67a12d1b6ddfc1', + }, }, - } + { + 'url': 'http://www.nbcnews.com/feature/edward-snowden-interview/how-twitter-reacted-snowden-interview-n117236', + 'md5': 'b2421750c9f260783721d898f4c42063', + 'info_dict': { + 'id': 'I1wpAI_zmhsQ', + 'ext': 'flv', + 'title': 'How Twitter Reacted To The Snowden Interview', + 'description': 'md5:65a0bd5d76fe114f3c2727aa3a81fe64', + }, + 'add_ie': ['ThePlatform'], + }, + ] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') - all_info = self._download_xml('http://www.nbcnews.com/id/%s/displaymode/1219' % video_id, video_id) - info = all_info.find('video') + if video_id is not None: + all_info = self._download_xml('http://www.nbcnews.com/id/%s/displaymode/1219' % video_id, video_id) + info = all_info.find('video') - return { - 'id': video_id, - 'title': info.find('headline').text, - 'ext': 'flv', - 'url': find_xpath_attr(info, 'media', 'type', 'flashVideo').text, - 'description': compat_str(info.find('caption').text), - 'thumbnail': find_xpath_attr(info, 'media', 'type', 'thumbnail').text, - } + return { + 'id': video_id, + 'title': info.find('headline').text, + 'ext': 'flv', + 'url': find_xpath_attr(info, 'media', 'type', 'flashVideo').text, + 'description': compat_str(info.find('caption').text), + 'thumbnail': find_xpath_attr(info, 'media', 'type', 'thumbnail').text, + } + else: + # "feature" pages use theplatform.com + title = mobj.group('title') + webpage = self._download_webpage(url, title) + bootstrap_json = self._search_regex( + r'var bootstrapJson = ({.+})\s*$', webpage, 'bootstrap json', + flags=re.MULTILINE) + bootstrap = json.loads(bootstrap_json) + info = bootstrap['results'][0]['video'] + playlist_url = info['fallbackPlaylistUrl'] + '?form=MPXNBCNewsAPI' + mpxid = info['mpxId'] + all_videos = self._download_json(playlist_url, title)['videos'] + # The response contains additional videos + info = next(v for v in all_videos if v['mpxId'] == mpxid) + + return { + '_type': 'url', + # We get the best quality video + 'url': info['videoAssets'][-1]['publicUrl'], + 'ie_key': 'ThePlatform', + } diff --git a/youtube_dl/extractor/ndr.py b/youtube_dl/extractor/ndr.py index 53b34f5e6..3d6096e46 100644 --- a/youtube_dl/extractor/ndr.py +++ b/youtube_dl/extractor/ndr.py @@ -7,6 +7,7 @@ from .common import InfoExtractor from ..utils import ( ExtractorError, int_or_none, + qualities, ) @@ -57,7 +58,7 @@ class NDRIE(InfoExtractor): formats = [] - mp3_url = re.search(r'''{src:'(?P<audio>[^']+)', type:"audio/mp3"},''', page) + mp3_url = re.search(r'''\{src:'(?P<audio>[^']+)', type:"audio/mp3"},''', page) if mp3_url: formats.append({ 'url': mp3_url.group('audio'), @@ -66,15 +67,15 @@ class NDRIE(InfoExtractor): thumbnail = None - video_url = re.search(r'''3: {src:'(?P<video>.+?)\.hi\.mp4', type:"video/mp4"},''', page) + video_url = re.search(r'''3: \{src:'(?P<video>.+?)\.hi\.mp4', type:"video/mp4"},''', page) if video_url: - thumbnails = re.findall(r'''\d+: {src: "([^"]+)"(?: \|\| '[^']+')?, quality: '([^']+)'}''', page) + thumbnails = re.findall(r'''\d+: \{src: "([^"]+)"(?: \|\| '[^']+')?, quality: '([^']+)'}''', page) if thumbnails: - QUALITIES = ['xs', 's', 'm', 'l', 'xl'] - thumbnails.sort(key=lambda thumb: QUALITIES.index(thumb[1]) if thumb[1] in QUALITIES else -1) - thumbnail = 'http://www.ndr.de' + thumbnails[-1][0] + quality_key = qualities(['xs', 's', 'm', 'l', 'xl']) + largest = max(thumbnails, key=lambda thumb: quality_key(thumb[1])) + thumbnail = 'http://www.ndr.de' + largest[0] - for format_id in ['lo', 'hi', 'hq']: + for format_id in 'lo', 'hi', 'hq': formats.append({ 'url': '%s.%s.mp4' % (video_url.group('video'), format_id), 'format_id': format_id, diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py index e6d68b836..3a6a7883e 100644 --- a/youtube_dl/extractor/nrk.py +++ b/youtube_dl/extractor/nrk.py @@ -4,7 +4,11 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import ExtractorError +from ..utils import ( + ExtractorError, + int_or_none, + unified_strdate, +) class NRKIE(InfoExtractor): @@ -64,4 +68,78 @@ class NRKIE(InfoExtractor): 'title': data['title'], 'description': data['description'], 'thumbnail': thumbnail, + } + + +class NRKTVIE(InfoExtractor): + _VALID_URL = r'http://tv\.nrk(?:super)?\.no/(?:serie/[^/]+|program)/(?P<id>[a-z]{4}\d{8})' + + _TESTS = [ + { + 'url': 'http://tv.nrk.no/serie/20-spoersmaal-tv/muhh48000314/23-05-2014', + 'md5': '7b96112fbae1faf09a6f9ae1aff6cb84', + 'info_dict': { + 'id': 'muhh48000314', + 'ext': 'flv', + 'title': '20 spørsmål', + 'description': 'md5:bdea103bc35494c143c6a9acdd84887a', + 'upload_date': '20140523', + 'duration': 1741.52, + } + }, + { + 'url': 'http://tv.nrk.no/program/mdfp15000514', + 'md5': '383650ece2b25ecec996ad7b5bb2a384', + 'info_dict': { + 'id': 'mdfp15000514', + 'ext': 'flv', + 'title': 'Kunnskapskanalen: Grunnlovsjubiléet - Stor ståhei for ingenting', + 'description': 'md5:654c12511f035aed1e42bdf5db3b206a', + 'upload_date': '20140524', + 'duration': 4605.0, + } + }, + ] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + page = self._download_webpage(url, video_id) + + title = self._html_search_meta('title', page, 'title') + description = self._html_search_meta('description', page, 'description') + thumbnail = self._html_search_regex(r'data-posterimage="([^"]+)"', page, 'thumbnail', fatal=False) + upload_date = unified_strdate(self._html_search_meta('rightsfrom', page, 'upload date', fatal=False)) + duration = self._html_search_regex(r'data-duration="([^"]+)"', page, 'duration', fatal=False) + if duration: + duration = float(duration) + + formats = [] + + f4m_url = re.search(r'data-media="([^"]+)"', page) + if f4m_url: + formats.append({ + 'url': f4m_url.group(1) + '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124', + 'format_id': 'f4m', + 'ext': 'flv', + }) + + m3u8_url = re.search(r'data-hls-media="([^"]+)"', page) + if m3u8_url: + formats.append({ + 'url': m3u8_url.group(1), + 'format_id': 'm3u8', + }) + + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'upload_date': upload_date, + 'duration': duration, + 'formats': formats, }
\ No newline at end of file diff --git a/youtube_dl/extractor/nuvid.py b/youtube_dl/extractor/nuvid.py index f0befa116..e3db9fe8c 100644 --- a/youtube_dl/extractor/nuvid.py +++ b/youtube_dl/extractor/nuvid.py @@ -30,7 +30,7 @@ class NuvidIE(InfoExtractor): webpage, 'title').strip() url_end = self._html_search_regex( - r'href="(/mp4/[^"]+)"[^>]*data-link_type="mp4"', + r'href="(/[^"]+)"[^>]*data-link_type="mp4"', webpage, 'video_url') video_url = 'http://m.nuvid.com' + url_end diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index 7dd3dca0d..4118ee956 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -45,7 +45,7 @@ class PornHubIE(InfoExtractor): video_title = self._html_search_regex(r'<h1 [^>]+>([^<]+)', webpage, 'title') video_uploader = self._html_search_regex( - r'(?s)<div class="video-info-row">\s*From: .+?<(?:a href="/users/|<span class="username)[^>]+>(.+?)<', + r'(?s)From: .+?<(?:a href="/users/|<span class="username)[^>]+>(.+?)<', webpage, 'uploader', fatal=False) thumbnail = self._html_search_regex(r'"image_url":"([^"]+)', webpage, 'thumbnail', fatal=False) if thumbnail: diff --git a/youtube_dl/extractor/streamcz.py b/youtube_dl/extractor/streamcz.py index 7362904db..73efe9542 100644 --- a/youtube_dl/extractor/streamcz.py +++ b/youtube_dl/extractor/streamcz.py @@ -5,13 +5,16 @@ import re import json from .common import InfoExtractor -from ..utils import int_or_none +from ..utils import ( + int_or_none, + compat_str, +) class StreamCZIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?stream\.cz/.+/(?P<videoid>.+)' - _TEST = { + _TESTS = [{ 'url': 'http://www.stream.cz/peklonataliri/765767-ecka-pro-deti', 'md5': '6d3ca61a8d0633c9c542b92fcb936b0c', 'info_dict': { @@ -22,7 +25,18 @@ class StreamCZIE(InfoExtractor): 'thumbnail': 'http://im.stream.cz/episode/52961d7e19d423f8f06f0100', 'duration': 256, }, - } + }, { + 'url': 'http://www.stream.cz/blanik/10002447-tri-roky-pro-mazanka', + 'md5': '246272e753e26bbace7fcd9deca0650c', + 'info_dict': { + 'id': '10002447', + 'ext': 'mp4', + 'title': 'Kancelář Blaník: Tři roky pro Mazánka', + 'description': 'md5:9177695a8b756a0a8ab160de4043b392', + 'thumbnail': 'http://im.stream.cz/episode/537f838c50c11f8d21320000', + 'duration': 368, + }, + }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) @@ -57,7 +71,7 @@ class StreamCZIE(InfoExtractor): self._sort_formats(formats) return { - 'id': str(jsonData['id']), + 'id': compat_str(jsonData['episode_id']), 'title': self._og_search_title(webpage), 'thumbnail': jsonData['episode_image_original_url'].replace('//', 'http://'), 'formats': formats, diff --git a/youtube_dl/extractor/swrmediathek.py b/youtube_dl/extractor/swrmediathek.py new file mode 100644 index 000000000..6c688c520 --- /dev/null +++ b/youtube_dl/extractor/swrmediathek.py @@ -0,0 +1,104 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import parse_duration + + +class SWRMediathekIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?swrmediathek\.de/player\.htm\?show=(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})' + + _TESTS = [{ + 'url': 'http://swrmediathek.de/player.htm?show=849790d0-dab8-11e3-a953-0026b975f2e6', + 'md5': '8c5f6f0172753368547ca8413a7768ac', + 'info_dict': { + 'id': '849790d0-dab8-11e3-a953-0026b975f2e6', + 'ext': 'mp4', + 'title': 'SWR odysso', + 'description': 'md5:2012e31baad36162e97ce9eb3f157b8a', + 'thumbnail': 're:^http:.*\.jpg$', + 'duration': 2602, + 'upload_date': '20140515', + 'uploader': 'SWR Fernsehen', + 'uploader_id': '990030', + }, + }, { + 'url': 'http://swrmediathek.de/player.htm?show=0e1a8510-ddf2-11e3-9be3-0026b975f2e6', + 'md5': 'b10ab854f912eecc5a6b55cd6fc1f545', + 'info_dict': { + 'id': '0e1a8510-ddf2-11e3-9be3-0026b975f2e6', + 'ext': 'mp4', + 'title': 'Nachtcafé - Alltagsdroge Alkohol - zwischen Sektempfang und Komasaufen', + 'description': 'md5:e0a3adc17e47db2c23aab9ebc36dbee2', + 'thumbnail': 're:http://.*\.jpg', + 'duration': 5305, + 'upload_date': '20140516', + 'uploader': 'SWR Fernsehen', + 'uploader_id': '990030', + }, + }, { + 'url': 'http://swrmediathek.de/player.htm?show=bba23e10-cb93-11e3-bf7f-0026b975f2e6', + 'md5': '4382e4ef2c9d7ce6852535fa867a0dd3', + 'info_dict': { + 'id': 'bba23e10-cb93-11e3-bf7f-0026b975f2e6', + 'ext': 'mp3', + 'title': 'Saša Stanišic: Vor dem Fest', + 'description': 'md5:5b792387dc3fbb171eb709060654e8c9', + 'thumbnail': 're:http://.*\.jpg', + 'duration': 3366, + 'upload_date': '20140520', + 'uploader': 'SWR 2', + 'uploader_id': '284670', + } + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + video = self._download_json( + 'http://swrmediathek.de/AjaxEntry?ekey=%s' % video_id, video_id, 'Downloading video JSON') + + attr = video['attr'] + media_type = attr['entry_etype'] + + formats = [] + for entry in video['sub']: + if entry['name'] != 'entry_media': + continue + + entry_attr = entry['attr'] + codec = entry_attr['val0'] + quality = int(entry_attr['val1']) + + fmt = { + 'url': entry_attr['val2'], + 'quality': quality, + } + + if media_type == 'Video': + fmt.update({ + 'format_note': ['144p', '288p', '544p'][quality-1], + 'vcodec': codec, + }) + elif media_type == 'Audio': + fmt.update({ + 'acodec': codec, + }) + formats.append(fmt) + + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': attr['entry_title'], + 'description': attr['entry_descl'], + 'thumbnail': attr['entry_image_16_9'], + 'duration': parse_duration(attr['entry_durat']), + 'upload_date': attr['entry_pdatet'][:-4], + 'uploader': attr['channel_title'], + 'uploader_id': attr['channel_idkey'], + 'formats': formats, + }
\ No newline at end of file diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py index f15780ef5..b6b2dba9c 100644 --- a/youtube_dl/extractor/theplatform.py +++ b/youtube_dl/extractor/theplatform.py @@ -1,3 +1,5 @@ +from __future__ import unicode_literals + import re import json @@ -18,17 +20,17 @@ class ThePlatformIE(InfoExtractor): _TEST = { # from http://www.metacafe.com/watch/cb-e9I_cZgTgIPd/blackberrys_big_bold_z30/ - u'url': u'http://link.theplatform.com/s/dJ5BDC/e9I_cZgTgIPd/meta.smil?format=smil&Tracking=true&mbr=true', - u'info_dict': { - u'id': u'e9I_cZgTgIPd', - u'ext': u'flv', - u'title': u'Blackberry\'s big, bold Z30', - u'description': u'The Z30 is Blackberry\'s biggest, baddest mobile messaging device yet.', - u'duration': 247, + 'url': 'http://link.theplatform.com/s/dJ5BDC/e9I_cZgTgIPd/meta.smil?format=smil&Tracking=true&mbr=true', + 'info_dict': { + 'id': 'e9I_cZgTgIPd', + 'ext': 'flv', + 'title': 'Blackberry\'s big, bold Z30', + 'description': 'The Z30 is Blackberry\'s biggest, baddest mobile messaging device yet.', + 'duration': 247, }, - u'params': { + 'params': { # rtmp download - u'skip_download': True, + 'skip_download': True, }, } @@ -39,7 +41,7 @@ class ThePlatformIE(InfoExtractor): error_msg = next( n.attrib['abstract'] for n in meta.findall(_x('.//smil:ref')) - if n.attrib.get('title') == u'Geographic Restriction') + if n.attrib.get('title') == 'Geographic Restriction') except StopIteration: pass else: @@ -101,8 +103,7 @@ class ThePlatformIE(InfoExtractor): config_url = url+ '&form=json' config_url = config_url.replace('swf/', 'config/') config_url = config_url.replace('onsite/', 'onsite/config/') - config_json = self._download_webpage(config_url, video_id, u'Downloading config') - config = json.loads(config_json) + config = self._download_json(config_url, video_id, 'Downloading config') smil_url = config['releaseUrl'] + '&format=SMIL&formats=MPEG4&manifest=f4m' else: smil_url = ('http://link.theplatform.com/s/dJ5BDC/{0}/meta.smil?' diff --git a/youtube_dl/extractor/ustream.py b/youtube_dl/extractor/ustream.py index e4bb3b949..488b10df9 100644 --- a/youtube_dl/extractor/ustream.py +++ b/youtube_dl/extractor/ustream.py @@ -11,29 +11,36 @@ from ..utils import ( class UstreamIE(InfoExtractor): - _VALID_URL = r'https?://www\.ustream\.tv/(?P<type>recorded|embed)/(?P<videoID>\d+)' + _VALID_URL = r'https?://www\.ustream\.tv/(?P<type>recorded|embed|embed/recorded)/(?P<videoID>\d+)' IE_NAME = 'ustream' _TEST = { 'url': 'http://www.ustream.tv/recorded/20274954', - 'file': '20274954.flv', 'md5': '088f151799e8f572f84eb62f17d73e5c', 'info_dict': { - "uploader": "Young Americans for Liberty", - "title": "Young Americans for Liberty February 7, 2012 2:28 AM", + 'id': '20274954', + 'ext': 'flv', + 'uploader': 'Young Americans for Liberty', + 'title': 'Young Americans for Liberty February 7, 2012 2:28 AM', }, } def _real_extract(self, url): m = re.match(self._VALID_URL, url) + video_id = m.group('videoID') + + # some sites use this embed format (see: http://github.com/rg3/youtube-dl/issues/2990) + if m.group('type') == 'embed/recorded': + video_id = m.group('videoID') + desktop_url = 'http://www.ustream.tv/recorded/' + video_id + return self.url_result(desktop_url, 'Ustream') if m.group('type') == 'embed': video_id = m.group('videoID') webpage = self._download_webpage(url, video_id) - desktop_video_id = self._html_search_regex(r'ContentVideoIds=\["([^"]*?)"\]', webpage, 'desktop_video_id') + desktop_video_id = self._html_search_regex( + r'ContentVideoIds=\["([^"]*?)"\]', webpage, 'desktop_video_id') desktop_url = 'http://www.ustream.tv/recorded/' + desktop_video_id return self.url_result(desktop_url, 'Ustream') - video_id = m.group('videoID') - video_url = 'http://tcdn.ustream.tv/video/%s' % video_id webpage = self._download_webpage(url, video_id) diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py index ea34a8f16..eada13ce9 100644 --- a/youtube_dl/extractor/vevo.py +++ b/youtube_dl/extractor/vevo.py @@ -16,7 +16,7 @@ class VevoIE(InfoExtractor): (currently used by MTVIE) """ _VALID_URL = r'''(?x) - (?:https?://www\.vevo\.com/watch/(?:[^/]+/[^/]+/)?| + (?:https?://www\.vevo\.com/watch/(?:[^/]+/(?:[^/]+/)?)?| https?://cache\.vevo\.com/m/html/embed\.html\?video=| https?://videoplayer\.vevo\.com/embed/embedded\?videoId=| vevo:) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 981ca62c0..8327fb146 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1140,7 +1140,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): mobj = re.search(r'(?s)id="eow-date.*?>(.*?)</span>', video_webpage) if mobj is None: mobj = re.search( - r'(?s)id="watch-uploader-info".*?>.*?(?:Published|Uploaded) on (.*?)</strong>', + r'(?s)id="watch-uploader-info".*?>.*?(?:Published|Uploaded|Streamed live) on (.*?)</strong>', video_webpage) if mobj is not None: upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split()) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 638ff8af5..3c3c4e777 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2014.05.19' +__version__ = '2014.06.02' |