From 32a35e441874ad9daba10c29a6a33f13a4953fbb Mon Sep 17 00:00:00 2001 From: rzhxeo Date: Mon, 28 Oct 2013 17:35:01 +0100 Subject: Add support for http://www.extremetube.com --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/extremetube.py | 52 +++++++++++++++++++++++++++++++++++++ 2 files changed, 53 insertions(+) create mode 100644 youtube_dl/extractor/extremetube.py (limited to 'youtube_dl') diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 0d933986f..5eed1eebd 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -39,6 +39,7 @@ from .ehow import EHowIE from .eighttracks import EightTracksIE from .escapist import EscapistIE from .exfm import ExfmIE +from .extremetube import ExtremeTubeIE from .facebook import FacebookIE from .faz import FazIE from .fktv import ( diff --git a/youtube_dl/extractor/extremetube.py b/youtube_dl/extractor/extremetube.py new file mode 100644 index 000000000..981de430d --- /dev/null +++ b/youtube_dl/extractor/extremetube.py @@ -0,0 +1,52 @@ +import os +import re + +from .common import InfoExtractor +from ..utils import ( + compat_urllib_parse_urlparse, + compat_urllib_request, + compat_urllib_parse, +) + +class ExtremeTubeIE(InfoExtractor): + _VALID_URL = r'^(?:https?://)?(?:www\.)?(?Pextremetube\.com/video/.+?(?P[0-9]+))(?:[/?&]|$)' + _TEST = { + u'url': u'http://www.extremetube.com/video/music-video-14-british-euro-brit-european-cumshots-swallow-652431', + u'file': u'652431.mp4', + u'md5': u'1fb9228f5e3332ec8c057d6ac36f33e0', + u'info_dict': { + u"title": u"Music Video 14 british euro brit european cumshots swallow", + u"uploader": u"unknown", + u"age_limit": 18, + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('videoid') + url = 'http://www.' 
+ mobj.group('url') + + req = compat_urllib_request.Request(url) + req.add_header('Cookie', 'age_verified=1') + webpage = self._download_webpage(req, video_id) + + video_title = self._html_search_regex(r'

]*?title="([^"]+)"[^>]*>\1<', webpage, u'title') + uploader = self._html_search_regex(r'>Posted by:(?=<)(\s|<[^>]*>)*(.+?)\|', webpage, u'uploader', fatal=False) + video_url = compat_urllib_parse.unquote(self._html_search_regex(r'video_url=(.+?)&', webpage, u'video_url')) + path = compat_urllib_parse_urlparse( video_url ).path + extension = os.path.splitext( path )[1][1:] + format = path.split('/')[5].split('_')[:2] + format = "-".join( format ) + + age_limit = self._rta_search(webpage) + + return { + 'id': video_id, + 'title': video_title, + 'uploader': uploader, + 'url': video_url, + 'ext': extension, + 'format': format, + 'format_id': format, + 'age_limit': age_limit, + } -- cgit v1.2.3 From 77ae65877e7b4b71d446ea928fd14f973826f07b Mon Sep 17 00:00:00 2001 From: rzhxeo Date: Mon, 28 Oct 2013 18:18:58 +0100 Subject: Add support for http://www.mofosex.com --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/mofosex.py | 49 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+) create mode 100644 youtube_dl/extractor/mofosex.py (limited to 'youtube_dl') diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 0d933986f..045d4447a 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -81,6 +81,7 @@ from .metacafe import MetacafeIE from .metacritic import MetacriticIE from .mit import TechTVMITIE, MITIE from .mixcloud import MixcloudIE +from .mofosex import MofosexIE from .mtv import MTVIE from .muzu import MuzuTVIE from .myspass import MySpassIE diff --git a/youtube_dl/extractor/mofosex.py b/youtube_dl/extractor/mofosex.py new file mode 100644 index 000000000..a0c926cd1 --- /dev/null +++ b/youtube_dl/extractor/mofosex.py @@ -0,0 +1,49 @@ +import os +import re + +from .common import InfoExtractor +from ..utils import ( + compat_urllib_parse_urlparse, + compat_urllib_request, + compat_urllib_parse, +) + +class MofosexIE(InfoExtractor): + _VALID_URL = 
r'^(?:https?://)?(?:www\.)?(?Pmofosex\.com/videos/(?P[0-9]+)/.*?\.html)' + _TEST = { + u'url': u'http://www.mofosex.com/videos/5018/japanese-teen-music-video.html', + u'file': u'5018.mp4', + u'md5': u'1b2eb47ac33cc75d4a80e3026b613c5a', + u'info_dict': { + u"title": u"Japanese Teen Music Video", + u"age_limit": 18, + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('videoid') + url = 'http://www.' + mobj.group('url') + + req = compat_urllib_request.Request(url) + req.add_header('Cookie', 'age_verified=1') + webpage = self._download_webpage(req, video_id) + + video_title = self._html_search_regex(r'

(.+?)<', webpage, u'title') + video_url = compat_urllib_parse.unquote(self._html_search_regex(r'flashvars.video_url = \'([^\']+)', webpage, u'video_url')) + path = compat_urllib_parse_urlparse( video_url ).path + extension = os.path.splitext( path )[1][1:] + format = path.split('/')[5].split('_')[:2] + format = "-".join( format ) + + age_limit = self._rta_search(webpage) + + return { + 'id': video_id, + 'title': video_title, + 'url': video_url, + 'ext': extension, + 'format': format, + 'format_id': format, + 'age_limit': age_limit, + } -- cgit v1.2.3 From 2bc67c35acece68a75284b88fcb03d69f267a63c Mon Sep 17 00:00:00 2001 From: rzhxeo Date: Mon, 28 Oct 2013 18:22:55 +0100 Subject: [KeezMoviesIE] Detect URLs with numbers in the SEO part correct --- youtube_dl/extractor/keezmovies.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'youtube_dl') diff --git a/youtube_dl/extractor/keezmovies.py b/youtube_dl/extractor/keezmovies.py index 5e05900da..786924445 100644 --- a/youtube_dl/extractor/keezmovies.py +++ b/youtube_dl/extractor/keezmovies.py @@ -12,7 +12,7 @@ from ..aes import ( ) class KeezMoviesIE(InfoExtractor): - _VALID_URL = r'^(?:https?://)?(?:www\.)?(?Pkeezmovies\.com/video/.+?(?P[0-9]+))' + _VALID_URL = r'^(?:https?://)?(?:www\.)?(?Pkeezmovies\.com/video/.+?(?P[0-9]+))(?:[/?&]|$)' _TEST = { u'url': u'http://www.keezmovies.com/video/petite-asian-lady-mai-playing-in-bathtub-1214711', u'file': u'1214711.mp4', -- cgit v1.2.3 From dcc2a706ef7df65839aa40ce5fda61f8cea36645 Mon Sep 17 00:00:00 2001 From: rzhxeo Date: Mon, 28 Oct 2013 19:23:48 +0100 Subject: Add support for http://www.xtube.com --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/xtube.py | 54 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+) create mode 100644 youtube_dl/extractor/xtube.py (limited to 'youtube_dl') diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 0d933986f..7efd097e4 100644 --- 
a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -149,6 +149,7 @@ from .worldstarhiphop import WorldStarHipHopIE from .xhamster import XHamsterIE from .xnxx import XNXXIE from .xvideos import XVideosIE +from .xtube import XTubeIE from .yahoo import YahooIE, YahooSearchIE from .youjizz import YouJizzIE from .youku import YoukuIE diff --git a/youtube_dl/extractor/xtube.py b/youtube_dl/extractor/xtube.py new file mode 100644 index 000000000..7d06a7021 --- /dev/null +++ b/youtube_dl/extractor/xtube.py @@ -0,0 +1,54 @@ +import os +import re + +from .common import InfoExtractor +from ..utils import ( + compat_urllib_parse_urlparse, + compat_urllib_request, + compat_urllib_parse, +) + +class XTubeIE(InfoExtractor): + _VALID_URL = r'^(?:https?://)?(?:www\.)?(?Pxtube\.com/watch\.php\?v=(?P[^/?&]+))' + _TEST = { + u'url': u'http://www.xtube.com/watch.php?v=kVTUy_G222_', + u'file': u'kVTUy_G222_.mp4', + u'md5': u'092fbdd3cbe292c920ef6fc6a8a9cdab', + u'info_dict': { + u"title": u"strange erotica", + u"uploader": u"greenshowers", + u"age_limit": 18, + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('videoid') + url = 'http://www.' + mobj.group('url') + + req = compat_urllib_request.Request(url) + req.add_header('Cookie', 'age_verified=1') + webpage = self._download_webpage(req, video_id) + + video_title = self._html_search_regex(r'
([^<]+)', webpage, u'description', default=None) + video_url= self._html_search_regex(r'var videoMp4 = "([^"]+)', webpage, u'video_url').replace('\\/', '/') + path = compat_urllib_parse_urlparse( video_url ).path + extension = os.path.splitext( path )[1][1:] + format = path.split('/')[5].split('_')[:2] + format[0] += 'p' + format[1] += 'k' + format = "-".join( format ) + + return { + 'id': video_id, + 'title': video_title, + 'uploader': video_uploader, + 'description': video_description, + 'url': video_url, + 'ext': extension, + 'format': format, + 'format_id': format, + 'age_limit': 18, + } -- cgit v1.2.3 From 702665c0854af6fb317600c4825c0b00e2a4c981 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Mon, 28 Oct 2013 22:01:37 +0100 Subject: tests: build the filename from the info_dict if the 'file' key is missing It will need to have the 'id' and 'ext' keys to work. --- youtube_dl/YoutubeDL.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'youtube_dl') diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 313295839..060678e9b 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -272,7 +272,7 @@ class YoutubeDL(object): autonumber_size = 5 autonumber_templ = u'%0' + str(autonumber_size) + u'd' template_dict['autonumber'] = autonumber_templ % self._num_downloads - if template_dict['playlist_index'] is not None: + if template_dict.get('playlist_index') is not None: template_dict['playlist_index'] = u'%05d' % template_dict['playlist_index'] sanitize = lambda k, v: sanitize_filename( -- cgit v1.2.3 From 2563bcc85cc09382d7e731709b2c8a4ad96c7ea3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Mon, 28 Oct 2013 22:02:17 +0100 Subject: Add an extractor for MySpace (closes #1666) --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/myspace.py | 48 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+) create mode 
100644 youtube_dl/extractor/myspace.py (limited to 'youtube_dl') diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 0d933986f..caaf54456 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -83,6 +83,7 @@ from .mit import TechTVMITIE, MITIE from .mixcloud import MixcloudIE from .mtv import MTVIE from .muzu import MuzuTVIE +from .myspace import MySpaceIE from .myspass import MySpassIE from .myvideo import MyVideoIE from .naver import NaverIE diff --git a/youtube_dl/extractor/myspace.py b/youtube_dl/extractor/myspace.py new file mode 100644 index 000000000..050f54a5a --- /dev/null +++ b/youtube_dl/extractor/myspace.py @@ -0,0 +1,48 @@ +import re +import json + +from .common import InfoExtractor +from ..utils import ( + compat_str, +) + + +class MySpaceIE(InfoExtractor): + _VALID_URL = r'https?://myspace\.com/([^/]+)/video/[^/]+/(?P<id>\d+)' + + _TEST = { + u'url': u'https://myspace.com/coldplay/video/viva-la-vida/100008689', + u'info_dict': { + u'id': u'100008689', + u'ext': u'flv', + u'title': u'Viva La Vida', + u'description': u'The official Viva La Vida video, directed by Hype Williams', + u'uploader': u'Coldplay', + u'uploader_id': u'coldplay', + }, + u'params': { + # rtmp download + u'skip_download': True, + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + webpage = self._download_webpage(url, video_id) + context = json.loads(self._search_regex(r'context = ({.*?});', webpage, + u'context')) + video = context['video'] + rtmp_url, play_path = video['streamUrl'].split(';', 1) + + return { + 'id': compat_str(video['mediaId']), + 'title': video['title'], + 'url': rtmp_url, + 'play_path': play_path, + 'ext': 'flv', + 'description': video['description'], + 'thumbnail': video['imageUrl'], + 'uploader': video['artistName'], + 'uploader_id': video['artistUsername'], + } -- cgit v1.2.3 From 321a01f97110c3048e9d9c360a099d1ec8cd4479 Mon Sep 17
00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Mon, 28 Oct 2013 23:37:01 +0100 Subject: [mtv] Remove the templates from the mediagen url --- youtube_dl/extractor/mtv.py | 2 ++ 1 file changed, 2 insertions(+) (limited to 'youtube_dl') diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index e520e2bb4..e96d3952c 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -80,6 +80,8 @@ class MTVIE(InfoExtractor): video_id = self._id_from_uri(uri) self.report_extraction(video_id) mediagen_url = itemdoc.find('%s/%s' % (_media_xml_tag('group'), _media_xml_tag('content'))).attrib['url'] + # Remove the templates, like &device={device} + mediagen_url = re.sub(r'&[^=]*?={.*?}(?=(&|$))', u'', mediagen_url) if 'acceptMethods' not in mediagen_url: mediagen_url += '&acceptMethods=fms' mediagen_page = self._download_webpage(mediagen_url, video_id, -- cgit v1.2.3 From 795f28f871074aca2a74dfe67e1e75252b525c4c Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 29 Oct 2013 06:45:54 +0100 Subject: [youtube] Fix login (Fixes #1681) --- youtube_dl/extractor/youtube.py | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) (limited to 'youtube_dl') diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index d05d0a8c1..f3a2a32b4 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -74,14 +74,8 @@ class YoutubeBaseInfoExtractor(InfoExtractor): self._downloader.report_warning(u'unable to fetch login page: %s' % compat_str(err)) return False - galx = None - dsh = None - match = re.search(re.compile(r' Date: Tue, 29 Oct 2013 06:48:39 +0100 Subject: release 2013.10.29 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'youtube_dl') diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 048afc8e7..1a94003bc 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ 
= '2013.10.28' +__version__ = '2013.10.29' -- cgit v1.2.3 From 912cbf5d4ef5b131af88e63815863c389083d077 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 29 Oct 2013 14:00:01 +0100 Subject: [vevo] Fix timestamp handling ( / 1000 is implicit float division ) --- youtube_dl/extractor/vevo.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'youtube_dl') diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py index 1c1cc418d..26ec9fa1b 100644 --- a/youtube_dl/extractor/vevo.py +++ b/youtube_dl/extractor/vevo.py @@ -58,9 +58,9 @@ class VevoIE(InfoExtractor): 'width': int(attr['frameWidth']), }) - date_epoch = int(self._search_regex( - r'/Date\((\d+)\)/', video_info['launchDate'], u'launch date'))/1000 - upload_date = datetime.datetime.fromtimestamp(date_epoch) + timestamp_ms = int(self._search_regex( + r'/Date\((\d+)\)/', video_info['launchDate'], u'launch date')) + upload_date = datetime.datetime.fromtimestamp(timestamp_ms // 1000) info = { 'id': video_id, 'title': video_info['title'], -- cgit v1.2.3 From 57dd9a8f2f5885fb3d909c4905adb69b4749491c Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 29 Oct 2013 15:09:45 +0100 Subject: Nicer --list-formats output --- youtube_dl/YoutubeDL.py | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) (limited to 'youtube_dl') diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 060678e9b..260cd2809 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -759,6 +759,8 @@ class YoutubeDL(object): @staticmethod def format_resolution(format, default='unknown'): + if format.get('_resolution') is not None: + return format['_resolution'] if format.get('height') is not None: if format.get('width') is not None: res = u'%sx%s' % (format['width'], format['height']) @@ -769,19 +771,22 @@ class YoutubeDL(object): return res def list_formats(self, info_dict): - formats_s = [] - for format in info_dict.get('formats', 
[info_dict]): - formats_s.append(u'%-15s%-7s %-15s%s' % ( + def line(format): + return (u'%-15s%-10s%-12s%s' % ( format['format_id'], format['ext'], - format.get('format_note', ''), self.format_resolution(format), + format.get('format_note', ''), ) ) + + formats_s = list(map(line, info_dict.get('formats', [info_dict]))) if len(formats_s) != 1: - formats_s[0] += ' (worst)' - formats_s[-1] += ' (best)' - formats_s = "\n".join(formats_s) - self.to_screen(u'[info] Available formats for %s:\n' - u'format code extension note resolution\n%s' % ( - info_dict['id'], formats_s)) + formats_s[0] += (' ' if formats_s[0] else '') + '(worst)' + formats_s[-1] += (' ' if formats_s[-1] else '') + '(best)' + + header_line = line({ + 'format_id': u'format code', 'ext': u'extension', + '_resolution': u'resolution', 'format_note': u'note'}) + self.to_screen(u'[info] Available formats for %s:\n%s\n%s' % + (info_dict['id'], header_line, u"\n".join(formats_s))) -- cgit v1.2.3 From e54fd4b23b8110779e8caff805d3078dcf042d0b Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 29 Oct 2013 15:10:09 +0100 Subject: [vevo] Add more format details --- youtube_dl/extractor/vevo.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'youtube_dl') diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py index 26ec9fa1b..4d9f2a843 100644 --- a/youtube_dl/extractor/vevo.py +++ b/youtube_dl/extractor/vevo.py @@ -50,10 +50,11 @@ class VevoIE(InfoExtractor): # Already sorted from worst to best quality for rend in renditions.findall('rendition'): attr = rend.attrib - f_url = attr['url'] + format_note = '%(videoCodec)s@%(videoBitrate)4sK, %(audioCodec)s@%(audioBitrate)3sK' % attr formats.append({ - 'url': f_url, - 'ext': determine_ext(f_url), + 'url': attr['url'], + 'format_id': attr['name'], + 'format_note': format_note, 'height': int(attr['frameheight']), 'width': int(attr['frameWidth']), }) @@ -71,7 +72,4 @@ class VevoIE(InfoExtractor): 'duration': 
video_info['duration'], } - # TODO: Remove when #980 has been merged - info.update(formats[-1]) - return info -- cgit v1.2.3 From 21c924f4068692786e0c5435689d10f3d17ef612 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Tue, 29 Oct 2013 20:58:49 +0100 Subject: [arte] Download the 'Originalversion' version if it's the only one available (fixes #1682) --- youtube_dl/extractor/arte.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) (limited to 'youtube_dl') diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py index d39b48951..e10c74c11 100644 --- a/youtube_dl/extractor/arte.py +++ b/youtube_dl/extractor/arte.py @@ -158,7 +158,9 @@ class ArteTVPlus7IE(InfoExtractor): 'thumbnail': player_info.get('programImage') or player_info.get('VTU', {}).get('IUR'), } - formats = player_info['VSR'].values() + all_formats = player_info['VSR'].values() + # Some formats use the m3u8 protocol + all_formats = list(filter(lambda f: f.get('videoFormat') != 'M3U8', all_formats)) def _match_lang(f): if f.get('versionCode') is None: return True @@ -170,11 +172,16 @@ class ArteTVPlus7IE(InfoExtractor): regexes = [r'VO?%s' % l, r'VO?.-ST%s' % l] return any(re.match(r, f['versionCode']) for r in regexes) # Some formats may not be in the same language as the url - formats = filter(_match_lang, formats) - # Some formats use the m3u8 protocol - formats = filter(lambda f: f.get('videoFormat') != 'M3U8', formats) - # We order the formats by quality + formats = filter(_match_lang, all_formats) formats = list(formats) # in python3 filter returns an iterator + if not formats: + # Some videos are only available in the 'Originalversion' + # they aren't tagged as being in French or German + if all(f['versionCode'] == 'VO' for f in all_formats): + formats = all_formats + else: + raise ExtractorError(u'The formats list is empty') + # We order the formats by quality if re.match(r'[A-Z]Q', formats[0]['quality']) is not None: 
sort_key = lambda f: ['HQ', 'MQ', 'EQ', 'SQ'].index(f['quality']) else: -- cgit v1.2.3 From b9a836515fad5df57a86412b2cd41c49869ec0d6 Mon Sep 17 00:00:00 2001 From: Filippo Valsorda Date: Tue, 29 Oct 2013 16:44:35 -0400 Subject: Update the Vimeo test vector md5 confirmed that this is indeed the first 10241 (we went off by one with byte range 0-10240) of the full, playing mp4, so they probably reencoded or something --- youtube_dl/extractor/vimeo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'youtube_dl') diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index b4dbcd2ee..c7d864a2b 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -27,7 +27,7 @@ class VimeoIE(InfoExtractor): { u'url': u'http://vimeo.com/56015672#at=0', u'file': u'56015672.mp4', - u'md5': u'ae7a1d8b183758a0506b0622f37dfa14', + u'md5': u'8879b6cc097e987f02484baf890129e5', u'info_dict': { u"upload_date": u"20121220", u"description": u"This is a test case for youtube-dl.\nFor more information, see github.com/rg3/youtube-dl\nTest chars: \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550", -- cgit v1.2.3 From 94badb2599e54bfd711b38f3a74c552ff652d6d3 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Wed, 30 Oct 2013 01:09:26 +0100 Subject: Fix output indenting for --list-formats --- youtube_dl/YoutubeDL.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'youtube_dl') diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 260cd2809..898533496 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -780,10 +780,11 @@ class YoutubeDL(object): ) ) - formats_s = list(map(line, info_dict.get('formats', [info_dict]))) - if len(formats_s) != 1: - formats_s[0] += (' ' if formats_s[0] else '') + '(worst)' - formats_s[-1] += (' ' if formats_s[-1] else '') + '(best)' + formats = info_dict.get('formats', [info_dict]) + formats_s = list(map(line, formats)) + if len(formats) > 1: + 
formats_s[0] += (' ' if formats[0].get('format_note') else '') + '(worst)' + formats_s[-1] += (' ' if formats[-1].get('format_note') else '') + '(best)' header_line = line({ 'format_id': u'format code', 'ext': u'extension', -- cgit v1.2.3 From b5d0d817bc8a23ef6dc2a00d1af6fad893143206 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Wed, 30 Oct 2013 01:09:44 +0100 Subject: Remove superfluous space --- youtube_dl/extractor/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'youtube_dl') diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index ce349fe20..cef4dce85 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -63,7 +63,7 @@ class InfoExtractor(object): * ext Will be calculated from url if missing * format A human-readable description of the format ("mp4 container with h264/opus"). - Calculated from the format_id, width, height + Calculated from the format_id, width, height. and format_note fields if missing. 
* format_id A short description of the format ("mp4_h264_opus" or "19") -- cgit v1.2.3 From 72321ead7b176824d1a8b2895ad4926555e41b88 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Wed, 30 Oct 2013 01:14:17 +0100 Subject: [vevo] Readd support for SMIL (Fixes #1683) --- youtube_dl/extractor/vevo.py | 80 ++++++++++++++++++++++++++++++++++++-------- 1 file changed, 66 insertions(+), 14 deletions(-) (limited to 'youtube_dl') diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py index 4d9f2a843..3f6020f74 100644 --- a/youtube_dl/extractor/vevo.py +++ b/youtube_dl/extractor/vevo.py @@ -5,7 +5,7 @@ import datetime from .common import InfoExtractor from ..utils import ( - determine_ext, + compat_HTTPError, ExtractorError, ) @@ -16,26 +16,22 @@ class VevoIE(InfoExtractor): (currently used by MTVIE) """ _VALID_URL = r'((http://www.vevo.com/watch/.*?/.*?/)|(vevo:))(?P.*?)(\?|$)' - _TEST = { + _TESTS = [{ u'url': u'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280', u'file': u'GB1101300280.mp4', + u"md5": u"06bea460acb744eab74a9d7dcb4bfd61", u'info_dict': { u"upload_date": u"20130624", u"uploader": u"Hurts", u"title": u"Somebody to Die For", - u'duration': 230, + u"duration": 230, + u"width": 1920, + u"height": 1080, } - } + }] + _SMIL_BASE_URL = 'http://smil.lvl3.vevo.com/' - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - - json_url = 'http://videoplayer.vevo.com/VideoService/AuthenticateVideo?isrc=%s' % video_id - info_json = self._download_webpage(json_url, video_id, u'Downloading json info') - - self.report_extraction(video_id) - video_info = json.loads(info_json)['video'] + def _formats_from_json(self, video_info): last_version = {'version': -1} for version in video_info['videoVersions']: # These are the HTTP downloads, other types are for different manifests @@ -50,7 +46,7 @@ class VevoIE(InfoExtractor): # Already sorted from worst to best quality for rend in 
renditions.findall('rendition'): attr = rend.attrib - format_note = '%(videoCodec)s@%(videoBitrate)4sK, %(audioCodec)s@%(audioBitrate)3sK' % attr + format_note = '%(videoCodec)s@%(videoBitrate)4sk, %(audioCodec)s@%(audioBitrate)3sk' % attr formats.append({ 'url': attr['url'], 'format_id': attr['name'], @@ -58,6 +54,62 @@ class VevoIE(InfoExtractor): 'height': int(attr['frameheight']), 'width': int(attr['frameWidth']), }) + return formats + + def _formats_from_smil(self, smil_xml): + formats = [] + smil_doc = xml.etree.ElementTree.fromstring(smil_xml.encode('utf-8')) + els = smil_doc.findall('.//{http://www.w3.org/2001/SMIL20/Language}video') + for el in els: + src = el.attrib['src'] + m = re.match(r'''(?xi) + (?P<ext>[a-z0-9]+): + (?P<path> + [/a-z0-9]+ # The directory and main part of the URL + _(?P<cbr>[0-9]+)k + _(?P<width>[0-9]+)x(?P<height>[0-9]+) + _(?P<vcodec>[a-z0-9]+) + _(?P<vbr>[0-9]+) + _(?P<acodec>[a-z0-9]+) + _(?P<abr>[0-9]+) + \.[a-z0-9]+ # File extension + )''', src) + if not m: + continue + + format_url = self._SMIL_BASE_URL + m.group('path') + format_note = ('%(vcodec)s@%(vbr)4sk, %(acodec)s@%(abr)3sk' % + m.groupdict()) + formats.append({ + 'url': format_url, + 'format_id': u'SMIL_' + m.group('cbr'), + 'format_note': format_note, + 'ext': m.group('ext'), + 'width': int(m.group('width')), + 'height': int(m.group('height')), + }) + return formats + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + json_url = 'http://videoplayer.vevo.com/VideoService/AuthenticateVideo?isrc=%s' % video_id + info_json = self._download_webpage(json_url, video_id, u'Downloading json info') + video_info = json.loads(info_json)['video'] + + formats = self._formats_from_json(video_info) + try: + smil_url = '%s/Video/V2/VFILE/%s/%sr.smil' % ( + self._SMIL_BASE_URL, video_id, video_id.lower()) + smil_xml = self._download_webpage(smil_url, video_id, + u'Downloading SMIL info') + formats.extend(self._formats_from_smil(smil_xml)) + except ExtractorError as ee: + if not
isinstance(ee.cause, compat_HTTPError): + raise + self._downloader.report_warning( + u'Cannot download SMIL information, falling back to JSON ..') timestamp_ms = int(self._search_regex( r'/Date\((\d+)\)/', video_info['launchDate'], u'launch date')) -- cgit v1.2.3 From 7193498811cb17a66ca57569a8588adb28ba2b27 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Wed, 30 Oct 2013 01:17:00 +0100 Subject: Use index in formt string (Fixes vevo test on Python 2.6) --- youtube_dl/YoutubeDL.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'youtube_dl') diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 898533496..7f73ea360 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -482,7 +482,7 @@ class YoutubeDL(object): format['format'] = u'{id} - {res}{note}'.format( id=format['format_id'], res=self.format_resolution(format), - note=u' ({})'.format(format['format_note']) if format.get('format_note') is not None else '', + note=u' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '', ) # Automatically determine file extension if missing if 'ext' not in format: -- cgit v1.2.3 From 33b1d9595d853893b5d732863dc2f5eabd939637 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Wed, 30 Oct 2013 01:17:20 +0100 Subject: release 2013.10.30 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'youtube_dl') diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 1a94003bc..e8eade7ad 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2013.10.29' +__version__ = '2013.10.30' -- cgit v1.2.3 From 9f1109a56424d118263963062bc5185d8415835e Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Thu, 31 Oct 2013 00:20:49 +0100 Subject: [dailymotion] Fix support for age-restricted videos (Fixes #1688) --- youtube_dl/extractor/dailymotion.py | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 
deletions(-) (limited to 'youtube_dl') diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index 4c0488245..355b4ed0a 100644 --- a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@ -21,6 +21,7 @@ class DailymotionBaseInfoExtractor(InfoExtractor): """Build a request with the family filter disabled""" request = compat_urllib_request.Request(url) request.add_header('Cookie', 'family_filter=off') + request.add_header('Cookie', 'ff=off') return request class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor): @@ -61,6 +62,18 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor): }, u'skip': u'VEVO is only available in some countries', }, + # age-restricted video + { + u'url': u'http://www.dailymotion.com/video/xyh2zz_leanna-decker-cyber-girl-of-the-year-desires-nude-playboy-plus_redband', + u'file': u'xyh2zz.mp4', + u'md5': u'0d667a7b9cebecc3c89ee93099c4159d', + u'info_dict': { + u'title': 'Leanna Decker - Cyber Girl Of The Year Desires Nude [Playboy Plus]', + u'uploader': 'HotWaves1012', + u'age_limit': 18, + } + + } ] def _real_extract(self, url): @@ -90,7 +103,8 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor): video_uploader = self._search_regex([r'(?im)[^<]+?]+?>([^<]+?)', # Looking for official user r'<(?:span|a) .*?rel="author".*?>([^<]+?)([0-9]{2})-([0-9]{2})-([0-9]{4})
', webpage) @@ -132,15 +146,16 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor): self._list_available_subtitles(video_id) return - return [{ + return { 'id': video_id, 'formats': formats, 'uploader': video_uploader, 'upload_date': video_upload_date, 'title': self._og_search_title(webpage), 'subtitles': video_subtitles, - 'thumbnail': info['thumbnail_url'] - }] + 'thumbnail': info['thumbnail_url'], + 'age_limit': age_limit, + } def _get_available_subtitles(self, video_id): try: -- cgit v1.2.3 From 5f1ea943ab6814c2f8ca2a383f990e3f4c9e5f87 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Thu, 31 Oct 2013 08:07:26 +0100 Subject: [livestream] fix the extraction of events It now uses a json dictionary from the webpage. --- youtube_dl/extractor/livestream.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) (limited to 'youtube_dl') diff --git a/youtube_dl/extractor/livestream.py b/youtube_dl/extractor/livestream.py index d04da98c8..4531fd6ab 100644 --- a/youtube_dl/extractor/livestream.py +++ b/youtube_dl/extractor/livestream.py @@ -40,13 +40,9 @@ class LivestreamIE(InfoExtractor): if video_id is None: # This is an event page: - player = get_meta_content('twitter:player', webpage) - if player is None: - raise ExtractorError('Couldn\'t extract event api url') - api_url = player.replace('/player', '') - api_url = re.sub(r'^(https?://)(new\.)', r'\1api.\2', api_url) - info = json.loads(self._download_webpage(api_url, event_name, - u'Downloading event info')) + config_json = self._search_regex(r'window.config = ({.*?});', + webpage, u'window config') + info = json.loads(config_json)['event'] videos = [self._extract_video_info(video_data['data']) for video_data in info['feed']['data'] if video_data['type'] == u'video'] return self.playlist_result(videos, info['id'], info['full_name']) -- cgit v1.2.3 From ac2547f5ffc30a352207336194e7bbb0435d01a7 Mon Sep 17 00:00:00 2001 From: Alex Van't Hof 
Date: Thu, 31 Oct 2013 01:57:22 -0400 Subject: [teamcoco] Fix video url extraction for some videos Video url extraction failed for some videos, e.g. http://teamcoco.com/video/old-time-baseball The url extracted was also occasionally suboptimal quality, e.g. http://teamcoco.com/video/louis-ck-interview-george-w-bush --- youtube_dl/extractor/teamcoco.py | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) (limited to 'youtube_dl') diff --git a/youtube_dl/extractor/teamcoco.py b/youtube_dl/extractor/teamcoco.py index c910110ca..76246c7cc 100644 --- a/youtube_dl/extractor/teamcoco.py +++ b/youtube_dl/extractor/teamcoco.py @@ -3,6 +3,7 @@ import re from .common import InfoExtractor from ..utils import ( ExtractorError, + RegexNotFoundError, ) @@ -11,7 +12,7 @@ class TeamcocoIE(InfoExtractor): _TEST = { u'url': u'http://teamcoco.com/video/louis-ck-interview-george-w-bush', u'file': u'19705.mp4', - u'md5': u'27b6f7527da5acf534b15f21b032656e', + u'md5': u'cde9ba0fa3506f5f017ce11ead928f9a', u'info_dict': { u"description": u"Louis C.K. got starstruck by George W. Bush, so what? Part one.", u"title": u"Louis C.K. Interview Pt. 1 11/3/11" @@ -33,8 +34,21 @@ class TeamcocoIE(InfoExtractor): data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id data = self._download_webpage(data_url, video_id, 'Downloading data webpage') - video_url = self._html_search_regex(r']*type="high".*?>(.*?)', - data, u'video URL') + + qualities = [ '1080p', '720p', '1000k', '480p', '500k' ] + best_quality_idx = len(qualities)+1 # First regex match may not be optimal + for idx, quality in enumerate(qualities): + regex = r']*type="(?:high|standard)".*?>(.*%s.*)' % quality + try: + url = self._html_search_regex(regex, data, u'video URL') + if idx < best_quality_idx: + video_url = url + best_quality_idx = idx + except RegexNotFoundError: + # Just catch fatal exc. 
Don't want the fatal=False warning + continue + if not video_url: + raise RegexNotFoundError(u'Unable to extract video URL') return [{ 'id': video_id, -- cgit v1.2.3 From ab4e15134719e6c01a3a9768f21a0f361e4b781d Mon Sep 17 00:00:00 2001 From: rzhxeo Date: Fri, 1 Nov 2013 01:24:23 +0100 Subject: [CinemassacreIE] Support more embed urls --- youtube_dl/extractor/cinemassacre.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'youtube_dl') diff --git a/youtube_dl/extractor/cinemassacre.py b/youtube_dl/extractor/cinemassacre.py index 2fe1033f0..8f9396d6b 100644 --- a/youtube_dl/extractor/cinemassacre.py +++ b/youtube_dl/extractor/cinemassacre.py @@ -41,7 +41,7 @@ class CinemassacreIE(InfoExtractor): webpage_url = u'http://' + mobj.group('url') webpage = self._download_webpage(webpage_url, None) # Don't know video id yet video_date = mobj.group('date_Y') + mobj.group('date_m') + mobj.group('date_d') - mobj = re.search(r'src="(?Phttp://player\.screenwavemedia\.com/play/(?:embed|player)\.php\?id=(?:Cinemassacre-)?(?P.+?))"', webpage) + mobj = re.search(r'src="(?Phttp://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?id=(?:Cinemassacre-)?(?P.+?))"', webpage) if not mobj: raise ExtractorError(u'Can\'t extract embed url and video id') playerdata_url = mobj.group(u'embed_url') -- cgit v1.2.3 From 66cf3ac3426b62fb960b4de770c4ea8203a0e205 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Fri, 1 Nov 2013 11:55:35 +0100 Subject: [metacafe] Fix support for age-restricted videos (fixes #1696) The 'Content-Type' header must be set for disabling the family filter. The 'flashversion' cookie is only needed for AnyClip videos. Added tests for standard metacafe videos and for age-restricted videos. Also set the 'age_limit' field. 
--- youtube_dl/extractor/metacafe.py | 51 ++++++++++++++++++++++++++++++++++++---- 1 file changed, 46 insertions(+), 5 deletions(-) (limited to 'youtube_dl') diff --git a/youtube_dl/extractor/metacafe.py b/youtube_dl/extractor/metacafe.py index 234b9e80f..91480ba87 100644 --- a/youtube_dl/extractor/metacafe.py +++ b/youtube_dl/extractor/metacafe.py @@ -20,7 +20,9 @@ class MetacafeIE(InfoExtractor): _DISCLAIMER = 'http://www.metacafe.com/family_filter/' _FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user' IE_NAME = u'metacafe' - _TESTS = [{ + _TESTS = [ + # Youtube video + { u"add_ie": ["Youtube"], u"url": u"http://metacafe.com/watch/yt-_aUehQsCQtM/the_electric_company_short_i_pbs_kids_go/", u"file": u"_aUehQsCQtM.mp4", @@ -32,15 +34,42 @@ class MetacafeIE(InfoExtractor): u"uploader_id": u"PBS" } }, + # Normal metacafe video + { + u'url': u'http://www.metacafe.com/watch/11121940/news_stuff_you_wont_do_with_your_playstation_4/', + u'md5': u'6e0bca200eaad2552e6915ed6fd4d9ad', + u'info_dict': { + u'id': u'11121940', + u'ext': u'mp4', + u'title': u'News: Stuff You Won\'t Do with Your PlayStation 4', + u'uploader': u'ign', + u'description': u'Sony released a massive FAQ on the PlayStation Blog detailing the PS4\'s capabilities and limitations.', + }, + }, + # AnyClip video { u"url": u"http://www.metacafe.com/watch/an-dVVXnuY7Jh77J/the_andromeda_strain_1971_stop_the_bomb_part_3/", u"file": u"an-dVVXnuY7Jh77J.mp4", u"info_dict": { u"title": u"The Andromeda Strain (1971): Stop the Bomb Part 3", u"uploader": u"anyclip", - u"description": u"md5:38c711dd98f5bb87acf973d573442e67" - } - }] + u"description": u"md5:38c711dd98f5bb87acf973d573442e67", + }, + }, + # age-restricted video + { + u'url': u'http://www.metacafe.com/watch/5186653/bbc_internal_christmas_tape_79_uncensored_outtakes_etc/', + u'md5': u'98dde7c1a35d02178e8ab7560fe8bd09', + u'info_dict': { + u'id': u'5186653', + u'ext': u'mp4', + u'title': u'BBC INTERNAL Christmas Tape \'79 - 
UNCENSORED Outtakes, Etc.', + u'uploader': u'Dwayne Pipe', + u'description': u'md5:950bf4c581e2c059911fa3ffbe377e4b', + u'age_limit': 18, + }, + }, + ] def report_disclaimer(self): @@ -62,6 +91,7 @@ class MetacafeIE(InfoExtractor): 'submit': "Continue - I'm over 18", } request = compat_urllib_request.Request(self._FILTER_POST, compat_urllib_parse.urlencode(disclaimer_form)) + request.add_header('Content-Type', 'application/x-www-form-urlencoded') try: self.report_age_confirmation() compat_urllib_request.urlopen(request).read() @@ -83,7 +113,12 @@ class MetacafeIE(InfoExtractor): # Retrieve video webpage to extract further information req = compat_urllib_request.Request('http://www.metacafe.com/watch/%s/' % video_id) - req.headers['Cookie'] = 'flashVersion=0;' + + # AnyClip videos require the flashversion cookie so that we get the link + # to the mp4 file + mobj_an = re.match(r'^an-(.*?)$', video_id) + if mobj_an: + req.headers['Cookie'] = 'flashVersion=0;' webpage = self._download_webpage(req, video_id) # Extract URL, uploader and title from webpage @@ -125,6 +160,11 @@ class MetacafeIE(InfoExtractor): r'submitter=(.*?);|googletag\.pubads\(\)\.setTargeting\("(?:channel|submiter)","([^"]+)"\);', webpage, u'uploader nickname', fatal=False) + if re.search(r'"contentRating":"restricted"', webpage) is not None: + age_limit = 18 + else: + age_limit = 0 + return { '_type': 'video', 'id': video_id, @@ -134,4 +174,5 @@ class MetacafeIE(InfoExtractor): 'upload_date': None, 'title': video_title, 'ext': video_ext, + 'age_limit': age_limit, } -- cgit v1.2.3 From 60d142aa8d896674ca2b062a53b3d18c644192ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Fri, 1 Nov 2013 22:28:51 +0100 Subject: Add an extractor for vk.com (closes #1635) --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/vk.py | 45 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+) create mode 100644 youtube_dl/extractor/vk.py (limited to 
'youtube_dl') diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index caaf54456..bcf1cce7f 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -142,6 +142,7 @@ from .videofyme import VideofyMeIE from .videopremium import VideoPremiumIE from .vimeo import VimeoIE, VimeoChannelIE from .vine import VineIE +from .vk import VKIE from .wat import WatIE from .websurg import WeBSurgIE from .weibo import WeiboIE diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py new file mode 100644 index 000000000..90d8a6d07 --- /dev/null +++ b/youtube_dl/extractor/vk.py @@ -0,0 +1,45 @@ +# encoding: utf-8 +import re +import json + +from .common import InfoExtractor +from ..utils import ( + compat_str, + unescapeHTML, +) + + +class VKIE(InfoExtractor): + IE_NAME = u'vk.com' + _VALID_URL = r'https?://vk\.com/(?:videos.*?\?.*?z=)?video(?P.*?)(?:\?|%2F|$)' + + _TEST = { + u'url': u'http://vk.com/videos-77521?z=video-77521_162222515%2Fclub77521', + u'md5': u'0deae91935c54e00003c2a00646315f0', + u'info_dict': { + u'id': u'162222515', + u'ext': u'flv', + u'title': u'ProtivoGunz - Хуёвая песня', + u'uploader': u'Noize MC', + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + info_url = 'http://vk.com/al_video.php?act=show&al=1&video=%s' % video_id + info_page = self._download_webpage(info_url, video_id) + m_yt = re.search(r'src="(http://www.youtube.com/.*?)"', info_page) + if m_yt is not None: + self.to_screen(u'Youtube video detected') + return self.url_result(m_yt.group(1), 'Youtube') + vars_json = self._search_regex(r'var vars = ({.*?});', info_page, u'vars') + vars = json.loads(vars_json) + + return { + 'id': compat_str(vars['vid']), + 'url': vars['url240'], + 'title': unescapeHTML(vars['md_title']), + 'thumbnail': vars['jpg'], + 'uploader': vars['md_author'], + } -- cgit v1.2.3 From 8eddf3e91ddab3bb766bc5176edb3120be5743ea Mon Sep 17 00:00:00 2001 
From: Philipp Hagemeister Date: Sat, 2 Nov 2013 11:21:05 +0100 Subject: [youtube] Encode subtitle track name in request (Fixes #1700) --- youtube_dl/extractor/youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'youtube_dl') diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index f3a2a32b4..dc601de52 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1111,7 +1111,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): 'lang': lang, 'v': video_id, 'fmt': self._downloader.params.get('subtitlesformat'), - 'name': l[0], + 'name': l[0].encode('utf-8'), }) url = u'http://www.youtube.com/api/timedtext?' + params sub_lang_list[lang] = url -- cgit v1.2.3 From aa2484e390d8a5e74d740fda61b4062a4a8c1d0e Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sat, 2 Nov 2013 11:21:36 +0100 Subject: release 2013.11.02 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'youtube_dl') diff --git a/youtube_dl/version.py b/youtube_dl/version.py index e8eade7ad..75a46a2d5 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2013.10.30' +__version__ = '2013.11.02' -- cgit v1.2.3 From 31366066bd18cfd32de901264f53f42fe96f55c2 Mon Sep 17 00:00:00 2001 From: rzhxeo Date: Sat, 2 Nov 2013 18:08:16 +0100 Subject: Add support for live parameter to rtmpdump --- youtube_dl/FileDownloader.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'youtube_dl') diff --git a/youtube_dl/FileDownloader.py b/youtube_dl/FileDownloader.py index 8ecabab1a..0804dfbe1 100644 --- a/youtube_dl/FileDownloader.py +++ b/youtube_dl/FileDownloader.py @@ -267,7 +267,7 @@ class FileDownloader(object): self.to_screen(u'\r%s[download] 100%% of %s in %s' % (clear_line, data_len_str, self.format_seconds(tot_time))) - def _download_with_rtmpdump(self, filename, url, player_url, page_url, play_path, tc_url): + def 
_download_with_rtmpdump(self, filename, url, player_url, page_url, play_path, tc_url, live): self.report_destination(filename) tmpfilename = self.temp_name(filename) test = self.params.get('test', False) @@ -294,6 +294,8 @@ class FileDownloader(object): basic_args += ['--tcUrl', url] if test: basic_args += ['--stop', '1'] + if live: + basic_args += ['--live'] args = basic_args + [[], ['--resume', '--skip', '1']][self.params.get('continuedl', False)] if self.params.get('verbose', False): try: @@ -411,7 +413,8 @@ class FileDownloader(object): info_dict.get('player_url', None), info_dict.get('page_url', None), info_dict.get('play_path', None), - info_dict.get('tc_url', None)) + info_dict.get('tc_url', None), + info_dict.get('live', False)) # Attempt to download using mplayer if url.startswith('mms') or url.startswith('rtsp'): -- cgit v1.2.3 From 0a43ddf3209e13f5e87b07c440e03a45deea3e57 Mon Sep 17 00:00:00 2001 From: rzhxeo Date: Sat, 2 Nov 2013 18:08:35 +0100 Subject: [CinemassacreIE] Add live paramter to extracted info as a workaround --- youtube_dl/extractor/cinemassacre.py | 2 ++ 1 file changed, 2 insertions(+) (limited to 'youtube_dl') diff --git a/youtube_dl/extractor/cinemassacre.py b/youtube_dl/extractor/cinemassacre.py index 2fe1033f0..79d879ced 100644 --- a/youtube_dl/extractor/cinemassacre.py +++ b/youtube_dl/extractor/cinemassacre.py @@ -65,6 +65,7 @@ class CinemassacreIE(InfoExtractor): { 'url': url, 'play_path': 'mp4:' + sd_file, + 'live': True, # workaround 'ext': 'flv', 'format': 'sd', 'format_id': 'sd', @@ -72,6 +73,7 @@ class CinemassacreIE(InfoExtractor): { 'url': url, 'play_path': 'mp4:' + hd_file, + 'live': True, # workaround 'ext': 'flv', 'format': 'hd', 'format_id': 'hd', -- cgit v1.2.3 From 72a5b4f70216fe1a5b1c22be34653ae0ff81058a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sat, 2 Nov 2013 19:01:01 +0100 Subject: Add an extractor for bambuser.com (#1702) --- youtube_dl/extractor/__init__.py | 1 + 
youtube_dl/extractor/bambuser.py | 42 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+) create mode 100644 youtube_dl/extractor/bambuser.py (limited to 'youtube_dl') diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index bcf1cce7f..a1e35eb46 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -9,6 +9,7 @@ from .arte import ( ArteTVFutureIE, ) from .auengine import AUEngineIE +from .bambuser import BambuserIE from .bandcamp import BandcampIE from .bliptv import BlipTVIE, BlipTVUserIE from .bloomberg import BloombergIE diff --git a/youtube_dl/extractor/bambuser.py b/youtube_dl/extractor/bambuser.py new file mode 100644 index 000000000..cf8da22e3 --- /dev/null +++ b/youtube_dl/extractor/bambuser.py @@ -0,0 +1,42 @@ +import re +import json + +from .common import InfoExtractor + + +class BambuserIE(InfoExtractor): + _VALID_URL = r'https?://bambuser\.com/v/(?P\d+)' + _API_KEY = '005f64509e19a868399060af746a00aa' + + _TEST = { + u'url': u'http://bambuser.com/v/4050584', + u'md5': u'fba8f7693e48fd4e8641b3fd5539a641', + u'info_dict': { + u'id': u'4050584', + u'ext': u'flv', + u'title': u'Education engineering days - lightning talks', + u'duration': 3741, + u'uploader': u'pixelversity', + u'uploader_id': u'344706', + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + info_url = ('http://player-c.api.bambuser.com/getVideo.json?' 
+ '&api_key=%s&vid=%s' % (self._API_KEY, video_id)) + info_json = self._download_webpage(info_url, video_id) + info = json.loads(info_json)['result'] + + return { + 'id': video_id, + 'title': info['title'], + 'url': info['url'], + 'thumbnail': info['preview'], + 'duration': int(info['length']), + 'view_count': int(info['views_total']), + 'uploader': info['username'], + 'uploader_id': info['uid'], + } + -- cgit v1.2.3 From 1f343eaabbb9e0daf67363b7737833cf5e2a3e16 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Isma=C3=ABl=20Mej=C3=ADa?= Date: Sat, 2 Nov 2013 18:01:05 +0100 Subject: [subtitles] refactor to support websites with subtitle information the webpage. I added the parameter webpage, so now it's similar to the way automatic captions are handled. This is an improvement needed for websites like TED. --- youtube_dl/extractor/dailymotion.py | 6 +++--- youtube_dl/extractor/subtitles.py | 12 ++++++------ youtube_dl/extractor/youtube.py | 2 +- 3 files changed, 10 insertions(+), 10 deletions(-) (limited to 'youtube_dl') diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index 7d8353946..3aef82bcf 100644 --- a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@ -113,9 +113,9 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor): video_url = info[max_quality] # subtitles - video_subtitles = self.extract_subtitles(video_id) + video_subtitles = self.extract_subtitles(video_id, webpage) if self._downloader.params.get('listsubtitles', False): - self._list_available_subtitles(video_id) + self._list_available_subtitles(video_id, webpage) return return [{ @@ -129,7 +129,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor): 'thumbnail': info['thumbnail_url'] }] - def _get_available_subtitles(self, video_id): + def _get_available_subtitles(self, video_id, webpage): try: sub_list = self._download_webpage( 
'https://api.dailymotion.com/video/%s/subtitles?fields=id,language,url' % video_id, diff --git a/youtube_dl/extractor/subtitles.py b/youtube_dl/extractor/subtitles.py index 90de7de3a..4b4c5235d 100644 --- a/youtube_dl/extractor/subtitles.py +++ b/youtube_dl/extractor/subtitles.py @@ -12,9 +12,9 @@ class SubtitlesInfoExtractor(InfoExtractor): return any([self._downloader.params.get('writesubtitles', False), self._downloader.params.get('writeautomaticsub')]) - def _list_available_subtitles(self, video_id, webpage=None): + def _list_available_subtitles(self, video_id, webpage): """ outputs the available subtitles for the video """ - sub_lang_list = self._get_available_subtitles(video_id) + sub_lang_list = self._get_available_subtitles(video_id, webpage) auto_captions_list = self._get_available_automatic_caption(video_id, webpage) sub_lang = ",".join(list(sub_lang_list.keys())) self.to_screen(u'%s: Available subtitles for video: %s' % @@ -23,7 +23,7 @@ class SubtitlesInfoExtractor(InfoExtractor): self.to_screen(u'%s: Available automatic captions for video: %s' % (video_id, auto_lang)) - def extract_subtitles(self, video_id, video_webpage=None): + def extract_subtitles(self, video_id, webpage): """ returns {sub_lang: sub} ,{} if subtitles not found or None if the subtitles aren't requested. 
@@ -32,9 +32,9 @@ class SubtitlesInfoExtractor(InfoExtractor): return None available_subs_list = {} if self._downloader.params.get('writeautomaticsub', False): - available_subs_list.update(self._get_available_automatic_caption(video_id, video_webpage)) + available_subs_list.update(self._get_available_automatic_caption(video_id, webpage)) if self._downloader.params.get('writesubtitles', False): - available_subs_list.update(self._get_available_subtitles(video_id)) + available_subs_list.update(self._get_available_subtitles(video_id, webpage)) if not available_subs_list: # error, it didn't get the available subtitles return {} @@ -74,7 +74,7 @@ class SubtitlesInfoExtractor(InfoExtractor): return return sub - def _get_available_subtitles(self, video_id): + def _get_available_subtitles(self, video_id, webpage): """ returns {sub_lang: url} or {} if not available Must be redefined by the subclasses diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 4347651d7..d7c9b38f9 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1099,7 +1099,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): else: raise ExtractorError(u'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s))) - def _get_available_subtitles(self, video_id): + def _get_available_subtitles(self, video_id, webpage): try: sub_list = self._download_webpage( 'http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id, -- cgit v1.2.3 From a9a3876d55be943a7eaf505cbeb8fb862514db6c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Isma=C3=ABl=20Mej=C3=ADa?= Date: Sat, 2 Nov 2013 19:48:39 +0100 Subject: [ted] Added support for subtitle download --- youtube_dl/extractor/ted.py | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) (limited to 'youtube_dl') diff --git a/youtube_dl/extractor/ted.py b/youtube_dl/extractor/ted.py index dfa1176a3..239e2a448 100644 --- 
a/youtube_dl/extractor/ted.py +++ b/youtube_dl/extractor/ted.py @@ -1,10 +1,9 @@ import json import re -from .common import InfoExtractor +from .subtitles import SubtitlesInfoExtractor - -class TEDIE(InfoExtractor): +class TEDIE(SubtitlesInfoExtractor): _VALID_URL=r'''http://www\.ted\.com/ ( ((?Pplaylists)/(?P\d+)) # We have a playlist @@ -82,11 +81,21 @@ class TEDIE(InfoExtractor): 'url': stream['file'], 'format': stream['id'] } for stream in info['htmlStreams']] + + video_id = info['id'] + + # subtitles + video_subtitles = self.extract_subtitles(video_id, webpage) + if self._downloader.params.get('listsubtitles', False): + self._list_available_subtitles(video_id, webpage) + return + info = { - 'id': info['id'], + 'id': video_id, 'title': title, 'thumbnail': thumbnail, 'description': desc, + 'subtitles': video_subtitles, 'formats': formats, } @@ -94,3 +103,14 @@ class TEDIE(InfoExtractor): info.update(info['formats'][-1]) return info + + def _get_available_subtitles(self, video_id, webpage): + options = self._search_regex(r'(?:)', webpage, 'subtitles_language_select', flags=re.DOTALL) + languages = re.findall(r'(?: