diff options
Diffstat (limited to 'youtube_dl')
-rwxr-xr-x | youtube_dl/YoutubeDL.py | 8 | ||||
-rw-r--r-- | youtube_dl/__init__.py | 16 | ||||
-rw-r--r-- | youtube_dl/downloader/common.py | 10 | ||||
-rw-r--r-- | youtube_dl/downloader/f4m.py | 4 | ||||
-rw-r--r-- | youtube_dl/extractor/__init__.py | 3 | ||||
-rw-r--r-- | youtube_dl/extractor/adobetv.py | 7 | ||||
-rw-r--r-- | youtube_dl/extractor/appletrailers.py | 9 | ||||
-rw-r--r-- | youtube_dl/extractor/common.py | 16 | ||||
-rw-r--r-- | youtube_dl/extractor/defense.py | 5 | ||||
-rw-r--r-- | youtube_dl/extractor/escapist.py | 11 | ||||
-rw-r--r-- | youtube_dl/extractor/gdcvault.py | 9 | ||||
-rw-r--r-- | youtube_dl/extractor/generic.py | 17 | ||||
-rw-r--r-- | youtube_dl/extractor/imgur.py | 8 | ||||
-rw-r--r-- | youtube_dl/extractor/r7.py | 88 | ||||
-rw-r--r-- | youtube_dl/extractor/rtve.py | 9 | ||||
-rw-r--r-- | youtube_dl/extractor/teamcoco.py | 49 | ||||
-rw-r--r-- | youtube_dl/extractor/ted.py | 27 | ||||
-rw-r--r-- | youtube_dl/extractor/tv4.py | 100 | ||||
-rw-r--r-- | youtube_dl/extractor/vimeo.py | 6 | ||||
-rw-r--r-- | youtube_dl/extractor/zapiks.py | 110 | ||||
-rw-r--r-- | youtube_dl/utils.py | 4 | ||||
-rw-r--r-- | youtube_dl/version.py | 2 |
22 files changed, 439 insertions, 79 deletions
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 88809783b..ca7c3f5c6 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -308,8 +308,8 @@ class YoutubeDL(object): raise if (sys.version_info >= (3,) and sys.platform != 'win32' and - sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] - and not params.get('restrictfilenames', False)): + sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and + not params.get('restrictfilenames', False)): # On Python 3, the Unicode filesystem API will throw errors (#1474) self.report_warning( 'Assuming --restrict-filenames since file system encoding ' @@ -1366,8 +1366,8 @@ class YoutubeDL(object): """Download a given list of URLs.""" outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL) if (len(url_list) > 1 and - '%' not in outtmpl - and self.params.get('max_downloads') != 1): + '%' not in outtmpl and + self.params.get('max_downloads') != 1): raise SameFileError(outtmpl) for url in url_list: diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index eac2a26ec..25ab3fdfe 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -189,14 +189,14 @@ def _real_main(argv=None): # In Python 2, sys.argv is a bytestring (also note http://bugs.python.org/issue2128 for Windows systems) if opts.outtmpl is not None: opts.outtmpl = opts.outtmpl.decode(preferredencoding()) - outtmpl = ((opts.outtmpl is not None and opts.outtmpl) - or (opts.format == '-1' and opts.usetitle and '%(title)s-%(id)s-%(format)s.%(ext)s') - or (opts.format == '-1' and '%(id)s-%(format)s.%(ext)s') - or (opts.usetitle and opts.autonumber and '%(autonumber)s-%(title)s-%(id)s.%(ext)s') - or (opts.usetitle and '%(title)s-%(id)s.%(ext)s') - or (opts.useid and '%(id)s.%(ext)s') - or (opts.autonumber and '%(autonumber)s-%(id)s.%(ext)s') - or DEFAULT_OUTTMPL) + outtmpl = ((opts.outtmpl is not None and opts.outtmpl) or + (opts.format == '-1' and opts.usetitle and '%(title)s-%(id)s-%(format)s.%(ext)s') or + (opts.format == '-1' and '%(id)s-%(format)s.%(ext)s') or + (opts.usetitle and opts.autonumber and '%(autonumber)s-%(title)s-%(id)s.%(ext)s') or + (opts.usetitle and '%(title)s-%(id)s.%(ext)s') or + (opts.useid and '%(id)s.%(ext)s') or + (opts.autonumber and '%(autonumber)s-%(id)s.%(ext)s') or + DEFAULT_OUTTMPL) if not os.path.splitext(outtmpl)[1] and opts.extractaudio: parser.error('Cannot download a video and extract audio into the same' ' file! Use "{0}.%(ext)s" instead of "{0}" as the output' diff --git a/youtube_dl/downloader/common.py b/youtube_dl/downloader/common.py index 45e55b99c..3ae90021a 100644 --- a/youtube_dl/downloader/common.py +++ b/youtube_dl/downloader/common.py @@ -311,14 +311,14 @@ class FileDownloader(object): """ nooverwrites_and_exists = ( - self.params.get('nooverwrites', False) - and os.path.exists(encodeFilename(filename)) + self.params.get('nooverwrites', False) and + os.path.exists(encodeFilename(filename)) ) continuedl_and_exists = ( - self.params.get('continuedl', False) - and os.path.isfile(encodeFilename(filename)) - and not self.params.get('nopart', False) + self.params.get('continuedl', False) and + os.path.isfile(encodeFilename(filename)) and + not self.params.get('nopart', False) ) # Check file already present diff --git a/youtube_dl/downloader/f4m.py b/youtube_dl/downloader/f4m.py index b40ebfa50..7b8fe8cf5 100644 --- a/youtube_dl/downloader/f4m.py +++ b/youtube_dl/downloader/f4m.py @@ -325,8 +325,8 @@ class F4mFD(FileDownloader): state['frag_index'] += 1 estimated_size = ( - (state['downloaded_bytes'] + frag_total_bytes) - / (state['frag_index'] + 1) * total_frags) + (state['downloaded_bytes'] + frag_total_bytes) / + (state['frag_index'] + 1) * total_frags) time_now = time.time() state['total_bytes_estimate'] = estimated_size state['elapsed'] = time_now - start diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index cf58f0800..c3088fba2 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -365,6 +365,7 @@ from .promptfile import PromptFileIE from .prosiebensat1 import ProSiebenSat1IE from .pyvideo import PyvideoIE from .quickvid import QuickVidIE +from .r7 import R7IE from .radiode import RadioDeIE from .radiobremen import RadioBremenIE from .radiofrance import RadioFranceIE @@ -494,6 +495,7 @@ from .tumblr import TumblrIE from .tunein import TuneInIE from .turbo import TurboIE from .tutv import TutvIE +from .tv4 import TV4IE from .tvigle import TvigleIE from .tvp import TvpIE, TvpSeriesIE from .tvplay import TVPlayIE @@ -615,6 +617,7 @@ from .youtube import ( YoutubeUserIE, YoutubeWatchLaterIE, ) +from .zapiks import ZapiksIE from .zdf import ZDFIE, ZDFChannelIE from .zingmp3 import ( ZingMp3SongIE, diff --git a/youtube_dl/extractor/adobetv.py b/youtube_dl/extractor/adobetv.py index 28e07f8b0..97d128560 100644 --- a/youtube_dl/extractor/adobetv.py +++ b/youtube_dl/extractor/adobetv.py @@ -28,7 +28,6 @@ class AdobeTVIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) player = self._parse_json( @@ -44,8 +43,10 @@ class AdobeTVIE(InfoExtractor): self._html_search_meta('datepublished', webpage, 'upload date')) duration = parse_duration( - self._html_search_meta('duration', webpage, 'duration') - or self._search_regex(r'Runtime:\s*(\d{2}:\d{2}:\d{2})', webpage, 'duration')) + self._html_search_meta('duration', webpage, 'duration') or + self._search_regex( + r'Runtime:\s*(\d{2}:\d{2}:\d{2})', + webpage, 'duration', fatal=False)) view_count = str_to_int(self._search_regex( r'<div class="views">\s*Views?:\s*([\d,.]+)\s*</div>', diff --git a/youtube_dl/extractor/appletrailers.py b/youtube_dl/extractor/appletrailers.py index 43e82847f..576f03b5b 100644 --- a/youtube_dl/extractor/appletrailers.py +++ b/youtube_dl/extractor/appletrailers.py @@ -11,8 +11,8 @@ from ..utils import ( class AppleTrailersIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/trailers/(?P<company>[^/]+)/(?P<movie>[^/]+)' - _TEST = { + _VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/(?:trailers|ca)/(?P<company>[^/]+)/(?P<movie>[^/]+)' + _TESTS = [{ "url": "http://trailers.apple.com/trailers/wb/manofsteel/", 'info_dict': { 'id': 'manofsteel', @@ -63,7 +63,10 @@ class AppleTrailersIE(InfoExtractor): }, }, ] - } + }, { + 'url': 'http://trailers.apple.com/ca/metropole/autrui/', + 'only_matching': True, + }] _JSON_RE = r'iTunes.playURL\((.*?)\);' diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 08b8ad37c..79f6d199b 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -391,6 +391,16 @@ class InfoExtractor(object): if blocked_iframe: msg += ' Visit %s for more details' % blocked_iframe raise ExtractorError(msg, expected=True) + if '<title>The URL you requested has been blocked</title>' in content[:512]: + msg = ( + 'Access to this webpage has been blocked by Indian censorship. ' + 'Use a VPN or proxy server (with --proxy) to route around it.') + block_msg = self._html_search_regex( + r'</h1><p>(.*?)</p>', + content, 'block message', default=None) + if block_msg: + msg += ' (Message: "%s")' % block_msg.replace('\n', ' ') + raise ExtractorError(msg, expected=True) return content @@ -798,8 +808,8 @@ class InfoExtractor(object): media_nodes = manifest.findall('{http://ns.adobe.com/f4m/2.0}media') for i, media_el in enumerate(media_nodes): if manifest_version == '2.0': - manifest_url = ('/'.join(manifest_url.split('/')[:-1]) + '/' - + (media_el.attrib.get('href') or media_el.attrib.get('url'))) + manifest_url = ('/'.join(manifest_url.split('/')[:-1]) + '/' + + (media_el.attrib.get('href') or media_el.attrib.get('url'))) tbr = int_or_none(media_el.attrib.get('bitrate')) formats.append({ 'format_id': '-'.join(filter(None, [f4m_id, 'f4m-%d' % (i if tbr is None else tbr)])), @@ -823,7 +833,7 @@ class InfoExtractor(object): 'url': m3u8_url, 'ext': ext, 'protocol': 'm3u8', - 'preference': -1, + 'preference': preference - 1 if preference else -1, 'resolution': 'multiple', 'format_note': 'Quality selection URL', }] diff --git a/youtube_dl/extractor/defense.py b/youtube_dl/extractor/defense.py index 2b90bf4fc..98e3aedfd 100644 --- a/youtube_dl/extractor/defense.py +++ b/youtube_dl/extractor/defense.py @@ -25,8 +25,9 @@ class DefenseGouvFrIE(InfoExtractor): r"flashvars.pvg_id=\"(\d+)\";", webpage, 'ID') - json_url = ('http://static.videos.gouv.fr/brightcovehub/export/json/' - + video_id) + json_url = ( + 'http://static.videos.gouv.fr/brightcovehub/export/json/%s' % + video_id) info = self._download_json(json_url, title, 'Downloading JSON config') video_url = info['renditions'][0]['url'] diff --git a/youtube_dl/extractor/escapist.py b/youtube_dl/extractor/escapist.py index 4303feccd..b49b9869f 100644 --- a/youtube_dl/extractor/escapist.py +++ b/youtube_dl/extractor/escapist.py @@ -22,6 +22,7 @@ class EscapistIE(InfoExtractor): 'uploader_id': 'the-escapist-presents', 'uploader': 'The Escapist Presents', 'title': "Breaking Down Baldur's Gate", + 'thumbnail': 're:^https?://.*\.jpg$', } } @@ -30,19 +31,18 @@ class EscapistIE(InfoExtractor): webpage = self._download_webpage(url, video_id) uploader_id = self._html_search_regex( - r"<h1 class='headline'><a href='/videos/view/(.*?)'", + r"<h1\s+class='headline'>\s*<a\s+href='/videos/view/(.*?)'", webpage, 'uploader ID', fatal=False) uploader = self._html_search_regex( - r"<h1 class='headline'>(.*?)</a>", + r"<h1\s+class='headline'>(.*?)</a>", webpage, 'uploader', fatal=False) description = self._html_search_meta('description', webpage) raw_title = self._html_search_meta('title', webpage, fatal=True) title = raw_title.partition(' : ')[2] - player_url = self._og_search_video_url(webpage, name='player URL') - config_url = compat_urllib_parse.unquote(self._search_regex( - r'config=(.*)$', player_url, 'config URL')) + config_url = compat_urllib_parse.unquote(self._html_search_regex( + r'<param\s+name="flashvars"\s+value="config=([^"&]+)', webpage, 'config URL')) formats = [] @@ -81,5 +81,4 @@ class EscapistIE(InfoExtractor): 'title': title, 'thumbnail': self._og_search_thumbnail(webpage), 'description': description, - 'player_url': player_url, } diff --git a/youtube_dl/extractor/gdcvault.py b/youtube_dl/extractor/gdcvault.py index fed968f51..05f58f1af 100644 --- a/youtube_dl/extractor/gdcvault.py +++ b/youtube_dl/extractor/gdcvault.py @@ -7,6 +7,7 @@ from ..compat import ( compat_urllib_parse, compat_urllib_request, ) +from ..utils import remove_end class GDCVaultIE(InfoExtractor): @@ -68,7 +69,9 @@ class GDCVaultIE(InfoExtractor): akami_url = xml_description.find('./metadata/akamaiHost').text slide_video_path = xml_description.find('./metadata/slideVideo').text video_formats.append({ - 'url': 'rtmp://' + akami_url + '/' + slide_video_path, + 'url': 'rtmp://%s/ondemand?ovpfv=1.1' % 'fms.digitallyspeaking.com/cfx/st', + 'play_path': remove_end(slide_video_path, '.flv'), + 'ext': 'flv', 'format_note': 'slide deck video', 'quality': -2, 'preference': -2, @@ -76,7 +79,9 @@ class GDCVaultIE(InfoExtractor): }) speaker_video_path = xml_description.find('./metadata/speakerVideo').text video_formats.append({ - 'url': 'rtmp://' + akami_url + '/' + speaker_video_path, + 'url': 'rtmp://%s/ondemand?ovpfv=1.1' % 'fms.digitallyspeaking.com/cfx/st', + 'play_path': remove_end(speaker_video_path, '.flv'), + 'ext': 'flv', 'format_note': 'speaker video', 'quality': -1, 'preference': -1, diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 8dce96a64..875e1bf05 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -547,7 +547,16 @@ class GenericIE(InfoExtractor): 'id': 'aanslagen-kopenhagen', 'title': 'Aanslagen Kopenhagen | RTL Nieuws', } - } + }, + # Zapiks embed + { + 'url': 'http://www.skipass.com/news/116090-bon-appetit-s5ep3-baqueira-mi-cor.html', + 'info_dict': { + 'id': '118046', + 'ext': 'mp4', + 'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !', + } + }, ] def report_following_redirect(self, new_url): @@ -1098,6 +1107,12 @@ class GenericIE(InfoExtractor): if mobj is not None: return self.url_result(mobj.group('url'), 'Livestream') + # Look for Zapiks embed + mobj = re.search( + r'<iframe[^>]+src="(?P<url>https?://(?:www\.)?zapiks\.fr/index\.php\?.+?)"', webpage) + if mobj is not None: + return self.url_result(mobj.group('url'), 'Zapiks') + def check_video(vurl): if YoutubeIE.suitable(vurl): return True diff --git a/youtube_dl/extractor/imgur.py b/youtube_dl/extractor/imgur.py index b16c7aed0..fe5d95e2c 100644 --- a/youtube_dl/extractor/imgur.py +++ b/youtube_dl/extractor/imgur.py @@ -19,16 +19,16 @@ class ImgurIE(InfoExtractor): 'info_dict': { 'id': 'A61SaA1', 'ext': 'mp4', - 'title': 'MRW gifv is up and running without any bugs', - 'description': 'The Internet\'s visual storytelling community. Explore, share, and discuss the best visual stories the Internet has to offer.', + 'title': 're:Imgur GIF$|MRW gifv is up and running without any bugs$', + 'description': 're:The origin of the Internet\'s most viral images$|The Internet\'s visual storytelling community\. Explore, share, and discuss the best visual stories the Internet has to offer\.$', }, }, { 'url': 'https://imgur.com/A61SaA1', 'info_dict': { 'id': 'A61SaA1', 'ext': 'mp4', - 'title': 'MRW gifv is up and running without any bugs', - 'description': 'The Internet\'s visual storytelling community. Explore, share, and discuss the best visual stories the Internet has to offer.', + 'title': 're:Imgur GIF$|MRW gifv is up and running without any bugs$', + 'description': 're:The origin of the Internet\'s most viral images$|The Internet\'s visual storytelling community\. Explore, share, and discuss the best visual stories the Internet has to offer\.$', }, }] diff --git a/youtube_dl/extractor/r7.py b/youtube_dl/extractor/r7.py new file mode 100644 index 000000000..976c8feec --- /dev/null +++ b/youtube_dl/extractor/r7.py @@ -0,0 +1,88 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + js_to_json, + unescapeHTML, + int_or_none, +) + + +class R7IE(InfoExtractor): + _VALID_URL = r'''(?x)https?:// + (?: + (?:[a-zA-Z]+)\.r7\.com(?:/[^/]+)+/idmedia/| + noticias\.r7\.com(?:/[^/]+)+/[^/]+-| + player\.r7\.com/video/i/ + ) + (?P<id>[\da-f]{24}) + ''' + _TESTS = [{ + 'url': 'http://videos.r7.com/policiais-humilham-suspeito-a-beira-da-morte-morre-com-dignidade-/idmedia/54e7050b0cf2ff57e0279389.html', + 'md5': '403c4e393617e8e8ddc748978ee8efde', + 'info_dict': { + 'id': '54e7050b0cf2ff57e0279389', + 'ext': 'mp4', + 'title': 'Policiais humilham suspeito à beira da morte: "Morre com dignidade"', + 'thumbnail': 're:^https?://.*\.jpg$', + 'duration': 98, + 'like_count': int, + 'view_count': int, + }, + }, { + 'url': 'http://esportes.r7.com/videos/cigano-manda-recado-aos-fas/idmedia/4e176727b51a048ee6646a1b.html', + 'only_matching': True, + }, { + 'url': 'http://noticias.r7.com/record-news/video/representante-do-instituto-sou-da-paz-fala-sobre-fim-do-estatuto-do-desarmamento-5480fc580cf2285b117f438d/', + 'only_matching': True, + }, { + 'url': 'http://player.r7.com/video/i/54e7050b0cf2ff57e0279389?play=true&video=http://vsh.r7.com/54e7050b0cf2ff57e0279389/ER7_RE_BG_MORTE_JOVENS_570kbps_2015-02-2009f17818-cc82-4c8f-86dc-89a66934e633-ATOS_copy.mp4&linkCallback=http://videos.r7.com/policiais-humilham-suspeito-a-beira-da-morte-morre-com-dignidade-/idmedia/54e7050b0cf2ff57e0279389.html&thumbnail=http://vtb.r7.com/ER7_RE_BG_MORTE_JOVENS_570kbps_2015-02-2009f17818-cc82-4c8f-86dc-89a66934e633-thumb.jpg&idCategory=192&share=true&layout=full&full=true', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage( + 'http://player.r7.com/video/i/%s' % video_id, video_id) + + item = self._parse_json(js_to_json(self._search_regex( + r'(?s)var\s+item\s*=\s*({.+?});', webpage, 'player')), video_id) + + title = unescapeHTML(item['title']) + thumbnail = item.get('init', {}).get('thumbUri') + duration = None + + statistics = item.get('statistics', {}) + like_count = int_or_none(statistics.get('likes')) + view_count = int_or_none(statistics.get('views')) + + formats = [] + for format_key, format_dict in item['playlist'][0].items(): + src = format_dict.get('src') + if not src: + continue + format_id = format_dict.get('format') or format_key + if duration is None: + duration = format_dict.get('duration') + if '.f4m' in src: + formats.extend(self._extract_f4m_formats(src, video_id, preference=-1)) + elif src.endswith('.m3u8'): + formats.extend(self._extract_m3u8_formats(src, video_id, 'mp4', preference=-2)) + else: + formats.append({ + 'url': src, + 'format_id': format_id, + }) + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': title, + 'thumbnail': thumbnail, + 'duration': duration, + 'like_count': like_count, + 'view_count': view_count, + 'formats': formats, + } diff --git a/youtube_dl/extractor/rtve.py b/youtube_dl/extractor/rtve.py index 3469d9578..e60f85b5b 100644 --- a/youtube_dl/extractor/rtve.py +++ b/youtube_dl/extractor/rtve.py @@ -6,6 +6,7 @@ import re import time from .common import InfoExtractor +from ..compat import compat_urlparse from ..utils import ( struct_unpack, remove_end, @@ -96,12 +97,10 @@ class RTVEALaCartaIE(InfoExtractor): ).replace('.net.rtve', '.multimedia.cdn.rtve') video_path = self._download_webpage( auth_url, video_id, 'Getting video url') - # Use mvod.akcdn instead of flash.akamaihd.multimedia.cdn to get + # Use mvod1.akcdn instead of flash.akamaihd.multimedia.cdn to get # the right Content-Length header and the mp4 format - video_url = ( - 'http://mvod.akcdn.rtve.es/{0}&v=2.6.8' - '&fp=MAC%2016,0,0,296&r=MRUGG&g=OEOJWFXNFGCP'.format(video_path) - ) + video_url = compat_urlparse.urljoin( + 'http://mvod1.akcdn.rtve.es/', video_path) return { 'id': video_id, diff --git a/youtube_dl/extractor/teamcoco.py b/youtube_dl/extractor/teamcoco.py index a73da1c9c..5793dbc10 100644 --- a/youtube_dl/extractor/teamcoco.py +++ b/youtube_dl/extractor/teamcoco.py @@ -1,8 +1,10 @@ from __future__ import unicode_literals +import base64 import re from .common import InfoExtractor +from ..utils import qualities class TeamcocoIE(InfoExtractor): @@ -24,8 +26,8 @@ class TeamcocoIE(InfoExtractor): 'info_dict': { 'id': '19705', 'ext': 'mp4', - "description": "Louis C.K. got starstruck by George W. Bush, so what? Part one.", - "title": "Louis C.K. Interview Pt. 1 11/3/11", + 'description': 'Louis C.K. got starstruck by George W. Bush, so what? Part one.', + 'title': 'Louis C.K. Interview Pt. 1 11/3/11', 'age_limit': 0, } } @@ -42,42 +44,39 @@ class TeamcocoIE(InfoExtractor): display_id = mobj.group('display_id') webpage = self._download_webpage(url, display_id) - video_id = mobj.group("video_id") + video_id = mobj.group('video_id') if not video_id: video_id = self._html_search_regex( self._VIDEO_ID_REGEXES, webpage, 'video id') - data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id - data = self._download_xml( - data_url, display_id, 'Downloading data webpage') + embed_url = 'http://teamcoco.com/embed/v/%s' % video_id + embed = self._download_webpage( + embed_url, video_id, 'Downloading embed page') + + encoded_data = self._search_regex( + r'"preload"\s*:\s*"([^"]+)"', embed, 'encoded data') + data = self._parse_json( + base64.b64decode(encoded_data.encode('ascii')).decode('utf-8'), video_id) - qualities = ['500k', '480p', '1000k', '720p', '1080p'] formats = [] - for filed in data.findall('files/file'): - if filed.attrib.get('playmode') == 'all': - # it just duplicates one of the entries - break - file_url = filed.text - m_format = re.search(r'(\d+(k|p))\.mp4', file_url) + get_quality = qualities(['500k', '480p', '1000k', '720p', '1080p']) + for filed in data['files']: + m_format = re.search(r'(\d+(k|p))\.mp4', filed['url']) if m_format is not None: format_id = m_format.group(1) else: - format_id = filed.attrib['bitrate'] + format_id = filed['bitrate'] tbr = ( - int(filed.attrib['bitrate']) - if filed.attrib['bitrate'].isdigit() + int(filed['bitrate']) + if filed['bitrate'].isdigit() else None) - try: - quality = qualities.index(format_id) - except ValueError: - quality = -1 formats.append({ - 'url': file_url, + 'url': filed['url'], 'ext': 'mp4', 'tbr': tbr, 'format_id': format_id, - 'quality': quality, + 'quality': get_quality(format_id), }) self._sort_formats(formats) @@ -86,8 +85,8 @@ class TeamcocoIE(InfoExtractor): 'id': video_id, 'display_id': display_id, 'formats': formats, - 'title': self._og_search_title(webpage), - 'thumbnail': self._og_search_thumbnail(webpage), - 'description': self._og_search_description(webpage), + 'title': data['title'], + 'thumbnail': data.get('thumb', {}).get('href'), + 'description': data.get('teaser'), 'age_limit': self._family_friendly_search(webpage), } diff --git a/youtube_dl/extractor/ted.py b/youtube_dl/extractor/ted.py index 10b3b706a..59678399d 100644 --- a/youtube_dl/extractor/ted.py +++ b/youtube_dl/extractor/ted.py @@ -83,6 +83,22 @@ class TEDIE(SubtitlesInfoExtractor): 'params': { 'skip_download': True, }, + }, { + # YouTube video + 'url': 'http://www.ted.com/talks/jeffrey_kluger_the_sibling_bond', + 'add_ie': ['Youtube'], + 'info_dict': { + 'id': 'aFBIPO-P7LM', + 'ext': 'mp4', + 'title': 'The hidden power of siblings: Jeff Kluger at TEDxAsheville', + 'description': 'md5:3d7a4f50d95ca5dd67104e2a20f43fe1', + 'uploader': 'TEDx Talks', + 'uploader_id': 'TEDxTalks', + 'upload_date': '20111216', + }, + 'params': { + 'skip_download': True, + }, }] _NATIVE_FORMATS = { @@ -132,11 +148,16 @@ class TEDIE(SubtitlesInfoExtractor): talk_info = self._extract_info(webpage)['talks'][0] - if talk_info.get('external') is not None: - self.to_screen('Found video from %s' % talk_info['external']['service']) + external = talk_info.get('external') + if external: + service = external['service'] + self.to_screen('Found video from %s' % service) + ext_url = None + if service.lower() == 'youtube': + ext_url = external.get('code') return { '_type': 'url', - 'url': talk_info['external']['uri'], + 'url': ext_url or external['uri'], } formats = [{ diff --git a/youtube_dl/extractor/tv4.py b/youtube_dl/extractor/tv4.py new file mode 100644 index 000000000..1c4b6d635 --- /dev/null +++ b/youtube_dl/extractor/tv4.py @@ -0,0 +1,100 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + parse_iso8601, +) + + +class TV4IE(InfoExtractor): + IE_DESC = 'tv4.se and tv4play.se' + _VALID_URL = r'''(?x)https?://(?:www\.)? + (?: + tv4\.se/(?:[^/]+)/klipp/(?:.*)-| + tv4play\.se/ + (?: + (?:program|barn)/(?:[^\?]+)\?video_id=| + iframe/video/| + film/| + sport/| + ) + )(?P<id>[0-9]+)''' + _TESTS = [ + { + 'url': 'http://www.tv4.se/kalla-fakta/klipp/kalla-fakta-5-english-subtitles-2491650', + 'md5': '909d6454b87b10a25aa04c4bdd416a9b', + 'info_dict': { + 'id': '2491650', + 'ext': 'mp4', + 'title': 'Kalla Fakta 5 (english subtitles)', + 'thumbnail': 're:^https?://.*\.jpg$', + 'timestamp': int, + 'upload_date': '20131125', + }, + }, + { + 'url': 'http://www.tv4play.se/iframe/video/3054113', + 'md5': '77f851c55139ffe0ebd41b6a5552489b', + 'info_dict': { + 'id': '3054113', + 'ext': 'mp4', + 'title': 'Så här jobbar ficktjuvarna - se avslöjande bilder', + 'thumbnail': 're:^https?://.*\.jpg$', + 'description': 'Unika bilder avslöjar hur turisternas fickor vittjas mitt på Stockholms central. Två experter på ficktjuvarna avslöjar knepen du ska se upp för.', + 'timestamp': int, + 'upload_date': '20150130', + }, + }, + { + 'url': 'http://www.tv4play.se/sport/3060959', + 'only_matching': True, + }, + { + 'url': 'http://www.tv4play.se/film/2378136', + 'only_matching': True, + }, + { + 'url': 'http://www.tv4play.se/barn/looney-tunes?video_id=3062412', + 'only_matching': True, + }, + ] + + def _real_extract(self, url): + video_id = self._match_id(url) + + info = self._download_json( + 'http://www.tv4play.se/player/assets/%s.json' % video_id, video_id, 'Downloading video info JSON') + + # If is_geo_restricted is true, it doesn't neceserally mean we can't download it + if info['is_geo_restricted']: + self.report_warning('This content might not be available in your country due to licensing restrictions.') + if info['requires_subscription']: + raise ExtractorError('This content requires subscription.', expected=True) + + sources_data = self._download_json( + 'https://prima.tv4play.se/api/web/asset/%s/play.json?protocol=http&videoFormat=MP4' % video_id, video_id, 'Downloading sources JSON') + sources = sources_data['playback'] + + formats = [] + for item in sources.get('items', {}).get('item', []): + ext, bitrate = item['mediaFormat'], item['bitrate'] + formats.append({ + 'format_id': '%s_%s' % (ext, bitrate), + 'tbr': bitrate, + 'ext': ext, + 'url': item['url'], + }) + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': info['title'], + 'formats': formats, + 'description': info.get('description'), + 'timestamp': parse_iso8601(info.get('broadcast_date_time')), + 'duration': info.get('duration'), + 'thumbnail': info.get('image'), + 'is_live': sources.get('live'), + } diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 78d287e0e..4cd2f73d9 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -4,6 +4,7 @@ from __future__ import unicode_literals import json import re import itertools +import hashlib from .common import InfoExtractor from .subtitles import SubtitlesInfoExtractor @@ -225,6 +226,11 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor): if mobj.group('pro') or mobj.group('player'): url = 'http://player.vimeo.com/video/' + video_id + password = self._downloader.params.get('videopassword', None) + if password: + headers['Cookie'] = '%s_password=%s' % ( + video_id, hashlib.md5(password.encode('utf-8')).hexdigest()) + # Retrieve video webpage to extract further information request = compat_urllib_request.Request(url, None, headers) try: diff --git a/youtube_dl/extractor/zapiks.py b/youtube_dl/extractor/zapiks.py new file mode 100644 index 000000000..22a9a57e8 --- /dev/null +++ b/youtube_dl/extractor/zapiks.py @@ -0,0 +1,110 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + parse_duration, + parse_iso8601, + xpath_with_ns, + xpath_text, + int_or_none, +) + + +class ZapiksIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?zapiks\.(?:fr|com)/(?:(?:[a-z]{2}/)?(?P<display_id>.+?)\.html|index\.php\?.*\bmedia_id=(?P<id>\d+))' + _TESTS = [ + { + 'url': 'http://www.zapiks.fr/ep2s3-bon-appetit-eh-be-viva.html', + 'md5': 'aeb3c473b2d564b2d46d664d28d5f050', + 'info_dict': { + 'id': '80798', + 'ext': 'mp4', + 'title': 'EP2S3 - Bon Appétit - Eh bé viva les pyrénées con!', + 'description': 'md5:7054d6f6f620c6519be1fe710d4da847', + 'thumbnail': 're:^https?://.*\.jpg$', + 'duration': 528, + 'timestamp': 1359044972, + 'upload_date': '20130124', + 'view_count': int, + 'comment_count': int, + }, + }, + { + 'url': 'http://www.zapiks.com/ep3s5-bon-appetit-baqueira-m-1.html', + 'only_matching': True, + }, + { + 'url': 'http://www.zapiks.com/nl/ep3s5-bon-appetit-baqueira-m-1.html', + 'only_matching': True, + }, + { + 'url': 'http://www.zapiks.fr/index.php?action=playerIframe&media_id=118046&width=640&height=360&autoStart=false&language=fr', + 'only_matching': True, + }, + ] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + display_id = mobj.group('display_id') or video_id + + webpage = self._download_webpage(url, display_id) + + if not video_id: + video_id = self._search_regex( + r'data-media-id="(\d+)"', webpage, 'video id') + + playlist = self._download_xml( + 'http://www.zapiks.fr/view/index.php?action=playlist&media_id=%s&lang=en' % video_id, + display_id) + + NS_MAP = { + 'jwplayer': 'http://rss.jwpcdn.com/' + } + + def ns(path): + return xpath_with_ns(path, NS_MAP) + + item = playlist.find('./channel/item') + + title = xpath_text(item, 'title', 'title') or self._og_search_title(webpage) + description = self._og_search_description(webpage, default=None) + thumbnail = xpath_text( + item, ns('./jwplayer:image'), 'thumbnail') or self._og_search_thumbnail(webpage, default=None) + duration = parse_duration(self._html_search_meta( + 'duration', webpage, 'duration', default=None)) + timestamp = parse_iso8601(self._html_search_meta( + 'uploadDate', webpage, 'upload date', default=None), ' ') + + view_count = int_or_none(self._search_regex( + r'UserPlays:(\d+)', webpage, 'view count', default=None)) + comment_count = int_or_none(self._search_regex( + r'UserComments:(\d+)', webpage, 'comment count', default=None)) + + formats = [] + for source in item.findall(ns('./jwplayer:source')): + format_id = source.attrib['label'] + f = { + 'url': source.attrib['file'], + 'format_id': format_id, + } + m = re.search(r'^(?P<height>\d+)[pP]', format_id) + if m: + f['height'] = int(m.group('height')) + formats.append(f) + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'duration': duration, + 'timestamp': timestamp, + 'view_count': view_count, + 'comment_count': comment_count, + 'formats': formats, + } diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 238b6556b..475fad3c9 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -900,8 +900,8 @@ def _windows_write_string(s, out): def not_a_console(handle): if handle == INVALID_HANDLE_VALUE or handle is None: return True - return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR - or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0) + return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR or + GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0) if not_a_console(h): return False diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 537e8cf60..7c8b29c3b 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2015.02.19.3' +__version__ = '2015.02.21' |