diff options
Diffstat (limited to 'youtube_dl')
33 files changed, 589 insertions, 175 deletions
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index e2b823f66..806e7b239 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1333,7 +1333,9 @@ class YoutubeDL(object): formats = info_dict.get('formats', [info_dict]) idlen = max(len('format code'), max(len(f['format_id']) for f in formats)) - formats_s = [line(f, idlen) for f in formats] + formats_s = [ + line(f, idlen) for f in formats + if f.get('preference') is None or f['preference'] >= -1000] if len(formats) > 1: formats_s[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)' formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)' diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py index 5bb0f3cfd..aa58b52ab 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -11,7 +11,6 @@ from ..compat import ( compat_urllib_request, ) from ..utils import ( - check_executable, encodeFilename, ) @@ -27,16 +26,13 @@ class HlsFD(FileDownloader): '-bsf:a', 'aac_adtstoasc', encodeFilename(tmpfilename, for_subprocess=True)] - for program in ['avconv', 'ffmpeg']: - if check_executable(program, ['-version']): - break - else: + ffpp = FFmpegPostProcessor(downloader=self) + program = ffpp._executable + if program is None: self.report_error('m3u8 download detected but ffmpeg or avconv could not be found. Please install one.') return False - cmd = [program] + args - - ffpp = FFmpegPostProcessor(downloader=self) ffpp.check_version() + cmd = [program] + args retval = subprocess.call(cmd) if retval == 0: diff --git a/youtube_dl/downloader/mplayer.py b/youtube_dl/downloader/mplayer.py index c53195da0..72cef30ea 100644 --- a/youtube_dl/downloader/mplayer.py +++ b/youtube_dl/downloader/mplayer.py @@ -4,8 +4,8 @@ import os import subprocess from .common import FileDownloader -from ..compat import compat_subprocess_get_DEVNULL from ..utils import ( + check_executable, encodeFilename, ) @@ -20,11 +20,7 @@ class MplayerFD(FileDownloader): 'mplayer', '-really-quiet', '-vo', 'null', '-vc', 'dummy', '-dumpstream', '-dumpfile', tmpfilename, url] # Check for mplayer first - try: - subprocess.call( - ['mplayer', '-h'], - stdout=compat_subprocess_get_DEVNULL(), stderr=subprocess.STDOUT) - except (OSError, IOError): + if not check_executable('mplayer', ['-h']): self.report_error('MMS or RTSP download detected but "%s" could not be run' % args[0]) return False diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 9ccd1b32e..b523e9644 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -159,6 +159,7 @@ from .gametrailers import GametrailersIE from .gdcvault import GDCVaultIE from .generic import GenericIE from .giantbomb import GiantBombIE +from .giga import GigaIE from .glide import GlideIE from .globo import GloboIE from .godtube import GodTubeIE @@ -325,6 +326,7 @@ from .prosiebensat1 import ProSiebenSat1IE from .pyvideo import PyvideoIE from .quickvid import QuickVidIE from .radiode import RadioDeIE +from .radiobremen import RadioBremenIE from .radiofrance import RadioFranceIE from .rai import RaiIE from .rbmaradio import RBMARadioIE @@ -345,6 +347,7 @@ from .ruhd import RUHDIE from .rutube import ( RutubeIE, RutubeChannelIE, + RutubeEmbedIE, RutubeMovieIE, RutubePersonIE, ) @@ -510,6 +513,7 @@ from .wdr import ( WDRMobileIE, WDRMausIE, ) +from .webofstories import WebOfStoriesIE from .weibo import WeiboIE from .wimp import WimpIE from .wistia import WistiaIE @@ -545,7 +549,6 @@ from .youtube import ( YoutubeSearchURLIE, YoutubeShowIE, YoutubeSubscriptionsIE, - YoutubeTopListIE, YoutubeTruncatedIDIE, YoutubeTruncatedURLIE, YoutubeUserIE, diff --git a/youtube_dl/extractor/auengine.py b/youtube_dl/extractor/auengine.py index 014a21952..a1b666be0 100644 --- a/youtube_dl/extractor/auengine.py +++ b/youtube_dl/extractor/auengine.py @@ -7,6 +7,7 @@ from ..compat import compat_urllib_parse from ..utils import ( determine_ext, ExtractorError, + remove_end, ) @@ -27,23 +28,18 @@ class AUEngineIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - title = self._html_search_regex(r'<title>(?P<title>.+?)</title>', webpage, 'title') - title = title.strip() - links = re.findall(r'\s(?:file|url):\s*["\']([^\'"]+)["\']', webpage) - links = map(compat_urllib_parse.unquote, links) - - thumbnail = None - video_url = None - for link in links: - if link.endswith('.png'): - thumbnail = link - elif '/videos/' in link: - video_url = link + title = self._html_search_regex( + r'<title>\s*(?P<title>.+?)\s*</title>', webpage, 'title') + video_urls = re.findall(r'http://\w+.auengine.com/vod/.*[^\W]', webpage) + video_url = compat_urllib_parse.unquote(video_urls[0]) + thumbnails = re.findall(r'http://\w+.auengine.com/thumb/.*[^\W]', webpage) + thumbnail = compat_urllib_parse.unquote(thumbnails[0]) + if not video_url: raise ExtractorError('Could not find video URL') + ext = '.' + determine_ext(video_url) - if ext == title[-len(ext):]: - title = title[:-len(ext)] + title = remove_end(title, ext) return { 'id': video_id, diff --git a/youtube_dl/extractor/bbccouk.py b/youtube_dl/extractor/bbccouk.py index 73fe66b01..1cf48fe0d 100644 --- a/youtube_dl/extractor/bbccouk.py +++ b/youtube_dl/extractor/bbccouk.py @@ -10,7 +10,7 @@ from ..compat import compat_HTTPError class BBCCoUkIE(SubtitlesInfoExtractor): IE_NAME = 'bbc.co.uk' IE_DESC = 'BBC iPlayer' - _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:programmes|iplayer/(?:episode|playlist))/(?P<id>[\da-z]{8})' + _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:(?:(?:programmes|iplayer/(?:episode|playlist))/)|music/clips[/#])(?P<id>[\da-z]{8})' _TESTS = [ { @@ -18,8 +18,8 @@ class BBCCoUkIE(SubtitlesInfoExtractor): 'info_dict': { 'id': 'b039d07m', 'ext': 'flv', - 'title': 'Kaleidoscope: Leonard Cohen', - 'description': 'md5:db4755d7a665ae72343779f7dacb402c', + 'title': 'Kaleidoscope, Leonard Cohen', + 'description': 'The Canadian poet and songwriter reflects on his musical career.', 'duration': 1740, }, 'params': { @@ -85,8 +85,39 @@ class BBCCoUkIE(SubtitlesInfoExtractor): 'skip_download': True, } }, { + 'url': 'http://www.bbc.co.uk/music/clips/p02frcc3', + 'note': 'Audio', + 'info_dict': { + 'id': 'p02frcch', + 'ext': 'flv', + 'title': 'Pete Tong, Past, Present and Future Special, Madeon - After Hours mix', + 'description': 'French house superstar Madeon takes us out of the club and onto the after party.', + 'duration': 3507, + }, + 'params': { + # rtmp download + 'skip_download': True, + } + }, { + 'url': 'http://www.bbc.co.uk/music/clips/p025c0zz', + 'note': 'Video', + 'info_dict': { + 'id': 'p025c103', + 'ext': 'flv', + 'title': 'Reading and Leeds Festival, 2014, Rae Morris - Closer (Live on BBC Three)', + 'description': 'Rae Morris performs Closer for BBC Three at Reading 2014', + 'duration': 226, + }, + 'params': { + # rtmp download + 'skip_download': True, + } + }, { 'url': 'http://www.bbc.co.uk/iplayer/playlist/p01dvks4', 'only_matching': True, + }, { + 'url': 'http://www.bbc.co.uk/music/clips#p02frcc3', + 'only_matching': True, } ] diff --git a/youtube_dl/extractor/bet.py b/youtube_dl/extractor/bet.py index 003e50002..4fbdd6f1c 100644 --- a/youtube_dl/extractor/bet.py +++ b/youtube_dl/extractor/bet.py @@ -16,7 +16,7 @@ class BetIE(InfoExtractor): { 'url': 'http://www.bet.com/news/politics/2014/12/08/in-bet-exclusive-obama-talks-race-and-racism.html', 'info_dict': { - 'id': '417cd61c-c793-4e8e-b006-e445ecc45add', + 'id': '406429c6-1b8a-463e-83fc-814adb81a9db', 'display_id': 'in-bet-exclusive-obama-talks-race-and-racism', 'ext': 'flv', 'title': 'BET News Presents: A Conversation With President Obama', diff --git a/youtube_dl/extractor/canalplus.py b/youtube_dl/extractor/canalplus.py index 9873728df..11d18d74a 100644 --- a/youtube_dl/extractor/canalplus.py +++ b/youtube_dl/extractor/canalplus.py @@ -5,6 +5,8 @@ import re from .common import InfoExtractor from ..utils import ( + ExtractorError, + HEADRequest, unified_strdate, url_basename, qualities, @@ -76,6 +78,16 @@ class CanalplusIE(InfoExtractor): preference = qualities(['MOBILE', 'BAS_DEBIT', 'HAUT_DEBIT', 'HD', 'HLS', 'HDS']) + fmt_url = next(iter(media.find('VIDEOS'))).text + if '/geo' in fmt_url.lower(): + response = self._request_webpage( + HEADRequest(fmt_url), video_id, + 'Checking if the video is georestricted') + if '/blocage' in response.geturl(): + raise ExtractorError( + 'The video is not available in your country', + expected=True) + formats = [] for fmt in media.find('VIDEOS'): format_url = fmt.text diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 6e264f687..562e656e0 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -92,6 +92,8 @@ class InfoExtractor(object): by this field, regardless of all other values. -1 for default (order by other properties), -2 or smaller for less than default. + < -1000 to hide the format (if there is + another one which is strictly better) * language_preference Is this in the correct requested language? 10 if it's what the URL is about, diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index 354046a9e..1680f532f 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -228,7 +228,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text video_thumbnail = self._search_regex(r'<episode_image_url>([^<]+)', playerdata, 'thumbnail', fatal=False) formats = [] - for fmt in re.findall(r'\?p([0-9]{3,4})=1', webpage): + for fmt in re.findall(r'showmedia\.([0-9]{3,4})p', webpage): stream_quality, stream_format = self._FORMAT_IDS[fmt] video_format = fmt + 'p' streamdata_req = compat_urllib_request.Request('http://www.crunchyroll.com/xml/') diff --git a/youtube_dl/extractor/ellentv.py b/youtube_dl/extractor/ellentv.py index 3e7923648..fc92ff825 100644 --- a/youtube_dl/extractor/ellentv.py +++ b/youtube_dl/extractor/ellentv.py @@ -1,7 +1,6 @@ # coding: utf-8 from __future__ import unicode_literals -import re import json from .common import InfoExtractor @@ -12,32 +11,49 @@ from ..utils import ( class EllenTVIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?ellentv\.com/videos/(?P<id>[a-z0-9_-]+)' - _TEST = { + _VALID_URL = r'https?://(?:www\.)?(?:ellentv|ellentube)\.com/videos/(?P<id>[a-z0-9_-]+)' + _TESTS = [{ 'url': 'http://www.ellentv.com/videos/0-7jqrsr18/', 'md5': 'e4af06f3bf0d5f471921a18db5764642', 'info_dict': { 'id': '0-7jqrsr18', 'ext': 'mp4', 'title': 'What\'s Wrong with These Photos? A Whole Lot', + 'description': 'md5:35f152dc66b587cf13e6d2cf4fa467f6', 'timestamp': 1406876400, 'upload_date': '20140801', } - } + }, { + 'url': 'http://ellentube.com/videos/0-dvzmabd5/', + 'md5': '98238118eaa2bbdf6ad7f708e3e4f4eb', + 'info_dict': { + 'id': '0-dvzmabd5', + 'ext': 'mp4', + 'title': '1 year old twin sister makes her brother laugh', + 'description': '1 year old twin sister makes her brother laugh', + 'timestamp': 1419542075, + 'upload_date': '20141225', + } + }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) + video_url = self._html_search_meta('VideoURL', webpage, 'url') + title = self._og_search_title(webpage, default=None) or self._search_regex( + r'pageName\s*=\s*"([^"]+)"', webpage, 'title') + description = self._html_search_meta( + 'description', webpage, 'description') or self._og_search_description(webpage) timestamp = parse_iso8601(self._search_regex( r'<span class="publish-date"><time datetime="([^"]+)">', webpage, 'timestamp')) return { 'id': video_id, - 'title': self._og_search_title(webpage), - 'url': self._html_search_meta('VideoURL', webpage, 'url'), + 'url': video_url, + 'title': title, + 'description': description, 'timestamp': timestamp, } @@ -55,8 +71,7 @@ class EllenTVClipsIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - playlist_id = mobj.group('id') + playlist_id = self._match_id(url) webpage = self._download_webpage(url, playlist_id) playlist = self._extract_playlist(webpage) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 493afb57d..7a5bf9392 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -131,12 +131,13 @@ class GenericIE(InfoExtractor): # ooyala video { 'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219', - 'md5': '5644c6ca5d5782c1d0d350dad9bd840c', + 'md5': '166dd577b433b4d4ebfee10b0824d8ff', 'info_dict': { 'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ', 'ext': 'mp4', 'title': '2cc213299525360.mov', # that's what we get }, + 'add_ie': ['Ooyala'], }, # google redirect { @@ -146,7 +147,7 @@ class GenericIE(InfoExtractor): 'ext': 'mp4', 'upload_date': '20130224', 'uploader_id': 'TheVerge', - 'description': 'Chris Ziegler takes a look at the Alcatel OneTouch Fire and the ZTE Open; two of the first Firefox OS handsets to be officially announced.', + 'description': 're:^Chris Ziegler takes a look at the\.*', 'uploader': 'The Verge', 'title': 'First Firefox OS phones side-by-side', }, @@ -925,7 +926,7 @@ class GenericIE(InfoExtractor): # Look for embedded TED player mobj = re.search( - r'<iframe[^>]+?src=(["\'])(?P<url>http://embed\.ted\.com/.+?)\1', webpage) + r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage) if mobj is not None: return self.url_result(mobj.group('url'), 'TED') diff --git a/youtube_dl/extractor/giga.py b/youtube_dl/extractor/giga.py new file mode 100644 index 000000000..775890112 --- /dev/null +++ b/youtube_dl/extractor/giga.py @@ -0,0 +1,101 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import itertools + +from .common import InfoExtractor +from ..utils import ( + qualities, + compat_str, + parse_duration, + parse_iso8601, + str_to_int, +) + + +class GigaIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?giga\.de/(?:[^/]+/)*(?P<id>[^/]+)' + _TESTS = [{ + 'url': 'http://www.giga.de/filme/anime-awesome/trailer/anime-awesome-chihiros-reise-ins-zauberland-das-beste-kommt-zum-schluss/', + 'md5': '6bc5535e945e724640664632055a584f', + 'info_dict': { + 'id': '2622086', + 'display_id': 'anime-awesome-chihiros-reise-ins-zauberland-das-beste-kommt-zum-schluss', + 'ext': 'mp4', + 'title': 'Anime Awesome: Chihiros Reise ins Zauberland – Das Beste kommt zum Schluss', + 'description': 'md5:afdf5862241aded4718a30dff6a57baf', + 'thumbnail': 're:^https?://.*\.jpg$', + 'duration': 578, + 'timestamp': 1414749706, + 'upload_date': '20141031', + 'uploader': 'Robin Schweiger', + 'view_count': int, + }, + }, { + 'url': 'http://www.giga.de/games/channel/giga-top-montag/giga-topmontag-die-besten-serien-2014/', + 'only_matching': True, + }, { + 'url': 'http://www.giga.de/extra/netzkultur/videos/giga-games-tom-mats-robin-werden-eigene-wege-gehen-eine-ankuendigung/', + 'only_matching': True, + }, { + 'url': 'http://www.giga.de/tv/jonas-liest-spieletitel-eingedeutscht-episode-2/', + 'only_matching': True, + }] + + def _real_extract(self, url): + display_id = self._match_id(url) + + webpage = self._download_webpage(url, display_id) + + video_id = self._search_regex( + [r'data-video-id="(\d+)"', r'/api/video/jwplayer/#v=(\d+)'], + webpage, 'video id') + + playlist = self._download_json( + 'http://www.giga.de/api/syndication/video/video_id/%s/playlist.json?content=syndication/key/368b5f151da4ae05ced7fa296bdff65a/' + % video_id, video_id)[0] + + quality = qualities(['normal', 'hd720']) + + formats = [] + for format_id in itertools.count(0): + fmt = playlist.get(compat_str(format_id)) + if not fmt: + break + formats.append({ + 'url': fmt['src'], + 'format_id': '%s-%s' % (fmt['quality'], fmt['type'].split('/')[-1]), + 'quality': quality(fmt['quality']), + }) + self._sort_formats(formats) + + title = self._html_search_meta( + 'title', webpage, 'title', fatal=True) + description = self._html_search_meta( + 'description', webpage, 'description') + thumbnail = self._og_search_thumbnail(webpage) + + duration = parse_duration(self._search_regex( + r'(?s)(?:data-video-id="{0}"|data-video="[^"]*/api/video/jwplayer/#v={0}[^"]*")[^>]*>.+?<span class="duration">([^<]+)</span>'.format(video_id), + webpage, 'duration', fatal=False)) + + timestamp = parse_iso8601(self._search_regex( + r'datetime="([^"]+)"', webpage, 'upload date', fatal=False)) + uploader = self._search_regex( + r'class="author">([^<]+)</a>', webpage, 'uploader', fatal=False) + + view_count = str_to_int(self._search_regex( + r'<span class="views"><strong>([\d.]+)</strong>', webpage, 'view count', fatal=False)) + + return { + 'id': video_id, + 'display_id': display_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'duration': duration, + 'timestamp': timestamp, + 'uploader': uploader, + 'view_count': view_count, + 'formats': formats, + } diff --git a/youtube_dl/extractor/khanacademy.py b/youtube_dl/extractor/khanacademy.py index 408d00944..08a671fa8 100644 --- a/youtube_dl/extractor/khanacademy.py +++ b/youtube_dl/extractor/khanacademy.py @@ -22,8 +22,10 @@ class KhanAcademyIE(InfoExtractor): 'description': 'The perfect cipher', 'duration': 176, 'uploader': 'Brit Cruise', + 'uploader_id': 'khanacademy', 'upload_date': '20120411', - } + }, + 'add_ie': ['Youtube'], }, { 'url': 'https://www.khanacademy.org/math/applied-math/cryptography', 'info_dict': { diff --git a/youtube_dl/extractor/kontrtube.py b/youtube_dl/extractor/kontrtube.py index 41fd62009..720bc939b 100644 --- a/youtube_dl/extractor/kontrtube.py +++ b/youtube_dl/extractor/kontrtube.py @@ -10,13 +10,14 @@ from ..utils import int_or_none class KontrTubeIE(InfoExtractor): IE_NAME = 'kontrtube' IE_DESC = 'KontrTube.ru - Труба зовёт' - _VALID_URL = r'http://(?:www\.)?kontrtube\.ru/videos/(?P<id>\d+)/.+' + _VALID_URL = r'http://(?:www\.)?kontrtube\.ru/videos/(?P<id>\d+)/(?P<display_id>[^/]+)/' _TEST = { 'url': 'http://www.kontrtube.ru/videos/2678/nad-olimpiyskoy-derevney-v-sochi-podnyat-rossiyskiy-flag/', 'md5': '975a991a4926c9a85f383a736a2e6b80', 'info_dict': { 'id': '2678', + 'display_id': 'nad-olimpiyskoy-derevney-v-sochi-podnyat-rossiyskiy-flag', 'ext': 'mp4', 'title': 'Над олимпийской деревней в Сочи поднят российский флаг', 'description': 'md5:80edc4c613d5887ae8ccf1d59432be41', @@ -28,21 +29,28 @@ class KontrTubeIE(InfoExtractor): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') + display_id = mobj.group('display_id') - webpage = self._download_webpage(url, video_id, 'Downloading page') + webpage = self._download_webpage( + url, display_id, 'Downloading page') - video_url = self._html_search_regex(r"video_url: '(.+?)/?',", webpage, 'video URL') - thumbnail = self._html_search_regex(r"preview_url: '(.+?)/?',", webpage, 'video thumbnail', fatal=False) + video_url = self._html_search_regex( + r"video_url\s*:\s*'(.+?)/?',", webpage, 'video URL') + thumbnail = self._html_search_regex( + r"preview_url\s*:\s*'(.+?)/?',", webpage, 'video thumbnail', fatal=False) title = self._html_search_regex( r'<title>(.+?)</title>', webpage, 'video title') - description = self._html_search_meta('description', webpage, 'video description') + description = self._html_search_meta( + 'description', webpage, 'video description') mobj = re.search( - r'<div class="col_2">Длительность: <span>(?P<minutes>\d+)м:(?P<seconds>\d+)с</span></div>', webpage) + r'<div class="col_2">Длительность: <span>(?P<minutes>\d+)м:(?P<seconds>\d+)с</span></div>', + webpage) duration = int(mobj.group('minutes')) * 60 + int(mobj.group('seconds')) if mobj else None view_count = self._html_search_regex( - r'<div class="col_2">Просмотров: <span>(\d+)</span></div>', webpage, 'view count', fatal=False) + r'<div class="col_2">Просмотров: <span>(\d+)</span></div>', + webpage, 'view count', fatal=False) comment_count = None comment_str = self._html_search_regex( @@ -56,6 +64,7 @@ class KontrTubeIE(InfoExtractor): return { 'id': video_id, + 'display_id': display_id, 'url': video_url, 'thumbnail': thumbnail, 'title': title, diff --git a/youtube_dl/extractor/lrt.py b/youtube_dl/extractor/lrt.py index d72d470aa..9c2fbdd96 100644 --- a/youtube_dl/extractor/lrt.py +++ b/youtube_dl/extractor/lrt.py @@ -2,7 +2,6 @@ from __future__ import unicode_literals import re -import json from .common import InfoExtractor from ..utils import ( @@ -28,7 +27,6 @@ class LRTIE(InfoExtractor): 'params': { 'skip_download': True, # HLS download }, - } def _real_extract(self, url): @@ -44,7 +42,9 @@ class LRTIE(InfoExtractor): formats = [] for js in re.findall(r'(?s)config:\s*(\{.*?\})', webpage): - data = json.loads(js_to_json(js)) + data = self._parse_json(js, video_id, transform_source=js_to_json) + if 'provider' not in data: + continue if data['provider'] == 'rtmp': formats.append({ 'format_id': 'rtmp', diff --git a/youtube_dl/extractor/motorsport.py b/youtube_dl/extractor/motorsport.py index f5ca74e97..c1a482dba 100644 --- a/youtube_dl/extractor/motorsport.py +++ b/youtube_dl/extractor/motorsport.py @@ -1,63 +1,49 @@ # coding: utf-8 from __future__ import unicode_literals -import hashlib -import json -import time - from .common import InfoExtractor from ..compat import ( - compat_parse_qs, - compat_str, -) -from ..utils import ( - int_or_none, + compat_urlparse, ) class MotorsportIE(InfoExtractor): IE_DESC = 'motorsport.com' - _VALID_URL = r'http://www\.motorsport\.com/[^/?#]+/video/(?:[^/?#]+/)(?P<id>[^/]+)/(?:$|[?#])' + _VALID_URL = r'http://www\.motorsport\.com/[^/?#]+/video/(?:[^/?#]+/)(?P<id>[^/]+)/?(?:$|[?#])' _TEST = { 'url': 'http://www.motorsport.com/f1/video/main-gallery/red-bull-racing-2014-rules-explained/', - 'md5': '5592cb7c5005d9b2c163df5ac3dc04e4', 'info_dict': { - 'id': '7063', + 'id': '2-T3WuR-KMM', 'ext': 'mp4', 'title': 'Red Bull Racing: 2014 Rules Explained', - 'duration': 207, + 'duration': 208, 'description': 'A new clip from Red Bull sees Daniel Ricciardo and Sebastian Vettel explain the 2014 Formula One regulations – which are arguably the most complex the sport has ever seen.', - 'uploader': 'rainiere', - 'thumbnail': r're:^http://.*motorsport\.com/.+\.jpg$' - } + 'uploader': 'mcomstaff', + 'uploader_id': 'UC334JIYKkVnyFoNCclfZtHQ', + 'upload_date': '20140903', + 'thumbnail': r're:^https?://.+\.jpg$' + }, + 'add_ie': ['Youtube'], + 'params': { + 'skip_download': True, + }, } def _real_extract(self, url): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) - flashvars_code = self._html_search_regex( - r'<embed id="player".*?flashvars="([^"]+)"', webpage, 'flashvars') - flashvars = compat_parse_qs(flashvars_code) - params = json.loads(flashvars['parameters'][0]) - - e = compat_str(int(time.time()) + 24 * 60 * 60) - base_video_url = params['location'] + '?e=' + e - s = 'h3hg713fh32' - h = hashlib.md5((s + base_video_url).encode('utf-8')).hexdigest() - video_url = base_video_url + '&h=' + h - - uploader = self._html_search_regex( - r'(?s)<span class="label">Video by: </span>(.*?)</a>', webpage, - 'uploader', fatal=False) + iframe_path = self._html_search_regex( + r'<iframe id="player_iframe"[^>]+src="([^"]+)"', webpage, + 'iframe path') + iframe = self._download_webpage( + compat_urlparse.urljoin(url, iframe_path), display_id, + 'Downloading iframe') + youtube_id = self._search_regex( + r'www.youtube.com/embed/(.{11})', iframe, 'youtube id') return { - 'id': params['video_id'], + '_type': 'url_transparent', 'display_id': display_id, - 'title': params['title'], - 'url': video_url, - 'description': params.get('description'), - 'thumbnail': params.get('main_thumb'), - 'duration': int_or_none(params.get('duration')), - 'uploader': uploader, + 'url': 'https://youtube.com/watch?v=%s' % youtube_id, } diff --git a/youtube_dl/extractor/normalboots.py b/youtube_dl/extractor/normalboots.py index 3d35b11ac..c13ff0d65 100644 --- a/youtube_dl/extractor/normalboots.py +++ b/youtube_dl/extractor/normalboots.py @@ -22,7 +22,11 @@ class NormalbootsIE(InfoExtractor): 'description': 'Jon is late for Christmas. Typical. Thanks to: Paul Ritchey for Co-Writing/Filming: http://www.youtube.com/user/ContinueShow Michael Azzi for Christmas Intro Animation: http://michafrar.tumblr.com/ Jerrod Waters for Christmas Intro Music: http://www.youtube.com/user/xXJerryTerryXx Casey Ormond for ‘Tense Battle Theme’:\xa0http://www.youtube.com/Kiamet/', 'uploader': 'JonTron', 'upload_date': '20140125', - } + }, + 'params': { + # rtmp download + 'skip_download': True, + }, } def _real_extract(self, url): diff --git a/youtube_dl/extractor/played.py b/youtube_dl/extractor/played.py index 449d4836c..45716c75d 100644 --- a/youtube_dl/extractor/played.py +++ b/youtube_dl/extractor/played.py @@ -26,6 +26,7 @@ class PlayedIE(InfoExtractor): 'ext': 'flv', 'title': 'youtube-dl_test_video.mp4', }, + 'skip': 'Removed for copyright infringement.', # oh wow } def _real_extract(self, url): diff --git a/youtube_dl/extractor/radiobremen.py b/youtube_dl/extractor/radiobremen.py new file mode 100644 index 000000000..0d706312e --- /dev/null +++ b/youtube_dl/extractor/radiobremen.py @@ -0,0 +1,63 @@ +# -*- coding: utf-8 -*- + +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import parse_duration + + +class RadioBremenIE(InfoExtractor): + _VALID_URL = r'http?://(?:www\.)?radiobremen\.de/mediathek/(?:index\.html)?\?id=(?P<id>[0-9]+)' + IE_NAME = 'radiobremen' + + _TEST = { + 'url': 'http://www.radiobremen.de/mediathek/index.html?id=114720', + 'info_dict': { + 'id': '114720', + 'ext': 'mp4', + 'duration': 1685, + 'width': 512, + 'title': 'buten un binnen vom 22. Dezember', + 'thumbnail': 're:https?://.*\.jpg$', + 'description': 'Unter anderem mit diesen Themen: 45 Flüchtlinge sind in Worpswede angekommen +++ Freies Internet für alle: Bremer arbeiten an einem flächendeckenden W-Lan-Netzwerk +++ Aktivisten kämpfen für das Unibad +++ So war das Wetter 2014 +++', + }, + } + + def _real_extract(self, url): + video_id = self._match_id(url) + + meta_url = "http://www.radiobremen.de/apps/php/mediathek/metadaten.php?id=%s" % video_id + meta_doc = self._download_webpage( + meta_url, video_id, 'Downloading metadata') + title = self._html_search_regex( + r"<h1.*>(?P<title>.+)</h1>", meta_doc, "title") + description = self._html_search_regex( + r"<p>(?P<description>.*)</p>", meta_doc, "description", fatal=False) + duration = parse_duration(self._html_search_regex( + r"Länge:</td>\s+<td>(?P<duration>[0-9]+:[0-9]+)</td>", + meta_doc, "duration", fatal=False)) + + page_doc = self._download_webpage( + url, video_id, 'Downloading video information') + mobj = re.search( + r"ardformatplayerclassic\(\'playerbereich\',\'(?P<width>[0-9]+)\',\'.*\',\'(?P<video_id>[0-9]+)\',\'(?P<secret>[0-9]+)\',\'(?P<thumbnail>.+)\',\'\'\)", + page_doc) + video_url = ( + "http://dl-ondemand.radiobremen.de/mediabase/%s/%s_%s_%s.mp4" % + (video_id, video_id, mobj.group("secret"), mobj.group('width'))) + + formats = [{ + 'url': video_url, + 'ext': 'mp4', + 'width': int(mobj.group("width")), + }] + return { + 'id': video_id, + 'title': title, + 'description': description, + 'duration': duration, + 'formats': formats, + 'thumbnail': mobj.group('thumbnail'), + } diff --git a/youtube_dl/extractor/rtlnl.py b/youtube_dl/extractor/rtlnl.py index d029b0ec5..a3ca79f2c 100644 --- a/youtube_dl/extractor/rtlnl.py +++ b/youtube_dl/extractor/rtlnl.py @@ -8,7 +8,7 @@ from ..utils import parse_duration class RtlXlIE(InfoExtractor): IE_NAME = 'rtlxl.nl' - _VALID_URL = r'https?://www\.rtlxl\.nl/#!/[^/]+/(?P<uuid>[^/?]+)' + _VALID_URL = r'https?://(www\.)?rtlxl\.nl/#!/[^/]+/(?P<uuid>[^/?]+)' _TEST = { 'url': 'http://www.rtlxl.nl/#!/rtl-nieuws-132237/6e4203a6-0a5e-3596-8424-c599a59e0677', diff --git a/youtube_dl/extractor/rutube.py b/youtube_dl/extractor/rutube.py index b72b5a586..5b1c3577a 100644 --- a/youtube_dl/extractor/rutube.py +++ b/youtube_dl/extractor/rutube.py @@ -70,6 +70,37 @@ class RutubeIE(InfoExtractor): } +class RutubeEmbedIE(InfoExtractor): + IE_NAME = 'rutube:embed' + IE_DESC = 'Rutube embedded videos' + _VALID_URL = 'https?://rutube\.ru/video/embed/(?P<id>[0-9]+)' + + _TEST = { + 'url': 'http://rutube.ru/video/embed/6722881?vk_puid37=&vk_puid38=', + 'info_dict': { + 'id': 'a10e53b86e8f349080f718582ce4c661', + 'ext': 'mp4', + 'upload_date': '20131223', + 'uploader_id': '297833', + 'description': 'Видео группы ★http://vk.com/foxkidsreset★ музей Fox Kids и Jetix<br/><br/> восстановлено и сделано в шикоформате subziro89 http://vk.com/subziro89', + 'uploader': 'subziro89 ILya', + 'title': 'Мистический городок Эйри в Индиан 5 серия озвучка subziro89', + }, + 'params': { + 'skip_download': 'Requires ffmpeg', + }, + } + + def _real_extract(self, url): + embed_id = self._match_id(url) + webpage = self._download_webpage(url, embed_id) + + canonical_url = self._html_search_regex( + r'<link\s+rel="canonical"\s+href="([^"]+?)"', webpage, + 'Canonical URL') + return self.url_result(canonical_url, 'Rutube') + + class RutubeChannelIE(InfoExtractor): IE_NAME = 'rutube:channel' IE_DESC = 'Rutube channels' diff --git a/youtube_dl/extractor/soulanime.py b/youtube_dl/extractor/soulanime.py new file mode 100644 index 000000000..feef33e27 --- /dev/null +++ b/youtube_dl/extractor/soulanime.py @@ -0,0 +1,80 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + HEADRequest, + urlhandle_detect_ext, +) + + +class SoulAnimeWatchingIE(InfoExtractor): + IE_NAME = "soulanime:watching" + IE_DESC = "SoulAnime video" + _TEST = { + 'url': 'http://www.soul-anime.net/watching/seirei-tsukai-no-blade-dance-episode-9/', + 'md5': '05fae04abf72298098b528e98abf4298', + 'info_dict': { + 'id': 'seirei-tsukai-no-blade-dance-episode-9', + 'ext': 'mp4', + 'title': 'seirei-tsukai-no-blade-dance-episode-9', + 'description': 'seirei-tsukai-no-blade-dance-episode-9' + } + } + _VALID_URL = r'http://[w.]*soul-anime\.(?P<domain>[^/]+)/watch[^/]*/(?P<id>[^/]+)' + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + domain = mobj.group('domain') + + page = self._download_webpage(url, video_id) + + video_url_encoded = self._html_search_regex( + r'<div id="download">[^<]*<a href="(?P<url>[^"]+)"', page, 'url') + video_url = "http://www.soul-anime." + domain + video_url_encoded + + ext_req = HEADRequest(video_url) + ext_handle = self._request_webpage( + ext_req, video_id, note='Determining extension') + ext = urlhandle_detect_ext(ext_handle) + + return { + 'id': video_id, + 'url': video_url, + 'ext': ext, + 'title': video_id, + 'description': video_id + } + + +class SoulAnimeSeriesIE(InfoExtractor): + IE_NAME = "soulanime:series" + IE_DESC = "SoulAnime Series" + + _VALID_URL = r'http://[w.]*soul-anime\.(?P<domain>[^/]+)/anime./(?P<id>[^/]+)' + + _EPISODE_REGEX = r'<option value="(/watch[^/]*/[^"]+)">[^<]*</option>' + + _TEST = { + 'url': 'http://www.soul-anime.net/anime1/black-rock-shooter-tv/', + 'info_dict': { + 'id': 'black-rock-shooter-tv' + }, + 'playlist_count': 8 + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + series_id = mobj.group('id') + domain = mobj.group('domain') + + pattern = re.compile(self._EPISODE_REGEX) + + page = self._download_webpage(url, series_id, "Downloading series page") + mobj = pattern.findall(page) + + entries = [self.url_result("http://www.soul-anime." + domain + obj) for obj in mobj] + + return self.playlist_result(entries, series_id) diff --git a/youtube_dl/extractor/ted.py b/youtube_dl/extractor/ted.py index 944177426..10b3b706a 100644 --- a/youtube_dl/extractor/ted.py +++ b/youtube_dl/extractor/ted.py @@ -13,7 +13,7 @@ from ..compat import ( class TEDIE(SubtitlesInfoExtractor): _VALID_URL = r'''(?x) (?P<proto>https?://) - (?P<type>www|embed)(?P<urlmain>\.ted\.com/ + (?P<type>www|embed(?:-ssl)?)(?P<urlmain>\.ted\.com/ ( (?P<type_playlist>playlists(?:/\d+)?) # We have a playlist | @@ -98,7 +98,7 @@ class TEDIE(SubtitlesInfoExtractor): def _real_extract(self, url): m = re.match(self._VALID_URL, url, re.VERBOSE) - if m.group('type') == 'embed': + if m.group('type').startswith('embed'): desktop_url = m.group('proto') + 'www' + m.group('urlmain') return self.url_result(desktop_url, 'TED') name = m.group('name') diff --git a/youtube_dl/extractor/tf1.py b/youtube_dl/extractor/tf1.py index 6e61cc9e2..025d0877c 100644 --- a/youtube_dl/extractor/tf1.py +++ b/youtube_dl/extractor/tf1.py @@ -1,15 +1,13 @@ # coding: utf-8 from __future__ import unicode_literals -import re - from .common import InfoExtractor class TF1IE(InfoExtractor): """TF1 uses the wat.tv player.""" - _VALID_URL = r'http://videos\.tf1\.fr/.*-(?P<id>.*?)\.html' - _TEST = { + _VALID_URL = r'http://(?:videos\.tf1|www\.tfou)\.fr/.*?-(?P<id>\d+)(?:-\d+)?\.html' + _TESTS = { 'url': 'http://videos.tf1.fr/auto-moto/citroen-grand-c4-picasso-2013-presentation-officielle-8062060.html', 'info_dict': { 'id': '10635995', @@ -21,14 +19,26 @@ class TF1IE(InfoExtractor): # Sometimes wat serves the whole file with the --test option 'skip_download': True, }, + }, { + 'url': 'http://www.tfou.fr/chuggington/videos/le-grand-mysterioso-chuggington-7085291-739.html', + 'info_dict': { + 'id': '12043945', + 'ext': 'mp4', + 'title': 'Le grand Mystérioso - Chuggington', + 'description': 'Le grand Mystérioso - Emery rêve qu\'un article lui soit consacré dans le journal.', + 'upload_date': '20150103', + }, + 'params': { + # Sometimes wat serves the whole file with the --test option + 'skip_download': True, + }, } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) embed_url = self._html_search_regex( - r'"(https://www.wat.tv/embedframe/.*?)"', webpage, 'embed url') + r'["\'](https?://www.wat.tv/embedframe/.*?)["\']', webpage, 'embed url') embed_page = self._download_webpage(embed_url, video_id, 'Downloading embed player page') wat_id = self._search_regex(r'UVID=(.*?)&', embed_page, 'wat id') diff --git a/youtube_dl/extractor/vier.py b/youtube_dl/extractor/vier.py index 0d9fb09a7..619039e51 100644 --- a/youtube_dl/extractor/vier.py +++ b/youtube_dl/extractor/vier.py @@ -63,7 +63,7 @@ class VierIE(InfoExtractor): class VierVideosIE(InfoExtractor): IE_NAME = 'vier:videos' - _VALID_URL = r'https?://(?:www\.)?vier\.be/(?P<program>[^/]+)/videos(?:\?.*\bpage=(?P<page>\d+))?' + _VALID_URL = r'https?://(?:www\.)?vier\.be/(?P<program>[^/]+)/videos(?:\?.*\bpage=(?P<page>\d+)|$)' _TESTS = [{ 'url': 'http://www.vier.be/demoestuin/videos', 'info_dict': { diff --git a/youtube_dl/extractor/vimple.py b/youtube_dl/extractor/vimple.py index 33d370e1c..ee3d86117 100644 --- a/youtube_dl/extractor/vimple.py +++ b/youtube_dl/extractor/vimple.py @@ -14,28 +14,17 @@ class VimpleIE(InfoExtractor): IE_DESC = 'Vimple.ru' _VALID_URL = r'https?://(player.vimple.ru/iframe|vimple.ru)/(?P<id>[a-f0-9]{10,})' _TESTS = [ - # Quality: Large, from iframe { - 'url': 'http://player.vimple.ru/iframe/b132bdfd71b546d3972f9ab9a25f201c', + 'url': 'http://vimple.ru/c0f6b1687dcd4000a97ebe70068039cf', + 'md5': '2e750a330ed211d3fd41821c6ad9a279', 'info_dict': { - 'id': 'b132bdfd71b546d3972f9ab9a25f201c', - 'title': 'great-escape-minecraft.flv', + 'id': 'c0f6b1687dcd4000a97ebe70068039cf', 'ext': 'mp4', - 'duration': 352, - 'webpage_url': 'http://vimple.ru/b132bdfd71b546d3972f9ab9a25f201c', + 'title': 'Sunset', + 'duration': 20, + 'thumbnail': 're:https?://.*?\.jpg', }, }, - # Quality: Medium, from mainpage - { - 'url': 'http://vimple.ru/a15950562888453b8e6f9572dc8600cd', - 'info_dict': { - 'id': 'a15950562888453b8e6f9572dc8600cd', - 'title': 'DB 01', - 'ext': 'flv', - 'duration': 1484, - 'webpage_url': 'http://vimple.ru/a15950562888453b8e6f9572dc8600cd', - } - }, ] def _real_extract(self, url): diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index 542e9198a..129de6cf3 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -164,6 +164,15 @@ class VKIE(InfoExtractor): self.to_screen('Youtube video detected') return self.url_result(m_yt.group(1), 'Youtube') + m_rutube = re.search( + r'\ssrc="((?:https?:)?//rutube\.ru\\?/video\\?/embed(?:.*?))\\?"', info_page) + assert m_rutube + if m_rutube is not None: + self.to_screen('rutube video detected') + rutube_url = self._proto_relative_url( + m_rutube.group(1).replace('\\', '')) + return self.url_result(rutube_url) + m_opts = re.search(r'(?s)var\s+opts\s*=\s*({.*?});', info_page) if m_opts: m_opts_url = re.search(r"url\s*:\s*'([^']+)", m_opts.group(1)) diff --git a/youtube_dl/extractor/webofstories.py b/youtube_dl/extractor/webofstories.py new file mode 100644 index 000000000..396cf4e83 --- /dev/null +++ b/youtube_dl/extractor/webofstories.py @@ -0,0 +1,102 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import int_or_none + + +class WebOfStoriesIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?webofstories\.com/play/(?:[^/]+/)?(?P<id>[0-9]+)' + _VIDEO_DOMAIN = 'http://eu-mobile.webofstories.com/' + _GREAT_LIFE_STREAMER = 'rtmp://eu-cdn1.webofstories.com/cfx/st/' + _USER_STREAMER = 'rtmp://eu-users.webofstories.com/cfx/st/' + _TESTS = [ + { + 'url': 'http://www.webofstories.com/play/hans.bethe/71', + 'md5': '373e4dd915f60cfe3116322642ddf364', + 'info_dict': { + 'id': '4536', + 'ext': 'mp4', + 'title': 'The temperature of the sun', + 'thumbnail': 're:^https?://.*\.jpg$', + 'description': 'Hans Bethe talks about calculating the temperature of the sun', + 'duration': 238, + } + }, + { + 'url': 'http://www.webofstories.com/play/55908', + 'md5': '2985a698e1fe3211022422c4b5ed962c', + 'info_dict': { + 'id': '55908', + 'ext': 'mp4', + 'title': 'The story of Gemmata obscuriglobus', + 'thumbnail': 're:^https?://.*\.jpg$', + 'description': 'Planctomycete talks about The story of Gemmata obscuriglobus', + 'duration': 169, + } + }, + ] + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + title = self._og_search_title(webpage) + description = self._html_search_meta('description', webpage) + thumbnail = self._og_search_thumbnail(webpage) + + story_filename = self._search_regex( + r'\.storyFileName\("([^"]+)"\)', webpage, 'story filename') + speaker_id = self._search_regex( + r'\.speakerId\("([^"]+)"\)', webpage, 'speaker ID') + story_id = self._search_regex( + r'\.storyId\((\d+)\)', webpage, 'story ID') + speaker_type = self._search_regex( + r'\.speakerType\("([^"]+)"\)', webpage, 'speaker type') + great_life = self._search_regex( + r'isGreatLifeStory\s*=\s*(true|false)', webpage, 'great life story') + is_great_life_series = great_life == 'true' + duration = int_or_none(self._search_regex( + r'\.duration\((\d+)\)', webpage, 'duration', fatal=False)) + + # URL building, see: http://www.webofstories.com/scripts/player.js + ms_prefix = '' + if speaker_type.lower() == 'ms': + ms_prefix = 'mini_sites/' + + if is_great_life_series: + mp4_url = '{0:}lives/{1:}/{2:}.mp4'.format( + self._VIDEO_DOMAIN, speaker_id, story_filename) + rtmp_ext = 'flv' + streamer = self._GREAT_LIFE_STREAMER + play_path = 'stories/{0:}/{1:}'.format( + speaker_id, story_filename) + else: + mp4_url = '{0:}{1:}{2:}/{3:}.mp4'.format( + self._VIDEO_DOMAIN, ms_prefix, speaker_id, story_filename) + rtmp_ext = 'mp4' + streamer = self._USER_STREAMER + play_path = 'mp4:{0:}{1:}/{2}.mp4'.format( + ms_prefix, speaker_id, story_filename) + + formats = [{ + 'format_id': 'mp4_sd', + 'url': mp4_url, + }, { + 'format_id': 'rtmp_sd', + 'page_url': url, + 'url': streamer, + 'ext': rtmp_ext, + 'play_path': play_path, + }] + + self._sort_formats(formats) + + return { + 'id': story_id, + 'title': title, + 'formats': formats, + 'thumbnail': thumbnail, + 'description': description, + 'duration': duration, + } diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 8c7842ee8..f0efaf0d9 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -256,7 +256,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, - '138': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, + '138': {'ext': 'mp4', 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, # Height can vary (https://github.com/rg3/youtube-dl/issues/4559) '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'h264'}, @@ -287,7 +287,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'VP9'}, '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'VP9'}, + '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'VP9'}, '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'vcodec': 'VP9'}, + '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'VP9'}, # Dash webm audio '171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 128, 'preference': -50}, @@ -736,6 +738,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): 'format_id': format_id, 'url': video_url, 'width': int_or_none(r.attrib.get('width')), + 'height': int_or_none(r.attrib.get('height')), 'tbr': int_or_none(r.attrib.get('bandwidth'), 1000), 'asr': int_or_none(r.attrib.get('audioSamplingRate')), 'filesize': filesize, @@ -746,7 +749,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): fo for fo in formats if fo['format_id'] == format_id) except StopIteration: - f.update(self._formats.get(format_id, {})) + f.update(self._formats.get(format_id, {}).items()) formats.append(f) else: existing_format.update(f) @@ -1040,6 +1043,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): self.report_warning( 'Skipping DASH manifest: %r' % e, video_id) else: + # Hide the formats we found through non-DASH + dash_keys = set(df['format_id'] for df in dash_formats) + for f in formats: + if f['format_id'] in dash_keys: + f['format_id'] = 'nondash-%s' % f['format_id'] + f['preference'] = f.get('preference', 0) - 10000 formats.extend(dash_formats) self._sort_formats(formats) @@ -1199,9 +1208,6 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor): if playlist_id.startswith('RD'): # Mixes require a custom extraction process return self._extract_mix(playlist_id) - if playlist_id.startswith('TL'): - raise ExtractorError('For downloading YouTube.com top lists, use ' - 'the "yttoplist" keyword, for example "youtube-dl \'yttoplist:music:Top Tracks\'"', expected=True) url = self._TEMPLATE_URL % playlist_id page = self._download_webpage(url, playlist_id) @@ -1247,49 +1253,6 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor): return self.playlist_result(url_results, playlist_id, playlist_title) -class YoutubeTopListIE(YoutubePlaylistIE): - IE_NAME = 'youtube:toplist' - IE_DESC = ('YouTube.com top lists, "yttoplist:{channel}:{list title}"' - ' (Example: "yttoplist:music:Top Tracks")') - _VALID_URL = r'yttoplist:(?P<chann>.*?):(?P<title>.*?)$' - _TESTS = [{ - 'url': 'yttoplist:music:Trending', - 'playlist_mincount': 5, - 'skip': 'Only works for logged-in users', - }] - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - channel = mobj.group('chann') - title = mobj.group('title') - query = compat_urllib_parse.urlencode({'title': title}) - channel_page = self._download_webpage( - 'https://www.youtube.com/%s' % channel, title) - link = self._html_search_regex( - r'''(?x) - <a\s+href="([^"]+)".*?>\s* - <span\s+class="branded-page-module-title-text">\s* - <span[^>]*>.*?%s.*?</span>''' % re.escape(query), - channel_page, 'list') - url = compat_urlparse.urljoin('https://www.youtube.com/', link) - - video_re = r'data-index="\d+".*?data-video-id="([0-9A-Za-z_-]{11})"' - ids = [] - # sometimes the webpage doesn't contain the videos - # retry until we get them - for i in itertools.count(0): - msg = 'Downloading Youtube mix' - if i > 0: - msg += ', retry #%d' % i - - webpage = self._download_webpage(url, title, msg) - ids = orderedSet(re.findall(video_re, webpage)) - if ids: - break - url_results = self._ids_to_results(ids) - return self.playlist_result(url_results, playlist_title=title) - - class YoutubeChannelIE(InfoExtractor): IE_DESC = 'YouTube.com channels' _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/(?P<id>[0-9A-Za-z_-]+)' diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py index 048525efc..473536dcc 100644 --- a/youtube_dl/postprocessor/ffmpeg.py +++ b/youtube_dl/postprocessor/ffmpeg.py @@ -520,7 +520,7 @@ class FFmpegMetadataPP(FFmpegPostProcessor): class FFmpegMergerPP(FFmpegPostProcessor): def run(self, info): filename = info['filepath'] - args = ['-c', 'copy', '-map', '0:v:0', '-map', '1:a:0', '-shortest'] + args = ['-c', 'copy', '-map', '0:v:0', '-map', '1:a:0'] self._downloader.to_screen('[ffmpeg] Merging formats into "%s"' % filename) self.run_ffmpeg_multiple_files(info['__files_to_merge'], filename, args) return True, info diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index efbe64fb3..d4951c406 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1550,3 +1550,13 @@ def ytdl_is_updateable(): def args_to_str(args): # Get a short string representation for a subprocess command return ' '.join(shlex_quote(a) for a in args) + + +def urlhandle_detect_ext(url_handle): + try: + url_handle.headers + getheader = lambda h: url_handle.headers[h] + except AttributeError: # Python < 3 + getheader = url_handle.info().getheader + + return getheader('Content-Type').split("/")[1] diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 58b5021dc..2124e954f 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2015.01.02' +__version__ = '2015.01.05.1' |