Diffstat (limited to 'youtube_dl')
-rw-r--r-- | youtube_dl/FileDownloader.py | 17
-rwxr-xr-x | youtube_dl/InfoExtractors.py | 96
-rw-r--r-- | youtube_dl/__init__.py       |  9
-rwxr-xr-x | youtube_dl/__main__.py       |  3
-rw-r--r-- | youtube_dl/update.py         |  2
-rw-r--r-- | youtube_dl/version.py        |  2
6 files changed, 102 insertions, 27 deletions
diff --git a/youtube_dl/FileDownloader.py b/youtube_dl/FileDownloader.py
index 4dabbb440..03346ab04 100644
--- a/youtube_dl/FileDownloader.py
+++ b/youtube_dl/FileDownloader.py
@@ -388,7 +388,11 @@ class FileDownloader(object):
             template_dict = dict(info_dict)
 
             template_dict['epoch'] = int(time.time())
-            template_dict['autonumber'] = u'%05d' % self._num_downloads
+            autonumber_size = self.params.get('autonumber_size')
+            if autonumber_size is None:
+                autonumber_size = 5
+            autonumber_templ = u'%0' + str(autonumber_size) + u'd'
+            template_dict['autonumber'] = autonumber_templ % self._num_downloads
             if template_dict['playlist_index'] is not None:
                 template_dict['playlist_index'] = u'%05d' % template_dict['playlist_index']
 
@@ -524,6 +528,10 @@ class FileDownloader(object):
 
         #We increment the download the download count here to match the previous behaviour.
         self.increment_downloads()
+        info_dict['fulltitle'] = info_dict['title']
+        if len(info_dict['title']) > 200:
+            info_dict['title'] = info_dict['title'][:197] + u'...'
+
         # Keep for backwards compatibility
         info_dict['stitle'] = info_dict['title']
 
@@ -692,7 +700,7 @@ class FileDownloader(object):
             except (IOError, OSError):
                 self.report_warning(u'Unable to remove downloaded video file')
 
-    def _download_with_rtmpdump(self, filename, url, player_url, page_url):
+    def _download_with_rtmpdump(self, filename, url, player_url, page_url, play_path):
         self.report_destination(filename)
         tmpfilename = self.temp_name(filename)
 
@@ -711,6 +719,8 @@ class FileDownloader(object):
             basic_args += ['-W', player_url]
         if page_url is not None:
             basic_args += ['--pageUrl', page_url]
+        if play_path is not None:
+            basic_args += ['-y', play_path]
         args = basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)]
         if self.params.get('verbose', False):
             try:
@@ -765,7 +775,8 @@ class FileDownloader(object):
         if url.startswith('rtmp'):
             return self._download_with_rtmpdump(filename, url,
                                                 info_dict.get('player_url', None),
-                                                info_dict.get('page_url', None))
+                                                info_dict.get('page_url', None),
+                                                info_dict.get('play_path', None))
 
         tmpfilename = self.temp_name(filename)
         stream = None
diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py
index a7fdf1607..ae36558d7 100755
--- a/youtube_dl/InfoExtractors.py
+++ b/youtube_dl/InfoExtractors.py
@@ -115,7 +115,8 @@ class InfoExtractor(object):
         """ Returns the response handle """
         if note is None:
             note = u'Downloading video webpage'
-        self._downloader.to_screen(u'[%s] %s: %s' % (self.IE_NAME, video_id, note))
+        if note is not False:
+            self._downloader.to_screen(u'[%s] %s: %s' % (self.IE_NAME, video_id, note))
         try:
             return compat_urllib_request.urlopen(url_or_request)
         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
@@ -133,6 +134,14 @@ class InfoExtractor(object):
         else:
             encoding = 'utf-8'
         webpage_bytes = urlh.read()
+        if self._downloader.params.get('dump_intermediate_pages', False):
+            try:
+                url = url_or_request.get_full_url()
+            except AttributeError:
+                url = url_or_request
+            self._downloader.to_screen(u'Dumping request to ' + url)
+            dump = base64.b64encode(webpage_bytes).decode('ascii')
+            self._downloader.to_screen(dump)
         return webpage_bytes.decode(encoding, 'replace')
 
     #Methods for following #608
@@ -485,18 +494,14 @@ class YoutubeIE(InfoExtractor):
         # Get video info
         self.report_video_info_webpage_download(video_id)
         for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
-            video_info_url = ('http://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
+            video_info_url = ('https://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
                     % (video_id, el_type))
-            request = compat_urllib_request.Request(video_info_url)
-            try:
-                video_info_webpage_bytes = compat_urllib_request.urlopen(request).read()
-                video_info_webpage = video_info_webpage_bytes.decode('utf-8', 'ignore')
-                video_info = compat_parse_qs(video_info_webpage)
-                if 'token' in video_info:
-                    break
-            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-                self._downloader.report_error(u'unable to download video info webpage: %s' % compat_str(err))
-                return
+            video_info_webpage = self._download_webpage(video_info_url, video_id,
+                                    note=False,
+                                    errnote='unable to download video info webpage')
+            video_info = compat_parse_qs(video_info_webpage)
+            if 'token' in video_info:
+                break
         if 'token' not in video_info:
             if 'reason' in video_info:
                 self._downloader.report_error(u'YouTube said: %s' % video_info['reason'][0])
@@ -1151,7 +1156,7 @@ class VimeoIE(InfoExtractor):
         # Extract video description
         video_description = get_element_by_attribute("itemprop", "description", webpage)
         if video_description: video_description = clean_html(video_description)
-        else: video_description = ''
+        else: video_description = u''
 
         # Extract upload date
         video_upload_date = None
@@ -1794,9 +1799,13 @@ class YoutubePlaylistIE(InfoExtractor):
                 self._downloader.report_error(u'Invalid JSON in API response: ' + compat_str(err))
                 return
 
-            if not 'feed' in response or not 'entry' in response['feed']:
+            if 'feed' not in response:
                 self._downloader.report_error(u'Got a malformed response from YouTube API')
                 return
+            if 'entry' not in response['feed']:
+                # Number of videos is a multiple of self._MAX_RESULTS
+                break
+
             videos += [ (entry['yt$position']['$t'], entry['content']['src'])
                         for entry in response['feed']['entry']
                         if 'content' in entry ]
@@ -2144,7 +2153,7 @@ class FacebookIE(InfoExtractor):
         url = 'https://www.facebook.com/video/video.php?v=%s' % video_id
         webpage = self._download_webpage(url, video_id)
 
-        BEFORE = '[["allowFullScreen","true"],["allowScriptAccess","always"],["salign","tl"],["scale","noscale"],["wmode","opaque"]].forEach(function(param) {swf.addParam(param[0], param[1]);});\n'
+        BEFORE = '{swf.addParam(param[0], param[1]);});\n'
         AFTER = '.forEach(function(variable) {swf.addVariable(variable[0], variable[1]);});'
         m = re.search(re.escape(BEFORE) + '(.*?)' + re.escape(AFTER), webpage)
         if not m:
@@ -2152,12 +2161,14 @@
         data = dict(json.loads(m.group(1)))
         params_raw = compat_urllib_parse.unquote(data['params'])
         params = json.loads(params_raw)
-        video_url = params['hd_src']
+        video_data = params['video_data'][0]
+        video_url = video_data.get('hd_src')
         if not video_url:
-            video_url = params['sd_src']
+            video_url = video_data['sd_src']
         if not video_url:
             raise ExtractorError(u'Cannot find video URL')
-        video_duration = int(params['video_duration'])
+        video_duration = int(video_data['video_duration'])
+        thumbnail = video_data['thumbnail_src']
 
         m = re.search('<h2 class="uiHeaderTitle">([^<]+)</h2>', webpage)
         if not m:
@@ -2170,7 +2181,7 @@
             'url': video_url,
             'ext': 'mp4',
             'duration': video_duration,
-            'thumbnail': params['thumbnail_src'],
+            'thumbnail': thumbnail,
         }
 
         return [info]
@@ -3685,7 +3696,9 @@ class FunnyOrDieIE(InfoExtractor):
         m = re.search(r"<h1 class='player_page_h1'.*?>(?P<title>.*?)</h1>", webpage,
             flags=re.DOTALL)
         if not m:
-            self._downloader.trouble(u'Cannot find video title')
+            m = re.search(r'<title>(?P<title>[^<]+?)</title>', webpage)
+            if not m:
+                self._downloader.trouble(u'Cannot find video title')
         title = clean_html(m.group('title'))
 
         m = re.search(r'<meta property="og:description" content="(?P<desc>.*?)"', webpage)
@@ -4119,7 +4132,7 @@ class KeekIE(InfoExtractor):
         video_url = u'http://cdn.keek.com/keek/video/%s' % video_id
         thumbnail = u'http://cdn.keek.com/keek/thumbnail/%s/w100/h75' % video_id
         webpage = self._download_webpage(url, video_id)
-        m = re.search(r'<meta property="og:title" content="(?P<title>.+)"', webpage)
+        m = re.search(r'<meta property="og:title" content="(?P<title>.*?)"', webpage)
         title = unescapeHTML(m.group('title'))
         m = re.search(r'<div class="user-name-and-bio">[\S\s]+?<h2>(?P<uploader>.+?)</h2>', webpage)
         uploader = clean_html(m.group('uploader'))
@@ -4344,6 +4357,46 @@ class LiveLeakIE(InfoExtractor):
 
         return [info]
 
+class ARDIE(InfoExtractor):
+    _VALID_URL = r'^(?:https?://)?(?:(?:www\.)?ardmediathek\.de|mediathek\.daserste\.de)/(?:.*/)(?P<video_id>[^/\?]+)(?:\?.*)?'
+    _TITLE = r'<h1(?: class="boxTopHeadline")?>(?P<title>.*)</h1>'
+    _MEDIA_STREAM = r'mediaCollection\.addMediaStream\((?P<media_type>\d+), (?P<quality>\d+), "(?P<rtmp_url>[^"]*)", "(?P<video_url>[^"]*)", "[^"]*"\)'
+
+    def _real_extract(self, url):
+        # determine video id from url
+        m = re.match(self._VALID_URL, url)
+
+        numid = re.search(r'documentId=([0-9]+)', url)
+        if numid:
+            video_id = numid.group(1)
+        else:
+            video_id = m.group('video_id')
+
+        # determine title and media streams from webpage
+        html = self._download_webpage(url, video_id)
+        title = re.search(self._TITLE, html).group('title')
+        streams = [m.groupdict() for m in re.finditer(self._MEDIA_STREAM, html)]
+        if not streams:
+            assert '"fsk"' in html
+            self._downloader.report_error(u'this video is only available after 8:00 pm')
+            return
+
+        # choose default media type and highest quality for now
+        stream = max([s for s in streams if int(s["media_type"]) == 0],
+                     key=lambda s: int(s["quality"]))
+
+        # there's two possibilities: RTMP stream or HTTP download
+        info = {'id': video_id, 'title': title, 'ext': 'mp4'}
+        if stream['rtmp_url']:
+            self._downloader.to_screen(u'[%s] RTMP download detected' % self.IE_NAME)
+            assert stream['video_url'].startswith('mp4:')
+            info["url"] = stream["rtmp_url"]
+            info["play_path"] = stream['video_url']
+        else:
+            assert stream["video_url"].endswith('.mp4')
+            info["url"] = stream["video_url"]
+        return [info]
+
 def gen_extractors():
     """ Return a list of an instance of every supported extractor.
@@ -4397,5 +4450,6 @@ def gen_extractors():
         MySpassIE(),
         SpiegelIE(),
         LiveLeakIE(),
+        ARDIE(),
         GenericIE()
     ]
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py
index 807b73541..f46143e01 100644
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -24,6 +24,7 @@ __authors__ = (
     'Jaime Marquínez Ferrándiz',
     'Jeff Crouse',
     'Osama Khalid',
+    'Michael Walter',
     )
 
 __license__ = 'Public Domain'
@@ -223,6 +224,9 @@ def parseOpts():
             help='display progress in console titlebar', default=False)
     verbosity.add_option('-v', '--verbose',
             action='store_true', dest='verbose', help='print various debugging information', default=False)
+    verbosity.add_option('--dump-intermediate-pages',
+            action='store_true', dest='dump_intermediate_pages', default=False,
+            help='print downloaded pages to debug problems(very verbose)')
 
     filesystem.add_option('-t', '--title',
             action='store_true', dest='usetitle', help='use title in file name', default=False)
@@ -235,6 +239,9 @@
             help='number downloaded files starting from 00000', default=False)
     filesystem.add_option('-o', '--output',
             dest='outtmpl', metavar='TEMPLATE', help='output filename template. Use %(title)s to get the title, %(uploader)s for the uploader name, %(uploader_id)s for the uploader nickname if different, %(autonumber)s to get an automatically incremented number, %(ext)s for the filename extension, %(upload_date)s for the upload date (YYYYMMDD), %(extractor)s for the provider (youtube, metacafe, etc), %(id)s for the video id and %% for a literal percent. Use - to output to stdout. Can also be used to download to a different directory, for example with -o \'/my/downloads/%(uploader)s/%(title)s-%(id)s.%(ext)s\' .')
+    filesystem.add_option('--autonumber-size',
+            dest='autonumber_size', metavar='NUMBER',
+            help='Specifies the number of digits in %(autonumber)s when it is present in output filename template or --autonumber option is given')
     filesystem.add_option('--restrict-filenames',
             action='store_true', dest='restrictfilenames',
             help='Restrict filenames to only ASCII characters, and avoid "&" and spaces in filenames', default=False)
@@ -451,6 +458,7 @@ def _real_main():
         'format_limit': opts.format_limit,
         'listformats': opts.listformats,
         'outtmpl': outtmpl,
+        'autonumber_size': opts.autonumber_size,
         'restrictfilenames': opts.restrictfilenames,
         'ignoreerrors': opts.ignoreerrors,
         'ratelimit': opts.ratelimit,
@@ -480,6 +488,7 @@ def _real_main():
         'max_downloads': opts.max_downloads,
         'prefer_free_formats': opts.prefer_free_formats,
         'verbose': opts.verbose,
+        'dump_intermediate_pages': opts.dump_intermediate_pages,
         'test': opts.test,
         'keepvideo': opts.keepvideo,
         'min_filesize': opts.min_filesize,
diff --git a/youtube_dl/__main__.py b/youtube_dl/__main__.py
index 7022ea4be..3fe29c91f 100755
--- a/youtube_dl/__main__.py
+++ b/youtube_dl/__main__.py
@@ -9,7 +9,8 @@ import sys
 if __package__ is None and not hasattr(sys, "frozen"):
     # direct call of __main__.py
     import os.path
-    sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+    path = os.path.realpath(os.path.abspath(__file__))
+    sys.path.append(os.path.dirname(os.path.dirname(path)))
 
 import youtube_dl
 
diff --git a/youtube_dl/update.py b/youtube_dl/update.py
index b446dd94c..d6e293875 100644
--- a/youtube_dl/update.py
+++ b/youtube_dl/update.py
@@ -37,7 +37,7 @@ def rsa_verify(message, signature, key):
 
 def update_self(to_screen, verbose, filename):
     """Update the program file with the latest version from the repository"""
-    UPDATE_URL = "http://rg3.github.com/youtube-dl/update/"
+    UPDATE_URL = "http://rg3.github.io/youtube-dl/update/"
     VERSION_URL = UPDATE_URL + 'LATEST_VERSION'
     JSON_URL = UPDATE_URL + 'versions.json'
     UPDATES_RSA_KEY = (0x9d60ee4d8f805312fdb15a62f87b95bd66177b91df176765d13514a0f1754bcd2057295c5b6f1d35daa6742c3ffc9a82d3e118861c207995a8031e151d863c9927e304576bc80692bc8e094896fcf11b66f3e29e04e3a71e9a11558558acea1840aec37fc396fb6b65dc81a1c4144e03bd1c011de62e3f1357b327d08426fe93, 65537)
diff --git a/youtube_dl/version.py b/youtube_dl/version.py
index c433e2eaa..2fd5f40c8 100644
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,2 +1,2 @@
 
-__version__ = '2013.04.03'
+__version__ = '2013.04.18'