From bae611f216ac7b1f1a24a506da6dffc518d09d5b Mon Sep 17 00:00:00 2001 From: Arvydas Sidorenko Date: Sun, 1 Jul 2012 18:21:27 +0200 Subject: Simplified preferredencoding() It is unclear why a generator (yield) was used to return the encoding; returning the value directly simplifies the whole function. Signed-off-by: Arvydas Sidorenko --- youtube_dl/utils.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 2853ba50f..7faa046c8 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -32,15 +32,13 @@ def preferredencoding(): Returns the best encoding scheme for the system, based on locale.getpreferredencoding() and some further tweaks. """ - def yield_preferredencoding(): - try: - pref = locale.getpreferredencoding() - u'TEST'.encode(pref) - except: - pref = 'UTF-8' - while True: - yield pref - return yield_preferredencoding().next() + try: + pref = locale.getpreferredencoding() + u'TEST'.encode(pref) + except: + pref = 'UTF-8' + + return pref def htmlentity_transform(matchobj): -- cgit v1.2.3 From 891d7f232959f85810011fe32b107a0dfd5db85b Mon Sep 17 00:00:00 2001 From: Joel Verhagen Date: Sat, 14 Jul 2012 16:47:19 -0400 Subject: Added options to set download buffer size and disable automatic buffer resizing. --- README.md | 3 +++ youtube-dl | Bin 40554 -> 40754 bytes youtube-dl.exe | Bin 3989631 -> 3989797 bytes youtube_dl/FileDownloader.py | 7 +++++-- youtube_dl/__init__.py | 12 ++++++++++++ 5 files changed, 20 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index f04b96128..fc9c1a6b8 100644 --- a/README.md +++ b/README.md @@ -17,6 +17,9 @@ which means you can modify it, redistribute it or use it however you like. -i, --ignore-errors continue on download errors -r, --rate-limit LIMIT download rate limit (e.g. 50k or 44.6m) -R, --retries RETRIES number of retries (default is 10) + -b, --buffer-size SIZE size of download buffer (e.g. 
1024 or 16k) (default + is 1024) + --no-resize-buffer do not automatically adjust the buffer size --dump-user-agent display the current browser identification --list-extractors List all supported extractors and the URLs they would handle diff --git a/youtube-dl b/youtube-dl index b3e0cd422..6e6932b52 100755 Binary files a/youtube-dl and b/youtube-dl differ diff --git a/youtube-dl.exe b/youtube-dl.exe index c55f5fa8d..4ff820dc8 100755 Binary files a/youtube-dl.exe and b/youtube-dl.exe differ diff --git a/youtube_dl/FileDownloader.py b/youtube_dl/FileDownloader.py index 14e872a98..724de17c7 100644 --- a/youtube_dl/FileDownloader.py +++ b/youtube_dl/FileDownloader.py @@ -61,6 +61,8 @@ class FileDownloader(object): ratelimit: Download speed limit, in bytes/sec. nooverwrites: Prevent overwriting files. retries: Number of times to retry for HTTP error 5xx + buffersize: Size of download buffer in bytes. + noresizebuffer: Do not automatically resize the download buffer. continuedl: Try to continue downloads if possible. noprogress: Do not print the progress bar. playliststart: Playlist item to start at. 
@@ -633,7 +635,7 @@ class FileDownloader(object): data_len = long(data_len) + resume_len data_len_str = self.format_bytes(data_len) byte_counter = 0 + resume_len - block_size = 1024 + block_size = self.params.get('buffersize', 1024) start = time.time() while True: # Download and write @@ -659,7 +661,8 @@ class FileDownloader(object): except (IOError, OSError), err: self.trouble(u'\nERROR: unable to write data: %s' % str(err)) return False - block_size = self.best_block_size(after - before, len(data_block)) + if not self.params.get('noresizebuffer', False): + block_size = self.best_block_size(after - before, len(data_block)) # Progress message speed_str = self.calc_speed(start, time.time(), byte_counter - resume_len) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index f10822db1..7983b24eb 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -187,6 +187,11 @@ def parseOpts(): dest='ratelimit', metavar='LIMIT', help='download rate limit (e.g. 50k or 44.6m)') general.add_option('-R', '--retries', dest='retries', metavar='RETRIES', help='number of retries (default is 10)', default=10) + general.add_option('-b', '--buffer-size', + dest='buffersize', metavar='SIZE', help='size of download buffer (e.g. 
1024 or 16k) (default is 1024)', default="1024") + general.add_option('--no-resize-buffer', + action='store_true', dest='noresizebuffer', + help='do not automatically adjust the buffer size', default=False) general.add_option('--dump-user-agent', action='store_true', dest='dump_user_agent', help='display the current browser identification', default=False) @@ -428,6 +433,11 @@ def _real_main(): opts.retries = long(opts.retries) except (TypeError, ValueError), err: parser.error(u'invalid retry count specified') + if opts.buffersize is not None: + numeric_buffersize = FileDownloader.parse_bytes(opts.buffersize) + if numeric_buffersize is None: + parser.error(u'invalid buffer size specified') + opts.buffersize = numeric_buffersize try: opts.playliststart = int(opts.playliststart) if opts.playliststart <= 0: @@ -475,6 +485,8 @@ def _real_main(): 'ratelimit': opts.ratelimit, 'nooverwrites': opts.nooverwrites, 'retries': opts.retries, + 'buffersize': opts.buffersize, + 'noresizebuffer': opts.noresizebuffer, 'continuedl': opts.continue_dl, 'noprogress': opts.noprogress, 'playliststart': opts.playliststart, -- cgit v1.2.3 From aab4fca4225d1d7935154486b72c3edf73ecf77a Mon Sep 17 00:00:00 2001 From: Joel Verhagen Date: Mon, 16 Jul 2012 10:59:21 -0400 Subject: Updated --no-resize-buffer docs, removed -b option --- README.md | 6 ++++-- youtube-dl | Bin 40754 -> 40781 bytes youtube-dl.exe | Bin 3989797 -> 3989826 bytes youtube_dl/__init__.py | 4 ++-- 4 files changed, 6 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index fc9c1a6b8..0d0552a47 100644 --- a/README.md +++ b/README.md @@ -17,9 +17,11 @@ which means you can modify it, redistribute it or use it however you like. -i, --ignore-errors continue on download errors -r, --rate-limit LIMIT download rate limit (e.g. 50k or 44.6m) -R, --retries RETRIES number of retries (default is 10) - -b, --buffer-size SIZE size of download buffer (e.g. 1024 or 16k) (default + --buffer-size SIZE size of download buffer (e.g. 
1024 or 16k) (default is 1024) - --no-resize-buffer do not automatically adjust the buffer size + --no-resize-buffer do not automatically adjust the buffer size. By + default, the buffer size is automatically resized + from an initial value of SIZE. --dump-user-agent display the current browser identification --list-extractors List all supported extractors and the URLs they would handle diff --git a/youtube-dl b/youtube-dl index 6e6932b52..f615f6f9b 100755 Binary files a/youtube-dl and b/youtube-dl differ diff --git a/youtube-dl.exe b/youtube-dl.exe index 4ff820dc8..f74249a20 100755 Binary files a/youtube-dl.exe and b/youtube-dl.exe differ diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 7983b24eb..15f835a31 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -187,11 +187,11 @@ def parseOpts(): dest='ratelimit', metavar='LIMIT', help='download rate limit (e.g. 50k or 44.6m)') general.add_option('-R', '--retries', dest='retries', metavar='RETRIES', help='number of retries (default is 10)', default=10) - general.add_option('-b', '--buffer-size', + general.add_option('--buffer-size', dest='buffersize', metavar='SIZE', help='size of download buffer (e.g. 1024 or 16k) (default is 1024)', default="1024") general.add_option('--no-resize-buffer', action='store_true', dest='noresizebuffer', - help='do not automatically adjust the buffer size', default=False) + help='do not automatically adjust the buffer size. 
By default, the buffer size is automatically resized from an initial value of SIZE.', default=False) general.add_option('--dump-user-agent', action='store_true', dest='dump_user_agent', help='display the current browser identification', default=False) -- cgit v1.2.3 From f2ad10a97d59a7abf994facdd2180a356fdd701f Mon Sep 17 00:00:00 2001 From: Christian Albrecht Date: Sun, 26 Aug 2012 09:11:19 +0200 Subject: Add arte.tv Info Extractor --- youtube_dl/InfoExtractors.py | 155 +++++++++++++++++++++++++++++++++++++++++++ youtube_dl/__init__.py | 1 + 2 files changed, 156 insertions(+) diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index ddb4aa16b..82459e7a8 100644 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -1077,6 +1077,161 @@ class VimeoIE(InfoExtractor): }] +class ArteTvIE(InfoExtractor): + """arte.tv information extractor.""" + + _VALID_URL = r'(?:http://)?videos\.arte\.tv/(?:fr|de)/videos/.*' + _LIVE_URL = r'index-[0-9]+\.html$' + + IE_NAME = u'arte.tv' + + def __init__(self, downloader=None): + InfoExtractor.__init__(self, downloader) + + def report_download_webpage(self, video_id): + """Report webpage download.""" + self._downloader.to_screen(u'[arte.tv] %s: Downloading webpage' % video_id) + + def report_extraction(self, video_id): + """Report information extraction.""" + self._downloader.to_screen(u'[arte.tv] %s: Extracting information' % video_id) + + def fetch_webpage(self, url): + self._downloader.increment_downloads() + request = urllib2.Request(url) + try: + self.report_download_webpage(url) + webpage = urllib2.urlopen(request).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err)) + return + except ValueError, err: + self._downloader.trouble(u'ERROR: Invalid URL: %s' % url) + return + return webpage + + def grep_webpage(self, url, regex, regexFlags, matchTuples): + page = self.fetch_webpage(url) + 
mobj = re.search(regex, page, regexFlags) + info = {} + + if mobj is None: + self._downloader.trouble(u'ERROR: Invalid URL: %s' % url) + return + + for (i, key, err) in matchTuples: + if mobj.group(i) is None: + self._downloader.trouble(err) + return + else: + info[key] = mobj.group(i) + + return info + + def extractLiveStream(self, url): + + video_lang = url.split('/')[-4] + + info = self.grep_webpage( + url, + r'src="(.*?/videothek_js.*?\.js)', + 0, + [ + (1, 'url', u'ERROR: Invalid URL: %s' % url) + ] + ) + + http_host = url.split('/')[2] + next_url = 'http://%s%s' % (http_host, urllib.unquote(info.get('url'))) + + info = self.grep_webpage( + next_url, + r'(s_artestras_scst_geoFRDE_' + video_lang + '.*?)\'.*?' + + '(http://.*?\.swf).*?' + + '(rtmp://.*?)\'', + re.DOTALL, + [ + (1, 'path', u'ERROR: could not extract video path: %s' % url), + (2, 'player', u'ERROR: could not extract video player: %s' % url), + (3, 'url', u'ERROR: could not extract video url: %s' % url) + ] + ) + + video_url = u'%s/%s' % (info.get('url'), info.get('path')) + + print u'rtmpdump --swfVfy \'%s\' --rtmp \'%s\' --live -o arte-live.mp4' % (info.get('player'), video_url) + + def extractPlus7Stream(self, url): + + video_lang = url.split('/')[-3] + + info = self.grep_webpage( + url, + r'param name="movie".*?videorefFileUrl=(http[^\'"&]*)', + 0, + [ + (1, 'url', u'ERROR: Invalid URL: %s' % url) + ] + ) + + next_url = urllib.unquote(info.get('url')) + + info = self.grep_webpage( + next_url, + r'