diff options
-rw-r--r-- | README.md | 28 | ||||
-rw-r--r-- | test/test_youtube_lists.py | 2 | ||||
-rw-r--r-- | youtube_dl/YoutubeDL.py | 19 | ||||
-rw-r--r-- | youtube_dl/downloader/__init__.py | 2 | ||||
-rw-r--r-- | youtube_dl/downloader/common.py | 1 | ||||
-rw-r--r-- | youtube_dl/downloader/http.py | 6 | ||||
-rw-r--r-- | youtube_dl/downloader/mplayer.py | 4 | ||||
-rw-r--r-- | youtube_dl/extractor/__init__.py | 1 | ||||
-rw-r--r-- | youtube_dl/extractor/common.py | 3 | ||||
-rw-r--r-- | youtube_dl/extractor/condenast.py | 2 | ||||
-rw-r--r-- | youtube_dl/extractor/freespeech.py | 37 | ||||
-rw-r--r-- | youtube_dl/extractor/hotnewhiphop.py | 2 | ||||
-rw-r--r-- | youtube_dl/extractor/rtlnow.py | 150 | ||||
-rw-r--r-- | youtube_dl/extractor/vimeo.py | 10 | ||||
-rw-r--r-- | youtube_dl/extractor/youtube.py | 28 | ||||
-rw-r--r-- | youtube_dl/version.py | 2 |
16 files changed, 189 insertions, 108 deletions
@@ -325,11 +325,27 @@ Since June 2012 (#342) youtube-dl is packed as an executable zipfile, simply unz To run the exe you need to install first the [Microsoft Visual C++ 2008 Redistributable Package](http://www.microsoft.com/en-us/download/details.aspx?id=29). -# COPYRIGHT +# BUILD INSTRUCTIONS -youtube-dl is released into the public domain by the copyright holders. +Most users do not need to build youtube-dl and can [download the builds](http://rg3.github.io/youtube-dl/download.html) or get them from their distribution. -This README file was originally written by Daniel Bolton (<https://github.com/dbbolton>) and is likewise released into the public domain. +To run youtube-dl as a developer, you don't need to build anything either. Simply execute + + python -m youtube_dl + +To run the test, simply invoke your favorite test runner, or execute a test file directly; any of the following work: + + python -m unittest discover + python test/test_download.py + nosetests + +If you want to create a build of youtube-dl yourself, you'll need + +* python +* make +* pandoc +* zip +* nosetests # BUGS @@ -388,3 +404,9 @@ Only post features that you (or an incapicated friend you can personally talk to ### Is your question about youtube-dl? It may sound strange, but some bug reports we receive are completely unrelated to youtube-dl and relate to a different or even the reporter's own application. Please make sure that you are actually using youtube-dl. If you are using a UI for youtube-dl, report the bug to the maintainer of the actual application providing the UI. On the other hand, if your UI for youtube-dl fails in some way you believe is related to youtube-dl, by all means, go ahead and report the bug. + +# COPYRIGHT + +youtube-dl is released into the public domain by the copyright holders. + +This README file was originally written by Daniel Bolton (<https://github.com/dbbolton>) and is likewise released into the public domain. diff --git a/test/test_youtube_lists.py b/test/test_youtube_lists.py index d9fe5af4e..de157f657 100644 --- a/test/test_youtube_lists.py +++ b/test/test_youtube_lists.py @@ -120,7 +120,7 @@ class TestYoutubeLists(unittest.TestCase): def test_youtube_toplist(self): dl = FakeYDL() ie = YoutubeTopListIE(dl) - result = ie.extract('yttoplist:music:Top Tracks') + result = ie.extract('yttoplist:music:Trending') entries = result['entries'] self.assertTrue(len(entries) >= 5) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index f202ba4f0..42cbcf699 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -331,7 +331,7 @@ class YoutubeDL(object): def __exit__(self, *args): self.restore_console_title() - + if self.params.get('cookiefile') is not None: self.cookiejar.save() @@ -710,10 +710,10 @@ class YoutubeDL(object): # TODO Central sorting goes here - if formats[0] is not info_dict: + if formats[0] is not info_dict: # only set the 'formats' fields if the original info_dict list them # otherwise we end up with a circular reference, the first (and unique) - # element in the 'formats' field in info_dict is info_dict itself, + # element in the 'formats' field in info_dict is info_dict itself, # wich can't be exported to json info_dict['formats'] = formats if self.params.get('listformats', None): @@ -1094,9 +1094,15 @@ class YoutubeDL(object): res += fdict['format_note'] + ' ' if fdict.get('tbr') is not None: res += '%4dk ' % fdict['tbr'] + if fdict.get('container') is not None: + if res: + res += ', ' + res += '%s container' % fdict['container'] if (fdict.get('vcodec') is not None and fdict.get('vcodec') != 'none'): - res += '%-5s' % fdict['vcodec'] + if res: + res += ', ' + res += fdict['vcodec'] if fdict.get('vbr') is not None: res += '@' elif fdict.get('vbr') is not None and fdict.get('abr') is not None: @@ -1106,7 +1112,10 @@ class YoutubeDL(object): if fdict.get('acodec') is not None: if res: res += ', ' - res += '%-5s' % fdict['acodec'] + if fdict['acodec'] == 'none': + res += 'video only' + else: + res += '%-5s' % fdict['acodec'] elif fdict.get('abr') is not None: if res: res += ', ' diff --git a/youtube_dl/downloader/__init__.py b/youtube_dl/downloader/__init__.py index f19b490f1..0d9eb0001 100644 --- a/youtube_dl/downloader/__init__.py +++ b/youtube_dl/downloader/__init__.py @@ -8,6 +8,7 @@ from ..utils import ( determine_ext, ) + def get_suitable_downloader(info_dict): """Get the downloader class that can handle the info dict.""" url = info_dict['url'] @@ -20,4 +21,3 @@ def get_suitable_downloader(info_dict): return MplayerFD else: return HttpFD - diff --git a/youtube_dl/downloader/common.py b/youtube_dl/downloader/common.py index 10143d56a..5a068aa8b 100644 --- a/youtube_dl/downloader/common.py +++ b/youtube_dl/downloader/common.py @@ -314,4 +314,3 @@ class FileDownloader(object): if the download is successful. """ self._progress_hooks.append(ph) - diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py index 8407727ba..748f9f3ad 100644 --- a/youtube_dl/downloader/http.py +++ b/youtube_dl/downloader/http.py @@ -27,7 +27,7 @@ class HttpFD(FileDownloader): request = compat_urllib_request.Request(url, None, headers) if self.params.get('test', False): - request.add_header('Range','bytes=0-10240') + request.add_header('Range', 'bytes=0-10240') # Establish possible resume length if os.path.isfile(encodeFilename(tmpfilename)): @@ -39,7 +39,7 @@ class HttpFD(FileDownloader): if resume_len != 0: if self.params.get('continuedl', False): self.report_resuming_byte(resume_len) - request.add_header('Range','bytes=%d-' % resume_len) + request.add_header('Range', 'bytes=%d-' % resume_len) open_mode = 'ab' else: resume_len = 0 @@ -100,7 +100,7 @@ class HttpFD(FileDownloader): if data_len is not None: data_len = int(data_len) + resume_len min_data_len = self.params.get("min_filesize", None) - max_data_len = self.params.get("max_filesize", None) + max_data_len = self.params.get("max_filesize", None) if min_data_len is not None and data_len < min_data_len: self.to_screen(u'\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len, min_data_len)) return False diff --git a/youtube_dl/downloader/mplayer.py b/youtube_dl/downloader/mplayer.py index 67e0e4189..4de7f15f4 100644 --- a/youtube_dl/downloader/mplayer.py +++ b/youtube_dl/downloader/mplayer.py @@ -18,10 +18,10 @@ class MplayerFD(FileDownloader): try: subprocess.call(['mplayer', '-h'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT) except (OSError, IOError): - self.report_error(u'MMS or RTSP download detected but "%s" could not be run' % args[0] ) + self.report_error(u'MMS or RTSP download detected but "%s" could not be run' % args[0]) return False - # Download using mplayer. + # Download using mplayer. retval = subprocess.call(args) if retval == 0: fsize = os.path.getsize(encodeFilename(tmpfilename)) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 90c6a8fdb..8daf995b9 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -72,6 +72,7 @@ from .francetv import ( CultureboxIE, ) from .freesound import FreesoundIE +from .freespeech import FreespeechIE from .funnyordie import FunnyOrDieIE from .gamekings import GamekingsIE from .gamespot import GameSpotIE diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 02a82dc57..3cf742a3b 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -66,6 +66,7 @@ class InfoExtractor(object): * asr Audio sampling rate in Hertz * vbr Average video bitrate in KBit/s * vcodec Name of the video codec in use + * container Name of the container format * filesize The number of bytes, if known in advance * player_url SWF Player URL (used for rtmpdump). * protocol The protocol that will be used for the actual @@ -239,7 +240,7 @@ class InfoExtractor(object): except AttributeError: url = url_or_request if len(url) > 200: - h = u'___' + hashlib.md5(url).hexdigest() + h = u'___' + hashlib.md5(url.encode('utf-8')).hexdigest() url = url[:200 - len(h)] + h raw_filename = ('%s_%s.dump' % (video_id, url)) filename = sanitize_filename(raw_filename, restricted=True) diff --git a/youtube_dl/extractor/condenast.py b/youtube_dl/extractor/condenast.py index 03b75b80d..91c1c1348 100644 --- a/youtube_dl/extractor/condenast.py +++ b/youtube_dl/extractor/condenast.py @@ -30,7 +30,7 @@ class CondeNastIE(InfoExtractor): 'vanityfair': 'Vanity Fair', } - _VALID_URL = r'http://(video|www).(?P<site>%s).com/(?P<type>watch|series|video)/(?P<id>.+)' % '|'.join(_SITES.keys()) + _VALID_URL = r'http://(video|www)\.(?P<site>%s)\.com/(?P<type>watch|series|video)/(?P<id>.+)' % '|'.join(_SITES.keys()) IE_DESC = 'Condé Nast media group: %s' % ', '.join(sorted(_SITES.values())) _TEST = { diff --git a/youtube_dl/extractor/freespeech.py b/youtube_dl/extractor/freespeech.py new file mode 100644 index 000000000..c210177f7 --- /dev/null +++ b/youtube_dl/extractor/freespeech.py @@ -0,0 +1,37 @@ +from __future__ import unicode_literals + +import re +import json + +from .common import InfoExtractor + + +class FreespeechIE(InfoExtractor): + IE_NAME = 'freespeech.org' + _VALID_URL = r'https://www\.freespeech\.org/video/(?P<title>.+)' + _TEST = { + 'add_ie': ['Youtube'], + 'url': 'https://www.freespeech.org/video/obama-romney-campaign-colorado-ahead-debate-0', + 'info_dict': { + 'id': 'poKsVCZ64uU', + 'ext': 'mp4', + 'title': 'Obama, Romney Campaign in Colorado Ahead of Debate', + 'description': 'Obama, Romney Campaign in Colorado Ahead of Debate', + 'uploader': 'freespeechtv', + 'uploader_id': 'freespeechtv', + 'upload_date': '20121002', + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + title = mobj.group('title') + webpage = self._download_webpage(url, title) + info_json = self._search_regex(r'jQuery.extend\(Drupal.settings, ({.*?})\);', webpage, 'info') + info = json.loads(info_json) + + return { + '_type': 'url', + 'url': info['jw_player']['basic_video_node_player']['file'], + 'ie_key': 'Youtube', + } diff --git a/youtube_dl/extractor/hotnewhiphop.py b/youtube_dl/extractor/hotnewhiphop.py index a106f81d2..80b48b1b3 100644 --- a/youtube_dl/extractor/hotnewhiphop.py +++ b/youtube_dl/extractor/hotnewhiphop.py @@ -13,7 +13,7 @@ from ..utils import ( class HotNewHipHopIE(InfoExtractor): - _VALID_URL = r'http://www\.hotnewhiphop.com/.*\.(?P<id>.*)\.html' + _VALID_URL = r'http://www\.hotnewhiphop\.com/.*\.(?P<id>.*)\.html' _TEST = { 'url': 'http://www.hotnewhiphop.com/freddie-gibbs-lay-it-down-song.1435540.html', 'file': '1435540.mp3', diff --git a/youtube_dl/extractor/rtlnow.py b/youtube_dl/extractor/rtlnow.py index a43d6ced5..cd50f708d 100644 --- a/youtube_dl/extractor/rtlnow.py +++ b/youtube_dl/extractor/rtlnow.py @@ -1,4 +1,7 @@ # encoding: utf-8 + +from __future__ import unicode_literals + import re from .common import InfoExtractor @@ -12,78 +15,77 @@ class RTLnowIE(InfoExtractor): """Information Extractor for RTL NOW, RTL2 NOW, RTL NITRO, SUPER RTL NOW, VOX NOW and n-tv NOW""" _VALID_URL = r'(?:http://)?(?P<url>(?P<domain>rtl-now\.rtl\.de|rtl2now\.rtl2\.de|(?:www\.)?voxnow\.de|(?:www\.)?rtlnitronow\.de|(?:www\.)?superrtlnow\.de|(?:www\.)?n-tvnow\.de)/+[a-zA-Z0-9-]+/[a-zA-Z0-9-]+\.php\?(?:container_id|film_id)=(?P<video_id>[0-9]+)&player=1(?:&season=[0-9]+)?(?:&.*)?)' _TESTS = [{ - u'url': u'http://rtl-now.rtl.de/ahornallee/folge-1.php?film_id=90419&player=1&season=1', - u'file': u'90419.flv', - u'info_dict': { - u'upload_date': u'20070416', - u'title': u'Ahornallee - Folge 1 - Der Einzug', - u'description': u'Folge 1 - Der Einzug', + 'url': 'http://rtl-now.rtl.de/ahornallee/folge-1.php?film_id=90419&player=1&season=1', + 'file': '90419.flv', + 'info_dict': { + 'upload_date': '20070416', + 'title': 'Ahornallee - Folge 1 - Der Einzug', + 'description': 'Folge 1 - Der Einzug', }, - u'params': { - u'skip_download': True, + 'params': { + 'skip_download': True, }, - u'skip': u'Only works from Germany', + 'skip': 'Only works from Germany', }, { - u'url': u'http://rtl2now.rtl2.de/aerger-im-revier/episode-15-teil-1.php?film_id=69756&player=1&season=2&index=5', - u'file': u'69756.flv', - u'info_dict': { - u'upload_date': u'20120519', - u'title': u'Ärger im Revier - Ein junger Ladendieb, ein handfester Streit...', - u'description': u'Ärger im Revier - Ein junger Ladendieb, ein handfester Streit u.a.', - u'thumbnail': u'http://autoimg.static-fra.de/rtl2now/219850/1500x1500/image2.jpg', + 'url': 'http://rtl2now.rtl2.de/aerger-im-revier/episode-15-teil-1.php?film_id=69756&player=1&season=2&index=5', + 'file': '69756.flv', + 'info_dict': { + 'upload_date': '20120519', + 'title': 'Ärger im Revier - Ein junger Ladendieb, ein handfester Streit...', + 'description': 'Ärger im Revier - Ein junger Ladendieb, ein handfester Streit u.a.', + 'thumbnail': 'http://autoimg.static-fra.de/rtl2now/219850/1500x1500/image2.jpg', }, - u'params': { - u'skip_download': True, + 'params': { + 'skip_download': True, }, - u'skip': u'Only works from Germany', + 'skip': 'Only works from Germany', }, { - u'url': u'http://www.voxnow.de/voxtours/suedafrika-reporter-ii.php?film_id=13883&player=1&season=17', - u'file': u'13883.flv', - u'info_dict': { - u'upload_date': u'20090627', - u'title': u'Voxtours - Südafrika-Reporter II', - u'description': u'Südafrika-Reporter II', + 'url': 'http://www.voxnow.de/voxtours/suedafrika-reporter-ii.php?film_id=13883&player=1&season=17', + 'file': '13883.flv', + 'info_dict': { + 'upload_date': '20090627', + 'title': 'Voxtours - Südafrika-Reporter II', + 'description': 'Südafrika-Reporter II', }, - u'params': { - u'skip_download': True, + 'params': { + 'skip_download': True, }, }, { - u'url': u'http://superrtlnow.de/medicopter-117/angst.php?film_id=99205&player=1', - u'file': u'99205.flv', - u'info_dict': { - u'upload_date': u'20080928', - u'title': u'Medicopter 117 - Angst!', - u'description': u'Angst!', - u'thumbnail': u'http://autoimg.static-fra.de/superrtlnow/287529/1500x1500/image2.jpg' + 'url': 'http://superrtlnow.de/medicopter-117/angst.php?film_id=99205&player=1', + 'file': '99205.flv', + 'info_dict': { + 'upload_date': '20080928', + 'title': 'Medicopter 117 - Angst!', + 'description': 'Angst!', + 'thumbnail': 'http://autoimg.static-fra.de/superrtlnow/287529/1500x1500/image2.jpg' }, - u'params': { - u'skip_download': True, + 'params': { + 'skip_download': True, }, }, { - u'url': u'http://www.n-tvnow.de/top-gear/episode-1-2013-01-01-00-00-00.php?film_id=124903&player=1&season=10', - u'file': u'124903.flv', - u'info_dict': { - u'upload_date': u'20130101', - u'title': u'Top Gear vom 01.01.2013', - u'description': u'Episode 1', + 'url': 'http://www.n-tvnow.de/top-gear/episode-1-2013-01-01-00-00-00.php?film_id=124903&player=1&season=10', + 'file': '124903.flv', + 'info_dict': { + 'upload_date': '20130101', + 'title': 'Top Gear vom 01.01.2013', + 'description': 'Episode 1', }, - u'params': { - u'skip_download': True, + 'params': { + 'skip_download': True, }, - u'skip': u'Only works from Germany', + 'skip': 'Only works from Germany', }] - - def _real_extract(self,url): + def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - webpage_url = u'http://' + mobj.group('url') - video_page_url = u'http://' + mobj.group('domain') + u'/' - video_id = mobj.group(u'video_id') + webpage_url = 'http://' + mobj.group('url') + video_page_url = 'http://' + mobj.group('domain') + '/' + video_id = mobj.group('video_id') webpage = self._download_webpage(webpage_url, video_id) @@ -94,51 +96,53 @@ class RTLnowIE(InfoExtractor): msg = clean_html(note_m.group(1)) raise ExtractorError(msg) - video_title = self._html_search_regex(r'<title>(?P<title>[^<]+?)( \| [^<]*)?</title>', - webpage, u'title') - playerdata_url = self._html_search_regex(r'\'playerdata\': \'(?P<playerdata_url>[^\']+)\'', - webpage, u'playerdata_url') + video_title = self._html_search_regex( + r'<title>(?P<title>[^<]+?)( \| [^<]*)?</title>', + webpage, 'title') + playerdata_url = self._html_search_regex( + r'\'playerdata\': \'(?P<playerdata_url>[^\']+)\'', + webpage, 'playerdata_url') playerdata = self._download_webpage(playerdata_url, video_id) mobj = re.search(r'<title><!\[CDATA\[(?P<description>.+?)(?:\s+- (?:Sendung )?vom (?P<upload_date_d>[0-9]{2})\.(?P<upload_date_m>[0-9]{2})\.(?:(?P<upload_date_Y>[0-9]{4})|(?P<upload_date_y>[0-9]{2})) [0-9]{2}:[0-9]{2} Uhr)?\]\]></title>', playerdata) if mobj: - video_description = mobj.group(u'description') + video_description = mobj.group('description') if mobj.group('upload_date_Y'): video_upload_date = mobj.group('upload_date_Y') elif mobj.group('upload_date_y'): - video_upload_date = u'20' + mobj.group('upload_date_y') + video_upload_date = '20' + mobj.group('upload_date_y') else: video_upload_date = None if video_upload_date: - video_upload_date += mobj.group('upload_date_m')+mobj.group('upload_date_d') + video_upload_date += mobj.group('upload_date_m') + mobj.group('upload_date_d') else: video_description = None video_upload_date = None - self._downloader.report_warning(u'Unable to extract description and upload date') + self._downloader.report_warning('Unable to extract description and upload date') # Thumbnail: not every video has an thumbnail mobj = re.search(r'<meta property="og:image" content="(?P<thumbnail>[^"]+)">', webpage) if mobj: - video_thumbnail = mobj.group(u'thumbnail') + video_thumbnail = mobj.group('thumbnail') else: video_thumbnail = None mobj = re.search(r'<filename [^>]+><!\[CDATA\[(?P<url>rtmpe://(?:[^/]+/){2})(?P<play_path>[^\]]+)\]\]></filename>', playerdata) if mobj is None: - raise ExtractorError(u'Unable to extract media URL') - video_url = mobj.group(u'url') - video_play_path = u'mp4:' + mobj.group(u'play_path') - video_player_url = video_page_url + u'includes/vodplayer.swf' + raise ExtractorError('Unable to extract media URL') + video_url = mobj.group('url') + video_play_path = 'mp4:' + mobj.group('play_path') + video_player_url = video_page_url + 'includes/vodplayer.swf' - return [{ - 'id': video_id, - 'url': video_url, - 'play_path': video_play_path, - 'page_url': video_page_url, - 'player_url': video_player_url, - 'ext': 'flv', - 'title': video_title, + return { + 'id': video_id, + 'url': video_url, + 'play_path': video_play_path, + 'page_url': video_page_url, + 'player_url': video_player_url, + 'ext': 'flv', + 'title': video_title, 'description': video_description, 'upload_date': video_upload_date, - 'thumbnail': video_thumbnail, - }] + 'thumbnail': video_thumbnail, + } diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 193675549..a50170ce7 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -291,7 +291,7 @@ class VimeoIE(InfoExtractor): class VimeoChannelIE(InfoExtractor): IE_NAME = 'vimeo:channel' - _VALID_URL = r'(?:https?://)?vimeo.\com/channels/(?P<id>[^/]+)' + _VALID_URL = r'(?:https?://)?vimeo\.com/channels/(?P<id>[^/]+)' _MORE_PAGES_INDICATOR = r'<a.+?rel="next"' _TITLE_RE = r'<link rel="alternate"[^>]+?title="(.*?)"' @@ -327,7 +327,7 @@ class VimeoChannelIE(InfoExtractor): class VimeoUserIE(VimeoChannelIE): IE_NAME = 'vimeo:user' - _VALID_URL = r'(?:https?://)?vimeo.\com/(?P<name>[^/]+)(?:/videos|[#?]|$)' + _VALID_URL = r'(?:https?://)?vimeo\.com/(?P<name>[^/]+)(?:/videos|[#?]|$)' _TITLE_RE = r'<a[^>]+?class="user">([^<>]+?)</a>' @classmethod @@ -344,7 +344,7 @@ class VimeoUserIE(VimeoChannelIE): class VimeoAlbumIE(VimeoChannelIE): IE_NAME = 'vimeo:album' - _VALID_URL = r'(?:https?://)?vimeo.\com/album/(?P<id>\d+)' + _VALID_URL = r'(?:https?://)?vimeo\.com/album/(?P<id>\d+)' _TITLE_RE = r'<header id="page_header">\n\s*<h1>(.*?)</h1>' def _page_url(self, base_url, pagenum): @@ -358,7 +358,7 @@ class VimeoAlbumIE(VimeoChannelIE): class VimeoGroupsIE(VimeoAlbumIE): IE_NAME = 'vimeo:group' - _VALID_URL = r'(?:https?://)?vimeo.\com/groups/(?P<name>[^/]+)' + _VALID_URL = r'(?:https?://)?vimeo\.com/groups/(?P<name>[^/]+)' def _extract_list_title(self, webpage): return self._og_search_title(webpage) @@ -372,7 +372,7 @@ class VimeoGroupsIE(VimeoAlbumIE): class VimeoReviewIE(InfoExtractor): IE_NAME = 'vimeo:review' IE_DESC = 'Review pages on vimeo' - _VALID_URL = r'(?:https?://)?vimeo.\com/[^/]+/review/(?P<id>[^/]+)' + _VALID_URL = r'(?:https?://)?vimeo\.com/[^/]+/review/(?P<id>[^/]+)' _TEST = { 'url': 'https://vimeo.com/user21297594/review/75524534/3c257a1b5d', 'file': '75524534.mp4', diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 6deb56447..87a5a452e 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -40,7 +40,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor): """Provide base functions for Youtube extractors""" _LOGIN_URL = 'https://accounts.google.com/ServiceLogin' _LANG_URL = r'https://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1' - _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en' + _AGE_URL = 'https://www.youtube.com/verify_age?next_url=/&gl=US&hl=en' _NETRC_MACHINE = 'youtube' # If True it will raise an error if no login info is provided _LOGIN_REQUIRED = False @@ -111,7 +111,8 @@ class YoutubeBaseInfoExtractor(InfoExtractor): 'next_url': '/', 'action_confirm': 'Confirm', } - req = compat_urllib_request.Request(self._AGE_URL, compat_urllib_parse.urlencode(age_form)) + req = compat_urllib_request.Request(self._AGE_URL, + compat_urllib_parse.urlencode(age_form).encode('ascii')) self._download_webpage( req, None, @@ -207,6 +208,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 256, 'preference': -50}, # Dash webm + '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40}, + '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40}, + '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40}, + '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40}, + '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40}, + '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40}, '242': {'ext': 'webm', 'height': 240, 'resolution': '240p', 'format_note': 'DASH webm', 'preference': -40}, '243': {'ext': 'webm', 'height': 360, 'resolution': '360p', 'format_note': 'DASH webm', 'preference': -40}, '244': {'ext': 'webm', 'height': 480, 'resolution': '480p', 'format_note': 'DASH webm', 'preference': -40}, @@ -1008,7 +1015,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): def _get_available_subtitles(self, video_id, webpage): try: sub_list = self._download_webpage( - 'http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id, + 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id, video_id, note=False) except ExtractorError as err: self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err)) @@ -1024,7 +1031,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): 'fmt': self._downloader.params.get('subtitlesformat', 'srt'), 'name': unescapeHTML(l[0]).encode('utf-8'), }) - url = u'http://www.youtube.com/api/timedtext?' + params + url = u'https://www.youtube.com/api/timedtext?' + params sub_lang_list[lang] = url if not sub_lang_list: self._downloader.report_warning(u'video doesn\'t have subtitles') @@ -1290,7 +1297,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): 'url': video_real_url, 'player_url': player_url, } - dct.update(self._formats[itag]) + if itag in self._formats: + dct.update(self._formats[itag]) formats.append(dct) return formats @@ -1522,7 +1530,7 @@ class YoutubeTopListIE(YoutubePlaylistIE): channel = mobj.group('chann') title = mobj.group('title') query = compat_urllib_parse.urlencode({'title': title}) - playlist_re = 'href="([^"]+?%s[^"]+?)"' % re.escape(query) + playlist_re = 'href="([^"]+?%s.*?)"' % re.escape(query) channel_page = self._download_webpage('https://www.youtube.com/%s' % channel, title) link = self._html_search_regex(playlist_re, channel_page, u'list') url = compat_urlparse.urljoin('https://www.youtube.com/', link) @@ -1547,7 +1555,7 @@ class YoutubeChannelIE(InfoExtractor): IE_DESC = u'YouTube.com channels' _VALID_URL = r"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)" _MORE_PAGES_INDICATOR = 'yt-uix-load-more' - _MORE_PAGES_URL = 'http://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s' + _MORE_PAGES_URL = 'https://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s' IE_NAME = u'youtube:channel' def extract_videos_from_page(self, page): @@ -1603,9 +1611,9 @@ class YoutubeChannelIE(InfoExtractor): class YoutubeUserIE(InfoExtractor): IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)' _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)([A-Za-z0-9_-]+)' - _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s' + _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/users/%s' _GDATA_PAGE_SIZE = 50 - _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json' + _GDATA_URL = 'https://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json' IE_NAME = u'youtube:user' @classmethod @@ -1736,7 +1744,7 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor): action = 'action_load_system_feed' if self._PERSONAL_FEED: action = 'action_load_personal_feed' - return 'http://www.youtube.com/feed_ajax?%s=1&feed_name=%s&paging=%%s' % (action, self._FEED_NAME) + return 'https://www.youtube.com/feed_ajax?%s=1&feed_name=%s&paging=%%s' % (action, self._FEED_NAME) @property def IE_NAME(self): diff --git a/youtube_dl/version.py b/youtube_dl/version.py index bcec90fe4..0701961a5 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2014.01.23.2' +__version__ = '2014.01.23.4' |