diff options
Diffstat (limited to 'youtube_dl/extractor')
-rw-r--r-- | youtube_dl/extractor/appletrailers.py | 4 | ||||
-rw-r--r-- | youtube_dl/extractor/atresplayer.py | 2 | ||||
-rw-r--r-- | youtube_dl/extractor/audiomack.py | 13 | ||||
-rw-r--r-- | youtube_dl/extractor/bliptv.py | 2 | ||||
-rw-r--r-- | youtube_dl/extractor/common.py | 3 | ||||
-rw-r--r-- | youtube_dl/extractor/fc2.py | 58 | ||||
-rw-r--r-- | youtube_dl/extractor/folketinget.py | 4 | ||||
-rw-r--r-- | youtube_dl/extractor/krasview.py | 22 | ||||
-rw-r--r-- | youtube_dl/extractor/mtv.py | 2 | ||||
-rw-r--r-- | youtube_dl/extractor/smotri.py | 1 | ||||
-rw-r--r-- | youtube_dl/extractor/testtube.py | 16 | ||||
-rw-r--r-- | youtube_dl/extractor/ubu.py | 27 | ||||
-rw-r--r-- | youtube_dl/extractor/videomega.py | 4 | ||||
-rw-r--r-- | youtube_dl/extractor/wdr.py | 4 |
14 files changed, 117 insertions, 45 deletions
diff --git a/youtube_dl/extractor/appletrailers.py b/youtube_dl/extractor/appletrailers.py index 7cd0482c7..70621946d 100644 --- a/youtube_dl/extractor/appletrailers.py +++ b/youtube_dl/extractor/appletrailers.py @@ -129,7 +129,9 @@ class AppleTrailersIE(InfoExtractor): 'thumbnail': thumbnail, 'upload_date': upload_date, 'uploader_id': uploader_id, - 'user_agent': 'QuickTime compatible (youtube-dl)', + 'http_headers': { + 'User-Agent': 'QuickTime compatible (youtube-dl)', + }, }) return { diff --git a/youtube_dl/extractor/atresplayer.py b/youtube_dl/extractor/atresplayer.py index f42862be3..f016368fa 100644 --- a/youtube_dl/extractor/atresplayer.py +++ b/youtube_dl/extractor/atresplayer.py @@ -95,7 +95,7 @@ class AtresPlayerIE(SubtitlesInfoExtractor): for fmt in ['windows', 'android_tablet']: request = compat_urllib_request.Request( self._URL_VIDEO_TEMPLATE.format(fmt, episode_id, timestamp_shifted, token)) - request.add_header('Youtubedl-user-agent', self._USER_AGENT) + request.add_header('User-Agent', self._USER_AGENT) fmt_json = self._download_json( request, video_id, 'Downloading %s video JSON' % fmt) diff --git a/youtube_dl/extractor/audiomack.py b/youtube_dl/extractor/audiomack.py index 8bfe50214..693ba22c6 100644 --- a/youtube_dl/extractor/audiomack.py +++ b/youtube_dl/extractor/audiomack.py @@ -88,16 +88,21 @@ class AudiomackAlbumIE(InfoExtractor): # Album playlist ripped from fakeshoredrive with no metadata { 'url': 'http://www.audiomack.com/album/fakeshoredrive/ppp-pistol-p-project', + 'info_dict': { + 'title': 'PPP (Pistol P Project)', + 'id': '837572', + }, 'playlist': [{ 'info_dict': { - 'title': '9.-heaven-or-hell-chimaca-ft-zuse-prod-by-dj-fu', - 'id': '9.-heaven-or-hell-chimaca-ft-zuse-prod-by-dj-fu', + 'title': 'PPP (Pistol P Project) - 9. Heaven or Hell (CHIMACA) ft Zuse (prod by DJ FU)', + 'id': '837577', 'ext': 'mp3', + 'uploader': 'Lil Herb a.k.a. G Herbo', } }], 'params': { - 'playliststart': 8, - 'playlistend': 8, + 'playliststart': 9, + 'playlistend': 9, } } ] diff --git a/youtube_dl/extractor/bliptv.py b/youtube_dl/extractor/bliptv.py index 14b814120..436cc5155 100644 --- a/youtube_dl/extractor/bliptv.py +++ b/youtube_dl/extractor/bliptv.py @@ -199,7 +199,7 @@ class BlipTVIE(SubtitlesInfoExtractor): # For some weird reason, blip.tv serves a video instead of subtitles # when we request with a common UA req = compat_urllib_request.Request(url) - req.add_header('Youtubedl-user-agent', 'youtube-dl') + req.add_header('User-Agent', 'youtube-dl') return self._download_webpage(req, None, note=False) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 03f3f18c8..7b7a832dc 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -108,7 +108,6 @@ class InfoExtractor(object): (quality takes higher priority) -1 for default (order by other properties), -2 or smaller for less than default. - * http_referer HTTP Referer header value to set. * http_method HTTP method to use for the download. * http_headers A dictionary of additional HTTP headers to add to the request. @@ -130,7 +129,9 @@ class InfoExtractor(object): something like "4234987", title "Dancing naked mole rats", and display_id "dancing-naked-mole-rats" thumbnails: A list of dictionaries, with the following entries: + * "id" (optional, string) - Thumbnail format ID * "url" + * "preference" (optional, int) - quality of the image * "width" (optional, int) * "height" (optional, int) * "resolution" (optional, string "{width}x{height"}, diff --git a/youtube_dl/extractor/fc2.py b/youtube_dl/extractor/fc2.py index 81ceace53..1ccc1a964 100644 --- a/youtube_dl/extractor/fc2.py +++ b/youtube_dl/extractor/fc2.py @@ -5,6 +5,7 @@ import hashlib from .common import InfoExtractor from ..compat import ( + compat_urllib_parse, compat_urllib_request, compat_urlparse, ) @@ -16,7 +17,8 @@ from ..utils import ( class FC2IE(InfoExtractor): _VALID_URL = r'^http://video\.fc2\.com/(?:[^/]+/)?content/(?P<id>[^/]+)' IE_NAME = 'fc2' - _TEST = { + _NETRC_MACHINE = 'fc2' + _TESTS = [{ 'url': 'http://video.fc2.com/en/content/20121103kUan1KHs', 'md5': 'a6ebe8ebe0396518689d963774a54eb7', 'info_dict': { @@ -24,12 +26,57 @@ class FC2IE(InfoExtractor): 'ext': 'flv', 'title': 'Boxing again with Puff', }, - } + }, { + 'url': 'http://video.fc2.com/en/content/20150125cEva0hDn/', + 'info_dict': { + 'id': '20150125cEva0hDn', + 'ext': 'mp4', + }, + 'params': { + 'username': 'ytdl@yt-dl.org', + 'password': '(snip)', + 'skip': 'requires actual password' + } + }] + + def _login(self): + (username, password) = self._get_login_info() + if username is None or password is None: + return False + + # Log in + login_form_strs = { + 'email': username, + 'password': password, + 'done': 'video', + 'Submit': ' Login ', + } + + # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode + # chokes on unicode + login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k, v in login_form_strs.items()) + login_data = compat_urllib_parse.urlencode(login_form).encode('utf-8') + request = compat_urllib_request.Request( + 'https://secure.id.fc2.com/index.php?mode=login&switch_language=en', login_data) + + login_results = self._download_webpage(request, None, note='Logging in', errnote='Unable to log in') + if 'mode=redirect&login=done' not in login_results: + self.report_warning('unable to log in: bad username or password') + return False + + # this is also needed + login_redir = compat_urllib_request.Request('http://id.fc2.com/?mode=redirect&login=done') + self._download_webpage( + login_redir, None, note='Login redirect', errnote='Login redirect failed') + + return True def _real_extract(self, url): video_id = self._match_id(url) + self._login() webpage = self._download_webpage(url, video_id) self._downloader.cookiejar.clear_session_cookies() # must clear + self._login() title = self._og_search_title(webpage) thumbnail = self._og_search_thumbnail(webpage) @@ -46,7 +93,12 @@ class FC2IE(InfoExtractor): info = compat_urlparse.parse_qs(info_webpage) if 'err_code' in info: - raise ExtractorError('Error code: %s' % info['err_code'][0]) + # most of the time we can still download wideo even if err_code is 403 or 602 + self.report_warning( + 'Error code was: %s... but still trying' % info['err_code'][0]) + + if 'filepath' not in info: + raise ExtractorError('Cannot download file. Are you logged in?') video_url = info['filepath'][0] + '?mid=' + info['mid'][0] title_info = info.get('title') diff --git a/youtube_dl/extractor/folketinget.py b/youtube_dl/extractor/folketinget.py index 68e2db943..0fb29de75 100644 --- a/youtube_dl/extractor/folketinget.py +++ b/youtube_dl/extractor/folketinget.py @@ -16,6 +16,7 @@ class FolketingetIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?ft\.dk/webtv/video/[^?#]*?\.(?P<id>[0-9]+)\.aspx' _TEST = { 'url': 'http://www.ft.dk/webtv/video/20141/eru/td.1165642.aspx?as=1#player', + 'md5': '6269e8626fa1a891bf5369b386ae996a', 'info_dict': { 'id': '1165642', 'ext': 'mp4', @@ -29,9 +30,6 @@ class FolketingetIE(InfoExtractor): 'upload_date': '20141120', 'duration': 3960, }, - 'params': { - 'skip_download': 'rtmpdump required', - } } def _real_extract(self, url): diff --git a/youtube_dl/extractor/krasview.py b/youtube_dl/extractor/krasview.py index 6f3d2345b..e46954b47 100644 --- a/youtube_dl/extractor/krasview.py +++ b/youtube_dl/extractor/krasview.py @@ -2,18 +2,17 @@ from __future__ import unicode_literals import json -import re from .common import InfoExtractor from ..utils import ( int_or_none, - unescapeHTML, + js_to_json, ) class KrasViewIE(InfoExtractor): IE_DESC = 'Красвью' - _VALID_URL = r'https?://krasview\.ru/video/(?P<id>\d+)' + _VALID_URL = r'https?://krasview\.ru/(?:video|embed)/(?P<id>\d+)' _TEST = { 'url': 'http://krasview.ru/video/512228', @@ -29,20 +28,18 @@ class KrasViewIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - flashvars = json.loads(self._search_regex( - r'flashvars\s*:\s*({.+?})\s*}\);', webpage, 'flashvars')) + flashvars = json.loads(js_to_json(self._search_regex( + r'video_Init\(({.+?})', webpage, 'flashvars'))) video_url = flashvars['url'] - title = unescapeHTML(flashvars['title']) - description = unescapeHTML(flashvars.get('subtitle') or self._og_search_description(webpage, default=None)) - thumbnail = flashvars['image'] - duration = int(flashvars['duration']) - filesize = int(flashvars['size']) + title = self._og_search_title(webpage) + description = self._og_search_description(webpage, default=None) + thumbnail = flashvars.get('image') or self._og_search_thumbnail(webpage) + duration = int_or_none(flashvars.get('duration')) width = int_or_none(self._og_search_property('video:width', webpage, 'video width')) height = int_or_none(self._og_search_property('video:height', webpage, 'video height')) @@ -53,7 +50,6 @@ class KrasViewIE(InfoExtractor): 'description': description, 'thumbnail': thumbnail, 'duration': duration, - 'filesize': filesize, 'width': width, 'height': height, } diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index 5ebc78033..22a726327 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -53,7 +53,7 @@ class MTVServicesInfoExtractor(InfoExtractor): webpage_url = self._MOBILE_TEMPLATE % mtvn_id req = compat_urllib_request.Request(webpage_url) # Otherwise we get a webpage that would execute some javascript - req.add_header('Youtubedl-user-agent', 'curl/7') + req.add_header('User-Agent', 'curl/7') webpage = self._download_webpage(req, mtvn_id, 'Downloading mobile page') metrics_url = unescapeHTML(self._search_regex(r'<a href="(http://metrics.+?)"', webpage, 'url')) diff --git a/youtube_dl/extractor/smotri.py b/youtube_dl/extractor/smotri.py index 26f361c93..e94f41362 100644 --- a/youtube_dl/extractor/smotri.py +++ b/youtube_dl/extractor/smotri.py @@ -102,6 +102,7 @@ class SmotriIE(InfoExtractor): 'uploader_id': 'mopeder', 'duration': 71, 'thumbnail': 'http://frame9.loadup.ru/d7/32/2888853.2.3.jpg', + 'upload_date': '20150114', }, }, # swf player diff --git a/youtube_dl/extractor/testtube.py b/youtube_dl/extractor/testtube.py index fd47e71a2..6a7b5e49d 100644 --- a/youtube_dl/extractor/testtube.py +++ b/youtube_dl/extractor/testtube.py @@ -1,7 +1,10 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import int_or_none +from ..utils import ( + int_or_none, + qualities, +) class TestTubeIE(InfoExtractor): @@ -46,13 +49,22 @@ class TestTubeIE(InfoExtractor): self._sort_formats(formats) duration = int_or_none(info.get('duration')) + images = info.get('images') + thumbnails = None + preference = qualities(['mini', 'small', 'medium', 'large']) + if images: + thumbnails = [{ + 'id': thumbnail_id, + 'url': img_url, + 'preference': preference(thumbnail_id) + } for thumbnail_id, img_url in images.items()] return { 'id': video_id, 'display_id': display_id, 'title': info['title'], 'description': info.get('summary'), - 'thumbnail': info.get('images', {}).get('large'), + 'thumbnails': thumbnails, 'uploader': info.get('show', {}).get('name'), 'uploader_id': info.get('show', {}).get('slug'), 'duration': duration, diff --git a/youtube_dl/extractor/ubu.py b/youtube_dl/extractor/ubu.py index 0182d67ec..d50237758 100644 --- a/youtube_dl/extractor/ubu.py +++ b/youtube_dl/extractor/ubu.py @@ -3,50 +3,51 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import int_or_none +from ..utils import ( + int_or_none, + qualities, +) class UbuIE(InfoExtractor): _VALID_URL = r'http://(?:www\.)?ubu\.com/film/(?P<id>[\da-z_-]+)\.html' _TEST = { 'url': 'http://ubu.com/film/her_noise.html', - 'md5': '8edd46ee8aa6b265fb5ed6cf05c36bc9', + 'md5': '138d5652618bf0f03878978db9bef1ee', 'info_dict': { 'id': 'her_noise', - 'ext': 'mp4', + 'ext': 'm4v', 'title': 'Her Noise - The Making Of (2007)', 'duration': 3600, }, } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) title = self._html_search_regex( r'<title>.+?Film & Video: ([^<]+)</title>', webpage, 'title') duration = int_or_none(self._html_search_regex( - r'Duration: (\d+) minutes', webpage, 'duration', fatal=False, default=None)) - if duration: - duration *= 60 + r'Duration: (\d+) minutes', webpage, 'duration', fatal=False), + invscale=60) formats = [] - FORMAT_REGEXES = [ - ['sq', r"'flashvars'\s*,\s*'file=([^']+)'"], - ['hq', r'href="(http://ubumexico\.centro\.org\.mx/video/[^"]+)"'] + ('sq', r"'flashvars'\s*,\s*'file=([^']+)'"), + ('hq', r'href="(http://ubumexico\.centro\.org\.mx/video/[^"]+)"'), ] - + preference = qualities([fid for fid, _ in FORMAT_REGEXES]) for format_id, format_regex in FORMAT_REGEXES: m = re.search(format_regex, webpage) if m: formats.append({ 'url': m.group(1), 'format_id': format_id, + 'preference': preference(format_id), }) + self._sort_formats(formats) return { 'id': video_id, diff --git a/youtube_dl/extractor/videomega.py b/youtube_dl/extractor/videomega.py index fc6e05fe0..273030316 100644 --- a/youtube_dl/extractor/videomega.py +++ b/youtube_dl/extractor/videomega.py @@ -62,5 +62,7 @@ class VideoMegaIE(InfoExtractor): 'title': title, 'formats': formats, 'thumbnail': thumbnail, - 'http_referer': iframe_url, + 'http_headers': { + 'Referer': iframe_url, + }, } diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py index 45466e31b..313b9c15d 100644 --- a/youtube_dl/extractor/wdr.py +++ b/youtube_dl/extractor/wdr.py @@ -169,7 +169,9 @@ class WDRMobileIE(InfoExtractor): 'title': mobj.group('title'), 'age_limit': int(mobj.group('age_limit')), 'url': url, - 'user_agent': 'mobile', + 'http_headers': { + 'User-Agent': 'mobile', + }, } |