aboutsummaryrefslogtreecommitdiff
path: root/youtube_dl/extractor
diff options
context:
space:
mode:
Diffstat (limited to 'youtube_dl/extractor')
-rw-r--r--youtube_dl/extractor/appletrailers.py4
-rw-r--r--youtube_dl/extractor/atresplayer.py2
-rw-r--r--youtube_dl/extractor/audiomack.py13
-rw-r--r--youtube_dl/extractor/bliptv.py2
-rw-r--r--youtube_dl/extractor/common.py3
-rw-r--r--youtube_dl/extractor/fc2.py58
-rw-r--r--youtube_dl/extractor/folketinget.py4
-rw-r--r--youtube_dl/extractor/krasview.py22
-rw-r--r--youtube_dl/extractor/mtv.py2
-rw-r--r--youtube_dl/extractor/smotri.py1
-rw-r--r--youtube_dl/extractor/testtube.py16
-rw-r--r--youtube_dl/extractor/ubu.py27
-rw-r--r--youtube_dl/extractor/videomega.py4
-rw-r--r--youtube_dl/extractor/wdr.py4
14 files changed, 117 insertions, 45 deletions
diff --git a/youtube_dl/extractor/appletrailers.py b/youtube_dl/extractor/appletrailers.py
index 7cd0482c7..70621946d 100644
--- a/youtube_dl/extractor/appletrailers.py
+++ b/youtube_dl/extractor/appletrailers.py
@@ -129,7 +129,9 @@ class AppleTrailersIE(InfoExtractor):
'thumbnail': thumbnail,
'upload_date': upload_date,
'uploader_id': uploader_id,
- 'user_agent': 'QuickTime compatible (youtube-dl)',
+ 'http_headers': {
+ 'User-Agent': 'QuickTime compatible (youtube-dl)',
+ },
})
return {
diff --git a/youtube_dl/extractor/atresplayer.py b/youtube_dl/extractor/atresplayer.py
index f42862be3..f016368fa 100644
--- a/youtube_dl/extractor/atresplayer.py
+++ b/youtube_dl/extractor/atresplayer.py
@@ -95,7 +95,7 @@ class AtresPlayerIE(SubtitlesInfoExtractor):
for fmt in ['windows', 'android_tablet']:
request = compat_urllib_request.Request(
self._URL_VIDEO_TEMPLATE.format(fmt, episode_id, timestamp_shifted, token))
- request.add_header('Youtubedl-user-agent', self._USER_AGENT)
+ request.add_header('User-Agent', self._USER_AGENT)
fmt_json = self._download_json(
request, video_id, 'Downloading %s video JSON' % fmt)
diff --git a/youtube_dl/extractor/audiomack.py b/youtube_dl/extractor/audiomack.py
index 8bfe50214..693ba22c6 100644
--- a/youtube_dl/extractor/audiomack.py
+++ b/youtube_dl/extractor/audiomack.py
@@ -88,16 +88,21 @@ class AudiomackAlbumIE(InfoExtractor):
# Album playlist ripped from fakeshoredrive with no metadata
{
'url': 'http://www.audiomack.com/album/fakeshoredrive/ppp-pistol-p-project',
+ 'info_dict': {
+ 'title': 'PPP (Pistol P Project)',
+ 'id': '837572',
+ },
'playlist': [{
'info_dict': {
- 'title': '9.-heaven-or-hell-chimaca-ft-zuse-prod-by-dj-fu',
- 'id': '9.-heaven-or-hell-chimaca-ft-zuse-prod-by-dj-fu',
+ 'title': 'PPP (Pistol P Project) - 9. Heaven or Hell (CHIMACA) ft Zuse (prod by DJ FU)',
+ 'id': '837577',
'ext': 'mp3',
+ 'uploader': 'Lil Herb a.k.a. G Herbo',
}
}],
'params': {
- 'playliststart': 8,
- 'playlistend': 8,
+ 'playliststart': 9,
+ 'playlistend': 9,
}
}
]
diff --git a/youtube_dl/extractor/bliptv.py b/youtube_dl/extractor/bliptv.py
index 14b814120..436cc5155 100644
--- a/youtube_dl/extractor/bliptv.py
+++ b/youtube_dl/extractor/bliptv.py
@@ -199,7 +199,7 @@ class BlipTVIE(SubtitlesInfoExtractor):
# For some weird reason, blip.tv serves a video instead of subtitles
# when we request with a common UA
req = compat_urllib_request.Request(url)
- req.add_header('Youtubedl-user-agent', 'youtube-dl')
+ req.add_header('User-Agent', 'youtube-dl')
return self._download_webpage(req, None, note=False)
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 03f3f18c8..7b7a832dc 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -108,7 +108,6 @@ class InfoExtractor(object):
(quality takes higher priority)
-1 for default (order by other properties),
-2 or smaller for less than default.
- * http_referer HTTP Referer header value to set.
* http_method HTTP method to use for the download.
* http_headers A dictionary of additional HTTP headers
to add to the request.
@@ -130,7 +129,9 @@ class InfoExtractor(object):
something like "4234987", title "Dancing naked mole rats",
and display_id "dancing-naked-mole-rats"
thumbnails: A list of dictionaries, with the following entries:
+ * "id" (optional, string) - Thumbnail format ID
* "url"
+ * "preference" (optional, int) - quality of the image
* "width" (optional, int)
* "height" (optional, int)
* "resolution" (optional, string "{width}x{height"},
diff --git a/youtube_dl/extractor/fc2.py b/youtube_dl/extractor/fc2.py
index 81ceace53..1ccc1a964 100644
--- a/youtube_dl/extractor/fc2.py
+++ b/youtube_dl/extractor/fc2.py
@@ -5,6 +5,7 @@ import hashlib
from .common import InfoExtractor
from ..compat import (
+ compat_urllib_parse,
compat_urllib_request,
compat_urlparse,
)
@@ -16,7 +17,8 @@ from ..utils import (
class FC2IE(InfoExtractor):
_VALID_URL = r'^http://video\.fc2\.com/(?:[^/]+/)?content/(?P<id>[^/]+)'
IE_NAME = 'fc2'
- _TEST = {
+ _NETRC_MACHINE = 'fc2'
+ _TESTS = [{
'url': 'http://video.fc2.com/en/content/20121103kUan1KHs',
'md5': 'a6ebe8ebe0396518689d963774a54eb7',
'info_dict': {
@@ -24,12 +26,57 @@ class FC2IE(InfoExtractor):
'ext': 'flv',
'title': 'Boxing again with Puff',
},
- }
+ }, {
+ 'url': 'http://video.fc2.com/en/content/20150125cEva0hDn/',
+ 'info_dict': {
+ 'id': '20150125cEva0hDn',
+ 'ext': 'mp4',
+ },
+ 'params': {
+ 'username': 'ytdl@yt-dl.org',
+ 'password': '(snip)',
+ 'skip': 'requires actual password'
+ }
+ }]
+
+ def _login(self):
+ (username, password) = self._get_login_info()
+ if username is None or password is None:
+ return False
+
+ # Log in
+ login_form_strs = {
+ 'email': username,
+ 'password': password,
+ 'done': 'video',
+ 'Submit': ' Login ',
+ }
+
+ # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
+ # chokes on unicode
+ login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k, v in login_form_strs.items())
+ login_data = compat_urllib_parse.urlencode(login_form).encode('utf-8')
+ request = compat_urllib_request.Request(
+ 'https://secure.id.fc2.com/index.php?mode=login&switch_language=en', login_data)
+
+ login_results = self._download_webpage(request, None, note='Logging in', errnote='Unable to log in')
+ if 'mode=redirect&login=done' not in login_results:
+ self.report_warning('unable to log in: bad username or password')
+ return False
+
+ # this is also needed
+ login_redir = compat_urllib_request.Request('http://id.fc2.com/?mode=redirect&login=done')
+ self._download_webpage(
+ login_redir, None, note='Login redirect', errnote='Login redirect failed')
+
+ return True
def _real_extract(self, url):
video_id = self._match_id(url)
+ self._login()
webpage = self._download_webpage(url, video_id)
self._downloader.cookiejar.clear_session_cookies() # must clear
+ self._login()
title = self._og_search_title(webpage)
thumbnail = self._og_search_thumbnail(webpage)
@@ -46,7 +93,12 @@ class FC2IE(InfoExtractor):
info = compat_urlparse.parse_qs(info_webpage)
if 'err_code' in info:
- raise ExtractorError('Error code: %s' % info['err_code'][0])
+ # most of the time we can still download video even if err_code is 403 or 602
+ self.report_warning(
+ 'Error code was: %s... but still trying' % info['err_code'][0])
+
+ if 'filepath' not in info:
+ raise ExtractorError('Cannot download file. Are you logged in?')
video_url = info['filepath'][0] + '?mid=' + info['mid'][0]
title_info = info.get('title')
diff --git a/youtube_dl/extractor/folketinget.py b/youtube_dl/extractor/folketinget.py
index 68e2db943..0fb29de75 100644
--- a/youtube_dl/extractor/folketinget.py
+++ b/youtube_dl/extractor/folketinget.py
@@ -16,6 +16,7 @@ class FolketingetIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?ft\.dk/webtv/video/[^?#]*?\.(?P<id>[0-9]+)\.aspx'
_TEST = {
'url': 'http://www.ft.dk/webtv/video/20141/eru/td.1165642.aspx?as=1#player',
+ 'md5': '6269e8626fa1a891bf5369b386ae996a',
'info_dict': {
'id': '1165642',
'ext': 'mp4',
@@ -29,9 +30,6 @@ class FolketingetIE(InfoExtractor):
'upload_date': '20141120',
'duration': 3960,
},
- 'params': {
- 'skip_download': 'rtmpdump required',
- }
}
def _real_extract(self, url):
diff --git a/youtube_dl/extractor/krasview.py b/youtube_dl/extractor/krasview.py
index 6f3d2345b..e46954b47 100644
--- a/youtube_dl/extractor/krasview.py
+++ b/youtube_dl/extractor/krasview.py
@@ -2,18 +2,17 @@
from __future__ import unicode_literals
import json
-import re
from .common import InfoExtractor
from ..utils import (
int_or_none,
- unescapeHTML,
+ js_to_json,
)
class KrasViewIE(InfoExtractor):
IE_DESC = 'Красвью'
- _VALID_URL = r'https?://krasview\.ru/video/(?P<id>\d+)'
+ _VALID_URL = r'https?://krasview\.ru/(?:video|embed)/(?P<id>\d+)'
_TEST = {
'url': 'http://krasview.ru/video/512228',
@@ -29,20 +28,18 @@ class KrasViewIE(InfoExtractor):
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
+ video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
- flashvars = json.loads(self._search_regex(
- r'flashvars\s*:\s*({.+?})\s*}\);', webpage, 'flashvars'))
+ flashvars = json.loads(js_to_json(self._search_regex(
+ r'video_Init\(({.+?})', webpage, 'flashvars')))
video_url = flashvars['url']
- title = unescapeHTML(flashvars['title'])
- description = unescapeHTML(flashvars.get('subtitle') or self._og_search_description(webpage, default=None))
- thumbnail = flashvars['image']
- duration = int(flashvars['duration'])
- filesize = int(flashvars['size'])
+ title = self._og_search_title(webpage)
+ description = self._og_search_description(webpage, default=None)
+ thumbnail = flashvars.get('image') or self._og_search_thumbnail(webpage)
+ duration = int_or_none(flashvars.get('duration'))
width = int_or_none(self._og_search_property('video:width', webpage, 'video width'))
height = int_or_none(self._og_search_property('video:height', webpage, 'video height'))
@@ -53,7 +50,6 @@ class KrasViewIE(InfoExtractor):
'description': description,
'thumbnail': thumbnail,
'duration': duration,
- 'filesize': filesize,
'width': width,
'height': height,
}
diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py
index 5ebc78033..22a726327 100644
--- a/youtube_dl/extractor/mtv.py
+++ b/youtube_dl/extractor/mtv.py
@@ -53,7 +53,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
webpage_url = self._MOBILE_TEMPLATE % mtvn_id
req = compat_urllib_request.Request(webpage_url)
# Otherwise we get a webpage that would execute some javascript
- req.add_header('Youtubedl-user-agent', 'curl/7')
+ req.add_header('User-Agent', 'curl/7')
webpage = self._download_webpage(req, mtvn_id,
'Downloading mobile page')
metrics_url = unescapeHTML(self._search_regex(r'<a href="(http://metrics.+?)"', webpage, 'url'))
diff --git a/youtube_dl/extractor/smotri.py b/youtube_dl/extractor/smotri.py
index 26f361c93..e94f41362 100644
--- a/youtube_dl/extractor/smotri.py
+++ b/youtube_dl/extractor/smotri.py
@@ -102,6 +102,7 @@ class SmotriIE(InfoExtractor):
'uploader_id': 'mopeder',
'duration': 71,
'thumbnail': 'http://frame9.loadup.ru/d7/32/2888853.2.3.jpg',
+ 'upload_date': '20150114',
},
},
# swf player
diff --git a/youtube_dl/extractor/testtube.py b/youtube_dl/extractor/testtube.py
index fd47e71a2..6a7b5e49d 100644
--- a/youtube_dl/extractor/testtube.py
+++ b/youtube_dl/extractor/testtube.py
@@ -1,7 +1,10 @@
from __future__ import unicode_literals
from .common import InfoExtractor
-from ..utils import int_or_none
+from ..utils import (
+ int_or_none,
+ qualities,
+)
class TestTubeIE(InfoExtractor):
@@ -46,13 +49,22 @@ class TestTubeIE(InfoExtractor):
self._sort_formats(formats)
duration = int_or_none(info.get('duration'))
+ images = info.get('images')
+ thumbnails = None
+ preference = qualities(['mini', 'small', 'medium', 'large'])
+ if images:
+ thumbnails = [{
+ 'id': thumbnail_id,
+ 'url': img_url,
+ 'preference': preference(thumbnail_id)
+ } for thumbnail_id, img_url in images.items()]
return {
'id': video_id,
'display_id': display_id,
'title': info['title'],
'description': info.get('summary'),
- 'thumbnail': info.get('images', {}).get('large'),
+ 'thumbnails': thumbnails,
'uploader': info.get('show', {}).get('name'),
'uploader_id': info.get('show', {}).get('slug'),
'duration': duration,
diff --git a/youtube_dl/extractor/ubu.py b/youtube_dl/extractor/ubu.py
index 0182d67ec..d50237758 100644
--- a/youtube_dl/extractor/ubu.py
+++ b/youtube_dl/extractor/ubu.py
@@ -3,50 +3,51 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
-from ..utils import int_or_none
+from ..utils import (
+ int_or_none,
+ qualities,
+)
class UbuIE(InfoExtractor):
_VALID_URL = r'http://(?:www\.)?ubu\.com/film/(?P<id>[\da-z_-]+)\.html'
_TEST = {
'url': 'http://ubu.com/film/her_noise.html',
- 'md5': '8edd46ee8aa6b265fb5ed6cf05c36bc9',
+ 'md5': '138d5652618bf0f03878978db9bef1ee',
'info_dict': {
'id': 'her_noise',
- 'ext': 'mp4',
+ 'ext': 'm4v',
'title': 'Her Noise - The Making Of (2007)',
'duration': 3600,
},
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
-
+ video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
title = self._html_search_regex(
r'<title>.+?Film &amp; Video: ([^<]+)</title>', webpage, 'title')
duration = int_or_none(self._html_search_regex(
- r'Duration: (\d+) minutes', webpage, 'duration', fatal=False, default=None))
- if duration:
- duration *= 60
+ r'Duration: (\d+) minutes', webpage, 'duration', fatal=False),
+ invscale=60)
formats = []
-
FORMAT_REGEXES = [
- ['sq', r"'flashvars'\s*,\s*'file=([^']+)'"],
- ['hq', r'href="(http://ubumexico\.centro\.org\.mx/video/[^"]+)"']
+ ('sq', r"'flashvars'\s*,\s*'file=([^']+)'"),
+ ('hq', r'href="(http://ubumexico\.centro\.org\.mx/video/[^"]+)"'),
]
-
+ preference = qualities([fid for fid, _ in FORMAT_REGEXES])
for format_id, format_regex in FORMAT_REGEXES:
m = re.search(format_regex, webpage)
if m:
formats.append({
'url': m.group(1),
'format_id': format_id,
+ 'preference': preference(format_id),
})
+ self._sort_formats(formats)
return {
'id': video_id,
diff --git a/youtube_dl/extractor/videomega.py b/youtube_dl/extractor/videomega.py
index fc6e05fe0..273030316 100644
--- a/youtube_dl/extractor/videomega.py
+++ b/youtube_dl/extractor/videomega.py
@@ -62,5 +62,7 @@ class VideoMegaIE(InfoExtractor):
'title': title,
'formats': formats,
'thumbnail': thumbnail,
- 'http_referer': iframe_url,
+ 'http_headers': {
+ 'Referer': iframe_url,
+ },
}
diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py
index 45466e31b..313b9c15d 100644
--- a/youtube_dl/extractor/wdr.py
+++ b/youtube_dl/extractor/wdr.py
@@ -169,7 +169,9 @@ class WDRMobileIE(InfoExtractor):
'title': mobj.group('title'),
'age_limit': int(mobj.group('age_limit')),
'url': url,
- 'user_agent': 'mobile',
+ 'http_headers': {
+ 'User-Agent': 'mobile',
+ },
}