about | summary | refs | log | tree | commit | diff
path: root/youtube_dl
diff options
context:
space:
mode:
Diffstat (limited to 'youtube_dl')
-rwxr-xr-x youtube_dl/YoutubeDL.py 6
-rw-r--r-- youtube_dl/extractor/__init__.py 4
-rw-r--r-- youtube_dl/extractor/common.py 4
-rw-r--r-- youtube_dl/extractor/instagram.py 8
-rw-r--r-- youtube_dl/extractor/letv.py 4
-rw-r--r-- youtube_dl/extractor/qqmusic.py 4
-rw-r--r-- youtube_dl/extractor/sbs.py 1
-rw-r--r-- youtube_dl/extractor/sohu.py 4
-rw-r--r-- youtube_dl/extractor/tv2.py 126
-rw-r--r-- youtube_dl/extractor/ultimedia.py 10
-rw-r--r-- youtube_dl/extractor/vier.py 9
-rw-r--r-- youtube_dl/extractor/vuclip.py 2
-rw-r--r-- youtube_dl/extractor/vulture.py 2
-rw-r--r-- youtube_dl/extractor/wimp.py 3
-rw-r--r-- youtube_dl/extractor/xminus.py 4
-rw-r--r-- youtube_dl/extractor/yahoo.py 18
-rw-r--r-- youtube_dl/utils.py 10
-rw-r--r-- youtube_dl/version.py 2
18 files changed, 188 insertions, 33 deletions
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index 5df889945..58b34e087 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -1368,7 +1368,7 @@ class YoutubeDL(object):
postprocessors = []
self.report_warning('You have requested multiple '
'formats but ffmpeg or avconv are not installed.'
- ' The formats won\'t be merged')
+ ' The formats won\'t be merged.')
else:
postprocessors = [merger]
@@ -1395,8 +1395,8 @@ class YoutubeDL(object):
requested_formats = info_dict['requested_formats']
if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
info_dict['ext'] = 'mkv'
- self.report_warning('You have requested formats incompatible for merge. '
- 'The formats will be merged into mkv')
+ self.report_warning(
+ 'Requested formats are incompatible for merge and will be merged into mkv.')
# Ensure filename always has a correct extension for successful merge
filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
if os.path.exists(encodeFilename(filename)):
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 173e9a155..24efb7ce5 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -572,6 +572,10 @@ from .tumblr import TumblrIE
from .tunein import TuneInIE
from .turbo import TurboIE
from .tutv import TutvIE
+from .tv2 import (
+ TV2IE,
+ TV2ArticleIE,
+)
from .tv4 import TV4IE
from .tvigle import TvigleIE
from .tvp import TvpIE, TvpSeriesIE
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 65bb77086..cecf917ff 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -786,8 +786,8 @@ class InfoExtractor(object):
return True
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError):
- self.report_warning(
- '%s URL is invalid, skipping' % item, video_id)
+ self.to_screen(
+ '%s: %s URL is invalid, skipping' % (video_id, item))
return False
raise
diff --git a/youtube_dl/extractor/instagram.py b/youtube_dl/extractor/instagram.py
index 65f6ca103..b10755788 100644
--- a/youtube_dl/extractor/instagram.py
+++ b/youtube_dl/extractor/instagram.py
@@ -7,9 +7,9 @@ from ..utils import int_or_none
class InstagramIE(InfoExtractor):
- _VALID_URL = r'https?://instagram\.com/p/(?P<id>[\da-zA-Z]+)'
+ _VALID_URL = r'https://instagram\.com/p/(?P<id>[\da-zA-Z]+)'
_TEST = {
- 'url': 'http://instagram.com/p/aye83DjauH/?foo=bar#abc',
+ 'url': 'https://instagram.com/p/aye83DjauH/?foo=bar#abc',
'md5': '0d2da106a9d2631273e192b372806516',
'info_dict': {
'id': 'aye83DjauH',
@@ -41,11 +41,11 @@ class InstagramIE(InfoExtractor):
class InstagramUserIE(InfoExtractor):
- _VALID_URL = r'http://instagram\.com/(?P<username>[^/]{2,})/?(?:$|[?#])'
+ _VALID_URL = r'https://instagram\.com/(?P<username>[^/]{2,})/?(?:$|[?#])'
IE_DESC = 'Instagram user profile'
IE_NAME = 'instagram:user'
_TEST = {
- 'url': 'http://instagram.com/porsche',
+ 'url': 'https://instagram.com/porsche',
'info_dict': {
'id': 'porsche',
'title': 'porsche',
diff --git a/youtube_dl/extractor/letv.py b/youtube_dl/extractor/letv.py
index 1484ac0d2..da896caf1 100644
--- a/youtube_dl/extractor/letv.py
+++ b/youtube_dl/extractor/letv.py
@@ -50,9 +50,7 @@ class LetvIE(InfoExtractor):
'title': '与龙共舞 完整版',
'description': 'md5:7506a5eeb1722bb9d4068f85024e3986',
},
- 'params': {
- 'cn_verification_proxy': 'http://proxy.uku.im:8888'
- },
+ 'skip': 'Only available in China',
}]
@staticmethod
diff --git a/youtube_dl/extractor/qqmusic.py b/youtube_dl/extractor/qqmusic.py
index 13113820b..b540033e2 100644
--- a/youtube_dl/extractor/qqmusic.py
+++ b/youtube_dl/extractor/qqmusic.py
@@ -26,7 +26,7 @@ class QQMusicIE(InfoExtractor):
'title': '可惜没如果',
'upload_date': '20141227',
'creator': '林俊杰',
- 'description': 'md5:4348ff1dd24036906baa7b6f973f8d30',
+ 'description': 'md5:d327722d0361576fde558f1ac68a7065',
}
}]
@@ -60,6 +60,8 @@ class QQMusicIE(InfoExtractor):
lrc_content = self._html_search_regex(
r'<div class="content" id="lrc_content"[^<>]*>([^<>]+)</div>',
detail_info_page, 'LRC lyrics', default=None)
+ if lrc_content:
+ lrc_content = lrc_content.replace('\\n', '\n')
guid = self.m_r_get_ruin()
diff --git a/youtube_dl/extractor/sbs.py b/youtube_dl/extractor/sbs.py
index 3073e5e86..d4bd1a0d7 100644
--- a/youtube_dl/extractor/sbs.py
+++ b/youtube_dl/extractor/sbs.py
@@ -1,7 +1,6 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
-import json
import re
from .common import InfoExtractor
from ..utils import (
diff --git a/youtube_dl/extractor/sohu.py b/youtube_dl/extractor/sohu.py
index eab4adfca..29bd9ce6f 100644
--- a/youtube_dl/extractor/sohu.py
+++ b/youtube_dl/extractor/sohu.py
@@ -23,9 +23,7 @@ class SohuIE(InfoExtractor):
'ext': 'mp4',
'title': 'MV:Far East Movement《The Illest》',
},
- 'params': {
- 'cn_verification_proxy': 'proxy.uku.im:8888'
- }
+ 'skip': 'On available in China',
}, {
'url': 'http://tv.sohu.com/20150305/n409385080.shtml',
'md5': 'ac9a5d322b4bf9ae184d53e4711e4f1a',
diff --git a/youtube_dl/extractor/tv2.py b/youtube_dl/extractor/tv2.py
new file mode 100644
index 000000000..fa338b936
--- /dev/null
+++ b/youtube_dl/extractor/tv2.py
@@ -0,0 +1,126 @@
+# encoding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ int_or_none,
+ float_or_none,
+ parse_iso8601,
+ remove_end,
+)
+
+
+class TV2IE(InfoExtractor):
+    """Extractor for single tv2.no videos addressed as ``/v/<numeric id>``.
+
+    Format URLs are read from the sumo.tv2.no ``play.json`` endpoint, which
+    is queried once per protocol, and the descriptive metadata from the
+    asset JSON endpoint of the same API.
+    """
+
+    # Raw string so that ``\.`` and ``\d`` reach the regex engine verbatim
+    # instead of depending on Python passing unknown escapes through.
+    _VALID_URL = r'http://(?:www\.)?tv2\.no/v/(?P<id>\d+)'
+    _TEST = {
+        'url': 'http://www.tv2.no/v/916509/',
+        'md5': '9cb9e3410b18b515d71892f27856e9b1',
+        'info_dict': {
+            'id': '916509',
+            'ext': 'flv',
+            'title': 'Se Gryttens hyllest av Steven Gerrard',
+            'description': 'TV 2 Sportens huspoet tar avskjed med Liverpools kaptein Steven Gerrard.',
+            'timestamp': 1431715610,
+            'upload_date': '20150515',
+            'duration': 156.967,
+            'view_count': int,
+            'categories': list,
+        }
+    }
+
+    def _real_extract(self, url):
+        """Build the info dict for the video that ``url`` points to.
+
+        Returns a dict with the keys ``id``, ``title``, ``description``,
+        ``thumbnails``, ``timestamp``, ``duration``, ``view_count``,
+        ``categories`` and ``formats``.
+        """
+        video_id = self._match_id(url)
+
+        formats = []
+        # Both protocol queries may list the same URL; remember the URLs that
+        # were already accepted so none of them is added twice.
+        seen_urls = set()
+        for protocol in ('HDS', 'HLS'):
+            data = self._download_json(
+                'http://sumo.tv2.no/api/web/asset/%s/play.json?protocol=%s&videoFormat=SMIL+ISMUSP' % (video_id, protocol),
+                video_id, 'Downloading play JSON')['playback']
+            for item in data['items']['item']:
+                video_url = item.get('url')
+                if not video_url or video_url in seen_urls:
+                    continue
+                format_id = '%s-%s' % (protocol.lower(), item.get('mediaFormat'))
+                if not self._is_valid_url(video_url, video_id, format_id):
+                    continue
+                seen_urls.add(video_url)
+                ext = determine_ext(video_url)
+                if ext == 'f4m':
+                    formats.extend(self._extract_f4m_formats(
+                        video_url, video_id, f4m_id=format_id))
+                elif ext == 'm3u8':
+                    formats.extend(self._extract_m3u8_formats(
+                        video_url, video_id, 'mp4', m3u8_id=format_id))
+                elif ext == 'ism' or video_url.endswith('.ism/Manifest'):
+                    # ISM manifests get no format entry: nothing here parses
+                    # them, and they must not fall through to the plain-URL
+                    # branch below.
+                    pass
+                else:
+                    formats.append({
+                        'url': video_url,
+                        'format_id': format_id,
+                        'tbr': int_or_none(item.get('bitrate')),
+                        'filesize': int_or_none(item.get('fileSize')),
+                    })
+        self._sort_formats(formats)
+
+        asset = self._download_json(
+            'http://sumo.tv2.no/api/web/asset/%s.json' % video_id,
+            video_id, 'Downloading metadata JSON')['asset']
+
+        title = asset['title']
+        description = asset.get('description')
+        timestamp = parse_iso8601(asset.get('createTime'))
+        duration = float_or_none(asset.get('accurateDuration') or asset.get('duration'))
+        view_count = int_or_none(asset.get('views'))
+
+        # ``keywords`` may be missing, null or empty; none of these should
+        # crash or produce a bogus [''] category list.
+        keywords = asset.get('keywords') or ''
+        categories = [keyword for keyword in keywords.split(',') if keyword]
+
+        # Only the values of ``imageVersions`` are needed, and an entry
+        # without a URL is of no use as a thumbnail.
+        image_versions = asset.get('imageVersions') or {}
+        thumbnails = [{
+            'id': thumbnail.get('@type'),
+            'url': thumbnail.get('url'),
+        } for thumbnail in image_versions.values() if thumbnail.get('url')]
+
+        # No top-level 'url': the downloadable URLs live in ``formats``, and
+        # the loop variable ``video_url`` would merely be whichever item came
+        # last (possibly a skipped ISM manifest).
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'thumbnails': thumbnails,
+            'timestamp': timestamp,
+            'duration': duration,
+            'view_count': view_count,
+            'categories': categories,
+            'formats': formats,
+        }
+
+
+class TV2ArticleIE(InfoExtractor):
+    """Extractor for tv2.no article pages.
+
+    An article is returned as a playlist with one entry per
+    ``data-assetid`` attribute found in the page; each entry is delegated
+    to the 'TV2' extractor through its ``/v/<id>`` URL.
+    """
+
+    # Raw string so that ``\.`` and ``\d`` reach the regex engine verbatim
+    # instead of depending on Python passing unknown escapes through.
+    _VALID_URL = r'http://(?:www\.)?tv2\.no/(?:a|\d{4}/\d{2}/\d{2}(/[^/]+)+)/(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'http://www.tv2.no/2015/05/16/nyheter/alesund/krim/pingvin/6930542',
+        'info_dict': {
+            'id': '6930542',
+            'title': 'Russen hetses etter pingvintyveri – innrømmer å ha åpnet luken på buret',
+            'description': 'md5:339573779d3eea3542ffe12006190954',
+        },
+        'playlist_count': 2,
+    }, {
+        'url': 'http://www.tv2.no/a/6930542',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        """Return a playlist result for the article that ``url`` points to.
+
+        The playlist id is the numeric article id; title and description
+        come from the page's Open Graph data with a trailing ' - TV2.no'
+        removed.
+        """
+        playlist_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, playlist_id)
+
+        entries = [
+            self.url_result('http://www.tv2.no/v/%s' % video_id, 'TV2')
+            for video_id in re.findall(r'data-assetid="(\d+)"', webpage)]
+
+        title = remove_end(self._og_search_title(webpage), ' - TV2.no')
+
+        # NOTE(review): the Open Graph description lookup is assumed to be
+        # non-fatal and able to return None -- confirm against
+        # InfoExtractor. remove_end() is therefore only applied to an actual
+        # string, so an article without a description does not abort the
+        # extraction.
+        description = self._og_search_description(webpage)
+        if description:
+            description = remove_end(description, ' - TV2.no')
+
+        return self.playlist_result(entries, playlist_id, title, description)
diff --git a/youtube_dl/extractor/ultimedia.py b/youtube_dl/extractor/ultimedia.py
index 96c809eaf..c4751050e 100644
--- a/youtube_dl/extractor/ultimedia.py
+++ b/youtube_dl/extractor/ultimedia.py
@@ -4,6 +4,7 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
+from ..compat import compat_urllib_parse_urlparse
from ..utils import (
ExtractorError,
qualities,
@@ -44,9 +45,9 @@ class UltimediaIE(InfoExtractor):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
- deliver_url = self._search_regex(
- r'<iframe[^>]+src="(https?://(?:www\.)?ultimedia\.com/deliver/[^"]+)"',
- webpage, 'deliver URL')
+ deliver_url = self._proto_relative_url(self._search_regex(
+ r'<iframe[^>]+src="((?:https?:)?//(?:www\.)?ultimedia\.com/deliver/[^"]+)"',
+ webpage, 'deliver URL'), compat_urllib_parse_urlparse(url).scheme + ':')
deliver_page = self._download_webpage(
deliver_url, video_id, 'Downloading iframe page')
@@ -57,7 +58,8 @@ class UltimediaIE(InfoExtractor):
player = self._parse_json(
self._search_regex(
- r"jwplayer\('player(?:_temp)?'\)\.setup\(({.+?})\)\.on", deliver_page, 'player'),
+ r"jwplayer\('player(?:_temp)?'\)\.setup\(({.+?})\)\.on",
+ deliver_page, 'player'),
video_id)
quality = qualities(['flash', 'html5'])
diff --git a/youtube_dl/extractor/vier.py b/youtube_dl/extractor/vier.py
index 619039e51..15377097e 100644
--- a/youtube_dl/extractor/vier.py
+++ b/youtube_dl/extractor/vier.py
@@ -38,11 +38,14 @@ class VierIE(InfoExtractor):
webpage = self._download_webpage(url, display_id)
video_id = self._search_regex(
- r'"nid"\s*:\s*"(\d+)"', webpage, 'video id')
+ [r'data-nid="(\d+)"', r'"nid"\s*:\s*"(\d+)"'],
+ webpage, 'video id')
application = self._search_regex(
- r'"application"\s*:\s*"([^"]+)"', webpage, 'application', default='vier_vod')
+ [r'data-application="([^"]+)"', r'"application"\s*:\s*"([^"]+)"'],
+ webpage, 'application', default='vier_vod')
filename = self._search_regex(
- r'"filename"\s*:\s*"([^"]+)"', webpage, 'filename')
+ [r'data-filename="([^"]+)"', r'"filename"\s*:\s*"([^"]+)"'],
+ webpage, 'filename')
playlist_url = 'http://vod.streamcloud.be/%s/mp4:_definst_/%s.mp4/playlist.m3u8' % (application, filename)
formats = self._extract_m3u8_formats(playlist_url, display_id, 'mp4')
diff --git a/youtube_dl/extractor/vuclip.py b/youtube_dl/extractor/vuclip.py
index c3fde53f5..a6d9b5fee 100644
--- a/youtube_dl/extractor/vuclip.py
+++ b/youtube_dl/extractor/vuclip.py
@@ -49,7 +49,7 @@ class VuClipIE(InfoExtractor):
links_code = self._search_regex(
r'''(?xs)
(?:
- <img\s+src="/im/play.gif".*?>|
+ <img\s+src="[^"]*/play.gif".*?>|
<!--\ player\ end\ -->\s*</div><!--\ thumb\ end-->
)
(.*?)
diff --git a/youtube_dl/extractor/vulture.py b/youtube_dl/extractor/vulture.py
index 1eb24a3d6..faa167e65 100644
--- a/youtube_dl/extractor/vulture.py
+++ b/youtube_dl/extractor/vulture.py
@@ -44,7 +44,7 @@ class VultureIE(InfoExtractor):
query_webpage = self._download_webpage(
query_url, display_id, note='Downloading query page')
params_json = self._search_regex(
- r'(?sm)new MagnifyEmbeddablePlayer\({.*?contentItem:\s*(\{.*?\})\n,\n',
+ r'(?sm)new MagnifyEmbeddablePlayer\({.*?contentItem:\s*(\{.*?\})\n?,\n',
query_webpage,
'player params')
params = json.loads(params_json)
diff --git a/youtube_dl/extractor/wimp.py b/youtube_dl/extractor/wimp.py
index d6dec25ca..f69d46a28 100644
--- a/youtube_dl/extractor/wimp.py
+++ b/youtube_dl/extractor/wimp.py
@@ -37,7 +37,8 @@ class WimpIE(InfoExtractor):
video_id = mobj.group(1)
webpage = self._download_webpage(url, video_id)
video_url = self._search_regex(
- r"[\"']file[\"']\s*[:,]\s*[\"'](.+?)[\"']", webpage, 'video URL')
+ [r"[\"']file[\"']\s*[:,]\s*[\"'](.+?)[\"']", r"videoId\s*:\s*[\"']([^\"']+)[\"']"],
+ webpage, 'video URL')
if YoutubeIE.suitable(video_url):
self.to_screen('Found YouTube video')
return {
diff --git a/youtube_dl/extractor/xminus.py b/youtube_dl/extractor/xminus.py
index 8c6241aed..7c9d8af6f 100644
--- a/youtube_dl/extractor/xminus.py
+++ b/youtube_dl/extractor/xminus.py
@@ -43,7 +43,7 @@ class XMinusIE(InfoExtractor):
r'minus_track\.dur_sec=\'([0-9]*?)\'',
webpage, 'duration', fatal=False))
filesize_approx = parse_filesize(self._html_search_regex(
- r'<div class="filesize[^"]*"></div>\s*([0-9.]+\s*[a-zA-Z][bB])',
+ r'<div id="finfo"[^>]*>\s*↓\s*([0-9.]+\s*[a-zA-Z][bB])',
webpage, 'approximate filesize', fatal=False))
tbr = int_or_none(self._html_search_regex(
r'<div class="quality[^"]*"></div>\s*([0-9]+)\s*kbps',
@@ -58,7 +58,7 @@ class XMinusIE(InfoExtractor):
description = re.sub(' *\r *', '\n', description)
enc_token = self._html_search_regex(
- r'minus_track\.tkn="(.+?)"', webpage, 'enc_token')
+ r'minus_track\.s?tkn="(.+?)"', webpage, 'enc_token')
token = ''.join(
c if pos == 3 else compat_chr(compat_ord(c) - 1)
for pos, c in enumerate(reversed(enc_token)))
diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py
index bf4e659ac..f9afbdbab 100644
--- a/youtube_dl/extractor/yahoo.py
+++ b/youtube_dl/extractor/yahoo.py
@@ -15,6 +15,7 @@ from ..utils import (
unescapeHTML,
ExtractorError,
int_or_none,
+ mimetype2ext,
)
from .nbc import NBCSportsVPlayerIE
@@ -236,6 +237,22 @@ class YahooIE(InfoExtractor):
self._sort_formats(formats)
+ closed_captions = self._html_search_regex(
+ r'"closedcaptions":(\[[^\]]+\])', webpage, 'closed captions',
+ default='[]')
+
+ cc_json = self._parse_json(closed_captions, video_id, fatal=False)
+ subtitles = {}
+ if cc_json:
+ for closed_caption in cc_json:
+ lang = closed_caption['lang']
+ if lang not in subtitles:
+ subtitles[lang] = []
+ subtitles[lang].append({
+ 'url': closed_caption['url'],
+ 'ext': mimetype2ext(closed_caption['content_type']),
+ })
+
return {
'id': video_id,
'display_id': display_id,
@@ -244,6 +261,7 @@ class YahooIE(InfoExtractor):
'description': clean_html(meta['description']),
'thumbnail': meta['thumbnail'] if meta.get('thumbnail') else self._og_search_thumbnail(webpage),
'duration': int_or_none(meta.get('duration')),
+ 'subtitles': subtitles,
}
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index ed9ed9ed6..52d198fa3 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -1665,6 +1665,7 @@ def mimetype2ext(mt):
return {
'x-ms-wmv': 'wmv',
'x-mp4-fragmented': 'mp4',
+ 'ttml+xml': 'ttml',
}.get(res, res)
@@ -1848,9 +1849,9 @@ def dfxp2srt(dfxp_data):
out = str_or_empty(node.text)
for child in node:
- if child.tag == _x('ttml:br'):
+ if child.tag in (_x('ttml:br'), 'br'):
out += '\n' + str_or_empty(child.tail)
- elif child.tag == _x('ttml:span'):
+ elif child.tag in (_x('ttml:span'), 'span'):
out += str_or_empty(parse_node(child))
else:
out += str_or_empty(xml.etree.ElementTree.tostring(child))
@@ -1859,7 +1860,10 @@ def dfxp2srt(dfxp_data):
dfxp = xml.etree.ElementTree.fromstring(dfxp_data.encode('utf-8'))
out = []
- paras = dfxp.findall(_x('.//ttml:p'))
+ paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')
+
+ if not paras:
+ raise ValueError('Invalid dfxp/TTML subtitle')
for para, index in zip(paras, itertools.count(1)):
begin_time = parse_dfxp_time_expr(para.attrib['begin'])
diff --git a/youtube_dl/version.py b/youtube_dl/version.py
index 38f00bc9b..b33385153 100644
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,3 +1,3 @@
from __future__ import unicode_literals
-__version__ = '2015.05.15'
+__version__ = '2015.05.20'