about summary refs log tree commit diff
path: root/youtube_dl
diff options
context:
space:
mode:
Diffstat (limited to 'youtube_dl')
-rwxr-xr-x youtube_dl/YoutubeDL.py 7
-rw-r--r-- youtube_dl/aes.py 2
-rw-r--r-- youtube_dl/extractor/__init__.py 13
-rw-r--r-- youtube_dl/extractor/chilloutzone.py 2
-rw-r--r-- youtube_dl/extractor/cnn.py 2
-rw-r--r-- youtube_dl/extractor/drtv.py 37
-rw-r--r-- youtube_dl/extractor/empflix.py 32
-rw-r--r-- youtube_dl/extractor/instagram.py 8
-rw-r--r-- youtube_dl/extractor/letv.py 4
-rw-r--r-- youtube_dl/extractor/mitele.py 9
-rw-r--r-- youtube_dl/extractor/nba.py 20
-rw-r--r-- youtube_dl/extractor/nextmedia.py 40
-rw-r--r-- youtube_dl/extractor/prosiebensat1.py 2
-rw-r--r-- youtube_dl/extractor/qqmusic.py 4
-rw-r--r-- youtube_dl/extractor/rtve.py 2
-rw-r--r-- youtube_dl/extractor/rutv.py 2
-rw-r--r-- youtube_dl/extractor/shared.py 2
-rw-r--r-- youtube_dl/extractor/sohu.py 4
-rw-r--r-- youtube_dl/extractor/sportbox.py 26
-rw-r--r-- youtube_dl/extractor/telecinco.py 4
-rw-r--r-- youtube_dl/extractor/tnaflix.py 34
-rw-r--r-- youtube_dl/extractor/tutv.py 2
-rw-r--r-- youtube_dl/extractor/tv2.py 33
-rw-r--r-- youtube_dl/extractor/ultimedia.py 10
-rw-r--r-- youtube_dl/extractor/videott.py 2
-rw-r--r-- youtube_dl/extractor/vier.py 9
-rw-r--r-- youtube_dl/extractor/viki.py 333
-rw-r--r-- youtube_dl/extractor/vuclip.py 2
-rw-r--r-- youtube_dl/extractor/vulture.py 2
-rw-r--r-- youtube_dl/extractor/wimp.py 3
-rw-r--r-- youtube_dl/extractor/xminus.py 4
-rw-r--r-- youtube_dl/extractor/yahoo.py 18
-rw-r--r-- youtube_dl/options.py 4
-rw-r--r-- youtube_dl/utils.py 10
-rw-r--r-- youtube_dl/version.py 2
35 files changed, 469 insertions, 221 deletions
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index 5df889945..d1953c18f 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -1368,7 +1368,7 @@ class YoutubeDL(object):
postprocessors = []
self.report_warning('You have requested multiple '
'formats but ffmpeg or avconv are not installed.'
- ' The formats won\'t be merged')
+ ' The formats won\'t be merged.')
else:
postprocessors = [merger]
@@ -1395,8 +1395,8 @@ class YoutubeDL(object):
requested_formats = info_dict['requested_formats']
if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
info_dict['ext'] = 'mkv'
- self.report_warning('You have requested formats incompatible for merge. '
- 'The formats will be merged into mkv')
+ self.report_warning(
+ 'Requested formats are incompatible for merge and will be merged into mkv.')
# Ensure filename always has a correct extension for successful merge
filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
if os.path.exists(encodeFilename(filename)):
@@ -1527,6 +1527,7 @@ class YoutubeDL(object):
pps_chain.extend(ie_info['__postprocessors'])
pps_chain.extend(self._pps)
for pp in pps_chain:
+ files_to_delete = []
try:
files_to_delete, info = pp.run(info)
except PostProcessingError as e:
diff --git a/youtube_dl/aes.py b/youtube_dl/aes.py
index 07224d508..7817adcfd 100644
--- a/youtube_dl/aes.py
+++ b/youtube_dl/aes.py
@@ -152,7 +152,7 @@ def aes_decrypt_text(data, password, key_size_bytes):
"""
NONCE_LENGTH_BYTES = 8
- data = bytes_to_intlist(base64.b64decode(data))
+ data = bytes_to_intlist(base64.b64decode(data.encode('utf-8')))
password = bytes_to_intlist(password.encode('utf-8'))
key = password[:key_size_bytes] + [0] * (key_size_bytes - len(password))
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index d131d3ec3..79bcd9106 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -339,8 +339,7 @@ from .newstube import NewstubeIE
from .nextmedia import (
NextMediaIE,
NextMediaActionNewsIE,
- AppleDailyRealtimeNewsIE,
- AppleDailyAnimationNewsIE
+ AppleDailyIE,
)
from .nfb import NFBIE
from .nfl import NFLIE
@@ -573,7 +572,10 @@ from .tumblr import TumblrIE
from .tunein import TuneInIE
from .turbo import TurboIE
from .tutv import TutvIE
-from .tv2 import TV2IE
+from .tv2 import (
+ TV2IE,
+ TV2ArticleIE,
+)
from .tv4 import TV4IE
from .tvigle import TvigleIE
from .tvp import TvpIE, TvpSeriesIE
@@ -645,7 +647,10 @@ from .vine import (
VineIE,
VineUserIE,
)
-from .viki import VikiIE
+from .viki import (
+ VikiIE,
+ VikiChannelIE,
+)
from .vk import (
VKIE,
VKUserVideosIE,
diff --git a/youtube_dl/extractor/chilloutzone.py b/youtube_dl/extractor/chilloutzone.py
index c922f6959..0206d96db 100644
--- a/youtube_dl/extractor/chilloutzone.py
+++ b/youtube_dl/extractor/chilloutzone.py
@@ -57,7 +57,7 @@ class ChilloutzoneIE(InfoExtractor):
base64_video_info = self._html_search_regex(
r'var cozVidData = "(.+?)";', webpage, 'video data')
- decoded_video_info = base64.b64decode(base64_video_info).decode("utf-8")
+ decoded_video_info = base64.b64decode(base64_video_info.encode('utf-8')).decode('utf-8')
video_info_dict = json.loads(decoded_video_info)
# get video information from dict
diff --git a/youtube_dl/extractor/cnn.py b/youtube_dl/extractor/cnn.py
index 5efc5f4fe..3b1bd4033 100644
--- a/youtube_dl/extractor/cnn.py
+++ b/youtube_dl/extractor/cnn.py
@@ -12,7 +12,7 @@ from ..utils import (
class CNNIE(InfoExtractor):
_VALID_URL = r'''(?x)https?://(?:(?:edition|www)\.)?cnn\.com/video/(?:data/.+?|\?)/
- (?P<path>.+?/(?P<title>[^/]+?)(?:\.(?:[a-z]{3,5})(?:-ap)?|(?=&)))'''
+ (?P<path>.+?/(?P<title>[^/]+?)(?:\.(?:[a-z\-]+)|(?=&)))'''
_TESTS = [{
'url': 'http://edition.cnn.com/video/?/video/sports/2013/06/09/nadal-1-on-1.cnn',
diff --git a/youtube_dl/extractor/drtv.py b/youtube_dl/extractor/drtv.py
index f25ab319e..baa24c6d1 100644
--- a/youtube_dl/extractor/drtv.py
+++ b/youtube_dl/extractor/drtv.py
@@ -1,8 +1,11 @@
# coding: utf-8
from __future__ import unicode_literals
-from .common import InfoExtractor, ExtractorError
-from ..utils import parse_iso8601
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ parse_iso8601,
+)
class DRTVIE(InfoExtractor):
@@ -60,19 +63,31 @@ class DRTVIE(InfoExtractor):
restricted_to_denmark = asset['RestrictedToDenmark']
spoken_subtitles = asset['Target'] == 'SpokenSubtitles'
for link in asset['Links']:
- target = link['Target']
uri = link['Uri']
+ target = link['Target']
format_id = target
- preference = -1 if target == 'HDS' else -2
+ preference = None
if spoken_subtitles:
- preference -= 2
+ preference = -1
format_id += '-spoken-subtitles'
- formats.append({
- 'url': uri + '?hdcore=3.3.0&plugin=aasp-3.3.0.99.43' if target == 'HDS' else uri,
- 'format_id': format_id,
- 'ext': link['FileFormat'],
- 'preference': preference,
- })
+ if target == 'HDS':
+ formats.extend(self._extract_f4m_formats(
+ uri + '?hdcore=3.3.0&plugin=aasp-3.3.0.99.43',
+ video_id, preference, f4m_id=format_id))
+ elif target == 'HLS':
+ formats.extend(self._extract_m3u8_formats(
+ uri, video_id, 'mp4', preference=preference,
+ m3u8_id=format_id))
+ else:
+ bitrate = link.get('Bitrate')
+ if bitrate:
+ format_id += '-%s' % bitrate
+ formats.append({
+ 'url': uri,
+ 'format_id': format_id,
+ 'tbr': bitrate,
+ 'ext': link.get('FileFormat'),
+ })
subtitles_list = asset.get('SubtitlesList')
if isinstance(subtitles_list, list):
LANGS = {
diff --git a/youtube_dl/extractor/empflix.py b/youtube_dl/extractor/empflix.py
index 70f8efe27..9a5a8f4bb 100644
--- a/youtube_dl/extractor/empflix.py
+++ b/youtube_dl/extractor/empflix.py
@@ -4,22 +4,28 @@ from .tnaflix import TNAFlixIE
class EMPFlixIE(TNAFlixIE):
- _VALID_URL = r'^https?://www\.empflix\.com/videos/(?P<display_id>[0-9a-zA-Z-]+)-(?P<id>[0-9]+)\.html'
+ _VALID_URL = r'https?://(?:www\.)?empflix\.com/videos/(?P<display_id>.+?)-(?P<id>[0-9]+)\.html'
_TITLE_REGEX = r'name="title" value="(?P<title>[^"]*)"'
_DESCRIPTION_REGEX = r'name="description" value="([^"]*)"'
_CONFIG_REGEX = r'flashvars\.config\s*=\s*escape\("([^"]+)"'
- _TEST = {
- 'url': 'http://www.empflix.com/videos/Amateur-Finger-Fuck-33051.html',
- 'md5': 'b1bc15b6412d33902d6e5952035fcabc',
- 'info_dict': {
- 'id': '33051',
- 'display_id': 'Amateur-Finger-Fuck',
- 'ext': 'mp4',
- 'title': 'Amateur Finger Fuck',
- 'description': 'Amateur solo finger fucking.',
- 'thumbnail': 're:https?://.*\.jpg$',
- 'age_limit': 18,
+ _TESTS = [
+ {
+ 'url': 'http://www.empflix.com/videos/Amateur-Finger-Fuck-33051.html',
+ 'md5': 'b1bc15b6412d33902d6e5952035fcabc',
+ 'info_dict': {
+ 'id': '33051',
+ 'display_id': 'Amateur-Finger-Fuck',
+ 'ext': 'mp4',
+ 'title': 'Amateur Finger Fuck',
+ 'description': 'Amateur solo finger fucking.',
+ 'thumbnail': 're:https?://.*\.jpg$',
+ 'age_limit': 18,
+ }
+ },
+ {
+ 'url': 'http://www.empflix.com/videos/[AROMA][ARMD-718]-Aoi-Yoshino-Sawa-25826.html',
+ 'matching_only': True,
}
- }
+ ]
diff --git a/youtube_dl/extractor/instagram.py b/youtube_dl/extractor/instagram.py
index 65f6ca103..b10755788 100644
--- a/youtube_dl/extractor/instagram.py
+++ b/youtube_dl/extractor/instagram.py
@@ -7,9 +7,9 @@ from ..utils import int_or_none
class InstagramIE(InfoExtractor):
- _VALID_URL = r'https?://instagram\.com/p/(?P<id>[\da-zA-Z]+)'
+ _VALID_URL = r'https://instagram\.com/p/(?P<id>[\da-zA-Z]+)'
_TEST = {
- 'url': 'http://instagram.com/p/aye83DjauH/?foo=bar#abc',
+ 'url': 'https://instagram.com/p/aye83DjauH/?foo=bar#abc',
'md5': '0d2da106a9d2631273e192b372806516',
'info_dict': {
'id': 'aye83DjauH',
@@ -41,11 +41,11 @@ class InstagramIE(InfoExtractor):
class InstagramUserIE(InfoExtractor):
- _VALID_URL = r'http://instagram\.com/(?P<username>[^/]{2,})/?(?:$|[?#])'
+ _VALID_URL = r'https://instagram\.com/(?P<username>[^/]{2,})/?(?:$|[?#])'
IE_DESC = 'Instagram user profile'
IE_NAME = 'instagram:user'
_TEST = {
- 'url': 'http://instagram.com/porsche',
+ 'url': 'https://instagram.com/porsche',
'info_dict': {
'id': 'porsche',
'title': 'porsche',
diff --git a/youtube_dl/extractor/letv.py b/youtube_dl/extractor/letv.py
index 1484ac0d2..da896caf1 100644
--- a/youtube_dl/extractor/letv.py
+++ b/youtube_dl/extractor/letv.py
@@ -50,9 +50,7 @@ class LetvIE(InfoExtractor):
'title': '与龙共舞 完整版',
'description': 'md5:7506a5eeb1722bb9d4068f85024e3986',
},
- 'params': {
- 'cn_verification_proxy': 'http://proxy.uku.im:8888'
- },
+ 'skip': 'Only available in China',
}]
@staticmethod
diff --git a/youtube_dl/extractor/mitele.py b/youtube_dl/extractor/mitele.py
index d8897eb90..7091f3335 100644
--- a/youtube_dl/extractor/mitele.py
+++ b/youtube_dl/extractor/mitele.py
@@ -20,7 +20,6 @@ class MiTeleIE(InfoExtractor):
_TESTS = [{
'url': 'http://www.mitele.es/programas-tv/diario-de/la-redaccion/programa-144/',
- 'md5': '6a75fe9d0d3275bead0cb683c616fddb',
'info_dict': {
'id': '0fce117d',
'ext': 'mp4',
@@ -29,6 +28,10 @@ class MiTeleIE(InfoExtractor):
'display_id': 'programa-144',
'duration': 2913,
},
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
}]
def _real_extract(self, url):
@@ -56,12 +59,14 @@ class MiTeleIE(InfoExtractor):
episode,
transform_source=strip_jsonp
)
+ formats = self._extract_m3u8_formats(
+ token_info['tokenizedUrl'], episode, ext='mp4')
return {
'id': embed_data['videoId'],
'display_id': episode,
'title': info_el.find('title').text,
- 'url': token_info['tokenizedUrl'],
+ 'formats': formats,
'description': get_element_by_attribute('class', 'text', webpage),
'thumbnail': info_el.find('thumb').text,
'duration': parse_duration(info_el.find('duration').text),
diff --git a/youtube_dl/extractor/nba.py b/youtube_dl/extractor/nba.py
index 862b706bf..944096e1c 100644
--- a/youtube_dl/extractor/nba.py
+++ b/youtube_dl/extractor/nba.py
@@ -22,6 +22,18 @@ class NBAIE(InfoExtractor):
}, {
'url': 'http://www.nba.com/video/games/hornets/2014/12/05/0021400276-nyk-cha-play5.nba/',
'only_matching': True,
+ }, {
+ 'url': 'http://watch.nba.com/nba/video/channels/playoffs/2015/05/20/0041400301-cle-atl-recap.nba',
+ 'info_dict': {
+ 'id': '0041400301-cle-atl-recap.nba',
+ 'ext': 'mp4',
+ 'title': 'NBA GAME TIME | Video: Hawks vs. Cavaliers Game 1',
+ 'description': 'md5:8094c3498d35a9bd6b1a8c396a071b4d',
+ 'duration': 228,
+ },
+ 'params': {
+ 'skip_download': True,
+ }
}]
def _real_extract(self, url):
@@ -35,8 +47,12 @@ class NBAIE(InfoExtractor):
self._og_search_title(webpage, default=shortened_video_id), ' : NBA.com')
description = self._og_search_description(webpage)
- duration = parse_duration(
- self._html_search_meta('duration', webpage, 'duration'))
+ duration_str = self._html_search_meta(
+ 'duration', webpage, 'duration', default=None)
+ if not duration_str:
+ duration_str = self._html_search_regex(
+ r'Duration:</b>\s*(\d+:\d+)', webpage, 'duration', fatal=False)
+ duration = parse_duration(duration_str)
return {
'id': shortened_video_id,
diff --git a/youtube_dl/extractor/nextmedia.py b/youtube_dl/extractor/nextmedia.py
index 02dba4ef6..d1b7cff4c 100644
--- a/youtube_dl/extractor/nextmedia.py
+++ b/youtube_dl/extractor/nextmedia.py
@@ -89,8 +89,8 @@ class NextMediaActionNewsIE(NextMediaIE):
return self._extract_from_nextmedia_page(news_id, url, article_page)
-class AppleDailyRealtimeNewsIE(NextMediaIE):
- _VALID_URL = r'http://(www|ent).appledaily.com.tw/(realtimenews|enews)/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)(/.*)?'
+class AppleDailyIE(NextMediaIE):
+ _VALID_URL = r'http://(www|ent).appledaily.com.tw/(?:animation|appledaily|enews|realtimenews)/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)(/.*)?'
_TESTS = [{
'url': 'http://ent.appledaily.com.tw/enews/article/entertainment/20150128/36354694',
'md5': 'a843ab23d150977cc55ef94f1e2c1e4d',
@@ -99,7 +99,7 @@ class AppleDailyRealtimeNewsIE(NextMediaIE):
'ext': 'mp4',
'title': '周亭羽走過摩鐵陰霾2男陪吃 九把刀孤寒看醫生',
'thumbnail': 're:^https?://.*\.jpg$',
- 'description': 'md5:b23787119933404ce515c6356a8c355c',
+ 'description': 'md5:2acd430e59956dc47cd7f67cb3c003f4',
'upload_date': '20150128',
}
}, {
@@ -110,26 +110,10 @@ class AppleDailyRealtimeNewsIE(NextMediaIE):
'ext': 'mp4',
'title': '不滿被踩腳 山東兩大媽一路打下車',
'thumbnail': 're:^https?://.*\.jpg$',
- 'description': 'md5:2648aaf6fc4f401f6de35a91d111aa1d',
+ 'description': 'md5:175b4260c1d7c085993474217e4ab1b4',
'upload_date': '20150128',
}
- }]
-
- _URL_PATTERN = r'\{url: \'(.+)\'\}'
-
- def _fetch_title(self, page):
- return self._html_search_regex(r'<h1 id="h1">([^<>]+)</h1>', page, 'news title')
-
- def _fetch_thumbnail(self, page):
- return self._html_search_regex(r"setInitialImage\(\'([^']+)'\)", page, 'video thumbnail', fatal=False)
-
- def _fetch_timestamp(self, page):
- return None
-
-
-class AppleDailyAnimationNewsIE(AppleDailyRealtimeNewsIE):
- _VALID_URL = 'http://www.appledaily.com.tw/animation/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)(/.*)?'
- _TESTS = [{
+ }, {
'url': 'http://www.appledaily.com.tw/animation/realtimenews/new/20150128/5003671',
'md5': '03df296d95dedc2d5886debbb80cb43f',
'info_dict': {
@@ -154,10 +138,22 @@ class AppleDailyAnimationNewsIE(AppleDailyRealtimeNewsIE):
'expected_warnings': [
'video thumbnail',
]
+ }, {
+ 'url': 'http://www.appledaily.com.tw/appledaily/article/supplement/20140417/35770334/',
+ 'only_matching': True,
}]
+ _URL_PATTERN = r'\{url: \'(.+)\'\}'
+
def _fetch_title(self, page):
- return self._html_search_meta('description', page, 'news title')
+ return (self._html_search_regex(r'<h1 id="h1">([^<>]+)</h1>', page, 'news title', default=None) or
+ self._html_search_meta('description', page, 'news title'))
+
+ def _fetch_thumbnail(self, page):
+ return self._html_search_regex(r"setInitialImage\(\'([^']+)'\)", page, 'video thumbnail', fatal=False)
+
+ def _fetch_timestamp(self, page):
+ return None
def _fetch_description(self, page):
return self._html_search_meta('description', page, 'news description')
diff --git a/youtube_dl/extractor/prosiebensat1.py b/youtube_dl/extractor/prosiebensat1.py
index 7cc799664..255d4abc1 100644
--- a/youtube_dl/extractor/prosiebensat1.py
+++ b/youtube_dl/extractor/prosiebensat1.py
@@ -17,7 +17,7 @@ from ..utils import (
class ProSiebenSat1IE(InfoExtractor):
IE_NAME = 'prosiebensat1'
IE_DESC = 'ProSiebenSat.1 Digital'
- _VALID_URL = r'https?://(?:www\.)?(?:(?:prosieben|prosiebenmaxx|sixx|sat1|kabeleins|ran|the-voice-of-germany)\.de|fem\.com)/(?P<id>.+)'
+ _VALID_URL = r'https?://(?:www\.)?(?:(?:prosieben|prosiebenmaxx|sixx|sat1|kabeleins|the-voice-of-germany)\.(?:de|at)|ran\.de|fem\.com)/(?P<id>.+)'
_TESTS = [
{
diff --git a/youtube_dl/extractor/qqmusic.py b/youtube_dl/extractor/qqmusic.py
index 13113820b..b540033e2 100644
--- a/youtube_dl/extractor/qqmusic.py
+++ b/youtube_dl/extractor/qqmusic.py
@@ -26,7 +26,7 @@ class QQMusicIE(InfoExtractor):
'title': '可惜没如果',
'upload_date': '20141227',
'creator': '林俊杰',
- 'description': 'md5:4348ff1dd24036906baa7b6f973f8d30',
+ 'description': 'md5:d327722d0361576fde558f1ac68a7065',
}
}]
@@ -60,6 +60,8 @@ class QQMusicIE(InfoExtractor):
lrc_content = self._html_search_regex(
r'<div class="content" id="lrc_content"[^<>]*>([^<>]+)</div>',
detail_info_page, 'LRC lyrics', default=None)
+ if lrc_content:
+ lrc_content = lrc_content.replace('\\n', '\n')
guid = self.m_r_get_ruin()
diff --git a/youtube_dl/extractor/rtve.py b/youtube_dl/extractor/rtve.py
index 849300140..82cd98ac7 100644
--- a/youtube_dl/extractor/rtve.py
+++ b/youtube_dl/extractor/rtve.py
@@ -17,7 +17,7 @@ from ..utils import (
def _decrypt_url(png):
- encrypted_data = base64.b64decode(png)
+ encrypted_data = base64.b64decode(png.encode('utf-8'))
text_index = encrypted_data.find(b'tEXt')
text_chunk = encrypted_data[text_index - 4:]
length = struct_unpack('!I', text_chunk[:4])[0]
diff --git a/youtube_dl/extractor/rutv.py b/youtube_dl/extractor/rutv.py
index 55604637d..d9df06861 100644
--- a/youtube_dl/extractor/rutv.py
+++ b/youtube_dl/extractor/rutv.py
@@ -104,7 +104,7 @@ class RUTVIE(InfoExtractor):
@classmethod
def _extract_url(cls, webpage):
mobj = re.search(
- r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.rutv\.ru/(?:iframe/(?:swf|video|live)/id|index/iframe/cast_id)/.+?)\1', webpage)
+ r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.(?:rutv\.ru|vgtrk\.com)/(?:iframe/(?:swf|video|live)/id|index/iframe/cast_id)/.+?)\1', webpage)
if mobj:
return mobj.group('url')
diff --git a/youtube_dl/extractor/shared.py b/youtube_dl/extractor/shared.py
index 26ced716e..9f3e944e7 100644
--- a/youtube_dl/extractor/shared.py
+++ b/youtube_dl/extractor/shared.py
@@ -47,7 +47,7 @@ class SharedIE(InfoExtractor):
video_url = self._html_search_regex(
r'data-url="([^"]+)"', video_page, 'video URL')
title = base64.b64decode(self._html_search_meta(
- 'full:title', webpage, 'title')).decode('utf-8')
+ 'full:title', webpage, 'title').encode('utf-8')).decode('utf-8')
filesize = int_or_none(self._html_search_meta(
'full:size', webpage, 'file size', fatal=False))
thumbnail = self._html_search_regex(
diff --git a/youtube_dl/extractor/sohu.py b/youtube_dl/extractor/sohu.py
index eab4adfca..29bd9ce6f 100644
--- a/youtube_dl/extractor/sohu.py
+++ b/youtube_dl/extractor/sohu.py
@@ -23,9 +23,7 @@ class SohuIE(InfoExtractor):
'ext': 'mp4',
'title': 'MV:Far East Movement《The Illest》',
},
- 'params': {
- 'cn_verification_proxy': 'proxy.uku.im:8888'
- }
+ 'skip': 'On available in China',
}, {
'url': 'http://tv.sohu.com/20150305/n409385080.shtml',
'md5': 'ac9a5d322b4bf9ae184d53e4711e4f1a',
diff --git a/youtube_dl/extractor/sportbox.py b/youtube_dl/extractor/sportbox.py
index 8686f9d11..86d509ae5 100644
--- a/youtube_dl/extractor/sportbox.py
+++ b/youtube_dl/extractor/sportbox.py
@@ -6,8 +6,7 @@ import re
from .common import InfoExtractor
from ..compat import compat_urlparse
from ..utils import (
- parse_duration,
- parse_iso8601,
+ unified_strdate,
)
@@ -20,11 +19,9 @@ class SportBoxIE(InfoExtractor):
'id': '80822',
'ext': 'mp4',
'title': 'Гонка 2 заезд ««Объединенный 2000»: классы Туринг и Супер-продакшн',
- 'description': 'md5:81715fa9c4ea3d9e7915dc8180c778ed',
+ 'description': 'md5:3d72dc4a006ab6805d82f037fdc637ad',
'thumbnail': 're:^https?://.*\.jpg$',
- 'timestamp': 1411896237,
'upload_date': '20140928',
- 'duration': 4846,
},
'params': {
# m3u8 download
@@ -48,17 +45,13 @@ class SportBoxIE(InfoExtractor):
r'src="/?(vdl/player/[^"]+)"', webpage, 'player')
title = self._html_search_regex(
- r'<h1 itemprop="name">([^<]+)</h1>', webpage, 'title')
- description = self._html_search_regex(
- r'(?s)<div itemprop="description">(.+?)</div>',
- webpage, 'description', fatal=False)
+ [r'"nodetitle"\s*:\s*"([^"]+)"', r'class="node-header_{1,2}title">([^<]+)'],
+ webpage, 'title')
+ description = self._og_search_description(webpage) or self._html_search_meta(
+ 'description', webpage, 'description')
thumbnail = self._og_search_thumbnail(webpage)
- timestamp = parse_iso8601(self._search_regex(
- r'<span itemprop="uploadDate">([^<]+)</span>',
- webpage, 'timestamp', fatal=False))
- duration = parse_duration(self._html_search_regex(
- r'<meta itemprop="duration" content="PT([^"]+)">',
- webpage, 'duration', fatal=False))
+ upload_date = unified_strdate(self._html_search_meta(
+ 'dateCreated', webpage, 'upload date'))
return {
'_type': 'url_transparent',
@@ -67,8 +60,7 @@ class SportBoxIE(InfoExtractor):
'title': title,
'description': description,
'thumbnail': thumbnail,
- 'timestamp': timestamp,
- 'duration': duration,
+ 'upload_date': upload_date,
}
diff --git a/youtube_dl/extractor/telecinco.py b/youtube_dl/extractor/telecinco.py
index 251a68680..a0c744fd1 100644
--- a/youtube_dl/extractor/telecinco.py
+++ b/youtube_dl/extractor/telecinco.py
@@ -16,6 +16,10 @@ class TelecincoIE(MiTeleIE):
'title': 'Con Martín Berasategui, hacer un bacalao al ...',
'duration': 662,
},
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
}, {
'url': 'http://www.telecinco.es/informativos/nacional/Pablo_Iglesias-Informativos_Telecinco-entrevista-Pedro_Piqueras_2_1945155182.html',
'only_matching': True,
diff --git a/youtube_dl/extractor/tnaflix.py b/youtube_dl/extractor/tnaflix.py
index d48cbbf14..59af9aba0 100644
--- a/youtube_dl/extractor/tnaflix.py
+++ b/youtube_dl/extractor/tnaflix.py
@@ -10,26 +10,32 @@ from ..utils import (
class TNAFlixIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?tnaflix\.com/(?P<cat_id>[\w-]+)/(?P<display_id>[\w-]+)/video(?P<id>\d+)'
+ _VALID_URL = r'https?://(?:www\.)?tnaflix\.com/[^/]+/(?P<display_id>[^/]+)/video(?P<id>\d+)'
_TITLE_REGEX = r'<title>(.+?) - TNAFlix Porn Videos</title>'
_DESCRIPTION_REGEX = r'<h3 itemprop="description">([^<]+)</h3>'
_CONFIG_REGEX = r'flashvars\.config\s*=\s*escape\("([^"]+)"'
- _TEST = {
- 'url': 'http://www.tnaflix.com/porn-stars/Carmella-Decesare-striptease/video553878',
- 'md5': 'ecf3498417d09216374fc5907f9c6ec0',
- 'info_dict': {
- 'id': '553878',
- 'display_id': 'Carmella-Decesare-striptease',
- 'ext': 'mp4',
- 'title': 'Carmella Decesare - striptease',
- 'description': '',
- 'thumbnail': 're:https?://.*\.jpg$',
- 'duration': 91,
- 'age_limit': 18,
+ _TESTS = [
+ {
+ 'url': 'http://www.tnaflix.com/porn-stars/Carmella-Decesare-striptease/video553878',
+ 'md5': 'ecf3498417d09216374fc5907f9c6ec0',
+ 'info_dict': {
+ 'id': '553878',
+ 'display_id': 'Carmella-Decesare-striptease',
+ 'ext': 'mp4',
+ 'title': 'Carmella Decesare - striptease',
+ 'description': '',
+ 'thumbnail': 're:https?://.*\.jpg$',
+ 'duration': 91,
+ 'age_limit': 18,
+ }
+ },
+ {
+ 'url': 'https://www.tnaflix.com/amateur-porn/bunzHD-Ms.Donk/video358632',
+ 'matching_only': True,
}
- }
+ ]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
diff --git a/youtube_dl/extractor/tutv.py b/youtube_dl/extractor/tutv.py
index 4de0aac52..fad720b68 100644
--- a/youtube_dl/extractor/tutv.py
+++ b/youtube_dl/extractor/tutv.py
@@ -26,7 +26,7 @@ class TutvIE(InfoExtractor):
data_content = self._download_webpage(
'http://tu.tv/flvurl.php?codVideo=%s' % internal_id, video_id, 'Downloading video info')
- video_url = base64.b64decode(compat_parse_qs(data_content)['kpt'][0]).decode('utf-8')
+ video_url = base64.b64decode(compat_parse_qs(data_content)['kpt'][0].encode('utf-8')).decode('utf-8')
return {
'id': internal_id,
diff --git a/youtube_dl/extractor/tv2.py b/youtube_dl/extractor/tv2.py
index 2dcc0e971..fa338b936 100644
--- a/youtube_dl/extractor/tv2.py
+++ b/youtube_dl/extractor/tv2.py
@@ -1,12 +1,15 @@
# encoding: utf-8
from __future__ import unicode_literals
+import re
+
from .common import InfoExtractor
from ..utils import (
determine_ext,
int_or_none,
float_or_none,
parse_iso8601,
+ remove_end,
)
@@ -91,3 +94,33 @@ class TV2IE(InfoExtractor):
'categories': categories,
'formats': formats,
}
+
+
+class TV2ArticleIE(InfoExtractor):
+ _VALID_URL = 'http://(?:www\.)?tv2\.no/(?:a|\d{4}/\d{2}/\d{2}(/[^/]+)+)/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'http://www.tv2.no/2015/05/16/nyheter/alesund/krim/pingvin/6930542',
+ 'info_dict': {
+ 'id': '6930542',
+ 'title': 'Russen hetses etter pingvintyveri – innrømmer å ha åpnet luken på buret',
+ 'description': 'md5:339573779d3eea3542ffe12006190954',
+ },
+ 'playlist_count': 2,
+ }, {
+ 'url': 'http://www.tv2.no/a/6930542',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, playlist_id)
+
+ entries = [
+ self.url_result('http://www.tv2.no/v/%s' % video_id, 'TV2')
+ for video_id in re.findall(r'data-assetid="(\d+)"', webpage)]
+
+ title = remove_end(self._og_search_title(webpage), ' - TV2.no')
+ description = remove_end(self._og_search_description(webpage), ' - TV2.no')
+
+ return self.playlist_result(entries, playlist_id, title, description)
diff --git a/youtube_dl/extractor/ultimedia.py b/youtube_dl/extractor/ultimedia.py
index 96c809eaf..c4751050e 100644
--- a/youtube_dl/extractor/ultimedia.py
+++ b/youtube_dl/extractor/ultimedia.py
@@ -4,6 +4,7 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
+from ..compat import compat_urllib_parse_urlparse
from ..utils import (
ExtractorError,
qualities,
@@ -44,9 +45,9 @@ class UltimediaIE(InfoExtractor):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
- deliver_url = self._search_regex(
- r'<iframe[^>]+src="(https?://(?:www\.)?ultimedia\.com/deliver/[^"]+)"',
- webpage, 'deliver URL')
+ deliver_url = self._proto_relative_url(self._search_regex(
+ r'<iframe[^>]+src="((?:https?:)?//(?:www\.)?ultimedia\.com/deliver/[^"]+)"',
+ webpage, 'deliver URL'), compat_urllib_parse_urlparse(url).scheme + ':')
deliver_page = self._download_webpage(
deliver_url, video_id, 'Downloading iframe page')
@@ -57,7 +58,8 @@ class UltimediaIE(InfoExtractor):
player = self._parse_json(
self._search_regex(
- r"jwplayer\('player(?:_temp)?'\)\.setup\(({.+?})\)\.on", deliver_page, 'player'),
+ r"jwplayer\('player(?:_temp)?'\)\.setup\(({.+?})\)\.on",
+ deliver_page, 'player'),
video_id)
quality = qualities(['flash', 'html5'])
diff --git a/youtube_dl/extractor/videott.py b/youtube_dl/extractor/videott.py
index ececc7ee0..591024ead 100644
--- a/youtube_dl/extractor/videott.py
+++ b/youtube_dl/extractor/videott.py
@@ -43,7 +43,7 @@ class VideoTtIE(InfoExtractor):
formats = [
{
- 'url': base64.b64decode(res['u']).decode('utf-8'),
+ 'url': base64.b64decode(res['u'].encode('utf-8')).decode('utf-8'),
'ext': 'flv',
'format_id': res['l'],
} for res in settings['res'] if res['u']
diff --git a/youtube_dl/extractor/vier.py b/youtube_dl/extractor/vier.py
index 619039e51..15377097e 100644
--- a/youtube_dl/extractor/vier.py
+++ b/youtube_dl/extractor/vier.py
@@ -38,11 +38,14 @@ class VierIE(InfoExtractor):
webpage = self._download_webpage(url, display_id)
video_id = self._search_regex(
- r'"nid"\s*:\s*"(\d+)"', webpage, 'video id')
+ [r'data-nid="(\d+)"', r'"nid"\s*:\s*"(\d+)"'],
+ webpage, 'video id')
application = self._search_regex(
- r'"application"\s*:\s*"([^"]+)"', webpage, 'application', default='vier_vod')
+ [r'data-application="([^"]+)"', r'"application"\s*:\s*"([^"]+)"'],
+ webpage, 'application', default='vier_vod')
filename = self._search_regex(
- r'"filename"\s*:\s*"([^"]+)"', webpage, 'filename')
+ [r'data-filename="([^"]+)"', r'"filename"\s*:\s*"([^"]+)"'],
+ webpage, 'filename')
playlist_url = 'http://vod.streamcloud.be/%s/mp4:_definst_/%s.mp4/playlist.m3u8' % (application, filename)
formats = self._extract_m3u8_formats(playlist_url, display_id, 'mp4')
diff --git a/youtube_dl/extractor/viki.py b/youtube_dl/extractor/viki.py
index cf6af1e5c..7f2fb1ca8 100644
--- a/youtube_dl/extractor/viki.py
+++ b/youtube_dl/extractor/viki.py
@@ -1,29 +1,65 @@
from __future__ import unicode_literals
-import re
+import time
+import hmac
+import hashlib
+import itertools
-from ..compat import (
- compat_urlparse,
- compat_urllib_request,
-)
from ..utils import (
ExtractorError,
- unescapeHTML,
- unified_strdate,
- US_RATINGS,
- determine_ext,
- mimetype2ext,
+ int_or_none,
+ parse_age_limit,
+ parse_iso8601,
)
from .common import InfoExtractor
-class VikiIE(InfoExtractor):
- IE_NAME = 'viki'
+class VikiBaseIE(InfoExtractor):
+ _VALID_URL_BASE = r'https?://(?:www\.)?viki\.(?:com|net|mx|jp|fr)/'
+ _API_QUERY_TEMPLATE = '/v4/%sapp=%s&t=%s&site=www.viki.com'
+ _API_URL_TEMPLATE = 'http://api.viki.io%s&sig=%s'
+
+ _APP = '65535a'
+ _APP_VERSION = '2.2.5.1428709186'
+ _APP_SECRET = '-$iJ}@p7!G@SyU/je1bEyWg}upLu-6V6-Lg9VD(]siH,r.,m-r|ulZ,U4LC/SeR)'
+
+ def _prepare_call(self, path, timestamp=None):
+ path += '?' if '?' not in path else '&'
+ if not timestamp:
+ timestamp = int(time.time())
+ query = self._API_QUERY_TEMPLATE % (path, self._APP, timestamp)
+ sig = hmac.new(
+ self._APP_SECRET.encode('ascii'),
+ query.encode('ascii'),
+ hashlib.sha1
+ ).hexdigest()
+ return self._API_URL_TEMPLATE % (query, sig)
+
+ def _call_api(self, path, video_id, note, timestamp=None):
+ resp = self._download_json(
+ self._prepare_call(path, timestamp), video_id, note)
+
+ error = resp.get('error')
+ if error:
+ if error == 'invalid timestamp':
+ resp = self._download_json(
+ self._prepare_call(path, int(resp['current_timestamp'])),
+ video_id, '%s (retry)' % note)
+ error = resp.get('error')
+ if error:
+ self._raise_error(resp['error'])
+
+ return resp
- # iPad2
- _USER_AGENT = 'Mozilla/5.0(iPad; U; CPU OS 4_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8F191 Safari/6533.18.5'
+ def _raise_error(self, error):
+ raise ExtractorError(
+ '%s returned error: %s' % (self.IE_NAME, error),
+ expected=True)
- _VALID_URL = r'^https?://(?:www\.)?viki\.com/videos/(?P<id>[0-9]+v)'
+
+class VikiIE(VikiBaseIE):
+ IE_NAME = 'viki'
+ _VALID_URL = r'%s(?:videos|player)/(?P<id>[0-9]+v)' % VikiBaseIE._VALID_URL_BASE
_TESTS = [{
'url': 'http://www.viki.com/videos/1023585v-heirs-episode-14',
'info_dict': {
@@ -37,111 +73,218 @@ class VikiIE(InfoExtractor):
},
'skip': 'Blocked in the US',
}, {
+ # clip
'url': 'http://www.viki.com/videos/1067139v-the-avengers-age-of-ultron-press-conference',
- 'md5': 'ca6493e6f0a6ec07da9aa8d6304b4b2c',
+ 'md5': '86c0b5dbd4d83a6611a79987cc7a1989',
'info_dict': {
'id': '1067139v',
'ext': 'mp4',
+ 'title': "'The Avengers: Age of Ultron' Press Conference",
'description': 'md5:d70b2f9428f5488321bfe1db10d612ea',
+ 'duration': 352,
+ 'timestamp': 1430380829,
'upload_date': '20150430',
- 'title': '\'The Avengers: Age of Ultron\' Press Conference',
+ 'uploader': 'Arirang TV',
+ 'like_count': int,
+ 'age_limit': 0,
}
}, {
'url': 'http://www.viki.com/videos/1048879v-ankhon-dekhi',
'info_dict': {
'id': '1048879v',
'ext': 'mp4',
- 'upload_date': '20140820',
- 'description': 'md5:54ff56d51bdfc7a30441ec967394e91c',
'title': 'Ankhon Dekhi',
+ 'duration': 6512,
+ 'timestamp': 1408532356,
+ 'upload_date': '20140820',
+ 'uploader': 'Spuul',
+ 'like_count': int,
+ 'age_limit': 13,
},
'params': {
- # requires ffmpeg
+ # m3u8 download
'skip_download': True,
}
+ }, {
+ # episode
+ 'url': 'http://www.viki.com/videos/44699v-boys-over-flowers-episode-1',
+ 'md5': '190f3ef426005ba3a080a63325955bc3',
+ 'info_dict': {
+ 'id': '44699v',
+ 'ext': 'mp4',
+ 'title': 'Boys Over Flowers - Episode 1',
+ 'description': 'md5:52617e4f729c7d03bfd4bcbbb6e946f2',
+ 'duration': 4155,
+ 'timestamp': 1270496524,
+ 'upload_date': '20100405',
+ 'uploader': 'group8',
+ 'like_count': int,
+ 'age_limit': 13,
+ }
+ }, {
+ # youtube external
+ 'url': 'http://www.viki.com/videos/50562v-poor-nastya-complete-episode-1',
+ 'md5': '216d1afdc0c64d1febc1e9f2bd4b864b',
+ 'info_dict': {
+ 'id': '50562v',
+ 'ext': 'mp4',
+ 'title': 'Poor Nastya [COMPLETE] - Episode 1',
+ 'description': '',
+ 'duration': 607,
+ 'timestamp': 1274949505,
+ 'upload_date': '20101213',
+ 'uploader': 'ad14065n',
+ 'uploader_id': 'ad14065n',
+ 'like_count': int,
+ 'age_limit': 13,
+ }
+ }, {
+ 'url': 'http://www.viki.com/player/44699v',
+ 'only_matching': True,
}]
def _real_extract(self, url):
+ """Build the info dict for a single Viki video from the API JSON.
+
+ Requests 'videos/<id>.json' for metadata and 'videos/<id>/streams.json'
+ for the media URLs via self._call_api. When the streams JSON has an
+ 'external' entry, a 'url_transparent' result pointing at that URL is
+ returned; otherwise a result carrying a sorted 'formats' list.
+ """
video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
- title = self._og_search_title(webpage)
- description = self._og_search_description(webpage)
- thumbnail = self._og_search_thumbnail(webpage)
-
- uploader_m = re.search(
- r'<strong>Broadcast Network: </strong>\s*([^<]*)<', webpage)
- if uploader_m is None:
- uploader = None
- else:
- uploader = uploader_m.group(1).strip()
-
- rating_str = self._html_search_regex(
- r'<strong>Rating: </strong>\s*([^<]*)<', webpage,
- 'rating information', default='').strip()
- age_limit = US_RATINGS.get(rating_str)
-
- req = compat_urllib_request.Request(
- 'http://www.viki.com/player5_fragment/%s?action=show&controller=videos' % video_id)
- req.add_header('User-Agent', self._USER_AGENT)
- info_webpage = self._download_webpage(
- req, video_id, note='Downloading info page')
- err_msg = self._html_search_regex(r'<div[^>]+class="video-error[^>]+>(.+)</div>', info_webpage, 'error message', default=None)
- if err_msg:
- if 'not available in your region' in err_msg:
- raise ExtractorError(
- 'Video %s is blocked from your location.' % video_id,
- expected=True)
- else:
- raise ExtractorError('Viki said: ' + err_msg)
- mobj = re.search(
- r'<source[^>]+type="(?P<mime_type>[^"]+)"[^>]+src="(?P<url>[^"]+)"', info_webpage)
- if not mobj:
- raise ExtractorError('Unable to find video URL')
- video_url = unescapeHTML(mobj.group('url'))
- video_ext = mimetype2ext(mobj.group('mime_type'))
-
- if determine_ext(video_url) == 'm3u8':
- formats = self._extract_m3u8_formats(
- video_url, video_id, ext=video_ext)
- else:
- formats = [{
- 'url': video_url,
- 'ext': video_ext,
- }]
-
- upload_date_str = self._html_search_regex(
- r'"created_at":"([^"]+)"', info_webpage, 'upload date')
- upload_date = (
- unified_strdate(upload_date_str)
- if upload_date_str is not None
- else None
- )
-
- # subtitles
- video_subtitles = self.extract_subtitles(video_id, info_webpage)
-
- return {
+ video = self._call_api(
+ 'videos/%s.json' % video_id, video_id, 'Downloading video JSON')
+
+ title = None
+ titles = video.get('titles')
+ if titles:
+ # Prefer the English title, else fall back to any available one.
+ # dict.keys() cannot be indexed on Python 3, so take the first value.
+ title = titles.get('en') or next(iter(titles.values()))
+ if not title:
+ title = 'Episode %d' % video.get('number') if video.get('type') == 'episode' else video.get('id') or video_id
+ container_titles = video.get('container', {}).get('titles')
+ if container_titles:
+ container_title = container_titles.get('en') or next(iter(container_titles.values()))
+ title = '%s - %s' % (container_title, title)
+
+ descriptions = video.get('descriptions')
+ # Fall back within descriptions itself; its keys need not match those of titles.
+ description = descriptions.get('en') or next(iter(descriptions.values())) if descriptions else None
+
+ duration = int_or_none(video.get('duration'))
+ timestamp = parse_iso8601(video.get('created_at'))
+ uploader = video.get('author')
+ like_count = int_or_none(video.get('likes', {}).get('count'))
+ age_limit = parse_age_limit(video.get('rating'))
+
+ thumbnails = []
+ for thumbnail_id, thumbnail in video.get('images', {}).items():
+ thumbnails.append({
+ 'id': thumbnail_id,
+ 'url': thumbnail.get('url'),
+ })
+
+ # One srt and one vtt entry per language listed in 'subtitle_completions'.
+ subtitles = {}
+ for subtitle_lang, _ in video.get('subtitle_completions', {}).items():
+ subtitles[subtitle_lang] = [{
+ 'ext': subtitles_format,
+ 'url': self._prepare_call(
+ 'videos/%s/subtitles/%s.%s' % (video_id, subtitle_lang, subtitles_format)),
+ } for subtitles_format in ('srt', 'vtt')]
+
+ result = {
'id': video_id,
'title': title,
- 'formats': formats,
'description': description,
- 'thumbnail': thumbnail,
- 'age_limit': age_limit,
+ 'duration': duration,
+ 'timestamp': timestamp,
'uploader': uploader,
- 'subtitles': video_subtitles,
- 'upload_date': upload_date,
+ 'like_count': like_count,
+ 'age_limit': age_limit,
+ 'thumbnails': thumbnails,
+ 'subtitles': subtitles,
}
- def _get_subtitles(self, video_id, info_webpage):
- res = {}
- for sturl_html in re.findall(r'<track src="([^"]+)"', info_webpage):
- sturl = unescapeHTML(sturl_html)
- m = re.search(r'/(?P<lang>[a-z]+)\.vtt', sturl)
- if not m:
- continue
- res[m.group('lang')] = [{
- 'url': compat_urlparse.urljoin('http://www.viki.com', sturl),
- 'ext': 'vtt',
- }]
- return res
+ streams = self._call_api(
+ 'videos/%s/streams.json' % video_id, video_id,
+ 'Downloading video streams JSON')
+
+ # Externally hosted video: hand the URL over while keeping the metadata above.
+ if 'external' in streams:
+ result.update({
+ '_type': 'url_transparent',
+ 'url': streams['external']['url'],
+ })
+ return result
+
+ formats = []
+ for format_id, stream_dict in streams.items():
+ # Store the height as an integer (or None) so it sorts with other formats.
+ height = int_or_none(self._search_regex(
+ r'^(\d+)[pP]$', format_id, 'height', default=None))
+ for protocol, format_dict in stream_dict.items():
+ if format_id == 'm3u8':
+ # Extend rather than rebind, so formats collected so far are kept.
+ formats.extend(self._extract_m3u8_formats(
+ format_dict['url'], video_id, 'mp4', m3u8_id='m3u8-%s' % protocol))
+ else:
+ formats.append({
+ 'url': format_dict['url'],
+ 'format_id': '%s-%s' % (format_id, protocol),
+ 'height': height,
+ })
+ self._sort_formats(formats)
+
+ result['formats'] = formats
+ return result
+
+
+class VikiChannelIE(VikiBaseIE):
+ """Extractor that turns a Viki channel (container) URL into a playlist."""
+
+ IE_NAME = 'viki:channel'
+ _VALID_URL = r'%s(?:tv|news|movies|artists)/(?P<id>[0-9]+c)' % VikiBaseIE._VALID_URL_BASE
+ _TESTS = [{
+ 'url': 'http://www.viki.com/tv/50c-boys-over-flowers',
+ 'info_dict': {
+ 'id': '50c',
+ 'title': 'Boys Over Flowers',
+ 'description': 'md5:ecd3cff47967fe193cff37c0bec52790',
+ },
+ 'playlist_count': 70,
+ }, {
+ 'url': 'http://www.viki.com/tv/1354c-poor-nastya-complete',
+ 'info_dict': {
+ 'id': '1354c',
+ 'title': 'Poor Nastya [COMPLETE]',
+ 'description': 'md5:05bf5471385aa8b21c18ad450e350525',
+ },
+ 'playlist_count': 127,
+ }, {
+ 'url': 'http://www.viki.com/news/24569c-showbiz-korea',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.viki.com/movies/22047c-pride-and-prejudice-2005',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.viki.com/artists/2141c-shinee',
+ 'only_matching': True,
+ }]
+
+ # Number of videos requested per API page.
+ _PER_PAGE = 25
+
+ def _real_extract(self, url):
+ """Return a playlist result with one url_result entry per channel video.
+
+ Reads 'containers/<id>.json' for the title and description, then pages
+ through the episodes, clips and movies listings until the API reports
+ no next page.
+ """
+ channel_id = self._match_id(url)
+
+ channel = self._call_api(
+ 'containers/%s.json' % channel_id, channel_id,
+ 'Downloading channel JSON')
+
+ # Prefer English, else any available value. dict.keys() cannot be indexed
+ # on Python 3; the None default keeps an empty mapping from raising.
+ titles = channel['titles']
+ title = titles.get('en') or next(iter(titles.values()), None)
+
+ descriptions = channel['descriptions']
+ description = descriptions.get('en') or next(iter(descriptions.values()), None)
+
+ entries = []
+ for video_type in ('episodes', 'clips', 'movies'):
+ for page_num in itertools.count(1):
+ page = self._call_api(
+ 'containers/%s/%s.json?per_page=%d&sort=number&direction=asc&with_paging=true&page=%d'
+ % (channel_id, video_type, self._PER_PAGE, page_num), channel_id,
+ 'Downloading %s JSON page #%d' % (video_type, page_num))
+ for video in page['response']:
+ video_id = video['id']
+ entries.append(self.url_result(
+ 'http://www.viki.com/videos/%s' % video_id, 'Viki'))
+ # Stop paging this video type once there is no next page.
+ if not page['pagination']['next']:
+ break
+
+ return self.playlist_result(entries, channel_id, title, description)
diff --git a/youtube_dl/extractor/vuclip.py b/youtube_dl/extractor/vuclip.py
index c3fde53f5..a6d9b5fee 100644
--- a/youtube_dl/extractor/vuclip.py
+++ b/youtube_dl/extractor/vuclip.py
@@ -49,7 +49,7 @@ class VuClipIE(InfoExtractor):
links_code = self._search_regex(
r'''(?xs)
(?:
- <img\s+src="/im/play.gif".*?>|
+ <img\s+src="[^"]*/play.gif".*?>|
<!--\ player\ end\ -->\s*</div><!--\ thumb\ end-->
)
(.*?)
diff --git a/youtube_dl/extractor/vulture.py b/youtube_dl/extractor/vulture.py
index 1eb24a3d6..faa167e65 100644
--- a/youtube_dl/extractor/vulture.py
+++ b/youtube_dl/extractor/vulture.py
@@ -44,7 +44,7 @@ class VultureIE(InfoExtractor):
query_webpage = self._download_webpage(
query_url, display_id, note='Downloading query page')
params_json = self._search_regex(
- r'(?sm)new MagnifyEmbeddablePlayer\({.*?contentItem:\s*(\{.*?\})\n,\n',
+ r'(?sm)new MagnifyEmbeddablePlayer\({.*?contentItem:\s*(\{.*?\})\n?,\n',
query_webpage,
'player params')
params = json.loads(params_json)
diff --git a/youtube_dl/extractor/wimp.py b/youtube_dl/extractor/wimp.py
index d6dec25ca..f69d46a28 100644
--- a/youtube_dl/extractor/wimp.py
+++ b/youtube_dl/extractor/wimp.py
@@ -37,7 +37,8 @@ class WimpIE(InfoExtractor):
video_id = mobj.group(1)
webpage = self._download_webpage(url, video_id)
video_url = self._search_regex(
- r"[\"']file[\"']\s*[:,]\s*[\"'](.+?)[\"']", webpage, 'video URL')
+ [r"[\"']file[\"']\s*[:,]\s*[\"'](.+?)[\"']", r"videoId\s*:\s*[\"']([^\"']+)[\"']"],
+ webpage, 'video URL')
if YoutubeIE.suitable(video_url):
self.to_screen('Found YouTube video')
return {
diff --git a/youtube_dl/extractor/xminus.py b/youtube_dl/extractor/xminus.py
index 8c6241aed..7c9d8af6f 100644
--- a/youtube_dl/extractor/xminus.py
+++ b/youtube_dl/extractor/xminus.py
@@ -43,7 +43,7 @@ class XMinusIE(InfoExtractor):
r'minus_track\.dur_sec=\'([0-9]*?)\'',
webpage, 'duration', fatal=False))
filesize_approx = parse_filesize(self._html_search_regex(
- r'<div class="filesize[^"]*"></div>\s*([0-9.]+\s*[a-zA-Z][bB])',
+ r'<div id="finfo"[^>]*>\s*↓\s*([0-9.]+\s*[a-zA-Z][bB])',
webpage, 'approximate filesize', fatal=False))
tbr = int_or_none(self._html_search_regex(
r'<div class="quality[^"]*"></div>\s*([0-9]+)\s*kbps',
@@ -58,7 +58,7 @@ class XMinusIE(InfoExtractor):
description = re.sub(' *\r *', '\n', description)
enc_token = self._html_search_regex(
- r'minus_track\.tkn="(.+?)"', webpage, 'enc_token')
+ r'minus_track\.s?tkn="(.+?)"', webpage, 'enc_token')
token = ''.join(
c if pos == 3 else compat_chr(compat_ord(c) - 1)
for pos, c in enumerate(reversed(enc_token)))
diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py
index bf4e659ac..f9afbdbab 100644
--- a/youtube_dl/extractor/yahoo.py
+++ b/youtube_dl/extractor/yahoo.py
@@ -15,6 +15,7 @@ from ..utils import (
unescapeHTML,
ExtractorError,
int_or_none,
+ mimetype2ext,
)
from .nbc import NBCSportsVPlayerIE
@@ -236,6 +237,22 @@ class YahooIE(InfoExtractor):
self._sort_formats(formats)
+ closed_captions = self._html_search_regex(
+ r'"closedcaptions":(\[[^\]]+\])', webpage, 'closed captions',
+ default='[]')
+
+ cc_json = self._parse_json(closed_captions, video_id, fatal=False)
+ subtitles = {}
+ if cc_json:
+ for closed_caption in cc_json:
+ lang = closed_caption['lang']
+ if lang not in subtitles:
+ subtitles[lang] = []
+ subtitles[lang].append({
+ 'url': closed_caption['url'],
+ 'ext': mimetype2ext(closed_caption['content_type']),
+ })
+
return {
'id': video_id,
'display_id': display_id,
@@ -244,6 +261,7 @@ class YahooIE(InfoExtractor):
'description': clean_html(meta['description']),
'thumbnail': meta['thumbnail'] if meta.get('thumbnail') else self._og_search_thumbnail(webpage),
'duration': int_or_none(meta.get('duration')),
+ 'subtitles': subtitles,
}
diff --git a/youtube_dl/options.py b/youtube_dl/options.py
index 22dbc3aec..5a2315bd9 100644
--- a/youtube_dl/options.py
+++ b/youtube_dl/options.py
@@ -537,7 +537,7 @@ def parseOpts(overrideArguments=None):
verbosity.add_option(
'--dump-pages', '--dump-intermediate-pages',
action='store_true', dest='dump_intermediate_pages', default=False,
- help='Print downloaded pages to debug problems (very verbose)')
+ help='Print downloaded pages encoded using base64 to debug problems (very verbose)')
verbosity.add_option(
'--write-pages',
action='store_true', dest='write_pages', default=False,
@@ -713,7 +713,7 @@ def parseOpts(overrideArguments=None):
help='Parse additional metadata like song title / artist from the video title. '
'The format syntax is the same as --output, '
'the parsed parameters replace existing values. '
- 'Additional templates: %(album), %(artist). '
+ 'Additional templates: %(album)s, %(artist)s. '
'Example: --metadata-from-title "%(artist)s - %(title)s" matches a title like '
'"Coldplay - Paradise"')
postproc.add_option(
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index ed9ed9ed6..52d198fa3 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -1665,6 +1665,7 @@ def mimetype2ext(mt):
return {
'x-ms-wmv': 'wmv',
'x-mp4-fragmented': 'mp4',
+ 'ttml+xml': 'ttml',
}.get(res, res)
@@ -1848,9 +1849,9 @@ def dfxp2srt(dfxp_data):
out = str_or_empty(node.text)
for child in node:
- if child.tag == _x('ttml:br'):
+ if child.tag in (_x('ttml:br'), 'br'):
out += '\n' + str_or_empty(child.tail)
- elif child.tag == _x('ttml:span'):
+ elif child.tag in (_x('ttml:span'), 'span'):
out += str_or_empty(parse_node(child))
else:
out += str_or_empty(xml.etree.ElementTree.tostring(child))
@@ -1859,7 +1860,10 @@ def dfxp2srt(dfxp_data):
dfxp = xml.etree.ElementTree.fromstring(dfxp_data.encode('utf-8'))
out = []
- paras = dfxp.findall(_x('.//ttml:p'))
+ paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')
+
+ if not paras:
+ raise ValueError('Invalid dfxp/TTML subtitle')
for para, index in zip(paras, itertools.count(1)):
begin_time = parse_dfxp_time_expr(para.attrib['begin'])
diff --git a/youtube_dl/version.py b/youtube_dl/version.py
index 38f00bc9b..b33385153 100644
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,3 +1,3 @@
from __future__ import unicode_literals
-__version__ = '2015.05.15'
+__version__ = '2015.05.20'