aboutsummaryrefslogtreecommitdiff
path: root/youtube_dl
diff options
context:
space:
mode:
Diffstat (limited to 'youtube_dl')
-rw-r--r--youtube_dl/extractor/__init__.py11
-rw-r--r--youtube_dl/extractor/arte.py16
-rw-r--r--youtube_dl/extractor/cbsnews.py52
-rw-r--r--youtube_dl/extractor/generic.py17
-rw-r--r--youtube_dl/extractor/kuwo.py4
-rw-r--r--youtube_dl/extractor/spankbang.py11
-rw-r--r--youtube_dl/extractor/srgssr.py5
-rw-r--r--youtube_dl/extractor/vidme.py71
-rw-r--r--youtube_dl/extractor/youtube.py5
-rw-r--r--youtube_dl/postprocessor/ffmpeg.py4
-rw-r--r--youtube_dl/utils.py29
-rw-r--r--youtube_dl/version.py2
12 files changed, 186 insertions, 41 deletions
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index e61a88de7..2fbc7f812 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -90,7 +90,10 @@ from .canalplus import CanalplusIE
from .canalc2 import Canalc2IE
from .canvas import CanvasIE
from .cbs import CBSIE
-from .cbsnews import CBSNewsIE
+from .cbsnews import (
+ CBSNewsIE,
+ CBSNewsLiveVideoIE,
+)
from .cbssports import CBSSportsIE
from .ccc import CCCIE
from .ceskatelevize import CeskaTelevizeIE
@@ -819,7 +822,11 @@ from .videomore import (
)
from .videopremium import VideoPremiumIE
from .videott import VideoTtIE
-from .vidme import VidmeIE
+from .vidme import (
+ VidmeIE,
+ VidmeUserIE,
+ VidmeUserLikesIE,
+)
from .vidzi import VidziIE
from .vier import VierIE, VierVideosIE
from .viewster import ViewsterIE
diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py
index b9e07f0ef..6ed855a57 100644
--- a/youtube_dl/extractor/arte.py
+++ b/youtube_dl/extractor/arte.py
@@ -13,6 +13,7 @@ from ..utils import (
unified_strdate,
get_element_by_attribute,
int_or_none,
+ NO_DEFAULT,
qualities,
)
@@ -93,9 +94,18 @@ class ArteTVPlus7IE(InfoExtractor):
json_url = self._html_search_regex(
patterns, webpage, 'json vp url', default=None)
if not json_url:
- iframe_url = self._html_search_regex(
- r'<iframe[^>]+src=(["\'])(?P<url>.+\bjson_url=.+?)\1',
- webpage, 'iframe url', group='url')
+ def find_iframe_url(webpage, default=NO_DEFAULT):
+ return self._html_search_regex(
+ r'<iframe[^>]+src=(["\'])(?P<url>.+\bjson_url=.+?)\1',
+ webpage, 'iframe url', group='url', default=default)
+
+ iframe_url = find_iframe_url(webpage, None)
+ if not iframe_url:
+ embed_url = self._html_search_regex(
+ r'arte_vp_url_oembed=\'([^\']+?)\'', webpage, 'embed url')
+ player = self._download_json(
+ embed_url, video_id, 'Downloading player page')
+ iframe_url = find_iframe_url(player['html'])
json_url = compat_parse_qs(
compat_urllib_parse_urlparse(iframe_url).query)['json_url'][0]
return self._extract_from_json_url(json_url, video_id, lang)
diff --git a/youtube_dl/extractor/cbsnews.py b/youtube_dl/extractor/cbsnews.py
index cabf7e73b..8f864699f 100644
--- a/youtube_dl/extractor/cbsnews.py
+++ b/youtube_dl/extractor/cbsnews.py
@@ -1,15 +1,14 @@
# encoding: utf-8
from __future__ import unicode_literals
-import re
-import json
-
+from .common import InfoExtractor
from .theplatform import ThePlatformIE
+from ..utils import parse_duration
class CBSNewsIE(ThePlatformIE):
IE_DESC = 'CBS News'
- _VALID_URL = r'http://(?:www\.)?cbsnews\.com/(?:[^/]+/)+(?P<id>[\da-z_-]+)'
+ _VALID_URL = r'http://(?:www\.)?cbsnews\.com/(?:news|videos)/(?P<id>[\da-z_-]+)'
_TESTS = [
{
@@ -48,14 +47,13 @@ class CBSNewsIE(ThePlatformIE):
]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
+ video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
- video_info = json.loads(self._html_search_regex(
+ video_info = self._parse_json(self._html_search_regex(
r'(?:<ul class="media-list items" id="media-related-items"><li data-video-info|<div id="cbsNewsVideoPlayer" data-video-player-options)=\'({.+?})\'',
- webpage, 'video JSON info'))
+ webpage, 'video JSON info'), video_id)
item = video_info['item'] if 'item' in video_info else video_info
title = item.get('articleTitle') or item.get('hed')
@@ -88,3 +86,41 @@ class CBSNewsIE(ThePlatformIE):
'formats': formats,
'subtitles': subtitles,
}
+
+
+class CBSNewsLiveVideoIE(InfoExtractor):
+ IE_DESC = 'CBS News Live Videos'
+ _VALID_URL = r'http://(?:www\.)?cbsnews\.com/live/video/(?P<id>[\da-z_-]+)'
+
+ _TEST = {
+ 'url': 'http://www.cbsnews.com/live/video/clinton-sanders-prepare-to-face-off-in-nh/',
+ 'info_dict': {
+ 'id': 'clinton-sanders-prepare-to-face-off-in-nh',
+ 'ext': 'flv',
+ 'title': 'Clinton, Sanders Prepare To Face Off In NH',
+ 'duration': 334,
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ video_info = self._parse_json(self._html_search_regex(
+ r'data-story-obj=\'({.+?})\'', webpage, 'video JSON info'), video_id)['story']
+
+ hdcore_sign = 'hdcore=3.3.1'
+ f4m_formats = self._extract_f4m_formats(video_info['url'] + '&' + hdcore_sign, video_id)
+ if f4m_formats:
+ for entry in f4m_formats:
+ # URLs without the extra param induce a 404 error
+ entry.update({'extra_param_to_segment_url': hdcore_sign})
+
+ return {
+ 'id': video_id,
+ 'title': video_info['headline'],
+ 'thumbnail': video_info.get('thumbnail_url_hd') or video_info.get('thumbnail_url_sd'),
+ 'duration': parse_duration(video_info.get('segmentDur')),
+ 'formats': f4m_formats,
+ }
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index b18e734c4..c02fe201c 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -1229,19 +1229,24 @@ class GenericIE(InfoExtractor):
# Check for direct link to a video
content_type = head_response.headers.get('Content-Type', '')
- m = re.match(r'^(?P<type>audio|video|application(?=/ogg$))/(?P<format_id>.+)$', content_type)
+ m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>.+)$', content_type)
if m:
upload_date = unified_strdate(
head_response.headers.get('Last-Modified'))
+ formats = []
+ if m.group('format_id').endswith('mpegurl'):
+ formats = self._extract_m3u8_formats(url, video_id, 'mp4')
+ else:
+ formats = [{
+ 'format_id': m.group('format_id'),
+ 'url': url,
+ 'vcodec': 'none' if m.group('type') == 'audio' else None
+ }]
return {
'id': video_id,
'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
'direct': True,
- 'formats': [{
- 'format_id': m.group('format_id'),
- 'url': url,
- 'vcodec': 'none' if m.group('type') == 'audio' else None
- }],
+ 'formats': formats,
'upload_date': upload_date,
}
diff --git a/youtube_dl/extractor/kuwo.py b/youtube_dl/extractor/kuwo.py
index 0c8ed5d07..f641edef8 100644
--- a/youtube_dl/extractor/kuwo.py
+++ b/youtube_dl/extractor/kuwo.py
@@ -31,6 +31,10 @@ class KuwoBaseIE(InfoExtractor):
(file_format['ext'], file_format.get('br', ''), song_id),
song_id, note='Download %s url info' % file_format['format'],
)
+
+ if song_url == 'IPDeny':
+ raise ExtractorError('This song is blocked in this region', expected=True)
+
if song_url.startswith('http://') or song_url.startswith('https://'):
formats.append({
'url': song_url,
diff --git a/youtube_dl/extractor/spankbang.py b/youtube_dl/extractor/spankbang.py
index 3cfa671ed..50433d0f6 100644
--- a/youtube_dl/extractor/spankbang.py
+++ b/youtube_dl/extractor/spankbang.py
@@ -7,7 +7,7 @@ from .common import InfoExtractor
class SpankBangIE(InfoExtractor):
_VALID_URL = r'https?://(?:(?:www|[a-z]{2})\.)?spankbang\.com/(?P<id>[\da-z]+)/video'
- _TEST = {
+ _TESTS = [{
'url': 'http://spankbang.com/3vvn/video/fantasy+solo',
'md5': '1cc433e1d6aa14bc376535b8679302f7',
'info_dict': {
@@ -19,7 +19,11 @@ class SpankBangIE(InfoExtractor):
'uploader': 'silly2587',
'age_limit': 18,
}
- }
+ }, {
+ # 480p only
+ 'url': 'http://spankbang.com/1vt0/video/solvane+gangbang',
+ 'only_matching': True,
+ }]
def _real_extract(self, url):
video_id = self._match_id(url)
@@ -34,7 +38,8 @@ class SpankBangIE(InfoExtractor):
'ext': 'mp4',
'format_id': '%sp' % height,
'height': int(height),
- } for height in re.findall(r'<(?:span|li)[^>]+q_(\d+)p', webpage)]
+ } for height in re.findall(r'<(?:span|li|p)[^>]+[qb]_(\d+)p', webpage)]
+ self._check_formats(formats, video_id)
self._sort_formats(formats)
title = self._html_search_regex(
diff --git a/youtube_dl/extractor/srgssr.py b/youtube_dl/extractor/srgssr.py
index 4707029ca..246970c4d 100644
--- a/youtube_dl/extractor/srgssr.py
+++ b/youtube_dl/extractor/srgssr.py
@@ -70,14 +70,11 @@ class SRGSSRIE(InfoExtractor):
asset_url, media_id, 'mp4', 'm3u8_native',
m3u8_id=format_id, fatal=False))
else:
- ext = None
- if protocol == 'RTMP':
- ext = self._search_regex(r'([a-z0-9]+):[^/]+', asset_url, 'ext')
formats.append({
'format_id': format_id,
'url': asset_url,
'preference': preference(quality),
- 'ext': ext,
+ 'ext': 'flv' if protocol == 'RTMP' else None,
})
self._sort_formats(formats)
diff --git a/youtube_dl/extractor/vidme.py b/youtube_dl/extractor/vidme.py
index 3d63ed4f0..b1156d531 100644
--- a/youtube_dl/extractor/vidme.py
+++ b/youtube_dl/extractor/vidme.py
@@ -1,5 +1,7 @@
from __future__ import unicode_literals
+import itertools
+
from .common import InfoExtractor
from ..compat import compat_HTTPError
from ..utils import (
@@ -11,7 +13,8 @@ from ..utils import (
class VidmeIE(InfoExtractor):
- _VALID_URL = r'https?://vid\.me/(?:e/)?(?P<id>[\da-zA-Z]+)'
+ IE_NAME = 'vidme'
+ _VALID_URL = r'https?://vid\.me/(?:e/)?(?P<id>[\da-zA-Z]{,5})(?:[^\da-zA-Z]|$)'
_TESTS = [{
'url': 'https://vid.me/QNB',
'md5': 'f42d05e7149aeaec5c037b17e5d3dc82',
@@ -202,3 +205,69 @@ class VidmeIE(InfoExtractor):
'comment_count': comment_count,
'formats': formats,
}
+
+
+class VidmeListBaseIE(InfoExtractor):
+ # Max possible limit according to https://docs.vid.me/#api-Videos-List
+ _LIMIT = 100
+
+ def _entries(self, user_id, user_name):
+ for page_num in itertools.count(1):
+ page = self._download_json(
+ 'https://api.vid.me/videos/%s?user=%s&limit=%d&offset=%d'
+ % (self._API_ITEM, user_id, self._LIMIT, (page_num - 1) * self._LIMIT),
+ user_name, 'Downloading user %s page %d' % (self._API_ITEM, page_num))
+
+ videos = page.get('videos', [])
+ if not videos:
+ break
+
+ for video in videos:
+ video_url = video.get('full_url') or video.get('embed_url')
+ if video_url:
+ yield self.url_result(video_url, VidmeIE.ie_key())
+
+ total = int_or_none(page.get('page', {}).get('total'))
+ if total and self._LIMIT * page_num >= total:
+ break
+
+ def _real_extract(self, url):
+ user_name = self._match_id(url)
+
+ user_id = self._download_json(
+ 'https://api.vid.me/userByUsername?username=%s' % user_name,
+ user_name)['user']['user_id']
+
+ return self.playlist_result(
+ self._entries(user_id, user_name), user_id,
+ '%s - %s' % (user_name, self._TITLE))
+
+
+class VidmeUserIE(VidmeListBaseIE):
+ IE_NAME = 'vidme:user'
+ _VALID_URL = r'https?://vid\.me/(?:e/)?(?P<id>[\da-zA-Z]{6,})(?!/likes)(?:[^\da-zA-Z]|$)'
+ _API_ITEM = 'list'
+ _TITLE = 'Videos'
+ _TEST = {
+ 'url': 'https://vid.me/EFARCHIVE',
+ 'info_dict': {
+ 'id': '3834632',
+ 'title': 'EFARCHIVE - %s' % _TITLE,
+ },
+ 'playlist_mincount': 238,
+ }
+
+
+class VidmeUserLikesIE(VidmeListBaseIE):
+ IE_NAME = 'vidme:user:likes'
+ _VALID_URL = r'https?://vid\.me/(?:e/)?(?P<id>[\da-zA-Z]{6,})/likes'
+ _API_ITEM = 'likes'
+ _TITLE = 'Likes'
+ _TEST = {
+ 'url': 'https://vid.me/ErinAlexis/likes',
+ 'info_dict': {
+ 'id': '6483530',
+ 'title': 'ErinAlexis - %s' % _TITLE,
+ },
+ 'playlist_mincount': 415,
+ }
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 828f5d1f4..63abe5477 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -369,6 +369,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
# RTMP (unnamed)
'_rtmp': {'protocol': 'rtmp'},
}
+ _SUBTITLE_FORMATS = ('ttml', 'vtt')
IE_NAME = 'youtube'
_TESTS = [
@@ -918,7 +919,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if lang in sub_lang_list:
continue
sub_formats = []
- for ext in ['sbv', 'vtt', 'srt']:
+ for ext in self._SUBTITLE_FORMATS:
params = compat_urllib_parse.urlencode({
'lang': lang,
'v': video_id,
@@ -988,7 +989,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
for lang_node in caption_list.findall('target'):
sub_lang = lang_node.attrib['lang_code']
sub_formats = []
- for ext in ['sbv', 'vtt', 'srt']:
+ for ext in self._SUBTITLE_FORMATS:
params = compat_urllib_parse.urlencode({
'lang': original_lang,
'tlang': sub_lang,
diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py
index 16a64802a..22d7ac65a 100644
--- a/youtube_dl/postprocessor/ffmpeg.py
+++ b/youtube_dl/postprocessor/ffmpeg.py
@@ -391,6 +391,10 @@ class FFmpegMetadataPP(FFmpegPostProcessor):
for (name, value) in metadata.items():
options.extend(['-metadata', '%s=%s' % (name, value)])
+ # https://github.com/rg3/youtube-dl/issues/8350
+ if info.get('protocol') == 'm3u8_native' or info.get('protocol') == 'm3u8' and self._downloader.params.get('hls_prefer_native', False):
+ options.extend(['-bsf:a', 'aac_adtstoasc'])
+
self._downloader.to_screen('[ffmpeg] Adding metadata to \'%s\'' % filename)
self.run_ffmpeg(filename, temp_filename, options)
os.remove(encodeFilename(filename))
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index c63b61598..4262ad6ac 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -2017,20 +2017,27 @@ def dfxp2srt(dfxp_data):
'ttaf1': 'http://www.w3.org/2006/10/ttaf1',
})
- def parse_node(node):
- str_or_empty = functools.partial(str_or_none, default='')
+ class TTMLPElementParser(object):
+ out = ''
- out = str_or_empty(node.text)
+ def start(self, tag, attrib):
+ if tag in (_x('ttml:br'), _x('ttaf1:br'), 'br'):
+ self.out += '\n'
- for child in node:
- if child.tag in (_x('ttml:br'), _x('ttaf1:br'), 'br'):
- out += '\n' + str_or_empty(child.tail)
- elif child.tag in (_x('ttml:span'), _x('ttaf1:span'), 'span'):
- out += str_or_empty(parse_node(child))
- else:
- out += str_or_empty(xml.etree.ElementTree.tostring(child))
+ def end(self, tag):
+ pass
- return out
+ def data(self, data):
+ self.out += data
+
+ def close(self):
+ return self.out.strip()
+
+ def parse_node(node):
+ target = TTMLPElementParser()
+ parser = xml.etree.ElementTree.XMLParser(target=target)
+ parser.feed(xml.etree.ElementTree.tostring(node))
+ return parser.close()
dfxp = compat_etree_fromstring(dfxp_data.encode('utf-8'))
out = []
diff --git a/youtube_dl/version.py b/youtube_dl/version.py
index 6da42c5a5..3fec14ab1 100644
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,3 +1,3 @@
from __future__ import unicode_literals
-__version__ = '2016.02.01'
+__version__ = '2016.02.05.1'