aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--youtube_dl/compat.py3
-rw-r--r--youtube_dl/extractor/bilibili.py11
-rw-r--r--youtube_dl/extractor/bliptv.py1
-rw-r--r--youtube_dl/extractor/common.py4
-rw-r--r--youtube_dl/extractor/francetv.py47
-rw-r--r--youtube_dl/extractor/generic.py27
-rw-r--r--youtube_dl/extractor/rtlnl.py25
-rw-r--r--youtube_dl/extractor/vk.py45
8 files changed, 132 insertions, 31 deletions
diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py
index db0da5828..0c57c7aeb 100644
--- a/youtube_dl/compat.py
+++ b/youtube_dl/compat.py
@@ -79,7 +79,8 @@ try:
from urllib.parse import unquote as compat_urllib_parse_unquote
from urllib.parse import unquote_plus as compat_urllib_parse_unquote_plus
except ImportError: # Python 2
- _asciire = re.compile('([\x00-\x7f]+)') if sys.version_info < (2, 7) else compat_urllib_parse._asciire
+ _asciire = (compat_urllib_parse._asciire if hasattr(compat_urllib_parse, '_asciire')
+ else re.compile('([\x00-\x7f]+)'))
# HACK: The following are the correct unquote_to_bytes, unquote and unquote_plus
# implementations from cpython 3.4.3's stdlib. Python 2's version
diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py
index bf60450c2..ecc17ebeb 100644
--- a/youtube_dl/extractor/bilibili.py
+++ b/youtube_dl/extractor/bilibili.py
@@ -41,8 +41,15 @@ class BiliBiliIE(InfoExtractor):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
- if self._search_regex(r'(此视频不存在或被删除)', webpage, 'error message', default=None):
- raise ExtractorError('The video does not exist or was deleted', expected=True)
+ if '(此视频不存在或被删除)' in webpage:
+ raise ExtractorError(
+ 'The video does not exist or was deleted', expected=True)
+
+ if '>你没有权限浏览! 由于版权相关问题 我们不对您所在的地区提供服务<' in webpage:
+ raise ExtractorError(
+ 'The video is not available in your region due to copyright reasons',
+ expected=True)
+
video_code = self._search_regex(
r'(?s)<div itemprop="video".*?>(.*?)</div>', webpage, 'video code')
diff --git a/youtube_dl/extractor/bliptv.py b/youtube_dl/extractor/bliptv.py
index a69ee482b..c3296283d 100644
--- a/youtube_dl/extractor/bliptv.py
+++ b/youtube_dl/extractor/bliptv.py
@@ -5,7 +5,6 @@ import re
from .common import InfoExtractor
from ..compat import (
- compat_str,
compat_urllib_request,
compat_urlparse,
)
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 5a2d0d995..b9014fc23 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -996,7 +996,7 @@ class InfoExtractor(object):
def _parse_smil_video(self, video, video_id, base, rtmp_count):
src = video.get('src')
if not src:
- return ([], rtmp_count)
+ return [], rtmp_count
bitrate = int_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)
width = int_or_none(video.get('width'))
height = int_or_none(video.get('height'))
@@ -1009,7 +1009,7 @@ class InfoExtractor(object):
proto = 'http'
ext = video.get('ext')
if proto == 'm3u8':
- return (self._extract_m3u8_formats(src, video_id, ext), rtmp_count)
+ return self._extract_m3u8_formats(src, video_id, ext), rtmp_count
elif proto == 'rtmp':
rtmp_count += 1
streamer = video.get('streamer') or base
diff --git a/youtube_dl/extractor/francetv.py b/youtube_dl/extractor/francetv.py
index b2c984bf2..d4f98ca16 100644
--- a/youtube_dl/extractor/francetv.py
+++ b/youtube_dl/extractor/francetv.py
@@ -87,7 +87,7 @@ class FranceTVBaseInfoExtractor(InfoExtractor):
'title': info['titre'],
'description': clean_html(info['synopsis']),
'thumbnail': compat_urlparse.urljoin('http://pluzz.francetv.fr', info['image']),
- 'duration': float_or_none(info.get('real_duration'), 1000) or parse_duration(info['duree']),
+ 'duration': int_or_none(info.get('real_duration')) or parse_duration(info['duree']),
'timestamp': int_or_none(info['diffusion']['timestamp']),
'formats': formats,
}
@@ -160,11 +160,19 @@ class FranceTvInfoIE(FranceTVBaseInfoExtractor):
class FranceTVIE(FranceTVBaseInfoExtractor):
IE_NAME = 'francetv'
IE_DESC = 'France 2, 3, 4, 5 and Ô'
- _VALID_URL = r'''(?x)https?://www\.france[2345o]\.fr/
- (?:
- emissions/.*?/(videos|emissions)/(?P<id>[^/?]+)
- | (emissions?|jt)/(?P<key>[^/?]+)
- )'''
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ (?:www\.)?france[2345o]\.fr/
+ (?:
+ emissions/[^/]+/(?:videos|diffusions)?|
+ videos
+ )
+ /|
+ embed\.francetv\.fr/\?ue=
+ )
+ (?P<id>[^/?]+)
+ '''
_TESTS = [
# france2
@@ -232,13 +240,34 @@ class FranceTVIE(FranceTVBaseInfoExtractor):
'timestamp': 1410822000,
},
},
+ {
+ # francetv embed
+ 'url': 'http://embed.francetv.fr/?ue=8d7d3da1e3047c42ade5a5d7dfd3fc87',
+ 'info_dict': {
+ 'id': 'EV_30231',
+ 'ext': 'flv',
+ 'title': 'Alcaline, le concert avec Calogero',
+ 'description': 'md5:',
+ 'upload_date': '20150226',
+ 'timestamp': 1424989860,
+ 'duration': 5400,
+ },
+ },
+ {
+ 'url': 'http://www.france4.fr/emission/highlander/diffusion-du-17-07-2015-04h05',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'http://www.franceo.fr/videos/125377617',
+ 'only_matching': True,
+ }
]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- webpage = self._download_webpage(url, mobj.group('key') or mobj.group('id'))
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
video_id, catalogue = self._html_search_regex(
- r'href="http://videos\.francetv\.fr/video/([^@]+@[^"]+)"',
+ r'href="http://videos?\.francetv\.fr/video/([^@]+@[^"]+)"',
webpage, 'video ID').split('@')
return self._extract_video(video_id, catalogue)
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index a62287e50..dc24a8a8b 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -407,6 +407,26 @@ class GenericIE(InfoExtractor):
'skip_download': 'Requires rtmpdump'
}
},
+ # francetv embed
+ {
+ 'url': 'http://www.tsprod.com/replay-du-concert-alcaline-de-calogero',
+ 'info_dict': {
+ 'id': 'EV_30231',
+ 'ext': 'mp4',
+ 'title': 'Alcaline, le concert avec Calogero',
+ 'description': 'md5:61f08036dcc8f47e9cfc33aed08ffaff',
+ 'upload_date': '20150226',
+ 'timestamp': 1424989860,
+ 'duration': 5400,
+ },
+ 'params': {
+ # m3u8 downloads
+ 'skip_download': True,
+ },
+ 'expected_warnings': [
+ 'Forbidden'
+ ]
+ },
# Condé Nast embed
{
'url': 'http://www.wired.com/2014/04/honda-asimo/',
@@ -1431,6 +1451,13 @@ class GenericIE(InfoExtractor):
if mobj is not None:
return self.url_result(mobj.group('url'), 'ArteTVEmbed')
+ # Look for embedded francetv player
+ mobj = re.search(
+ r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?://)?embed\.francetv\.fr/\?ue=.+?)\1',
+ webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'))
+
# Look for embedded smotri.com player
smotri_url = SmotriIE._extract_url(webpage)
if smotri_url:
diff --git a/youtube_dl/extractor/rtlnl.py b/youtube_dl/extractor/rtlnl.py
index a4d3d73ff..e0c530d64 100644
--- a/youtube_dl/extractor/rtlnl.py
+++ b/youtube_dl/extractor/rtlnl.py
@@ -44,6 +44,21 @@ class RtlNlIE(InfoExtractor):
'description': 'Er zijn nieuwe beelden vrijgegeven die vlak na de aanslag in Kopenhagen zijn gemaakt. Op de video is goed te zien hoe omstanders zich bekommeren om één van de slachtoffers, terwijl de eerste agenten ter plaatse komen.',
}
}, {
+ # empty synopsis and missing episodes (see https://github.com/rg3/youtube-dl/issues/6275)
+ 'url': 'http://www.rtl.nl/system/videoplayer/derden/rtlnieuws/video_embed.html#uuid=f536aac0-1dc3-4314-920e-3bd1c5b3811a/autoplay=false',
+ 'info_dict': {
+ 'id': 'f536aac0-1dc3-4314-920e-3bd1c5b3811a',
+ 'ext': 'mp4',
+ 'title': 'RTL Nieuws - Meer beelden van overval juwelier',
+ 'thumbnail': 're:^https?://screenshots\.rtl\.nl/system/thumb/sz=[0-9]+x[0-9]+/uuid=f536aac0-1dc3-4314-920e-3bd1c5b3811a$',
+ 'timestamp': 1437233400,
+ 'upload_date': '20150718',
+ 'duration': 30.474,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
# encrypted m3u8 streams, georestricted
'url': 'http://www.rtlxl.nl/#!/afl-2-257632/52a74543-c504-4cde-8aa8-ec66fe8d68a7',
'only_matching': True,
@@ -59,9 +74,11 @@ class RtlNlIE(InfoExtractor):
uuid)
material = info['material'][0]
- progname = info['abstracts'][0]['name']
- subtitle = material['title'] or info['episodes'][0]['name']
- description = material.get('synopsis') or info['episodes'][0]['synopsis']
+ title = info['abstracts'][0]['name']
+ subtitle = material.get('title')
+ if subtitle:
+ title += ' - %s' % subtitle
+ description = material.get('synopsis')
meta = info.get('meta', {})
@@ -107,7 +124,7 @@ class RtlNlIE(InfoExtractor):
return {
'id': uuid,
- 'title': '%s - %s' % (progname, subtitle),
+ 'title': title,
'formats': formats,
'timestamp': material['original_date'],
'description': description,
diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py
index 8f677cae3..c30c5a8e5 100644
--- a/youtube_dl/extractor/vk.py
+++ b/youtube_dl/extractor/vk.py
@@ -20,7 +20,8 @@ from ..utils import (
class VKIE(InfoExtractor):
- IE_NAME = 'vk.com'
+ IE_NAME = 'vk'
+ IE_DESC = 'VK'
_VALID_URL = r'''(?x)
https?://
(?:
@@ -154,6 +155,11 @@ class VKIE(InfoExtractor):
'only_matching': True,
},
{
+ # age restricted video, requires vk account credentials
+ 'url': 'https://vk.com/video205387401_164765225',
+ 'only_matching': True,
+ },
+ {
# vk wrapper
'url': 'http://www.biqle.ru/watch/847655_160197695',
'only_matching': True,
@@ -204,6 +210,12 @@ class VKIE(InfoExtractor):
info_page = self._download_webpage(info_url, video_id)
+ error_message = self._html_search_regex(
+ r'(?s)<!><div[^>]+class="video_layer_message"[^>]*>(.+?)</div>',
+ info_page, 'error message', default=None)
+ if error_message:
+ raise ExtractorError(error_message, expected=True)
+
if re.search(r'<!>/login\.php\?.*\bact=security_check', info_page):
raise ExtractorError(
'You are trying to log in from an unusual location. You should confirm ownership at vk.com to log in with this IP.',
@@ -289,25 +301,34 @@ class VKIE(InfoExtractor):
class VKUserVideosIE(InfoExtractor):
- IE_NAME = 'vk.com:user-videos'
- IE_DESC = 'vk.com:All of a user\'s videos'
- _VALID_URL = r'https?://vk\.com/videos(?P<id>[0-9]+)(?:m\?.*)?'
+ IE_NAME = 'vk:uservideos'
+ IE_DESC = "VK - User's Videos"
+ _VALID_URL = r'https?://vk\.com/videos(?P<id>-?[0-9]+)$'
_TEMPLATE_URL = 'https://vk.com/videos'
- _TEST = {
+ _TESTS = [{
'url': 'http://vk.com/videos205387401',
'info_dict': {
'id': '205387401',
+ 'title': "Tom Cruise's Videos",
},
'playlist_mincount': 4,
- }
+ }, {
+ 'url': 'http://vk.com/videos-77521',
+ 'only_matching': True,
+ }]
def _real_extract(self, url):
page_id = self._match_id(url)
- page = self._download_webpage(url, page_id)
- video_ids = orderedSet(
- m.group(1) for m in re.finditer(r'href="/video([0-9_]+)"', page))
- url_entries = [
+
+ webpage = self._download_webpage(url, page_id)
+
+ entries = [
self.url_result(
'http://vk.com/video' + video_id, 'VK', video_id=video_id)
- for video_id in video_ids]
- return self.playlist_result(url_entries, page_id)
+ for video_id in orderedSet(re.findall(r'href="/video(-?[0-9_]+)"', webpage))]
+
+ title = unescapeHTML(self._search_regex(
+ r'<title>\s*([^<]+?)\s+\|\s+\d+\s+videos',
+ webpage, 'title', default=page_id))
+
+ return self.playlist_result(entries, page_id, title)