about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- AUTHORS | 1
-rw-r--r-- docs/supportedsites.md | 11
-rw-r--r-- youtube_dl/extractor/__init__.py | 1
-rw-r--r-- youtube_dl/extractor/beeg.py | 4
-rw-r--r-- youtube_dl/extractor/canvas.py | 65
-rw-r--r-- youtube_dl/extractor/ntvde.py | 33
-rw-r--r-- youtube_dl/extractor/orf.py | 16
-rw-r--r-- youtube_dl/extractor/prosiebensat1.py | 30
-rw-r--r-- youtube_dl/extractor/unistra.py | 2
-rw-r--r-- youtube_dl/extractor/vodlocker.py | 26
-rw-r--r-- youtube_dl/version.py | 2
11 files changed, 157 insertions, 34 deletions
diff --git a/AUTHORS b/AUTHORS
index 3d8bebbb0..f4238e1d3 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -152,3 +152,4 @@ Evan Lu
flatgreen
Brian Foley
Vignesh Venkat
+Tom Gijselinck
diff --git a/docs/supportedsites.md b/docs/supportedsites.md
index 8d0c7b97a..eb160bd2f 100644
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -65,6 +65,7 @@
- **Beeg**
- **BehindKink**
- **Bet**
+ - **Bigflix**
- **Bild**: Bild.de
- **BiliBili**
- **BleacherReport**
@@ -251,7 +252,7 @@
- **Instagram**
- **instagram:user**: Instagram user profile
- **InternetVideoArchive**
- - **IPrima**
+ - **IPrima** (Currently broken)
- **iqiyi**: 爱奇艺
- **Ir90Tv**
- **ivi**: ivi.ru
@@ -602,7 +603,9 @@
- **TruTube**
- **Tube8**
- **TubiTv**
- - **Tudou**
+ - **tudou**
+ - **tudou:album**
+ - **tudou:playlist**
- **Tumblr**
- **tunein:clip**
- **tunein:program**
@@ -655,12 +658,12 @@
- **video.mit.edu**
- **VideoDetective**
- **videofy.me**
- - **VideoMega**
+ - **VideoMega** (Currently broken)
- **videomore**
- **videomore:season**
- **videomore:video**
- **VideoPremium**
- - **VideoTt**: video.tt - Your True Tube
+ - **VideoTt**: video.tt - Your True Tube (Currently broken)
- **videoweed**: VideoWeed
- **Vidme**
- **Vidzi**
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index e4ae9332d..5621c9eb0 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -86,6 +86,7 @@ from .camdemy import (
)
from .canalplus import CanalplusIE
from .canalc2 import Canalc2IE
+from .canvas import CanvasIE
from .cbs import CBSIE
from .cbsnews import CBSNewsIE
from .cbssports import CBSSportsIE
diff --git a/youtube_dl/extractor/beeg.py b/youtube_dl/extractor/beeg.py
index c8d921daf..34c2a756f 100644
--- a/youtube_dl/extractor/beeg.py
+++ b/youtube_dl/extractor/beeg.py
@@ -34,7 +34,7 @@ class BeegIE(InfoExtractor):
video_id = self._match_id(url)
video = self._download_json(
- 'http://beeg.com/api/v5/video/%s' % video_id, video_id)
+ 'https://api.beeg.com/api/v5/video/%s' % video_id, video_id)
def split(o, e):
def cut(s, x):
@@ -60,7 +60,7 @@ class BeegIE(InfoExtractor):
def decrypt_url(encrypted_url):
encrypted_url = self._proto_relative_url(
- encrypted_url.replace('{DATA_MARKERS}', ''), 'http:')
+ encrypted_url.replace('{DATA_MARKERS}', ''), 'https:')
key = self._search_regex(
r'/key=(.*?)%2Cend=', encrypted_url, 'key', default=None)
if not key:
diff --git a/youtube_dl/extractor/canvas.py b/youtube_dl/extractor/canvas.py
new file mode 100644
index 000000000..ee19ff836
--- /dev/null
+++ b/youtube_dl/extractor/canvas.py
@@ -0,0 +1,65 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import float_or_none
+
+
+class CanvasIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?canvas\.be/video/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+ _TEST = {
+ 'url': 'http://www.canvas.be/video/de-afspraak/najaar-2015/de-afspraak-veilt-voor-de-warmste-week',
+ 'md5': 'ea838375a547ac787d4064d8c7860a6c',
+ 'info_dict': {
+ 'id': 'mz-ast-5e5f90b6-2d72-4c40-82c2-e134f884e93e',
+ 'display_id': 'de-afspraak-veilt-voor-de-warmste-week',
+ 'ext': 'mp4',
+ 'title': 'De afspraak veilt voor de Warmste Week',
+ 'description': 'md5:24cb860c320dc2be7358e0e5aa317ba6',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ 'duration': 49.02,
+ }
+ }
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id)
+
+ title = self._search_regex(
+ r'<h1[^>]+class="video__body__header__title"[^>]*>(.+?)</h1>',
+ webpage, 'title', default=None) or self._og_search_title(webpage)
+
+ video_id = self._html_search_regex(
+ r'data-video=(["\'])(?P<id>.+?)\1', webpage, 'video id', group='id')
+
+ data = self._download_json(
+ 'https://mediazone.vrt.be/api/v1/canvas/assets/%s' % video_id, display_id)
+
+ formats = []
+ for target in data['targetUrls']:
+ format_url, format_type = target.get('url'), target.get('type')
+ if not format_url or not format_type:
+ continue
+ if format_type == 'HLS':
+ formats.extend(self._extract_m3u8_formats(
+ format_url, display_id, entry_protocol='m3u8_native',
+ ext='mp4', preference=0, fatal=False, m3u8_id=format_type))
+ elif format_type == 'HDS':
+ formats.extend(self._extract_f4m_formats(
+ format_url, display_id, f4m_id=format_type, fatal=False))
+ else:
+ formats.append({
+ 'format_id': format_type,
+ 'url': format_url,
+ })
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': self._og_search_description(webpage),
+ 'formats': formats,
+ 'duration': float_or_none(data.get('duration'), 1000),
+ 'thumbnail': data.get('posterImageUrl'),
+ }
diff --git a/youtube_dl/extractor/ntvde.py b/youtube_dl/extractor/ntvde.py
index d2cfe0961..a83e85cb8 100644
--- a/youtube_dl/extractor/ntvde.py
+++ b/youtube_dl/extractor/ntvde.py
@@ -2,6 +2,7 @@
from __future__ import unicode_literals
from .common import InfoExtractor
+from ..compat import compat_urlparse
from ..utils import (
int_or_none,
js_to_json,
@@ -34,7 +35,7 @@ class NTVDeIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
info = self._parse_json(self._search_regex(
- r'(?s)ntv.pageInfo.article =\s(\{.*?\});', webpage, 'info'),
+ r'(?s)ntv\.pageInfo\.article\s*=\s*(\{.*?\});', webpage, 'info'),
video_id, transform_source=js_to_json)
timestamp = int_or_none(info.get('publishedDateAsUnixTimeStamp'))
vdata = self._parse_json(self._search_regex(
@@ -42,18 +43,24 @@ class NTVDeIE(InfoExtractor):
webpage, 'player data'),
video_id, transform_source=js_to_json)
duration = parse_duration(vdata.get('duration'))
- formats = [{
- 'format_id': 'flash',
- 'url': 'rtmp://fms.n-tv.de/' + vdata['video'],
- }, {
- 'format_id': 'mobile',
- 'url': 'http://video.n-tv.de' + vdata['videoMp4'],
- 'tbr': 400, # estimation
- }]
- m3u8_url = 'http://video.n-tv.de' + vdata['videoM3u8']
- formats.extend(self._extract_m3u8_formats(
- m3u8_url, video_id, ext='mp4',
- entry_protocol='m3u8_native', preference=0))
+
+ formats = []
+ if vdata.get('video'):
+ formats.append({
+ 'format_id': 'flash',
+ 'url': 'rtmp://fms.n-tv.de/%s' % vdata['video'],
+ })
+ if vdata.get('videoMp4'):
+ formats.append({
+ 'format_id': 'mobile',
+ 'url': compat_urlparse.urljoin('http://video.n-tv.de', vdata['videoMp4']),
+ 'tbr': 400, # estimation
+ })
+ if vdata.get('videoM3u8'):
+ m3u8_url = compat_urlparse.urljoin('http://video.n-tv.de', vdata['videoM3u8'])
+ formats.extend(self._extract_m3u8_formats(
+ m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native',
+ preference=0, m3u8_id='hls', fatal=False))
self._sort_formats(formats)
return {
diff --git a/youtube_dl/extractor/orf.py b/youtube_dl/extractor/orf.py
index 2e6c9872b..c54775d54 100644
--- a/youtube_dl/extractor/orf.py
+++ b/youtube_dl/extractor/orf.py
@@ -170,7 +170,21 @@ class ORFOE1IE(InfoExtractor):
class ORFFM4IE(InfoExtractor):
IE_NAME = 'orf:fm4'
IE_DESC = 'radio FM4'
- _VALID_URL = r'http://fm4\.orf\.at/7tage/?#(?P<date>[0-9]+)/(?P<show>\w+)'
+ _VALID_URL = r'http://fm4\.orf\.at/(?:7tage/?#|player/)(?P<date>[0-9]+)/(?P<show>\w+)'
+
+ _TEST = {
+ 'url': 'http://fm4.orf.at/player/20160110/IS/',
+ 'md5': '01e736e8f1cef7e13246e880a59ad298',
+ 'info_dict': {
+ 'id': '2016-01-10_2100_tl_54_7DaysSun13_11244',
+ 'ext': 'mp3',
+ 'title': 'Im Sumpf',
+ 'description': 'md5:384c543f866c4e422a55f66a62d669cd',
+ 'duration': 7173,
+ 'timestamp': 1452456073,
+ 'upload_date': '20160110',
+ },
+ }
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
diff --git a/youtube_dl/extractor/prosiebensat1.py b/youtube_dl/extractor/prosiebensat1.py
index baa54a3af..670e6950f 100644
--- a/youtube_dl/extractor/prosiebensat1.py
+++ b/youtube_dl/extractor/prosiebensat1.py
@@ -20,7 +20,7 @@ from ..utils import (
class ProSiebenSat1IE(InfoExtractor):
IE_NAME = 'prosiebensat1'
IE_DESC = 'ProSiebenSat.1 Digital'
- _VALID_URL = r'https?://(?:www\.)?(?:(?:prosieben|prosiebenmaxx|sixx|sat1|kabeleins|the-voice-of-germany)\.(?:de|at|ch)|ran\.de|fem\.com)/(?P<id>.+)'
+ _VALID_URL = r'https?://(?:www\.)?(?:(?:prosieben|prosiebenmaxx|sixx|sat1|kabeleins|the-voice-of-germany|7tv)\.(?:de|at|ch)|ran\.de|fem\.com)/(?P<id>.+)'
_TESTS = [
{
@@ -32,7 +32,7 @@ class ProSiebenSat1IE(InfoExtractor):
'url': 'http://www.prosieben.de/tv/circus-halligalli/videos/218-staffel-2-episode-18-jahresrueckblick-ganze-folge',
'info_dict': {
'id': '2104602',
- 'ext': 'mp4',
+ 'ext': 'flv',
'title': 'Episode 18 - Staffel 2',
'description': 'md5:8733c81b702ea472e069bc48bb658fc1',
'upload_date': '20131231',
@@ -138,14 +138,13 @@ class ProSiebenSat1IE(InfoExtractor):
'url': 'http://www.the-voice-of-germany.de/video/31-andreas-kuemmert-rocket-man-clip',
'info_dict': {
'id': '2572814',
- 'ext': 'mp4',
+ 'ext': 'flv',
'title': 'Andreas Kümmert: Rocket Man',
'description': 'md5:6ddb02b0781c6adf778afea606652e38',
'upload_date': '20131017',
'duration': 469.88,
},
'params': {
- # rtmp download
'skip_download': True,
},
},
@@ -153,13 +152,12 @@ class ProSiebenSat1IE(InfoExtractor):
'url': 'http://www.fem.com/wellness/videos/wellness-video-clip-kurztripps-zum-valentinstag.html',
'info_dict': {
'id': '2156342',
- 'ext': 'mp4',
+ 'ext': 'flv',
'title': 'Kurztrips zum Valentinstag',
- 'description': 'Romantischer Kurztrip zum Valentinstag? Wir verraten, was sich hier wirklich lohnt.',
+ 'description': 'Romantischer Kurztrip zum Valentinstag? Nina Heinemann verrät, was sich hier wirklich lohnt.',
'duration': 307.24,
},
'params': {
- # rtmp download
'skip_download': True,
},
},
@@ -172,12 +170,26 @@ class ProSiebenSat1IE(InfoExtractor):
},
'playlist_count': 2,
},
+ {
+ 'url': 'http://www.7tv.de/circus-halligalli/615-best-of-circus-halligalli-ganze-folge',
+ 'info_dict': {
+ 'id': '4187506',
+ 'ext': 'flv',
+ 'title': 'Best of Circus HalliGalli',
+ 'description': 'md5:8849752efd90b9772c9db6fdf87fb9e9',
+ 'upload_date': '20151229',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
]
_CLIPID_REGEXES = [
r'"clip_id"\s*:\s+"(\d+)"',
r'clipid: "(\d+)"',
r'clip[iI]d=(\d+)',
+ r'clip[iI]d\s*=\s*["\'](\d+)',
r"'itemImageUrl'\s*:\s*'/dynamic/thumbnails/full/\d+/(\d+)",
]
_TITLE_REGEXES = [
@@ -186,12 +198,16 @@ class ProSiebenSat1IE(InfoExtractor):
r'<!-- start video -->\s*<h1>(.+?)</h1>',
r'<h1 class="att-name">\s*(.+?)</h1>',
r'<header class="module_header">\s*<h2>([^<]+)</h2>\s*</header>',
+ r'<h2 class="video-title" itemprop="name">\s*(.+?)</h2>',
+ r'<div[^>]+id="veeseoTitle"[^>]*>(.+?)</div>',
]
_DESCRIPTION_REGEXES = [
r'<p itemprop="description">\s*(.+?)</p>',
r'<div class="videoDecription">\s*<p><strong>Beschreibung</strong>: (.+?)</p>',
r'<div class="g-plusone" data-size="medium"></div>\s*</div>\s*</header>\s*(.+?)\s*<footer>',
r'<p class="att-description">\s*(.+?)\s*</p>',
+ r'<p class="video-description" itemprop="description">\s*(.+?)</p>',
+ r'<div[^>]+id="veeseoDescription"[^>]*>(.+?)</div>',
]
_UPLOAD_DATE_REGEXES = [
r'<meta property="og:published_time" content="(.+?)">',
diff --git a/youtube_dl/extractor/unistra.py b/youtube_dl/extractor/unistra.py
index f70978299..594bee4f9 100644
--- a/youtube_dl/extractor/unistra.py
+++ b/youtube_dl/extractor/unistra.py
@@ -38,7 +38,7 @@ class UnistraIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
- files = set(re.findall(r'file\s*:\s*"([^"]+)"', webpage))
+ files = set(re.findall(r'file\s*:\s*"(/[^"]+)"', webpage))
quality = qualities(['SD', 'HD'])
formats = []
diff --git a/youtube_dl/extractor/vodlocker.py b/youtube_dl/extractor/vodlocker.py
index 357594a11..a97995a6d 100644
--- a/youtube_dl/extractor/vodlocker.py
+++ b/youtube_dl/extractor/vodlocker.py
@@ -5,12 +5,13 @@ from .common import InfoExtractor
from ..compat import compat_urllib_parse
from ..utils import (
ExtractorError,
+ NO_DEFAULT,
sanitized_Request,
)
class VodlockerIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?vodlocker\.com/(?:embed-)?(?P<id>[0-9a-zA-Z]+)(?:\..*?)?'
+ _VALID_URL = r'https?://(?:www\.)?vodlocker\.(?:com|city)/(?:embed-)?(?P<id>[0-9a-zA-Z]+)(?:\..*?)?'
_TESTS = [{
'url': 'http://vodlocker.com/e8wvyzz4sl42',
@@ -43,16 +44,31 @@ class VodlockerIE(InfoExtractor):
webpage = self._download_webpage(
req, video_id, 'Downloading video page')
+ def extract_file_url(html, default=NO_DEFAULT):
+ return self._search_regex(
+ r'file:\s*"(http[^\"]+)",', html, 'file url', default=default)
+
+ video_url = extract_file_url(webpage, default=None)
+
+ if not video_url:
+ embed_url = self._search_regex(
+ r'<iframe[^>]+src=(["\'])(?P<url>(?:https?://)?vodlocker\.(?:com|city)/embed-.+?)\1',
+ webpage, 'embed url', group='url')
+ embed_webpage = self._download_webpage(
+ embed_url, video_id, 'Downloading embed webpage')
+ video_url = extract_file_url(embed_webpage)
+ thumbnail_webpage = embed_webpage
+ else:
+ thumbnail_webpage = webpage
+
title = self._search_regex(
r'id="file_title".*?>\s*(.*?)\s*<(?:br|span)', webpage, 'title')
thumbnail = self._search_regex(
- r'image:\s*"(http[^\"]+)",', webpage, 'thumbnail')
- url = self._search_regex(
- r'file:\s*"(http[^\"]+)",', webpage, 'file url')
+ r'image:\s*"(http[^\"]+)",', thumbnail_webpage, 'thumbnail', fatal=False)
formats = [{
'format_id': 'sd',
- 'url': url,
+ 'url': video_url,
}]
return {
diff --git a/youtube_dl/version.py b/youtube_dl/version.py
index 7030903c0..4d433b667 100644
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,3 +1,3 @@
from __future__ import unicode_literals
-__version__ = '2016.01.09'
+__version__ = '2016.01.14'