aboutsummaryrefslogtreecommitdiff
path: root/youtube_dl
diff options
context:
space:
mode:
Diffstat (limited to 'youtube_dl')
-rwxr-xr-xyoutube_dl/YoutubeDL.py2
-rw-r--r--youtube_dl/__init__.py14
-rw-r--r--youtube_dl/downloader/common.py1
-rw-r--r--youtube_dl/downloader/http.py8
-rw-r--r--youtube_dl/extractor/abc.py22
-rw-r--r--youtube_dl/extractor/adobepass.py2
-rw-r--r--youtube_dl/extractor/allocine.py85
-rw-r--r--youtube_dl/extractor/amcnetworks.py1
-rw-r--r--youtube_dl/extractor/arte.py18
-rw-r--r--youtube_dl/extractor/beatport.py (renamed from youtube_dl/extractor/beatportpro.py)10
-rw-r--r--youtube_dl/extractor/brightcove.py2
-rw-r--r--youtube_dl/extractor/byutv.py90
-rw-r--r--youtube_dl/extractor/canalplus.py66
-rw-r--r--youtube_dl/extractor/carambatv.py24
-rw-r--r--youtube_dl/extractor/cbsinteractive.py2
-rw-r--r--youtube_dl/extractor/cbsnews.py2
-rw-r--r--youtube_dl/extractor/ceskatelevize.py2
-rw-r--r--youtube_dl/extractor/chirbit.py13
-rw-r--r--youtube_dl/extractor/clipfish.py21
-rw-r--r--youtube_dl/extractor/clubic.py11
-rw-r--r--youtube_dl/extractor/cmt.py7
-rw-r--r--youtube_dl/extractor/comcarcoff.py2
-rw-r--r--youtube_dl/extractor/common.py32
-rw-r--r--youtube_dl/extractor/commonprotocols.py36
-rw-r--r--youtube_dl/extractor/criterion.py12
-rw-r--r--youtube_dl/extractor/crunchyroll.py3
-rw-r--r--youtube_dl/extractor/dailymotion.py6
-rw-r--r--youtube_dl/extractor/daum.py2
-rw-r--r--youtube_dl/extractor/dctp.py67
-rw-r--r--youtube_dl/extractor/dramafever.py2
-rw-r--r--youtube_dl/extractor/eitb.py2
-rw-r--r--youtube_dl/extractor/embedly.py2
-rw-r--r--youtube_dl/extractor/extractors.py24
-rw-r--r--youtube_dl/extractor/facebook.py2
-rw-r--r--youtube_dl/extractor/faz.py2
-rw-r--r--youtube_dl/extractor/firsttv.py2
-rw-r--r--youtube_dl/extractor/folketinget.py2
-rw-r--r--youtube_dl/extractor/footyroom.py32
-rw-r--r--youtube_dl/extractor/francetv.py2
-rw-r--r--youtube_dl/extractor/generic.py29
-rw-r--r--youtube_dl/extractor/goshgay.py2
-rw-r--r--youtube_dl/extractor/hark.py2
-rw-r--r--youtube_dl/extractor/hbo.py70
-rw-r--r--youtube_dl/extractor/helsinki.py2
-rw-r--r--youtube_dl/extractor/huajiao.py56
-rw-r--r--youtube_dl/extractor/ina.py2
-rw-r--r--youtube_dl/extractor/iprima.py3
-rw-r--r--youtube_dl/extractor/jpopsukitv.py2
-rw-r--r--youtube_dl/extractor/jwplatform.py12
-rw-r--r--youtube_dl/extractor/kickstarter.py2
-rw-r--r--youtube_dl/extractor/kontrtube.py2
-rw-r--r--youtube_dl/extractor/krasview.py2
-rw-r--r--youtube_dl/extractor/lego.py128
-rw-r--r--youtube_dl/extractor/lifenews.py2
-rw-r--r--youtube_dl/extractor/lynda.py72
-rw-r--r--youtube_dl/extractor/m6.py2
-rw-r--r--youtube_dl/extractor/mailru.py2
-rw-r--r--youtube_dl/extractor/moviezine.py2
-rw-r--r--youtube_dl/extractor/musicplayon.py2
-rw-r--r--youtube_dl/extractor/myspace.py2
-rw-r--r--youtube_dl/extractor/nationalgeographic.py37
-rw-r--r--youtube_dl/extractor/naver.py2
-rw-r--r--youtube_dl/extractor/newstube.py2
-rw-r--r--youtube_dl/extractor/nextmedia.py5
-rw-r--r--youtube_dl/extractor/nhl.py37
-rw-r--r--youtube_dl/extractor/niconico.py2
-rw-r--r--youtube_dl/extractor/noco.py2
-rw-r--r--youtube_dl/extractor/normalboots.py2
-rw-r--r--youtube_dl/extractor/nova.py2
-rw-r--r--youtube_dl/extractor/nowness.py2
-rw-r--r--youtube_dl/extractor/npo.py41
-rw-r--r--youtube_dl/extractor/nrk.py20
-rw-r--r--youtube_dl/extractor/ntvru.py2
-rw-r--r--youtube_dl/extractor/nuevo.py2
-rw-r--r--youtube_dl/extractor/nytimes.py82
-rw-r--r--youtube_dl/extractor/nzz.py36
-rw-r--r--youtube_dl/extractor/oktoberfesttv.py2
-rw-r--r--youtube_dl/extractor/orf.py56
-rw-r--r--youtube_dl/extractor/pandoratv.py2
-rw-r--r--youtube_dl/extractor/parliamentliveuk.py9
-rw-r--r--youtube_dl/extractor/patreon.py2
-rw-r--r--youtube_dl/extractor/periscope.py2
-rw-r--r--youtube_dl/extractor/pluralsight.py5
-rw-r--r--youtube_dl/extractor/porn91.py2
-rw-r--r--youtube_dl/extractor/pornoxo.py31
-rw-r--r--youtube_dl/extractor/prosiebensat1.py2
-rw-r--r--youtube_dl/extractor/puls4.py2
-rw-r--r--youtube_dl/extractor/radiobremen.py2
-rw-r--r--youtube_dl/extractor/reverbnation.py31
-rw-r--r--youtube_dl/extractor/rmcdecouverte.py2
-rw-r--r--youtube_dl/extractor/rtl2.py2
-rw-r--r--youtube_dl/extractor/rtve.py2
-rw-r--r--youtube_dl/extractor/rudo.py2
-rw-r--r--youtube_dl/extractor/ruhd.py2
-rw-r--r--youtube_dl/extractor/rutube.py2
-rw-r--r--youtube_dl/extractor/rutv.py2
-rw-r--r--youtube_dl/extractor/ruutu.py16
-rw-r--r--youtube_dl/extractor/safari.py14
-rw-r--r--youtube_dl/extractor/sapo.py2
-rw-r--r--youtube_dl/extractor/sbs.py2
-rw-r--r--youtube_dl/extractor/screencast.py2
-rw-r--r--youtube_dl/extractor/screenwavemedia.py2
-rw-r--r--youtube_dl/extractor/slutload.py8
-rw-r--r--youtube_dl/extractor/smotri.py2
-rw-r--r--youtube_dl/extractor/sohu.py2
-rw-r--r--youtube_dl/extractor/soundcloud.py2
-rw-r--r--youtube_dl/extractor/southpark.py2
-rw-r--r--youtube_dl/extractor/spiegel.py2
-rw-r--r--youtube_dl/extractor/srmediathek.py2
-rw-r--r--youtube_dl/extractor/streamable.py10
-rw-r--r--youtube_dl/extractor/streamcz.py2
-rw-r--r--youtube_dl/extractor/swrmediathek.py2
-rw-r--r--youtube_dl/extractor/sztvhu.py2
-rw-r--r--youtube_dl/extractor/tagesschau.py2
-rw-r--r--youtube_dl/extractor/tass.py2
-rw-r--r--youtube_dl/extractor/teachertube.py2
-rw-r--r--youtube_dl/extractor/teamcoco.py2
-rw-r--r--youtube_dl/extractor/techtalks.py9
-rw-r--r--youtube_dl/extractor/theintercept.py2
-rw-r--r--youtube_dl/extractor/theplatform.py2
-rw-r--r--youtube_dl/extractor/theweatherchannel.py79
-rw-r--r--youtube_dl/extractor/thisoldhouse.py32
-rw-r--r--youtube_dl/extractor/tlc.py2
-rw-r--r--youtube_dl/extractor/tonline.py59
-rw-r--r--youtube_dl/extractor/toypics.py2
-rw-r--r--youtube_dl/extractor/tumblr.py2
-rw-r--r--youtube_dl/extractor/tv2.py2
-rw-r--r--youtube_dl/extractor/tvigle.py2
-rw-r--r--youtube_dl/extractor/twitch.py1
-rw-r--r--youtube_dl/extractor/urplay.py32
-rw-r--r--youtube_dl/extractor/vbox7.py2
-rw-r--r--youtube_dl/extractor/vesti.py2
-rw-r--r--youtube_dl/extractor/videomore.py5
-rw-r--r--youtube_dl/extractor/vidzi.py19
-rw-r--r--youtube_dl/extractor/vier.py2
-rw-r--r--youtube_dl/extractor/vimeo.py47
-rw-r--r--youtube_dl/extractor/vk.py4
-rw-r--r--youtube_dl/extractor/vodlocker.py2
-rw-r--r--youtube_dl/extractor/wdr.py2
-rw-r--r--youtube_dl/extractor/wrzuta.py2
-rw-r--r--youtube_dl/extractor/wsj.py2
-rw-r--r--youtube_dl/extractor/xboxclips.py2
-rw-r--r--youtube_dl/extractor/xnxx.py2
-rw-r--r--youtube_dl/extractor/xuite.py2
-rw-r--r--youtube_dl/extractor/youtube.py34
-rw-r--r--youtube_dl/extractor/zingmp3.py2
-rw-r--r--youtube_dl/postprocessor/embedthumbnail.py2
-rw-r--r--youtube_dl/postprocessor/ffmpeg.py6
-rw-r--r--youtube_dl/utils.py38
-rw-r--r--youtube_dl/version.py2
150 files changed, 1432 insertions, 526 deletions
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index 442aa663b..99825e343 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -1,5 +1,5 @@
#!/usr/bin/env python
-# -*- coding: utf-8 -*-
+# coding: utf-8
from __future__ import absolute_import, unicode_literals
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py
index 1cf3140a0..643393558 100644
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -1,5 +1,5 @@
#!/usr/bin/env python
-# -*- coding: utf-8 -*-
+# coding: utf-8
from __future__ import unicode_literals
@@ -266,8 +266,6 @@ def _real_main(argv=None):
postprocessors.append({
'key': 'FFmpegEmbedSubtitle',
})
- if opts.xattrs:
- postprocessors.append({'key': 'XAttrMetadata'})
if opts.embedthumbnail:
already_have_thumbnail = opts.writethumbnail or opts.write_all_thumbnails
postprocessors.append({
@@ -276,6 +274,10 @@ def _real_main(argv=None):
})
if not already_have_thumbnail:
opts.writethumbnail = True
+ # XAttrMetadataPP should be run after post-processors that may change file
+ # contents
+ if opts.xattrs:
+ postprocessors.append({'key': 'XAttrMetadata'})
# Please keep ExecAfterDownload towards the bottom as it allows the user to modify the final file in any way.
# So if the user is able to remove the file before your postprocessor runs it might cause a few problems.
if opts.exec_cmd:
@@ -283,12 +285,6 @@ def _real_main(argv=None):
'key': 'ExecAfterDownload',
'exec_cmd': opts.exec_cmd,
})
- if opts.xattr_set_filesize:
- try:
- import xattr
- xattr # Confuse flake8
- except ImportError:
- parser.error('setting filesize xattr requested but python-xattr is not available')
external_downloader_args = None
if opts.external_downloader_args:
external_downloader_args = compat_shlex_split(opts.external_downloader_args)
diff --git a/youtube_dl/downloader/common.py b/youtube_dl/downloader/common.py
index 8482cbd84..3dc144b4e 100644
--- a/youtube_dl/downloader/common.py
+++ b/youtube_dl/downloader/common.py
@@ -346,7 +346,6 @@ class FileDownloader(object):
min_sleep_interval = self.params.get('sleep_interval')
if min_sleep_interval:
max_sleep_interval = self.params.get('max_sleep_interval', min_sleep_interval)
- print(min_sleep_interval, max_sleep_interval)
sleep_interval = random.uniform(min_sleep_interval, max_sleep_interval)
self.to_screen('[download] Sleeping %s seconds...' % sleep_interval)
time.sleep(sleep_interval)
diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py
index f8b69d186..af405b950 100644
--- a/youtube_dl/downloader/http.py
+++ b/youtube_dl/downloader/http.py
@@ -13,6 +13,9 @@ from ..utils import (
encodeFilename,
sanitize_open,
sanitized_Request,
+ write_xattr,
+ XAttrMetadataError,
+ XAttrUnavailableError,
)
@@ -179,9 +182,8 @@ class HttpFD(FileDownloader):
if self.params.get('xattr_set_filesize', False) and data_len is not None:
try:
- import xattr
- xattr.setxattr(tmpfilename, 'user.ytdl.filesize', str(data_len))
- except(OSError, IOError, ImportError) as err:
+ write_xattr(tmpfilename, 'user.ytdl.filesize', str(data_len).encode('utf-8'))
+ except (XAttrUnavailableError, XAttrMetadataError) as err:
self.report_error('unable to set filesize xattr: %s' % str(err))
try:
diff --git a/youtube_dl/extractor/abc.py b/youtube_dl/extractor/abc.py
index 465249bbf..0247cabf9 100644
--- a/youtube_dl/extractor/abc.py
+++ b/youtube_dl/extractor/abc.py
@@ -102,16 +102,16 @@ class ABCIViewIE(InfoExtractor):
# ABC iview programs are normally available for 14 days only.
_TESTS = [{
- 'url': 'http://iview.abc.net.au/programs/gardening-australia/FA1505V024S00',
- 'md5': '979d10b2939101f0d27a06b79edad536',
+ 'url': 'http://iview.abc.net.au/programs/diaries-of-a-broken-mind/ZX9735A001S00',
+ 'md5': 'cde42d728b3b7c2b32b1b94b4a548afc',
'info_dict': {
- 'id': 'FA1505V024S00',
+ 'id': 'ZX9735A001S00',
'ext': 'mp4',
- 'title': 'Series 27 Ep 24',
- 'description': 'md5:b28baeae7504d1148e1d2f0e3ed3c15d',
- 'upload_date': '20160820',
- 'uploader_id': 'abc1',
- 'timestamp': 1471719600,
+ 'title': 'Diaries Of A Broken Mind',
+ 'description': 'md5:7de3903874b7a1be279fe6b68718fc9e',
+ 'upload_date': '20161010',
+ 'uploader_id': 'abc2',
+ 'timestamp': 1476064920,
},
'skip': 'Video gone',
}]
@@ -121,7 +121,7 @@ class ABCIViewIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
video_params = self._parse_json(self._search_regex(
r'videoParams\s*=\s*({.+?});', webpage, 'video params'), video_id)
- title = video_params['title']
+ title = video_params.get('title') or video_params['seriesTitle']
stream = next(s for s in video_params['playlist'] if s.get('type') == 'program')
formats = self._extract_akamai_formats(stream['hds-unmetered'], video_id)
@@ -144,8 +144,8 @@ class ABCIViewIE(InfoExtractor):
'timestamp': parse_iso8601(video_params.get('pubDate'), ' '),
'series': video_params.get('seriesTitle'),
'series_id': video_params.get('seriesHouseNumber') or video_id[:7],
- 'episode_number': int_or_none(self._html_search_meta('episodeNumber', webpage)),
- 'episode': self._html_search_meta('episode_title', webpage),
+ 'episode_number': int_or_none(self._html_search_meta('episodeNumber', webpage, default=None)),
+ 'episode': self._html_search_meta('episode_title', webpage, default=None),
'uploader_id': video_params.get('channel'),
'formats': formats,
'subtitles': subtitles,
diff --git a/youtube_dl/extractor/adobepass.py b/youtube_dl/extractor/adobepass.py
index 76ab69d8c..b6d215a55 100644
--- a/youtube_dl/extractor/adobepass.py
+++ b/youtube_dl/extractor/adobepass.py
@@ -1,4 +1,4 @@
-# -*- coding: utf-8 -*-
+# coding: utf-8
from __future__ import unicode_literals
import re
diff --git a/youtube_dl/extractor/allocine.py b/youtube_dl/extractor/allocine.py
index 190bc2cc8..517b06def 100644
--- a/youtube_dl/extractor/allocine.py
+++ b/youtube_dl/extractor/allocine.py
@@ -1,29 +1,26 @@
-# -*- coding: utf-8 -*-
+# coding: utf-8
from __future__ import unicode_literals
-import re
-import json
-
from .common import InfoExtractor
-from ..compat import compat_str
from ..utils import (
+ remove_end,
qualities,
- unescapeHTML,
- xpath_element,
+ url_basename,
)
class AllocineIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?allocine\.fr/(?P<typ>article|video|film)/(fichearticle_gen_carticle=|player_gen_cmedia=|fichefilm_gen_cfilm=|video-)(?P<id>[0-9]+)(?:\.html)?'
+ _VALID_URL = r'https?://(?:www\.)?allocine\.fr/(?:article|video|film)/(?:fichearticle_gen_carticle=|player_gen_cmedia=|fichefilm_gen_cfilm=|video-)(?P<id>[0-9]+)(?:\.html)?'
_TESTS = [{
'url': 'http://www.allocine.fr/article/fichearticle_gen_carticle=18635087.html',
'md5': '0c9fcf59a841f65635fa300ac43d8269',
'info_dict': {
'id': '19546517',
+ 'display_id': '18635087',
'ext': 'mp4',
'title': 'Astérix - Le Domaine des Dieux Teaser VF',
- 'description': 'md5:abcd09ce503c6560512c14ebfdb720d2',
+ 'description': 'md5:4a754271d9c6f16c72629a8a993ee884',
'thumbnail': 're:http://.*\.jpg',
},
}, {
@@ -31,64 +28,82 @@ class AllocineIE(InfoExtractor):
'md5': 'd0cdce5d2b9522ce279fdfec07ff16e0',
'info_dict': {
'id': '19540403',
+ 'display_id': '19540403',
'ext': 'mp4',
'title': 'Planes 2 Bande-annonce VF',
'description': 'Regardez la bande annonce du film Planes 2 (Planes 2 Bande-annonce VF). Planes 2, un film de Roberts Gannaway',
'thumbnail': 're:http://.*\.jpg',
},
}, {
- 'url': 'http://www.allocine.fr/film/fichefilm_gen_cfilm=181290.html',
+ 'url': 'http://www.allocine.fr/video/player_gen_cmedia=19544709&cfilm=181290.html',
'md5': '101250fb127ef9ca3d73186ff22a47ce',
'info_dict': {
'id': '19544709',
+ 'display_id': '19544709',
'ext': 'mp4',
'title': 'Dragons 2 - Bande annonce finale VF',
- 'description': 'md5:601d15393ac40f249648ef000720e7e3',
+ 'description': 'md5:6cdd2d7c2687d4c6aafe80a35e17267a',
'thumbnail': 're:http://.*\.jpg',
},
}, {
'url': 'http://www.allocine.fr/video/video-19550147/',
- 'only_matching': True,
+ 'md5': '3566c0668c0235e2d224fd8edb389f67',
+ 'info_dict': {
+ 'id': '19550147',
+ 'ext': 'mp4',
+ 'title': 'Faux Raccord N°123 - Les gaffes de Cliffhanger',
+ 'description': 'md5:bc734b83ffa2d8a12188d9eb48bb6354',
+ 'thumbnail': 're:http://.*\.jpg',
+ },
}]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- typ = mobj.group('typ')
- display_id = mobj.group('id')
+ display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
- if typ == 'film':
- video_id = self._search_regex(r'href="/video/player_gen_cmedia=([0-9]+).+"', webpage, 'video id')
- else:
- player = self._search_regex(r'data-player=\'([^\']+)\'>', webpage, 'data player', default=None)
- if player:
- player_data = json.loads(player)
- video_id = compat_str(player_data['refMedia'])
- else:
- model = self._search_regex(r'data-model="([^"]+)">', webpage, 'data model')
- model_data = self._parse_json(unescapeHTML(model), display_id)
- video_id = compat_str(model_data['id'])
+ formats = []
+ quality = qualities(['ld', 'md', 'hd'])
- xml = self._download_xml('http://www.allocine.fr/ws/AcVisiondataV4.ashx?media=%s' % video_id, display_id)
+ model = self._html_search_regex(
+ r'data-model="([^"]+)"', webpage, 'data model', default=None)
+ if model:
+ model_data = self._parse_json(model, display_id)
- video = xpath_element(xml, './/AcVisionVideo').attrib
- quality = qualities(['ld', 'md', 'hd'])
+ for video_url in model_data['sources'].values():
+ video_id, format_id = url_basename(video_url).split('_')[:2]
+ formats.append({
+ 'format_id': format_id,
+ 'quality': quality(format_id),
+ 'url': video_url,
+ })
- formats = []
- for k, v in video.items():
- if re.match(r'.+_path', k):
- format_id = k.split('_')[0]
+ title = model_data['title']
+ else:
+ video_id = display_id
+ media_data = self._download_json(
+ 'http://www.allocine.fr/ws/AcVisiondataV5.ashx?media=%s' % video_id, display_id)
+ for key, value in media_data['video'].items():
+ if not key.endswith('Path'):
+ continue
+
+ format_id = key[:-len('Path')]
formats.append({
'format_id': format_id,
'quality': quality(format_id),
- 'url': v,
+ 'url': value,
})
+
+ title = remove_end(self._html_search_regex(
+ r'(?s)<title>(.+?)</title>', webpage, 'title'
+ ).strip(), ' - AlloCiné')
+
self._sort_formats(formats)
return {
'id': video_id,
- 'title': video['videoTitle'],
+ 'display_id': display_id,
+ 'title': title,
'thumbnail': self._og_search_thumbnail(webpage),
'formats': formats,
'description': self._og_search_description(webpage),
diff --git a/youtube_dl/extractor/amcnetworks.py b/youtube_dl/extractor/amcnetworks.py
index c739d2c99..d2b03b177 100644
--- a/youtube_dl/extractor/amcnetworks.py
+++ b/youtube_dl/extractor/amcnetworks.py
@@ -28,6 +28,7 @@ class AMCNetworksIE(ThePlatformIE):
# m3u8 download
'skip_download': True,
},
+ 'skip': 'Requires TV provider accounts',
}, {
'url': 'http://www.bbcamerica.com/shows/the-hunt/full-episodes/season-1/episode-01-the-hardest-challenge',
'only_matching': True,
diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py
index e0c5c1804..69a23e88c 100644
--- a/youtube_dl/extractor/arte.py
+++ b/youtube_dl/extractor/arte.py
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
from __future__ import unicode_literals
import re
@@ -410,6 +410,22 @@ class ArteTVEmbedIE(ArteTVPlus7IE):
return self._extract_from_json_url(json_url, video_id, lang)
+class TheOperaPlatformIE(ArteTVPlus7IE):
+ IE_NAME = 'theoperaplatform'
+ _VALID_URL = r'https?://(?:www\.)?theoperaplatform\.eu/(?P<lang>fr|de|en|es)/(?P<id>[^/?#&]+)'
+
+ _TESTS = [{
+ 'url': 'http://www.theoperaplatform.eu/de/opera/verdi-otello',
+ 'md5': '970655901fa2e82e04c00b955e9afe7b',
+ 'info_dict': {
+ 'id': '060338-009-A',
+ 'ext': 'mp4',
+ 'title': 'Verdi - OTELLO',
+ 'upload_date': '20160927',
+ },
+ }]
+
+
class ArteTVPlaylistIE(ArteTVBaseIE):
IE_NAME = 'arte.tv:playlist'
_VALID_URL = r'https?://(?:www\.)?arte\.tv/guide/(?P<lang>fr|de|en|es)/[^#]*#collection/(?P<id>PL-\d+)'
diff --git a/youtube_dl/extractor/beatportpro.py b/youtube_dl/extractor/beatport.py
index 3c7775d3e..e60709417 100644
--- a/youtube_dl/extractor/beatportpro.py
+++ b/youtube_dl/extractor/beatport.py
@@ -8,10 +8,10 @@ from ..compat import compat_str
from ..utils import int_or_none
-class BeatportProIE(InfoExtractor):
- _VALID_URL = r'https?://pro\.beatport\.com/track/(?P<display_id>[^/]+)/(?P<id>[0-9]+)'
+class BeatportIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.|pro\.)?beatport\.com/track/(?P<display_id>[^/]+)/(?P<id>[0-9]+)'
_TESTS = [{
- 'url': 'https://pro.beatport.com/track/synesthesia-original-mix/5379371',
+ 'url': 'https://beatport.com/track/synesthesia-original-mix/5379371',
'md5': 'b3c34d8639a2f6a7f734382358478887',
'info_dict': {
'id': '5379371',
@@ -20,7 +20,7 @@ class BeatportProIE(InfoExtractor):
'title': 'Froxic - Synesthesia (Original Mix)',
},
}, {
- 'url': 'https://pro.beatport.com/track/love-and-war-original-mix/3756896',
+ 'url': 'https://beatport.com/track/love-and-war-original-mix/3756896',
'md5': 'e44c3025dfa38c6577fbaeb43da43514',
'info_dict': {
'id': '3756896',
@@ -29,7 +29,7 @@ class BeatportProIE(InfoExtractor):
'title': 'Wolfgang Gartner - Love & War (Original Mix)',
},
}, {
- 'url': 'https://pro.beatport.com/track/birds-original-mix/4991738',
+ 'url': 'https://beatport.com/track/birds-original-mix/4991738',
'md5': 'a1fd8e8046de3950fd039304c186c05f',
'info_dict': {
'id': '4991738',
diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py
index 2ec55b185..945cf19e8 100644
--- a/youtube_dl/extractor/brightcove.py
+++ b/youtube_dl/extractor/brightcove.py
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
from __future__ import unicode_literals
import re
diff --git a/youtube_dl/extractor/byutv.py b/youtube_dl/extractor/byutv.py
index 3aec601f8..4be175d70 100644
--- a/youtube_dl/extractor/byutv.py
+++ b/youtube_dl/extractor/byutv.py
@@ -1,6 +1,5 @@
from __future__ import unicode_literals
-import json
import re
from .common import InfoExtractor
@@ -8,15 +7,15 @@ from ..utils import ExtractorError
class BYUtvIE(InfoExtractor):
- _VALID_URL = r'^https?://(?:www\.)?byutv.org/watch/[0-9a-f-]+/(?P<video_id>[^/?#]+)'
- _TEST = {
+ _VALID_URL = r'https?://(?:www\.)?byutv\.org/watch/(?!event/)(?P<id>[0-9a-f-]+)(?:/(?P<display_id>[^/?#&]+))?'
+ _TESTS = [{
'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d/studio-c-season-5-episode-5',
- 'md5': '05850eb8c749e2ee05ad5a1c34668493',
'info_dict': {
- 'id': 'studio-c-season-5-episode-5',
+ 'id': '6587b9a3-89d2-42a6-a7f7-fd2f81840a7d',
+ 'display_id': 'studio-c-season-5-episode-5',
'ext': 'mp4',
- 'description': 'md5:e07269172baff037f8e8bf9956bc9747',
'title': 'Season 5 Episode 5',
+ 'description': 'md5:e07269172baff037f8e8bf9956bc9747',
'thumbnail': 're:^https?://.*\.jpg$',
'duration': 1486.486,
},
@@ -24,28 +23,71 @@ class BYUtvIE(InfoExtractor):
'skip_download': True,
},
'add_ie': ['Ooyala'],
- }
+ }, {
+ 'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d',
+ 'only_matching': True,
+ }]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('video_id')
+ video_id = mobj.group('id')
+ display_id = mobj.group('display_id') or video_id
- webpage = self._download_webpage(url, video_id)
+ webpage = self._download_webpage(url, display_id)
episode_code = self._search_regex(
r'(?s)episode:(.*?\}),\s*\n', webpage, 'episode information')
- episode_json = re.sub(
- r'(\n\s+)([a-zA-Z]+):\s+\'(.*?)\'', r'\1"\2": "\3"', episode_code)
- ep = json.loads(episode_json)
-
- if ep['providerType'] == 'Ooyala':
- return {
- '_type': 'url_transparent',
- 'ie_key': 'Ooyala',
- 'url': 'ooyala:%s' % ep['providerId'],
- 'id': video_id,
- 'title': ep['title'],
- 'description': ep.get('description'),
- 'thumbnail': ep.get('imageThumbnail'),
- }
- else:
+
+ ep = self._parse_json(
+ episode_code, display_id, transform_source=lambda s:
+ re.sub(r'(\n\s+)([a-zA-Z]+):\s+\'(.*?)\'', r'\1"\2": "\3"', s))
+
+ if ep['providerType'] != 'Ooyala':
raise ExtractorError('Unsupported provider %s' % ep['provider'])
+
+ return {
+ '_type': 'url_transparent',
+ 'ie_key': 'Ooyala',
+ 'url': 'ooyala:%s' % ep['providerId'],
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': ep['title'],
+ 'description': ep.get('description'),
+ 'thumbnail': ep.get('imageThumbnail'),
+ }
+
+
+class BYUtvEventIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?byutv\.org/watch/event/(?P<id>[0-9a-f-]+)'
+ _TEST = {
+ 'url': 'http://www.byutv.org/watch/event/29941b9b-8bf6-48d2-aebf-7a87add9e34b',
+ 'info_dict': {
+ 'id': '29941b9b-8bf6-48d2-aebf-7a87add9e34b',
+ 'ext': 'mp4',
+ 'title': 'Toledo vs. BYU (9/30/16)',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': ['Ooyala'],
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ ooyala_id = self._search_regex(
+ r'providerId\s*:\s*(["\'])(?P<id>(?:(?!\1).)+)\1',
+ webpage, 'ooyala id', group='id')
+
+ title = self._search_regex(
+ r'class=["\']description["\'][^>]*>\s*<h1>([^<]+)</h1>', webpage,
+ 'title').strip()
+
+ return {
+ '_type': 'url_transparent',
+ 'ie_key': 'Ooyala',
+ 'url': 'ooyala:%s' % ooyala_id,
+ 'id': video_id,
+ 'title': title,
+ }
diff --git a/youtube_dl/extractor/canalplus.py b/youtube_dl/extractor/canalplus.py
index 69e8f4f57..1c3c41d26 100644
--- a/youtube_dl/extractor/canalplus.py
+++ b/youtube_dl/extractor/canalplus.py
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
from __future__ import unicode_literals
import re
@@ -6,11 +6,13 @@ import re
from .common import InfoExtractor
from ..compat import compat_urllib_parse_urlparse
from ..utils import (
+ dict_get,
ExtractorError,
HEADRequest,
- unified_strdate,
- qualities,
int_or_none,
+ qualities,
+ remove_end,
+ unified_strdate,
)
@@ -43,47 +45,46 @@ class CanalplusIE(InfoExtractor):
_TESTS = [{
'url': 'http://www.canalplus.fr/c-emissions/pid1830-c-zapping.html?vid=1192814',
- 'md5': '41f438a4904f7664b91b4ed0dec969dc',
'info_dict': {
- 'id': '1192814',
+ 'id': '1405510',
+ 'display_id': 'pid1830-c-zapping',
'ext': 'mp4',
- 'title': "L'Année du Zapping 2014 - L'Année du Zapping 2014",
- 'description': "Toute l'année 2014 dans un Zapping exceptionnel !",
- 'upload_date': '20150105',
+ 'title': 'Zapping - 02/07/2016',
+ 'description': 'Le meilleur de toutes les chaînes, tous les jours',
+ 'upload_date': '20160702',
},
}, {
'url': 'http://www.piwiplus.fr/videos-piwi/pid1405-le-labyrinthe-boing-super-ranger.html?vid=1108190',
'info_dict': {
'id': '1108190',
- 'ext': 'flv',
- 'title': 'Le labyrinthe - Boing super ranger',
+ 'display_id': 'pid1405-le-labyrinthe-boing-super-ranger',
+ 'ext': 'mp4',
+ 'title': 'BOING SUPER RANGER - Ep : Le labyrinthe',
'description': 'md5:4cea7a37153be42c1ba2c1d3064376ff',
'upload_date': '20140724',
},
'skip': 'Only works from France',
}, {
- 'url': 'http://www.d8.tv/d8-docs-mags/pid5198-d8-en-quete-d-actualite.html?vid=1390231',
+ 'url': 'http://www.c8.fr/c8-divertissement/ms-touche-pas-a-mon-poste/pid6318-videos-integrales.html',
+ 'md5': '4b47b12b4ee43002626b97fad8fb1de5',
'info_dict': {
- 'id': '1390231',
+ 'id': '1420213',
+ 'display_id': 'pid6318-videos-integrales',
'ext': 'mp4',
- 'title': "Vacances pas chères : prix discount ou grosses dépenses ? - En quête d'actualité",
- 'description': 'md5:edb6cf1cb4a1e807b5dd089e1ac8bfc6',
- 'upload_date': '20160512',
- },
- 'params': {
- 'skip_download': True,
+ 'title': 'TPMP ! Même le matin - Les 35H de Baba - 14/10/2016',
+ 'description': 'md5:f96736c1b0ffaa96fd5b9e60ad871799',
+ 'upload_date': '20161014',
},
+ 'skip': 'Only works from France',
}, {
- 'url': 'http://www.itele.fr/chroniques/invite-bruce-toussaint/thierry-solere-nicolas-sarkozy-officialisera-sa-candidature-a-la-primaire-quand-il-le-voudra-167224',
+ 'url': 'http://www.itele.fr/chroniques/invite-michael-darmon/rachida-dati-nicolas-sarkozy-est-le-plus-en-phase-avec-les-inquietudes-des-francais-171510',
'info_dict': {
- 'id': '1398334',
+ 'id': '1420176',
+ 'display_id': 'rachida-dati-nicolas-sarkozy-est-le-plus-en-phase-avec-les-inquietudes-des-francais-171510',
'ext': 'mp4',
- 'title': "L'invité de Bruce Toussaint du 07/06/2016 - ",
- 'description': 'md5:40ac7c9ad0feaeb6f605bad986f61324',
- 'upload_date': '20160607',
- },
- 'params': {
- 'skip_download': True,
+ 'title': 'L\'invité de Michaël Darmon du 14/10/2016 - ',
+ 'description': 'Chaque matin du lundi au vendredi, Michaël Darmon reçoit un invité politique à 8h25.',
+ 'upload_date': '20161014',
},
}, {
'url': 'http://m.canalplus.fr/?vid=1398231',
@@ -95,18 +96,17 @@ class CanalplusIE(InfoExtractor):
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
- video_id = mobj.groupdict().get('id') or mobj.groupdict().get('vid')
site_id = self._SITE_ID_MAP[compat_urllib_parse_urlparse(url).netloc.rsplit('.', 2)[-2]]
# Beware, some subclasses do not define an id group
- display_id = mobj.group('display_id') or video_id
+ display_id = remove_end(dict_get(mobj.groupdict(), ('display_id', 'id', 'vid')), '.html')
- if video_id is None:
- webpage = self._download_webpage(url, display_id)
- video_id = self._search_regex(
- [r'<canal:player[^>]+?videoId=(["\'])(?P<id>\d+)', r'id=["\']canal_video_player(?P<id>\d+)'],
- webpage, 'video id', group='id')
+ webpage = self._download_webpage(url, display_id)
+ video_id = self._search_regex(
+ [r'<canal:player[^>]+?videoId=(["\'])(?P<id>\d+)',
+ r'id=["\']canal_video_player(?P<id>\d+)'],
+ webpage, 'video id', group='id')
info_url = self._VIDEO_INFO_TEMPLATE % (site_id, video_id)
video_data = self._download_json(info_url, video_id, 'Downloading video JSON')
diff --git a/youtube_dl/extractor/carambatv.py b/youtube_dl/extractor/carambatv.py
index 5797fb951..66c0f900a 100644
--- a/youtube_dl/extractor/carambatv.py
+++ b/youtube_dl/extractor/carambatv.py
@@ -9,6 +9,8 @@ from ..utils import (
try_get,
)
+from .videomore import VideomoreIE
+
class CarambaTVIE(InfoExtractor):
_VALID_URL = r'(?:carambatv:|https?://video1\.carambatv\.ru/v/)(?P<id>\d+)'
@@ -62,14 +64,16 @@ class CarambaTVPageIE(InfoExtractor):
_VALID_URL = r'https?://carambatv\.ru/(?:[^/]+/)+(?P<id>[^/?#&]+)'
_TEST = {
'url': 'http://carambatv.ru/movie/bad-comedian/razborka-v-manile/',
- 'md5': '',
+ 'md5': 'a49fb0ec2ad66503eeb46aac237d3c86',
'info_dict': {
- 'id': '191910501',
- 'ext': 'mp4',
+ 'id': '475222',
+ 'ext': 'flv',
'title': '[BadComedian] - Разборка в Маниле (Абсолютный обзор)',
- 'thumbnail': 're:^https?://.*\.jpg$',
- 'duration': 2678.31,
+ 'thumbnail': 're:^https?://.*\.jpg',
+ # duration reported by videomore is incorrect
+ 'duration': int,
},
+ 'add_ie': [VideomoreIE.ie_key()],
}
def _real_extract(self, url):
@@ -77,6 +81,16 @@ class CarambaTVPageIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
+ videomore_url = VideomoreIE._extract_url(webpage)
+ if videomore_url:
+ title = self._og_search_title(webpage)
+ return {
+ '_type': 'url_transparent',
+ 'url': videomore_url,
+ 'ie_key': VideomoreIE.ie_key(),
+ 'title': title,
+ }
+
video_url = self._og_search_property('video:iframe', webpage, default=None)
if not video_url:
diff --git a/youtube_dl/extractor/cbsinteractive.py b/youtube_dl/extractor/cbsinteractive.py
index 821db20b2..57b18e81d 100644
--- a/youtube_dl/extractor/cbsinteractive.py
+++ b/youtube_dl/extractor/cbsinteractive.py
@@ -63,7 +63,7 @@ class CBSInteractiveIE(ThePlatformIE):
webpage = self._download_webpage(url, display_id)
data_json = self._html_search_regex(
- r"data-(?:cnet|zdnet)-video(?:-uvp)?-options='([^']+)'",
+ r"data-(?:cnet|zdnet)-video(?:-uvp(?:js)?)?-options='([^']+)'",
webpage, 'data json')
data = self._parse_json(data_json, display_id)
vdata = data.get('video') or data['videos'][0]
diff --git a/youtube_dl/extractor/cbsnews.py b/youtube_dl/extractor/cbsnews.py
index 216989230..91b0f5fa9 100644
--- a/youtube_dl/extractor/cbsnews.py
+++ b/youtube_dl/extractor/cbsnews.py
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
diff --git a/youtube_dl/extractor/ceskatelevize.py b/youtube_dl/extractor/ceskatelevize.py
index 87c2e7089..4ec79d19d 100644
--- a/youtube_dl/extractor/ceskatelevize.py
+++ b/youtube_dl/extractor/ceskatelevize.py
@@ -1,4 +1,4 @@
-# -*- coding: utf-8 -*-
+# coding: utf-8
from __future__ import unicode_literals
import re
diff --git a/youtube_dl/extractor/chirbit.py b/youtube_dl/extractor/chirbit.py
index 61aed0167..f35df143a 100644
--- a/youtube_dl/extractor/chirbit.py
+++ b/youtube_dl/extractor/chirbit.py
@@ -2,6 +2,7 @@
from __future__ import unicode_literals
import base64
+import re
from .common import InfoExtractor
from ..utils import parse_duration
@@ -70,7 +71,6 @@ class ChirbitProfileIE(InfoExtractor):
'url': 'http://chirbit.com/ScarletBeauty',
'info_dict': {
'id': 'ScarletBeauty',
- 'title': 'Chirbits by ScarletBeauty',
},
'playlist_mincount': 3,
}
@@ -78,13 +78,10 @@ class ChirbitProfileIE(InfoExtractor):
def _real_extract(self, url):
profile_id = self._match_id(url)
- rss = self._download_xml(
- 'http://chirbit.com/rss/%s' % profile_id, profile_id)
+ webpage = self._download_webpage(url, profile_id)
entries = [
- self.url_result(audio_url.text, 'Chirbit')
- for audio_url in rss.findall('./channel/item/link')]
+ self.url_result(self._proto_relative_url('//chirb.it/' + video_id))
+ for _, video_id in re.findall(r'<input[^>]+id=([\'"])copy-btn-(?P<id>[0-9a-zA-Z]+)\1', webpage)]
- title = rss.find('./channel/title').text
-
- return self.playlist_result(entries, profile_id, title)
+ return self.playlist_result(entries, profile_id)
diff --git a/youtube_dl/extractor/clipfish.py b/youtube_dl/extractor/clipfish.py
index 3a47f6fa4..bb52e0c6f 100644
--- a/youtube_dl/extractor/clipfish.py
+++ b/youtube_dl/extractor/clipfish.py
@@ -1,3 +1,4 @@
+# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
@@ -10,15 +11,15 @@ from ..utils import (
class ClipfishIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?clipfish\.de/(?:[^/]+/)+video/(?P<id>[0-9]+)'
_TEST = {
- 'url': 'http://www.clipfish.de/special/game-trailer/video/3966754/fifa-14-e3-2013-trailer/',
- 'md5': '79bc922f3e8a9097b3d68a93780fd475',
+ 'url': 'http://www.clipfish.de/special/ugly-americans/video/4343170/s01-e01-ugly-americans-date-in-der-hoelle/',
+ 'md5': '720563e467b86374c194bdead08d207d',
'info_dict': {
- 'id': '3966754',
+ 'id': '4343170',
'ext': 'mp4',
- 'title': 'FIFA 14 - E3 2013 Trailer',
- 'description': 'Video zu FIFA 14: E3 2013 Trailer',
- 'upload_date': '20130611',
- 'duration': 82,
+ 'title': 'S01 E01 - Ugly Americans - Date in der Hölle',
+ 'description': 'Mark Lilly arbeitet im Sozialdienst der Stadt New York und soll Immigranten bei ihrer Einbürgerung in die USA zur Seite stehen.',
+ 'upload_date': '20161005',
+ 'duration': 1291,
'view_count': int,
}
}
@@ -50,10 +51,14 @@ class ClipfishIE(InfoExtractor):
'tbr': int_or_none(video_info.get('bitrate')),
})
+ descr = video_info.get('descr')
+ if descr:
+ descr = descr.strip()
+
return {
'id': video_id,
'title': video_info['title'],
- 'description': video_info.get('descr'),
+ 'description': descr,
'formats': formats,
'thumbnail': video_info.get('media_content_thumbnail_large') or video_info.get('media_thumbnail'),
'duration': int_or_none(video_info.get('media_length')),
diff --git a/youtube_dl/extractor/clubic.py b/youtube_dl/extractor/clubic.py
index 2fba93543..f7ee3a8f8 100644
--- a/youtube_dl/extractor/clubic.py
+++ b/youtube_dl/extractor/clubic.py
@@ -1,9 +1,6 @@
# coding: utf-8
from __future__ import unicode_literals
-import json
-import re
-
from .common import InfoExtractor
from ..utils import (
clean_html,
@@ -30,16 +27,14 @@ class ClubicIE(InfoExtractor):
}]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
+ video_id = self._match_id(url)
player_url = 'http://player.m6web.fr/v1/player/clubic/%s.html' % video_id
player_page = self._download_webpage(player_url, video_id)
- config_json = self._search_regex(
+ config = self._parse_json(self._search_regex(
r'(?m)M6\.Player\.config\s*=\s*(\{.+?\});$', player_page,
- 'configuration')
- config = json.loads(config_json)
+ 'configuration'), video_id)
video_info = config['videoInfo']
sources = config['sources']
diff --git a/youtube_dl/extractor/cmt.py b/youtube_dl/extractor/cmt.py
index ac3bdfe8f..7d3e9b0c9 100644
--- a/youtube_dl/extractor/cmt.py
+++ b/youtube_dl/extractor/cmt.py
@@ -26,7 +26,7 @@ class CMTIE(MTVIE):
'id': '1504699',
'ext': 'mp4',
'title': 'Still The King Ep. 109 in 3 Minutes',
- 'description': 'Relive or catch up with Still The King by watching this recap of season 1, episode 9. New episodes Sundays 9/8c.',
+ 'description': 'Relive or catch up with Still The King by watching this recap of season 1, episode 9.',
'timestamp': 1469421000.0,
'upload_date': '20160725',
},
@@ -42,3 +42,8 @@ class CMTIE(MTVIE):
'%s said: video is not available' % cls.IE_NAME, expected=True)
return super(CMTIE, cls)._transform_rtmp_url(rtmp_video_url)
+
+ def _extract_mgid(self, webpage):
+ return self._search_regex(
+ r'MTVN\.VIDEO\.contentUri\s*=\s*([\'"])(?P<mgid>.+?)\1',
+ webpage, 'mgid', group='mgid')
diff --git a/youtube_dl/extractor/comcarcoff.py b/youtube_dl/extractor/comcarcoff.py
index 747c245c8..588aad0d9 100644
--- a/youtube_dl/extractor/comcarcoff.py
+++ b/youtube_dl/extractor/comcarcoff.py
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 1076b46da..415dc84c8 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -21,6 +21,7 @@ from ..compat import (
compat_os_name,
compat_str,
compat_urllib_error,
+ compat_urllib_parse_unquote,
compat_urllib_parse_urlencode,
compat_urllib_request,
compat_urlparse,
@@ -234,7 +235,7 @@ class InfoExtractor(object):
chapter_id: Id of the chapter the video belongs to, as a unicode string.
The following fields should only be used when the video is an episode of some
- series or programme:
+ series, programme or podcast:
series: Title of the series or programme the video episode belongs to.
season: Title of the season the video episode belongs to.
@@ -1099,6 +1100,13 @@ class InfoExtractor(object):
manifest, ['{http://ns.adobe.com/f4m/1.0}bootstrapInfo', '{http://ns.adobe.com/f4m/2.0}bootstrapInfo'],
'bootstrap info', default=None)
+ vcodec = None
+ mime_type = xpath_text(
+ manifest, ['{http://ns.adobe.com/f4m/1.0}mimeType', '{http://ns.adobe.com/f4m/2.0}mimeType'],
+ 'base URL', default=None)
+ if mime_type and mime_type.startswith('audio/'):
+ vcodec = 'none'
+
for i, media_el in enumerate(media_nodes):
tbr = int_or_none(media_el.attrib.get('bitrate'))
width = int_or_none(media_el.attrib.get('width'))
@@ -1139,6 +1147,7 @@ class InfoExtractor(object):
'width': f.get('width') or width,
'height': f.get('height') or height,
'format_id': f.get('format_id') if not tbr else format_id,
+ 'vcodec': vcodec,
})
formats.extend(f4m_formats)
continue
@@ -1155,6 +1164,7 @@ class InfoExtractor(object):
'tbr': tbr,
'width': width,
'height': height,
+ 'vcodec': vcodec,
'preference': preference,
})
return formats
@@ -1801,7 +1811,11 @@ class InfoExtractor(object):
return is_plain_url, formats
entries = []
- for media_tag, media_type, media_content in re.findall(r'(?s)(<(?P<tag>video|audio)[^>]*>)(.*?)</(?P=tag)>', webpage):
+ media_tags = [(media_tag, media_type, '')
+ for media_tag, media_type
+ in re.findall(r'(?s)(<(video|audio)[^>]*/>)', webpage)]
+ media_tags.extend(re.findall(r'(?s)(<(?P<tag>video|audio)[^>]*>)(.*?)</(?P=tag)>', webpage))
+ for media_tag, media_type, media_content in media_tags:
media_info = {
'formats': [],
'subtitles': {},
@@ -1870,11 +1884,11 @@ class InfoExtractor(object):
formats.extend(self._extract_f4m_formats(
http_base_url + '/manifest.f4m',
video_id, f4m_id='hds', fatal=False))
+ if 'dash' not in skip_protocols:
+ formats.extend(self._extract_mpd_formats(
+ http_base_url + '/manifest.mpd',
+ video_id, mpd_id='dash', fatal=False))
if re.search(r'(?:/smil:|\.smil)', url_base):
- if 'dash' not in skip_protocols:
- formats.extend(self._extract_mpd_formats(
- http_base_url + '/manifest.mpd',
- video_id, mpd_id='dash', fatal=False))
if 'smil' not in skip_protocols:
rtmp_formats = self._extract_smil_formats(
http_base_url + '/jwplayer.smil',
@@ -2020,6 +2034,12 @@ class InfoExtractor(object):
headers['Ytdl-request-proxy'] = geo_verification_proxy
return headers
+ def _generic_id(self, url):
+ return compat_urllib_parse_unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0])
+
+ def _generic_title(self, url):
+ return compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0])
+
class SearchInfoExtractor(InfoExtractor):
"""
diff --git a/youtube_dl/extractor/commonprotocols.py b/youtube_dl/extractor/commonprotocols.py
index 5d130a170..d98331a4e 100644
--- a/youtube_dl/extractor/commonprotocols.py
+++ b/youtube_dl/extractor/commonprotocols.py
@@ -1,13 +1,9 @@
from __future__ import unicode_literals
-import os
-
from .common import InfoExtractor
from ..compat import (
- compat_urllib_parse_unquote,
compat_urlparse,
)
-from ..utils import url_basename
class RtmpIE(InfoExtractor):
@@ -23,8 +19,8 @@ class RtmpIE(InfoExtractor):
}]
def _real_extract(self, url):
- video_id = compat_urllib_parse_unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0])
- title = compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0])
+ video_id = self._generic_id(url)
+ title = self._generic_title(url)
return {
'id': video_id,
'title': title,
@@ -34,3 +30,31 @@ class RtmpIE(InfoExtractor):
'format_id': compat_urlparse.urlparse(url).scheme,
}],
}
+
+
+class MmsIE(InfoExtractor):
+ IE_DESC = False # Do not list
+ _VALID_URL = r'(?i)mms://.+'
+
+ _TEST = {
+ # Direct MMS link
+ 'url': 'mms://kentro.kaist.ac.kr/200907/MilesReid(0709).wmv',
+ 'info_dict': {
+ 'id': 'MilesReid(0709)',
+ 'ext': 'wmv',
+ 'title': 'MilesReid(0709)',
+ },
+ 'params': {
+ 'skip_download': True, # rtsp downloads, requiring mplayer or mpv
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._generic_id(url)
+ title = self._generic_title(url)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'url': url,
+ }
diff --git a/youtube_dl/extractor/criterion.py b/youtube_dl/extractor/criterion.py
index ad32673a8..cf6a5d6cb 100644
--- a/youtube_dl/extractor/criterion.py
+++ b/youtube_dl/extractor/criterion.py
@@ -1,8 +1,6 @@
-# -*- coding: utf-8 -*-
+# coding: utf-8
from __future__ import unicode_literals
-import re
-
from .common import InfoExtractor
@@ -16,20 +14,20 @@ class CriterionIE(InfoExtractor):
'ext': 'mp4',
'title': 'Le Samouraï',
'description': 'md5:a2b4b116326558149bef81f76dcbb93f',
+ 'thumbnail': 're:^https?://.*\.jpg$',
}
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
+ video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
final_url = self._search_regex(
- r'so.addVariable\("videoURL", "(.+?)"\)\;', webpage, 'video url')
+ r'so\.addVariable\("videoURL", "(.+?)"\)\;', webpage, 'video url')
title = self._og_search_title(webpage)
description = self._html_search_meta('description', webpage)
thumbnail = self._search_regex(
- r'so.addVariable\("thumbnailURL", "(.+?)"\)\;',
+ r'so\.addVariable\("thumbnailURL", "(.+?)"\)\;',
webpage, 'thumbnail url')
return {
diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py
index e4c10ad24..cc141f68e 100644
--- a/youtube_dl/extractor/crunchyroll.py
+++ b/youtube_dl/extractor/crunchyroll.py
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
from __future__ import unicode_literals
import re
@@ -150,6 +150,7 @@ class CrunchyrollIE(CrunchyrollBaseIE):
# rtmp
'skip_download': True,
},
+ 'skip': 'Video gone',
}, {
'url': 'http://www.crunchyroll.com/rezero-starting-life-in-another-world-/episode-5-the-morning-of-our-promise-is-still-distant-702409',
'info_dict': {
diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py
index 62b0747a5..4a3314ea7 100644
--- a/youtube_dl/extractor/dailymotion.py
+++ b/youtube_dl/extractor/dailymotion.py
@@ -94,7 +94,8 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
'title': 'Leanna Decker - Cyber Girl Of The Year Desires Nude [Playboy Plus]',
'uploader': 'HotWaves1012',
'age_limit': 18,
- }
+ },
+ 'skip': 'video gone',
},
# geo-restricted, player v5
{
@@ -144,7 +145,8 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
player_v5 = self._search_regex(
[r'buildPlayer\(({.+?})\);\n', # See https://github.com/rg3/youtube-dl/issues/7826
r'playerV5\s*=\s*dmp\.create\([^,]+?,\s*({.+?})\);',
- r'buildPlayer\(({.+?})\);'],
+ r'buildPlayer\(({.+?})\);',
+ r'var\s+config\s*=\s*({.+?});'],
webpage, 'player v5', default=None)
if player_v5:
player = self._parse_json(player_v5, video_id)
diff --git a/youtube_dl/extractor/daum.py b/youtube_dl/extractor/daum.py
index b5c310ccb..732b4362a 100644
--- a/youtube_dl/extractor/daum.py
+++ b/youtube_dl/extractor/daum.py
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
from __future__ import unicode_literals
diff --git a/youtube_dl/extractor/dctp.py b/youtube_dl/extractor/dctp.py
index a47e04993..14ba88715 100644
--- a/youtube_dl/extractor/dctp.py
+++ b/youtube_dl/extractor/dctp.py
@@ -1,61 +1,54 @@
-# encoding: utf-8
+# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
-from ..compat import compat_str
+from ..utils import unified_strdate
class DctpTvIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?dctp\.tv/(#/)?filme/(?P<id>.+?)/$'
_TEST = {
'url': 'http://www.dctp.tv/filme/videoinstallation-fuer-eine-kaufhausfassade/',
+ 'md5': '174dd4a8a6225cf5655952f969cfbe24',
'info_dict': {
- 'id': '1324',
+ 'id': '95eaa4f33dad413aa17b4ee613cccc6c',
'display_id': 'videoinstallation-fuer-eine-kaufhausfassade',
- 'ext': 'flv',
- 'title': 'Videoinstallation für eine Kaufhausfassade'
+ 'ext': 'mp4',
+ 'title': 'Videoinstallation für eine Kaufhausfassade',
+ 'description': 'Kurzfilm',
+ 'upload_date': '20110407',
+ 'thumbnail': 're:^https?://.*\.jpg$',
},
- 'params': {
- # rtmp download
- 'skip_download': True,
- }
}
def _real_extract(self, url):
video_id = self._match_id(url)
- base_url = 'http://dctp-ivms2-restapi.s3.amazonaws.com/'
- version_json = self._download_json(
- base_url + 'version.json',
- video_id, note='Determining file version')
- version = version_json['version_name']
- info_json = self._download_json(
- '{0}{1}/restapi/slugs/{2}.json'.format(base_url, version, video_id),
- video_id, note='Fetching object ID')
- object_id = compat_str(info_json['object_id'])
- meta_json = self._download_json(
- '{0}{1}/restapi/media/{2}.json'.format(base_url, version, object_id),
- video_id, note='Downloading metadata')
- uuid = meta_json['uuid']
- title = meta_json['title']
- wide = meta_json['is_wide']
- if wide:
- ratio = '16x9'
- else:
- ratio = '4x3'
- play_path = 'mp4:{0}_dctp_0500_{1}.m4v'.format(uuid, ratio)
+ webpage = self._download_webpage(url, video_id)
+
+ object_id = self._html_search_meta('DC.identifier', webpage)
servers_json = self._download_json(
- 'http://www.dctp.tv/streaming_servers/',
+ 'http://www.dctp.tv/elastic_streaming_client/get_streaming_server/',
video_id, note='Downloading server list')
- url = servers_json[0]['endpoint']
+ server = servers_json[0]['server']
+ m3u8_path = self._search_regex(
+ r'\'([^\'"]+/playlist\.m3u8)"', webpage, 'm3u8 path')
+ formats = self._extract_m3u8_formats(
+ 'http://%s%s' % (server, m3u8_path), video_id, ext='mp4',
+ entry_protocol='m3u8_native')
+
+ title = self._og_search_title(webpage)
+ description = self._html_search_meta('DC.description', webpage)
+ upload_date = unified_strdate(
+ self._html_search_meta('DC.date.created', webpage))
+ thumbnail = self._og_search_thumbnail(webpage)
return {
'id': object_id,
'title': title,
- 'format': 'rtmp',
- 'url': url,
- 'play_path': play_path,
- 'rtmp_real_time': True,
- 'ext': 'flv',
- 'display_id': video_id
+ 'formats': formats,
+ 'display_id': video_id,
+ 'description': description,
+ 'upload_date': upload_date,
+ 'thumbnail': thumbnail,
}
diff --git a/youtube_dl/extractor/dramafever.py b/youtube_dl/extractor/dramafever.py
index 3b6529f4b..c11595612 100644
--- a/youtube_dl/extractor/dramafever.py
+++ b/youtube_dl/extractor/dramafever.py
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
from __future__ import unicode_literals
import itertools
diff --git a/youtube_dl/extractor/eitb.py b/youtube_dl/extractor/eitb.py
index 713cb7b32..ee5ead18b 100644
--- a/youtube_dl/extractor/eitb.py
+++ b/youtube_dl/extractor/eitb.py
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
diff --git a/youtube_dl/extractor/embedly.py b/youtube_dl/extractor/embedly.py
index 1cdb11e34..a5820b21e 100644
--- a/youtube_dl/extractor/embedly.py
+++ b/youtube_dl/extractor/embedly.py
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 09b3b4942..a693f8c56 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -66,6 +66,7 @@ from .arte import (
ArteTVDDCIE,
ArteTVMagazineIE,
ArteTVEmbedIE,
+ TheOperaPlatformIE,
ArteTVPlaylistIE,
)
from .atresplayer import AtresPlayerIE
@@ -93,7 +94,7 @@ from .bbc import (
from .beeg import BeegIE
from .behindkink import BehindKinkIE
from .bellmedia import BellMediaIE
-from .beatportpro import BeatportProIE
+from .beatport import BeatportIE
from .bet import BetIE
from .bigflix import BigflixIE
from .bild import BildIE
@@ -116,7 +117,10 @@ from .brightcove import (
BrightcoveNewIE,
)
from .buzzfeed import BuzzFeedIE
-from .byutv import BYUtvIE
+from .byutv import (
+ BYUtvIE,
+ BYUtvEventIE,
+)
from .c56 import C56IE
from .camdemy import (
CamdemyIE,
@@ -183,7 +187,10 @@ from .comedycentral import (
)
from .comcarcoff import ComCarCoffIE
from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
-from .commonprotocols import RtmpIE
+from .commonprotocols import (
+ MmsIE,
+ RtmpIE,
+)
from .condenast import CondeNastIE
from .cracked import CrackedIE
from .crackle import CrackleIE
@@ -342,7 +349,10 @@ from .goshgay import GoshgayIE
from .gputechconf import GPUTechConfIE
from .groupon import GrouponIE
from .hark import HarkIE
-from .hbo import HBOIE
+from .hbo import (
+ HBOIE,
+ HBOEpisodeIE,
+)
from .hearthisat import HearThisAtIE
from .heise import HeiseIE
from .hellporno import HellPornoIE
@@ -363,6 +373,7 @@ from .hrti import (
HRTiIE,
HRTiPlaylistIE,
)
+from .huajiao import HuajiaoIE
from .huffpost import HuffPostIE
from .hypem import HypemIE
from .iconosquare import IconosquareIE
@@ -434,6 +445,7 @@ from .lcp import (
)
from .learnr import LearnrIE
from .lecture2go import Lecture2GoIE
+from .lego import LEGOIE
from .lemonde import LemondeIE
from .leeco import (
LeIE,
@@ -634,6 +646,7 @@ from .nytimes import (
NYTimesArticleIE,
)
from .nuvid import NuvidIE
+from .nzz import NZZIE
from .odatv import OdaTVIE
from .odnoklassniki import OdnoklassnikiIE
from .oktoberfesttv import OktoberfestTVIE
@@ -887,8 +900,10 @@ from .theplatform import (
from .thescene import TheSceneIE
from .thesixtyone import TheSixtyOneIE
from .thestar import TheStarIE
+from .theweatherchannel import TheWeatherChannelIE
from .thisamericanlife import ThisAmericanLifeIE
from .thisav import ThisAVIE
+from .thisoldhouse import ThisOldHouseIE
from .threeqsdn import ThreeQSDNIE
from .tinypic import TinyPicIE
from .tlc import TlcDeIE
@@ -903,6 +918,7 @@ from .tnaflix import (
MovieFapIE,
)
from .toggle import ToggleIE
+from .tonline import TOnlineIE
from .toutv import TouTvIE
from .toypics import ToypicsUserIE, ToypicsIE
from .traileraddict import TrailerAddictIE
diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py
index 3a220e995..801573459 100644
--- a/youtube_dl/extractor/facebook.py
+++ b/youtube_dl/extractor/facebook.py
@@ -258,7 +258,7 @@ class FacebookIE(InfoExtractor):
if not video_data:
server_js_data = self._parse_json(self._search_regex(
- r'handleServerJS\(({.+})\);', webpage, 'server js data', default='{}'), video_id)
+ r'handleServerJS\(({.+})(?:\);|,")', webpage, 'server js data', default='{}'), video_id)
for item in server_js_data.get('instances', []):
if item[1][0] == 'VideoConfig':
video_data = video_data_list2dict(item[2][0]['videoData'])
diff --git a/youtube_dl/extractor/faz.py b/youtube_dl/extractor/faz.py
index fd535457d..4bc8fc512 100644
--- a/youtube_dl/extractor/faz.py
+++ b/youtube_dl/extractor/faz.py
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
diff --git a/youtube_dl/extractor/firsttv.py b/youtube_dl/extractor/firsttv.py
index 332d12020..6b662cc3c 100644
--- a/youtube_dl/extractor/firsttv.py
+++ b/youtube_dl/extractor/firsttv.py
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
diff --git a/youtube_dl/extractor/folketinget.py b/youtube_dl/extractor/folketinget.py
index 75399fa7d..b3df93f28 100644
--- a/youtube_dl/extractor/folketinget.py
+++ b/youtube_dl/extractor/folketinget.py
@@ -1,4 +1,4 @@
-# -*- coding: utf-8 -*-
+# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
diff --git a/youtube_dl/extractor/footyroom.py b/youtube_dl/extractor/footyroom.py
index d2503ae2e..118325b6d 100644
--- a/youtube_dl/extractor/footyroom.py
+++ b/youtube_dl/extractor/footyroom.py
@@ -2,25 +2,27 @@
from __future__ import unicode_literals
from .common import InfoExtractor
+from .streamable import StreamableIE
class FootyRoomIE(InfoExtractor):
- _VALID_URL = r'https?://footyroom\.com/(?P<id>[^/]+)'
+ _VALID_URL = r'https?://footyroom\.com/matches/(?P<id>\d+)'
_TESTS = [{
- 'url': 'http://footyroom.com/schalke-04-0-2-real-madrid-2015-02/',
+ 'url': 'http://footyroom.com/matches/79922154/hull-city-vs-chelsea/review',
'info_dict': {
- 'id': 'schalke-04-0-2-real-madrid-2015-02',
- 'title': 'Schalke 04 0 – 2 Real Madrid',
+ 'id': '79922154',
+ 'title': 'VIDEO Hull City 0 - 2 Chelsea',
},
- 'playlist_count': 3,
- 'skip': 'Video for this match is not available',
+ 'playlist_count': 2,
+ 'add_ie': [StreamableIE.ie_key()],
}, {
- 'url': 'http://footyroom.com/georgia-0-2-germany-2015-03/',
+ 'url': 'http://footyroom.com/matches/75817984/georgia-vs-germany/review',
'info_dict': {
- 'id': 'georgia-0-2-germany-2015-03',
- 'title': 'Georgia 0 – 2 Germany',
+ 'id': '75817984',
+ 'title': 'VIDEO Georgia 0 - 2 Germany',
},
'playlist_count': 1,
+ 'add_ie': ['Playwire']
}]
def _real_extract(self, url):
@@ -28,9 +30,8 @@ class FootyRoomIE(InfoExtractor):
webpage = self._download_webpage(url, playlist_id)
- playlist = self._parse_json(
- self._search_regex(
- r'VideoSelector\.load\((\[.+?\])\);', webpage, 'video selector'),
+ playlist = self._parse_json(self._search_regex(
+ r'DataStore\.media\s*=\s*([^;]+)', webpage, 'media data'),
playlist_id)
playlist_title = self._og_search_title(webpage)
@@ -40,11 +41,16 @@ class FootyRoomIE(InfoExtractor):
payload = video.get('payload')
if not payload:
continue
- playwire_url = self._search_regex(
+ playwire_url = self._html_search_regex(
r'data-config="([^"]+)"', payload,
'playwire url', default=None)
if playwire_url:
entries.append(self.url_result(self._proto_relative_url(
playwire_url, 'http:'), 'Playwire'))
+ streamable_url = StreamableIE._extract_url(payload)
+ if streamable_url:
+ entries.append(self.url_result(
+ streamable_url, StreamableIE.ie_key()))
+
return self.playlist_result(entries, playlist_id, playlist_title)
diff --git a/youtube_dl/extractor/francetv.py b/youtube_dl/extractor/francetv.py
index 3233f66d5..e7068d1ae 100644
--- a/youtube_dl/extractor/francetv.py
+++ b/youtube_dl/extractor/francetv.py
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
from __future__ import unicode_literals
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index c1792c534..7b8a9cf9a 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
from __future__ import unicode_literals
@@ -27,7 +27,6 @@ from ..utils import (
unified_strdate,
unsmuggle_url,
UnsupportedError,
- url_basename,
xpath_text,
)
from .brightcove import (
@@ -1549,7 +1548,7 @@ class GenericIE(InfoExtractor):
force_videoid = smuggled_data['force_videoid']
video_id = force_videoid
else:
- video_id = compat_urllib_parse_unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0])
+ video_id = self._generic_id(url)
self.to_screen('%s: Requesting header' % video_id)
@@ -1578,7 +1577,7 @@ class GenericIE(InfoExtractor):
info_dict = {
'id': video_id,
- 'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
+ 'title': self._generic_title(url),
'upload_date': unified_strdate(head_response.headers.get('Last-Modified'))
}
@@ -1754,9 +1753,9 @@ class GenericIE(InfoExtractor):
if matches:
return _playlist_from_matches(matches, ie='RtlNl')
- vimeo_url = VimeoIE._extract_vimeo_url(url, webpage)
- if vimeo_url is not None:
- return self.url_result(vimeo_url)
+ vimeo_urls = VimeoIE._extract_urls(url, webpage)
+ if vimeo_urls:
+ return _playlist_from_matches(vimeo_urls, ie=VimeoIE.ie_key())
vid_me_embed_url = self._search_regex(
r'src=[\'"](https?://vid\.me/[^\'"]+)[\'"]',
@@ -2332,12 +2331,23 @@ class GenericIE(InfoExtractor):
info_dict.update(json_ld)
return info_dict
+ # Look for HTML5 media
+ entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
+ if entries:
+ for entry in entries:
+ entry.update({
+ 'id': video_id,
+ 'title': video_title,
+ })
+ self._sort_formats(entry['formats'])
+ return self.playlist_result(entries)
+
def check_video(vurl):
if YoutubeIE.suitable(vurl):
return True
vpath = compat_urlparse.urlparse(vurl).path
vext = determine_ext(vpath)
- return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml')
+ return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml', 'js')
def filter_video(urls):
return list(filter(check_video, urls))
@@ -2388,9 +2398,6 @@ class GenericIE(InfoExtractor):
if m_video_type is not None:
found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage))
if not found:
- # HTML5 video
- found = re.findall(r'(?s)<(?:video|audio)[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']', webpage)
- if not found:
REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
found = re.search(
r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
diff --git a/youtube_dl/extractor/goshgay.py b/youtube_dl/extractor/goshgay.py
index a43abd154..74e1720ee 100644
--- a/youtube_dl/extractor/goshgay.py
+++ b/youtube_dl/extractor/goshgay.py
@@ -1,4 +1,4 @@
-# -*- coding: utf-8 -*-
+# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
diff --git a/youtube_dl/extractor/hark.py b/youtube_dl/extractor/hark.py
index 749e9154f..342a6130e 100644
--- a/youtube_dl/extractor/hark.py
+++ b/youtube_dl/extractor/hark.py
@@ -1,4 +1,4 @@
-# -*- coding: utf-8 -*-
+# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
diff --git a/youtube_dl/extractor/hbo.py b/youtube_dl/extractor/hbo.py
index dad0f3994..cbf774377 100644
--- a/youtube_dl/extractor/hbo.py
+++ b/youtube_dl/extractor/hbo.py
@@ -12,17 +12,7 @@ from ..utils import (
)
-class HBOIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?hbo\.com/video/video\.html\?.*vid=(?P<id>[0-9]+)'
- _TEST = {
- 'url': 'http://www.hbo.com/video/video.html?autoplay=true&g=u&vid=1437839',
- 'md5': '1c33253f0c7782142c993c0ba62a8753',
- 'info_dict': {
- 'id': '1437839',
- 'ext': 'mp4',
- 'title': 'Ep. 64 Clip: Encryption',
- }
- }
+class HBOBaseIE(InfoExtractor):
_FORMATS_INFO = {
'1920': {
'width': 1280,
@@ -50,8 +40,7 @@ class HBOIE(InfoExtractor):
},
}
- def _real_extract(self, url):
- video_id = self._match_id(url)
+ def _extract_from_id(self, video_id):
video_data = self._download_xml(
'http://render.lv3.hbo.com/data/content/global/videos/data/%s.xml' % video_id, video_id)
title = xpath_text(video_data, 'title', 'title', True)
@@ -116,7 +105,60 @@ class HBOIE(InfoExtractor):
return {
'id': video_id,
'title': title,
- 'duration': parse_duration(xpath_element(video_data, 'duration/tv14')),
+ 'duration': parse_duration(xpath_text(video_data, 'duration/tv14')),
'formats': formats,
'thumbnails': thumbnails,
}
+
+
+class HBOIE(HBOBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?hbo\.com/video/video\.html\?.*vid=(?P<id>[0-9]+)'
+ _TEST = {
+ 'url': 'http://www.hbo.com/video/video.html?autoplay=true&g=u&vid=1437839',
+ 'md5': '1c33253f0c7782142c993c0ba62a8753',
+ 'info_dict': {
+ 'id': '1437839',
+ 'ext': 'mp4',
+ 'title': 'Ep. 64 Clip: Encryption',
+ 'thumbnail': 're:https?://.*\.jpg$',
+ 'duration': 1072,
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ return self._extract_from_id(video_id)
+
+
+class HBOEpisodeIE(HBOBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?hbo\.com/(?!video)([^/]+/)+video/(?P<id>[0-9a-z-]+)\.html'
+
+ _TESTS = [{
+ 'url': 'http://www.hbo.com/girls/episodes/5/52-i-love-you-baby/video/ep-52-inside-the-episode.html?autoplay=true',
+ 'md5': '689132b253cc0ab7434237fc3a293210',
+ 'info_dict': {
+ 'id': '1439518',
+ 'display_id': 'ep-52-inside-the-episode',
+ 'ext': 'mp4',
+ 'title': 'Ep. 52: Inside the Episode',
+ 'thumbnail': 're:https?://.*\.jpg$',
+ 'duration': 240,
+ },
+ }, {
+ 'url': 'http://www.hbo.com/game-of-thrones/about/video/season-5-invitation-to-the-set.html?autoplay=true',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id)
+
+ video_id = self._search_regex(
+ r'(?P<q1>[\'"])videoId(?P=q1)\s*:\s*(?P<q2>[\'"])(?P<video_id>\d+)(?P=q2)',
+ webpage, 'video ID', group='video_id')
+
+ info_dict = self._extract_from_id(video_id)
+ info_dict['display_id'] = display_id
+
+ return info_dict
diff --git a/youtube_dl/extractor/helsinki.py b/youtube_dl/extractor/helsinki.py
index 93107b306..575fb332a 100644
--- a/youtube_dl/extractor/helsinki.py
+++ b/youtube_dl/extractor/helsinki.py
@@ -1,4 +1,4 @@
-# -*- coding: utf-8 -*-
+# coding: utf-8
from __future__ import unicode_literals
diff --git a/youtube_dl/extractor/huajiao.py b/youtube_dl/extractor/huajiao.py
new file mode 100644
index 000000000..cec0df09a
--- /dev/null
+++ b/youtube_dl/extractor/huajiao.py
@@ -0,0 +1,56 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ parse_duration,
+ parse_iso8601,
+)
+
+
+class HuajiaoIE(InfoExtractor):
+ IE_DESC = '花椒直播'
+ _VALID_URL = r'https?://(?:www\.)?huajiao\.com/l/(?P<id>[0-9]+)'
+ _TEST = {
+ 'url': 'http://www.huajiao.com/l/38941232',
+ 'md5': 'd08bf9ac98787d24d1e4c0283f2d372d',
+ 'info_dict': {
+ 'id': '38941232',
+ 'ext': 'mp4',
+ 'title': '#新人求关注#',
+ 'description': 're:.*',
+ 'duration': 2424.0,
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ 'timestamp': 1475866459,
+ 'upload_date': '20161007',
+ 'uploader': 'Penny_余姿昀',
+ 'uploader_id': '75206005',
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ feed_json = self._search_regex(
+ r'var\s+feed\s*=\s*({.+})', webpage, 'feed json')
+ feed = self._parse_json(feed_json, video_id)
+
+ description = self._html_search_meta(
+ 'description', webpage, 'description', fatal=False)
+
+ def get(section, field):
+ return feed.get(section, {}).get(field)
+
+ return {
+ 'id': video_id,
+ 'title': feed['feed']['formated_title'],
+ 'description': description,
+ 'duration': parse_duration(get('feed', 'duration')),
+ 'thumbnail': get('feed', 'image'),
+ 'timestamp': parse_iso8601(feed.get('creatime'), ' '),
+ 'uploader': get('author', 'nickname'),
+ 'uploader_id': get('author', 'uid'),
+ 'formats': self._extract_m3u8_formats(
+ feed['feed']['m3u8'], video_id, 'mp4', 'm3u8_native'),
+ }
diff --git a/youtube_dl/extractor/ina.py b/youtube_dl/extractor/ina.py
index 65712abc2..9544ff9d4 100644
--- a/youtube_dl/extractor/ina.py
+++ b/youtube_dl/extractor/ina.py
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
from __future__ import unicode_literals
import re
diff --git a/youtube_dl/extractor/iprima.py b/youtube_dl/extractor/iprima.py
index 788bbe0d5..da2cdc656 100644
--- a/youtube_dl/extractor/iprima.py
+++ b/youtube_dl/extractor/iprima.py
@@ -81,6 +81,9 @@ class IPrimaIE(InfoExtractor):
for _, src in re.findall(r'src["\']\s*:\s*(["\'])(.+?)\1', playerpage):
extract_formats(src)
+ if not formats and '>GEO_IP_NOT_ALLOWED<' in playerpage:
+ self.raise_geo_restricted()
+
self._sort_formats(formats)
return {
diff --git a/youtube_dl/extractor/jpopsukitv.py b/youtube_dl/extractor/jpopsukitv.py
index 122e2dd8c..4b5f346d1 100644
--- a/youtube_dl/extractor/jpopsukitv.py
+++ b/youtube_dl/extractor/jpopsukitv.py
@@ -1,4 +1,4 @@
-# coding=utf-8
+# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
diff --git a/youtube_dl/extractor/jwplatform.py b/youtube_dl/extractor/jwplatform.py
index 38199fcd0..5d56e0a28 100644
--- a/youtube_dl/extractor/jwplatform.py
+++ b/youtube_dl/extractor/jwplatform.py
@@ -32,13 +32,20 @@ class JWPlatformBaseIE(InfoExtractor):
return self._parse_jwplayer_data(
jwplayer_data, video_id, *args, **kwargs)
- def _parse_jwplayer_data(self, jwplayer_data, video_id=None, require_title=True, m3u8_id=None, rtmp_params=None, base_url=None):
+ def _parse_jwplayer_data(self, jwplayer_data, video_id=None, require_title=True,
+ m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None):
# JWPlayer backward compatibility: flattened playlists
# https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/api/config.js#L81-L96
if 'playlist' not in jwplayer_data:
jwplayer_data = {'playlist': [jwplayer_data]}
entries = []
+
+ # JWPlayer backward compatibility: single playlist item
+ # https://github.com/jwplayer/jwplayer/blob/v7.7.0/src/js/playlist/playlist.js#L10
+ if not isinstance(jwplayer_data['playlist'], list):
+ jwplayer_data['playlist'] = [jwplayer_data['playlist']]
+
for video_data in jwplayer_data['playlist']:
# JWPlayer backward compatibility: flattened sources
# https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/playlist/item.js#L29-L35
@@ -57,6 +64,9 @@ class JWPlatformBaseIE(InfoExtractor):
if source_type == 'hls' or ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
source_url, this_video_id, 'mp4', 'm3u8_native', m3u8_id=m3u8_id, fatal=False))
+ elif ext == 'mpd':
+ formats.extend(self._extract_mpd_formats(
+ source_url, this_video_id, mpd_id=mpd_id, fatal=False))
# https://github.com/jwplayer/jwplayer/blob/master/src/js/providers/default.js#L67
elif source_type.startswith('audio') or ext in ('oga', 'aac', 'mp3', 'mpeg', 'vorbis'):
formats.append({
diff --git a/youtube_dl/extractor/kickstarter.py b/youtube_dl/extractor/kickstarter.py
index fbe499497..d4da8f484 100644
--- a/youtube_dl/extractor/kickstarter.py
+++ b/youtube_dl/extractor/kickstarter.py
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
diff --git a/youtube_dl/extractor/kontrtube.py b/youtube_dl/extractor/kontrtube.py
index 704bd7b34..1fda45107 100644
--- a/youtube_dl/extractor/kontrtube.py
+++ b/youtube_dl/extractor/kontrtube.py
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
from __future__ import unicode_literals
import re
diff --git a/youtube_dl/extractor/krasview.py b/youtube_dl/extractor/krasview.py
index 0ae8ebd68..cf8876fa1 100644
--- a/youtube_dl/extractor/krasview.py
+++ b/youtube_dl/extractor/krasview.py
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
from __future__ import unicode_literals
import json
diff --git a/youtube_dl/extractor/lego.py b/youtube_dl/extractor/lego.py
new file mode 100644
index 000000000..d3bca6435
--- /dev/null
+++ b/youtube_dl/extractor/lego.py
@@ -0,0 +1,128 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ unescapeHTML,
+ parse_duration,
+ get_element_by_class,
+)
+
+
+class LEGOIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?lego\.com/(?P<locale>[^/]+)/(?:[^/]+/)*videos/(?:[^/]+/)*[^/?#]+-(?P<id>[0-9a-f]+)'
+ _TESTS = [{
+ 'url': 'http://www.lego.com/en-us/videos/themes/club/blocumentary-kawaguchi-55492d823b1b4d5e985787fa8c2973b1',
+ 'md5': 'f34468f176cfd76488767fc162c405fa',
+ 'info_dict': {
+ 'id': '55492d823b1b4d5e985787fa8c2973b1',
+ 'ext': 'mp4',
+ 'title': 'Blocumentary Great Creations: Akiyuki Kawaguchi',
+ 'description': 'Blocumentary Great Creations: Akiyuki Kawaguchi',
+ },
+ }, {
+ # geo-restricted but the contentUrl contains a valid URL
+ 'url': 'http://www.lego.com/nl-nl/videos/themes/nexoknights/episode-20-kingdom-of-heroes-13bdc2299ab24d9685701a915b3d71e7##sp=399',
+ 'md5': '4c3fec48a12e40c6e5995abc3d36cc2e',
+ 'info_dict': {
+ 'id': '13bdc2299ab24d9685701a915b3d71e7',
+ 'ext': 'mp4',
+ 'title': 'Aflevering 20 - Helden van het koninkrijk',
+ 'description': 'md5:8ee499aac26d7fa8bcb0cedb7f9c3941',
+ },
+ }, {
+ # special characters in title
+ 'url': 'http://www.lego.com/en-us/starwars/videos/lego-star-wars-force-surprise-9685ee9d12e84ff38e84b4e3d0db533d',
+ 'info_dict': {
+ 'id': '9685ee9d12e84ff38e84b4e3d0db533d',
+ 'ext': 'mp4',
+ 'title': 'Force Surprise – LEGO® Star Wars™ Microfighters',
+ 'description': 'md5:9c673c96ce6f6271b88563fe9dc56de3',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }]
+ _BITRATES = [256, 512, 1024, 1536, 2560]
+
+ def _real_extract(self, url):
+ locale, video_id = re.match(self._VALID_URL, url).groups()
+ webpage = self._download_webpage(url, video_id)
+ title = get_element_by_class('video-header', webpage).strip()
+ progressive_base = 'https://lc-mediaplayerns-live-s.legocdn.com/'
+ streaming_base = 'http://legoprod-f.akamaihd.net/'
+ content_url = self._html_search_meta('contentUrl', webpage)
+ path = self._search_regex(
+ r'(?:https?:)?//[^/]+/(?:[iz]/s/)?public/(.+)_[0-9,]+\.(?:mp4|webm)',
+ content_url, 'video path', default=None)
+ if not path:
+ player_url = self._proto_relative_url(self._search_regex(
+ r'<iframe[^>]+src="((?:https?)?//(?:www\.)?lego\.com/[^/]+/mediaplayer/video/[^"]+)',
+ webpage, 'player url', default=None))
+ if not player_url:
+ base_url = self._proto_relative_url(self._search_regex(
+ r'data-baseurl="([^"]+)"', webpage, 'base url',
+ default='http://www.lego.com/%s/mediaplayer/video/' % locale))
+ player_url = base_url + video_id
+ player_webpage = self._download_webpage(player_url, video_id)
+ video_data = self._parse_json(unescapeHTML(self._search_regex(
+ r"video='([^']+)'", player_webpage, 'video data')), video_id)
+ progressive_base = self._search_regex(
+ r'data-video-progressive-url="([^"]+)"',
+ player_webpage, 'progressive base', default='https://lc-mediaplayerns-live-s.legocdn.com/')
+ streaming_base = self._search_regex(
+ r'data-video-streaming-url="([^"]+)"',
+ player_webpage, 'streaming base', default='http://legoprod-f.akamaihd.net/')
+ item_id = video_data['ItemId']
+
+ net_storage_path = video_data.get('NetStoragePath') or '/'.join([item_id[:2], item_id[2:4]])
+ base_path = '_'.join([item_id, video_data['VideoId'], video_data['Locale'], compat_str(video_data['VideoVersion'])])
+ path = '/'.join([net_storage_path, base_path])
+ streaming_path = ','.join(map(compat_str, self._BITRATES))
+
+ formats = self._extract_akamai_formats(
+ '%si/s/public/%s_,%s,.mp4.csmil/master.m3u8' % (streaming_base, path, streaming_path), video_id)
+ m3u8_formats = list(filter(
+ lambda f: f.get('protocol') == 'm3u8_native' and f.get('vcodec') != 'none' and f.get('resolution') != 'multiple',
+ formats))
+ if len(m3u8_formats) == len(self._BITRATES):
+ self._sort_formats(m3u8_formats)
+ for bitrate, m3u8_format in zip(self._BITRATES, m3u8_formats):
+ progressive_base_url = '%spublic/%s_%d.' % (progressive_base, path, bitrate)
+ mp4_f = m3u8_format.copy()
+ mp4_f.update({
+ 'url': progressive_base_url + 'mp4',
+ 'format_id': m3u8_format['format_id'].replace('hls', 'mp4'),
+ 'protocol': 'http',
+ })
+ web_f = {
+ 'url': progressive_base_url + 'webm',
+ 'format_id': m3u8_format['format_id'].replace('hls', 'webm'),
+ 'width': m3u8_format['width'],
+ 'height': m3u8_format['height'],
+ 'tbr': m3u8_format.get('tbr'),
+ 'ext': 'webm',
+ }
+ formats.extend([web_f, mp4_f])
+ else:
+ for bitrate in self._BITRATES:
+ for ext in ('web', 'mp4'):
+ formats.append({
+ 'format_id': '%s-%s' % (ext, bitrate),
+ 'url': '%spublic/%s_%d.%s' % (progressive_base, path, bitrate, ext),
+ 'tbr': bitrate,
+ 'ext': ext,
+ })
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': self._html_search_meta('description', webpage),
+ 'thumbnail': self._html_search_meta('thumbnail', webpage),
+ 'duration': parse_duration(self._html_search_meta('duration', webpage)),
+ 'formats': formats,
+ }
diff --git a/youtube_dl/extractor/lifenews.py b/youtube_dl/extractor/lifenews.py
index 87120ecd1..afce2010e 100644
--- a/youtube_dl/extractor/lifenews.py
+++ b/youtube_dl/extractor/lifenews.py
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
from __future__ import unicode_literals
import re
diff --git a/youtube_dl/extractor/lynda.py b/youtube_dl/extractor/lynda.py
index 299873ecc..f4dcfd93f 100644
--- a/youtube_dl/extractor/lynda.py
+++ b/youtube_dl/extractor/lynda.py
@@ -94,12 +94,12 @@ class LyndaBaseIE(InfoExtractor):
class LyndaIE(LyndaBaseIE):
IE_NAME = 'lynda'
IE_DESC = 'lynda.com videos'
- _VALID_URL = r'https?://(?:www\.)?lynda\.com/(?:[^/]+/[^/]+/\d+|player/embed)/(?P<id>\d+)'
+ _VALID_URL = r'https?://(?:www\.)?lynda\.com/(?:[^/]+/[^/]+/(?P<course_id>\d+)|player/embed)/(?P<id>\d+)'
_TIMECODE_REGEX = r'\[(?P<timecode>\d+:\d+:\d+[\.,]\d+)\]'
_TESTS = [{
- 'url': 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html',
+ 'url': 'https://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html',
# md5 is unstable
'info_dict': {
'id': '114408',
@@ -112,19 +112,71 @@ class LyndaIE(LyndaBaseIE):
'only_matching': True,
}]
+ def _raise_unavailable(self, video_id):
+ self.raise_login_required(
+ 'Video %s is only available for members' % video_id)
+
def _real_extract(self, url):
- video_id = self._match_id(url)
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ course_id = mobj.group('course_id')
+
+ query = {
+ 'videoId': video_id,
+ 'type': 'video',
+ }
video = self._download_json(
- 'http://www.lynda.com/ajax/player?videoId=%s&type=video' % video_id,
- video_id, 'Downloading video JSON')
+ 'https://www.lynda.com/ajax/player', video_id,
+ 'Downloading video JSON', fatal=False, query=query)
+
+ # Fallback scenario
+ if not video:
+ query['courseId'] = course_id
+
+ play = self._download_json(
+ 'https://www.lynda.com/ajax/course/%s/%s/play'
+ % (course_id, video_id), video_id, 'Downloading play JSON')
+
+ if not play:
+ self._raise_unavailable(video_id)
+
+ formats = []
+ for formats_dict in play:
+ urls = formats_dict.get('urls')
+ if not isinstance(urls, dict):
+ continue
+ cdn = formats_dict.get('name')
+ for format_id, format_url in urls.items():
+ if not format_url:
+ continue
+ formats.append({
+ 'url': format_url,
+ 'format_id': '%s-%s' % (cdn, format_id) if cdn else format_id,
+ 'height': int_or_none(format_id),
+ })
+ self._sort_formats(formats)
+
+ conviva = self._download_json(
+ 'https://www.lynda.com/ajax/player/conviva', video_id,
+ 'Downloading conviva JSON', query=query)
+
+ return {
+ 'id': video_id,
+ 'title': conviva['VideoTitle'],
+ 'description': conviva.get('VideoDescription'),
+ 'release_year': int_or_none(conviva.get('ReleaseYear')),
+ 'duration': int_or_none(conviva.get('Duration')),
+ 'creator': conviva.get('Author'),
+ 'formats': formats,
+ }
if 'Status' in video:
raise ExtractorError(
'lynda returned error: %s' % video['Message'], expected=True)
if video.get('HasAccess') is False:
- self.raise_login_required('Video %s is only available for members' % video_id)
+ self._raise_unavailable(video_id)
video_id = compat_str(video.get('ID') or video_id)
duration = int_or_none(video.get('DurationInSeconds'))
@@ -148,7 +200,7 @@ class LyndaIE(LyndaBaseIE):
for prioritized_stream_id, prioritized_stream in prioritized_streams.items():
formats.extend([{
'url': video_url,
- 'width': int_or_none(format_id),
+ 'height': int_or_none(format_id),
'format_id': '%s-%s' % (prioritized_stream_id, format_id),
} for format_id, video_url in prioritized_stream.items()])
@@ -187,7 +239,7 @@ class LyndaIE(LyndaBaseIE):
return srt
def _get_subtitles(self, video_id):
- url = 'http://www.lynda.com/ajax/player?videoId=%s&type=transcript' % video_id
+ url = 'https://www.lynda.com/ajax/player?videoId=%s&type=transcript' % video_id
subs = self._download_json(url, None, False)
if subs:
return {'en': [{'ext': 'srt', 'data': self._fix_subtitles(subs)}]}
@@ -209,7 +261,7 @@ class LyndaCourseIE(LyndaBaseIE):
course_id = mobj.group('courseid')
course = self._download_json(
- 'http://www.lynda.com/ajax/player?courseId=%s&type=course' % course_id,
+ 'https://www.lynda.com/ajax/player?courseId=%s&type=course' % course_id,
course_id, 'Downloading course JSON')
if course.get('Status') == 'NotFound':
@@ -231,7 +283,7 @@ class LyndaCourseIE(LyndaBaseIE):
if video_id:
entries.append({
'_type': 'url_transparent',
- 'url': 'http://www.lynda.com/%s/%s-4.html' % (course_path, video_id),
+ 'url': 'https://www.lynda.com/%s/%s-4.html' % (course_path, video_id),
'ie_key': LyndaIE.ie_key(),
'chapter': chapter.get('Title'),
'chapter_number': int_or_none(chapter.get('ChapterIndex')),
diff --git a/youtube_dl/extractor/m6.py b/youtube_dl/extractor/m6.py
index 39d2742c8..9806875e8 100644
--- a/youtube_dl/extractor/m6.py
+++ b/youtube_dl/extractor/m6.py
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
diff --git a/youtube_dl/extractor/mailru.py b/youtube_dl/extractor/mailru.py
index 9a7098c43..f7cc3c832 100644
--- a/youtube_dl/extractor/mailru.py
+++ b/youtube_dl/extractor/mailru.py
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
from __future__ import unicode_literals
import re
diff --git a/youtube_dl/extractor/moviezine.py b/youtube_dl/extractor/moviezine.py
index aa091a62c..478e39967 100644
--- a/youtube_dl/extractor/moviezine.py
+++ b/youtube_dl/extractor/moviezine.py
@@ -1,4 +1,4 @@
-# -*- coding: utf-8 -*-
+# coding: utf-8
from __future__ import unicode_literals
import re
diff --git a/youtube_dl/extractor/musicplayon.py b/youtube_dl/extractor/musicplayon.py
index 2174e5665..1854d59a5 100644
--- a/youtube_dl/extractor/musicplayon.py
+++ b/youtube_dl/extractor/musicplayon.py
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
diff --git a/youtube_dl/extractor/myspace.py b/youtube_dl/extractor/myspace.py
index 0d5238d77..ab32e632e 100644
--- a/youtube_dl/extractor/myspace.py
+++ b/youtube_dl/extractor/myspace.py
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
from __future__ import unicode_literals
import re
diff --git a/youtube_dl/extractor/nationalgeographic.py b/youtube_dl/extractor/nationalgeographic.py
index 1dcf27afe..b91d86528 100644
--- a/youtube_dl/extractor/nationalgeographic.py
+++ b/youtube_dl/extractor/nationalgeographic.py
@@ -4,6 +4,7 @@ import re
from .common import InfoExtractor
from .adobepass import AdobePassIE
+from .theplatform import ThePlatformIE
from ..utils import (
smuggle_url,
url_basename,
@@ -65,7 +66,7 @@ class NationalGeographicVideoIE(InfoExtractor):
}
-class NationalGeographicIE(AdobePassIE):
+class NationalGeographicIE(ThePlatformIE, AdobePassIE):
IE_NAME = 'natgeo'
_VALID_URL = r'https?://channel\.nationalgeographic\.com/(?:wild/)?[^/]+/(?:videos|episodes)/(?P<id>[^/?]+)'
@@ -110,25 +111,39 @@ class NationalGeographicIE(AdobePassIE):
release_url = self._search_regex(
r'video_auth_playlist_url\s*=\s*"([^"]+)"',
webpage, 'release url')
+ theplatform_path = self._search_regex(r'https?://link\.theplatform\.com/s/([^?]+)', release_url, 'theplatform path')
+ video_id = theplatform_path.split('/')[-1]
query = {
'mbr': 'true',
- 'switch': 'http',
}
is_auth = self._search_regex(r'video_is_auth\s*=\s*"([^"]+)"', webpage, 'is auth', fatal=False)
if is_auth == 'auth':
auth_resource_id = self._search_regex(
r"video_auth_resourceId\s*=\s*'([^']+)'",
webpage, 'auth resource id')
- query['auth'] = self._extract_mvpd_auth(url, display_id, 'natgeo', auth_resource_id)
-
- return {
- '_type': 'url_transparent',
- 'ie_key': 'ThePlatform',
- 'url': smuggle_url(
- update_url_query(release_url, query),
- {'force_smil_url': True}),
+ query['auth'] = self._extract_mvpd_auth(url, video_id, 'natgeo', auth_resource_id)
+
+ formats = []
+ subtitles = {}
+ for key, value in (('switch', 'http'), ('manifest', 'm3u')):
+ tp_query = query.copy()
+ tp_query.update({
+ key: value,
+ })
+ tp_formats, tp_subtitles = self._extract_theplatform_smil(
+ update_url_query(release_url, tp_query), video_id, 'Downloading %s SMIL data' % value)
+ formats.extend(tp_formats)
+ subtitles = self._merge_subtitles(subtitles, tp_subtitles)
+ self._sort_formats(formats)
+
+ info = self._extract_theplatform_metadata(theplatform_path, display_id)
+ info.update({
+ 'id': video_id,
+ 'formats': formats,
+ 'subtitles': subtitles,
'display_id': display_id,
- }
+ })
+ return info
class NationalGeographicEpisodeGuideIE(InfoExtractor):
diff --git a/youtube_dl/extractor/naver.py b/youtube_dl/extractor/naver.py
index 0891d2772..055070ff5 100644
--- a/youtube_dl/extractor/naver.py
+++ b/youtube_dl/extractor/naver.py
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
from __future__ import unicode_literals
import re
diff --git a/youtube_dl/extractor/newstube.py b/youtube_dl/extractor/newstube.py
index 0092b85ce..e3f35f1d8 100644
--- a/youtube_dl/extractor/newstube.py
+++ b/youtube_dl/extractor/newstube.py
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
from __future__ import unicode_literals
import re
diff --git a/youtube_dl/extractor/nextmedia.py b/youtube_dl/extractor/nextmedia.py
index a08e48c4b..dee9056d3 100644
--- a/youtube_dl/extractor/nextmedia.py
+++ b/youtube_dl/extractor/nextmedia.py
@@ -93,7 +93,7 @@ class NextMediaActionNewsIE(NextMediaIE):
class AppleDailyIE(NextMediaIE):
IE_DESC = '臺灣蘋果日報'
- _VALID_URL = r'https?://(www|ent)\.appledaily\.com\.tw/(?:animation|appledaily|enews|realtimenews)/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)(/.*)?'
+ _VALID_URL = r'https?://(www|ent)\.appledaily\.com\.tw/(?:animation|appledaily|enews|realtimenews|actionnews)/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)(/.*)?'
_TESTS = [{
'url': 'http://ent.appledaily.com.tw/enews/article/entertainment/20150128/36354694',
'md5': 'a843ab23d150977cc55ef94f1e2c1e4d',
@@ -154,6 +154,9 @@ class AppleDailyIE(NextMediaIE):
'description': 'md5:7b859991a6a4fedbdf3dd3b66545c748',
'upload_date': '20140417',
},
+ }, {
+ 'url': 'http://www.appledaily.com.tw/actionnews/appledaily/7/20161003/960588/',
+ 'only_matching': True,
}]
_URL_PATTERN = r'\{url: \'(.+)\'\}'
diff --git a/youtube_dl/extractor/nhl.py b/youtube_dl/extractor/nhl.py
index b04d21113..62ce800c0 100644
--- a/youtube_dl/extractor/nhl.py
+++ b/youtube_dl/extractor/nhl.py
@@ -245,7 +245,11 @@ class NHLVideocenterCategoryIE(NHLBaseInfoExtractor):
class NHLIE(InfoExtractor):
IE_NAME = 'nhl.com'
- _VALID_URL = r'https?://(?:www\.)?nhl\.com/([^/]+/)*c-(?P<id>\d+)'
+ _VALID_URL = r'https?://(?:www\.)?(?P<site>nhl|wch2016)\.com/(?:[^/]+/)*c-(?P<id>\d+)'
+ _SITES_MAP = {
+ 'nhl': 'nhl',
+ 'wch2016': 'wch',
+ }
_TESTS = [{
# type=video
'url': 'https://www.nhl.com/video/anisimov-cleans-up-mess/t-277752844/c-43663503',
@@ -270,13 +274,32 @@ class NHLIE(InfoExtractor):
'upload_date': '20160204',
'timestamp': 1454544904,
},
+ }, {
+ # Some m3u8 URLs are invalid (https://github.com/rg3/youtube-dl/issues/10713)
+ 'url': 'https://www.nhl.com/predators/video/poile-laviolette-on-subban-trade/t-277437416/c-44315003',
+ 'md5': '50b2bb47f405121484dda3ccbea25459',
+ 'info_dict': {
+ 'id': '44315003',
+ 'ext': 'mp4',
+ 'title': 'Poile, Laviolette on Subban trade',
+ 'description': 'General manager David Poile and head coach Peter Laviolette share their thoughts on acquiring P.K. Subban from Montreal (06/29/16)',
+ 'timestamp': 1467242866,
+ 'upload_date': '20160629',
+ },
+ }, {
+ 'url': 'https://www.wch2016.com/video/caneur-best-of-game-2-micd-up/t-281230378/c-44983703',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.wch2016.com/news/3-stars-team-europe-vs-team-canada/c-282195068',
+ 'only_matching': True,
}]
def _real_extract(self, url):
- tmp_id = self._match_id(url)
+ mobj = re.match(self._VALID_URL, url)
+ tmp_id, site = mobj.group('id'), mobj.group('site')
video_data = self._download_json(
- 'https://nhl.bamcontent.com/nhl/id/v1/%s/details/web-v1.json' % tmp_id,
- tmp_id)
+ 'https://nhl.bamcontent.com/%s/id/v1/%s/details/web-v1.json'
+ % (self._SITES_MAP[site], tmp_id), tmp_id)
if video_data.get('type') == 'article':
video_data = video_data['media']
@@ -290,9 +313,11 @@ class NHLIE(InfoExtractor):
continue
ext = determine_ext(playback_url)
if ext == 'm3u8':
- formats.extend(self._extract_m3u8_formats(
+ m3u8_formats = self._extract_m3u8_formats(
playback_url, video_id, 'mp4', 'm3u8_native',
- m3u8_id=playback.get('name', 'hls'), fatal=False))
+ m3u8_id=playback.get('name', 'hls'), fatal=False)
+ self._check_formats(m3u8_formats, video_id)
+ formats.extend(m3u8_formats)
else:
height = int_or_none(playback.get('height'))
formats.append({
diff --git a/youtube_dl/extractor/niconico.py b/youtube_dl/extractor/niconico.py
index 6eaaa8416..a104e33f8 100644
--- a/youtube_dl/extractor/niconico.py
+++ b/youtube_dl/extractor/niconico.py
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
from __future__ import unicode_literals
import re
diff --git a/youtube_dl/extractor/noco.py b/youtube_dl/extractor/noco.py
index 06f2bda07..70ff2ab36 100644
--- a/youtube_dl/extractor/noco.py
+++ b/youtube_dl/extractor/noco.py
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
from __future__ import unicode_literals
import re
diff --git a/youtube_dl/extractor/normalboots.py b/youtube_dl/extractor/normalboots.py
index af44c3bb5..6aa0895b8 100644
--- a/youtube_dl/extractor/normalboots.py
+++ b/youtube_dl/extractor/normalboots.py
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
diff --git a/youtube_dl/extractor/nova.py b/youtube_dl/extractor/nova.py
index 17671ad39..103952345 100644
--- a/youtube_dl/extractor/nova.py
+++ b/youtube_dl/extractor/nova.py
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
from __future__ import unicode_literals
import re
diff --git a/youtube_dl/extractor/nowness.py b/youtube_dl/extractor/nowness.py
index 74860eb20..7e5346316 100644
--- a/youtube_dl/extractor/nowness.py
+++ b/youtube_dl/extractor/nowness.py
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
from __future__ import unicode_literals
from .brightcove import (
diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py
index 9c7cc777b..c91f58461 100644
--- a/youtube_dl/extractor/npo.py
+++ b/youtube_dl/extractor/npo.py
@@ -3,6 +3,7 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
+from ..compat import compat_HTTPError
from ..utils import (
fix_xml_ampersands,
orderedSet,
@@ -10,6 +11,7 @@ from ..utils import (
qualities,
strip_jsonp,
unified_strdate,
+ ExtractorError,
)
@@ -181,9 +183,16 @@ class NPOIE(NPOBaseIE):
continue
streams = format_info.get('streams')
if streams:
- video_info = self._download_json(
- streams[0] + '&type=json',
- video_id, 'Downloading %s stream JSON' % format_id)
+ try:
+ video_info = self._download_json(
+ streams[0] + '&type=json',
+ video_id, 'Downloading %s stream JSON' % format_id)
+ except ExtractorError as ee:
+ if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404:
+ error = (self._parse_json(ee.cause.read().decode(), video_id, fatal=False) or {}).get('errorstring')
+ if error:
+ raise ExtractorError(error, expected=True)
+ raise
else:
video_info = format_info
video_url = video_info.get('url')
@@ -459,8 +468,9 @@ class NPOPlaylistBaseIE(NPOIE):
class VPROIE(NPOPlaylistBaseIE):
IE_NAME = 'vpro'
- _VALID_URL = r'https?://(?:www\.)?(?:tegenlicht\.)?vpro\.nl/(?:[^/]+/){2,}(?P<id>[^/]+)\.html'
- _PLAYLIST_TITLE_RE = r'<h1[^>]+class=["\'].*?\bmedia-platform-title\b.*?["\'][^>]*>([^<]+)'
+ _VALID_URL = r'https?://(?:www\.)?(?:(?:tegenlicht\.)?vpro|2doc)\.nl/(?:[^/]+/)*(?P<id>[^/]+)\.html'
+ _PLAYLIST_TITLE_RE = (r'<h1[^>]+class=["\'].*?\bmedia-platform-title\b.*?["\'][^>]*>([^<]+)',
+ r'<h5[^>]+class=["\'].*?\bmedia-platform-subtitle\b.*?["\'][^>]*>([^<]+)')
_PLAYLIST_ENTRY_RE = r'data-media-id="([^"]+)"'
_TESTS = [
@@ -492,6 +502,27 @@ class VPROIE(NPOPlaylistBaseIE):
'title': 'education education',
},
'playlist_count': 2,
+ },
+ {
+ 'url': 'http://www.2doc.nl/documentaires/series/2doc/2015/oktober/de-tegenprestatie.html',
+ 'info_dict': {
+ 'id': 'de-tegenprestatie',
+ 'title': 'De Tegenprestatie',
+ },
+ 'playlist_count': 2,
+ }, {
+ 'url': 'http://www.2doc.nl/speel~VARA_101375237~mh17-het-verdriet-van-nederland~.html',
+ 'info_dict': {
+ 'id': 'VARA_101375237',
+ 'ext': 'm4v',
+ 'title': 'MH17: Het verdriet van Nederland',
+ 'description': 'md5:09e1a37c1fdb144621e22479691a9f18',
+ 'upload_date': '20150716',
+ },
+ 'params': {
+ # Skip because of m3u8 download
+ 'skip_download': True
+ },
}
]
diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py
index ed42eb301..3700b7ab2 100644
--- a/youtube_dl/extractor/nrk.py
+++ b/youtube_dl/extractor/nrk.py
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
from __future__ import unicode_literals
import re
@@ -113,7 +113,17 @@ class NRKBaseIE(InfoExtractor):
class NRKIE(NRKBaseIE):
- _VALID_URL = r'(?:nrk:|https?://(?:www\.)?nrk\.no/video/PS\*)(?P<id>\d+)'
+ _VALID_URL = r'''(?x)
+ (?:
+ nrk:|
+ https?://
+ (?:
+ (?:www\.)?nrk\.no/video/PS\*|
+ v8-psapi\.nrk\.no/mediaelement/
+ )
+ )
+ (?P<id>[^/?#&]+)
+ '''
_API_HOST = 'v8.psapi.nrk.no'
_TESTS = [{
# video
@@ -137,6 +147,12 @@ class NRKIE(NRKBaseIE):
'description': 'md5:a621f5cc1bd75c8d5104cb048c6b8568',
'duration': 20,
}
+ }, {
+ 'url': 'nrk:ecc1b952-96dc-4a98-81b9-5296dc7a98d9',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://v8-psapi.nrk.no/mediaelement/ecc1b952-96dc-4a98-81b9-5296dc7a98d9',
+ 'only_matching': True,
}]
diff --git a/youtube_dl/extractor/ntvru.py b/youtube_dl/extractor/ntvru.py
index e8702ebcd..7d7a785ab 100644
--- a/youtube_dl/extractor/ntvru.py
+++ b/youtube_dl/extractor/ntvru.py
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
diff --git a/youtube_dl/extractor/nuevo.py b/youtube_dl/extractor/nuevo.py
index ef093dec2..87fb94d1f 100644
--- a/youtube_dl/extractor/nuevo.py
+++ b/youtube_dl/extractor/nuevo.py
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
diff --git a/youtube_dl/extractor/nytimes.py b/youtube_dl/extractor/nytimes.py
index 142c34256..2bb77ab24 100644
--- a/youtube_dl/extractor/nytimes.py
+++ b/youtube_dl/extractor/nytimes.py
@@ -1,3 +1,4 @@
+# coding: utf-8
from __future__ import unicode_literals
import hmac
@@ -6,11 +7,13 @@ import base64
from .common import InfoExtractor
from ..utils import (
+ determine_ext,
float_or_none,
int_or_none,
- parse_iso8601,
+ js_to_json,
mimetype2ext,
- determine_ext,
+ parse_iso8601,
+ remove_start,
)
@@ -139,15 +142,82 @@ class NYTimesArticleIE(NYTimesBaseIE):
'uploader': 'Matthew Williams',
}
}, {
+ 'url': 'http://www.nytimes.com/2016/10/14/podcasts/revelations-from-the-final-weeks.html',
+ 'md5': 'e0d52040cafb07662acf3c9132db3575',
+ 'info_dict': {
+ 'id': '100000004709062',
+ 'title': 'The Run-Up: ‘He Was Like an Octopus’',
+ 'ext': 'mp3',
+ 'description': 'md5:fb5c6b93b12efc51649b4847fe066ee4',
+ 'series': 'The Run-Up',
+ 'episode': '‘He Was Like an Octopus’',
+ 'episode_number': 20,
+ 'duration': 2130,
+ }
+ }, {
+ 'url': 'http://www.nytimes.com/2016/10/16/books/review/inside-the-new-york-times-book-review-the-rise-of-hitler.html',
+ 'info_dict': {
+ 'id': '100000004709479',
+ 'title': 'The Rise of Hitler',
+ 'ext': 'mp3',
+ 'description': 'md5:bce877fd9e3444990cb141875fab0028',
+ 'creator': 'Pamela Paul',
+ 'duration': 3475,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
'url': 'http://www.nytimes.com/news/minute/2014/03/17/times-minute-whats-next-in-crimea/?_php=true&_type=blogs&_php=true&_type=blogs&_r=1',
'only_matching': True,
}]
+ def _extract_podcast_from_json(self, json, page_id, webpage):
+ podcast_audio = self._parse_json(
+ json, page_id, transform_source=js_to_json)
+
+ audio_data = podcast_audio['data']
+ track = audio_data['track']
+
+ episode_title = track['title']
+ video_url = track['source']
+
+ description = track.get('description') or self._html_search_meta(
+ ['og:description', 'twitter:description'], webpage)
+
+ podcast_title = audio_data.get('podcast', {}).get('title')
+ title = ('%s: %s' % (podcast_title, episode_title)
+ if podcast_title else episode_title)
+
+ episode = audio_data.get('podcast', {}).get('episode') or ''
+ episode_number = int_or_none(self._search_regex(
+ r'[Ee]pisode\s+(\d+)', episode, 'episode number', default=None))
+
+ return {
+ 'id': remove_start(podcast_audio.get('target'), 'FT') or page_id,
+ 'url': video_url,
+ 'title': title,
+ 'description': description,
+ 'creator': track.get('credit'),
+ 'series': podcast_title,
+ 'episode': episode_title,
+ 'episode_number': episode_number,
+ 'duration': int_or_none(track.get('duration')),
+ }
+
def _real_extract(self, url):
- video_id = self._match_id(url)
+ page_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
+ webpage = self._download_webpage(url, page_id)
- video_id = self._html_search_regex(r'data-videoid="(\d+)"', webpage, 'video id')
+ video_id = self._search_regex(
+ r'data-videoid=["\'](\d+)', webpage, 'video id',
+ default=None, fatal=False)
+ if video_id is not None:
+ return self._extract_video_from_id(video_id)
- return self._extract_video_from_id(video_id)
+ podcast_data = self._search_regex(
+ (r'NYTD\.FlexTypes\.push\s*\(\s*({.+?})\s*\)\s*;\s*</script',
+ r'NYTD\.FlexTypes\.push\s*\(\s*({.+})\s*\)\s*;'),
+ webpage, 'podcast data')
+ return self._extract_podcast_from_json(podcast_data, page_id, webpage)
diff --git a/youtube_dl/extractor/nzz.py b/youtube_dl/extractor/nzz.py
new file mode 100644
index 000000000..2d352f53f
--- /dev/null
+++ b/youtube_dl/extractor/nzz.py
@@ -0,0 +1,36 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ extract_attributes,
+)
+
+
+class NZZIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?nzz\.ch/(?:[^/]+/)*[^/?#]+-ld\.(?P<id>\d+)'
+ _TEST = {
+ 'url': 'http://www.nzz.ch/zuerich/gymizyte/gymizyte-schreiben-schueler-heute-noch-diktate-ld.9153',
+ 'info_dict': {
+ 'id': '9153',
+ },
+ 'playlist_mincount': 6,
+ }
+
+ def _real_extract(self, url):
+ page_id = self._match_id(url)
+ webpage = self._download_webpage(url, page_id)
+
+ entries = []
+ for player_element in re.findall(r'(<[^>]+class="kalturaPlayer"[^>]*>)', webpage):
+ player_params = extract_attributes(player_element)
+ if player_params.get('data-type') not in ('kaltura_singleArticle',):
+ self.report_warning('Unsupported player type')
+ continue
+ entry_id = player_params['data-id']
+ entries.append(self.url_result(
+ 'kaltura:1750922:' + entry_id, 'Kaltura', entry_id))
+
+ return self.playlist_result(entries, page_id)
diff --git a/youtube_dl/extractor/oktoberfesttv.py b/youtube_dl/extractor/oktoberfesttv.py
index f2ccc53dc..50fbbc79c 100644
--- a/youtube_dl/extractor/oktoberfesttv.py
+++ b/youtube_dl/extractor/oktoberfesttv.py
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
diff --git a/youtube_dl/extractor/orf.py b/youtube_dl/extractor/orf.py
index 6ae30679a..b4cce7ea9 100644
--- a/youtube_dl/extractor/orf.py
+++ b/youtube_dl/extractor/orf.py
@@ -1,28 +1,28 @@
# coding: utf-8
from __future__ import unicode_literals
-import json
import re
import calendar
import datetime
from .common import InfoExtractor
+from ..compat import compat_str
from ..utils import (
HEADRequest,
unified_strdate,
- ExtractorError,
strip_jsonp,
int_or_none,
float_or_none,
determine_ext,
remove_end,
+ unescapeHTML,
)
class ORFTVthekIE(InfoExtractor):
IE_NAME = 'orf:tvthek'
IE_DESC = 'ORF TVthek'
- _VALID_URL = r'https?://tvthek\.orf\.at/(?:programs/.+?/episodes|topics?/.+?|program/[^/]+)/(?P<id>\d+)'
+ _VALID_URL = r'https?://tvthek\.orf\.at/(?:[^/]+/)+(?P<id>\d+)'
_TESTS = [{
'url': 'http://tvthek.orf.at/program/Aufgetischt/2745173/Aufgetischt-Mit-der-Steirischen-Tafelrunde/8891389',
@@ -51,26 +51,23 @@ class ORFTVthekIE(InfoExtractor):
'skip_download': True, # rtsp downloads
},
'_skip': 'Blocked outside of Austria / Germany',
+ }, {
+ 'url': 'http://tvthek.orf.at/topic/Fluechtlingskrise/10463081/Heimat-Fremde-Heimat/13879132/Senioren-betreuen-Migrantenkinder/13879141',
+ 'skip_download': True,
+ }, {
+ 'url': 'http://tvthek.orf.at/profile/Universum/35429',
+ 'skip_download': True,
}]
def _real_extract(self, url):
playlist_id = self._match_id(url)
webpage = self._download_webpage(url, playlist_id)
- data_json = self._search_regex(
- r'initializeAdworx\((.+?)\);\n', webpage, 'video info')
- all_data = json.loads(data_json)
-
- def get_segments(all_data):
- for data in all_data:
- if data['name'] in (
- 'Tracker::EPISODE_DETAIL_PAGE_OVER_PROGRAM',
- 'Tracker::EPISODE_DETAIL_PAGE_OVER_TOPIC'):
- return data['values']['segments']
-
- sdata = get_segments(all_data)
- if not sdata:
- raise ExtractorError('Unable to extract segments')
+ data_jsb = self._parse_json(
+ self._search_regex(
+ r'<div[^>]+class=(["\']).*?VideoPlaylist.*?\1[^>]+data-jsb=(["\'])(?P<json>.+?)\2',
+ webpage, 'playlist', group='json'),
+ playlist_id, transform_source=unescapeHTML)['playlist']['videos']
def quality_to_int(s):
m = re.search('([0-9]+)', s)
@@ -79,8 +76,11 @@ class ORFTVthekIE(InfoExtractor):
return int(m.group(1))
entries = []
- for sd in sdata:
- video_id = sd['id']
+ for sd in data_jsb:
+ video_id, title = sd.get('id'), sd.get('title')
+ if not video_id or not title:
+ continue
+ video_id = compat_str(video_id)
formats = [{
'preference': -10 if fd['delivery'] == 'hls' else None,
'format_id': '%s-%s-%s' % (
@@ -88,7 +88,7 @@ class ORFTVthekIE(InfoExtractor):
'url': fd['src'],
'protocol': fd['protocol'],
'quality': quality_to_int(fd['quality']),
- } for fd in sd['playlist_item_array']['sources']]
+ } for fd in sd['sources']]
# Check for geoblocking.
# There is a property is_geoprotection, but that's always false
@@ -115,14 +115,24 @@ class ORFTVthekIE(InfoExtractor):
self._check_formats(formats, video_id)
self._sort_formats(formats)
- upload_date = unified_strdate(sd['created_date'])
+ subtitles = {}
+ for sub in sd.get('subtitles', []):
+ sub_src = sub.get('src')
+ if not sub_src:
+ continue
+ subtitles.setdefault(sub.get('lang', 'de-AT'), []).append({
+ 'url': sub_src,
+ })
+
+ upload_date = unified_strdate(sd.get('created_date'))
entries.append({
'_type': 'video',
'id': video_id,
- 'title': sd['header'],
+ 'title': title,
'formats': formats,
+ 'subtitles': subtitles,
'description': sd.get('description'),
- 'duration': int(sd['duration_in_seconds']),
+ 'duration': int_or_none(sd.get('duration_in_seconds')),
'upload_date': upload_date,
'thumbnail': sd.get('image_full_url'),
})
diff --git a/youtube_dl/extractor/pandoratv.py b/youtube_dl/extractor/pandoratv.py
index 8d49f5c4a..2b07958bb 100644
--- a/youtube_dl/extractor/pandoratv.py
+++ b/youtube_dl/extractor/pandoratv.py
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
diff --git a/youtube_dl/extractor/parliamentliveuk.py b/youtube_dl/extractor/parliamentliveuk.py
index 874aacc55..ebdab8db9 100644
--- a/youtube_dl/extractor/parliamentliveuk.py
+++ b/youtube_dl/extractor/parliamentliveuk.py
@@ -6,9 +6,9 @@ from .common import InfoExtractor
class ParliamentLiveUKIE(InfoExtractor):
IE_NAME = 'parliamentlive.tv'
IE_DESC = 'UK parliament videos'
- _VALID_URL = r'https?://(?:www\.)?parliamentlive\.tv/Event/Index/(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
+ _VALID_URL = r'(?i)https?://(?:www\.)?parliamentlive\.tv/Event/Index/(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
- _TEST = {
+ _TESTS = [{
'url': 'http://parliamentlive.tv/Event/Index/c1e9d44d-fd6c-4263-b50f-97ed26cc998b',
'info_dict': {
'id': 'c1e9d44d-fd6c-4263-b50f-97ed26cc998b',
@@ -18,7 +18,10 @@ class ParliamentLiveUKIE(InfoExtractor):
'timestamp': 1422696664,
'upload_date': '20150131',
},
- }
+ }, {
+ 'url': 'http://parliamentlive.tv/event/index/3f24936f-130f-40bf-9a5d-b3d6479da6a4',
+ 'only_matching': True,
+ }]
def _real_extract(self, url):
video_id = self._match_id(url)
diff --git a/youtube_dl/extractor/patreon.py b/youtube_dl/extractor/patreon.py
index 229750665..a6a2c273f 100644
--- a/youtube_dl/extractor/patreon.py
+++ b/youtube_dl/extractor/patreon.py
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
diff --git a/youtube_dl/extractor/periscope.py b/youtube_dl/extractor/periscope.py
index 61043cad5..0e3623024 100644
--- a/youtube_dl/extractor/periscope.py
+++ b/youtube_dl/extractor/periscope.py
@@ -132,7 +132,7 @@ class PeriscopeUserIE(PeriscopeBaseIE):
user = list(data_store['UserCache']['users'].values())[0]['user']
user_id = user['id']
- session_id = data_store['SessionToken']['broadcastHistory']['token']['session_id']
+ session_id = data_store['SessionToken']['public']['broadcastHistory']['token']['session_id']
broadcasts = self._call_api(
'getUserBroadcastsPublic',
diff --git a/youtube_dl/extractor/pluralsight.py b/youtube_dl/extractor/pluralsight.py
index ea5caefa9..b66adfc00 100644
--- a/youtube_dl/extractor/pluralsight.py
+++ b/youtube_dl/extractor/pluralsight.py
@@ -28,7 +28,7 @@ class PluralsightBaseIE(InfoExtractor):
class PluralsightIE(PluralsightBaseIE):
IE_NAME = 'pluralsight'
- _VALID_URL = r'https?://(?:(?:www|app)\.)?pluralsight\.com/training/player\?'
+ _VALID_URL = r'https?://(?:(?:www|app)\.)?pluralsight\.com/(?:training/)?player\?'
_LOGIN_URL = 'https://app.pluralsight.com/id/'
_NETRC_MACHINE = 'pluralsight'
@@ -50,6 +50,9 @@ class PluralsightIE(PluralsightBaseIE):
# available without pluralsight account
'url': 'http://app.pluralsight.com/training/player?author=scott-allen&name=angularjs-get-started-m1-introduction&mode=live&clip=0&course=angularjs-get-started',
'only_matching': True,
+ }, {
+ 'url': 'https://app.pluralsight.com/player?course=ccna-intro-networking&author=ross-bagurdes&name=ccna-intro-networking-m06&clip=0',
+ 'only_matching': True,
}]
def _real_initialize(self):
diff --git a/youtube_dl/extractor/porn91.py b/youtube_dl/extractor/porn91.py
index 9894f3262..073fc3e21 100644
--- a/youtube_dl/extractor/porn91.py
+++ b/youtube_dl/extractor/porn91.py
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
from __future__ import unicode_literals
from ..compat import (
diff --git a/youtube_dl/extractor/pornoxo.py b/youtube_dl/extractor/pornoxo.py
index 202f58673..3c9087f2d 100644
--- a/youtube_dl/extractor/pornoxo.py
+++ b/youtube_dl/extractor/pornoxo.py
@@ -2,13 +2,13 @@ from __future__ import unicode_literals
import re
-from .common import InfoExtractor
+from .jwplatform import JWPlatformBaseIE
from ..utils import (
str_to_int,
)
-class PornoXOIE(InfoExtractor):
+class PornoXOIE(JWPlatformBaseIE):
_VALID_URL = r'https?://(?:www\.)?pornoxo\.com/videos/(?P<id>\d+)/(?P<display_id>[^/]+)\.html'
_TEST = {
'url': 'http://www.pornoxo.com/videos/7564/striptease-from-sexy-secretary.html',
@@ -17,7 +17,8 @@ class PornoXOIE(InfoExtractor):
'id': '7564',
'ext': 'flv',
'title': 'Striptease From Sexy Secretary!',
- 'description': 'Striptease From Sexy Secretary!',
+ 'display_id': 'striptease-from-sexy-secretary',
+ 'description': 'md5:0ee35252b685b3883f4a1d38332f9980',
'categories': list, # NSFW
'thumbnail': 're:https?://.*\.jpg$',
'age_limit': 18,
@@ -26,23 +27,14 @@ class PornoXOIE(InfoExtractor):
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
+ video_id, display_id = mobj.groups()
webpage = self._download_webpage(url, video_id)
-
- video_url = self._html_search_regex(
- r'\'file\'\s*:\s*"([^"]+)"', webpage, 'video_url')
+ video_data = self._extract_jwplayer_data(webpage, video_id, require_title=False)
title = self._html_search_regex(
r'<title>([^<]+)\s*-\s*PornoXO', webpage, 'title')
- description = self._html_search_regex(
- r'<meta name="description" content="([^"]+)\s*featuring',
- webpage, 'description', fatal=False)
-
- thumbnail = self._html_search_regex(
- r'\'image\'\s*:\s*"([^"]+)"', webpage, 'thumbnail', fatal=False)
-
view_count = str_to_int(self._html_search_regex(
r'[vV]iews:\s*([0-9,]+)', webpage, 'view count', fatal=False))
@@ -53,13 +45,14 @@ class PornoXOIE(InfoExtractor):
None if categories_str is None
else categories_str.split(','))
- return {
+ video_data.update({
'id': video_id,
- 'url': video_url,
'title': title,
- 'description': description,
- 'thumbnail': thumbnail,
+ 'display_id': display_id,
+ 'description': self._html_search_meta('description', webpage),
'categories': categories,
'view_count': view_count,
'age_limit': 18,
- }
+ })
+
+ return video_data
diff --git a/youtube_dl/extractor/prosiebensat1.py b/youtube_dl/extractor/prosiebensat1.py
index 873d4f981..7cc07a2ad 100644
--- a/youtube_dl/extractor/prosiebensat1.py
+++ b/youtube_dl/extractor/prosiebensat1.py
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
from __future__ import unicode_literals
import re
diff --git a/youtube_dl/extractor/puls4.py b/youtube_dl/extractor/puls4.py
index 9c2ccbe2d..1c54af002 100644
--- a/youtube_dl/extractor/puls4.py
+++ b/youtube_dl/extractor/puls4.py
@@ -1,4 +1,4 @@
-# -*- coding: utf-8 -*-
+# coding: utf-8
from __future__ import unicode_literals
from .prosiebensat1 import ProSiebenSat1BaseIE
diff --git a/youtube_dl/extractor/radiobremen.py b/youtube_dl/extractor/radiobremen.py
index 19a751da0..0aa8d059b 100644
--- a/youtube_dl/extractor/radiobremen.py
+++ b/youtube_dl/extractor/radiobremen.py
@@ -1,4 +1,4 @@
-# -*- coding: utf-8 -*-
+# coding: utf-8
from __future__ import unicode_literals
diff --git a/youtube_dl/extractor/reverbnation.py b/youtube_dl/extractor/reverbnation.py
index 3c6725aeb..4875009e5 100644
--- a/youtube_dl/extractor/reverbnation.py
+++ b/youtube_dl/extractor/reverbnation.py
@@ -1,29 +1,29 @@
from __future__ import unicode_literals
-import re
-
from .common import InfoExtractor
-from ..utils import str_or_none
+from ..utils import (
+ qualities,
+ str_or_none,
+)
class ReverbNationIE(InfoExtractor):
_VALID_URL = r'^https?://(?:www\.)?reverbnation\.com/.*?/song/(?P<id>\d+).*?$'
_TESTS = [{
'url': 'http://www.reverbnation.com/alkilados/song/16965047-mona-lisa',
- 'md5': '3da12ebca28c67c111a7f8b262d3f7a7',
+ 'md5': 'c0aaf339bcee189495fdf5a8c8ba8645',
'info_dict': {
'id': '16965047',
'ext': 'mp3',
'title': 'MONA LISA',
'uploader': 'ALKILADOS',
'uploader_id': '216429',
- 'thumbnail': 're:^https://gp1\.wac\.edgecastcdn\.net/.*?\.jpg$'
+ 'thumbnail': 're:^https?://.*\.jpg',
},
}]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- song_id = mobj.group('id')
+ song_id = self._match_id(url)
api_res = self._download_json(
'https://api.reverbnation.com/song/%s' % song_id,
@@ -31,14 +31,23 @@ class ReverbNationIE(InfoExtractor):
note='Downloading information of song %s' % song_id
)
+ THUMBNAILS = ('thumbnail', 'image')
+ quality = qualities(THUMBNAILS)
+ thumbnails = []
+ for thumb_key in THUMBNAILS:
+ if api_res.get(thumb_key):
+ thumbnails.append({
+ 'url': api_res[thumb_key],
+ 'preference': quality(thumb_key)
+ })
+
return {
'id': song_id,
- 'title': api_res.get('name'),
- 'url': api_res.get('url'),
+ 'title': api_res['name'],
+ 'url': api_res['url'],
'uploader': api_res.get('artist', {}).get('name'),
'uploader_id': str_or_none(api_res.get('artist', {}).get('id')),
- 'thumbnail': self._proto_relative_url(
- api_res.get('image', api_res.get('thumbnail'))),
+ 'thumbnails': thumbnails,
'ext': 'mp3',
'vcodec': 'none',
}
diff --git a/youtube_dl/extractor/rmcdecouverte.py b/youtube_dl/extractor/rmcdecouverte.py
index f3bb4fa66..2340dae53 100644
--- a/youtube_dl/extractor/rmcdecouverte.py
+++ b/youtube_dl/extractor/rmcdecouverte.py
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
diff --git a/youtube_dl/extractor/rtl2.py b/youtube_dl/extractor/rtl2.py
index de004671d..cb4ee8803 100644
--- a/youtube_dl/extractor/rtl2.py
+++ b/youtube_dl/extractor/rtl2.py
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
from __future__ import unicode_literals
import re
diff --git a/youtube_dl/extractor/rtve.py b/youtube_dl/extractor/rtve.py
index f1b92f6da..6a43b036e 100644
--- a/youtube_dl/extractor/rtve.py
+++ b/youtube_dl/extractor/rtve.py
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
from __future__ import unicode_literals
import base64
diff --git a/youtube_dl/extractor/rudo.py b/youtube_dl/extractor/rudo.py
index 38366b784..9a330c196 100644
--- a/youtube_dl/extractor/rudo.py
+++ b/youtube_dl/extractor/rudo.py
@@ -43,7 +43,7 @@ class RudoIE(JWPlatformBaseIE):
transform_source=lambda s: js_to_json(re.sub(r'encodeURI\([^)]+\)', '""', s)))
info_dict = self._parse_jwplayer_data(
- jwplayer_data, video_id, require_title=False, m3u8_id='hls')
+ jwplayer_data, video_id, require_title=False, m3u8_id='hls', mpd_id='dash')
info_dict.update({
'title': self._og_search_title(webpage),
diff --git a/youtube_dl/extractor/ruhd.py b/youtube_dl/extractor/ruhd.py
index 1f7c26299..ce631b46c 100644
--- a/youtube_dl/extractor/ruhd.py
+++ b/youtube_dl/extractor/ruhd.py
@@ -1,4 +1,4 @@
-# -*- coding: utf-8 -*-
+# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
diff --git a/youtube_dl/extractor/rutube.py b/youtube_dl/extractor/rutube.py
index 5d0ace5bf..fd1df925b 100644
--- a/youtube_dl/extractor/rutube.py
+++ b/youtube_dl/extractor/rutube.py
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
from __future__ import unicode_literals
import re
diff --git a/youtube_dl/extractor/rutv.py b/youtube_dl/extractor/rutv.py
index a2379eb04..a5e672c0a 100644
--- a/youtube_dl/extractor/rutv.py
+++ b/youtube_dl/extractor/rutv.py
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
from __future__ import unicode_literals
import re
diff --git a/youtube_dl/extractor/ruutu.py b/youtube_dl/extractor/ruutu.py
index ffea438cc..2fce4e81b 100644
--- a/youtube_dl/extractor/ruutu.py
+++ b/youtube_dl/extractor/ruutu.py
@@ -12,7 +12,7 @@ from ..utils import (
class RuutuIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?ruutu\.fi/video/(?P<id>\d+)'
+ _VALID_URL = r'https?://(?:www\.)?(?:ruutu|supla)\.fi/(?:video|supla)/(?P<id>\d+)'
_TESTS = [
{
'url': 'http://www.ruutu.fi/video/2058907',
@@ -34,12 +34,24 @@ class RuutuIE(InfoExtractor):
'id': '2057306',
'ext': 'mp4',
'title': 'Superpesis: katso koko kausi Ruudussa',
- 'description': 'md5:da2736052fef3b2bd5e0005e63c25eac',
+ 'description': 'md5:bfb7336df2a12dc21d18fa696c9f8f23',
'thumbnail': 're:^https?://.*\.jpg$',
'duration': 40,
'age_limit': 0,
},
},
+ {
+ 'url': 'http://www.supla.fi/supla/2231370',
+ 'md5': 'df14e782d49a2c0df03d3be2a54ef949',
+ 'info_dict': {
+ 'id': '2231370',
+ 'ext': 'mp4',
+ 'title': 'Osa 1: Mikael Jungner',
+ 'description': 'md5:7d90f358c47542e3072ff65d7b1bcffe',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ 'age_limit': 0,
+ },
+ },
]
def _real_extract(self, url):
diff --git a/youtube_dl/extractor/safari.py b/youtube_dl/extractor/safari.py
index eabe41efe..c3aec1edd 100644
--- a/youtube_dl/extractor/safari.py
+++ b/youtube_dl/extractor/safari.py
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
from __future__ import unicode_literals
import re
@@ -157,7 +157,14 @@ class SafariCourseIE(SafariBaseIE):
IE_NAME = 'safari:course'
IE_DESC = 'safaribooksonline.com online courses'
- _VALID_URL = r'https?://(?:www\.)?safaribooksonline\.com/(?:library/view/[^/]+|api/v1/book)/(?P<id>[^/]+)/?(?:[#?]|$)'
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ (?:www\.)?safaribooksonline\.com/(?:library/view/[^/]+|api/v1/book)|
+ techbus\.safaribooksonline\.com
+ )
+ /(?P<id>[^/]+)/?(?:[#?]|$)
+ '''
_TESTS = [{
'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/',
@@ -170,6 +177,9 @@ class SafariCourseIE(SafariBaseIE):
}, {
'url': 'https://www.safaribooksonline.com/api/v1/book/9781449396459/?override_format=json',
'only_matching': True,
+ }, {
+ 'url': 'http://techbus.safaribooksonline.com/9780134426365',
+ 'only_matching': True,
}]
def _real_extract(self, url):
diff --git a/youtube_dl/extractor/sapo.py b/youtube_dl/extractor/sapo.py
index 172cc1275..49a9b313a 100644
--- a/youtube_dl/extractor/sapo.py
+++ b/youtube_dl/extractor/sapo.py
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
from __future__ import unicode_literals
import re
diff --git a/youtube_dl/extractor/sbs.py b/youtube_dl/extractor/sbs.py
index 96472fbc4..43131fb7e 100644
--- a/youtube_dl/extractor/sbs.py
+++ b/youtube_dl/extractor/sbs.py
@@ -1,4 +1,4 @@
-# -*- coding: utf-8 -*-
+# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
diff --git a/youtube_dl/extractor/screencast.py b/youtube_dl/extractor/screencast.py
index 356631700..ed9de9648 100644
--- a/youtube_dl/extractor/screencast.py
+++ b/youtube_dl/extractor/screencast.py
@@ -1,4 +1,4 @@
-# -*- coding: utf-8 -*-
+# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
diff --git a/youtube_dl/extractor/screenwavemedia.py b/youtube_dl/extractor/screenwavemedia.py
index 40333c825..7d77e8825 100644
--- a/youtube_dl/extractor/screenwavemedia.py
+++ b/youtube_dl/extractor/screenwavemedia.py
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
from __future__ import unicode_literals
import re
diff --git a/youtube_dl/extractor/slutload.py b/youtube_dl/extractor/slutload.py
index 7efb29f65..18cc7721e 100644
--- a/youtube_dl/extractor/slutload.py
+++ b/youtube_dl/extractor/slutload.py
@@ -1,7 +1,5 @@
from __future__ import unicode_literals
-import re
-
from .common import InfoExtractor
@@ -9,7 +7,7 @@ class SlutloadIE(InfoExtractor):
_VALID_URL = r'^https?://(?:\w+\.)?slutload\.com/video/[^/]+/(?P<id>[^/]+)/?$'
_TEST = {
'url': 'http://www.slutload.com/video/virginie-baisee-en-cam/TD73btpBqSxc/',
- 'md5': '0cf531ae8006b530bd9df947a6a0df77',
+ 'md5': '868309628ba00fd488cf516a113fd717',
'info_dict': {
'id': 'TD73btpBqSxc',
'ext': 'mp4',
@@ -20,9 +18,7 @@ class SlutloadIE(InfoExtractor):
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
-
+ video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
video_title = self._html_search_regex(r'<h1><strong>([^<]+)</strong>',
diff --git a/youtube_dl/extractor/smotri.py b/youtube_dl/extractor/smotri.py
index 114358786..def46abda 100644
--- a/youtube_dl/extractor/smotri.py
+++ b/youtube_dl/extractor/smotri.py
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
from __future__ import unicode_literals
import re
diff --git a/youtube_dl/extractor/sohu.py b/youtube_dl/extractor/sohu.py
index 48e2ba2dd..30760ca06 100644
--- a/youtube_dl/extractor/sohu.py
+++ b/youtube_dl/extractor/sohu.py
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
from __future__ import unicode_literals
import re
diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py
index 1a8114aa7..3b7ecb3c3 100644
--- a/youtube_dl/extractor/soundcloud.py
+++ b/youtube_dl/extractor/soundcloud.py
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
from __future__ import unicode_literals
import re
diff --git a/youtube_dl/extractor/southpark.py b/youtube_dl/extractor/southpark.py
index e2a9e45ac..08f8c5744 100644
--- a/youtube_dl/extractor/southpark.py
+++ b/youtube_dl/extractor/southpark.py
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
from __future__ import unicode_literals
from .mtv import MTVServicesInfoExtractor
diff --git a/youtube_dl/extractor/spiegel.py b/youtube_dl/extractor/spiegel.py
index b41d9f59f..ec1b60388 100644
--- a/youtube_dl/extractor/spiegel.py
+++ b/youtube_dl/extractor/spiegel.py
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
from __future__ import unicode_literals
import re
diff --git a/youtube_dl/extractor/srmediathek.py b/youtube_dl/extractor/srmediathek.py
index 409d50304..b03272f7a 100644
--- a/youtube_dl/extractor/srmediathek.py
+++ b/youtube_dl/extractor/srmediathek.py
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
from __future__ import unicode_literals
from .ard import ARDMediathekIE
diff --git a/youtube_dl/extractor/streamable.py b/youtube_dl/extractor/streamable.py
index 1c61437a4..2c26fa689 100644
--- a/youtube_dl/extractor/streamable.py
+++ b/youtube_dl/extractor/streamable.py
@@ -1,6 +1,8 @@
# coding: utf-8
from __future__ import unicode_literals
+import re
+
from .common import InfoExtractor
from ..utils import (
ExtractorError,
@@ -48,6 +50,14 @@ class StreamableIE(InfoExtractor):
}
]
+ @staticmethod
+ def _extract_url(webpage):
+ mobj = re.search(
+ r'<iframe[^>]+src=(?P<q1>[\'"])(?P<src>(?:https?:)?//streamable\.com/(?:(?!\1).+))(?P=q1)',
+ webpage)
+ if mobj:
+ return mobj.group('src')
+
def _real_extract(self, url):
video_id = self._match_id(url)
diff --git a/youtube_dl/extractor/streamcz.py b/youtube_dl/extractor/streamcz.py
index d3d2b7eb7..9e533103c 100644
--- a/youtube_dl/extractor/streamcz.py
+++ b/youtube_dl/extractor/streamcz.py
@@ -1,4 +1,4 @@
-# -*- coding: utf-8 -*-
+# coding: utf-8
from __future__ import unicode_literals
import hashlib
diff --git a/youtube_dl/extractor/swrmediathek.py b/youtube_dl/extractor/swrmediathek.py
index 58073eefe..6d69f7686 100644
--- a/youtube_dl/extractor/swrmediathek.py
+++ b/youtube_dl/extractor/swrmediathek.py
@@ -1,4 +1,4 @@
-# -*- coding: utf-8 -*-
+# coding: utf-8
from __future__ import unicode_literals
import re
diff --git a/youtube_dl/extractor/sztvhu.py b/youtube_dl/extractor/sztvhu.py
index f562aa6d3..cfad33146 100644
--- a/youtube_dl/extractor/sztvhu.py
+++ b/youtube_dl/extractor/sztvhu.py
@@ -1,4 +1,4 @@
-# -*- coding: utf-8 -*-
+# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
diff --git a/youtube_dl/extractor/tagesschau.py b/youtube_dl/extractor/tagesschau.py
index 136e18f96..8670cee28 100644
--- a/youtube_dl/extractor/tagesschau.py
+++ b/youtube_dl/extractor/tagesschau.py
@@ -1,4 +1,4 @@
-# -*- coding: utf-8 -*-
+# coding: utf-8
from __future__ import unicode_literals
import re
diff --git a/youtube_dl/extractor/tass.py b/youtube_dl/extractor/tass.py
index c4ef70778..5293393ef 100644
--- a/youtube_dl/extractor/tass.py
+++ b/youtube_dl/extractor/tass.py
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
from __future__ import unicode_literals
import json
diff --git a/youtube_dl/extractor/teachertube.py b/youtube_dl/extractor/teachertube.py
index 82675431f..df5d5556f 100644
--- a/youtube_dl/extractor/teachertube.py
+++ b/youtube_dl/extractor/teachertube.py
@@ -1,4 +1,4 @@
-# -*- coding: utf-8 -*-
+# coding: utf-8
from __future__ import unicode_literals
import re
diff --git a/youtube_dl/extractor/teamcoco.py b/youtube_dl/extractor/teamcoco.py
index 79a778920..75346393b 100644
--- a/youtube_dl/extractor/teamcoco.py
+++ b/youtube_dl/extractor/teamcoco.py
@@ -1,4 +1,4 @@
-# -*- coding: utf-8 -*-
+# coding: utf-8
from __future__ import unicode_literals
import base64
diff --git a/youtube_dl/extractor/techtalks.py b/youtube_dl/extractor/techtalks.py
index 16e945d8e..a5b62c717 100644
--- a/youtube_dl/extractor/techtalks.py
+++ b/youtube_dl/extractor/techtalks.py
@@ -10,9 +10,9 @@ from ..utils import (
class TechTalksIE(InfoExtractor):
- _VALID_URL = r'https?://techtalks\.tv/talks/[^/]*/(?P<id>\d+)/'
+ _VALID_URL = r'https?://techtalks\.tv/talks/(?:[^/]+/)?(?P<id>\d+)'
- _TEST = {
+ _TESTS = [{
'url': 'http://techtalks.tv/talks/learning-topic-models-going-beyond-svd/57758/',
'info_dict': {
'id': '57758',
@@ -38,7 +38,10 @@ class TechTalksIE(InfoExtractor):
# rtmp download
'skip_download': True,
},
- }
+ }, {
+ 'url': 'http://techtalks.tv/talks/57758',
+ 'only_matching': True,
+ }]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
diff --git a/youtube_dl/extractor/theintercept.py b/youtube_dl/extractor/theintercept.py
index ec6f4ecaa..f23b58713 100644
--- a/youtube_dl/extractor/theintercept.py
+++ b/youtube_dl/extractor/theintercept.py
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py
index 6febf805b..cfbf7f4e1 100644
--- a/youtube_dl/extractor/theplatform.py
+++ b/youtube_dl/extractor/theplatform.py
@@ -1,4 +1,4 @@
-# -*- coding: utf-8 -*-
+# coding: utf-8
from __future__ import unicode_literals
import re
diff --git a/youtube_dl/extractor/theweatherchannel.py b/youtube_dl/extractor/theweatherchannel.py
new file mode 100644
index 000000000..c34a49d03
--- /dev/null
+++ b/youtube_dl/extractor/theweatherchannel.py
@@ -0,0 +1,79 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .theplatform import ThePlatformIE
+from ..utils import (
+ determine_ext,
+ parse_duration,
+)
+
+
+class TheWeatherChannelIE(ThePlatformIE):
+ _VALID_URL = r'https?://(?:www\.)?weather\.com/(?:[^/]+/)*video/(?P<id>[^/?#]+)'
+ _TESTS = [{
+ 'url': 'https://weather.com/series/great-outdoors/video/ice-climber-is-in-for-a-shock',
+ 'md5': 'ab924ac9574e79689c24c6b95e957def',
+ 'info_dict': {
+ 'id': 'cc82397e-cc3f-4d11-9390-a785add090e8',
+ 'ext': 'mp4',
+ 'title': 'Ice Climber Is In For A Shock',
+ 'description': 'md5:55606ce1378d4c72e6545e160c9d9695',
+ 'uploader': 'TWC - Digital (No Distro)',
+ 'uploader_id': '6ccd5455-16bb-46f2-9c57-ff858bb9f62c',
+ }
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ drupal_settings = self._parse_json(self._search_regex(
+ r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
+ webpage, 'drupal settings'), display_id)
+ video_id = drupal_settings['twc']['contexts']['node']['uuid']
+ video_data = self._download_json(
+ 'https://dsx.weather.com/cms/v4/asset-collection/en_US/' + video_id, video_id)
+ seo_meta = video_data.get('seometa', {})
+ title = video_data.get('title') or seo_meta['title']
+
+ urls = []
+ thumbnails = []
+ formats = []
+ for variant_id, variant_url in video_data.get('variants', {}).items():
+ variant_url = variant_url.strip()
+ if not variant_url or variant_url in urls:
+ continue
+ urls.append(variant_url)
+ ext = determine_ext(variant_url)
+ if ext == 'jpg':
+ thumbnails.append({
+ 'url': variant_url,
+ 'id': variant_id,
+ })
+ elif ThePlatformIE.suitable(variant_url):
+ tp_formats, _ = self._extract_theplatform_smil(variant_url, video_id)
+ formats.extend(tp_formats)
+ elif ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ variant_url, video_id, 'mp4', 'm3u8_native',
+ m3u8_id=variant_id, fatal=False))
+ elif ext == 'f4m':
+ formats.extend(self._extract_f4m_formats(
+ variant_url, video_id, f4m_id=variant_id, fatal=False))
+ else:
+ formats.append({
+ 'url': variant_url,
+ 'format_id': variant_id,
+ })
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': video_data.get('description') or seo_meta.get('description') or seo_meta.get('og:description'),
+ 'duration': parse_duration(video_data.get('duration')),
+ 'uploader': video_data.get('providername'),
+ 'uploader_id': video_data.get('providerid'),
+ 'thumbnails': thumbnails,
+ 'formats': formats,
+ }
diff --git a/youtube_dl/extractor/thisoldhouse.py b/youtube_dl/extractor/thisoldhouse.py
new file mode 100644
index 000000000..7629f0d10
--- /dev/null
+++ b/youtube_dl/extractor/thisoldhouse.py
@@ -0,0 +1,32 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class ThisOldHouseIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?thisoldhouse\.com/(?:watch|how-to)/(?P<id>[^/?#]+)'
+ _TESTS = [{
+ 'url': 'https://www.thisoldhouse.com/how-to/how-to-build-storage-bench',
+ 'md5': '568acf9ca25a639f0c4ff905826b662f',
+ 'info_dict': {
+ 'id': '2REGtUDQ',
+ 'ext': 'mp4',
+ 'title': 'How to Build a Storage Bench',
+ 'description': 'In the workshop, Tom Silva and Kevin O\'Connor build a storage bench for an entryway.',
+ 'timestamp': 1442548800,
+ 'upload_date': '20150918',
+ }
+ }, {
+ 'url': 'https://www.thisoldhouse.com/watch/arlington-arts-crafts-arts-and-crafts-class-begins',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ drupal_settings = self._parse_json(self._search_regex(
+ r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
+ webpage, 'drupal settings'), display_id)
+ video_id = drupal_settings['jwplatform']['video_id']
+ return self.url_result('jwplatform:' + video_id, 'JWPlatform', video_id)
diff --git a/youtube_dl/extractor/tlc.py b/youtube_dl/extractor/tlc.py
index ce4f91f46..fd145ba42 100644
--- a/youtube_dl/extractor/tlc.py
+++ b/youtube_dl/extractor/tlc.py
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
from __future__ import unicode_literals
import re
diff --git a/youtube_dl/extractor/tonline.py b/youtube_dl/extractor/tonline.py
new file mode 100644
index 000000000..cc11eae2a
--- /dev/null
+++ b/youtube_dl/extractor/tonline.py
@@ -0,0 +1,59 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import int_or_none
+
+
+class TOnlineIE(InfoExtractor):
+ IE_NAME = 't-online.de'
+ _VALID_URL = r'https?://(?:www\.)?t-online\.de/tv/(?:[^/]+/)*id_(?P<id>\d+)'
+ _TEST = {
+ 'url': 'http://www.t-online.de/tv/sport/fussball/id_79166266/drittes-remis-zidane-es-muss-etwas-passieren-.html',
+ 'md5': '7d94dbdde5f9d77c5accc73c39632c29',
+ 'info_dict': {
+ 'id': '79166266',
+ 'ext': 'mp4',
+ 'title': 'Drittes Remis! Zidane: "Es muss etwas passieren"',
+ 'description': 'Es läuft nicht rund bei Real Madrid. Das 1:1 gegen den SD Eibar war das dritte Unentschieden in Folge in der Liga.',
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ video_data = self._download_json(
+ 'http://www.t-online.de/tv/id_%s/tid_json_video' % video_id, video_id)
+ title = video_data['subtitle']
+
+ formats = []
+ for asset in video_data.get('assets', []):
+ asset_source = asset.get('source') or asset.get('source2')
+ if not asset_source:
+ continue
+ formats_id = []
+ for field_key in ('type', 'profile'):
+ field_value = asset.get(field_key)
+ if field_value:
+ formats_id.append(field_value)
+ formats.append({
+ 'format_id': '-'.join(formats_id),
+ 'url': asset_source,
+ })
+
+ thumbnails = []
+ for image in video_data.get('images', []):
+ image_source = image.get('source')
+ if not image_source:
+ continue
+ thumbnails.append({
+ 'url': image_source,
+ })
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': video_data.get('description'),
+ 'duration': int_or_none(video_data.get('duration')),
+ 'thumbnails': thumbnails,
+ 'formats': formats,
+ }
diff --git a/youtube_dl/extractor/toypics.py b/youtube_dl/extractor/toypics.py
index 2579ba8c6..938e05076 100644
--- a/youtube_dl/extractor/toypics.py
+++ b/youtube_dl/extractor/toypics.py
@@ -1,4 +1,4 @@
-# -*- coding:utf-8 -*-
+# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
diff --git a/youtube_dl/extractor/tumblr.py b/youtube_dl/extractor/tumblr.py
index 4d8b57111..ebe411e12 100644
--- a/youtube_dl/extractor/tumblr.py
+++ b/youtube_dl/extractor/tumblr.py
@@ -1,4 +1,4 @@
-# -*- coding: utf-8 -*-
+# coding: utf-8
from __future__ import unicode_literals
import re
diff --git a/youtube_dl/extractor/tv2.py b/youtube_dl/extractor/tv2.py
index f225ec684..bd28267b0 100644
--- a/youtube_dl/extractor/tv2.py
+++ b/youtube_dl/extractor/tv2.py
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
from __future__ import unicode_literals
import re
diff --git a/youtube_dl/extractor/tvigle.py b/youtube_dl/extractor/tvigle.py
index ead4c00c7..f3817ab28 100644
--- a/youtube_dl/extractor/tvigle.py
+++ b/youtube_dl/extractor/tvigle.py
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
from __future__ import unicode_literals
import re
diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py
index bc352391e..46c2cfe7b 100644
--- a/youtube_dl/extractor/twitch.py
+++ b/youtube_dl/extractor/twitch.py
@@ -247,6 +247,7 @@ class TwitchVodIE(TwitchItemBaseIE):
# m3u8 download
'skip_download': True,
},
+ 'skip': 'HTTP Error 404: Not Found',
}]
def _real_extract(self, url):
diff --git a/youtube_dl/extractor/urplay.py b/youtube_dl/extractor/urplay.py
index ce3bf6b02..8e6fd4731 100644
--- a/youtube_dl/extractor/urplay.py
+++ b/youtube_dl/extractor/urplay.py
@@ -5,17 +5,20 @@ from .common import InfoExtractor
class URPlayIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?urplay\.se/program/(?P<id>[0-9]+)'
- _TEST = {
+ _VALID_URL = r'https?://(?:www\.)?ur(?:play|skola)\.se/(?:program|Produkter)/(?P<id>[0-9]+)'
+ _TESTS = [{
'url': 'http://urplay.se/program/190031-tripp-trapp-trad-sovkudde',
- 'md5': '15ca67b63fd8fb320ac2bcd854bad7b6',
+ 'md5': 'ad5f0de86f16ca4c8062cd103959a9eb',
'info_dict': {
'id': '190031',
'ext': 'mp4',
'title': 'Tripp, Trapp, Träd : Sovkudde',
'description': 'md5:b86bffdae04a7e9379d1d7e5947df1d1',
- }
- }
+ },
+ }, {
+ 'url': 'http://urskola.se/Produkter/155794-Smasagor-meankieli-Grodan-i-vida-varlden',
+ 'only_matching': True,
+ }]
def _real_extract(self, url):
video_id = self._match_id(url)
@@ -27,30 +30,17 @@ class URPlayIE(InfoExtractor):
formats = []
for quality_attr, quality, preference in (('', 'sd', 0), ('_hd', 'hd', 1)):
- file_rtmp = urplayer_data.get('file_rtmp' + quality_attr)
- if file_rtmp:
- formats.append({
- 'url': 'rtmp://%s/urplay/mp4:%s' % (host, file_rtmp),
- 'format_id': quality + '-rtmp',
- 'ext': 'flv',
- 'preference': preference,
- })
file_http = urplayer_data.get('file_http' + quality_attr) or urplayer_data.get('file_http_sub' + quality_attr)
if file_http:
- file_http_base_url = 'http://%s/%s' % (host, file_http)
- formats.extend(self._extract_f4m_formats(
- file_http_base_url + 'manifest.f4m', video_id,
- preference, '%s-hds' % quality, fatal=False))
- formats.extend(self._extract_m3u8_formats(
- file_http_base_url + 'playlist.m3u8', video_id, 'mp4',
- 'm3u8_native', preference, '%s-hls' % quality, fatal=False))
+ formats.extend(self._extract_wowza_formats(
+ 'http://%s/%splaylist.m3u8' % (host, file_http), video_id, skip_protocols=['rtmp', 'rtsp']))
self._sort_formats(formats)
subtitles = {}
for subtitle in urplayer_data.get('subtitles', []):
subtitle_url = subtitle.get('file')
kind = subtitle.get('kind')
- if subtitle_url or kind and kind != 'captions':
+ if not subtitle_url or (kind and kind != 'captions'):
continue
subtitles.setdefault(subtitle.get('label', 'Svenska'), []).append({
'url': subtitle_url,
diff --git a/youtube_dl/extractor/vbox7.py b/youtube_dl/extractor/vbox7.py
index e17988573..a1e0851b7 100644
--- a/youtube_dl/extractor/vbox7.py
+++ b/youtube_dl/extractor/vbox7.py
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
from __future__ import unicode_literals
import re
diff --git a/youtube_dl/extractor/vesti.py b/youtube_dl/extractor/vesti.py
index cb64ae0bd..5ab716880 100644
--- a/youtube_dl/extractor/vesti.py
+++ b/youtube_dl/extractor/vesti.py
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
from __future__ import unicode_literals
import re
diff --git a/youtube_dl/extractor/videomore.py b/youtube_dl/extractor/videomore.py
index 8a11ff848..7f2566586 100644
--- a/youtube_dl/extractor/videomore.py
+++ b/youtube_dl/extractor/videomore.py
@@ -86,6 +86,11 @@ class VideomoreIE(InfoExtractor):
mobj = re.search(
r'<object[^>]+data=(["\'])https?://videomore\.ru/player\.swf\?.*config=(?P<url>https?://videomore\.ru/(?:[^/]+/)+\d+\.xml).*\1',
webpage)
+ if not mobj:
+ mobj = re.search(
+ r'<iframe[^>]+src=([\'"])(?P<url>https?://videomore\.ru/embed/\d+)',
+ webpage)
+
if mobj:
return mobj.group('url')
diff --git a/youtube_dl/extractor/vidzi.py b/youtube_dl/extractor/vidzi.py
index d49cc6cbc..9950c62ad 100644
--- a/youtube_dl/extractor/vidzi.py
+++ b/youtube_dl/extractor/vidzi.py
@@ -1,10 +1,14 @@
# coding: utf-8
from __future__ import unicode_literals
+import re
+
from .jwplatform import JWPlatformBaseIE
from ..utils import (
decode_packed_codes,
js_to_json,
+ NO_DEFAULT,
+ PACKED_CODES_RE,
)
@@ -35,10 +39,17 @@ class VidziIE(JWPlatformBaseIE):
title = self._html_search_regex(
r'(?s)<h2 class="video-title">(.*?)</h2>', webpage, 'title')
- code = decode_packed_codes(webpage).replace('\\\'', '\'')
- jwplayer_data = self._parse_json(
- self._search_regex(r'setup\(([^)]+)\)', code, 'jwplayer data'),
- video_id, transform_source=js_to_json)
+ packed_codes = [mobj.group(0) for mobj in re.finditer(
+ PACKED_CODES_RE, webpage)]
+ for num, pc in enumerate(packed_codes, 1):
+ code = decode_packed_codes(pc).replace('\\\'', '\'')
+ jwplayer_data = self._parse_json(
+ self._search_regex(
+ r'setup\(([^)]+)\)', code, 'jwplayer data',
+ default=NO_DEFAULT if num == len(packed_codes) else '{}'),
+ video_id, transform_source=js_to_json)
+ if jwplayer_data:
+ break
info_dict = self._parse_jwplayer_data(jwplayer_data, video_id, require_title=False)
info_dict['title'] = title
diff --git a/youtube_dl/extractor/vier.py b/youtube_dl/extractor/vier.py
index dc142a245..d26fb49b3 100644
--- a/youtube_dl/extractor/vier.py
+++ b/youtube_dl/extractor/vier.py
@@ -49,7 +49,7 @@ class VierIE(InfoExtractor):
webpage, 'filename')
playlist_url = 'http://vod.streamcloud.be/%s/_definst_/mp4:%s.mp4/playlist.m3u8' % (application, filename)
- formats = self._extract_wowza_formats(playlist_url, display_id)
+ formats = self._extract_wowza_formats(playlist_url, display_id, skip_protocols=['dash'])
self._sort_formats(formats)
title = self._og_search_title(webpage, default=display_id)
diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py
index 50aacc6ac..b566241cc 100644
--- a/youtube_dl/extractor/vimeo.py
+++ b/youtube_dl/extractor/vimeo.py
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
from __future__ import unicode_literals
import json
@@ -355,23 +355,28 @@ class VimeoIE(VimeoBaseInfoExtractor):
return smuggle_url(url, {'http_headers': {'Referer': referrer_url}})
@staticmethod
- def _extract_vimeo_url(url, webpage):
+ def _extract_urls(url, webpage):
+ urls = []
# Look for embedded (iframe) Vimeo player
- mobj = re.search(
- r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.vimeo\.com/video/.+?)\1', webpage)
- if mobj:
- player_url = unescapeHTML(mobj.group('url'))
- return VimeoIE._smuggle_referrer(player_url, url)
- # Look for embedded (swf embed) Vimeo player
- mobj = re.search(
- r'<embed[^>]+?src="((?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage)
- if mobj:
- return mobj.group(1)
- # Look more for non-standard embedded Vimeo player
- mobj = re.search(
- r'<video[^>]+src=(?P<q1>[\'"])(?P<url>(?:https?:)?//(?:www\.)?vimeo\.com/[0-9]+)(?P=q1)', webpage)
- if mobj:
- return mobj.group('url')
+ for mobj in re.finditer(
+ r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.vimeo\.com/video/.+?)\1',
+ webpage):
+ urls.append(VimeoIE._smuggle_referrer(unescapeHTML(mobj.group('url')), url))
+ PLAIN_EMBED_RE = (
+ # Look for embedded (swf embed) Vimeo player
+ r'<embed[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)\1',
+ # Look more for non-standard embedded Vimeo player
+ r'<video[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?vimeo\.com/[0-9]+)\1',
+ )
+ for embed_re in PLAIN_EMBED_RE:
+ for mobj in re.finditer(embed_re, webpage):
+ urls.append(mobj.group('url'))
+ return urls
+
+ @staticmethod
+ def _extract_url(url, webpage):
+ urls = VimeoIE._extract_urls(url, webpage)
+ return urls[0] if urls else None
def _verify_player_video_password(self, url, video_id):
password = self._downloader.params.get('videopassword')
@@ -832,6 +837,7 @@ class VimeoReviewIE(VimeoBaseInfoExtractor):
'params': {
'videopassword': 'holygrail',
},
+ 'skip': 'video gone',
}]
def _real_initialize(self):
@@ -839,9 +845,10 @@ class VimeoReviewIE(VimeoBaseInfoExtractor):
def _get_config_url(self, webpage_url, video_id, video_password_verified=False):
webpage = self._download_webpage(webpage_url, video_id)
- config_url = self._html_search_regex(
- r'data-config-url="([^"]+)"', webpage, 'config URL',
- default=NO_DEFAULT if video_password_verified else None)
+ data = self._parse_json(self._search_regex(
+ r'window\s*=\s*_extend\(window,\s*({.+?})\);', webpage, 'data',
+ default=NO_DEFAULT if video_password_verified else '{}'), video_id)
+ config_url = data.get('vimeo_esi', {}).get('config', {}).get('configUrl')
if config_url is None:
self._verify_video_password(webpage_url, video_id, webpage)
config_url = self._get_config_url(
diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py
index 58799d413..df43ba867 100644
--- a/youtube_dl/extractor/vk.py
+++ b/youtube_dl/extractor/vk.py
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
from __future__ import unicode_literals
import collections
@@ -341,7 +341,7 @@ class VKIE(VKBaseIE):
if youtube_url:
return self.url_result(youtube_url, 'Youtube')
- vimeo_url = VimeoIE._extract_vimeo_url(url, info_page)
+ vimeo_url = VimeoIE._extract_url(url, info_page)
if vimeo_url is not None:
return self.url_result(vimeo_url)
diff --git a/youtube_dl/extractor/vodlocker.py b/youtube_dl/extractor/vodlocker.py
index a938a4007..c85b474d2 100644
--- a/youtube_dl/extractor/vodlocker.py
+++ b/youtube_dl/extractor/vodlocker.py
@@ -1,4 +1,4 @@
-# -*- coding: utf-8 -*-
+# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py
index 390f9e830..f7e6360a3 100644
--- a/youtube_dl/extractor/wdr.py
+++ b/youtube_dl/extractor/wdr.py
@@ -1,4 +1,4 @@
-# -*- coding: utf-8 -*-
+# coding: utf-8
from __future__ import unicode_literals
import re
diff --git a/youtube_dl/extractor/wrzuta.py b/youtube_dl/extractor/wrzuta.py
index bdd7097ba..0f53f1bcb 100644
--- a/youtube_dl/extractor/wrzuta.py
+++ b/youtube_dl/extractor/wrzuta.py
@@ -1,4 +1,4 @@
-# -*- coding: utf-8 -*-
+# coding: utf-8
from __future__ import unicode_literals
import re
diff --git a/youtube_dl/extractor/wsj.py b/youtube_dl/extractor/wsj.py
index a83e68b17..deb7483ae 100644
--- a/youtube_dl/extractor/wsj.py
+++ b/youtube_dl/extractor/wsj.py
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
diff --git a/youtube_dl/extractor/xboxclips.py b/youtube_dl/extractor/xboxclips.py
index b113ab1c4..d9c277bc3 100644
--- a/youtube_dl/extractor/xboxclips.py
+++ b/youtube_dl/extractor/xboxclips.py
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
diff --git a/youtube_dl/extractor/xnxx.py b/youtube_dl/extractor/xnxx.py
index bcb140305..e0a6255dc 100644
--- a/youtube_dl/extractor/xnxx.py
+++ b/youtube_dl/extractor/xnxx.py
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
diff --git a/youtube_dl/extractor/xuite.py b/youtube_dl/extractor/xuite.py
index a66daee46..4b9c1ee9c 100644
--- a/youtube_dl/extractor/xuite.py
+++ b/youtube_dl/extractor/xuite.py
@@ -1,4 +1,4 @@
-# -*- coding: utf-8 -*-
+# coding: utf-8
from __future__ import unicode_literals
import base64
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index f86823112..545246bcd 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1867,7 +1867,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
'title': 'Uploads from Interstellar Movie',
'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
},
- 'playlist_mincout': 21,
+ 'playlist_mincount': 21,
}, {
# Playlist URL that does not actually serve a playlist
'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
@@ -1891,6 +1891,27 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
},
'add_ie': [YoutubeIE.ie_key()],
}, {
+ 'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
+ 'info_dict': {
+ 'id': 'yeWKywCrFtk',
+ 'ext': 'mp4',
+ 'title': 'Small Scale Baler and Braiding Rugs',
+ 'uploader': 'Backus-Page House Museum',
+ 'uploader_id': 'backuspagemuseum',
+ 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
+ 'upload_date': '20161008',
+ 'license': 'Standard YouTube License',
+ 'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
+ 'categories': ['Nonprofits & Activism'],
+ 'tags': list,
+ 'like_count': int,
+ 'dislike_count': int,
+ },
+ 'params': {
+ 'noplaylist': True,
+ 'skip_download': True,
+ },
+ }, {
'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
'only_matching': True,
}]
@@ -1971,8 +1992,10 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
def _check_download_just_video(self, url, playlist_id):
# Check if it's a video-specific URL
query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
- if 'v' in query_dict:
- video_id = query_dict['v'][0]
+ video_id = query_dict.get('v', [None])[0] or self._search_regex(
+ r'(?:^|//)youtu\.be/([0-9A-Za-z_-]{11})', url,
+ 'video id', default=None)
+ if video_id:
if self._downloader.params.get('noplaylist'):
self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
return video_id, self.url_result(video_id, 'Youtube', video_id=video_id)
@@ -2152,7 +2175,7 @@ class YoutubeUserIE(YoutubeChannelIE):
class YoutubeLiveIE(YoutubeBaseInfoExtractor):
IE_DESC = 'YouTube.com live streams'
- _VALID_URL = r'(?P<base_url>https?://(?:\w+\.)?youtube\.com/(?:user|channel)/(?P<id>[^/]+))/live'
+ _VALID_URL = r'(?P<base_url>https?://(?:\w+\.)?youtube\.com/(?:user|channel|c)/(?P<id>[^/]+))/live'
IE_NAME = 'youtube:live'
_TESTS = [{
@@ -2178,6 +2201,9 @@ class YoutubeLiveIE(YoutubeBaseInfoExtractor):
}, {
'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
'only_matching': True,
+ }, {
+ 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
+ 'only_matching': True,
}]
def _real_extract(self, url):
diff --git a/youtube_dl/extractor/zingmp3.py b/youtube_dl/extractor/zingmp3.py
index bd708b42c..0f0e9d0eb 100644
--- a/youtube_dl/extractor/zingmp3.py
+++ b/youtube_dl/extractor/zingmp3.py
@@ -1,4 +1,4 @@
-# coding=utf-8
+# coding: utf-8
from __future__ import unicode_literals
import re
diff --git a/youtube_dl/postprocessor/embedthumbnail.py b/youtube_dl/postprocessor/embedthumbnail.py
index 3bad5a266..2e4789eb2 100644
--- a/youtube_dl/postprocessor/embedthumbnail.py
+++ b/youtube_dl/postprocessor/embedthumbnail.py
@@ -1,4 +1,4 @@
-# -*- coding: utf-8 -*-
+# coding: utf-8
from __future__ import unicode_literals
diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py
index 8d1214ee2..1881f4849 100644
--- a/youtube_dl/postprocessor/ffmpeg.py
+++ b/youtube_dl/postprocessor/ffmpeg.py
@@ -279,6 +279,9 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
prefix, sep, ext = path.rpartition('.') # not os.path.splitext, since the latter does not work on unicode in all setups
new_path = prefix + sep + extension
+ information['filepath'] = new_path
+ information['ext'] = extension
+
# If we download foo.mp3 and convert it to... foo.mp3, then don't delete foo.mp3, silly.
if (new_path == path or
(self._nopostoverwrites and os.path.exists(encodeFilename(new_path)))):
@@ -300,9 +303,6 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
new_path, time.time(), information['filetime'],
errnote='Cannot update utime of audio file')
- information['filepath'] = new_path
- information['ext'] = extension
-
return [path], information
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index d2dfa8013..28941673f 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -1,5 +1,5 @@
#!/usr/bin/env python
-# -*- coding: utf-8 -*-
+# coding: utf-8
from __future__ import unicode_literals
@@ -165,6 +165,8 @@ DATE_FORMATS_MONTH_FIRST.extend([
'%m/%d/%Y %H:%M:%S',
])
+PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
+
def preferredencoding():
"""Get preferred encoding.
@@ -782,6 +784,7 @@ class XAttrMetadataError(Exception):
def __init__(self, code=None, msg='Unknown error'):
super(XAttrMetadataError, self).__init__(msg)
self.code = code
+ self.msg = msg
# Parsing code and msg
if (self.code in (errno.ENOSPC, errno.EDQUOT) or
@@ -3016,9 +3019,7 @@ def encode_base_n(num, n, table=None):
def decode_packed_codes(code):
- mobj = re.search(
- r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)",
- code)
+ mobj = re.search(PACKED_CODES_RE, code)
obfucasted_code, base, count, symbols = mobj.groups()
base = int(base)
count = int(count)
@@ -3161,20 +3162,25 @@ def write_xattr(path, key, value):
# try the pyxattr module...
import xattr
- # Unicode arguments are not supported in python-pyxattr until
- # version 0.5.0
- # See https://github.com/rg3/youtube-dl/issues/5498
- pyxattr_required_version = '0.5.0'
- if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
- # TODO: fallback to CLI tools
- raise XAttrUnavailableError(
- 'python-pyxattr is detected but is too old. '
- 'youtube-dl requires %s or above while your version is %s. '
- 'Falling back to other xattr implementations' % (
- pyxattr_required_version, xattr.__version__))
+ if hasattr(xattr, 'set'): # pyxattr
+ # Unicode arguments are not supported in python-pyxattr until
+ # version 0.5.0
+ # See https://github.com/rg3/youtube-dl/issues/5498
+ pyxattr_required_version = '0.5.0'
+ if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
+ # TODO: fallback to CLI tools
+ raise XAttrUnavailableError(
+ 'python-pyxattr is detected but is too old. '
+ 'youtube-dl requires %s or above while your version is %s. '
+ 'Falling back to other xattr implementations' % (
+ pyxattr_required_version, xattr.__version__))
+
+ setxattr = xattr.set
+ else: # xattr
+ setxattr = xattr.setxattr
try:
- xattr.set(path, key, value)
+ setxattr(path, key, value)
except EnvironmentError as e:
raise XAttrMetadataError(e.errno, e.strerror)
diff --git a/youtube_dl/version.py b/youtube_dl/version.py
index af0c2cfc4..b883dbdff 100644
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,3 +1,3 @@
from __future__ import unicode_literals
-__version__ = '2016.09.27'
+__version__ = '2016.10.19'