aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--AUTHORS1
-rw-r--r--README.md2
-rw-r--r--docs/supportedsites.md8
-rw-r--r--test/test_YoutubeDL.py3
-rwxr-xr-xyoutube_dl/YoutubeDL.py45
-rw-r--r--youtube_dl/downloader/fragment.py3
-rw-r--r--youtube_dl/extractor/__init__.py11
-rw-r--r--youtube_dl/extractor/aol.py70
-rw-r--r--youtube_dl/extractor/arte.py11
-rw-r--r--youtube_dl/extractor/cinemassacre.py24
-rw-r--r--youtube_dl/extractor/cnet.py6
-rw-r--r--youtube_dl/extractor/common.py2
-rw-r--r--youtube_dl/extractor/elpais.py31
-rw-r--r--youtube_dl/extractor/engadget.py25
-rw-r--r--youtube_dl/extractor/facebook.py98
-rw-r--r--youtube_dl/extractor/fivemin.py51
-rw-r--r--youtube_dl/extractor/freespeech.py2
-rw-r--r--youtube_dl/extractor/iqiyi.py2
-rw-r--r--youtube_dl/extractor/jeuxvideo.py2
-rw-r--r--youtube_dl/extractor/khanacademy.py4
-rw-r--r--youtube_dl/extractor/livestream.py6
-rw-r--r--youtube_dl/extractor/mit.py2
-rw-r--r--youtube_dl/extractor/pyvideo.py6
-rw-r--r--youtube_dl/extractor/revision3.py97
-rw-r--r--youtube_dl/extractor/safari.py5
-rw-r--r--youtube_dl/extractor/sexu.py25
-rw-r--r--youtube_dl/extractor/ted.py2
-rw-r--r--youtube_dl/extractor/twitch.py2
-rw-r--r--youtube_dl/extractor/vgtv.py17
-rw-r--r--youtube_dl/extractor/viki.py6
-rw-r--r--youtube_dl/extractor/vimeo.py66
-rw-r--r--youtube_dl/extractor/vk.py4
-rw-r--r--youtube_dl/extractor/wimp.py2
-rw-r--r--youtube_dl/postprocessor/__init__.py2
-rw-r--r--youtube_dl/postprocessor/ffmpeg.py19
-rw-r--r--youtube_dl/version.py2
36 files changed, 420 insertions, 244 deletions
diff --git a/AUTHORS b/AUTHORS
index b51e23f2d..b6b47ac57 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -161,3 +161,4 @@ Jens Wille
Robin Houtevelts
Patrick Griffis
Aidan Rowe
+mutantmonkey
diff --git a/README.md b/README.md
index d66804a7a..a774e73e7 100644
--- a/README.md
+++ b/README.md
@@ -80,6 +80,8 @@ which means you can modify it, redistribute it or use it however you like.
on Windows)
--flat-playlist Do not extract the videos of a playlist,
only list them.
+ --mark-watched Mark videos watched (YouTube only)
+ --no-mark-watched Do not mark videos watched (YouTube only)
--no-color Do not emit color codes in output
## Network Options:
diff --git a/docs/supportedsites.md b/docs/supportedsites.md
index 43403233d..0682e4af0 100644
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -194,6 +194,7 @@
- **faz.net**
- **fc2**
- **Fczenit**
+ - **features.aol.com**
- **fernsehkritik.tv**
- **Firstpost**
- **FiveTV**
@@ -293,6 +294,7 @@
- **kontrtube**: KontrTube.ru - Труба зовёт
- **KrasView**: Красвью
- **Ku6**
+ - **KUSI**
- **kuwo:album**: 酷我音乐 - 专辑
- **kuwo:category**: 酷我音乐 - 分类
- **kuwo:chart**: 酷我音乐 - 排行榜
@@ -301,12 +303,11 @@
- **kuwo:song**: 酷我音乐
- **la7.tv**
- **Laola1Tv**
+ - **Le**: 乐视网
- **Lecture2Go**
- **Lemonde**
- - **Letv**: 乐视网
+ - **LePlaylist**
- **LetvCloud**: 乐视云
- - **LetvPlaylist**
- - **LetvTv**
- **Libsyn**
- **life:embed**
- **lifenews**: LIFE | NEWS
@@ -421,6 +422,7 @@
- **Npr**
- **NRK**
- **NRKPlaylist**
+ - **NRKSkole**: NRK Skole
- **NRKTV**: NRK TV and NRK Radio
- **ntv.ru**
- **Nuvid**
diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py
index 59f7ab49d..efbee3b71 100644
--- a/test/test_YoutubeDL.py
+++ b/test/test_YoutubeDL.py
@@ -502,6 +502,9 @@ class TestYoutubeDL(unittest.TestCase):
assertRegexpMatches(self, ydl._format_note({
'vbr': 10,
}), '^\s*10k$')
+ assertRegexpMatches(self, ydl._format_note({
+ 'fps': 30,
+ }), '^30fps$')
def test_postprocessors(self):
filename = 'post-processor-testfile.mp4'
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index dcc867e45..b3391088f 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -85,6 +85,7 @@ from .extractor import get_info_extractor, gen_extractors
from .downloader import get_suitable_downloader
from .downloader.rtmp import rtmpdump_version
from .postprocessor import (
+ FFmpegFixupM3u8PP,
FFmpegFixupM4aPP,
FFmpegFixupStretchedPP,
FFmpegMergerPP,
@@ -567,7 +568,7 @@ class YoutubeDL(object):
elif template_dict.get('height'):
template_dict['resolution'] = '%sp' % template_dict['height']
elif template_dict.get('width'):
- template_dict['resolution'] = '?x%d' % template_dict['width']
+ template_dict['resolution'] = '%dx?' % template_dict['width']
sanitize = lambda k, v: sanitize_filename(
compat_str(v),
@@ -1638,6 +1639,8 @@ class YoutubeDL(object):
if fixup_policy is None:
fixup_policy = 'detect_or_warn'
+ INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg or avconv to fix this automatically.'
+
stretched_ratio = info_dict.get('stretched_ratio')
if stretched_ratio is not None and stretched_ratio != 1:
if fixup_policy == 'warn':
@@ -1650,15 +1653,18 @@ class YoutubeDL(object):
info_dict['__postprocessors'].append(stretched_pp)
else:
self.report_warning(
- '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
- info_dict['id'], stretched_ratio))
+ '%s: Non-uniform pixel ratio (%s). %s'
+ % (info_dict['id'], stretched_ratio, INSTALL_FFMPEG_MESSAGE))
else:
assert fixup_policy in ('ignore', 'never')
- if info_dict.get('requested_formats') is None and info_dict.get('container') == 'm4a_dash':
+ if (info_dict.get('requested_formats') is None and
+ info_dict.get('container') == 'm4a_dash'):
if fixup_policy == 'warn':
- self.report_warning('%s: writing DASH m4a. Only some players support this container.' % (
- info_dict['id']))
+ self.report_warning(
+ '%s: writing DASH m4a. '
+ 'Only some players support this container.'
+ % info_dict['id'])
elif fixup_policy == 'detect_or_warn':
fixup_pp = FFmpegFixupM4aPP(self)
if fixup_pp.available:
@@ -1666,8 +1672,27 @@ class YoutubeDL(object):
info_dict['__postprocessors'].append(fixup_pp)
else:
self.report_warning(
- '%s: writing DASH m4a. Only some players support this container. Install ffmpeg or avconv to fix this automatically.' % (
- info_dict['id']))
+ '%s: writing DASH m4a. '
+ 'Only some players support this container. %s'
+ % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
+ else:
+ assert fixup_policy in ('ignore', 'never')
+
+ if (info_dict.get('protocol') == 'm3u8_native' or
+ info_dict.get('protocol') == 'm3u8' and
+ self.params.get('hls_prefer_native')):
+ if fixup_policy == 'warn':
+ self.report_warning('%s: malformated aac bitstream.' % (
+ info_dict['id']))
+ elif fixup_policy == 'detect_or_warn':
+ fixup_pp = FFmpegFixupM3u8PP(self)
+ if fixup_pp.available:
+ info_dict.setdefault('__postprocessors', [])
+ info_dict['__postprocessors'].append(fixup_pp)
+ else:
+ self.report_warning(
+ '%s: malformated aac bitstream. %s'
+ % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
else:
assert fixup_policy in ('ignore', 'never')
@@ -1831,7 +1856,9 @@ class YoutubeDL(object):
if fdict.get('vbr') is not None:
res += '%4dk' % fdict['vbr']
if fdict.get('fps') is not None:
- res += ', %sfps' % fdict['fps']
+ if res:
+ res += ', '
+ res += '%sfps' % fdict['fps']
if fdict.get('acodec') is not None:
if res:
res += ', '
diff --git a/youtube_dl/downloader/fragment.py b/youtube_dl/downloader/fragment.py
index 5bc99492b..a5bae9669 100644
--- a/youtube_dl/downloader/fragment.py
+++ b/youtube_dl/downloader/fragment.py
@@ -99,7 +99,8 @@ class FragmentFD(FileDownloader):
state['eta'] = self.calc_eta(
start, time_now, estimated_size,
state['downloaded_bytes'])
- state['speed'] = s.get('speed')
+ state['speed'] = s.get('speed') or ctx.get('speed')
+ ctx['speed'] = state['speed']
ctx['prev_frag_downloaded_bytes'] = frag_downloaded_bytes
self._hook_progress(state)
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 08b3dc673..98de5ddff 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -23,7 +23,10 @@ from .alphaporno import AlphaPornoIE
from .animeondemand import AnimeOnDemandIE
from .anitube import AnitubeIE
from .anysex import AnySexIE
-from .aol import AolIE
+from .aol import (
+ AolIE,
+ AolFeaturesIE,
+)
from .allocine import AllocineIE
from .aparat import AparatIE
from .appleconnect import AppleConnectIE
@@ -209,10 +212,7 @@ from .everyonesmixtape import EveryonesMixtapeIE
from .exfm import ExfmIE
from .expotv import ExpoTVIE
from .extremetube import ExtremeTubeIE
-from .facebook import (
- FacebookIE,
- FacebookPostIE,
-)
+from .facebook import FacebookIE
from .faz import FazIE
from .fc2 import FC2IE
from .fczenit import FczenitIE
@@ -856,6 +856,7 @@ from .vimeo import (
VimeoChannelIE,
VimeoGroupsIE,
VimeoLikesIE,
+ VimeoOndemandIE,
VimeoReviewIE,
VimeoUserIE,
VimeoWatchLaterIE,
diff --git a/youtube_dl/extractor/aol.py b/youtube_dl/extractor/aol.py
index b51eafc45..b761b2cc4 100644
--- a/youtube_dl/extractor/aol.py
+++ b/youtube_dl/extractor/aol.py
@@ -1,24 +1,11 @@
from __future__ import unicode_literals
-import re
-
from .common import InfoExtractor
class AolIE(InfoExtractor):
IE_NAME = 'on.aol.com'
- _VALID_URL = r'''(?x)
- (?:
- aol-video:|
- http://on\.aol\.com/
- (?:
- video/.*-|
- playlist/(?P<playlist_display_id>[^/?#]+?)-(?P<playlist_id>[0-9]+)[?#].*_videoid=
- )
- )
- (?P<id>[0-9]+)
- (?:$|\?)
- '''
+ _VALID_URL = r'(?:aol-video:|http://on\.aol\.com/video/.*-)(?P<id>[0-9]+)(?:$|\?)'
_TESTS = [{
'url': 'http://on.aol.com/video/u-s--official-warns-of-largest-ever-irs-phone-scam-518167793?icid=OnHomepageC2Wide_MustSee_Img',
@@ -29,42 +16,31 @@ class AolIE(InfoExtractor):
'title': 'U.S. Official Warns Of \'Largest Ever\' IRS Phone Scam',
},
'add_ie': ['FiveMin'],
- }, {
- 'url': 'http://on.aol.com/playlist/brace-yourself---todays-weirdest-news-152147?icid=OnHomepageC4_Omg_Img#_videoid=518184316',
- 'info_dict': {
- 'id': '152147',
- 'title': 'Brace Yourself - Today\'s Weirdest News',
- },
- 'playlist_mincount': 10,
}]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
- playlist_id = mobj.group('playlist_id')
- if not playlist_id or self._downloader.params.get('noplaylist'):
- return self.url_result('5min:%s' % video_id)
+ video_id = self._match_id(url)
+ return self.url_result('5min:%s' % video_id)
- self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
- webpage = self._download_webpage(url, playlist_id)
- title = self._html_search_regex(
- r'<h1 class="video-title[^"]*">(.+?)</h1>', webpage, 'title')
- playlist_html = self._search_regex(
- r"(?s)<ul\s+class='video-related[^']*'>(.*?)</ul>", webpage,
- 'playlist HTML')
- entries = [{
- '_type': 'url',
- 'url': 'aol-video:%s' % m.group('id'),
- 'ie_key': 'Aol',
- } for m in re.finditer(
- r"<a\s+href='.*videoid=(?P<id>[0-9]+)'\s+class='video-thumb'>",
- playlist_html)]
+class AolFeaturesIE(InfoExtractor):
+ IE_NAME = 'features.aol.com'
+ _VALID_URL = r'http://features\.aol\.com/video/(?P<id>[^/?#]+)'
- return {
- '_type': 'playlist',
- 'id': playlist_id,
- 'display_id': mobj.group('playlist_display_id'),
- 'title': title,
- 'entries': entries,
- }
+ _TESTS = [{
+ 'url': 'http://features.aol.com/video/behind-secret-second-careers-late-night-talk-show-hosts',
+ 'md5': '7db483bb0c09c85e241f84a34238cc75',
+ 'info_dict': {
+ 'id': '519507715',
+ 'ext': 'mp4',
+ 'title': 'What To Watch - February 17, 2016',
+ },
+ 'add_ie': ['FiveMin'],
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ return self.url_result(self._search_regex(
+ r'<script type="text/javascript" src="(https?://[^/]*?5min\.com/Scripts/PlayerSeed\.js[^"]+)"',
+ webpage, '5min embed url'), 'FiveMin')
diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py
index efde7e207..3e119e21b 100644
--- a/youtube_dl/extractor/arte.py
+++ b/youtube_dl/extractor/arte.py
@@ -121,15 +121,18 @@ class ArteTVPlus7IE(InfoExtractor):
json_url = compat_parse_qs(
compat_urllib_parse_urlparse(iframe_url).query)['json_url'][0]
if json_url:
- return self._extract_from_json_url(json_url, video_id, lang)
- # Differend kind of embed URL (e.g.
+ title = self._search_regex(
+ r'<h3[^>]+title=(["\'])(?P<title>.+?)\1',
+ webpage, 'title', default=None, group='title')
+ return self._extract_from_json_url(json_url, video_id, lang, title=title)
+ # Different kind of embed URL (e.g.
# http://www.arte.tv/magazine/trepalium/fr/episode-0406-replay-trepalium)
embed_url = self._search_regex(
r'<iframe[^>]+src=(["\'])(?P<url>.+?)\1',
webpage, 'embed url', group='url')
return self.url_result(embed_url)
- def _extract_from_json_url(self, json_url, video_id, lang):
+ def _extract_from_json_url(self, json_url, video_id, lang, title=None):
info = self._download_json(json_url, video_id)
player_info = info['videoJsonPlayer']
@@ -137,7 +140,7 @@ class ArteTVPlus7IE(InfoExtractor):
if not upload_date_str:
upload_date_str = (player_info.get('VRA') or player_info.get('VDA') or '').split(' ')[0]
- title = player_info['VTI'].strip()
+ title = (player_info.get('VTI') or title or player_info['VID']).strip()
subtitle = player_info.get('VSU', '').strip()
if subtitle:
title += ' - %s' % subtitle
diff --git a/youtube_dl/extractor/cinemassacre.py b/youtube_dl/extractor/cinemassacre.py
index 6d9cd8abd..042c4f2f1 100644
--- a/youtube_dl/extractor/cinemassacre.py
+++ b/youtube_dl/extractor/cinemassacre.py
@@ -21,6 +21,10 @@ class CinemassacreIE(InfoExtractor):
'title': '“Angry Video Game Nerd: The Movie” – Trailer',
'description': 'md5:fb87405fcb42a331742a0dce2708560b',
},
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
},
{
'url': 'http://cinemassacre.com/2013/10/02/the-mummys-hand-1940',
@@ -31,14 +35,18 @@ class CinemassacreIE(InfoExtractor):
'upload_date': '20131002',
'title': 'The Mummy’s Hand (1940)',
},
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
},
{
# Youtube embedded video
'url': 'http://cinemassacre.com/2006/12/07/chronologically-confused-about-bad-movie-and-video-game-sequel-titles/',
- 'md5': 'df4cf8a1dcedaec79a73d96d83b99023',
+ 'md5': 'ec9838a5520ef5409b3e4e42fcb0a3b9',
'info_dict': {
'id': 'OEVzPCY2T-g',
- 'ext': 'mp4',
+ 'ext': 'webm',
'title': 'AVGN: Chronologically Confused about Bad Movie and Video Game Sequel Titles',
'upload_date': '20061207',
'uploader': 'Cinemassacre',
@@ -49,12 +57,12 @@ class CinemassacreIE(InfoExtractor):
{
# Youtube embedded video
'url': 'http://cinemassacre.com/2006/09/01/mckids/',
- 'md5': '6eb30961fa795fedc750eac4881ad2e1',
+ 'md5': '7393c4e0f54602ad110c793eb7a6513a',
'info_dict': {
'id': 'FnxsNhuikpo',
- 'ext': 'mp4',
+ 'ext': 'webm',
'upload_date': '20060901',
- 'uploader': 'Cinemassacre Extras',
+ 'uploader': 'Cinemassacre Extra',
'description': 'md5:de9b751efa9e45fbaafd9c8a1123ed53',
'uploader_id': 'Cinemassacre',
'title': 'AVGN: McKids',
@@ -69,7 +77,11 @@ class CinemassacreIE(InfoExtractor):
'description': 'Let’s Play Mario Kart 64 !! Mario Kart 64 is a classic go-kart racing game released for the Nintendo 64 (N64). Today James & Mike do 4 player Battle Mode with Kyle and Bootsy!',
'title': 'Mario Kart 64 (Nintendo 64) James & Mike Mondays',
'upload_date': '20150525',
- }
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
}
]
diff --git a/youtube_dl/extractor/cnet.py b/youtube_dl/extractor/cnet.py
index 5c3908f72..3cf0bf95b 100644
--- a/youtube_dl/extractor/cnet.py
+++ b/youtube_dl/extractor/cnet.py
@@ -51,9 +51,7 @@ class CNETIE(ThePlatformIE):
uploader = None
uploader_id = None
- mpx_account = data['config']['uvpConfig']['default']['mpx_account']
-
- metadata = self.get_metadata('%s/%s' % (mpx_account, list(vdata['files'].values())[0]), video_id)
+ metadata = self.get_metadata('kYEXFC/%s' % list(vdata['files'].values())[0], video_id)
description = vdata.get('description') or metadata.get('description')
duration = int_or_none(vdata.get('duration')) or metadata.get('duration')
@@ -62,7 +60,7 @@ class CNETIE(ThePlatformIE):
for (fkey, vid) in vdata['files'].items():
if fkey == 'hls_phone' and 'hls_tablet' in vdata['files']:
continue
- release_url = 'http://link.theplatform.com/s/%s/%s?format=SMIL&mbr=true' % (mpx_account, vid)
+ release_url = 'http://link.theplatform.com/s/kYEXFC/%s?format=SMIL&mbr=true' % vid
if fkey == 'hds':
release_url += '&manifest=f4m'
tp_formats, tp_subtitles = self._extract_theplatform_smil(release_url, video_id, 'Downloading %s SMIL data' % fkey)
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index f337caf20..393677251 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -106,7 +106,7 @@ class InfoExtractor(object):
* protocol The protocol that will be used for the actual
download, lower-case.
"http", "https", "rtsp", "rtmp", "rtmpe",
- "m3u8", or "m3u8_native".
+ "m3u8", "m3u8_native" or "http_dash_segments".
* preference Order number of this format. If this field is
present and not None, the formats get sorted
by this field, regardless of all other values.
diff --git a/youtube_dl/extractor/elpais.py b/youtube_dl/extractor/elpais.py
index 00a69e631..8c725a4e6 100644
--- a/youtube_dl/extractor/elpais.py
+++ b/youtube_dl/extractor/elpais.py
@@ -9,7 +9,7 @@ class ElPaisIE(InfoExtractor):
_VALID_URL = r'https?://(?:[^.]+\.)?elpais\.com/.*/(?P<id>[^/#?]+)\.html(?:$|[?#])'
IE_DESC = 'El País'
- _TEST = {
+ _TESTS = [{
'url': 'http://blogs.elpais.com/la-voz-de-inaki/2014/02/tiempo-nuevo-recetas-viejas.html',
'md5': '98406f301f19562170ec071b83433d55',
'info_dict': {
@@ -19,30 +19,41 @@ class ElPaisIE(InfoExtractor):
'description': 'De lunes a viernes, a partir de las ocho de la mañana, Iñaki Gabilondo nos cuenta su visión de la actualidad nacional e internacional.',
'upload_date': '20140206',
}
- }
+ }, {
+ 'url': 'http://elcomidista.elpais.com/elcomidista/2016/02/24/articulo/1456340311_668921.html#?id_externo_nwl=newsletter_diaria20160303t',
+ 'md5': '3bd5b09509f3519d7d9e763179b013de',
+ 'info_dict': {
+ 'id': '1456340311_668921',
+ 'ext': 'mp4',
+ 'title': 'Cómo hacer el mejor café con cafetera italiana',
+ 'description': 'Que sí, que las cápsulas son cómodas. Pero si le pides algo más a la vida, quizá deberías aprender a usar bien la cafetera italiana. No tienes más que ver este vídeo y seguir sus siete normas básicas.',
+ 'upload_date': '20160303',
+ }
+ }]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
prefix = self._html_search_regex(
- r'var url_cache = "([^"]+)";', webpage, 'URL prefix')
+ r'var\s+url_cache\s*=\s*"([^"]+)";', webpage, 'URL prefix')
video_suffix = self._search_regex(
- r"URLMediaFile = url_cache \+ '([^']+)'", webpage, 'video URL')
+ r"(?:URLMediaFile|urlVideo_\d+)\s*=\s*url_cache\s*\+\s*'([^']+)'", webpage, 'video URL')
video_url = prefix + video_suffix
thumbnail_suffix = self._search_regex(
- r"URLMediaStill = url_cache \+ '([^']+)'", webpage, 'thumbnail URL',
- fatal=False)
+ r"(?:URLMediaStill|urlFotogramaFijo_\d+)\s*=\s*url_cache\s*\+\s*'([^']+)'",
+ webpage, 'thumbnail URL', fatal=False)
thumbnail = (
None if thumbnail_suffix is None
else prefix + thumbnail_suffix)
title = self._html_search_regex(
- '<h2 class="entry-header entry-title.*?>(.*?)</h2>',
+ (r"tituloVideo\s*=\s*'([^']+)'", webpage, 'title',
+ r'<h2 class="entry-header entry-title.*?>(.*?)</h2>'),
webpage, 'title')
- date_str = self._search_regex(
+ upload_date = unified_strdate(self._search_regex(
r'<p class="date-header date-int updated"\s+title="([^"]+)">',
- webpage, 'upload date', fatal=False)
- upload_date = (None if date_str is None else unified_strdate(date_str))
+ webpage, 'upload date', default=None) or self._html_search_meta(
+ 'datePublished', webpage, 'timestamp'))
return {
'id': video_id,
diff --git a/youtube_dl/extractor/engadget.py b/youtube_dl/extractor/engadget.py
index e4180701d..e5e57d485 100644
--- a/youtube_dl/extractor/engadget.py
+++ b/youtube_dl/extractor/engadget.py
@@ -1,21 +1,13 @@
from __future__ import unicode_literals
-import re
-
from .common import InfoExtractor
-from ..utils import (
- url_basename,
-)
class EngadgetIE(InfoExtractor):
- _VALID_URL = r'''(?x)https?://www.engadget.com/
- (?:video(?:/5min)?/(?P<id>\d+)|
- [\d/]+/.*?)
- '''
+ _VALID_URL = r'https?://www.engadget.com/video/(?P<id>\d+)'
_TEST = {
- 'url': 'http://www.engadget.com/video/5min/518153925/',
+ 'url': 'http://www.engadget.com/video/518153925/',
'md5': 'c6820d4828a5064447a4d9fc73f312c9',
'info_dict': {
'id': '518153925',
@@ -27,15 +19,4 @@ class EngadgetIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
-
- if video_id is not None:
- return self.url_result('5min:%s' % video_id)
- else:
- title = url_basename(url)
- webpage = self._download_webpage(url, title)
- ids = re.findall(r'<iframe[^>]+?playList=(\d+)', webpage)
- return {
- '_type': 'playlist',
- 'title': title,
- 'entries': [self.url_result('5min:%s' % vid) for vid in ids]
- }
+ return self.url_result('5min:%s' % video_id)
diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py
index 6c6c3b1bd..f5bbd39d2 100644
--- a/youtube_dl/extractor/facebook.py
+++ b/youtube_dl/extractor/facebook.py
@@ -37,7 +37,9 @@ class FacebookIE(InfoExtractor):
video/embed|
story\.php
)\?(?:.*?)(?:v|video_id|story_fbid)=|
- [^/]+/videos/(?:[^/]+/)?
+ [^/]+/videos/(?:[^/]+/)?|
+ [^/]+/posts/|
+ groups/[^/]+/permalink/
)|
facebook:
)
@@ -50,6 +52,8 @@ class FacebookIE(InfoExtractor):
_CHROME_USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.97 Safari/537.36'
+ _VIDEO_PAGE_TEMPLATE = 'https://www.facebook.com/video/video.php?v=%s'
+
_TESTS = [{
'url': 'https://www.facebook.com/video.php?v=637842556329505&fref=nf',
'md5': '6a40d33c0eccbb1af76cf0485a052659',
@@ -82,6 +86,33 @@ class FacebookIE(InfoExtractor):
'uploader': 'Demy de Zeeuw',
},
}, {
+ 'url': 'https://www.facebook.com/maxlayn/posts/10153807558977570',
+ 'md5': '037b1fa7f3c2d02b7a0d7bc16031ecc6',
+ 'info_dict': {
+ 'id': '544765982287235',
+ 'ext': 'mp4',
+ 'title': '"What are you doing running in the snow?"',
+ 'uploader': 'FailArmy',
+ }
+ }, {
+ 'url': 'https://m.facebook.com/story.php?story_fbid=1035862816472149&id=116132035111903',
+ 'md5': '1deb90b6ac27f7efcf6d747c8a27f5e3',
+ 'info_dict': {
+ 'id': '1035862816472149',
+ 'ext': 'mp4',
+ 'title': 'What the Flock Is Going On In New Zealand Credit: ViralHog',
+ 'uploader': 'S. Saint',
+ },
+ }, {
+ 'note': 'swf params escaped',
+ 'url': 'https://www.facebook.com/barackobama/posts/10153664894881749',
+ 'md5': '97ba073838964d12c70566e0085c2b91',
+ 'info_dict': {
+ 'id': '10153664894881749',
+ 'ext': 'mp4',
+ 'title': 'Facebook video #10153664894881749',
+ },
+ }, {
'url': 'https://www.facebook.com/video.php?v=10204634152394104',
'only_matching': True,
}, {
@@ -94,7 +125,7 @@ class FacebookIE(InfoExtractor):
'url': 'facebook:544765982287235',
'only_matching': True,
}, {
- 'url': 'https://m.facebook.com/story.php?story_fbid=1035862816472149&id=116132035111903',
+ 'url': 'https://www.facebook.com/groups/164828000315060/permalink/764967300301124/',
'only_matching': True,
}]
@@ -164,19 +195,19 @@ class FacebookIE(InfoExtractor):
def _real_initialize(self):
self._login()
- def _real_extract(self, url):
- video_id = self._match_id(url)
- req = sanitized_Request('https://www.facebook.com/video/video.php?v=%s' % video_id)
+ def _extract_from_url(self, url, video_id, fatal_if_no_video=True):
+ req = sanitized_Request(url)
req.add_header('User-Agent', self._CHROME_USER_AGENT)
webpage = self._download_webpage(req, video_id)
video_data = None
- BEFORE = '{swf.addParam(param[0], param[1]);});\n'
+ BEFORE = '{swf.addParam(param[0], param[1]);});'
AFTER = '.forEach(function(variable) {swf.addVariable(variable[0], variable[1]);});'
- m = re.search(re.escape(BEFORE) + '(.*?)' + re.escape(AFTER), webpage)
+ m = re.search(re.escape(BEFORE) + '(?:\n|\\\\n)(.*?)' + re.escape(AFTER), webpage)
if m:
- data = dict(json.loads(m.group(1)))
+ swf_params = m.group(1).replace('\\\\', '\\').replace('\\"', '"')
+ data = dict(json.loads(swf_params))
params_raw = compat_urllib_parse_unquote(data['params'])
video_data = json.loads(params_raw)['video_data']
@@ -189,13 +220,15 @@ class FacebookIE(InfoExtractor):
if not video_data:
server_js_data = self._parse_json(self._search_regex(
- r'handleServerJS\(({.+})\);', webpage, 'server js data'), video_id)
+ r'handleServerJS\(({.+})\);', webpage, 'server js data', default='{}'), video_id)
for item in server_js_data.get('instances', []):
if item[1][0] == 'VideoConfig':
video_data = video_data_list2dict(item[2][0]['videoData'])
break
if not video_data:
+ if not fatal_if_no_video:
+ return webpage, False
m_msg = re.search(r'class="[^"]*uiInterstitialContent[^"]*"><div>(.*?)</div>', webpage)
if m_msg is not None:
raise ExtractorError(
@@ -241,39 +274,36 @@ class FacebookIE(InfoExtractor):
video_title = 'Facebook video #%s' % video_id
uploader = clean_html(get_element_by_id('fbPhotoPageAuthorName', webpage))
- return {
+ info_dict = {
'id': video_id,
'title': video_title,
'formats': formats,
'uploader': uploader,
}
-
-class FacebookPostIE(InfoExtractor):
- IE_NAME = 'facebook:post'
- _VALID_URL = r'https?://(?:\w+\.)?facebook\.com/[^/]+/posts/(?P<id>\d+)'
- _TEST = {
- 'url': 'https://www.facebook.com/maxlayn/posts/10153807558977570',
- 'md5': '037b1fa7f3c2d02b7a0d7bc16031ecc6',
- 'info_dict': {
- 'id': '544765982287235',
- 'ext': 'mp4',
- 'title': '"What are you doing running in the snow?"',
- 'uploader': 'FailArmy',
- }
- }
+ return webpage, info_dict
def _real_extract(self, url):
- post_id = self._match_id(url)
+ video_id = self._match_id(url)
+
+ real_url = self._VIDEO_PAGE_TEMPLATE % video_id if url.startswith('facebook:') else url
+ webpage, info_dict = self._extract_from_url(real_url, video_id, fatal_if_no_video=False)
- webpage = self._download_webpage(url, post_id)
+ if info_dict:
+ return info_dict
- entries = [
- self.url_result('facebook:%s' % video_id, FacebookIE.ie_key())
- for video_id in self._parse_json(
- self._search_regex(
- r'(["\'])video_ids\1\s*:\s*(?P<ids>\[.+?\])',
- webpage, 'video ids', group='ids'),
- post_id)]
+ if '/posts/' in url:
+ entries = [
+ self.url_result('facebook:%s' % vid, FacebookIE.ie_key())
+ for vid in self._parse_json(
+ self._search_regex(
+ r'(["\'])video_ids\1\s*:\s*(?P<ids>\[.+?\])',
+ webpage, 'video ids', group='ids'),
+ video_id)]
- return self.playlist_result(entries, post_id)
+ return self.playlist_result(entries, video_id)
+ else:
+ _, info_dict = self._extract_from_url(
+ self._VIDEO_PAGE_TEMPLATE % video_id,
+ video_id, fatal_if_no_video=True)
+ return info_dict
diff --git a/youtube_dl/extractor/fivemin.py b/youtube_dl/extractor/fivemin.py
index 2955965d9..67d50a386 100644
--- a/youtube_dl/extractor/fivemin.py
+++ b/youtube_dl/extractor/fivemin.py
@@ -1,5 +1,7 @@
from __future__ import unicode_literals
+import re
+
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse,
@@ -16,12 +18,7 @@ from ..utils import (
class FiveMinIE(InfoExtractor):
IE_NAME = '5min'
- _VALID_URL = r'''(?x)
- (?:https?://[^/]*?5min\.com/Scripts/PlayerSeed\.js\?(?:.*?&)?playList=|
- https?://(?:(?:massively|www)\.)?joystiq\.com/video/|
- 5min:)
- (?P<id>\d+)
- '''
+ _VALID_URL = r'(?:5min:(?P<id>\d+)(?::(?P<sid>\d+))?|https?://[^/]*?5min\.com/Scripts/PlayerSeed\.js\?(?P<query>.*))'
_TESTS = [
{
@@ -45,6 +42,7 @@ class FiveMinIE(InfoExtractor):
'title': 'How to Make a Next-Level Fruit Salad',
'duration': 184,
},
+ 'skip': 'no longer available',
},
]
_ERRORS = {
@@ -91,20 +89,33 @@ class FiveMinIE(InfoExtractor):
}
def _real_extract(self, url):
- video_id = self._match_id(url)
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ sid = mobj.group('sid')
+
+ if mobj.group('query'):
+ qs = compat_parse_qs(mobj.group('query'))
+ if not qs.get('playList'):
+ raise ExtractorError('Invalid URL', expected=True)
+ video_id = qs['playList'][0]
+ if qs.get('sid'):
+ sid = qs['sid'][0]
+
embed_url = 'https://embed.5min.com/playerseed/?playList=%s' % video_id
- embed_page = self._download_webpage(embed_url, video_id,
- 'Downloading embed page')
- sid = self._search_regex(r'sid=(\d+)', embed_page, 'sid')
- query = compat_urllib_parse.urlencode({
- 'func': 'GetResults',
- 'playlist': video_id,
- 'sid': sid,
- 'isPlayerSeed': 'true',
- 'url': embed_url,
- })
+ if not sid:
+ embed_page = self._download_webpage(embed_url, video_id,
+ 'Downloading embed page')
+ sid = self._search_regex(r'sid=(\d+)', embed_page, 'sid')
+
response = self._download_json(
- 'https://syn.5min.com/handlers/SenseHandler.ashx?' + query,
+ 'https://syn.5min.com/handlers/SenseHandler.ashx?' +
+ compat_urllib_parse.urlencode({
+ 'func': 'GetResults',
+ 'playlist': video_id,
+ 'sid': sid,
+ 'isPlayerSeed': 'true',
+ 'url': embed_url,
+ }),
video_id)
if not response['success']:
raise ExtractorError(
@@ -118,9 +129,7 @@ class FiveMinIE(InfoExtractor):
parsed_video_url = compat_urllib_parse_urlparse(compat_parse_qs(
compat_urllib_parse_urlparse(info['EmbededURL']).query)['videoUrl'][0])
for rendition in info['Renditions']:
- if rendition['RenditionType'] == 'm3u8':
- formats.extend(self._extract_m3u8_formats(rendition['Url'], video_id, m3u8_id='hls'))
- elif rendition['RenditionType'] == 'aac':
+ if rendition['RenditionType'] == 'aac' or rendition['RenditionType'] == 'm3u8':
continue
else:
rendition_url = compat_urlparse.urlunparse(parsed_video_url._replace(path=replace_extension(parsed_video_url.path.replace('//', '/%s/' % rendition['ID']), rendition['RenditionType'])))
diff --git a/youtube_dl/extractor/freespeech.py b/youtube_dl/extractor/freespeech.py
index c210177f7..1477708bb 100644
--- a/youtube_dl/extractor/freespeech.py
+++ b/youtube_dl/extractor/freespeech.py
@@ -14,7 +14,7 @@ class FreespeechIE(InfoExtractor):
'url': 'https://www.freespeech.org/video/obama-romney-campaign-colorado-ahead-debate-0',
'info_dict': {
'id': 'poKsVCZ64uU',
- 'ext': 'mp4',
+ 'ext': 'webm',
'title': 'Obama, Romney Campaign in Colorado Ahead of Debate',
'description': 'Obama, Romney Campaign in Colorado Ahead of Debate',
'uploader': 'freespeechtv',
diff --git a/youtube_dl/extractor/iqiyi.py b/youtube_dl/extractor/iqiyi.py
index d3bee3a19..e7c0cb3f6 100644
--- a/youtube_dl/extractor/iqiyi.py
+++ b/youtube_dl/extractor/iqiyi.py
@@ -501,7 +501,7 @@ class IqiyiIE(InfoExtractor):
def get_enc_key(self, video_id):
# TODO: automatic key extraction
# last update at 2016-01-22 for Zombie::bite
- enc_key = '6ab6d0280511493ba85594779759d4ed'
+ enc_key = '8ed797d224d043e7ac23d95b70227d32'
return enc_key
def _extract_playlist(self, webpage):
diff --git a/youtube_dl/extractor/jeuxvideo.py b/youtube_dl/extractor/jeuxvideo.py
index eef7daa29..137db873c 100644
--- a/youtube_dl/extractor/jeuxvideo.py
+++ b/youtube_dl/extractor/jeuxvideo.py
@@ -30,7 +30,7 @@ class JeuxVideoIE(InfoExtractor):
webpage = self._download_webpage(url, title)
title = self._html_search_meta('name', webpage) or self._og_search_title(webpage)
config_url = self._html_search_regex(
- r'data-src="(/contenu/medias/video.php.*?)"',
+ r'data-src(?:set-video)?="(/contenu/medias/video.php.*?)"',
webpage, 'config URL')
config_url = 'http://www.jeuxvideo.com' + config_url
diff --git a/youtube_dl/extractor/khanacademy.py b/youtube_dl/extractor/khanacademy.py
index 08a671fa8..61739efa7 100644
--- a/youtube_dl/extractor/khanacademy.py
+++ b/youtube_dl/extractor/khanacademy.py
@@ -14,10 +14,10 @@ class KhanAcademyIE(InfoExtractor):
_TESTS = [{
'url': 'http://www.khanacademy.org/video/one-time-pad',
- 'md5': '7021db7f2d47d4fff89b13177cb1e8f4',
+ 'md5': '7b391cce85e758fb94f763ddc1bbb979',
'info_dict': {
'id': 'one-time-pad',
- 'ext': 'mp4',
+ 'ext': 'webm',
'title': 'The one-time pad',
'description': 'The perfect cipher',
'duration': 176,
diff --git a/youtube_dl/extractor/livestream.py b/youtube_dl/extractor/livestream.py
index 988436226..eada7c299 100644
--- a/youtube_dl/extractor/livestream.py
+++ b/youtube_dl/extractor/livestream.py
@@ -14,6 +14,7 @@ from ..utils import (
xpath_with_ns,
xpath_text,
orderedSet,
+ update_url_query,
int_or_none,
float_or_none,
parse_iso8601,
@@ -72,7 +73,10 @@ class LivestreamIE(InfoExtractor):
for vn in video_nodes:
tbr = int_or_none(vn.attrib.get('system-bitrate'), 1000)
furl = (
- '%s%s?v=3.0.3&fp=WIN%%2014,0,0,145' % (base, vn.attrib['src']))
+ update_url_query(compat_urlparse.urljoin(base, vn.attrib['src']), {
+ 'v': '3.0.3',
+ 'fp': 'WIN% 14,0,0,145',
+ }))
if 'clipBegin' in vn.attrib:
furl += '&ssek=' + vn.attrib['clipBegin']
formats.append({
diff --git a/youtube_dl/extractor/mit.py b/youtube_dl/extractor/mit.py
index 29ca45778..819c1b90b 100644
--- a/youtube_dl/extractor/mit.py
+++ b/youtube_dl/extractor/mit.py
@@ -99,7 +99,7 @@ class OCWMITIE(InfoExtractor):
'url': 'http://ocw.mit.edu/courses/electrical-engineering-and-computer-science/6-041-probabilistic-systems-analysis-and-applied-probability-fall-2010/video-lectures/lecture-7-multiple-variables-expectations-independence/',
'info_dict': {
'id': 'EObHWIEKGjA',
- 'ext': 'mp4',
+ 'ext': 'webm',
'title': 'Lecture 7: Multiple Discrete Random Variables: Expectations, Conditioning, Independence',
'description': 'In this lecture, the professor discussed multiple random variables, expectations, and binomial distribution.',
'upload_date': '20121109',
diff --git a/youtube_dl/extractor/pyvideo.py b/youtube_dl/extractor/pyvideo.py
index 6d5732d45..30a5f2de4 100644
--- a/youtube_dl/extractor/pyvideo.py
+++ b/youtube_dl/extractor/pyvideo.py
@@ -12,14 +12,14 @@ class PyvideoIE(InfoExtractor):
_TESTS = [
{
'url': 'http://pyvideo.org/video/1737/become-a-logging-expert-in-30-minutes',
- 'md5': 'de317418c8bc76b1fd8633e4f32acbc6',
+ 'md5': '520915673e53a5c5d487c36e0c4d85b5',
'info_dict': {
'id': '24_4WWkSmNo',
- 'ext': 'mp4',
+ 'ext': 'webm',
'title': 'Become a logging expert in 30 minutes',
'description': 'md5:9665350d466c67fb5b1598de379021f7',
'upload_date': '20130320',
- 'uploader': 'NextDayVideo',
+ 'uploader': 'Next Day Video',
'uploader_id': 'NextDayVideo',
},
'add_ie': ['Youtube'],
diff --git a/youtube_dl/extractor/revision3.py b/youtube_dl/extractor/revision3.py
index b1b8800b9..99979ebe1 100644
--- a/youtube_dl/extractor/revision3.py
+++ b/youtube_dl/extractor/revision3.py
@@ -19,7 +19,7 @@ class Revision3IE(InfoExtractor):
'url': 'http://www.revision3.com/technobuffalo/5-google-predictions-for-2016',
'md5': 'd94a72d85d0a829766de4deb8daaf7df',
'info_dict': {
- 'id': '73034',
+ 'id': '71089',
'display_id': 'technobuffalo/5-google-predictions-for-2016',
'ext': 'webm',
'title': '5 Google Predictions for 2016',
@@ -31,6 +31,7 @@ class Revision3IE(InfoExtractor):
'uploader_id': 'technobuffalo',
}
}, {
+ # Show
'url': 'http://testtube.com/brainstuff',
'info_dict': {
'id': '251',
@@ -41,7 +42,7 @@ class Revision3IE(InfoExtractor):
}, {
'url': 'https://testtube.com/dnews/5-weird-ways-plants-can-eat-animals?utm_source=FB&utm_medium=DNews&utm_campaign=DNewsSocial',
'info_dict': {
- 'id': '60163',
+ 'id': '58227',
'display_id': 'dnews/5-weird-ways-plants-can-eat-animals',
'duration': 275,
'ext': 'webm',
@@ -52,18 +53,72 @@ class Revision3IE(InfoExtractor):
'uploader': 'DNews',
'uploader_id': 'dnews',
},
+ }, {
+ 'url': 'http://testtube.com/tt-editors-picks/the-israel-palestine-conflict-explained-in-ten-min',
+ 'info_dict': {
+ 'id': '71618',
+ 'ext': 'mp4',
+ 'display_id': 'tt-editors-picks/the-israel-palestine-conflict-explained-in-ten-min',
+ 'title': 'The Israel-Palestine Conflict Explained in Ten Minutes',
+ 'description': 'If you\'d like to learn about the struggle between Israelis and Palestinians, this video is a great place to start',
+ 'uploader': 'Editors\' Picks',
+ 'uploader_id': 'tt-editors-picks',
+ 'timestamp': 1453309200,
+ 'upload_date': '20160120',
+ },
+ 'add_ie': ['Youtube'],
+ }, {
+ # Tag
+ 'url': 'http://testtube.com/tech-news',
+ 'info_dict': {
+ 'id': '21018',
+ 'title': 'tech news',
+ },
+ 'playlist_mincount': 9,
}]
_PAGE_DATA_TEMPLATE = 'http://www.%s/apiProxy/ddn/%s?domain=%s'
_API_KEY = 'ba9c741bce1b9d8e3defcc22193f3651b8867e62'
def _real_extract(self, url):
domain, display_id = re.match(self._VALID_URL, url).groups()
+ site = domain.split('.')[0]
page_info = self._download_json(
self._PAGE_DATA_TEMPLATE % (domain, display_id, domain), display_id)
- if page_info['data']['type'] == 'episode':
- episode_data = page_info['data']
- video_id = compat_str(episode_data['video']['data']['id'])
+ page_data = page_info['data']
+ page_type = page_data['type']
+ if page_type in ('episode', 'embed'):
+ show_data = page_data['show']['data']
+ page_id = compat_str(page_data['id'])
+ video_id = compat_str(page_data['video']['data']['id'])
+
+ preference = qualities(['mini', 'small', 'medium', 'large'])
+ thumbnails = [{
+ 'url': image_url,
+ 'id': image_id,
+ 'preference': preference(image_id)
+ } for image_id, image_url in page_data.get('images', {}).items()]
+
+ info = {
+ 'id': page_id,
+ 'display_id': display_id,
+ 'title': unescapeHTML(page_data['name']),
+ 'description': unescapeHTML(page_data.get('summary')),
+ 'timestamp': parse_iso8601(page_data.get('publishTime'), ' '),
+ 'author': page_data.get('author'),
+ 'uploader': show_data.get('name'),
+ 'uploader_id': show_data.get('slug'),
+ 'thumbnails': thumbnails,
+ 'extractor_key': site,
+ }
+
+ if page_type == 'embed':
+ info.update({
+ '_type': 'url_transparent',
+ 'url': page_data['video']['data']['embed'],
+ })
+ return info
+
video_data = self._download_json(
'http://revision3.com/api/getPlaylist.json?api_key=%s&codecs=h264,vp8,theora&video_id=%s' % (self._API_KEY, video_id),
video_id)['items'][0]
@@ -84,36 +139,30 @@ class Revision3IE(InfoExtractor):
})
self._sort_formats(formats)
- preference = qualities(['mini', 'small', 'medium', 'large'])
- thumbnails = [{
- 'url': image_url,
- 'id': image_id,
- 'preference': preference(image_id)
- } for image_id, image_url in video_data.get('images', {}).items()]
-
- return {
- 'id': video_id,
- 'display_id': display_id,
+ info.update({
'title': unescapeHTML(video_data['title']),
'description': unescapeHTML(video_data.get('summary')),
- 'timestamp': parse_iso8601(episode_data.get('publishTime'), ' '),
- 'author': episode_data.get('author'),
'uploader': video_data.get('show', {}).get('name'),
'uploader_id': video_data.get('show', {}).get('slug'),
'duration': int_or_none(video_data.get('duration')),
- 'thumbnails': thumbnails,
'formats': formats,
- }
+ })
+ return info
else:
- show_data = page_info['show']['data']
+ list_data = page_info[page_type]['data']
episodes_data = page_info['episodes']['data']
num_episodes = page_info['meta']['totalEpisodes']
processed_episodes = 0
entries = []
page_num = 1
while True:
- entries.extend([self.url_result(
- 'http://%s/%s/%s' % (domain, display_id, episode['slug'])) for episode in episodes_data])
+ entries.extend([{
+ '_type': 'url',
+ 'url': 'http://%s%s' % (domain, episode['path']),
+ 'id': compat_str(episode['id']),
+ 'ie_key': 'Revision3',
+ 'extractor_key': site,
+ } for episode in episodes_data])
processed_episodes += len(episodes_data)
if processed_episodes == num_episodes:
break
@@ -123,5 +172,5 @@ class Revision3IE(InfoExtractor):
display_id)['episodes']['data']
return self.playlist_result(
- entries, compat_str(show_data['id']),
- show_data.get('name'), show_data.get('summary'))
+ entries, compat_str(list_data['id']),
+ list_data.get('name'), list_data.get('summary'))
diff --git a/youtube_dl/extractor/safari.py b/youtube_dl/extractor/safari.py
index 7de7b7273..a65fc8ed7 100644
--- a/youtube_dl/extractor/safari.py
+++ b/youtube_dl/extractor/safari.py
@@ -36,12 +36,13 @@ class SafariBaseIE(InfoExtractor):
if username is None:
self.raise_login_required('safaribooksonline.com account is required')
- headers = std_headers
+ headers = std_headers.copy()
if 'Referer' not in headers:
headers['Referer'] = self._LOGIN_URL
+ login_page_request = sanitized_Request(self._LOGIN_URL, headers=headers)
login_page = self._download_webpage(
- self._LOGIN_URL, None,
+ login_page_request, None,
'Downloading login form')
csrf = self._html_search_regex(
diff --git a/youtube_dl/extractor/sexu.py b/youtube_dl/extractor/sexu.py
index 6365a8779..a99b2a8e7 100644
--- a/youtube_dl/extractor/sexu.py
+++ b/youtube_dl/extractor/sexu.py
@@ -1,7 +1,5 @@
from __future__ import unicode_literals
-import re
-
from .common import InfoExtractor
@@ -14,7 +12,7 @@ class SexuIE(InfoExtractor):
'id': '961791',
'ext': 'mp4',
'title': 'md5:4d05a19a5fc049a63dbbaf05fb71d91b',
- 'description': 'md5:c5ed8625eb386855d5a7967bd7b77a54',
+ 'description': 'md5:2b75327061310a3afb3fbd7d09e2e403',
'categories': list, # NSFW
'thumbnail': 're:https?://.*\.jpg$',
'age_limit': 18,
@@ -25,13 +23,18 @@ class SexuIE(InfoExtractor):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
- quality_arr = self._search_regex(
- r'sources:\s*\[([^\]]+)\]', webpage, 'forrmat string')
+ jwvideo = self._parse_json(
+ self._search_regex(r'\.setup\(\s*({.+?})\s*\);', webpage, 'jwvideo'),
+ video_id)
+
+ sources = jwvideo['sources']
+
formats = [{
- 'url': fmt[0].replace('\\', ''),
- 'format_id': fmt[1],
- 'height': int(fmt[1][:3]),
- } for fmt in re.findall(r'"file":"([^"]+)","label":"([^"]+)"', quality_arr)]
+ 'url': source['file'].replace('\\', ''),
+ 'format_id': source.get('label'),
+ 'height': self._search_regex(
+ r'^(\d+)[pP]', source.get('label', ''), 'height', default=None),
+ } for source in sources if source.get('file')]
self._sort_formats(formats)
title = self._html_search_regex(
@@ -40,9 +43,7 @@ class SexuIE(InfoExtractor):
description = self._html_search_meta(
'description', webpage, 'description')
- thumbnail = self._html_search_regex(
- r'image:\s*"([^"]+)"',
- webpage, 'thumbnail', fatal=False)
+ thumbnail = jwvideo.get('image')
categories_str = self._html_search_meta(
'keywords', webpage, 'categories')
diff --git a/youtube_dl/extractor/ted.py b/youtube_dl/extractor/ted.py
index a48d77c30..cf8851438 100644
--- a/youtube_dl/extractor/ted.py
+++ b/youtube_dl/extractor/ted.py
@@ -73,7 +73,7 @@ class TEDIE(InfoExtractor):
'add_ie': ['Youtube'],
'info_dict': {
'id': '_ZG8HBuDjgc',
- 'ext': 'mp4',
+ 'ext': 'webm',
'title': 'Douglas Adams: Parrots the Universe and Everything',
'description': 'md5:01ad1e199c49ac640cb1196c0e9016af',
'uploader': 'University of California Television (UCTV)',
diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py
index 8639293e3..958bf8fff 100644
--- a/youtube_dl/extractor/twitch.py
+++ b/youtube_dl/extractor/twitch.py
@@ -252,6 +252,7 @@ class TwitchVodIE(TwitchItemBaseIE):
self._USHER_BASE, item_id,
compat_urllib_parse.urlencode({
'allow_source': 'true',
+ 'allow_audio_only': 'true',
'allow_spectre': 'true',
'player': 'twitchweb',
'nauth': access_token['token'],
@@ -431,6 +432,7 @@ class TwitchStreamIE(TwitchBaseIE):
query = {
'allow_source': 'true',
+ 'allow_audio_only': 'true',
'p': random.randint(1000000, 10000000),
'player': 'twitchweb',
'segment_preference': '4',
diff --git a/youtube_dl/extractor/vgtv.py b/youtube_dl/extractor/vgtv.py
index 14e945d49..e148b1ef5 100644
--- a/youtube_dl/extractor/vgtv.py
+++ b/youtube_dl/extractor/vgtv.py
@@ -20,6 +20,7 @@ class VGTVIE(XstreamIE):
'aftenbladet.no/tv': 'satv',
'fvn.no/fvntv': 'fvntv',
'aftenposten.no/webtv': 'aptv',
+ 'ap.vgtv.no/webtv': 'aptv',
}
_APP_NAME_TO_VENDOR = {
@@ -35,7 +36,7 @@ class VGTVIE(XstreamIE):
(?P<host>
%s
)
- /
+ /?
(?:
\#!/(?:video|live)/|
embed?.*id=
@@ -107,19 +108,27 @@ class VGTVIE(XstreamIE):
'md5': 'fd828cd29774a729bf4d4425fe192972',
'info_dict': {
'id': '21039',
- 'ext': 'mov',
+ 'ext': 'mp4',
'title': 'TRAILER: «SWEATSHOP» - I can´t take any more',
'description': 'md5:21891f2b0dd7ec2f78d84a50e54f8238',
'duration': 66,
'timestamp': 1417002452,
'upload_date': '20141126',
'view_count': int,
- }
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
},
{
'url': 'http://www.bt.no/tv/#!/video/100250/norling-dette-er-forskjellen-paa-1-divisjon-og-eliteserien',
'only_matching': True,
},
+ {
+ 'url': 'http://ap.vgtv.no/webtv#!/video/111084/de-nye-bysyklene-lettere-bedre-gir-stoerre-hjul-og-feste-til-mobil',
+ 'only_matching': True,
+ },
]
def _real_extract(self, url):
@@ -144,8 +153,6 @@ class VGTVIE(XstreamIE):
if len(video_id) == 5:
if appname == 'bttv':
info = self._extract_video_info('btno', video_id)
- elif appname == 'aptv':
- info = self._extract_video_info('ap', video_id)
streams = data['streamUrls']
stream_type = data.get('streamType')
diff --git a/youtube_dl/extractor/viki.py b/youtube_dl/extractor/viki.py
index 433fc9914..e04b814c8 100644
--- a/youtube_dl/extractor/viki.py
+++ b/youtube_dl/extractor/viki.py
@@ -176,13 +176,13 @@ class VikiIE(VikiBaseIE):
}, {
# youtube external
'url': 'http://www.viki.com/videos/50562v-poor-nastya-complete-episode-1',
- 'md5': '216d1afdc0c64d1febc1e9f2bd4b864b',
+ 'md5': '63f8600c1da6f01b7640eee7eca4f1da',
'info_dict': {
'id': '50562v',
- 'ext': 'mp4',
+ 'ext': 'webm',
'title': 'Poor Nastya [COMPLETE] - Episode 1',
'description': '',
- 'duration': 607,
+ 'duration': 606,
'timestamp': 1274949505,
'upload_date': '20101213',
'uploader': 'ad14065n',
diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py
index 9f282a1da..71c30d2cd 100644
--- a/youtube_dl/extractor/vimeo.py
+++ b/youtube_dl/extractor/vimeo.py
@@ -73,15 +73,26 @@ class VimeoIE(VimeoBaseInfoExtractor):
# _VALID_URL matches Vimeo URLs
_VALID_URL = r'''(?x)
- https?://
- (?:(?:www|(?P<player>player))\.)?
- vimeo(?P<pro>pro)?\.com/
- (?!channels/[^/?#]+/?(?:$|[?#])|album/)
- (?:.*?/)?
- (?:(?:play_redirect_hls|moogaloop\.swf)\?clip_id=)?
- (?:videos?/)?
- (?P<id>[0-9]+)
- /?(?:[?&].*)?(?:[#].*)?$'''
+ https?://
+ (?:
+ (?:
+ www|
+ (?P<player>player)
+ )
+ \.
+ )?
+ vimeo(?P<pro>pro)?\.com/
+ (?!channels/[^/?#]+/?(?:$|[?#])|(?:album|ondemand)/)
+ (?:.*?/)?
+ (?:
+ (?:
+ play_redirect_hls|
+ moogaloop\.swf)\?clip_id=
+ )?
+ (?:videos?/)?
+ (?P<id>[0-9]+)
+ /?(?:[?&].*)?(?:[#].*)?$
+ '''
IE_NAME = 'vimeo'
_TESTS = [
{
@@ -277,9 +288,8 @@ class VimeoIE(VimeoBaseInfoExtractor):
def _real_extract(self, url):
url, data = unsmuggle_url(url, {})
- headers = std_headers
+ headers = std_headers.copy()
if 'http_headers' in data:
- headers = headers.copy()
headers.update(data['http_headers'])
if 'Referer' not in headers:
headers['Referer'] = url
@@ -294,7 +304,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
url = 'https://vimeo.com/' + video_id
# Retrieve video webpage to extract further information
- request = sanitized_Request(url, None, headers)
+ request = sanitized_Request(url, headers=headers)
try:
webpage = self._download_webpage(request, video_id)
except ExtractorError as ee:
@@ -498,6 +508,38 @@ class VimeoIE(VimeoBaseInfoExtractor):
}
+class VimeoOndemandIE(VimeoBaseInfoExtractor):
+ IE_NAME = 'vimeo:ondemand'
+ _VALID_URL = r'https?://(?:www\.)?vimeo\.com/ondemand/(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ # ondemand video not available via https://vimeo.com/id
+ 'url': 'https://vimeo.com/ondemand/20704',
+ 'md5': 'c424deda8c7f73c1dfb3edd7630e2f35',
+ 'info_dict': {
+ 'id': '105442900',
+ 'ext': 'mp4',
+ 'title': 'המעבדה - במאי יותם פלדמן',
+ 'uploader': 'גם סרטים',
+ 'uploader_url': 're:https?://(?:www\.)?vimeo\.com/gumfilms',
+ 'uploader_id': 'gumfilms',
+ },
+ }, {
+ 'url': 'https://vimeo.com/ondemand/nazmaalik',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://vimeo.com/ondemand/141692381',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://vimeo.com/ondemand/thelastcolony/150274832',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ return self.url_result(self._og_search_video_url(webpage), VimeoIE.ie_key())
+
+
class VimeoChannelIE(VimeoBaseInfoExtractor):
IE_NAME = 'vimeo:channel'
_VALID_URL = r'https://vimeo\.com/channels/(?P<id>[^/?#]+)/?(?:$|[?#])'
diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py
index 670a438af..d560a4b5e 100644
--- a/youtube_dl/extractor/vk.py
+++ b/youtube_dl/extractor/vk.py
@@ -142,10 +142,10 @@ class VKIE(InfoExtractor):
'url': 'https://vk.com/video276849682_170681728',
'info_dict': {
'id': 'V3K4mi0SYkc',
- 'ext': 'mp4',
+ 'ext': 'webm',
'title': "DSWD Awards 'Children's Joy Foundation, Inc.' Certificate of Registration and License to Operate",
'description': 'md5:bf9c26cfa4acdfb146362682edd3827a',
- 'duration': 179,
+ 'duration': 178,
'upload_date': '20130116',
'uploader': "Children's Joy Foundation",
'uploader_id': 'thecjf',
diff --git a/youtube_dl/extractor/wimp.py b/youtube_dl/extractor/wimp.py
index 041ff6c55..fb0accac7 100644
--- a/youtube_dl/extractor/wimp.py
+++ b/youtube_dl/extractor/wimp.py
@@ -20,7 +20,7 @@ class WimpIE(InfoExtractor):
'md5': '4e2986c793694b55b37cf92521d12bb4',
'info_dict': {
'id': 'clowncar',
- 'ext': 'mp4',
+ 'ext': 'webm',
'title': 'It\'s like a clown car.',
'description': 'md5:0e56db1370a6e49c5c1d19124c0d2fb2',
},
diff --git a/youtube_dl/postprocessor/__init__.py b/youtube_dl/postprocessor/__init__.py
index 0d8ef6ca2..3ea518399 100644
--- a/youtube_dl/postprocessor/__init__.py
+++ b/youtube_dl/postprocessor/__init__.py
@@ -6,6 +6,7 @@ from .ffmpeg import (
FFmpegEmbedSubtitlePP,
FFmpegExtractAudioPP,
FFmpegFixupStretchedPP,
+ FFmpegFixupM3u8PP,
FFmpegFixupM4aPP,
FFmpegMergerPP,
FFmpegMetadataPP,
@@ -26,6 +27,7 @@ __all__ = [
'ExecAfterDownloadPP',
'FFmpegEmbedSubtitlePP',
'FFmpegExtractAudioPP',
+ 'FFmpegFixupM3u8PP',
'FFmpegFixupM4aPP',
'FFmpegFixupStretchedPP',
'FFmpegMergerPP',
diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py
index 380bc6f29..81102f9bb 100644
--- a/youtube_dl/postprocessor/ffmpeg.py
+++ b/youtube_dl/postprocessor/ffmpeg.py
@@ -391,10 +391,6 @@ class FFmpegMetadataPP(FFmpegPostProcessor):
for (name, value) in metadata.items():
options.extend(['-metadata', '%s=%s' % (name, value)])
- # https://github.com/rg3/youtube-dl/issues/8350
- if info.get('protocol') == 'm3u8_native' or info.get('protocol') == 'm3u8' and self._downloader.params.get('hls_prefer_native', False):
- options.extend(['-bsf:a', 'aac_adtstoasc'])
-
self._downloader.to_screen('[ffmpeg] Adding metadata to \'%s\'' % filename)
self.run_ffmpeg(filename, temp_filename, options)
os.remove(encodeFilename(filename))
@@ -467,6 +463,21 @@ class FFmpegFixupM4aPP(FFmpegPostProcessor):
return [], info
+class FFmpegFixupM3u8PP(FFmpegPostProcessor):
+ def run(self, info):
+ filename = info['filepath']
+ temp_filename = prepend_extension(filename, 'temp')
+
+ options = ['-c', 'copy', '-f', 'mp4', '-bsf:a', 'aac_adtstoasc']
+ self._downloader.to_screen('[ffmpeg] Fixing malformated aac bitstream in "%s"' % filename)
+ self.run_ffmpeg(filename, temp_filename, options)
+
+ os.remove(encodeFilename(filename))
+ os.rename(encodeFilename(temp_filename), encodeFilename(filename))
+
+ return [], info
+
+
class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor):
def __init__(self, downloader=None, format=None):
super(FFmpegSubtitlesConvertorPP, self).__init__(downloader)
diff --git a/youtube_dl/version.py b/youtube_dl/version.py
index adafd601b..246f5740d 100644
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,3 +1,3 @@
from __future__ import unicode_literals
-__version__ = '2016.03.01'
+__version__ = '2016.03.06'