aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.gitignore2
-rw-r--r--.travis.yml1
-rw-r--r--AUTHORS4
-rw-r--r--CONTRIBUTING.md2
-rw-r--r--Makefile4
-rw-r--r--README.md29
-rwxr-xr-xdevscripts/gh-pages/update-sites.py2
-rw-r--r--devscripts/make_supportedsites.py4
-rw-r--r--test/helper.py28
-rw-r--r--test/test_InfoExtractor.py18
-rw-r--r--test/test_YoutubeDL.py35
-rw-r--r--test/test_age_restriction.py5
-rw-r--r--test/test_subtitles.py28
-rw-r--r--test/test_utils.py8
-rwxr-xr-xyoutube_dl/YoutubeDL.py30
-rw-r--r--youtube_dl/__init__.py23
-rw-r--r--youtube_dl/downloader/f4m.py45
-rw-r--r--youtube_dl/downloader/hls.py12
-rw-r--r--youtube_dl/downloader/mplayer.py8
-rw-r--r--youtube_dl/extractor/__init__.py20
-rw-r--r--youtube_dl/extractor/auengine.py24
-rw-r--r--youtube_dl/extractor/bbccouk.py44
-rw-r--r--youtube_dl/extractor/bet.py5
-rw-r--r--youtube_dl/extractor/bilibili.py67
-rw-r--r--youtube_dl/extractor/buzzfeed.py6
-rw-r--r--youtube_dl/extractor/canalplus.py12
-rw-r--r--youtube_dl/extractor/ceskatelevize.py133
-rw-r--r--youtube_dl/extractor/common.py45
-rw-r--r--youtube_dl/extractor/commonmistakes.py29
-rw-r--r--youtube_dl/extractor/crunchyroll.py2
-rw-r--r--youtube_dl/extractor/discovery.py46
-rw-r--r--youtube_dl/extractor/ellentv.py35
-rw-r--r--youtube_dl/extractor/elpais.py6
-rw-r--r--youtube_dl/extractor/fktv.py21
-rw-r--r--youtube_dl/extractor/gameone.py3
-rw-r--r--youtube_dl/extractor/gdcvault.py3
-rw-r--r--youtube_dl/extractor/generic.py24
-rw-r--r--youtube_dl/extractor/giga.py101
-rw-r--r--youtube_dl/extractor/huffpost.py20
-rw-r--r--youtube_dl/extractor/imdb.py1
-rw-r--r--youtube_dl/extractor/khanacademy.py4
-rw-r--r--youtube_dl/extractor/kontrtube.py23
-rw-r--r--youtube_dl/extractor/lrt.py6
-rw-r--r--youtube_dl/extractor/mit.py6
-rw-r--r--youtube_dl/extractor/motorsport.py60
-rw-r--r--youtube_dl/extractor/netzkino.py86
-rw-r--r--youtube_dl/extractor/normalboots.py6
-rw-r--r--youtube_dl/extractor/nrk.py126
-rw-r--r--youtube_dl/extractor/played.py1
-rw-r--r--youtube_dl/extractor/radiobremen.py63
-rw-r--r--youtube_dl/extractor/rtlnl.py2
-rw-r--r--youtube_dl/extractor/rutube.py31
-rw-r--r--youtube_dl/extractor/sexykarma.py5
-rw-r--r--youtube_dl/extractor/soulanime.py80
-rw-r--r--youtube_dl/extractor/teachertube.py4
-rw-r--r--youtube_dl/extractor/ted.py4
-rw-r--r--youtube_dl/extractor/tf1.py24
-rw-r--r--youtube_dl/extractor/tudou.py19
-rw-r--r--youtube_dl/extractor/tunein.py15
-rw-r--r--youtube_dl/extractor/vier.py118
-rw-r--r--youtube_dl/extractor/viki.py4
-rw-r--r--youtube_dl/extractor/vimple.py23
-rw-r--r--youtube_dl/extractor/vk.py8
-rw-r--r--youtube_dl/extractor/washingtonpost.py11
-rw-r--r--youtube_dl/extractor/wdr.py31
-rw-r--r--youtube_dl/extractor/webofstories.py102
-rw-r--r--youtube_dl/extractor/xhamster.py11
-rw-r--r--youtube_dl/extractor/xtube.py4
-rw-r--r--youtube_dl/extractor/youtube.py84
-rw-r--r--youtube_dl/extractor/zdf.py2
-rw-r--r--youtube_dl/options.py23
-rw-r--r--youtube_dl/postprocessor/ffmpeg.py11
-rw-r--r--youtube_dl/update.py12
-rw-r--r--youtube_dl/utils.py24
-rw-r--r--youtube_dl/version.py2
75 files changed, 1497 insertions, 473 deletions
diff --git a/.gitignore b/.gitignore
index 86312d4e4..0422adf44 100644
--- a/.gitignore
+++ b/.gitignore
@@ -31,3 +31,5 @@ updates_key.pem
test/testdata
.tox
youtube-dl.zsh
+.idea
+.idea/* \ No newline at end of file
diff --git a/.travis.yml b/.travis.yml
index c6cc7a994..f14014414 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -9,7 +9,6 @@ notifications:
email:
- filippo.valsorda@gmail.com
- phihag@phihag.de
- - jaime.marquinez.ferrandiz+travis@gmail.com
- yasoob.khld@gmail.com
# irc:
# channels:
diff --git a/AUTHORS b/AUTHORS
index 29ce9e3e4..8f2010803 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -97,3 +97,7 @@ Petr Kutalek
Will Glynn
Max Reimann
Cédric Luthi
+Thijs Vermeir
+Joel Leclerc
+Christopher Krooss
+Ondřej Caletka
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 0ff7b395a..7917abfc6 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -44,7 +44,7 @@ In particular, every site support request issue should only pertain to services
### Is anyone going to need the feature?
-Only post features that you (or an incapicated friend you can personally talk to) require. Do not post features because they seem like a good idea. If they are really useful, they will be requested by someone who requires them.
+Only post features that you (or an incapacitated friend you can personally talk to) require. Do not post features because they seem like a good idea. If they are really useful, they will be requested by someone who requires them.
### Is your question about youtube-dl?
diff --git a/Makefile b/Makefile
index 71470eedb..578079879 100644
--- a/Makefile
+++ b/Makefile
@@ -46,7 +46,7 @@ test:
ot: offlinetest
offlinetest: codetest
- nosetests --verbose test --exclude test_download --exclude test_age_restriction --exclude test_subtitles --exclude test_write_annotations
+ nosetests --verbose test --exclude test_download --exclude test_age_restriction --exclude test_subtitles --exclude test_write_annotations --exclude test_youtube_lists
tar: youtube-dl.tar.gz
@@ -63,7 +63,7 @@ youtube-dl: youtube_dl/*.py youtube_dl/*/*.py
chmod a+x youtube-dl
README.md: youtube_dl/*.py youtube_dl/*/*.py
- COLUMNS=80 python -m youtube_dl --help | python devscripts/make_readme.py
+ COLUMNS=80 python youtube_dl/__main__.py --help | python devscripts/make_readme.py
CONTRIBUTING.md: README.md
python devscripts/make_contributing.py README.md CONTRIBUTING.md
diff --git a/README.md b/README.md
index 915bcd0cd..3d632a553 100644
--- a/README.md
+++ b/README.md
@@ -219,6 +219,8 @@ which means you can modify it, redistribute it or use it however you like.
for each command-line argument. If the URL
refers to a playlist, dump the whole
playlist information in a single line.
+ --print-json Be quiet and print the video information as
+ JSON (video is still being downloaded).
--newline output progress bar as new lines
--no-progress do not print progress bar
--console-title display progress in console titlebar
@@ -248,14 +250,15 @@ which means you can modify it, redistribute it or use it however you like.
## Video Format Options:
-f, --format FORMAT video format code, specify the order of
- preference using slashes: -f 22/17/18 . -f
- mp4 , -f m4a and -f flv are also
- supported. You can also use the special
- names "best", "bestvideo", "bestaudio",
- "worst", "worstvideo" and "worstaudio". By
- default, youtube-dl will pick the best
- quality. Use commas to download multiple
- audio formats, such as -f
+ preference using slashes, as in -f 22/17/18
+ . Instead of format codes, you can select
+ by extension for the extensions aac, m4a,
+ mp3, mp4, ogg, wav, webm. You can also use
+ the special names "best", "bestvideo",
+ "bestaudio", "worst". By default, youtube-
+ dl will pick the best quality. Use commas
+ to download multiple audio formats, such as
+ -f
136/137/mp4/bestvideo,140/m4a/bestaudio.
You can merge the video and audio of two
formats into a single file using -f <video-
@@ -326,7 +329,7 @@ which means you can modify it, redistribute it or use it however you like.
# CONFIGURATION
-You can configure youtube-dl by placing default arguments (such as `--extract-audio --no-mtime` to always extract the audio and not copy the mtime) into `/etc/youtube-dl.conf` and/or `~/.config/youtube-dl/config`. On Windows, the configuration file locations are `%APPDATA%\youtube-dl\config.txt` and `C:\Users\<Yourname>\youtube-dl.conf`.
+You can configure youtube-dl by placing default arguments (such as `--extract-audio --no-mtime` to always extract the audio and not copy the mtime) into `/etc/youtube-dl.conf` and/or `~/.config/youtube-dl/config`. On Windows, the configuration file locations are `%APPDATA%\youtube-dl\config.txt` and `C:\Users\<user name>\youtube-dl.conf`.
# OUTPUT TEMPLATE
@@ -449,6 +452,14 @@ Since June 2012 (#342) youtube-dl is packed as an executable zipfile, simply unz
To run the exe you need to install first the [Microsoft Visual C++ 2008 Redistributable Package](http://www.microsoft.com/en-us/download/details.aspx?id=29).
+### On Windows, how should I set up ffmpeg and youtube-dl? Where should I put the exe files?
+
+If you put youtube-dl and ffmpeg in the same directory that you're running the command from, it will work, but that's rather cumbersome.
+
+To make a different directory work - either for ffmpeg, or for youtube-dl, or for both - simply create the directory (say, `C:\bin`, or `C:\Users\<User name>\bin`), put all the executables directly in there, and then [set your PATH environment variable](https://www.java.com/en/download/help/path.xml) to include that directory.
+
+From then on, after restarting your shell, you will be able to access both youtube-dl and ffmpeg (and youtube-dl will be able to find ffmpeg) by simply typing `youtube-dl` or `ffmpeg`, no matter what directory you're in.
+
### How can I detect whether a given URL is supported by youtube-dl?
For one, have a look at the [list of supported sites](docs/supportedsites). Note that it can sometimes happen that the site changes its URL scheme (say, from http://example.com/v/1234567 to http://example.com/v/1234567 ) and youtube-dl reports an URL of a service in that list as unsupported. In that case, simply report a bug.
diff --git a/devscripts/gh-pages/update-sites.py b/devscripts/gh-pages/update-sites.py
index f0f0481c7..d3ef5f0b5 100755
--- a/devscripts/gh-pages/update-sites.py
+++ b/devscripts/gh-pages/update-sites.py
@@ -16,7 +16,7 @@ def main():
template = tmplf.read()
ie_htmls = []
- for ie in sorted(youtube_dl.gen_extractors(), key=lambda i: i.IE_NAME.lower()):
+ for ie in youtube_dl.list_extractors(age_limit=None):
ie_html = '<b>{}</b>'.format(ie.IE_NAME)
ie_desc = getattr(ie, 'IE_DESC', None)
if ie_desc is False:
diff --git a/devscripts/make_supportedsites.py b/devscripts/make_supportedsites.py
index 140010644..3df4385a6 100644
--- a/devscripts/make_supportedsites.py
+++ b/devscripts/make_supportedsites.py
@@ -23,12 +23,12 @@ def main():
def gen_ies_md(ies):
for ie in ies:
- ie_md = '**{}**'.format(ie.IE_NAME)
+ ie_md = '**{0}**'.format(ie.IE_NAME)
ie_desc = getattr(ie, 'IE_DESC', None)
if ie_desc is False:
continue
if ie_desc is not None:
- ie_md += ': {}'.format(ie.IE_DESC)
+ ie_md += ': {0}'.format(ie.IE_DESC)
if not ie.working():
ie_md += ' (Currently broken)'
yield ie_md
diff --git a/test/helper.py b/test/helper.py
index 96d58b7c1..c416f388c 100644
--- a/test/helper.py
+++ b/test/helper.py
@@ -82,18 +82,8 @@ class FakeYDL(YoutubeDL):
def gettestcases(include_onlymatching=False):
for ie in youtube_dl.extractor.gen_extractors():
- t = getattr(ie, '_TEST', None)
- if t:
- assert not hasattr(ie, '_TESTS'), \
- '%s has _TEST and _TESTS' % type(ie).__name__
- tests = [t]
- else:
- tests = getattr(ie, '_TESTS', [])
- for t in tests:
- if not include_onlymatching and t.get('only_matching', False):
- continue
- t['name'] = type(ie).__name__[:-len('IE')]
- yield t
+ for tc in ie.get_testcases(include_onlymatching):
+ yield tc
md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest()
@@ -120,6 +110,20 @@ def expect_info_dict(self, got_dict, expected_dict):
else:
if isinstance(expected, compat_str) and expected.startswith('md5:'):
got = 'md5:' + md5(got_dict.get(info_field))
+ elif isinstance(expected, compat_str) and expected.startswith('mincount:'):
+ got = got_dict.get(info_field)
+ self.assertTrue(
+ isinstance(got, list),
+ 'Expected field %s to be a list, but it is of type %s' % (
+ info_field, type(got).__name__))
+ expected_num = int(expected.partition(':')[2])
+ assertGreaterEqual(
+ self, len(got), expected_num,
+ 'Expected %d items in field %s, but only got %d' % (
+ expected_num, info_field, len(got)
+ )
+ )
+ continue
else:
got = got_dict.get(info_field)
self.assertEqual(expected, got,
diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py
index 13c18ed95..be8d12997 100644
--- a/test/test_InfoExtractor.py
+++ b/test/test_InfoExtractor.py
@@ -40,5 +40,23 @@ class TestInfoExtractor(unittest.TestCase):
self.assertEqual(ie._og_search_description(html), 'Some video\'s description ')
self.assertEqual(ie._og_search_thumbnail(html), 'http://domain.com/pic.jpg?key1=val1&key2=val2')
+ def test_html_search_meta(self):
+ ie = self.ie
+ html = '''
+ <meta name="a" content="1" />
+ <meta name='b' content='2'>
+ <meta name="c" content='3'>
+ <meta name=d content='4'>
+ <meta property="e" content='5' >
+ <meta content="6" name="f">
+ '''
+
+ self.assertEqual(ie._html_search_meta('a', html), '1')
+ self.assertEqual(ie._html_search_meta('b', html), '2')
+ self.assertEqual(ie._html_search_meta('c', html), '3')
+ self.assertEqual(ie._html_search_meta('d', html), '4')
+ self.assertEqual(ie._html_search_meta('e', html), '5')
+ self.assertEqual(ie._html_search_meta('f', html), '6')
+
if __name__ == '__main__':
unittest.main()
diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py
index f8e4f930e..85d87f2c3 100644
--- a/test/test_YoutubeDL.py
+++ b/test/test_YoutubeDL.py
@@ -8,6 +8,8 @@ import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+import copy
+
from test.helper import FakeYDL, assertRegexpMatches
from youtube_dl import YoutubeDL
from youtube_dl.extractor import YoutubeIE
@@ -192,6 +194,37 @@ class TestFormatSelection(unittest.TestCase):
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['format_id'], 'vid-high')
+ def test_format_selection_audio_exts(self):
+ formats = [
+ {'format_id': 'mp3-64', 'ext': 'mp3', 'abr': 64, 'url': 'http://_', 'vcodec': 'none'},
+ {'format_id': 'ogg-64', 'ext': 'ogg', 'abr': 64, 'url': 'http://_', 'vcodec': 'none'},
+ {'format_id': 'aac-64', 'ext': 'aac', 'abr': 64, 'url': 'http://_', 'vcodec': 'none'},
+ {'format_id': 'mp3-32', 'ext': 'mp3', 'abr': 32, 'url': 'http://_', 'vcodec': 'none'},
+ {'format_id': 'aac-32', 'ext': 'aac', 'abr': 32, 'url': 'http://_', 'vcodec': 'none'},
+ ]
+
+ info_dict = _make_result(formats)
+ ydl = YDL({'format': 'best'})
+ ie = YoutubeIE(ydl)
+ ie._sort_formats(info_dict['formats'])
+ ydl.process_ie_result(copy.deepcopy(info_dict))
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(downloaded['format_id'], 'aac-64')
+
+ ydl = YDL({'format': 'mp3'})
+ ie = YoutubeIE(ydl)
+ ie._sort_formats(info_dict['formats'])
+ ydl.process_ie_result(copy.deepcopy(info_dict))
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(downloaded['format_id'], 'mp3-64')
+
+ ydl = YDL({'prefer_free_formats': True})
+ ie = YoutubeIE(ydl)
+ ie._sort_formats(info_dict['formats'])
+ ydl.process_ie_result(copy.deepcopy(info_dict))
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(downloaded['format_id'], 'ogg-64')
+
def test_format_selection_video(self):
formats = [
{'format_id': 'dash-video-low', 'ext': 'mp4', 'preference': 1, 'acodec': 'none', 'url': '_'},
@@ -218,7 +251,7 @@ class TestFormatSelection(unittest.TestCase):
# 3D
'85', '84', '102', '83', '101', '82', '100',
# Dash video
- '138', '137', '248', '136', '247', '135', '246',
+ '137', '248', '136', '247', '135', '246',
'245', '244', '134', '243', '133', '242', '160',
# Dash audio
'141', '172', '140', '171', '139',
diff --git a/test/test_age_restriction.py b/test/test_age_restriction.py
index 5be065c43..6f5513faa 100644
--- a/test/test_age_restriction.py
+++ b/test/test_age_restriction.py
@@ -45,11 +45,6 @@ class TestAgeRestriction(unittest.TestCase):
'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/',
'505835.mp4', 2, old_age=25)
- def test_pornotube(self):
- self._assert_restricted(
- 'http://pornotube.com/c/173/m/1689755/Marilyn-Monroe-Bathing',
- '1689755.flv', 13)
-
if __name__ == '__main__':
unittest.main()
diff --git a/test/test_subtitles.py b/test/test_subtitles.py
index d34565191..6336dd317 100644
--- a/test/test_subtitles.py
+++ b/test/test_subtitles.py
@@ -17,6 +17,7 @@ from youtube_dl.extractor import (
TEDIE,
VimeoIE,
WallaIE,
+ CeskaTelevizeIE,
)
@@ -317,5 +318,32 @@ class TestWallaSubtitles(BaseTestSubtitles):
self.assertEqual(len(subtitles), 0)
+class TestCeskaTelevizeSubtitles(BaseTestSubtitles):
+ url = 'http://www.ceskatelevize.cz/ivysilani/10600540290-u6-uzasny-svet-techniky'
+ IE = CeskaTelevizeIE
+
+ def test_list_subtitles(self):
+ self.DL.expect_warning('Automatic Captions not supported by this server')
+ self.DL.params['listsubtitles'] = True
+ info_dict = self.getInfoDict()
+ self.assertEqual(info_dict, None)
+
+ def test_allsubtitles(self):
+ self.DL.expect_warning('Automatic Captions not supported by this server')
+ self.DL.params['writesubtitles'] = True
+ self.DL.params['allsubtitles'] = True
+ subtitles = self.getSubtitles()
+ self.assertEqual(set(subtitles.keys()), set(['cs']))
+ self.assertEqual(md5(subtitles['cs']), '9bf52d9549533c32c427e264bf0847d4')
+
+ def test_nosubtitles(self):
+ self.DL.expect_warning('video doesn\'t have subtitles')
+ self.url = 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220'
+ self.DL.params['writesubtitles'] = True
+ self.DL.params['allsubtitles'] = True
+ subtitles = self.getSubtitles()
+ self.assertEqual(len(subtitles), 0)
+
+
if __name__ == '__main__':
unittest.main()
diff --git a/test/test_utils.py b/test/test_utils.py
index dd49a6d17..16e1a1ddf 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -16,6 +16,7 @@ import json
import xml.etree.ElementTree
from youtube_dl.utils import (
+ age_restricted,
args_to_str,
clean_html,
DateRange,
@@ -402,5 +403,12 @@ Trying to open render node...
Success at /dev/dri/renderD128.
ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4')
+ def test_age_restricted(self):
+ self.assertFalse(age_restricted(None, 10)) # unrestricted content
+ self.assertFalse(age_restricted(1, None)) # unrestricted policy
+ self.assertFalse(age_restricted(8, 10))
+ self.assertTrue(age_restricted(18, 14))
+ self.assertFalse(age_restricted(18, 18))
+
if __name__ == '__main__':
unittest.main()
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index e2b823f66..61675d8ec 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -63,6 +63,7 @@ from .utils import (
YoutubeDLHandler,
prepend_extension,
args_to_str,
+ age_restricted,
)
from .cache import Cache
from .extractor import get_info_extractor, gen_extractors
@@ -202,6 +203,7 @@ class YoutubeDL(object):
Progress hooks are guaranteed to be called at least once
(with status "finished") if the download is successful.
+ merge_output_format: Extension to use when merging formats.
The following parameters are not used by YoutubeDL itself, they are used by
@@ -550,13 +552,8 @@ class YoutubeDL(object):
max_views = self.params.get('max_views')
if max_views is not None and view_count > max_views:
return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
- age_limit = self.params.get('age_limit')
- if age_limit is not None:
- actual_age_limit = info_dict.get('age_limit')
- if actual_age_limit is None:
- actual_age_limit = 0
- if age_limit < actual_age_limit:
- return 'Skipping "' + title + '" because it is age restricted'
+ if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
+ return 'Skipping "%s" because it is age restricted' % title
if self.in_download_archive(info_dict):
return '%s has already been recorded in archive' % video_title
return None
@@ -790,7 +787,7 @@ class YoutubeDL(object):
if video_formats:
return video_formats[0]
else:
- extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a']
+ extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
if format_spec in extensions:
filter_f = lambda f: f['ext'] == format_spec
else:
@@ -913,10 +910,23 @@ class YoutubeDL(object):
'contain the video, try using '
'"-f %s+%s"' % (format_2, format_1))
return
+ output_ext = (
+ formats_info[0]['ext']
+ if self.params.get('merge_output_format') is None
+ else self.params['merge_output_format'])
selected_format = {
'requested_formats': formats_info,
'format': rf,
'ext': formats_info[0]['ext'],
+ 'width': formats_info[0].get('width'),
+ 'height': formats_info[0].get('height'),
+ 'resolution': formats_info[0].get('resolution'),
+ 'fps': formats_info[0].get('fps'),
+ 'vcodec': formats_info[0].get('vcodec'),
+ 'vbr': formats_info[0].get('vbr'),
+ 'acodec': formats_info[1].get('acodec'),
+ 'abr': formats_info[1].get('abr'),
+ 'ext': output_ext,
}
else:
selected_format = None
@@ -1333,7 +1343,9 @@ class YoutubeDL(object):
formats = info_dict.get('formats', [info_dict])
idlen = max(len('format code'),
max(len(f['format_id']) for f in formats))
- formats_s = [line(f, idlen) for f in formats]
+ formats_s = [
+ line(f, idlen) for f in formats
+ if f.get('preference') is None or f['preference'] >= -1000]
if len(formats) > 1:
formats_s[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)'
formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py
index e79320323..8e7b74466 100644
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -38,7 +38,7 @@ from .update import update_self
from .downloader import (
FileDownloader,
)
-from .extractor import gen_extractors
+from .extractor import gen_extractors, list_extractors
from .YoutubeDL import YoutubeDL
@@ -95,24 +95,22 @@ def _real_main(argv=None):
_enc = preferredencoding()
all_urls = [url.decode(_enc, 'ignore') if isinstance(url, bytes) else url for url in all_urls]
- extractors = gen_extractors()
-
if opts.list_extractors:
- for ie in sorted(extractors, key=lambda ie: ie.IE_NAME.lower()):
+ for ie in list_extractors(opts.age_limit):
compat_print(ie.IE_NAME + (' (CURRENTLY BROKEN)' if not ie._WORKING else ''))
matchedUrls = [url for url in all_urls if ie.suitable(url)]
for mu in matchedUrls:
compat_print(' ' + mu)
sys.exit(0)
if opts.list_extractor_descriptions:
- for ie in sorted(extractors, key=lambda ie: ie.IE_NAME.lower()):
+ for ie in list_extractors(opts.age_limit):
if not ie._WORKING:
continue
desc = getattr(ie, 'IE_DESC', ie.IE_NAME)
if desc is False:
continue
if hasattr(ie, 'SEARCH_KEY'):
- _SEARCHES = ('cute kittens', 'slithering pythons', 'falling cat', 'angry poodle', 'purple fish', 'running tortoise', 'sleeping bunny')
+ _SEARCHES = ('cute kittens', 'slithering pythons', 'falling cat', 'angry poodle', 'purple fish', 'running tortoise', 'sleeping bunny', 'burping cow')
_COUNTS = ('', '5', '10', 'all')
desc += ' (Example: "%s%s:%s" )' % (ie.SEARCH_KEY, random.choice(_COUNTS), random.choice(_SEARCHES))
compat_print(desc)
@@ -168,6 +166,7 @@ def _real_main(argv=None):
if opts.recodevideo is not None:
if opts.recodevideo not in ['mp4', 'flv', 'webm', 'ogg', 'mkv']:
parser.error('invalid video recode format specified')
+
if opts.date is not None:
date = DateRange.day(opts.date)
else:
@@ -199,7 +198,8 @@ def _real_main(argv=None):
' file! Use "{0}.%(ext)s" instead of "{0}" as the output'
' template'.format(outtmpl))
- any_printing = opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat or opts.getduration or opts.dumpjson or opts.dump_single_json
+ any_getting = opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat or opts.getduration or opts.dumpjson or opts.dump_single_json
+ any_printing = opts.print_json
download_archive_fn = compat_expanduser(opts.download_archive) if opts.download_archive is not None else opts.download_archive
# PostProcessors
@@ -245,7 +245,7 @@ def _real_main(argv=None):
'password': opts.password,
'twofactor': opts.twofactor,
'videopassword': opts.videopassword,
- 'quiet': (opts.quiet or any_printing),
+ 'quiet': (opts.quiet or any_getting or any_printing),
'no_warnings': opts.no_warnings,
'forceurl': opts.geturl,
'forcetitle': opts.gettitle,
@@ -255,9 +255,9 @@ def _real_main(argv=None):
'forceduration': opts.getduration,
'forcefilename': opts.getfilename,
'forceformat': opts.getformat,
- 'forcejson': opts.dumpjson,
+ 'forcejson': opts.dumpjson or opts.print_json,
'dump_single_json': opts.dump_single_json,
- 'simulate': opts.simulate or any_printing,
+ 'simulate': opts.simulate or any_getting,
'skip_download': opts.skip_download,
'format': opts.format,
'format_limit': opts.format_limit,
@@ -324,6 +324,7 @@ def _real_main(argv=None):
'encoding': opts.encoding,
'exec_cmd': opts.exec_cmd,
'extract_flat': opts.extract_flat,
+ 'merge_output_format': opts.merge_output_format,
'postprocessors': postprocessors,
}
@@ -365,3 +366,5 @@ def main(argv=None):
sys.exit('ERROR: fixed output name but more than one file to download')
except KeyboardInterrupt:
sys.exit('\nERROR: Interrupted by user')
+
+__all__ = ['main', 'YoutubeDL', 'gen_extractors', 'list_extractors']
diff --git a/youtube_dl/downloader/f4m.py b/youtube_dl/downloader/f4m.py
index f9f6f3e73..c460c167a 100644
--- a/youtube_dl/downloader/f4m.py
+++ b/youtube_dl/downloader/f4m.py
@@ -187,24 +187,34 @@ def build_fragments_list(boot_info):
return res
-def write_flv_header(stream, metadata):
- """Writes the FLV header and the metadata to stream"""
+def write_unsigned_int(stream, val):
+ stream.write(struct_pack('!I', val))
+
+
+def write_unsigned_int_24(stream, val):
+ stream.write(struct_pack('!I', val)[1:])
+
+
+def write_flv_header(stream):
+ """Writes the FLV header to stream"""
# FLV header
stream.write(b'FLV\x01')
stream.write(b'\x05')
stream.write(b'\x00\x00\x00\x09')
- # FLV File body
stream.write(b'\x00\x00\x00\x00')
- # FLVTAG
- # Script data
- stream.write(b'\x12')
- # Size of the metadata with 3 bytes
- stream.write(struct_pack('!L', len(metadata))[1:])
- stream.write(b'\x00\x00\x00\x00\x00\x00\x00')
- stream.write(metadata)
- # Magic numbers extracted from the output files produced by AdobeHDS.php
- # (https://github.com/K-S-V/Scripts)
- stream.write(b'\x00\x00\x01\x73')
+
+
+def write_metadata_tag(stream, metadata):
+ """Writes optional metadata tag to stream"""
+ SCRIPT_TAG = b'\x12'
+ FLV_TAG_HEADER_LEN = 11
+
+ if metadata:
+ stream.write(SCRIPT_TAG)
+ write_unsigned_int_24(stream, len(metadata))
+ stream.write(b'\x00\x00\x00\x00\x00\x00\x00')
+ stream.write(metadata)
+ write_unsigned_int(stream, FLV_TAG_HEADER_LEN + len(metadata))
def _add_ns(prop):
@@ -256,7 +266,11 @@ class F4mFD(FileDownloader):
bootstrap = self.ydl.urlopen(bootstrap_url).read()
else:
bootstrap = base64.b64decode(bootstrap_node.text)
- metadata = base64.b64decode(media.find(_add_ns('metadata')).text)
+ metadata_node = media.find(_add_ns('metadata'))
+ if metadata_node is not None:
+ metadata = base64.b64decode(metadata_node.text)
+ else:
+ metadata = None
boot_info = read_bootstrap_info(bootstrap)
fragments_list = build_fragments_list(boot_info)
@@ -269,7 +283,8 @@ class F4mFD(FileDownloader):
tmpfilename = self.temp_name(filename)
(dest_stream, tmpfilename) = sanitize_open(tmpfilename, 'wb')
- write_flv_header(dest_stream, metadata)
+ write_flv_header(dest_stream)
+ write_metadata_tag(dest_stream, metadata)
# This dict stores the download progress, it's updated by the progress
# hook
diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py
index 5bb0f3cfd..aa58b52ab 100644
--- a/youtube_dl/downloader/hls.py
+++ b/youtube_dl/downloader/hls.py
@@ -11,7 +11,6 @@ from ..compat import (
compat_urllib_request,
)
from ..utils import (
- check_executable,
encodeFilename,
)
@@ -27,16 +26,13 @@ class HlsFD(FileDownloader):
'-bsf:a', 'aac_adtstoasc',
encodeFilename(tmpfilename, for_subprocess=True)]
- for program in ['avconv', 'ffmpeg']:
- if check_executable(program, ['-version']):
- break
- else:
+ ffpp = FFmpegPostProcessor(downloader=self)
+ program = ffpp._executable
+ if program is None:
self.report_error('m3u8 download detected but ffmpeg or avconv could not be found. Please install one.')
return False
- cmd = [program] + args
-
- ffpp = FFmpegPostProcessor(downloader=self)
ffpp.check_version()
+ cmd = [program] + args
retval = subprocess.call(cmd)
if retval == 0:
diff --git a/youtube_dl/downloader/mplayer.py b/youtube_dl/downloader/mplayer.py
index c53195da0..72cef30ea 100644
--- a/youtube_dl/downloader/mplayer.py
+++ b/youtube_dl/downloader/mplayer.py
@@ -4,8 +4,8 @@ import os
import subprocess
from .common import FileDownloader
-from ..compat import compat_subprocess_get_DEVNULL
from ..utils import (
+ check_executable,
encodeFilename,
)
@@ -20,11 +20,7 @@ class MplayerFD(FileDownloader):
'mplayer', '-really-quiet', '-vo', 'null', '-vc', 'dummy',
'-dumpstream', '-dumpfile', tmpfilename, url]
# Check for mplayer first
- try:
- subprocess.call(
- ['mplayer', '-h'],
- stdout=compat_subprocess_get_DEVNULL(), stderr=subprocess.STDOUT)
- except (OSError, IOError):
+ if not check_executable('mplayer', ['-h']):
self.report_error('MMS or RTSP download detected but "%s" could not be run' % args[0])
return False
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index e4c51f238..f544e87f1 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -71,6 +71,7 @@ from .cnn import (
from .collegehumor import CollegeHumorIE
from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE
from .comcarcoff import ComCarCoffIE
+from .commonmistakes import CommonMistakesIE
from .condenast import CondeNastIE
from .cracked import CrackedIE
from .criterion import CriterionIE
@@ -158,6 +159,7 @@ from .gametrailers import GametrailersIE
from .gdcvault import GDCVaultIE
from .generic import GenericIE
from .giantbomb import GiantBombIE
+from .giga import GigaIE
from .glide import GlideIE
from .globo import GloboIE
from .godtube import GodTubeIE
@@ -272,6 +274,7 @@ from .nbc import (
)
from .ndr import NDRIE
from .ndtv import NDTVIE
+from .netzkino import NetzkinoIE
from .nerdcubed import NerdCubedFeedIE
from .newgrounds import NewgroundsIE
from .newstube import NewstubeIE
@@ -324,6 +327,7 @@ from .prosiebensat1 import ProSiebenSat1IE
from .pyvideo import PyvideoIE
from .quickvid import QuickVidIE
from .radiode import RadioDeIE
+from .radiobremen import RadioBremenIE
from .radiofrance import RadioFranceIE
from .rai import RaiIE
from .rbmaradio import RBMARadioIE
@@ -344,6 +348,7 @@ from .ruhd import RUHDIE
from .rutube import (
RutubeIE,
RutubeChannelIE,
+ RutubeEmbedIE,
RutubeMovieIE,
RutubePersonIE,
)
@@ -473,6 +478,7 @@ from .videott import VideoTtIE
from .videoweed import VideoWeedIE
from .vidme import VidmeIE
from .vidzi import VidziIE
+from .vier import VierIE, VierVideosIE
from .vimeo import (
VimeoIE,
VimeoAlbumIE,
@@ -508,6 +514,7 @@ from .wdr import (
WDRMobileIE,
WDRMausIE,
)
+from .webofstories import WebOfStoriesIE
from .weibo import WeiboIE
from .wimp import WimpIE
from .wistia import WistiaIE
@@ -543,7 +550,7 @@ from .youtube import (
YoutubeSearchURLIE,
YoutubeShowIE,
YoutubeSubscriptionsIE,
- YoutubeTopListIE,
+ YoutubeTruncatedIDIE,
YoutubeTruncatedURLIE,
YoutubeUserIE,
YoutubeWatchLaterIE,
@@ -569,6 +576,17 @@ def gen_extractors():
return [klass() for klass in _ALL_CLASSES]
+def list_extractors(age_limit):
+ """
+ Return a list of extractors that are suitable for the given age,
+ sorted by extractor ID.
+ """
+
+ return sorted(
+ filter(lambda ie: ie.is_suitable(age_limit), gen_extractors()),
+ key=lambda ie: ie.IE_NAME.lower())
+
+
def get_info_extractor(ie_name):
"""Returns the info extractor class with the given ie_name"""
return globals()[ie_name + 'IE']
diff --git a/youtube_dl/extractor/auengine.py b/youtube_dl/extractor/auengine.py
index 014a21952..a1b666be0 100644
--- a/youtube_dl/extractor/auengine.py
+++ b/youtube_dl/extractor/auengine.py
@@ -7,6 +7,7 @@ from ..compat import compat_urllib_parse
from ..utils import (
determine_ext,
ExtractorError,
+ remove_end,
)
@@ -27,23 +28,18 @@ class AUEngineIE(InfoExtractor):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
- title = self._html_search_regex(r'<title>(?P<title>.+?)</title>', webpage, 'title')
- title = title.strip()
- links = re.findall(r'\s(?:file|url):\s*["\']([^\'"]+)["\']', webpage)
- links = map(compat_urllib_parse.unquote, links)
-
- thumbnail = None
- video_url = None
- for link in links:
- if link.endswith('.png'):
- thumbnail = link
- elif '/videos/' in link:
- video_url = link
+ title = self._html_search_regex(
+ r'<title>\s*(?P<title>.+?)\s*</title>', webpage, 'title')
+ video_urls = re.findall(r'http://\w+.auengine.com/vod/.*[^\W]', webpage)
+ video_url = compat_urllib_parse.unquote(video_urls[0])
+ thumbnails = re.findall(r'http://\w+.auengine.com/thumb/.*[^\W]', webpage)
+ thumbnail = compat_urllib_parse.unquote(thumbnails[0])
+
if not video_url:
raise ExtractorError('Could not find video URL')
+
ext = '.' + determine_ext(video_url)
- if ext == title[-len(ext):]:
- title = title[:-len(ext)]
+ title = remove_end(title, ext)
return {
'id': video_id,
diff --git a/youtube_dl/extractor/bbccouk.py b/youtube_dl/extractor/bbccouk.py
index f690dc803..1cf48fe0d 100644
--- a/youtube_dl/extractor/bbccouk.py
+++ b/youtube_dl/extractor/bbccouk.py
@@ -10,7 +10,7 @@ from ..compat import compat_HTTPError
class BBCCoUkIE(SubtitlesInfoExtractor):
IE_NAME = 'bbc.co.uk'
IE_DESC = 'BBC iPlayer'
- _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:programmes|iplayer/episode)/(?P<id>[\da-z]{8})'
+ _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:(?:(?:programmes|iplayer/(?:episode|playlist))/)|music/clips[/#])(?P<id>[\da-z]{8})'
_TESTS = [
{
@@ -18,8 +18,8 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
'info_dict': {
'id': 'b039d07m',
'ext': 'flv',
- 'title': 'Kaleidoscope: Leonard Cohen',
- 'description': 'md5:db4755d7a665ae72343779f7dacb402c',
+ 'title': 'Kaleidoscope, Leonard Cohen',
+ 'description': 'The Canadian poet and songwriter reflects on his musical career.',
'duration': 1740,
},
'params': {
@@ -84,6 +84,40 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
# rtmp download
'skip_download': True,
}
+ }, {
+ 'url': 'http://www.bbc.co.uk/music/clips/p02frcc3',
+ 'note': 'Audio',
+ 'info_dict': {
+ 'id': 'p02frcch',
+ 'ext': 'flv',
+ 'title': 'Pete Tong, Past, Present and Future Special, Madeon - After Hours mix',
+ 'description': 'French house superstar Madeon takes us out of the club and onto the after party.',
+ 'duration': 3507,
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ }
+ }, {
+ 'url': 'http://www.bbc.co.uk/music/clips/p025c0zz',
+ 'note': 'Video',
+ 'info_dict': {
+ 'id': 'p025c103',
+ 'ext': 'flv',
+ 'title': 'Reading and Leeds Festival, 2014, Rae Morris - Closer (Live on BBC Three)',
+ 'description': 'Rae Morris performs Closer for BBC Three at Reading 2014',
+ 'duration': 226,
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ }
+ }, {
+ 'url': 'http://www.bbc.co.uk/iplayer/playlist/p01dvks4',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.bbc.co.uk/music/clips#p02frcc3',
+ 'only_matching': True,
}
]
@@ -241,8 +275,8 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
# fallback to legacy playlist
playlist = self._download_xml(
- 'http://www.bbc.co.uk/iplayer/playlist/%s' % playlist_id,
- playlist_id, 'Downloading legacy playlist XML')
+ 'http://www.bbc.co.uk/iplayer/playlist/%s' % playlist_id,
+ playlist_id, 'Downloading legacy playlist XML')
no_items = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}noItems')
if no_items is not None:
diff --git a/youtube_dl/extractor/bet.py b/youtube_dl/extractor/bet.py
index 003e50002..d2abd4d77 100644
--- a/youtube_dl/extractor/bet.py
+++ b/youtube_dl/extractor/bet.py
@@ -16,7 +16,7 @@ class BetIE(InfoExtractor):
{
'url': 'http://www.bet.com/news/politics/2014/12/08/in-bet-exclusive-obama-talks-race-and-racism.html',
'info_dict': {
- 'id': '417cd61c-c793-4e8e-b006-e445ecc45add',
+ 'id': '740ab250-bb94-4a8a-8787-fe0de7c74471',
'display_id': 'in-bet-exclusive-obama-talks-race-and-racism',
'ext': 'flv',
'title': 'BET News Presents: A Conversation With President Obama',
@@ -35,7 +35,7 @@ class BetIE(InfoExtractor):
{
'url': 'http://www.bet.com/video/news/national/2014/justice-for-ferguson-a-community-reacts.html',
'info_dict': {
- 'id': '4160e53b-ad41-43b1-980f-8d85f63121f4',
+ 'id': 'bcd1b1df-673a-42cf-8d01-b282db608f2d',
'display_id': 'justice-for-ferguson-a-community-reacts',
'ext': 'flv',
'title': 'Justice for Ferguson: A Community Reacts',
@@ -55,7 +55,6 @@ class BetIE(InfoExtractor):
def _real_extract(self, url):
display_id = self._match_id(url)
-
webpage = self._download_webpage(url, display_id)
media_url = compat_urllib_parse.unquote(self._search_regex(
diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py
index 241b904a9..75d744852 100644
--- a/youtube_dl/extractor/bilibili.py
+++ b/youtube_dl/extractor/bilibili.py
@@ -4,9 +4,7 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
-from ..compat import compat_parse_qs
from ..utils import (
- ExtractorError,
int_or_none,
unified_strdate,
)
@@ -54,45 +52,38 @@ class BiliBiliIE(InfoExtractor):
thumbnail = self._html_search_meta(
'thumbnailUrl', video_code, 'thumbnail', fatal=False)
- player_params = compat_parse_qs(self._html_search_regex(
- r'<iframe .*?class="player" src="https://secure\.bilibili\.(?:tv|com)/secure,([^"]+)"',
- webpage, 'player params'))
+ cid = self._search_regex(r'cid=(\d+)', webpage, 'cid')
- if 'cid' in player_params:
- cid = player_params['cid'][0]
+ lq_doc = self._download_xml(
+ 'http://interface.bilibili.com/v_cdn_play?appkey=1&cid=%s' % cid,
+ video_id,
+ note='Downloading LQ video info'
+ )
+ lq_durl = lq_doc.find('./durl')
+ formats = [{
+ 'format_id': 'lq',
+ 'quality': 1,
+ 'url': lq_durl.find('./url').text,
+ 'filesize': int_or_none(
+ lq_durl.find('./size'), get_attr='text'),
+ }]
- lq_doc = self._download_xml(
- 'http://interface.bilibili.cn/v_cdn_play?cid=%s' % cid,
- video_id,
- note='Downloading LQ video info'
- )
- lq_durl = lq_doc.find('.//durl')
- formats = [{
- 'format_id': 'lq',
- 'quality': 1,
- 'url': lq_durl.find('./url').text,
+ hq_doc = self._download_xml(
+ 'http://interface.bilibili.com/playurl?appkey=1&cid=%s' % cid,
+ video_id,
+ note='Downloading HQ video info',
+ fatal=False,
+ )
+ if hq_doc is not False:
+ hq_durl = hq_doc.find('./durl')
+ formats.append({
+ 'format_id': 'hq',
+ 'quality': 2,
+ 'ext': 'flv',
+ 'url': hq_durl.find('./url').text,
'filesize': int_or_none(
- lq_durl.find('./size'), get_attr='text'),
- }]
-
- hq_doc = self._download_xml(
- 'http://interface.bilibili.cn/playurl?cid=%s' % cid,
- video_id,
- note='Downloading HQ video info',
- fatal=False,
- )
- if hq_doc is not False:
- hq_durl = hq_doc.find('.//durl')
- formats.append({
- 'format_id': 'hq',
- 'quality': 2,
- 'ext': 'flv',
- 'url': hq_durl.find('./url').text,
- 'filesize': int_or_none(
- hq_durl.find('./size'), get_attr='text'),
- })
- else:
- raise ExtractorError('Unsupported player parameters: %r' % (player_params,))
+ hq_durl.find('./size'), get_attr='text'),
+ })
self._sort_formats(formats)
return {
diff --git a/youtube_dl/extractor/buzzfeed.py b/youtube_dl/extractor/buzzfeed.py
index a40a1bbc4..a5d2af174 100644
--- a/youtube_dl/extractor/buzzfeed.py
+++ b/youtube_dl/extractor/buzzfeed.py
@@ -33,7 +33,7 @@ class BuzzFeedIE(InfoExtractor):
'skip_download': True, # Got enough YouTube download tests
},
'info_dict': {
- 'description': 'Munchkin the Teddy Bear is back !',
+ 'description': 're:Munchkin the Teddy Bear is back ?!',
'title': 'You Need To Stop What You\'re Doing And Watching This Dog Walk On A Treadmill',
},
'playlist': [{
@@ -42,9 +42,9 @@ class BuzzFeedIE(InfoExtractor):
'ext': 'mp4',
'upload_date': '20141124',
'uploader_id': 'CindysMunchkin',
- 'description': '© 2014 Munchkin the Shih Tzu\nAll rights reserved\nFacebook: http://facebook.com/MunchkintheShihTzu',
+ 'description': 're:© 2014 Munchkin the Shih Tzu',
'uploader': 'Munchkin the Shih Tzu',
- 'title': 'Munchkin the Teddy Bear gets her exercise',
+ 'title': 're:Munchkin the Teddy Bear gets her exercise',
},
}]
}]
diff --git a/youtube_dl/extractor/canalplus.py b/youtube_dl/extractor/canalplus.py
index 9873728df..11d18d74a 100644
--- a/youtube_dl/extractor/canalplus.py
+++ b/youtube_dl/extractor/canalplus.py
@@ -5,6 +5,8 @@ import re
from .common import InfoExtractor
from ..utils import (
+ ExtractorError,
+ HEADRequest,
unified_strdate,
url_basename,
qualities,
@@ -76,6 +78,16 @@ class CanalplusIE(InfoExtractor):
preference = qualities(['MOBILE', 'BAS_DEBIT', 'HAUT_DEBIT', 'HD', 'HLS', 'HDS'])
+ fmt_url = next(iter(media.find('VIDEOS'))).text
+ if '/geo' in fmt_url.lower():
+ response = self._request_webpage(
+ HEADRequest(fmt_url), video_id,
+ 'Checking if the video is georestricted')
+ if '/blocage' in response.geturl():
+ raise ExtractorError(
+ 'The video is not available in your country',
+ expected=True)
+
formats = []
for fmt in media.find('VIDEOS'):
format_url = fmt.text
diff --git a/youtube_dl/extractor/ceskatelevize.py b/youtube_dl/extractor/ceskatelevize.py
index 2f866f3ef..f70e090bb 100644
--- a/youtube_dl/extractor/ceskatelevize.py
+++ b/youtube_dl/extractor/ceskatelevize.py
@@ -3,7 +3,7 @@ from __future__ import unicode_literals
import re
-from .common import InfoExtractor
+from .subtitles import SubtitlesInfoExtractor
from ..compat import (
compat_urllib_request,
compat_urllib_parse,
@@ -11,49 +11,42 @@ from ..compat import (
)
from ..utils import (
ExtractorError,
+ float_or_none,
)
-class CeskaTelevizeIE(InfoExtractor):
+class CeskaTelevizeIE(SubtitlesInfoExtractor):
_VALID_URL = r'https?://www\.ceskatelevize\.cz/(porady|ivysilani)/(.+/)?(?P<id>[^?#]+)'
_TESTS = [
{
- 'url': 'http://www.ceskatelevize.cz/ivysilani/10532695142-prvni-republika/213512120230004-spanelska-chripka',
+ 'url': 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220',
'info_dict': {
- 'id': '213512120230004',
- 'ext': 'flv',
- 'title': 'První republika: Španělská chřipka',
- 'duration': 3107.4,
+ 'id': '214411058091220',
+ 'ext': 'mp4',
+ 'title': 'Hyde Park Civilizace',
+ 'description': 'Věda a současná civilizace. Interaktivní pořad - prostor pro vaše otázky a komentáře',
+ 'thumbnail': 're:^https?://.*\.jpg',
+ 'duration': 3350,
},
'params': {
- 'skip_download': True, # requires rtmpdump
+ # m3u8 download
+ 'skip_download': True,
},
- 'skip': 'Works only from Czech Republic.',
- },
- {
- 'url': 'http://www.ceskatelevize.cz/ivysilani/1030584952-tsatsiki-maminka-a-policajt',
- 'info_dict': {
- 'id': '20138143440',
- 'ext': 'flv',
- 'title': 'Tsatsiki, maminka a policajt',
- 'duration': 6754.1,
- },
- 'params': {
- 'skip_download': True, # requires rtmpdump
- },
- 'skip': 'Works only from Czech Republic.',
},
{
'url': 'http://www.ceskatelevize.cz/ivysilani/10532695142-prvni-republika/bonus/14716-zpevacka-z-duparny-bobina',
'info_dict': {
'id': '14716',
- 'ext': 'flv',
+ 'ext': 'mp4',
'title': 'První republika: Zpěvačka z Dupárny Bobina',
- 'duration': 90,
+ 'description': 'Sága mapující atmosféru první republiky od r. 1918 do r. 1945.',
+ 'thumbnail': 're:^https?://.*\.jpg',
+ 'duration': 88.4,
},
'params': {
- 'skip_download': True, # requires rtmpdump
+ # m3u8 download
+ 'skip_download': True,
},
},
]
@@ -80,8 +73,9 @@ class CeskaTelevizeIE(InfoExtractor):
'requestSource': 'iVysilani',
}
- req = compat_urllib_request.Request('http://www.ceskatelevize.cz/ivysilani/ajax/get-playlist-url',
- data=compat_urllib_parse.urlencode(data))
+ req = compat_urllib_request.Request(
+ 'http://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist',
+ data=compat_urllib_parse.urlencode(data))
req.add_header('Content-type', 'application/x-www-form-urlencoded')
req.add_header('x-addr', '127.0.0.1')
@@ -90,39 +84,72 @@ class CeskaTelevizeIE(InfoExtractor):
playlistpage = self._download_json(req, video_id)
- req = compat_urllib_request.Request(compat_urllib_parse.unquote(playlistpage['url']))
+ playlist_url = playlistpage['url']
+ if playlist_url == 'error_region':
+ raise ExtractorError(NOT_AVAILABLE_STRING, expected=True)
+
+ req = compat_urllib_request.Request(compat_urllib_parse.unquote(playlist_url))
req.add_header('Referer', url)
- playlist = self._download_xml(req, video_id)
+ playlist = self._download_json(req, video_id)
+ item = playlist['playlist'][0]
formats = []
- for i in playlist.find('smilRoot/body'):
- if 'AD' not in i.attrib['id']:
- base_url = i.attrib['base']
- parsedurl = compat_urllib_parse_urlparse(base_url)
- duration = i.attrib['duration']
-
- for video in i.findall('video'):
- if video.attrib['label'] != 'AD':
- format_id = video.attrib['label']
- play_path = video.attrib['src']
- vbr = int(video.attrib['system-bitrate'])
-
- formats.append({
- 'format_id': format_id,
- 'url': base_url,
- 'vbr': vbr,
- 'play_path': play_path,
- 'app': parsedurl.path[1:] + '?' + parsedurl.query,
- 'rtmp_live': True,
- 'ext': 'flv',
- })
-
+ for format_id, stream_url in item['streamUrls'].items():
+ formats.extend(self._extract_m3u8_formats(stream_url, video_id, 'mp4'))
self._sort_formats(formats)
+ title = self._og_search_title(webpage)
+ description = self._og_search_description(webpage)
+ duration = float_or_none(item.get('duration'))
+ thumbnail = item.get('previewImageUrl')
+
+ subtitles = {}
+ subs = item.get('subtitles')
+ if subs:
+ subtitles['cs'] = subs[0]['url']
+
+ if self._downloader.params.get('listsubtitles', False):
+ self._list_available_subtitles(video_id, subtitles)
+ return
+
+ subtitles = self._fix_subtitles(self.extract_subtitles(video_id, subtitles))
+
return {
'id': episode_id,
- 'title': self._html_search_regex(r'<title>(.+?) — iVysílání — Česká televize</title>', webpage, 'title'),
- 'duration': float(duration),
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
'formats': formats,
+ 'subtitles': subtitles,
}
+
+ @staticmethod
+ def _fix_subtitles(subtitles):
+ """ Convert millisecond-based subtitles to SRT """
+ if subtitles is None:
+ return subtitles # subtitles not requested
+
+ def _msectotimecode(msec):
+ """ Helper utility to convert milliseconds to timecode """
+ components = []
+ for divider in [1000, 60, 60, 100]:
+ components.append(msec % divider)
+ msec //= divider
+ return "{3:02}:{2:02}:{1:02},{0:03}".format(*components)
+
+ def _fix_subtitle(subtitle):
+ for line in subtitle.splitlines():
+ m = re.match(r"^\s*([0-9]+);\s*([0-9]+)\s+([0-9]+)\s*$", line)
+ if m:
+ yield m.group(1)
+ start, stop = (_msectotimecode(int(t)) for t in m.groups()[1:])
+ yield "{0} --> {1}".format(start, stop)
+ else:
+ yield line
+
+ fixed_subtitles = {}
+ for k, v in subtitles.items():
+ fixed_subtitles[k] = "\r\n".join(_fix_subtitle(v))
+ return fixed_subtitles
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 6e264f687..b4cd59e43 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -21,6 +21,7 @@ from ..compat import (
compat_str,
)
from ..utils import (
+ age_restricted,
clean_html,
compiled_regex_type,
ExtractorError,
@@ -92,6 +93,8 @@ class InfoExtractor(object):
by this field, regardless of all other values.
-1 for default (order by other properties),
-2 or smaller for less than default.
+ < -1000 to hide the format (if there is
+ another one which is strictly better)
* language_preference Is this in the correct requested
language?
10 if it's what the URL is about,
@@ -144,6 +147,17 @@ class InfoExtractor(object):
like_count: Number of positive ratings of the video
dislike_count: Number of negative ratings of the video
comment_count: Number of comments on the video
+ comments: A list of comments, each with one or more of the following
+ properties (all but one of text or html optional):
+ * "author" - human-readable name of the comment author
+ * "author_id" - user ID of the comment author
+ * "id" - Comment ID
+ * "html" - Comment as HTML
+ * "text" - Plain text of the comment
+ * "timestamp" - UNIX timestamp of comment
+ * "parent" - ID of the comment this one is replying to.
+ Set to "root" to indicate that this is a
+ comment to the original video.
age_limit: Age restriction for the video, as an integer (years)
webpage_url: The url to the video webpage, if given to youtube-dl it
should allow to get the same result again. (It will be set
@@ -591,7 +605,7 @@ class InfoExtractor(object):
return self._html_search_regex(
r'''(?isx)<meta
(?=[^>]+(?:itemprop|name|property)=(["\']?)%s\1)
- [^>]+content=(["\'])(?P<content>.*?)\1''' % re.escape(name),
+ [^>]+?content=(["\'])(?P<content>.*?)\2''' % re.escape(name),
html, display_name, fatal=fatal, group='content', **kwargs)
def _dc_search_uploader(self, html):
@@ -875,6 +889,35 @@ class InfoExtractor(object):
None, '/', True, False, expire_time, '', None, None, None)
self._downloader.cookiejar.set_cookie(cookie)
+ def get_testcases(self, include_onlymatching=False):
+ t = getattr(self, '_TEST', None)
+ if t:
+ assert not hasattr(self, '_TESTS'), \
+ '%s has _TEST and _TESTS' % type(self).__name__
+ tests = [t]
+ else:
+ tests = getattr(self, '_TESTS', [])
+ for t in tests:
+ if not include_onlymatching and t.get('only_matching', False):
+ continue
+ t['name'] = type(self).__name__[:-len('IE')]
+ yield t
+
+ def is_suitable(self, age_limit):
+ """ Test whether the extractor is generally suitable for the given
+ age limit (i.e. pornographic sites are not, all others usually are) """
+
+ any_restricted = False
+ for tc in self.get_testcases(include_onlymatching=False):
+ if 'playlist' in tc:
+ tc = tc['playlist'][0]
+ is_restricted = age_restricted(
+ tc.get('info_dict', {}).get('age_limit'), age_limit)
+ if not is_restricted:
+ return True
+ any_restricted = any_restricted or is_restricted
+ return not any_restricted
+
class SearchInfoExtractor(InfoExtractor):
"""
diff --git a/youtube_dl/extractor/commonmistakes.py b/youtube_dl/extractor/commonmistakes.py
new file mode 100644
index 000000000..75c06903f
--- /dev/null
+++ b/youtube_dl/extractor/commonmistakes.py
@@ -0,0 +1,29 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import ExtractorError
+
+
+class CommonMistakesIE(InfoExtractor):
+ IE_DESC = False # Do not list
+ _VALID_URL = r'''(?x)
+ (?:url|URL)
+ '''
+
+ _TESTS = [{
+ 'url': 'url',
+ 'only_matching': True,
+ }, {
+ 'url': 'URL',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ msg = (
+ 'You\'ve asked youtube-dl to download the URL "%s". '
+ 'That doesn\'t make any sense. '
+ 'Simply remove the parameter in your command or configuration.'
+ ) % url
+ if self._downloader.params.get('verbose'):
+ msg += ' Add -v to the command line to see what arguments and configuration youtube-dl got.'
+ raise ExtractorError(msg, expected=True)
diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py
index 354046a9e..1680f532f 100644
--- a/youtube_dl/extractor/crunchyroll.py
+++ b/youtube_dl/extractor/crunchyroll.py
@@ -228,7 +228,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
video_thumbnail = self._search_regex(r'<episode_image_url>([^<]+)', playerdata, 'thumbnail', fatal=False)
formats = []
- for fmt in re.findall(r'\?p([0-9]{3,4})=1', webpage):
+ for fmt in re.findall(r'showmedia\.([0-9]{3,4})p', webpage):
stream_quality, stream_format = self._FORMAT_IDS[fmt]
video_format = fmt + 'p'
streamdata_req = compat_urllib_request.Request('http://www.crunchyroll.com/xml/')
diff --git a/youtube_dl/extractor/discovery.py b/youtube_dl/extractor/discovery.py
index 52c2d7ddf..d3e667528 100644
--- a/youtube_dl/extractor/discovery.py
+++ b/youtube_dl/extractor/discovery.py
@@ -1,47 +1,45 @@
from __future__ import unicode_literals
-import re
-import json
-
from .common import InfoExtractor
+from ..utils import (
+ parse_iso8601,
+ int_or_none,
+)
class DiscoveryIE(InfoExtractor):
- _VALID_URL = r'http://www\.discovery\.com\/[a-zA-Z0-9\-]*/[a-zA-Z0-9\-]*/videos/(?P<id>[a-zA-Z0-9\-]*)(.htm)?'
+ _VALID_URL = r'http://www\.discovery\.com\/[a-zA-Z0-9\-]*/[a-zA-Z0-9\-]*/videos/(?P<id>[a-zA-Z0-9_\-]*)(?:\.htm)?'
_TEST = {
'url': 'http://www.discovery.com/tv-shows/mythbusters/videos/mission-impossible-outtakes.htm',
- 'md5': 'e12614f9ee303a6ccef415cb0793eba2',
+ 'md5': '3c69d77d9b0d82bfd5e5932a60f26504',
'info_dict': {
- 'id': '614784',
- 'ext': 'mp4',
- 'title': 'MythBusters: Mission Impossible Outtakes',
+ 'id': 'mission-impossible-outtakes',
+ 'ext': 'flv',
+ 'title': 'Mission Impossible Outtakes',
'description': ('Watch Jamie Hyneman and Adam Savage practice being'
' each other -- to the point of confusing Jamie\'s dog -- and '
'don\'t miss Adam moon-walking as Jamie ... behind Jamie\'s'
' back.'),
'duration': 156,
+ 'timestamp': 1303099200,
+ 'upload_date': '20110418',
},
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
+ video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
- video_list_json = self._search_regex(r'var videoListJSON = ({.*?});',
- webpage, 'video list', flags=re.DOTALL)
- video_list = json.loads(video_list_json)
- info = video_list['clips'][0]
- formats = []
- for f in info['mp4']:
- formats.append(
- {'url': f['src'], 'ext': 'mp4', 'tbr': int(f['bitrate'][:-1])})
+ info = self._parse_json(self._search_regex(
+ r'(?s)<script type="application/ld\+json">(.*?)</script>',
+ webpage, 'video info'), video_id)
return {
- 'id': info['contentId'],
- 'title': video_list['name'],
- 'formats': formats,
- 'description': info['videoCaption'],
- 'thumbnail': info.get('videoStillURL') or info.get('thumbnailURL'),
- 'duration': info['duration'],
+ 'id': video_id,
+ 'title': info['name'],
+ 'url': info['contentURL'],
+ 'description': info.get('description'),
+ 'thumbnail': info.get('thumbnailUrl'),
+ 'timestamp': parse_iso8601(info.get('uploadDate')),
+ 'duration': int_or_none(info.get('duration')),
}
diff --git a/youtube_dl/extractor/ellentv.py b/youtube_dl/extractor/ellentv.py
index 3e7923648..fc92ff825 100644
--- a/youtube_dl/extractor/ellentv.py
+++ b/youtube_dl/extractor/ellentv.py
@@ -1,7 +1,6 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
import json
from .common import InfoExtractor
@@ -12,32 +11,49 @@ from ..utils import (
class EllenTVIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?ellentv\.com/videos/(?P<id>[a-z0-9_-]+)'
- _TEST = {
+ _VALID_URL = r'https?://(?:www\.)?(?:ellentv|ellentube)\.com/videos/(?P<id>[a-z0-9_-]+)'
+ _TESTS = [{
'url': 'http://www.ellentv.com/videos/0-7jqrsr18/',
'md5': 'e4af06f3bf0d5f471921a18db5764642',
'info_dict': {
'id': '0-7jqrsr18',
'ext': 'mp4',
'title': 'What\'s Wrong with These Photos? A Whole Lot',
+ 'description': 'md5:35f152dc66b587cf13e6d2cf4fa467f6',
'timestamp': 1406876400,
'upload_date': '20140801',
}
- }
+ }, {
+ 'url': 'http://ellentube.com/videos/0-dvzmabd5/',
+ 'md5': '98238118eaa2bbdf6ad7f708e3e4f4eb',
+ 'info_dict': {
+ 'id': '0-dvzmabd5',
+ 'ext': 'mp4',
+ 'title': '1 year old twin sister makes her brother laugh',
+ 'description': '1 year old twin sister makes her brother laugh',
+ 'timestamp': 1419542075,
+ 'upload_date': '20141225',
+ }
+ }]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
+ video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
+ video_url = self._html_search_meta('VideoURL', webpage, 'url')
+ title = self._og_search_title(webpage, default=None) or self._search_regex(
+ r'pageName\s*=\s*"([^"]+)"', webpage, 'title')
+ description = self._html_search_meta(
+ 'description', webpage, 'description') or self._og_search_description(webpage)
timestamp = parse_iso8601(self._search_regex(
r'<span class="publish-date"><time datetime="([^"]+)">',
webpage, 'timestamp'))
return {
'id': video_id,
- 'title': self._og_search_title(webpage),
- 'url': self._html_search_meta('VideoURL', webpage, 'url'),
+ 'url': video_url,
+ 'title': title,
+ 'description': description,
'timestamp': timestamp,
}
@@ -55,8 +71,7 @@ class EllenTVClipsIE(InfoExtractor):
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- playlist_id = mobj.group('id')
+ playlist_id = self._match_id(url)
webpage = self._download_webpage(url, playlist_id)
playlist = self._extract_playlist(webpage)
diff --git a/youtube_dl/extractor/elpais.py b/youtube_dl/extractor/elpais.py
index 4277202a2..00a69e631 100644
--- a/youtube_dl/extractor/elpais.py
+++ b/youtube_dl/extractor/elpais.py
@@ -1,8 +1,6 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
-
from .common import InfoExtractor
from ..utils import unified_strdate
@@ -24,9 +22,7 @@ class ElPaisIE(InfoExtractor):
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
-
+ video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
prefix = self._html_search_regex(
diff --git a/youtube_dl/extractor/fktv.py b/youtube_dl/extractor/fktv.py
index d09d1c13a..190d9f9ad 100644
--- a/youtube_dl/extractor/fktv.py
+++ b/youtube_dl/extractor/fktv.py
@@ -13,7 +13,7 @@ from ..utils import (
class FKTVIE(InfoExtractor):
IE_NAME = 'fernsehkritik.tv'
- _VALID_URL = r'http://(?:www\.)?fernsehkritik\.tv/folge-(?P<ep>[0-9]+)(?:/.*)?'
+ _VALID_URL = r'http://(?:www\.)?fernsehkritik\.tv/folge-(?P<id>[0-9]+)(?:/.*)?'
_TEST = {
'url': 'http://fernsehkritik.tv/folge-1',
@@ -26,29 +26,32 @@ class FKTVIE(InfoExtractor):
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- episode = int(mobj.group('ep'))
+ episode = int(self._match_id(url))
- server = random.randint(2, 4)
- video_thumbnail = 'http://fernsehkritik.tv/images/magazin/folge%d.jpg' % episode
- start_webpage = self._download_webpage('http://fernsehkritik.tv/folge-%d/Start' % episode,
+ video_thumbnail = 'http://fernsehkritik.tv/images/magazin/folge%s.jpg' % episode
+ start_webpage = self._download_webpage('http://fernsehkritik.tv/folge-%s/Start' % episode,
episode)
playlist = self._search_regex(r'playlist = (\[.*?\]);', start_webpage,
'playlist', flags=re.DOTALL)
files = json.loads(re.sub('{[^{}]*?}', '{}', playlist))
- # TODO: return a single multipart video
+
videos = []
for i, _ in enumerate(files, 1):
video_id = '%04d%d' % (episode, i)
- video_url = 'http://dl%d.fernsehkritik.tv/fernsehkritik%d%s.flv' % (server, episode, '' if i == 1 else '-%d' % i)
+ video_url = 'http://fernsehkritik.tv/js/directme.php?file=%s%s.flv' % (episode, '' if i == 1 else '-%d' % i)
videos.append({
+ 'ext': 'flv',
'id': video_id,
'url': video_url,
'title': clean_html(get_element_by_id('eptitle', start_webpage)),
'description': clean_html(get_element_by_id('contentlist', start_webpage)),
'thumbnail': video_thumbnail
})
- return videos
+ return {
+ '_type': 'multi_video',
+ 'entries': videos,
+ 'id': 'folge-%s' % episode,
+ }
class FKTVPosteckeIE(InfoExtractor):
diff --git a/youtube_dl/extractor/gameone.py b/youtube_dl/extractor/gameone.py
index 75f180928..a07d69841 100644
--- a/youtube_dl/extractor/gameone.py
+++ b/youtube_dl/extractor/gameone.py
@@ -57,8 +57,7 @@ class GameOneIE(InfoExtractor):
]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
+ video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
og_video = self._og_search_video_url(webpage, secure=False)
diff --git a/youtube_dl/extractor/gdcvault.py b/youtube_dl/extractor/gdcvault.py
index d453ec010..fed968f51 100644
--- a/youtube_dl/extractor/gdcvault.py
+++ b/youtube_dl/extractor/gdcvault.py
@@ -39,7 +39,8 @@ class GDCVaultIE(InfoExtractor):
'id': '1015301',
'ext': 'flv',
'title': 'Thexder Meets Windows 95, or Writing Great Games in the Windows 95 Environment',
- }
+ },
+ 'skip': 'Requires login',
}
]
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index 40b2791c7..7a5bf9392 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -131,12 +131,13 @@ class GenericIE(InfoExtractor):
# ooyala video
{
'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
- 'md5': '5644c6ca5d5782c1d0d350dad9bd840c',
+ 'md5': '166dd577b433b4d4ebfee10b0824d8ff',
'info_dict': {
'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
'ext': 'mp4',
'title': '2cc213299525360.mov', # that's what we get
},
+ 'add_ie': ['Ooyala'],
},
# google redirect
{
@@ -146,7 +147,7 @@ class GenericIE(InfoExtractor):
'ext': 'mp4',
'upload_date': '20130224',
'uploader_id': 'TheVerge',
- 'description': 'Chris Ziegler takes a look at the Alcatel OneTouch Fire and the ZTE Open; two of the first Firefox OS handsets to be officially announced.',
+ 'description': 're:^Chris Ziegler takes a look at the\.*',
'uploader': 'The Verge',
'title': 'First Firefox OS phones side-by-side',
},
@@ -181,6 +182,14 @@ class GenericIE(InfoExtractor):
'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
},
},
+ # BBC iPlayer embeds
+ {
+ 'url': 'http://www.bbc.co.uk/blogs/adamcurtis/posts/BUGGER',
+ 'info_dict': {
+ 'title': 'BBC - Blogs - Adam Curtis - BUGGER',
+ },
+ 'playlist_mincount': 18,
+ },
# RUTV embed
{
'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html',
@@ -699,9 +708,9 @@ class GenericIE(InfoExtractor):
r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
# Helper method
- def _playlist_from_matches(matches, getter, ie=None):
+ def _playlist_from_matches(matches, getter=None, ie=None):
urlrs = orderedSet(
- self.url_result(self._proto_relative_url(getter(m)), ie)
+ self.url_result(self._proto_relative_url(getter(m) if getter else m), ie)
for m in matches)
return self.playlist_result(
urlrs, playlist_id=video_id, playlist_title=video_title)
@@ -905,6 +914,11 @@ class GenericIE(InfoExtractor):
return _playlist_from_matches(
matches, getter=unescapeHTML, ie='FunnyOrDie')
+ # Look for BBC iPlayer embed
+ matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)', webpage)
+ if matches:
+ return _playlist_from_matches(matches, ie='BBCCoUk')
+
# Look for embedded RUTV player
rutv_url = RUTVIE._extract_url(webpage)
if rutv_url:
@@ -912,7 +926,7 @@ class GenericIE(InfoExtractor):
# Look for embedded TED player
mobj = re.search(
- r'<iframe[^>]+?src=(["\'])(?P<url>http://embed\.ted\.com/.+?)\1', webpage)
+ r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage)
if mobj is not None:
return self.url_result(mobj.group('url'), 'TED')
diff --git a/youtube_dl/extractor/giga.py b/youtube_dl/extractor/giga.py
new file mode 100644
index 000000000..775890112
--- /dev/null
+++ b/youtube_dl/extractor/giga.py
@@ -0,0 +1,101 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import itertools
+
+from .common import InfoExtractor
+from ..utils import (
+ qualities,
+ compat_str,
+ parse_duration,
+ parse_iso8601,
+ str_to_int,
+)
+
+
+class GigaIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?giga\.de/(?:[^/]+/)*(?P<id>[^/]+)'
+ _TESTS = [{
+ 'url': 'http://www.giga.de/filme/anime-awesome/trailer/anime-awesome-chihiros-reise-ins-zauberland-das-beste-kommt-zum-schluss/',
+ 'md5': '6bc5535e945e724640664632055a584f',
+ 'info_dict': {
+ 'id': '2622086',
+ 'display_id': 'anime-awesome-chihiros-reise-ins-zauberland-das-beste-kommt-zum-schluss',
+ 'ext': 'mp4',
+ 'title': 'Anime Awesome: Chihiros Reise ins Zauberland – Das Beste kommt zum Schluss',
+ 'description': 'md5:afdf5862241aded4718a30dff6a57baf',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ 'duration': 578,
+ 'timestamp': 1414749706,
+ 'upload_date': '20141031',
+ 'uploader': 'Robin Schweiger',
+ 'view_count': int,
+ },
+ }, {
+ 'url': 'http://www.giga.de/games/channel/giga-top-montag/giga-topmontag-die-besten-serien-2014/',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.giga.de/extra/netzkultur/videos/giga-games-tom-mats-robin-werden-eigene-wege-gehen-eine-ankuendigung/',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.giga.de/tv/jonas-liest-spieletitel-eingedeutscht-episode-2/',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id)
+
+ video_id = self._search_regex(
+ [r'data-video-id="(\d+)"', r'/api/video/jwplayer/#v=(\d+)'],
+ webpage, 'video id')
+
+ playlist = self._download_json(
+ 'http://www.giga.de/api/syndication/video/video_id/%s/playlist.json?content=syndication/key/368b5f151da4ae05ced7fa296bdff65a/'
+ % video_id, video_id)[0]
+
+ quality = qualities(['normal', 'hd720'])
+
+ formats = []
+ for format_id in itertools.count(0):
+ fmt = playlist.get(compat_str(format_id))
+ if not fmt:
+ break
+ formats.append({
+ 'url': fmt['src'],
+ 'format_id': '%s-%s' % (fmt['quality'], fmt['type'].split('/')[-1]),
+ 'quality': quality(fmt['quality']),
+ })
+ self._sort_formats(formats)
+
+ title = self._html_search_meta(
+ 'title', webpage, 'title', fatal=True)
+ description = self._html_search_meta(
+ 'description', webpage, 'description')
+ thumbnail = self._og_search_thumbnail(webpage)
+
+ duration = parse_duration(self._search_regex(
+ r'(?s)(?:data-video-id="{0}"|data-video="[^"]*/api/video/jwplayer/#v={0}[^"]*")[^>]*>.+?<span class="duration">([^<]+)</span>'.format(video_id),
+ webpage, 'duration', fatal=False))
+
+ timestamp = parse_iso8601(self._search_regex(
+ r'datetime="([^"]+)"', webpage, 'upload date', fatal=False))
+ uploader = self._search_regex(
+ r'class="author">([^<]+)</a>', webpage, 'uploader', fatal=False)
+
+ view_count = str_to_int(self._search_regex(
+ r'<span class="views"><strong>([\d.]+)</strong>', webpage, 'view count', fatal=False))
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'timestamp': timestamp,
+ 'uploader': uploader,
+ 'view_count': view_count,
+ 'formats': formats,
+ }
diff --git a/youtube_dl/extractor/huffpost.py b/youtube_dl/extractor/huffpost.py
index 4ccf6b9b8..a38eae421 100644
--- a/youtube_dl/extractor/huffpost.py
+++ b/youtube_dl/extractor/huffpost.py
@@ -39,8 +39,9 @@ class HuffPostIE(InfoExtractor):
data = self._download_json(api_url, video_id)['data']
video_title = data['title']
- duration = parse_duration(data['running_time'])
- upload_date = unified_strdate(data['schedule']['starts_at'])
+ duration = parse_duration(data.get('running_time'))
+ upload_date = unified_strdate(
+ data.get('schedule', {}).get('starts_at') or data.get('segment_start_date_time'))
description = data.get('description')
thumbnails = []
@@ -59,16 +60,11 @@ class HuffPostIE(InfoExtractor):
'ext': 'mp4',
'url': url,
'vcodec': 'none' if key.startswith('audio/') else None,
- } for key, url in data['sources']['live'].items()]
- if data.get('fivemin_id'):
- fid = data['fivemin_id']
- fcat = str(int(fid) // 100 + 1)
- furl = 'http://avideos.5min.com/2/' + fcat[-3:] + '/' + fcat + '/' + fid + '.mp4'
- formats.append({
- 'format': 'fivemin',
- 'url': furl,
- 'preference': 1,
- })
+ } for key, url in data.get('sources', {}).get('live', {}).items()]
+
+ if not formats and data.get('fivemin_id'):
+ return self.url_result('5min:%s' % data['fivemin_id'])
+
self._sort_formats(formats)
return {
diff --git a/youtube_dl/extractor/imdb.py b/youtube_dl/extractor/imdb.py
index 13a53a0cb..f29df36b5 100644
--- a/youtube_dl/extractor/imdb.py
+++ b/youtube_dl/extractor/imdb.py
@@ -16,7 +16,6 @@ class ImdbIE(InfoExtractor):
_TEST = {
'url': 'http://www.imdb.com/video/imdb/vi2524815897',
- 'md5': '9f34fa777ade3a6e57a054fdbcb3a068',
'info_dict': {
'id': '2524815897',
'ext': 'mp4',
diff --git a/youtube_dl/extractor/khanacademy.py b/youtube_dl/extractor/khanacademy.py
index 408d00944..08a671fa8 100644
--- a/youtube_dl/extractor/khanacademy.py
+++ b/youtube_dl/extractor/khanacademy.py
@@ -22,8 +22,10 @@ class KhanAcademyIE(InfoExtractor):
'description': 'The perfect cipher',
'duration': 176,
'uploader': 'Brit Cruise',
+ 'uploader_id': 'khanacademy',
'upload_date': '20120411',
- }
+ },
+ 'add_ie': ['Youtube'],
}, {
'url': 'https://www.khanacademy.org/math/applied-math/cryptography',
'info_dict': {
diff --git a/youtube_dl/extractor/kontrtube.py b/youtube_dl/extractor/kontrtube.py
index 41fd62009..720bc939b 100644
--- a/youtube_dl/extractor/kontrtube.py
+++ b/youtube_dl/extractor/kontrtube.py
@@ -10,13 +10,14 @@ from ..utils import int_or_none
class KontrTubeIE(InfoExtractor):
IE_NAME = 'kontrtube'
IE_DESC = 'KontrTube.ru - Труба зовёт'
- _VALID_URL = r'http://(?:www\.)?kontrtube\.ru/videos/(?P<id>\d+)/.+'
+ _VALID_URL = r'http://(?:www\.)?kontrtube\.ru/videos/(?P<id>\d+)/(?P<display_id>[^/]+)/'
_TEST = {
'url': 'http://www.kontrtube.ru/videos/2678/nad-olimpiyskoy-derevney-v-sochi-podnyat-rossiyskiy-flag/',
'md5': '975a991a4926c9a85f383a736a2e6b80',
'info_dict': {
'id': '2678',
+ 'display_id': 'nad-olimpiyskoy-derevney-v-sochi-podnyat-rossiyskiy-flag',
'ext': 'mp4',
'title': 'Над олимпийской деревней в Сочи поднят российский флаг',
'description': 'md5:80edc4c613d5887ae8ccf1d59432be41',
@@ -28,21 +29,28 @@ class KontrTubeIE(InfoExtractor):
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
+ display_id = mobj.group('display_id')
- webpage = self._download_webpage(url, video_id, 'Downloading page')
+ webpage = self._download_webpage(
+ url, display_id, 'Downloading page')
- video_url = self._html_search_regex(r"video_url: '(.+?)/?',", webpage, 'video URL')
- thumbnail = self._html_search_regex(r"preview_url: '(.+?)/?',", webpage, 'video thumbnail', fatal=False)
+ video_url = self._html_search_regex(
+ r"video_url\s*:\s*'(.+?)/?',", webpage, 'video URL')
+ thumbnail = self._html_search_regex(
+ r"preview_url\s*:\s*'(.+?)/?',", webpage, 'video thumbnail', fatal=False)
title = self._html_search_regex(
r'<title>(.+?)</title>', webpage, 'video title')
- description = self._html_search_meta('description', webpage, 'video description')
+ description = self._html_search_meta(
+ 'description', webpage, 'video description')
mobj = re.search(
- r'<div class="col_2">Длительность: <span>(?P<minutes>\d+)м:(?P<seconds>\d+)с</span></div>', webpage)
+ r'<div class="col_2">Длительность: <span>(?P<minutes>\d+)м:(?P<seconds>\d+)с</span></div>',
+ webpage)
duration = int(mobj.group('minutes')) * 60 + int(mobj.group('seconds')) if mobj else None
view_count = self._html_search_regex(
- r'<div class="col_2">Просмотров: <span>(\d+)</span></div>', webpage, 'view count', fatal=False)
+ r'<div class="col_2">Просмотров: <span>(\d+)</span></div>',
+ webpage, 'view count', fatal=False)
comment_count = None
comment_str = self._html_search_regex(
@@ -56,6 +64,7 @@ class KontrTubeIE(InfoExtractor):
return {
'id': video_id,
+ 'display_id': display_id,
'url': video_url,
'thumbnail': thumbnail,
'title': title,
diff --git a/youtube_dl/extractor/lrt.py b/youtube_dl/extractor/lrt.py
index d72d470aa..9c2fbdd96 100644
--- a/youtube_dl/extractor/lrt.py
+++ b/youtube_dl/extractor/lrt.py
@@ -2,7 +2,6 @@
from __future__ import unicode_literals
import re
-import json
from .common import InfoExtractor
from ..utils import (
@@ -28,7 +27,6 @@ class LRTIE(InfoExtractor):
'params': {
'skip_download': True, # HLS download
},
-
}
def _real_extract(self, url):
@@ -44,7 +42,9 @@ class LRTIE(InfoExtractor):
formats = []
for js in re.findall(r'(?s)config:\s*(\{.*?\})', webpage):
- data = json.loads(js_to_json(js))
+ data = self._parse_json(js, video_id, transform_source=js_to_json)
+ if 'provider' not in data:
+ continue
if data['provider'] == 'rtmp':
formats.append({
'format_id': 'rtmp',
diff --git a/youtube_dl/extractor/mit.py b/youtube_dl/extractor/mit.py
index 78787e8f1..3c61a850f 100644
--- a/youtube_dl/extractor/mit.py
+++ b/youtube_dl/extractor/mit.py
@@ -105,6 +105,9 @@ class OCWMITIE(InfoExtractor):
'ext': 'mp4',
'title': 'Lecture 7: Multiple Discrete Random Variables: Expectations, Conditioning, Independence',
'description': 'In this lecture, the professor discussed multiple random variables, expectations, and binomial distribution.',
+ 'upload_date': '20121109',
+ 'uploader_id': 'MIT',
+ 'uploader': 'MIT OpenCourseWare',
# 'subtitles': 'http://ocw.mit.edu/courses/electrical-engineering-and-computer-science/6-041-probabilistic-systems-analysis-and-applied-probability-fall-2010/video-lectures/lecture-7-multiple-variables-expectations-independence/MIT6_041F11_lec07_300k.mp4.srt'
}
},
@@ -114,6 +117,9 @@ class OCWMITIE(InfoExtractor):
'id': '7K1sB05pE0A',
'ext': 'mp4',
'title': 'Session 1: Introduction to Derivatives',
+ 'upload_date': '20090818',
+ 'uploader_id': 'MIT',
+ 'uploader': 'MIT OpenCourseWare',
'description': 'This section contains lecture video excerpts, lecture notes, an interactive mathlet with supporting documents, and problem solving videos.',
# 'subtitles': 'http://ocw.mit.edu//courses/mathematics/18-01sc-single-variable-calculus-fall-2010/ocw-18.01-f07-lec01_300k.SRT'
}
diff --git a/youtube_dl/extractor/motorsport.py b/youtube_dl/extractor/motorsport.py
index f5ca74e97..c1a482dba 100644
--- a/youtube_dl/extractor/motorsport.py
+++ b/youtube_dl/extractor/motorsport.py
@@ -1,63 +1,49 @@
# coding: utf-8
from __future__ import unicode_literals
-import hashlib
-import json
-import time
-
from .common import InfoExtractor
from ..compat import (
- compat_parse_qs,
- compat_str,
-)
-from ..utils import (
- int_or_none,
+ compat_urlparse,
)
class MotorsportIE(InfoExtractor):
IE_DESC = 'motorsport.com'
- _VALID_URL = r'http://www\.motorsport\.com/[^/?#]+/video/(?:[^/?#]+/)(?P<id>[^/]+)/(?:$|[?#])'
+ _VALID_URL = r'http://www\.motorsport\.com/[^/?#]+/video/(?:[^/?#]+/)(?P<id>[^/]+)/?(?:$|[?#])'
_TEST = {
'url': 'http://www.motorsport.com/f1/video/main-gallery/red-bull-racing-2014-rules-explained/',
- 'md5': '5592cb7c5005d9b2c163df5ac3dc04e4',
'info_dict': {
- 'id': '7063',
+ 'id': '2-T3WuR-KMM',
'ext': 'mp4',
'title': 'Red Bull Racing: 2014 Rules Explained',
- 'duration': 207,
+ 'duration': 208,
'description': 'A new clip from Red Bull sees Daniel Ricciardo and Sebastian Vettel explain the 2014 Formula One regulations – which are arguably the most complex the sport has ever seen.',
- 'uploader': 'rainiere',
- 'thumbnail': r're:^http://.*motorsport\.com/.+\.jpg$'
- }
+ 'uploader': 'mcomstaff',
+ 'uploader_id': 'UC334JIYKkVnyFoNCclfZtHQ',
+ 'upload_date': '20140903',
+ 'thumbnail': r're:^https?://.+\.jpg$'
+ },
+ 'add_ie': ['Youtube'],
+ 'params': {
+ 'skip_download': True,
+ },
}
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
- flashvars_code = self._html_search_regex(
- r'<embed id="player".*?flashvars="([^"]+)"', webpage, 'flashvars')
- flashvars = compat_parse_qs(flashvars_code)
- params = json.loads(flashvars['parameters'][0])
-
- e = compat_str(int(time.time()) + 24 * 60 * 60)
- base_video_url = params['location'] + '?e=' + e
- s = 'h3hg713fh32'
- h = hashlib.md5((s + base_video_url).encode('utf-8')).hexdigest()
- video_url = base_video_url + '&h=' + h
-
- uploader = self._html_search_regex(
- r'(?s)<span class="label">Video by: </span>(.*?)</a>', webpage,
- 'uploader', fatal=False)
+ iframe_path = self._html_search_regex(
+ r'<iframe id="player_iframe"[^>]+src="([^"]+)"', webpage,
+ 'iframe path')
+ iframe = self._download_webpage(
+ compat_urlparse.urljoin(url, iframe_path), display_id,
+ 'Downloading iframe')
+ youtube_id = self._search_regex(
+ r'www.youtube.com/embed/(.{11})', iframe, 'youtube id')
return {
- 'id': params['video_id'],
+ '_type': 'url_transparent',
'display_id': display_id,
- 'title': params['title'],
- 'url': video_url,
- 'description': params.get('description'),
- 'thumbnail': params.get('main_thumb'),
- 'duration': int_or_none(params.get('duration')),
- 'uploader': uploader,
+ 'url': 'https://youtube.com/watch?v=%s' % youtube_id,
}
diff --git a/youtube_dl/extractor/netzkino.py b/youtube_dl/extractor/netzkino.py
new file mode 100644
index 000000000..93567d1e3
--- /dev/null
+++ b/youtube_dl/extractor/netzkino.py
@@ -0,0 +1,86 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ int_or_none,
+ js_to_json,
+ parse_iso8601,
+)
+
+
+class NetzkinoIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?netzkino\.de/\#!/(?P<category>[^/]+)/(?P<id>[^/]+)'
+
+ _TEST = {
+ 'url': 'http://www.netzkino.de/#!/scifikino/rakete-zum-mond',
+ 'md5': '92a3f8b76f8d7220acce5377ea5d4873',
+ 'info_dict': {
+ 'id': 'rakete-zum-mond',
+ 'ext': 'mp4',
+ 'title': 'Rakete zum Mond (Endstation Mond, Destination Moon)',
+ 'comments': 'mincount:3',
+ 'description': 'md5:1eddeacc7e62d5a25a2d1a7290c64a28',
+ 'upload_date': '20120813',
+ 'thumbnail': 're:https?://.*\.jpg$',
+ 'timestamp': 1344858571,
+ 'age_limit': 12,
+ },
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ category_id = mobj.group('category')
+ video_id = mobj.group('id')
+
+ api_url = 'http://api.netzkino.de.simplecache.net/capi-2.0a/categories/%s.json?d=www' % category_id
+ api_info = self._download_json(api_url, video_id)
+ info = next(
+ p for p in api_info['posts'] if p['slug'] == video_id)
+ custom_fields = info['custom_fields']
+
+ production_js = self._download_webpage(
+ 'http://www.netzkino.de/beta/dist/production.min.js', video_id,
+ note='Downloading player code')
+ avo_js = self._search_regex(
+ r'window\.avoCore\s*=.*?urlTemplate:\s*(\{.*?"\})',
+ production_js, 'URL templates')
+ templates = self._parse_json(
+ avo_js, video_id, transform_source=js_to_json)
+
+ suffix = {
+ 'hds': '.mp4/manifest.f4m',
+ 'hls': '.mp4/master.m3u8',
+ 'pmd': '.mp4',
+ }
+ film_fn = custom_fields['Streaming'][0]
+ formats = [{
+ 'format_id': key,
+ 'ext': 'mp4',
+ 'url': tpl.replace('{}', film_fn) + suffix[key],
+ } for key, tpl in templates.items()]
+ self._sort_formats(formats)
+
+ comments = [{
+ 'timestamp': parse_iso8601(c.get('date'), delimiter=' '),
+ 'id': c['id'],
+ 'author': c['name'],
+ 'html': c['content'],
+ 'parent': 'root' if c.get('parent', 0) == 0 else c['parent'],
+ } for c in info.get('comments', [])]
+
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ 'comments': comments,
+ 'title': info['title'],
+ 'age_limit': int_or_none(custom_fields.get('FSK')[0]),
+ 'timestamp': parse_iso8601(info.get('date'), delimiter=' '),
+ 'description': clean_html(info.get('content')),
+ 'thumbnail': info.get('thumbnail'),
+ 'playlist_title': api_info.get('title'),
+ 'playlist_id': category_id,
+ }
diff --git a/youtube_dl/extractor/normalboots.py b/youtube_dl/extractor/normalboots.py
index 3d35b11ac..c13ff0d65 100644
--- a/youtube_dl/extractor/normalboots.py
+++ b/youtube_dl/extractor/normalboots.py
@@ -22,7 +22,11 @@ class NormalbootsIE(InfoExtractor):
'description': 'Jon is late for Christmas. Typical. Thanks to: Paul Ritchey for Co-Writing/Filming: http://www.youtube.com/user/ContinueShow Michael Azzi for Christmas Intro Animation: http://michafrar.tumblr.com/ Jerrod Waters for Christmas Intro Music: http://www.youtube.com/user/xXJerryTerryXx Casey Ormond for ‘Tense Battle Theme’:\xa0http://www.youtube.com/Kiamet/',
'uploader': 'JonTron',
'upload_date': '20140125',
- }
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
}
def _real_extract(self, url):
diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py
index 43e8e619f..321ce5ce7 100644
--- a/youtube_dl/extractor/nrk.py
+++ b/youtube_dl/extractor/nrk.py
@@ -72,7 +72,7 @@ class NRKIE(InfoExtractor):
class NRKTVIE(InfoExtractor):
- _VALID_URL = r'http://tv\.nrk(?:super)?\.no/(?:serie/[^/]+|program)/(?P<id>[a-zA-Z]{4}\d{8})'
+ _VALID_URL = r'http://tv\.nrk(?:super)?\.no/(?:serie/[^/]+|program)/(?P<id>[a-zA-Z]{4}\d{8})(?:/\d{2}-\d{2}-\d{4})?(?:#del=(?P<part_id>\d+))?'
_TESTS = [
{
@@ -85,7 +85,7 @@ class NRKTVIE(InfoExtractor):
'description': 'md5:bdea103bc35494c143c6a9acdd84887a',
'upload_date': '20140523',
'duration': 1741.52,
- }
+ },
},
{
'url': 'http://tv.nrk.no/program/mdfp15000514',
@@ -97,39 +97,119 @@ class NRKTVIE(InfoExtractor):
'description': 'md5:654c12511f035aed1e42bdf5db3b206a',
'upload_date': '20140524',
'duration': 4605.0,
- }
+ },
},
+ {
+ # single playlist video
+ 'url': 'http://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015#del=2',
+ 'md5': 'adbd1dbd813edaf532b0a253780719c2',
+ 'info_dict': {
+ 'id': 'MSPO40010515-part2',
+ 'ext': 'flv',
+ 'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 2:2)',
+ 'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26',
+ 'upload_date': '20150106',
+ },
+ 'skip': 'Only works from Norway',
+ },
+ {
+ 'url': 'http://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015',
+ 'playlist': [
+ {
+ 'md5': '9480285eff92d64f06e02a5367970a7a',
+ 'info_dict': {
+ 'id': 'MSPO40010515-part1',
+ 'ext': 'flv',
+ 'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 1:2)',
+ 'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26',
+ 'upload_date': '20150106',
+ },
+ },
+ {
+ 'md5': 'adbd1dbd813edaf532b0a253780719c2',
+ 'info_dict': {
+ 'id': 'MSPO40010515-part2',
+ 'ext': 'flv',
+ 'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 2:2)',
+ 'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26',
+ 'upload_date': '20150106',
+ },
+ },
+ ],
+ 'info_dict': {
+ 'id': 'MSPO40010515',
+ 'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn',
+ 'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26',
+ 'upload_date': '20150106',
+ 'duration': 6947.5199999999995,
+ },
+ 'skip': 'Only works from Norway',
+ }
]
+ def _extract_f4m(self, manifest_url, video_id):
+ return self._extract_f4m_formats(manifest_url + '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124', video_id)
+
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
-
- page = self._download_webpage(url, video_id)
-
- title = self._html_search_meta('title', page, 'title')
- description = self._html_search_meta('description', page, 'description')
- thumbnail = self._html_search_regex(r'data-posterimage="([^"]+)"', page, 'thumbnail', fatal=False)
- upload_date = unified_strdate(self._html_search_meta('rightsfrom', page, 'upload date', fatal=False))
- duration = float_or_none(
- self._html_search_regex(r'data-duration="([^"]+)"', page, 'duration', fatal=False))
+ part_id = mobj.group('part_id')
+
+ webpage = self._download_webpage(url, video_id)
+
+ title = self._html_search_meta(
+ 'title', webpage, 'title')
+ description = self._html_search_meta(
+ 'description', webpage, 'description')
+
+ thumbnail = self._html_search_regex(
+ r'data-posterimage="([^"]+)"',
+ webpage, 'thumbnail', fatal=False)
+ upload_date = unified_strdate(self._html_search_meta(
+ 'rightsfrom', webpage, 'upload date', fatal=False))
+ duration = float_or_none(self._html_search_regex(
+ r'data-duration="([^"]+)"',
+ webpage, 'duration', fatal=False))
+
+ # playlist
+ parts = re.findall(
+ r'<a href="#del=(\d+)"[^>]+data-argument="([^"]+)">([^<]+)</a>', webpage)
+ if parts:
+ entries = []
+ for current_part_id, stream_url, part_title in parts:
+ if part_id and current_part_id != part_id:
+ continue
+ video_part_id = '%s-part%s' % (video_id, current_part_id)
+ formats = self._extract_f4m(stream_url, video_part_id)
+ entries.append({
+ 'id': video_part_id,
+ 'title': part_title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'upload_date': upload_date,
+ 'formats': formats,
+ })
+ if part_id:
+ if entries:
+ return entries[0]
+ else:
+ playlist = self.playlist_result(entries, video_id, title, description)
+ playlist.update({
+ 'thumbnail': thumbnail,
+ 'upload_date': upload_date,
+ 'duration': duration,
+ })
+ return playlist
formats = []
- f4m_url = re.search(r'data-media="([^"]+)"', page)
+ f4m_url = re.search(r'data-media="([^"]+)"', webpage)
if f4m_url:
- formats.append({
- 'url': f4m_url.group(1) + '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124',
- 'format_id': 'f4m',
- 'ext': 'flv',
- })
+ formats.extend(self._extract_f4m(f4m_url.group(1), video_id))
- m3u8_url = re.search(r'data-hls-media="([^"]+)"', page)
+ m3u8_url = re.search(r'data-hls-media="([^"]+)"', webpage)
if m3u8_url:
- formats.append({
- 'url': m3u8_url.group(1),
- 'format_id': 'm3u8',
- })
+ formats.extend(self._extract_m3u8_formats(m3u8_url.group(1), video_id, 'mp4'))
self._sort_formats(formats)
diff --git a/youtube_dl/extractor/played.py b/youtube_dl/extractor/played.py
index 449d4836c..45716c75d 100644
--- a/youtube_dl/extractor/played.py
+++ b/youtube_dl/extractor/played.py
@@ -26,6 +26,7 @@ class PlayedIE(InfoExtractor):
'ext': 'flv',
'title': 'youtube-dl_test_video.mp4',
},
+ 'skip': 'Removed for copyright infringement.', # oh wow
}
def _real_extract(self, url):
diff --git a/youtube_dl/extractor/radiobremen.py b/youtube_dl/extractor/radiobremen.py
new file mode 100644
index 000000000..0d706312e
--- /dev/null
+++ b/youtube_dl/extractor/radiobremen.py
@@ -0,0 +1,63 @@
+# -*- coding: utf-8 -*-
+
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import parse_duration
+
+
+class RadioBremenIE(InfoExtractor):
+ _VALID_URL = r'http?://(?:www\.)?radiobremen\.de/mediathek/(?:index\.html)?\?id=(?P<id>[0-9]+)'
+ IE_NAME = 'radiobremen'
+
+ _TEST = {
+ 'url': 'http://www.radiobremen.de/mediathek/index.html?id=114720',
+ 'info_dict': {
+ 'id': '114720',
+ 'ext': 'mp4',
+ 'duration': 1685,
+ 'width': 512,
+ 'title': 'buten un binnen vom 22. Dezember',
+ 'thumbnail': 're:https?://.*\.jpg$',
+ 'description': 'Unter anderem mit diesen Themen: 45 Flüchtlinge sind in Worpswede angekommen +++ Freies Internet für alle: Bremer arbeiten an einem flächendeckenden W-Lan-Netzwerk +++ Aktivisten kämpfen für das Unibad +++ So war das Wetter 2014 +++',
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ meta_url = "http://www.radiobremen.de/apps/php/mediathek/metadaten.php?id=%s" % video_id
+ meta_doc = self._download_webpage(
+ meta_url, video_id, 'Downloading metadata')
+ title = self._html_search_regex(
+ r"<h1.*>(?P<title>.+)</h1>", meta_doc, "title")
+ description = self._html_search_regex(
+ r"<p>(?P<description>.*)</p>", meta_doc, "description", fatal=False)
+ duration = parse_duration(self._html_search_regex(
+ r"L&auml;nge:</td>\s+<td>(?P<duration>[0-9]+:[0-9]+)</td>",
+ meta_doc, "duration", fatal=False))
+
+ page_doc = self._download_webpage(
+ url, video_id, 'Downloading video information')
+ mobj = re.search(
+ r"ardformatplayerclassic\(\'playerbereich\',\'(?P<width>[0-9]+)\',\'.*\',\'(?P<video_id>[0-9]+)\',\'(?P<secret>[0-9]+)\',\'(?P<thumbnail>.+)\',\'\'\)",
+ page_doc)
+ video_url = (
+ "http://dl-ondemand.radiobremen.de/mediabase/%s/%s_%s_%s.mp4" %
+ (video_id, video_id, mobj.group("secret"), mobj.group('width')))
+
+ formats = [{
+ 'url': video_url,
+ 'ext': 'mp4',
+ 'width': int(mobj.group("width")),
+ }]
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'duration': duration,
+ 'formats': formats,
+ 'thumbnail': mobj.group('thumbnail'),
+ }
diff --git a/youtube_dl/extractor/rtlnl.py b/youtube_dl/extractor/rtlnl.py
index d029b0ec5..a3ca79f2c 100644
--- a/youtube_dl/extractor/rtlnl.py
+++ b/youtube_dl/extractor/rtlnl.py
@@ -8,7 +8,7 @@ from ..utils import parse_duration
class RtlXlIE(InfoExtractor):
IE_NAME = 'rtlxl.nl'
- _VALID_URL = r'https?://www\.rtlxl\.nl/#!/[^/]+/(?P<uuid>[^/?]+)'
+ _VALID_URL = r'https?://(www\.)?rtlxl\.nl/#!/[^/]+/(?P<uuid>[^/?]+)'
_TEST = {
'url': 'http://www.rtlxl.nl/#!/rtl-nieuws-132237/6e4203a6-0a5e-3596-8424-c599a59e0677',
diff --git a/youtube_dl/extractor/rutube.py b/youtube_dl/extractor/rutube.py
index b72b5a586..5b1c3577a 100644
--- a/youtube_dl/extractor/rutube.py
+++ b/youtube_dl/extractor/rutube.py
@@ -70,6 +70,37 @@ class RutubeIE(InfoExtractor):
}
+class RutubeEmbedIE(InfoExtractor):
+ IE_NAME = 'rutube:embed'
+ IE_DESC = 'Rutube embedded videos'
+ _VALID_URL = 'https?://rutube\.ru/video/embed/(?P<id>[0-9]+)'
+
+ _TEST = {
+ 'url': 'http://rutube.ru/video/embed/6722881?vk_puid37=&vk_puid38=',
+ 'info_dict': {
+ 'id': 'a10e53b86e8f349080f718582ce4c661',
+ 'ext': 'mp4',
+ 'upload_date': '20131223',
+ 'uploader_id': '297833',
+ 'description': 'Видео группы ★http://vk.com/foxkidsreset★ музей Fox Kids и Jetix<br/><br/> восстановлено и сделано в шикоформате subziro89 http://vk.com/subziro89',
+ 'uploader': 'subziro89 ILya',
+ 'title': 'Мистический городок Эйри в Индиан 5 серия озвучка subziro89',
+ },
+ 'params': {
+ 'skip_download': 'Requires ffmpeg',
+ },
+ }
+
+ def _real_extract(self, url):
+ embed_id = self._match_id(url)
+ webpage = self._download_webpage(url, embed_id)
+
+ canonical_url = self._html_search_regex(
+ r'<link\s+rel="canonical"\s+href="([^"]+?)"', webpage,
+ 'Canonical URL')
+ return self.url_result(canonical_url, 'Rutube')
+
+
class RutubeChannelIE(InfoExtractor):
IE_NAME = 'rutube:channel'
IE_DESC = 'Rutube channels'
diff --git a/youtube_dl/extractor/sexykarma.py b/youtube_dl/extractor/sexykarma.py
index c833fc8ee..6446d26dc 100644
--- a/youtube_dl/extractor/sexykarma.py
+++ b/youtube_dl/extractor/sexykarma.py
@@ -24,7 +24,7 @@ class SexyKarmaIE(InfoExtractor):
'title': 'Taking a quick pee.',
'thumbnail': 're:^https?://.*\.jpg$',
'uploader': 'wildginger7',
- 'upload_date': '20141007',
+ 'upload_date': '20141008',
'duration': 22,
'view_count': int,
'comment_count': int,
@@ -45,6 +45,7 @@ class SexyKarmaIE(InfoExtractor):
'view_count': int,
'comment_count': int,
'categories': list,
+ 'age_limit': 18,
}
}, {
'url': 'http://www.watchindianporn.net/video/desi-dancer-namrata-stripping-completely-nude-and-dancing-on-a-hot-number-dW2mtctxJfs.html',
@@ -61,6 +62,7 @@ class SexyKarmaIE(InfoExtractor):
'view_count': int,
'comment_count': int,
'categories': list,
+ 'age_limit': 18,
}
}]
@@ -114,4 +116,5 @@ class SexyKarmaIE(InfoExtractor):
'view_count': view_count,
'comment_count': comment_count,
'categories': categories,
+ 'age_limit': 18,
}
diff --git a/youtube_dl/extractor/soulanime.py b/youtube_dl/extractor/soulanime.py
new file mode 100644
index 000000000..feef33e27
--- /dev/null
+++ b/youtube_dl/extractor/soulanime.py
@@ -0,0 +1,80 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ HEADRequest,
+ urlhandle_detect_ext,
+)
+
+
+class SoulAnimeWatchingIE(InfoExtractor):
+ IE_NAME = "soulanime:watching"
+ IE_DESC = "SoulAnime video"
+ _TEST = {
+ 'url': 'http://www.soul-anime.net/watching/seirei-tsukai-no-blade-dance-episode-9/',
+ 'md5': '05fae04abf72298098b528e98abf4298',
+ 'info_dict': {
+ 'id': 'seirei-tsukai-no-blade-dance-episode-9',
+ 'ext': 'mp4',
+ 'title': 'seirei-tsukai-no-blade-dance-episode-9',
+ 'description': 'seirei-tsukai-no-blade-dance-episode-9'
+ }
+ }
+ _VALID_URL = r'http://[w.]*soul-anime\.(?P<domain>[^/]+)/watch[^/]*/(?P<id>[^/]+)'
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ domain = mobj.group('domain')
+
+ page = self._download_webpage(url, video_id)
+
+ video_url_encoded = self._html_search_regex(
+ r'<div id="download">[^<]*<a href="(?P<url>[^"]+)"', page, 'url')
+ video_url = "http://www.soul-anime." + domain + video_url_encoded
+
+ ext_req = HEADRequest(video_url)
+ ext_handle = self._request_webpage(
+ ext_req, video_id, note='Determining extension')
+ ext = urlhandle_detect_ext(ext_handle)
+
+ return {
+ 'id': video_id,
+ 'url': video_url,
+ 'ext': ext,
+ 'title': video_id,
+ 'description': video_id
+ }
+
+
+class SoulAnimeSeriesIE(InfoExtractor):
+ IE_NAME = "soulanime:series"
+ IE_DESC = "SoulAnime Series"
+
+ _VALID_URL = r'http://[w.]*soul-anime\.(?P<domain>[^/]+)/anime./(?P<id>[^/]+)'
+
+ _EPISODE_REGEX = r'<option value="(/watch[^/]*/[^"]+)">[^<]*</option>'
+
+ _TEST = {
+ 'url': 'http://www.soul-anime.net/anime1/black-rock-shooter-tv/',
+ 'info_dict': {
+ 'id': 'black-rock-shooter-tv'
+ },
+ 'playlist_count': 8
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ series_id = mobj.group('id')
+ domain = mobj.group('domain')
+
+ pattern = re.compile(self._EPISODE_REGEX)
+
+ page = self._download_webpage(url, series_id, "Downloading series page")
+ mobj = pattern.findall(page)
+
+ entries = [self.url_result("http://www.soul-anime." + domain + obj) for obj in mobj]
+
+ return self.playlist_result(entries, series_id)
diff --git a/youtube_dl/extractor/teachertube.py b/youtube_dl/extractor/teachertube.py
index 6c3445d79..82675431f 100644
--- a/youtube_dl/extractor/teachertube.py
+++ b/youtube_dl/extractor/teachertube.py
@@ -57,9 +57,7 @@ class TeacherTubeIE(InfoExtractor):
}]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
-
+ video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
title = self._html_search_meta('title', webpage, 'title', fatal=True)
diff --git a/youtube_dl/extractor/ted.py b/youtube_dl/extractor/ted.py
index 944177426..10b3b706a 100644
--- a/youtube_dl/extractor/ted.py
+++ b/youtube_dl/extractor/ted.py
@@ -13,7 +13,7 @@ from ..compat import (
class TEDIE(SubtitlesInfoExtractor):
_VALID_URL = r'''(?x)
(?P<proto>https?://)
- (?P<type>www|embed)(?P<urlmain>\.ted\.com/
+ (?P<type>www|embed(?:-ssl)?)(?P<urlmain>\.ted\.com/
(
(?P<type_playlist>playlists(?:/\d+)?) # We have a playlist
|
@@ -98,7 +98,7 @@ class TEDIE(SubtitlesInfoExtractor):
def _real_extract(self, url):
m = re.match(self._VALID_URL, url, re.VERBOSE)
- if m.group('type') == 'embed':
+ if m.group('type').startswith('embed'):
desktop_url = m.group('proto') + 'www' + m.group('urlmain')
return self.url_result(desktop_url, 'TED')
name = m.group('name')
diff --git a/youtube_dl/extractor/tf1.py b/youtube_dl/extractor/tf1.py
index 6e61cc9e2..025d0877c 100644
--- a/youtube_dl/extractor/tf1.py
+++ b/youtube_dl/extractor/tf1.py
@@ -1,15 +1,13 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
-
from .common import InfoExtractor
class TF1IE(InfoExtractor):
"""TF1 uses the wat.tv player."""
- _VALID_URL = r'http://videos\.tf1\.fr/.*-(?P<id>.*?)\.html'
- _TEST = {
+ _VALID_URL = r'http://(?:videos\.tf1|www\.tfou)\.fr/.*?-(?P<id>\d+)(?:-\d+)?\.html'
+ _TESTS = {
'url': 'http://videos.tf1.fr/auto-moto/citroen-grand-c4-picasso-2013-presentation-officielle-8062060.html',
'info_dict': {
'id': '10635995',
@@ -21,14 +19,26 @@ class TF1IE(InfoExtractor):
# Sometimes wat serves the whole file with the --test option
'skip_download': True,
},
+ }, {
+ 'url': 'http://www.tfou.fr/chuggington/videos/le-grand-mysterioso-chuggington-7085291-739.html',
+ 'info_dict': {
+ 'id': '12043945',
+ 'ext': 'mp4',
+ 'title': 'Le grand Mystérioso - Chuggington',
+ 'description': 'Le grand Mystérioso - Emery rêve qu\'un article lui soit consacré dans le journal.',
+ 'upload_date': '20150103',
+ },
+ 'params': {
+ # Sometimes wat serves the whole file with the --test option
+ 'skip_download': True,
+ },
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
+ video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
embed_url = self._html_search_regex(
- r'"(https://www.wat.tv/embedframe/.*?)"', webpage, 'embed url')
+ r'["\'](https?://www.wat.tv/embedframe/.*?)["\']', webpage, 'embed url')
embed_page = self._download_webpage(embed_url, video_id,
'Downloading embed player page')
wat_id = self._search_regex(r'UVID=(.*?)&', embed_page, 'wat id')
diff --git a/youtube_dl/extractor/tudou.py b/youtube_dl/extractor/tudou.py
index 161e47624..c89de5ba4 100644
--- a/youtube_dl/extractor/tudou.py
+++ b/youtube_dl/extractor/tudou.py
@@ -9,7 +9,7 @@ from .common import InfoExtractor
class TudouIE(InfoExtractor):
- _VALID_URL = r'(?:http://)?(?:www\.)?tudou\.com/(?:listplay|programs|albumplay)/(?:view|(.+?))/(?:([^/]+)|([^/]+))(?:\.html)?'
+ _VALID_URL = r'https?://(?:www\.)?tudou\.com/(?:listplay|programs(?:/view)?|albumplay)/.*?/(?P<id>[^/?#]+?)(?:\.html)?/?(?:$|[?#])'
_TESTS = [{
'url': 'http://www.tudou.com/listplay/zzdE77v6Mmo/2xN2duXMxmw.html',
'md5': '140a49ed444bd22f93330985d8475fcb',
@@ -27,13 +27,6 @@ class TudouIE(InfoExtractor):
'title': 'La Sylphide-Bolshoi-Ekaterina Krysanova & Vyacheslav Lopatin 2012',
'thumbnail': 're:^https?://.*\.jpg$',
}
- }, {
- 'url': 'http://www.tudou.com/albumplay/TenTw_JgiPM/PzsAs5usU9A.html',
- 'info_dict': {
- 'title': 'todo.mp4',
- },
- 'add_ie': ['Youku'],
- 'skip': 'Only works from China'
}]
def _url_for_id(self, id, quality=None):
@@ -45,8 +38,7 @@ class TudouIE(InfoExtractor):
return final_url
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group(2)
+ video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
m = re.search(r'vcode:\s*[\'"](.+?)[\'"]', webpage)
@@ -87,4 +79,9 @@ class TudouIE(InfoExtractor):
}
result.append(part_info)
- return result
+ return {
+ '_type': 'multi_video',
+ 'entries': result,
+ 'id': video_id,
+ 'title': title,
+ }
diff --git a/youtube_dl/extractor/tunein.py b/youtube_dl/extractor/tunein.py
index 4ce5aeeba..b6b1f2568 100644
--- a/youtube_dl/extractor/tunein.py
+++ b/youtube_dl/extractor/tunein.py
@@ -24,7 +24,7 @@ class TuneInIE(InfoExtractor):
_INFO_DICT = {
'id': '34682',
'title': 'Jazz 24 on 88.5 Jazz24 - KPLU-HD2',
- 'ext': 'AAC',
+ 'ext': 'aac',
'thumbnail': 're:^https?://.*\.png$',
'location': 'Tacoma, WA',
}
@@ -78,14 +78,21 @@ class TuneInIE(InfoExtractor):
for stream in streams:
if stream.get('Type') == 'Live':
is_live = True
+ reliability = stream.get('Reliability')
+ format_note = (
+ 'Reliability: %d%%' % reliability
+ if reliability is not None else None)
formats.append({
+ 'preference': (
+ 0 if reliability is None or reliability > 90
+ else 1),
'abr': stream.get('Bandwidth'),
- 'ext': stream.get('MediaType'),
+ 'ext': stream.get('MediaType').lower(),
'acodec': stream.get('MediaType'),
'vcodec': 'none',
'url': stream.get('Url'),
- # Sometimes streams with the highest quality do not exist
- 'preference': stream.get('Reliability'),
+ 'source_preference': reliability,
+ 'format_note': format_note,
})
self._sort_formats(formats)
diff --git a/youtube_dl/extractor/vier.py b/youtube_dl/extractor/vier.py
new file mode 100644
index 000000000..619039e51
--- /dev/null
+++ b/youtube_dl/extractor/vier.py
@@ -0,0 +1,118 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class VierIE(InfoExtractor):
+ IE_NAME = 'vier'
+ _VALID_URL = r'https?://(?:www\.)?vier\.be/(?:[^/]+/videos/(?P<display_id>[^/]+)(?:/(?P<id>\d+))?|video/v3/embed/(?P<embed_id>\d+))'
+ _TESTS = [{
+ 'url': 'http://www.vier.be/planb/videos/het-wordt-warm-de-moestuin/16129',
+ 'info_dict': {
+ 'id': '16129',
+ 'display_id': 'het-wordt-warm-de-moestuin',
+ 'ext': 'mp4',
+ 'title': 'Het wordt warm in De Moestuin',
+ 'description': 'De vele uren werk eisen hun tol. Wim droomt van assistentie...',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://www.vier.be/planb/videos/mieren-herders-van-de-bladluizen',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.vier.be/video/v3/embed/16129',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ embed_id = mobj.group('embed_id')
+ display_id = mobj.group('display_id') or embed_id
+
+ webpage = self._download_webpage(url, display_id)
+
+ video_id = self._search_regex(
+ r'"nid"\s*:\s*"(\d+)"', webpage, 'video id')
+ application = self._search_regex(
+ r'"application"\s*:\s*"([^"]+)"', webpage, 'application', default='vier_vod')
+ filename = self._search_regex(
+ r'"filename"\s*:\s*"([^"]+)"', webpage, 'filename')
+
+ playlist_url = 'http://vod.streamcloud.be/%s/mp4:_definst_/%s.mp4/playlist.m3u8' % (application, filename)
+ formats = self._extract_m3u8_formats(playlist_url, display_id, 'mp4')
+
+ title = self._og_search_title(webpage, default=display_id)
+ description = self._og_search_description(webpage, default=None)
+ thumbnail = self._og_search_thumbnail(webpage, default=None)
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'formats': formats,
+ }
+
+
+class VierVideosIE(InfoExtractor):
+ IE_NAME = 'vier:videos'
+ _VALID_URL = r'https?://(?:www\.)?vier\.be/(?P<program>[^/]+)/videos(?:\?.*\bpage=(?P<page>\d+)|$)'
+ _TESTS = [{
+ 'url': 'http://www.vier.be/demoestuin/videos',
+ 'info_dict': {
+ 'id': 'demoestuin',
+ },
+ 'playlist_mincount': 153,
+ }, {
+ 'url': 'http://www.vier.be/demoestuin/videos?page=6',
+ 'info_dict': {
+ 'id': 'demoestuin-page6',
+ },
+ 'playlist_mincount': 20,
+ }, {
+ 'url': 'http://www.vier.be/demoestuin/videos?page=7',
+ 'info_dict': {
+ 'id': 'demoestuin-page7',
+ },
+ 'playlist_mincount': 13,
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ program = mobj.group('program')
+
+ webpage = self._download_webpage(url, program)
+
+ page_id = mobj.group('page')
+ if page_id:
+ page_id = int(page_id)
+ start_page = page_id
+ last_page = start_page + 1
+ playlist_id = '%s-page%d' % (program, page_id)
+ else:
+ start_page = 0
+ last_page = int(self._search_regex(
+ r'videos\?page=(\d+)">laatste</a>',
+ webpage, 'last page', default=0)) + 1
+ playlist_id = program
+
+ entries = []
+ for current_page_id in range(start_page, last_page):
+ current_page = self._download_webpage(
+ 'http://www.vier.be/%s/videos?page=%d' % (program, current_page_id),
+ program,
+ 'Downloading page %d' % (current_page_id + 1)) if current_page_id != page_id else webpage
+ page_entries = [
+ self.url_result('http://www.vier.be' + video_url, 'Vier')
+ for video_url in re.findall(
+ r'<h3><a href="(/[^/]+/videos/[^/]+(?:/\d+)?)">', current_page)]
+ entries.extend(page_entries)
+
+ return self.playlist_result(entries, playlist_id)
diff --git a/youtube_dl/extractor/viki.py b/youtube_dl/extractor/viki.py
index 15f315298..944901e14 100644
--- a/youtube_dl/extractor/viki.py
+++ b/youtube_dl/extractor/viki.py
@@ -17,7 +17,6 @@ class VikiIE(SubtitlesInfoExtractor):
_VALID_URL = r'^https?://(?:www\.)?viki\.com/videos/(?P<id>[0-9]+v)'
_TEST = {
'url': 'http://www.viki.com/videos/1023585v-heirs-episode-14',
- 'md5': 'a21454021c2646f5433514177e2caa5f',
'info_dict': {
'id': '1023585v',
'ext': 'mp4',
@@ -31,8 +30,7 @@ class VikiIE(SubtitlesInfoExtractor):
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group(1)
+ video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
title = self._og_search_title(webpage)
diff --git a/youtube_dl/extractor/vimple.py b/youtube_dl/extractor/vimple.py
index 33d370e1c..ee3d86117 100644
--- a/youtube_dl/extractor/vimple.py
+++ b/youtube_dl/extractor/vimple.py
@@ -14,28 +14,17 @@ class VimpleIE(InfoExtractor):
IE_DESC = 'Vimple.ru'
_VALID_URL = r'https?://(player.vimple.ru/iframe|vimple.ru)/(?P<id>[a-f0-9]{10,})'
_TESTS = [
- # Quality: Large, from iframe
{
- 'url': 'http://player.vimple.ru/iframe/b132bdfd71b546d3972f9ab9a25f201c',
+ 'url': 'http://vimple.ru/c0f6b1687dcd4000a97ebe70068039cf',
+ 'md5': '2e750a330ed211d3fd41821c6ad9a279',
'info_dict': {
- 'id': 'b132bdfd71b546d3972f9ab9a25f201c',
- 'title': 'great-escape-minecraft.flv',
+ 'id': 'c0f6b1687dcd4000a97ebe70068039cf',
'ext': 'mp4',
- 'duration': 352,
- 'webpage_url': 'http://vimple.ru/b132bdfd71b546d3972f9ab9a25f201c',
+ 'title': 'Sunset',
+ 'duration': 20,
+ 'thumbnail': 're:https?://.*?\.jpg',
},
},
- # Quality: Medium, from mainpage
- {
- 'url': 'http://vimple.ru/a15950562888453b8e6f9572dc8600cd',
- 'info_dict': {
- 'id': 'a15950562888453b8e6f9572dc8600cd',
- 'title': 'DB 01',
- 'ext': 'flv',
- 'duration': 1484,
- 'webpage_url': 'http://vimple.ru/a15950562888453b8e6f9572dc8600cd',
- }
- },
]
def _real_extract(self, url):
diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py
index 542e9198a..81e02a624 100644
--- a/youtube_dl/extractor/vk.py
+++ b/youtube_dl/extractor/vk.py
@@ -164,6 +164,14 @@ class VKIE(InfoExtractor):
self.to_screen('Youtube video detected')
return self.url_result(m_yt.group(1), 'Youtube')
+ m_rutube = re.search(
+ r'\ssrc="((?:https?:)?//rutube\.ru\\?/video\\?/embed(?:.*?))\\?"', info_page)
+ if m_rutube is not None:
+ self.to_screen('rutube video detected')
+ rutube_url = self._proto_relative_url(
+ m_rutube.group(1).replace('\\', ''))
+ return self.url_result(rutube_url)
+
m_opts = re.search(r'(?s)var\s+opts\s*=\s*({.*?});', info_page)
if m_opts:
m_opts_url = re.search(r"url\s*:\s*'([^']+)", m_opts.group(1))
diff --git a/youtube_dl/extractor/washingtonpost.py b/youtube_dl/extractor/washingtonpost.py
index 88bbbb219..c17bebd6e 100644
--- a/youtube_dl/extractor/washingtonpost.py
+++ b/youtube_dl/extractor/washingtonpost.py
@@ -10,14 +10,14 @@ from ..utils import (
class WashingtonPostIE(InfoExtractor):
- _VALID_URL = r'^https?://(?:www\.)?washingtonpost\.com/.*?/(?P<id>[^/]+)/(?:$|[?#])'
+ _VALID_URL = r'https?://(?:www\.)?washingtonpost\.com/.*?/(?P<id>[^/]+)/(?:$|[?#])'
_TEST = {
'url': 'http://www.washingtonpost.com/sf/national/2014/03/22/sinkhole-of-bureaucracy/',
'info_dict': {
'title': 'Sinkhole of bureaucracy',
},
'playlist': [{
- 'md5': 'c3f4b4922ffa259243f68e928db2db8c',
+ 'md5': '79132cc09ec5309fa590ae46e4cc31bc',
'info_dict': {
'id': 'fc433c38-b146-11e3-b8b3-44b1d1cd4c1f',
'ext': 'mp4',
@@ -29,7 +29,7 @@ class WashingtonPostIE(InfoExtractor):
'upload_date': '20140322',
},
}, {
- 'md5': 'f645a07652c2950cd9134bb852c5f5eb',
+ 'md5': 'e1d5734c06865cc504ad99dc2de0d443',
'info_dict': {
'id': '41255e28-b14a-11e3-b8b3-44b1d1cd4c1f',
'ext': 'mp4',
@@ -44,10 +44,9 @@ class WashingtonPostIE(InfoExtractor):
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- page_id = mobj.group('id')
-
+ page_id = self._match_id(url)
webpage = self._download_webpage(url, page_id)
+
title = self._og_search_title(webpage)
uuids = re.findall(r'data-video-uuid="([^"]+)"', webpage)
entries = []
diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py
index 8e25ecf28..45466e31b 100644
--- a/youtube_dl/extractor/wdr.py
+++ b/youtube_dl/extractor/wdr.py
@@ -1,6 +1,7 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
+import itertools
import re
from .common import InfoExtractor
@@ -67,6 +68,10 @@ class WDRIE(InfoExtractor):
'upload_date': '20140717',
},
},
+ {
+ 'url': 'http://www1.wdr.de/mediathek/video/sendungen/quarks_und_co/filterseite-quarks-und-co100.html',
+ 'playlist_mincount': 146,
+ }
]
def _real_extract(self, url):
@@ -81,6 +86,27 @@ class WDRIE(InfoExtractor):
self.url_result(page_url + href, 'WDR')
for href in re.findall(r'<a href="/?(.+?%s\.html)" rel="nofollow"' % self._PLAYER_REGEX, webpage)
]
+
+ if entries: # Playlist page
+ return self.playlist_result(entries, page_id)
+
+ # Overview page
+ entries = []
+ for page_num in itertools.count(2):
+ hrefs = re.findall(
+ r'<li class="mediathekvideo"\s*>\s*<img[^>]*>\s*<a href="(/mediathek/video/[^"]+)"',
+ webpage)
+ entries.extend(
+ self.url_result(page_url + href, 'WDR')
+ for href in hrefs)
+ next_url_m = re.search(
+ r'<li class="nextToLast">\s*<a href="([^"]+)"', webpage)
+ if not next_url_m:
+ break
+ next_url = page_url + next_url_m.group(1)
+ webpage = self._download_webpage(
+ next_url, page_id,
+ note='Downloading playlist page %d' % page_num)
return self.playlist_result(entries, page_id)
flashvars = compat_parse_qs(
@@ -172,8 +198,7 @@ class WDRMausIE(InfoExtractor):
}]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
+ video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
param_code = self._html_search_regex(
@@ -224,5 +249,3 @@ class WDRMausIE(InfoExtractor):
'thumbnail': thumbnail,
'upload_date': upload_date,
}
-
-# TODO test _1
diff --git a/youtube_dl/extractor/webofstories.py b/youtube_dl/extractor/webofstories.py
new file mode 100644
index 000000000..396cf4e83
--- /dev/null
+++ b/youtube_dl/extractor/webofstories.py
@@ -0,0 +1,102 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import int_or_none
+
+
+class WebOfStoriesIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?webofstories\.com/play/(?:[^/]+/)?(?P<id>[0-9]+)'
+ _VIDEO_DOMAIN = 'http://eu-mobile.webofstories.com/'
+ _GREAT_LIFE_STREAMER = 'rtmp://eu-cdn1.webofstories.com/cfx/st/'
+ _USER_STREAMER = 'rtmp://eu-users.webofstories.com/cfx/st/'
+ _TESTS = [
+ {
+ 'url': 'http://www.webofstories.com/play/hans.bethe/71',
+ 'md5': '373e4dd915f60cfe3116322642ddf364',
+ 'info_dict': {
+ 'id': '4536',
+ 'ext': 'mp4',
+ 'title': 'The temperature of the sun',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ 'description': 'Hans Bethe talks about calculating the temperature of the sun',
+ 'duration': 238,
+ }
+ },
+ {
+ 'url': 'http://www.webofstories.com/play/55908',
+ 'md5': '2985a698e1fe3211022422c4b5ed962c',
+ 'info_dict': {
+ 'id': '55908',
+ 'ext': 'mp4',
+ 'title': 'The story of Gemmata obscuriglobus',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ 'description': 'Planctomycete talks about The story of Gemmata obscuriglobus',
+ 'duration': 169,
+ }
+ },
+ ]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+ title = self._og_search_title(webpage)
+ description = self._html_search_meta('description', webpage)
+ thumbnail = self._og_search_thumbnail(webpage)
+
+ story_filename = self._search_regex(
+ r'\.storyFileName\("([^"]+)"\)', webpage, 'story filename')
+ speaker_id = self._search_regex(
+ r'\.speakerId\("([^"]+)"\)', webpage, 'speaker ID')
+ story_id = self._search_regex(
+ r'\.storyId\((\d+)\)', webpage, 'story ID')
+ speaker_type = self._search_regex(
+ r'\.speakerType\("([^"]+)"\)', webpage, 'speaker type')
+ great_life = self._search_regex(
+ r'isGreatLifeStory\s*=\s*(true|false)', webpage, 'great life story')
+ is_great_life_series = great_life == 'true'
+ duration = int_or_none(self._search_regex(
+ r'\.duration\((\d+)\)', webpage, 'duration', fatal=False))
+
+ # URL building, see: http://www.webofstories.com/scripts/player.js
+ ms_prefix = ''
+ if speaker_type.lower() == 'ms':
+ ms_prefix = 'mini_sites/'
+
+ if is_great_life_series:
+ mp4_url = '{0:}lives/{1:}/{2:}.mp4'.format(
+ self._VIDEO_DOMAIN, speaker_id, story_filename)
+ rtmp_ext = 'flv'
+ streamer = self._GREAT_LIFE_STREAMER
+ play_path = 'stories/{0:}/{1:}'.format(
+ speaker_id, story_filename)
+ else:
+ mp4_url = '{0:}{1:}{2:}/{3:}.mp4'.format(
+ self._VIDEO_DOMAIN, ms_prefix, speaker_id, story_filename)
+ rtmp_ext = 'mp4'
+ streamer = self._USER_STREAMER
+ play_path = 'mp4:{0:}{1:}/{2}.mp4'.format(
+ ms_prefix, speaker_id, story_filename)
+
+ formats = [{
+ 'format_id': 'mp4_sd',
+ 'url': mp4_url,
+ }, {
+ 'format_id': 'rtmp_sd',
+ 'page_url': url,
+ 'url': streamer,
+ 'ext': rtmp_ext,
+ 'play_path': play_path,
+ }]
+
+ self._sort_formats(formats)
+
+ return {
+ 'id': story_id,
+ 'title': title,
+ 'formats': formats,
+ 'thumbnail': thumbnail,
+ 'description': description,
+ 'duration': duration,
+ }
diff --git a/youtube_dl/extractor/xhamster.py b/youtube_dl/extractor/xhamster.py
index 6b37bcbc9..4527567f8 100644
--- a/youtube_dl/extractor/xhamster.py
+++ b/youtube_dl/extractor/xhamster.py
@@ -14,7 +14,7 @@ from ..utils import (
class XHamsterIE(InfoExtractor):
"""Information Extractor for xHamster"""
- _VALID_URL = r'http://(?:.+?\.)?xhamster\.com/movies/(?P<id>[0-9]+)/(?P<seo>.+?)\.html(?:\?.*)?'
+ _VALID_URL = r'(?P<proto>https?)://(?:.+?\.)?xhamster\.com/movies/(?P<id>[0-9]+)/(?P<seo>.+?)\.html(?:\?.*)?'
_TESTS = [
{
'url': 'http://xhamster.com/movies/1509445/femaleagent_shy_beauty_takes_the_bait.html',
@@ -39,7 +39,11 @@ class XHamsterIE(InfoExtractor):
'duration': 200,
'age_limit': 18,
}
- }
+ },
+ {
+ 'url': 'https://xhamster.com/movies/2272726/amber_slayed_by_the_knight.html',
+ 'only_matching': True,
+ },
]
def _real_extract(self, url):
@@ -57,7 +61,8 @@ class XHamsterIE(InfoExtractor):
video_id = mobj.group('id')
seo = mobj.group('seo')
- mrss_url = 'http://xhamster.com/movies/%s/%s.html' % (video_id, seo)
+ proto = mobj.group('proto')
+ mrss_url = '%s://xhamster.com/movies/%s/%s.html' % (proto, video_id, seo)
webpage = self._download_webpage(mrss_url, video_id)
title = self._html_search_regex(r'<title>(?P<title>.+?) - xHamster\.com</title>', webpage, 'title')
diff --git a/youtube_dl/extractor/xtube.py b/youtube_dl/extractor/xtube.py
index cf74d4fd5..e8490b028 100644
--- a/youtube_dl/extractor/xtube.py
+++ b/youtube_dl/extractor/xtube.py
@@ -40,7 +40,7 @@ class XTubeIE(InfoExtractor):
r'<p class="title">([^<]+)', webpage, 'title')
video_uploader = self._html_search_regex(
[r"var\s+contentOwnerId\s*=\s*'([^']+)",
- r'By:\s*<a href="/community/profile\.php?user=([^"]+)'],
+ r'By:\s*<a href="/community/profile\.php\?user=([^"]+)'],
webpage, 'uploader', fatal=False)
video_description = self._html_search_regex(
r'<p class="fieldsDesc">([^<]+)',
@@ -95,6 +95,7 @@ class XTubeUserIE(InfoExtractor):
'url': 'http://www.xtube.com/community/profile.php?user=greenshowers',
'info_dict': {
'id': 'greenshowers',
+ 'age_limit': 18,
},
'playlist_mincount': 155,
}
@@ -124,6 +125,7 @@ class XTubeUserIE(InfoExtractor):
return {
'_type': 'playlist',
'id': username,
+ 'age_limit': 18,
'entries': [{
'_type': 'url',
'url': eurl,
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 7f5aeb25b..bc18276d6 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -256,7 +256,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
'135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
'136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
'137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
- '138': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
+ '138': {'ext': 'mp4', 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, # Height can vary (https://github.com/rg3/youtube-dl/issues/4559)
'160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
'264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
'298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'h264'},
@@ -264,9 +264,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
'266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'vcodec': 'h264'},
# Dash mp4 audio
- '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 48, 'preference': -50},
- '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 128, 'preference': -50},
- '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 256, 'preference': -50},
+ '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 48, 'preference': -50},
+ '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 128, 'preference': -50},
+ '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 256, 'preference': -50},
# Dash webm
'167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
@@ -287,7 +287,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
'272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
'302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'VP9'},
'303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'VP9'},
+ '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'VP9'},
'313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'vcodec': 'VP9'},
+ '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'VP9'},
# Dash webm audio
'171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 128, 'preference': -50},
@@ -412,7 +414,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
'id': 'HtVdAasjOgU',
'ext': 'mp4',
'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
- 'description': 'md5:eca57043abae25130f58f655ad9a7771',
+ 'description': 're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
'uploader': 'The Witcher',
'uploader_id': 'WitcherGame',
'upload_date': '20140605',
@@ -736,6 +738,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
'format_id': format_id,
'url': video_url,
'width': int_or_none(r.attrib.get('width')),
+ 'height': int_or_none(r.attrib.get('height')),
'tbr': int_or_none(r.attrib.get('bandwidth'), 1000),
'asr': int_or_none(r.attrib.get('audioSamplingRate')),
'filesize': filesize,
@@ -746,7 +749,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
fo for fo in formats
if fo['format_id'] == format_id)
except StopIteration:
- f.update(self._formats.get(format_id, {}))
+ f.update(self._formats.get(format_id, {}).items())
formats.append(f)
else:
existing_format.update(f)
@@ -1040,6 +1043,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
self.report_warning(
'Skipping DASH manifest: %r' % e, video_id)
else:
+ # Hide the formats we found through non-DASH
+ dash_keys = set(df['format_id'] for df in dash_formats)
+ for f in formats:
+ if f['format_id'] in dash_keys:
+ f['format_id'] = 'nondash-%s' % f['format_id']
+ f['preference'] = f.get('preference', 0) - 10000
formats.extend(dash_formats)
self._sort_formats(formats)
@@ -1199,9 +1208,6 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
if playlist_id.startswith('RD'):
# Mixes require a custom extraction process
return self._extract_mix(playlist_id)
- if playlist_id.startswith('TL'):
- raise ExtractorError('For downloading YouTube.com top lists, use '
- 'the "yttoplist" keyword, for example "youtube-dl \'yttoplist:music:Top Tracks\'"', expected=True)
url = self._TEMPLATE_URL % playlist_id
page = self._download_webpage(url, playlist_id)
@@ -1247,49 +1253,6 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
return self.playlist_result(url_results, playlist_id, playlist_title)
-class YoutubeTopListIE(YoutubePlaylistIE):
- IE_NAME = 'youtube:toplist'
- IE_DESC = ('YouTube.com top lists, "yttoplist:{channel}:{list title}"'
- ' (Example: "yttoplist:music:Top Tracks")')
- _VALID_URL = r'yttoplist:(?P<chann>.*?):(?P<title>.*?)$'
- _TESTS = [{
- 'url': 'yttoplist:music:Trending',
- 'playlist_mincount': 5,
- 'skip': 'Only works for logged-in users',
- }]
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- channel = mobj.group('chann')
- title = mobj.group('title')
- query = compat_urllib_parse.urlencode({'title': title})
- channel_page = self._download_webpage(
- 'https://www.youtube.com/%s' % channel, title)
- link = self._html_search_regex(
- r'''(?x)
- <a\s+href="([^"]+)".*?>\s*
- <span\s+class="branded-page-module-title-text">\s*
- <span[^>]*>.*?%s.*?</span>''' % re.escape(query),
- channel_page, 'list')
- url = compat_urlparse.urljoin('https://www.youtube.com/', link)
-
- video_re = r'data-index="\d+".*?data-video-id="([0-9A-Za-z_-]{11})"'
- ids = []
- # sometimes the webpage doesn't contain the videos
- # retry until we get them
- for i in itertools.count(0):
- msg = 'Downloading Youtube mix'
- if i > 0:
- msg += ', retry #%d' % i
-
- webpage = self._download_webpage(url, title, msg)
- ids = orderedSet(re.findall(video_re, webpage))
- if ids:
- break
- url_results = self._ids_to_results(ids)
- return self.playlist_result(url_results, playlist_title=title)
-
-
class YoutubeChannelIE(InfoExtractor):
IE_DESC = 'YouTube.com channels'
_VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/(?P<id>[0-9A-Za-z_-]+)'
@@ -1701,3 +1664,20 @@ class YoutubeTruncatedURLIE(InfoExtractor):
'"http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
' or simply youtube-dl BaW_jenozKc .',
expected=True)
+
+
+class YoutubeTruncatedIDIE(InfoExtractor):
+ IE_NAME = 'youtube:truncated_id'
+ IE_DESC = False # Do not list
+ _VALID_URL = r'https?://(?:www\.)youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'
+
+ _TESTS = [{
+ 'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ raise ExtractorError(
+ 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url),
+ expected=True)
diff --git a/youtube_dl/extractor/zdf.py b/youtube_dl/extractor/zdf.py
index 74c76a9a0..98f15177b 100644
--- a/youtube_dl/extractor/zdf.py
+++ b/youtube_dl/extractor/zdf.py
@@ -119,7 +119,7 @@ class ZDFChannelIE(InfoExtractor):
'info_dict': {
'id': '1586442',
},
- 'playlist_count': 4,
+ 'playlist_count': 3,
}
_PAGE_SIZE = 50
diff --git a/youtube_dl/options.py b/youtube_dl/options.py
index 21c452141..14006178d 100644
--- a/youtube_dl/options.py
+++ b/youtube_dl/options.py
@@ -109,7 +109,7 @@ def parseOpts(overrideArguments=None):
kw = {
'version': __version__,
'formatter': fmt,
- 'usage': '%prog [options] url [url...]',
+ 'usage': '%prog [OPTIONS] URL [URL...]',
'conflict_handler': 'resolve',
}
@@ -267,10 +267,12 @@ def parseOpts(overrideArguments=None):
action='store', dest='format', metavar='FORMAT', default=None,
help=(
'video format code, specify the order of preference using'
- ' slashes: -f 22/17/18 . -f mp4 , -f m4a and -f flv are also'
- ' supported. You can also use the special names "best",'
- ' "bestvideo", "bestaudio", "worst", "worstvideo" and'
- ' "worstaudio". By default, youtube-dl will pick the best quality.'
+ ' slashes, as in -f 22/17/18 . '
+ ' Instead of format codes, you can select by extension for the '
+ 'extensions aac, m4a, mp3, mp4, ogg, wav, webm. '
+ 'You can also use the special names "best",'
+ ' "bestvideo", "bestaudio", "worst". '
+ ' By default, youtube-dl will pick the best quality.'
' Use commas to download multiple audio formats, such as'
' -f 136/137/mp4/bestvideo,140/m4a/bestaudio.'
' You can merge the video and audio of two formats into a single'
@@ -300,6 +302,12 @@ def parseOpts(overrideArguments=None):
'--youtube-skip-dash-manifest',
action='store_false', dest='youtube_include_dash_manifest',
help='Do not download the DASH manifest on YouTube videos')
+ video_format.add_option(
+ '--merge-output-format',
+ action='store', dest='merge_output_format', metavar='FORMAT', default=None,
+ help=(
+ 'If a merge is required (e.g. bestvideo+bestaudio), output to given container format. One of mkv, mp4, ogg, webm, flv.'
+ 'Ignored if no merge is required'))
subtitles = optparse.OptionGroup(parser, 'Subtitle Options')
subtitles.add_option(
@@ -444,6 +452,11 @@ def parseOpts(overrideArguments=None):
action='store_true', dest='dump_single_json', default=False,
help='simulate, quiet but print JSON information for each command-line argument. If the URL refers to a playlist, dump the whole playlist information in a single line.')
verbosity.add_option(
+ '--print-json',
+ action='store_true', dest='print_json', default=False,
+ help='Be quiet and print the video information as JSON (video is still being downloaded).',
+ )
+ verbosity.add_option(
'--newline',
action='store_true', dest='progress_with_newline', default=False,
help='output progress bar as new lines')
diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py
index 048525efc..d1b342c7a 100644
--- a/youtube_dl/postprocessor/ffmpeg.py
+++ b/youtube_dl/postprocessor/ffmpeg.py
@@ -80,8 +80,9 @@ class FFmpegPostProcessor(PostProcessor):
files_cmd = []
for path in input_paths:
- files_cmd.extend(['-i', encodeFilename(path, True)])
- cmd = ([self._executable, '-y'] + files_cmd
+ files_cmd.extend([encodeArgument('-i'), encodeFilename(path, True)])
+ cmd = ([encodeFilename(self._executable, True), encodeArgument('-y')] +
+ files_cmd
+ [encodeArgument(o) for o in opts] +
[encodeFilename(self._ffmpeg_filename_argument(out_path), True)])
@@ -122,8 +123,8 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
raise PostProcessingError('ffprobe or avprobe not found. Please install one.')
try:
cmd = [
- self._probe_executable,
- '-show_streams',
+ encodeFilename(self._probe_executable, True),
+ encodeArgument('-show_streams'),
encodeFilename(self._ffmpeg_filename_argument(path), True)]
handle = subprocess.Popen(cmd, stderr=compat_subprocess_get_DEVNULL(), stdout=subprocess.PIPE)
output = handle.communicate()[0]
@@ -520,7 +521,7 @@ class FFmpegMetadataPP(FFmpegPostProcessor):
class FFmpegMergerPP(FFmpegPostProcessor):
def run(self, info):
filename = info['filepath']
- args = ['-c', 'copy', '-map', '0:v:0', '-map', '1:a:0', '-shortest']
+ args = ['-c', 'copy', '-map', '0:v:0', '-map', '1:a:0']
self._downloader.to_screen('[ffmpeg] Merging formats into "%s"' % filename)
self.run_ffmpeg_multiple_files(info['__files_to_merge'], filename, args)
return True, info
diff --git a/youtube_dl/update.py b/youtube_dl/update.py
index 2d2703368..3f9c5249d 100644
--- a/youtube_dl/update.py
+++ b/youtube_dl/update.py
@@ -13,6 +13,7 @@ from .compat import (
compat_str,
compat_urllib_request,
)
+from .utils import make_HTTPS_handler
from .version import __version__
@@ -58,9 +59,12 @@ def update_self(to_screen, verbose):
to_screen('It looks like you installed youtube-dl with a package manager, pip, setup.py or a tarball. Please use that to update.')
return
+ https_handler = make_HTTPS_handler(False)
+ opener = compat_urllib_request.build_opener(https_handler)
+
# Check if there is a new version
try:
- newversion = compat_urllib_request.urlopen(VERSION_URL).read().decode('utf-8').strip()
+ newversion = opener.open(VERSION_URL).read().decode('utf-8').strip()
except:
if verbose:
to_screen(compat_str(traceback.format_exc()))
@@ -72,7 +76,7 @@ def update_self(to_screen, verbose):
# Download and check versions info
try:
- versions_info = compat_urllib_request.urlopen(JSON_URL).read().decode('utf-8')
+ versions_info = opener.open(JSON_URL).read().decode('utf-8')
versions_info = json.loads(versions_info)
except:
if verbose:
@@ -120,7 +124,7 @@ def update_self(to_screen, verbose):
return
try:
- urlh = compat_urllib_request.urlopen(version['exe'][0])
+ urlh = opener.open(version['exe'][0])
newcontent = urlh.read()
urlh.close()
except (IOError, OSError):
@@ -166,7 +170,7 @@ start /b "" cmd /c del "%%~f0"&exit /b"
# Zip unix package
elif isinstance(globals().get('__loader__'), zipimporter):
try:
- urlh = compat_urllib_request.urlopen(version['bin'][0])
+ urlh = opener.open(version['bin'][0])
newcontent = urlh.read()
urlh.close()
except (IOError, OSError):
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index efbe64fb3..079e8d2c3 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -205,6 +205,10 @@ def get_element_by_attribute(attribute, value, html):
def clean_html(html):
"""Clean an HTML snippet into a readable string"""
+
+ if html is None: # Convenience for sanitizing descriptions etc.
+ return html
+
# Newline vs <br />
html = html.replace('\n', ' ')
html = re.sub(r'\s*<\s*br\s*/?\s*>\s*', '\n', html)
@@ -1550,3 +1554,23 @@ def ytdl_is_updateable():
def args_to_str(args):
# Get a short string representation for a subprocess command
return ' '.join(shlex_quote(a) for a in args)
+
+
+def urlhandle_detect_ext(url_handle):
+ try:
+ url_handle.headers
+ getheader = lambda h: url_handle.headers[h]
+ except AttributeError: # Python < 3
+ getheader = url_handle.info().getheader
+
+ return getheader('Content-Type').split("/")[1]
+
+
+def age_restricted(content_limit, age_limit):
+ """ Returns True iff the content should be blocked """
+
+ if age_limit is None: # No limit set
+ return False
+ if content_limit is None:
+ return False # Content available for everyone
+ return age_limit < content_limit
diff --git a/youtube_dl/version.py b/youtube_dl/version.py
index 1420af746..8c57c7413 100644
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,3 +1,3 @@
from __future__ import unicode_literals
-__version__ = '2014.12.17.2'
+__version__ = '2015.01.09.2'