aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Makefile8
-rw-r--r--README.md178
-rw-r--r--test/helper.py5
-rw-r--r--test/test_all_urls.py1
-rw-r--r--test/test_download.py32
-rw-r--r--test/test_playlists.py17
-rw-r--r--test/test_utils.py2
-rw-r--r--test/test_youtube_signature.py68
-rwxr-xr-xyoutube_dl/YoutubeDL.py29
-rw-r--r--youtube_dl/__init__.py113
-rw-r--r--youtube_dl/downloader/common.py2
-rw-r--r--youtube_dl/downloader/f4m.py11
-rw-r--r--youtube_dl/extractor/__init__.py28
-rw-r--r--youtube_dl/extractor/abc.py48
-rw-r--r--youtube_dl/extractor/aparat.py19
-rw-r--r--youtube_dl/extractor/appletrailers.py5
-rw-r--r--youtube_dl/extractor/ard.py8
-rw-r--r--youtube_dl/extractor/arte.py14
-rw-r--r--youtube_dl/extractor/blinkx.py2
-rw-r--r--youtube_dl/extractor/bloomberg.py5
-rw-r--r--youtube_dl/extractor/br.py11
-rw-r--r--youtube_dl/extractor/common.py27
-rw-r--r--youtube_dl/extractor/dfb.py2
-rw-r--r--youtube_dl/extractor/dump.py39
-rw-r--r--youtube_dl/extractor/ellentv.py79
-rw-r--r--youtube_dl/extractor/escapist.py2
-rw-r--r--youtube_dl/extractor/facebook.py2
-rw-r--r--youtube_dl/extractor/firedrive.py3
-rw-r--r--youtube_dl/extractor/francetv.py36
-rw-r--r--youtube_dl/extractor/gamestar.py74
-rw-r--r--youtube_dl/extractor/gdcvault.py42
-rw-r--r--youtube_dl/extractor/generic.py16
-rw-r--r--youtube_dl/extractor/godtube.py58
-rw-r--r--youtube_dl/extractor/howstuffworks.py134
-rw-r--r--youtube_dl/extractor/izlesene.py97
-rw-r--r--youtube_dl/extractor/jove.py80
-rw-r--r--youtube_dl/extractor/justintv.py17
-rw-r--r--youtube_dl/extractor/krasview.py59
-rw-r--r--youtube_dl/extractor/livestream.py104
-rw-r--r--youtube_dl/extractor/metacafe.py88
-rw-r--r--youtube_dl/extractor/mitele.py60
-rw-r--r--youtube_dl/extractor/mojvideo.py58
-rw-r--r--youtube_dl/extractor/nowness.py42
-rw-r--r--youtube_dl/extractor/oe1.py40
-rw-r--r--youtube_dl/extractor/ooyala.py66
-rw-r--r--youtube_dl/extractor/orf.py76
-rw-r--r--youtube_dl/extractor/patreon.py101
-rw-r--r--youtube_dl/extractor/pbs.py62
-rw-r--r--youtube_dl/extractor/reverbnation.py19
-rw-r--r--youtube_dl/extractor/rtlnl.py52
-rw-r--r--youtube_dl/extractor/shared.py57
-rw-r--r--youtube_dl/extractor/streamcloud.py30
-rw-r--r--youtube_dl/extractor/swrmediathek.py16
-rw-r--r--youtube_dl/extractor/teamcoco.py2
-rw-r--r--youtube_dl/extractor/tvplay.py85
-rw-r--r--youtube_dl/extractor/ubu.py56
-rw-r--r--youtube_dl/extractor/vevo.py1
-rw-r--r--youtube_dl/extractor/vidme.py68
-rw-r--r--youtube_dl/extractor/vimeo.py19
-rw-r--r--youtube_dl/extractor/vube.py71
-rw-r--r--youtube_dl/extractor/xboxclips.py57
-rw-r--r--youtube_dl/extractor/yahoo.py15
-rw-r--r--youtube_dl/extractor/youtube.py128
-rw-r--r--youtube_dl/jsinterp.py85
-rw-r--r--youtube_dl/utils.py88
-rw-r--r--youtube_dl/version.py2
66 files changed, 2415 insertions, 506 deletions
diff --git a/Makefile b/Makefile
index c079761ef..088a9320b 100644
--- a/Makefile
+++ b/Makefile
@@ -6,10 +6,10 @@ clean:
cleanall: clean
rm -f youtube-dl youtube-dl.exe
-PREFIX=/usr/local
-BINDIR=$(PREFIX)/bin
-MANDIR=$(PREFIX)/man
-PYTHON=/usr/bin/env python
+PREFIX ?= /usr/local
+BINDIR ?= $(PREFIX)/bin
+MANDIR ?= $(PREFIX)/man
+PYTHON ?= /usr/bin/env python
# set SYSCONFDIR to /etc if PREFIX=/usr or PREFIX=/usr/local
ifeq ($(PREFIX),/usr)
diff --git a/README.md b/README.md
index fb2f776c9..ba350b905 100644
--- a/README.md
+++ b/README.md
@@ -17,6 +17,14 @@ If you do not have curl, you can alternatively use a recent wget:
Windows users can [download a .exe file](https://yt-dl.org/latest/youtube-dl.exe) and place it in their home directory or any other location on their [PATH](http://en.wikipedia.org/wiki/PATH_%28variable%29).
+OS X users can install **youtube-dl** with [Homebrew](http://brew.sh/).
+
+ brew install youtube-dl
+
+You can also use pip:
+
+ sudo pip install youtube-dl
+
Alternatively, refer to the developer instructions below for how to check out and work with the git repository. For further options, including PGP signatures, see https://rg3.github.io/youtube-dl/download.html .
# DESCRIPTION
@@ -38,12 +46,6 @@ which means you can modify it, redistribute it or use it however you like.
playlist or the command line) if an error
occurs
--dump-user-agent display the current browser identification
- --user-agent UA specify a custom user agent
- --referer REF specify a custom referer, use if the video
- access is restricted to one domain
- --add-header FIELD:VALUE specify a custom HTTP header and its value,
- separated by a colon ':'. You can use this
- option multiple times
--list-extractors List all supported extractors and the URLs
they would handle
--extractor-descriptions Output descriptions of all supported
@@ -51,35 +53,22 @@ which means you can modify it, redistribute it or use it however you like.
--proxy URL Use the specified HTTP/HTTPS proxy. Pass in
an empty string (--proxy "") for direct
connection
- --no-check-certificate Suppress HTTPS certificate validation.
- --prefer-insecure Use an unencrypted connection to retrieve
- information about the video. (Currently
- supported only for YouTube)
- --cache-dir DIR Location in the filesystem where youtube-dl
- can store some downloaded information
- permanently. By default $XDG_CACHE_HOME
- /youtube-dl or ~/.cache/youtube-dl . At the
- moment, only YouTube player files (for
- videos with obfuscated signatures) are
- cached, but that may change.
- --no-cache-dir Disable filesystem caching
--socket-timeout None Time to wait before giving up, in seconds
- --bidi-workaround Work around terminals that lack
- bidirectional text support. Requires bidiv
- or fribidi executable in PATH
--default-search PREFIX Use this prefix for unqualified URLs. For
example "gvsearch2:" downloads two videos
from google videos for youtube-dl "large
apple". Use the value "auto" to let
- youtube-dl guess. The default value "error"
- just throws an error.
+ youtube-dl guess ("auto_warning" to emit a
+ warning when guessing). "error" just throws
+ an error. The default value "fixup_error"
+ repairs broken URLs, but emits an error if
+ this is not possible instead of searching.
--ignore-config Do not read configuration files. When given
in the global configuration file /etc
/youtube-dl.conf: do not read the user
configuration in ~/.config/youtube-dl.conf
(%APPDATA%/youtube-dl/config.txt on
Windows)
- --encoding ENCODING Force the specified encoding (experimental)
## Video Selection:
--playlist-start NUMBER playlist video to start at (default is 1)
@@ -125,9 +114,9 @@ which means you can modify it, redistribute it or use it however you like.
of SIZE.
## Filesystem Options:
- -t, --title use title in file name (default)
+ -a, --batch-file FILE file containing URLs to download ('-' for
+ stdin)
--id use only video ID in file name
- -l, --literal [deprecated] alias of --title
-A, --auto-number number downloaded files starting from 00000
-o, --output TEMPLATE output filename template. Use %(title)s to
get the title, %(uploader)s for the
@@ -160,18 +149,15 @@ which means you can modify it, redistribute it or use it however you like.
--restrict-filenames Restrict filenames to only ASCII
characters, and avoid "&" and spaces in
filenames
- -a, --batch-file FILE file containing URLs to download ('-' for
- stdin)
- --load-info FILE json file containing the video information
- (created with the "--write-json" option)
+ -t, --title [deprecated] use title in file name
+ (default)
+ -l, --literal [deprecated] alias of --title
-w, --no-overwrites do not overwrite files
-c, --continue force resume of partially downloaded files.
By default, youtube-dl will resume
downloads if possible.
--no-continue do not resume partially downloaded files
(restart from beginning)
- --cookies FILE file to read cookies from and dump cookie
- jar in
--no-part do not use .part files
--no-mtime do not use the Last-modified header to set
the file modification time
@@ -181,6 +167,19 @@ which means you can modify it, redistribute it or use it however you like.
--write-annotations write video annotations to a .annotation
file
--write-thumbnail write thumbnail image to disk
+ --load-info FILE json file containing the video information
+ (created with the "--write-json" option)
+ --cookies FILE file to read cookies from and dump cookie
+ jar in
+ --cache-dir DIR Location in the filesystem where youtube-dl
+ can store some downloaded information
+ permanently. By default $XDG_CACHE_HOME
+ /youtube-dl or ~/.cache/youtube-dl . At the
+ moment, only YouTube player files (for
+ videos with obfuscated signatures) are
+ cached, but that may change.
+ --no-cache-dir Disable filesystem caching
+ --rm-cache-dir Delete all filesystem cache files
## Verbosity / Simulation Options:
-q, --quiet activates quiet mode
@@ -210,6 +209,22 @@ which means you can modify it, redistribute it or use it however you like.
problems
--print-traffic Display sent and read HTTP traffic
+## Workarounds:
+ --encoding ENCODING Force the specified encoding (experimental)
+ --no-check-certificate Suppress HTTPS certificate validation.
+ --prefer-insecure Use an unencrypted connection to retrieve
+ information about the video. (Currently
+ supported only for YouTube)
+ --user-agent UA specify a custom user agent
+ --referer REF specify a custom referer, use if the video
+ access is restricted to one domain
+ --add-header FIELD:VALUE specify a custom HTTP header and its value,
+ separated by a colon ':'. You can use this
+ option multiple times
+ --bidi-workaround Work around terminals that lack
+ bidirectional text support. Requires bidiv
+ or fribidi executable in PATH
+
## Video Format Options:
-f, --format FORMAT video format code, specify the order of
preference using slashes: "-f 22/17/18".
@@ -296,10 +311,12 @@ The current default template is `%(title)s-%(id)s.%(ext)s`.
In some cases, you don't want special characters such as 中, spaces, or &, such as when transferring the downloaded filename to a Windows system or the filename through an 8bit-unsafe channel. In these cases, add the `--restrict-filenames` flag to get a shorter title:
- $ youtube-dl --get-filename -o "%(title)s.%(ext)s" BaW_jenozKc
- youtube-dl test video ''_ä↭𝕐.mp4 # All kinds of weird characters
- $ youtube-dl --get-filename -o "%(title)s.%(ext)s" BaW_jenozKc --restrict-filenames
- youtube-dl_test_video_.mp4 # A simple file name
+```bash
+$ youtube-dl --get-filename -o "%(title)s.%(ext)s" BaW_jenozKc
+youtube-dl test video ''_ä↭𝕐.mp4 # All kinds of weird characters
+$ youtube-dl --get-filename -o "%(title)s.%(ext)s" BaW_jenozKc --restrict-filenames
+youtube-dl_test_video_.mp4 # A simple file name
+```
# VIDEO SELECTION
@@ -310,14 +327,16 @@ Videos can be filtered by their upload date using the options `--date`, `--dateb
Examples:
- # Download only the videos uploaded in the last 6 months
- $ youtube-dl --dateafter now-6months
+```bash
+# Download only the videos uploaded in the last 6 months
+$ youtube-dl --dateafter now-6months
- # Download only the videos uploaded on January 1, 1970
- $ youtube-dl --date 19700101
+# Download only the videos uploaded on January 1, 1970
+$ youtube-dl --date 19700101
- $ # will only download the videos uploaded in the 200x decade
- $ youtube-dl --dateafter 20000101 --datebefore 20091231
+$ # will only download the videos uploaded in the 200x decade
+$ youtube-dl --dateafter 20000101 --datebefore 20091231
+```
# FAQ
@@ -392,49 +411,48 @@ If you want to add support for a new site, you can follow this quick list (assum
2. Check out the source code with `git clone git@github.com:YOUR_GITHUB_USERNAME/youtube-dl.git`
3. Start a new git branch with `cd youtube-dl; git checkout -b yourextractor`
4. Start with this simple template and save it to `youtube_dl/extractor/yourextractor.py`:
-
- # coding: utf-8
- from __future__ import unicode_literals
-
- import re
-
- from .common import InfoExtractor
-
-
- class YourExtractorIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?yourextractor\.com/watch/(?P<id>[0-9]+)'
- _TEST = {
- 'url': 'http://yourextractor.com/watch/42',
- 'md5': 'TODO: md5 sum of the first 10KiB of the video file',
- 'info_dict': {
- 'id': '42',
- 'ext': 'mp4',
- 'title': 'Video title goes here',
- # TODO more properties, either as:
- # * A value
- # * MD5 checksum; start the string with md5:
- # * A regular expression; start the string with re:
- # * Any Python type (for example int or float)
- }
+ ```python
+ # coding: utf-8
+ from __future__ import unicode_literals
+
+ import re
+
+ from .common import InfoExtractor
+
+
+ class YourExtractorIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?yourextractor\.com/watch/(?P<id>[0-9]+)'
+ _TEST = {
+ 'url': 'http://yourextractor.com/watch/42',
+ 'md5': 'TODO: md5 sum of the first 10KiB of the video file',
+ 'info_dict': {
+ 'id': '42',
+ 'ext': 'mp4',
+ 'title': 'Video title goes here',
+ # TODO more properties, either as:
+ # * A value
+ # * MD5 checksum; start the string with md5:
+ # * A regular expression; start the string with re:
+ # * Any Python type (for example int or float)
}
+ }
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
-
- # TODO more code goes here, for example ...
- webpage = self._download_webpage(url, video_id)
- title = self._html_search_regex(r'<h1>(.*?)</h1>', webpage, 'title')
-
- return {
- 'id': video_id,
- 'title': title,
- # TODO more properties (see youtube_dl/extractor/common.py)
- }
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ # TODO more code goes here, for example ...
+ webpage = self._download_webpage(url, video_id)
+ title = self._html_search_regex(r'<h1>(.*?)</h1>', webpage, 'title')
+ return {
+ 'id': video_id,
+ 'title': title,
+ # TODO more properties (see youtube_dl/extractor/common.py)
+ }
+ ```
5. Add an import in [`youtube_dl/extractor/__init__.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/__init__.py).
-6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done.
+6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc.
7. Have a look at [`youtube_dl/common/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L38). Add tests and code for as many as you want.
8. If you can, check the code with [pyflakes](https://pypi.python.org/pypi/pyflakes) (a good idea) and [pep8](https://pypi.python.org/pypi/pep8) (optional, ignore E501).
9. When the tests pass, [add](https://www.kernel.org/pub/software/scm/git/docs/git-add.html) the new files and [commit](https://www.kernel.org/pub/software/scm/git/docs/git-commit.html) them and [push](https://www.kernel.org/pub/software/scm/git/docs/git-push.html) the result, like this:
diff --git a/test/helper.py b/test/helper.py
index b7299fb82..22d763860 100644
--- a/test/helper.py
+++ b/test/helper.py
@@ -117,8 +117,9 @@ def expect_info_dict(self, expected_dict, got_dict):
u'invalid value for field %s, expected %r, got %r' % (info_field, expected, got))
# Check for the presence of mandatory fields
- for key in ('id', 'url', 'title', 'ext'):
- self.assertTrue(got_dict.get(key), 'Missing mandatory field %s' % key)
+ if got_dict.get('_type') != 'playlist':
+ for key in ('id', 'url', 'title', 'ext'):
+ self.assertTrue(got_dict.get(key), 'Missing mandatory field %s' % key)
# Check for mandatory fields that are automatically set by YoutubeDL
for key in ['webpage_url', 'extractor', 'extractor_key']:
self.assertTrue(got_dict.get(key), u'Missing field: %s' % key)
diff --git a/test/test_all_urls.py b/test/test_all_urls.py
index 0ff47cf1e..b1ad30bf1 100644
--- a/test/test_all_urls.py
+++ b/test/test_all_urls.py
@@ -99,6 +99,7 @@ class TestAllURLsMatching(unittest.TestCase):
def test_facebook_matching(self):
self.assertTrue(FacebookIE.suitable('https://www.facebook.com/Shiniknoh#!/photo.php?v=10153317450565268'))
+ self.assertTrue(FacebookIE.suitable('https://www.facebook.com/cindyweather?fref=ts#!/photo.php?v=10152183998945793'))
def test_no_duplicates(self):
ies = gen_extractors()
diff --git a/test/test_download.py b/test/test_download.py
index d6540588c..c8d4ec2c8 100644
--- a/test/test_download.py
+++ b/test/test_download.py
@@ -63,15 +63,21 @@ def generator(test_case):
def test_template(self):
ie = youtube_dl.extractor.get_info_extractor(test_case['name'])
other_ies = [get_info_extractor(ie_key) for ie_key in test_case.get('add_ie', [])]
+ is_playlist = any(k.startswith('playlist') for k in test_case)
+ test_cases = test_case.get(
+ 'playlist', [] if is_playlist else [test_case])
+
def print_skipping(reason):
print('Skipping %s: %s' % (test_case['name'], reason))
if not ie.working():
print_skipping('IE marked as not _WORKING')
return
- if 'playlist' not in test_case:
- info_dict = test_case.get('info_dict', {})
- if not test_case.get('file') and not (info_dict.get('id') and info_dict.get('ext')):
+
+ for tc in test_cases:
+ info_dict = tc.get('info_dict', {})
+ if not tc.get('file') and not (info_dict.get('id') and info_dict.get('ext')):
raise Exception('Test definition incorrect. The output file cannot be known. Are both \'id\' and \'ext\' keys present?')
+
if 'skip' in test_case:
print_skipping(test_case['skip'])
return
@@ -81,6 +87,9 @@ def generator(test_case):
return
params = get_params(test_case.get('params', {}))
+ if is_playlist and 'playlist' not in test_case:
+ params.setdefault('extract_flat', True)
+ params.setdefault('skip_download', True)
ydl = YoutubeDL(params)
ydl.add_default_info_extractors()
@@ -93,7 +102,6 @@ def generator(test_case):
def get_tc_filename(tc):
return tc.get('file') or ydl.prepare_filename(tc.get('info_dict', {}))
- test_cases = test_case.get('playlist', [test_case])
def try_rm_tcs_files():
for tc in test_cases:
tc_filename = get_tc_filename(tc)
@@ -105,7 +113,10 @@ def generator(test_case):
try_num = 1
while True:
try:
- ydl.download([test_case['url']])
+ # We're not using .download here since that is just a shim
+ # for outside error handling, and returns the exit code
+ # instead of the result dict.
+ res_dict = ydl.extract_info(test_case['url'])
except (DownloadError, ExtractorError) as err:
# Check if the exception is not a network related one
if not err.exc_info[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError, compat_http_client.BadStatusLine) or (err.exc_info[0] == compat_HTTPError and err.exc_info[1].code == 503):
@@ -121,6 +132,17 @@ def generator(test_case):
else:
break
+ if is_playlist:
+ self.assertEqual(res_dict['_type'], 'playlist')
+ expect_info_dict(self, test_case.get('info_dict', {}), res_dict)
+ if 'playlist_mincount' in test_case:
+ self.assertGreaterEqual(
+ len(res_dict['entries']),
+ test_case['playlist_mincount'],
+ 'Expected at least %d in playlist %s, but got only %d' % (
+ test_case['playlist_mincount'], test_case['url'],
+ len(res_dict['entries'])))
+
for tc in test_cases:
tc_filename = get_tc_filename(tc)
if not test_case.get('params', {}).get('skip_download', False):
diff --git a/test/test_playlists.py b/test/test_playlists.py
index c221c47b9..6448fea38 100644
--- a/test/test_playlists.py
+++ b/test/test_playlists.py
@@ -1,6 +1,17 @@
#!/usr/bin/env python
# encoding: utf-8
+## DEPRECATED FILE!
+# Add new tests to the extractors themselves, like this:
+# _TEST = {
+# 'url': 'http://example.com/playlist/42',
+# 'playlist_mincount': 99,
+# 'info_dict': {
+# 'id': '42',
+# 'title': 'Playlist number forty-two',
+# }
+# }
+
from __future__ import unicode_literals
# Allow direct execution
@@ -193,10 +204,10 @@ class TestPlaylists(unittest.TestCase):
def test_bandcamp_album(self):
dl = FakeYDL()
ie = BandcampAlbumIE(dl)
- result = ie.extract('http://mpallante.bandcamp.com/album/nightmare-night-ep')
+ result = ie.extract('http://nightbringer.bandcamp.com/album/hierophany-of-the-open-grave')
self.assertIsPlaylist(result)
- self.assertEqual(result['title'], 'Nightmare Night EP')
- assertGreaterEqual(self, len(result['entries']), 4)
+ self.assertEqual(result['title'], 'Hierophany of the Open Grave')
+ assertGreaterEqual(self, len(result['entries']), 9)
def test_smotri_community(self):
dl = FakeYDL()
diff --git a/test/test_utils.py b/test/test_utils.py
index 51eb0b6b9..e26cc5b0c 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -280,7 +280,7 @@ class TestUtil(unittest.TestCase):
d = json.loads(stripped)
self.assertEqual(d, [{"id": "532cb", "x": 3}])
- def test_uppercase_escpae(self):
+ def test_uppercase_escape(self):
self.assertEqual(uppercase_escape(u'aä'), u'aä')
self.assertEqual(uppercase_escape(u'\\U0001d550'), u'𝕐')
diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py
index f0f33f1db..604e76ab6 100644
--- a/test/test_youtube_signature.py
+++ b/test/test_youtube_signature.py
@@ -1,5 +1,7 @@
#!/usr/bin/env python
+from __future__ import unicode_literals
+
# Allow direct execution
import os
import sys
@@ -16,52 +18,64 @@ from youtube_dl.utils import compat_str, compat_urlretrieve
_TESTS = [
(
- u'https://s.ytimg.com/yts/jsbin/html5player-vflHOr_nV.js',
- u'js',
+ 'https://s.ytimg.com/yts/jsbin/html5player-vflHOr_nV.js',
+ 'js',
86,
- u'>=<;:/.-[+*)(\'&%$#"!ZYX0VUTSRQPONMLKJIHGFEDCBA\\yxwvutsrqponmlkjihgfedcba987654321',
+ '>=<;:/.-[+*)(\'&%$#"!ZYX0VUTSRQPONMLKJIHGFEDCBA\\yxwvutsrqponmlkjihgfedcba987654321',
),
(
- u'https://s.ytimg.com/yts/jsbin/html5player-vfldJ8xgI.js',
- u'js',
+ 'https://s.ytimg.com/yts/jsbin/html5player-vfldJ8xgI.js',
+ 'js',
85,
- u'3456789a0cdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRS[UVWXYZ!"#$%&\'()*+,-./:;<=>?@',
+ '3456789a0cdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRS[UVWXYZ!"#$%&\'()*+,-./:;<=>?@',
),
(
- u'https://s.ytimg.com/yts/jsbin/html5player-vfle-mVwz.js',
- u'js',
+ 'https://s.ytimg.com/yts/jsbin/html5player-vfle-mVwz.js',
+ 'js',
90,
- u']\\[@?>=<;:/.-,+*)(\'&%$#"hZYXWVUTSRQPONMLKJIHGFEDCBAzyxwvutsrqponmlkjiagfedcb39876',
+ ']\\[@?>=<;:/.-,+*)(\'&%$#"hZYXWVUTSRQPONMLKJIHGFEDCBAzyxwvutsrqponmlkjiagfedcb39876',
),
(
- u'https://s.ytimg.com/yts/jsbin/html5player-en_US-vfl0Cbn9e.js',
- u'js',
+ 'https://s.ytimg.com/yts/jsbin/html5player-en_US-vfl0Cbn9e.js',
+ 'js',
84,
- u'O1I3456789abcde0ghijklmnopqrstuvwxyzABCDEFGHfJKLMN2PQRSTUVW@YZ!"#$%&\'()*+,-./:;<=',
+ 'O1I3456789abcde0ghijklmnopqrstuvwxyzABCDEFGHfJKLMN2PQRSTUVW@YZ!"#$%&\'()*+,-./:;<=',
),
(
- u'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflXGBaUN.js',
- u'js',
- u'2ACFC7A61CA478CD21425E5A57EBD73DDC78E22A.2094302436B2D377D14A3BBA23022D023B8BC25AA',
- u'A52CB8B320D22032ABB3A41D773D2B6342034902.A22E87CDD37DBE75A5E52412DC874AC16A7CFCA2',
+ 'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflXGBaUN.js',
+ 'js',
+ '2ACFC7A61CA478CD21425E5A57EBD73DDC78E22A.2094302436B2D377D14A3BBA23022D023B8BC25AA',
+ 'A52CB8B320D22032ABB3A41D773D2B6342034902.A22E87CDD37DBE75A5E52412DC874AC16A7CFCA2',
),
(
- u'http://s.ytimg.com/yts/swfbin/player-vfl5vIhK2/watch_as3.swf',
- u'swf',
+ 'http://s.ytimg.com/yts/swfbin/player-vfl5vIhK2/watch_as3.swf',
+ 'swf',
86,
- u'O1I3456789abcde0ghijklmnopqrstuvwxyzABCDEFGHfJKLMN2PQRSTUVWXY\\!"#$%&\'()*+,-./:;<=>?'
+ 'O1I3456789abcde0ghijklmnopqrstuvwxyzABCDEFGHfJKLMN2PQRSTUVWXY\\!"#$%&\'()*+,-./:;<=>?'
),
(
- u'http://s.ytimg.com/yts/swfbin/player-vflmDyk47/watch_as3.swf',
- u'swf',
- u'F375F75BF2AFDAAF2666E43868D46816F83F13E81C46.3725A8218E446A0DECD33F79DC282994D6AA92C92C9',
- u'9C29AA6D499282CD97F33DCED0A644E8128A5273.64C18E31F38361864D86834E6662FAADFA2FB57F'
+ 'http://s.ytimg.com/yts/swfbin/player-vflmDyk47/watch_as3.swf',
+ 'swf',
+ 'F375F75BF2AFDAAF2666E43868D46816F83F13E81C46.3725A8218E446A0DECD33F79DC282994D6AA92C92C9',
+ '9C29AA6D499282CD97F33DCED0A644E8128A5273.64C18E31F38361864D86834E6662FAADFA2FB57F'
),
(
- u'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflBb0OQx.js',
- u'js',
+ 'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflBb0OQx.js',
+ 'js',
84,
- u'123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQ0STUVWXYZ!"#$%&\'()*+,@./:;<=>'
+ '123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQ0STUVWXYZ!"#$%&\'()*+,@./:;<=>'
+ ),
+ (
+ 'https://s.ytimg.com/yts/jsbin/html5player-en_US-vfl9FYC6l.js',
+ 'js',
+ 83,
+ '123456789abcdefghijklmnopqr0tuvwxyzABCDETGHIJKLMNOPQRS>UVWXYZ!"#$%&\'()*+,-./:;<=F'
+ ),
+ (
+ 'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflCGk6yw/html5player.js',
+ 'js',
+ '4646B5181C6C3020DF1D9C7FCFEA.AD80ABF70C39BD369CCCAE780AFBB98FA6B6CB42766249D9488C288',
+ '82C8849D94266724DC6B6AF89BBFA087EACCD963.B93C07FBA084ACAEFCF7C9D1FD0203C6C1815B6B'
)
]
@@ -75,7 +89,7 @@ class TestSignature(unittest.TestCase):
def make_tfunc(url, stype, sig_input, expected_sig):
- m = re.match(r'.*-([a-zA-Z0-9_-]+)(?:/watch_as3)?\.[a-z]+$', url)
+ m = re.match(r'.*-([a-zA-Z0-9_-]+)(?:/watch_as3|/html5player)?\.[a-z]+$', url)
assert m, '%r should follow URL format' % url
test_id = m.group(1)
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index 4ff1ae0e8..e7194f3e3 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -162,6 +162,7 @@ class YoutubeDL(object):
default_search: Prepend this string if an input url is not valid.
'auto' for elaborate guessing
encoding: Use this encoding instead of the system-specified.
+ extract_flat: Do not resolve URLs, return the immediate result.
The following parameters are not used by YoutubeDL itself, they are used by
the FileDownloader:
@@ -275,7 +276,7 @@ class YoutubeDL(object):
return message
assert hasattr(self, '_output_process')
- assert type(message) == type('')
+ assert isinstance(message, compat_str)
line_count = message.count('\n') + 1
self._output_process.stdin.write((message + '\n').encode('utf-8'))
self._output_process.stdin.flush()
@@ -303,7 +304,7 @@ class YoutubeDL(object):
def to_stderr(self, message):
"""Print message to stderr."""
- assert type(message) == type('')
+ assert isinstance(message, compat_str)
if self.params.get('logger'):
self.params['logger'].error(message)
else:
@@ -558,7 +559,12 @@ class YoutubeDL(object):
Returns the resolved ie_result.
"""
- result_type = ie_result.get('_type', 'video') # If not given we suppose it's a video, support the default old system
+ result_type = ie_result.get('_type', 'video')
+
+ if self.params.get('extract_flat', False):
+ if result_type in ('url', 'url_transparent'):
+ return ie_result
+
if result_type == 'video':
self.add_extra_info(ie_result, extra_info)
return self.process_video_result(ie_result, download=download)
@@ -849,7 +855,7 @@ class YoutubeDL(object):
# Keep for backwards compatibility
info_dict['stitle'] = info_dict['title']
- if not 'format' in info_dict:
+ if 'format' not in info_dict:
info_dict['format'] = info_dict['ext']
reason = self._match_entry(info_dict)
@@ -1234,21 +1240,18 @@ class YoutubeDL(object):
if not self.params.get('verbose'):
return
+ if type('') is not compat_str:
+ # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
+ self.report_warning(
+ 'Your Python is broken! Update to a newer and supported version')
+
encoding_str = (
'[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
locale.getpreferredencoding(),
sys.getfilesystemencoding(),
sys.stdout.encoding,
self.get_encoding()))
- try:
- write_string(encoding_str, encoding=None)
- except:
- errmsg = 'Failed to write encoding string %r' % encoding_str
- try:
- sys.stdout.write(errmsg)
- except:
- pass
- raise IOError(errmsg)
+ write_string(encoding_str, encoding=None)
self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
try:
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py
index de7bc0f5f..f156ba3a0 100644
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -66,6 +66,11 @@ __authors__ = (
'Naglis Jonaitis',
'Charles Chen',
'Hassaan Ali',
+ 'Dobrosław Żybort',
+ 'David Fabijan',
+ 'Sebastian Haas',
+ 'Alexander Kirk',
+ 'Erik Johnson',
)
__license__ = 'Public Domain'
@@ -76,6 +81,7 @@ import optparse
import os
import random
import shlex
+import shutil
import sys
@@ -222,6 +228,7 @@ def parseOpts(overrideArguments=None):
downloader = optparse.OptionGroup(parser, 'Download Options')
postproc = optparse.OptionGroup(parser, 'Post-processing Options')
filesystem = optparse.OptionGroup(parser, 'Filesystem Options')
+ workarounds = optparse.OptionGroup(parser, 'Workarounds')
verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')
general.add_option('-h', '--help',
@@ -238,14 +245,6 @@ def parseOpts(overrideArguments=None):
general.add_option('--dump-user-agent',
action='store_true', dest='dump_user_agent',
help='display the current browser identification', default=False)
- general.add_option('--user-agent',
- dest='user_agent', help='specify a custom user agent', metavar='UA')
- general.add_option('--referer',
- dest='referer', help='specify a custom referer, use if the video access is restricted to one domain',
- metavar='REF', default=None)
- general.add_option('--add-header',
- dest='headers', help='specify a custom HTTP header and its value, separated by a colon \':\'. You can use this option multiple times', action="append",
- metavar='FIELD:VALUE')
general.add_option('--list-extractors',
action='store_true', dest='list_extractors',
help='List all supported extractors and the URLs they would handle', default=False)
@@ -255,33 +254,17 @@ def parseOpts(overrideArguments=None):
general.add_option(
'--proxy', dest='proxy', default=None, metavar='URL',
help='Use the specified HTTP/HTTPS proxy. Pass in an empty string (--proxy "") for direct connection')
- general.add_option('--no-check-certificate', action='store_true', dest='no_check_certificate', default=False, help='Suppress HTTPS certificate validation.')
- general.add_option(
- '--prefer-insecure', '--prefer-unsecure', action='store_true', dest='prefer_insecure',
- help='Use an unencrypted connection to retrieve information about the video. (Currently supported only for YouTube)')
- general.add_option(
- '--cache-dir', dest='cachedir', default=get_cachedir(), metavar='DIR',
- help='Location in the filesystem where youtube-dl can store some downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache/youtube-dl . At the moment, only YouTube player files (for videos with obfuscated signatures) are cached, but that may change.')
- general.add_option(
- '--no-cache-dir', action='store_const', const=None, dest='cachedir',
- help='Disable filesystem caching')
general.add_option(
'--socket-timeout', dest='socket_timeout',
type=float, default=None, help=u'Time to wait before giving up, in seconds')
general.add_option(
- '--bidi-workaround', dest='bidi_workaround', action='store_true',
- help=u'Work around terminals that lack bidirectional text support. Requires bidiv or fribidi executable in PATH')
- general.add_option(
'--default-search',
dest='default_search', metavar='PREFIX',
- help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple". Use the value "auto" to let youtube-dl guess. The default value "error" just throws an error.')
+ help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple". Use the value "auto" to let youtube-dl guess ("auto_warning" to emit a warning when guessing). "error" just throws an error. The default value "fixup_error" repairs broken URLs, but emits an error if this is not possible instead of searching.')
general.add_option(
'--ignore-config',
action='store_true',
help='Do not read configuration files. When given in the global configuration file /etc/youtube-dl.conf: do not read the user configuration in ~/.config/youtube-dl.conf (%APPDATA%/youtube-dl/config.txt on Windows)')
- general.add_option(
- '--encoding', dest='encoding', metavar='ENCODING',
- help='Force the specified encoding (experimental)')
selection.add_option(
'--playlist-start',
@@ -382,6 +365,33 @@ def parseOpts(overrideArguments=None):
help='do not automatically adjust the buffer size. By default, the buffer size is automatically resized from an initial value of SIZE.', default=False)
downloader.add_option('--test', action='store_true', dest='test', default=False, help=optparse.SUPPRESS_HELP)
+ workarounds.add_option(
+ '--encoding', dest='encoding', metavar='ENCODING',
+ help='Force the specified encoding (experimental)')
+ workarounds.add_option(
+ '--no-check-certificate', action='store_true',
+ dest='no_check_certificate', default=False,
+ help='Suppress HTTPS certificate validation.')
+ workarounds.add_option(
+ '--prefer-insecure', '--prefer-unsecure', action='store_true', dest='prefer_insecure',
+ help='Use an unencrypted connection to retrieve information about the video. (Currently supported only for YouTube)')
+ workarounds.add_option(
+ '--user-agent', metavar='UA',
+ dest='user_agent', help='specify a custom user agent')
+ workarounds.add_option(
+ '--referer', metavar='REF',
+ dest='referer', default=None,
+ help='specify a custom referer, use if the video access is restricted to one domain',
+ )
+ workarounds.add_option(
+ '--add-header', metavar='FIELD:VALUE',
+ dest='headers', action='append',
+ help='specify a custom HTTP header and its value, separated by a colon \':\'. You can use this option multiple times',
+ )
+ workarounds.add_option(
+ '--bidi-workaround', dest='bidi_workaround', action='store_true',
+ help=u'Work around terminals that lack bidirectional text support. Requires bidiv or fribidi executable in PATH')
+
verbosity.add_option('-q', '--quiet',
action='store_true', dest='quiet', help='activates quiet mode', default=False)
verbosity.add_option(
@@ -439,12 +449,10 @@ def parseOpts(overrideArguments=None):
help='Display sent and read HTTP traffic')
- filesystem.add_option('-t', '--title',
- action='store_true', dest='usetitle', help='use title in file name (default)', default=False)
+ filesystem.add_option('-a', '--batch-file',
+ dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)')
filesystem.add_option('--id',
action='store_true', dest='useid', help='use only video ID in file name', default=False)
- filesystem.add_option('-l', '--literal',
- action='store_true', dest='usetitle', help='[deprecated] alias of --title', default=False)
filesystem.add_option('-A', '--auto-number',
action='store_true', dest='autonumber',
help='number downloaded files starting from 00000', default=False)
@@ -470,11 +478,10 @@ def parseOpts(overrideArguments=None):
filesystem.add_option('--restrict-filenames',
action='store_true', dest='restrictfilenames',
help='Restrict filenames to only ASCII characters, and avoid "&" and spaces in filenames', default=False)
- filesystem.add_option('-a', '--batch-file',
- dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)')
- filesystem.add_option('--load-info',
- dest='load_info_filename', metavar='FILE',
- help='json file containing the video information (created with the "--write-json" option)')
+ filesystem.add_option('-t', '--title',
+ action='store_true', dest='usetitle', help='[deprecated] use title in file name (default)', default=False)
+ filesystem.add_option('-l', '--literal',
+ action='store_true', dest='usetitle', help='[deprecated] alias of --title', default=False)
filesystem.add_option('-w', '--no-overwrites',
action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
filesystem.add_option('-c', '--continue',
@@ -482,8 +489,6 @@ def parseOpts(overrideArguments=None):
filesystem.add_option('--no-continue',
action='store_false', dest='continue_dl',
help='do not resume partially downloaded files (restart from beginning)')
- filesystem.add_option('--cookies',
- dest='cookiefile', metavar='FILE', help='file to read cookies from and dump cookie jar in')
filesystem.add_option('--no-part',
action='store_true', dest='nopart', help='do not use .part files', default=False)
filesystem.add_option('--no-mtime',
@@ -501,6 +506,20 @@ def parseOpts(overrideArguments=None):
filesystem.add_option('--write-thumbnail',
action='store_true', dest='writethumbnail',
help='write thumbnail image to disk', default=False)
+ filesystem.add_option('--load-info',
+ dest='load_info_filename', metavar='FILE',
+ help='json file containing the video information (created with the "--write-json" option)')
+ filesystem.add_option('--cookies',
+ dest='cookiefile', metavar='FILE', help='file to read cookies from and dump cookie jar in')
+ filesystem.add_option(
+ '--cache-dir', dest='cachedir', default=get_cachedir(), metavar='DIR',
+ help='Location in the filesystem where youtube-dl can store some downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache/youtube-dl . At the moment, only YouTube player files (for videos with obfuscated signatures) are cached, but that may change.')
+ filesystem.add_option(
+ '--no-cache-dir', action='store_const', const=None, dest='cachedir',
+ help='Disable filesystem caching')
+ filesystem.add_option(
+ '--rm-cache-dir', action='store_true', dest='rm_cachedir',
+ help='Delete all filesystem cache files')
postproc.add_option('-x', '--extract-audio', action='store_true', dest='extractaudio', default=False,
@@ -534,6 +553,7 @@ def parseOpts(overrideArguments=None):
parser.add_option_group(downloader)
parser.add_option_group(filesystem)
parser.add_option_group(verbosity)
+ parser.add_option_group(workarounds)
parser.add_option_group(video_format)
parser.add_option_group(subtitles)
parser.add_option_group(authentication)
@@ -694,7 +714,7 @@ def _real_main(argv=None):
date = DateRange.day(opts.date)
else:
date = DateRange(opts.dateafter, opts.datebefore)
- if opts.default_search not in ('auto', 'auto_warning', None) and ':' not in opts.default_search:
+ if opts.default_search not in ('auto', 'auto_warning', 'error', 'fixup_error', None) and ':' not in opts.default_search:
parser.error(u'--default-search invalid; did you forget a colon (:) at the end?')
# Do not download videos when there are audio-only formats
@@ -833,9 +853,26 @@ def _real_main(argv=None):
if opts.update_self:
update_self(ydl.to_screen, opts.verbose)
+ # Remove cache dir (refuse unless the path looks like a youtube-dl cache, since rmtree is destructive)
+ if opts.rm_cachedir:
+     if opts.cachedir is None:
+         ydl.to_screen(u'No cache dir specified (Did you combine --no-cache-dir and --rm-cache-dir?)')
+     else:
+         if ('.cache' not in opts.cachedir) or ('youtube-dl' not in opts.cachedir):
+             ydl.to_screen(u'Not removing directory %s - this does not look like a cache dir' % opts.cachedir)
+             retcode = 141
+         else:
+             ydl.to_screen(
+                 u'Removing cache dir %s .' % opts.cachedir,
+                 skip_eol=True)
+             if os.path.exists(opts.cachedir):
+                 ydl.to_screen(u'.', skip_eol=True)
+                 shutil.rmtree(opts.cachedir)
+             ydl.to_screen(u'.')
+
# Maybe do nothing
if (len(all_urls) < 1) and (opts.load_info_filename is None):
- if not opts.update_self:
+ if not (opts.update_self or opts.rm_cachedir):
parser.error(u'you must provide at least one URL')
else:
sys.exit()
diff --git a/youtube_dl/downloader/common.py b/youtube_dl/downloader/common.py
index 917f3450e..9ce97f5fe 100644
--- a/youtube_dl/downloader/common.py
+++ b/youtube_dl/downloader/common.py
@@ -292,7 +292,7 @@ class FileDownloader(object):
def real_download(self, filename, info_dict):
"""Real download process. Redefine in subclasses."""
- raise NotImplementedError(u'This method must be implemented by sublcasses')
+ raise NotImplementedError(u'This method must be implemented by subclasses')
def _hook_progress(self, status):
for ph in self._progress_hooks:
diff --git a/youtube_dl/downloader/f4m.py b/youtube_dl/downloader/f4m.py
index e6be6ae6c..71353f607 100644
--- a/youtube_dl/downloader/f4m.py
+++ b/youtube_dl/downloader/f4m.py
@@ -220,6 +220,7 @@ class F4mFD(FileDownloader):
def real_download(self, filename, info_dict):
man_url = info_dict['url']
+ requested_bitrate = info_dict.get('tbr')
self.to_screen('[download] Downloading f4m manifest')
manifest = self.ydl.urlopen(man_url).read()
self.report_destination(filename)
@@ -233,8 +234,14 @@ class F4mFD(FileDownloader):
doc = etree.fromstring(manifest)
formats = [(int(f.attrib.get('bitrate', -1)), f) for f in doc.findall(_add_ns('media'))]
- formats = sorted(formats, key=lambda f: f[0])
- rate, media = formats[-1]
+ if requested_bitrate is None:
+ # get the best format
+ formats = sorted(formats, key=lambda f: f[0])
+ rate, media = formats[-1]
+ else:
+ rate, media = list(filter(
+ lambda f: int(f[0]) == requested_bitrate, formats))[0]
+
base_url = compat_urlparse.urljoin(man_url, media.attrib['url'])
bootstrap = base64.b64decode(doc.find(_add_ns('bootstrapInfo')).text)
metadata = base64.b64decode(media.find(_add_ns('metadata')).text)
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 8d63d9281..9be1d2e0f 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -1,3 +1,4 @@
+from .abc import ABCIE
from .academicearth import AcademicEarthCourseIE
from .addanime import AddAnimeIE
from .adultswim import AdultSwimIE
@@ -68,6 +69,7 @@ from .dfb import DFBIE
from .dotsub import DotsubIE
from .dreisat import DreiSatIE
from .drtv import DRTVIE
+from .dump import DumpIE
from .defense import DefenseGouvFrIE
from .discovery import DiscoveryIE
from .divxstage import DivxStageIE
@@ -76,6 +78,10 @@ from .ebaumsworld import EbaumsWorldIE
from .ehow import EHowIE
from .eighttracks import EightTracksIE
from .eitb import EitbIE
+from .ellentv import (
+ EllenTVIE,
+ EllenTVClipsIE,
+)
from .elpais import ElPaisIE
from .empflix import EmpflixIE
from .engadget import EngadgetIE
@@ -111,9 +117,11 @@ from .funnyordie import FunnyOrDieIE
from .gamekings import GamekingsIE
from .gameone import GameOneIE
from .gamespot import GameSpotIE
+from .gamestar import GameStarIE
from .gametrailers import GametrailersIE
from .gdcvault import GDCVaultIE
from .generic import GenericIE
+from .godtube import GodTubeIE
from .googleplus import GooglePlusIE
from .googlesearch import GoogleSearchIE
from .gorillavid import GorillaVidIE
@@ -123,6 +131,7 @@ from .helsinki import HelsinkiIE
from .hentaistigma import HentaiStigmaIE
from .hotnewhiphop import HotNewHipHopIE
from .howcast import HowcastIE
+from .howstuffworks import HowStuffWorksIE
from .huffpost import HuffPostIE
from .hypem import HypemIE
from .iconosquare import IconosquareIE
@@ -140,8 +149,10 @@ from .ivi import (
IviIE,
IviCompilationIE
)
+from .izlesene import IzleseneIE
from .jadorecettepub import JadoreCettePubIE
from .jeuxvideo import JeuxVideoIE
+from .jove import JoveIE
from .jukebox import JukeboxIE
from .justintv import JustinTVIE
from .jpopsukitv import JpopsukiIE
@@ -151,6 +162,7 @@ from .khanacademy import KhanAcademyIE
from .kickstarter import KickStarterIE
from .keek import KeekIE
from .kontrtube import KontrTubeIE
+from .krasview import KrasViewIE
from .ku6 import Ku6IE
from .la7 import LA7IE
from .lifenews import LifeNewsIE
@@ -172,10 +184,12 @@ from .mdr import MDRIE
from .metacafe import MetacafeIE
from .metacritic import MetacriticIE
from .mit import TechTVMITIE, MITIE, OCWMITIE
+from .mitele import MiTeleIE
from .mixcloud import MixcloudIE
from .mlb import MLBIE
from .mpora import MporaIE
from .mofosex import MofosexIE
+from .mojvideo import MojvideoIE
from .mooshare import MooshareIE
from .morningstar import MorningstarIE
from .motherless import MotherlessIE
@@ -219,10 +233,14 @@ from .nrk import (
from .ntv import NTVIE
from .nytimes import NYTimesIE
from .nuvid import NuvidIE
-from .oe1 import OE1IE
from .ooyala import OoyalaIE
-from .orf import ORFIE
+from .orf import (
+ ORFTVthekIE,
+ ORFOE1IE,
+ ORFFM4IE,
+)
from .parliamentliveuk import ParliamentLiveUKIE
+from .patreon import PatreonIE
from .pbs import PBSIE
from .photobucket import PhotobucketIE
from .playvid import PlayvidIE
@@ -242,6 +260,7 @@ from .ro220 import Ro220IE
from .rottentomatoes import RottenTomatoesIE
from .roxwel import RoxwelIE
from .rtbf import RTBFIE
+from .rtlnl import RtlXlIE
from .rtlnow import RTLnowIE
from .rts import RTSIE
from .rtve import RTVEALaCartaIE
@@ -258,6 +277,7 @@ from .savefrom import SaveFromIE
from .scivee import SciVeeIE
from .screencast import ScreencastIE
from .servingsys import ServingSysIE
+from .shared import SharedIE
from .sina import SinaIE
from .slideshare import SlideshareIE
from .slutload import SlutloadIE
@@ -320,6 +340,8 @@ from .tumblr import TumblrIE
from .tutv import TutvIE
from .tvigle import TvigleIE
from .tvp import TvpIE
+from .tvplay import TVPlayIE
+from .ubu import UbuIE
from .udemy import (
UdemyIE,
UdemyCourseIE
@@ -341,6 +363,7 @@ from .videofyme import VideofyMeIE
from .videopremium import VideoPremiumIE
from .videott import VideoTtIE
from .videoweed import VideoWeedIE
+from .vidme import VidmeIE
from .vimeo import (
VimeoIE,
VimeoChannelIE,
@@ -374,6 +397,7 @@ from .wistia import WistiaIE
from .worldstarhiphop import WorldStarHipHopIE
from .wrzuta import WrzutaIE
from .xbef import XBefIE
+from .xboxclips import XboxClipsIE
from .xhamster import XHamsterIE
from .xnxx import XNXXIE
from .xvideos import XVideosIE
diff --git a/youtube_dl/extractor/abc.py b/youtube_dl/extractor/abc.py
new file mode 100644
index 000000000..7d89f44ee
--- /dev/null
+++ b/youtube_dl/extractor/abc.py
@@ -0,0 +1,58 @@
+from __future__ import unicode_literals
+
+import re
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+    int_or_none,
+)
+
+
+class ABCIE(InfoExtractor):
+    """Extractor for news videos on www.abc.net.au."""
+
+    IE_NAME = 'abc.net.au'
+    _VALID_URL = r'http://www\.abc\.net\.au/news/[^/]+/[^/]+/(?P<id>\d+)'
+
+    _TEST = {
+        'url': 'http://www.abc.net.au/news/2014-07-25/bringing-asylum-seekers-to-australia-would-give/5624716',
+        'md5': 'dad6f8ad011a70d9ddf887ce6d5d0742',
+        'info_dict': {
+            'id': '5624716',
+            'ext': 'mp4',
+            'title': 'Bringing asylum seekers to Australia would give them right to asylum claims: professor',
+            'description': 'md5:ba36fa5e27e5c9251fd929d339aea4af',
+        },
+    }
+
+    def _real_extract(self, url):
+        """Return the info dict (formats plus OpenGraph metadata) for ``url``."""
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        webpage = self._download_webpage(url, video_id)
+
+        urls_info_json = self._search_regex(
+            r'inlineVideoData\.push\((.*?)\);', webpage, 'video urls',
+            flags=re.DOTALL)
+        # NOTE(review): presumably the embedded data is single-quoted
+        # JavaScript; the quotes are swapped so json.loads accepts it.
+        urls_info = json.loads(urls_info_json.replace('\'', '"'))
+        # Only 'url' is mandatory.  The numeric fields are optional metadata:
+        # a missing key becomes None instead of aborting with a KeyError.
+        formats = [{
+            'url': url_info['url'],
+            'width': int_or_none(url_info.get('width')),
+            'height': int_or_none(url_info.get('height')),
+            'tbr': int_or_none(url_info.get('bitrate')),
+            'filesize': int_or_none(url_info.get('filesize')),
+        } for url_info in urls_info]
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': self._og_search_title(webpage),
+            'formats': formats,
+            'description': self._og_search_description(webpage),
+            'thumbnail': self._og_search_thumbnail(webpage),
+        }
diff --git a/youtube_dl/extractor/aparat.py b/youtube_dl/extractor/aparat.py
index 7e93bc4df..748608826 100644
--- a/youtube_dl/extractor/aparat.py
+++ b/youtube_dl/extractor/aparat.py
@@ -1,5 +1,7 @@
#coding: utf-8
+from __future__ import unicode_literals
+
import re
from .common import InfoExtractor
@@ -13,13 +15,14 @@ class AparatIE(InfoExtractor):
_VALID_URL = r'^https?://(?:www\.)?aparat\.com/(?:v/|video/video/embed/videohash/)(?P<id>[a-zA-Z0-9]+)'
_TEST = {
- u'url': u'http://www.aparat.com/v/wP8On',
- u'file': u'wP8On.mp4',
- u'md5': u'6714e0af7e0d875c5a39c4dc4ab46ad1',
- u'info_dict': {
- u"title": u"تیم گلکسی 11 - زومیت",
+ 'url': 'http://www.aparat.com/v/wP8On',
+ 'md5': '6714e0af7e0d875c5a39c4dc4ab46ad1',
+ 'info_dict': {
+ 'id': 'wP8On',
+ 'ext': 'mp4',
+ 'title': 'تیم گلکسی 11 - زومیت',
},
- #u'skip': u'Extremely unreliable',
+ # 'skip': 'Extremely unreliable',
}
def _real_extract(self, url):
@@ -29,8 +32,8 @@ class AparatIE(InfoExtractor):
# Note: There is an easier-to-parse configuration at
# http://www.aparat.com/video/video/config/videohash/%video_id
# but the URL in there does not work
- embed_url = (u'http://www.aparat.com/video/video/embed/videohash/' +
- video_id + u'/vt/frame')
+ embed_url = ('http://www.aparat.com/video/video/embed/videohash/' +
+ video_id + '/vt/frame')
webpage = self._download_webpage(embed_url, video_id)
video_urls = re.findall(r'fileList\[[0-9]+\]\s*=\s*"([^"]+)"', webpage)
diff --git a/youtube_dl/extractor/appletrailers.py b/youtube_dl/extractor/appletrailers.py
index dc8657b67..4359b88d1 100644
--- a/youtube_dl/extractor/appletrailers.py
+++ b/youtube_dl/extractor/appletrailers.py
@@ -6,6 +6,7 @@ import json
from .common import InfoExtractor
from ..utils import (
compat_urlparse,
+ int_or_none,
)
@@ -110,8 +111,8 @@ class AppleTrailersIE(InfoExtractor):
formats.append({
'url': format_url,
'format': format['type'],
- 'width': format['width'],
- 'height': int(format['height']),
+ 'width': int_or_none(format['width']),
+ 'height': int_or_none(format['height']),
})
self._sort_formats(formats)
diff --git a/youtube_dl/extractor/ard.py b/youtube_dl/extractor/ard.py
index 30a85c8c1..7f0da8ab6 100644
--- a/youtube_dl/extractor/ard.py
+++ b/youtube_dl/extractor/ard.py
@@ -8,6 +8,8 @@ from ..utils import (
determine_ext,
ExtractorError,
qualities,
+ compat_urllib_parse_urlparse,
+ compat_urllib_parse,
)
@@ -44,8 +46,14 @@ class ARDIE(InfoExtractor):
else:
video_id = m.group('video_id')
+ urlp = compat_urllib_parse_urlparse(url)
+ url = urlp._replace(path=compat_urllib_parse.quote(urlp.path.encode('utf-8'))).geturl()
+
webpage = self._download_webpage(url, video_id)
+ if '>Der gewünschte Beitrag ist nicht mehr verfügbar.<' in webpage:
+ raise ExtractorError('Video %s is no longer available' % video_id, expected=True)
+
title = self._html_search_regex(
[r'<h1(?:\s+class="boxTopHeadline")?>(.*?)</h1>',
r'<meta name="dcterms.title" content="(.*?)"/>',
diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py
index 9591bad8a..d86dbba8e 100644
--- a/youtube_dl/extractor/arte.py
+++ b/youtube_dl/extractor/arte.py
@@ -109,15 +109,19 @@ class ArteTVPlus7IE(InfoExtractor):
regexes = [r'VO?%s' % l, r'VO?.-ST%s' % l]
return any(re.match(r, f['versionCode']) for r in regexes)
# Some formats may not be in the same language as the url
+ # TODO: Instead of dropping formats that do not match the requested language,
+ # we might want to keep them and process them with lower precedence
formats = filter(_match_lang, all_formats)
- formats = list(formats) # in python3 filter returns an iterator
+ formats = list(formats) # in python3 filter returns an iterator
if not formats:
# Some videos are only available in the 'Originalversion'
# they aren't tagged as being in French or German
- if all(f['versionCode'] == 'VO' or f['versionCode'] == 'VA' for f in all_formats):
- formats = all_formats
- else:
- raise ExtractorError(u'The formats list is empty')
+ # Sometimes there are neither videos of requested lang code
+ # nor original version videos available
+ # For such cases we just take all_formats as is
+ formats = all_formats
+ if not formats:
+ raise ExtractorError('The formats list is empty')
if re.match(r'[A-Z]Q', formats[0]['quality']) is not None:
def sort_key(f):
diff --git a/youtube_dl/extractor/blinkx.py b/youtube_dl/extractor/blinkx.py
index 7d558e262..3e461e715 100644
--- a/youtube_dl/extractor/blinkx.py
+++ b/youtube_dl/extractor/blinkx.py
@@ -52,7 +52,7 @@ class BlinkxIE(InfoExtractor):
'height': int(m['h']),
})
elif m['type'] == 'original':
- duration = m['d']
+ duration = float(m['d'])
elif m['type'] == 'youtube':
yt_id = m['link']
self.to_screen('Youtube video detected: %s' % yt_id)
diff --git a/youtube_dl/extractor/bloomberg.py b/youtube_dl/extractor/bloomberg.py
index 25fb79e14..c51a97ce4 100644
--- a/youtube_dl/extractor/bloomberg.py
+++ b/youtube_dl/extractor/bloomberg.py
@@ -10,7 +10,7 @@ class BloombergIE(InfoExtractor):
_TEST = {
'url': 'http://www.bloomberg.com/video/shah-s-presentation-on-foreign-exchange-strategies-qurhIVlJSB6hzkVi229d8g.html',
- 'md5': '7bf08858ff7c203c870e8a6190e221e5',
+ # The md5 checksum changes
'info_dict': {
'id': 'qurhIVlJSB6hzkVi229d8g',
'ext': 'flv',
@@ -31,8 +31,7 @@ class BloombergIE(InfoExtractor):
return {
'id': name.split('-')[-1],
'title': title,
- 'url': f4m_url,
- 'ext': 'flv',
+ 'formats': self._extract_f4m_formats(f4m_url, name),
'description': self._og_search_description(webpage),
'thumbnail': self._og_search_thumbnail(webpage),
}
diff --git a/youtube_dl/extractor/br.py b/youtube_dl/extractor/br.py
index f7f2f713a..86f0c2861 100644
--- a/youtube_dl/extractor/br.py
+++ b/youtube_dl/extractor/br.py
@@ -7,6 +7,7 @@ from .common import InfoExtractor
from ..utils import (
ExtractorError,
int_or_none,
+ parse_duration,
)
@@ -22,8 +23,9 @@ class BRIE(InfoExtractor):
'info_dict': {
'id': '25e279aa-1ffd-40fd-9955-5325bd48a53a',
'ext': 'mp4',
- 'title': 'Am 1. und 2. August in Oberammergau',
- 'description': 'md5:dfd224e5aa6819bc1fcbb7826a932021',
+ 'title': 'Wenn das Traditions-Theater wackelt',
+ 'description': 'Heimatsound-Festival 2014: Wenn das Traditions-Theater wackelt',
+ 'duration': 34,
}
},
{
@@ -34,6 +36,7 @@ class BRIE(InfoExtractor):
'ext': 'mp4',
'title': 'Über den Pass',
'description': 'Die Eroberung der Alpen: Über den Pass',
+ 'duration': 2588,
}
},
{
@@ -44,6 +47,7 @@ class BRIE(InfoExtractor):
'ext': 'aac',
'title': '"Keine neuen Schulden im nächsten Jahr"',
'description': 'Haushaltsentwurf: "Keine neuen Schulden im nächsten Jahr"',
+ 'duration': 64,
}
},
{
@@ -54,6 +58,7 @@ class BRIE(InfoExtractor):
'ext': 'mp4',
'title': 'Umweltbewusster Häuslebauer',
'description': 'Uwe Erdelt: Umweltbewusster Häuslebauer',
+ 'duration': 116,
}
},
{
@@ -64,6 +69,7 @@ class BRIE(InfoExtractor):
'ext': 'mp4',
'title': 'Folge 1 - Metaphysik',
'description': 'Kant für Anfänger: Folge 1 - Metaphysik',
+ 'duration': 893,
'uploader': 'Eva Maria Steimle',
'upload_date': '20140117',
}
@@ -84,6 +90,7 @@ class BRIE(InfoExtractor):
media = {
'id': xml_media.get('externalId'),
'title': xml_media.find('title').text,
+ 'duration': parse_duration(xml_media.find('duration').text),
'formats': self._extract_formats(xml_media.find('assets')),
'thumbnails': self._extract_thumbnails(xml_media.find('teaserImage/variants')),
'description': ' '.join(xml_media.find('shareTitle').text.splitlines()),
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 88f12797c..2e6eeac08 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -18,6 +18,7 @@ from ..utils import (
clean_html,
compiled_regex_type,
ExtractorError,
+ int_or_none,
RegexNotFoundError,
sanitize_filename,
unescapeHTML,
@@ -373,7 +374,8 @@ class InfoExtractor(object):
else:
for p in pattern:
mobj = re.search(p, string, flags)
- if mobj: break
+ if mobj:
+ break
if os.name != 'nt' and sys.stderr.isatty():
_name = u'\033[0;34m%s\033[0m' % name
@@ -461,8 +463,9 @@ class InfoExtractor(object):
return self._og_search_property('title', html, **kargs)
def _og_search_video_url(self, html, name='video url', secure=True, **kargs):
- regexes = self._og_regexes('video')
- if secure: regexes = self._og_regexes('video:secure_url') + regexes
+ regexes = self._og_regexes('video') + self._og_regexes('video:url')
+ if secure:
+ regexes = self._og_regexes('video:secure_url') + regexes
return self._html_search_regex(regexes, html, name, **kargs)
def _og_search_url(self, html, **kargs):
@@ -589,6 +592,24 @@ class InfoExtractor(object):
self.to_screen(msg)
time.sleep(timeout)
+ def _extract_f4m_formats(self, manifest_url, video_id):
+     """Return sorted format dicts, one per <media> element of the f4m manifest."""
+     manifest = self._download_xml(
+         manifest_url, video_id, 'Downloading f4m manifest',
+         'Unable to download f4m manifest')
+
+     media_tag = '{http://ns.adobe.com/f4m/1.0}media'
+     # Every entry uses the manifest URL itself; only the metadata differs.
+     formats = [{
+         'url': manifest_url,
+         'ext': 'flv',
+         'tbr': int_or_none(media_el.attrib.get('bitrate')),
+         'width': int_or_none(media_el.attrib.get('width')),
+         'height': int_or_none(media_el.attrib.get('height')),
+     } for media_el in manifest.findall(media_tag)]
+     self._sort_formats(formats)
+
+     return formats
+
class SearchInfoExtractor(InfoExtractor):
"""
diff --git a/youtube_dl/extractor/dfb.py b/youtube_dl/extractor/dfb.py
index cb8e06822..8049779b0 100644
--- a/youtube_dl/extractor/dfb.py
+++ b/youtube_dl/extractor/dfb.py
@@ -30,7 +30,7 @@ class DFBIE(InfoExtractor):
video_id)
video_info = player_info.find('video')
- f4m_info = self._download_xml(video_info.find('url').text, video_id)
+ f4m_info = self._download_xml(self._proto_relative_url(video_info.find('url').text.strip()), video_id)
token_el = f4m_info.find('token')
manifest_url = token_el.attrib['url'] + '?' + 'hdnea=' + token_el.attrib['auth'] + '&hdcore=3.2.0'
diff --git a/youtube_dl/extractor/dump.py b/youtube_dl/extractor/dump.py
new file mode 100644
index 000000000..6b651778a
--- /dev/null
+++ b/youtube_dl/extractor/dump.py
@@ -0,0 +1,44 @@
+# encoding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class DumpIE(InfoExtractor):
+    """Extractor for video pages on dump.com."""
+
+    _VALID_URL = r'^https?://(?:www\.)?dump\.com/(?P<id>[a-zA-Z0-9]+)/'
+
+    _TEST = {
+        'url': 'http://www.dump.com/oneus/',
+        'md5': 'ad71704d1e67dfd9e81e3e8b42d69d99',
+        'info_dict': {
+            'id': 'oneus',
+            'ext': 'flv',
+            'title': "He's one of us.",
+            # Raw string: "\." is not a valid escape in a normal literal.
+            'thumbnail': r're:^https?://.*\.jpg$',
+        },
+    }
+
+    def _real_extract(self, url):
+        """Return the info dict for the dump.com page at ``url``."""
+        m = re.match(self._VALID_URL, url)
+        video_id = m.group('id')
+
+        webpage = self._download_webpage(url, video_id)
+        # The dot is escaped so that only the literal "s1.addVariable" matches.
+        video_url = self._search_regex(
+            r's1\.addVariable\("file",\s*"([^"]+)"', webpage, 'video URL')
+
+        thumb = self._og_search_thumbnail(webpage)
+        title = self._search_regex(r'<b>([^"]+)</b>', webpage, 'title')
+
+        return {
+            'id': video_id,
+            'title': title,
+            'url': video_url,
+            'thumbnail': thumb,
+        }
diff --git a/youtube_dl/extractor/ellentv.py b/youtube_dl/extractor/ellentv.py
new file mode 100644
index 000000000..3e7923648
--- /dev/null
+++ b/youtube_dl/extractor/ellentv.py
@@ -0,0 +1,93 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    parse_iso8601,
+)
+
+
+class EllenTVIE(InfoExtractor):
+    """Extractor for single video pages under ellentv.com/videos/."""
+
+    _VALID_URL = r'https?://(?:www\.)?ellentv\.com/videos/(?P<id>[a-z0-9_-]+)'
+    _TEST = {
+        'url': 'http://www.ellentv.com/videos/0-7jqrsr18/',
+        'md5': 'e4af06f3bf0d5f471921a18db5764642',
+        'info_dict': {
+            'id': '0-7jqrsr18',
+            'ext': 'mp4',
+            'title': 'What\'s Wrong with These Photos? A Whole Lot',
+            'timestamp': 1406876400,
+            'upload_date': '20140801',
+        }
+    }
+
+    def _real_extract(self, url):
+        """Return the info dict for the video page at ``url``."""
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        webpage = self._download_webpage(url, video_id)
+        timestamp = parse_iso8601(self._search_regex(
+            r'<span class="publish-date"><time datetime="([^"]+)">',
+            webpage, 'timestamp'))
+
+        return {
+            'id': video_id,
+            'title': self._og_search_title(webpage),
+            'url': self._html_search_meta('VideoURL', webpage, 'url'),
+            'timestamp': timestamp,
+        }
+
+
+class EllenTVClipsIE(InfoExtractor):
+    """Extractor that turns an ellentv.com/episodes/ page into a playlist."""
+
+    IE_NAME = 'EllenTV:clips'
+    _VALID_URL = r'https?://(?:www\.)?ellentv\.com/episodes/(?P<id>[a-z0-9_-]+)'
+    _TEST = {
+        'url': 'http://www.ellentv.com/episodes/meryl-streep-vanessa-hudgens/',
+        'info_dict': {
+            'id': 'meryl-streep-vanessa-hudgens',
+            'title': 'Meryl Streep, Vanessa Hudgens',
+        },
+        'playlist_mincount': 9,
+    }
+
+    def _real_extract(self, url):
+        """Return a playlist result built from the clips found on the page."""
+        mobj = re.match(self._VALID_URL, url)
+        playlist_id = mobj.group('id')
+
+        webpage = self._download_webpage(url, playlist_id)
+        playlist = self._extract_playlist(webpage)
+
+        return {
+            '_type': 'playlist',
+            'id': playlist_id,
+            'title': self._og_search_title(webpage),
+            'entries': self._extract_entries(playlist)
+        }
+
+    def _extract_playlist(self, webpage):
+        """Parse the clip list passed to ``playerView.addClips`` in ``webpage``.
+
+        Raises ExtractorError if the captured text is not valid JSON.
+        """
+        # The dot is escaped so that only the literal method call matches.
+        json_string = self._search_regex(r'playerView\.addClips\(\[\{(.*?)\}\]\);', webpage, 'json')
+        try:
+            # The regex captures what is inside "[{" ... "}]"; put them back.
+            return json.loads("[{" + json_string + "}]")
+        except ValueError as ve:
+            # The page was already downloaded; what failed here is parsing.
+            raise ExtractorError('Failed to parse JSON', cause=ve)
+
+    def _extract_entries(self, playlist):
+        """Return one ``url_result`` (tagged 'EllenTV') per playlist item."""
+        return [self.url_result(item['url'], 'EllenTV') for item in playlist]
diff --git a/youtube_dl/extractor/escapist.py b/youtube_dl/extractor/escapist.py
index 272dfe1f6..476fc22b9 100644
--- a/youtube_dl/extractor/escapist.py
+++ b/youtube_dl/extractor/escapist.py
@@ -36,7 +36,7 @@ class EscapistIE(InfoExtractor):
r'<meta name="description" content="([^"]*)"',
webpage, 'description', fatal=False)
- playerUrl = self._og_search_video_url(webpage, name=u'player URL')
+ playerUrl = self._og_search_video_url(webpage, name='player URL')
title = self._html_search_regex(
r'<meta name="title" content="([^"]*)"',
diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py
index f0cd8f156..f7cf700b5 100644
--- a/youtube_dl/extractor/facebook.py
+++ b/youtube_dl/extractor/facebook.py
@@ -20,7 +20,7 @@ from ..utils import (
class FacebookIE(InfoExtractor):
_VALID_URL = r'''(?x)
https?://(?:\w+\.)?facebook\.com/
- (?:[^#?]*\#!/)?
+ (?:[^#]*?\#!/)?
(?:video/video\.php|photo\.php|video/embed)\?(?:.*?)
(?:v|video_id)=(?P<id>[0-9]+)
(?:.*)'''
diff --git a/youtube_dl/extractor/firedrive.py b/youtube_dl/extractor/firedrive.py
index 6d73c8a4a..af439ccfe 100644
--- a/youtube_dl/extractor/firedrive.py
+++ b/youtube_dl/extractor/firedrive.py
@@ -42,7 +42,6 @@ class FiredriveIE(InfoExtractor):
fields = dict(re.findall(r'''(?x)<input\s+
type="hidden"\s+
name="([^"]+)"\s+
- (?:id="[^"]+"\s+)?
value="([^"]*)"
''', webpage))
@@ -66,7 +65,7 @@ class FiredriveIE(InfoExtractor):
ext = self._search_regex(r'type:\s?\'([^\']+)\',',
webpage, 'extension', fatal=False)
video_url = self._search_regex(
- r'file:\s?\'(http[^\']+)\',', webpage, 'file url')
+ r'file:\s?loadURL\(\'(http[^\']+)\'\),', webpage, 'file url')
formats = [{
'format_id': 'sd',
diff --git a/youtube_dl/extractor/francetv.py b/youtube_dl/extractor/francetv.py
index 1fbe6d175..1b0e8e5d5 100644
--- a/youtube_dl/extractor/francetv.py
+++ b/youtube_dl/extractor/francetv.py
@@ -19,17 +19,35 @@ class FranceTVBaseInfoExtractor(InfoExtractor):
+ video_id, video_id, 'Downloading XML config')
manifest_url = info.find('videos/video/url').text
- video_url = manifest_url.replace('manifest.f4m', 'index_2_av.m3u8')
- video_url = video_url.replace('/z/', '/i/')
+ manifest_url = manifest_url.replace('/z/', '/i/')
+
+ if manifest_url.startswith('rtmp'):
+ formats = [{'url': manifest_url, 'ext': 'flv'}]
+ else:
+ formats = []
+ available_formats = self._search_regex(r'/[^,]*,(.*?),k\.mp4', manifest_url, 'available formats')
+ for index, format_descr in enumerate(available_formats.split(',')):
+ format_info = {
+ 'url': manifest_url.replace('manifest.f4m', 'index_%d_av.m3u8' % index),
+ 'ext': 'mp4',
+ }
+ m_resolution = re.search(r'(?P<width>\d+)x(?P<height>\d+)', format_descr)
+ if m_resolution is not None:
+ format_info.update({
+ 'width': int(m_resolution.group('width')),
+ 'height': int(m_resolution.group('height')),
+ })
+ formats.append(format_info)
+
thumbnail_path = info.find('image').text
- return {'id': video_id,
- 'ext': 'flv' if video_url.startswith('rtmp') else 'mp4',
- 'url': video_url,
- 'title': info.find('titre').text,
- 'thumbnail': compat_urlparse.urljoin('http://pluzz.francetv.fr', thumbnail_path),
- 'description': info.find('synopsis').text,
- }
+ return {
+ 'id': video_id,
+ 'title': info.find('titre').text,
+ 'formats': formats,
+ 'thumbnail': compat_urlparse.urljoin('http://pluzz.francetv.fr', thumbnail_path),
+ 'description': info.find('synopsis').text,
+ }
class PluzzIE(FranceTVBaseInfoExtractor):
diff --git a/youtube_dl/extractor/gamestar.py b/youtube_dl/extractor/gamestar.py
new file mode 100644
index 000000000..50f8fc7e7
--- /dev/null
+++ b/youtube_dl/extractor/gamestar.py
@@ -0,0 +1,74 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ parse_duration,
+ str_to_int,
+ unified_strdate,
+)
+
+
+class GameStarIE(InfoExtractor):
+ _VALID_URL = r'http://www\.gamestar\.de/videos/.*,(?P<id>[0-9]+)\.html'
+ _TEST = {
+ 'url': 'http://www.gamestar.de/videos/trailer,3/hobbit-3-die-schlacht-der-fuenf-heere,76110.html',
+ 'md5': '96974ecbb7fd8d0d20fca5a00810cea7',
+ 'info_dict': {
+ 'id': '76110',
+ 'ext': 'mp4',
+ 'title': 'Hobbit 3: Die Schlacht der Fünf Heere - Teaser-Trailer zum dritten Teil',
+ 'description': 'Der Teaser-Trailer zu Hobbit 3: Die Schlacht der Fünf Heere zeigt einige Szenen aus dem dritten Teil der Saga und kündigt den vollständigen Trailer an.',
+ 'thumbnail': 'http://images.gamestar.de/images/idgwpgsgp/bdb/2494525/600x.jpg',
+ 'upload_date': '20140728',
+ 'duration': 17
+ }
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+
+ webpage = self._download_webpage(url, video_id)
+
+ og_title = self._og_search_title(webpage)
+ title = og_title.replace(' - Video bei GameStar.de', '').strip()
+
+ url = 'http://gamestar.de/_misc/videos/portal/getVideoUrl.cfm?premium=0&videoId=' + video_id
+
+ description = self._og_search_description(webpage).strip()
+
+ thumbnail = self._proto_relative_url(
+ self._og_search_thumbnail(webpage), scheme='http:')
+
+ upload_date = unified_strdate(self._html_search_regex(
+ r'<span style="float:left;font-size:11px;">Datum: ([0-9]+\.[0-9]+\.[0-9]+)&nbsp;&nbsp;',
+ webpage, 'upload_date', fatal=False))
+
+ duration = parse_duration(self._html_search_regex(
+ r'&nbsp;&nbsp;Länge: ([0-9]+:[0-9]+)</span>', webpage, 'duration',
+ fatal=False))
+
+ view_count = str_to_int(self._html_search_regex(
+ r'&nbsp;&nbsp;Zuschauer: ([0-9\.]+)&nbsp;&nbsp;', webpage,
+ 'view_count', fatal=False))
+
+ comment_count = int_or_none(self._html_search_regex(
+ r'>Kommentieren \(([0-9]+)\)</a>', webpage, 'comment_count',
+ fatal=False))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'url': url,
+ 'ext': 'mp4',
+ 'thumbnail': thumbnail,
+ 'description': description,
+ 'upload_date': upload_date,
+ 'duration': duration,
+ 'view_count': view_count,
+ 'comment_count': comment_count
+ }
diff --git a/youtube_dl/extractor/gdcvault.py b/youtube_dl/extractor/gdcvault.py
index 89d5994ee..de14ae1fb 100644
--- a/youtube_dl/extractor/gdcvault.py
+++ b/youtube_dl/extractor/gdcvault.py
@@ -8,6 +8,7 @@ from ..utils import (
compat_urllib_request,
)
+
class GDCVaultIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?gdcvault\.com/play/(?P<id>\d+)/(?P<name>(\w|-)+)'
_TESTS = [
@@ -31,6 +32,15 @@ class GDCVaultIE(InfoExtractor):
'skip_download': True, # Requires rtmpdump
}
},
+ {
+ 'url': 'http://www.gdcvault.com/play/1015301/Thexder-Meets-Windows-95-or',
+ 'md5': 'a5eb77996ef82118afbbe8e48731b98e',
+ 'info_dict': {
+ 'id': '1015301',
+ 'ext': 'flv',
+ 'title': 'Thexder Meets Windows 95, or Writing Great Games in the Windows 95 Environment',
+ }
+ }
]
def _parse_mp4(self, xml_description):
@@ -103,18 +113,40 @@ class GDCVaultIE(InfoExtractor):
webpage_url = 'http://www.gdcvault.com/play/' + video_id
start_page = self._download_webpage(webpage_url, video_id)
- xml_root = self._html_search_regex(r'<iframe src="(?P<xml_root>.*?)player.html.*?".*?</iframe>', start_page, 'xml root', None, False)
+ direct_url = self._search_regex(
+ r's1\.addVariable\("file",\s*encodeURIComponent\("(/[^"]+)"\)\);',
+ start_page, 'url', default=None)
+ if direct_url:
+ video_url = 'http://www.gdcvault.com/' + direct_url
+ title = self._html_search_regex(
+ r'<td><strong>Session Name</strong></td>\s*<td>(.*?)</td>',
+ start_page, 'title')
+
+ return {
+ 'id': video_id,
+ 'url': video_url,
+ 'ext': 'flv',
+ 'title': title,
+ }
+ xml_root = self._html_search_regex(
+ r'<iframe src="(?P<xml_root>.*?)player.html.*?".*?</iframe>',
+ start_page, 'xml root', default=None)
if xml_root is None:
# Probably need to authenticate
- start_page = self._login(webpage_url, video_id)
- if start_page is None:
+ login_res = self._login(webpage_url, video_id)
+ if login_res is None:
self.report_warning('Could not login.')
else:
+ start_page = login_res
# Grab the url from the authenticated page
- xml_root = self._html_search_regex(r'<iframe src="(?P<xml_root>.*?)player.html.*?".*?</iframe>', start_page, 'xml root')
+ xml_root = self._html_search_regex(
+ r'<iframe src="(.*?)player.html.*?".*?</iframe>',
+ start_page, 'xml root')
- xml_name = self._html_search_regex(r'<iframe src=".*?\?xml=(?P<xml_file>.+?\.xml).*?".*?</iframe>', start_page, 'xml filename', None, False)
+ xml_name = self._html_search_regex(
+ r'<iframe src=".*?\?xml=(.+?\.xml).*?".*?</iframe>',
+ start_page, 'xml filename', default=None)
if xml_name is None:
# Fallback to the older format
xml_name = self._html_search_regex(r'<iframe src=".*?\?xmlURL=xml/(?P<xml_file>.+?\.xml).*?".*?</iframe>', start_page, 'xml filename')
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index 9db27f9aa..8e915735e 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -383,13 +383,13 @@ class GenericIE(InfoExtractor):
if not parsed_url.scheme:
default_search = self._downloader.params.get('default_search')
if default_search is None:
- default_search = 'error'
+ default_search = 'fixup_error'
- if default_search in ('auto', 'auto_warning'):
+ if default_search in ('auto', 'auto_warning', 'fixup_error'):
if '/' in url:
self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
return self.url_result('http://' + url)
- else:
+ elif default_search != 'fixup_error':
if default_search == 'auto_warning':
if re.match(r'^(?:url|URL)$', url):
raise ExtractorError(
@@ -399,7 +399,8 @@ class GenericIE(InfoExtractor):
self._downloader.report_warning(
'Falling back to youtube search for %s . Set --default-search "auto" to suppress this warning.' % url)
return self.url_result('ytsearch:' + url)
- elif default_search == 'error':
+
+ if default_search in ('error', 'fixup_error'):
raise ExtractorError(
('%r is not a valid URL. '
'Set --default-search "ytsearch" (or run youtube-dl "ytsearch:%s" ) to search YouTube'
@@ -705,6 +706,13 @@ class GenericIE(InfoExtractor):
url = unescapeHTML(mobj.group('url'))
return self.url_result(url, ie='MTVServicesEmbedded')
+ # Look for embedded yahoo player
+ mobj = re.search(
+ r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:screen|movies)\.yahoo\.com/.+?\.html\?format=embed)\1',
+ webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'), 'Yahoo')
+
# Start with something easy: JW Player in SWFObject
found = re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
if not found:
diff --git a/youtube_dl/extractor/godtube.py b/youtube_dl/extractor/godtube.py
new file mode 100644
index 000000000..73bd6d890
--- /dev/null
+++ b/youtube_dl/extractor/godtube.py
@@ -0,0 +1,58 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ parse_duration,
+ parse_iso8601,
+)
+
+
+class GodTubeIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?godtube\.com/watch/\?v=(?P<id>[\da-zA-Z]+)'
+ _TESTS = [
+ {
+ 'url': 'https://www.godtube.com/watch/?v=0C0CNNNU',
+ 'md5': '77108c1e4ab58f48031101a1a2119789',
+ 'info_dict': {
+ 'id': '0C0CNNNU',
+ 'ext': 'mp4',
+ 'title': 'Woman at the well.',
+ 'duration': 159,
+ 'timestamp': 1205712000,
+ 'uploader': 'beverlybmusic',
+ 'upload_date': '20080317',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ },
+ },
+ ]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+
+ config = self._download_xml(
+ 'http://www.godtube.com/resource/mediaplayer/%s.xml' % video_id.lower(),
+ video_id, 'Downloading player config XML')
+
+ video_url = config.find('.//file').text
+ uploader = config.find('.//author').text
+ timestamp = parse_iso8601(config.find('.//date').text)
+ duration = parse_duration(config.find('.//duration').text)
+ thumbnail = config.find('.//image').text
+
+ media = self._download_xml(
+ 'http://www.godtube.com/media/xml/?v=%s' % video_id, video_id, 'Downloading media XML')
+
+ title = media.find('.//title').text
+
+ return {
+ 'id': video_id,
+ 'url': video_url,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'timestamp': timestamp,
+ 'uploader': uploader,
+ 'duration': duration,
+ }
diff --git a/youtube_dl/extractor/howstuffworks.py b/youtube_dl/extractor/howstuffworks.py
new file mode 100644
index 000000000..68684b997
--- /dev/null
+++ b/youtube_dl/extractor/howstuffworks.py
@@ -0,0 +1,134 @@
+from __future__ import unicode_literals
+
+import re
+import json
+import random
+import string
+
+from .common import InfoExtractor
+from ..utils import find_xpath_attr
+
+
+class HowStuffWorksIE(InfoExtractor):
+ _VALID_URL = r'https?://[\da-z-]+\.howstuffworks\.com/(?:[^/]+/)*\d+-(?P<id>.+?)-video\.htm'
+ _TESTS = [
+ {
+ 'url': 'http://adventure.howstuffworks.com/5266-cool-jobs-iditarod-musher-video.htm',
+ 'info_dict': {
+ 'id': '450221',
+ 'display_id': 'cool-jobs-iditarod-musher',
+ 'ext': 'flv',
+ 'title': 'Cool Jobs - Iditarod Musher',
+ 'description': 'md5:82bb58438a88027b8186a1fccb365f90',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ },
+ 'params': {
+ # md5 is not consistent
+ 'skip_download': True
+ }
+ },
+ {
+ 'url': 'http://adventure.howstuffworks.com/39516-deadliest-catch-jakes-farewell-pots-video.htm',
+ 'info_dict': {
+ 'id': '553470',
+ 'display_id': 'deadliest-catch-jakes-farewell-pots',
+ 'ext': 'mp4',
+ 'title': 'Deadliest Catch: Jake\'s Farewell Pots',
+ 'description': 'md5:9632c346d5e43ee238028c9cefd8dbbc',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ },
+ 'params': {
+ # md5 is not consistent
+ 'skip_download': True
+ }
+ },
+ {
+ 'url': 'http://entertainment.howstuffworks.com/arts/2706-sword-swallowing-1-by-dan-meyer-video.htm',
+ 'info_dict': {
+ 'id': '440011',
+ 'display_id': 'sword-swallowing-1-by-dan-meyer',
+ 'ext': 'flv',
+ 'title': 'Sword Swallowing #1 by Dan Meyer',
+ 'description': 'md5:b2409e88172913e2e7d3d1159b0ef735',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ },
+ 'params': {
+ # md5 is not consistent
+ 'skip_download': True
+ }
+ },
+ ]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ display_id = mobj.group('id')
+ webpage = self._download_webpage(url, display_id)
+
+ content_id = self._search_regex(r'var siteSectionId="(\d+)";', webpage, 'content id')
+
+ mp4 = self._search_regex(
+ r'''(?xs)var\s+clip\s*=\s*{\s*
+ .+?\s*
+ content_id\s*:\s*%s\s*,\s*
+ .+?\s*
+ mp4\s*:\s*\[(.*?),?\]\s*
+ };\s*
+ videoData\.push\(clip\);''' % content_id,
+ webpage, 'mp4', fatal=False, default=None)
+
+ smil = self._download_xml(
+ 'http://services.media.howstuffworks.com/videos/%s/smil-service.smil' % content_id,
+ content_id, 'Downloading video SMIL')
+
+ http_base = find_xpath_attr(
+ smil,
+ './{0}head/{0}meta'.format('{http://www.w3.org/2001/SMIL20/Language}'),
+ 'name',
+ 'httpBase').get('content')
+
+ def random_string(str_len=0):
+ return ''.join([random.choice(string.ascii_uppercase) for _ in range(str_len)])
+
+ URL_SUFFIX = '?v=2.11.3&fp=LNX 11,2,202,356&r=%s&g=%s' % (random_string(5), random_string(12))
+
+ formats = []
+
+ if mp4:
+ for video in json.loads('[%s]' % mp4):
+ bitrate = video['bitrate']
+ fmt = {
+ 'url': video['src'].replace('http://pmd.video.howstuffworks.com', http_base) + URL_SUFFIX,
+ 'format_id': bitrate,
+ }
+ m = re.search(r'(?P<vbr>\d+)[Kk]', bitrate)
+ if m:
+ fmt['vbr'] = int(m.group('vbr'))
+ formats.append(fmt)
+ else:
+ for video in smil.findall(
+ './/{0}body/{0}switch/{0}video'.format('{http://www.w3.org/2001/SMIL20/Language}')):
+ vbr = int(video.attrib['system-bitrate']) / 1000
+ formats.append({
+ 'url': '%s/%s%s' % (http_base, video.attrib['src'], URL_SUFFIX),
+ 'format_id': '%dk' % vbr,
+ 'vbr': vbr,
+ })
+
+ self._sort_formats(formats)
+
+ title = self._og_search_title(webpage)
+ TITLE_SUFFIX = ' : HowStuffWorks'
+ if title.endswith(TITLE_SUFFIX):
+ title = title[:-len(TITLE_SUFFIX)]
+
+ description = self._og_search_description(webpage)
+ thumbnail = self._og_search_thumbnail(webpage)
+
+ return {
+ 'id': content_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'formats': formats,
+ }
diff --git a/youtube_dl/extractor/izlesene.py b/youtube_dl/extractor/izlesene.py
new file mode 100644
index 000000000..79e8430b5
--- /dev/null
+++ b/youtube_dl/extractor/izlesene.py
@@ -0,0 +1,97 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ get_element_by_id,
+ parse_iso8601,
+ determine_ext,
+ int_or_none,
+ str_to_int,
+)
+
+
+class IzleseneIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:(?:www|m)\.)?izlesene\.com/(?:video|embedplayer)/(?:[^/]+/)?(?P<id>[0-9]+)'
+ _STREAM_URL = 'http://panel.izlesene.com/api/streamurl/{id:}/{format:}'
+ _TEST = {
+ 'url': 'http://www.izlesene.com/video/sevincten-cildirtan-dogum-gunu-hediyesi/7599694',
+ 'md5': '4384f9f0ea65086734b881085ee05ac2',
+ 'info_dict': {
+ 'id': '7599694',
+ 'ext': 'mp4',
+ 'title': 'Sevinçten Çıldırtan Doğum Günü Hediyesi',
+ 'description': 'Annesi oğluna doğum günü hediyesi olarak minecraft cd si alıyor, ve çocuk hunharca seviniyor',
+ 'thumbnail': 're:^http://.*\.jpg',
+ 'uploader_id': 'pelikzzle',
+ 'timestamp': 1404298698,
+ 'upload_date': '20140702',
+ 'duration': 95.395,
+ 'age_limit': 0,
+ }
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ url = 'http://www.izlesene.com/video/%s' % video_id
+
+ webpage = self._download_webpage(url, video_id)
+
+ title = self._og_search_title(webpage)
+ description = self._og_search_description(webpage)
+ thumbnail = self._og_search_thumbnail(webpage)
+
+ uploader = self._html_search_regex(
+ r"adduserUsername\s*=\s*'([^']+)';", webpage, 'uploader', fatal=False, default='')
+ timestamp = parse_iso8601(self._html_search_meta(
+ 'uploadDate', webpage, 'upload date', fatal=False))
+
+ duration = int_or_none(self._html_search_regex(
+ r'"videoduration"\s*:\s*"([^"]+)"', webpage, 'duration', fatal=False))
+ if duration:
+ duration /= 1000.0
+
+ view_count = str_to_int(get_element_by_id('videoViewCount', webpage))
+ comment_count = self._html_search_regex(
+ r'comment_count\s*=\s*\'([^\']+)\';', webpage, 'uploader', fatal=False)
+
+ family_friendly = self._html_search_meta(
+ 'isFamilyFriendly', webpage, 'age limit', fatal=False)
+
+ content_url = self._html_search_meta(
+ 'contentURL', webpage, 'content URL', fatal=False)
+ ext = determine_ext(content_url, 'mp4')
+
+ # Might be empty for some videos.
+ qualities = self._html_search_regex(
+ r'"quality"\s*:\s*"([^"]+)"', webpage, 'qualities', fatal=False, default='')
+
+ formats = []
+ for quality in qualities.split('|'):
+ json = self._download_json(
+ self._STREAM_URL.format(id=video_id, format=quality), video_id,
+ note='Getting video URL for "%s" quality' % quality,
+ errnote='Failed to get video URL for "%s" quality' % quality
+ )
+ formats.append({
+ 'url': json.get('streamurl'),
+ 'ext': ext,
+ 'format_id': '%sp' % quality if quality else 'sd',
+ })
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'uploader_id': uploader,
+ 'timestamp': timestamp,
+ 'duration': duration,
+ 'view_count': int_or_none(view_count),
+ 'comment_count': int_or_none(comment_count),
+ 'age_limit': 18 if family_friendly == 'False' else 0,
+ 'formats': formats,
+ }
diff --git a/youtube_dl/extractor/jove.py b/youtube_dl/extractor/jove.py
new file mode 100644
index 000000000..cf73cd753
--- /dev/null
+++ b/youtube_dl/extractor/jove.py
@@ -0,0 +1,80 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ unified_strdate
+)
+
+
+class JoveIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?jove\.com/video/(?P<id>[0-9]+)'
+ _CHAPTERS_URL = 'http://www.jove.com/video-chapters?videoid={video_id:}'
+ _TESTS = [
+ {
+ 'url': 'http://www.jove.com/video/2744/electrode-positioning-montage-transcranial-direct-current',
+ 'md5': '93723888d82dbd6ba8b3d7d0cd65dd2b',
+ 'info_dict': {
+ 'id': '2744',
+ 'ext': 'mp4',
+ 'title': 'Electrode Positioning and Montage in Transcranial Direct Current Stimulation',
+ 'description': 'md5:015dd4509649c0908bc27f049e0262c6',
+ 'thumbnail': 're:^https?://.*\.png$',
+ 'upload_date': '20110523',
+ }
+ },
+ {
+ 'url': 'http://www.jove.com/video/51796/culturing-caenorhabditis-elegans-axenic-liquid-media-creation',
+ 'md5': '914aeb356f416811d911996434811beb',
+ 'info_dict': {
+ 'id': '51796',
+ 'ext': 'mp4',
+ 'title': 'Culturing Caenorhabditis elegans in Axenic Liquid Media and Creation of Transgenic Worms by Microparticle Bombardment',
+ 'description': 'md5:35ff029261900583970c4023b70f1dc9',
+ 'thumbnail': 're:^https?://.*\.png$',
+ 'upload_date': '20140802',
+ }
+ },
+
+ ]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+
+ webpage = self._download_webpage(url, video_id)
+
+ chapters_id = self._html_search_regex(
+ r'/video-chapters\?videoid=([0-9]+)', webpage, 'chapters id')
+
+ chapters_xml = self._download_xml(
+ self._CHAPTERS_URL.format(video_id=chapters_id),
+ video_id, note='Downloading chapters XML',
+ errnote='Failed to download chapters XML')
+
+ video_url = chapters_xml.attrib.get('video')
+ if not video_url:
+ raise ExtractorError('Failed to get the video URL')
+
+ title = self._html_search_meta('citation_title', webpage, 'title')
+ thumbnail = self._og_search_thumbnail(webpage)
+ description = self._html_search_regex(
+ r'<div id="section_body_summary"><p class="jove_content">(.+?)</p>',
+ webpage, 'description', fatal=False)
+ publish_date = unified_strdate(self._html_search_meta(
+ 'citation_publication_date', webpage, 'publish date', fatal=False))
+ comment_count = self._html_search_regex(
+ r'<meta name="num_comments" content="(\d+) Comments?"',
+ webpage, 'comment count', fatal=False)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'url': video_url,
+ 'thumbnail': thumbnail,
+ 'description': description,
+ 'upload_date': publish_date,
+ 'comment_count': comment_count,
+ }
diff --git a/youtube_dl/extractor/justintv.py b/youtube_dl/extractor/justintv.py
index 7083db12e..27017e89f 100644
--- a/youtube_dl/extractor/justintv.py
+++ b/youtube_dl/extractor/justintv.py
@@ -1,5 +1,6 @@
from __future__ import unicode_literals
+import itertools
import json
import os
import re
@@ -43,10 +44,11 @@ class JustinTVIE(InfoExtractor):
}
# Return count of items, list of *valid* items
- def _parse_page(self, url, video_id):
- info_json = self._download_webpage(url, video_id,
- 'Downloading video info JSON',
- 'unable to download video info JSON')
+ def _parse_page(self, url, video_id, counter):
+ info_json = self._download_webpage(
+ url, video_id,
+ 'Downloading video info JSON on page %d' % counter,
+ 'Unable to download video info JSON %d' % counter)
response = json.loads(info_json)
if type(response) != list:
@@ -138,11 +140,10 @@ class JustinTVIE(InfoExtractor):
entries = []
offset = 0
limit = self._JUSTIN_PAGE_LIMIT
- while True:
- if paged:
- self.report_download_page(video_id, offset)
+ for counter in itertools.count(1):
page_url = api + ('?offset=%d&limit=%d' % (offset, limit))
- page_count, page_info = self._parse_page(page_url, video_id)
+ page_count, page_info = self._parse_page(
+ page_url, video_id, counter)
entries.extend(page_info)
if not paged or page_count != limit:
break
diff --git a/youtube_dl/extractor/krasview.py b/youtube_dl/extractor/krasview.py
new file mode 100644
index 000000000..6f3d2345b
--- /dev/null
+++ b/youtube_dl/extractor/krasview.py
@@ -0,0 +1,59 @@
+# encoding: utf-8
+from __future__ import unicode_literals
+
+import json
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ unescapeHTML,
+)
+
+
+class KrasViewIE(InfoExtractor):
+ IE_DESC = 'Красвью'
+ _VALID_URL = r'https?://krasview\.ru/video/(?P<id>\d+)'
+
+ _TEST = {
+ 'url': 'http://krasview.ru/video/512228',
+ 'md5': '3b91003cf85fc5db277870c8ebd98eae',
+ 'info_dict': {
+ 'id': '512228',
+ 'ext': 'mp4',
+ 'title': 'Снег, лёд, заносы',
+ 'description': 'Снято в городе Нягань, в Ханты-Мансийском автономном округе.',
+ 'duration': 27,
+ 'thumbnail': 're:^https?://.*\.jpg',
+ },
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+
+ webpage = self._download_webpage(url, video_id)
+
+ flashvars = json.loads(self._search_regex(
+ r'flashvars\s*:\s*({.+?})\s*}\);', webpage, 'flashvars'))
+
+ video_url = flashvars['url']
+ title = unescapeHTML(flashvars['title'])
+ description = unescapeHTML(flashvars.get('subtitle') or self._og_search_description(webpage, default=None))
+ thumbnail = flashvars['image']
+ duration = int(flashvars['duration'])
+ filesize = int(flashvars['size'])
+ width = int_or_none(self._og_search_property('video:width', webpage, 'video width'))
+ height = int_or_none(self._og_search_property('video:height', webpage, 'video height'))
+
+ return {
+ 'id': video_id,
+ 'url': video_url,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'filesize': filesize,
+ 'width': width,
+ 'height': height,
+ }
diff --git a/youtube_dl/extractor/livestream.py b/youtube_dl/extractor/livestream.py
index 1ea1bbab4..281a0ce40 100644
--- a/youtube_dl/extractor/livestream.py
+++ b/youtube_dl/extractor/livestream.py
@@ -5,11 +5,14 @@ import json
from .common import InfoExtractor
from ..utils import (
+ compat_str,
compat_urllib_parse_urlparse,
compat_urlparse,
- xpath_with_ns,
- compat_str,
+ ExtractorError,
+ find_xpath_attr,
+ int_or_none,
orderedSet,
+ xpath_with_ns,
)
@@ -24,20 +27,82 @@ class LivestreamIE(InfoExtractor):
'ext': 'mp4',
'title': 'Live from Webster Hall NYC',
'upload_date': '20121012',
+ 'like_count': int,
+ 'view_count': int,
+ 'thumbnail': 're:^http://.*\.jpg$'
}
}
+ def _parse_smil(self, video_id, smil_url):
+ formats = []
+ _SWITCH_XPATH = (
+ './/{http://www.w3.org/2001/SMIL20/Language}body/'
+ '{http://www.w3.org/2001/SMIL20/Language}switch')
+ smil_doc = self._download_xml(
+ smil_url, video_id,
+ note='Downloading SMIL information',
+ errnote='Unable to download SMIL information',
+ fatal=False)
+ if smil_doc is False: # Download failed
+ return formats
+ title_node = find_xpath_attr(
+ smil_doc, './/{http://www.w3.org/2001/SMIL20/Language}meta',
+ 'name', 'title')
+ if title_node is None:
+ self.report_warning('Cannot find SMIL id')
+ switch_node = smil_doc.find(_SWITCH_XPATH)
+ else:
+ title_id = title_node.attrib['content']
+ switch_node = find_xpath_attr(
+ smil_doc, _SWITCH_XPATH, 'id', title_id)
+ if switch_node is None:
+ raise ExtractorError('Cannot find switch node')
+ video_nodes = switch_node.findall(
+ '{http://www.w3.org/2001/SMIL20/Language}video')
+
+ for vn in video_nodes:
+ tbr = int_or_none(vn.attrib.get('system-bitrate'))
+ furl = (
+ 'http://livestream-f.akamaihd.net/%s?v=3.0.3&fp=WIN%%2014,0,0,145' %
+ (vn.attrib['src']))
+ if 'clipBegin' in vn.attrib:
+ furl += '&ssek=' + vn.attrib['clipBegin']
+ formats.append({
+ 'url': furl,
+ 'format_id': 'smil_%d' % tbr,
+ 'ext': 'flv',
+ 'tbr': tbr,
+ 'preference': -1000,
+ })
+ return formats
+
def _extract_video_info(self, video_data):
- video_url = (
- video_data.get('progressive_url_hd') or
- video_data.get('progressive_url')
+ video_id = compat_str(video_data['id'])
+
+ FORMAT_KEYS = (
+ ('sd', 'progressive_url'),
+ ('hd', 'progressive_url_hd'),
)
+ formats = [{
+ 'format_id': format_id,
+ 'url': video_data[key],
+ 'quality': i + 1,
+ } for i, (format_id, key) in enumerate(FORMAT_KEYS)
+ if video_data.get(key)]
+
+ smil_url = video_data.get('smil_url')
+ if smil_url:
+ formats.extend(self._parse_smil(video_id, smil_url))
+ self._sort_formats(formats)
+
return {
- 'id': compat_str(video_data['id']),
- 'url': video_url,
+ 'id': video_id,
+ 'formats': formats,
'title': video_data['caption'],
- 'thumbnail': video_data['thumbnail_url'],
+ 'thumbnail': video_data.get('thumbnail_url'),
'upload_date': video_data['updated_at'].replace('-', '')[:8],
+ 'like_count': video_data.get('likes', {}).get('total'),
+ 'view_count': video_data.get('views'),
}
def _real_extract(self, url):
@@ -46,17 +111,28 @@ class LivestreamIE(InfoExtractor):
event_name = mobj.group('event_name')
webpage = self._download_webpage(url, video_id or event_name)
- if video_id is None:
- # This is an event page:
+ og_video = self._og_search_video_url(webpage, 'player url', fatal=False, default=None)
+ if og_video is None:
config_json = self._search_regex(
r'window.config = ({.*?});', webpage, 'window config')
info = json.loads(config_json)['event']
+
+ def is_relevant(vdata, vid):
+ result = vdata['type'] == 'video'
+ if video_id is not None:
+ result = result and compat_str(vdata['data']['id']) == vid
+ return result
+
videos = [self._extract_video_info(video_data['data'])
- for video_data in info['feed']['data']
- if video_data['type'] == 'video']
- return self.playlist_result(videos, info['id'], info['full_name'])
+ for video_data in info['feed']['data']
+ if is_relevant(video_data, video_id)]
+ if video_id is None:
+ # This is an event page:
+ return self.playlist_result(videos, info['id'], info['full_name'])
+ else:
+ if videos:
+ return videos[0]
else:
- og_video = self._og_search_video_url(webpage, 'player url')
query_str = compat_urllib_parse_urlparse(og_video).query
query = compat_urlparse.parse_qs(query_str)
api_url = query['play_url'][0].replace('.smil', '')
diff --git a/youtube_dl/extractor/metacafe.py b/youtube_dl/extractor/metacafe.py
index 6436c05a3..1a896b536 100644
--- a/youtube_dl/extractor/metacafe.py
+++ b/youtube_dl/extractor/metacafe.py
@@ -9,6 +9,7 @@ from ..utils import (
compat_urllib_request,
determine_ext,
ExtractorError,
+ int_or_none,
)
@@ -83,6 +84,21 @@ class MetacafeIE(InfoExtractor):
'skip_download': True,
},
},
+ # Movieclips.com video
+ {
+ 'url': 'http://www.metacafe.com/watch/mv-Wy7ZU/my_week_with_marilyn_do_you_love_me/',
+ 'info_dict': {
+ 'id': 'mv-Wy7ZU',
+ 'ext': 'mp4',
+ 'title': 'My Week with Marilyn - Do You Love Me?',
+ 'description': 'From the movie My Week with Marilyn - Colin (Eddie Redmayne) professes his love to Marilyn (Michelle Williams) and gets her to promise to return to set and finish the movie.',
+ 'uploader': 'movie_trailers',
+ 'duration': 176,
+ },
+ 'params': {
+ 'skip_download': 'requires rtmpdump',
+ }
+ }
]
def report_disclaimer(self):
@@ -134,6 +150,7 @@ class MetacafeIE(InfoExtractor):
# Extract URL, uploader and title from webpage
self.report_extraction(video_id)
+ video_url = None
mobj = re.search(r'(?m)&mediaURL=([^&]+)', webpage)
if mobj is not None:
mediaURL = compat_urllib_parse.unquote(mobj.group(1))
@@ -146,16 +163,17 @@ class MetacafeIE(InfoExtractor):
else:
gdaKey = mobj.group(1)
video_url = '%s?__gda__=%s' % (mediaURL, gdaKey)
- else:
+ if video_url is None:
mobj = re.search(r'<video src="([^"]+)"', webpage)
if mobj:
video_url = mobj.group(1)
video_ext = 'mp4'
- else:
- mobj = re.search(r' name="flashvars" value="(.*?)"', webpage)
- if mobj is None:
- raise ExtractorError('Unable to extract media URL')
- vardict = compat_parse_qs(mobj.group(1))
+ if video_url is None:
+ flashvars = self._search_regex(
+ r' name="flashvars" value="(.*?)"', webpage, 'flashvars',
+ default=None)
+ if flashvars:
+ vardict = compat_parse_qs(flashvars)
if 'mediaData' not in vardict:
raise ExtractorError('Unable to extract media URL')
mobj = re.search(
@@ -165,26 +183,68 @@ class MetacafeIE(InfoExtractor):
mediaURL = mobj.group('mediaURL').replace('\\/', '/')
video_url = '%s?__gda__=%s' % (mediaURL, mobj.group('key'))
video_ext = determine_ext(video_url)
-
- video_title = self._html_search_regex(r'(?im)<title>(.*) - Video</title>', webpage, 'title')
+ if video_url is None:
+ player_url = self._search_regex(
+ r"swfobject\.embedSWF\('([^']+)'",
+ webpage, 'config URL', default=None)
+ if player_url:
+ config_url = self._search_regex(
+ r'config=(.+)$', player_url, 'config URL')
+ config_doc = self._download_xml(
+ config_url, video_id,
+ note='Downloading video config')
+ smil_url = config_doc.find('.//properties').attrib['smil_file']
+ smil_doc = self._download_xml(
+ smil_url, video_id,
+ note='Downloading SMIL document')
+ base_url = smil_doc.find('./head/meta').attrib['base']
+ video_url = []
+ for vn in smil_doc.findall('.//video'):
+ br = int(vn.attrib['system-bitrate'])
+ play_path = vn.attrib['src']
+ video_url.append({
+ 'format_id': 'smil-%d' % br,
+ 'url': base_url,
+ 'play_path': play_path,
+ 'page_url': url,
+ 'player_url': player_url,
+ 'ext': play_path.partition(':')[0],
+ })
+
+ if video_url is None:
+ raise ExtractorError('Unsupported video type')
+
+ video_title = self._html_search_regex(
+ r'(?im)<title>(.*) - Video</title>', webpage, 'title')
description = self._og_search_description(webpage)
thumbnail = self._og_search_thumbnail(webpage)
video_uploader = self._html_search_regex(
r'submitter=(.*?);|googletag\.pubads\(\)\.setTargeting\("(?:channel|submiter)","([^"]+)"\);',
webpage, 'uploader nickname', fatal=False)
+ duration = int_or_none(
+ self._html_search_meta('video:duration', webpage))
+
+ age_limit = (
+ 18
+ if re.search(r'"contentRating":"restricted"', webpage)
+ else 0)
- if re.search(r'"contentRating":"restricted"', webpage) is not None:
- age_limit = 18
+ if isinstance(video_url, list):
+ formats = video_url
else:
- age_limit = 0
+ formats = [{
+ 'url': video_url,
+ 'ext': video_ext,
+ }]
+ self._sort_formats(formats)
return {
'id': video_id,
- 'url': video_url,
'description': description,
'uploader': video_uploader,
'title': video_title,
- 'thumbnail':thumbnail,
- 'ext': video_ext,
+ 'thumbnail': thumbnail,
'age_limit': age_limit,
+ 'formats': formats,
+ 'duration': duration,
}
diff --git a/youtube_dl/extractor/mitele.py b/youtube_dl/extractor/mitele.py
new file mode 100644
index 000000000..979f3d692
--- /dev/null
+++ b/youtube_dl/extractor/mitele.py
@@ -0,0 +1,60 @@
+from __future__ import unicode_literals
+
+import re
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+ compat_urllib_parse,
+ get_element_by_attribute,
+ parse_duration,
+ strip_jsonp,
+)
+
+
+class MiTeleIE(InfoExtractor):
+ IE_NAME = 'mitele.es'
+ _VALID_URL = r'http://www\.mitele\.es/[^/]+/[^/]+/[^/]+/(?P<episode>[^/]+)/'
+
+ _TEST = {
+ 'url': 'http://www.mitele.es/programas-tv/diario-de/la-redaccion/programa-144/',
+ 'md5': '6a75fe9d0d3275bead0cb683c616fddb',
+ 'info_dict': {
+ 'id': '0fce117d',
+ 'ext': 'mp4',
+ 'title': 'Programa 144 - Tor, la web invisible',
+ 'description': 'md5:3b6fce7eaa41b2d97358726378d9369f',
+ 'display_id': 'programa-144',
+ 'duration': 2913,
+ },
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ episode = mobj.group('episode')
+ webpage = self._download_webpage(url, episode)
+ embed_data_json = self._search_regex(
+ r'MSV\.embedData\[.*?\]\s*=\s*({.*?});', webpage, 'embed data',
+ flags=re.DOTALL
+ ).replace('\'', '"')
+ embed_data = json.loads(embed_data_json)
+
+ info_url = embed_data['flashvars']['host']
+ info_el = self._download_xml(info_url, episode).find('./video/info')
+
+ video_link = info_el.find('videoUrl/link').text
+ token_query = compat_urllib_parse.urlencode({'id': video_link})
+ token_info = self._download_json(
+ 'http://token.mitele.es/?' + token_query, episode,
+ transform_source=strip_jsonp
+ )
+
+ return {
+ 'id': embed_data['videoId'],
+ 'display_id': episode,
+ 'title': info_el.find('title').text,
+ 'url': token_info['tokenizedUrl'],
+ 'description': get_element_by_attribute('class', 'text', webpage),
+ 'thumbnail': info_el.find('thumb').text,
+ 'duration': parse_duration(info_el.find('duration').text),
+ }
diff --git a/youtube_dl/extractor/mojvideo.py b/youtube_dl/extractor/mojvideo.py
new file mode 100644
index 000000000..90b460d65
--- /dev/null
+++ b/youtube_dl/extractor/mojvideo.py
@@ -0,0 +1,58 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ parse_duration,
+)
+
+
+class MojvideoIE(InfoExtractor):
+ # Extractor for mojvideo.com video pages. The URL carries both a slug
+ # (display_id) and a hexadecimal video id.
+ _VALID_URL = r'https?://(?:www\.)?mojvideo\.com/video-(?P<display_id>[^/]+)/(?P<id>[a-f0-9]+)'
+ _TEST = {
+ 'url': 'http://www.mojvideo.com/video-v-avtu-pred-mano-rdecelaska-alfi-nipic/3d1ed4497707730b2906',
+ 'md5': 'f7fd662cc8ce2be107b0d4f2c0483ae7',
+ 'info_dict': {
+ 'id': '3d1ed4497707730b2906',
+ 'display_id': 'v-avtu-pred-mano-rdecelaska-alfi-nipic',
+ 'ext': 'mp4',
+ 'title': 'V avtu pred mano rdečelaska - Alfi Nipič',
+ 'thumbnail': 're:^http://.*\.jpg$',
+ 'duration': 242,
+ }
+ }
+
+ def _real_extract(self, url):
+ # Returns an info dict built from the site's player API response.
+ # Raises ExtractorError (expected=True) when the API reports an error.
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ display_id = mobj.group('display_id')
+
+ # XML is malformed
+ # ...so the response is fetched as plain text and picked apart with
+ # regular expressions instead of an XML parser.
+ playerapi = self._download_webpage(
+ 'http://www.mojvideo.com/playerapi.php?v=%s&t=1' % video_id, display_id)
+
+ if '<error>true</error>' in playerapi:
+ # The description lookup is non-fatal, so the message may read
+ # "... said: None" when the <errordesc> element is missing.
+ error_desc = self._html_search_regex(
+ r'<errordesc>([^<]*)</errordesc>', playerapi, 'error description', fatal=False)
+ raise ExtractorError('%s said: %s' % (self.IE_NAME, error_desc), expected=True)
+
+ # Title and media URL are mandatory; thumbnail and duration are optional.
+ title = self._html_search_regex(
+ r'<title>([^<]+)</title>', playerapi, 'title')
+ video_url = self._html_search_regex(
+ r'<file>([^<]+)</file>', playerapi, 'video URL')
+ thumbnail = self._html_search_regex(
+ r'<preview>([^<]+)</preview>', playerapi, 'thumbnail', fatal=False)
+ duration = parse_duration(self._html_search_regex(
+ r'<duration>([^<]+)</duration>', playerapi, 'duration', fatal=False))
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'url': video_url,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ } \ No newline at end of file
diff --git a/youtube_dl/extractor/nowness.py b/youtube_dl/extractor/nowness.py
index 1c5e9401f..6b2f3f55a 100644
--- a/youtube_dl/extractor/nowness.py
+++ b/youtube_dl/extractor/nowness.py
@@ -1,3 +1,4 @@
+# encoding: utf-8
from __future__ import unicode_literals
import re
@@ -8,19 +9,34 @@ from ..utils import ExtractorError
class NownessIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?nowness\.com/[^?#]*?/(?P<id>[0-9]+)/(?P<slug>[^/]+?)(?:$|[?#])'
-
- _TEST = {
- 'url': 'http://www.nowness.com/day/2013/6/27/3131/candor--the-art-of-gesticulation',
- 'md5': '068bc0202558c2e391924cb8cc470676',
- 'info_dict': {
- 'id': '2520295746001',
- 'ext': 'mp4',
- 'description': 'Candor: The Art of Gesticulation',
- 'uploader': 'Nowness',
- 'title': 'Candor: The Art of Gesticulation',
- }
- }
+ _VALID_URL = r'https?://(?:(?:www|cn)\.)?nowness\.com/[^?#]*?/(?P<id>[0-9]+)/(?P<slug>[^/]+?)(?:$|[?#])'
+
+ _TESTS = [
+ {
+ 'url': 'http://www.nowness.com/day/2013/6/27/3131/candor--the-art-of-gesticulation',
+ 'md5': '068bc0202558c2e391924cb8cc470676',
+ 'info_dict': {
+ 'id': '2520295746001',
+ 'ext': 'mp4',
+ 'title': 'Candor: The Art of Gesticulation',
+ 'description': 'Candor: The Art of Gesticulation',
+ 'thumbnail': 're:^https?://.*\.jpg',
+ 'uploader': 'Nowness',
+ }
+ },
+ {
+ 'url': 'http://cn.nowness.com/day/2014/8/7/4069/kasper-bj-rke-ft-jaakko-eino-kalevi--tnr',
+ 'md5': 'e79cf125e387216f86b2e0a5b5c63aa3',
+ 'info_dict': {
+ 'id': '3716354522001',
+ 'ext': 'mp4',
+ 'title': 'Kasper Bjørke ft. Jaakko Eino Kalevi: TNR',
+ 'description': 'Kasper Bjørke ft. Jaakko Eino Kalevi: TNR',
+ 'thumbnail': 're:^https?://.*\.jpg',
+ 'uploader': 'Nowness',
+ }
+ },
+ ]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
diff --git a/youtube_dl/extractor/oe1.py b/youtube_dl/extractor/oe1.py
deleted file mode 100644
index 38971ab4d..000000000
--- a/youtube_dl/extractor/oe1.py
+++ /dev/null
@@ -1,40 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import calendar
-import datetime
-import re
-
-from .common import InfoExtractor
-
-# audios on oe1.orf.at are only available for 7 days, so we can't
-# add tests.
-
-
-class OE1IE(InfoExtractor):
- IE_DESC = 'oe1.orf.at'
- _VALID_URL = r'http://oe1\.orf\.at/programm/(?P<id>[0-9]+)'
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- show_id = mobj.group('id')
-
- data = self._download_json(
- 'http://oe1.orf.at/programm/%s/konsole' % show_id,
- show_id
- )
-
- timestamp = datetime.datetime.strptime('%s %s' % (
- data['item']['day_label'],
- data['item']['time']
- ), '%d.%m.%Y %H:%M')
- unix_timestamp = calendar.timegm(timestamp.utctimetuple())
-
- return {
- 'id': show_id,
- 'title': data['item']['title'],
- 'url': data['item']['url_stream'],
- 'ext': 'mp3',
- 'description': data['item'].get('info'),
- 'timestamp': unix_timestamp
- }
diff --git a/youtube_dl/extractor/ooyala.py b/youtube_dl/extractor/ooyala.py
index 13f12824c..2044e107e 100644
--- a/youtube_dl/extractor/ooyala.py
+++ b/youtube_dl/extractor/ooyala.py
@@ -3,23 +3,38 @@ import re
import json
from .common import InfoExtractor
-from ..utils import unescapeHTML
+from ..utils import (
+ unescapeHTML,
+ ExtractorError,
+)
class OoyalaIE(InfoExtractor):
_VALID_URL = r'(?:ooyala:|https?://.+?\.ooyala\.com/.*?(?:embedCode|ec)=)(?P<id>.+?)(&|$)'
- _TEST = {
- # From http://it.slashdot.org/story/13/04/25/178216/recovering-data-from-broken-hard-drives-and-ssds-video
- 'url': 'http://player.ooyala.com/player.js?embedCode=pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8',
- 'md5': '3f5cceb3a7bf461d6c29dc466cf8033c',
- 'info_dict': {
- 'id': 'pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8',
- 'ext': 'mp4',
- 'title': 'Explaining Data Recovery from Hard Drives and SSDs',
- 'description': 'How badly damaged does a drive have to be to defeat Russell and his crew? Apparently, smashed to bits.',
+ _TESTS = [
+ {
+ # From http://it.slashdot.org/story/13/04/25/178216/recovering-data-from-broken-hard-drives-and-ssds-video
+ 'url': 'http://player.ooyala.com/player.js?embedCode=pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8',
+ 'md5': '3f5cceb3a7bf461d6c29dc466cf8033c',
+ 'info_dict': {
+ 'id': 'pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8',
+ 'ext': 'mp4',
+ 'title': 'Explaining Data Recovery from Hard Drives and SSDs',
+ 'description': 'How badly damaged does a drive have to be to defeat Russell and his crew? Apparently, smashed to bits.',
+ },
+ }, {
+ # Only available for ipad
+ 'url': 'http://player.ooyala.com/player.js?embedCode=x1b3lqZDq9y_7kMyC2Op5qo-p077tXD0',
+ 'md5': '4b9754921fddb68106e48c142e2a01e6',
+ 'info_dict': {
+ 'id': 'x1b3lqZDq9y_7kMyC2Op5qo-p077tXD0',
+ 'ext': 'mp4',
+ 'title': 'Simulation Overview - Levels of Simulation',
+ 'description': '',
+ },
},
- }
+ ]
@staticmethod
def _url_for_embed_code(embed_code):
@@ -47,13 +62,30 @@ class OoyalaIE(InfoExtractor):
player = self._download_webpage(player_url, embedCode)
mobile_url = self._search_regex(r'mobile_player_url="(.+?)&device="',
player, 'mobile player url')
- mobile_player = self._download_webpage(mobile_url, embedCode)
- videos_info = self._search_regex(
- r'var streams=window.oo_testEnv\?\[\]:eval\("\((\[{.*?}\])\)"\);',
- mobile_player, 'info').replace('\\"','"')
- videos_more_info = self._search_regex(r'eval\("\(({.*?\\"promo\\".*?})\)"', mobile_player, 'more info').replace('\\"','"')
+ # Looks like some videos are only available for particular devices
+ # (e.g. http://player.ooyala.com/player.js?embedCode=x1b3lqZDq9y_7kMyC2Op5qo-p077tXD0
+ # is only available for ipad)
+ # Working around with fetching URLs for all the devices found starting with 'unknown'
+ # until we succeed or eventually fail for each device.
+ devices = re.findall(r'device\s*=\s*"([^"]+)";', player)
+ devices.remove('unknown')
+ devices.insert(0, 'unknown')
+ for device in devices:
+ mobile_player = self._download_webpage(
+ '%s&device=%s' % (mobile_url, device), embedCode,
+ 'Downloading mobile player JS for %s device' % device)
+ videos_info = self._search_regex(
+ r'var streams=window.oo_testEnv\?\[\]:eval\("\((\[{.*?}\])\)"\);',
+ mobile_player, 'info', fatal=False, default=None)
+ if videos_info:
+ break
+ if not videos_info:
+ raise ExtractorError('Unable to extract info')
+ videos_info = videos_info.replace('\\"', '"')
+ videos_more_info = self._search_regex(
+ r'eval\("\(({.*?\\"promo\\".*?})\)"', mobile_player, 'more info').replace('\\"', '"')
videos_info = json.loads(videos_info)
- videos_more_info =json.loads(videos_more_info)
+ videos_more_info = json.loads(videos_more_info)
if videos_more_info.get('lineup'):
videos = [self._extract_result(info, more_info) for (info, more_info) in zip(videos_info, videos_more_info['lineup'])]
diff --git a/youtube_dl/extractor/orf.py b/youtube_dl/extractor/orf.py
index 03421d1d5..011e6be13 100644
--- a/youtube_dl/extractor/orf.py
+++ b/youtube_dl/extractor/orf.py
@@ -3,6 +3,8 @@ from __future__ import unicode_literals
import json
import re
+import calendar
+import datetime
from .common import InfoExtractor
from ..utils import (
@@ -12,7 +14,9 @@ from ..utils import (
)
-class ORFIE(InfoExtractor):
+class ORFTVthekIE(InfoExtractor):
+ IE_NAME = 'orf:tvthek'
+ IE_DESC = 'ORF TVthek'
_VALID_URL = r'https?://tvthek\.orf\.at/(?:programs/.+?/episodes|topics/.+?|program/[^/]+)/(?P<id>\d+)'
_TEST = {
@@ -105,3 +109,73 @@ class ORFIE(InfoExtractor):
'entries': entries,
'id': playlist_id,
}
+
+
+# Audios on ORF radio are only available for 7 days, so we can't add tests.
+
+
+class ORFOE1IE(InfoExtractor):
+ # Extractor for programme pages on oe1.orf.at; the numeric programme id
+ # from the URL is used as the video id.
+ IE_NAME = 'orf:oe1'
+ IE_DESC = 'Radio Österreich 1'
+ _VALID_URL = r'http://oe1\.orf\.at/programm/(?P<id>[0-9]+)'
+
+ def _real_extract(self, url):
+ # Returns an info dict for a single mp3 stream described by the
+ # programme's "konsole" JSON document.
+ mobj = re.match(self._VALID_URL, url)
+ show_id = mobj.group('id')
+
+ data = self._download_json(
+ 'http://oe1.orf.at/programm/%s/konsole' % show_id,
+ show_id
+ )
+
+ # day_label ("dd.mm.YYYY") and time ("HH:MM") are combined into a
+ # naive datetime, which calendar.timegm then interprets as UTC.
+ # NOTE(review): the site presumably reports Austrian local time, in
+ # which case the timestamp is off by the UTC offset - confirm.
+ timestamp = datetime.datetime.strptime('%s %s' % (
+ data['item']['day_label'],
+ data['item']['time']
+ ), '%d.%m.%Y %H:%M')
+ unix_timestamp = calendar.timegm(timestamp.utctimetuple())
+
+ return {
+ 'id': show_id,
+ 'title': data['item']['title'],
+ 'url': data['item']['url_stream'],
+ 'ext': 'mp3',
+ # 'info' is optional in the JSON, hence .get()
+ 'description': data['item'].get('info'),
+ 'timestamp': unix_timestamp
+ }
+
+
+class ORFFM4IE(InfoExtractor):
+ IE_DESC = 'orf:fm4'
+ IE_DESC = 'radio FM4'
+ _VALID_URL = r'http://fm4\.orf\.at/7tage/?#(?P<date>[0-9]+)/(?P<show>\w+)'
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ show_date = mobj.group('date')
+ show_id = mobj.group('show')
+
+ data = self._download_json(
+ 'http://audioapi.orf.at/fm4/json/2.0/broadcasts/%s/4%s' % (show_date, show_id),
+ show_id
+ )
+
+ def extract_entry_dict(info, title, subtitle):
+ return {
+ 'id': info['loopStreamId'].replace('.mp3', ''),
+ 'url': 'http://loopstream01.apa.at/?channel=fm4&id=%s' % info['loopStreamId'],
+ 'title': title,
+ 'description': subtitle,
+ 'duration': (info['end'] - info['start']) / 1000,
+ 'timestamp': info['start'] / 1000,
+ 'ext': 'mp3'
+ }
+
+ entries = [extract_entry_dict(t, data['title'], data['subtitle']) for t in data['streams']]
+
+ return {
+ '_type': 'playlist',
+ 'id': show_id,
+ 'title': data['title'],
+ 'description': data['subtitle'],
+ 'entries': entries
+ } \ No newline at end of file
diff --git a/youtube_dl/extractor/patreon.py b/youtube_dl/extractor/patreon.py
new file mode 100644
index 000000000..707a54e3a
--- /dev/null
+++ b/youtube_dl/extractor/patreon.py
@@ -0,0 +1,101 @@
+# encoding: utf-8
+from __future__ import unicode_literals
+
+import json
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ compat_urlparse,
+ js_to_json,
+)
+
+
+class PatreonIE(InfoExtractor):
+ # Extractor for Patreon "creation" pages (patreon.com/creation?hid=...).
+ # Everything after "hid=" is captured as the video id.
+ _VALID_URL = r'https?://(?:www\.)?patreon\.com/creation\?hid=(.+)'
+ _TESTS = [
+ {
+ 'url': 'http://www.patreon.com/creation?hid=743933',
+ 'md5': 'e25505eec1053a6e6813b8ed369875cc',
+ 'info_dict': {
+ 'id': '743933',
+ 'ext': 'mp3',
+ 'title': 'Episode 166: David Smalley of Dogma Debate',
+ 'uploader': 'Cognitive Dissonance Podcast',
+ 'thumbnail': 're:^https?://.*$',
+ },
+ },
+ {
+ 'url': 'http://www.patreon.com/creation?hid=754133',
+ 'md5': '3eb09345bf44bf60451b8b0b81759d0a',
+ 'info_dict': {
+ 'id': '754133',
+ 'ext': 'mp3',
+ 'title': 'CD 167 Extra',
+ 'uploader': 'Cognitive Dissonance Podcast',
+ 'thumbnail': 're:^https?://.*$',
+ },
+ },
+ ]
+
+ # Currently Patreon exposes download URL via hidden CSS, so login is not
+ # needed. Keeping this commented for when this inevitably changes.
+ # (The disabled code lives in the string literal below; it references
+ # compat_urllib_request, compat_urllib_parse and ExtractorError, none of
+ # which this module imports, so those imports must be added on re-enabling.)
+ '''
+ def _login(self):
+ (username, password) = self._get_login_info()
+ if username is None:
+ return
+
+ login_form = {
+ 'redirectUrl': 'http://www.patreon.com/',
+ 'email': username,
+ 'password': password,
+ }
+
+ request = compat_urllib_request.Request(
+ 'https://www.patreon.com/processLogin',
+ compat_urllib_parse.urlencode(login_form).encode('utf-8')
+ )
+ login_page = self._download_webpage(request, None, note='Logging in as %s' % username)
+
+ if re.search(r'onLoginFailed', login_page):
+ raise ExtractorError('Unable to login, incorrect username and/or password', expected=True)
+
+ def _real_initialize(self):
+ self._login()
+ '''
+
+ def _real_extract(self, url):
+ # Returns an info dict for the creation's audio; 'ext' is always
+ # reported as 'mp3'.
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group(1)
+
+ webpage = self._download_webpage(url, video_id)
+ title = self._og_search_title(webpage).strip()
+
+ # Two page layouts are handled: a direct attachment link, or a
+ # jPlayer playlist embedded as a JavaScript array.
+ attach_fn = self._html_search_regex(
+ r'<div class="attach"><a target="_blank" href="([^"]+)">',
+ webpage, 'attachment URL', default=None)
+ if attach_fn is not None:
+ # The attachment href is site-relative, so the host is prepended.
+ video_url = 'http://www.patreon.com' + attach_fn
+ thumbnail = self._og_search_thumbnail(webpage)
+ uploader = self._html_search_regex(
+ r'<strong>(.*?)</strong> is creating', webpage, 'uploader')
+ else:
+ # Grab the second argument of `new jPlayerPlaylist({...}, [...])`,
+ # convert the JS literal to JSON and use its first entry only.
+ playlist_js = self._search_regex(
+ r'(?s)new\s+jPlayerPlaylist\(\s*\{\s*[^}]*},\s*(\[.*?,?\s*\])',
+ webpage, 'playlist JSON')
+ playlist_json = js_to_json(playlist_js)
+ playlist = json.loads(playlist_json)
+ data = playlist[0]
+ video_url = self._proto_relative_url(data['mp3'])
+ # 'cover' and 'artist' are optional in the playlist entry.
+ thumbnail = self._proto_relative_url(data.get('cover'))
+ uploader = data.get('artist')
+
+ return {
+ 'id': video_id,
+ 'url': video_url,
+ 'ext': 'mp3',
+ 'title': title,
+ 'uploader': uploader,
+ 'thumbnail': thumbnail,
+ }
diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py
index 64cded707..dee4af6f1 100644
--- a/youtube_dl/extractor/pbs.py
+++ b/youtube_dl/extractor/pbs.py
@@ -20,25 +20,60 @@ class PBSIE(InfoExtractor):
)
'''
- _TEST = {
- 'url': 'http://www.pbs.org/tpt/constitution-usa-peter-sagal/watch/a-more-perfect-union/',
- 'md5': 'ce1888486f0908d555a8093cac9a7362',
- 'info_dict': {
- 'id': '2365006249',
- 'ext': 'mp4',
- 'title': 'A More Perfect Union',
- 'description': 'md5:ba0c207295339c8d6eced00b7c363c6a',
- 'duration': 3190,
+ _TESTS = [
+ {
+ 'url': 'http://www.pbs.org/tpt/constitution-usa-peter-sagal/watch/a-more-perfect-union/',
+ 'md5': 'ce1888486f0908d555a8093cac9a7362',
+ 'info_dict': {
+ 'id': '2365006249',
+ 'ext': 'mp4',
+ 'title': 'A More Perfect Union',
+ 'description': 'md5:ba0c207295339c8d6eced00b7c363c6a',
+ 'duration': 3190,
+ },
+ },
+ {
+ 'url': 'http://www.pbs.org/wgbh/pages/frontline/losing-iraq/',
+ 'md5': '143c98aa54a346738a3d78f54c925321',
+ 'info_dict': {
+ 'id': '2365297690',
+ 'ext': 'mp4',
+ 'title': 'Losing Iraq',
+ 'description': 'md5:f5bfbefadf421e8bb8647602011caf8e',
+ 'duration': 5050,
+ },
+ },
+ {
+ 'url': 'http://www.pbs.org/newshour/bb/education-jan-june12-cyberschools_02-23/',
+ 'md5': 'b19856d7f5351b17a5ab1dc6a64be633',
+ 'info_dict': {
+ 'id': '2201174722',
+ 'ext': 'mp4',
+ 'title': 'Cyber Schools Gain Popularity, but Quality Questions Persist',
+ 'description': 'md5:5871c15cba347c1b3d28ac47a73c7c28',
+ 'duration': 801,
+ },
},
- }
+ ]
- def _real_extract(self, url):
+ def _extract_ids(self, url):
mobj = re.match(self._VALID_URL, url)
presumptive_id = mobj.group('presumptive_id')
display_id = presumptive_id
if presumptive_id:
webpage = self._download_webpage(url, display_id)
+
+ MEDIA_ID_REGEXES = [
+ r"div\s*:\s*'videoembed'\s*,\s*mediaid\s*:\s*'(\d+)'", # frontline video embed
+ r'class="coveplayerid">([^<]+)<', # coveplayer
+ ]
+
+ media_id = self._search_regex(
+ MEDIA_ID_REGEXES, webpage, 'media ID', fatal=False, default=None)
+ if media_id:
+ return media_id, presumptive_id
+
url = self._search_regex(
r'<iframe\s+id=["\']partnerPlayer["\'].*?\s+src=["\'](.*?)["\']>',
webpage, 'player URL')
@@ -57,6 +92,11 @@ class PBSIE(InfoExtractor):
video_id = mobj.group('id')
display_id = video_id
+ return video_id, display_id
+
+ def _real_extract(self, url):
+ video_id, display_id = self._extract_ids(url)
+
info_url = 'http://video.pbs.org/videoInfo/%s?format=json' % video_id
info = self._download_json(info_url, display_id)
diff --git a/youtube_dl/extractor/reverbnation.py b/youtube_dl/extractor/reverbnation.py
index 49cf427a1..ec7e7df7b 100644
--- a/youtube_dl/extractor/reverbnation.py
+++ b/youtube_dl/extractor/reverbnation.py
@@ -1,23 +1,23 @@
from __future__ import unicode_literals
import re
-import time
from .common import InfoExtractor
-from ..utils import strip_jsonp
+from ..utils import str_or_none
class ReverbNationIE(InfoExtractor):
_VALID_URL = r'^https?://(?:www\.)?reverbnation\.com/.*?/song/(?P<id>\d+).*?$'
_TESTS = [{
'url': 'http://www.reverbnation.com/alkilados/song/16965047-mona-lisa',
- 'file': '16965047.mp3',
'md5': '3da12ebca28c67c111a7f8b262d3f7a7',
'info_dict': {
+ "id": "16965047",
+ "ext": "mp3",
"title": "MONA LISA",
"uploader": "ALKILADOS",
- "uploader_id": 216429,
- "thumbnail": "//gp1.wac.edgecastcdn.net/802892/production_public/Photo/13761700/image/1366002176_AVATAR_MONA_LISA.jpg"
+ "uploader_id": "216429",
+ "thumbnail": "re:^https://gp1\.wac\.edgecastcdn\.net/.*?\.jpg$"
},
}]
@@ -26,10 +26,8 @@ class ReverbNationIE(InfoExtractor):
song_id = mobj.group('id')
api_res = self._download_json(
- 'https://api.reverbnation.com/song/%s?callback=api_response_5&_=%d'
- % (song_id, int(time.time() * 1000)),
+ 'https://api.reverbnation.com/song/%s' % song_id,
song_id,
- transform_source=strip_jsonp,
note='Downloading information of song %s' % song_id
)
@@ -38,8 +36,9 @@ class ReverbNationIE(InfoExtractor):
'title': api_res.get('name'),
'url': api_res.get('url'),
'uploader': api_res.get('artist', {}).get('name'),
- 'uploader_id': api_res.get('artist', {}).get('id'),
- 'thumbnail': api_res.get('image', api_res.get('thumbnail')),
+ 'uploader_id': str_or_none(api_res.get('artist', {}).get('id')),
+ 'thumbnail': self._proto_relative_url(
+ api_res.get('image', api_res.get('thumbnail'))),
'ext': 'mp3',
'vcodec': 'none',
}
diff --git a/youtube_dl/extractor/rtlnl.py b/youtube_dl/extractor/rtlnl.py
new file mode 100644
index 000000000..14928cd62
--- /dev/null
+++ b/youtube_dl/extractor/rtlnl.py
@@ -0,0 +1,52 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class RtlXlIE(InfoExtractor):
+ IE_NAME = 'rtlxl.nl'
+ _VALID_URL = r'https?://www\.rtlxl\.nl/#!/[^/]+/(?P<uuid>[^/?]+)'
+
+ _TEST = {
+ 'url': 'http://www.rtlxl.nl/#!/rtl-nieuws-132237/6e4203a6-0a5e-3596-8424-c599a59e0677',
+ 'info_dict': {
+ 'id': '6e4203a6-0a5e-3596-8424-c599a59e0677',
+ 'ext': 'flv',
+ 'title': 'RTL Nieuws - Laat',
+ 'description': 'Dagelijks het laatste nieuws uit binnen- en '
+ 'buitenland. Voor nog meer nieuws kunt u ook gebruikmaken van '
+ 'onze mobiele apps.',
+ 'timestamp': 1408051800,
+ 'upload_date': '20140814',
+ },
+ 'params': {
+ # We download the first bytes of the first fragment, it can't be
+ # processed by the f4m downloader beacuse it isn't complete
+ 'skip_download': True,
+ },
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ uuid = mobj.group('uuid')
+
+ info = self._download_json(
+ 'http://www.rtl.nl/system/s4m/vfd/version=2/uuid=%s/fmt=flash/' % uuid,
+ uuid)
+ meta = info['meta']
+ material = info['material'][0]
+ episode_info = info['episodes'][0]
+
+ f4m_url = 'http://manifest.us.rtl.nl' + material['videopath']
+ progname = info['abstracts'][0]['name']
+ subtitle = material['title'] or info['episodes'][0]['name']
+
+ return {
+ 'id': uuid,
+ 'title': '%s - %s' % (progname, subtitle),
+ 'formats': self._extract_f4m_formats(f4m_url, uuid),
+ 'timestamp': material['original_date'],
+ 'description': episode_info['synopsis'],
+ }
diff --git a/youtube_dl/extractor/shared.py b/youtube_dl/extractor/shared.py
new file mode 100644
index 000000000..badba2ac6
--- /dev/null
+++ b/youtube_dl/extractor/shared.py
@@ -0,0 +1,57 @@
+from __future__ import unicode_literals
+
+import re
+import base64
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ compat_urllib_request,
+ compat_urllib_parse,
+ int_or_none,
+)
+
+
+class SharedIE(InfoExtractor):
+ _VALID_URL = r'http://shared\.sx/(?P<id>[\da-z]{10})'
+
+ _TEST = {
+ 'url': 'http://shared.sx/0060718775',
+ 'md5': '106fefed92a8a2adb8c98e6a0652f49b',
+ 'info_dict': {
+ 'id': '0060718775',
+ 'ext': 'mp4',
+ 'title': 'Bmp4',
+ },
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+
+ page = self._download_webpage(url, video_id)
+
+ if re.search(r'>File does not exist<', page) is not None:
+ raise ExtractorError('Video %s does not exist' % video_id, expected=True)
+
+ download_form = dict(re.findall(r'<input type="hidden" name="([^"]+)" value="([^"]*)"', page))
+
+ request = compat_urllib_request.Request(url, compat_urllib_parse.urlencode(download_form))
+ request.add_header('Content-Type', 'application/x-www-form-urlencoded')
+
+ video_page = self._download_webpage(request, video_id, 'Downloading video page')
+
+ video_url = self._html_search_regex(r'data-url="([^"]+)"', video_page, 'video URL')
+ title = base64.b64decode(self._html_search_meta('full:title', page, 'title')).decode('utf-8')
+ filesize = int_or_none(self._html_search_meta('full:size', page, 'file size', fatal=False))
+ thumbnail = self._html_search_regex(
+ r'data-poster="([^"]+)"', video_page, 'thumbnail', fatal=False, default=None)
+
+ return {
+ 'id': video_id,
+ 'url': video_url,
+ 'ext': 'mp4',
+ 'filesize': filesize,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ } \ No newline at end of file
diff --git a/youtube_dl/extractor/streamcloud.py b/youtube_dl/extractor/streamcloud.py
index 9faf3a5e3..172def221 100644
--- a/youtube_dl/extractor/streamcloud.py
+++ b/youtube_dl/extractor/streamcloud.py
@@ -1,4 +1,6 @@
# coding: utf-8
+from __future__ import unicode_literals
+
import re
import time
@@ -10,18 +12,18 @@ from ..utils import (
class StreamcloudIE(InfoExtractor):
- IE_NAME = u'streamcloud.eu'
+ IE_NAME = 'streamcloud.eu'
_VALID_URL = r'https?://streamcloud\.eu/(?P<id>[a-zA-Z0-9_-]+)/(?P<fname>[^#?]*)\.html'
_TEST = {
- u'url': u'http://streamcloud.eu/skp9j99s4bpz/youtube-dl_test_video_____________-BaW_jenozKc.mp4.html',
- u'file': u'skp9j99s4bpz.mp4',
- u'md5': u'6bea4c7fa5daaacc2a946b7146286686',
- u'info_dict': {
- u'title': u'youtube-dl test video \'/\\ ä ↭',
- u'duration': 9,
+ 'url': 'http://streamcloud.eu/skp9j99s4bpz/youtube-dl_test_video_____________-BaW_jenozKc.mp4.html',
+ 'md5': '6bea4c7fa5daaacc2a946b7146286686',
+ 'info_dict': {
+ 'id': 'skp9j99s4bpz',
+ 'ext': 'mp4',
+ 'title': 'youtube-dl test video \'/\\ ä ↭',
},
- u'skip': u'Only available from the EU'
+ 'skip': 'Only available from the EU'
}
def _real_extract(self, url):
@@ -46,21 +48,17 @@ class StreamcloudIE(InfoExtractor):
req = compat_urllib_request.Request(url, post, headers)
webpage = self._download_webpage(
- req, video_id, note=u'Downloading video page ...')
+ req, video_id, note='Downloading video page ...')
title = self._html_search_regex(
- r'<h1[^>]*>([^<]+)<', webpage, u'title')
+ r'<h1[^>]*>([^<]+)<', webpage, 'title')
video_url = self._search_regex(
- r'file:\s*"([^"]+)"', webpage, u'video URL')
- duration_str = self._search_regex(
- r'duration:\s*"?([0-9]+)"?', webpage, u'duration', fatal=False)
- duration = None if duration_str is None else int(duration_str)
+ r'file:\s*"([^"]+)"', webpage, 'video URL')
thumbnail = self._search_regex(
- r'image:\s*"([^"]+)"', webpage, u'thumbnail URL', fatal=False)
+ r'image:\s*"([^"]+)"', webpage, 'thumbnail URL', fatal=False)
return {
'id': video_id,
'title': title,
'url': video_url,
- 'duration': duration,
'thumbnail': thumbnail,
}
diff --git a/youtube_dl/extractor/swrmediathek.py b/youtube_dl/extractor/swrmediathek.py
index 6c688c520..5d9d70367 100644
--- a/youtube_dl/extractor/swrmediathek.py
+++ b/youtube_dl/extractor/swrmediathek.py
@@ -8,7 +8,7 @@ from ..utils import parse_duration
class SWRMediathekIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?swrmediathek\.de/player\.htm\?show=(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
+ _VALID_URL = r'https?://(?:www\.)?swrmediathek\.de/(?:content/)?player\.htm\?show=(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
_TESTS = [{
'url': 'http://swrmediathek.de/player.htm?show=849790d0-dab8-11e3-a953-0026b975f2e6',
@@ -52,6 +52,20 @@ class SWRMediathekIE(InfoExtractor):
'uploader': 'SWR 2',
'uploader_id': '284670',
}
+ }, {
+ 'url': 'http://swrmediathek.de/content/player.htm?show=52dc7e00-15c5-11e4-84bc-0026b975f2e6',
+ 'md5': '881531487d0633080a8cc88d31ef896f',
+ 'info_dict': {
+ 'id': '52dc7e00-15c5-11e4-84bc-0026b975f2e6',
+ 'ext': 'mp4',
+ 'title': 'Familienspaß am Bodensee',
+ 'description': 'md5:0b591225a32cfde7be1629ed49fe4315',
+ 'thumbnail': 're:http://.*\.jpg',
+ 'duration': 1784,
+ 'upload_date': '20140727',
+ 'uploader': 'SWR Fernsehen BW',
+ 'uploader_id': '281130',
+ }
}]
def _real_extract(self, url):
diff --git a/youtube_dl/extractor/teamcoco.py b/youtube_dl/extractor/teamcoco.py
index f8dd7e955..fa796ce72 100644
--- a/youtube_dl/extractor/teamcoco.py
+++ b/youtube_dl/extractor/teamcoco.py
@@ -37,7 +37,7 @@ class TeamcocoIE(InfoExtractor):
video_id = mobj.group("video_id")
if not video_id:
video_id = self._html_search_regex(
- r'<article class="video" data-id="(\d+?)"',
+ r'data-node-id="(\d+?)"',
webpage, 'video id')
data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id
diff --git a/youtube_dl/extractor/tvplay.py b/youtube_dl/extractor/tvplay.py
new file mode 100644
index 000000000..a56a7ab5f
--- /dev/null
+++ b/youtube_dl/extractor/tvplay.py
@@ -0,0 +1,85 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ parse_iso8601,
+ qualities,
+)
+
+
+class TVPlayIE(InfoExtractor):
+ # Extractor for tvplay.lv programme pages; the numeric id in the URL is
+ # used to query the playapi.mtgx.tv JSON API.
+ _VALID_URL = r'http://(?:www\.)?tvplay\.lv/parraides/[^/]+/(?P<id>\d+)'
+ _TESTS = [
+ {
+ 'url': 'http://www.tvplay.lv/parraides/vinas-melo-labak/418113?autostart=true',
+ 'info_dict': {
+ 'id': '418113',
+ 'ext': 'flv',
+ 'title': 'Kādi ir īri? - Viņas melo labāk',
+ 'description': 'Baiba apsmej īrus, kādi tie ir un ko viņi dara.',
+ 'duration': 25,
+ 'timestamp': 1406097056,
+ 'upload_date': '20140723',
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ },
+ ]
+
+ def _real_extract(self, url):
+ # Returns an info dict with metadata and a sorted list of formats.
+ # Raises ExtractorError (expected=True) for geo-blocked videos.
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+
+ video = self._download_json(
+ 'http://playapi.mtgx.tv/v1/videos/%s' % video_id, video_id, 'Downloading video JSON')
+
+ # Fail early, before requesting the streams document.
+ if video['is_geo_blocked']:
+ raise ExtractorError(
+ 'This content is not available in your country due to copyright reasons', expected=True)
+
+ streams = self._download_json(
+ 'http://playapi.mtgx.tv/v1/videos/stream/%s' % video_id, video_id, 'Downloading streams JSON')
+
+ # NOTE(review): qualities() presumably ranks ids by list position,
+ # i.e. 'high' is preferred over 'medium' over 'hls' - confirm.
+ quality = qualities(['hls', 'medium', 'high'])
+ formats = []
+ for format_id, video_url in streams['streams'].items():
+ # Stream ids with an empty/missing URL are not available.
+ if not video_url:
+ continue
+ fmt = {
+ 'format_id': format_id,
+ 'preference': quality(format_id),
+ }
+ if video_url.startswith('rtmp'):
+ # Split an RTMP URL into connection URL, app and play path;
+ # URLs that do not fit the pattern are skipped.
+ m = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>[^/]+))/(?P<playpath>.+)$', video_url)
+ if not m:
+ continue
+ fmt.update({
+ 'ext': 'flv',
+ 'url': m.group('url'),
+ 'app': m.group('app'),
+ 'play_path': m.group('playpath'),
+ })
+ else:
+ fmt.update({
+ 'url': video_url,
+ })
+ formats.append(fmt)
+
+ self._sort_formats(formats)
+
+ # All metadata keys except 'age_limit' are read with [] and are
+ # therefore required in the API response.
+ return {
+ 'id': video_id,
+ 'title': video['title'],
+ 'description': video['description'],
+ 'duration': video['duration'],
+ 'timestamp': parse_iso8601(video['created_at']),
+ 'view_count': video['views']['total'],
+ 'age_limit': video.get('age_limit', 0),
+ 'formats': formats,
+ }
diff --git a/youtube_dl/extractor/ubu.py b/youtube_dl/extractor/ubu.py
new file mode 100644
index 000000000..0182d67ec
--- /dev/null
+++ b/youtube_dl/extractor/ubu.py
@@ -0,0 +1,56 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import int_or_none
+
+
+class UbuIE(InfoExtractor):
+ # Extractor for film pages on ubu.com; the page's file name (without
+ # ".html") is used as the video id.
+ _VALID_URL = r'http://(?:www\.)?ubu\.com/film/(?P<id>[\da-z_-]+)\.html'
+ _TEST = {
+ 'url': 'http://ubu.com/film/her_noise.html',
+ 'md5': '8edd46ee8aa6b265fb5ed6cf05c36bc9',
+ 'info_dict': {
+ 'id': 'her_noise',
+ 'ext': 'mp4',
+ 'title': 'Her Noise - The Making Of (2007)',
+ 'duration': 3600,
+ },
+ }
+
+ def _real_extract(self, url):
+ # Returns an info dict with id, title, duration (seconds or None)
+ # and up to two formats ('sq', 'hq').
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+
+ webpage = self._download_webpage(url, video_id)
+
+ title = self._html_search_regex(
+ r'<title>.+?Film &amp; Video: ([^<]+)</title>', webpage, 'title')
+
+ # The page states the duration in whole minutes; convert to seconds.
+ duration = int_or_none(self._html_search_regex(
+ r'Duration: (\d+) minutes', webpage, 'duration', fatal=False, default=None))
+ if duration:
+ duration *= 60
+
+ formats = []
+
+ # (format_id, regex) pairs; group 1 of each regex is the media URL.
+ # Formats are appended in this order and are not sorted afterwards.
+ FORMAT_REGEXES = [
+ ['sq', r"'flashvars'\s*,\s*'file=([^']+)'"],
+ ['hq', r'href="(http://ubumexico\.centro\.org\.mx/video/[^"]+)"']
+ ]
+
+ for format_id, format_regex in FORMAT_REGEXES:
+ m = re.search(format_regex, webpage)
+ if m:
+ formats.append({
+ 'url': m.group(1),
+ 'format_id': format_id,
+ })
+
+ # NOTE(review): if neither regex matches, 'formats' is returned empty
+ # rather than raising here - confirm that is handled downstream.
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'duration': duration,
+ 'formats': formats,
+ }
diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py
index eada13ce9..d2ffd1b6b 100644
--- a/youtube_dl/extractor/vevo.py
+++ b/youtube_dl/extractor/vevo.py
@@ -177,6 +177,7 @@ class VevoIE(InfoExtractor):
self._downloader.report_warning(
'Cannot download SMIL information, falling back to JSON ..')
+ self._sort_formats(formats)
timestamp_ms = int(self._search_regex(
r'/Date\((\d+)\)/', video_info['launchDate'], 'launch date'))
diff --git a/youtube_dl/extractor/vidme.py b/youtube_dl/extractor/vidme.py
new file mode 100644
index 000000000..5c89824c1
--- /dev/null
+++ b/youtube_dl/extractor/vidme.py
@@ -0,0 +1,68 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ float_or_none,
+ str_to_int,
+)
+
+
+class VidmeIE(InfoExtractor):
+ # Extractor for vid.me pages, including the /e/ URL variant.
+ _VALID_URL = r'https?://vid\.me/(?:e/)?(?P<id>[\da-zA-Z]+)'
+ _TEST = {
+ 'url': 'https://vid.me/QNB',
+ 'md5': 'f42d05e7149aeaec5c037b17e5d3dc82',
+ 'info_dict': {
+ 'id': 'QNB',
+ 'ext': 'mp4',
+ 'title': 'Fishing for piranha - the easy way',
+ 'description': 'source: https://www.facebook.com/photo.php?v=312276045600871',
+ 'duration': 119.92,
+ 'timestamp': 1406313244,
+ 'upload_date': '20140725',
+ 'thumbnail': 're:^https?://.*\.jpg',
+ },
+ }
+
+ def _real_extract(self, url):
+ # Returns an info dict scraped from the video page: the media URL
+ # from the <source> tag, metadata from Open Graph properties and
+ # counters from data attributes / visible text.
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+
+ webpage = self._download_webpage(url, video_id)
+
+ # The media URL is the only regex lookup that is fatal on failure.
+ video_url = self._html_search_regex(r'<source src="([^"]+)"', webpage, 'video URL')
+
+ title = self._og_search_title(webpage)
+ # A missing description becomes '' rather than None.
+ description = self._og_search_description(webpage, default='')
+ thumbnail = self._og_search_thumbnail(webpage)
+ # og:updated_time is used as the upload timestamp.
+ timestamp = int_or_none(self._og_search_property('updated_time', webpage, fatal=False))
+ width = int_or_none(self._og_search_property('video:width', webpage, fatal=False))
+ height = int_or_none(self._og_search_property('video:height', webpage, fatal=False))
+ # The duration may be fractional (see the test: 119.92), hence float.
+ duration = float_or_none(self._html_search_regex(
+ r'data-duration="([^"]+)"', webpage, 'duration', fatal=False))
+ # The counters may contain separators (',' '.') or whitespace, so
+ # they go through str_to_int rather than int().
+ view_count = str_to_int(self._html_search_regex(
+ r'<span class="video_views">\s*([\d,\.]+)\s*plays?', webpage, 'view count', fatal=False))
+ like_count = str_to_int(self._html_search_regex(
+ r'class="score js-video-vote-score"[^>]+data-score="([\d,\.\s]+)">',
+ webpage, 'like count', fatal=False))
+ comment_count = str_to_int(self._html_search_regex(
+ r'class="js-comment-count"[^>]+data-count="([\d,\.\s]+)">',
+ webpage, 'comment count', fatal=False))
+
+ return {
+ 'id': video_id,
+ 'url': video_url,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'timestamp': timestamp,
+ 'width': width,
+ 'height': height,
+ 'duration': duration,
+ 'view_count': view_count,
+ 'like_count': like_count,
+ 'comment_count': comment_count,
+ }
diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py
index a3c6e83b0..11c7d7e81 100644
--- a/youtube_dl/extractor/vimeo.py
+++ b/youtube_dl/extractor/vimeo.py
@@ -122,6 +122,21 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
},
},
{
+ 'url': 'http://vimeo.com/channels/keypeele/75629013',
+ 'md5': '2f86a05afe9d7abc0b9126d229bbe15d',
+ 'note': 'Video is freely available via original URL '
+ 'and protected with password when accessed via http://vimeo.com/75629013',
+ 'info_dict': {
+ 'id': '75629013',
+ 'ext': 'mp4',
+ 'title': 'Key & Peele: Terrorist Interrogation',
+ 'description': 'md5:8678b246399b070816b12313e8b4eb5c',
+ 'uploader_id': 'atencio',
+ 'uploader': 'Peter Atencio',
+ 'duration': 187,
+ },
+ },
+ {
'url': 'http://vimeo.com/76979871',
'md5': '3363dd6ffebe3784d56f4132317fd446',
'note': 'Video with subtitles',
@@ -196,8 +211,6 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
video_id = mobj.group('id')
if mobj.group('pro') or mobj.group('player'):
url = 'http://player.vimeo.com/video/' + video_id
- else:
- url = 'https://vimeo.com/' + video_id
# Retrieve video webpage to extract further information
request = compat_urllib_request.Request(url, None, headers)
@@ -263,7 +276,7 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
if video_thumbnail is None:
video_thumbs = config["video"].get("thumbs")
if video_thumbs and isinstance(video_thumbs, dict):
- _, video_thumbnail = sorted((int(width), t_url) for (width, t_url) in video_thumbs.items())[-1]
+ _, video_thumbnail = sorted((int(width if width.isdigit() else 0), t_url) for (width, t_url) in video_thumbs.items())[-1]
# Extract video description
video_description = None
diff --git a/youtube_dl/extractor/vube.py b/youtube_dl/extractor/vube.py
index f1b9e9a19..2544c24bd 100644
--- a/youtube_dl/extractor/vube.py
+++ b/youtube_dl/extractor/vube.py
@@ -1,10 +1,12 @@
from __future__ import unicode_literals
-import json
import re
from .common import InfoExtractor
-from ..utils import int_or_none
+from ..utils import (
+ int_or_none,
+ compat_str,
+)
class VubeIE(InfoExtractor):
@@ -29,6 +31,7 @@ class VubeIE(InfoExtractor):
'like_count': int,
'dislike_count': int,
'comment_count': int,
+ 'categories': ['pop', 'music', 'cover', 'singing', 'jessie j', 'price tag', 'chiara grispo'],
}
},
{
@@ -47,6 +50,7 @@ class VubeIE(InfoExtractor):
'like_count': int,
'dislike_count': int,
'comment_count': int,
+ 'categories': ['seraina', 'jessica', 'krewella', 'alive'],
}
}, {
'url': 'http://vube.com/vote/Siren+Gene/0nmsMY5vEq?n=2&t=s',
@@ -56,13 +60,15 @@ class VubeIE(InfoExtractor):
'ext': 'mp4',
'title': 'Frozen - Let It Go Cover by Siren Gene',
'description': 'My rendition of "Let It Go" originally sung by Idina Menzel.',
- 'uploader': 'Siren Gene',
- 'uploader_id': 'Siren',
'thumbnail': 're:^http://frame\.thestaticvube\.com/snap/[0-9x]+/10283ab622a-86c9-4681-51f2-30d1f65774af\.jpg$',
+ 'uploader': 'Siren',
+ 'timestamp': 1395448018,
+ 'upload_date': '20140322',
'duration': 221.788,
'like_count': int,
'dislike_count': int,
'comment_count': int,
+ 'categories': ['let it go', 'cover', 'idina menzel', 'frozen', 'singing', 'disney', 'siren gene'],
}
}
]
@@ -71,47 +77,40 @@ class VubeIE(InfoExtractor):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
- webpage = self._download_webpage(url, video_id)
- data_json = self._search_regex(
- r'(?s)window\["(?:tapiVideoData|vubeOriginalVideoData)"\]\s*=\s*(\{.*?\n});\n',
- webpage, 'video data'
- )
- data = json.loads(data_json)
- video = (
- data.get('video') or
- data)
- assert isinstance(video, dict)
+ video = self._download_json(
+ 'http://vube.com/t-api/v1/video/%s' % video_id, video_id, 'Downloading video JSON')
public_id = video['public_id']
- formats = [
- {
- 'url': 'http://video.thestaticvube.com/video/%s/%s.mp4' % (fmt['media_resolution_id'], public_id),
- 'height': int(fmt['height']),
- 'abr': int(fmt['audio_bitrate']),
- 'vbr': int(fmt['video_bitrate']),
- 'format_id': fmt['media_resolution_id']
- } for fmt in video['mtm'] if fmt['transcoding_status'] == 'processed'
- ]
+ formats = []
+
+ for media in video['media'].get('video', []) + video['media'].get('audio', []):
+ if media['transcoding_status'] != 'processed':
+ continue
+ fmt = {
+ 'url': 'http://video.thestaticvube.com/video/%s/%s.mp4' % (media['media_resolution_id'], public_id),
+ 'abr': int(media['audio_bitrate']),
+ 'format_id': compat_str(media['media_resolution_id']),
+ }
+ vbr = int(media['video_bitrate'])
+ if vbr:
+ fmt.update({
+ 'vbr': vbr,
+ 'height': int(media['height']),
+ })
+ formats.append(fmt)
self._sort_formats(formats)
title = video['title']
description = video.get('description')
- thumbnail = self._proto_relative_url(
- video.get('thumbnail') or video.get('thumbnail_src'),
- scheme='http:')
- uploader = data.get('user', {}).get('channel', {}).get('name') or video.get('user_alias')
- uploader_id = data.get('user', {}).get('name')
+ thumbnail = self._proto_relative_url(video.get('thumbnail_src'), scheme='http:')
+ uploader = video.get('user_alias') or video.get('channel')
timestamp = int_or_none(video.get('upload_time'))
duration = video['duration']
view_count = video.get('raw_view_count')
- like_count = video.get('rlikes')
- if like_count is None:
- like_count = video.get('total_likes')
- dislike_count = video.get('rhates')
- if dislike_count is None:
- dislike_count = video.get('total_hates')
+ like_count = video.get('total_likes')
+ dislike_count = video.get('total_hates')
comments = video.get('comments')
comment_count = None
@@ -124,6 +123,8 @@ class VubeIE(InfoExtractor):
else:
comment_count = len(comments)
+ categories = [tag['text'] for tag in video['tags']]
+
return {
'id': video_id,
'formats': formats,
@@ -131,11 +132,11 @@ class VubeIE(InfoExtractor):
'description': description,
'thumbnail': thumbnail,
'uploader': uploader,
- 'uploader_id': uploader_id,
'timestamp': timestamp,
'duration': duration,
'view_count': view_count,
'like_count': like_count,
'dislike_count': dislike_count,
'comment_count': comment_count,
+ 'categories': categories,
}
diff --git a/youtube_dl/extractor/xboxclips.py b/youtube_dl/extractor/xboxclips.py
new file mode 100644
index 000000000..a9aa72e73
--- /dev/null
+++ b/youtube_dl/extractor/xboxclips.py
@@ -0,0 +1,57 @@
+# encoding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ parse_iso8601,
+ float_or_none,
+ int_or_none,
+)
+
+
+class XboxClipsIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?xboxclips\.com/video\.php\?.*vid=(?P<id>[\w-]{36})'
+ _TEST = {
+ 'url': 'https://xboxclips.com/video.php?uid=2533274823424419&gamertag=Iabdulelah&vid=074a69a9-5faf-46aa-b93b-9909c1720325',
+ 'md5': 'fbe1ec805e920aeb8eced3c3e657df5d',
+ 'info_dict': {
+ 'id': '074a69a9-5faf-46aa-b93b-9909c1720325',
+ 'ext': 'mp4',
+ 'title': 'Iabdulelah playing Upload Studio',
+ 'filesize_approx': 28101836.8,
+ 'timestamp': 1407388500,
+ 'upload_date': '20140807',
+ 'duration': 56,
+ }
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+
+ webpage = self._download_webpage(url, video_id)
+
+ video_url = self._html_search_regex(
+ r'>Link: <a href="([^"]+)">', webpage, 'video URL')
+ title = self._html_search_regex(
+ r'<title>XboxClips \| ([^<]+)</title>', webpage, 'title')
+ timestamp = parse_iso8601(self._html_search_regex(
+ r'>Recorded: ([^<]+)<', webpage, 'upload date', fatal=False))
+ filesize = float_or_none(self._html_search_regex(
+ r'>Size: ([\d\.]+)MB<', webpage, 'file size', fatal=False), invscale=1024 * 1024)
+ duration = int_or_none(self._html_search_regex(
+ r'>Duration: (\d+) Seconds<', webpage, 'duration', fatal=False))
+ view_count = int_or_none(self._html_search_regex(
+ r'>Views: (\d+)<', webpage, 'view count', fatal=False))
+
+ return {
+ 'id': video_id,
+ 'url': video_url,
+ 'title': title,
+ 'timestamp': timestamp,
+ 'filesize_approx': filesize,
+ 'duration': duration,
+ 'view_count': view_count,
+ }
diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py
index d84be2562..0e3b33b16 100644
--- a/youtube_dl/extractor/yahoo.py
+++ b/youtube_dl/extractor/yahoo.py
@@ -15,7 +15,7 @@ from ..utils import (
class YahooIE(InfoExtractor):
IE_DESC = 'Yahoo screen and movies'
- _VALID_URL = r'https?://(?:screen|movies)\.yahoo\.com/.*?-(?P<id>[0-9]+)(?:-[a-z]+)?\.html'
+ _VALID_URL = r'(?P<url>https?://(?:screen|movies)\.yahoo\.com/.*?-(?P<id>[0-9]+)(?:-[a-z]+)?\.html)'
_TESTS = [
{
'url': 'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html',
@@ -46,12 +46,23 @@ class YahooIE(InfoExtractor):
'title': 'The World Loves Spider-Man',
'description': '''People all over the world are celebrating the release of \"The Amazing Spider-Man 2.\" We're taking a look at the enthusiastic response Spider-Man has received from viewers all over the world.''',
}
- }
+ },
+ {
+ 'url': 'https://screen.yahoo.com/community/community-sizzle-reel-203225340.html?format=embed',
+ 'md5': '60e8ac193d8fb71997caa8fce54c6460',
+ 'info_dict': {
+ 'id': '4fe78544-8d48-39d8-97cd-13f205d9fcdb',
+ 'ext': 'mp4',
+ 'title': "Yahoo Saves 'Community'",
+ 'description': 'md5:4d4145af2fd3de00cbb6c1d664105053',
+ }
+ },
]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
+ url = mobj.group('url')
webpage = self._download_webpage(url, video_id)
items_json = self._search_regex(
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 73a01107d..225e2b7f4 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -225,7 +225,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
'272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
# Dash webm audio
- '171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 48, 'preference': -50},
+ '171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 128, 'preference': -50},
'172': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 256, 'preference': -50},
# RTMP (unnamed)
@@ -344,7 +344,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
"""Indicate the download will use the RTMP protocol."""
self.to_screen(u'RTMP download detected')
- def _extract_signature_function(self, video_id, player_url, slen):
+ def _signature_cache_id(self, example_sig):
+ """ Return a string representation of a signature """
+ return u'.'.join(compat_str(len(part)) for part in example_sig.split('.'))
+
+ def _extract_signature_function(self, video_id, player_url, example_sig):
id_m = re.match(
r'.*-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player)?\.(?P<ext>[a-z]+)$',
player_url)
@@ -354,7 +358,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
player_id = id_m.group('id')
# Read from filesystem cache
- func_id = '%s_%s_%d' % (player_type, player_id, slen)
+ func_id = '%s_%s_%s' % (
+ player_type, player_id, self._signature_cache_id(example_sig))
assert os.path.basename(func_id) == func_id
cache_dir = get_cachedir(self._downloader.params)
@@ -369,6 +374,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
return lambda s: u''.join(s[i] for i in cache_spec)
except IOError:
pass # No cache available
+ except ValueError:
+ try:
+ file_size = os.path.getsize(cache_fn)
+ except (OSError, IOError) as oe:
+ file_size = str(oe)
+ self._downloader.report_warning(
+ u'Cache %s failed (%s)' % (cache_fn, file_size))
if player_type == 'js':
code = self._download_webpage(
@@ -388,7 +400,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
if cache_enabled:
try:
- test_string = u''.join(map(compat_chr, range(slen)))
+ test_string = u''.join(map(compat_chr, range(len(example_sig))))
cache_res = res(test_string)
cache_spec = [ord(c) for c in cache_res]
try:
@@ -404,7 +416,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
return res
- def _print_sig_code(self, func, slen):
+ def _print_sig_code(self, func, example_sig):
def gen_sig_code(idxs):
def _genslice(start, end, step):
starts = u'' if start == 0 else str(start)
@@ -433,11 +445,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
else:
yield _genslice(start, i, step)
- test_string = u''.join(map(compat_chr, range(slen)))
+ test_string = u''.join(map(compat_chr, range(len(example_sig))))
cache_res = func(test_string)
cache_spec = [ord(c) for c in cache_res]
expr_code = u' + '.join(gen_sig_code(cache_spec))
- code = u'if len(s) == %d:\n return %s\n' % (slen, expr_code)
+ signature_id_tuple = '(%s)' % (
+ ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
+ code = (u'if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
+ u' return %s\n') % (signature_id_tuple, expr_code)
self.to_screen(u'Extracted signature function:\n' + code)
def _parse_sig_js(self, jscode):
@@ -465,20 +480,20 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
if player_url.startswith(u'//'):
player_url = u'https:' + player_url
try:
- player_id = (player_url, len(s))
+ player_id = (player_url, self._signature_cache_id(s))
if player_id not in self._player_cache:
func = self._extract_signature_function(
- video_id, player_url, len(s)
+ video_id, player_url, s
)
self._player_cache[player_id] = func
func = self._player_cache[player_id]
if self._downloader.params.get('youtube_print_sig_code'):
- self._print_sig_code(func, len(s))
+ self._print_sig_code(func, s)
return func(s)
except Exception as e:
tb = traceback.format_exc()
raise ExtractorError(
- u'Automatic signature extraction failed: ' + tb, cause=e)
+ u'Signature extraction failed: ' + tb, cause=e)
def _get_available_subtitles(self, video_id, webpage):
try:
@@ -613,7 +628,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
data = compat_urllib_parse.urlencode({
'video_id': video_id,
'eurl': 'https://youtube.googleapis.com/v/' + video_id,
- 'sts':'16268',
+ 'sts': self._search_regex(
+ r'"sts"\s*:\s*(\d+)', video_webpage, 'sts'),
})
video_info_url = proto + '://www.youtube.com/get_video_info?' + data
video_info_webpage = self._download_webpage(video_info_url, video_id,
@@ -807,50 +823,54 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
url_map = {}
for url_data_str in encoded_url_map.split(','):
url_data = compat_parse_qs(url_data_str)
- if 'itag' in url_data and 'url' in url_data:
- url = url_data['url'][0]
- if 'sig' in url_data:
- url += '&signature=' + url_data['sig'][0]
- elif 's' in url_data:
- encrypted_sig = url_data['s'][0]
-
- if not age_gate:
- jsplayer_url_json = self._search_regex(
- r'"assets":.+?"js":\s*("[^"]+")',
- video_webpage, u'JS player URL')
- player_url = json.loads(jsplayer_url_json)
+ if 'itag' not in url_data or 'url' not in url_data:
+ continue
+ format_id = url_data['itag'][0]
+ url = url_data['url'][0]
+
+ if 'sig' in url_data:
+ url += '&signature=' + url_data['sig'][0]
+ elif 's' in url_data:
+ encrypted_sig = url_data['s'][0]
+
+ if not age_gate:
+ jsplayer_url_json = self._search_regex(
+ r'"assets":.+?"js":\s*("[^"]+")',
+ video_webpage, u'JS player URL')
+ player_url = json.loads(jsplayer_url_json)
+ if player_url is None:
+ player_url_json = self._search_regex(
+ r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
+ video_webpage, u'age gate player URL')
+ player_url = json.loads(player_url_json)
+
+ if self._downloader.params.get('verbose'):
if player_url is None:
- player_url_json = self._search_regex(
- r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
- video_webpage, u'age gate player URL')
- player_url = json.loads(player_url_json)
-
- if self._downloader.params.get('verbose'):
- if player_url is None:
- player_version = 'unknown'
- player_desc = 'unknown'
+ player_version = 'unknown'
+ player_desc = 'unknown'
+ else:
+ if player_url.endswith('swf'):
+ player_version = self._search_regex(
+ r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
+ u'flash player', fatal=False)
+ player_desc = 'flash player %s' % player_version
else:
- if player_url.endswith('swf'):
- player_version = self._search_regex(
- r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
- u'flash player', fatal=False)
- player_desc = 'flash player %s' % player_version
- else:
- player_version = self._search_regex(
- r'html5player-(.+?)\.js', video_webpage,
- 'html5 player', fatal=False)
- player_desc = u'html5 player %s' % player_version
-
- parts_sizes = u'.'.join(compat_str(len(part)) for part in encrypted_sig.split('.'))
- self.to_screen(u'encrypted signature length %d (%s), itag %s, %s' %
- (len(encrypted_sig), parts_sizes, url_data['itag'][0], player_desc))
-
- signature = self._decrypt_signature(
- encrypted_sig, video_id, player_url, age_gate)
- url += '&signature=' + signature
- if 'ratebypass' not in url:
- url += '&ratebypass=yes'
- url_map[url_data['itag'][0]] = url
+ player_version = self._search_regex(
+ r'html5player-([^/]+?)(?:/html5player)?\.js',
+ player_url,
+ 'html5 player', fatal=False)
+ player_desc = u'html5 player %s' % player_version
+
+ parts_sizes = self._signature_cache_id(encrypted_sig)
+ self.to_screen(u'{%s} signature length %s, %s' %
+ (format_id, parts_sizes, player_desc))
+
+ signature = self._decrypt_signature(
+ encrypted_sig, video_id, player_url, age_gate)
+ url += '&signature=' + signature
+ if 'ratebypass' not in url:
+ url += '&ratebypass=yes'
+ url_map[format_id] = url
formats = _map_to_format_list(url_map)
elif video_info.get('hlsvp'):
manifest_url = video_info['hlsvp'][0]
diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py
index 13ad5ba1a..c40cd376d 100644
--- a/youtube_dl/jsinterp.py
+++ b/youtube_dl/jsinterp.py
@@ -1,5 +1,6 @@
from __future__ import unicode_literals
+import json
import re
from .utils import (
@@ -40,8 +41,9 @@ class JSInterpreter(object):
assign = lambda v: v
expr = stmt[len('return '):]
else:
- raise ExtractorError(
- 'Cannot determine left side of statement in %r' % stmt)
+ # Try interpreting it as an expression
+ expr = stmt
+ assign = lambda v: v
v = self.interpret_expression(expr, local_vars, allow_recursion)
return assign(v)
@@ -53,35 +55,63 @@ class JSInterpreter(object):
if expr.isalpha():
return local_vars[expr]
- m = re.match(r'^(?P<in>[a-z]+)\.(?P<member>.*)$', expr)
+ try:
+ return json.loads(expr)
+ except ValueError:
+ pass
+
+ m = re.match(
+ r'^(?P<var>[a-zA-Z0-9_]+)\.(?P<member>[^(]+)(?:\(+(?P<args>[^()]*)\))?$',
+ expr)
if m:
+ variable = m.group('var')
member = m.group('member')
- variable = m.group('in')
+ arg_str = m.group('args')
- if variable not in local_vars:
+ if variable in local_vars:
+ obj = local_vars[variable]
+ else:
if variable not in self._objects:
self._objects[variable] = self.extract_object(variable)
obj = self._objects[variable]
- key, args = member.split('(', 1)
- args = args.strip(')')
- argvals = [int(v) if v.isdigit() else local_vars[v]
- for v in args.split(',')]
- return obj[key](argvals)
-
- val = local_vars[variable]
- if member == 'split("")':
- return list(val)
- if member == 'join("")':
- return ''.join(val)
- if member == 'length':
- return len(val)
- if member == 'reverse()':
- return val[::-1]
- slice_m = re.match(r'slice\((?P<idx>.*)\)', member)
- if slice_m:
- idx = self.interpret_expression(
- slice_m.group('idx'), local_vars, allow_recursion - 1)
- return val[idx:]
+
+ if arg_str is None:
+ # Member access
+ if member == 'length':
+ return len(obj)
+ return obj[member]
+
+ assert expr.endswith(')')
+ # Function call
+ if arg_str == '':
+ argvals = tuple()
+ else:
+ argvals = tuple([
+ self.interpret_expression(v, local_vars, allow_recursion)
+ for v in arg_str.split(',')])
+
+ if member == 'split':
+ assert argvals == ('',)
+ return list(obj)
+ if member == 'join':
+ assert len(argvals) == 1
+ return argvals[0].join(obj)
+ if member == 'reverse':
+ assert len(argvals) == 0
+ obj.reverse()
+ return obj
+ if member == 'slice':
+ assert len(argvals) == 1
+ return obj[argvals[0]:]
+ if member == 'splice':
+ assert isinstance(obj, list)
+ index, howMany = argvals
+ res = []
+ for i in range(index, min(index + howMany, len(obj))):
+ res.append(obj.pop(index))
+ return res
+
+ return obj[member](argvals)
m = re.match(
r'^(?P<in>[a-z]+)\[(?P<idx>.+)\]$', expr)
@@ -103,10 +133,11 @@ class JSInterpreter(object):
r'^(?P<func>[a-zA-Z$]+)\((?P<args>[a-z0-9,]+)\)$', expr)
if m:
fname = m.group('func')
+ argvals = tuple([
+ int(v) if v.isdigit() else local_vars[v]
+ for v in m.group('args').split(',')])
if fname not in self._functions:
self._functions[fname] = self.extract_function(fname)
- argvals = [int(v) if v.isdigit() else local_vars[v]
- for v in m.group('args').split(',')]
return self._functions[fname](argvals)
raise ExtractorError('Unsupported JS expression %r' % expr)
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 3ecd798d7..42ad520f9 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -24,6 +24,7 @@ import socket
import struct
import subprocess
import sys
+import tempfile
import traceback
import xml.etree.ElementTree
import zlib
@@ -228,22 +229,46 @@ else:
assert type(s) == type(u'')
print(s)
-# In Python 2.x, json.dump expects a bytestream.
-# In Python 3.x, it writes to a character stream
-if sys.version_info < (3,0):
- def write_json_file(obj, fn):
- with open(fn, 'wb') as f:
- json.dump(obj, f)
-else:
- def write_json_file(obj, fn):
- with open(fn, 'w', encoding='utf-8') as f:
- json.dump(obj, f)
-if sys.version_info >= (2,7):
+def write_json_file(obj, fn):
+ """ Encode obj as JSON and write it to fn, atomically """
+
+ args = {
+ 'suffix': '.tmp',
+ 'prefix': os.path.basename(fn) + '.',
+ 'dir': os.path.dirname(fn),
+ 'delete': False,
+ }
+
+ # In Python 2.x, json.dump expects a bytestream.
+ # In Python 3.x, it writes to a character stream
+ if sys.version_info < (3, 0):
+ args['mode'] = 'wb'
+ else:
+ args.update({
+ 'mode': 'w',
+ 'encoding': 'utf-8',
+ })
+
+ tf = tempfile.NamedTemporaryFile(**args)
+
+ try:
+ with tf:
+ json.dump(obj, tf)
+ os.rename(tf.name, fn)
+ except:
+ try:
+ os.remove(tf.name)
+ except OSError:
+ pass
+ raise
+
+
+if sys.version_info >= (2, 7):
def find_xpath_attr(node, xpath, key, val):
""" Find the xpath xpath[@key=val] """
- assert re.match(r'^[a-zA-Z]+$', key)
- assert re.match(r'^[a-zA-Z0-9@\s:._]*$', val)
+ assert re.match(r'^[a-zA-Z-]+$', key)
+ assert re.match(r'^[a-zA-Z0-9@\s:._-]*$', val)
expr = xpath + u"[@%s='%s']" % (key, val)
return node.find(expr)
else:
@@ -827,6 +852,7 @@ def unified_strdate(date_str):
'%b %dnd %Y %I:%M%p',
'%b %dth %Y %I:%M%p',
'%Y-%m-%d',
+ '%Y/%m/%d',
'%d.%m.%Y',
'%d/%m/%Y',
'%Y/%m/%d %H:%M:%S',
@@ -852,6 +878,8 @@ def unified_strdate(date_str):
return upload_date
def determine_ext(url, default_ext=u'unknown_video'):
+ if url is None:
+ return default_ext
guess = url.partition(u'?')[0].rpartition(u'.')[2]
if re.match(r'^[A-Za-z0-9]+$', guess):
return guess
@@ -1271,9 +1299,15 @@ def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
if get_attr:
if v is not None:
v = getattr(v, get_attr, None)
+ if v == '':
+ v = None
return default if v is None else (int(v) * invscale // scale)
+def str_or_none(v, default=None):
+ return default if v is None else compat_str(v)
+
+
def str_to_int(int_str):
if int_str is None:
return None
@@ -1440,6 +1474,34 @@ def strip_jsonp(code):
return re.sub(r'(?s)^[a-zA-Z0-9_]+\s*\(\s*(.*)\);?\s*?\s*$', r'\1', code)
+def js_to_json(code):
+ def fix_kv(m):
+ key = m.group(2)
+ if key.startswith("'"):
+ assert key.endswith("'")
+ assert '"' not in key
+ key = '"%s"' % key[1:-1]
+ elif not key.startswith('"'):
+ key = '"%s"' % key
+
+ value = m.group(4)
+ if value.startswith("'"):
+ assert value.endswith("'")
+ assert '"' not in value
+ value = '"%s"' % value[1:-1]
+
+ return m.group(1) + key + m.group(3) + value
+
+ res = re.sub(r'''(?x)
+ ([{,]\s*)
+ ("[^"]*"|\'[^\']*\'|[a-z0-9A-Z]+)
+ (:\s*)
+ ([0-9.]+|true|false|"[^"]*"|\'[^\']*\'|\[|\{)
+ ''', fix_kv, code)
+ res = re.sub(r',(\s*\])', lambda m: m.group(1), res)
+ return res
+
+
def qualities(quality_ids):
""" Get a numeric quality value out of a list of possible values """
def q(qid):
diff --git a/youtube_dl/version.py b/youtube_dl/version.py
index dca400d5e..15b9d6c61 100644
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,2 +1,2 @@
-__version__ = '2014.07.23.2'
+__version__ = '2014.08.21.3'