aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--README.md20
-rw-r--r--docs/supportedsites.md10
-rw-r--r--test/test_utils.py38
-rw-r--r--youtube_dl/downloader/__init__.py6
-rw-r--r--youtube_dl/downloader/common.py13
-rw-r--r--youtube_dl/downloader/external.py16
-rw-r--r--youtube_dl/downloader/rtmp.py21
-rw-r--r--youtube_dl/downloader/rtsp.py (renamed from youtube_dl/downloader/mplayer.py)20
-rw-r--r--youtube_dl/extractor/__init__.py2
-rw-r--r--youtube_dl/extractor/cspan.py11
-rw-r--r--youtube_dl/extractor/instagram.py10
-rw-r--r--youtube_dl/extractor/mtv.py7
-rw-r--r--youtube_dl/extractor/orf.py14
-rw-r--r--youtube_dl/extractor/philharmoniedeparis.py78
-rw-r--r--youtube_dl/extractor/southpark.py16
-rw-r--r--youtube_dl/postprocessor/embedthumbnail.py3
-rw-r--r--youtube_dl/postprocessor/ffmpeg.py25
-rw-r--r--youtube_dl/utils.py102
-rw-r--r--youtube_dl/version.py2
19 files changed, 332 insertions, 82 deletions
diff --git a/README.md b/README.md
index 948e0a4b9..e1f30ca47 100644
--- a/README.md
+++ b/README.md
@@ -269,9 +269,9 @@ The simplest case is requesting a specific format, for example `-f 22`. You can
If you want to download multiple videos and they don't have the same formats available, you can specify the order of preference using slashes, as in `-f 22/17/18`. You can also filter the video results by putting a condition in brackets, as in `-f "best[height=720]"` (or `-f "[filesize>10M]"`). This works for filesize, height, width, tbr, abr, vbr, asr, and fps and the comparisons <, <=, >, >=, =, != and for ext, acodec, vcodec, container, and protocol and the comparisons =, != . Formats for which the value is not known are excluded unless you put a question mark (?) after the operator. You can combine format filters, so `-f "[height <=? 720][tbr>500]"` selects up to 720p videos (or videos where the height is not known) with a bitrate of at least 500 KBit/s. Use commas to download multiple formats, such as `-f 136/137/mp4/bestvideo,140/m4a/bestaudio`. You can merge the video and audio of two formats into a single file using `-f <video-format>+<audio-format>` (requires ffmpeg or avconv), for example `-f bestvideo+bestaudio`.
-Since the end of April 2015 youtube-dl uses `-f bestvideo+bestaudio/best` as default format selection (see #5447, #5456). If ffmpeg or avconv are installed this results in downloading `bestvideo` and `bestaudio` separately and muxing them together into a single file giving the best overall quality available. Otherwise it falls back to `best` and results in downloading best available quality served as a single file. `best` is also needed for videos that don't come from YouTube because they don't provide the audio and video in two different files. If you want to only download some dash formats (for example if you are not interested in getting videos with a resolution higher than 1080p), you can add `-f bestvideo[height<=?1080]+bestaudio/best` to your configuration file.
+Since the end of April 2015 and version 2015.04.26 youtube-dl uses `-f bestvideo+bestaudio/best` as default format selection (see #5447, #5456). If ffmpeg or avconv are installed this results in downloading `bestvideo` and `bestaudio` separately and muxing them together into a single file giving the best overall quality available. Otherwise it falls back to `best` and results in downloading best available quality served as a single file. `best` is also needed for videos that don't come from YouTube because they don't provide the audio and video in two different files. If you want to only download some dash formats (for example if you are not interested in getting videos with a resolution higher than 1080p), you can add `-f bestvideo[height<=?1080]+bestaudio/best` to your configuration file.
-If you want to preserve the old format selection behavior (pre-April 2015), i.e. you want to download best available quality media served as a single file, you should explicitly specify your choice with `-f best`. You may want to add it to the [configuration file](#configuration) in order not to type it every time you run youtube-dl.
+If you want to preserve the old format selection behavior (prior to youtube-dl 2015.04.26), i.e. you want to download best available quality media served as a single file, you should explicitly specify your choice with `-f best`. You may want to add it to the [configuration file](#configuration) in order not to type it every time you run youtube-dl.
# VIDEO SELECTION
@@ -357,6 +357,22 @@ YouTube has switched to a new video info format in July 2011 which is not suppor
YouTube requires an additional signature since September 2012 which is not supported by old versions of youtube-dl. See [above](#how-do-i-update-youtube-dl) for how to update youtube-dl.
+### Video URL contains an ampersand and I'm getting some strange output `[1] 2839` or `'v' is not recognized as an internal or external command` ###
+
+That's actually the output from your shell. Since the ampersand is one of the special shell characters, it is interpreted by the shell, which prevents you from passing the whole URL to youtube-dl. To stop your shell from interpreting the ampersands (or any other special characters), you have to either put the whole URL in quotes or escape them with a backslash (which approach works depends on your shell).
+
+For example, if your URL is https://www.youtube.com/watch?t=4&v=BaW_jenozKc you should end up with the following command:
+
+```youtube-dl 'https://www.youtube.com/watch?t=4&v=BaW_jenozKc'```
+
+or
+
+```youtube-dl https://www.youtube.com/watch?t=4\&v=BaW_jenozKc```
+
+On Windows you have to use double quotes:
+
+```youtube-dl "https://www.youtube.com/watch?t=4&v=BaW_jenozKc"```
+
### ExtractorError: Could not find JS function u'OF'
In February 2015, the new YouTube player contained a character sequence in a string that was misinterpreted by old versions of youtube-dl. See [above](#how-do-i-update-youtube-dl) for how to update youtube-dl.
diff --git a/docs/supportedsites.md b/docs/supportedsites.md
index 80e86c1b6..cc1564b7b 100644
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -174,6 +174,7 @@
- **Gazeta**
- **GDCVault**
- **generic**: Generic downloader that works on some sites
+ - **Gfycat**
- **GiantBomb**
- **Giga**
- **Glide**: Glide mobile video messages (glide.me)
@@ -181,7 +182,7 @@
- **GodTube**
- **GoldenMoustache**
- **Golem**
- - **GorillaVid**: GorillaVid.in, daclips.in, movpod.in and fastvideo.in
+ - **GorillaVid**: GorillaVid.in, daclips.in, movpod.in, fastvideo.in and realvid.net
- **Goshgay**
- **Grooveshark**
- **Groupon**
@@ -252,6 +253,7 @@
- **Malemotion**
- **MDR**
- **media.ccc.de**
+ - **MegaVideoz**
- **metacafe**
- **Metacritic**
- **Mgoon**
@@ -405,6 +407,7 @@
- **Screencast**
- **ScreencastOMatic**
- **ScreenwaveMedia**
+ - **SenateISVP**
- **ServingSys**
- **Sexu**
- **SexyKarma**: Sexy Karma and Watch Indian Porn
@@ -427,6 +430,7 @@
- **soundgasm**
- **soundgasm:profile**
- **southpark.cc.com**
+ - **southpark.cc.com:español**
- **southpark.de**
- **Space**
- **SpankBang**
@@ -461,7 +465,7 @@
- **TeamFour**
- **TechTalks**
- **techtv.mit.edu**
- - **TED**
+ - **ted**
- **tegenlicht.vpro.nl**
- **TeleBruxelles**
- **telecinco.es**
@@ -551,7 +555,7 @@
- **vimeo:review**: Review pages on vimeo
- **vimeo:user**
- **vimeo:watchlater**: Vimeo watch later list, "vimeowatchlater" keyword (requires authentication)
- - **Vimple**: Vimple.ru
+ - **Vimple**: Vimple - one-click video hosting
- **Vine**
- **vine:user**
- **vk.com**
diff --git a/test/test_utils.py b/test/test_utils.py
index 2e3a6480c..17017a8c0 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -58,6 +58,8 @@ from youtube_dl.utils import (
xpath_text,
render_table,
match_str,
+ parse_dfxp_time_expr,
+ dfxp2srt,
)
@@ -581,6 +583,42 @@ ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4')
'like_count > 100 & dislike_count <? 50 & description',
{'like_count': 190, 'dislike_count': 10}))
+ def test_parse_dfxp_time_expr(self):
+ self.assertEqual(parse_dfxp_time_expr(None), 0.0)
+ self.assertEqual(parse_dfxp_time_expr(''), 0.0)
+ self.assertEqual(parse_dfxp_time_expr('0.1'), 0.1)
+ self.assertEqual(parse_dfxp_time_expr('0.1s'), 0.1)
+ self.assertEqual(parse_dfxp_time_expr('00:00:01'), 1.0)
+ self.assertEqual(parse_dfxp_time_expr('00:00:01.100'), 1.1)
+
+ def test_dfxp2srt(self):
+ dfxp_data = '''<?xml version="1.0" encoding="UTF-8"?>
+ <tt xmlns="http://www.w3.org/ns/ttml" xml:lang="en" xmlns:tts="http://www.w3.org/ns/ttml#parameter">
+ <body>
+ <div xml:lang="en">
+ <p begin="0" end="1">The following line contains Chinese characters and special symbols</p>
+ <p begin="1" end="2">第二行<br/>♪♪</p>
+ <p begin="2" end="3"><span>Third<br/>Line</span></p>
+ </div>
+ </body>
+ </tt>'''
+ srt_data = '''1
+00:00:00,000 --> 00:00:01,000
+The following line contains Chinese characters and special symbols
+
+2
+00:00:01,000 --> 00:00:02,000
+第二行
+♪♪
+
+3
+00:00:02,000 --> 00:00:03,000
+Third
+Line
+
+'''
+ self.assertEqual(dfxp2srt(dfxp_data), srt_data)
+
if __name__ == '__main__':
unittest.main()
diff --git a/youtube_dl/downloader/__init__.py b/youtube_dl/downloader/__init__.py
index 9fb66e2f7..f110830c4 100644
--- a/youtube_dl/downloader/__init__.py
+++ b/youtube_dl/downloader/__init__.py
@@ -6,7 +6,7 @@ from .f4m import F4mFD
from .hls import HlsFD
from .hls import NativeHlsFD
from .http import HttpFD
-from .mplayer import MplayerFD
+from .rtsp import RtspFD
from .rtmp import RtmpFD
from ..utils import (
@@ -17,8 +17,8 @@ PROTOCOL_MAP = {
'rtmp': RtmpFD,
'm3u8_native': NativeHlsFD,
'm3u8': HlsFD,
- 'mms': MplayerFD,
- 'rtsp': MplayerFD,
+ 'mms': RtspFD,
+ 'rtsp': RtspFD,
'f4m': F4mFD,
}
diff --git a/youtube_dl/downloader/common.py b/youtube_dl/downloader/common.py
index a0fc5ead0..97e755d4b 100644
--- a/youtube_dl/downloader/common.py
+++ b/youtube_dl/downloader/common.py
@@ -8,6 +8,7 @@ import time
from ..compat import compat_str
from ..utils import (
encodeFilename,
+ decodeArgument,
format_bytes,
timeconvert,
)
@@ -353,19 +354,15 @@ class FileDownloader(object):
# this interface
self._progress_hooks.append(ph)
- def _debug_cmd(self, args, subprocess_encoding, exe=None):
+ def _debug_cmd(self, args, exe=None):
if not self.params.get('verbose', False):
return
+ str_args = [decodeArgument(a) for a in args]
+
if exe is None:
- exe = os.path.basename(args[0])
+ exe = os.path.basename(str_args[0])
- if subprocess_encoding:
- str_args = [
- a.decode(subprocess_encoding) if isinstance(a, bytes) else a
- for a in args]
- else:
- str_args = args
try:
import pipes
shell_quote = lambda args: ' '.join(map(pipes.quote, str_args))
diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py
index 1673b2382..7ca2d3143 100644
--- a/youtube_dl/downloader/external.py
+++ b/youtube_dl/downloader/external.py
@@ -2,11 +2,11 @@ from __future__ import unicode_literals
import os.path
import subprocess
-import sys
from .common import FileDownloader
from ..utils import (
encodeFilename,
+ encodeArgument,
)
@@ -60,17 +60,9 @@ class ExternalFD(FileDownloader):
def _call_downloader(self, tmpfilename, info_dict):
""" Either overwrite this or implement _make_cmd """
- cmd = self._make_cmd(tmpfilename, info_dict)
-
- if sys.platform == 'win32' and sys.version_info < (3, 0):
- # Windows subprocess module does not actually support Unicode
- # on Python 2.x
- # See http://stackoverflow.com/a/9951851/35070
- subprocess_encoding = sys.getfilesystemencoding()
- cmd = [a.encode(subprocess_encoding, 'ignore') for a in cmd]
- else:
- subprocess_encoding = None
- self._debug_cmd(cmd, subprocess_encoding)
+ cmd = [encodeArgument(a) for a in self._make_cmd(tmpfilename, info_dict)]
+
+ self._debug_cmd(cmd)
p = subprocess.Popen(
cmd, stderr=subprocess.PIPE)
diff --git a/youtube_dl/downloader/rtmp.py b/youtube_dl/downloader/rtmp.py
index ddf5724ae..6865b5e2f 100644
--- a/youtube_dl/downloader/rtmp.py
+++ b/youtube_dl/downloader/rtmp.py
@@ -3,7 +3,6 @@ from __future__ import unicode_literals
import os
import re
import subprocess
-import sys
import time
from .common import FileDownloader
@@ -11,6 +10,7 @@ from ..compat import compat_str
from ..utils import (
check_executable,
encodeFilename,
+ encodeArgument,
get_exe_version,
)
@@ -121,7 +121,7 @@ class RtmpFD(FileDownloader):
# possible. This is part of rtmpdump's normal usage, AFAIK.
basic_args = [
'rtmpdump', '--verbose', '-r', url,
- '-o', encodeFilename(tmpfilename, True)]
+ '-o', tmpfilename]
if player_url is not None:
basic_args += ['--swfVfy', player_url]
if page_url is not None:
@@ -154,16 +154,9 @@ class RtmpFD(FileDownloader):
if not live and continue_dl:
args += ['--skip', '1']
- if sys.platform == 'win32' and sys.version_info < (3, 0):
- # Windows subprocess module does not actually support Unicode
- # on Python 2.x
- # See http://stackoverflow.com/a/9951851/35070
- subprocess_encoding = sys.getfilesystemencoding()
- args = [a.encode(subprocess_encoding, 'ignore') for a in args]
- else:
- subprocess_encoding = None
+ args = [encodeArgument(a) for a in args]
- self._debug_cmd(args, subprocess_encoding, exe='rtmpdump')
+ self._debug_cmd(args, exe='rtmpdump')
RD_SUCCESS = 0
RD_FAILED = 1
@@ -180,7 +173,11 @@ class RtmpFD(FileDownloader):
prevsize = os.path.getsize(encodeFilename(tmpfilename))
self.to_screen('[rtmpdump] %s bytes' % prevsize)
time.sleep(5.0) # This seems to be needed
- retval = run_rtmpdump(basic_args + ['-e'] + [[], ['-k', '1']][retval == RD_FAILED])
+ args = basic_args + ['--resume']
+ if retval == RD_FAILED:
+ args += ['--skip', '1']
+ args = [encodeArgument(a) for a in args]
+ retval = run_rtmpdump(args)
cursize = os.path.getsize(encodeFilename(tmpfilename))
if prevsize == cursize and retval == RD_FAILED:
break
diff --git a/youtube_dl/downloader/mplayer.py b/youtube_dl/downloader/rtsp.py
index 72cef30ea..3eb29526c 100644
--- a/youtube_dl/downloader/mplayer.py
+++ b/youtube_dl/downloader/rtsp.py
@@ -10,21 +10,23 @@ from ..utils import (
)
-class MplayerFD(FileDownloader):
+class RtspFD(FileDownloader):
def real_download(self, filename, info_dict):
url = info_dict['url']
self.report_destination(filename)
tmpfilename = self.temp_name(filename)
- args = [
- 'mplayer', '-really-quiet', '-vo', 'null', '-vc', 'dummy',
- '-dumpstream', '-dumpfile', tmpfilename, url]
- # Check for mplayer first
- if not check_executable('mplayer', ['-h']):
- self.report_error('MMS or RTSP download detected but "%s" could not be run' % args[0])
+ if check_executable('mplayer', ['-h']):
+ args = [
+ 'mplayer', '-really-quiet', '-vo', 'null', '-vc', 'dummy',
+ '-dumpstream', '-dumpfile', tmpfilename, url]
+ elif check_executable('mpv', ['-h']):
+ args = [
+ 'mpv', '-really-quiet', '--vo=null', '--stream-dump=' + tmpfilename, url]
+ else:
+ self.report_error('MMS or RTSP download detected but neither "mplayer" nor "mpv" could be run. Please install any.')
return False
- # Download using mplayer.
retval = subprocess.call(args)
if retval == 0:
fsize = os.path.getsize(encodeFilename(tmpfilename))
@@ -39,5 +41,5 @@ class MplayerFD(FileDownloader):
return True
else:
self.to_stderr('\n')
- self.report_error('mplayer exited with code %d' % retval)
+ self.report_error('%s exited with code %d' % (args[0], retval))
return False
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index a64afa1da..641c45f43 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -377,6 +377,7 @@ from .orf import (
from .parliamentliveuk import ParliamentLiveUKIE
from .patreon import PatreonIE
from .pbs import PBSIE
+from .philharmoniedeparis import PhilharmonieDeParisIE
from .phoenix import PhoenixIE
from .photobucket import PhotobucketIE
from .planetaplay import PlanetaPlayIE
@@ -478,6 +479,7 @@ from .soundgasm import (
)
from .southpark import (
SouthParkIE,
+ SouthParkEsIE,
SouthparkDeIE,
)
from .space import SpaceIE
diff --git a/youtube_dl/extractor/cspan.py b/youtube_dl/extractor/cspan.py
index d516b1402..fbefd37d0 100644
--- a/youtube_dl/extractor/cspan.py
+++ b/youtube_dl/extractor/cspan.py
@@ -8,6 +8,7 @@ from ..utils import (
unescapeHTML,
find_xpath_attr,
smuggle_url,
+ determine_ext,
)
from .senateisvp import SenateISVPIE
@@ -87,6 +88,10 @@ class CSpanIE(InfoExtractor):
return self.url_result(surl, 'SenateISVP', video_id, title)
files = data['video']['files']
+ try:
+ capfile = data['video']['capfile']['#text']
+ except KeyError:
+ capfile = None
entries = [{
'id': '%s_%d' % (video_id, partnum + 1),
@@ -97,6 +102,12 @@ class CSpanIE(InfoExtractor):
'description': description,
'thumbnail': thumbnail,
'duration': int_or_none(f.get('length', {}).get('#text')),
+ 'subtitles': {
+ 'en': [{
+ 'url': capfile,
+ 'ext': determine_ext(capfile, 'dfxp')
+ }],
+ } if capfile else None,
} for partnum, f in enumerate(files)]
if len(entries) == 1:
diff --git a/youtube_dl/extractor/instagram.py b/youtube_dl/extractor/instagram.py
index b020e2621..65f6ca103 100644
--- a/youtube_dl/extractor/instagram.py
+++ b/youtube_dl/extractor/instagram.py
@@ -3,13 +3,11 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
-from ..utils import (
- int_or_none,
-)
+from ..utils import int_or_none
class InstagramIE(InfoExtractor):
- _VALID_URL = r'http://instagram\.com/p/(?P<id>.*?)/'
+ _VALID_URL = r'https?://instagram\.com/p/(?P<id>[\da-zA-Z]+)'
_TEST = {
'url': 'http://instagram.com/p/aye83DjauH/?foo=bar#abc',
'md5': '0d2da106a9d2631273e192b372806516',
@@ -23,8 +21,8 @@ class InstagramIE(InfoExtractor):
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
+ video_id = self._match_id(url)
+
webpage = self._download_webpage(url, video_id)
uploader_id = self._search_regex(r'"owner":{"username":"(.+?)"',
webpage, 'uploader id', fatal=False)
diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py
index 4430b3416..b48fac5e3 100644
--- a/youtube_dl/extractor/mtv.py
+++ b/youtube_dl/extractor/mtv.py
@@ -25,6 +25,7 @@ def _media_xml_tag(tag):
class MTVServicesInfoExtractor(InfoExtractor):
_MOBILE_TEMPLATE = None
+ _LANG = None
@staticmethod
def _id_from_uri(uri):
@@ -169,8 +170,12 @@ class MTVServicesInfoExtractor(InfoExtractor):
video_id = self._id_from_uri(uri)
feed_url = self._get_feed_url(uri)
data = compat_urllib_parse.urlencode({'uri': uri})
+ info_url = feed_url + '?'
+ if self._LANG:
+ info_url += 'lang=%s&' % self._LANG
+ info_url += data
idoc = self._download_xml(
- feed_url + '?' + data, video_id,
+ info_url, video_id,
'Downloading info', transform_source=fix_xml_ampersands)
return self.playlist_result(
[self._get_video_info(item) for item in idoc.findall('.//item')])
diff --git a/youtube_dl/extractor/orf.py b/youtube_dl/extractor/orf.py
index ca1a5bb3c..2e6c9872b 100644
--- a/youtube_dl/extractor/orf.py
+++ b/youtube_dl/extractor/orf.py
@@ -210,16 +210,16 @@ class ORFIPTVIE(InfoExtractor):
_VALID_URL = r'http://iptv\.orf\.at/(?:#/)?stories/(?P<id>\d+)'
_TEST = {
- 'url': 'http://iptv.orf.at/stories/2267952',
- 'md5': '26ffa4bab6dbce1eee78bbc7021016cd',
+ 'url': 'http://iptv.orf.at/stories/2275236/',
+ 'md5': 'c8b22af4718a4b4af58342529453e3e5',
'info_dict': {
- 'id': '339775',
+ 'id': '350612',
'ext': 'flv',
- 'title': 'Kreml-Kritiker Nawalny wieder frei',
- 'description': 'md5:6f24e7f546d364dacd0e616a9e409236',
- 'duration': 84.729,
+ 'title': 'Weitere Evakuierungen um Vulkan Calbuco',
+ 'description': 'md5:d689c959bdbcf04efeddedbf2299d633',
+ 'duration': 68.197,
'thumbnail': 're:^https?://.*\.jpg$',
- 'upload_date': '20150306',
+ 'upload_date': '20150425',
},
}
diff --git a/youtube_dl/extractor/philharmoniedeparis.py b/youtube_dl/extractor/philharmoniedeparis.py
new file mode 100644
index 000000000..6e60e5fe9
--- /dev/null
+++ b/youtube_dl/extractor/philharmoniedeparis.py
@@ -0,0 +1,78 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ float_or_none,
+ int_or_none,
+ parse_iso8601,
+ xpath_text,
+)
+
+
class PhilharmonieDeParisIE(InfoExtractor):
    """Extractor for concerts published on live.philharmoniedeparis.fr."""

    IE_DESC = 'Philharmonie de Paris'
    _VALID_URL = r'http://live\.philharmoniedeparis\.fr/(?:[Cc]oncert/|misc/Playlist\.ashx\?id=)(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://live.philharmoniedeparis.fr/concert/1032066.html',
        'info_dict': {
            'id': '1032066',
            'ext': 'flv',
            'title': 'md5:d1f5585d87d041d07ce9434804bc8425',
            'timestamp': 1428179400,
            'upload_date': '20150404',
            'duration': 6592.278,
        },
        'params': {
            # rtmp download
            'skip_download': True,
        }
    }, {
        'url': 'http://live.philharmoniedeparis.fr/Concert/1030324.html',
        'only_matching': True,
    }, {
        'url': 'http://live.philharmoniedeparis.fr/misc/Playlist.ashx?id=1030324&track=&lang=fr',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for one concert from its Playlist.ashx XML."""
        video_id = self._match_id(url)

        # The playlist document wraps everything in a single <concert> node.
        playlist = self._download_xml(
            'http://live.philharmoniedeparis.fr/misc/Playlist.ashx?id=%s' % video_id,
            video_id)
        concert = playlist.find('./concert')

        formats = []
        info_dict = {
            'id': video_id,
            'title': xpath_text(concert, './titre', 'title', fatal=True),
            'formats': formats,
        }

        fichiers = concert.find('./fichiers')
        stream = fichiers.attrib['serveurstream']
        # (format_id, attribute suffix) pairs; the list position is used as
        # the 'quality' value, so 'hq' ranks above 'lq'.
        variants = [('lq', ''), ('hq', '_hd')]
        for fichier in fichiers.findall('./fichier'):
            # Overwritten on every <fichier>, so the last one wins.
            info_dict['duration'] = float_or_none(fichier.get('timecodefin'))
            for quality, (format_id, suffix) in enumerate(variants):
                format_url = fichier.get('url%s' % suffix)
                if format_url:
                    formats.append({
                        'url': stream,
                        'play_path': format_url,
                        'ext': 'flv',
                        'format_id': format_id,
                        'width': int_or_none(concert.get('largeur%s' % suffix)),
                        'height': int_or_none(concert.get('hauteur%s' % suffix)),
                        'quality': quality,
                    })
        self._sort_formats(formats)

        date = concert.get('date')
        hour = concert.get('heure')
        if date and hour:
            # 'date' is sliced as YYYYMMDD and 'heure' is used as the
            # hours:minutes part of an ISO 8601 timestamp.
            info_dict['timestamp'] = parse_iso8601(
                '%s-%s-%sT%s:00' % (date[0:4], date[4:6], date[6:8], hour))
        elif date:
            info_dict['upload_date'] = date

        return info_dict
diff --git a/youtube_dl/extractor/southpark.py b/youtube_dl/extractor/southpark.py
index c20397b3d..e3b73295c 100644
--- a/youtube_dl/extractor/southpark.py
+++ b/youtube_dl/extractor/southpark.py
@@ -1,3 +1,4 @@
+# encoding: utf-8
from __future__ import unicode_literals
from .mtv import MTVServicesInfoExtractor
@@ -5,7 +6,7 @@ from .mtv import MTVServicesInfoExtractor
class SouthParkIE(MTVServicesInfoExtractor):
IE_NAME = 'southpark.cc.com'
- _VALID_URL = r'https?://(www\.)?(?P<url>southpark\.cc\.com/(clips|full-episodes)/(?P<id>.+?)(\?|#|$))'
+ _VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.cc\.com/(?:clips|full-episodes)/(?P<id>.+?)(\?|#|$))'
_FEED_URL = 'http://www.southparkstudios.com/feeds/video-player/mrss'
@@ -20,9 +21,20 @@ class SouthParkIE(MTVServicesInfoExtractor):
}]
class SouthParkEsIE(SouthParkIE):
    # Spanish-language episodes hosted on southpark.cc.com.
    IE_NAME = 'southpark.cc.com:español'
    _VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.cc\.com/episodios-en-espanol/(?P<id>.+?)(\?|#|$))'
    # Sent as the 'lang' query parameter when the MTV services feed is requested.
    _LANG = 'es'

    _TESTS = [{
        'url': 'http://southpark.cc.com/episodios-en-espanol/s01e01-cartman-consigue-una-sonda-anal#source=351c1323-0b96-402d-a8b9-40d01b2e9bde&position=1&sort=!airdate',
        'playlist_count': 4,
    }]
+
+
class SouthparkDeIE(SouthParkIE):
IE_NAME = 'southpark.de'
- _VALID_URL = r'https?://(www\.)?(?P<url>southpark\.de/(clips|alle-episoden)/(?P<id>.+?)(\?|#|$))'
+ _VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.de/(?:clips|alle-episoden)/(?P<id>.+?)(\?|#|$))'
_FEED_URL = 'http://www.southpark.de/feeds/video-player/mrss/'
_TESTS = [{
diff --git a/youtube_dl/postprocessor/embedthumbnail.py b/youtube_dl/postprocessor/embedthumbnail.py
index 7ba98a0ea..4868a42fd 100644
--- a/youtube_dl/postprocessor/embedthumbnail.py
+++ b/youtube_dl/postprocessor/embedthumbnail.py
@@ -11,6 +11,7 @@ from ..compat import (
compat_urlretrieve,
)
from ..utils import (
+ determine_ext,
check_executable,
encodeFilename,
PostProcessingError,
@@ -27,7 +28,7 @@ class EmbedThumbnailPP(FFmpegPostProcessor):
def run(self, info):
filename = info['filepath']
temp_filename = prepend_extension(filename, 'temp')
- temp_thumbnail = prepend_extension(filename, 'thumb')
+ temp_thumbnail = filename + '.' + determine_ext(info['thumbnail'])
if not info.get('thumbnail'):
raise EmbedThumbnailPPError('Thumbnail was not found. Nothing to do.')
diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py
index 7a952963e..1765f4969 100644
--- a/youtube_dl/postprocessor/ffmpeg.py
+++ b/youtube_dl/postprocessor/ffmpeg.py
@@ -20,6 +20,7 @@ from ..utils import (
prepend_extension,
shell_quote,
subtitles_filename,
+ dfxp2srt,
)
@@ -651,6 +652,30 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor):
'format' % new_ext)
continue
new_file = subtitles_filename(filename, lang, new_ext)
+
+ if ext == 'dfxp' or ext == 'ttml':
+ self._downloader.report_warning(
+ 'You have requested to convert dfxp (TTML) subtitles into another format, '
+ 'which results in style information loss')
+
+ dfxp_file = subtitles_filename(filename, lang, ext)
+ srt_file = subtitles_filename(filename, lang, 'srt')
+
+ with io.open(dfxp_file, 'rt', encoding='utf-8') as f:
+ srt_data = dfxp2srt(f.read())
+
+ with io.open(srt_file, 'wt', encoding='utf-8') as f:
+ f.write(srt_data)
+
+ ext = 'srt'
+ subs[lang] = {
+ 'ext': 'srt',
+ 'data': srt_data
+ }
+
+ if new_ext == 'srt':
+ continue
+
self.run_ffmpeg(
subtitles_filename(filename, lang, ext),
new_file, ['-f', new_format])
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index c69d3e165..7d15eab64 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -371,6 +371,18 @@ def unescapeHTML(s):
r'&([^;]+);', lambda m: _htmlentity_transform(m.group(1)), s)
def get_subprocess_encoding():
    """Return the name of the encoding to use for subprocess arguments.

    On Windows 2000 and later this is whatever preferredencoding() reports;
    everywhere else it is the filesystem encoding. 'utf-8' is used when the
    selected source yields None.
    """
    if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
        # For subprocess calls, encode with locale encoding
        # Refer to http://stackoverflow.com/a/9951851/35070
        chosen = preferredencoding()
    else:
        chosen = sys.getfilesystemencoding()
    return 'utf-8' if chosen is None else chosen
+
+
def encodeFilename(s, for_subprocess=False):
"""
@param s The name of the file
@@ -382,21 +394,24 @@ def encodeFilename(s, for_subprocess=False):
if sys.version_info >= (3, 0):
return s
- if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
- # Pass '' directly to use Unicode APIs on Windows 2000 and up
- # (Detecting Windows NT 4 is tricky because 'major >= 4' would
- # match Windows 9x series as well. Besides, NT 4 is obsolete.)
- if not for_subprocess:
- return s
- else:
- # For subprocess calls, encode with locale encoding
- # Refer to http://stackoverflow.com/a/9951851/35070
- encoding = preferredencoding()
- else:
- encoding = sys.getfilesystemencoding()
- if encoding is None:
- encoding = 'utf-8'
- return s.encode(encoding, 'ignore')
+ # Pass '' directly to use Unicode APIs on Windows 2000 and up
+ # (Detecting Windows NT 4 is tricky because 'major >= 4' would
+ # match Windows 9x series as well. Besides, NT 4 is obsolete.)
+ if not for_subprocess and sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
+ return s
+
+ return s.encode(get_subprocess_encoding(), 'ignore')
+
+
def decodeFilename(b, for_subprocess=False):
    """Decode a byte-string file name using the subprocess encoding.

    @param b               The value to decode
    @param for_subprocess  Accepted for symmetry with encodeFilename; not
                           consulted here

    The value is returned untouched on Python 3 and whenever it is not a
    bytes object. Undecodable bytes are dropped ('ignore').
    """
    needs_no_decoding = sys.version_info >= (3, 0) or not isinstance(b, bytes)
    if needs_no_decoding:
        return b

    return b.decode(get_subprocess_encoding(), 'ignore')
def encodeArgument(s):
@@ -408,6 +423,10 @@ def encodeArgument(s):
return encodeFilename(s, True)
def decodeArgument(b):
    """Decode a subprocess argument; shorthand for decodeFilename(b, True)."""
    return decodeFilename(b, for_subprocess=True)
+
+
def decodeOption(optval):
if optval is None:
return optval
@@ -1791,6 +1810,59 @@ def match_filter_func(filter_str):
return _match_func
def parse_dfxp_time_expr(time_expr):
    """Parse a DFXP (TTML) time expression into seconds.

    Two forms are recognised: a plain offset in seconds with an optional
    trailing 's' ('0.1', '0.1s') and a clock time 'H:MM:SS[.fraction]'.

    Returns 0.0 for an empty or missing expression, a float for a
    recognised one, and None for anything else.
    """
    if not time_expr:
        return 0.0

    offset_match = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
    if offset_match is not None:
        return float(offset_match.group('time_offset'))

    clock_match = re.match(r'^(\d+):(\d\d):(\d\d(?:\.\d+)?)$', time_expr)
    if clock_match is None:
        return None

    hours, minutes, seconds = clock_match.groups()
    return 3600 * int(hours) + 60 * int(minutes) + float(seconds)
+
+
def format_srt_time(seconds):
    """Format a duration in seconds as an SRT timestamp 'HH:MM:SS,mmm'.

    The value is rounded to the nearest millisecond before it is split into
    fields. Deriving the milliseconds from the fractional part and letting
    '%d' truncate it loses a millisecond to float error (0.3 s would come
    out as ',299').
    """
    total_millisecs = int(round(seconds * 1000))
    (total_secs, millisecs) = divmod(total_millisecs, 1000)
    (total_mins, secs) = divmod(total_secs, 60)
    (hours, mins) = divmod(total_mins, 60)
    return '%02d:%02d:%02d,%03d' % (hours, mins, secs, millisecs)
+
+
def dfxp2srt(dfxp_data):
    """Convert a DFXP (TTML) subtitle document into SRT text.

    @param dfxp_data  The DFXP document as a text string
    @return           One numbered SRT cue per <p> element in the TTML
                      namespace, in document order, each followed by a
                      blank line

    Only line breaks and the text content of spans are carried over; any
    other child element is emitted as serialized XML.
    """
    _x = functools.partial(xpath_with_ns, ns_map={'ttml': 'http://www.w3.org/ns/ttml'})
    str_or_empty = functools.partial(str_or_none, default='')

    def parse_node(node):
        # Flatten one element into plain text, recursing into spans.
        out = str_or_empty(node.text)

        for child in node:
            if child.tag == _x('ttml:br'):
                out += '\n' + str_or_empty(child.tail)
            elif child.tag == _x('ttml:span'):
                # The text following </span> is stored as the span's tail
                # and belongs to the enclosing node; without it,
                # '<p>a <span>b</span> c</p>' would lose ' c'.
                out += str_or_empty(parse_node(child)) + str_or_empty(child.tail)
            else:
                out += str_or_empty(xml.etree.ElementTree.tostring(child))

        return out

    dfxp = xml.etree.ElementTree.fromstring(dfxp_data.encode('utf-8'))
    paras = dfxp.findall(_x('.//ttml:p'))

    out = []
    # SRT cue numbers start at 1.
    for index, para in enumerate(paras, 1):
        out.append('%d\n%s --> %s\n%s\n\n' % (
            index,
            format_srt_time(parse_dfxp_time_expr(para.attrib.get('begin'))),
            format_srt_time(parse_dfxp_time_expr(para.attrib.get('end'))),
            parse_node(para)))

    return ''.join(out)
+
+
class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
def __init__(self, proxies=None):
# Set default handlers
diff --git a/youtube_dl/version.py b/youtube_dl/version.py
index 3fd0e7e56..dc7d666dd 100644
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,3 +1,3 @@
from __future__ import unicode_literals
-__version__ = '2015.04.17'
+__version__ = '2015.04.26'