diff options
-rw-r--r-- | README.md | 20 | ||||
-rw-r--r-- | docs/supportedsites.md | 10 | ||||
-rw-r--r-- | test/test_utils.py | 38 | ||||
-rw-r--r-- | youtube_dl/downloader/__init__.py | 6 | ||||
-rw-r--r-- | youtube_dl/downloader/common.py | 13 | ||||
-rw-r--r-- | youtube_dl/downloader/external.py | 16 | ||||
-rw-r--r-- | youtube_dl/downloader/rtmp.py | 21 | ||||
-rw-r--r-- | youtube_dl/downloader/rtsp.py (renamed from youtube_dl/downloader/mplayer.py) | 20 | ||||
-rw-r--r-- | youtube_dl/extractor/__init__.py | 2 | ||||
-rw-r--r-- | youtube_dl/extractor/cspan.py | 11 | ||||
-rw-r--r-- | youtube_dl/extractor/instagram.py | 10 | ||||
-rw-r--r-- | youtube_dl/extractor/mtv.py | 7 | ||||
-rw-r--r-- | youtube_dl/extractor/orf.py | 14 | ||||
-rw-r--r-- | youtube_dl/extractor/philharmoniedeparis.py | 78 | ||||
-rw-r--r-- | youtube_dl/extractor/southpark.py | 16 | ||||
-rw-r--r-- | youtube_dl/postprocessor/embedthumbnail.py | 3 | ||||
-rw-r--r-- | youtube_dl/postprocessor/ffmpeg.py | 25 | ||||
-rw-r--r-- | youtube_dl/utils.py | 102 | ||||
-rw-r--r-- | youtube_dl/version.py | 2 |
19 files changed, 332 insertions, 82 deletions
@@ -269,9 +269,9 @@ The simplest case is requesting a specific format, for example `-f 22`. You can If you want to download multiple videos and they don't have the same formats available, you can specify the order of preference using slashes, as in `-f 22/17/18`. You can also filter the video results by putting a condition in brackets, as in `-f "best[height=720]"` (or `-f "[filesize>10M]"`). This works for filesize, height, width, tbr, abr, vbr, asr, and fps and the comparisons <, <=, >, >=, =, != and for ext, acodec, vcodec, container, and protocol and the comparisons =, != . Formats for which the value is not known are excluded unless you put a question mark (?) after the operator. You can combine format filters, so `-f "[height <=? 720][tbr>500]"` selects up to 720p videos (or videos where the height is not known) with a bitrate of at least 500 KBit/s. Use commas to download multiple formats, such as `-f 136/137/mp4/bestvideo,140/m4a/bestaudio`. You can merge the video and audio of two formats into a single file using `-f <video-format>+<audio-format>` (requires ffmpeg or avconv), for example `-f bestvideo+bestaudio`. -Since the end of April 2015 youtube-dl uses `-f bestvideo+bestaudio/best` as default format selection (see #5447, #5456). If ffmpeg or avconv are installed this results in downloading `bestvideo` and `bestaudio` separately and muxing them together into a single file giving the best overall quality available. Otherwise it falls back to `best` and results in downloading best available quality served as a single file. `best` is also needed for videos that don't come from YouTube because they don't provide the audio and video in two different files. If you want to only download some dash formats (for example if you are not interested in getting videos with a resolution higher than 1080p), you can add `-f bestvideo[height<=?1080]+bestaudio/best` to your configuration file. 
+Since the end of April 2015 and version 2015.04.26 youtube-dl uses `-f bestvideo+bestaudio/best` as default format selection (see #5447, #5456). If ffmpeg or avconv are installed this results in downloading `bestvideo` and `bestaudio` separately and muxing them together into a single file giving the best overall quality available. Otherwise it falls back to `best` and results in downloading best available quality served as a single file. `best` is also needed for videos that don't come from YouTube because they don't provide the audio and video in two different files. If you want to only download some dash formats (for example if you are not interested in getting videos with a resolution higher than 1080p), you can add `-f bestvideo[height<=?1080]+bestaudio/best` to your configuration file. -If you want to preserve the old format selection behavior (pre-April 2015), i.e. you want to download best available quality media served as a single file, you should explicitly specify your choice with `-f best`. You may want to add it to the [configuration file](#configuration) in order not to type it every time you run youtube-dl. +If you want to preserve the old format selection behavior (prior to youtube-dl 2015.04.26), i.e. you want to download best available quality media served as a single file, you should explicitly specify your choice with `-f best`. You may want to add it to the [configuration file](#configuration) in order not to type it every time you run youtube-dl. # VIDEO SELECTION @@ -357,6 +357,22 @@ YouTube has switched to a new video info format in July 2011 which is not suppor YouTube requires an additional signature since September 2012 which is not supported by old versions of youtube-dl. See [above](#how-do-i-update-youtube-dl) for how to update youtube-dl. +### Video URL contains an ampersand and I'm getting some strange output `[1] 2839` or `'v' is not recognized as an internal or external command` ### + +That's actually the output from your shell. 
Since the ampersand is one of the special shell characters, it's interpreted by the shell, preventing you from passing the whole URL to youtube-dl. To prevent your shell from interpreting the ampersands (or any other special characters) you have to either put the whole URL in quotes or escape them with a backslash (which approach will work depends on your shell). + +For example, if your URL is https://www.youtube.com/watch?t=4&v=BaW_jenozKc you should end up with the following command: + +```youtube-dl 'https://www.youtube.com/watch?t=4&v=BaW_jenozKc'``` + +or + +```youtube-dl https://www.youtube.com/watch?t=4\&v=BaW_jenozKc``` + +On Windows you have to use double quotes: + +```youtube-dl "https://www.youtube.com/watch?t=4&v=BaW_jenozKc"``` + ### ExtractorError: Could not find JS function u'OF' In February 2015, the new YouTube player contained a character sequence in a string that was misinterpreted by old versions of youtube-dl. See [above](#how-do-i-update-youtube-dl) for how to update youtube-dl. 
diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 80e86c1b6..cc1564b7b 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -174,6 +174,7 @@ - **Gazeta** - **GDCVault** - **generic**: Generic downloader that works on some sites + - **Gfycat** - **GiantBomb** - **Giga** - **Glide**: Glide mobile video messages (glide.me) @@ -181,7 +182,7 @@ - **GodTube** - **GoldenMoustache** - **Golem** - - **GorillaVid**: GorillaVid.in, daclips.in, movpod.in and fastvideo.in + - **GorillaVid**: GorillaVid.in, daclips.in, movpod.in, fastvideo.in and realvid.net - **Goshgay** - **Grooveshark** - **Groupon** @@ -252,6 +253,7 @@ - **Malemotion** - **MDR** - **media.ccc.de** + - **MegaVideoz** - **metacafe** - **Metacritic** - **Mgoon** @@ -405,6 +407,7 @@ - **Screencast** - **ScreencastOMatic** - **ScreenwaveMedia** + - **SenateISVP** - **ServingSys** - **Sexu** - **SexyKarma**: Sexy Karma and Watch Indian Porn @@ -427,6 +430,7 @@ - **soundgasm** - **soundgasm:profile** - **southpark.cc.com** + - **southpark.cc.com:español** - **southpark.de** - **Space** - **SpankBang** @@ -461,7 +465,7 @@ - **TeamFour** - **TechTalks** - **techtv.mit.edu** - - **TED** + - **ted** - **tegenlicht.vpro.nl** - **TeleBruxelles** - **telecinco.es** @@ -551,7 +555,7 @@ - **vimeo:review**: Review pages on vimeo - **vimeo:user** - **vimeo:watchlater**: Vimeo watch later list, "vimeowatchlater" keyword (requires authentication) - - **Vimple**: Vimple.ru + - **Vimple**: Vimple - one-click video hosting - **Vine** - **vine:user** - **vk.com** diff --git a/test/test_utils.py b/test/test_utils.py index 2e3a6480c..17017a8c0 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -58,6 +58,8 @@ from youtube_dl.utils import ( xpath_text, render_table, match_str, + parse_dfxp_time_expr, + dfxp2srt, ) @@ -581,6 +583,42 @@ ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4') 'like_count > 100 & dislike_count <? 
50 & description', {'like_count': 190, 'dislike_count': 10})) + def test_parse_dfxp_time_expr(self): + self.assertEqual(parse_dfxp_time_expr(None), 0.0) + self.assertEqual(parse_dfxp_time_expr(''), 0.0) + self.assertEqual(parse_dfxp_time_expr('0.1'), 0.1) + self.assertEqual(parse_dfxp_time_expr('0.1s'), 0.1) + self.assertEqual(parse_dfxp_time_expr('00:00:01'), 1.0) + self.assertEqual(parse_dfxp_time_expr('00:00:01.100'), 1.1) + + def test_dfxp2srt(self): + dfxp_data = '''<?xml version="1.0" encoding="UTF-8"?> + <tt xmlns="http://www.w3.org/ns/ttml" xml:lang="en" xmlns:tts="http://www.w3.org/ns/ttml#parameter"> + <body> + <div xml:lang="en"> + <p begin="0" end="1">The following line contains Chinese characters and special symbols</p> + <p begin="1" end="2">第二行<br/>♪♪</p> + <p begin="2" end="3"><span>Third<br/>Line</span></p> + </div> + </body> + </tt>''' + srt_data = '''1 +00:00:00,000 --> 00:00:01,000 +The following line contains Chinese characters and special symbols + +2 +00:00:01,000 --> 00:00:02,000 +第二行 +♪♪ + +3 +00:00:02,000 --> 00:00:03,000 +Third +Line + +''' + self.assertEqual(dfxp2srt(dfxp_data), srt_data) + if __name__ == '__main__': unittest.main() diff --git a/youtube_dl/downloader/__init__.py b/youtube_dl/downloader/__init__.py index 9fb66e2f7..f110830c4 100644 --- a/youtube_dl/downloader/__init__.py +++ b/youtube_dl/downloader/__init__.py @@ -6,7 +6,7 @@ from .f4m import F4mFD from .hls import HlsFD from .hls import NativeHlsFD from .http import HttpFD -from .mplayer import MplayerFD +from .rtsp import RtspFD from .rtmp import RtmpFD from ..utils import ( @@ -17,8 +17,8 @@ PROTOCOL_MAP = { 'rtmp': RtmpFD, 'm3u8_native': NativeHlsFD, 'm3u8': HlsFD, - 'mms': MplayerFD, - 'rtsp': MplayerFD, + 'mms': RtspFD, + 'rtsp': RtspFD, 'f4m': F4mFD, } diff --git a/youtube_dl/downloader/common.py b/youtube_dl/downloader/common.py index a0fc5ead0..97e755d4b 100644 --- a/youtube_dl/downloader/common.py +++ b/youtube_dl/downloader/common.py @@ -8,6 +8,7 @@ import time 
from ..compat import compat_str from ..utils import ( encodeFilename, + decodeArgument, format_bytes, timeconvert, ) @@ -353,19 +354,15 @@ class FileDownloader(object): # this interface self._progress_hooks.append(ph) - def _debug_cmd(self, args, subprocess_encoding, exe=None): + def _debug_cmd(self, args, exe=None): if not self.params.get('verbose', False): return + str_args = [decodeArgument(a) for a in args] + if exe is None: - exe = os.path.basename(args[0]) + exe = os.path.basename(str_args[0]) - if subprocess_encoding: - str_args = [ - a.decode(subprocess_encoding) if isinstance(a, bytes) else a - for a in args] - else: - str_args = args try: import pipes shell_quote = lambda args: ' '.join(map(pipes.quote, str_args)) diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py index 1673b2382..7ca2d3143 100644 --- a/youtube_dl/downloader/external.py +++ b/youtube_dl/downloader/external.py @@ -2,11 +2,11 @@ from __future__ import unicode_literals import os.path import subprocess -import sys from .common import FileDownloader from ..utils import ( encodeFilename, + encodeArgument, ) @@ -60,17 +60,9 @@ class ExternalFD(FileDownloader): def _call_downloader(self, tmpfilename, info_dict): """ Either overwrite this or implement _make_cmd """ - cmd = self._make_cmd(tmpfilename, info_dict) - - if sys.platform == 'win32' and sys.version_info < (3, 0): - # Windows subprocess module does not actually support Unicode - # on Python 2.x - # See http://stackoverflow.com/a/9951851/35070 - subprocess_encoding = sys.getfilesystemencoding() - cmd = [a.encode(subprocess_encoding, 'ignore') for a in cmd] - else: - subprocess_encoding = None - self._debug_cmd(cmd, subprocess_encoding) + cmd = [encodeArgument(a) for a in self._make_cmd(tmpfilename, info_dict)] + + self._debug_cmd(cmd) p = subprocess.Popen( cmd, stderr=subprocess.PIPE) diff --git a/youtube_dl/downloader/rtmp.py b/youtube_dl/downloader/rtmp.py index ddf5724ae..6865b5e2f 100644 --- 
a/youtube_dl/downloader/rtmp.py +++ b/youtube_dl/downloader/rtmp.py @@ -3,7 +3,6 @@ from __future__ import unicode_literals import os import re import subprocess -import sys import time from .common import FileDownloader @@ -11,6 +10,7 @@ from ..compat import compat_str from ..utils import ( check_executable, encodeFilename, + encodeArgument, get_exe_version, ) @@ -121,7 +121,7 @@ class RtmpFD(FileDownloader): # possible. This is part of rtmpdump's normal usage, AFAIK. basic_args = [ 'rtmpdump', '--verbose', '-r', url, - '-o', encodeFilename(tmpfilename, True)] + '-o', tmpfilename] if player_url is not None: basic_args += ['--swfVfy', player_url] if page_url is not None: @@ -154,16 +154,9 @@ class RtmpFD(FileDownloader): if not live and continue_dl: args += ['--skip', '1'] - if sys.platform == 'win32' and sys.version_info < (3, 0): - # Windows subprocess module does not actually support Unicode - # on Python 2.x - # See http://stackoverflow.com/a/9951851/35070 - subprocess_encoding = sys.getfilesystemencoding() - args = [a.encode(subprocess_encoding, 'ignore') for a in args] - else: - subprocess_encoding = None + args = [encodeArgument(a) for a in args] - self._debug_cmd(args, subprocess_encoding, exe='rtmpdump') + self._debug_cmd(args, exe='rtmpdump') RD_SUCCESS = 0 RD_FAILED = 1 @@ -180,7 +173,11 @@ class RtmpFD(FileDownloader): prevsize = os.path.getsize(encodeFilename(tmpfilename)) self.to_screen('[rtmpdump] %s bytes' % prevsize) time.sleep(5.0) # This seems to be needed - retval = run_rtmpdump(basic_args + ['-e'] + [[], ['-k', '1']][retval == RD_FAILED]) + args = basic_args + ['--resume'] + if retval == RD_FAILED: + args += ['--skip', '1'] + args = [encodeArgument(a) for a in args] + retval = run_rtmpdump(args) cursize = os.path.getsize(encodeFilename(tmpfilename)) if prevsize == cursize and retval == RD_FAILED: break diff --git a/youtube_dl/downloader/mplayer.py b/youtube_dl/downloader/rtsp.py index 72cef30ea..3eb29526c 100644 --- 
a/youtube_dl/downloader/mplayer.py +++ b/youtube_dl/downloader/rtsp.py @@ -10,21 +10,23 @@ from ..utils import ( ) -class MplayerFD(FileDownloader): +class RtspFD(FileDownloader): def real_download(self, filename, info_dict): url = info_dict['url'] self.report_destination(filename) tmpfilename = self.temp_name(filename) - args = [ - 'mplayer', '-really-quiet', '-vo', 'null', '-vc', 'dummy', - '-dumpstream', '-dumpfile', tmpfilename, url] - # Check for mplayer first - if not check_executable('mplayer', ['-h']): - self.report_error('MMS or RTSP download detected but "%s" could not be run' % args[0]) + if check_executable('mplayer', ['-h']): + args = [ + 'mplayer', '-really-quiet', '-vo', 'null', '-vc', 'dummy', + '-dumpstream', '-dumpfile', tmpfilename, url] + elif check_executable('mpv', ['-h']): + args = [ + 'mpv', '-really-quiet', '--vo=null', '--stream-dump=' + tmpfilename, url] + else: + self.report_error('MMS or RTSP download detected but neither "mplayer" nor "mpv" could be run. Please install any.') return False - # Download using mplayer. 
retval = subprocess.call(args) if retval == 0: fsize = os.path.getsize(encodeFilename(tmpfilename)) @@ -39,5 +41,5 @@ class MplayerFD(FileDownloader): return True else: self.to_stderr('\n') - self.report_error('mplayer exited with code %d' % retval) + self.report_error('%s exited with code %d' % (args[0], retval)) return False diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index a64afa1da..641c45f43 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -377,6 +377,7 @@ from .orf import ( from .parliamentliveuk import ParliamentLiveUKIE from .patreon import PatreonIE from .pbs import PBSIE +from .philharmoniedeparis import PhilharmonieDeParisIE from .phoenix import PhoenixIE from .photobucket import PhotobucketIE from .planetaplay import PlanetaPlayIE @@ -478,6 +479,7 @@ from .soundgasm import ( ) from .southpark import ( SouthParkIE, + SouthParkEsIE, SouthparkDeIE, ) from .space import SpaceIE diff --git a/youtube_dl/extractor/cspan.py b/youtube_dl/extractor/cspan.py index d516b1402..fbefd37d0 100644 --- a/youtube_dl/extractor/cspan.py +++ b/youtube_dl/extractor/cspan.py @@ -8,6 +8,7 @@ from ..utils import ( unescapeHTML, find_xpath_attr, smuggle_url, + determine_ext, ) from .senateisvp import SenateISVPIE @@ -87,6 +88,10 @@ class CSpanIE(InfoExtractor): return self.url_result(surl, 'SenateISVP', video_id, title) files = data['video']['files'] + try: + capfile = data['video']['capfile']['#text'] + except KeyError: + capfile = None entries = [{ 'id': '%s_%d' % (video_id, partnum + 1), @@ -97,6 +102,12 @@ class CSpanIE(InfoExtractor): 'description': description, 'thumbnail': thumbnail, 'duration': int_or_none(f.get('length', {}).get('#text')), + 'subtitles': { + 'en': [{ + 'url': capfile, + 'ext': determine_ext(capfile, 'dfxp') + }], + } if capfile else None, } for partnum, f in enumerate(files)] if len(entries) == 1: diff --git a/youtube_dl/extractor/instagram.py b/youtube_dl/extractor/instagram.py 
index b020e2621..65f6ca103 100644 --- a/youtube_dl/extractor/instagram.py +++ b/youtube_dl/extractor/instagram.py @@ -3,13 +3,11 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import ( - int_or_none, -) +from ..utils import int_or_none class InstagramIE(InfoExtractor): - _VALID_URL = r'http://instagram\.com/p/(?P<id>.*?)/' + _VALID_URL = r'https?://instagram\.com/p/(?P<id>[\da-zA-Z]+)' _TEST = { 'url': 'http://instagram.com/p/aye83DjauH/?foo=bar#abc', 'md5': '0d2da106a9d2631273e192b372806516', @@ -23,8 +21,8 @@ class InstagramIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) uploader_id = self._search_regex(r'"owner":{"username":"(.+?)"', webpage, 'uploader id', fatal=False) diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index 4430b3416..b48fac5e3 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -25,6 +25,7 @@ def _media_xml_tag(tag): class MTVServicesInfoExtractor(InfoExtractor): _MOBILE_TEMPLATE = None + _LANG = None @staticmethod def _id_from_uri(uri): @@ -169,8 +170,12 @@ class MTVServicesInfoExtractor(InfoExtractor): video_id = self._id_from_uri(uri) feed_url = self._get_feed_url(uri) data = compat_urllib_parse.urlencode({'uri': uri}) + info_url = feed_url + '?' + if self._LANG: + info_url += 'lang=%s&' % self._LANG + info_url += data idoc = self._download_xml( - feed_url + '?' 
+ data, video_id, + info_url, video_id, 'Downloading info', transform_source=fix_xml_ampersands) return self.playlist_result( [self._get_video_info(item) for item in idoc.findall('.//item')]) diff --git a/youtube_dl/extractor/orf.py b/youtube_dl/extractor/orf.py index ca1a5bb3c..2e6c9872b 100644 --- a/youtube_dl/extractor/orf.py +++ b/youtube_dl/extractor/orf.py @@ -210,16 +210,16 @@ class ORFIPTVIE(InfoExtractor): _VALID_URL = r'http://iptv\.orf\.at/(?:#/)?stories/(?P<id>\d+)' _TEST = { - 'url': 'http://iptv.orf.at/stories/2267952', - 'md5': '26ffa4bab6dbce1eee78bbc7021016cd', + 'url': 'http://iptv.orf.at/stories/2275236/', + 'md5': 'c8b22af4718a4b4af58342529453e3e5', 'info_dict': { - 'id': '339775', + 'id': '350612', 'ext': 'flv', - 'title': 'Kreml-Kritiker Nawalny wieder frei', - 'description': 'md5:6f24e7f546d364dacd0e616a9e409236', - 'duration': 84.729, + 'title': 'Weitere Evakuierungen um Vulkan Calbuco', + 'description': 'md5:d689c959bdbcf04efeddedbf2299d633', + 'duration': 68.197, 'thumbnail': 're:^https?://.*\.jpg$', - 'upload_date': '20150306', + 'upload_date': '20150425', }, } diff --git a/youtube_dl/extractor/philharmoniedeparis.py b/youtube_dl/extractor/philharmoniedeparis.py new file mode 100644 index 000000000..6e60e5fe9 --- /dev/null +++ b/youtube_dl/extractor/philharmoniedeparis.py @@ -0,0 +1,78 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + float_or_none, + int_or_none, + parse_iso8601, + xpath_text, +) + + +class PhilharmonieDeParisIE(InfoExtractor): + IE_DESC = 'Philharmonie de Paris' + _VALID_URL = r'http://live\.philharmoniedeparis\.fr/(?:[Cc]oncert/|misc/Playlist\.ashx\?id=)(?P<id>\d+)' + _TESTS = [{ + 'url': 'http://live.philharmoniedeparis.fr/concert/1032066.html', + 'info_dict': { + 'id': '1032066', + 'ext': 'flv', + 'title': 'md5:d1f5585d87d041d07ce9434804bc8425', + 'timestamp': 1428179400, + 'upload_date': '20150404', + 'duration': 6592.278, + }, + 'params': { + 
# rtmp download + 'skip_download': True, + } + }, { + 'url': 'http://live.philharmoniedeparis.fr/Concert/1030324.html', + 'only_matching': True, + }, { + 'url': 'http://live.philharmoniedeparis.fr/misc/Playlist.ashx?id=1030324&track=&lang=fr', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + concert = self._download_xml( + 'http://live.philharmoniedeparis.fr/misc/Playlist.ashx?id=%s' % video_id, + video_id).find('./concert') + + formats = [] + info_dict = { + 'id': video_id, + 'title': xpath_text(concert, './titre', 'title', fatal=True), + 'formats': formats, + } + + fichiers = concert.find('./fichiers') + stream = fichiers.attrib['serveurstream'] + for fichier in fichiers.findall('./fichier'): + info_dict['duration'] = float_or_none(fichier.get('timecodefin')) + for quality, (format_id, suffix) in enumerate([('lq', ''), ('hq', '_hd')]): + format_url = fichier.get('url%s' % suffix) + if not format_url: + continue + formats.append({ + 'url': stream, + 'play_path': format_url, + 'ext': 'flv', + 'format_id': format_id, + 'width': int_or_none(concert.get('largeur%s' % suffix)), + 'height': int_or_none(concert.get('hauteur%s' % suffix)), + 'quality': quality, + }) + self._sort_formats(formats) + + date, hour = concert.get('date'), concert.get('heure') + if date and hour: + info_dict['timestamp'] = parse_iso8601( + '%s-%s-%sT%s:00' % (date[0:4], date[4:6], date[6:8], hour)) + elif date: + info_dict['upload_date'] = date + + return info_dict diff --git a/youtube_dl/extractor/southpark.py b/youtube_dl/extractor/southpark.py index c20397b3d..e3b73295c 100644 --- a/youtube_dl/extractor/southpark.py +++ b/youtube_dl/extractor/southpark.py @@ -1,3 +1,4 @@ +# encoding: utf-8 from __future__ import unicode_literals from .mtv import MTVServicesInfoExtractor @@ -5,7 +6,7 @@ from .mtv import MTVServicesInfoExtractor class SouthParkIE(MTVServicesInfoExtractor): IE_NAME = 'southpark.cc.com' - _VALID_URL = 
r'https?://(www\.)?(?P<url>southpark\.cc\.com/(clips|full-episodes)/(?P<id>.+?)(\?|#|$))' + _VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.cc\.com/(?:clips|full-episodes)/(?P<id>.+?)(\?|#|$))' _FEED_URL = 'http://www.southparkstudios.com/feeds/video-player/mrss' @@ -20,9 +21,20 @@ class SouthParkIE(MTVServicesInfoExtractor): }] +class SouthParkEsIE(SouthParkIE): + IE_NAME = 'southpark.cc.com:español' + _VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.cc\.com/episodios-en-espanol/(?P<id>.+?)(\?|#|$))' + _LANG = 'es' + + _TESTS = [{ + 'url': 'http://southpark.cc.com/episodios-en-espanol/s01e01-cartman-consigue-una-sonda-anal#source=351c1323-0b96-402d-a8b9-40d01b2e9bde&position=1&sort=!airdate', + 'playlist_count': 4, + }] + + class SouthparkDeIE(SouthParkIE): IE_NAME = 'southpark.de' - _VALID_URL = r'https?://(www\.)?(?P<url>southpark\.de/(clips|alle-episoden)/(?P<id>.+?)(\?|#|$))' + _VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.de/(?:clips|alle-episoden)/(?P<id>.+?)(\?|#|$))' _FEED_URL = 'http://www.southpark.de/feeds/video-player/mrss/' _TESTS = [{ diff --git a/youtube_dl/postprocessor/embedthumbnail.py b/youtube_dl/postprocessor/embedthumbnail.py index 7ba98a0ea..4868a42fd 100644 --- a/youtube_dl/postprocessor/embedthumbnail.py +++ b/youtube_dl/postprocessor/embedthumbnail.py @@ -11,6 +11,7 @@ from ..compat import ( compat_urlretrieve, ) from ..utils import ( + determine_ext, check_executable, encodeFilename, PostProcessingError, @@ -27,7 +28,7 @@ class EmbedThumbnailPP(FFmpegPostProcessor): def run(self, info): filename = info['filepath'] temp_filename = prepend_extension(filename, 'temp') - temp_thumbnail = prepend_extension(filename, 'thumb') + temp_thumbnail = filename + '.' + determine_ext(info['thumbnail']) if not info.get('thumbnail'): raise EmbedThumbnailPPError('Thumbnail was not found. 
Nothing to do.') diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py index 7a952963e..1765f4969 100644 --- a/youtube_dl/postprocessor/ffmpeg.py +++ b/youtube_dl/postprocessor/ffmpeg.py @@ -20,6 +20,7 @@ from ..utils import ( prepend_extension, shell_quote, subtitles_filename, + dfxp2srt, ) @@ -651,6 +652,30 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor): 'format' % new_ext) continue new_file = subtitles_filename(filename, lang, new_ext) + + if ext == 'dfxp' or ext == 'ttml': + self._downloader.report_warning( + 'You have requested to convert dfxp (TTML) subtitles into another format, ' + 'which results in style information loss') + + dfxp_file = subtitles_filename(filename, lang, ext) + srt_file = subtitles_filename(filename, lang, 'srt') + + with io.open(dfxp_file, 'rt', encoding='utf-8') as f: + srt_data = dfxp2srt(f.read()) + + with io.open(srt_file, 'wt', encoding='utf-8') as f: + f.write(srt_data) + + ext = 'srt' + subs[lang] = { + 'ext': 'srt', + 'data': srt_data + } + + if new_ext == 'srt': + continue + self.run_ffmpeg( subtitles_filename(filename, lang, ext), new_file, ['-f', new_format]) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index c69d3e165..7d15eab64 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -371,6 +371,18 @@ def unescapeHTML(s): r'&([^;]+);', lambda m: _htmlentity_transform(m.group(1)), s) +def get_subprocess_encoding(): + if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5: + # For subprocess calls, encode with locale encoding + # Refer to http://stackoverflow.com/a/9951851/35070 + encoding = preferredencoding() + else: + encoding = sys.getfilesystemencoding() + if encoding is None: + encoding = 'utf-8' + return encoding + + def encodeFilename(s, for_subprocess=False): """ @param s The name of the file @@ -382,21 +394,24 @@ def encodeFilename(s, for_subprocess=False): if sys.version_info >= (3, 0): return s - if sys.platform == 'win32' and 
sys.getwindowsversion()[0] >= 5: - # Pass '' directly to use Unicode APIs on Windows 2000 and up - # (Detecting Windows NT 4 is tricky because 'major >= 4' would - # match Windows 9x series as well. Besides, NT 4 is obsolete.) - if not for_subprocess: - return s - else: - # For subprocess calls, encode with locale encoding - # Refer to http://stackoverflow.com/a/9951851/35070 - encoding = preferredencoding() - else: - encoding = sys.getfilesystemencoding() - if encoding is None: - encoding = 'utf-8' - return s.encode(encoding, 'ignore') + # Pass '' directly to use Unicode APIs on Windows 2000 and up + # (Detecting Windows NT 4 is tricky because 'major >= 4' would + # match Windows 9x series as well. Besides, NT 4 is obsolete.) + if not for_subprocess and sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5: + return s + + return s.encode(get_subprocess_encoding(), 'ignore') + + +def decodeFilename(b, for_subprocess=False): + + if sys.version_info >= (3, 0): + return b + + if not isinstance(b, bytes): + return b + + return b.decode(get_subprocess_encoding(), 'ignore') def encodeArgument(s): @@ -408,6 +423,10 @@ def encodeArgument(s): return encodeFilename(s, True) +def decodeArgument(b): + return decodeFilename(b, True) + + def decodeOption(optval): if optval is None: return optval @@ -1791,6 +1810,59 @@ def match_filter_func(filter_str): return _match_func +def parse_dfxp_time_expr(time_expr): + if not time_expr: + return 0.0 + + mobj = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr) + if mobj: + return float(mobj.group('time_offset')) + + mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:\.\d+)?)$', time_expr) + if mobj: + return 3600 * int(mobj.group(1)) + 60 * int(mobj.group(2)) + float(mobj.group(3)) + + +def format_srt_time(seconds): + (mins, secs) = divmod(seconds, 60) + (hours, mins) = divmod(mins, 60) + millisecs = (secs - int(secs)) * 1000 + secs = int(secs) + return '%02d:%02d:%02d,%03d' % (hours, mins, secs, millisecs) + + +def 
dfxp2srt(dfxp_data): + _x = functools.partial(xpath_with_ns, ns_map={'ttml': 'http://www.w3.org/ns/ttml'}) + + def parse_node(node): + str_or_empty = functools.partial(str_or_none, default='') + + out = str_or_empty(node.text) + + for child in node: + if child.tag == _x('ttml:br'): + out += '\n' + str_or_empty(child.tail) + elif child.tag == _x('ttml:span'): + out += str_or_empty(parse_node(child)) + else: + out += str_or_empty(xml.etree.ElementTree.tostring(child)) + + return out + + dfxp = xml.etree.ElementTree.fromstring(dfxp_data.encode('utf-8')) + out = [] + paras = dfxp.findall(_x('.//ttml:p')) + + for para, index in zip(paras, itertools.count(1)): + out.append('%d\n%s --> %s\n%s\n\n' % ( + index, + format_srt_time(parse_dfxp_time_expr(para.attrib.get('begin'))), + format_srt_time(parse_dfxp_time_expr(para.attrib.get('end'))), + parse_node(para))) + + return ''.join(out) + + class PerRequestProxyHandler(compat_urllib_request.ProxyHandler): def __init__(self, proxies=None): # Set default handlers diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 3fd0e7e56..dc7d666dd 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2015.04.17' +__version__ = '2015.04.26' |