diff options
Diffstat (limited to 'youtube_dl')
-rw-r--r-- | youtube_dl/downloader/__init__.py | 6 | ||||
-rw-r--r-- | youtube_dl/downloader/common.py | 13 | ||||
-rw-r--r-- | youtube_dl/downloader/external.py | 16 | ||||
-rw-r--r-- | youtube_dl/downloader/rtmp.py | 21 | ||||
-rw-r--r-- | youtube_dl/downloader/rtsp.py (renamed from youtube_dl/downloader/mplayer.py) | 20 | ||||
-rw-r--r-- | youtube_dl/extractor/__init__.py | 2 | ||||
-rw-r--r-- | youtube_dl/extractor/cspan.py | 11 | ||||
-rw-r--r-- | youtube_dl/extractor/instagram.py | 10 | ||||
-rw-r--r-- | youtube_dl/extractor/mtv.py | 7 | ||||
-rw-r--r-- | youtube_dl/extractor/orf.py | 14 | ||||
-rw-r--r-- | youtube_dl/extractor/philharmoniedeparis.py | 78 | ||||
-rw-r--r-- | youtube_dl/extractor/southpark.py | 16 | ||||
-rw-r--r-- | youtube_dl/postprocessor/embedthumbnail.py | 3 | ||||
-rw-r--r-- | youtube_dl/postprocessor/ffmpeg.py | 25 | ||||
-rw-r--r-- | youtube_dl/utils.py | 102 | ||||
-rw-r--r-- | youtube_dl/version.py | 2 |
16 files changed, 269 insertions, 77 deletions
diff --git a/youtube_dl/downloader/__init__.py b/youtube_dl/downloader/__init__.py index 9fb66e2f7..f110830c4 100644 --- a/youtube_dl/downloader/__init__.py +++ b/youtube_dl/downloader/__init__.py @@ -6,7 +6,7 @@ from .f4m import F4mFD from .hls import HlsFD from .hls import NativeHlsFD from .http import HttpFD -from .mplayer import MplayerFD +from .rtsp import RtspFD from .rtmp import RtmpFD from ..utils import ( @@ -17,8 +17,8 @@ PROTOCOL_MAP = { 'rtmp': RtmpFD, 'm3u8_native': NativeHlsFD, 'm3u8': HlsFD, - 'mms': MplayerFD, - 'rtsp': MplayerFD, + 'mms': RtspFD, + 'rtsp': RtspFD, 'f4m': F4mFD, } diff --git a/youtube_dl/downloader/common.py b/youtube_dl/downloader/common.py index a0fc5ead0..97e755d4b 100644 --- a/youtube_dl/downloader/common.py +++ b/youtube_dl/downloader/common.py @@ -8,6 +8,7 @@ import time from ..compat import compat_str from ..utils import ( encodeFilename, + decodeArgument, format_bytes, timeconvert, ) @@ -353,19 +354,15 @@ class FileDownloader(object): # this interface self._progress_hooks.append(ph) - def _debug_cmd(self, args, subprocess_encoding, exe=None): + def _debug_cmd(self, args, exe=None): if not self.params.get('verbose', False): return + str_args = [decodeArgument(a) for a in args] + if exe is None: - exe = os.path.basename(args[0]) + exe = os.path.basename(str_args[0]) - if subprocess_encoding: - str_args = [ - a.decode(subprocess_encoding) if isinstance(a, bytes) else a - for a in args] - else: - str_args = args try: import pipes shell_quote = lambda args: ' '.join(map(pipes.quote, str_args)) diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py index 1673b2382..7ca2d3143 100644 --- a/youtube_dl/downloader/external.py +++ b/youtube_dl/downloader/external.py @@ -2,11 +2,11 @@ from __future__ import unicode_literals import os.path import subprocess -import sys from .common import FileDownloader from ..utils import ( encodeFilename, + encodeArgument, ) @@ -60,17 +60,9 @@ class ExternalFD(FileDownloader): def _call_downloader(self, tmpfilename, info_dict): """ Either overwrite this or implement _make_cmd """ - cmd = self._make_cmd(tmpfilename, info_dict) - - if sys.platform == 'win32' and sys.version_info < (3, 0): - # Windows subprocess module does not actually support Unicode - # on Python 2.x - # See http://stackoverflow.com/a/9951851/35070 - subprocess_encoding = sys.getfilesystemencoding() - cmd = [a.encode(subprocess_encoding, 'ignore') for a in cmd] - else: - subprocess_encoding = None - self._debug_cmd(cmd, subprocess_encoding) + cmd = [encodeArgument(a) for a in self._make_cmd(tmpfilename, info_dict)] + + self._debug_cmd(cmd) p = subprocess.Popen( cmd, stderr=subprocess.PIPE) diff --git a/youtube_dl/downloader/rtmp.py b/youtube_dl/downloader/rtmp.py index ddf5724ae..6865b5e2f 100644 --- a/youtube_dl/downloader/rtmp.py +++ b/youtube_dl/downloader/rtmp.py @@ -3,7 +3,6 @@ from __future__ import unicode_literals import os import re import subprocess -import sys import time from .common import FileDownloader @@ -11,6 +10,7 @@ from ..compat import compat_str from ..utils import ( check_executable, encodeFilename, + encodeArgument, get_exe_version, ) @@ -121,7 +121,7 @@ class RtmpFD(FileDownloader): # possible. This is part of rtmpdump's normal usage, AFAIK. basic_args = [ 'rtmpdump', '--verbose', '-r', url, - '-o', encodeFilename(tmpfilename, True)] + '-o', tmpfilename] if player_url is not None: basic_args += ['--swfVfy', player_url] if page_url is not None: @@ -154,16 +154,9 @@ class RtmpFD(FileDownloader): if not live and continue_dl: args += ['--skip', '1'] - if sys.platform == 'win32' and sys.version_info < (3, 0): - # Windows subprocess module does not actually support Unicode - # on Python 2.x - # See http://stackoverflow.com/a/9951851/35070 - subprocess_encoding = sys.getfilesystemencoding() - args = [a.encode(subprocess_encoding, 'ignore') for a in args] - else: - subprocess_encoding = None + args = [encodeArgument(a) for a in args] - self._debug_cmd(args, subprocess_encoding, exe='rtmpdump') + self._debug_cmd(args, exe='rtmpdump') RD_SUCCESS = 0 RD_FAILED = 1 @@ -180,7 +173,11 @@ class RtmpFD(FileDownloader): prevsize = os.path.getsize(encodeFilename(tmpfilename)) self.to_screen('[rtmpdump] %s bytes' % prevsize) time.sleep(5.0) # This seems to be needed - retval = run_rtmpdump(basic_args + ['-e'] + [[], ['-k', '1']][retval == RD_FAILED]) + args = basic_args + ['--resume'] + if retval == RD_FAILED: + args += ['--skip', '1'] + args = [encodeArgument(a) for a in args] + retval = run_rtmpdump(args) cursize = os.path.getsize(encodeFilename(tmpfilename)) if prevsize == cursize and retval == RD_FAILED: break diff --git a/youtube_dl/downloader/mplayer.py b/youtube_dl/downloader/rtsp.py index 72cef30ea..3eb29526c 100644 --- a/youtube_dl/downloader/mplayer.py +++ b/youtube_dl/downloader/rtsp.py @@ -10,21 +10,23 @@ from ..utils import ( ) -class MplayerFD(FileDownloader): +class RtspFD(FileDownloader): def real_download(self, filename, info_dict): url = info_dict['url'] self.report_destination(filename) tmpfilename = self.temp_name(filename) - args = [ - 'mplayer', '-really-quiet', '-vo', 'null', '-vc', 'dummy', - '-dumpstream', '-dumpfile', tmpfilename, url] - # Check for mplayer first - if not check_executable('mplayer', ['-h']): - self.report_error('MMS or RTSP download detected but "%s" could not be run' % args[0]) + if check_executable('mplayer', ['-h']): + args = [ + 'mplayer', '-really-quiet', '-vo', 'null', '-vc', 'dummy', + '-dumpstream', '-dumpfile', tmpfilename, url] + elif check_executable('mpv', ['-h']): + args = [ + 'mpv', '-really-quiet', '--vo=null', '--stream-dump=' + tmpfilename, url] + else: + self.report_error('MMS or RTSP download detected but neither "mplayer" nor "mpv" could be run. Please install any.') return False - # Download using mplayer. retval = subprocess.call(args) if retval == 0: fsize = os.path.getsize(encodeFilename(tmpfilename)) @@ -39,5 +41,5 @@ class MplayerFD(FileDownloader): return True else: self.to_stderr('\n') - self.report_error('mplayer exited with code %d' % retval) + self.report_error('%s exited with code %d' % (args[0], retval)) return False diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index a64afa1da..641c45f43 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -377,6 +377,7 @@ from .orf import ( from .parliamentliveuk import ParliamentLiveUKIE from .patreon import PatreonIE from .pbs import PBSIE +from .philharmoniedeparis import PhilharmonieDeParisIE from .phoenix import PhoenixIE from .photobucket import PhotobucketIE from .planetaplay import PlanetaPlayIE @@ -478,6 +479,7 @@ from .soundgasm import ( ) from .southpark import ( SouthParkIE, + SouthParkEsIE, SouthparkDeIE, ) from .space import SpaceIE diff --git a/youtube_dl/extractor/cspan.py b/youtube_dl/extractor/cspan.py index d516b1402..fbefd37d0 100644 --- a/youtube_dl/extractor/cspan.py +++ b/youtube_dl/extractor/cspan.py @@ -8,6 +8,7 @@ from ..utils import ( unescapeHTML, find_xpath_attr, smuggle_url, + determine_ext, ) from .senateisvp import SenateISVPIE @@ -87,6 +88,10 @@ class CSpanIE(InfoExtractor): return self.url_result(surl, 'SenateISVP', video_id, title) files = data['video']['files'] + try: + capfile = data['video']['capfile']['#text'] + except KeyError: + capfile = None entries = [{ 'id': '%s_%d' % (video_id, partnum + 1), @@ -97,6 +102,12 @@ class CSpanIE(InfoExtractor): 'description': description, 'thumbnail': thumbnail, 'duration': int_or_none(f.get('length', {}).get('#text')), + 'subtitles': { + 'en': [{ + 'url': capfile, + 'ext': determine_ext(capfile, 'dfxp') + }], + } if capfile else None, } for partnum, f in enumerate(files)] if len(entries) == 1: diff --git a/youtube_dl/extractor/instagram.py b/youtube_dl/extractor/instagram.py index b020e2621..65f6ca103 100644 --- a/youtube_dl/extractor/instagram.py +++ b/youtube_dl/extractor/instagram.py @@ -3,13 +3,11 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import ( - int_or_none, -) +from ..utils import int_or_none class InstagramIE(InfoExtractor): - _VALID_URL = r'http://instagram\.com/p/(?P<id>.*?)/' + _VALID_URL = r'https?://instagram\.com/p/(?P<id>[\da-zA-Z]+)' _TEST = { 'url': 'http://instagram.com/p/aye83DjauH/?foo=bar#abc', 'md5': '0d2da106a9d2631273e192b372806516', @@ -23,8 +21,8 @@ class InstagramIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) uploader_id = self._search_regex(r'"owner":{"username":"(.+?)"', webpage, 'uploader id', fatal=False) diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index 4430b3416..b48fac5e3 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -25,6 +25,7 @@ def _media_xml_tag(tag): class MTVServicesInfoExtractor(InfoExtractor): _MOBILE_TEMPLATE = None + _LANG = None @staticmethod def _id_from_uri(uri): @@ -169,8 +170,12 @@ class MTVServicesInfoExtractor(InfoExtractor): video_id = self._id_from_uri(uri) feed_url = self._get_feed_url(uri) data = compat_urllib_parse.urlencode({'uri': uri}) + info_url = feed_url + '?' + if self._LANG: + info_url += 'lang=%s&' % self._LANG + info_url += data idoc = self._download_xml( - feed_url + '?' + data, video_id, + info_url, video_id, 'Downloading info', transform_source=fix_xml_ampersands) return self.playlist_result( [self._get_video_info(item) for item in idoc.findall('.//item')]) diff --git a/youtube_dl/extractor/orf.py b/youtube_dl/extractor/orf.py index ca1a5bb3c..2e6c9872b 100644 --- a/youtube_dl/extractor/orf.py +++ b/youtube_dl/extractor/orf.py @@ -210,16 +210,16 @@ class ORFIPTVIE(InfoExtractor): _VALID_URL = r'http://iptv\.orf\.at/(?:#/)?stories/(?P<id>\d+)' _TEST = { - 'url': 'http://iptv.orf.at/stories/2267952', - 'md5': '26ffa4bab6dbce1eee78bbc7021016cd', + 'url': 'http://iptv.orf.at/stories/2275236/', + 'md5': 'c8b22af4718a4b4af58342529453e3e5', 'info_dict': { - 'id': '339775', + 'id': '350612', 'ext': 'flv', - 'title': 'Kreml-Kritiker Nawalny wieder frei', - 'description': 'md5:6f24e7f546d364dacd0e616a9e409236', - 'duration': 84.729, + 'title': 'Weitere Evakuierungen um Vulkan Calbuco', + 'description': 'md5:d689c959bdbcf04efeddedbf2299d633', + 'duration': 68.197, 'thumbnail': 're:^https?://.*\.jpg$', - 'upload_date': '20150306', + 'upload_date': '20150425', }, } diff --git a/youtube_dl/extractor/philharmoniedeparis.py b/youtube_dl/extractor/philharmoniedeparis.py new file mode 100644 index 000000000..6e60e5fe9 --- /dev/null +++ b/youtube_dl/extractor/philharmoniedeparis.py @@ -0,0 +1,78 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + float_or_none, + int_or_none, + parse_iso8601, + xpath_text, +) + + +class PhilharmonieDeParisIE(InfoExtractor): + IE_DESC = 'Philharmonie de Paris' + _VALID_URL = r'http://live\.philharmoniedeparis\.fr/(?:[Cc]oncert/|misc/Playlist\.ashx\?id=)(?P<id>\d+)' + _TESTS = [{ + 'url': 'http://live.philharmoniedeparis.fr/concert/1032066.html', + 'info_dict': { + 'id': '1032066', + 'ext': 'flv', + 'title': 'md5:d1f5585d87d041d07ce9434804bc8425', + 'timestamp': 1428179400, + 'upload_date': '20150404', + 'duration': 6592.278, + }, + 'params': { + # rtmp download + 'skip_download': True, + } + }, { + 'url': 'http://live.philharmoniedeparis.fr/Concert/1030324.html', + 'only_matching': True, + }, { + 'url': 'http://live.philharmoniedeparis.fr/misc/Playlist.ashx?id=1030324&track=&lang=fr', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + concert = self._download_xml( + 'http://live.philharmoniedeparis.fr/misc/Playlist.ashx?id=%s' % video_id, + video_id).find('./concert') + + formats = [] + info_dict = { + 'id': video_id, + 'title': xpath_text(concert, './titre', 'title', fatal=True), + 'formats': formats, + } + + fichiers = concert.find('./fichiers') + stream = fichiers.attrib['serveurstream'] + for fichier in fichiers.findall('./fichier'): + info_dict['duration'] = float_or_none(fichier.get('timecodefin')) + for quality, (format_id, suffix) in enumerate([('lq', ''), ('hq', '_hd')]): + format_url = fichier.get('url%s' % suffix) + if not format_url: + continue + formats.append({ + 'url': stream, + 'play_path': format_url, + 'ext': 'flv', + 'format_id': format_id, + 'width': int_or_none(concert.get('largeur%s' % suffix)), + 'height': int_or_none(concert.get('hauteur%s' % suffix)), + 'quality': quality, + }) + self._sort_formats(formats) + + date, hour = concert.get('date'), concert.get('heure') + if date and hour: + info_dict['timestamp'] = parse_iso8601( + '%s-%s-%sT%s:00' % (date[0:4], date[4:6], date[6:8], hour)) + elif date: + info_dict['upload_date'] = date + + return info_dict diff --git a/youtube_dl/extractor/southpark.py b/youtube_dl/extractor/southpark.py index c20397b3d..e3b73295c 100644 --- a/youtube_dl/extractor/southpark.py +++ b/youtube_dl/extractor/southpark.py @@ -1,3 +1,4 @@ +# encoding: utf-8 from __future__ import unicode_literals from .mtv import MTVServicesInfoExtractor @@ -5,7 +6,7 @@ from .mtv import MTVServicesInfoExtractor class SouthParkIE(MTVServicesInfoExtractor): IE_NAME = 'southpark.cc.com' - _VALID_URL = r'https?://(www\.)?(?P<url>southpark\.cc\.com/(clips|full-episodes)/(?P<id>.+?)(\?|#|$))' + _VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.cc\.com/(?:clips|full-episodes)/(?P<id>.+?)(\?|#|$))' _FEED_URL = 'http://www.southparkstudios.com/feeds/video-player/mrss' @@ -20,9 +21,20 @@ class SouthParkIE(MTVServicesInfoExtractor): }] +class SouthParkEsIE(SouthParkIE): + IE_NAME = 'southpark.cc.com:espaƱol' + _VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.cc\.com/episodios-en-espanol/(?P<id>.+?)(\?|#|$))' + _LANG = 'es' + + _TESTS = [{ + 'url': 'http://southpark.cc.com/episodios-en-espanol/s01e01-cartman-consigue-una-sonda-anal#source=351c1323-0b96-402d-a8b9-40d01b2e9bde&position=1&sort=!airdate', + 'playlist_count': 4, + }] + + class SouthparkDeIE(SouthParkIE): IE_NAME = 'southpark.de' - _VALID_URL = r'https?://(www\.)?(?P<url>southpark\.de/(clips|alle-episoden)/(?P<id>.+?)(\?|#|$))' + _VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.de/(?:clips|alle-episoden)/(?P<id>.+?)(\?|#|$))' _FEED_URL = 'http://www.southpark.de/feeds/video-player/mrss/' _TESTS = [{ diff --git a/youtube_dl/postprocessor/embedthumbnail.py b/youtube_dl/postprocessor/embedthumbnail.py index 7ba98a0ea..4868a42fd 100644 --- a/youtube_dl/postprocessor/embedthumbnail.py +++ b/youtube_dl/postprocessor/embedthumbnail.py @@ -11,6 +11,7 @@ from ..compat import ( compat_urlretrieve, ) from ..utils import ( + determine_ext, check_executable, encodeFilename, PostProcessingError, @@ -27,7 +28,7 @@ class EmbedThumbnailPP(FFmpegPostProcessor): def run(self, info): filename = info['filepath'] temp_filename = prepend_extension(filename, 'temp') - temp_thumbnail = prepend_extension(filename, 'thumb') + temp_thumbnail = filename + '.' + determine_ext(info['thumbnail']) if not info.get('thumbnail'): raise EmbedThumbnailPPError('Thumbnail was not found. Nothing to do.') diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py index 7a952963e..1765f4969 100644 --- a/youtube_dl/postprocessor/ffmpeg.py +++ b/youtube_dl/postprocessor/ffmpeg.py @@ -20,6 +20,7 @@ from ..utils import ( prepend_extension, shell_quote, subtitles_filename, + dfxp2srt, ) @@ -651,6 +652,30 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor): 'format' % new_ext) continue new_file = subtitles_filename(filename, lang, new_ext) + + if ext == 'dfxp' or ext == 'ttml': + self._downloader.report_warning( + 'You have requested to convert dfxp (TTML) subtitles into another format, ' + 'which results in style information loss') + + dfxp_file = subtitles_filename(filename, lang, ext) + srt_file = subtitles_filename(filename, lang, 'srt') + + with io.open(dfxp_file, 'rt', encoding='utf-8') as f: + srt_data = dfxp2srt(f.read()) + + with io.open(srt_file, 'wt', encoding='utf-8') as f: + f.write(srt_data) + + ext = 'srt' + subs[lang] = { + 'ext': 'srt', + 'data': srt_data + } + + if new_ext == 'srt': + continue + self.run_ffmpeg( subtitles_filename(filename, lang, ext), new_file, ['-f', new_format]) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index c69d3e165..7d15eab64 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -371,6 +371,18 @@ def unescapeHTML(s): r'&([^;]+);', lambda m: _htmlentity_transform(m.group(1)), s) +def get_subprocess_encoding(): + if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5: + # For subprocess calls, encode with locale encoding + # Refer to http://stackoverflow.com/a/9951851/35070 + encoding = preferredencoding() + else: + encoding = sys.getfilesystemencoding() + if encoding is None: + encoding = 'utf-8' + return encoding + + def encodeFilename(s, for_subprocess=False): """ @param s The name of the file @@ -382,21 +394,24 @@ def encodeFilename(s, for_subprocess=False): if sys.version_info >= (3, 0): return s - if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5: - # Pass '' directly to use Unicode APIs on Windows 2000 and up - # (Detecting Windows NT 4 is tricky because 'major >= 4' would - # match Windows 9x series as well. Besides, NT 4 is obsolete.) - if not for_subprocess: - return s - else: - # For subprocess calls, encode with locale encoding - # Refer to http://stackoverflow.com/a/9951851/35070 - encoding = preferredencoding() - else: - encoding = sys.getfilesystemencoding() - if encoding is None: - encoding = 'utf-8' - return s.encode(encoding, 'ignore') + # Pass '' directly to use Unicode APIs on Windows 2000 and up + # (Detecting Windows NT 4 is tricky because 'major >= 4' would + # match Windows 9x series as well. Besides, NT 4 is obsolete.) + if not for_subprocess and sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5: + return s + + return s.encode(get_subprocess_encoding(), 'ignore') + + +def decodeFilename(b, for_subprocess=False): + + if sys.version_info >= (3, 0): + return b + + if not isinstance(b, bytes): + return b + + return b.decode(get_subprocess_encoding(), 'ignore') def encodeArgument(s): @@ -408,6 +423,10 @@ def encodeArgument(s): return encodeFilename(s, True) +def decodeArgument(b): + return decodeFilename(b, True) + + def decodeOption(optval): if optval is None: return optval @@ -1791,6 +1810,59 @@ def match_filter_func(filter_str): return _match_func +def parse_dfxp_time_expr(time_expr): + if not time_expr: + return 0.0 + + mobj = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr) + if mobj: + return float(mobj.group('time_offset')) + + mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:\.\d+)?)$', time_expr) + if mobj: + return 3600 * int(mobj.group(1)) + 60 * int(mobj.group(2)) + float(mobj.group(3)) + + +def format_srt_time(seconds): + (mins, secs) = divmod(seconds, 60) + (hours, mins) = divmod(mins, 60) + millisecs = (secs - int(secs)) * 1000 + secs = int(secs) + return '%02d:%02d:%02d,%03d' % (hours, mins, secs, millisecs) + + +def dfxp2srt(dfxp_data): + _x = functools.partial(xpath_with_ns, ns_map={'ttml': 'http://www.w3.org/ns/ttml'}) + + def parse_node(node): + str_or_empty = functools.partial(str_or_none, default='') + + out = str_or_empty(node.text) + + for child in node: + if child.tag == _x('ttml:br'): + out += '\n' + str_or_empty(child.tail) + elif child.tag == _x('ttml:span'): + out += str_or_empty(parse_node(child)) + else: + out += str_or_empty(xml.etree.ElementTree.tostring(child)) + + return out + + dfxp = xml.etree.ElementTree.fromstring(dfxp_data.encode('utf-8')) + out = [] + paras = dfxp.findall(_x('.//ttml:p')) + + for para, index in zip(paras, itertools.count(1)): + out.append('%d\n%s --> %s\n%s\n\n' % ( + index, + format_srt_time(parse_dfxp_time_expr(para.attrib.get('begin'))), + format_srt_time(parse_dfxp_time_expr(para.attrib.get('end'))), + parse_node(para))) + + return ''.join(out) + + class PerRequestProxyHandler(compat_urllib_request.ProxyHandler): def __init__(self, proxies=None): # Set default handlers diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 3fd0e7e56..dc7d666dd 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2015.04.17' +__version__ = '2015.04.26' |