7 files changed, 153 insertions, 47 deletions
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index 76fc394bc..74e426168 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -1300,17 +1300,18 @@ class YoutubeDL(object):
             # subtitles download errors are already managed as troubles in relevant IE
             # that way it will silently go on when used with unsupporting IE
             subtitles = info_dict['requested_subtitles']
+            ie = self.get_info_extractor(info_dict['extractor_key'])
             for sub_lang, sub_info in subtitles.items():
                 sub_format = sub_info['ext']
                 if sub_info.get('data') is not None:
                     sub_data = sub_info['data']
                 else:
                     try:
-                        uf = self.urlopen(sub_info['url'])
-                        sub_data = uf.read().decode('utf-8')
-                    except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
+                        sub_data = ie._download_webpage(
+                            sub_info['url'], info_dict['id'], note=False)
+                    except ExtractorError as err:
                         self.report_warning('Unable to download subtitle for "%s": %s' %
-                                            (sub_lang, compat_str(err)))
+                                            (sub_lang, compat_str(err.cause)))
                         continue
                 try:
                     sub_filename = subtitles_filename(filename, sub_lang, sub_format)
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py
index 5ce201800..49f382695 100644
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -170,6 +170,9 @@ def _real_main(argv=None):
     if opts.recodevideo is not None:
         if opts.recodevideo not in ['mp4', 'flv', 'webm', 'ogg', 'mkv']:
             parser.error('invalid video recode format specified')
+    if opts.convertsubtitles is not None:
+        if opts.convertsubtitles not in ['srt', 'vtt', 'ass']:
+            parser.error('invalid subtitle format specified')
 
     if opts.date is not None:
         date = DateRange.day(opts.date)
@@ -223,6 +226,11 @@ def _real_main(argv=None):
             'key': 'FFmpegVideoConvertor',
             'preferedformat': opts.recodevideo,
         })
+    if opts.convertsubtitles:
+        postprocessors.append({
+            'key': 'FFmpegSubtitlesConvertor',
+            'format': opts.convertsubtitles,
+        })
     if opts.embedsubtitles:
         postprocessors.append({
             'key': 'FFmpegEmbedSubtitle',
diff --git a/youtube_dl/extractor/puls4.py b/youtube_dl/extractor/puls4.py
index 70dedbff3..cce84b9e4 100644
--- a/youtube_dl/extractor/puls4.py
+++ b/youtube_dl/extractor/puls4.py
@@ -1,61 +1,88 @@
 # -*- coding: utf-8 -*-
-
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
-
-import re
+from ..utils import (
+    ExtractorError,
+    unified_strdate,
+    int_or_none,
+)
 
 
 class Puls4IE(InfoExtractor):
-
-    _VALID_URL = r'https?://www.puls4.com/video/.+?/play/(?P<id>[0-9]+)'
+    """Extractor for videos hosted on puls4.com."""
+    _VALID_URL = r'https?://(?:www\.)?puls4\.com/video/[^/]+/play/(?P<id>[0-9]+)'
     _TESTS = [{
         'url': 'http://www.puls4.com/video/pro-und-contra/play/2716816',
         'md5': '49f6a6629747eeec43cef6a46b5df81d',
         'info_dict': {
             'id': '2716816',
             'ext': 'mp4',
-            'title': 'Pro und Contra vom 23.02.2015'}},
-        {
+            'title': 'Pro und Contra vom 23.02.2015',
+            'description': 'md5:293e44634d9477a67122489994675db6',
+            'duration': 2989,
+            'upload_date': '20150224',
+            'uploader': 'PULS_4',
+        },
+        'skip': 'Only works from Germany',
+    }, {
         'url': 'http://www.puls4.com/video/kult-spielfilme/play/1298106',
         'md5': '6a48316c8903ece8dab9b9a7bf7a59ec',
         'info_dict': {
             'id': '1298106',
             'ext': 'mp4',
-            'title': 'Lucky Fritz'}}
-    ]
+            'title': 'Lucky Fritz',
+        },
+        'skip': 'Only works from Germany',
+    }]
 
     def _real_extract(self, url):
+        """Return the info dict built from the p4_video_player() call found in the page."""
         video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
-        # if fsk-button
-        real_url = self._html_search_regex(r'\"fsk-button\".+?href=\"([^"]+)',
-                                           webpage, 'fsk_button', default=None)
+        error_message = self._html_search_regex(
+            r'<div class="message-error">(.+?)</div>',
+            webpage, 'error message', default=None)
+        if error_message:
+            raise ExtractorError(
+                '%s returned error: %s' % (self.IE_NAME, error_message), expected=True)
+
+        real_url = self._html_search_regex(
+            r'\"fsk-button\".+?href=\"([^"]+)',
+            webpage, 'fsk_button', default=None)
         if real_url:
             webpage = self._download_webpage(real_url, video_id)
 
-        title = self._html_search_regex(
-            r'<div id="bg_brandableContent">.+?<h1>(.+?)</h1>',
-            webpage, 'title', flags=re.DOTALL)
-
-        sd_url = self._html_search_regex(
-            r'{\"url\":\"([^"]+?)\",\"hd\":false',
-            webpage, 'sd_url').replace('\\', '')
-
-        formats = [{'format_id': 'sd', 'url': sd_url, 'quality': -2}]
-
-        hd_url = self._html_search_regex(
-            r'{\"url\":\"([^"]+?)\",\"hd\":true',
-            webpage, 'hd_url', default=None)
-        if hd_url:
-            hd_url = hd_url.replace('\\', '')
-            formats.append({'format_id': 'hd', 'url': hd_url, 'quality': -1})
-
-        return {
-            'id': video_id,
-            'title': title,
-            'formats': formats,
-            'ext': 'mp4'
-        }
+        player = self._search_regex(
+            r'p4_video_player(?:_iframe)?\("video_\d+_container"\s*,(.+?)\);\s*\}',
+            webpage, 'player')
+        player_json = self._parse_json(
+            '[%s]' % player, video_id,
+            transform_source=lambda s: s.replace('undefined,', ''))
+
+        formats = result = None
+        # a list argument carries the stream variants, a dict argument the metadata
+        for v in player_json:
+            if isinstance(v, list) and not formats:
+                formats = [{
+                    'url': f['url'],
+                    'format': 'hd' if f.get('hd') else 'sd',
+                    'width': int_or_none(f.get('size_x')),
+                    'height': int_or_none(f.get('size_y')),
+                    'tbr': int_or_none(f.get('bitrate')),
+                } for f in v]
+                self._sort_formats(formats)
+            elif isinstance(v, dict) and not result:
+                result = {
+                    'id': video_id,
+                    'title': v['videopartname'].strip(),
+                    'description': v.get('videotitle'),
+                    'duration': int_or_none(v.get('videoduration') or v.get('episodeduration')),
+                    'upload_date': unified_strdate(v.get('clipreleasetime')),
+                    'uploader': v.get('channel'),
+                }
+
+        if result is None or not formats:
+            raise ExtractorError('Unable to extract player data')
+        return dict(result, formats=formats)
diff --git a/youtube_dl/extractor/svtplay.py b/youtube_dl/extractor/svtplay.py
index eadb9ccb4..433dfd1cb 100644
--- a/youtube_dl/extractor/svtplay.py
+++ b/youtube_dl/extractor/svtplay.py
@@ -1,6 +1,8 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
+import re
+
 from .common import InfoExtractor
 from ..utils import (
     determine_ext,
@@ -8,23 +10,40 @@ from ..utils import (
 
 
 class SVTPlayIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?svtplay\.se/video/(?P<id>[0-9]+)'
-    _TEST = {
+    IE_DESC = 'SVT Play and Öppet arkiv'
+    _VALID_URL = r'https?://(?:www\.)?(?P<host>svtplay|oppetarkiv)\.se/video/(?P<id>[0-9]+)'
+    _TESTS = [{
         'url': 'http://www.svtplay.se/video/2609989/sm-veckan/sm-veckan-rally-final-sasong-1-sm-veckan-rally-final',
-        'md5': 'f4a184968bc9c802a9b41316657aaa80',
+        'md5': 'ade3def0643fa1c40587a422f98edfd9',
         'info_dict': {
             'id': '2609989',
-            'ext': 'mp4',
+            'ext': 'flv',
             'title': 'SM veckan vinter, Örebro - Rally, final',
             'duration': 4500,
             'thumbnail': 're:^https?://.*[\.-]jpg$',
+            'age_limit': 0,
         },
-    }
+    }, {
+        'url': 'http://www.oppetarkiv.se/video/1058509/rederiet-sasong-1-avsnitt-1-av-318',
+        'md5': 'c3101a17ce9634f4c1f9800f0746c187',
+        'info_dict': {
+            'id': '1058509',
+            'ext': 'flv',
+            'title': 'Farlig kryssning',
+            'duration': 2566,
+            'thumbnail': 're:^https?://.*[\.-]jpg$',
+            'age_limit': 0,
+        },
+        'skip': 'Only works from Sweden',
+    }]
 
     def _real_extract(self, url):
-        video_id = self._match_id(url)
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        host = mobj.group('host')
+
         info = self._download_json(
-            'http://www.svtplay.se/video/%s?output=json' % video_id, video_id)
+            'http://www.%s.se/video/%s?output=json' % (host, video_id), video_id)
 
         title = info['context']['title']
         thumbnail = info['context'].get('thumbnailImage')
@@ -33,11 +52,16 @@ class SVTPlayIE(InfoExtractor):
         formats = []
         for vr in video_info['videoReferences']:
             vurl = vr['url']
-            if determine_ext(vurl) == 'm3u8':
+            ext = determine_ext(vurl)
+            if ext == 'm3u8':
                 formats.extend(self._extract_m3u8_formats(
                     vurl, video_id,
                     ext='mp4', entry_protocol='m3u8_native',
                     m3u8_id=vr.get('playerType')))
+            elif ext == 'f4m':
+                formats.extend(self._extract_f4m_formats(
+                    vurl + '?hdcore=3.3.0', video_id,
+                    f4m_id=vr.get('playerType')))
             else:
                 formats.append({
                     'format_id': vr.get('playerType'),
@@ -46,6 +70,7 @@ class SVTPlayIE(InfoExtractor):
         self._sort_formats(formats)
 
         duration = video_info.get('materialLength')
+        age_limit = 18 if video_info.get('inappropriateForChildren') else 0
 
         return {
             'id': video_id,
@@ -53,4 +78,5 @@ class SVTPlayIE(InfoExtractor):
             'formats': formats,
             'thumbnail': thumbnail,
             'duration': duration,
+            'age_limit': age_limit,
         }
diff --git a/youtube_dl/options.py b/youtube_dl/options.py
index 886ce9613..58f811162 100644
--- a/youtube_dl/options.py
+++ b/youtube_dl/options.py
@@ -751,6 +751,10 @@ def parseOpts(overrideArguments=None):
         '--exec',
         metavar='CMD', dest='exec_cmd',
         help='Execute a command on the file after downloading, similar to find\'s -exec syntax. Example: --exec \'adb push {} /sdcard/Music/ && rm {}\'')
+    postproc.add_option(
+        '--convert-subtitles', '--convert-subs',
+        metavar='FORMAT', dest='convertsubtitles', default=None,
+        help='Convert the subtitles to other format (currently supported: srt|ass|vtt)')
 
     parser.add_option_group(general)
     parser.add_option_group(network)
diff --git a/youtube_dl/postprocessor/__init__.py b/youtube_dl/postprocessor/__init__.py
index 0ffbca258..708df3dd4 100644
--- a/youtube_dl/postprocessor/__init__.py
+++ b/youtube_dl/postprocessor/__init__.py
@@ -11,6 +11,7 @@ from .ffmpeg import (
     FFmpegMergerPP,
     FFmpegMetadataPP,
     FFmpegVideoConvertorPP,
+    FFmpegSubtitlesConvertorPP,
 )
 from .xattrpp import XAttrMetadataPP
 from .execafterdownload import ExecAfterDownloadPP
@@ -31,6 +32,7 @@ __all__ = [
     'FFmpegMergerPP',
     'FFmpegMetadataPP',
     'FFmpegPostProcessor',
+    'FFmpegSubtitlesConvertorPP',
     'FFmpegVideoConvertorPP',
     'XAttrMetadataPP',
 ]
diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py
index 398fe050e..30094c2f3 100644
--- a/youtube_dl/postprocessor/ffmpeg.py
+++ b/youtube_dl/postprocessor/ffmpeg.py
@@ -1,5 +1,6 @@
 from __future__ import unicode_literals
 
+import io
 import os
 import subprocess
 import sys
@@ -635,3 +636,40 @@ class FFmpegFixupM4aPP(FFmpegPostProcessor):
         os.rename(encodeFilename(temp_filename), encodeFilename(filename))
 
         return True, info
+
+
+class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor):
+    """Convert the downloaded subtitle files to another format using ffmpeg."""
+
+    def __init__(self, downloader=None, format=None):
+        super(FFmpegSubtitlesConvertorPP, self).__init__(downloader)
+        self.format = format
+
+    def run(self, info):
+        """Convert each entry of info['requested_subtitles']; return (True, info)."""
+        subs = info.get('requested_subtitles')
+        filename = info['filepath']
+        new_ext = self.format
+        new_format = 'webvtt' if new_ext == 'vtt' else new_ext  # value passed to ffmpeg's -f
+        if subs is None:
+            self._downloader.to_screen('[ffmpeg] There aren\'t any subtitles to convert')
+            return True, info
+        self._downloader.to_screen('[ffmpeg] Converting subtitles')
+        for lang, sub in subs.items():
+            ext = sub['ext']
+            if ext == new_ext:
+                self._downloader.to_screen(
+                    '[ffmpeg] Subtitle file for %s is already in the requested '
+                    'format' % new_ext)
+                continue
+            new_file = subtitles_filename(filename, lang, new_ext)
+            self.run_ffmpeg(
+                subtitles_filename(filename, lang, ext),
+                new_file, ['-f', new_format])
+            with io.open(new_file, 'rt', encoding='utf-8') as f:
+                subs[lang] = {
+                    'ext': new_ext,  # the entry now describes the converted file
+                    'data': f.read(),
+                }
+
+        return True, info