diff options
| -rwxr-xr-x | youtube_dl/YoutubeDL.py | 9 | ||||
| -rw-r--r-- | youtube_dl/__init__.py | 8 | ||||
| -rw-r--r-- | youtube_dl/extractor/__init__.py | 3 | ||||
| -rw-r--r-- | youtube_dl/extractor/generic.py | 18 | ||||
| -rw-r--r-- | youtube_dl/extractor/kaltura.py | 138 | ||||
| -rw-r--r-- | youtube_dl/extractor/lynda.py | 11 | ||||
| -rw-r--r-- | youtube_dl/extractor/musicvault.py | 27 | ||||
| -rw-r--r-- | youtube_dl/extractor/odnoklassniki.py | 85 | ||||
| -rw-r--r-- | youtube_dl/extractor/puls4.py | 88 | ||||
| -rw-r--r-- | youtube_dl/extractor/rtve.py | 5 | ||||
| -rw-r--r-- | youtube_dl/extractor/svtplay.py | 42 | ||||
| -rw-r--r-- | youtube_dl/extractor/twitch.py | 10 | ||||
| -rw-r--r-- | youtube_dl/options.py | 4 | ||||
| -rw-r--r-- | youtube_dl/postprocessor/__init__.py | 2 | ||||
| -rw-r--r-- | youtube_dl/postprocessor/ffmpeg.py | 38 | 
15 files changed, 450 insertions, 38 deletions
| diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 76fc394bc..74e426168 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1300,17 +1300,18 @@ class YoutubeDL(object):              # subtitles download errors are already managed as troubles in relevant IE              # that way it will silently go on when used with unsupporting IE              subtitles = info_dict['requested_subtitles'] +            ie = self.get_info_extractor(info_dict['extractor_key'])              for sub_lang, sub_info in subtitles.items():                  sub_format = sub_info['ext']                  if sub_info.get('data') is not None:                      sub_data = sub_info['data']                  else:                      try: -                        uf = self.urlopen(sub_info['url']) -                        sub_data = uf.read().decode('utf-8') -                    except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: +                        sub_data = ie._download_webpage( +                            sub_info['url'], info_dict['id'], note=False) +                    except ExtractorError as err:                          self.report_warning('Unable to download subtitle for "%s": %s' % -                                            (sub_lang, compat_str(err))) +                                            (sub_lang, compat_str(err.cause)))                          continue                  try:                      sub_filename = subtitles_filename(filename, sub_lang, sub_format) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 5ce201800..49f382695 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -170,6 +170,9 @@ def _real_main(argv=None):      if opts.recodevideo is not None:          if opts.recodevideo not in ['mp4', 'flv', 'webm', 'ogg', 'mkv']:              parser.error('invalid video recode format specified') +    if opts.convertsubtitles is not None: +        if opts.convertsubtitles not in ['srt', 'vtt', 'ass']: +            parser.error('invalid subtitle format specified')      if opts.date is not None:          date = DateRange.day(opts.date) @@ -223,6 +226,11 @@ def _real_main(argv=None):              'key': 'FFmpegVideoConvertor',              'preferedformat': opts.recodevideo,          }) +    if opts.convertsubtitles: +        postprocessors.append({ +            'key': 'FFmpegSubtitlesConvertor', +            'format': opts.convertsubtitles, +        })      if opts.embedsubtitles:          postprocessors.append({              'key': 'FFmpegEmbedSubtitle', diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index b064a3a7c..f3967ff7b 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -227,6 +227,7 @@ from .jeuxvideo import JeuxVideoIE  from .jove import JoveIE  from .jukebox import JukeboxIE  from .jpopsukitv import JpopsukiIE +from .kaltura import KalturaIE  from .kankan import KankanIE  from .karaoketv import KaraoketvIE  from .keezmovies import KeezMoviesIE @@ -345,6 +346,7 @@ from .ntvde import NTVDeIE  from .ntvru import NTVRuIE  from .nytimes import NYTimesIE  from .nuvid import NuvidIE +from .odnoklassniki import OdnoklassnikiIE  from .oktoberfesttv import OktoberfestTVIE  from .ooyala import OoyalaIE  from .openfilm import OpenFilmIE @@ -372,6 +374,7 @@ from .pornotube import PornotubeIE  from .pornoxo import PornoXOIE  from .promptfile import PromptFileIE  from .prosiebensat1 import ProSiebenSat1IE +from .puls4 import Puls4IE  from .pyvideo import PyvideoIE  from .quickvid import QuickVidIE  from .r7 import R7IE diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 3aff57e30..27e2bc300 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -557,6 +557,18 @@ class GenericIE(InfoExtractor):                  'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !',              }          }, +        # Kaltura embed +        { +            'url': 'http://www.monumentalnetwork.com/videos/john-carlson-postgame-2-25-15', +            'info_dict': { +                'id': '1_eergr3h1', +                'ext': 'mp4', +                'upload_date': '20150226', +                'uploader_id': 'MonumentalSports-Kaltura@perfectsensedigital.com', +                'timestamp': int, +                'title': 'John Carlson Postgame 2/25/15', +            }, +        },      ]      def report_following_redirect(self, new_url): @@ -1113,6 +1125,12 @@ class GenericIE(InfoExtractor):          if mobj is not None:              return self.url_result(mobj.group('url'), 'Zapiks') +        # Look for Kaltura embeds +        mobj = re.search( +            r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P<partner_id>[^']+)',.*?'entry_id'\s*:\s*'(?P<id>[^']+)',", webpage) +        if mobj is not None: +            return self.url_result('kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(), 'Kaltura') +          def check_video(vurl):              if YoutubeIE.suitable(vurl):                  return True diff --git a/youtube_dl/extractor/kaltura.py b/youtube_dl/extractor/kaltura.py new file mode 100644 index 000000000..d28730492 --- /dev/null +++ b/youtube_dl/extractor/kaltura.py @@ -0,0 +1,138 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..compat import compat_urllib_parse +from ..utils import ( +    ExtractorError, +    int_or_none, +) + + +class KalturaIE(InfoExtractor): +    _VALID_URL = r'''(?x) +    (?:kaltura:| +       https?://(:?(?:www|cdnapisec)\.)?kaltura\.com/index\.php/kwidget/(?:[^/]+/)*?wid/_ +    )(?P<partner_id>\d+) +    (?::| +       /(?:[^/]+/)*?entry_id/ +    )(?P<id>[0-9a-z_]+)''' +    _API_BASE = 'http://cdnapi.kaltura.com/api_v3/index.php?' +    _TESTS = [ +        { +            'url': 'kaltura:269692:1_1jc2y3e4', +            'md5': '3adcbdb3dcc02d647539e53f284ba171', +            'info_dict': { +                'id': '1_1jc2y3e4', +                'ext': 'mp4', +                'title': 'Track 4', +                'upload_date': '20131219', +                'uploader_id': 'mlundberg@wolfgangsvault.com', +                'description': 'The Allman Brothers Band, 12/16/1981', +                'thumbnail': 're:^https?://.*/thumbnail/.*', +                'timestamp': int, +            }, +        }, +        { +            'url': 'http://www.kaltura.com/index.php/kwidget/cache_st/1300318621/wid/_269692/uiconf_id/3873291/entry_id/1_1jc2y3e4', +            'only_matching': True, +        }, +        { +            'url': 'https://cdnapisec.kaltura.com/index.php/kwidget/wid/_557781/uiconf_id/22845202/entry_id/1_plr1syf3', +            'only_matching': True, +        }, +    ] + +    def _kaltura_api_call(self, video_id, actions, *args, **kwargs): +        params = actions[0] +        if len(actions) > 1: +            for i, a in enumerate(actions[1:], start=1): +                for k, v in a.items(): +                    params['%d:%s' % (i, k)] = v + +        query = compat_urllib_parse.urlencode(params) +        url = self._API_BASE + query +        data = self._download_json(url, video_id, *args, **kwargs) + +        status = data if len(actions) == 1 else data[0] +        if status.get('objectType') == 'KalturaAPIException': +            raise ExtractorError( +                '%s said: %s' % (self.IE_NAME, status['message'])) + +        return data + +    def _get_kaltura_signature(self, video_id, partner_id): +        actions = [{ +            'apiVersion': '3.1', +            'expiry': 86400, +            'format': 1, +            'service': 'session', +            'action': 'startWidgetSession', +            'widgetId': '_%s' % partner_id, +        }] +        return self._kaltura_api_call( +            video_id, actions, note='Downloading Kaltura signature')['ks'] + +    def _get_video_info(self, video_id, partner_id): +        signature = self._get_kaltura_signature(video_id, partner_id) +        actions = [ +            { +                'action': 'null', +                'apiVersion': '3.1.5', +                'clientTag': 'kdp:v3.8.5', +                'format': 1,  # JSON, 2 = XML, 3 = PHP +                'service': 'multirequest', +                'ks': signature, +            }, +            { +                'action': 'get', +                'entryId': video_id, +                'service': 'baseentry', +                'version': '-1', +            }, +            { +                'action': 'getContextData', +                'contextDataParams:objectType': 'KalturaEntryContextDataParams', +                'contextDataParams:referrer': 'http://www.kaltura.com/', +                'contextDataParams:streamerType': 'http', +                'entryId': video_id, +                'service': 'baseentry', +            }, +        ] +        return self._kaltura_api_call( +            video_id, actions, note='Downloading video info JSON') + +    def _real_extract(self, url): +        video_id = self._match_id(url) +        mobj = re.match(self._VALID_URL, url) +        partner_id, entry_id = mobj.group('partner_id'), mobj.group('id') + +        info, source_data = self._get_video_info(entry_id, partner_id) + +        formats = [{ +            'format_id': '%(fileExt)s-%(bitrate)s' % f, +            'ext': f['fileExt'], +            'tbr': f['bitrate'], +            'fps': f.get('frameRate'), +            'filesize_approx': int_or_none(f.get('size'), invscale=1024), +            'container': f.get('containerFormat'), +            'vcodec': f.get('videoCodecId'), +            'height': f.get('height'), +            'width': f.get('width'), +            'url': '%s/flavorId/%s' % (info['dataUrl'], f['id']), +        } for f in source_data['flavorAssets']] +        self._sort_formats(formats) + +        return { +            'id': video_id, +            'title': info['name'], +            'formats': formats, +            'description': info.get('description'), +            'thumbnail': info.get('thumbnailUrl'), +            'duration': info.get('duration'), +            'timestamp': info.get('createdAt'), +            'uploader_id': info.get('userId'), +            'view_count': info.get('plays'), +        } diff --git a/youtube_dl/extractor/lynda.py b/youtube_dl/extractor/lynda.py index 109055e72..5dc22da22 100644 --- a/youtube_dl/extractor/lynda.py +++ b/youtube_dl/extractor/lynda.py @@ -18,7 +18,7 @@ from ..utils import (  class LyndaIE(InfoExtractor):      IE_NAME = 'lynda'      IE_DESC = 'lynda.com videos' -    _VALID_URL = r'https?://www\.lynda\.com/[^/]+/[^/]+/\d+/(\d+)-\d\.html' +    _VALID_URL = r'https?://www\.lynda\.com/(?:[^/]+/[^/]+/\d+|player/embed)/(\d+)'      _LOGIN_URL = 'https://www.lynda.com/login/login.aspx'      _NETRC_MACHINE = 'lynda' @@ -27,7 +27,7 @@ class LyndaIE(InfoExtractor):      ACCOUNT_CREDENTIALS_HINT = 'Use --username and --password options to provide lynda.com account credentials.' -    _TEST = { +    _TESTS = [{          'url': 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html',          'md5': 'ecfc6862da89489161fb9cd5f5a6fac1',          'info_dict': { @@ -36,7 +36,10 @@ class LyndaIE(InfoExtractor):              'title': 'Using the exercise files',              'duration': 68          } -    } +    }, { +        'url': 'https://www.lynda.com/player/embed/133770?tr=foo=1;bar=g;fizz=rt&fs=0', +        'only_matching': True, +    }]      def _real_initialize(self):          self._login() @@ -152,7 +155,7 @@ class LyndaIE(InfoExtractor):                  continue              appear_time = m_current.group('timecode')              disappear_time = m_next.group('timecode') -            text = seq_current['Caption'] +            text = seq_current['Caption'].lstrip()              srt += '%s\r\n%s --> %s\r\n%s' % (str(pos), appear_time, disappear_time, text)          if srt:              return srt diff --git a/youtube_dl/extractor/musicvault.py b/youtube_dl/extractor/musicvault.py index ebb1eb8e9..0e46ac7c1 100644 --- a/youtube_dl/extractor/musicvault.py +++ b/youtube_dl/extractor/musicvault.py @@ -3,17 +3,13 @@ from __future__ import unicode_literals  import re  from .common import InfoExtractor -from ..utils import ( -    parse_duration, -    unified_strdate, -)  class MusicVaultIE(InfoExtractor):      _VALID_URL = r'https?://www\.musicvault\.com/(?P<uploader_id>[^/?#]*)/video/(?P<display_id>[^/?#]*)_(?P<id>[0-9]+)\.html'      _TEST = {          'url': 'http://www.musicvault.com/the-allman-brothers-band/video/straight-from-the-heart_1010863.html', -        'md5': '2cdbb3ae75f7fb3519821507d2fb3c15', +        'md5': '3adcbdb3dcc02d647539e53f284ba171',          'info_dict': {              'id': '1010863',              'ext': 'mp4', @@ -22,9 +18,10 @@ class MusicVaultIE(InfoExtractor):              'duration': 244,              'uploader': 'The Allman Brothers Band',              'thumbnail': 're:^https?://.*/thumbnail/.*', -            'upload_date': '19811216', +            'upload_date': '20131219',              'location': 'Capitol Theatre (Passaic, NJ)',              'description': 'Listen to The Allman Brothers Band perform Straight from the Heart at Capitol Theatre (Passaic, NJ) on Dec 16, 1981', +            'timestamp': int,          }      } @@ -43,34 +40,24 @@ class MusicVaultIE(InfoExtractor):              r'<h1.*?>(.*?)</h1>', data_div, 'uploader', fatal=False)          title = self._html_search_regex(              r'<h2.*?>(.*?)</h2>', data_div, 'title') -        upload_date = unified_strdate(self._html_search_regex( -            r'<h3.*?>(.*?)</h3>', data_div, 'uploader', fatal=False))          location = self._html_search_regex(              r'<h4.*?>(.*?)</h4>', data_div, 'location', fatal=False) -        duration = parse_duration(self._html_search_meta('duration', webpage)) - -        VIDEO_URL_TEMPLATE = 'http://cdnapi.kaltura.com/p/%(uid)s/sp/%(wid)s/playManifest/entryId/%(entry_id)s/format/url/protocol/http'          kaltura_id = self._search_regex(              r'<div id="video-detail-player" data-kaltura-id="([^"]+)"',              webpage, 'kaltura ID') -        video_url = VIDEO_URL_TEMPLATE % { -            'entry_id': kaltura_id, -            'wid': self._search_regex(r'/wid/_([0-9]+)/', webpage, 'wid'), -            'uid': self._search_regex(r'uiconf_id/([0-9]+)/', webpage, 'uid'), -        } +        wid = self._search_regex(r'/wid/_([0-9]+)/', webpage, 'wid')          return {              'id': mobj.group('id'), -            'url': video_url, -            'ext': 'mp4', +            '_type': 'url_transparent', +            'url': 'kaltura:%s:%s' % (wid, kaltura_id), +            'ie_key': 'Kaltura',              'display_id': display_id,              'uploader_id': mobj.group('uploader_id'),              'thumbnail': thumbnail,              'description': self._html_search_meta('description', webpage), -            'upload_date': upload_date,              'location': location,              'title': title,              'uploader': uploader, -            'duration': duration,          } diff --git a/youtube_dl/extractor/odnoklassniki.py b/youtube_dl/extractor/odnoklassniki.py new file mode 100644 index 000000000..155d0ee6a --- /dev/null +++ b/youtube_dl/extractor/odnoklassniki.py @@ -0,0 +1,85 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( +    unified_strdate, +    int_or_none, +    qualities, +) + + +class OdnoklassnikiIE(InfoExtractor): +    _VALID_URL = r'https?://(?:odnoklassniki|ok)\.ru/(?:video|web-api/video/moviePlayer)/(?P<id>\d+)' +    _TESTS = [{ +        'url': 'http://ok.ru/video/20079905452', +        'md5': '8e24ad2da6f387948e7a7d44eb8668fe', +        'info_dict': { +            'id': '20079905452', +            'ext': 'mp4', +            'title': 'Культура меняет нас (прекрасный ролик!))', +            'duration': 100, +            'upload_date': '20141207', +            'uploader_id': '330537914540', +            'uploader': 'Виталий Добровольский', +            'like_count': int, +            'age_limit': 0, +        }, +    }, { +        'url': 'http://ok.ru/web-api/video/moviePlayer/20079905452', +        'only_matching': True, +    }] + +    def _real_extract(self, url): +        video_id = self._match_id(url) + +        webpage = self._download_webpage(url, video_id) + +        player = self._parse_json( +            self._search_regex( +                r"OKVideo\.start\(({.+?})\s*,\s*'VideoAutoplay_player'", webpage, 'player'), +            video_id) + +        metadata = self._parse_json(player['flashvars']['metadata'], video_id) + +        movie = metadata['movie'] +        title = movie['title'] +        thumbnail = movie.get('poster') +        duration = int_or_none(movie.get('duration')) + +        author = metadata.get('author', {}) +        uploader_id = author.get('id') +        uploader = author.get('name') + +        upload_date = unified_strdate(self._html_search_meta( +            'ya:ovs:upload_date', webpage, 'upload date')) + +        age_limit = None +        adult = self._html_search_meta( +            'ya:ovs:adult', webpage, 'age limit') +        if adult: +            age_limit = 18 if adult == 'true' else 0 + +        like_count = int_or_none(metadata.get('likeCount')) + +        quality = qualities(('mobile', 'lowest', 'low', 'sd', 'hd')) + +        formats = [{ +            'url': f['url'], +            'ext': 'mp4', +            'format_id': f['name'], +            'quality': quality(f['name']), +        } for f in metadata['videos']] + +        return { +            'id': video_id, +            'title': title, +            'thumbnail': thumbnail, +            'duration': duration, +            'upload_date': upload_date, +            'uploader': uploader, +            'uploader_id': uploader_id, +            'like_count': like_count, +            'age_limit': age_limit, +            'formats': formats, +        } diff --git a/youtube_dl/extractor/puls4.py b/youtube_dl/extractor/puls4.py new file mode 100644 index 000000000..cce84b9e4 --- /dev/null +++ b/youtube_dl/extractor/puls4.py @@ -0,0 +1,88 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( +    ExtractorError, +    unified_strdate, +    int_or_none, +) + + +class Puls4IE(InfoExtractor): +    _VALID_URL = r'https?://(?:www\.)?puls4\.com/video/[^/]+/play/(?P<id>[0-9]+)' +    _TESTS = [{ +        'url': 'http://www.puls4.com/video/pro-und-contra/play/2716816', +        'md5': '49f6a6629747eeec43cef6a46b5df81d', +        'info_dict': { +            'id': '2716816', +            'ext': 'mp4', +            'title': 'Pro und Contra vom 23.02.2015', +            'description': 'md5:293e44634d9477a67122489994675db6', +            'duration': 2989, +            'upload_date': '20150224', +            'uploader': 'PULS_4', +        }, +        'skip': 'Only works from Germany', +    }, { +        'url': 'http://www.puls4.com/video/kult-spielfilme/play/1298106', +        'md5': '6a48316c8903ece8dab9b9a7bf7a59ec', +        'info_dict': { +            'id': '1298106', +            'ext': 'mp4', +            'title': 'Lucky Fritz', +        }, +        'skip': 'Only works from Germany', +    }] + +    def _real_extract(self, url): +        video_id = self._match_id(url) +        webpage = self._download_webpage(url, video_id) + +        error_message = self._html_search_regex( +            r'<div class="message-error">(.+?)</div>', +            webpage, 'error message', default=None) +        if error_message: +            raise ExtractorError( +                '%s returned error: %s' % (self.IE_NAME, error_message), expected=True) + +        real_url = self._html_search_regex( +            r'\"fsk-button\".+?href=\"([^"]+)', +            webpage, 'fsk_button', default=None) +        if real_url: +            webpage = self._download_webpage(real_url, video_id) + +        player = self._search_regex( +            r'p4_video_player(?:_iframe)?\("video_\d+_container"\s*,(.+?)\);\s*\}', +            webpage, 'player') + +        player_json = self._parse_json( +            '[%s]' % player, video_id, +            transform_source=lambda s: s.replace('undefined,', '')) + +        formats = None +        result = None + +        for v in player_json: +            if isinstance(v, list) and not formats: +                formats = [{ +                    'url': f['url'], +                    'format': 'hd' if f.get('hd') else 'sd', +                    'width': int_or_none(f.get('size_x')), +                    'height': int_or_none(f.get('size_y')), +                    'tbr': int_or_none(f.get('bitrate')), +                } for f in v] +                self._sort_formats(formats) +            elif isinstance(v, dict) and not result: +                result = { +                    'id': video_id, +                    'title': v['videopartname'].strip(), +                    'description': v.get('videotitle'), +                    'duration': int_or_none(v.get('videoduration') or v.get('episodeduration')), +                    'upload_date': unified_strdate(v.get('clipreleasetime')), +                    'uploader': v.get('channel'), +                } + +        result['formats'] = formats + +        return result diff --git a/youtube_dl/extractor/rtve.py b/youtube_dl/extractor/rtve.py index c0fd23ff1..b42442d12 100644 --- a/youtube_dl/extractor/rtve.py +++ b/youtube_dl/extractor/rtve.py @@ -8,8 +8,9 @@ import time  from .common import InfoExtractor  from ..compat import compat_urlparse  from ..utils import ( -    struct_unpack, +    float_or_none,      remove_end, +    struct_unpack,  ) @@ -67,6 +68,7 @@ class RTVEALaCartaIE(InfoExtractor):              'id': '2491869',              'ext': 'mp4',              'title': 'Balonmano - Swiss Cup masculina. Final: España-Suecia', +            'duration': 5024.566,          },      }, {          'note': 'Live stream', @@ -113,6 +115,7 @@ class RTVEALaCartaIE(InfoExtractor):              'thumbnail': info.get('image'),              'page_url': url,              'subtitles': subtitles, +            'duration': float_or_none(info.get('duration'), scale=1000),          }      def _get_subtitles(self, video_id, sub_file): diff --git a/youtube_dl/extractor/svtplay.py b/youtube_dl/extractor/svtplay.py index eadb9ccb4..433dfd1cb 100644 --- a/youtube_dl/extractor/svtplay.py +++ b/youtube_dl/extractor/svtplay.py @@ -1,6 +1,8 @@  # coding: utf-8  from __future__ import unicode_literals +import re +  from .common import InfoExtractor  from ..utils import (      determine_ext, @@ -8,23 +10,40 @@ from ..utils import (  class SVTPlayIE(InfoExtractor): -    _VALID_URL = r'https?://(?:www\.)?svtplay\.se/video/(?P<id>[0-9]+)' -    _TEST = { +    IE_DESC = 'SVT Play and Öppet arkiv' +    _VALID_URL = r'https?://(?:www\.)?(?P<host>svtplay|oppetarkiv)\.se/video/(?P<id>[0-9]+)' +    _TESTS = [{          'url': 'http://www.svtplay.se/video/2609989/sm-veckan/sm-veckan-rally-final-sasong-1-sm-veckan-rally-final', -        'md5': 'f4a184968bc9c802a9b41316657aaa80', +        'md5': 'ade3def0643fa1c40587a422f98edfd9',          'info_dict': {              'id': '2609989', -            'ext': 'mp4', +            'ext': 'flv',              'title': 'SM veckan vinter, Örebro - Rally, final',              'duration': 4500,              'thumbnail': 're:^https?://.*[\.-]jpg$', +            'age_limit': 0,          }, -    } +    }, { +        'url': 'http://www.oppetarkiv.se/video/1058509/rederiet-sasong-1-avsnitt-1-av-318', +        'md5': 'c3101a17ce9634f4c1f9800f0746c187', +        'info_dict': { +            'id': '1058509', +            'ext': 'flv', +            'title': 'Farlig kryssning', +            'duration': 2566, +            'thumbnail': 're:^https?://.*[\.-]jpg$', +            'age_limit': 0, +        }, +        'skip': 'Only works from Sweden', +    }]      def _real_extract(self, url): -        video_id = self._match_id(url) +        mobj = re.match(self._VALID_URL, url) +        video_id = mobj.group('id') +        host = mobj.group('host') +          info = self._download_json( -            'http://www.svtplay.se/video/%s?output=json' % video_id, video_id) +            'http://www.%s.se/video/%s?output=json' % (host, video_id), video_id)          title = info['context']['title']          thumbnail = info['context'].get('thumbnailImage') @@ -33,11 +52,16 @@ class SVTPlayIE(InfoExtractor):          formats = []          for vr in video_info['videoReferences']:              vurl = vr['url'] -            if determine_ext(vurl) == 'm3u8': +            ext = determine_ext(vurl) +            if ext == 'm3u8':                  formats.extend(self._extract_m3u8_formats(                      vurl, video_id,                      ext='mp4', entry_protocol='m3u8_native',                      m3u8_id=vr.get('playerType'))) +            elif ext == 'f4m': +                formats.extend(self._extract_f4m_formats( +                    vurl + '?hdcore=3.3.0', video_id, +                    f4m_id=vr.get('playerType')))              else:                  formats.append({                      'format_id': vr.get('playerType'), @@ -46,6 +70,7 @@ class SVTPlayIE(InfoExtractor):          self._sort_formats(formats)          duration = video_info.get('materialLength') +        age_limit = 18 if video_info.get('inappropriateForChildren') else 0          return {              'id': video_id, @@ -53,4 +78,5 @@ class SVTPlayIE(InfoExtractor):              'formats': formats,              'thumbnail': thumbnail,              'duration': duration, +            'age_limit': age_limit,          } diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index 4b0d8988d..4b0ce54df 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -34,7 +34,15 @@ class TwitchBaseIE(InfoExtractor):                  expected=True)      def _download_json(self, url, video_id, note='Downloading JSON metadata'): -        response = super(TwitchBaseIE, self)._download_json(url, video_id, note) +        headers = { +            'Referer': 'http://api.twitch.tv/crossdomain/receiver.html?v=2', +            'X-Requested-With': 'XMLHttpRequest', +        } +        for cookie in self._downloader.cookiejar: +            if cookie.name == 'api_token': +                headers['Twitch-Api-Token'] = cookie.value +        request = compat_urllib_request.Request(url, headers=headers) +        response = super(TwitchBaseIE, self)._download_json(request, video_id, note)          self._handle_error(response)          return response diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 886ce9613..58f811162 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -751,6 +751,10 @@ def parseOpts(overrideArguments=None):          '--exec',          metavar='CMD', dest='exec_cmd',          help='Execute a command on the file after downloading, similar to find\'s -exec syntax. Example: --exec \'adb push {} /sdcard/Music/ && rm {}\'') +    postproc.add_option( +        '--convert-subtitles', '--convert-subs', +        metavar='FORMAT', dest='convertsubtitles', default=None, +        help='Convert the subtitles to other format (currently supported: srt|ass|vtt)')      parser.add_option_group(general)      parser.add_option_group(network) diff --git a/youtube_dl/postprocessor/__init__.py b/youtube_dl/postprocessor/__init__.py index 0ffbca258..708df3dd4 100644 --- a/youtube_dl/postprocessor/__init__.py +++ b/youtube_dl/postprocessor/__init__.py @@ -11,6 +11,7 @@ from .ffmpeg import (      FFmpegMergerPP,      FFmpegMetadataPP,      FFmpegVideoConvertorPP, +    FFmpegSubtitlesConvertorPP,  )  from .xattrpp import XAttrMetadataPP  from .execafterdownload import ExecAfterDownloadPP @@ -31,6 +32,7 @@ __all__ = [      'FFmpegMergerPP',      'FFmpegMetadataPP',      'FFmpegPostProcessor', +    'FFmpegSubtitlesConvertorPP',      'FFmpegVideoConvertorPP',      'XAttrMetadataPP',  ] diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py index 398fe050e..30094c2f3 100644 --- a/youtube_dl/postprocessor/ffmpeg.py +++ b/youtube_dl/postprocessor/ffmpeg.py @@ -1,5 +1,6 @@  from __future__ import unicode_literals +import io  import os  import subprocess  import sys @@ -635,3 +636,40 @@ class FFmpegFixupM4aPP(FFmpegPostProcessor):          os.rename(encodeFilename(temp_filename), encodeFilename(filename))          return True, info + + +class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor): +    def __init__(self, downloader=None, format=None): +        super(FFmpegSubtitlesConvertorPP, self).__init__(downloader) +        self.format = format + +    def run(self, info): +        subs = info.get('requested_subtitles') +        filename = info['filepath'] +        new_ext = self.format +        new_format = new_ext +        if new_format == 'vtt': +            new_format = 'webvtt' +        if subs is None: +            self._downloader.to_screen('[ffmpeg] There aren\'t any subtitles to convert') +            return True, info +        self._downloader.to_screen('[ffmpeg] Converting subtitles') +        for lang, sub in subs.items(): +            ext = sub['ext'] +            if ext == new_ext: +                self._downloader.to_screen( +                    '[ffmpeg] Subtitle file for %s is already in the requested' +                    'format' % new_ext) +                continue +            new_file = subtitles_filename(filename, lang, new_ext) +            self.run_ffmpeg( +                subtitles_filename(filename, lang, ext), +                new_file, ['-f', new_format]) + +            with io.open(new_file, 'rt', encoding='utf-8') as f: +                subs[lang] = { +                    'ext': ext, +                    'data': f.read(), +                } + +        return True, info | 
