diff options
| -rw-r--r-- | README.md | 2 | ||||
| -rwxr-xr-x | youtube_dl/YoutubeDL.py | 5 | ||||
| -rw-r--r-- | youtube_dl/extractor/__init__.py | 1 | ||||
| -rw-r--r-- | youtube_dl/extractor/common.py | 3 | ||||
| -rw-r--r-- | youtube_dl/extractor/jeuxvideo.py | 36 | ||||
| -rw-r--r-- | youtube_dl/extractor/kanalplay.py | 93 | ||||
| -rw-r--r-- | youtube_dl/extractor/nrk.py | 7 | 
7 files changed, 125 insertions, 22 deletions
@@ -515,6 +515,7 @@ youtube-dl makes the best effort to be a good command-line program, and thus sho  From a Python program, you can embed youtube-dl in a more powerful fashion, like this:  ```python +from __future__ import unicode_literals  import youtube_dl  ydl_opts = {} @@ -527,6 +528,7 @@ Most likely, you'll want to use various options. For a list of what can be done,  Here's a more complete example of a program that outputs only errors (and a short message after the download is finished), and downloads/converts the video to an mp3 file:  ```python +from __future__ import unicode_literals  import youtube_dl diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index be5b3c1ab..e779fc9a8 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -323,6 +323,11 @@ class YoutubeDL(object):                  'Set the LC_ALL environment variable to fix this.')              self.params['restrictfilenames'] = True +        if isinstance(params.get('outtmpl'), bytes): +            self.report_warning( +                'Parameter outtmpl is bytes, but should be a unicode string. ' +                'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.') +          if '%(stitle)s' in self.params.get('outtmpl', ''):              self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.') diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index ac765fdb8..4682996cd 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -232,6 +232,7 @@ from .jove import JoveIE  from .jukebox import JukeboxIE  from .jpopsukitv import JpopsukiIE  from .kaltura import KalturaIE +from .kanalplay import KanalPlayIE  from .kankan import KankanIE  from .karaoketv import KaraoketvIE  from .keezmovies import KeezMoviesIE diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index f9e8e2bad..e5245ec3f 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1062,6 +1062,9 @@ class InfoExtractor(object):      def _get_automatic_captions(self, *args, **kwargs):          raise NotImplementedError("This method must be implemented by subclasses") +    def _subtitles_timecode(self, seconds): +        return '%02d:%02d:%02d.%03d' % (seconds / 3600, (seconds % 3600) / 60, seconds % 60, (seconds % 1) * 1000) +  class SearchInfoExtractor(InfoExtractor):      """ diff --git a/youtube_dl/extractor/jeuxvideo.py b/youtube_dl/extractor/jeuxvideo.py index 8094cc2e4..d0720ff56 100644 --- a/youtube_dl/extractor/jeuxvideo.py +++ b/youtube_dl/extractor/jeuxvideo.py @@ -2,7 +2,6 @@  from __future__ import unicode_literals -import json  import re  from .common import InfoExtractor @@ -15,10 +14,10 @@ class JeuxVideoIE(InfoExtractor):          'url': 'http://www.jeuxvideo.com/reportages-videos-jeux/0004/00046170/tearaway-playstation-vita-gc-2013-tearaway-nous-presente-ses-papiers-d-identite-00115182.htm',          'md5': '046e491afb32a8aaac1f44dd4ddd54ee',          'info_dict': { -            'id': '5182', +            'id': '114765',              'ext': 'mp4', -            'title': 'GC 2013 : Tearaway nous présente ses papiers d\'identité', -            'description': 'Lorsque les développeurs de LittleBigPlanet proposent un nouveau titre, on ne peut que s\'attendre à un résultat original et fort attrayant.\n', +            'title': 'Tearaway : GC 2013 : Tearaway nous présente ses papiers d\'identité', +            'description': 'Lorsque les développeurs de LittleBigPlanet proposent un nouveau titre, on ne peut que s\'attendre à un résultat original et fort attrayant.',          },      } @@ -26,26 +25,29 @@ class JeuxVideoIE(InfoExtractor):          mobj = re.match(self._VALID_URL, url)          title = mobj.group(1)          webpage = self._download_webpage(url, title) -        xml_link = self._html_search_regex( -            r'<param name="flashvars" value="config=(.*?)" />', +        title = self._html_search_meta('name', webpage) +        config_url = self._html_search_regex( +            r'data-src="(/contenu/medias/video.php.*?)"',              webpage, 'config URL') +        config_url = 'http://www.jeuxvideo.com' + config_url          video_id = self._search_regex( -            r'http://www\.jeuxvideo\.com/config/\w+/\d+/(.*?)/\d+_player\.xml', -            xml_link, 'video ID') +            r'id=(\d+)', +            config_url, 'video ID') -        config = self._download_xml( -            xml_link, title, 'Downloading XML config') -        info_json = config.find('format.json').text -        info = json.loads(info_json)['versions'][0] +        config = self._download_json( +            config_url, title, 'Downloading JSON config') -        video_url = 'http://video720.jeuxvideo.com/' + info['file'] +        formats = [{ +            'url': source['file'], +            'format_id': source['label'], +            'resolution': source['label'], +        } for source in reversed(config['sources'])]          return {              'id': video_id, -            'title': config.find('titre_video').text, -            'ext': 'mp4', -            'url': video_url, +            'title': title, +            'formats': formats,              'description': self._og_search_description(webpage), -            'thumbnail': config.find('image').text, +            'thumbnail': config.get('image'),          } diff --git a/youtube_dl/extractor/kanalplay.py b/youtube_dl/extractor/kanalplay.py new file mode 100644 index 000000000..772d5aca4 --- /dev/null +++ b/youtube_dl/extractor/kanalplay.py @@ -0,0 +1,93 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( +    ExtractorError, +    float_or_none, +) + + +class KanalPlayIE(InfoExtractor): +    IE_DESC = 'Kanal 5/9/11 Play' +    _VALID_URL = r'https?://(?:www\.)?kanal(?P<channel_id>5|9|11)play\.se/(?:#!/)?(?:play/)?program/\d+/video/(?P<id>\d+)' +    _TESTS = [{ +        'url': 'http://www.kanal5play.se/#!/play/program/3060212363/video/3270012277', +        'md5': '', +        'info_dict': { +            'id': '2609989', +            'ext': 'flv', +            'title': 'Saknar både dusch och avlopp', +            'description': 'md5:', +            'duration': 2636.36, +        }, +    }, { +        'url': 'http://www.kanal9play.se/#!/play/program/335032/video/246042', +        'only_matching': True, +    }, { +        'url': 'http://www.kanal11play.se/#!/play/program/232835958/video/367135199', +        'only_matching': True, +    }] + +    def _fix_subtitles(self, subs): +        return '\r\n\r\n'.join( +            '%s\r\n%s --> %s\r\n%s' +            % ( +                num, +                self._subtitles_timecode(item['startMillis'] / 1000.0), +                self._subtitles_timecode(item['endMillis'] / 1000.0), +                item['text'], +            ) for num, item in enumerate(subs, 1)) + +    def _get_subtitles(self, channel_id, video_id): +        subs = self._download_json( +            'http://www.kanal%splay.se/api/subtitles/%s' % (channel_id, video_id), +            video_id, 'Downloading subtitles JSON', fatal=False) +        return {'se': [{'ext': 'srt', 'data': self._fix_subtitles(subs)}]} if subs else {} + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        video_id = mobj.group('id') +        channel_id = mobj.group('channel_id') + +        video = self._download_json( +            'http://www.kanal%splay.se/api/getVideo?format=FLASH&videoId=%s' % (channel_id, video_id), +            video_id) + +        reasons_for_no_streams = video.get('reasonsForNoStreams') +        if reasons_for_no_streams: +            raise ExtractorError( +                '%s returned error: %s' % (self.IE_NAME, '\n'.join(reasons_for_no_streams)), +                expected=True) + +        title = video['title'] +        description = video.get('description') +        duration = float_or_none(video.get('length'), 1000) +        thumbnail = video.get('posterUrl') + +        stream_base_url = video['streamBaseUrl'] + +        formats = [{ +            'url': stream_base_url, +            'play_path': stream['source'], +            'ext': 'flv', +            'tbr': float_or_none(stream.get('bitrate'), 1000), +            'rtmp_real_time': True, +        } for stream in video['streams']] +        self._sort_formats(formats) + +        subtitles = {} +        if video.get('hasSubtitle'): +            subtitles = self.extract_subtitles(channel_id, video_id) + +        return { +            'id': video_id, +            'title': title, +            'description': description, +            'thumbnail': thumbnail, +            'duration': duration, +            'formats': formats, +            'subtitles': subtitles, +        } diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py index 1e4cfa2e7..bff36f9d3 100644 --- a/youtube_dl/extractor/nrk.py +++ b/youtube_dl/extractor/nrk.py @@ -149,9 +149,6 @@ class NRKTVIE(InfoExtractor):          }      ] -    def _seconds2str(self, s): -        return '%02d:%02d:%02d.%03d' % (s / 3600, (s % 3600) / 60, s % 60, (s % 1) * 1000) -      def _debug_print(self, txt):          if self._downloader.params.get('verbose', False):              self.to_screen('[debug] %s' % txt) @@ -168,8 +165,8 @@ class NRKTVIE(InfoExtractor):          for pos, p in enumerate(ps):              begin = parse_duration(p.get('begin'))              duration = parse_duration(p.get('dur')) -            starttime = self._seconds2str(begin) -            endtime = self._seconds2str(begin + duration) +            starttime = self._subtitles_timecode(begin) +            endtime = self._subtitles_timecode(begin + duration)              srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (compat_str(pos), starttime, endtime, p.text)          return {lang: [              {'ext': 'ttml', 'url': url},  | 
