diff options
| -rw-r--r-- | youtube_dl/extractor/__init__.py | 1 | ||||
| -rw-r--r-- | youtube_dl/extractor/npo.py | 4 | ||||
| -rw-r--r-- | youtube_dl/extractor/ssa.py | 66 | 
3 files changed, 70 insertions, 1 deletions
| diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 20bc73dce..7adcc4dbf 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -460,6 +460,7 @@ from .sport5 import Sport5IE
 from .sportbox import SportBoxIE
 from .sportdeutschland import SportDeutschlandIE
 from .srmediathek import SRMediathekIE
+from .ssa import SSAIE
 from .stanfordoc import StanfordOpenClassroomIE
 from .steam import SteamIE
 from .streamcloud import StreamcloudIE
diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py
index 9c01eb0af..557dffa46 100644
--- a/youtube_dl/extractor/npo.py
+++ b/youtube_dl/extractor/npo.py
@@ -219,7 +219,8 @@ class NPOLiveIE(NPOBaseIE):
         if streams:
             for stream in streams:
                 stream_type = stream.get('type').lower()
-                if stream_type == 'ss':
+                # smooth streaming is not supported
+                if stream_type in ['ss', 'ms']:
                     continue
                 stream_info = self._download_json(
                     'http://ida.omroep.nl/aapi/?stream=%s&token=%s&type=jsonp'
@@ -242,6 +243,7 @@ class NPOLiveIE(NPOBaseIE):
                 else:
                     formats.append({
                         'url': stream_url,
+                        'preference': -10,
                     })
 
         self._sort_formats(formats)
diff --git a/youtube_dl/extractor/ssa.py b/youtube_dl/extractor/ssa.py
new file mode 100644
index 000000000..13101c714
--- /dev/null
+++ b/youtube_dl/extractor/ssa.py
@@ -0,0 +1,66 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    unescapeHTML,
+    parse_duration,
+)
+
+
+class SSAIE(InfoExtractor):
+    """Extractor for film pages on ssa.nls.uk (RTMP streams)."""
+
+    _VALID_URL = r'http://ssa\.nls\.uk/film/(?P<id>\d+)'
+    _TEST = {
+        'url': 'http://ssa.nls.uk/film/3561',
+        'info_dict': {
+            'id': '3561',
+            'ext': 'flv',
+            # NOTE(review): confirm the exact whitespace in this title
+            # against the live page.
+            'title': 'SHETLAND WOOL',
+            'description': 'md5:c5afca6871ad59b4271e7704fe50ab04',
+            'duration': 900,
+            'thumbnail': r're:^https?://.*\.jpg$',
+        },
+        'params': {
+            # rtmp download
+            'skip_download': True,
+        },
+    }
+
+    def _real_extract(self, url):
+        """Scrape the film page at `url` and return its info dict."""
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+
+        # Matches "'streamer', '<rtmp url>'" in the page source
+        streamer = self._search_regex(
+            r"'streamer'\s*,\s*'(rtmp[^']+)'", webpage, 'streamer')
+        # Everything before the last dot of 'file' is used as the play path
+        play_path = self._search_regex(
+            r"'file'\s*,\s*'([^']+)'", webpage, 'file').rpartition('.')[0]
+
+        def search_field(field_name, fatal=False):
+            # Metadata is read from field_title/field_content span pairs
+            return self._search_regex(
+                r'<span\s+class="field_title">%s:</span>\s*<span\s+class="field_content">([^<]+)</span>' % field_name,
+                webpage, field_name, fatal=fatal)
+
+        title = unescapeHTML(search_field('Title', fatal=True)).strip('()[]')
+        description = unescapeHTML(search_field('Description'))
+        duration = parse_duration(search_field('Running time'))
+        thumbnail = self._search_regex(
+            r"'image'\s*,\s*'([^']+)'", webpage, 'thumbnails', fatal=False)
+
+        return {
+            'id': video_id,
+            'url': streamer,
+            'play_path': play_path,
+            'ext': 'flv',
+            'title': title,
+            'description': description,
+            'duration': duration,
+            'thumbnail': thumbnail,
+        }
|
