diff options
| -rw-r--r-- | youtube_dl/extractor/__init__.py | 1 | ||||
| -rw-r--r-- | youtube_dl/extractor/onionstudios.py | 67 | 
2 files changed, 68 insertions, 0 deletions
# coding: utf-8
# Module youtube_dl/extractor/onionstudios.py.
#
# The change that adds this module also registers the extractor in
# youtube_dl/extractor/__init__.py, between the oktoberfesttv and ooyala
# imports, with:
#     from .onionstudios import OnionStudiosIE
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import determine_ext


class OnionStudiosIE(InfoExtractor):
    """Extractor for onionstudios.com video pages and embed URLs."""

    # Two URL shapes are accepted: /videos/<slug>-<id> and /embed?...id=<id>.
    # The trailing (?!-) requires that the captured digits are not
    # immediately followed by another dash.
    _VALID_URL = r'https?://(?:www\.)?onionstudios\.com/(?:videos/[^/]+-|embed\?.*\bid=)(?P<id>\d+)(?!-)'

    _TESTS = [{
        'url': 'http://www.onionstudios.com/videos/hannibal-charges-forward-stops-for-a-cocktail-2937',
        'md5': 'd4851405d31adfadf71cd7a487b765bb',
        'info_dict': {
            'id': '2937',
            'ext': 'mp4',
            'title': 'Hannibal charges forward, stops for a cocktail',
            'description': 'md5:545299bda6abf87e5ec666548c6a9448',
            # Raw string: "\." is not a valid escape in a normal literal.
            'thumbnail': r're:^https?://.*\.jpg$',
            'uploader': 'The A.V. Club',
            'uploader_id': 'TheAVClub',
        },
    }, {
        'url': 'http://www.onionstudios.com/embed?id=2855&autoplay=true',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the info dict for the video referenced by *url*.

        The video id is taken from the URL, the embed page for that id is
        downloaded, and the formats and metadata are scraped from that
        page's markup with regular expressions.
        """
        video_id = self._match_id(url)

        # Both accepted URL shapes are resolved through the embed page.
        webpage = self._download_webpage(
            'http://www.onionstudios.com/embed?id=%s' % video_id, video_id)

        # m3u8 always results in 403, so those sources are left out.
        formats = [
            {'url': src}
            for src in re.findall(r'<source[^>]+src="([^"]+)"', webpage)
            if determine_ext(src) != 'm3u8'
        ]
        self._sort_formats(formats)

        # The title is looked up without a default or fatal=False, unlike
        # the optional fields below.
        title = self._search_regex(
            r'share_title\s*=\s*"([^"]+)"', webpage, 'title')
        description = self._search_regex(
            r'share_description\s*=\s*"([^"]+)"', webpage,
            'description', default=None)
        # default=None (not False) keeps a missing thumbnail/uploader from
        # showing up in the result as the boolean False.
        thumbnail = self._search_regex(
            r'poster="([^"]+)"', webpage, 'thumbnail', default=None)

        uploader_id = self._search_regex(
            r'twitter_handle\s*=\s*"([^"]+)"',
            webpage, 'uploader id', fatal=False)
        uploader = self._search_regex(
            r'window\.channelName\s*=\s*"Embedded:([^"]+)"',
            webpage, 'uploader', default=None)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'uploader': uploader,
            'uploader_id': uploader_id,
            'formats': formats,
        }
