diff options
| author | Sergey M․ <dstftw@gmail.com> | 2014-08-02 17:56:01 +0700 | 
|---|---|---|
| committer | Sergey M․ <dstftw@gmail.com> | 2014-08-02 17:56:01 +0700 | 
| commit | 7a5e7b303c6186f01e475789de7ef8cc3f9eea94 (patch) | |
| tree | 80e74ce3f922dbee0c293f0682132be74a6640ea | |
| parent | 61aabb9d701a7d67ddec55e6c21810fd59db5e92 (diff) | |
[ubu] Add extractor (Close #3418)
| -rw-r--r-- | youtube_dl/extractor/__init__.py | 1 | ||||
| -rw-r--r-- | youtube_dl/extractor/ubu.py | 56 | 
2 files changed, 57 insertions, 0 deletions
| diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index f6c0ee795..e73bea881 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -327,6 +327,7 @@ from .tutv import TutvIE  from .tvigle import TvigleIE  from .tvp import TvpIE  from .tvplay import TVPlayIE +from .ubu import UbuIE  from .udemy import (      UdemyIE,      UdemyCourseIE diff --git a/youtube_dl/extractor/ubu.py b/youtube_dl/extractor/ubu.py new file mode 100644 index 000000000..0182d67ec --- /dev/null +++ b/youtube_dl/extractor/ubu.py @@ -0,0 +1,56 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import int_or_none + + +class UbuIE(InfoExtractor): +    _VALID_URL = r'http://(?:www\.)?ubu\.com/film/(?P<id>[\da-z_-]+)\.html' +    _TEST = { +        'url': 'http://ubu.com/film/her_noise.html', +        'md5': '8edd46ee8aa6b265fb5ed6cf05c36bc9', +        'info_dict': { +            'id': 'her_noise', +            'ext': 'mp4', +            'title': 'Her Noise - The Making Of (2007)', +            'duration': 3600, +        }, +    } + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        video_id = mobj.group('id') + +        webpage = self._download_webpage(url, video_id) + +        title = self._html_search_regex( +            r'<title>.+?Film & Video: ([^<]+)</title>', webpage, 'title') + +        duration = int_or_none(self._html_search_regex( +            r'Duration: (\d+) minutes', webpage, 'duration', fatal=False, default=None)) +        if duration: +            duration *= 60 + +        formats = [] + +        FORMAT_REGEXES = [ +            ['sq', r"'flashvars'\s*,\s*'file=([^']+)'"], +            ['hq', r'href="(http://ubumexico\.centro\.org\.mx/video/[^"]+)"'] +        ] + +        for format_id, format_regex in FORMAT_REGEXES: +            m = re.search(format_regex, webpage) +            if m: +                formats.append({ +      
              'url': m.group(1), +                    'format_id': format_id, +                }) + +        return { +            'id': video_id, +            'title': title, +            'duration': duration, +            'formats': formats, +        } | 
