diff options
| author | hojel <hojelei@gmail.com> | 2014-05-12 03:48:40 -0700 | 
|---|---|---|
| committer | hojel <hojelei@gmail.com> | 2014-05-12 03:48:40 -0700 | 
| commit | 749fe60c1eaa157db4360edf55cf41a10489f349 (patch) | |
| tree | 7a9b9688129b6f529fbad798a830e31c50050f75 | |
| parent | e399853d0c5784257ffcb6fba147d0b47d3f9bb6 (diff) | |
[nuvid] Add new extractor
| -rw-r--r-- | youtube_dl/extractor/__init__.py | 1 | ||||
| -rw-r--r-- | youtube_dl/extractor/nuvid.py | 39 | 
2 files changed, 40 insertions, 0 deletions
# youtube_dl/extractor/nuvid.py
#
# The extractor is registered in youtube_dl/extractor/__init__.py with:
#     from .nuvid import NuvidIE
import re

from .common import InfoExtractor


class NuvidIE(InfoExtractor):
    """Extractor for nuvid.com video pages (``www.`` and ``m.`` hosts).

    The page is always fetched from the ``m.`` host; the title, MP4 link and
    thumbnail link are then scraped from that page's markup with regular
    expressions.
    """

    _VALID_URL = r'^https?://(?:www|m)\.nuvid\.com/video/(?P<videoid>\d+)'
    # Media and thumbnail links on the page are site-relative paths; they are
    # made absolute against this host.
    _MOBILE_BASE = 'http://m.nuvid.com'
    _TEST = {
        u'url': u'http://m.nuvid.com/video/1310741/',
        u'file': u'1310741.mp4',
        u'md5': u'eab207b7ac4fccfb4e23c86201f11277',
        u'info_dict': {
            u"title": u"Horny babes show their awesome bodeis and",
            u"age_limit": 18,
        }
    }

    def _real_extract(self, url):
        """Return a one-element list holding the info dict for *url*.

        The dict carries ``id``, ``url``, ``title``, ``thumbnail``, ``ext``
        and ``age_limit``.  ``thumbnail`` is an absolute URL, or ``None``
        when the page has no thumbnail link.  A missing title or MP4 link is
        still reported as an error by ``_html_search_regex``.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('videoid')

        # Rewrite only the leading host (www. -> m.).  A plain str.replace
        # would also rewrite any '//www.' appearing later in the path/query.
        murl = re.sub(r'^(https?://)www\.', r'\1m.', url)
        webpage = self._download_webpage(murl, video_id)

        video_title = self._html_search_regex(
            r'<div class="title">\s+<h2[^>]*>([^<]+)</h2>',
            webpage, 'video_title').strip()

        video_url = self._MOBILE_BASE + self._html_search_regex(
            r'href="(/mp4/[^"]+)"[^>]*data-link_type="mp4"',
            webpage, 'video_url')

        # The thumbnail is optional metadata: do not abort the extraction if
        # it is missing, and make the site-relative path absolute just like
        # the video URL.
        thumb_path = self._html_search_regex(
            r'href="(/thumbs/[^"]+)"[^>]*data-link_type="thumbs"',
            webpage, 'video_thumb', fatal=False)
        video_thumb = self._MOBILE_BASE + thumb_path if thumb_path else None

        info = {
            'id': video_id,
            'url': video_url,
            'title': video_title,
            'thumbnail': video_thumb,
            'ext': 'mp4',
            'age_limit': 18,
        }

        return [info]
