diff options
author | hojel <hojelei@gmail.com> | 2014-05-12 03:48:40 -0700 |
---|---|---|
committer | hojel <hojelei@gmail.com> | 2014-05-12 03:48:40 -0700 |
commit | 749fe60c1eaa157db4360edf55cf41a10489f349 (patch) | |
tree | 7a9b9688129b6f529fbad798a830e31c50050f75 /youtube_dl/extractor | |
parent | e399853d0c5784257ffcb6fba147d0b47d3f9bb6 (diff) |
[nuvid] Add new extractor
Diffstat (limited to 'youtube_dl/extractor')
-rw-r--r-- | youtube_dl/extractor/__init__.py | 1 | ||||
-rw-r--r-- | youtube_dl/extractor/nuvid.py | 65 |
2 files changed, 66 insertions, 0 deletions
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 9529077c7..287044180 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -194,6 +194,7 @@ from .nowvideo import NowVideoIE
 from .nrk import NRKIE
 from .ntv import NTVIE
 from .nytimes import NYTimesIE
+from .nuvid import NuvidIE
 from .oe1 import OE1IE
 from .ooyala import OoyalaIE
 from .orf import ORFIE
diff --git a/youtube_dl/extractor/nuvid.py b/youtube_dl/extractor/nuvid.py
new file mode 100644
index 000000000..2e5198c1a
--- /dev/null
+++ b/youtube_dl/extractor/nuvid.py
@@ -0,0 +1,65 @@
+import re
+
+from .common import InfoExtractor
+
+
+class NuvidIE(InfoExtractor):
+    """Extractor for nuvid.com video pages.
+
+    Both the ``www`` and ``m`` hosts are accepted, but scraping is always
+    done against the mobile (``m.``) page.
+    """
+
+    _VALID_URL = r'^https?://(?:www|m)\.nuvid\.com/video/(?P<videoid>\d+)'
+    _TEST = {
+        u'url': u'http://m.nuvid.com/video/1310741/',
+        u'file': u'1310741.mp4',
+        u'md5': u'eab207b7ac4fccfb4e23c86201f11277',
+        u'info_dict': {
+            u"title": u"Horny babes show their awesome bodeis and",
+            u"age_limit": 18,
+        }
+    }
+
+    def _real_extract(self, url):
+        """Scrape the mobile page for ``url`` and describe its video.
+
+        Returns a one-element list holding the info dict (id, url, title,
+        thumbnail, ext, age_limit).
+        """
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('videoid')
+
+        # The regexes below are matched against the mobile page, so rewrite
+        # a www. URL to its m. counterpart before downloading.
+        murl = url.replace('//www.', '//m.')
+        webpage = self._download_webpage(murl, video_id)
+
+        video_title = self._html_search_regex(
+            r'<div class="title">\s+<h2[^>]*>([^<]+)</h2>',
+            webpage, 'video_title').strip()
+
+        # Both links are captured as site-relative paths ("/mp4/...",
+        # "/thumbs/..."), so each needs the host prepended.
+        base_url = 'http://m.nuvid.com'
+        video_url = base_url + self._html_search_regex(
+            r'href="(/mp4/[^"]+)"[^>]*data-link_type="mp4"',
+            webpage, 'video_url')
+
+        # A missing thumbnail must not abort an otherwise good extraction.
+        video_thumb = self._html_search_regex(
+            r'href="(/thumbs/[^"]+)"[^>]*data-link_type="thumbs"',
+            webpage, 'video_thumb', fatal=False)
+        if video_thumb:
+            video_thumb = base_url + video_thumb
+
+        info = {
+            'id': video_id,
+            'url': video_url,
+            'title': video_title,
+            'thumbnail': video_thumb,
+            'ext': 'mp4',
+            'age_limit': 18,
+        }
+
+        return [info]