diff options
| -rw-r--r-- | youtube_dl/extractor/__init__.py | 1 | ||||
| -rw-r--r-- | youtube_dl/extractor/mofosex.py | 49 | 
2 files changed, 50 insertions, 0 deletions
| diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 0d933986f..045d4447a 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -81,6 +81,7 @@ from .metacafe import MetacafeIE  from .metacritic import MetacriticIE  from .mit import TechTVMITIE, MITIE  from .mixcloud import MixcloudIE +from .mofosex import MofosexIE  from .mtv import MTVIE  from .muzu import MuzuTVIE  from .myspass import MySpassIE diff --git a/youtube_dl/extractor/mofosex.py b/youtube_dl/extractor/mofosex.py new file mode 100644 index 000000000..a0c926cd1 --- /dev/null +++ b/youtube_dl/extractor/mofosex.py @@ -0,0 +1,49 @@ +import os +import re + +from .common import InfoExtractor +from ..utils import ( +    compat_urllib_parse_urlparse, +    compat_urllib_request, +    compat_urllib_parse, +) + +class MofosexIE(InfoExtractor): +    _VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>mofosex\.com/videos/(?P<videoid>[0-9]+)/.*?\.html)' +    _TEST = { +        u'url': u'http://www.mofosex.com/videos/5018/japanese-teen-music-video.html', +        u'file': u'5018.mp4', +        u'md5': u'1b2eb47ac33cc75d4a80e3026b613c5a', +        u'info_dict': { +            u"title": u"Japanese Teen Music Video", +            u"age_limit": 18, +        } +    } + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        video_id = mobj.group('videoid') +        url = 'http://www.' + mobj.group('url') + +        req = compat_urllib_request.Request(url) +        req.add_header('Cookie', 'age_verified=1') +        webpage = self._download_webpage(req, video_id) + +        video_title = self._html_search_regex(r'<h1>(.+?)<', webpage, u'title') +        video_url = compat_urllib_parse.unquote(self._html_search_regex(r'flashvars.video_url = \'([^\']+)', webpage, u'video_url')) +        path = compat_urllib_parse_urlparse( video_url ).path +        extension = os.path.splitext( path )[1][1:] +        format = path.split('/')[5].split('_')[:2] +        format = "-".join( format ) + +        age_limit = self._rta_search(webpage) + +        return { +            'id': video_id, +            'title': video_title, +            'url': video_url, +            'ext': extension, +            'format': format, +            'format_id': format, +            'age_limit': age_limit, +        } | 
