diff options
Diffstat (limited to 'youtube_dl/extractor/dailymotion.py')
| -rw-r--r-- | youtube_dl/extractor/dailymotion.py | 77 | 
1 files changed, 77 insertions, 0 deletions
import re

from .common import InfoExtractor
from ..utils import (
    compat_urllib_request,
    compat_urllib_parse,

    ExtractorError,
    unescapeHTML,
)


class DailymotionIE(InfoExtractor):
    """Information Extractor for Dailymotion"""

    _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^/]+)'
    IE_NAME = u'dailymotion'

    # Keys looked up in the page's ``flashvars`` blob, in the order tried.
    # The first key that carries a quoted string value is used.
    _QUALITY_KEYS = ('hd1080URL', 'hd720URL', 'hqURL', 'sdURL', 'ldURL', 'video_url')

    def _select_video_url(self, flashvars):
        """Return the media URL stored under the first usable quality key.

        A key is usable when ``flashvars`` contains it as ``"<key>":"<value>"``.
        Keys that are absent, or present without a quoted string value, are
        skipped so that the next entry of ``_QUALITY_KEYS`` can be tried.

        Raises ExtractorError when no key yields a URL.
        """
        for key in self._QUALITY_KEYS:
            mobj = re.search(r'"' + key + r'":"(.+?)"', flashvars)
            if mobj is None:
                continue
            self.to_screen(u'Using %s' % key)
            # The captured value is percent-decoded and its escaped
            # slashes ("\/") are turned back into plain "/".
            return compat_urllib_parse.unquote(mobj.group(1)).replace('\\/', '/')
        raise ExtractorError(u'Unable to extract video URL')

    def _real_extract(self, url):
        """Extract the media URL and metadata for a single video page.

        Downloads ``url``, reads the ``var flashvars = ...`` assignment to
        find the media URL, and scrapes title, uploader and upload date from
        the HTML.

        Returns a one-element list holding a dict with the keys ``id``,
        ``url``, ``uploader``, ``upload_date``, ``title`` and ``ext``.
        ``uploader`` and ``upload_date`` are None when they cannot be found.

        Raises ExtractorError when the URL does not match ``_VALID_URL`` or
        when the flashvars blob, the media URL or the title is missing.
        """
        # Extract id and simplified title from URL
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)

        # Keep only what precedes the first '_' and the first '?'.
        video_id = mobj.group(1).split('_')[0].split('?')[0]

        video_extension = 'mp4'

        # Retrieve video webpage to extract further information
        request = compat_urllib_request.Request(url)
        # NOTE(review): presumably disables Dailymotion's family filter so
        # restricted videos are still served - confirm.
        request.add_header('Cookie', 'family_filter=off')
        webpage = self._download_webpage(request, video_id)

        # Extract URL, uploader and title from webpage
        self.report_extraction(video_id)
        mobj = re.search(r'\s*var flashvars = (.*)', webpage)
        if mobj is None:
            raise ExtractorError(u'Unable to extract media URL')
        flashvars = compat_urllib_parse.unquote(mobj.group(1))

        # TODO: support choosing qualities
        video_url = self._select_video_url(flashvars)

        mobj = re.search(r'<meta property="og:title" content="(?P<title>[^"]*)" />', webpage)
        if mobj is None:
            raise ExtractorError(u'Unable to extract title')
        video_title = unescapeHTML(mobj.group('title'))

        # The uploader is optional metadata: a page without the expected
        # markup must not abort the extraction, hence fatal=False.
        video_uploader = self._search_regex([r'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a>',
                                             # Looking for official user
                                             r'<(?:span|a) .*?rel="author".*?>([^<]+?)</'],
                                            webpage, 'video uploader', fatal=False)

        video_upload_date = None
        mobj = re.search(r'<div class="[^"]*uploaded_cont[^"]*" title="[^"]*">([0-9]{2})-([0-9]{2})-([0-9]{4})</div>', webpage)
        if mobj is not None:
            # Reorder the captured fields to <4 digits><2nd pair><1st pair>
            # (presumably DD-MM-YYYY on the page -> YYYYMMDD).
            video_upload_date = mobj.group(3) + mobj.group(2) + mobj.group(1)

        return [{
            'id':       video_id,
            'url':      video_url,
            'uploader': video_uploader,
            'upload_date':  video_upload_date,
            'title':    video_title,
            'ext':      video_extension,
        }]
