diff options
| -rwxr-xr-x | youtube_dl/InfoExtractors.py | 69 | ||||
| -rw-r--r-- | youtube_dl/extractor/dailymotion.py | 77 | 
2 files changed, 78 insertions, 68 deletions
| diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index fcc5d02cf..b32bd3d94 100755 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -24,80 +24,13 @@ from .utils import *  from .extractor.common import InfoExtractor, SearchInfoExtractor +from .extractor.dailymotion import DailymotionIE  from .extractor.metacafe import MetacafeIE  from .extractor.statigram import StatigramIE  from .extractor.youtube import YoutubeIE, YoutubePlaylistIE, YoutubeUserIE, YoutubeChannelIE -class DailymotionIE(InfoExtractor): -    """Information Extractor for Dailymotion""" - -    _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^/]+)' -    IE_NAME = u'dailymotion' - -    def _real_extract(self, url): -        # Extract id and simplified title from URL -        mobj = re.match(self._VALID_URL, url) -        if mobj is None: -            raise ExtractorError(u'Invalid URL: %s' % url) - -        video_id = mobj.group(1).split('_')[0].split('?')[0] - -        video_extension = 'mp4' - -        # Retrieve video webpage to extract further information -        request = compat_urllib_request.Request(url) -        request.add_header('Cookie', 'family_filter=off') -        webpage = self._download_webpage(request, video_id) - -        # Extract URL, uploader and title from webpage -        self.report_extraction(video_id) -        mobj = re.search(r'\s*var flashvars = (.*)', webpage) -        if mobj is None: -            raise ExtractorError(u'Unable to extract media URL') -        flashvars = compat_urllib_parse.unquote(mobj.group(1)) - -        for key in ['hd1080URL', 'hd720URL', 'hqURL', 'sdURL', 'ldURL', 'video_url']: -            if key in flashvars: -                max_quality = key -                self.to_screen(u'Using %s' % key) -                break -        else: -            raise ExtractorError(u'Unable to extract video URL') - -        mobj = re.search(r'"' + max_quality + r'":"(.+?)"', flashvars) -        if mobj is None: -            raise ExtractorError(u'Unable to extract video URL') - -        video_url = compat_urllib_parse.unquote(mobj.group(1)).replace('\\/', '/') - -        # TODO: support choosing qualities - -        mobj = re.search(r'<meta property="og:title" content="(?P<title>[^"]*)" />', webpage) -        if mobj is None: -            raise ExtractorError(u'Unable to extract title') -        video_title = unescapeHTML(mobj.group('title')) - -        video_uploader = None -        video_uploader = self._search_regex([r'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a>', -                                             # Looking for official user -                                             r'<(?:span|a) .*?rel="author".*?>([^<]+?)</'], -                                            webpage, 'video uploader') - -        video_upload_date = None -        mobj = re.search(r'<div class="[^"]*uploaded_cont[^"]*" title="[^"]*">([0-9]{2})-([0-9]{2})-([0-9]{4})</div>', webpage) -        if mobj is not None: -            video_upload_date = mobj.group(3) + mobj.group(2) + mobj.group(1) - -        return [{ -            'id':       video_id, -            'url':      video_url, -            'uploader': video_uploader, -            'upload_date':  video_upload_date, -            'title':    video_title, -            'ext':      video_extension, -        }]  class PhotobucketIE(InfoExtractor): diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py new file mode 100644 index 000000000..34306b073 --- /dev/null +++ b/youtube_dl/extractor/dailymotion.py @@ -0,0 +1,77 @@ +import re + +from .common import InfoExtractor +from ..utils import ( +    compat_urllib_request, +    compat_urllib_parse, + +    ExtractorError, +    unescapeHTML, +) + +class DailymotionIE(InfoExtractor): +    """Information Extractor for Dailymotion""" + +    _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^/]+)' +    IE_NAME = u'dailymotion' + +    def _real_extract(self, url): +        # Extract id and simplified title from URL +        mobj = re.match(self._VALID_URL, url) + +        video_id = mobj.group(1).split('_')[0].split('?')[0] + +        video_extension = 'mp4' + +        # Retrieve video webpage to extract further information +        request = compat_urllib_request.Request(url) +        request.add_header('Cookie', 'family_filter=off') +        webpage = self._download_webpage(request, video_id) + +        # Extract URL, uploader and title from webpage +        self.report_extraction(video_id) +        mobj = re.search(r'\s*var flashvars = (.*)', webpage) +        if mobj is None: +            raise ExtractorError(u'Unable to extract media URL') +        flashvars = compat_urllib_parse.unquote(mobj.group(1)) + +        for key in ['hd1080URL', 'hd720URL', 'hqURL', 'sdURL', 'ldURL', 'video_url']: +            if key in flashvars: +                max_quality = key +                self.to_screen(u'Using %s' % key) +                break +        else: +            raise ExtractorError(u'Unable to extract video URL') + +        mobj = re.search(r'"' + max_quality + r'":"(.+?)"', flashvars) +        if mobj is None: +            raise ExtractorError(u'Unable to extract video URL') + +        video_url = compat_urllib_parse.unquote(mobj.group(1)).replace('\\/', '/') + +        # TODO: support choosing qualities + +        mobj = re.search(r'<meta property="og:title" content="(?P<title>[^"]*)" />', webpage) +        if mobj is None: +            raise ExtractorError(u'Unable to extract title') +        video_title = unescapeHTML(mobj.group('title')) + +        video_uploader = None +        video_uploader = self._search_regex([r'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a>', +                                             # Looking for official user +                                             r'<(?:span|a) .*?rel="author".*?>([^<]+?)</'], +                                            webpage, 'video uploader') + +        video_upload_date = None +        mobj = re.search(r'<div class="[^"]*uploaded_cont[^"]*" title="[^"]*">([0-9]{2})-([0-9]{2})-([0-9]{4})</div>', webpage) +        if mobj is not None: +            video_upload_date = mobj.group(3) + mobj.group(2) + mobj.group(1) + +        return [{ +            'id':       video_id, +            'url':      video_url, +            'uploader': video_uploader, +            'upload_date':  video_upload_date, +            'title':    video_title, +            'ext':      video_extension, +        }] | 
