diff options
| -rw-r--r-- | youtube_dl/extractor/__init__.py | 1 | ||||
| -rw-r--r-- | youtube_dl/extractor/sharesix.py | 91 | 
2 files changed, 92 insertions, 0 deletions
| diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 7adca7df9..c76fb3727 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -296,6 +296,7 @@ from .scivee import SciVeeIE  from .screencast import ScreencastIE  from .servingsys import ServingSysIE  from .shared import SharedIE +from .sharesix import ShareSixIE  from .sina import SinaIE  from .slideshare import SlideshareIE  from .slutload import SlutloadIE diff --git a/youtube_dl/extractor/sharesix.py b/youtube_dl/extractor/sharesix.py new file mode 100644 index 000000000..7531e8325 --- /dev/null +++ b/youtube_dl/extractor/sharesix.py @@ -0,0 +1,91 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( +    compat_urllib_parse, +    compat_urllib_request, +    parse_duration, +) + + +class ShareSixIE(InfoExtractor): +    _VALID_URL = r'https?://(?:www\.)?sharesix\.com/(?:f/)?(?P<id>[0-9a-zA-Z]+)' +    _TESTS = [ +        { +            'url': 'http://sharesix.com/f/OXjQ7Y6', +            'md5': '9e8e95d8823942815a7d7c773110cc93', +            'info_dict': { +                'id': 'OXjQ7Y6', +                'ext': 'mp4', +                'title': 'big_buck_bunny_480p_surround-fix.avi', +                'duration': 596, +                'width': 854, +                'height': 480, +            }, +        }, +        { +            'url': 'http://sharesix.com/lfrwoxp35zdd', +            'md5': 'dd19f1435b7cec2d7912c64beeee8185', +            'info_dict': { +                'id': 'lfrwoxp35zdd', +                'ext': 'flv', +                'title': 'WhiteBoard___a_Mac_vs_PC_Parody_Cartoon.mp4.flv', +                'duration': 65, +                'width': 1280, +                'height': 720, +            }, +        } +    ] + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        video_id = mobj.group('id') + +        fields = { +            'method_free': 'Free' +        } +        post = compat_urllib_parse.urlencode(fields) +        req = compat_urllib_request.Request(url, post) +        req.add_header('Content-type', 'application/x-www-form-urlencoded') + +        webpage = self._download_webpage(req, video_id, +                                         'Downloading video page') + +        video_url = self._search_regex( +            r"var\slnk1\s=\s'([^']+)'", webpage, 'video URL') +        title = self._html_search_regex( +            r'(?s)<dt>Filename:</dt>.+?<dd>(.+?)</dd>', webpage, 'title') +        duration = parse_duration( +            self._search_regex( +                r'(?s)<dt>Length:</dt>.+?<dd>(.+?)</dd>', +                webpage, +                'duration', +                fatal=False +            ) +        ) + +        m = re.search( +            r'''(?xs)<dt>Width\sx\sHeight</dt>.+? +                     <dd>(?P<width>\d+)\sx\s(?P<height>\d+)</dd>''', +            webpage +        ) +        width = height = None +        if m: +            width, height = int(m.group('width')), int(m.group('height')) + +        formats = [{ +            'format_id': 'sd', +            'url': video_url, +            'width': width, +            'height': height, +        }] + +        return { +            'id': video_id, +            'title': title, +            'duration': duration, +            'formats': formats, +        } | 
