diff options
Diffstat (limited to 'youtube_dl/extractor/spankwire.py')
| -rw-r--r-- | youtube_dl/extractor/spankwire.py | 60 | 
1 file changed, 38 insertions, 22 deletions
| diff --git a/youtube_dl/extractor/spankwire.py b/youtube_dl/extractor/spankwire.py index 5fa6faf18..9e8fb35b2 100644 --- a/youtube_dl/extractor/spankwire.py +++ b/youtube_dl/extractor/spankwire.py @@ -16,8 +16,9 @@ from ..aes import aes_decrypt_text  class SpankwireIE(InfoExtractor): -    _VALID_URL = r'https?://(?:www\.)?(?P<url>spankwire\.com/[^/]*/video(?P<videoid>[0-9]+)/?)' -    _TEST = { +    _VALID_URL = r'https?://(?:www\.)?(?P<url>spankwire\.com/[^/]*/video(?P<id>[0-9]+)/?)' +    _TESTS = [{ +        # download URL pattern: */<height>P_<tbr>K_<video_id>.mp4          'url': 'http://www.spankwire.com/Buckcherry-s-X-Rated-Music-Video-Crazy-Bitch/video103545/',          'md5': '8bbfde12b101204b39e4b9fe7eb67095',          'info_dict': { @@ -30,14 +31,27 @@ class SpankwireIE(InfoExtractor):              'upload_date': '20070507',              'age_limit': 18,          } -    } +    }, { +        # download URL pattern: */mp4_<format_id>_<video_id>.mp4 +        'url': 'http://www.spankwire.com/Titcums-Compiloation-I/video1921551/', +        'md5': '09b3c20833308b736ae8902db2f8d7e6', +        'info_dict': { +            'id': '1921551', +            'ext': 'mp4', +            'title': 'Titcums Compiloation I', +            'description': 'cum on tits', +            'uploader': 'dannyh78999', +            'uploader_id': '3056053', +            'upload_date': '20150822', +            'age_limit': 18, +        }, +    }]      def _real_extract(self, url):          mobj = re.match(self._VALID_URL, url) -        video_id = mobj.group('videoid') -        url = 'http://www.' + mobj.group('url') +        video_id = mobj.group('id') -        req = compat_urllib_request.Request(url) +        req = compat_urllib_request.Request('http://www.' 
+ mobj.group('url'))          req.add_header('Cookie', 'age_verified=1')          webpage = self._download_webpage(req, video_id) @@ -54,7 +68,7 @@ class SpankwireIE(InfoExtractor):              r'by:\s*<a [^>]*>(.+?)</a>',              webpage, 'uploader', fatal=False)          uploader_id = self._html_search_regex( -            r'by:\s*<a href="/Profile\.aspx\?.*?UserId=(\d+).*?"', +            r'by:\s*<a href="/(?:user/viewProfile|Profile\.aspx)\?.*?UserId=(\d+).*?"',              webpage, 'uploader id', fatal=False)          upload_date = unified_strdate(self._html_search_regex(              r'</a> on (.+?) at \d+:\d+', @@ -67,9 +81,10 @@ class SpankwireIE(InfoExtractor):              r'<span\s+id="spCommentCount"[^>]*>([\d,\.]+)</span>',              webpage, 'comment count', fatal=False)) -        video_urls = list(map( -            compat_urllib_parse_unquote, -            re.findall(r'playerData\.cdnPath[0-9]{3,}\s*=\s*(?:encodeURIComponent\()?["\']([^"\']+)["\']', webpage))) +        videos = re.findall( +            r'playerData\.cdnPath([0-9]{3,})\s*=\s*(?:encodeURIComponent\()?["\']([^"\']+)["\']', webpage) +        heights = [int(video[0]) for video in videos] +        video_urls = list(map(compat_urllib_parse_unquote, [video[1] for video in videos]))          if webpage.find('flashvars\.encrypted = "true"') != -1:              password = self._search_regex(                  r'flashvars\.video_title = "([^"]+)', @@ -79,21 +94,22 @@ class SpankwireIE(InfoExtractor):                  video_urls))          formats = [] -        for video_url in video_urls: +        for height, video_url in zip(heights, video_urls):              path = compat_urllib_parse_urlparse(video_url).path -            format = path.split('/')[4].split('_')[:2] -            resolution, bitrate_str = format -            format = "-".join(format) -            height = int(resolution.rstrip('Pp')) -            tbr = int(bitrate_str.rstrip('Kk')) -            formats.append({ +          
  _, quality = path.split('/')[4].split('_')[:2] +            f = {                  'url': video_url, -                'resolution': resolution, -                'format': format, -                'tbr': tbr,                  'height': height, -                'format_id': format, -            }) +            } +            tbr = self._search_regex(r'^(\d+)[Kk]$', quality, 'tbr', default=None) +            if tbr: +                f.update({ +                    'tbr': int(tbr), +                    'format_id': '%dp' % height, +                }) +            else: +                f['format_id'] = quality +            formats.append(f)          self._sort_formats(formats)          age_limit = self._rta_search(webpage) | 
