diff options
Diffstat (limited to 'youtube_dl/extractor/goshgay.py')
-rw-r--r-- | youtube_dl/extractor/goshgay.py | 72 |
1 files changed, 26 insertions, 46 deletions
diff --git a/youtube_dl/extractor/goshgay.py b/youtube_dl/extractor/goshgay.py index 7bca21ad0..b116d251d 100644 --- a/youtube_dl/extractor/goshgay.py +++ b/youtube_dl/extractor/goshgay.py @@ -1,73 +1,53 @@ # -*- coding: utf-8 -*- from __future__ import unicode_literals -import re - from .common import InfoExtractor +from ..compat import ( + compat_parse_qs, +) from ..utils import ( - compat_urlparse, - str_to_int, - ExtractorError, + parse_duration, ) -import json class GoshgayIE(InfoExtractor): - _VALID_URL = r'^(?:https?://)www.goshgay.com/video(?P<id>\d+?)($|/)' + _VALID_URL = r'https?://www\.goshgay\.com/video(?P<id>\d+?)($|/)' _TEST = { - 'url': 'http://www.goshgay.com/video4116282', - 'md5': '268b9f3c3229105c57859e166dd72b03', + 'url': 'http://www.goshgay.com/video299069/diesel_sfw_xxx_video', + 'md5': '027fcc54459dff0feb0bc06a7aeda680', 'info_dict': { - 'id': '4116282', + 'id': '299069', 'ext': 'flv', - 'title': 'md5:089833a4790b5e103285a07337f245bf', - 'thumbnail': 're:http://.*\.jpg', + 'title': 'DIESEL SFW XXX Video', + 'thumbnail': 're:^http://.*\.jpg$', + 'duration': 79, 'age_limit': 18, } } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - title = self._search_regex(r'class="video-title"><h1>(.+?)<', webpage, 'title') - player_config = self._search_regex( - r'(?s)jwplayer\("player"\)\.setup\(({.+?})\)', webpage, 'config settings') - player_vars = json.loads(player_config.replace("'", '"')) - width = str_to_int(player_vars.get('width')) - height = str_to_int(player_vars.get('height')) - config_uri = player_vars.get('config') + title = self._html_search_regex( + r'<h2>(.*?)<', webpage, 'title') + duration = parse_duration(self._html_search_regex( + r'<span class="duration">\s*-?\s*(.*?)</span>', + webpage, 'duration', fatal=False)) + family_friendly = self._html_search_meta( + 'isFamilyFriendly', webpage, default='false') - if config_uri is None: - raise ExtractorError('Missing config URI') - node = self._download_xml(config_uri, video_id, 'Downloading player config XML', - errnote='Unable to download XML') - if node is None: - raise ExtractorError('Missing config XML') - if node.tag != 'config': - raise ExtractorError('Missing config attribute') - fns = node.findall('file') - imgs = node.findall('image') - if len(fns) != 1: - raise ExtractorError('Missing media URI') - video_url = fns[0].text - if len(imgs) < 1: - thumbnail = None - else: - thumbnail = imgs[0].text - - url_comp = compat_urlparse.urlparse(url) - ref = "%s://%s%s" % (url_comp[0], url_comp[1], url_comp[2]) + flashvars = compat_parse_qs(self._html_search_regex( + r'<embed.+?id="flash-player-embed".+?flashvars="([^"]+)"', + webpage, 'flashvars')) + thumbnail = flashvars.get('url_bigthumb', [None])[0] + video_url = flashvars['flv_url'][0] return { 'id': video_id, 'url': video_url, 'title': title, - 'width': width, - 'height': height, 'thumbnail': thumbnail, - 'http_referer': ref, - 'age_limit': 18, + 'duration': duration, + 'age_limit': 0 if family_friendly == 'true' else 18, } |