diff options
author | Philipp Hagemeister <phihag@phihag.de> | 2014-07-11 11:00:37 +0200 |
---|---|---|
committer | Philipp Hagemeister <phihag@phihag.de> | 2014-07-11 11:00:37 +0200 |
commit | 6f66eedc5dc84ac72b36722771ec625f4ccecef7 (patch) | |
tree | 8c6c953123606f7aa8a3446a8767d388563e28f4 /youtube_dl/extractor/goshgay.py | |
parent | 4094b6e36d03a6230689657d87de7a58f3f0b581 (diff) | |
parent | d6aa1967ad5b91cb12b306a9797c7c5097d54472 (diff) |
Merge remote-tracking branch 'MikeCol/goshgay'
Diffstat (limited to 'youtube_dl/extractor/goshgay.py')
-rw-r--r-- | youtube_dl/extractor/goshgay.py | 72 |
1 files changed, 72 insertions, 0 deletions
diff --git a/youtube_dl/extractor/goshgay.py b/youtube_dl/extractor/goshgay.py new file mode 100644 index 000000000..3f31ec896 --- /dev/null +++ b/youtube_dl/extractor/goshgay.py @@ -0,0 +1,72 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + compat_urlparse, + str_to_int, + ExtractorError, +) +import json + + +class GoshgayIE(InfoExtractor): + _VALID_URL = r'^(?:https?://)www.goshgay.com/video(?P<id>\d+?)($|/)' + _TEST = { + 'url': 'http://www.goshgay.com/video4116282', + 'md5': '268b9f3c3229105c57859e166dd72b03', + 'info_dict': { + 'id': '4116282', + 'ext': 'flv', + 'title': 'md5:089833a4790b5e103285a07337f245bf', + 'thumbnail': 're:http://.*\.jpg', + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + webpage = self._download_webpage(url, video_id) + title = self._search_regex(r'class="video-title"><h1>(.+?)<', webpage, 'title') + + player_config = self._search_regex(r'jwplayer\("player"\)\.setup\(({.+?})\)', webpage, 'config settings', + fatal=True, flags=re.S) + player_vars = json.loads(player_config.replace("'", '"')) + width = str_to_int(player_vars.get('width')) + height = str_to_int(player_vars.get('height')) + config_uri = player_vars.get('config') + + if config_uri is None: + raise ExtractorError('Missing config URI') + node = self._download_xml(config_uri, video_id, 'Downloading player config XML', + errnote='Unable to download XML') + if node is None: + raise ExtractorError('Missing config XML') + if node.tag != 'config': + raise ExtractorError('Missing config attribute') + fns = node.findall('file') + imgs = node.findall('image') + if len(fns) != 1: + raise ExtractorError('Missing media URI') + video_url = fns[0].text + if len(imgs) < 1: + thumbnail = None + else: + thumbnail = imgs[0].text + + url_comp = compat_urlparse.urlparse(url) + ref = "%s://%s%s" % (url_comp[0], url_comp[1], url_comp[2]) + + return { + 'id': video_id, + 'url': video_url, + 'title': title, + 'width': width, + 'height': height, + 'thumbnail': thumbnail, + 'http_referer': ref, + 'age_limit': 18, + } |