diff options
author | Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com> | 2015-01-05 19:14:50 +0100 |
---|---|---|
committer | Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com> | 2015-01-05 19:19:01 +0100 |
commit | 8f9529cd0559bdbe6c568cfd765f9129666a77be (patch) | |
tree | dcd05998fb01d2563b7445cfa47056dabde99a7b | |
parent | f4bca0b348fe1f4f65c939b496973062180e0c4f (diff) |
[motorsport] Fix extraction and make trailing '/' optional
They directly embed a youtube video now.
-rw-r--r-- | youtube_dl/extractor/motorsport.py | 60 |
1 files changed, 23 insertions, 37 deletions
```diff
diff --git a/youtube_dl/extractor/motorsport.py b/youtube_dl/extractor/motorsport.py
index f5ca74e97..c1a482dba 100644
--- a/youtube_dl/extractor/motorsport.py
+++ b/youtube_dl/extractor/motorsport.py
@@ -1,63 +1,49 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
-import hashlib
-import json
-import time
-
 from .common import InfoExtractor
 from ..compat import (
-    compat_parse_qs,
-    compat_str,
-)
-from ..utils import (
-    int_or_none,
+    compat_urlparse,
 )
 
 
 class MotorsportIE(InfoExtractor):
     IE_DESC = 'motorsport.com'
-    _VALID_URL = r'http://www\.motorsport\.com/[^/?#]+/video/(?:[^/?#]+/)(?P<id>[^/]+)/(?:$|[?#])'
+    _VALID_URL = r'http://www\.motorsport\.com/[^/?#]+/video/(?:[^/?#]+/)(?P<id>[^/]+)/?(?:$|[?#])'
     _TEST = {
         'url': 'http://www.motorsport.com/f1/video/main-gallery/red-bull-racing-2014-rules-explained/',
-        'md5': '5592cb7c5005d9b2c163df5ac3dc04e4',
         'info_dict': {
-            'id': '7063',
+            'id': '2-T3WuR-KMM',
             'ext': 'mp4',
             'title': 'Red Bull Racing: 2014 Rules Explained',
-            'duration': 207,
+            'duration': 208,
             'description': 'A new clip from Red Bull sees Daniel Ricciardo and Sebastian Vettel explain the 2014 Formula One regulations – which are arguably the most complex the sport has ever seen.',
-            'uploader': 'rainiere',
-            'thumbnail': r're:^http://.*motorsport\.com/.+\.jpg$'
-        }
+            'uploader': 'mcomstaff',
+            'uploader_id': 'UC334JIYKkVnyFoNCclfZtHQ',
+            'upload_date': '20140903',
+            'thumbnail': r're:^https?://.+\.jpg$'
+        },
+        'add_ie': ['Youtube'],
+        'params': {
+            'skip_download': True,
+        },
     }
 
     def _real_extract(self, url):
         display_id = self._match_id(url)
         webpage = self._download_webpage(url, display_id)
 
-        flashvars_code = self._html_search_regex(
-            r'<embed id="player".*?flashvars="([^"]+)"', webpage, 'flashvars')
-        flashvars = compat_parse_qs(flashvars_code)
-        params = json.loads(flashvars['parameters'][0])
-
-        e = compat_str(int(time.time()) + 24 * 60 * 60)
-        base_video_url = params['location'] + '?e=' + e
-        s = 'h3hg713fh32'
-        h = hashlib.md5((s + base_video_url).encode('utf-8')).hexdigest()
-        video_url = base_video_url + '&h=' + h
-
-        uploader = self._html_search_regex(
-            r'(?s)<span class="label">Video by: </span>(.*?)</a>', webpage,
-            'uploader', fatal=False)
+        iframe_path = self._html_search_regex(
+            r'<iframe id="player_iframe"[^>]+src="([^"]+)"', webpage,
+            'iframe path')
+        iframe = self._download_webpage(
+            compat_urlparse.urljoin(url, iframe_path), display_id,
+            'Downloading iframe')
+        youtube_id = self._search_regex(
+            r'www.youtube.com/embed/(.{11})', iframe, 'youtube id')
 
         return {
-            'id': params['video_id'],
+            '_type': 'url_transparent',
             'display_id': display_id,
-            'title': params['title'],
-            'url': video_url,
-            'description': params.get('description'),
-            'thumbnail': params.get('main_thumb'),
-            'duration': int_or_none(params.get('duration')),
-            'uploader': uploader,
+            'url': 'https://youtube.com/watch?v=%s' % youtube_id,
         }
```