diff options
| author | remitamine <remitamine@gmail.com> | 2015-07-24 12:00:20 +0100 | 
|---|---|---|
| committer | remitamine <remitamine@gmail.com> | 2015-07-24 12:00:20 +0100 | 
| commit | 1a117a77287e7dbd4d92f29062dabcf4efb86cb5 (patch) | |
| tree | 9d4f0b3c9e06962c629ba9c403b14dace6423660 | |
| parent | 53b8247cb5b5ac4a7822c82b94ec2f1221a40625 (diff) | |
[clipfish] extract mp4 video link
| -rw-r--r-- | youtube_dl/extractor/clipfish.py | 37 | 
1 files changed, 19 insertions, 18 deletions
| diff --git a/youtube_dl/extractor/clipfish.py b/youtube_dl/extractor/clipfish.py index a5c3cb7c6..09dfaac60 100644 --- a/youtube_dl/extractor/clipfish.py +++ b/youtube_dl/extractor/clipfish.py @@ -1,13 +1,11 @@  from __future__ import unicode_literals -import re -import time -import xml.etree.ElementTree -  from .common import InfoExtractor  from ..utils import (      ExtractorError, -    parse_duration, +    int_or_none, +    js_to_json, +    determine_ext,  ) @@ -17,37 +15,40 @@ class ClipfishIE(InfoExtractor):      _VALID_URL = r'^https?://(?:www\.)?clipfish\.de/.*?/video/(?P<id>[0-9]+)/'      _TEST = {          'url': 'http://www.clipfish.de/special/game-trailer/video/3966754/fifa-14-e3-2013-trailer/', -        'md5': '2521cd644e862936cf2e698206e47385', +        'md5': '79bc922f3e8a9097b3d68a93780fd475',          'info_dict': {              'id': '3966754',              'ext': 'mp4',              'title': 'FIFA 14 - E3 2013 Trailer',              'duration': 82, -        }, -        'skip': 'Blocked in the US' +        }      }      def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) -        video_id = mobj.group(1) +        video_id = self._match_id(url) +        webpage = self._download_webpage(url, video_id) +        video_info = self._parse_json( +            js_to_json(self._html_search_regex('var videoObject = ({[^}]+?})', webpage, 'videoObject')), +            video_id +        ) +        info_url = self._parse_json( +            js_to_json(self._html_search_regex('var globalFlashvars = ({[^}]+?})', webpage, 'globalFlashvars')), +            video_id +        )['data'] -        info_url = ('http://www.clipfish.de/devxml/videoinfo/%s?ts=%d' % -                    (video_id, int(time.time())))          doc = self._download_xml(              info_url, video_id, note='Downloading info page')          title = doc.find('title').text          video_url = doc.find('filename').text -        if video_url is None: -            xml_bytes = xml.etree.ElementTree.tostring(doc) -            raise ExtractorError('Cannot find video URL in document %r' % -                                 xml_bytes)          thumbnail = doc.find('imageurl').text -        duration = parse_duration(doc.find('duration').text) +        duration = int_or_none(video_info['length']) +        formats = [{'url': video_info['videourl']},{'url': video_url}] +        self._sort_formats(formats)          return {              'id': video_id,              'title': title, -            'url': video_url, +            'formats': formats,              'thumbnail': thumbnail,              'duration': duration,          } | 
