diff options
| -rw-r--r-- | youtube_dl/extractor/extractors.py | 5 | ||||
| -rw-r--r-- | youtube_dl/extractor/videa.py | 115 | 
2 files changed, 54 insertions, 66 deletions
| diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 9898b9803..3b0bff0d7 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1064,10 +1064,7 @@ from .vice import (  from .viceland import VicelandIE  from .vidbit import VidbitIE  from .viddler import ViddlerIE -from .videa import ( -    VideaIE, -    VideaEmbedIE, -) +from .videa import VideaIE  from .videodetective import VideoDetectiveIE  from .videofyme import VideofyMeIE  from .videomega import VideoMegaIE diff --git a/youtube_dl/extractor/videa.py b/youtube_dl/extractor/videa.py index 3d2e7e31c..039add86b 100644 --- a/youtube_dl/extractor/videa.py +++ b/youtube_dl/extractor/videa.py @@ -1,28 +1,32 @@  # coding: utf-8  from __future__ import unicode_literals -import re -  from .common import InfoExtractor  from ..utils import (      int_or_none, -    parse_duration, +    mimetype2ext, +    parse_codecs,      xpath_element,      xpath_text, -    xpath_attr, -    urlencode_postdata, -    unescapeHTML,  )  class VideaIE(InfoExtractor): -    _VALID_URL = r'https?://(?:.+?\.)?videa\.hu/videok/(?P<id>[^#?]+)' +    _VALID_URL = r'''(?x) +                    https?:// +                        videa\.hu/ +                        (?: +                            videok/(?:[^/]+/)*[^?#&]+-| +                            player\?.*?\bv=| +                            player/v/ +                        ) +                        (?P<id>[^?#&]+) +                    '''      _TESTS = [{          'url': 'http://videa.hu/videok/allatok/az-orult-kigyasz-285-kigyot-kigyo-8YfIAjxwWGwT8HVQ',          'md5': '97a7af41faeaffd9f1fc864a7c7e7603',          'info_dict': {              'id': '8YfIAjxwWGwT8HVQ', -            'display_id': '8YfIAjxwWGwT8HVQ',              'ext': 'mp4',              'title': 'Az őrült kígyász 285 kígyót enged szabadon',              'thumbnail': 'http://videa.hu/static/still/1.4.1.1007274.1204470.3', @@ -31,68 +35,55 @@ class VideaIE(InfoExtractor):      }, {          'url': 'http://videa.hu/videok/origo/jarmuvek/supercars-elozes-jAHDWfWSJH5XuFhH',          'only_matching': True, +    }, { +        'url': 'http://videa.hu/player?v=8YfIAjxwWGwT8HVQ', +        'only_matching': True, +    }, { +        'url': 'http://videa.hu/player/v/8YfIAjxwWGwT8HVQ?autoplay=1', +        'only_matching': True,      }]      def _real_extract(self, url):          video_id = self._match_id(url) -        video_data = self._download_json("http://videa.hu/oembed/?" + urlencode_postdata({"url": url.split('?')[0], "format": "json"}), video_id) -        video_url = self._search_regex( -            r'src="(.+?)"', video_data.get('html'), 'embed url') +        info = self._download_xml( +            'http://videa.hu/videaplayer_get_xml.php', video_id, +            query={'v': video_id}) -        return { -            '_type': 'url_transparent', -            'url': video_url, -            'ie_key': 'VideaEmbed' -        } +        video = xpath_element(info, './/video', 'video', fatal=True) +        sources = xpath_element(info, './/video_sources', 'sources', fatal=True) -class VideaEmbedIE(InfoExtractor): -    _VALID_URL = r'(?P<protocol>https?:)(?P<baseurl>//(?:.+?\.)?videa\.hu)/player(?:\?v=|/v/)(?P<id>[^/#?]+)'; -    _TESTS = [{ -        'url': 'http://videa.hu/player?v=8YfIAjxwWGwT8HVQ', -        'md5': '97a7af41faeaffd9f1fc864a7c7e7603', -        'info_dict': { -            'id': '8YfIAjxwWGwT8HVQ', -            'ext': 'mp4', -            'title': 'Az őrült kígyász 285 kígyót enged szabadon', -            'thumbnail': 'http://videa.hu/static/still/1.4.1.1007274.1204470.3', -            'duration': 21 -        }, -    }, { -        'url': 'http://videa.hu/player?v=jAHDWfWSJH5XuFhH', -        'only_matching': True, -    }]; +        title = xpath_text(video, './title', fatal=True) -    @staticmethod -    def _extract_url(webpage): -        mobj = re.search( -            r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:.+?\.)?videa\.hu/player(?:\?v=|/v/)[^/#?]+)\1', -            webpage) -        if mobj: -            return mobj.group('url') +        formats = [] +        for source in sources.findall('./video_source'): +            source_url = source.text +            if not source_url: +                continue +            f = parse_codecs(source.get('codecs')) +            f.update({ +                'url': source_url, +                'ext': mimetype2ext(source.get('mimetype')) or 'mp4', +                'format_id': source.get('name'), +                'width': int_or_none(source.get('width')), +                'height': int_or_none(source.get('height')), +            }) +            formats.append(f) +        self._sort_formats(formats) -    def _real_extract(self, url): -        protocol, base_url, display_id = re.search(self._VALID_URL, url).groups() -        xml = self._download_xml(protocol + base_url + "/flvplayer_get_video_xml.php?v=" + display_id, display_id) +        thumbnail = xpath_text(video, './poster_src') +        duration = int_or_none(xpath_text(video, './duration')) -        medias = [] -         -        for xml_media in xml.findall('video') + xml.findall('audio'): -            media_url = protocol + xpath_attr(xml_media, 'versions/version', 'video_url') -            media = { -                'id': display_id, -                'ext': 'mp4', -                'title': xpath_text(xml_media, 'title', 'title', True), -                'duration': parse_duration(xpath_text(xml_media, 'duration')), -                'thumbnail': protocol + xpath_text(xml_media, 'still', 'still', True), -                'url': media_url, -            } -            medias.append(media) +        age_limit = None +        is_adult = xpath_text(video, './is_adult_content', default=None) +        if is_adult: +            age_limit = 18 if is_adult == '1' else 0 -        if len(medias) > 1: -            self._downloader.report_warning( -                'found multiple medias; please ' -                'report this with the video URL to http://yt-dl.org/bug') -        if not medias: -            raise ExtractorError('No media entries found') -        return medias[0] +        return { +            'id': video_id, +            'title': title, +            'thumbnail': thumbnail, +            'duration': duration, +            'age_limit': age_limit, +            'formats': formats, +        } | 
