diff options
| author | Sergey M․ <dstftw@gmail.com> | 2016-12-20 21:51:11 +0700 | 
|---|---|---|
| committer | Sergey M․ <dstftw@gmail.com> | 2016-12-20 21:53:51 +0700 | 
| commit | 3d6761ba92b96934bdbf1792dfacd0368d7b236c (patch) | |
| tree | 8a45a775699decce4bb21f92016bb80891aab6cb | |
| parent | f59d1146c0ca523ec03a4c7df8987e82ee5054b2 (diff) | |
[vbox7] Fix extraction (closes #11494)
| -rw-r--r-- | youtube_dl/extractor/vbox7.py | 75 | 
1 file changed, 52 insertions, 23 deletions
diff --git a/youtube_dl/extractor/vbox7.py b/youtube_dl/extractor/vbox7.py
index a1e0851b7..7fb7574ad 100644
--- a/youtube_dl/extractor/vbox7.py
+++ b/youtube_dl/extractor/vbox7.py
@@ -4,11 +4,22 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..utils import urlencode_postdata
+from ..utils import ExtractorError
 
 
 class Vbox7IE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?vbox7\.com/(?:play:|emb/external\.php\?.*?\bvid=)(?P<id>[\da-fA-F]+)'
+    _VALID_URL = r'''(?x)
+                    https?://
+                        (?:[^/]+\.)?vbox7\.com/
+                        (?:
+                            play:|
+                            (?:
+                                emb/external\.php|
+                                player/ext\.swf
+                            )\?.*?\bvid=
+                        )
+                        (?P<id>[\da-fA-F]+)
+                    '''
     _TESTS = [{
         'url': 'http://vbox7.com/play:0946fff23c',
         'md5': 'a60f9ab3a3a2f013ef9a967d5f7be5bf',
@@ -16,6 +27,14 @@ class Vbox7IE(InfoExtractor):
             'id': '0946fff23c',
             'ext': 'mp4',
             'title': 'Борисов: Притеснен съм за бъдещето на България',
+            'description': 'По думите му е опасно страната ни да бъде обявена за "сигурна"',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'timestamp': 1470982814,
+            'upload_date': '20160812',
+            'uploader': 'zdraveibulgaria',
+        },
+        'params': {
+            'proxy': '127.0.0.1:8118',
         },
     }, {
         'url': 'http://vbox7.com/play:249bb972c2',
@@ -29,6 +48,9 @@ class Vbox7IE(InfoExtractor):
     }, {
         'url': 'http://vbox7.com/emb/external.php?vid=a240d20f9c&autoplay=1',
         'only_matching': True,
+    }, {
+        'url': 'http://i49.vbox7.com/player/ext.swf?vid=0946fff23c&autoplay=1',
+        'only_matching': True,
     }]
 
     @staticmethod
@@ -42,33 +64,40 @@ class Vbox7IE(InfoExtractor):
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
-        webpage = self._download_webpage(
-            'http://vbox7.com/play:%s' % video_id, video_id)
-
-        title = self._html_search_regex(
-            r'<title>(.+?)</title>', webpage, 'title').split('/')[0].strip()
+        response = self._download_json(
+            'https://www.vbox7.com/ajax/video/nextvideo.php?vid=%s' % video_id,
+            video_id)
 
-        video_url = self._search_regex(
-            r'src\s*:\s*(["\'])(?P<url>.+?.mp4.*?)\1',
-            webpage, 'video url', default=None, group='url')
+        if 'error' in response:
+            raise ExtractorError(
+                '%s said: %s' % (self.IE_NAME, response['error']), expected=True)
 
-        thumbnail_url = self._og_search_thumbnail(webpage)
+        video = response['options']
 
-        if not video_url:
-            info_response = self._download_webpage(
-                'http://vbox7.com/play/magare.do', video_id,
-                'Downloading info webpage',
-                data=urlencode_postdata({'as3': '1', 'vid': video_id}),
-                headers={'Content-Type': 'application/x-www-form-urlencoded'})
-            final_url, thumbnail_url = map(
-                lambda x: x.split('=')[1], info_response.split('&'))
+        title = video['title']
+        video_url = video['src']
 
         if '/na.mp4' in video_url:
             self.raise_geo_restricted()
 
-        return {
+        uploader = video.get('uploader')
+
+        webpage = self._download_webpage(
+            'http://vbox7.com/play:%s' % video_id, video_id, fatal=None)
+
+        info = {}
+
+        if webpage:
+            info = self._search_json_ld(
+                webpage.replace('"/*@context"', '"@context"'), video_id)
+
+        info.update({
             'id': video_id,
-            'url': self._proto_relative_url(video_url, 'http:'),
             'title': title,
-            'thumbnail': thumbnail_url,
-        }
+            'url': video_url,
+            'uploader': uploader,
+            'thumbnail': self._proto_relative_url(
+                info.get('thumbnail') or self._og_search_thumbnail(webpage),
+                'http:'),
+        })
+        return info
