diff options
| author | Sergey M․ <dstftw@gmail.com> | 2016-01-10 10:31:36 +0600 | 
|---|---|---|
| committer | Sergey M․ <dstftw@gmail.com> | 2016-01-10 10:31:36 +0600 | 
| commit | 6e99d5762a844b44cad4cae144045ed0537c084b (patch) | |
| tree | 4bcd48c810d5a6003f7898c26762bbb81adca3e4 | |
| parent | 15b1c6656fd8b384bd974f4d943a04b8bf8ca915 (diff) | |
[bigflix] Extract all formats
| -rw-r--r-- | youtube_dl/extractor/bigflix.py | 42 | 
1 files changed, 35 insertions, 7 deletions
| diff --git a/youtube_dl/extractor/bigflix.py b/youtube_dl/extractor/bigflix.py index aeea1a002..b7e498436 100644 --- a/youtube_dl/extractor/bigflix.py +++ b/youtube_dl/extractor/bigflix.py @@ -1,15 +1,16 @@  # coding: utf-8  from __future__ import unicode_literals -from base64 import b64decode +import base64 +import re  from .common import InfoExtractor  from ..compat import compat_urllib_parse_unquote  class BigflixIE(InfoExtractor): -    _VALID_URL = r'https?://(?:www\.)?bigflix\.com/.*/(?P<id>[0-9]+)' -    _TEST = { +    _VALID_URL = r'https?://(?:www\.)?bigflix\.com/.+/(?P<id>[0-9]+)' +    _TESTS = [{          'url': 'http://www.bigflix.com/Hindi-movies/Action-movies/Singham-Returns/16537',          'md5': 'ec76aa9b1129e2e5b301a474e54fab74',          'info_dict': { @@ -18,7 +19,20 @@ class BigflixIE(InfoExtractor):              'title': 'Singham Returns',              'description': 'md5:3d2ba5815f14911d5cc6a501ae0cf65d',          } -    } +    }, { +        # multiple formats +        'url': 'http://www.bigflix.com/Tamil-movies/Drama-movies/Madarasapatinam/16070', +        'info_dict': { +            'id': '16070', +            'ext': 'mp4', +            'title': 'Madarasapatinam', +            'description': 'md5:63b9b8ed79189c6f0418c26d9a3452ca', +            'formats': 'mincount:2', +        }, +        'params': { +            'skip_download': True, +        } +    }]      def _real_extract(self, url):          video_id = self._match_id(url) @@ -29,14 +43,28 @@ class BigflixIE(InfoExtractor):              r'<div[^>]+class=["\']pagetitle["\'][^>]*>(.+?)</div>',              webpage, 'title') -        video_url = b64decode(compat_urllib_parse_unquote(self._search_regex( -            r'file=([^&]+)', webpage, 'video url')).encode('ascii')).decode('utf-8') +        def decode_url(quoted_b64_url): +            return base64.b64decode(compat_urllib_parse_unquote( +                quoted_b64_url).encode('ascii')).decode('utf-8') + +        formats = [{ +        
    'url': decode_url(encoded_url), +            'format_id': '%sp' % height, +            'height': int(height), +        } for height, encoded_url in re.findall( +            r'ContentURL_(\d{3,4})[pP][^=]+=([^&]+)', webpage)] + +        if not formats: +            formats.append({ +                'url': decode_url(self._search_regex( +                    r'file=([^&]+)', webpage, 'video url')), +            })          description = self._html_search_meta('description', webpage)          return {              'id': video_id,              'title': title, -            'url': video_url,              'description': description, +            'formats': formats          } | 
