diff options
| -rw-r--r-- | youtube_dl/extractor/dumpert.py | 39 | 
1 files changed, 24 insertions, 15 deletions
diff --git a/youtube_dl/extractor/dumpert.py b/youtube_dl/extractor/dumpert.py index 52d07deac..e43bc81b2 100644 --- a/youtube_dl/extractor/dumpert.py +++ b/youtube_dl/extractor/dumpert.py @@ -4,11 +4,11 @@ from __future__ import unicode_literals  import base64  from .common import InfoExtractor +from ..utils import qualities  class DumpertIE(InfoExtractor): -    _VALID_URL = (r'https?://(?:www\.)?dumpert\.nl/mediabase/' -                  r'(?P<id>[0-9]+/[0-9a-zA-Z]+)/?.*') +    _VALID_URL = r'https?://(?:www\.)?dumpert\.nl/mediabase/(?P<id>[0-9]+/[0-9a-zA-Z]+)'      _TEST = {          'url': 'http://www.dumpert.nl/mediabase/6646981/951bc60f/',          'md5': '1b9318d7d5054e7dcb9dc7654f21d643', @@ -16,7 +16,8 @@ class DumpertIE(InfoExtractor):              'id': '6646981/951bc60f',              'ext': 'mp4',              'title': 'Ik heb nieuws voor je', -            'description': 'Niet schrikken hoor' +            'description': 'Niet schrikken hoor', +            'thumbnail': 're:^https?://.*\.jpg$',          }      } @@ -24,24 +25,32 @@ class DumpertIE(InfoExtractor):          video_id = self._match_id(url)          webpage = self._download_webpage(url, video_id) -        title = self._html_search_meta('title', webpage) -        description = self._html_search_meta('description', webpage) +        files_base64 = self._search_regex( +            r'data-files="([^"]+)"', webpage, 'data files') -        files_base64 = self._html_search_regex(r'data-files="(.*?)"', -                                               webpage, -                                               'files') -        files_json = base64.b64decode(files_base64).decode('iso-8859-1') -        files = self._parse_json(files_json, video_id) +        files = self._parse_json( +            base64.b64decode(files_base64.encode('utf-8')).decode('utf-8'), +            video_id) -        format_names = ['flv', 'mobile', 'tablet', '720p'] -        formats = [{'format_id': name, -                    'url': files[name].replace(r'\/', '/')} -                   for name in format_names -                   if name in files] +        quality = qualities(['flv', 'mobile', 'tablet', '720p']) + +        formats = [{ +            'url': video_url, +            'format_id': format_id, +            'quality': quality(format_id), +        } for format_id, video_url in files.items() if format_id != 'still'] +        self._sort_formats(formats) + +        title = self._html_search_meta( +            'title', webpage) or self._og_search_title(webpage) +        description = self._html_search_meta( +            'description', webpage) or self._og_search_description(webpage) +        thumbnail = files.get('still') or self._og_search_thumbnail(webpage)          return {              'id': video_id,              'title': title,              'description': description, +            'thumbnail': thumbnail,              'formats': formats          }  | 
