diff options
| author | Sergey M․ <dstftw@gmail.com> | 2014-09-05 19:53:53 +0700 | 
|---|---|---|
| committer | Sergey M․ <dstftw@gmail.com> | 2014-09-05 19:53:53 +0700 | 
| commit | 4d067a58ca6b4ba2bfe2c9cb17e0ab1ad9e70604 (patch) | |
| tree | 6a111cc2d49885c5b36215c2886ffcc8a6ae25f4 | |
| parent | 1c1cff6a525bc8fc506cf2c6eb8963abc3b1fcee (diff) | |
[dbtv] Simplify, modernize, extract all formats
| -rw-r--r-- | youtube_dl/extractor/dbtv.py | 114 | 
1 file changed, 56 insertions, 58 deletions
| diff --git a/youtube_dl/extractor/dbtv.py b/youtube_dl/extractor/dbtv.py index cf76dbf05..1d3e2ff08 100644 --- a/youtube_dl/extractor/dbtv.py +++ b/youtube_dl/extractor/dbtv.py @@ -4,73 +4,71 @@ from __future__ import unicode_literals  import re  from .common import InfoExtractor -  from ..utils import ( -  ExtractorError +    float_or_none, +    int_or_none, +    clean_html,  ) +  class DBTVIE(InfoExtractor): -  _VALID_URL = r'http://dbtv.no/(?P<id>[0-9]+)/?(?P<slug>.*)$' -  _TEST = { -    'url': 'http://dbtv.no/3649835190001#Skulle_teste_ut_fornøyelsespark,_men_kollegaen_var_bare_opptatt_av_bikinikroppen', -    'md5': 'b89953ed25dacb6edb3ef6c6f430f8bc', -    'info_dict': { -      'id': '3649835190001', -      'ext': 'mp4', -      'title': 'Skulle teste ut fornøyelsespark, men kollegaen var bare opptatt av bikinikroppen', -      'description': 'md5:d681bf2bb7dd3503892cedb9c2d0e6f2', -      'thumbnail': 'http://gfx.dbtv.no/thumbs/still/33100.jpg', -      'timestamp': 1404039863, -      'upload_date': '20140629', -      'duration': 69544, +    _VALID_URL = r'http://dbtv\.no/(?P<id>[0-9]+)#(?P<display_id>.+)' +    _TEST = { +        'url': 'http://dbtv.no/3649835190001#Skulle_teste_ut_fornøyelsespark,_men_kollegaen_var_bare_opptatt_av_bikinikroppen', +        'md5': 'b89953ed25dacb6edb3ef6c6f430f8bc', +        'info_dict': { +            'id': '33100', +            'display_id': 'Skulle_teste_ut_fornøyelsespark,_men_kollegaen_var_bare_opptatt_av_bikinikroppen', +            'ext': 'mp4', +            'title': 'Skulle teste ut fornøyelsespark, men kollegaen var bare opptatt av bikinikroppen', +            'description': 'md5:1504a54606c4dde3e4e61fc97aa857e0', +            'thumbnail': 're:https?://.*\.jpg$', +            'timestamp': 1404039863.438, +            'upload_date': '20140629', +            'duration': 69.544, +            'view_count': int, +            'categories': list, +        }      } -  } - -  def _real_extract(self, url): -    mobj = 
re.match(self._VALID_URL, url) -    video_id = mobj.group('id') -    # Download JSON file containing video info. -    data = self._download_json('http://api.dbtv.no/discovery/%s' % video_id, video_id, 'Downloading media JSON') -    # We only want the first video in the JSON API file. -    video = data['playlist'][0] +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        video_id = mobj.group('id') +        display_id = mobj.group('display_id') -    # Check for full HD video, else use the standard video URL -    for i in range(0, len(video['renditions'])): -      if int(video['renditions'][i]['width']) == 1280: -        video_url = video['renditions'][i]['URL'] -        break -      else: -        video_url = video['URL'] +        data = self._download_json( +            'http://api.dbtv.no/discovery/%s' % video_id, display_id) -    # Add access token to image or it will fail. -    thumbnail = video['splash'] +        video = data['playlist'][0] -    # Duration int. -    duration = int(video['length']) +        formats = [{ +            'url': f['URL'], +            'vcodec': f.get('container'), +            'width': int_or_none(f.get('width')), +            'height': int_or_none(f.get('height')), +            'vbr': float_or_none(f.get('rate'), 1000), +            'filesize': int_or_none(f.get('size')), +        } for f in video['renditions'] if 'URL' in f] -    # Timestamp is given in milliseconds. -    timestamp = float(str(video['publishedAt'])[0:-3]) +        if not formats: +            for url_key, format_id in [('URL', 'mp4'), ('HLSURL', 'hls')]: +                if url_key in video: +                    formats.append({ +                        'url': video[url_key], +                        'format_id': format_id, +                    }) -    formats = [] +        self._sort_formats(formats) -    # Video URL. 
-    if video['URL'] is not None: -      formats.append({ -        'url': video_url, -        'format_id': 'mp4', -        'ext': 'mp4' -      }) -    else: -      raise ExtractorError('No download URL found for video: %s.' % video_id, expected=True) - -    return { -      'id': video_id, -      'title': video['title'], -      'description': video['desc'], -      'thumbnail': thumbnail, -      'timestamp': timestamp, -      'duration': duration, -      'view_count': video['views'], -      'formats': formats, -    } +        return { +            'id': video['id'], +            'display_id': display_id, +            'title': video['title'], +            'description': clean_html(video['desc']), +            'thumbnail': video.get('splash') or video.get('thumb'), +            'timestamp': float_or_none(video.get('publishedAt'), 1000), +            'duration': float_or_none(video.get('length'), 1000), +            'view_count': int_or_none(video.get('views')), +            'categories': video.get('tags'), +            'formats': formats, +        } | 
