diff options
author | Sergey M․ <dstftw@gmail.com> | 2014-09-05 19:53:53 +0700 |
---|---|---|
committer | Sergey M․ <dstftw@gmail.com> | 2014-09-05 19:53:53 +0700 |
commit | 4d067a58ca6b4ba2bfe2c9cb17e0ab1ad9e70604 (patch) | |
tree | 6a111cc2d49885c5b36215c2886ffcc8a6ae25f4 | |
parent | 1c1cff6a525bc8fc506cf2c6eb8963abc3b1fcee (diff) |
[dbtv] Simplify, modernize, extract all formats
-rw-r--r-- | youtube_dl/extractor/dbtv.py | 114 |
1 files changed, 56 insertions, 58 deletions
diff --git a/youtube_dl/extractor/dbtv.py b/youtube_dl/extractor/dbtv.py index cf76dbf05..1d3e2ff08 100644 --- a/youtube_dl/extractor/dbtv.py +++ b/youtube_dl/extractor/dbtv.py @@ -4,73 +4,71 @@ from __future__ import unicode_literals import re from .common import InfoExtractor - from ..utils import ( - ExtractorError + float_or_none, + int_or_none, + clean_html, ) + class DBTVIE(InfoExtractor): - _VALID_URL = r'http://dbtv.no/(?P<id>[0-9]+)/?(?P<slug>.*)$' - _TEST = { - 'url': 'http://dbtv.no/3649835190001#Skulle_teste_ut_fornøyelsespark,_men_kollegaen_var_bare_opptatt_av_bikinikroppen', - 'md5': 'b89953ed25dacb6edb3ef6c6f430f8bc', - 'info_dict': { - 'id': '3649835190001', - 'ext': 'mp4', - 'title': 'Skulle teste ut fornøyelsespark, men kollegaen var bare opptatt av bikinikroppen', - 'description': 'md5:d681bf2bb7dd3503892cedb9c2d0e6f2', - 'thumbnail': 'http://gfx.dbtv.no/thumbs/still/33100.jpg', - 'timestamp': 1404039863, - 'upload_date': '20140629', - 'duration': 69544, + _VALID_URL = r'http://dbtv\.no/(?P<id>[0-9]+)#(?P<display_id>.+)' + _TEST = { + 'url': 'http://dbtv.no/3649835190001#Skulle_teste_ut_fornøyelsespark,_men_kollegaen_var_bare_opptatt_av_bikinikroppen', + 'md5': 'b89953ed25dacb6edb3ef6c6f430f8bc', + 'info_dict': { + 'id': '33100', + 'display_id': 'Skulle_teste_ut_fornøyelsespark,_men_kollegaen_var_bare_opptatt_av_bikinikroppen', + 'ext': 'mp4', + 'title': 'Skulle teste ut fornøyelsespark, men kollegaen var bare opptatt av bikinikroppen', + 'description': 'md5:1504a54606c4dde3e4e61fc97aa857e0', + 'thumbnail': 're:https?://.*\.jpg$', + 'timestamp': 1404039863.438, + 'upload_date': '20140629', + 'duration': 69.544, + 'view_count': int, + 'categories': list, + } } - } - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - # Download JSON file containing video info. - data = self._download_json('http://api.dbtv.no/discovery/%s' % video_id, video_id, 'Downloading media JSON') - # We only want the first video in the JSON API file. - video = data['playlist'][0] + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + display_id = mobj.group('display_id') - # Check for full HD video, else use the standard video URL - for i in range(0, len(video['renditions'])): - if int(video['renditions'][i]['width']) == 1280: - video_url = video['renditions'][i]['URL'] - break - else: - video_url = video['URL'] + data = self._download_json( + 'http://api.dbtv.no/discovery/%s' % video_id, display_id) - # Add access token to image or it will fail. - thumbnail = video['splash'] + video = data['playlist'][0] - # Duration int. - duration = int(video['length']) + formats = [{ + 'url': f['URL'], + 'vcodec': f.get('container'), + 'width': int_or_none(f.get('width')), + 'height': int_or_none(f.get('height')), + 'vbr': float_or_none(f.get('rate'), 1000), + 'filesize': int_or_none(f.get('size')), + } for f in video['renditions'] if 'URL' in f] - # Timestamp is given in milliseconds. - timestamp = float(str(video['publishedAt'])[0:-3]) + if not formats: + for url_key, format_id in [('URL', 'mp4'), ('HLSURL', 'hls')]: + if url_key in video: + formats.append({ + 'url': video[url_key], + 'format_id': format_id, + }) - formats = [] + self._sort_formats(formats) - # Video URL. - if video['URL'] is not None: - formats.append({ - 'url': video_url, - 'format_id': 'mp4', - 'ext': 'mp4' - }) - else: - raise ExtractorError('No download URL found for video: %s.' % video_id, expected=True) - - return { - 'id': video_id, - 'title': video['title'], - 'description': video['desc'], - 'thumbnail': thumbnail, - 'timestamp': timestamp, - 'duration': duration, - 'view_count': video['views'], - 'formats': formats, - } + return { + 'id': video['id'], + 'display_id': display_id, + 'title': video['title'], + 'description': clean_html(video['desc']), + 'thumbnail': video.get('splash') or video.get('thumb'), + 'timestamp': float_or_none(video.get('publishedAt'), 1000), + 'duration': float_or_none(video.get('length'), 1000), + 'view_count': int_or_none(video.get('views')), + 'categories': video.get('tags'), + 'formats': formats, + } |