diff options
author | Sergey M․ <dstftw@gmail.com> | 2014-04-03 19:53:11 +0700 |
---|---|---|
committer | Sergey M․ <dstftw@gmail.com> | 2014-04-03 19:53:11 +0700 |
commit | 4a419b88514ce4052dd34bedd92d65dcbfa715e8 (patch) | |
tree | b18408d47ad18aa13feed09fca997d00b27839ee | |
parent | bec1fad2234a9b6d1fac6a1a430af2b828fb8060 (diff) |
[c56] Modernize and add duration extraction
-rw-r--r-- | youtube_dl/extractor/c56.py | 29 |
1 file changed, 18 insertions, 11 deletions
```diff
diff --git a/youtube_dl/extractor/c56.py b/youtube_dl/extractor/c56.py
index 690bc7c25..cb96c3876 100644
--- a/youtube_dl/extractor/c56.py
+++ b/youtube_dl/extractor/c56.py
@@ -2,39 +2,46 @@
 from __future__ import unicode_literals
 
 import re
-import json
 
 from .common import InfoExtractor
 
 
 class C56IE(InfoExtractor):
-    _VALID_URL = r'https?://((www|player)\.)?56\.com/(.+?/)?(v_|(play_album.+-))(?P<textid>.+?)\.(html|swf)'
+    _VALID_URL = r'https?://(?:(?:www|player)\.)?56\.com/(?:.+?/)?(?:v_|(?:play_album.+-))(?P<textid>.+?)\.(?:html|swf)'
     IE_NAME = '56.com'
     _TEST = {
         'url': 'http://www.56.com/u39/v_OTM0NDA3MTY.html',
-        'file': '93440716.flv',
         'md5': 'e59995ac63d0457783ea05f93f12a866',
         'info_dict': {
+            'id': '93440716',
+            'ext': 'flv',
             'title': '网事知多少 第32期:车怒',
+            'duration': 283.813,
         },
     }
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE)
         text_id = mobj.group('textid')
-        info_page = self._download_webpage('http://vxml.56.com/json/%s/' % text_id,
-                                           text_id, 'Downloading video info')
-        info = json.loads(info_page)['info']
-        formats = [{
-            'format_id': f['type'],
-            'filesize': int(f['filesize']),
-            'url': f['url']
-        } for f in info['rfiles']]
+
+        page = self._download_json(
+            'http://vxml.56.com/json/%s/' % text_id, text_id, 'Downloading video info')
+
+        info = page['info']
+
+        formats = [
+            {
+                'format_id': f['type'],
+                'filesize': int(f['filesize']),
+                'url': f['url']
+            } for f in info['rfiles']
+        ]
         self._sort_formats(formats)
 
         return {
             'id': info['vid'],
             'title': info['Subject'],
+            'duration': int(info['duration']) / 1000.0,
             'formats': formats,
             'thumbnail': info.get('bimg') or info.get('img'),
         }
```