diff options
author | Sergey M․ <dstftw@gmail.com> | 2017-07-20 22:49:52 +0700 |
---|---|---|
committer | Sergey M․ <dstftw@gmail.com> | 2017-07-20 22:50:56 +0700 |
commit | c653326a1425f4c271f387fde7a706bf4b52a7a3 (patch) | |
tree | 308ebf05c4d6cc3eb6689032e86c246b020d8696 /youtube_dl | |
parent | 3fcf346ac16e6fe1963a3eab861d6bd9c32ce6db (diff) |
[funnyordie] Extract more metadata (closes #13677)
Diffstat (limited to 'youtube_dl')
-rw-r--r-- | youtube_dl/extractor/funnyordie.py | 64 |
1 file changed, 56 insertions, 8 deletions
```diff
diff --git a/youtube_dl/extractor/funnyordie.py b/youtube_dl/extractor/funnyordie.py
index 49409369c..f85e7de14 100644
--- a/youtube_dl/extractor/funnyordie.py
+++ b/youtube_dl/extractor/funnyordie.py
@@ -1,10 +1,14 @@
 from __future__ import unicode_literals
 
-import json
 import re
 
 from .common import InfoExtractor
-from ..utils import ExtractorError
+from ..utils import (
+    ExtractorError,
+    float_or_none,
+    int_or_none,
+    unified_timestamp,
+)
 
 
 class FunnyOrDieIE(InfoExtractor):
@@ -18,6 +22,10 @@ class FunnyOrDieIE(InfoExtractor):
             'title': 'Heart-Shaped Box: Literal Video Version',
             'description': 'md5:ea09a01bc9a1c46d9ab696c01747c338',
             'thumbnail': r're:^http:.*\.jpg$',
+            'uploader': 'DASjr',
+            'timestamp': 1317904928,
+            'upload_date': '20111006',
+            'duration': 318.3,
         },
     }, {
         'url': 'http://www.funnyordie.com/embed/e402820827',
@@ -27,6 +35,8 @@ class FunnyOrDieIE(InfoExtractor):
             'title': 'Please Use This Song (Jon Lajoie)',
             'description': 'Please use this to sell something. www.jonlajoie.com',
             'thumbnail': r're:^http:.*\.jpg$',
+            'timestamp': 1398988800,
+            'upload_date': '20140502',
         },
         'params': {
             'skip_download': True,
@@ -100,15 +110,53 @@ class FunnyOrDieIE(InfoExtractor):
                 'url': 'http://www.funnyordie.com%s' % src,
             }]
 
-        post_json = self._search_regex(
-            r'fb_post\s*=\s*(\{.*?\});', webpage, 'post details')
-        post = json.loads(post_json)
+        timestamp = unified_timestamp(self._html_search_meta(
+            'uploadDate', webpage, 'timestamp', default=None))
+
+        uploader = self._html_search_regex(
+            r'<h\d[^>]+\bclass=["\']channel-preview-name[^>]+>(.+?)</h',
+            webpage, 'uploader', default=None)
+
+        title, description, thumbnail, duration = [None] * 4
+
+        medium = self._parse_json(
+            self._search_regex(
+                r'jsonMedium\s*=\s*({.+?});', webpage, 'JSON medium',
+                default='{}'),
+            video_id, fatal=False)
+        if medium:
+            title = medium.get('title')
+            duration = float_or_none(medium.get('duration'))
+            if not timestamp:
+                timestamp = unified_timestamp(medium.get('publishDate'))
+
+        post = self._parse_json(
+            self._search_regex(
+                r'fb_post\s*=\s*(\{.*?\});', webpage, 'post details',
+                default='{}'),
+            video_id, fatal=False)
+        if post:
+            if not title:
+                title = post.get('name')
+            description = post.get('description')
+            thumbnail = post.get('picture')
+
+        if not title:
+            title = self._og_search_title(webpage)
+        if not description:
+            description = self._og_search_description(webpage)
+        if not duration:
+            duration = int_or_none(self._html_search_meta(
+                ('video:duration', 'duration'), webpage, 'duration', default=False))
 
         return {
             'id': video_id,
-            'title': post['name'],
-            'description': post.get('description'),
-            'thumbnail': post.get('picture'),
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'uploader': uploader,
+            'timestamp': timestamp,
+            'duration': duration,
             'formats': formats,
             'subtitles': subtitles,
         }
```