about | summary | refs | log | tree | commit | diff
path: root/youtube_dl
diff options
context:
space:
mode:
author: Sergey M․ <dstftw@gmail.com> 2017-07-20 22:49:52 +0700
committer: Sergey M․ <dstftw@gmail.com> 2017-07-20 22:50:56 +0700
commit: c653326a1425f4c271f387fde7a706bf4b52a7a3 (patch)
tree: 308ebf05c4d6cc3eb6689032e86c246b020d8696 /youtube_dl
parent: 3fcf346ac16e6fe1963a3eab861d6bd9c32ce6db (diff)
[funnyordie] Extract more metadata (closes #13677)
Diffstat (limited to 'youtube_dl')
-rw-r--r-- youtube_dl/extractor/funnyordie.py 64
1 file changed, 56 insertions, 8 deletions
diff --git a/youtube_dl/extractor/funnyordie.py b/youtube_dl/extractor/funnyordie.py
index 49409369c..f85e7de14 100644
--- a/youtube_dl/extractor/funnyordie.py
+++ b/youtube_dl/extractor/funnyordie.py
@@ -1,10 +1,14 @@
from __future__ import unicode_literals
-import json
import re
from .common import InfoExtractor
-from ..utils import ExtractorError
+from ..utils import (
+ ExtractorError,
+ float_or_none,
+ int_or_none,
+ unified_timestamp,
+)
class FunnyOrDieIE(InfoExtractor):
@@ -18,6 +22,10 @@ class FunnyOrDieIE(InfoExtractor):
'title': 'Heart-Shaped Box: Literal Video Version',
'description': 'md5:ea09a01bc9a1c46d9ab696c01747c338',
'thumbnail': r're:^http:.*\.jpg$',
+ 'uploader': 'DASjr',
+ 'timestamp': 1317904928,
+ 'upload_date': '20111006',
+ 'duration': 318.3,
},
}, {
'url': 'http://www.funnyordie.com/embed/e402820827',
@@ -27,6 +35,8 @@ class FunnyOrDieIE(InfoExtractor):
'title': 'Please Use This Song (Jon Lajoie)',
'description': 'Please use this to sell something. www.jonlajoie.com',
'thumbnail': r're:^http:.*\.jpg$',
+ 'timestamp': 1398988800,
+ 'upload_date': '20140502',
},
'params': {
'skip_download': True,
@@ -100,15 +110,53 @@ class FunnyOrDieIE(InfoExtractor):
'url': 'http://www.funnyordie.com%s' % src,
}]
- post_json = self._search_regex(
- r'fb_post\s*=\s*(\{.*?\});', webpage, 'post details')
- post = json.loads(post_json)
+ timestamp = unified_timestamp(self._html_search_meta(
+ 'uploadDate', webpage, 'timestamp', default=None))
+
+ uploader = self._html_search_regex(
+ r'<h\d[^>]+\bclass=["\']channel-preview-name[^>]+>(.+?)</h',
+ webpage, 'uploader', default=None)
+
+ title, description, thumbnail, duration = [None] * 4
+
+ medium = self._parse_json(
+ self._search_regex(
+ r'jsonMedium\s*=\s*({.+?});', webpage, 'JSON medium',
+ default='{}'),
+ video_id, fatal=False)
+ if medium:
+ title = medium.get('title')
+ duration = float_or_none(medium.get('duration'))
+ if not timestamp:
+ timestamp = unified_timestamp(medium.get('publishDate'))
+
+ post = self._parse_json(
+ self._search_regex(
+ r'fb_post\s*=\s*(\{.*?\});', webpage, 'post details',
+ default='{}'),
+ video_id, fatal=False)
+ if post:
+ if not title:
+ title = post.get('name')
+ description = post.get('description')
+ thumbnail = post.get('picture')
+
+ if not title:
+ title = self._og_search_title(webpage)
+ if not description:
+ description = self._og_search_description(webpage)
+ if not duration:
+ duration = int_or_none(self._html_search_meta(
+ ('video:duration', 'duration'), webpage, 'duration', default=False))
return {
'id': video_id,
- 'title': post['name'],
- 'description': post.get('description'),
- 'thumbnail': post.get('picture'),
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'uploader': uploader,
+ 'timestamp': timestamp,
+ 'duration': duration,
'formats': formats,
'subtitles': subtitles,
}