about summary refs log tree commit diff
diff options
context:
space:
mode:
author Sergey M․ <dstftw@gmail.com> 2018-04-28 02:48:03 +0700
committer Sergey M․ <dstftw@gmail.com> 2018-04-28 02:48:03 +0700
commit e7e4a6e0f9166cee82c165ca69a6a3c94ddc5f45 (patch)
tree 121ae20443e5eb11ca59f2d1c566b3fdfabe7f86
parent 6cc622327ff8289f94894f3695ed31014c61cf8e (diff)
[extractor/common] Extract interaction statistic
-rw-r--r-- youtube_dl/extractor/common.py 35
1 files changed, 35 insertions, 0 deletions
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index e0c3c8eb0..a9939b0fd 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -1020,6 +1020,40 @@ class InfoExtractor(object):
if isinstance(json_ld, dict):
json_ld = [json_ld]
+ INTERACTION_TYPE_MAP = {
+ 'CommentAction': 'comment',
+ 'AgreeAction': 'like',
+ 'DisagreeAction': 'dislike',
+ 'LikeAction': 'like',
+ 'DislikeAction': 'dislike',
+ 'ListenAction': 'view',
+ 'WatchAction': 'view',
+ 'ViewAction': 'view',
+ }
+
+ def extract_interaction_statistic(e):
+ interaction_statistic = e.get('interactionStatistic')
+ if not isinstance(interaction_statistic, list):
+ return
+ for is_e in interaction_statistic:
+ if not isinstance(is_e, dict):
+ continue
+ if is_e.get('@type') != 'InteractionCounter':
+ continue
+ interaction_type = is_e.get('interactionType')
+ if not isinstance(interaction_type, compat_str):
+ continue
+ interaction_count = int_or_none(is_e.get('userInteractionCount'))
+ if interaction_count is None:
+ continue
+ count_kind = INTERACTION_TYPE_MAP.get(interaction_type.split('/')[-1])
+ if not count_kind:
+ continue
+ count_key = '%s_count' % count_kind
+ if info.get(count_key) is not None:
+ continue
+ info[count_key] = interaction_count
+
def extract_video_object(e):
assert e['@type'] == 'VideoObject'
info.update({
@@ -1035,6 +1069,7 @@ class InfoExtractor(object):
'height': int_or_none(e.get('height')),
'view_count': int_or_none(e.get('interactionCount')),
})
+ extract_interaction_statistic(e)
for e in json_ld:
if isinstance(e.get('@context'), compat_str) and re.match(r'^https?://schema.org/?$', e.get('@context')):