about summary refs log tree commit diff
path: root/youtube_dl/extractor/facebook.py
diff options
context:
space:
mode:
author Sergey M․ <dstftw@gmail.com> 2017-02-01 23:15:38 +0700
committer Sergey M․ <dstftw@gmail.com> 2017-02-01 23:19:20 +0700
commit b83ef507b457e6ea8c52265ea42b6c5d2c500a7e (patch)
tree 8554cf9dad9f3f4b3772cd58e383f27ae1f5ef9a /youtube_dl/extractor/facebook.py
parent 000f207944e277e63dbec5a60007c30e3187d3fd (diff)
[facebook] Fix extraction (closes #11926)
Diffstat (limited to 'youtube_dl/extractor/facebook.py')
-rw-r--r-- youtube_dl/extractor/facebook.py 36
1 files changed, 27 insertions, 9 deletions
diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py
index c0a7fc7d8..47bcc0dbc 100644
--- a/youtube_dl/extractor/facebook.py
+++ b/youtube_dl/extractor/facebook.py
@@ -12,14 +12,16 @@ from ..compat import (
compat_urllib_parse_unquote_plus,
)
from ..utils import (
+ clean_html,
error_to_compat_str,
ExtractorError,
+ get_element_by_id,
int_or_none,
+ js_to_json,
limit_length,
sanitized_Request,
+ try_get,
urlencode_postdata,
- get_element_by_id,
- clean_html,
)
@@ -243,14 +245,30 @@ class FacebookIE(InfoExtractor):
video_data = None
+ def extract_video_data(instances):
+ for item in instances:
+ if item[1][0] == 'VideoConfig':
+ video_item = item[2][0]
+ if video_item.get('video_id') == video_id:
+ return video_item['videoData']
+
server_js_data = self._parse_json(self._search_regex(
- r'handleServerJS\(({.+})(?:\);|,")', webpage, 'server js data', default='{}'), video_id)
- for item in server_js_data.get('instances', []):
- if item[1][0] == 'VideoConfig':
- video_item = item[2][0]
- if video_item.get('video_id') == video_id:
- video_data = video_item['videoData']
- break
+ r'handleServerJS\(({.+})(?:\);|,")', webpage,
+ 'server js data', default='{}'), video_id, fatal=False)
+
+ if server_js_data:
+ video_data = extract_video_data(server_js_data.get('instances', []))
+
+ if not video_data:
+ server_js_data = self._parse_json(
+ self._search_regex(
+ r'bigPipe\.onPageletArrive\(({.+?})\)\s*;\s*}\s*\)\s*,\s*["\']onPageletArrive\s+stream_pagelet',
+ webpage, 'js data', default='{}'),
+ video_id, transform_source=js_to_json, fatal=False)
+ if server_js_data:
+ video_data = extract_video_data(try_get(
+ server_js_data, lambda x: x['jsmods']['instances'],
+ list) or [])
if not video_data:
if not fatal_if_no_video: