diff options
| author | Sergey M․ <dstftw@gmail.com> | 2016-07-09 00:28:07 +0700 | 
|---|---|---|
| committer | Sergey M․ <dstftw@gmail.com> | 2016-07-09 00:28:07 +0700 | 
| commit | cedc70b29277c7ceebee4843bc2f13ebe88109b0 (patch) | |
| tree | 87c5e2850629e6950a2e6f47fc2d2a111c2a092f | |
| parent | 07d7689f2eb4698cf98ee837b56489b7ff1924df (diff) | |
[facebook] Fix invalid video being extracted (Closes #9851)
| -rw-r--r-- | youtube_dl/extractor/facebook.py | 19 | 
1 file changed, 15 insertions, 4 deletions
```diff
diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py
index f5d4f966a..0d43acc4a 100644
--- a/youtube_dl/extractor/facebook.py
+++ b/youtube_dl/extractor/facebook.py
@@ -219,12 +219,23 @@ class FacebookIE(InfoExtractor):

         BEFORE = '{swf.addParam(param[0], param[1]);});'
         AFTER = '.forEach(function(variable) {swf.addVariable(variable[0], variable[1]);});'
-        m = re.search(re.escape(BEFORE) + '(?:\n|\\\\n)(.*?)' + re.escape(AFTER), webpage)
-        if m:
-            swf_params = m.group(1).replace('\\\\', '\\').replace('\\"', '"')
+        PATTERN = re.escape(BEFORE) + '(?:\n|\\\\n)(.*?)' + re.escape(AFTER)
+
+        for m in re.findall(PATTERN, webpage):  # the page may embed several players
+            swf_params = m.replace('\\\\', '\\').replace('\\"', '"')
             data = dict(json.loads(swf_params))
             params_raw = compat_urllib_parse_unquote(data['params'])
-            video_data = json.loads(params_raw)['video_data']
+            video_data_candidate = json.loads(params_raw)['video_data']
+            for _, f in video_data_candidate.items():
+                if not f:
+                    continue
+                if isinstance(f, dict):
+                    f = [f]  # normalise a single entry to the list form
+                if not isinstance(f, list):  # anything else cannot be indexed as f[0] below
+                    continue
+                if f[0].get('video_id') == video_id:  # accept only data for the requested video
+                    video_data = video_data_candidate
+                    break

         def video_data_list2dict(video_data):
             ret = {}
```
