diff options
| -rw-r--r-- | youtube_dl/extractor/facebook.py | 57 | 
1 files changed, 45 insertions, 12 deletions
diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index 27a57ad03..6c360e29e 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -72,6 +72,7 @@ class FacebookIE(InfoExtractor):          },          'skip': 'Requires logging in',      }, { +        # data.video          'url': 'https://www.facebook.com/video.php?v=274175099429670',          'info_dict': {              'id': '274175099429670', @@ -133,6 +134,7 @@ class FacebookIE(InfoExtractor):          },      }, {          # have 1080P, but only up to 720p in swf params +        # data.video.story.attachments[].media          'url': 'https://www.facebook.com/cnn/videos/10155529876156509/',          'md5': '9571fae53d4165bbbadb17a94651dcdc',          'info_dict': { @@ -147,6 +149,7 @@ class FacebookIE(InfoExtractor):          },      }, {          # bigPipe.onPageletArrive ... onPageletArrive pagelet_group_mall +        # data.node.comet_sections.content.story.attachments[].style_type_renderer.attachment.media          'url': 'https://www.facebook.com/yaroslav.korpan/videos/1417995061575415/',          'info_dict': {              'id': '1417995061575415', @@ -174,6 +177,7 @@ class FacebookIE(InfoExtractor):              'skip_download': True,          },      }, { +        # data.node.comet_sections.content.story.attachments[].style_type_renderer.attachment.media          'url': 'https://www.facebook.com/groups/1024490957622648/permalink/1396382447100162/',          'info_dict': {              'id': '1396382447100162', @@ -193,18 +197,23 @@ class FacebookIE(InfoExtractor):          'url': 'https://www.facebook.com/amogood/videos/1618742068337349/?fref=nf',          'only_matching': True,      }, { +        # data.mediaset.currMedia.edges          'url': 'https://www.facebook.com/ChristyClarkForBC/videos/vb.22819070941/10153870694020942/?type=2&theater',          'only_matching': True,      }, { +        # data.video.story.attachments[].media          'url': 'facebook:544765982287235',          'only_matching': True,      }, { +        # data.node.comet_sections.content.story.attachments[].style_type_renderer.attachment.media          'url': 'https://www.facebook.com/groups/164828000315060/permalink/764967300301124/',          'only_matching': True,      }, { +        # data.video.creation_story.attachments[].media          'url': 'https://zh-hk.facebook.com/peoplespower/videos/1135894589806027/',          'only_matching': True,      }, { +        # data.video          'url': 'https://www.facebookcorewwwi.onion/video.php?v=274175099429670',          'only_matching': True,      }, { @@ -212,6 +221,7 @@ class FacebookIE(InfoExtractor):          'url': 'https://www.facebook.com/onlycleverentertainment/videos/1947995502095005/',          'only_matching': True,      }, { +        # data.video          'url': 'https://www.facebook.com/WatchESLOne/videos/359649331226507/',          'info_dict': {              'id': '359649331226507', @@ -222,6 +232,13 @@ class FacebookIE(InfoExtractor):          'params': {              'skip_download': True,          }, +    }, { +        # data.node.comet_sections.content.story.attachments[].style_type_renderer.attachment.all_subattachments.nodes[].media +        'url': 'https://www.facebook.com/100033620354545/videos/106560053808006/', +        'info_dict': { +            'id': '106560053808006', +        }, +        'playlist_count': 2,      }]      @staticmethod @@ -330,9 +347,7 @@ class FacebookIE(InfoExtractor):                  return extract_video_data(try_get(                      js_data, lambda x: x['jsmods']['instances'], list) or []) -        formats = [] - -        def extract_dash_manifest(video): +        def extract_dash_manifest(video, formats):              dash_manifest = video.get('dash_manifest')              if dash_manifest:                  formats.extend(self._parse_mpd_formats( @@ -351,7 +366,10 @@ class FacebookIE(InfoExtractor):                  webpage, 'graphql data', default='{}'), video_id, fatal=False) or {}              for require in (graphql_data.get('require') or []):                  if require[0] == 'RelayPrefetchedStreamCache': +                    entries = [] +                      def parse_graphql_video(video): +                        formats = []                          q = qualities(['sd', 'hd'])                          for (suffix, format_id) in [('', 'sd'), ('_quality_hd', 'hd')]:                              playable_url = video.get('playable_url' + suffix) @@ -362,7 +380,7 @@ class FacebookIE(InfoExtractor):                                  'quality': q(format_id),                                  'url': playable_url,                              }) -                        extract_dash_manifest(video) +                        extract_dash_manifest(video, formats)                          self._sort_formats(formats)                          v_id = video.get('videoId') or video.get('id') or video_id                          info = { @@ -382,7 +400,12 @@ class FacebookIE(InfoExtractor):                              })                          else:                              info['title'] = description or 'Facebook video #%s' % v_id -                        return webpage, info +                        entries.append(info) + +                    def parse_attachment(attachment, key='media'): +                        media = attachment.get(key) or {} +                        if media.get('__typename') == 'Video': +                            return parse_graphql_video(media)                      data = try_get(require, lambda x: x[3][1]['__bbox']['result']['data'], dict) or {} @@ -392,13 +415,22 @@ class FacebookIE(InfoExtractor):                          lambda x: x['node']['comet_sections']['content']['story']['attachments']                      ], list) or []                      for attachment in attachments: -                        media = attachment.get('media') or try_get(attachment, lambda x: x['style_type_renderer']['attachment']['media'], dict) or {} -                        if media.get('__typename') == 'Video': -                            return parse_graphql_video(media) +                        attachment = try_get(attachment, lambda x: x['style_type_renderer']['attachment'], dict) or attachment +                        nodes = try_get(attachment, lambda x: x['all_subattachments']['nodes'], list) or [] +                        for node in nodes: +                            parse_attachment(node) +                        parse_attachment(attachment) -                    video = data.get('video') or {} -                    if video: -                        return parse_graphql_video(video) +                    edges = try_get(data, lambda x: x['mediaset']['currMedia']['edges'], list) or [] +                    for edge in edges: +                        parse_attachment(edge, key='node') + +                    if not entries: +                        video = data.get('video') or {} +                        if video: +                            parse_graphql_video(video) + +                    return webpage, self.playlist_result(entries, video_id)          if not video_data:              if not fatal_if_no_video: @@ -440,6 +472,7 @@ class FacebookIE(InfoExtractor):          if not video_data:              raise ExtractorError('Cannot parse data') +        formats = []          subtitles = {}          for f in video_data:              format_id = f['stream_type'] @@ -459,7 +492,7 @@ class FacebookIE(InfoExtractor):                              'url': src,                              'preference': preference,                          }) -            extract_dash_manifest(f[0]) +            extract_dash_manifest(f[0], formats)              subtitles_src = f[0].get('subtitles_src')              if subtitles_src:                  subtitles.setdefault('en', []).append({'url': subtitles_src})  | 
