diff options
Diffstat (limited to 'youtube_dl/extractor/generic.py')
| -rw-r--r-- | youtube_dl/extractor/generic.py | 382 | 
1 files changed, 256 insertions, 126 deletions
| diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index b06f43446..b83c18380 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -10,6 +10,7 @@ from .common import InfoExtractor  from .youtube import YoutubeIE  from ..compat import (      compat_etree_fromstring, +    compat_str,      compat_urllib_parse_unquote,      compat_urlparse,      compat_xml_parse_error, @@ -35,6 +36,10 @@ from .brightcove import (      BrightcoveLegacyIE,      BrightcoveNewIE,  ) +from .nexx import ( +    NexxIE, +    NexxEmbedIE, +)  from .nbc import NBCSportsVPlayerIE  from .ooyala import OoyalaIE  from .rutv import RUTVIE @@ -56,6 +61,7 @@ from .dailymotion import (      DailymotionIE,      DailymotionCloudIE,  ) +from .dailymail import DailyMailIE  from .onionstudios import OnionStudiosIE  from .viewlift import ViewLiftEmbedIE  from .mtv import MTVServicesEmbeddedIE @@ -88,6 +94,11 @@ from .rutube import RutubeIE  from .limelight import LimelightBaseIE  from .anvato import AnvatoIE  from .washingtonpost import WashingtonPostIE +from .wistia import WistiaIE +from .mediaset import MediasetIE +from .joj import JojIE +from .megaphone import MegaphoneIE +from .vzaar import VzaarIE  class GenericIE(InfoExtractor): @@ -565,6 +576,19 @@ class GenericIE(InfoExtractor):              },              'skip': 'movie expired',          }, +        # ooyala video embedded with http://player.ooyala.com/static/v4/production/latest/core.min.js +        { +            'url': 'http://wnep.com/2017/07/22/steampunk-fest-comes-to-honesdale/', +            'info_dict': { +                'id': 'lwYWYxYzE6V5uJMjNGyKtwwiw9ZJD7t2', +                'ext': 'mp4', +                'title': 'Steampunk Fest Comes to Honesdale', +                'duration': 43.276, +            }, +            'params': { +                'skip_download': True, +            } +        },          # embed.ly video          {              'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/', @@ -756,6 +780,20 @@ class GenericIE(InfoExtractor):              },              'add_ie': ['Dailymotion'],          }, +        # DailyMail embed +        { +            'url': 'http://www.bumm.sk/krimi/2017/07/05/biztonsagi-kamera-buktatta-le-az-agg-ferfit-utlegelo-apolot', +            'info_dict': { +                'id': '1495629', +                'ext': 'mp4', +                'title': 'Care worker punches elderly dementia patient in head 11 times', +                'description': 'md5:3a743dee84e57e48ec68bf67113199a5', +            }, +            'add_ie': ['DailyMail'], +            'params': { +                'skip_download': True, +            }, +        },          # YouTube embed          {              'url': 'http://www.badzine.de/ansicht/datum/2014/06/09/so-funktioniert-die-neue-englische-badminton-liga.html', @@ -1182,7 +1220,7 @@ class GenericIE(InfoExtractor):              },              'add_ie': ['Kaltura'],          }, -        # Eagle.Platform embed (generic URL) +        # EaglePlatform embed (generic URL)          {              'url': 'http://lenta.ru/news/2015/03/06/navalny/',              # Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used @@ -1196,8 +1234,26 @@ class GenericIE(InfoExtractor):                  'view_count': int,                  'age_limit': 0,              }, +            'params': { +                'skip_download': True, +            }, +        }, +        # referrer protected EaglePlatform embed +        { +            'url': 'https://tvrain.ru/lite/teleshow/kak_vse_nachinalos/namin-418921/', +            'info_dict': { +                'id': '582306', +                'ext': 'mp4', +                'title': 'Стас Намин: «Мы нарушили девственность Кремля»', +                'thumbnail': r're:^https?://.*\.jpg$', +                'duration': 3382, +                'view_count': int, +            }, +            'params': { +                'skip_download': True, +            },          }, -        # ClipYou (Eagle.Platform) embed (custom URL) +        # ClipYou (EaglePlatform) embed (custom URL)          {              'url': 'http://muz-tv.ru/play/7129/',              # Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used @@ -1209,6 +1265,9 @@ class GenericIE(InfoExtractor):                  'duration': 216,                  'view_count': int,              }, +            'params': { +                'skip_download': True, +            },          },          # Pladform embed          { @@ -1460,14 +1519,27 @@ class GenericIE(InfoExtractor):          # LiveLeak embed          {              'url': 'http://www.wykop.pl/link/3088787/', -            'md5': 'ace83b9ed19b21f68e1b50e844fdf95d', +            'md5': '7619da8c820e835bef21a1efa2a0fc71',              'info_dict': {                  'id': '874_1459135191',                  'ext': 'mp4',                  'title': 'Man shows poor quality of new apartment building',                  'description': 'The wall is like a sand pile.',                  'uploader': 'Lake8737', -            } +            }, +            'add_ie': [LiveLeakIE.ie_key()], +        }, +        # Another LiveLeak embed pattern (#13336) +        { +            'url': 'https://milo.yiannopoulos.net/2017/06/concealed-carry-robbery/', +            'info_dict': { +                'id': '2eb_1496309988', +                'ext': 'mp4', +                'title': 'Thief robs place where everyone was armed', +                'description': 'md5:694d73ee79e535953cf2488562288eee', +                'uploader': 'brazilwtf', +            }, +            'add_ie': [LiveLeakIE.ie_key()],          },          # Duplicated embedded video URLs          { @@ -1509,6 +1581,22 @@ class GenericIE(InfoExtractor):              },              'add_ie': ['BrightcoveLegacy'],          }, +        # Nexx embed +        { +            'url': 'https://www.funk.net/serien/5940e15073f6120001657956/items/593efbb173f6120001657503', +            'info_dict': { +                'id': '247746', +                'ext': 'mp4', +                'title': "Yesterday's Jam (OV)", +                'description': 'md5:09bc0984723fed34e2581624a84e05f0', +                'timestamp': 1492594816, +                'upload_date': '20170419', +            }, +            'params': { +                'format': 'bestvideo', +                'skip_download': True, +            }, +        },          # Facebook <iframe> embed          {              'url': 'https://www.hostblogger.de/blog/archives/6181-Auto-jagt-Betonmischer.html', @@ -1519,6 +1607,21 @@ class GenericIE(InfoExtractor):                  'title': 'Facebook video #599637780109885',              },          }, +        # Facebook <iframe> embed, plugin video +        { +            'url': 'http://5pillarsuk.com/2017/06/07/tariq-ramadan-disagrees-with-pr-exercise-by-imams-refusing-funeral-prayers-for-london-attackers/', +            'info_dict': { +                'id': '1754168231264132', +                'ext': 'mp4', +                'title': 'About the Imams and Religious leaders refusing to perform funeral prayers for...', +                'uploader': 'Tariq Ramadan (official)', +                'timestamp': 1496758379, +                'upload_date': '20170606', +            }, +            'params': { +                'skip_download': True, +            }, +        },          # Facebook API embed          {              'url': 'http://www.lothype.com/blue-stars-2016-preview-standstill-full-show/', @@ -1696,6 +1799,21 @@ class GenericIE(InfoExtractor):              'playlist_mincount': 5,          },          { +            # Limelight embed (LimelightPlayerUtil.embed) +            'url': 'https://tv5.ca/videos?v=xuu8qowr291ri', +            'info_dict': { +                'id': '95d035dc5c8a401588e9c0e6bd1e9c92', +                'ext': 'mp4', +                'title': '07448641', +                'timestamp': 1499890639, +                'upload_date': '20170712', +            }, +            'params': { +                'skip_download': True, +            }, +            'add_ie': ['LimelightMedia'], +        }, +        {              'url': 'http://kron4.com/2017/04/28/standoff-with-walnut-creek-murder-suspect-ends-with-arrest/',              'info_dict': {                  'id': 'standoff-with-walnut-creek-murder-suspect-ends-with-arrest', @@ -1718,6 +1836,49 @@ class GenericIE(InfoExtractor):              },              'add_ie': [WashingtonPostIE.ie_key()],          }, +        { +            # Mediaset embed +            'url': 'http://www.tgcom24.mediaset.it/politica/serracchiani-voglio-vivere-in-una-societa-aperta-reazioni-sproporzionate-_3071354-201702a.shtml', +            'info_dict': { +                'id': '720642', +                'ext': 'mp4', +                'title': 'Serracchiani: "Voglio vivere in una società aperta, con tutela del patto di fiducia"', +            }, +            'params': { +                'skip_download': True, +            }, +            'add_ie': [MediasetIE.ie_key()], +        }, +        { +            # JOJ.sk embeds +            'url': 'https://www.noviny.sk/slovensko/238543-slovenskom-sa-prehnala-vlna-silnych-burok', +            'info_dict': { +                'id': '238543-slovenskom-sa-prehnala-vlna-silnych-burok', +                'title': 'Slovenskom sa prehnala vlna silných búrok', +            }, +            'playlist_mincount': 5, +            'add_ie': [JojIE.ie_key()], +        }, +        { +            # AMP embed (see https://www.ampproject.org/docs/reference/components/amp-video) +            'url': 'https://tvrain.ru/amp/418921/', +            'md5': 'cc00413936695987e8de148b67d14f1d', +            'info_dict': { +                'id': '418921', +                'ext': 'mp4', +                'title': 'Стас Намин: «Мы нарушили девственность Кремля»', +            }, +        }, +        { +            # vzaar embed +            'url': 'http://help.vzaar.com/article/165-embedding-video', +            'md5': '7e3919d9d2620b89e3e00bec7fe8c9d4', +            'info_dict': { +                'id': '8707641', +                'ext': 'mp4', +                'title': 'Building A Business Online: Principal Chairs Q & A', +            }, +        },          # {          #     # TODO: find another test          #     # http://schema.org/VideoObject @@ -1867,7 +2028,7 @@ class GenericIE(InfoExtractor):          if head_response is not False:              # Check for redirect -            new_url = head_response.geturl() +            new_url = compat_str(head_response.geturl())              if url != new_url:                  self.report_following_redirect(new_url)                  if force_videoid: @@ -1892,14 +2053,14 @@ class GenericIE(InfoExtractor):          content_type = head_response.headers.get('Content-Type', '').lower()          m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)', content_type)          if m: -            format_id = m.group('format_id') +            format_id = compat_str(m.group('format_id'))              if format_id.endswith('mpegurl'):                  formats = self._extract_m3u8_formats(url, video_id, 'mp4')              elif format_id == 'f4m':                  formats = self._extract_f4m_formats(url, video_id)              else:                  formats = [{ -                    'format_id': m.group('format_id'), +                    'format_id': format_id,                      'url': url,                      'vcodec': 'none' if m.group('type') == 'audio' else None                  }] @@ -1968,7 +2129,7 @@ class GenericIE(InfoExtractor):              elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):                  info_dict['formats'] = self._parse_mpd_formats(                      doc, video_id, -                    mpd_base_url=full_response.geturl().rpartition('/')[0], +                    mpd_base_url=compat_str(full_response.geturl()).rpartition('/')[0],                      mpd_url=url)                  self._sort_formats(info_dict['formats'])                  return info_dict @@ -2017,6 +2178,13 @@ class GenericIE(InfoExtractor):          video_description = self._og_search_description(webpage, default=None)          video_thumbnail = self._og_search_thumbnail(webpage, default=None) +        info_dict.update({ +            'title': video_title, +            'description': video_description, +            'thumbnail': video_thumbnail, +            'age_limit': age_limit, +        }) +          # Look for Brightcove Legacy Studio embeds          bc_urls = BrightcoveLegacyIE._extract_brightcove_urls(webpage)          if bc_urls: @@ -2038,6 +2206,16 @@ class GenericIE(InfoExtractor):          if bc_urls:              return self.playlist_from_matches(bc_urls, video_id, video_title, ie='BrightcoveNew') +        # Look for Nexx embeds +        nexx_urls = NexxIE._extract_urls(webpage) +        if nexx_urls: +            return self.playlist_from_matches(nexx_urls, video_id, video_title, ie=NexxIE.ie_key()) + +        # Look for Nexx iFrame embeds +        nexx_embed_urls = NexxEmbedIE._extract_urls(webpage) +        if nexx_embed_urls: +            return self.playlist_from_matches(nexx_embed_urls, video_id, video_title, ie=NexxEmbedIE.ie_key()) +          # Look for ThePlatform embeds          tp_urls = ThePlatformIE._extract_urls(webpage)          if tp_urls: @@ -2065,36 +2243,11 @@ class GenericIE(InfoExtractor):          if vid_me_embed_url is not None:              return self.url_result(vid_me_embed_url, 'Vidme') -        # Look for embedded YouTube player -        matches = re.findall(r'''(?x) -            (?: -                <iframe[^>]+?src=| -                data-video-url=| -                <embed[^>]+?src=| -                embedSWF\(?:\s*| -                <object[^>]+data=| -                new\s+SWFObject\( -            ) -            (["\']) -                (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/ -                (?:embed|v|p)/.+?) -            \1''', webpage) -        if matches: +        # Look for YouTube embeds +        youtube_urls = YoutubeIE._extract_urls(webpage) +        if youtube_urls:              return self.playlist_from_matches( -                matches, video_id, video_title, lambda m: unescapeHTML(m[1])) - -        # Look for lazyYT YouTube embed -        matches = re.findall( -            r'class="lazyYT" data-youtube-id="([^"]+)"', webpage) -        if matches: -            return self.playlist_from_matches(matches, video_id, video_title, lambda m: unescapeHTML(m)) - -        # Look for Wordpress "YouTube Video Importer" plugin -        matches = re.findall(r'''(?x)<div[^>]+ -            class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+ -            data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage) -        if matches: -            return self.playlist_from_matches(matches, video_id, video_title, lambda m: m[-1]) +                youtube_urls, video_id, video_title, ie=YoutubeIE.ie_key())          matches = DailymotionIE._extract_urls(webpage)          if matches: @@ -2110,58 +2263,27 @@ class GenericIE(InfoExtractor):                  return self.playlist_from_matches(                      playlists, video_id, video_title, lambda p: '//dailymotion.com/playlist/%s' % p) -        # Look for embedded Wistia player -        match = re.search( -            r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage) -        if match: -            embed_url = self._proto_relative_url( -                unescapeHTML(match.group('url'))) -            return { -                '_type': 'url_transparent', -                'url': embed_url, -                'ie_key': 'Wistia', -                'uploader': video_uploader, -            } +        # Look for DailyMail embeds +        dailymail_urls = DailyMailIE._extract_urls(webpage) +        if dailymail_urls: +            return self.playlist_from_matches( +                dailymail_urls, video_id, video_title, ie=DailyMailIE.ie_key()) -        match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage) -        if match: +        # Look for embedded Wistia player +        wistia_url = WistiaIE._extract_url(webpage) +        if wistia_url:              return {                  '_type': 'url_transparent', -                'url': 'wistia:%s' % match.group('id'), -                'ie_key': 'Wistia', +                'url': self._proto_relative_url(wistia_url), +                'ie_key': WistiaIE.ie_key(),                  'uploader': video_uploader,              } -        match = re.search( -            r'''(?sx) -                <script[^>]+src=(["'])(?:https?:)?//fast\.wistia\.com/assets/external/E-v1\.js\1[^>]*>.*? -                <div[^>]+class=(["']).*?\bwistia_async_(?P<id>[a-z0-9]+)\b.*?\2 -            ''', webpage) -        if match: -            return self.url_result(self._proto_relative_url( -                'wistia:%s' % match.group('id')), 'Wistia') -          # Look for SVT player          svt_url = SVTIE._extract_url(webpage)          if svt_url:              return self.url_result(svt_url, 'SVT') -        # Look for embedded condenast player -        matches = re.findall( -            r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="(https?://player\.cnevids\.com/embed/[^"]+")', -            webpage) -        if matches: -            return { -                '_type': 'playlist', -                'entries': [{ -                    '_type': 'url', -                    'ie_key': 'CondeNast', -                    'url': ma, -                } for ma in matches], -                'title': video_title, -                'id': video_id, -            } -          # Look for Bandcamp pages with custom domain          mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)          if mobj is not None: @@ -2198,6 +2320,7 @@ class GenericIE(InfoExtractor):          # Look for Ooyala videos          mobj = (re.search(r'player\.ooyala\.com/[^"?]+[?#][^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or                  re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or +                re.search(r'OO\.Player\.create\.apply\(\s*OO\.Player\s*,\s*op\(\s*\[\s*[\'"][^\'"]*[\'"]\s*,\s*[\'"](?P<ec>.{32})[\'"]', webpage) or                  re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage) or                  re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))          if mobj is not None: @@ -2243,9 +2366,9 @@ class GenericIE(InfoExtractor):              return self.url_result(mobj.group('url'))          # Look for embedded Facebook player -        facebook_url = FacebookIE._extract_url(webpage) -        if facebook_url is not None: -            return self.url_result(facebook_url, 'Facebook') +        facebook_urls = FacebookIE._extract_urls(webpage) +        if facebook_urls: +            return self.playlist_from_matches(facebook_urls, video_id, video_title)          # Look for embedded VK player          mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage) @@ -2442,12 +2565,12 @@ class GenericIE(InfoExtractor):          if kaltura_url:              return self.url_result(smuggle_url(kaltura_url, {'source_url': url}), KalturaIE.ie_key()) -        # Look for Eagle.Platform embeds +        # Look for EaglePlatform embeds          eagleplatform_url = EaglePlatformIE._extract_url(webpage)          if eagleplatform_url: -            return self.url_result(eagleplatform_url, EaglePlatformIE.ie_key()) +            return self.url_result(smuggle_url(eagleplatform_url, {'referrer': url}), EaglePlatformIE.ie_key()) -        # Look for ClipYou (uses Eagle.Platform) embeds +        # Look for ClipYou (uses EaglePlatform) embeds          mobj = re.search(              r'<iframe[^>]+src="https?://(?P<host>media\.clipyou\.ru)/index/player\?.*\brecord_id=(?P<id>\d+).*"', webpage)          if mobj is not None: @@ -2555,29 +2678,6 @@ class GenericIE(InfoExtractor):              return self.playlist_result(                  limelight_urls, video_id, video_title, video_description) -        mobj = re.search(r'LimelightPlayer\.doLoad(Media|Channel|ChannelList)\(["\'](?P<id>[a-z0-9]{32})', webpage) -        if mobj: -            lm = { -                'Media': 'media', -                'Channel': 'channel', -                'ChannelList': 'channel_list', -            } -            return self.url_result(smuggle_url('limelight:%s:%s' % ( -                lm[mobj.group(1)], mobj.group(2)), {'source_url': url}), -                'Limelight%s' % mobj.group(1), mobj.group(2)) - -        mobj = re.search( -            r'''(?sx) -                <object[^>]+class=(["\'])LimelightEmbeddedPlayerFlash\1[^>]*>.*? -                    <param[^>]+ -                        name=(["\'])flashVars\2[^>]+ -                        value=(["\'])(?:(?!\3).)*mediaId=(?P<id>[a-z0-9]{32}) -            ''', webpage) -        if mobj: -            return self.url_result(smuggle_url( -                'limelight:media:%s' % mobj.group('id'), -                {'source_url': url}), 'LimelightMedia', mobj.group('id')) -          # Look for Anvato embeds          anvato_urls = AnvatoIE._extract_urls(self, webpage, video_id)          if anvato_urls: @@ -2645,9 +2745,9 @@ class GenericIE(InfoExtractor):                  self._proto_relative_url(instagram_embed_url), InstagramIE.ie_key())          # Look for LiveLeak embeds -        liveleak_url = LiveLeakIE._extract_url(webpage) -        if liveleak_url: -            return self.url_result(liveleak_url, 'LiveLeak') +        liveleak_urls = LiveLeakIE._extract_urls(webpage) +        if liveleak_urls: +            return self.playlist_from_matches(liveleak_urls, video_id, video_title)          # Look for 3Q SDN embeds          threeqsdn_url = ThreeQSDNIE._extract_url(webpage) @@ -2699,7 +2799,7 @@ class GenericIE(InfoExtractor):          rutube_urls = RutubeIE._extract_urls(webpage)          if rutube_urls:              return self.playlist_from_matches( -                rutube_urls, ie=RutubeIE.ie_key()) +                rutube_urls, video_id, video_title, ie=RutubeIE.ie_key())          # Look for WashingtonPost embeds          wapo_urls = WashingtonPostIE._extract_urls(webpage) @@ -2707,18 +2807,44 @@ class GenericIE(InfoExtractor):              return self.playlist_from_matches(                  wapo_urls, video_id, video_title, ie=WashingtonPostIE.ie_key()) -        # Looking for http://schema.org/VideoObject -        json_ld = self._search_json_ld( -            webpage, video_id, default={}, expected_type='VideoObject') -        if json_ld.get('url'): -            info_dict.update({ -                'title': video_title or info_dict['title'], -                'description': video_description, -                'thumbnail': video_thumbnail, -                'age_limit': age_limit -            }) -            info_dict.update(json_ld) -            return info_dict +        # Look for Mediaset embeds +        mediaset_urls = MediasetIE._extract_urls(webpage) +        if mediaset_urls: +            return self.playlist_from_matches( +                mediaset_urls, video_id, video_title, ie=MediasetIE.ie_key()) + +        # Look for JOJ.sk embeds +        joj_urls = JojIE._extract_urls(webpage) +        if joj_urls: +            return self.playlist_from_matches( +                joj_urls, video_id, video_title, ie=JojIE.ie_key()) + +        # Look for megaphone.fm embeds +        mpfn_urls = MegaphoneIE._extract_urls(webpage) +        if mpfn_urls: +            return self.playlist_from_matches( +                mpfn_urls, video_id, video_title, ie=MegaphoneIE.ie_key()) + +        # Look for vzaar embeds +        vzaar_urls = VzaarIE._extract_urls(webpage) +        if vzaar_urls: +            return self.playlist_from_matches( +                vzaar_urls, video_id, video_title, ie=VzaarIE.ie_key()) + +        def merge_dicts(dict1, dict2): +            merged = {} +            for k, v in dict1.items(): +                if v is not None: +                    merged[k] = v +            for k, v in dict2.items(): +                if v is None: +                    continue +                if (k not in merged or +                        (isinstance(v, compat_str) and v and +                            isinstance(merged[k], compat_str) and +                            not merged[k])): +                    merged[k] = v +            return merged          # Look for HTML5 media          entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls') @@ -2736,9 +2862,13 @@ class GenericIE(InfoExtractor):          if jwplayer_data:              info = self._parse_jwplayer_data(                  jwplayer_data, video_id, require_title=False, base_url=url) -            if not info.get('title'): -                info['title'] = video_title -            return info +            return merge_dicts(info, info_dict) + +        # Looking for http://schema.org/VideoObject +        json_ld = self._search_json_ld( +            webpage, video_id, default={}, expected_type='VideoObject') +        if json_ld.get('url'): +            return merge_dicts(json_ld, info_dict)          def check_video(vurl):              if YoutubeIE.suitable(vurl): | 
