diff options
| field | value | date |
|---|---|---|
| author | Sergey M․ <dstftw@gmail.com> | 2016-12-24 22:46:27 +0700 |
| committer | Sergey M․ <dstftw@gmail.com> | 2016-12-24 22:46:27 +0700 |
| commit | 53a664edf4bf713df0159e604bbc131dde5ed1e6 (patch) | |
| tree | e36b5e632991e7ff0eb3ab31f5d91d6d1ef61296 | |
| parent | 264e77c406a3b14f15aafcd036524cb6fe86aa20 (diff) | |
[brightcove:legacy] Improve embeds detection (closes #11523)
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | youtube_dl/extractor/brightcove.py | 13 |
| -rw-r--r-- | youtube_dl/extractor/generic.py | 24 |

2 files changed, 29 insertions, 8 deletions
```diff
diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py
index ac5f32541..aa2923ccf 100644
--- a/youtube_dl/extractor/brightcove.py
+++ b/youtube_dl/extractor/brightcove.py
@@ -232,13 +232,16 @@ class BrightcoveLegacyIE(InfoExtractor):
         """Return a list of all Brightcove URLs from the webpage """
 
         url_m = re.search(
-            r'<meta\s+property=[\'"]og:video[\'"]\s+content=[\'"](https?://(?:secure|c)\.brightcove.com/[^\'"]+)[\'"]',
-            webpage)
+            r'''(?x)
+                <meta\s+
+                    (?:property|itemprop)=([\'"])(?:og:video|embedURL)\1[^>]+
+                    content=([\'"])(?P<url>https?://(?:secure|c)\.brightcove.com/(?:(?!\2).)+)\2
+            ''', webpage)
         if url_m:
-            url = unescapeHTML(url_m.group(1))
+            url = unescapeHTML(url_m.group('url'))
             # Some sites don't add it, we can't download with this url, for example:
             # http://www.ktvu.com/videos/news/raw-video-caltrain-releases-video-of-man-almost/vCTZdY/
-            if 'playerKey' in url or 'videoId' in url:
+            if 'playerKey' in url or 'videoId' in url or 'idVideo' in url:
                 return [url]
 
         matches = re.findall(
@@ -259,7 +262,7 @@ class BrightcoveLegacyIE(InfoExtractor):
         url, smuggled_data = unsmuggle_url(url, {})
 
         # Change the 'videoId' and others field to '@videoPlayer'
-        url = re.sub(r'(?<=[?&])(videoI(d|D)|bctid)', '%40videoPlayer', url)
+        url = re.sub(r'(?<=[?&])(videoI(d|D)|idVideo|bctid)', '%40videoPlayer', url)
         # Change bckey (used by bcove.me urls) to playerKey
         url = re.sub(r'(?<=[?&])bckey', 'playerKey', url)
         mobj = re.match(self._VALID_URL, url)
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index 87daf83f8..79d10a1d1 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -344,10 +344,10 @@ class GenericIE(InfoExtractor):
             },
             'skip': 'There is a limit of 200 free downloads / month for the test song',
         },
-        # embedded brightcove video
-        # it also tests brightcove videos that need to set the 'Referer' in the
-        # http requests
         {
+            # embedded brightcove video
+            # it also tests brightcove videos that need to set the 'Referer'
+            # in the http requests
             'add_ie': ['BrightcoveLegacy'],
             'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
             'info_dict': {
@@ -362,6 +362,24 @@ class GenericIE(InfoExtractor):
             },
         },
         {
+            # embedded with itemprop embedURL and video id spelled as `idVideo`
+            'add_id': ['BrightcoveLegacy'],
+            'url': 'http://bfmbusiness.bfmtv.com/mediaplayer/chroniques/olivier-delamarche/',
+            'info_dict': {
+                'id': '5255628253001',
+                'ext': 'mp4',
+                'title': 'md5:37c519b1128915607601e75a87995fc0',
+                'description': 'md5:37f7f888b434bb8f8cc8dbd4f7a4cf26',
+                'uploader': 'BFM BUSINESS',
+                'uploader_id': '876450612001',
+                'timestamp': 1482255315,
+                'upload_date': '20161220',
+            },
+            'params': {
+                'skip_download': True,
+            },
+        },
+        {
             # https://github.com/rg3/youtube-dl/issues/2253
             'url': 'http://bcove.me/i6nfkrc3',
             'md5': '0ba9446db037002366bab3b3eb30c88c',
```
