diff options
author | Sergey M․ <dstftw@gmail.com> | 2016-12-24 22:46:27 +0700 |
---|---|---|
committer | Sergey M․ <dstftw@gmail.com> | 2016-12-24 22:46:27 +0700 |
commit | 53a664edf4bf713df0159e604bbc131dde5ed1e6 (patch) | |
tree | e36b5e632991e7ff0eb3ab31f5d91d6d1ef61296 | |
parent | 264e77c406a3b14f15aafcd036524cb6fe86aa20 (diff) |
[brightcove:legacy] Improve embeds detection (closes #11523)
-rw-r--r-- | youtube_dl/extractor/brightcove.py | 13 | ||||
-rw-r--r-- | youtube_dl/extractor/generic.py | 24 |
2 files changed, 29 insertions, 8 deletions
diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index ac5f32541..aa2923ccf 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -232,13 +232,16 @@ class BrightcoveLegacyIE(InfoExtractor): """Return a list of all Brightcove URLs from the webpage """ url_m = re.search( - r'<meta\s+property=[\'"]og:video[\'"]\s+content=[\'"](https?://(?:secure|c)\.brightcove.com/[^\'"]+)[\'"]', - webpage) + r'''(?x) + <meta\s+ + (?:property|itemprop)=([\'"])(?:og:video|embedURL)\1[^>]+ + content=([\'"])(?P<url>https?://(?:secure|c)\.brightcove.com/(?:(?!\2).)+)\2 + ''', webpage) if url_m: - url = unescapeHTML(url_m.group(1)) + url = unescapeHTML(url_m.group('url')) # Some sites don't add it, we can't download with this url, for example: # http://www.ktvu.com/videos/news/raw-video-caltrain-releases-video-of-man-almost/vCTZdY/ - if 'playerKey' in url or 'videoId' in url: + if 'playerKey' in url or 'videoId' in url or 'idVideo' in url: return [url] matches = re.findall( @@ -259,7 +262,7 @@ class BrightcoveLegacyIE(InfoExtractor): url, smuggled_data = unsmuggle_url(url, {}) # Change the 'videoId' and others field to '@videoPlayer' - url = re.sub(r'(?<=[?&])(videoI(d|D)|bctid)', '%40videoPlayer', url) + url = re.sub(r'(?<=[?&])(videoI(d|D)|idVideo|bctid)', '%40videoPlayer', url) # Change bckey (used by bcove.me urls) to playerKey url = re.sub(r'(?<=[?&])bckey', 'playerKey', url) mobj = re.match(self._VALID_URL, url) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 87daf83f8..79d10a1d1 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -344,10 +344,10 @@ class GenericIE(InfoExtractor): }, 'skip': 'There is a limit of 200 free downloads / month for the test song', }, - # embedded brightcove video - # it also tests brightcove videos that need to set the 'Referer' in the - # http requests { + # embedded brightcove video + # it also tests brightcove videos that need to set the 'Referer' + # in the http requests 'add_ie': ['BrightcoveLegacy'], 'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/', 'info_dict': { @@ -362,6 +362,24 @@ class GenericIE(InfoExtractor): }, }, { + # embedded with itemprop embedURL and video id spelled as `idVideo` + 'add_id': ['BrightcoveLegacy'], + 'url': 'http://bfmbusiness.bfmtv.com/mediaplayer/chroniques/olivier-delamarche/', + 'info_dict': { + 'id': '5255628253001', + 'ext': 'mp4', + 'title': 'md5:37c519b1128915607601e75a87995fc0', + 'description': 'md5:37f7f888b434bb8f8cc8dbd4f7a4cf26', + 'uploader': 'BFM BUSINESS', + 'uploader_id': '876450612001', + 'timestamp': 1482255315, + 'upload_date': '20161220', + }, + 'params': { + 'skip_download': True, + }, + }, + { # https://github.com/rg3/youtube-dl/issues/2253 'url': 'http://bcove.me/i6nfkrc3', 'md5': '0ba9446db037002366bab3b3eb30c88c', |