about | summary | refs | log | tree | commit | diff
path: root/youtube_dl/extractor
diff options
context:
space:
mode:
author Sergey M․ <dstftw@gmail.com> 2018-06-20 23:51:14 +0700
committer Sergey M․ <dstftw@gmail.com> 2018-06-20 23:53:37 +0700
commit f51f526b0acb5943332452d1958581cb1135bfe9 (patch)
tree 85e50283e2e22f21d43b9f882aad5c886bd8e913 /youtube_dl/extractor
parent c9b983ff827aae25a0fe2116c98c26702c581b81 (diff)
[foxnews] Add support for iframe embeds (closes #15810, closes #16711)
Diffstat (limited to 'youtube_dl/extractor')
-rw-r--r-- youtube_dl/extractor/foxnews.py 42
-rw-r--r-- youtube_dl/extractor/generic.py 6
2 files changed, 41 insertions, 7 deletions
diff --git a/youtube_dl/extractor/foxnews.py b/youtube_dl/extractor/foxnews.py
index dc0662f74..4c402806a 100644
--- a/youtube_dl/extractor/foxnews.py
+++ b/youtube_dl/extractor/foxnews.py
@@ -58,6 +58,14 @@ class FoxNewsIE(AMPIE):
},
]
+ @staticmethod
+ def _extract_urls(webpage):
+ return [
+ mobj.group('url')
+ for mobj in re.finditer(
+ r'<(?:amp-)?iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//video\.foxnews\.com/v/video-embed\.html?.*?\bvideo_id=\d+.*?)\1',
+ webpage)]
+
def _real_extract(self, url):
host, video_id = re.match(self._VALID_URL, url).groups()
@@ -71,18 +79,35 @@ class FoxNewsArticleIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?foxnews\.com/(?!v)([^/]+/)+(?P<id>[a-z-]+)'
IE_NAME = 'foxnews:article'
- _TEST = {
+ _TESTS = [{
+ # data-video-id
'url': 'http://www.foxnews.com/politics/2016/09/08/buzz-about-bud-clinton-camp-denies-claims-wore-earpiece-at-forum.html',
- 'md5': '62aa5a781b308fdee212ebb6f33ae7ef',
+ 'md5': '83d44e1aff1433e7a29a7b537d1700b5',
'info_dict': {
'id': '5116295019001',
'ext': 'mp4',
'title': 'Trump and Clinton asked to defend positions on Iraq War',
'description': 'Veterans react on \'The Kelly File\'',
- 'timestamp': 1473299755,
+ 'timestamp': 1473301045,
'upload_date': '20160908',
},
- }
+ }, {
+ # iframe embed
+ 'url': 'http://www.foxnews.com/us/2018/03/09/parkland-survivor-kyle-kashuv-on-meeting-trump-his-app-to-prevent-another-school-shooting.amp.html?__twitter_impression=true',
+ 'info_dict': {
+ 'id': '5748266721001',
+ 'ext': 'flv',
+ 'title': 'Kyle Kashuv has a positive message for the Trump White House',
+ 'description': 'Marjory Stoneman Douglas student disagrees with classmates.',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 229,
+ 'timestamp': 1520594670,
+ 'upload_date': '20180309',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }]
def _real_extract(self, url):
display_id = self._match_id(url)
@@ -90,10 +115,13 @@ class FoxNewsArticleIE(InfoExtractor):
video_id = self._html_search_regex(
r'data-video-id=([\'"])(?P<id>[^\'"]+)\1',
- webpage, 'video ID', group='id')
+ webpage, 'video ID', group='id', default=None)
+ if video_id:
+ return self.url_result(
+ 'http://video.foxnews.com/v/' + video_id, FoxNewsIE.ie_key())
+
return self.url_result(
- 'http://video.foxnews.com/v/' + video_id,
- FoxNewsIE.ie_key())
+ FoxNewsIE._extract_urls(webpage)[0], FoxNewsIE.ie_key())
class FoxNewsInsiderIE(InfoExtractor):
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index 6c0f772ac..d71cb9050 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -111,6 +111,7 @@ from .cloudflarestream import CloudflareStreamIE
from .peertube import PeerTubeIE
from .indavideo import IndavideoEmbedIE
from .apa import APAIE
+from .foxnews import FoxNewsIE
class GenericIE(InfoExtractor):
@@ -3091,6 +3092,11 @@ class GenericIE(InfoExtractor):
return self.playlist_from_matches(
apa_urls, video_id, video_title, ie=APAIE.ie_key())
+ foxnews_urls = FoxNewsIE._extract_urls(webpage)
+ if foxnews_urls:
+ return self.playlist_from_matches(
+ foxnews_urls, video_id, video_title, ie=FoxNewsIE.ie_key())
+
sharevideos_urls = [mobj.group('url') for mobj in re.finditer(
r'<iframe[^>]+?\bsrc\s*=\s*(["\'])(?P<url>(?:https?:)?//embed\.share-videos\.se/auto/embed/\d+\?.*?\buid=\d+.*?)\1',
webpage)]