about summary refs log tree commit diff
diff options
context:
space:
mode:
author Yen Chi Hsuan <yan12125@gmail.com> 2016-10-12 01:40:28 +0800
committer Yen Chi Hsuan <yan12125@gmail.com> 2016-10-12 01:40:28 +0800
commit cea364f70c97dad933fa38698f3c9df1bdb485cf (patch)
tree fd5f43969b383f15474506bc4939b5da23d40692
parent 55642487f072565bea3b2826b836a1a3159a3807 (diff)
[extractor/common] Support HTML media elements without child nodes
-rw-r--r-- ChangeLog 1
-rw-r--r-- youtube_dl/extractor/common.py 6
2 files changed, 6 insertions, 1 deletions
diff --git a/ChangeLog b/ChangeLog
index 9a7e7133b..49488c888 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,6 +1,7 @@
version <unreleased>
Core
++ Support HTML media elements without child nodes
* [Makefile] Support for GNU make < 4 is fixed; BSD make dropped (#9387)
Extractors
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index da192728f..431cef831 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -1802,7 +1802,11 @@ class InfoExtractor(object):
return is_plain_url, formats
entries = []
- for media_tag, media_type, media_content in re.findall(r'(?s)(<(?P<tag>video|audio)[^>]*>)(.*?)</(?P=tag)>', webpage):
+ media_tags = [(media_tag, media_type, '')
+ for media_tag, media_type
+ in re.findall(r'(?s)(<(video|audio)[^>]*/>)', webpage)]
+ media_tags.extend(re.findall(r'(?s)(<(?P<tag>video|audio)[^>]*>)(.*?)</(?P=tag)>', webpage))
+ for media_tag, media_type, media_content in media_tags:
media_info = {
'formats': [],
'subtitles': {},