aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Sergey M․ <dstftw@gmail.com> 2018-02-09 21:17:02 +0700
committer: Sergey M․ <dstftw@gmail.com> 2018-02-09 21:17:02 +0700
commit: 9e167e1ee311b43dbe9c3911bd0a79f0db15efef (patch)
tree: 5fe8bc2c0816c6639ea7cfbe716f40e2d8685fac
parent: 5828489072e97f7e29a611ce018ab62cf1c2d4d9 (diff)
[newgrounds] Fix metadata extraction (closes #15531)
-rw-r--r-- youtube_dl/extractor/newgrounds.py | 16
1 files changed, 9 insertions, 7 deletions
diff --git a/youtube_dl/extractor/newgrounds.py b/youtube_dl/extractor/newgrounds.py
index 0e26f8399..82e7cf522 100644
--- a/youtube_dl/extractor/newgrounds.py
+++ b/youtube_dl/extractor/newgrounds.py
@@ -87,19 +87,21 @@ class NewgroundsIE(InfoExtractor):
self._check_formats(formats, media_id)
self._sort_formats(formats)
- uploader = self._search_regex(
- r'(?:Author|Writer)\s*<a[^>]+>([^<]+)', webpage, 'uploader',
+ uploader = self._html_search_regex(
+ (r'(?s)<h4[^>]*>(.+?)</h4>.*?<em>\s*Author\s*</em>',
+ r'(?:Author|Writer)\s*<a[^>]+>([^<]+)'), webpage, 'uploader',
fatal=False)
- timestamp = unified_timestamp(self._search_regex(
- r'<dt>Uploaded</dt>\s*<dd>([^<]+)', webpage, 'timestamp',
+ timestamp = unified_timestamp(self._html_search_regex(
+ (r'<dt>\s*Uploaded\s*</dt>\s*<dd>([^<]+</dd>\s*<dd>[^<]+)',
+ r'<dt>\s*Uploaded\s*</dt>\s*<dd>([^<]+)'), webpage, 'timestamp',
default=None))
duration = parse_duration(self._search_regex(
- r'<dd>Song\s*</dd><dd>.+?</dd><dd>([^<]+)', webpage, 'duration',
- default=None))
+ r'(?s)<dd>\s*Song\s*</dd>\s*<dd>.+?</dd>\s*<dd>([^<]+)', webpage,
+ 'duration', default=None))
filesize_approx = parse_filesize(self._html_search_regex(
- r'<dd>Song\s*</dd><dd>(.+?)</dd>', webpage, 'filesize',
+ r'(?s)<dd>\s*Song\s*</dd>\s*<dd>(.+?)</dd>', webpage, 'filesize',
default=None))
if len(formats) == 1:
formats[0]['filesize_approx'] = filesize_approx