diff options
| -rw-r--r-- | ChangeLog | 1 | ||||
| -rw-r--r-- | youtube_dl/extractor/snotr.py | 38 | 
2 files changed, 22 insertions, 17 deletions
| @@ -7,6 +7,7 @@ Core  * Fix js_to_json(): correct octal or hexadecimal number detection  Extractors +* [snotr] Fix extraction (#10338)  * [n-tv.de] Fix extraction (#10331) diff --git a/youtube_dl/extractor/snotr.py b/youtube_dl/extractor/snotr.py index 0d1ab07f8..3bb78cb84 100644 --- a/youtube_dl/extractor/snotr.py +++ b/youtube_dl/extractor/snotr.py @@ -5,9 +5,9 @@ import re  from .common import InfoExtractor  from ..utils import ( -    float_or_none, -    str_to_int,      parse_duration, +    parse_filesize, +    str_to_int,  ) @@ -17,21 +17,24 @@ class SnotrIE(InfoExtractor):          'url': 'http://www.snotr.com/video/13708/Drone_flying_through_fireworks',          'info_dict': {              'id': '13708', -            'ext': 'flv', +            'ext': 'mp4',              'title': 'Drone flying through fireworks!', -            'duration': 247, -            'filesize_approx': 98566144, +            'duration': 248, +            'filesize_approx': 40700000,              'description': 'A drone flying through Fourth of July Fireworks', -        } +            'thumbnail': 're:^https?://.*\.jpg$', +        }, +        'expected_warnings': ['description'],      }, {          'url': 'http://www.snotr.com/video/530/David_Letteman_-_George_W_Bush_Top_10',          'info_dict': {              'id': '530', -            'ext': 'flv', +            'ext': 'mp4',              'title': 'David Letteman - George W. Bush Top 10',              'duration': 126, -            'filesize_approx': 8912896, +            'filesize_approx': 8500000,              'description': 'The top 10 George W. Bush moments, brought to you by David Letterman!', +            'thumbnail': 're:^https?://.*\.jpg$',          }      }] @@ -43,26 +46,27 @@ class SnotrIE(InfoExtractor):          title = self._og_search_title(webpage)          description = self._og_search_description(webpage) -        video_url = 'http://cdn.videos.snotr.com/%s.flv' % video_id +        info_dict = self._parse_html5_media_entries(url, webpage, video_id)[0]          view_count = str_to_int(self._html_search_regex( -            r'<p>\n<strong>Views:</strong>\n([\d,\.]+)</p>', +            r'<p[^>]*>\s*<strong[^>]*>Views:</strong>\s*<span[^>]*>([\d,\.]+)',              webpage, 'view count', fatal=False))          duration = parse_duration(self._html_search_regex( -            r'<p>\n<strong>Length:</strong>\n\s*([0-9:]+).*?</p>', +            r'<p[^>]*>\s*<strong[^>]*>Length:</strong>\s*<span[^>]*>([\d:]+)',              webpage, 'duration', fatal=False)) -        filesize_approx = float_or_none(self._html_search_regex( -            r'<p>\n<strong>Filesize:</strong>\n\s*([0-9.]+)\s*megabyte</p>', -            webpage, 'filesize', fatal=False), invscale=1024 * 1024) +        filesize_approx = parse_filesize(self._html_search_regex( +            r'<p[^>]*>\s*<strong[^>]*>Filesize:</strong>\s*<span[^>]*>([^<]+)', +            webpage, 'filesize', fatal=False)) -        return { +        info_dict.update({              'id': video_id,              'description': description,              'title': title, -            'url': video_url,              'view_count': view_count,              'duration': duration,              'filesize_approx': filesize_approx, -        } +        }) + +        return info_dict | 
