From 8adec2b9e05d356a6996ea6f85aa9b4bf0665ce2 Mon Sep 17 00:00:00 2001 From: hassaanaliw Date: Sat, 19 Jul 2014 22:49:25 +0500 Subject: [snotr] Add new extractor --- youtube_dl/extractor/snotr.py | 73 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) create mode 100644 youtube_dl/extractor/snotr.py (limited to 'youtube_dl/extractor/snotr.py') diff --git a/youtube_dl/extractor/snotr.py b/youtube_dl/extractor/snotr.py new file mode 100644 index 000000000..f89e81bf3 --- /dev/null +++ b/youtube_dl/extractor/snotr.py @@ -0,0 +1,73 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + +from ..utils import ( + + str_to_int, + parse_iso8601, + + + +) + +class SnotrIE(InfoExtractor): + _VALID_URL = r'http?://(?:www\.)?snotr\.com/video/(?P\d+)/([\w]+)' + _TESTS =[ { + 'url': 'http://www.snotr.com/video/13708/Drone_flying_through_fireworks', + 'info_dict': { + 'id': '13708', + 'ext': 'flv', + 'title': 'Drone flying through fireworks!', + 'duration': 247, + 'filesize':12320768 + } + }, + + + + { + + 'url': 'http://www.snotr.com/video/530/David_Letteman_-_George_W_Bush_Top_10', + 'info_dict': { + 'id': '530', + 'ext': 'flv', + 'title': 'David Letteman - George W. Bush Top 10', + 'duration': 126, + 'filesize': 1048576 + } + }] + + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + # TODO more code goes here, for example ... + webpage = self._download_webpage(url, video_id) + title = self._og_search_title(webpage) + + description = self._og_search_description(webpage) + + video_url = "http://cdn.videos.snotr.com/%s.flv" % video_id + + view_count = str_to_int(self._html_search_regex(r'

\nViews:\n([\d,\.]+)

',webpage,'view count')) + + duration = self._html_search_regex(r'

\nLength:\n(.*?)

',webpage,'duration') + duration = str_to_int(duration[:1])*60 + str_to_int(duration[2:4]) + + file_size = self._html_search_regex(r'

\nFilesize:\n(.*?)

',webpage,'filesize') + file_size = str_to_int(re.match(r'\d+',file_size).group())*131072 + + return { + 'id': video_id, + 'title': title, + 'url':video_url, + 'view_count':view_count, + 'duration':duration, + 'filesize':file_size + + } \ No newline at end of file -- cgit v1.2.3 From 9732d77ed273406afcf9ed3ccb4d109824c9c69d Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Mon, 21 Jul 2014 12:02:44 +0200 Subject: [snotr] PEP8 and minor fixes (#3296) --- youtube_dl/extractor/snotr.py | 55 +++++++++++++++++++------------------------ 1 file changed, 24 insertions(+), 31 deletions(-) (limited to 'youtube_dl/extractor/snotr.py') diff --git a/youtube_dl/extractor/snotr.py b/youtube_dl/extractor/snotr.py index f89e81bf3..e762ad8f6 100644 --- a/youtube_dl/extractor/snotr.py +++ b/youtube_dl/extractor/snotr.py @@ -4,49 +4,39 @@ from __future__ import unicode_literals import re from .common import InfoExtractor - from ..utils import ( - + float_or_none, str_to_int, - parse_iso8601, - - - + parse_duration, ) + class SnotrIE(InfoExtractor): _VALID_URL = r'http?://(?:www\.)?snotr\.com/video/(?P\d+)/([\w]+)' - _TESTS =[ { + _TESTS = [{ 'url': 'http://www.snotr.com/video/13708/Drone_flying_through_fireworks', 'info_dict': { 'id': '13708', 'ext': 'flv', 'title': 'Drone flying through fireworks!', 'duration': 247, - 'filesize':12320768 - } - }, - - - - { - + 'filesize_approx': 98566144, + } + }, { 'url': 'http://www.snotr.com/video/530/David_Letteman_-_George_W_Bush_Top_10', 'info_dict': { 'id': '530', 'ext': 'flv', 'title': 'David Letteman - George W. Bush Top 10', 'duration': 126, - 'filesize': 1048576 - } - }] - + 'filesize_approx': 8912896, + } + }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') - # TODO more code goes here, for example ... webpage = self._download_webpage(url, video_id) title = self._og_search_title(webpage) @@ -54,20 +44,23 @@ class SnotrIE(InfoExtractor): video_url = "http://cdn.videos.snotr.com/%s.flv" % video_id - view_count = str_to_int(self._html_search_regex(r'

\nViews:\n([\d,\.]+)

',webpage,'view count')) + view_count = str_to_int(self._html_search_regex( + r'

\nViews:\n([\d,\.]+)

', + webpage, 'view count', fatal=False)) - duration = self._html_search_regex(r'

\nLength:\n(.*?)

',webpage,'duration') - duration = str_to_int(duration[:1])*60 + str_to_int(duration[2:4]) + duration = parse_duration(self._html_search_regex( + r'

\nLength:\n\s*([0-9:]+).*?

', + webpage, 'duration', fatal=False)) - file_size = self._html_search_regex(r'

\nFilesize:\n(.*?)

',webpage,'filesize') - file_size = str_to_int(re.match(r'\d+',file_size).group())*131072 + filesize_approx = float_or_none(self._html_search_regex( + r'

\nFilesize:\n\s*([0-9.]+)\s*megabyte

', + webpage, 'filesize', fatal=False), invscale=1024 * 1024) return { 'id': video_id, 'title': title, - 'url':video_url, - 'view_count':view_count, - 'duration':duration, - 'filesize':file_size - - } \ No newline at end of file + 'url': video_url, + 'view_count': view_count, + 'duration': duration, + 'filesize_approx': filesize_approx, + } -- cgit v1.2.3 From da8fb85859964d9a1d21a0328eb9044e19499d9c Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Mon, 21 Jul 2014 12:08:44 +0200 Subject: [snotr] Add description --- youtube_dl/extractor/snotr.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'youtube_dl/extractor/snotr.py') diff --git a/youtube_dl/extractor/snotr.py b/youtube_dl/extractor/snotr.py index e762ad8f6..da3b05a8d 100644 --- a/youtube_dl/extractor/snotr.py +++ b/youtube_dl/extractor/snotr.py @@ -21,6 +21,7 @@ class SnotrIE(InfoExtractor): 'title': 'Drone flying through fireworks!', 'duration': 247, 'filesize_approx': 98566144, + 'description': 'A drone flying through Fourth of July Fireworks', } }, { 'url': 'http://www.snotr.com/video/530/David_Letteman_-_George_W_Bush_Top_10', @@ -30,6 +31,7 @@ class SnotrIE(InfoExtractor): 'title': 'David Letteman - George W. Bush Top 10', 'duration': 126, 'filesize_approx': 8912896, + 'description': 'The top 10 George W. Bush moments, brought to you by David Letterman!', } }] @@ -41,7 +43,6 @@ class SnotrIE(InfoExtractor): title = self._og_search_title(webpage) description = self._og_search_description(webpage) - video_url = "http://cdn.videos.snotr.com/%s.flv" % video_id view_count = str_to_int(self._html_search_regex( @@ -58,6 +59,7 @@ class SnotrIE(InfoExtractor): return { 'id': video_id, + 'description': description, 'title': title, 'url': video_url, 'view_count': view_count, -- cgit v1.2.3