diff options
Diffstat (limited to 'youtube_dl')
-rw-r--r-- | youtube_dl/extractor/__init__.py | 1 | ||||
-rw-r--r-- | youtube_dl/extractor/toypics.py | 55 |
2 files changed, 56 insertions, 0 deletions
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index d828c6932..5ca6eb16c 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -239,6 +239,7 @@ from .theplatform import ThePlatformIE from .thisav import ThisAVIE from .tinypic import TinyPicIE from .toutv import TouTvIE +from .toypics import ToypicsIE from .traileraddict import TrailerAddictIE from .trilulilu import TriluliluIE from .trutube import TruTubeIE diff --git a/youtube_dl/extractor/toypics.py b/youtube_dl/extractor/toypics.py new file mode 100644 index 000000000..3cbfe2e7e --- /dev/null +++ b/youtube_dl/extractor/toypics.py @@ -0,0 +1,55 @@ +from .common import InfoExtractor +from math import ceil +import re + +class ToypicsIE(InfoExtractor): + _VALID_URL = r'(?:http://)?videos\.toypics\.net/.*' + _TEST = { + 'url': 'http://videos.toypics.net/view/514/chancebulged,-2-1/', + #'md5': '8a8b546956bbd0e769dbe28f6e80abb3', == $head -c10K 12929646011616163504.mp4 |md5sum //no idea why it fails + 'info_dict': { + 'id': '514', + 'ext': 'mp4', + 'title': 'Chance-Bulge\'d, 2', + 'age_limit': 18 + } + } + PAGINATED=8 + + def _real_extract(self, url): + mobj = re.match(r'(http://)?videos\.toypics\.net/(?P<username>[^/?]+)$', url) + if not mobj: + return self.extract_one(url) + return [self.extract_one(u) for u in self.process_paginated(url, + r'public/">Public Videos \((?P<videos_count>[0-9]+)\)</a></li>', + r'<p class="video-entry-title">\n\s*<a href="(http://videos.toypics.net/view/[^"]+)">' + )] + + def process_paginated(self, profile_url, re_total, re_video_page): + profile_page = self._download_webpage(profile_url, 'profile' , 'getting profile page: '+profile_url) + videos_count = self._html_search_regex(re_total, profile_page, 'videos count') + lst = [] + for n in xrange(1,int(ceil(float(videos_count)/self.PAGINATED)) +1): + lpage_url = profile_url +'/public/%d'%n + lpage = self._download_webpage(lpage_url, 'page %d'%n) + lst.extend(re.findall(re_video_page, lpage)) + return lst + + def extract_one(self,url): + mobj = re.match(r'(http://)?videos\.toypics\.net/view/(?P<videoid>[0-9]+)/.*', url) + video_id = mobj.group('videoid') + page = self._download_webpage(url, video_id, 'getting page: '+url) + video_url = self._html_search_regex( + r'src:\s+"(http://static[0-9]+\.toypics\.net/flvideo/[^"]+)"', page, 'video URL') + title = self._html_search_regex( + r'<title>Toypics - ([^<]+)</title>', page, 'title') + username = self._html_search_regex( + r'toypics.net/([^/"]+)" class="user-name">', page, 'username') + return { + 'id': video_id, + 'url': video_url, + 'ext': video_url[-3:], + 'title': title, + 'uploader': username, + 'age_limit': 18 + } |