diff options
author | Sergey M․ <dstftw@gmail.com> | 2017-05-20 01:28:42 +0700 |
---|---|---|
committer | Sergey M․ <dstftw@gmail.com> | 2017-05-20 01:29:33 +0700 |
commit | 2ca29f1aafda58fd7fd6db55d8b99eb89e07f2df (patch) | |
tree | b7b583cbbd9b37833829b817c5516a19557335a4 /youtube_dl | |
parent | 77d682da9dc2e62a1505cccb77408b8fa638a7f1 (diff) |
[toypics] Improve and modernize
Diffstat (limited to 'youtube_dl')
-rw-r--r-- | youtube_dl/extractor/toypics.py | 34 |
1 files changed, 19 insertions, 15 deletions
diff --git a/youtube_dl/extractor/toypics.py b/youtube_dl/extractor/toypics.py index 774fa565c..f705a06c9 100644 --- a/youtube_dl/extractor/toypics.py +++ b/youtube_dl/extractor/toypics.py @@ -6,43 +6,48 @@ import re class ToypicsIE(InfoExtractor): - IE_DESC = 'Toypics user profile' - _VALID_URL = r'https?://videos\.toypics\.net/view/(?P<id>[0-9]+)/.*' + IE_DESC = 'Toypics video' + _VALID_URL = r'https?://videos\.toypics\.net/view/(?P<id>[0-9]+)' _TEST = { 'url': 'http://videos.toypics.net/view/514/chancebulged,-2-1/', 'md5': '16e806ad6d6f58079d210fe30985e08b', 'info_dict': { 'id': '514', 'ext': 'mp4', - 'title': 'Chance-Bulge\'d, 2', + 'title': "Chance-Bulge'd, 2", 'age_limit': 18, 'uploader': 'kidsune', } } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - page = self._download_webpage(url, video_id) - formats = self._parse_html5_media_entries(url, page, video_id)[0]['formats'] + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + formats = self._parse_html5_media_entries( + url, webpage, video_id)[0]['formats'] title = self._html_search_regex([ r'<h1[^>]+class=["\']view-video-title[^>]+>([^<]+)</h', r'<title>([^<]+) - Toypics</title>', - ], page, 'title') - username = self._html_search_regex( - r'More videos from <strong>([^<]+)</strong>', page, 'username') + ], webpage, 'title') + + uploader = self._html_search_regex( + r'More videos from <strong>([^<]+)</strong>', webpage, 'uploader', + fatal=False) + return { 'id': video_id, 'formats': formats, 'title': title, - 'uploader': username, + 'uploader': uploader, 'age_limit': 18, } class ToypicsUserIE(InfoExtractor): IE_DESC = 'Toypics user profile' - _VALID_URL = r'https?://videos\.toypics\.net/(?P<username>[^/?]+)(?:$|[?#])' + _VALID_URL = r'https?://videos\.toypics\.net/(?!view)(?P<id>[^/?#&]+)' _TEST = { 'url': 'http://videos.toypics.net/Mikey', 'info_dict': { @@ -52,8 +57,7 @@ class ToypicsUserIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - username = mobj.group('username') + username = self._match_id(url) profile_page = self._download_webpage( url, username, note='Retrieving profile page') @@ -72,7 +76,7 @@ class ToypicsUserIE(InfoExtractor): note='Downloading page %d/%d' % (n, page_count)) urls.extend( re.findall( - r'<div[^>]+class=["\']preview[^>]+>\s*<a[^>]+href="(https?://videos.toypics.net/view/[^"]+)"', + r'<div[^>]+class=["\']preview[^>]+>\s*<a[^>]+href="(https?://videos\.toypics\.net/view/[^"]+)"', lpage)) return { |