diff options
author | Martin Weinelt <mweinelt@users.noreply.github.com> | 2018-01-06 17:33:40 +0100 |
---|---|---|
committer | Sergey M <dstftw@gmail.com> | 2018-01-06 23:33:40 +0700 |
commit | 45283afdec81af21ba50ff3aca3d86fb6d2584b0 (patch) | |
tree | a9b157f84b04204caac6d22e0d44bcbbce46a952 /youtube_dl/extractor/motherless.py | |
parent | b7c74c04036c07f8a81d3048b482afd6ef384b40 (diff) |
[motherless] Add support for groups
Diffstat (limited to 'youtube_dl/extractor/motherless.py')
-rw-r--r-- | youtube_dl/extractor/motherless.py | 73 |
1 files changed, 73 insertions, 0 deletions
diff --git a/youtube_dl/extractor/motherless.py b/youtube_dl/extractor/motherless.py index 6fe3b6049..90ed91ba6 100644 --- a/youtube_dl/extractor/motherless.py +++ b/youtube_dl/extractor/motherless.py @@ -4,8 +4,11 @@ import datetime import re from .common import InfoExtractor +from ..compat import compat_urlparse from ..utils import ( ExtractorError, + InAdvancePagedList, + orderedSet, str_to_int, unified_strdate, ) @@ -114,3 +117,73 @@ class MotherlessIE(InfoExtractor): 'age_limit': age_limit, 'url': video_url, } + + +class MotherlessGroupIE(InfoExtractor): + _VALID_URL = 'https?://(?:www\.)?motherless\.com/gv?/(?P<id>[a-z0-9_]+)' + _TESTS = [{ + 'url': 'http://motherless.com/g/movie_scenes', + 'info_dict': { + 'id': 'movie_scenes', + 'title': 'Movie Scenes', + 'description': 'Hot and sexy scenes from "regular" movies... ' + 'Beautiful actresses fully nude... A looot of ' + 'skin! :)Enjoy!', + }, + 'playlist_mincount': 662, + }, { + 'url': 'http://motherless.com/gv/sex_must_be_funny', + 'info_dict': { + 'id': 'sex_must_be_funny', + 'title': 'Sex must be funny', + 'description': 'Sex can be funny. Wide smiles,laugh, games, fun of ' + 'any kind!' + }, + 'playlist_mincount': 9, + }] + + @classmethod + def suitable(cls, url): + return (False if MotherlessIE.suitable(url) + else super(MotherlessGroupIE, cls).suitable(url)) + + def _extract_entries(self, webpage, base): + return [ + self.url_result( + compat_urlparse.urljoin(base, video_path), + MotherlessIE.ie_key(), video_title=title) + for video_path, title in orderedSet(re.findall( + r'href="/([^"]+)"[^>]+>\s+<img[^>]+alt="[^-]+-\s([^"]+)"', + webpage)) + ] + + def _real_extract(self, url): + group_id = self._match_id(url) + page_url = compat_urlparse.urljoin(url, '/gv/%s' % group_id) + webpage = self._download_webpage(page_url, group_id) + title = self._search_regex( + r'<title>([\w\s]+\w)\s+-', webpage, 'title', fatal=False) + description = self._html_search_meta( + 'description', webpage, fatal=False) + page_count = self._int(self._search_regex( + r'(\d+)</(?:a|span)><(?:a|span)[^>]+>\s*NEXT', + webpage, 'page_count'), 'page_count') + PAGE_SIZE = 80 + + def _get_page(idx): + webpage = self._download_webpage( + page_url, group_id, query={'page': idx + 1}, + note='Downloading page %d/%d' % (idx + 1, page_count) + ) + for entry in self._extract_entries(webpage, url): + yield entry + + playlist = InAdvancePagedList(_get_page, page_count, PAGE_SIZE) + + return { + '_type': 'playlist', + 'id': group_id, + 'title': title, + 'description': description, + 'entries': playlist + } |