diff options
| -rw-r--r-- | youtube_dl/extractor/motherless.py | 52 | 
1 file changed, 34 insertions, 18 deletions
| diff --git a/youtube_dl/extractor/motherless.py b/youtube_dl/extractor/motherless.py index 6cc36b308..ef1e081f2 100644 --- a/youtube_dl/extractor/motherless.py +++ b/youtube_dl/extractor/motherless.py @@ -61,6 +61,23 @@ class MotherlessIE(InfoExtractor):          # no keywords          'url': 'http://motherless.com/8B4BBC1',          'only_matching': True, +    }, { +        # see https://motherless.com/videos/recent for recent videos with +        # uploaded date in "ago" format +        'url': 'https://motherless.com/3C3E2CF', +        'info_dict': { +            'id': '3C3E2CF', +            'ext': 'mp4', +            'title': 'a/ Hot Teens', +            'categories': list, +            'upload_date': '20210104', +            'uploader_id': 'yonbiw', +            'thumbnail': r're:https?://.*\.jpg', +            'age_limit': 18, +        }, +        'params': { +            'skip_download': True, +        },      }]      def _real_extract(self, url): @@ -85,29 +102,28 @@ class MotherlessIE(InfoExtractor):              or 'http://cdn4.videos.motherlessmedia.com/videos/%s.mp4?fs=opencloud' % video_id)          age_limit = self._rta_search(webpage)          view_count = str_to_int(self._html_search_regex( -            (r'>([\d,.]+)\s+Views<',  # 1,234,567 Views -             r'<strong>Views</strong>\s+([^<]+)<'), +            (r'>([\d,.]+)\s+Views<', r'<strong>Views</strong>\s+([^<]+)<'),              webpage, 'view count', fatal=False))          like_count = str_to_int(self._html_search_regex( -            (r'>([\d,.]+)\s+Favorites<',  # 1,234 Favorites +            (r'>([\d,.]+)\s+Favorites<',               r'<strong>Favorited</strong>\s+([^<]+)<'),              webpage, 'like count', fatal=False)) -        upload_date = self._html_search_regex( -            (r'class=["\']count[^>]+>(\d+\s+[a-zA-Z]{3}\s+\d{4})<', -             r'class=["\']count[^>]+>(\d+[hd])\s+[aA]go<',  # 20h/1d ago -             r'<strong>Uploaded</strong>\s+([^<]+)<'), webpage, 'upload 
date') -        relative = re.match(r'(\d+)([hd])$', upload_date) -        if relative: -            delta = int(relative.group(1)) -            unit = relative.group(2) -            if unit == 'h': -                delta_t = datetime.timedelta(hours=delta) -            else:  # unit == 'd' -                delta_t = datetime.timedelta(days=delta) -            upload_date = (datetime.datetime.now() - delta_t).strftime('%Y%m%d') -        else: -            upload_date = unified_strdate(upload_date) +        upload_date = unified_strdate(self._search_regex( +            r'class=["\']count[^>]+>(\d+\s+[a-zA-Z]{3}\s+\d{4})<', webpage, +            'upload date', default=None)) +        if not upload_date: +            uploaded_ago = self._search_regex( +                r'>\s*(\d+[hd])\s+[aA]go\b', webpage, 'uploaded ago', +                default=None) +            if uploaded_ago: +                delta = int(uploaded_ago[:-1]) +                _AGO_UNITS = { +                    'h': 'hours', +                    'd': 'days', +                } +                kwargs = {_AGO_UNITS.get(uploaded_ago[-1]): delta} +                upload_date = (datetime.datetime.utcnow() - datetime.timedelta(**kwargs)).strftime('%Y%m%d')          comment_count = webpage.count('class="media-comment-contents"')          uploader_id = self._html_search_regex( | 
