diff options
| author | cladmi <hartergaetan@gmail.com> | 2020-12-16 09:54:48 +0100 | 
|---|---|---|
| committer | Sergey M․ <dstftw@gmail.com> | 2021-01-05 07:40:25 +0700 | 
| commit | f318882955b90bead8206ee411641e65037b1011 (patch) | |
| tree | efae16d991dfa29b2d92e293866177aca37b950b | |
| parent | c3399cac19a51897024545fb267f7579ea42318b (diff) | |
[motherless] Fix recent videos upload date extraction (closes #27661)
Videos less than a week old use a '20h ago' or '1d ago' format.
I kept the support for 'Ago' with an uppercase first letter, as it was already in the code.
| -rw-r--r-- | youtube_dl/extractor/motherless.py | 19 | 
1 files changed, 14 insertions, 5 deletions
| diff --git a/youtube_dl/extractor/motherless.py b/youtube_dl/extractor/motherless.py index b1615b4d8..6cc36b308 100644 --- a/youtube_dl/extractor/motherless.py +++ b/youtube_dl/extractor/motherless.py @@ -85,18 +85,27 @@ class MotherlessIE(InfoExtractor):              or 'http://cdn4.videos.motherlessmedia.com/videos/%s.mp4?fs=opencloud' % video_id)          age_limit = self._rta_search(webpage)          view_count = str_to_int(self._html_search_regex( -            (r'>(\d+)\s+Views<', r'<strong>Views</strong>\s+([^<]+)<'), +            (r'>([\d,.]+)\s+Views<',  # 1,234,567 Views +             r'<strong>Views</strong>\s+([^<]+)<'),              webpage, 'view count', fatal=False))          like_count = str_to_int(self._html_search_regex( -            (r'>(\d+)\s+Favorites<', r'<strong>Favorited</strong>\s+([^<]+)<'), +            (r'>([\d,.]+)\s+Favorites<',  # 1,234 Favorites +             r'<strong>Favorited</strong>\s+([^<]+)<'),              webpage, 'like count', fatal=False))          upload_date = self._html_search_regex(              (r'class=["\']count[^>]+>(\d+\s+[a-zA-Z]{3}\s+\d{4})<', +             r'class=["\']count[^>]+>(\d+[hd])\s+[aA]go<',  # 20h/1d ago               r'<strong>Uploaded</strong>\s+([^<]+)<'), webpage, 'upload date') -        if 'Ago' in upload_date: -            days = int(re.search(r'([0-9]+)', upload_date).group(1)) -            upload_date = (datetime.datetime.now() - datetime.timedelta(days=days)).strftime('%Y%m%d') +        relative = re.match(r'(\d+)([hd])$', upload_date) +        if relative: +            delta = int(relative.group(1)) +            unit = relative.group(2) +            if unit == 'h': +                delta_t = datetime.timedelta(hours=delta) +            else:  # unit == 'd' +                delta_t = datetime.timedelta(days=delta) +            upload_date = (datetime.datetime.now() - delta_t).strftime('%Y%m%d')          else:              upload_date = unified_strdate(upload_date) | 
