aboutsummaryrefslogtreecommitdiff
path: root/youtube_dl/extractor
diff options
context:
space:
mode:
authorXiyue <113869642+xiyue077@users.noreply.github.com>2022-10-11 09:52:48 +1100
committerGitHub <noreply@github.com>2022-10-10 23:52:48 +0100
commit82e4eca711a128138ed0b84ddb4321e403d56340 (patch)
tree3fc5e6157870a190909e8879cc215f8c4341a4b4 /youtube_dl/extractor
parent1b1442887e67b63545453e10816904e2b4c561c1 (diff)
downloadyoutube-dl-82e4eca711a128138ed0b84ddb4321e403d56340.tar.xz
[motherless] Fixed the broken uploader_id in the extractor (#31243)
* Fixed the broken uploader_id in the extractor. * Make uploader_id RE looser * Fix uploader_id in test Motherless_3 * Fix group pagination * # coding: utf-8 Co-authored-by: Andy Xuming <xuminic@gmail.com> Co-authored-by: dirkf <fieldhouse@gmx.net>
Diffstat (limited to 'youtube_dl/extractor')
-rw-r--r--youtube_dl/extractor/motherless.py13
1 files changed, 7 insertions, 6 deletions
diff --git a/youtube_dl/extractor/motherless.py b/youtube_dl/extractor/motherless.py
index ef1e081f2..35d2b46ed 100644
--- a/youtube_dl/extractor/motherless.py
+++ b/youtube_dl/extractor/motherless.py
@@ -1,3 +1,4 @@
+# coding: utf-8
from __future__ import unicode_literals
import datetime
@@ -71,7 +72,7 @@ class MotherlessIE(InfoExtractor):
'title': 'a/ Hot Teens',
'categories': list,
'upload_date': '20210104',
- 'uploader_id': 'yonbiw',
+ 'uploader_id': 'anonymous',
'thumbnail': r're:https?://.*\.jpg',
'age_limit': 18,
},
@@ -127,7 +128,7 @@ class MotherlessIE(InfoExtractor):
comment_count = webpage.count('class="media-comment-contents"')
uploader_id = self._html_search_regex(
- r'"thumb-member-username">\s+<a href="/m/([^"]+)"',
+ r'''(?s)['"](?:media-meta-member|thumb-member-username)\b[^>]+>\s*<a\b[^>]+\bhref\s*=\s*['"]/m/([^"']+)''',
webpage, 'uploader_id')
categories = self._html_search_meta('keywords', webpage, default=None)
@@ -169,7 +170,7 @@ class MotherlessGroupIE(InfoExtractor):
'description': 'Sex can be funny. Wide smiles,laugh, games, fun of '
'any kind!'
},
- 'playlist_mincount': 9,
+ 'playlist_mincount': 0,
}]
@classmethod
@@ -208,9 +209,9 @@ class MotherlessGroupIE(InfoExtractor):
r'<title>([\w\s]+\w)\s+-', webpage, 'title', fatal=False)
description = self._html_search_meta(
'description', webpage, fatal=False)
- page_count = self._int(self._search_regex(
- r'(\d+)</(?:a|span)><(?:a|span)[^>]+>\s*NEXT',
- webpage, 'page_count'), 'page_count')
+ page_count = str_to_int(self._search_regex(
+ r'(\d+)\s*</(?:a|span)>\s*<(?:a|span)[^>]+(?:>\s*NEXT|\brel\s*=\s*["\']?next)\b',
+ webpage, 'page_count', default='1'))
PAGE_SIZE = 80
def _get_page(idx):