diff options
author | Yen Chi Hsuan <yan12125@gmail.com> | 2015-07-24 21:29:44 +0800 |
---|---|---|
committer | Yen Chi Hsuan <yan12125@gmail.com> | 2015-07-24 21:29:44 +0800 |
commit | 12434026574bcaaaa705c31ef14428cc91a5efad (patch) | |
tree | 3a2c3d1cd459fc4eeb897bcb92dc0c51b8544ebd | |
parent | 2b2ee140c3c6f08b4078cc6a5a289e5e74bec2b4 (diff) |
[dailymotion:playlist] Detect problematic redirection (fixes #6347)
-rw-r--r-- | youtube_dl/extractor/dailymotion.py | 28 |
1 files changed, 25 insertions, 3 deletions
diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index 5a4987772..85d945509 100644 --- a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@ -30,6 +30,10 @@ class DailymotionBaseInfoExtractor(InfoExtractor): request.add_header('Cookie', 'family_filter=off; ff=off') return request + def _download_webpage_handle_no_ff(self, url, *args, **kwargs): + request = self._build_request(url) + return self._download_webpage_handle(request, *args, **kwargs) + def _download_webpage_no_ff(self, url, *args, **kwargs): request = self._build_request(url) return self._download_webpage(request, *args, **kwargs) @@ -275,10 +279,17 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor): def _extract_entries(self, id): video_ids = [] + processed_urls = set() for pagenum in itertools.count(1): - webpage = self._download_webpage_no_ff( - self._PAGE_TEMPLATE % (id, pagenum), - id, 'Downloading page %s' % pagenum) + page_url = self._PAGE_TEMPLATE % (id, pagenum) + webpage, urlh = self._download_webpage_handle_no_ff( + page_url, id, 'Downloading page %s' % pagenum) + if urlh.geturl() in processed_urls: + self.report_warning('Stopped at duplicated page %s, which is the same as %s' % ( + page_url, urlh.geturl()), id) + break + + processed_urls.add(urlh.geturl()) video_ids.extend(re.findall(r'data-xid="(.+?)"', webpage)) @@ -311,6 +322,17 @@ class DailymotionUserIE(DailymotionPlaylistIE): 'title': 'Rémi Gaillard', }, 'playlist_mincount': 100, + }, { + 'url': 'http://www.dailymotion.com/user/UnderProject', + 'info_dict': { + 'id': 'UnderProject', + 'title': 'UnderProject', + }, + 'playlist_mincount': 1800, + 'expected_warnings': [ + 'Stopped at duplicated page', + ], + 'skip': 'Takes too long time', }] def _real_extract(self, url): |