diff options
author | Sergey M․ <dstftw@gmail.com> | 2015-09-09 23:59:17 +0600 |
---|---|---|
committer | Sergey M․ <dstftw@gmail.com> | 2015-09-09 23:59:17 +0600 |
commit | cc1ac110173f77a22c9becbee4cb6522087986a1 (patch) | |
tree | dd41afb485ee4a6b1852baa9e9ae3b55b56f7001 | |
parent | 73f536439ee70145237c72127262052206097e20 (diff) |
[vier:videos] Fix extraction with old approach (Closes #6806)
-rw-r--r-- | youtube_dl/extractor/vier.py | 13 |
1 files changed, 5 insertions, 8 deletions
```diff
diff --git a/youtube_dl/extractor/vier.py b/youtube_dl/extractor/vier.py
index 15377097e..c76c20614 100644
--- a/youtube_dl/extractor/vier.py
+++ b/youtube_dl/extractor/vier.py
@@ -2,6 +2,7 @@
 from __future__ import unicode_literals
 
 import re
+import itertools
 
 from .common import InfoExtractor
 
@@ -91,31 +92,27 @@ class VierVideosIE(InfoExtractor):
         mobj = re.match(self._VALID_URL, url)
         program = mobj.group('program')
 
-        webpage = self._download_webpage(url, program)
-
         page_id = mobj.group('page')
         if page_id:
             page_id = int(page_id)
             start_page = page_id
-            last_page = start_page + 1
             playlist_id = '%s-page%d' % (program, page_id)
         else:
             start_page = 0
-            last_page = int(self._search_regex(
-                r'videos\?page=(\d+)">laatste</a>',
-                webpage, 'last page', default=0)) + 1
             playlist_id = program
 
         entries = []
-        for current_page_id in range(start_page, last_page):
+        for current_page_id in itertools.count(start_page):
             current_page = self._download_webpage(
                 'http://www.vier.be/%s/videos?page=%d' % (program, current_page_id),
                 program,
-                'Downloading page %d' % (current_page_id + 1)) if current_page_id != page_id else webpage
+                'Downloading page %d' % (current_page_id + 1))
             page_entries = [
                 self.url_result('http://www.vier.be' + video_url, 'Vier')
                 for video_url in re.findall(
                     r'<h3><a href="(/[^/]+/videos/[^/]+(?:/\d+)?)">', current_page)]
             entries.extend(page_entries)
+            if page_id or '>Meer<' not in current_page:
+                break
 
         return self.playlist_result(entries, playlist_id)
```