aboutsummaryrefslogtreecommitdiff
path: root/youtube_dl/extractor/youtube.py
diff options
context:
space:
mode:
authorJaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>2013-11-26 19:09:14 +0100
committerJaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>2013-11-26 19:09:14 +0100
commit6e47b51eef26dbaa3634b73914e4ee7213ad38f7 (patch)
treec99fec079572efa909a3eebd48432b8c401d99c1 /youtube_dl/extractor/youtube.py
parent4a98cdbf3b19b07c7a885d348e79ddf79318f133 (diff)
[youtube:playlist] Remove the link with index 0
It's not the first video of the playlist, it appears in the 'Play all' button (see the test course for an example)
Diffstat (limited to 'youtube_dl/extractor/youtube.py')
-rw-r--r--youtube_dl/extractor/youtube.py8
1 files changed, 5 insertions, 3 deletions
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 1bf9cb7d4..4c43d5739 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1528,7 +1528,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
)"""
_TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s&page=%s'
_MORE_PAGES_INDICATOR = r'data-link-type="next"'
- _VIDEO_RE = r'href="/watch\?v=([0-9A-Za-z_-]{11})&amp;'
+ _VIDEO_RE = r'href="/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&amp;[^"]*?index=(?P<index>\d+)'
IE_NAME = u'youtube:playlist'
@classmethod
@@ -1562,8 +1562,10 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
for page_num in itertools.count(1):
url = self._TEMPLATE_URL % (playlist_id, page_num)
page = self._download_webpage(url, playlist_id, u'Downloading page #%s' % page_num)
- # The ids are duplicated
- new_ids = orderedSet(re.findall(self._VIDEO_RE, page))
+ matches = re.finditer(self._VIDEO_RE, page)
+ # We remove the duplicates and the link with index 0
+ # (it's not the first video of the playlist)
+ new_ids = orderedSet(m.group('id') for m in matches if m.group('index') != '0')
ids.extend(new_ids)
if re.search(self._MORE_PAGES_INDICATOR, page) is None: