diff options
author | Sergey M․ <dstftw@gmail.com> | 2016-07-17 00:01:25 +0700 |
---|---|---|
committer | Sergey M․ <dstftw@gmail.com> | 2016-07-17 00:01:25 +0700 |
commit | 49bc16b95ed7d418b353fda46ba845ac0eca648b (patch) | |
tree | 3bb630040157c0d7b12928e3930b15825e7adb3c | |
parent | a2f9ca1e67e6b926957abe3b35a4d78355bef7d4 (diff) |
[nintendo] Improve playlist extraction (Closes #9986)
-rw-r--r-- | youtube_dl/extractor/nintendo.py | 33 |
1 files changed, 16 insertions, 17 deletions
```diff
diff --git a/youtube_dl/extractor/nintendo.py b/youtube_dl/extractor/nintendo.py
index 57333ada0..4b4e66b05 100644
--- a/youtube_dl/extractor/nintendo.py
+++ b/youtube_dl/extractor/nintendo.py
@@ -1,13 +1,15 @@
+# coding: utf-8
 from __future__ import unicode_literals
 
+import re
+
 from .common import InfoExtractor
 from .ooyala import OoyalaIE
-
-import re
+from ..utils import unescapeHTML
 
 
 class NintendoIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?nintendo\.com/games/detail/(?P<id>[\w-]+)'
+    _VALID_URL = r'https?://(?:www\.)?nintendo\.com/games/detail/(?P<id>[^/?#&]+)'
     _TESTS = [{
         'url': 'http://www.nintendo.com/games/detail/yEiAzhU2eQI1KZ7wOHhngFoAHc1FpHwj',
         'info_dict': {
@@ -24,24 +26,21 @@ class NintendoIE(InfoExtractor):
         'url': 'http://www.nintendo.com/games/detail/tokyo-mirage-sessions-fe-wii-u',
         'info_dict': {
             'id': 'tokyo-mirage-sessions-fe-wii-u',
+            'title': 'Tokyo Mirage Sessions ♯FE',
         },
-        'params': {
-            'skip_download': True,
-        },
-        'add_ie': ['Ooyala'],
-        'playlist_count': 4,
+        'playlist_count': 3,
     }]
 
     def _real_extract(self, url):
-        video_id = self._match_id(url)
-        webpage = self._download_webpage(url, video_id)
+        page_id = self._match_id(url)
 
-        ooyala_codes = re.findall(
-            r'data-video-code=(["\'])(?P<code>.+?)\1',
-            webpage)
+        webpage = self._download_webpage(url, page_id)
 
-        entries = []
-        for ooyala_code in ooyala_codes:
-            entries.append(OoyalaIE._build_url_result(ooyala_code[1]))
+        entries = [
+            OoyalaIE._build_url_result(m.group('code'))
+            for m in re.finditer(
+                r'class=(["\'])embed-video\1[^>]+data-video-code=(["\'])(?P<code>(?:(?!\2).)+)\2',
+                webpage)]
 
-        return self.playlist_result(entries, video_id, self._og_search_title(webpage))
+        return self.playlist_result(
+            entries, page_id, unescapeHTML(self._og_search_title(webpage, fatal=False)))
```