diff options
| | | |
|---|---|---|
| author | Philipp Hagemeister <phihag@phihag.de> | 2014-10-29 20:10:07 +0100 |
| committer | Philipp Hagemeister <phihag@phihag.de> | 2014-10-29 20:10:07 +0100 |
| commit | 7d11297f3f91e6ddd3f0caa5ad4dca1a40d6c820 (patch) | |
| tree | 794503925069a94a5af82daf3b3d9fa786e9f58a /youtube_dl/extractor | |
| parent | 6ad4013d40e839211e2896129eed05ccd40ee963 (diff) | |
| parent | dbd1283d31aa1df43bf9b3255dc27a1c8bfed4ca (diff) | |
Merge branch 'master' of github.com:rg3/youtube-dl
Diffstat (limited to 'youtube_dl/extractor')
| | | |
|---|---|---|
| -rw-r--r-- | youtube_dl/extractor/__init__.py | 2 |
| -rw-r--r-- | youtube_dl/extractor/naver.py | 6 |
| -rw-r--r-- | youtube_dl/extractor/niconico.py | 34 |
| -rw-r--r-- | youtube_dl/extractor/trutube.py | 38 |
4 files changed, 58 insertions, 22 deletions
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 615018c09..32236f0fa 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -252,7 +252,7 @@ from .newstube import NewstubeIE from .nfb import NFBIE from .nfl import NFLIE from .nhl import NHLIE, NHLVideocenterIE -from .niconico import NiconicoIE +from .niconico import NiconicoIE, NiconicoPlaylistIE from .ninegag import NineGagIE from .noco import NocoIE from .normalboots import NormalbootsIE diff --git a/youtube_dl/extractor/naver.py b/youtube_dl/extractor/naver.py index c0231c197..cb1af9ecc 100644 --- a/youtube_dl/extractor/naver.py +++ b/youtube_dl/extractor/naver.py @@ -7,6 +7,7 @@ from .common import InfoExtractor from ..utils import ( compat_urllib_parse, ExtractorError, + clean_html, ) @@ -31,6 +32,11 @@ class NaverIE(InfoExtractor): m_id = re.search(r'var rmcPlayer = new nhn.rmcnmv.RMCVideoPlayer\("(.+?)", "(.+?)"', webpage) if m_id is None: + m_error = re.search( + r'(?s)<div class="nation_error">\s*(?:<!--.*?-->)?\s*<p class="[^"]+">(?P<msg>.+?)</p>\s*</div>', + webpage) + if m_error: + raise ExtractorError(clean_html(m_error.group('msg')), expected=True) raise ExtractorError('couldn\'t extract vid and key') vid = m_id.group(1) key = m_id.group(2) diff --git a/youtube_dl/extractor/niconico.py b/youtube_dl/extractor/niconico.py index 7b85589b7..62d5707fe 100644 --- a/youtube_dl/extractor/niconico.py +++ b/youtube_dl/extractor/niconico.py @@ -2,6 +2,7 @@ from __future__ import unicode_literals import re +import json from .common import InfoExtractor from ..utils import ( @@ -146,3 +147,36 @@ class NiconicoIE(InfoExtractor): 'duration': duration, 'webpage_url': webpage_url, } + + +class NiconicoPlaylistIE(InfoExtractor): + _VALID_URL = r'https?://www\.nicovideo\.jp/mylist/(?P<id>\d+)' + + _TEST = { + 'url': 'http://www.nicovideo.jp/mylist/27411728', + 'info_dict': { + 'id': '27411728', + 'title': 'AKB48のオールナイトニッポン', + }, + 
'playlist_mincount': 225, + } + + def _real_extract(self, url): + list_id = self._match_id(url) + webpage = self._download_webpage(url, list_id) + + entries_json = self._search_regex(r'Mylist\.preload\(\d+, (\[.*\])\);', + webpage, 'entries') + entries = json.loads(entries_json) + entries = [{ + '_type': 'url', + 'ie_key': NiconicoIE.ie_key(), + 'url': 'http://www.nicovideo.jp/watch/%s' % entry['item_id'], + } for entry in entries] + + return { + '_type': 'playlist', + 'title': self._search_regex(r'\s+name: "(.*?)"', webpage, 'title'), + 'id': list_id, + 'entries': entries, + } diff --git a/youtube_dl/extractor/trutube.py b/youtube_dl/extractor/trutube.py index 57f956683..a73f3c43a 100644 --- a/youtube_dl/extractor/trutube.py +++ b/youtube_dl/extractor/trutube.py @@ -1,13 +1,12 @@ from __future__ import unicode_literals -import re - from .common import InfoExtractor +from ..utils import xpath_text class TruTubeIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?trutube\.tv/video/(?P<id>[0-9]+)/.*' - _TEST = { + _VALID_URL = r'https?://(?:www\.)?trutube\.tv/(?:video/|nuevo/player/embed\.php\?v=)(?P<id>[0-9]+)' + _TESTS = [{ 'url': 'http://trutube.tv/video/14880/Ramses-II-Proven-To-Be-A-Red-Headed-Caucasoid-', 'md5': 'c5b6e301b0a2040b074746cbeaa26ca1', 'info_dict': { @@ -16,29 +15,26 @@ class TruTubeIE(InfoExtractor): 'title': 'Ramses II - Proven To Be A Red Headed Caucasoid', 'thumbnail': 're:^http:.*\.jpg$', } - } + }, { + 'url': 'https://trutube.tv/nuevo/player/embed.php?v=14880', + 'only_matching': True, + }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - video_title = self._og_search_title(webpage).strip() - thumbnail = self._search_regex( - r"var splash_img = '([^']+)';", webpage, 'thumbnail', fatal=False) + config = self._download_xml( + 'https://trutube.tv/nuevo/player/config.php?v=%s' % video_id, + video_id, 
transform_source=lambda s: s.strip()) - all_formats = re.finditer( - r"var (?P<key>[a-z]+)_video_file\s*=\s*'(?P<url>[^']+)';", webpage) - formats = [{ - 'format_id': m.group('key'), - 'quality': -i, - 'url': m.group('url'), - } for i, m in enumerate(all_formats)] - self._sort_formats(formats) + # filehd is always 404 + video_url = xpath_text(config, './file', 'video URL', fatal=True) + title = xpath_text(config, './title', 'title') + thumbnail = xpath_text(config, './image', ' thumbnail') return { 'id': video_id, - 'title': video_title, - 'formats': formats, + 'url': video_url, + 'title': title, 'thumbnail': thumbnail, } |