Merge branch 'master' of github.com:rg3/youtube-dl

author: Philipp Hagemeister <phihag@phihag.de> 2014-10-29 20:10:07 +0100
committer: Philipp Hagemeister <phihag@phihag.de> 2014-10-29 20:10:07 +0100
commit: 7d11297f3f91e6ddd3f0caa5ad4dca1a40d6c820 (patch)
tree: 794503925069a94a5af82daf3b3d9fa786e9f58a /youtube_dl/extractor
parent: 6ad4013d40e839211e2896129eed05ccd40ee963 (diff)
parent: dbd1283d31aa1df43bf9b3255dc27a1c8bfed4ca (diff)
4 files changed, 58 insertions, 22 deletions
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 615018c09..32236f0fa 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -252,7 +252,7 @@ from .newstube import NewstubeIE
 from .nfb import NFBIE
 from .nfl import NFLIE
 from .nhl import NHLIE, NHLVideocenterIE
-from .niconico import NiconicoIE
+from .niconico import NiconicoIE, NiconicoPlaylistIE
 from .ninegag import NineGagIE
 from .noco import NocoIE
 from .normalboots import NormalbootsIE
diff --git a/youtube_dl/extractor/naver.py b/youtube_dl/extractor/naver.py
index c0231c197..cb1af9ecc 100644
--- a/youtube_dl/extractor/naver.py
+++ b/youtube_dl/extractor/naver.py
@@ -7,6 +7,7 @@ from .common import InfoExtractor
 from ..utils import (
     compat_urllib_parse,
     ExtractorError,
+    clean_html,
 )
 
 
@@ -31,6 +32,11 @@ class NaverIE(InfoExtractor):
         m_id = re.search(r'var rmcPlayer = new nhn.rmcnmv.RMCVideoPlayer\("(.+?)", "(.+?)"',
             webpage)
         if m_id is None:
+            m_error = re.search(
+                r'(?s)<div class="nation_error">\s*(?:<!--.*?-->)?\s*<p class="[^"]+">(?P<msg>.+?)</p>\s*</div>',
+                webpage)
+            if m_error:
+                raise ExtractorError(clean_html(m_error.group('msg')), expected=True)
             raise ExtractorError('couldn\'t extract vid and key')
         vid = m_id.group(1)
         key = m_id.group(2)
diff --git a/youtube_dl/extractor/niconico.py b/youtube_dl/extractor/niconico.py
index 7b85589b7..62d5707fe 100644
--- a/youtube_dl/extractor/niconico.py
+++ b/youtube_dl/extractor/niconico.py
@@ -2,6 +2,7 @@
 from __future__ import unicode_literals
 
 import re
+import json
 
 from .common import InfoExtractor
 from ..utils import (
@@ -146,3 +147,36 @@ class NiconicoIE(InfoExtractor):
             'duration': duration,
             'webpage_url': webpage_url,
         }
+
+
+class NiconicoPlaylistIE(InfoExtractor):
+    _VALID_URL = r'https?://www\.nicovideo\.jp/mylist/(?P<id>\d+)'
+
+    _TEST = {
+        'url': 'http://www.nicovideo.jp/mylist/27411728',
+        'info_dict': {
+            'id': '27411728',
+            'title': 'AKB48のオールナイトニッポン',
+        },
+        'playlist_mincount': 225,
+    }
+
+    def _real_extract(self, url):
+        list_id = self._match_id(url)
+        webpage = self._download_webpage(url, list_id)
+
+        entries_json = self._search_regex(r'Mylist\.preload\(\d+, (\[.*\])\);',
+            webpage, 'entries')
+        entries = json.loads(entries_json)
+        entries = [{
+            '_type': 'url',
+            'ie_key': NiconicoIE.ie_key(),
+            'url': 'http://www.nicovideo.jp/watch/%s' % entry['item_id'],
+        } for entry in entries]
+
+        return {
+            '_type': 'playlist',
+            'title': self._search_regex(r'\s+name: "(.*?)"', webpage, 'title'),
+            'id': list_id,
+            'entries': entries,
+        }
diff --git a/youtube_dl/extractor/trutube.py b/youtube_dl/extractor/trutube.py
index 57f956683..a73f3c43a 100644
--- a/youtube_dl/extractor/trutube.py
+++ b/youtube_dl/extractor/trutube.py
@@ -1,13 +1,12 @@
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
+from ..utils import xpath_text
 
 
 class TruTubeIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?trutube\.tv/video/(?P<id>[0-9]+)/.*'
-    _TEST = {
+    _VALID_URL = r'https?://(?:www\.)?trutube\.tv/(?:video/|nuevo/player/embed\.php\?v=)(?P<id>[0-9]+)'
+    _TESTS = [{
         'url': 'http://trutube.tv/video/14880/Ramses-II-Proven-To-Be-A-Red-Headed-Caucasoid-',
         'md5': 'c5b6e301b0a2040b074746cbeaa26ca1',
         'info_dict': {
@@ -16,29 +15,26 @@ class TruTubeIE(InfoExtractor):
             'title': 'Ramses II - Proven To Be A Red Headed Caucasoid',
             'thumbnail': 're:^http:.*\.jpg$',
         }
-    }
+    }, {
+        'url': 'https://trutube.tv/nuevo/player/embed.php?v=14880',
+        'only_matching': True,
+    }]
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
 
-        webpage = self._download_webpage(url, video_id)
-        video_title = self._og_search_title(webpage).strip()
-        thumbnail = self._search_regex(
-            r"var splash_img = '([^']+)';", webpage, 'thumbnail', fatal=False)
+        config = self._download_xml(
+            'https://trutube.tv/nuevo/player/config.php?v=%s' % video_id,
+            video_id, transform_source=lambda s: s.strip())
 
-        all_formats = re.finditer(
-            r"var (?P<key>[a-z]+)_video_file\s*=\s*'(?P<url>[^']+)';", webpage)
-        formats = [{
-            'format_id': m.group('key'),
-            'quality': -i,
-            'url': m.group('url'),
-        } for i, m in enumerate(all_formats)]
-        self._sort_formats(formats)
+        # filehd is always 404
+        video_url = xpath_text(config, './file', 'video URL', fatal=True)
+        title = xpath_text(config, './title', 'title')
+        thumbnail = xpath_text(config, './image', ' thumbnail')
 
         return {
             'id': video_id,
-            'title': video_title,
-            'formats': formats,
+            'url': video_url,
+            'title': title,
             'thumbnail': thumbnail,
         }
author	Philipp Hagemeister <phihag@phihag.de>	2014-10-29 20:10:07 +0100
committer	Philipp Hagemeister <phihag@phihag.de>	2014-10-29 20:10:07 +0100
commit	7d11297f3f91e6ddd3f0caa5ad4dca1a40d6c820 (patch)
tree	794503925069a94a5af82daf3b3d9fa786e9f58a /youtube_dl/extractor
parent	6ad4013d40e839211e2896129eed05ccd40ee963 (diff)
parent	dbd1283d31aa1df43bf9b3255dc27a1c8bfed4ca (diff)