diff options
| -rw-r--r-- | test/test_utils.py | 6 | ||||
| -rw-r--r-- | youtube_dl/extractor/__init__.py | 2 | ||||
| -rw-r--r-- | youtube_dl/extractor/naver.py | 6 | ||||
| -rw-r--r-- | youtube_dl/extractor/niconico.py | 34 | ||||
| -rw-r--r-- | youtube_dl/extractor/trutube.py | 38 | 
5 files changed, 62 insertions, 24 deletions
diff --git a/test/test_utils.py b/test/test_utils.py
index 1c2b57609..0b31d1a39 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -361,12 +361,14 @@ class TestUtil(unittest.TestCase):
 
     def test_compat_getenv(self):
         test_str = 'тест'
-        os.environ['YOUTUBE-DL-TEST'] = test_str.encode(get_filesystem_encoding())
+        os.environ['YOUTUBE-DL-TEST'] = (test_str if sys.version_info >= (3, 0)
+            else test_str.encode(get_filesystem_encoding()))
         self.assertEqual(compat_getenv('YOUTUBE-DL-TEST'), test_str)
 
     def test_compat_expanduser(self):
         test_str = 'C:\Documents and Settings\тест\Application Data'
-        os.environ['HOME'] = test_str.encode(get_filesystem_encoding())
+        os.environ['HOME'] = (test_str if sys.version_info >= (3, 0)
+            else test_str.encode(get_filesystem_encoding()))
         self.assertEqual(compat_expanduser('~'), test_str)
 
 if __name__ == '__main__':
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 615018c09..32236f0fa 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -252,7 +252,7 @@ from .newstube import NewstubeIE
 from .nfb import NFBIE
 from .nfl import NFLIE
 from .nhl import NHLIE, NHLVideocenterIE
-from .niconico import NiconicoIE
+from .niconico import NiconicoIE, NiconicoPlaylistIE
 from .ninegag import NineGagIE
 from .noco import NocoIE
 from .normalboots import NormalbootsIE
diff --git a/youtube_dl/extractor/naver.py b/youtube_dl/extractor/naver.py
index c0231c197..cb1af9ecc 100644
--- a/youtube_dl/extractor/naver.py
+++ b/youtube_dl/extractor/naver.py
@@ -7,6 +7,7 @@ from .common import InfoExtractor
 from ..utils import (
     compat_urllib_parse,
     ExtractorError,
+    clean_html,
 )
 
 
@@ -31,6 +32,11 @@ class NaverIE(InfoExtractor):
         m_id = re.search(r'var rmcPlayer = new nhn.rmcnmv.RMCVideoPlayer\("(.+?)", "(.+?)"',
             webpage)
         if m_id is None:
+            m_error = re.search(
+                r'(?s)<div class="nation_error">\s*(?:<!--.*?-->)?\s*<p class="[^"]+">(?P<msg>.+?)</p>\s*</div>',
+                webpage)
+            if m_error:
+                raise ExtractorError(clean_html(m_error.group('msg')), expected=True)
             raise ExtractorError('couldn\'t extract vid and key')
         vid = m_id.group(1)
         key = m_id.group(2)
diff --git a/youtube_dl/extractor/niconico.py b/youtube_dl/extractor/niconico.py
index 7b85589b7..62d5707fe 100644
--- a/youtube_dl/extractor/niconico.py
+++ b/youtube_dl/extractor/niconico.py
@@ -2,6 +2,7 @@
 from __future__ import unicode_literals
 
 import re
+import json
 
 from .common import InfoExtractor
 from ..utils import (
@@ -146,3 +147,36 @@ class NiconicoIE(InfoExtractor):
             'duration': duration,
             'webpage_url': webpage_url,
         }
+
+
+class NiconicoPlaylistIE(InfoExtractor):
+    _VALID_URL = r'https?://www\.nicovideo\.jp/mylist/(?P<id>\d+)'
+
+    _TEST = {
+        'url': 'http://www.nicovideo.jp/mylist/27411728',
+        'info_dict': {
+            'id': '27411728',
+            'title': 'AKB48のオールナイトニッポン',
+        },
+        'playlist_mincount': 225,
+    }
+
+    def _real_extract(self, url):
+        list_id = self._match_id(url)
+        webpage = self._download_webpage(url, list_id)
+
+        entries_json = self._search_regex(r'Mylist\.preload\(\d+, (\[.*\])\);',
+            webpage, 'entries')
+        entries = json.loads(entries_json)
+        entries = [{
+            '_type': 'url',
+            'ie_key': NiconicoIE.ie_key(),
+            'url': 'http://www.nicovideo.jp/watch/%s' % entry['item_id'],
+        } for entry in entries]
+
+        return {
+            '_type': 'playlist',
+            'title': self._search_regex(r'\s+name: "(.*?)"', webpage, 'title'),
+            'id': list_id,
+            'entries': entries,
+        }
diff --git a/youtube_dl/extractor/trutube.py b/youtube_dl/extractor/trutube.py
index 57f956683..a73f3c43a 100644
--- a/youtube_dl/extractor/trutube.py
+++ b/youtube_dl/extractor/trutube.py
@@ -1,13 +1,12 @@
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
+from ..utils import xpath_text
 
 
 class TruTubeIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?trutube\.tv/video/(?P<id>[0-9]+)/.*'
-    _TEST = {
+    _VALID_URL = r'https?://(?:www\.)?trutube\.tv/(?:video/|nuevo/player/embed\.php\?v=)(?P<id>[0-9]+)'
+    _TESTS = [{
         'url': 'http://trutube.tv/video/14880/Ramses-II-Proven-To-Be-A-Red-Headed-Caucasoid-',
         'md5': 'c5b6e301b0a2040b074746cbeaa26ca1',
         'info_dict': {
@@ -16,29 +15,26 @@ class TruTubeIE(InfoExtractor):
             'title': 'Ramses II - Proven To Be A Red Headed Caucasoid',
             'thumbnail': 're:^http:.*\.jpg$',
         }
-    }
+    }, {
+        'url': 'https://trutube.tv/nuevo/player/embed.php?v=14880',
+        'only_matching': True,
+    }]
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
 
-        webpage = self._download_webpage(url, video_id)
-        video_title = self._og_search_title(webpage).strip()
-        thumbnail = self._search_regex(
-            r"var splash_img = '([^']+)';", webpage, 'thumbnail', fatal=False)
+        config = self._download_xml(
+            'https://trutube.tv/nuevo/player/config.php?v=%s' % video_id,
+            video_id, transform_source=lambda s: s.strip())
 
-        all_formats = re.finditer(
-            r"var (?P<key>[a-z]+)_video_file\s*=\s*'(?P<url>[^']+)';", webpage)
-        formats = [{
-            'format_id': m.group('key'),
-            'quality': -i,
-            'url': m.group('url'),
-        } for i, m in enumerate(all_formats)]
-        self._sort_formats(formats)
+        # filehd is always 404
+        video_url = xpath_text(config, './file', 'video URL', fatal=True)
+        title = xpath_text(config, './title', 'title')
+        thumbnail = xpath_text(config, './image', ' thumbnail')
 
         return {
             'id': video_id,
-            'title': video_title,
-            'formats': formats,
+            'url': video_url,
+            'title': title,
             'thumbnail': thumbnail,
         }
