diff options
| -rw-r--r-- | test/test_playlists.py | 17 | ||||
| -rw-r--r-- | youtube_dl/extractor/__init__.py | 7 | ||||
| -rw-r--r-- | youtube_dl/extractor/brightcove.py | 13 | ||||
| -rw-r--r-- | youtube_dl/extractor/generic.py | 12 | ||||
| -rw-r--r-- | youtube_dl/extractor/imdb.py | 5 | ||||
| -rw-r--r-- | youtube_dl/extractor/rutube.py | 118 | 
6 files changed, 133 insertions, 39 deletions
diff --git a/test/test_playlists.py b/test/test_playlists.py index 5eeba091e..b3ce6f71e 100644 --- a/test/test_playlists.py +++ b/test/test_playlists.py @@ -33,6 +33,7 @@ from youtube_dl.extractor import (      ImdbListIE,      KhanAcademyIE,      EveryonesMixtapeIE, +    RutubeChannelIE,  ) @@ -195,11 +196,11 @@ class TestPlaylists(unittest.TestCase):      def test_imdb_list(self):          dl = FakeYDL()          ie = ImdbListIE(dl) -        result = ie.extract('http://www.imdb.com/list/sMjedvGDd8U') +        result = ie.extract('http://www.imdb.com/list/JFs9NWw6XI0')          self.assertIsPlaylist(result) -        self.assertEqual(result['id'], 'sMjedvGDd8U') -        self.assertEqual(result['title'], 'Animated and Family Films') -        self.assertTrue(len(result['entries']) >= 48) +        self.assertEqual(result['id'], 'JFs9NWw6XI0') +        self.assertEqual(result['title'], 'March 23, 2012 Releases') +        self.assertEqual(len(result['entries']), 7)      def test_khanacademy_topic(self):          dl = FakeYDL() @@ -219,6 +220,14 @@ class TestPlaylists(unittest.TestCase):          self.assertEqual(result['id'], 'm7m0jJAbMQi')          self.assertEqual(result['title'], 'Driving')          self.assertEqual(len(result['entries']), 24) +         +    def test_rutube_channel(self): +        dl = FakeYDL() +        ie = RutubeChannelIE(dl) +        result = ie.extract('http://rutube.ru/tags/video/1409') +        self.assertIsPlaylist(result) +        self.assertEqual(result['id'], '1409') +        self.assertTrue(len(result['entries']) >= 34)  if __name__ == '__main__': diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 1e8556124..e89b5cf9d 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -161,7 +161,12 @@ from .ro220 import Ro220IE  from .rottentomatoes import RottenTomatoesIE  from .roxwel import RoxwelIE  from .rtlnow import RTLnowIE -from .rutube import RutubeIE +from .rutube import ( +    RutubeIE, +    RutubeChannelIE, +    RutubeMovieIE, +    RutubePersonIE, +)  from .servingsys import ServingSysIE  from .sina import SinaIE  from .slashdot import SlashdotIE diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index 443294e6f..9ccf923a6 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -23,7 +23,6 @@ from ..utils import (  class BrightcoveIE(InfoExtractor):      _VALID_URL = r'https?://.*brightcove\.com/(services|viewer).*\?(?P<query>.*)'      _FEDERATED_URL_TEMPLATE = 'http://c.brightcove.com/services/viewer/htmlFederated?%s' -    _PLAYLIST_URL_TEMPLATE = 'http://c.brightcove.com/services/json/experience/runtime/?command=get_programming_for_experience&playerKey=%s'      _TESTS = [          { @@ -70,7 +69,7 @@ class BrightcoveIE(InfoExtractor):                  'description': 'md5:363109c02998fee92ec02211bd8000df',                  'uploader': 'National Ballet of Canada',              }, -        }, +        }      ]      @classmethod @@ -131,6 +130,11 @@ class BrightcoveIE(InfoExtractor):          """Try to extract the brightcove url from the wepbage, returns None          if it can't be found          """ + +        url_m = re.search(r'<meta\s+property="og:video"\s+content="(http://c.brightcove.com/[^"]+)"', webpage) +        if url_m: +            return url_m.group(1) +          m_brightcove = re.search(              r'''(?sx)<object              (?: @@ -183,8 +187,9 @@ class BrightcoveIE(InfoExtractor):          return self._extract_video_info(video_info)      def _get_playlist_info(self, player_key): -        playlist_info = self._download_webpage(self._PLAYLIST_URL_TEMPLATE % player_key, -                                               player_key, 'Downloading playlist information') +        info_url = 'http://c.brightcove.com/services/json/experience/runtime/?command=get_programming_for_experience&playerKey=%s' % player_key +        playlist_info = self._download_webpage( +            info_url, player_key, 'Downloading playlist information')          json_data = json.loads(playlist_info)          if 'videoList' not in json_data: diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 829e5894f..48de379b7 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -78,6 +78,18 @@ class GenericIE(InfoExtractor):                  'skip_download': True,              },          }, +        { +            # https://github.com/rg3/youtube-dl/issues/2253 +            'url': 'http://bcove.me/i6nfkrc3', +            'file': '3101154703001.mp4', +            'md5': '0ba9446db037002366bab3b3eb30c88c', +            'info_dict': { +                'title': 'Still no power', +                'uploader': 'thestar.com', +                'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.', +            }, +            'add_ie': ['Brightcove'], +        },          # Direct link to a video          {              'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4', diff --git a/youtube_dl/extractor/imdb.py b/youtube_dl/extractor/imdb.py index 1763af020..7cee505c0 100644 --- a/youtube_dl/extractor/imdb.py +++ b/youtube_dl/extractor/imdb.py @@ -69,12 +69,9 @@ class ImdbListIE(InfoExtractor):          list_id = mobj.group('id')          webpage = self._download_webpage(url, list_id) -        list_code = self._search_regex( -            r'(?s)<div\s+class="list\sdetail">(.*?)class="see-more"', -            webpage, 'list code')          entries = [              self.url_result('http://www.imdb.com' + m, 'Imdb') -            for m in re.findall(r'href="(/video/imdb/vi[^"]+)"', webpage)] +            for m in re.findall(r'href="(/video/imdb/vi[^"]+)"\s+data-type="playlist"', webpage)]          list_title = self._html_search_regex(              r'<h1 class="header">(.*?)</h1>', webpage, 'list title') diff --git a/youtube_dl/extractor/rutube.py b/youtube_dl/extractor/rutube.py index e3e9bc07f..4922dd764 100644 --- a/youtube_dl/extractor/rutube.py +++ b/youtube_dl/extractor/rutube.py @@ -1,58 +1,124 @@  # encoding: utf-8 +from __future__ import unicode_literals +  import re  import json +import itertools  from .common import InfoExtractor  from ..utils import ( -    compat_urlparse,      compat_str, +    unified_strdate,      ExtractorError,  )  class RutubeIE(InfoExtractor): -    _VALID_URL = r'https?://rutube\.ru/video/(?P<long_id>\w+)' +    IE_NAME = 'rutube' +    IE_DESC = 'Rutube videos' +    _VALID_URL = r'https?://rutube\.ru/video/(?P<id>[\da-z]{32})'      _TEST = { -        u'url': u'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/', -        u'file': u'3eac3b4561676c17df9132a9a1e62e3e.mp4', -        u'info_dict': { -            u'title': u'Раненный кенгуру забежал в аптеку', -            u'uploader': u'NTDRussian', -            u'uploader_id': u'29790', +        'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/', +        'file': '3eac3b4561676c17df9132a9a1e62e3e.mp4', +        'info_dict': { +            'title': 'Раненный кенгуру забежал в аптеку', +            'description': 'http://www.ntdtv.ru ', +            'duration': 80, +            'uploader': 'NTDRussian', +            'uploader_id': '29790', +            'upload_date': '20131016',          }, -        u'params': { +        'params': {              # It requires ffmpeg (m3u8 download) -            u'skip_download': True, +            'skip_download': True,          },      } -    def _get_api_response(self, short_id, subpath): -        api_url = 'http://rutube.ru/api/play/%s/%s/?format=json' % (subpath, short_id) -        response_json = self._download_webpage(api_url, short_id, -            u'Downloading %s json' % subpath) -        return json.loads(response_json) -      def _real_extract(self, url):          mobj = re.match(self._VALID_URL, url) -        long_id = mobj.group('long_id') -        webpage = self._download_webpage(url, long_id) -        og_video = self._og_search_video_url(webpage) -        short_id = compat_urlparse.urlparse(og_video).path[1:] -        options = self._get_api_response(short_id, 'options') -        trackinfo = self._get_api_response(short_id, 'trackinfo') +        video_id = mobj.group('id') +         +        api_response = self._download_webpage('http://rutube.ru/api/video/%s/?format=json' % video_id, +                                              video_id, 'Downloading video JSON') +        video = json.loads(api_response) +         +        api_response = self._download_webpage('http://rutube.ru/api/play/trackinfo/%s/?format=json' % video_id, +                                              video_id, 'Downloading trackinfo JSON') +        trackinfo = json.loads(api_response) +                  # Some videos don't have the author field          author = trackinfo.get('author') or {}          m3u8_url = trackinfo['video_balancer'].get('m3u8')          if m3u8_url is None: -            raise ExtractorError(u'Couldn\'t find m3u8 manifest url') +            raise ExtractorError('Couldn\'t find m3u8 manifest url')          return { -            'id': trackinfo['id'], -            'title': trackinfo['title'], +            'id': video['id'], +            'title': video['title'], +            'description': video['description'], +            'duration': video['duration'], +            'view_count': video['hits'],              'url': m3u8_url,              'ext': 'mp4', -            'thumbnail': options['thumbnail_url'], +            'thumbnail': video['thumbnail_url'],              'uploader': author.get('name'),              'uploader_id': compat_str(author['id']) if author else None, +            'upload_date': unified_strdate(video['created_ts']), +            'age_limit': 18 if video['is_adult'] else 0,          } + + +class RutubeChannelIE(InfoExtractor): +    IE_NAME = 'rutube:channel' +    IE_DESC = 'Rutube channels' +    _VALID_URL = r'http://rutube\.ru/tags/video/(?P<id>\d+)' + +    _PAGE_TEMPLATE = 'http://rutube.ru/api/tags/video/%s/?page=%s&format=json' + +    def _extract_videos(self, channel_id, channel_title=None): +        entries = [] +        for pagenum in itertools.count(1): +            api_response = self._download_webpage( +                self._PAGE_TEMPLATE % (channel_id, pagenum), +                channel_id, 'Downloading page %s' % pagenum) +            page = json.loads(api_response) +            results = page['results'] +            if not results: +                break +            entries.extend(self.url_result(result['video_url'], 'Rutube') for result in results) +            if not page['has_next']: +                break +        return self.playlist_result(entries, channel_id, channel_title) + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        channel_id = mobj.group('id') +        return self._extract_videos(channel_id) + + +class RutubeMovieIE(RutubeChannelIE): +    IE_NAME = 'rutube:movie' +    IE_DESC = 'Rutube movies' +    _VALID_URL = r'http://rutube\.ru/metainfo/tv/(?P<id>\d+)' + +    _MOVIE_TEMPLATE = 'http://rutube.ru/api/metainfo/tv/%s/?format=json' +    _PAGE_TEMPLATE = 'http://rutube.ru/api/metainfo/tv/%s/video?page=%s&format=json' + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        movie_id = mobj.group('id') +        api_response = self._download_webpage( +            self._MOVIE_TEMPLATE % movie_id, movie_id, +            'Downloading movie JSON') +        movie = json.loads(api_response) +        movie_name = movie['name'] +        return self._extract_videos(movie_id, movie_name) + + +class RutubePersonIE(RutubeChannelIE): +    IE_NAME = 'rutube:person' +    IE_DESC = 'Rutube person videos' +    _VALID_URL = r'http://rutube\.ru/video/person/(?P<id>\d+)' + +    _PAGE_TEMPLATE = 'http://rutube.ru/api/video/person/%s/?page=%s&format=json'  | 
