diff options
Diffstat (limited to 'youtube_dl/extractor')
| -rw-r--r-- | youtube_dl/extractor/brightcove.py | 22 | ||||
| -rw-r--r-- | youtube_dl/extractor/imdb.py | 5 | ||||
| -rw-r--r-- | youtube_dl/extractor/la7.py | 3 | ||||
| -rw-r--r-- | youtube_dl/extractor/rutube.py | 26 | 
4 files changed, 36 insertions, 20 deletions
diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index 443294e6f..717e151d9 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -23,7 +23,6 @@ from ..utils import (  class BrightcoveIE(InfoExtractor):      _VALID_URL = r'https?://.*brightcove\.com/(services|viewer).*\?(?P<query>.*)'      _FEDERATED_URL_TEMPLATE = 'http://c.brightcove.com/services/viewer/htmlFederated?%s' -    _PLAYLIST_URL_TEMPLATE = 'http://c.brightcove.com/services/json/experience/runtime/?command=get_programming_for_experience&playerKey=%s'      _TESTS = [          { @@ -71,6 +70,17 @@ class BrightcoveIE(InfoExtractor):                  'uploader': 'National Ballet of Canada',              },          }, +        { +            # https://github.com/rg3/youtube-dl/issues/2253 +            'url': 'http://v.thestar.com/services/player/bcpid2071349530001?bckey=AQ~~,AAAAuO4KaJE~,gatFNwSKdGDmDpIYqNJ-fTHn_c4z_LH_&bctid=3101154703001', +            'file': '3101154703001.mp4', +            'md5': '0ba9446db037002366bab3b3eb30c88c', +            'info_dict': { +                'title': 'Still no power', +                'uploader': 'thestar.com', +                'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.', +            } +        }      ]      @classmethod @@ -131,6 +141,11 @@ class BrightcoveIE(InfoExtractor):          """Try to extract the brightcove url from the wepbage, returns None          if it can't be found          """ + +        url_m = re.search(r'<meta\s+property="og:video"\s+content="(http://c.brightcove.com/[^"]+)"', webpage) +        if url_m: +            return url_m.group(1) +          m_brightcove = re.search(              r'''(?sx)<object              (?: @@ -183,8 +198,9 @@ class BrightcoveIE(InfoExtractor):          return self._extract_video_info(video_info)      def _get_playlist_info(self, player_key): -        playlist_info = self._download_webpage(self._PLAYLIST_URL_TEMPLATE % player_key, -                                               player_key, 'Downloading playlist information') +        info_url = 'http://c.brightcove.com/services/json/experience/runtime/?command=get_programming_for_experience&playerKey=%s' % player_key +        playlist_info = self._download_webpage( +            info_url, player_key, 'Downloading playlist information')          json_data = json.loads(playlist_info)          if 'videoList' not in json_data: diff --git a/youtube_dl/extractor/imdb.py b/youtube_dl/extractor/imdb.py index 1763af020..7cee505c0 100644 --- a/youtube_dl/extractor/imdb.py +++ b/youtube_dl/extractor/imdb.py @@ -69,12 +69,9 @@ class ImdbListIE(InfoExtractor):          list_id = mobj.group('id')          webpage = self._download_webpage(url, list_id) -        list_code = self._search_regex( -            r'(?s)<div\s+class="list\sdetail">(.*?)class="see-more"', -            webpage, 'list code')          entries = [              self.url_result('http://www.imdb.com' + m, 'Imdb') -            for m in re.findall(r'href="(/video/imdb/vi[^"]+)"', webpage)] +            for m in re.findall(r'href="(/video/imdb/vi[^"]+)"\s+data-type="playlist"', webpage)]          list_title = self._html_search_regex(              r'<h1 class="header">(.*?)</h1>', webpage, 'list title') diff --git a/youtube_dl/extractor/la7.py b/youtube_dl/extractor/la7.py index 6d61f9a90..db2028e9f 100644 --- a/youtube_dl/extractor/la7.py +++ b/youtube_dl/extractor/la7.py @@ -26,7 +26,8 @@ class LA7IE(InfoExtractor):              'title': 'IL DIVO',              'description': 'Un film di Paolo Sorrentino con Toni Servillo, Anna Bonaiuto, Giulio Bosetti  e Flavio Bucci',              'duration': 6254, -        } +        }, +        'skip': 'Blocked in the US',      }      def _real_extract(self, url): diff --git a/youtube_dl/extractor/rutube.py b/youtube_dl/extractor/rutube.py index 83459c522..4922dd764 100644 --- a/youtube_dl/extractor/rutube.py +++ b/youtube_dl/extractor/rutube.py @@ -15,7 +15,7 @@ from ..utils import (  class RutubeIE(InfoExtractor):      IE_NAME = 'rutube' -    IE_DESC = 'Rutube videos'     +    IE_DESC = 'Rutube videos'      _VALID_URL = r'https?://rutube\.ru/video/(?P<id>[\da-z]{32})'      _TEST = { @@ -71,7 +71,7 @@ class RutubeIE(InfoExtractor):  class RutubeChannelIE(InfoExtractor):      IE_NAME = 'rutube:channel' -    IE_DESC = 'Rutube channels'     +    IE_DESC = 'Rutube channels'      _VALID_URL = r'http://rutube\.ru/tags/video/(?P<id>\d+)'      _PAGE_TEMPLATE = 'http://rutube.ru/api/tags/video/%s/?page=%s&format=json' @@ -79,15 +79,16 @@ class RutubeChannelIE(InfoExtractor):      def _extract_videos(self, channel_id, channel_title=None):          entries = []          for pagenum in itertools.count(1): -            api_response = self._download_webpage(self._PAGE_TEMPLATE % (channel_id, pagenum), -                                                   channel_id, 'Downloading page %s' % pagenum) +            api_response = self._download_webpage( +                self._PAGE_TEMPLATE % (channel_id, pagenum), +                channel_id, 'Downloading page %s' % pagenum)              page = json.loads(api_response)              results = page['results'] -            if len(results) == 0: -                break; +            if not results: +                break              entries.extend(self.url_result(result['video_url'], 'Rutube') for result in results) -            if page['has_next'] is False: -                break; +            if not page['has_next']: +                break          return self.playlist_result(entries, channel_id, channel_title)      def _real_extract(self, url): @@ -98,7 +99,7 @@ class RutubeChannelIE(InfoExtractor):  class RutubeMovieIE(RutubeChannelIE):      IE_NAME = 'rutube:movie' -    IE_DESC = 'Rutube movies'     +    IE_DESC = 'Rutube movies'      _VALID_URL = r'http://rutube\.ru/metainfo/tv/(?P<id>\d+)'      _MOVIE_TEMPLATE = 'http://rutube.ru/api/metainfo/tv/%s/?format=json' @@ -107,8 +108,9 @@ class RutubeMovieIE(RutubeChannelIE):      def _real_extract(self, url):          mobj = re.match(self._VALID_URL, url)          movie_id = mobj.group('id') -        api_response = self._download_webpage(self._MOVIE_TEMPLATE % movie_id, movie_id, -                                            'Downloading movie JSON') +        api_response = self._download_webpage( +            self._MOVIE_TEMPLATE % movie_id, movie_id, +            'Downloading movie JSON')          movie = json.loads(api_response)          movie_name = movie['name']          return self._extract_videos(movie_id, movie_name) @@ -119,4 +121,4 @@ class RutubePersonIE(RutubeChannelIE):      IE_DESC = 'Rutube person videos'      _VALID_URL = r'http://rutube\.ru/video/person/(?P<id>\d+)' -    _PAGE_TEMPLATE = 'http://rutube.ru/api/video/person/%s/?page=%s&format=json'
\ No newline at end of file +    _PAGE_TEMPLATE = 'http://rutube.ru/api/video/person/%s/?page=%s&format=json'  | 
