diff options
Diffstat (limited to 'youtube_dl/extractor')
-rw-r--r-- | youtube_dl/extractor/brightcove.py | 22 |
-rw-r--r-- | youtube_dl/extractor/imdb.py | 5 |
-rw-r--r-- | youtube_dl/extractor/la7.py | 3 |
-rw-r--r-- | youtube_dl/extractor/rutube.py | 26 |
4 files changed, 36 insertions, 20 deletions
diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index 443294e6f..717e151d9 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -23,7 +23,6 @@ from ..utils import ( class BrightcoveIE(InfoExtractor): _VALID_URL = r'https?://.*brightcove\.com/(services|viewer).*\?(?P<query>.*)' _FEDERATED_URL_TEMPLATE = 'http://c.brightcove.com/services/viewer/htmlFederated?%s' - _PLAYLIST_URL_TEMPLATE = 'http://c.brightcove.com/services/json/experience/runtime/?command=get_programming_for_experience&playerKey=%s' _TESTS = [ { @@ -71,6 +70,17 @@ class BrightcoveIE(InfoExtractor): 'uploader': 'National Ballet of Canada', }, }, + { + # https://github.com/rg3/youtube-dl/issues/2253 + 'url': 'http://v.thestar.com/services/player/bcpid2071349530001?bckey=AQ~~,AAAAuO4KaJE~,gatFNwSKdGDmDpIYqNJ-fTHn_c4z_LH_&bctid=3101154703001', + 'file': '3101154703001.mp4', + 'md5': '0ba9446db037002366bab3b3eb30c88c', + 'info_dict': { + 'title': 'Still no power', + 'uploader': 'thestar.com', + 'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. 
To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.', + } + } ] @classmethod @@ -131,6 +141,11 @@ class BrightcoveIE(InfoExtractor): """Try to extract the brightcove url from the wepbage, returns None if it can't be found """ + + url_m = re.search(r'<meta\s+property="og:video"\s+content="(http://c.brightcove.com/[^"]+)"', webpage) + if url_m: + return url_m.group(1) + m_brightcove = re.search( r'''(?sx)<object (?: @@ -183,8 +198,9 @@ class BrightcoveIE(InfoExtractor): return self._extract_video_info(video_info) def _get_playlist_info(self, player_key): - playlist_info = self._download_webpage(self._PLAYLIST_URL_TEMPLATE % player_key, - player_key, 'Downloading playlist information') + info_url = 'http://c.brightcove.com/services/json/experience/runtime/?command=get_programming_for_experience&playerKey=%s' % player_key + playlist_info = self._download_webpage( + info_url, player_key, 'Downloading playlist information') json_data = json.loads(playlist_info) if 'videoList' not in json_data: diff --git a/youtube_dl/extractor/imdb.py b/youtube_dl/extractor/imdb.py index 1763af020..7cee505c0 100644 --- a/youtube_dl/extractor/imdb.py +++ b/youtube_dl/extractor/imdb.py @@ -69,12 +69,9 @@ class ImdbListIE(InfoExtractor): list_id = mobj.group('id') webpage = self._download_webpage(url, list_id) - list_code = self._search_regex( - r'(?s)<div\s+class="list\sdetail">(.*?)class="see-more"', - webpage, 'list code') entries = [ self.url_result('http://www.imdb.com' + m, 'Imdb') - for m in re.findall(r'href="(/video/imdb/vi[^"]+)"', webpage)] + for m in re.findall(r'href="(/video/imdb/vi[^"]+)"\s+data-type="playlist"', webpage)] list_title = self._html_search_regex( r'<h1 class="header">(.*?)</h1>', webpage, 'list title') diff --git a/youtube_dl/extractor/la7.py b/youtube_dl/extractor/la7.py index 6d61f9a90..db2028e9f 100644 --- a/youtube_dl/extractor/la7.py +++ b/youtube_dl/extractor/la7.py @@ -26,7 +26,8 @@ class LA7IE(InfoExtractor): 
'title': 'IL DIVO', 'description': 'Un film di Paolo Sorrentino con Toni Servillo, Anna Bonaiuto, Giulio Bosetti e Flavio Bucci', 'duration': 6254, - } + }, + 'skip': 'Blocked in the US', } def _real_extract(self, url): diff --git a/youtube_dl/extractor/rutube.py b/youtube_dl/extractor/rutube.py index 83459c522..4922dd764 100644 --- a/youtube_dl/extractor/rutube.py +++ b/youtube_dl/extractor/rutube.py @@ -15,7 +15,7 @@ from ..utils import ( class RutubeIE(InfoExtractor): IE_NAME = 'rutube' - IE_DESC = 'Rutube videos' + IE_DESC = 'Rutube videos' _VALID_URL = r'https?://rutube\.ru/video/(?P<id>[\da-z]{32})' _TEST = { @@ -71,7 +71,7 @@ class RutubeIE(InfoExtractor): class RutubeChannelIE(InfoExtractor): IE_NAME = 'rutube:channel' - IE_DESC = 'Rutube channels' + IE_DESC = 'Rutube channels' _VALID_URL = r'http://rutube\.ru/tags/video/(?P<id>\d+)' _PAGE_TEMPLATE = 'http://rutube.ru/api/tags/video/%s/?page=%s&format=json' @@ -79,15 +79,16 @@ class RutubeChannelIE(InfoExtractor): def _extract_videos(self, channel_id, channel_title=None): entries = [] for pagenum in itertools.count(1): - api_response = self._download_webpage(self._PAGE_TEMPLATE % (channel_id, pagenum), - channel_id, 'Downloading page %s' % pagenum) + api_response = self._download_webpage( + self._PAGE_TEMPLATE % (channel_id, pagenum), + channel_id, 'Downloading page %s' % pagenum) page = json.loads(api_response) results = page['results'] - if len(results) == 0: - break; + if not results: + break entries.extend(self.url_result(result['video_url'], 'Rutube') for result in results) - if page['has_next'] is False: - break; + if not page['has_next']: + break return self.playlist_result(entries, channel_id, channel_title) def _real_extract(self, url): @@ -98,7 +99,7 @@ class RutubeChannelIE(InfoExtractor): class RutubeMovieIE(RutubeChannelIE): IE_NAME = 'rutube:movie' - IE_DESC = 'Rutube movies' + IE_DESC = 'Rutube movies' _VALID_URL = r'http://rutube\.ru/metainfo/tv/(?P<id>\d+)' _MOVIE_TEMPLATE = 
'http://rutube.ru/api/metainfo/tv/%s/?format=json' @@ -107,8 +108,9 @@ class RutubeMovieIE(RutubeChannelIE): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) movie_id = mobj.group('id') - api_response = self._download_webpage(self._MOVIE_TEMPLATE % movie_id, movie_id, - 'Downloading movie JSON') + api_response = self._download_webpage( + self._MOVIE_TEMPLATE % movie_id, movie_id, + 'Downloading movie JSON') movie = json.loads(api_response) movie_name = movie['name'] return self._extract_videos(movie_id, movie_name) @@ -119,4 +121,4 @@ class RutubePersonIE(RutubeChannelIE): IE_DESC = 'Rutube person videos' _VALID_URL = r'http://rutube\.ru/video/person/(?P<id>\d+)' - _PAGE_TEMPLATE = 'http://rutube.ru/api/video/person/%s/?page=%s&format=json'
\ No newline at end of file + _PAGE_TEMPLATE = 'http://rutube.ru/api/video/person/%s/?page=%s&format=json' |