about | summary | refs | log | tree | commit | diff
path: root/youtube_dl/extractor
diff options
context:
space:
mode:
Diffstat (limited to 'youtube_dl/extractor')
-rw-r--r-- youtube_dl/extractor/brightcove.py 22
-rw-r--r-- youtube_dl/extractor/imdb.py 5
-rw-r--r-- youtube_dl/extractor/la7.py 3
-rw-r--r-- youtube_dl/extractor/rutube.py 26
4 files changed, 36 insertions, 20 deletions
diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py
index 443294e6f..717e151d9 100644
--- a/youtube_dl/extractor/brightcove.py
+++ b/youtube_dl/extractor/brightcove.py
@@ -23,7 +23,6 @@ from ..utils import (
class BrightcoveIE(InfoExtractor):
_VALID_URL = r'https?://.*brightcove\.com/(services|viewer).*\?(?P<query>.*)'
_FEDERATED_URL_TEMPLATE = 'http://c.brightcove.com/services/viewer/htmlFederated?%s'
- _PLAYLIST_URL_TEMPLATE = 'http://c.brightcove.com/services/json/experience/runtime/?command=get_programming_for_experience&playerKey=%s'
_TESTS = [
{
@@ -71,6 +70,17 @@ class BrightcoveIE(InfoExtractor):
'uploader': 'National Ballet of Canada',
},
},
+ {
+ # https://github.com/rg3/youtube-dl/issues/2253
+ 'url': 'http://v.thestar.com/services/player/bcpid2071349530001?bckey=AQ~~,AAAAuO4KaJE~,gatFNwSKdGDmDpIYqNJ-fTHn_c4z_LH_&bctid=3101154703001',
+ 'file': '3101154703001.mp4',
+ 'md5': '0ba9446db037002366bab3b3eb30c88c',
+ 'info_dict': {
+ 'title': 'Still no power',
+ 'uploader': 'thestar.com',
+ 'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
+ }
+ }
]
@classmethod
@@ -131,6 +141,11 @@ class BrightcoveIE(InfoExtractor):
"""Try to extract the brightcove url from the wepbage, returns None
if it can't be found
"""
+
+ url_m = re.search(r'<meta\s+property="og:video"\s+content="(http://c.brightcove.com/[^"]+)"', webpage)
+ if url_m:
+ return url_m.group(1)
+
m_brightcove = re.search(
r'''(?sx)<object
(?:
@@ -183,8 +198,9 @@ class BrightcoveIE(InfoExtractor):
return self._extract_video_info(video_info)
def _get_playlist_info(self, player_key):
- playlist_info = self._download_webpage(self._PLAYLIST_URL_TEMPLATE % player_key,
- player_key, 'Downloading playlist information')
+ info_url = 'http://c.brightcove.com/services/json/experience/runtime/?command=get_programming_for_experience&playerKey=%s' % player_key
+ playlist_info = self._download_webpage(
+ info_url, player_key, 'Downloading playlist information')
json_data = json.loads(playlist_info)
if 'videoList' not in json_data:
diff --git a/youtube_dl/extractor/imdb.py b/youtube_dl/extractor/imdb.py
index 1763af020..7cee505c0 100644
--- a/youtube_dl/extractor/imdb.py
+++ b/youtube_dl/extractor/imdb.py
@@ -69,12 +69,9 @@ class ImdbListIE(InfoExtractor):
list_id = mobj.group('id')
webpage = self._download_webpage(url, list_id)
- list_code = self._search_regex(
- r'(?s)<div\s+class="list\sdetail">(.*?)class="see-more"',
- webpage, 'list code')
entries = [
self.url_result('http://www.imdb.com' + m, 'Imdb')
- for m in re.findall(r'href="(/video/imdb/vi[^"]+)"', webpage)]
+ for m in re.findall(r'href="(/video/imdb/vi[^"]+)"\s+data-type="playlist"', webpage)]
list_title = self._html_search_regex(
r'<h1 class="header">(.*?)</h1>', webpage, 'list title')
diff --git a/youtube_dl/extractor/la7.py b/youtube_dl/extractor/la7.py
index 6d61f9a90..db2028e9f 100644
--- a/youtube_dl/extractor/la7.py
+++ b/youtube_dl/extractor/la7.py
@@ -26,7 +26,8 @@ class LA7IE(InfoExtractor):
'title': 'IL DIVO',
'description': 'Un film di Paolo Sorrentino con Toni Servillo, Anna Bonaiuto, Giulio Bosetti e Flavio Bucci',
'duration': 6254,
- }
+ },
+ 'skip': 'Blocked in the US',
}
def _real_extract(self, url):
diff --git a/youtube_dl/extractor/rutube.py b/youtube_dl/extractor/rutube.py
index 83459c522..4922dd764 100644
--- a/youtube_dl/extractor/rutube.py
+++ b/youtube_dl/extractor/rutube.py
@@ -15,7 +15,7 @@ from ..utils import (
class RutubeIE(InfoExtractor):
IE_NAME = 'rutube'
- IE_DESC = 'Rutube videos'
+ IE_DESC = 'Rutube videos'
_VALID_URL = r'https?://rutube\.ru/video/(?P<id>[\da-z]{32})'
_TEST = {
@@ -71,7 +71,7 @@ class RutubeIE(InfoExtractor):
class RutubeChannelIE(InfoExtractor):
IE_NAME = 'rutube:channel'
- IE_DESC = 'Rutube channels'
+ IE_DESC = 'Rutube channels'
_VALID_URL = r'http://rutube\.ru/tags/video/(?P<id>\d+)'
_PAGE_TEMPLATE = 'http://rutube.ru/api/tags/video/%s/?page=%s&format=json'
@@ -79,15 +79,16 @@ class RutubeChannelIE(InfoExtractor):
def _extract_videos(self, channel_id, channel_title=None):
entries = []
for pagenum in itertools.count(1):
- api_response = self._download_webpage(self._PAGE_TEMPLATE % (channel_id, pagenum),
- channel_id, 'Downloading page %s' % pagenum)
+ api_response = self._download_webpage(
+ self._PAGE_TEMPLATE % (channel_id, pagenum),
+ channel_id, 'Downloading page %s' % pagenum)
page = json.loads(api_response)
results = page['results']
- if len(results) == 0:
- break;
+ if not results:
+ break
entries.extend(self.url_result(result['video_url'], 'Rutube') for result in results)
- if page['has_next'] is False:
- break;
+ if not page['has_next']:
+ break
return self.playlist_result(entries, channel_id, channel_title)
def _real_extract(self, url):
@@ -98,7 +99,7 @@ class RutubeChannelIE(InfoExtractor):
class RutubeMovieIE(RutubeChannelIE):
IE_NAME = 'rutube:movie'
- IE_DESC = 'Rutube movies'
+ IE_DESC = 'Rutube movies'
_VALID_URL = r'http://rutube\.ru/metainfo/tv/(?P<id>\d+)'
_MOVIE_TEMPLATE = 'http://rutube.ru/api/metainfo/tv/%s/?format=json'
@@ -107,8 +108,9 @@ class RutubeMovieIE(RutubeChannelIE):
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
movie_id = mobj.group('id')
- api_response = self._download_webpage(self._MOVIE_TEMPLATE % movie_id, movie_id,
- 'Downloading movie JSON')
+ api_response = self._download_webpage(
+ self._MOVIE_TEMPLATE % movie_id, movie_id,
+ 'Downloading movie JSON')
movie = json.loads(api_response)
movie_name = movie['name']
return self._extract_videos(movie_id, movie_name)
@@ -119,4 +121,4 @@ class RutubePersonIE(RutubeChannelIE):
IE_DESC = 'Rutube person videos'
_VALID_URL = r'http://rutube\.ru/video/person/(?P<id>\d+)'
- _PAGE_TEMPLATE = 'http://rutube.ru/api/video/person/%s/?page=%s&format=json'
\ No newline at end of file
+ _PAGE_TEMPLATE = 'http://rutube.ru/api/video/person/%s/?page=%s&format=json'