diff options
author | Sergey M․ <dstftw@gmail.com> | 2016-07-13 22:41:46 +0700 |
---|---|---|
committer | Sergey M․ <dstftw@gmail.com> | 2016-07-13 22:42:53 +0700 |
commit | a0560d8ab83a565d502e160ccd993317a0ed69ef (patch) | |
tree | e4e2a2b9a12a2057c6f20b83591c5aa23502ae54 | |
parent | 0385aa6199206e4ba7745efec73be26c5826286a (diff) |
[ellentv] Improve extraction (Closes #10067)
-rw-r--r-- | youtube_dl/extractor/ellentv.py | 50 |
1 files changed, 37 insertions, 13 deletions
```diff
diff --git a/youtube_dl/extractor/ellentv.py b/youtube_dl/extractor/ellentv.py
index 4c8190d68..74bbc5c51 100644
--- a/youtube_dl/extractor/ellentv.py
+++ b/youtube_dl/extractor/ellentv.py
@@ -6,12 +6,13 @@ import json
 from .common import InfoExtractor
 from ..utils import (
     ExtractorError,
+    NO_DEFAULT,
 )
 
 
 class EllenTVIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?(?:ellentv|ellentube)\.com/videos/(?P<id>[a-z0-9_-]+)'
-    _TEST = {
+    _TESTS = [{
         'url': 'http://www.ellentv.com/videos/0-ipq1gsai/',
         'md5': '4294cf98bc165f218aaa0b89e0fd8042',
         'info_dict': {
@@ -22,24 +23,47 @@ class EllenTVIE(InfoExtractor):
             'timestamp': 1428035648,
             'upload_date': '20150403',
             'uploader_id': 'batchUser',
-        }
-    }
+        },
+    }, {
+        # not available via http://widgets.ellentube.com/
+        'url': 'http://www.ellentv.com/videos/1-szkgu2m2/',
+        'info_dict': {
+            'id': '1_szkgu2m2',
+            'ext': 'flv',
+            'title': "Ellen's Amazingly Talented Audience",
+            'description': 'md5:86ff1e376ff0d717d7171590e273f0a5',
+            'timestamp': 1255140900,
+            'upload_date': '20091010',
+            'uploader_id': 'ellenkaltura@gmail.com',
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
-        webpage = self._download_webpage(
-            'http://widgets.ellentube.com/videos/%s' % video_id,
-            video_id)
+        URLS = ('http://widgets.ellentube.com/videos/%s' % video_id, url)
+
+        for num, url_ in enumerate(URLS, 1):
+            webpage = self._download_webpage(
+                url_, video_id, fatal=num == len(URLS))
+
+            default = NO_DEFAULT if num == len(URLS) else None
+
+            partner_id = self._search_regex(
+                r"var\s+partnerId\s*=\s*'([^']+)", webpage, 'partner id',
+                default=default)
 
-        partner_id = self._search_regex(
-            r"var\s+partnerId\s*=\s*'([^']+)", webpage, 'partner id')
+            kaltura_id = self._search_regex(
+                [r'id="kaltura_player_([^"]+)"',
+                 r"_wb_entry_id\s*:\s*'([^']+)",
+                 r'data-kaltura-entry-id="([^"]+)'],
+                webpage, 'kaltura id', default=default)
 
-        kaltura_id = self._search_regex(
-            [r'id="kaltura_player_([^"]+)"',
-             r"_wb_entry_id\s*:\s*'([^']+)",
-             r'data-kaltura-entry-id="([^"]+)'],
-            webpage, 'kaltura id')
+            if partner_id and kaltura_id:
+                break
 
         return self.url_result('kaltura:%s:%s' % (partner_id, kaltura_id), 'Kaltura')
 
```