diff options
| -rw-r--r-- | youtube_dl/extractor/ellentv.py | 50 | 
1 files changed, 37 insertions, 13 deletions
diff --git a/youtube_dl/extractor/ellentv.py b/youtube_dl/extractor/ellentv.py
index 4c8190d68..74bbc5c51 100644
--- a/youtube_dl/extractor/ellentv.py
+++ b/youtube_dl/extractor/ellentv.py
@@ -6,12 +6,13 @@ import json
 from .common import InfoExtractor
 from ..utils import (
     ExtractorError,
+    NO_DEFAULT,
 )
 
 
 class EllenTVIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?(?:ellentv|ellentube)\.com/videos/(?P<id>[a-z0-9_-]+)'
-    _TEST = {
+    _TESTS = [{
         'url': 'http://www.ellentv.com/videos/0-ipq1gsai/',
         'md5': '4294cf98bc165f218aaa0b89e0fd8042',
         'info_dict': {
@@ -22,24 +23,47 @@ class EllenTVIE(InfoExtractor):
             'timestamp': 1428035648,
             'upload_date': '20150403',
             'uploader_id': 'batchUser',
-        }
-    }
+        },
+    }, {
+        # not available via http://widgets.ellentube.com/
+        'url': 'http://www.ellentv.com/videos/1-szkgu2m2/',
+        'info_dict': {
+            'id': '1_szkgu2m2',
+            'ext': 'flv',
+            'title': "Ellen's Amazingly Talented Audience",
+            'description': 'md5:86ff1e376ff0d717d7171590e273f0a5',
+            'timestamp': 1255140900,
+            'upload_date': '20091010',
+            'uploader_id': 'ellenkaltura@gmail.com',
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
-        webpage = self._download_webpage(
-            'http://widgets.ellentube.com/videos/%s' % video_id,
-            video_id)
+        URLS = ('http://widgets.ellentube.com/videos/%s' % video_id, url)
+
+        for num, url_ in enumerate(URLS, 1):
+            webpage = self._download_webpage(
+                url_, video_id, fatal=num == len(URLS))
+
+            default = NO_DEFAULT if num == len(URLS) else None
+
+            partner_id = self._search_regex(
+                r"var\s+partnerId\s*=\s*'([^']+)", webpage, 'partner id',
+                default=default)
 
-        partner_id = self._search_regex(
-            r"var\s+partnerId\s*=\s*'([^']+)", webpage, 'partner id')
+            kaltura_id = self._search_regex(
+                [r'id="kaltura_player_([^"]+)"',
+                 r"_wb_entry_id\s*:\s*'([^']+)",
+                 r'data-kaltura-entry-id="([^"]+)'],
+                webpage, 'kaltura id', default=default)
 
-        kaltura_id = self._search_regex(
-            [r'id="kaltura_player_([^"]+)"',
-             r"_wb_entry_id\s*:\s*'([^']+)",
-             r'data-kaltura-entry-id="([^"]+)'],
-            webpage, 'kaltura id')
+            if partner_id and kaltura_id:
+                break
 
         return self.url_result('kaltura:%s:%s' % (partner_id, kaltura_id), 'Kaltura')
 
