diff options
author | Philipp Hagemeister <phihag@phihag.de> | 2013-06-23 21:08:17 +0200 |
---|---|---|
committer | Philipp Hagemeister <phihag@phihag.de> | 2013-06-23 21:08:17 +0200 |
commit | 153697660dfbc5f510f756e22e30d1ac86d9e612 (patch) | |
tree | 68a77fc3c2df64f66c0a5b68763358695431cd93 /youtube_dl/extractor/escapist.py | |
parent | 60a72e8d458d4729f55e77b45074827d8769d9b9 (diff) |
Move Escapist into its own file
Diffstat (limited to 'youtube_dl/extractor/escapist.py')
-rw-r--r-- | youtube_dl/extractor/escapist.py | 68 |
1 files changed, 68 insertions, 0 deletions
# youtube_dl/extractor/escapist.py
# (new file introduced by the commit "Move Escapist into its own file")
import json
import re

from .common import InfoExtractor
from ..utils import (
    compat_str,
    compat_urllib_parse,

    ExtractorError,
)


class EscapistIE(InfoExtractor):
    """Information extractor for escapistmagazine.com video pages."""

    _VALID_URL = r'^(https?://)?(www\.)?escapistmagazine\.com/videos/view/(?P<showname>[^/]+)/(?P<episode>[^/?]+)[/?]?.*$'

    def _real_extract(self, url):
        """Extract the video information for a single Escapist episode.

        The video page is downloaded and its <meta> tags are scanned for the
        description, thumbnail, player URL and title.  The player URL carries
        a ``config=`` parameter pointing at a configuration document, which
        is downloaded and parsed to obtain the media URL.

        Returns a one-element list holding the info dictionary.

        Raises ExtractorError if the URL does not match _VALID_URL, if the
        configuration cannot be parsed, or if it does not contain the
        expected playlist entry.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        showName = mobj.group('showname')
        videoId = mobj.group('episode')

        self.report_extraction(videoId)
        webpage = self._download_webpage(url, videoId)

        # Description and thumbnail are optional (fatal=False); the player
        # URL and the title are looked up without that flag.
        videoDesc = self._html_search_regex(
            r'<meta name="description" content="([^"]*)"',
            webpage, u'description', fatal=False)

        imgUrl = self._html_search_regex(
            r'<meta property="og:image" content="([^"]*)"',
            webpage, u'thumbnail', fatal=False)

        playerUrl = self._html_search_regex(
            r'<meta property="og:video" content="([^"]*)"',
            webpage, u'player url')

        # Only the part after the last ' : ' separator is kept as the title.
        # The field label is u'title' so that a failed lookup is reported
        # against the right field.
        title = self._html_search_regex(
            r'<meta name="title" content="([^"]*)"',
            webpage, u'title').split(' : ')[-1]

        # The configuration URL is passed percent-encoded in the player URL.
        configUrl = self._search_regex(r'config=(.*)$', playerUrl, u'config url')
        configUrl = compat_urllib_parse.unquote(configUrl)

        configJSON = self._download_webpage(
            configUrl, videoId,
            u'Downloading configuration',
            u'unable to download configuration')

        # Technically, it's JavaScript, not JSON
        configJSON = configJSON.replace("'", '"')

        try:
            config = json.loads(configJSON)
        except ValueError as err:
            raise ExtractorError(u'Invalid JSON in configuration file: ' + compat_str(err))

        # The media URL is read from the second playlist entry.
        # NOTE(review): presumably the first entry is not the episode itself
        # (e.g. an intro clip) -- confirm against a live configuration.
        try:
            videoUrl = config['playlist'][1]['url']
        except (KeyError, IndexError, TypeError):
            raise ExtractorError(u'Unable to find the video URL in the configuration playlist')

        info = {
            'id': videoId,
            'url': videoUrl,
            'uploader': showName,
            'upload_date': None,
            'title': title,
            'ext': 'mp4',
            'thumbnail': imgUrl,
            'description': videoDesc,
            'player_url': playerUrl,
        }

        return [info]