diff options
author | Philipp Hagemeister <phihag@phihag.de> | 2014-08-24 03:44:54 +0200 |
---|---|---|
committer | Philipp Hagemeister <phihag@phihag.de> | 2014-08-24 04:14:02 +0200 |
commit | e5402ac1203c8fd0c438ad515bbd80eb288b3907 (patch) | |
tree | 91a3fc1eb82c4fc60b472d8e7d821908c0a9d81a | |
parent | f56f8399c7a222dd07cbf09b02c212509f2ad805 (diff) |
[wayofthemaster] Add extractor (Fixes #3575)
-rw-r--r-- | youtube_dl/extractor/__init__.py | 1 | ||||
-rw-r--r-- | youtube_dl/extractor/wayofthemaster.py | 53 |
2 files changed, 54 insertions, 0 deletions
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 0d7824023..52354e6e0 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -390,6 +390,7 @@ from .vuclip import VuClipIE
 from .vulture import VultureIE
 from .washingtonpost import WashingtonPostIE
 from .wat import WatIE
+from .wayofthemaster import WayOfTheMasterIE
 from .wdr import (
     WDRIE,
     WDRMobileIE,
diff --git a/youtube_dl/extractor/wayofthemaster.py b/youtube_dl/extractor/wayofthemaster.py
new file mode 100644
index 000000000..c0723b8eb
--- /dev/null
+++ b/youtube_dl/extractor/wayofthemaster.py
@@ -0,0 +1,53 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import determine_ext
+
+
+class WayOfTheMasterIE(InfoExtractor):
+    _VALID_URL = r'https?://www\.wayofthemaster\.com/([^/?#]*/)*(?P<id>[^/?#]+)\.s?html(?:$|[?#])'
+
+    _TEST = {
+        'url': 'http://www.wayofthemaster.com/hbks.shtml',
+        'md5': '5316b57487ada8480606a93cb3d18d24',
+        'info_dict': {
+            'id': 'hbks',
+            'ext': 'mp4',
+            'title': 'Intelligent Design vs. Evolution',
+        },
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        webpage = self._download_webpage(url, video_id)
+
+        title = self._search_regex(
+            r'<img src="images/title_[^"]+".*?alt="([^"]+)"',
+            webpage, 'title', default=None)
+        if title is None:
+            title = self._html_search_regex(
+                r'<title>(.*?)</title>', webpage, 'page title')
+
+        url_base = self._search_regex(
+            r'<param\s+name="?movie"?\s+value=".*?/wotm_videoplayer_highlow[0-9]*\.swf\?vid=([^"]+)"',
+            webpage, 'URL base')
+        formats = [{
+            'format_id': 'low',
+            'quality': 1,
+            'url': url_base + '_low.mp4',
+        }, {
+            'format_id': 'high',
+            'quality': 2,
+            'url': url_base + '_high.mp4',
+        }]
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'formats': formats,
+        }