# Origin: commit c3c77cec300dd05938dcf175ab5fec536184589a
# Author: Philipp Hagemeister
# Date: Sun, 23 Jun 2013 22:14:22 +0200
# Subject: [youjizz] move into own file
# Path: youtube_dl/extractor/youjizz.py (new file, mode 100644, 45 insertions)
# Rendered by: cgit v1.2.3

import re

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
)


class YouJizzIE(InfoExtractor):
    """Extractor for youjizz.com video pages.

    Handles URLs of the form ``youjizz.com/videos/<slug>.html`` (scheme and
    a single subdomain are optional).
    """

    # The named group captures everything between "/videos/" and ".html".
    _VALID_URL = r'^(?:https?://)?(?:\w+\.)?youjizz\.com/videos/(?P<videoid>[^.]+)\.html$'

    def _real_extract(self, url):
        """Resolve a video page URL into a downloadable media description.

        The video page is fetched for its title and for the link to the
        embed page; the embed page is then fetched to find the media URL.

        Args:
            url: Address of the video page; expected to match ``_VALID_URL``.

        Returns:
            A one-element list holding a dict with the keys ``id``, ``url``,
            ``title``, ``ext``, ``format`` and ``player_url``.

        Raises:
            ExtractorError: If ``url`` does not match ``_VALID_URL`` or the
                video page contains no link to an embed page.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            # Fail with the extractor's own error type instead of an
            # AttributeError on ``None.group``.
            raise ExtractorError(u'Invalid URL: %s' % url)

        # Provisional id (the URL slug); replaced below by the numeric id
        # found in the embed link.
        video_id = mobj.group('videoid')

        # Get webpage content
        webpage = self._download_webpage(url, video_id)

        # Get the video title
        video_title = self._html_search_regex(
            r'<title>(?P<title>.*)</title>',
            webpage, u'title').strip()

        # Get the embed page
        result = re.search(
            r'https?://www\.youjizz\.com/videos/embed/(?P<videoid>[0-9]+)',
            webpage)
        if result is None:
            raise ExtractorError(u'ERROR: unable to extract embed page')

        embed_page_url = result.group(0).strip()
        video_id = result.group('videoid')

        webpage = self._download_webpage(embed_page_url, video_id)

        # Get the video URL, which the embed page passes to the player as
        # the "file" variable. NOTE(review): the group name was lost in the
        # extracted source; "source" is a reconstruction - confirm upstream.
        video_url = self._search_regex(
            r'so\.addVariable\("file",encodeURIComponent\("(?P<source>[^"]+)"\)\);',
            webpage, u'video URL')

        info = {
            'id': video_id,
            'url': video_url,
            'title': video_title,
            'ext': 'flv',
            'format': 'flv',
            'player_url': embed_page_url,
        }

        return [info]