From 20ff802c9bcff954ac10b10e9254078f308f037d Mon Sep 17 00:00:00 2001 From: naglis Date: Sun, 14 Sep 2014 00:12:36 +0300 Subject: [cloudy] Add new extractor. Closes #3743 --- youtube_dl/extractor/cloudy.py | 67 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) create mode 100644 youtube_dl/extractor/cloudy.py (limited to 'youtube_dl/extractor/cloudy.py') diff --git a/youtube_dl/extractor/cloudy.py b/youtube_dl/extractor/cloudy.py new file mode 100644 index 000000000..73c6e3d49 --- /dev/null +++ b/youtube_dl/extractor/cloudy.py @@ -0,0 +1,67 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + compat_parse_qs, + compat_urllib_parse, +) + + +class CloudyIE(InfoExtractor): + _VALID_URL = r'''(?x) + https?://(?:www\.)?cloudy\.ec/ + (?:v/|embed\.php\?id=) + (?P<id>[A-Za-z0-9]+) + ''' + _API_URL = 'http://www.cloudy.ec/api/player.api.php?%s' + _TEST = { + 'url': 'https://www.cloudy.ec/v/af511e2527aac', + 'md5': '5cb253ace826a42f35b4740539bedf07', + 'info_dict': { + 'id': 'af511e2527aac', + 'ext': 'flv', + 'title': 'Funny Cats and Animals Compilation june 2013', + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + url = 'http://www.cloudy.ec/embed.php?id=%s' % video_id + webpage = self._download_webpage(url, video_id) + + file_key = self._search_regex( + r'filekey\s*=\s*"([^"]+)"', webpage, 'file_key') + data_url = self._API_URL % compat_urllib_parse.urlencode({ + 'file': video_id, + 'key': file_key, + }) + player_data = self._download_webpage( + data_url, video_id, 'Downloading player data') + data = compat_parse_qs(player_data) + + if 'error' in data: + raise ExtractorError( + '%s error: %s' % (self.IE_NAME, ' '.join(data['error_msg'])), + expected=True) + + title = data.get('title', [None])[0] + if title: + title = title.replace('&asdasdas', '').strip() + + formats = [] + 
formats.append({ + 'format_id': 'sd', + 'url': data.get('url', [None])[0], + }) + + return { + 'id': video_id, + 'title': title, + 'formats': formats, + } -- cgit v1.2.3 From 5dbf3b5c60e99585697cea95a34aa8fd6c109827 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 14 Sep 2014 05:01:25 +0700 Subject: [cloudy] Minor changes --- youtube_dl/extractor/cloudy.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'youtube_dl/extractor/cloudy.py') diff --git a/youtube_dl/extractor/cloudy.py b/youtube_dl/extractor/cloudy.py index 73c6e3d49..95eda2e58 100644 --- a/youtube_dl/extractor/cloudy.py +++ b/youtube_dl/extractor/cloudy.py @@ -8,6 +8,7 @@ from ..utils import ( ExtractorError, compat_parse_qs, compat_urllib_parse, + remove_end, ) @@ -52,13 +53,15 @@ class CloudyIE(InfoExtractor): title = data.get('title', [None])[0] if title: - title = title.replace('&asdasdas', '').strip() + title = remove_end(title, '&asdasdas').strip() formats = [] - formats.append({ - 'format_id': 'sd', - 'url': data.get('url', [None])[0], - }) + video_url = data.get('url', [None])[0] + if video_url: + formats.append({ + 'format_id': 'sd', + 'url': video_url, + }) return { 'id': video_id, -- cgit v1.2.3 From 98676c08a1c4977f945f8e83c31c227f337176ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 14 Sep 2014 05:17:21 +0700 Subject: [cloudy] Add support for videoraj.ch --- youtube_dl/extractor/cloudy.py | 40 +++++++++++++++++++++++++++------------- 1 file changed, 27 insertions(+), 13 deletions(-) (limited to 'youtube_dl/extractor/cloudy.py') diff --git a/youtube_dl/extractor/cloudy.py b/youtube_dl/extractor/cloudy.py index 95eda2e58..bfdfbcfe6 100644 --- a/youtube_dl/extractor/cloudy.py +++ b/youtube_dl/extractor/cloudy.py @@ -13,35 +13,49 @@ from ..utils import ( class CloudyIE(InfoExtractor): + _IE_DESC = 'cloudy.ec and videoraj.ch' _VALID_URL = r'''(?x) - https?://(?:www\.)?cloudy\.ec/ + 
https?://(?:www\.)?(?P<host>cloudy\.ec|videoraj\.ch)/ (?:v/|embed\.php\?id=) (?P<id>[A-Za-z0-9]+) ''' - _API_URL = 'http://www.cloudy.ec/api/player.api.php?%s' - _TEST = { - 'url': 'https://www.cloudy.ec/v/af511e2527aac', - 'md5': '5cb253ace826a42f35b4740539bedf07', - 'info_dict': { - 'id': 'af511e2527aac', - 'ext': 'flv', - 'title': 'Funny Cats and Animals Compilation june 2013', + _EMBED_URL = 'http://www.%s/embed.php?id=%s' + _API_URL = 'http://www.%s/api/player.api.php?%s' + _TESTS = [ + { + 'url': 'https://www.cloudy.ec/v/af511e2527aac', + 'md5': '5cb253ace826a42f35b4740539bedf07', + 'info_dict': { + 'id': 'af511e2527aac', + 'ext': 'flv', + 'title': 'Funny Cats and Animals Compilation june 2013', + } + }, + { + 'url': 'http://www.videoraj.ch/v/47f399fd8bb60', + 'md5': '7d0f8799d91efd4eda26587421c3c3b0', + 'info_dict': { + 'id': '47f399fd8bb60', + 'ext': 'flv', + 'title': 'Burning a New iPhone 5 with Gasoline - Will it Survive?', + } } - } + ] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) + video_host = mobj.group('host') video_id = mobj.group('id') - url = 'http://www.cloudy.ec/embed.php?id=%s' % video_id + url = self._EMBED_URL % (video_host, video_id) webpage = self._download_webpage(url, video_id) file_key = self._search_regex( r'filekey\s*=\s*"([^"]+)"', webpage, 'file_key') - data_url = self._API_URL % compat_urllib_parse.urlencode({ + data_url = self._API_URL % (video_host, compat_urllib_parse.urlencode({ 'file': video_id, 'key': file_key, - }) + })) player_data = self._download_webpage( data_url, video_id, 'Downloading player data') data = compat_parse_qs(player_data) -- cgit v1.2.3 From eb3bd7ba8db8a31f5262d2d8f335700d1664a1bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 14 Sep 2014 19:04:16 +0700 Subject: [cloudy] Retry extraction on 410 status code (#3743 #3744) --- youtube_dl/extractor/cloudy.py | 56 ++++++++++++++++++++++++++++++------------ 1 file changed, 40 insertions(+), 16 deletions(-) (limited to 
'youtube_dl/extractor/cloudy.py') diff --git a/youtube_dl/extractor/cloudy.py b/youtube_dl/extractor/cloudy.py index bfdfbcfe6..386f080d2 100644 --- a/youtube_dl/extractor/cloudy.py +++ b/youtube_dl/extractor/cloudy.py @@ -9,6 +9,8 @@ from ..utils import ( compat_parse_qs, compat_urllib_parse, remove_end, + HEADRequest, + compat_HTTPError, ) @@ -21,6 +23,7 @@ class CloudyIE(InfoExtractor): ''' _EMBED_URL = 'http://www.%s/embed.php?id=%s' _API_URL = 'http://www.%s/api/player.api.php?%s' + _MAX_TRIES = 2 _TESTS = [ { 'url': 'https://www.cloudy.ec/v/af511e2527aac', @@ -42,24 +45,30 @@ class CloudyIE(InfoExtractor): } ] - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_host = mobj.group('host') - video_id = mobj.group('id') + def _extract_video(self, video_host, video_id, file_key, error_url=None, try_num=0): - url = self._EMBED_URL % (video_host, video_id) - webpage = self._download_webpage(url, video_id) + if try_num > self._MAX_TRIES - 1: + raise ExtractorError('Unable to extract video URL', expected=True) - file_key = self._search_regex( - r'filekey\s*=\s*"([^"]+)"', webpage, 'file_key') - data_url = self._API_URL % (video_host, compat_urllib_parse.urlencode({ + form = { 'file': video_id, 'key': file_key, - })) + } + + if error_url: + form.update({ + 'numOfErrors': try_num, + 'errorCode': '404', + 'errorUrl': error_url, + }) + + data_url = self._API_URL % (video_host, compat_urllib_parse.urlencode(form)) player_data = self._download_webpage( data_url, video_id, 'Downloading player data') data = compat_parse_qs(player_data) + try_num += 1 + if 'error' in data: raise ExtractorError( '%s error: %s' % (self.IE_NAME, ' '.join(data['error_msg'])), @@ -69,16 +78,31 @@ class CloudyIE(InfoExtractor): if title: title = remove_end(title, '&asdasdas').strip() - formats = [] video_url = data.get('url', [None])[0] + if video_url: - formats.append({ - 'format_id': 'sd', - 'url': video_url, - }) + try: + self._request_webpage(HEADRequest(video_url), 
video_id, 'Checking video URL') + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code in [404, 410]: + self.report_warning('Invalid video URL, requesting another', video_id) + return self._extract_video(video_host, video_id, file_key, video_url, try_num) return { 'id': video_id, + 'url': video_url, 'title': title, - 'formats': formats, } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_host = mobj.group('host') + video_id = mobj.group('id') + + url = self._EMBED_URL % (video_host, video_id) + webpage = self._download_webpage(url, video_id) + + file_key = self._search_regex( + r'filekey\s*=\s*"([^"]+)"', webpage, 'file_key') + + return self._extract_video(video_host, video_id, file_key) -- cgit v1.2.3