about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Sergey M․ <dstftw@gmail.com> 2018-09-03 02:53:26 +0700
committer Sergey M․ <dstftw@gmail.com> 2018-09-03 02:53:51 +0700
commit 93284ff2ea4eb84c25b8d496012398a056ba89ac (patch)
tree 67436e856e0c35e71cc3b46540d65225a017184e
parent 0a9a8118ce834c206434df04c18187934acbf608 (diff)
download youtube-dl-93284ff2ea4eb84c25b8d496012398a056ba89ac.tar.xz
[radiojavan] Improve extraction (closes #17151)
-rw-r--r-- youtube_dl/extractor/radiojavan.py 30
1 files changed, 17 insertions, 13 deletions
diff --git a/youtube_dl/extractor/radiojavan.py b/youtube_dl/extractor/radiojavan.py
index 4124bcd45..3f74f0c01 100644
--- a/youtube_dl/extractor/radiojavan.py
+++ b/youtube_dl/extractor/radiojavan.py
@@ -4,15 +4,16 @@ import re
from .common import InfoExtractor
from ..utils import (
- unified_strdate,
+ parse_resolution,
str_to_int,
+ unified_strdate,
urlencode_postdata,
+ urljoin,
)
class RadioJavanIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?radiojavan\.com/videos/video/(?P<id>[^/]+)/?'
- _HOST_TRACKER_URL = 'https://www.radiojavan.com/videos/video_host'
_TEST = {
'url': 'http://www.radiojavan.com/videos/video/chaartaar-ashoobam',
'md5': 'e85208ffa3ca8b83534fca9fe19af95b',
@@ -31,23 +32,26 @@ class RadioJavanIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
-
download_host = self._download_json(
- self._HOST_TRACKER_URL,
- video_id,
+ 'https://www.radiojavan.com/videos/video_host', video_id,
data=urlencode_postdata({'id': video_id}),
headers={
'Content-Type': 'application/x-www-form-urlencoded',
'Referer': url,
- }
- )['host']
+ }).get('host', 'https://host1.rjmusicmedia.com')
+
+ webpage = self._download_webpage(url, video_id)
- formats = [{
- 'url': '%s/%s' % (download_host, video_path),
- 'format_id': '%sp' % height,
- 'height': int(height),
- } for height, video_path in re.findall(r"RJ\.video(\d+)p\s*=\s*'/?([^']+)'", webpage)]
+ formats = []
+ for format_id, _, video_path in re.findall(
+ r'RJ\.video(?P<format_id>\d+[pPkK])\s*=\s*(["\'])(?P<url>(?:(?!\2).)+)\2',
+ webpage):
+ f = parse_resolution(format_id)
+ f.update({
+ 'url': urljoin(download_host, video_path),
+ 'format_id': format_id,
+ })
+ formats.append(f)
self._sort_formats(formats)
title = self._og_search_title(webpage)