about | summary | refs | log | tree | commit | diff
path: root/youtube_dl/extractor
diff options
context:
space:
mode:
author: Sergey M․ <dstftw@gmail.com> 2016-01-01 02:23:03 +0600
committer: Sergey M․ <dstftw@gmail.com> 2016-01-01 02:23:03 +0600
commit: b26afec81f408af37cca6298109ca2a59688bf79 (patch)
tree: 8c79485efe97342dde704749b50a7cf866bb0bac /youtube_dl/extractor
parent: 8f7c4f7d2ec712e890519aa7133b0d6dc5ee778b (diff)
[einthusan] Improve extraction (Closes #7877)
Diffstat (limited to 'youtube_dl/extractor')
-rw-r--r-- youtube_dl/extractor/einthusan.py 34
1 files changed, 20 insertions, 14 deletions
diff --git a/youtube_dl/extractor/einthusan.py b/youtube_dl/extractor/einthusan.py
index bc6def65e..f7339702c 100644
--- a/youtube_dl/extractor/einthusan.py
+++ b/youtube_dl/extractor/einthusan.py
@@ -1,9 +1,12 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
-
from .common import InfoExtractor
+from ..compat import compat_urlparse
+from ..utils import (
+ remove_start,
+ sanitized_Request,
+)
class EinthusanIE(InfoExtractor):
@@ -34,30 +37,33 @@ class EinthusanIE(InfoExtractor):
]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
- webpage = self._download_webpage(url, video_id)
+ video_id = self._match_id(url)
+
+ request = sanitized_Request(url)
+ request.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 5.2; WOW64; rv:43.0) Gecko/20100101 Firefox/43.0')
+ webpage = self._download_webpage(request, video_id)
- video_title = self._html_search_regex(
- r'<h1><a class="movie-title".*?>(.*?)</a></h1>', webpage, 'title')
+ title = self._html_search_regex(
+ r'<h1><a[^>]+class=["\']movie-title["\'][^>]*>(.+?)</a></h1>',
+ webpage, 'title')
- movieid = self._html_search_regex(
- r'data-movieid="(.*?)"', webpage, 'movieid')
+ video_id = self._search_regex(
+ r'data-movieid=["\'](\d+)', webpage, 'video id', default=video_id)
- location = 'Washington'
- geturl = 'http://cdn.einthusan.com/geturl/%s/hd/%s' % (movieid, location)
- video_url = self._download_webpage(geturl, video_id)
+ video_url = self._download_webpage(
+ 'http://cdn.einthusan.com/geturl/%s/hd/London,Washington,Toronto,Dallas,San,Sydney/'
+ % video_id, video_id)
description = self._html_search_meta('description', webpage)
thumbnail = self._html_search_regex(
r'''<a class="movie-cover-wrapper".*?><img src=["'](.*?)["'].*?/></a>''',
webpage, "thumbnail url", fatal=False)
if thumbnail is not None:
- thumbnail = thumbnail.replace('..', 'http://www.einthusan.com')
+ thumbnail = compat_urlparse.urljoin(url, remove_start(thumbnail, '..'))
return {
'id': video_id,
- 'title': video_title,
+ 'title': title,
'url': video_url,
'thumbnail': thumbnail,
'description': description,