about | summary | refs | log | tree | commit | diff
path: root/youtube_dl/extractor/sohu.py
diff options
context:
space:
mode:
author huohuarong <huohuarong@gmail.com> 2013-08-05 22:51:54 +0800
committer huohuarong <huohuarong@gmail.com> 2013-08-05 22:51:54 +0800
commit b5a6d408181c118bf51382f486a2492643ed74ec (patch)
tree 1c4d59369ee1085d5274fda9658b142e536c5cd2 /youtube_dl/extractor/sohu.py
parent 4ec929dc9b55a2588b4a27e64871c5bfa900bf37 (diff)
download youtube-dl-b5a6d408181c118bf51382f486a2492643ed74ec.tar.xz
fix parse title bug
Diffstat (limited to 'youtube_dl/extractor/sohu.py')
-rw-r--r-- youtube_dl/extractor/sohu.py | 9
1 file changed, 5 insertions, 4 deletions
diff --git a/youtube_dl/extractor/sohu.py b/youtube_dl/extractor/sohu.py
index cf0ab5478..cd049b6f0 100644
--- a/youtube_dl/extractor/sohu.py
+++ b/youtube_dl/extractor/sohu.py
@@ -27,10 +27,10 @@ class SohuIE(InfoExtractor):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
- pattern = r'<h1 id="video-title">\n*?(.+?)\n*?</h1>'
+ pattern = r'<title>(.+?)</title>'
compiled = re.compile(pattern, re.DOTALL)
- title = self._search_regex(compiled, webpage, u'video title').strip('\t\n')
- title = clean_html(title)
+ title = self._search_regex(compiled, webpage, u'video title')
+ title = clean_html(title).split('-')[0].strip()
pattern = re.compile(r'var vid="(\d+)"')
result = re.search(pattern, webpage)
if not result:
@@ -41,7 +41,8 @@ class SohuIE(InfoExtractor):
base_url_1 = 'http://hot.vrs.sohu.com/vrs_flash.action?vid='
url_1 = base_url_1 + vid
logging.info('json url: %s' % url_1)
- json_1 = json.loads(urllib2.urlopen(url_1).read())
+ webpage = self._download_webpage(url_1, vid)
+ json_1 = json.loads(webpage)
# get the highest definition video vid and json infomation.
vids = []
qualities = ('oriVid', 'superVid', 'highVid', 'norVid')