diff options
author | huohuarong <huohuarong@gmail.com> | 2013-08-05 22:51:54 +0800 |
---|---|---|
committer | huohuarong <huohuarong@gmail.com> | 2013-08-05 22:51:54 +0800 |
commit | b5a6d408181c118bf51382f486a2492643ed74ec (patch) | |
tree | 1c4d59369ee1085d5274fda9658b142e536c5cd2 | |
parent | 4ec929dc9b55a2588b4a27e64871c5bfa900bf37 (diff) |
fix parse title bug
-rw-r--r-- | youtube_dl/extractor/sohu.py | 9 |
1 files changed, 5 insertions, 4 deletions
```diff
diff --git a/youtube_dl/extractor/sohu.py b/youtube_dl/extractor/sohu.py
index cf0ab5478..cd049b6f0 100644
--- a/youtube_dl/extractor/sohu.py
+++ b/youtube_dl/extractor/sohu.py
@@ -27,10 +27,10 @@ class SohuIE(InfoExtractor):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('id')
         webpage = self._download_webpage(url, video_id)
-        pattern = r'<h1 id="video-title">\n*?(.+?)\n*?</h1>'
+        pattern = r'<title>(.+?)</title>'
         compiled = re.compile(pattern, re.DOTALL)
-        title = self._search_regex(compiled, webpage, u'video title').strip('\t\n')
-        title = clean_html(title)
+        title = self._search_regex(compiled, webpage, u'video title')
+        title = clean_html(title).split('-')[0].strip()
         pattern = re.compile(r'var vid="(\d+)"')
         result = re.search(pattern, webpage)
         if not result:
@@ -41,7 +41,8 @@ class SohuIE(InfoExtractor):
         base_url_1 = 'http://hot.vrs.sohu.com/vrs_flash.action?vid='
         url_1 = base_url_1 + vid
         logging.info('json url: %s' % url_1)
-        json_1 = json.loads(urllib2.urlopen(url_1).read())
+        webpage = self._download_webpage(url_1, vid)
+        json_1 = json.loads(webpage)
         # get the highest definition video vid and json infomation.
         vids = []
         qualities = ('oriVid', 'superVid', 'highVid', 'norVid')
```