about | summary | refs | log | tree | commit | diff
path: root/youtube_dl/extractor/sohu.py
diff options
context:
space:
mode:
author huohuarong <huohuarong@gmail.com> 2013-08-03 10:29:58 +0800
committer huohuarong <huohuarong@gmail.com> 2013-08-03 10:29:58 +0800
commit 4ec929dc9b55a2588b4a27e64871c5bfa900bf37 (patch)
tree bdee9a28442f183a2a1c5c8c6ff561d536033afa /youtube_dl/extractor/sohu.py
parent 6624a2b07dafad4de895b4e84f4595214817518d (diff)
use ..utils/clean_html()
Diffstat (limited to 'youtube_dl/extractor/sohu.py')
-rw-r--r-- youtube_dl/extractor/sohu.py 19
1 file changed, 6 insertions, 13 deletions
diff --git a/youtube_dl/extractor/sohu.py b/youtube_dl/extractor/sohu.py
index 830814221..cf0ab5478 100644
--- a/youtube_dl/extractor/sohu.py
+++ b/youtube_dl/extractor/sohu.py
@@ -7,7 +7,7 @@ import logging
import urllib2
from .common import InfoExtractor
-from ..utils import compat_urllib_request
+from ..utils import compat_urllib_request, clean_html
class SohuIE(InfoExtractor):
@@ -22,16 +22,6 @@ class SohuIE(InfoExtractor):
},
}
- def _clearn_html(self, string):
- tags = re.findall(r'<.+?>', string)
- for t in tags:
- string = string.replace(t, ' ')
- for i in range(2):
- spaces = re.findall(r'\s+', string)
- for s in spaces:
- string = string.replace(s, ' ')
- string = string.strip()
- return string
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
@@ -40,7 +30,7 @@ class SohuIE(InfoExtractor):
pattern = r'<h1 id="video-title">\n*?(.+?)\n*?</h1>'
compiled = re.compile(pattern, re.DOTALL)
title = self._search_regex(compiled, webpage, u'video title').strip('\t\n')
- title = self._clearn_html(title)
+ title = clean_html(title)
pattern = re.compile(r'var vid="(\d+)"')
result = re.search(pattern, webpage)
if not result:
@@ -93,5 +83,8 @@ class SohuIE(InfoExtractor):
}
files_info.append(info)
time.sleep(1)
-
+ if num_of_parts == 1:
+ info = files_info[0]
+ info['id'] = video_id
+ return info
return files_info