diff options
author | Pierre Rudloff <contact@rudloff.pro> | 2013-08-31 00:37:29 +0200 |
---|---|---|
committer | Pierre Rudloff <contact@rudloff.pro> | 2013-08-31 00:37:29 +0200 |
commit | 847f582290c6ad6ec0c72760ea3cfa6417d28e3c (patch) | |
tree | c12baa61c649ec00e5fdfe35b1f88f0025d54f99 /youtube_dl/extractor/common.py | |
parent | cd9c100963e8b8bf651d1f359e5f7812603ca0fd (diff) | |
parent | 10f5c016ec6262e5d29327e97fe4f3d1127ccdff (diff) |
Merge remote-tracking branch 'upstream/master'
Diffstat (limited to 'youtube_dl/extractor/common.py')
-rw-r--r-- | youtube_dl/extractor/common.py | 14 |
1 files changed, 12 insertions, 2 deletions
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 12169b2bb..77726ee24 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -114,6 +114,11 @@ class InfoExtractor(object): """Real extraction process. Redefine in subclasses.""" pass + @classmethod + def ie_key(cls): + """A string for getting the InfoExtractor with get_info_extractor""" + return cls.__name__[:-2] + @property def IE_NAME(self): return type(self).__name__[:-2] @@ -140,12 +145,17 @@ class InfoExtractor(object): urlh = self._request_webpage(url_or_request, video_id, note, errnote) content_type = urlh.headers.get('Content-Type', '') + webpage_bytes = urlh.read() m = re.match(r'[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+\s*;\s*charset=(.+)', content_type) if m: encoding = m.group(1) else: - encoding = 'utf-8' - webpage_bytes = urlh.read() + m = re.search(br'<meta[^>]+charset=[\'"]?([^\'")]+)[ /\'">]', + webpage_bytes[:1024]) + if m: + encoding = m.group(1).decode('ascii') + else: + encoding = 'utf-8' if self._downloader.params.get('dump_intermediate_pages', False): try: url = url_or_request.get_full_url() |