From 56c7366547462ecec0536df58971249a8a870ddd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
 <jaime.marquinez.ferrandiz@gmail.com>
Date: Mon, 8 Jul 2013 15:14:27 +0200
Subject: YoutubeIE: reuse instances of InfoExtractors (closes #998)

When an IE is added to the list, it's also added to a dictionary. When an IE is requested, the dictionary is checked first, and a new instance is created only if none is found there.

That way _real_initialize is only called once for each IE, which saves time if, for example, it needs to log in.
---
 youtube_dl/extractor/common.py | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'youtube_dl/extractor/common.py')
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 1d98222ce..236c7b12c 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -106,6 +106,11 @@ class InfoExtractor(object):
         """Real extraction process. Redefine in subclasses."""
         pass
 
+    @classmethod
+    def ie_key(cls):
+        """A string for getting the InfoExtractor with get_info_extractor"""
+        return cls.__name__[:-2]
+
     @property
     def IE_NAME(self):
         return type(self).__name__[:-2]
-- 
cgit v1.2.3


From f143d86ad2fc0633d8e2da598cf21e73ff0f2872 Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister <phihag@phihag.de>
Date: Wed, 28 Aug 2013 13:59:08 +0200
Subject: [sohu] Handle encoding, and fix tests

---
 youtube_dl/extractor/common.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

(limited to 'youtube_dl/extractor/common.py')

diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 77a13aea5..a2986cebe 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -145,12 +145,17 @@ class InfoExtractor(object):
 
         urlh = self._request_webpage(url_or_request, video_id, note, errnote)
         content_type = urlh.headers.get('Content-Type', '')
+        webpage_bytes = urlh.read()
         m = re.match(r'[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+\s*;\s*charset=(.+)', content_type)
         if m:
             encoding = m.group(1)
         else:
-            encoding = 'utf-8'
-        webpage_bytes = urlh.read()
+            m = re.search(br'<meta[^>]+charset="?([^"]+)[ /">]',
+                          webpage_bytes[:1024])
+            if m:
+                encoding = m.group(1).decode('ascii')
+            else:
+                encoding = 'utf-8'
         if self._downloader.params.get('dump_intermediate_pages', False):
             try:
                 url = url_or_request.get_full_url()
-- 
cgit v1.2.3


From 0d75ae2ce313c5738b2bdd9602ab3cc15e78810d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
 <jaime.marquinez.ferrandiz@gmail.com>
Date: Thu, 29 Aug 2013 11:35:15 +0200
Subject: Fix detection of the webpage charset if it's declared using ' instead
 of "

Like in "<meta charset='utf-8'/>"
---
 youtube_dl/extractor/common.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'youtube_dl/extractor/common.py')

diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index a2986cebe..77726ee24 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -150,7 +150,7 @@ class InfoExtractor(object):
         if m:
             encoding = m.group(1)
         else:
-            m = re.search(br'<meta[^>]+charset="?([^"]+)[ /">]',
+            m = re.search(br'<meta[^>]+charset=[\'"]?([^\'")]+)[ /\'">]',
                           webpage_bytes[:1024])
             if m:
                 encoding = m.group(1).decode('ascii')
-- 
cgit v1.2.3