1 files changed, 0 insertions, 116 deletions
diff --git a/libraries/libuchardet/uchardet-0.0.5-fix-ASCII-detection.patch b/libraries/libuchardet/uchardet-0.0.5-fix-ASCII-detection.patch
deleted file mode 100644
index c82aee866ebc8..0000000000000
--- a/libraries/libuchardet/uchardet-0.0.5-fix-ASCII-detection.patch
+++ /dev/null
@@ -1,116 +0,0 @@
-commit 4c8316f9cfda38d75fb015c0eb40e0eebb03d28f
-Author: Jehan <jehan@girinstud.io>
-Date:   Sat Dec 5 21:04:20 2015 +0100
-
-    Nearly-ASCII text with NBSP is still not ASCII.
-    
-    There is no "exception" in encoding. The non-breaking space 0xA0 is not
-    ASCII, and therefore returning "ASCII" will later create issues (for
-    instance trying to re-encode with iconv produces an error).
-    This was obviously an explicit decision in original code (according to
-    code comments), probably tied to specificity of the original program from
-    Mozilla. Now we want strict detection.
-    I will return "ISO-8859-1" for "nearly-ASCII texts with NBSP as only
-    exception" (note that I could have returned any ISO-8859 charsets since
-    they all have this character in common).
-
-diff --git a/src/nsUniversalDetector.cpp b/src/nsUniversalDetector.cpp
-index ab8bae0..ff06b9d 100644
---- a/src/nsUniversalDetector.cpp
-+++ b/src/nsUniversalDetector.cpp
-@@ -47,6 +47,7 @@
-
- nsUniversalDetector::nsUniversalDetector(PRUint32 aLanguageFilter)
- {
-+  mNbspFound = PR_FALSE;
-   mDone = PR_FALSE;
-   mBestGuess = -1;   //illegal value as signal
-   mInTag = PR_FALSE;
-@@ -75,6 +76,7 @@ nsUniversalDetector::~nsUniversalDetector()
- void
- nsUniversalDetector::Reset()
- {
-+  mNbspFound = PR_FALSE;
-   mDone = PR_FALSE;
-   mBestGuess = -1;   //illegal value as signal
-   mInTag = PR_FALSE;
-@@ -162,9 +164,10 @@ nsresult nsUniversalDetector::HandleData(const char* aBuf, PRUint32 aLen)
-   PRUint32 i;
-   for (i = 0; i < aLen; i++)
-   {
--    /* Other than 0xA0, if every other character is ASCII, the page is ASCII.
-+    /* If every other character is ASCII or 0xA0, we don't run charset
-+     * probers.
-      * 0xA0 (NBSP in a few charset) is apparently a rare exception
--     * of non-ASCII character contained in ASCII text. */
-+     * of non-ASCII character often contained in nearly-ASCII text. */
-     if (aBuf[i] & '\x80' && aBuf[i] != '\xA0')
-     {
-       /* We got a non-ASCII byte (high-byte) */
-@@ -203,11 +206,19 @@ nsresult nsUniversalDetector::HandleData(const char* aBuf, PRUint32 aLen)
-     }
-     else
-     {
--      //ok, just pure ascii so far
--      if ( ePureAscii == mInputState &&
--        (aBuf[i] == '\033' || (aBuf[i] == '{' && mLastChar == '~')) )
-+      /* Just pure ASCII or NBSP so far. */
-+      if (aBuf[i] == '\xA0')
-       {
--        //found escape character or HZ "~{"
-+        /* ASCII with the only exception of NBSP seems quite common.
-+         * I doubt it is really necessary to train a model here, so let's
-+         * just make an exception.
-+         */
-+          mNbspFound = PR_TRUE;
-+      }
-+      else if (mInputState == ePureAscii &&
-+               (aBuf[i] == '\033' || (aBuf[i] == '{' && mLastChar == '~')))
-+      {
-+        /* We found an escape character or HZ "~{". */
-         mInputState = eEscAscii;
-       }
-       mLastChar = aBuf[i];
-@@ -229,6 +240,10 @@ nsresult nsUniversalDetector::HandleData(const char* aBuf, PRUint32 aLen)
-       mDone = PR_TRUE;
-       mDetectedCharset = mEscCharSetProber->GetCharSetName();
-     }
-+    else if (mNbspFound)
-+    {
-+      mDetectedCharset = "ISO-8859-1";
-+    }
-     else
-     {
-       /* ASCII with the ESC character (or the sequence "~{") is still
-@@ -253,8 +268,17 @@ nsresult nsUniversalDetector::HandleData(const char* aBuf, PRUint32 aLen)
-     break;
-
-   default:
--    /* Pure ASCII */
--    mDetectedCharset = "ASCII";
-+    if (mNbspFound)
-+    {
-+      /* ISO-8859-1 is a good result candidate for ASCII + NBSP.
-+       * (though it could have been any ISO-8859 encoding). */
-+      mDetectedCharset = "ISO-8859-1";
-+    }
-+    else
-+    {
-+      /* Pure ASCII */
-+      mDetectedCharset = "ASCII";
-+    }
-     break;
-   }
-   return NS_OK;
-diff --git a/src/nsUniversalDetector.h b/src/nsUniversalDetector.h
-index 4d9b460..9f0a4b1 100644
---- a/src/nsUniversalDetector.h
-+++ b/src/nsUniversalDetector.h
-@@ -72,6 +72,7 @@ protected:
-    virtual void Report(const char* aCharset) = 0;
-    virtual void Reset();
-    nsInputState  mInputState;
-+   PRBool  mNbspFound;
-    PRBool  mDone;
-    PRBool  mInTag;
-    PRBool  mStart;