aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Karlson2k <k2k@narod.ru> 2013-09-19 18:30:29 +0400
committer Karlson2k <k2k@narod.ru> 2013-09-25 05:45:42 +0400
commit e3138d4d166280677af9856bdd26b5c5d6f3f13c (patch)
tree 59b1b4da3f36ff1d9fc1138fb3d2e9a100615e41
parent 6d2989ceba881d9a44b32c0cbfa82a9dfa3316de (diff)
CharsetConverter: add UTF-8 <-> UTF-32 and UTF-32 <-> wide conversion functions
-rw-r--r-- configure.in 1
-rw-r--r-- xbmc/utils/CharsetConverter.cpp 104
-rw-r--r-- xbmc/utils/CharsetConverter.h 63
3 files changed, 158 insertions, 10 deletions
diff --git a/configure.in b/configure.in
index 4127c9ddb0..50bdf838ca 100644
--- a/configure.in
+++ b/configure.in
@@ -920,6 +920,7 @@ fi
AC_LANG_PUSH([C++])
AC_CHECK_TYPES([std::u16string, std::u32string], [], [], [[#include <string>]])
AC_CHECK_TYPES([char16_t, char32_t])
+AC_CHECK_SIZEOF([wchar_t])
AC_LANG_POP([C++])
# Add top source directory for all builds so we can use config.h
diff --git a/xbmc/utils/CharsetConverter.cpp b/xbmc/utils/CharsetConverter.cpp
index 9b098d2fe6..f5957b8e4d 100644
--- a/xbmc/utils/CharsetConverter.cpp
+++ b/xbmc/utils/CharsetConverter.cpp
@@ -32,30 +32,52 @@
#include <iconv.h>
#if defined(TARGET_DARWIN)
-#ifdef __POWERPC__
- #define WCHAR_CHARSET "UTF-32BE"
-#else
- #define WCHAR_CHARSET "UTF-32LE"
-#endif
+ #define WCHAR_IS_UTF32 1
+ #undef WCHAR_IS_UTF16
+ #ifdef __POWERPC__
+ #define WCHAR_CHARSET "UTF-32BE"
+ #else
+ #define WCHAR_CHARSET "UTF-32LE"
+ #endif
#define UTF8_SOURCE "UTF-8-MAC"
#elif defined(TARGET_WINDOWS)
+ #undef WCHAR_IS_UTF32
+ #define WCHAR_IS_UTF16 1
#define WCHAR_CHARSET "UTF-16LE"
#define UTF8_SOURCE "UTF-8"
#pragma comment(lib, "libfribidi.lib")
#pragma comment(lib, "libiconv.lib")
#elif defined(TARGET_ANDROID)
+ #define WCHAR_IS_UTF32 1
+ #undef WCHAR_IS_UTF16
#define UTF8_SOURCE "UTF-8"
-#ifdef __BIG_ENDIAN__
- #define WCHAR_CHARSET "UTF-32BE"
-#else
- #define WCHAR_CHARSET "UTF-32LE"
-#endif
+ #ifdef __BIG_ENDIAN__
+ #define WCHAR_CHARSET "UTF-32BE"
+ #else
+ #define WCHAR_CHARSET "UTF-32LE"
+ #endif
#else
#define WCHAR_CHARSET "WCHAR_T"
#define UTF8_SOURCE "UTF-8"
+ #ifdef HAVE_CONFIG_H
+ #include "config.h"
+ #endif // HAVE_CONFIG_H
+ #undef WCHAR_IS_UTF32
+ #undef WCHAR_IS_UTF16
+ #ifdef SIZEOF_WCHAR_T
+ #if SIZEOF_WCHAR_T == 4
+ #define WCHAR_IS_UTF32 1
+ #elif SIZEOF_WCHAR_T == 2
+ #define WCHAR_IS_UTF16 1
+ #endif
+ #endif
#endif
+static iconv_t m_iconvUtf8ToUtf32 = (iconv_t)-1;
+static iconv_t m_iconvUtf32ToUtf8 = (iconv_t)-1;
+static iconv_t m_iconvUtf32ToW = (iconv_t)-1;
+static iconv_t m_iconvWToUtf32 = (iconv_t)-1;
static iconv_t m_iconvSubtitleCharsetToW = (iconv_t)-1;
static iconv_t m_iconvUtf8ToStringCharset = (iconv_t)-1;
static iconv_t m_iconvStringCharsetToUtf8 = (iconv_t)-1;
@@ -424,6 +446,10 @@ void CCharsetConverter::reset(void)
{
CSingleLock lock(m_critSection);
+ ICONV_SAFE_CLOSE(m_iconvUtf8ToUtf32);
+ ICONV_SAFE_CLOSE(m_iconvUtf32ToUtf8);
+ ICONV_SAFE_CLOSE(m_iconvUtf32ToW);
+ ICONV_SAFE_CLOSE(m_iconvWToUtf32);
ICONV_SAFE_CLOSE(m_iconvUtf8ToStringCharset);
ICONV_SAFE_CLOSE(m_iconvStringCharsetToUtf8);
ICONV_SAFE_CLOSE(m_iconvSubtitleCharsetToW);
@@ -448,6 +474,64 @@ void CCharsetConverter::reset(void)
}
}
+bool CCharsetConverter::utf8ToUtf32(const std::string& utf8StringSrc, std::u32string& utf32StringDst, bool failOnBadChar /*= true*/)
+{
+ CSingleLock lock(m_critSection); // held for the whole call while the file-static m_iconvUtf8ToUtf32 handle is used
+ return convert(m_iconvUtf8ToUtf32, 1, UTF8_SOURCE, "UTF-32", utf8StringSrc, utf32StringDst, failOnBadChar); // NOTE(review): un-suffixed "UTF-32" may let iconv emit a BOM / choose its own byte order - confirm, or use an explicit LE/BE name
+}
+
+bool CCharsetConverter::utf8ToUtf32Visual(const std::string& utf8StringSrc, std::u32string& utf32StringDst, bool bVisualBiDiFlip /*= false*/, bool forceLTRReadingOrder /*= false*/, bool failOnBadChar /*= false*/)
+{
+ if (bVisualBiDiFlip)
+ {
+ std::string strFlipped; // receives the reordered UTF-8 text from logicalToVisualBiDi
+ if (!logicalToVisualBiDi(utf8StringSrc, strFlipped, FRIBIDI_UTF8, forceLTRReadingOrder ? FRIBIDI_TYPE_LTR : FRIBIDI_TYPE_PDF)) // FRIBIDI_TYPE_LTR is passed only when LTR order is forced
+ return false;
+ CSingleLock lock(m_critSection); // taken only after the flip, around the iconv conversion itself
+ return convert(m_iconvUtf8ToUtf32, 1, UTF8_SOURCE, "UTF-32", strFlipped, utf32StringDst, failOnBadChar); // converts the flipped copy, not the caller's string
+ }
+ CSingleLock lock(m_critSection); // no flip requested: same conversion as above applied to the source as given
+ return convert(m_iconvUtf8ToUtf32, 1, UTF8_SOURCE, "UTF-32", utf8StringSrc, utf32StringDst, failOnBadChar);
+}
+
+bool CCharsetConverter::utf32ToUtf8(const std::u32string& utf32StringSrc, std::string& utf8StringDst, bool failOnBadChar /*= false*/)
+{
+ CSingleLock lock(m_critSection); // held for the whole call while the file-static m_iconvUtf32ToUtf8 handle is used
+ return convert(m_iconvUtf32ToUtf8, m_Utf8CharMaxSize, "UTF-32", "UTF-8", utf32StringSrc, utf8StringDst, failOnBadChar); // target is plain "UTF-8" here, not the UTF8_SOURCE macro
+}
+
+bool CCharsetConverter::utf32ToW(const std::u32string& utf32StringSrc, std::wstring& wStringDst, bool failOnBadChar /*= false*/)
+{
+#ifdef WCHAR_IS_UTF32
+ wStringDst.assign((const wchar_t*)utf32StringSrc.c_str(), utf32StringSrc.length()); // copies the code units as-is, no iconv and no lock; failOnBadChar is not consulted on this path
+ return true; // this path cannot report failure
+#else // !WCHAR_IS_UTF32
+ CSingleLock lock(m_critSection); // held while the file-static m_iconvUtf32ToW handle is used
+ return convert(m_iconvUtf32ToW, 1, "UTF-32", WCHAR_CHARSET, utf32StringSrc, wStringDst, failOnBadChar);
+#endif // !WCHAR_IS_UTF32
+}
+
+bool CCharsetConverter::utf32logicalToVisualBiDi(const std::u32string& logicalStringSrc, std::u32string& visualStringDst, bool forceLTRReadingOrder /*= false*/)
+{
+ visualStringDst.clear(); // start from an empty result so the early return below leaves nothing stale
+ std::string utf8Str; // intermediate UTF-8 copy; the flip itself is done on UTF-8 text
+ if (!utf32ToUtf8(logicalStringSrc, utf8Str, false)) // failOnBadChar is explicitly false for this intermediate step
+ return false;
+
+ return utf8ToUtf32Visual(utf8Str, visualStringDst, true, forceLTRReadingOrder); // bVisualBiDiFlip = true; failOnBadChar left at its default
+}
+
+bool CCharsetConverter::wToUtf32(const std::wstring& wStringSrc, std::u32string& utf32StringDst, bool failOnBadChar /*= false*/)
+{
+#ifdef WCHAR_IS_UTF32
+ utf32StringDst.assign((const char32_t*)wStringSrc.c_str(), wStringSrc.length()); // copies the code units as-is, no iconv and no lock; failOnBadChar is not consulted on this path
+ return true; // this path cannot report failure
+#else // !WCHAR_IS_UTF32
+ CSingleLock lock(m_critSection); // held while the file-static m_iconvWToUtf32 handle is used
+ return convert(m_iconvWToUtf32, 1, WCHAR_CHARSET, "UTF-32", wStringSrc, utf32StringDst, failOnBadChar);
+#endif // !WCHAR_IS_UTF32
+}
+
// bVisualBiDiFlip forces a flip of characters for Hebrew/Arabic languages; only set it to false if the
// string has already been flipped or is not displayed in the GUI
bool CCharsetConverter::utf8ToW(const std::string& utf8StringSrc, std::wstring& wStringDst, bool bVisualBiDiFlip /*= true*/,
diff --git a/xbmc/utils/CharsetConverter.h b/xbmc/utils/CharsetConverter.h
index d347f16681..f6dd178f37 100644
--- a/xbmc/utils/CharsetConverter.h
+++ b/xbmc/utils/CharsetConverter.h
@@ -42,6 +42,69 @@ public:
void clear();
+ /**
+ * Convert UTF-8 string to UTF-32 string.
+ * No RTL logical-visual transformation is performed.
+ * @param utf8StringSrc is source UTF-8 string to convert
+ * @param utf32StringDst is output UTF-32 string, empty on any error
+ * @param failOnBadChar if set to true function will fail on invalid character,
+ * otherwise invalid character will be skipped
+ * @return true on successful conversion, false on any error
+ */
+ bool utf8ToUtf32(const std::string& utf8StringSrc, std::u32string& utf32StringDst, bool failOnBadChar = true);
+ /**
+ * Convert UTF-8 string to UTF-32 string.
+ * RTL logical-visual transformation is optionally performed.
+ * Use it for readable text, GUI strings etc.
+ * @param utf8StringSrc is source UTF-8 string to convert
+ * @param utf32StringDst is output UTF-32 string, empty on any error
+ * @param bVisualBiDiFlip allow RTL logical-visual transformation if set to true, must be set
+ * to false if logical-visual transformation is already done
+ * @param forceLTRReadingOrder force LTR reading order
+ * @param failOnBadChar if set to true function will fail on invalid character,
+ * otherwise invalid character will be skipped
+ * @return true on successful conversion, false on any error
+ */
+ bool utf8ToUtf32Visual(const std::string& utf8StringSrc, std::u32string& utf32StringDst, bool bVisualBiDiFlip = false, bool forceLTRReadingOrder = false, bool failOnBadChar = false);
+ /**
+ * Convert UTF-32 string to UTF-8 string.
+ * No RTL visual-logical transformation is performed.
+ * @param utf32StringSrc is source UTF-32 string to convert
+ * @param utf8StringDst is output UTF-8 string, empty on any error
+ * @param failOnBadChar if set to true function will fail on invalid character,
+ * otherwise invalid character will be skipped
+ * @return true on successful conversion, false on any error
+ */
+ bool utf32ToUtf8(const std::u32string& utf32StringSrc, std::string& utf8StringDst, bool failOnBadChar = false);
+ /**
+ * Convert UTF-32 string to wchar_t string (wstring).
+ * No RTL visual-logical transformation is performed.
+ * @param utf32StringSrc is source UTF-32 string to convert
+ * @param wStringDst is output wchar_t string, empty on any error
+ * @param failOnBadChar if set to true function will fail on invalid character,
+ * otherwise invalid character will be skipped
+ * @return true on successful conversion, false on any error
+ */
+ bool utf32ToW(const std::u32string& utf32StringSrc, std::wstring& wStringDst, bool failOnBadChar = false);
+ /**
+ * Perform logical to visual flip.
+ * @param logicalStringSrc is source string with logical characters order
+ * @param visualStringDst is output string with visual characters order, empty on any error
+ * @param forceLTRReadingOrder force LTR reading order
+ * @return true on success, false otherwise
+ */
+ bool utf32logicalToVisualBiDi(const std::u32string& logicalStringSrc, std::u32string& visualStringDst, bool forceLTRReadingOrder = false);
+ /**
+ * Strictly convert wchar_t string (wstring) to UTF-32 string.
+ * No RTL visual-logical transformation is performed.
+ * @param wStringSrc is source wchar_t string to convert
+ * @param utf32StringDst is output UTF-32 string, empty on any error
+ * @param failOnBadChar if set to true function will fail on invalid character,
+ * otherwise invalid character will be skipped
+ * @return true on successful conversion, false on any error
+ */
+ bool wToUtf32(const std::wstring& wStringSrc, std::u32string& utf32StringDst, bool failOnBadChar = false);
+
bool utf8ToW(const std::string& utf8StringSrc, std::wstring& wStringDst,
bool bVisualBiDiFlip = true, bool forceLTRReadingOrder = false,
bool failOnBadChar = false, bool* bWasFlipped = NULL);