diff options
author | Karlson2k <k2k@narod.ru> | 2013-09-19 18:30:29 +0400 |
---|---|---|
committer | Karlson2k <k2k@narod.ru> | 2013-09-25 05:45:42 +0400 |
commit | e3138d4d166280677af9856bdd26b5c5d6f3f13c (patch) | |
tree | 59b1b4da3f36ff1d9fc1138fb3d2e9a100615e41 | |
parent | 6d2989ceba881d9a44b32c0cbfa82a9dfa3316de (diff) |
CharsetConverter: add UTF-8 <-> UTF-32 and UTF-32 <-> wide conversion functions
-rw-r--r-- | configure.in | 1 | ||||
-rw-r--r-- | xbmc/utils/CharsetConverter.cpp | 104 | ||||
-rw-r--r-- | xbmc/utils/CharsetConverter.h | 63 |
3 files changed, 158 insertions, 10 deletions
diff --git a/configure.in b/configure.in index 4127c9ddb0..50bdf838ca 100644 --- a/configure.in +++ b/configure.in @@ -920,6 +920,7 @@ fi AC_LANG_PUSH([C++]) AC_CHECK_TYPES([std::u16string, std::u32string], [], [], [[#include <string>]]) AC_CHECK_TYPES([char16_t, char32_t]) +AC_CHECK_SIZEOF([wchar_t]) AC_LANG_POP([C++]) # Add top source directory for all builds so we can use config.h diff --git a/xbmc/utils/CharsetConverter.cpp b/xbmc/utils/CharsetConverter.cpp index 9b098d2fe6..f5957b8e4d 100644 --- a/xbmc/utils/CharsetConverter.cpp +++ b/xbmc/utils/CharsetConverter.cpp @@ -32,30 +32,52 @@ #include <iconv.h> #if defined(TARGET_DARWIN) -#ifdef __POWERPC__ - #define WCHAR_CHARSET "UTF-32BE" -#else - #define WCHAR_CHARSET "UTF-32LE" -#endif + #define WCHAR_IS_UTF32 1 + #undef WCHAR_IS_UTF16 + #ifdef __POWERPC__ + #define WCHAR_CHARSET "UTF-32BE" + #else + #define WCHAR_CHARSET "UTF-32LE" + #endif #define UTF8_SOURCE "UTF-8-MAC" #elif defined(TARGET_WINDOWS) + #undef WCHAR_IS_UTF32 + #define WCHAR_IS_UTF16 1 #define WCHAR_CHARSET "UTF-16LE" #define UTF8_SOURCE "UTF-8" #pragma comment(lib, "libfribidi.lib") #pragma comment(lib, "libiconv.lib") #elif defined(TARGET_ANDROID) + #define WCHAR_IS_UTF32 1 + #undef WCHAR_IS_UTF16 #define UTF8_SOURCE "UTF-8" -#ifdef __BIG_ENDIAN__ - #define WCHAR_CHARSET "UTF-32BE" -#else - #define WCHAR_CHARSET "UTF-32LE" -#endif + #ifdef __BIG_ENDIAN__ + #define WCHAR_CHARSET "UTF-32BE" + #else + #define WCHAR_CHARSET "UTF-32LE" + #endif #else #define WCHAR_CHARSET "WCHAR_T" #define UTF8_SOURCE "UTF-8" + #ifdef HAVE_CONFIG_H + #include "config.h" + #endif // HAVE_CONFIG_H + #undef WCHAR_IS_UTF32 + #undef WCHAR_IS_UTF16 + #ifdef SIZEOF_WCHAR_T + #if SIZEOF_WCHAR_T == 4 + #define WCHAR_IS_UTF32 1 + #elif SIZEOF_WCHAR_T == 2 + #define WCHAR_IS_UTF16 1 + #endif + #endif #endif +static iconv_t m_iconvUtf8ToUtf32 = (iconv_t)-1; +static iconv_t m_iconvUtf32ToUtf8 = (iconv_t)-1; +static iconv_t m_iconvUtf32ToW = (iconv_t)-1; +static iconv_t 
m_iconvWToUtf32 = (iconv_t)-1; static iconv_t m_iconvSubtitleCharsetToW = (iconv_t)-1; static iconv_t m_iconvUtf8ToStringCharset = (iconv_t)-1; static iconv_t m_iconvStringCharsetToUtf8 = (iconv_t)-1; @@ -424,6 +446,10 @@ void CCharsetConverter::reset(void) { CSingleLock lock(m_critSection); + ICONV_SAFE_CLOSE(m_iconvUtf8ToUtf32); + ICONV_SAFE_CLOSE(m_iconvUtf32ToUtf8); + ICONV_SAFE_CLOSE(m_iconvUtf32ToW); + ICONV_SAFE_CLOSE(m_iconvWToUtf32); ICONV_SAFE_CLOSE(m_iconvUtf8ToStringCharset); ICONV_SAFE_CLOSE(m_iconvStringCharsetToUtf8); ICONV_SAFE_CLOSE(m_iconvSubtitleCharsetToW); @@ -448,6 +474,64 @@ void CCharsetConverter::reset(void) } } +bool CCharsetConverter::utf8ToUtf32(const std::string& utf8StringSrc, std::u32string& utf32StringDst, bool failOnBadChar /*= true*/) +{ + CSingleLock lock(m_critSection); + return convert(m_iconvUtf8ToUtf32, 1, UTF8_SOURCE, "UTF-32", utf8StringSrc, utf32StringDst, failOnBadChar); +} + +bool CCharsetConverter::utf8ToUtf32Visual(const std::string& utf8StringSrc, std::u32string& utf32StringDst, bool bVisualBiDiFlip /*= false*/, bool forceLTRReadingOrder /*= false*/, bool failOnBadChar /*= false*/) +{ + if (bVisualBiDiFlip) + { + std::string strFlipped; + if (!logicalToVisualBiDi(utf8StringSrc, strFlipped, FRIBIDI_UTF8, forceLTRReadingOrder ? 
FRIBIDI_TYPE_LTR : FRIBIDI_TYPE_PDF)) + return false; + CSingleLock lock(m_critSection); + return convert(m_iconvUtf8ToUtf32, 1, UTF8_SOURCE, "UTF-32", strFlipped, utf32StringDst, failOnBadChar); + } + CSingleLock lock(m_critSection); + return convert(m_iconvUtf8ToUtf32, 1, UTF8_SOURCE, "UTF-32", utf8StringSrc, utf32StringDst, failOnBadChar); +} + +bool CCharsetConverter::utf32ToUtf8(const std::u32string& utf32StringSrc, std::string& utf8StringDst, bool failOnBadChar /*= true*/) +{ + CSingleLock lock(m_critSection); + return convert(m_iconvUtf32ToUtf8, m_Utf8CharMaxSize, "UTF-32", "UTF-8", utf32StringSrc, utf8StringDst, failOnBadChar); +} + +bool CCharsetConverter::utf32ToW(const std::u32string& utf32StringSrc, std::wstring& wStringDst, bool failOnBadChar /*= true*/) +{ +#ifdef WCHAR_IS_UTF32 + wStringDst.assign((const wchar_t*)utf32StringSrc.c_str(), utf32StringSrc.length()); + return true; +#else // !WCHAR_IS_UTF32 + CSingleLock lock(m_critSection); + return convert(m_iconvUtf32ToW, 1, "UTF-32", WCHAR_CHARSET, utf32StringSrc, wStringDst, failOnBadChar); +#endif // !WCHAR_IS_UTF32 +} + +bool CCharsetConverter::utf32logicalToVisualBiDi(const std::u32string& logicalStringSrc, std::u32string& visualStringDst, bool forceLTRReadingOrder /*= false*/) +{ + visualStringDst.clear(); + std::string utf8Str; + if (!utf32ToUtf8(logicalStringSrc, utf8Str, false)) + return false; + + return utf8ToUtf32Visual(utf8Str, visualStringDst, true, forceLTRReadingOrder); +} + +bool CCharsetConverter::wToUtf32(const std::wstring& wStringSrc, std::u32string& utf32StringDst, bool failOnBadChar /*= true*/) +{ +#ifdef WCHAR_IS_UTF32 + utf32StringDst.assign((const char32_t*)wStringSrc.c_str(), wStringSrc.length()); + return true; +#else // !WCHAR_IS_UTF32 + CSingleLock lock(m_critSection); + return convert(m_iconvWToUtf32, 1, WCHAR_CHARSET, "UTF-32", wStringSrc, utf32StringDst, failOnBadChar); +#endif // !WCHAR_IS_UTF32 +} + // The bVisualBiDiFlip forces a flip of characters for hebrew/arabic 
languages, only set to false if the flipping // of the string is already made or the string is not displayed in the GUI bool CCharsetConverter::utf8ToW(const std::string& utf8StringSrc, std::wstring& wStringDst, bool bVisualBiDiFlip /*= true*/, diff --git a/xbmc/utils/CharsetConverter.h b/xbmc/utils/CharsetConverter.h index d347f16681..f6dd178f37 100644 --- a/xbmc/utils/CharsetConverter.h +++ b/xbmc/utils/CharsetConverter.h @@ -42,6 +42,69 @@ public: void clear(); + /** + * Convert UTF-8 string to UTF-32 string. + * No RTL logical-visual transformation is performed. + * @param utf8StringSrc is source UTF-8 string to convert + * @param utf32StringDst is output UTF-32 string, empty on any error + * @param failOnBadChar if set to true function will fail on invalid character, + * otherwise invalid character will be skipped + * @return true on successful conversion, false on any error + */ + bool utf8ToUtf32(const std::string& utf8StringSrc, std::u32string& utf32StringDst, bool failOnBadChar = true); + /** + * Convert UTF-8 string to UTF-32 string. + * RTL logical-visual transformation is optionally performed. + * Use it for readable text, GUI strings etc. + * @param utf8StringSrc is source UTF-8 string to convert + * @param utf32StringDst is output UTF-32 string, empty on any error + * @param bVisualBiDiFlip allow RTL logical-visual transformation if set to true, must be set + * to false if logical-visual transformation is already done + * @param forceLTRReadingOrder force LTR reading order + * @param failOnBadChar if set to true function will fail on invalid character, + * otherwise invalid character will be skipped + * @return true on successful conversion, false on any error + */ + bool utf8ToUtf32Visual(const std::string& utf8StringSrc, std::u32string& utf32StringDst, bool bVisualBiDiFlip = false, bool forceLTRReadingOrder = false, bool failOnBadChar = false); + /** + * Convert UTF-32 string to UTF-8 string. + * No RTL visual-logical transformation is performed. 
+ * @param utf32StringSrc is source UTF-32 string to convert + * @param utf8StringDst is output UTF-8 string, empty on any error + * @param failOnBadChar if set to true function will fail on invalid character, + * otherwise invalid character will be skipped + * @return true on successful conversion, false on any error + */ + bool utf32ToUtf8(const std::u32string& utf32StringSrc, std::string& utf8StringDst, bool failOnBadChar = false); + /** + * Convert UTF-32 string to wchar_t string (wstring). + * No RTL visual-logical transformation is performed. + * @param utf32StringSrc is source UTF-32 string to convert + * @param wStringDst is output wchar_t string, empty on any error + * @param failOnBadChar if set to true function will fail on invalid character, + * otherwise invalid character will be skipped + * @return true on successful conversion, false on any error + */ + bool utf32ToW(const std::u32string& utf32StringSrc, std::wstring& wStringDst, bool failOnBadChar = false); + /** + * Perform logical to visual flip. + * @param logicalStringSrc is source string with logical characters order + * @param visualStringDst is output string with visual characters order, empty on any error + * @param forceLTRReadingOrder force LTR reading order + * @return true on success, false otherwise + */ + bool utf32logicalToVisualBiDi(const std::u32string& logicalStringSrc, std::u32string& visualStringDst, bool forceLTRReadingOrder = false); + /** + * Strictly convert wchar_t string (wstring) to UTF-32 string. + * No RTL visual-logical transformation is performed. 
+ * @param wStringSrc is source wchar_t string to convert + * @param utf32StringDst is output UTF-32 string, empty on any error + * @param failOnBadChar if set to true function will fail on invalid character, + * otherwise invalid character will be skipped + * @return true on successful conversion, false on any error + */ + bool wToUtf32(const std::wstring& wStringSrc, std::u32string& utf32StringDst, bool failOnBadChar = false); + bool utf8ToW(const std::string& utf8StringSrc, std::wstring& wStringDst, bool bVisualBiDiFlip = true, bool forceLTRReadingOrder = false, bool failOnBadChar = false, bool* bWasFlipped = NULL); |