/*
* Copyright (C) 2005-2013 Team XBMC
* http://xbmc.org
*
* This Program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This Program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with XBMC; see the file COPYING. If not, see
* <http://www.gnu.org/licenses/>.
*
*/
#include "HTMLUtil.h"

#include <string>
#include <wchar.h>
#include <wctype.h>

#include "utils/StringUtils.h"
using namespace std;
using namespace HTML;
// Constructor: performs no work.
CHTMLUtil::CHTMLUtil()
{
}
// Destructor: performs no work.
CHTMLUtil::~CHTMLUtil()
{
}
// Case-insensitively locates the next occurrence of strTag in strHTML.
//
// Both the markup and the tag are lower-cased before searching, so the match
// ignores case and the text handed back in strtagFound is lower-case.
//
//   strHTML      markup to search
//   strTag       text to look for
//   strtagFound  receives the lower-cased text running from the match up to
//                and including the next '>' (or to the end of the markup when
//                no '>' follows); left empty when nothing is found
//   iPos         offset in strHTML at which the search starts
//
// Returns the offset of the match, or -1 when the tag is not found (a negative
// iPos never matches).
int CHTMLUtil::FindTag(const std::string& strHTML, const std::string& strTag, std::string& strtagFound, int iPos)
{
  strtagFound.clear();

  // A negative offset would wrap to a huge size_t; report "not found" directly.
  if (iPos < 0)
    return -1;

  std::string strHTMLLow = strHTML;
  std::string strTagLow = strTag;
  StringUtils::ToLower(strHTMLLow);
  StringUtils::ToLower(strTagLow);

  // Search with the lower-cased tag: the haystack is lower-case, so the
  // caller's original spelling could never match if it contained capitals.
  const size_t iStart = strHTMLLow.find(strTagLow, static_cast<size_t>(iPos));
  if (iStart == std::string::npos)
    return -1;

  // Without a closing '>' the remainder of the markup is returned.
  const size_t iEnd = strHTMLLow.find(">", iStart);
  const size_t iLength = (iEnd == std::string::npos) ? std::string::npos : (iEnd + 1) - iStart;

  strtagFound = strHTMLLow.substr(iStart, iLength);
  return static_cast<int>(iStart);
}
// Case-insensitively locates the closing tag "</strTag" in strHTML.
//
// Starting from the first closing tag at or after iPos, every opening tag
// ("<strTag") that lies before the current candidate pushes the candidate on
// to the next closing tag, so nested elements of the same name are skipped.
//
//   strHTML      markup to search
//   strTag       tag name without brackets
//   strtagFound  receives the lower-cased text running from the closing tag up
//                to and including the next '>' (or to the end of the markup
//                when no '>' follows); left empty when nothing is found
//   iPos         offset in strHTML at which the search starts
//
// Returns the offset of the closing tag, or -1 when there is none (including
// when the markup runs out of closing tags while skipping nested elements).
int CHTMLUtil::FindClosingTag(const std::string& strHTML, const std::string& strTag, std::string& strtagFound, int iPos)
{
  strtagFound.clear();

  // A negative offset would wrap to a huge size_t; report "not found" directly.
  if (iPos < 0)
    return -1;

  std::string strHTMLLow = strHTML;
  std::string strTagLow = strTag;
  StringUtils::ToLower(strHTMLLow);
  StringUtils::ToLower(strTagLow);

  // Build both needles from the lower-cased name so they can match the
  // lower-cased markup. "<name" does not match "</name" because of the slash.
  const std::string strOpen = "<" + strTagLow;
  const std::string strClose = "</" + strTagLow;

  size_t iStart = strHTMLLow.find(strClose, static_cast<size_t>(iPos));
  if (iStart == std::string::npos)
    return -1;

  size_t iOpenStart = strHTMLLow.find(strOpen, static_cast<size_t>(iPos));
  while (iOpenStart != std::string::npos && iOpenStart < iStart)
  {
    iStart = strHTMLLow.find(strClose, iStart + 1);
    // Unbalanced markup: stop here instead of letting npos + 1 wrap to 0 and
    // restart the search from the beginning.
    if (iStart == std::string::npos)
      return -1;
    iOpenStart = strHTMLLow.find(strOpen, iOpenStart + 1);
  }

  // Without a closing '>' the remainder of the markup is returned.
  const size_t iEnd = strHTMLLow.find(">", iStart);
  const size_t iLength = (iEnd == std::string::npos) ? std::string::npos : (iEnd + 1) - iStart;

  strtagFound = strHTMLLow.substr(iStart, iLength);
  return static_cast<int>(iStart);
}
// Extracts the text that sits between the first '>' of strTagAndValue and the
// next '<' after it, i.e. the content of an element given as
// "opening tag, value, closing tag".
//
// strValue receives that text. When either bracket is missing, strValue is a
// plain copy of strTagAndValue.
void CHTMLUtil::getValueOfTag(const std::string& strTagAndValue, std::string& strValue)
{
  // Fallback result for input that does not have the expected shape.
  strValue = strTagAndValue;

  const size_t closeOfOpeningTag = strTagAndValue.find(">");
  if (closeOfOpeningTag == std::string::npos)
    return;

  const size_t valueBegin = closeOfOpeningTag + 1;
  const size_t valueEnd = strTagAndValue.find("<", valueBegin);
  if (valueEnd == std::string::npos)
    return;

  strValue = strTagAndValue.substr(valueBegin, valueEnd - valueBegin);
}
// Copies the substring [iStart, iEnd) of strTagAndValue into strValue, after
// advancing iEnd by one, provided neither offset is std::string::npos.
//
// NOTE(review): the declarations of iStart and iEnd, and the code that
// positions them (presumably by locating strTag and scanning to the end of the
// attribute value), are not present in this view of the file. Restore and
// confirm them against the upstream source before relying on this function.
void CHTMLUtil::getAttributeOfTag(const std::string& strTagAndValue, const std::string& strTag, std::string& strValue)
{
// strTagAndValue contains:
// NOTE(review): the example that followed, and the start of the function
// body, appear to have been lost at this point.
iEnd++;
// Only overwrite strValue when both offsets are valid.
if (iStart != std::string::npos && iEnd != std::string::npos)
{
strValue = strTagAndValue.substr(iStart, iEnd - iStart);
}
}
// Strips markup from strHTML in place, keeping only the characters that lie
// outside every '<' ... '>' pair.
//
// Bracket characters themselves are never copied to the result. A '>' that
// does not close any open '<' is discarded without affecting the nesting
// depth, so the text following it is still kept.
void CHTMLUtil::RemoveTags(std::string& strHTML)
{
  std::string strReturn;
  strReturn.reserve(strHTML.size());

  // Number of '<' seen that have not yet been closed by a '>'.
  size_t iNested = 0;
  for (std::string::size_type i = 0; i < strHTML.size(); ++i)
  {
    const char ch = strHTML[i];
    if (ch == '<')
    {
      ++iNested;
    }
    else if (ch == '>')
    {
      // Clamp at zero: letting the depth go negative would drop all following
      // text and emit the contents of the next tag instead.
      if (iNested > 0)
        --iNested;
    }
    else if (iNested == 0)
    {
      strReturn += ch;
    }
  }

  strHTML.swap(strReturn);
}
// One entry of the named-entity table: the entity as it appears in markup
// (including the leading '&' and trailing ';') and the character it stands for.
struct HTMLMapping
{
  const wchar_t* html;
  const wchar_t w;
};

// Named HTML character references and the code points they decode to.
// The table is terminated by an entry whose html pointer is NULL.
//
// ConvertHTMLToW applies the entries in table order. Because "&amp;" comes
// first, double-escaped input such as "&amp;lt;" ends up fully decoded.
static const HTMLMapping mappings[] =
  {{L"&amp;",     0x0026},
   {L"&apos;",    0x0027},
   {L"&acute;",   0x00B4},
   {L"&agrave;",  0x00E0},
   {L"&aacute;",  0x00E1},
   {L"&acirc;",   0x00E2},
   {L"&atilde;",  0x00E3},
   {L"&auml;",    0x00E4},
   {L"&aring;",   0x00E5},
   {L"&aelig;",   0x00E6},
   {L"&Agrave;",  0x00C0},
   {L"&Aacute;",  0x00C1},
   {L"&Acirc;",   0x00C2},
   {L"&Atilde;",  0x00C3},
   {L"&Auml;",    0x00C4},
   {L"&Aring;",   0x00C5},
   {L"&AElig;",   0x00C6},
   {L"&bdquo;",   0x201E},
   {L"&brvbar;",  0x00A6},
   {L"&bull;",    0x2022},
   {L"&bullet;",  0x2022},
   {L"&cent;",    0x00A2},
   {L"&circ;",    0x02C6},
   {L"&curren;",  0x00A4},
   {L"&copy;",    0x00A9},
   {L"&cedil;",   0x00B8},
   {L"&Ccedil;",  0x00C7},
   {L"&ccedil;",  0x00E7},
   {L"&dagger;",  0x2020},
   {L"&deg;",     0x00B0},
   {L"&divide;",  0x00F7},
   {L"&Dagger;",  0x2021},
   {L"&egrave;",  0x00E8},
   {L"&eacute;",  0x00E9},
   {L"&ecirc;",   0x00EA},
   {L"&emsp;",    0x2003},
   {L"&ensp;",    0x2002},
   {L"&euml;",    0x00EB},
   {L"&eth;",     0x00F0},
   {L"&euro;",    0x20AC},
   {L"&Egrave;",  0x00C8},
   {L"&Eacute;",  0x00C9},
   {L"&Ecirc;",   0x00CA},
   {L"&Euml;",    0x00CB},
   {L"&ETH;",     0x00D0},
   {L"&quot;",    0x0022},
   {L"&frasl;",   0x2044},
   {L"&frac14;",  0x00BC},
   {L"&frac12;",  0x00BD},
   {L"&frac34;",  0x00BE},
   {L"&gt;",      0x003E},
   {L"&hellip;",  0x2026},
   {L"&iexcl;",   0x00A1},
   {L"&iquest;",  0x00BF},
   {L"&igrave;",  0x00EC},
   {L"&iacute;",  0x00ED},
   {L"&icirc;",   0x00EE},
   {L"&iuml;",    0x00EF},
   {L"&Igrave;",  0x00CC},
   {L"&Iacute;",  0x00CD},
   {L"&Icirc;",   0x00CE},
   {L"&Iuml;",    0x00CF},
   {L"&lrm;",     0x200E},
   {L"&lt;",      0x003C},
   {L"&laquo;",   0x00AB},
   {L"&ldquo;",   0x201C},
   {L"&lsaquo;",  0x2039},
   {L"&lsquo;",   0x2018},
   {L"&macr;",    0x00AF},
   {L"&micro;",   0x00B5},
   {L"&middot;",  0x00B7},
   {L"&mdash;",   0x2014},
   {L"&nbsp;",    0x00A0},
   {L"&ndash;",   0x2013},
   {L"&ntilde;",  0x00F1},
   {L"&not;",     0x00AC},
   {L"&Ntilde;",  0x00D1},
   {L"&ordf;",    0x00AA},
   {L"&ordm;",    0x00BA},
   {L"&oelig;",   0x0153},
   {L"&ograve;",  0x00F2},
   {L"&oacute;",  0x00F3},
   {L"&ocirc;",   0x00F4},
   {L"&otilde;",  0x00F5},
   {L"&ouml;",    0x00F6},
   {L"&oslash;",  0x00F8},
   {L"&OElig;",   0x0152},
   {L"&Ograve;",  0x00D2},
   {L"&Oacute;",  0x00D3},
   {L"&Ocirc;",   0x00D4},
   {L"&Otilde;",  0x00D5},
   {L"&Ouml;",    0x00D6},
   {L"&Oslash;",  0x00D8},
   {L"&para;",    0x00B6},
   {L"&permil;",  0x2030},
   {L"&plusmn;",  0x00B1},
   {L"&pound;",   0x00A3},
   {L"&raquo;",   0x00BB},
   {L"&rdquo;",   0x201D},
   {L"&reg;",     0x00AE},
   {L"&rlm;",     0x200F},
   {L"&rsaquo;",  0x203A},
   {L"&rsquo;",   0x2019},
   {L"&sbquo;",   0x201A},
   {L"&scaron;",  0x0161},
   {L"&sect;",    0x00A7},
   {L"&shy;",     0x00AD},
   {L"&sup1;",    0x00B9},
   {L"&sup2;",    0x00B2},
   {L"&sup3;",    0x00B3},
   {L"&szlig;",   0x00DF},
   {L"&Scaron;",  0x0160},
   {L"&thinsp;",  0x2009},
   {L"&thorn;",   0x00FE},
   {L"&tilde;",   0x02DC},
   {L"&times;",   0x00D7},
   {L"&trade;",   0x2122},
   {L"&THORN;",   0x00DE},
   {L"&uml;",     0x00A8},
   {L"&ugrave;",  0x00F9},
   {L"&uacute;",  0x00FA},
   {L"&ucirc;",   0x00FB},
   {L"&uuml;",    0x00FC},
   {L"&Ugrave;",  0x00D9},
   {L"&Uacute;",  0x00DA},
   {L"&Ucirc;",   0x00DB},
   {L"&Uuml;",    0x00DC},
   {L"&yen;",     0x00A5},
   {L"&yuml;",    0x00FF},
   {L"&yacute;",  0x00FD},
   {L"&Yacute;",  0x00DD},
   {L"&Yuml;",    0x0178},
   {L"&zwj;",     0x200D},
   {L"&zwnj;",    0x200C},
   {NULL,         L'\0'}};
// Decodes HTML character references in strHTML and stores the result in
// strStripped.
//
// Two passes are made over a copy of the input:
//   1. every named entity listed in the mappings table is replaced by its
//      character, in table order;
//   2. every numeric reference of the form "&#<decimal>;" or "&#x<hex>;"
//      ("&#X" is accepted as well) is replaced by the character with that
//      value.
//
// A numeric reference is left untouched when it has no digits, lacks the
// terminating ';' or evaluates to 0, so no NUL character is ever inserted.
// Values that do not fit in wchar_t are truncated by the conversion.
//
// An empty strHTML yields an empty strStripped.
void CHTMLUtil::ConvertHTMLToW(const std::wstring& strHTML, std::wstring& strStripped)
{
  /* TODO:STRING_CLEANUP */
  if (strHTML.empty())
  {
    strStripped.clear();
    return;
  }

  strStripped = strHTML;

  // Pass 1: named entities.
  for (size_t iEntry = 0; mappings[iEntry].html; ++iEntry)
    StringUtils::Replace(strStripped, mappings[iEntry].html, std::wstring(1, mappings[iEntry].w));

  // Pass 2: numeric references. Each one is replaced in place so that the
  // search can resume right behind the decoded character.
  size_t iPos = strStripped.find(L"&#");
  while (iPos != std::wstring::npos)
  {
    size_t iDigits = iPos + 2;
    int base = 10;
    if (iDigits < strStripped.size() &&
        (strStripped[iDigits] == L'x' || strStripped[iDigits] == L'X'))
    {
      base = 16;
      ++iDigits;
    }

    size_t iEnd = iDigits;
    while (iEnd < strStripped.size() &&
           (base == 16 ? iswxdigit(strStripped[iEnd]) : iswdigit(strStripped[iEnd])))
      ++iEnd;

    const bool hasDigits = iEnd > iDigits;
    const bool isTerminated = iEnd < strStripped.size() && strStripped[iEnd] == L';';
    if (hasDigits && isTerminated)
    {
      const std::wstring num = strStripped.substr(iDigits, iEnd - iDigits);
      const wchar_t val = static_cast<wchar_t>(wcstol(num.c_str(), NULL, base));
      if (val != L'\0')
        strStripped.replace(iPos, (iEnd + 1) - iPos, 1, val);
    }

    // Continue behind the current "&#" (or behind the character that now
    // occupies its place) so a decoded '&' is not interpreted a second time.
    iPos = strStripped.find(L"&#", iPos + 1);
  }
}