UTF-8: 3 bytes per Chinese character
UNICODE (UTF-16): 2 bytes per character
GB2312: 2 bytes per Chinese character (1 byte for ASCII)
example:
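In a 3-byte UTF-8 sequence (bit pattern 1110bbbb 10bbbbbb 10bbbbbb), concatenating the 16 payload bits b, i.e. everything except the fixed prefix bits, yields the character's Unicode code point.
Note that the three leading 1 bits of the first byte indicate that the UTF-8 sequence is 3 bytes long.
After UTF-8 encoding, no byte of such a character can be mistaken for a sensitive (ASCII) character, because the highest bit of every byte is always 1.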
class definition
// Static helpers for converting Chinese text between GB2312, Unicode
// (one wchar_t per character) and UTF-8.
class CChineseCode
{
public:
    // Single-character conversions.
    static void UTF_8ToUnicode(wchar_t* pOut, char* pText);    // UTF-8 -> Unicode
    static void UnicodeToUTF_8(char* pOut, wchar_t* pText);    // Unicode -> UTF-8
    static void UnicodeToGB2312(char* pOut, wchar_t uData);    // Unicode -> GB2312
    static void Gb2312ToUnicode(wchar_t* pOut, char* gbBuffer); // GB2312 -> Unicode

    // Whole-buffer conversions; pLen is the number of bytes at pText and the
    // converted text is stored in pOut.
    static void GB2312ToUTF_8(std::string& pOut, char* pText, int pLen); // GB2312 -> UTF-8
    static void UTF_8ToGB2312(std::string& pOut, char* pText, int pLen); // UTF-8 -> GB2312
};
Class implementation
void CChineseCode: : UTF_8ToUnicode (wchar_t * pOut, char * pText)
{
char * uchar = (char *) pOut;
uchar [1] = ((pText [0] & 0x0F) > 2) & 0x0F);
uchar [0] = ((pText [1] & ; 0x03) > 4));
pOut [1 ] = (0x80 | ((pchar [1] & 0x0F) > 6);
pOut [2 ] = (0x80 | (pchar [0] & 0x3F));
return;
}
// Converts the single wide character uData to the multibyte encoding of the
// active ANSI code page (CP_ACP) and stores the bytes at pOut.
//
//   pOut  - destination; must have room for sizeof(wchar_t) bytes.
//   uData - the wide character to convert.
//
// NOTE(review): the result is GB2312 only when the system ANSI code page is a
// GB2312-compatible one (presumably 936) - confirm for the target machines.
void CChineseCode::UnicodeToGB2312(char* pOut, wchar_t uData)
{
    const int wideCharCount  = 1;
    const int outputCapacity = static_cast<int>(sizeof(wchar_t));

    WideCharToMultiByte(CP_ACP, 0, &uData, wideCharCount,
                        pOut, outputCapacity, NULL, NULL);
}
// Converts the two bytes at gbBuffer, interpreted in the active ANSI code
// page (CP_ACP), into one wide character stored in *pOut.
//
//   pOut     - receives at most one wchar_t.
//   gbBuffer - must point at two readable bytes (one double-byte character).
//
// NOTE(review): the input is decoded as GB2312 only when the system ANSI code
// page is a GB2312-compatible one (presumably 936) - confirm for the target
// machines.
void CChineseCode::Gb2312ToUnicode(wchar_t* pOut, char* gbBuffer)
{
    const int inputByteCount = 2;
    const int outputCapacity = 1;

    ::MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED,
                          gbBuffer, inputByteCount,
                          pOut, outputCapacity);
}
void CChineseCode:: GB2312ToUTF_8 (string & pOut, char * pText, int pLen)
{
char buf [4];
int nLength = pLen * 3;
char * rst = new char [nLength];
memset (buf, 0,4);
memset (rst, 0,
bracelet balance, nLength);
int i = 0;
int j = 0;
while (i
{
/ / If it is a direct copy in English will be able to
if (* (pText + i)> = 0)
{
rst [j + +] = pText [i ++];
;}
else
{
wchar_t pbuffer;
; Gb2312ToUnicode (& pbuffer, pText + i);
;
UnicodeToUTF_8 (buf, & pbuffer);
unsigned short int tmp = 0;
tmp = rst [j] = buf [0];
tmp = rst [j +1] = buf [1];
; tmp = rst [j +2] = buf [2];
j + = 3;
i + = 2;
}
}
rst [j] ='';
/ / return the results
pOut = rst;
delete [] rst;
return;
}
void CChineseCode:: UTF_8ToGB2312 (string & pOut, char * pText, int pLen)
{
char * newBuf = new char [pLen];
char Ctemp [4];
memset (Ctemp, 0,4);
int i = 0;
int j = 0;
while (i
{
if (pText> 0)
{
newBuf [j + +] = pText [i + +]; ;
}
else
{
WCHAR Wtemp;
UTF_8ToUnicode (& Wtemp, pText + i);
UnicodeToGB2312 (Ctemp, Wtemp);
;
newBuf [j] = Ctemp [0];
; newBuf [j + 1] = Ctemp [1];
i + = 3;
j + = 2;
;}
}
newBuf [j] ='';
pOut = newBuf;
delete [] newBuf;
return;
}