To save network bandwidth, many web applications (MSN, for example) transmit text in UTF-8. The principle behind UTF-8 encoding is actually quite simple; below is a Visual C++ example for Windows that decodes a UTF-8 byte string.
#define unkown_char '? # Define isbyteofonebytechar (b) (((b) & 0x80) == 0x00) #define isheadoftwobyteschar (b) (((b) & 0xe0) == 0xc0) #define isheadoftHreebyteschar (b) (( (b) & 0xF0) == 0xE0) #define isComingByteOfMultBytesChar (b) (((b) & 0xC0) == 0x80) #define isHeadOfUTF8Char (b) / isByteOfOneByteChar (b) || / isByteOfOneByteChar (b) || / isHeadOfThreeBytesChar (b) Data Data :: Utf8Decode (const data & utf8hex) {int L = uTF8HEX.LENGTH (); unsigned short * unicode = new unsigned short [l * 2]; int outi = 0; for (int i = 0; i
}} BYTE b2 = (BYTE) utf8hex.getChar (i 1); BYTE b3 = (BYTE) utf8hex.getChar (i 2); if (isComingByteOfMultBytesChar (b2) && isComingByteOfMultBytesChar (b3)) {unsigned short tmp = b1 & 0x0f; TMP = TMP << 12; unsigned short tmp2 = b2 & 0x3f; TMP2 = TMP2 << 6; Unicode [OUTI ] = TMP TMP2 (B3 & 0x3F); i = 3;} else f (isheadofutf8char (b2)) {unicode [outi ] = unkown_char; i = 1;} else if (isheadofutf8char (b3)) {unicode [outi ] = unkown_char; i = 2;} else {unicode [outi ] = unkown_char; i = 3;}} else {unicode [outi ] = unkown_char; i = 1;}} unicode [outi] = 0; Data Ret (outi * 4, true); # ifdef win32 int count = widechartomultibyte (0, 0 , Unicode, Outi, Ret.mbuf, Outi * 4, NULL, NULL); if (count == 0) CPLOG (log_err, "get utf byte, error number =% lu", getLastError ()); # else # Endif IF (count> 0) {RET.MBUF [count] = '/ 0'; ret.mlength = count;} delete [] unicode;
Return Ret;}