GB2312 UTF-8 mutual conversion

xiaoxiao2021-03-06  42

<?php
// Upstream copy of the GB2312 <-> UTF-8 comparison table, as cited by the
// original post:
//   http://smsrc.sina.com.cn/newsinarc/include/gb2utf8.txt

// Local copy of the comparison table. Rows are read as CSV; the first field
// is used as the GB2312 sequence and the second as its UTF-8 sequence.
$filename = "GB2UTF8.TXT";

$fp = fopen($filename, "r");
if ($fp === false) {
    // Without this guard the read loop below would run on an invalid handle.
    die("Cannot open comparison table: " . $filename);
}

// $Charset maps GB2312 sequence => UTF-8 sequence.
$Charset = array();

// Drive the loop with fgetcsv()'s own return value rather than feof():
// feof() only becomes true after a read has already failed, which would
// otherwise add one bogus empty entry at end of file.
while (($row = fgetcsv($fp, 10)) !== false) {
    // Blank or malformed rows yield fewer than two fields; skip them.
    if (!is_array($row) || count($row) < 2) {
        continue;
    }
    list($gb, $utf8) = $row;
    $Charset[$gb] = $utf8;
}

fclose($fp);

// The comparison table has now been read into the $Charset array.

/**
 * GB2312 to UTF-8.
 *
 * Splits $TEXT into pieces -- each high-bit lead byte together with the byte
 * that follows it is one piece, and each run of consecutive ASCII bytes is
 * one piece -- then replaces every piece that is a key of the comparison
 * table with the table's value. Pieces not present in the table are passed
 * through unchanged. NUL bytes are not matched by the pattern and are
 * therefore dropped from the result.
 *
 * @param string $TEXT    GB2312-encoded input.
 * @param array  $Charset Comparison table (GB2312 sequence => UTF-8 sequence).
 *                        Taken by reference; this function does not modify it.
 * @return string The input with every mapped piece replaced.
 */
function GB2UTF8($TEXT, &$Charset) {
    // Byte-oriented match (no /u modifier): a double-byte character, or a
    // run of 0x01-0x7F bytes.
    preg_match_all('/(?:[\x80-\xff].)|[\x01-\x7f]+/', $TEXT, $matches);
    $pieces = $matches[0];

    // Swap each piece found in the table for its UTF-8 encoding.
    foreach ($pieces as $index => $piece) {
        if (isset($Charset[$piece])) {
            $pieces[$index] = $Charset[$piece];
        }
    }

    // Return the re-encoded string.
    return join('', $pieces);
}

/**
 * UTF-8 to GB2312.
 *
 * Splits $TEXT into UTF-8 sequences (4-, 3- and 2-byte forms, or a single
 * ASCII byte) and replaces every sequence that appears as a value in the
 * comparison table with the corresponding key. Sequences not present in the
 * table are passed through unchanged.
 *
 * @param string $TEXT    UTF-8 encoded input.
 * @param array  $Charset Comparison table (GB2312 sequence => UTF-8 sequence).
 *                        Taken by reference; this function does not modify it.
 * @return string The input with every mapped sequence replaced.
 */
function UTF82GB($TEXT, &$Charset) {
    // Byte-oriented pattern (no /u modifier), longest sequence forms first.
    $pattern = '/[\xf0-\xf7][\x80-\xbf]{3}|[\xe0-\xef][\x80-\xbf]{2}|[\xc2-\xdf][\x80-\xbf]|[\x01-\x7f]/';
    preg_match_all($pattern, $TEXT, $matches);
    $pieces = $matches[0];

    // Invert the table so it can be looked up by UTF-8 sequence.
    $gbFor = array_flip($Charset);

    foreach ($pieces as $index => $piece) {
        if (isset($gbFor[$piece])) {
            // Replace only this element; assigning to the whole match array
            // would discard every other piece.
            $pieces[$index] = $gbFor[$piece];
        }
    }

    return join('', $pieces);
}

//test

// Round trip: convert a sample string to UTF-8 and back again, then print it.
$S = GB2UTF8('This is the test of the control', $Charset);

echo UTF82GB($S, $Charset);

?>

转载请注明原文地址:https://www.9cbs.com/read-70826.html

New Post(0)