Author: zhaojianjun
Source:
http://www.openDesktop.net/modules/...dex.php?b_id=78
To convert between character encodings on Linux, you can use either the iconv function family or the iconv command. The command works on files: it converts a specified file from one encoding to another.
1. Encoding conversion using the iconv function family
The iconv function family is declared in the header file iconv.h, which must be included before use.
#include <iconv.h>
The iconv function family has three functions, with the following prototypes:
(1) iconv_t iconv_open(const char *tocode, const char *fromcode);
This function specifies which two encodings the conversion is between: tocode is the target encoding and fromcode is the source encoding. It returns a conversion handle that is used by the following two functions.
(2) size_t iconv(iconv_t cd, char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft);
This function reads characters from inbuf, converts them, and writes the result to outbuf. inbytesleft records the number of input bytes that have not yet been converted, and outbytesleft records the space remaining in the output buffer.
(3) int iconv_close(iconv_t cd);
This function closes the conversion handle and releases its resources.
Example 1: Conversion sample program implemented in C
/* f.c: code conversion example C program */
#include <iconv.h>
#include <limits.h>
#include <stdio.h>
#include <string.h>
/* Size in bytes of the output buffer used by the example's main(). */
#define OUTLEN 255
/* Converters defined later in this file; declared here because main() uses them first. */
int u2g(char *inbuf, int inlen, char *outbuf, int outlen);
int g2u(char *inbuf, size_t inlen, char *outbuf, size_t outlen);

/*
 * Demo driver: converts one sample string from UTF-8 to GB2312 and one from
 * GB2312 to UTF-8, printing each result.
 *
 * Returns 0 when both conversions report success, 1 otherwise.
 */
int main(void)
{
    /*
     * The same four-character Chinese sample (U+6B63 U+5728 U+5B89 U+88C5),
     * spelled as raw bytes so the program does not depend on the encoding
     * this source file happens to be saved in.
     */
    char in_utf8[] = "\xE6\xAD\xA3\xE5\x9C\xA8\xE5\xAE\x89\xE8\xA3\x85"; /* UTF-8 bytes */
    char in_gb2312[] = "\xD5\xFD\xD4\xDA\xB0\xB2\xD7\xB0";               /* GB2312 bytes */
    char out[OUTLEN];
    int rc;

    /* UTF-8 -> GB2312 */
    rc = u2g(in_utf8, (int)strlen(in_utf8), out, OUTLEN);
    if (rc != 0) {
        fprintf(stderr, "Unicode -> GB2312 conversion failed\n");
        return 1;
    }
    printf("Unicode -> GB2312 out=%s\n", out);

    /* GB2312 -> UTF-8 */
    rc = g2u(in_gb2312, strlen(in_gb2312), out, OUTLEN);
    if (rc != 0) {
        fprintf(stderr, "GB2312 -> Unicode conversion failed\n");
        return 1;
    }
    printf("GB2312 -> Unicode out=%s\n", out);

    return 0;
}
/*
 * Convert a buffer from one character encoding to another with iconv.
 *
 * from_charset  name of the encoding of the input bytes
 * to_charset    name of the encoding to produce
 * inbuf         input bytes (not modified)
 * inlen         number of input bytes; must be >= 0
 * outbuf        destination buffer; zero-filled before conversion
 * outlen        size of outbuf in bytes; must be > 0
 *
 * One byte of outbuf is held back from iconv so the result always ends in
 * a NUL byte (sufficient for byte-oriented target encodings such as UTF-8
 * or GB2312).
 *
 * Returns 0 on success, -1 on any failure (bad argument, unsupported
 * encoding pair, invalid input sequence, or output buffer too small).
 */
int code_convert(const char *from_charset, const char *to_charset,
                 char *inbuf, int inlen, char *outbuf, int outlen)
{
    iconv_t cd;
    size_t in_left;
    size_t out_left;
    size_t res;

    if (from_charset == NULL || to_charset == NULL ||
        inbuf == NULL || outbuf == NULL || inlen < 0 || outlen <= 0) {
        return -1;
    }

    /* iconv_open reports failure as (iconv_t)-1, not as 0. */
    cd = iconv_open(to_charset, from_charset);
    if (cd == (iconv_t)-1) {
        return -1;
    }

    memset(outbuf, 0, (size_t)outlen);

    /* iconv updates size_t counters; an int must not be passed by address. */
    in_left = (size_t)inlen;
    out_left = (size_t)outlen - 1; /* keep the last byte as the terminator */

    res = iconv(cd, &inbuf, &in_left, &outbuf, &out_left);
    if (res != (size_t)-1) {
        /* Flush any pending shift sequence for stateful target encodings. */
        res = iconv(cd, NULL, NULL, &outbuf, &out_left);
    }

    /* Close on every path so the descriptor is never leaked. */
    iconv_close(cd);

    return (res == (size_t)-1) ? -1 : 0;
}
/*
 * Convert UTF-8 text to GB2312.
 *
 * inbuf/inlen   input bytes and their count
 * outbuf/outlen destination buffer and its size in bytes
 *
 * Returns the result of code_convert() for the "UTF-8" -> "GB2312" pair.
 */
int u2g(char *inbuf, int inlen, char *outbuf, int outlen)
{
    return code_convert("UTF-8", "GB2312", inbuf, inlen, outbuf, outlen);
}
/*
 * Convert GB2312 text to UTF-8.
 *
 * inbuf/inlen   input bytes and their count
 * outbuf/outlen destination buffer and its size in bytes
 *
 * Returns -1 if either length does not fit in an int; otherwise returns the
 * result of code_convert() for the "GB2312" -> "UTF-8" pair.
 */
int g2u(char *inbuf, size_t inlen, char *outbuf, size_t outlen)
{
    /* code_convert() takes int lengths; refuse sizes that would be truncated. */
    if (inlen > (size_t)INT_MAX || outlen > (size_t)INT_MAX) {
        return -1;
    }
    return code_convert("GB2312", "UTF-8", inbuf, (int)inlen, outbuf, (int)outlen);
}
Example 2: Conversion sample program implemented in C++
/* f.cpp: code conversion example C++ program */
#include <iconv.h>
#include <cstring>
#include <iostream>
// Size in bytes of the output buffer used by the example's main().
#define OUTLEN 255
using namespace std;
// Code conversion helper: owns one iconv conversion descriptor for a fixed
// (from_charset -> to_charset) pair and closes it on destruction.
//
// NOTE: the class is copyable for compatibility with existing callers, but a
// copy shares the same descriptor and both destructors would close it.
// Construct in place and do not copy.
class CodeConverter {
private:
    iconv_t cd; // (iconv_t)-1 when iconv_open() failed

public:
    // Open a descriptor converting from from_charset to to_charset.
    // Failure is not reported here; convert() returns -1 in that case.
    CodeConverter(const char *from_charset, const char *to_charset) {
        cd = iconv_open(to_charset, from_charset);
    }

    // Release the descriptor, if one was successfully opened.
    ~CodeConverter() {
        if (cd != (iconv_t)-1) {
            iconv_close(cd);
        }
    }

    // Convert inlen bytes from inbuf into outbuf (outlen bytes, zero-filled
    // first). One byte of outbuf is held back so the result always ends in a
    // NUL byte. Returns -1 on failure (descriptor not open, bad argument,
    // invalid input, or output buffer too small); otherwise the value
    // reported by iconv(), i.e. the count of non-reversible conversions.
    int convert(char *inbuf, int inlen, char *outbuf, int outlen) {
        if (cd == (iconv_t)-1 || !inbuf || !outbuf || inlen < 0 || outlen <= 0) {
            return -1;
        }

        memset(outbuf, 0, outlen);

        // Start from the initial shift state in case an earlier call failed
        // part-way through.
        iconv(cd, 0, 0, 0, 0);

        // iconv updates size_t counters; casting the address of an int to
        // size_t* would make it read and write past the int.
        size_t in_left = static_cast<size_t>(inlen);
        size_t out_left = static_cast<size_t>(outlen) - 1;

        size_t res = iconv(cd, &inbuf, &in_left, &outbuf, &out_left);
        if (res == (size_t)-1) {
            return -1;
        }
        return static_cast<int>(res);
    }
};
INT main (int Argc, char ** argv)
{
Char * in_utf8 = "E? ㄥ ???"
CHAR * IN_GB2312 = "I am installing";
Char out [OUTLEN];
// UTF-8 -> GB2312
CodeConverter CC = CODECONVERTER ("UTF-8", "GB2312");
Cc.convert (In_UTF8, Strlen (In_UTF8), OUT, OUTLEN
COUT << "UTF-8 -> GB2312 IN =" << in_utf8 << ", out =" << OUT << end1
// GB2312 -> UTF-8
CodeConverter CC2 = CODECONVERTER ("GB2312", "UTF-8");
Cc2.convert (IN_GB2312, STRLEN (IN_GB2312), OUT, OUTLEN;
COUT << "GB2312 -> UTF-8 in =" << IN_GB2312 << ", OUT = << OUT << ENDL;
}
2. Encoding conversion using the iconv command
The iconv command converts the encoding of the specified files. By default the result is written to standard output; an output file can also be specified.
Usage: iconv [OPTION...] [FILE...]
The following options are available:
Input/Output format specification:
-f, --from-code=NAME  encoding of the original text
-t, --to-code=NAME  encoding for output
Information:
-l, --list  list all known character sets
Output control:
-c  omit invalid characters from the output
-o, --output=FILE  output file
-s, --silent  suppress warnings
--verbose  print progress information
-?, --help  give this help list
--usage  give a short usage message
-V, --version  print program version
Example:
iconv -f UTF-8 -t GB2312 aaa.txt > bbb.txt
This command reads the aaa.txt file, converts from the UTF-8 encoding to GB2312 encoding, and outputs it to the bbb.txt file.
Summary: Linux provides us with powerful encoding conversion tools, bringing us convenience.