Code conversion between different character sets
For encoding conversion, Windows provides two basic APIs, WideCharToMultiByte and MultiByteToWideChar. Both are cumbersome to use, and I avoid them unless I have no other choice. In addition, the Chinese edition of Windows ships with a Chinese conversion tool that converts between GB2312 and Big5. Its advantage is that it can convert Traditional Chinese characters to Simplified ones; its disadvantages are that it cannot convert files in batches and that it offers no command-line mode.
One of the strengths of .NET is its powerful class library, and Encoding is one of its classes; it makes encoding conversion very convenient. The appendix provides a simple example: it does not have many features, but it is practical. It can convert in batches (whole directories), but it cannot convert Traditional Chinese to Simplified. There are only two files: CodePage.cs, the main program, which provides the UI; and cpconvertor.cs, which contains the actual conversion functions. I also tried the Windows API in that code, but the results were not ideal. Incidentally, the program also provides a class, FolderBrowserDialog, which is a directory-selection dialog. By rights, C# should provide a directory dialog similar to OpenFileDialog, but I could not find one. On Linux there is an encoding conversion tool, iconv, which is very simple to use. Syntax: iconv -f encoding -t encoding inputfile. Example: iconv -f big5 -t gb2312 big5.txt > GB2312.txt
If you need to convert all files in the current directory, you can use the following code:
#!/bin/bash
# Convert every regular file under the current directory from Big5 to GB2312,
# replacing each file in place.
#
# - Only regular files are visited (-type f); directories are skipped.
# - File names are passed NUL-separated so names containing spaces survive.
# - The scratch file lives outside the tree being walked, and a file is only
#   overwritten when iconv succeeded, so a failed conversion keeps the original.
tmp=$(mktemp) || exit 1

find . -type f -print0 | while IFS= read -r -d '' f
do
    if iconv -f big5 -t gb2312 "$f" > "$tmp"; then
        # Write back through the existing file so its permissions are kept.
        cat "$tmp" > "$f"
    else
        echo "skipped (conversion failed): $f" >&2
    fi
done

rm -f "$tmp"
Of course, Linux also provides an iconv library function, but I personally find the iconv command more convenient to use.

Appendix:

/******************************************************
 * CodePage.cs
 ******************************************************/
using System;
using System.Drawing;
using System.Collections;
using System.ComponentModel;
using System.Windows.Forms;
using System.Windows.Forms.Design;
using System.Data;
using System.Text;
A summary description of Namespace CodePage {///
// // TODO: added after InitializeComponent call any constructor code // CCodePage [] codePageList = CCodePage.GetCodePageList (); m_comboSrcCodePage.BeginUpdate (); m_comboSrcCodePage.Items.Clear (); m_comboSrcCodePage.Items.AddRange (codePageList) ; m_comboSrcCodePage.EndUpdate (); m_comboSrcCodePage.SelectedIndex = 0; m_comboDestCodePage.BeginUpdate (); m_comboDestCodePage.Items.Clear (); m_comboDestCodePage.Items.AddRange (codePageList); m_comboDestCodePage.EndUpdate (); m_comboDestCodePage.SelectedIndex = 0;}
///
/// <summary>
/// Creates the form's controls, sets their layout properties, wires their
/// event handlers and adds them to the form.
/// </summary>
private void InitializeComponent()
{
    this.m_comboSrcCodePage = new System.Windows.Forms.ComboBox();
    this.m_txtFolder = new System.Windows.Forms.TextBox();
    this.m_btBrowerFolder = new System.Windows.Forms.Button();
    this.m_CodePageLabel = new System.Windows.Forms.Label();
    this.m_labelFolder = new System.Windows.Forms.Label();
    this.label1 = new System.Windows.Forms.Label();
    this.m_comboDestCodePage = new System.Windows.Forms.ComboBox();
    this.m_btExit = new System.Windows.Forms.Button();
    this.m_ckSearchSubFolder = new System.Windows.Forms.CheckBox();
    this.m_btConvert = new System.Windows.Forms.Button();
    this.m_txtPattern = new System.Windows.Forms.TextBox();
    this.m_labelPattern = new System.Windows.Forms.Label();
    this.SuspendLayout();
    //
    // m_comboSrcCodePage
    //
    this.m_comboSrcCodePage.DropDownStyle = System.Windows.Forms.ComboBoxStyle.DropDownList;
    this.m_comboSrcCodePage.Location = new System.Drawing.Point(96, 24);
    this.m_comboSrcCodePage.Name = "m_comboSrcCodePage";
    this.m_comboSrcCodePage.Size = new System.Drawing.Size(136, 20);
    this.m_comboSrcCodePage.TabIndex = 0;
    //
    // m_txtFolder
    //
    this.m_txtFolder.Location = new System.Drawing.Point(96, 96);
    this.m_txtFolder.Name = "m_txtFolder";
    this.m_txtFolder.RightToLeft = System.Windows.Forms.RightToLeft.No;
    this.m_txtFolder.Size = new System.Drawing.Size(328, 21);
    this.m_txtFolder.TabIndex = 2;
    this.m_txtFolder.Text = "";
    //
    // m_btBrowerFolder
    //
    this.m_btBrowerFolder.Location = new System.Drawing.Point(424, 96);
    this.m_btBrowerFolder.Name = "m_btBrowerFolder";
    this.m_btBrowerFolder.Size = new System.Drawing.Size(56, 21);
    this.m_btBrowerFolder.TabIndex = 11;
    this.m_btBrowerFolder.Text = "Brower";
    // Events must be subscribed with +=; plain assignment does not compile.
    this.m_btBrowerFolder.Click += new System.EventHandler(this.m_btBrowerForder_Click);
    //
    // m_CodePageLabel
    //
    this.m_CodePageLabel.Location = new System.Drawing.Point(24, 24);
    this.m_CodePageLabel.Name = "m_CodePageLabel";
    this.m_CodePageLabel.Size = new System.Drawing.Size(72, 20);
    this.m_CodePageLabel.TabIndex = 16;
    this.m_CodePageLabel.Text = "source page:";
    this.m_CodePageLabel.TextAlign = System.Drawing.ContentAlignment.MiddleLeft;
    //
    // m_labelFolder
    //
    this.m_labelFolder.Location = new System.Drawing.Point(24, 96);
    this.m_labelFolder.Name = "m_labelFolder";
    this.m_labelFolder.Size = new System.Drawing.Size(48, 20);
    this.m_labelFolder.TabIndex = 17;
    this.m_labelFolder.Text = "directory:";
    this.m_labelFolder.TextAlign = System.Drawing.ContentAlignment.MiddleLeft;
    //
    // label1
    //
    this.label1.Location = new System.Drawing.Point(256, 24);
    this.label1.Name = "label1";
    this.label1.Size = new System.Drawing.Size(80, 20);
    this.label1.TabIndex = 20;
    this.label1.Text = "target code page:";
    this.label1.TextAlign = System.Drawing.ContentAlignment.MiddleLeft;
    //
    // m_comboDestCodePage
    //
    this.m_comboDestCodePage.DropDownStyle = System.Windows.Forms.ComboBoxStyle.DropDownList;
    this.m_comboDestCodePage.Location = new System.Drawing.Point(336, 24);
    this.m_comboDestCodePage.Name = "m_comboDestCodePage";
    this.m_comboDestCodePage.Size = new System.Drawing.Size(136, 20);
    this.m_comboDestCodePage.TabIndex = 19;
    //
    // m_btExit
    //
    this.m_btExit.Location = new System.Drawing.Point(280, 144);
    this.m_btExit.Name = "m_btExit";
    this.m_btExit.Size = new System.Drawing.Size(128, 24);
    this.m_btExit.TabIndex = 27;
    // The label previously read "Reversed", which did not describe a button
    // whose handler (m_btExit_Click) closes the application.
    this.m_btExit.Text = "Exit";
    this.m_btExit.Click += new System.EventHandler(this.m_btExit_Click);
    //
    // m_ckSearchSubFolder
    //
    this.m_ckSearchSubFolder.Location = new System.Drawing.Point(256, 64);
    this.m_ckSearchSubFolder.Name = "m_ckSearchSubFolder";
    this.m_ckSearchSubFolder.TabIndex = 33;
    this.m_ckSearchSubFolder.Text = "Search subdirectories";
    this.m_ckSearchSubFolder.CheckedChanged += new System.EventHandler(this.m_ckSearchSubDirectory_CheckedChanged);
    //
    // m_btConvert
    //
    this.m_btConvert.Location = new System.Drawing.Point(104, 144);
    this.m_btConvert.Name = "m_btConvert";
    this.m_btConvert.Size = new System.Drawing.Size(120, 23);
    this.m_btConvert.TabIndex = 34;
    this.m_btConvert.Text = "Convert";
    this.m_btConvert.Click += new System.EventHandler(this.m_btConvert_Click);
    //
    // m_txtPattern
    //
    this.m_txtPattern.Location = new System.Drawing.Point(96, 64);
    this.m_txtPattern.Name = "m_txtPattern";
    this.m_txtPattern.TabIndex = 35;
    this.m_txtPattern.Text = "";
    //
    // m_labelPattern
    //
    this.m_labelPattern.Location = new System.Drawing.Point(24, 64);
    this.m_labelPattern.Name = "m_labelPattern";
    this.m_labelPattern.Size = new System.Drawing.Size(72, 20);
    this.m_labelPattern.TabIndex = 39;
    this.m_labelPattern.Text = "File / Type:";
    this.m_labelPattern.TextAlign = System.Drawing.ContentAlignment.MiddleLeft;
    //
    // CodePageConvertor
    //
    this.AutoScaleBaseSize = new System.Drawing.Size(6, 14);
    this.ClientSize = new System.Drawing.Size(504, 189);
    this.Controls.AddRange(new System.Windows.Forms.Control[] {
        this.m_labelPattern,
        this.m_txtPattern,
        this.m_btConvert,
        this.m_ckSearchSubFolder,
        this.m_btExit,
        this.label1,
        this.m_comboDestCodePage,
        this.m_labelFolder,
        this.m_CodePageLabel,
        this.m_btBrowerFolder,
        this.m_txtFolder,
        this.m_comboSrcCodePage});
    this.Name = "CodePageConvertor";
    this.Text = "code page converter";
    this.ResumeLayout(false);
}
#endregion
///
/// <summary>
/// Shows an open-file dialog and returns the path the user picked.
/// </summary>
/// <returns>
/// The selected file name, or an empty string when the dialog was not
/// confirmed with OK.
/// </returns>
private string GetFile()
{
    // The dialog wraps a native resource, so release it deterministically.
    using (OpenFileDialog openFileDialog = new OpenFileDialog())
    {
        // Filter entries are "description|pattern" pairs separated by '|'.
        openFileDialog.Filter = "txt files (*.txt)|*.txt|All files (*.*)|*.*";
        // FilterIndex is 1-based: 2 preselects the "All files" entry.
        openFileDialog.FilterIndex = 2;
        openFileDialog.RestoreDirectory = true;

        if (openFileDialog.ShowDialog() == DialogResult.OK)
        {
            return openFileDialog.FileName;
        }

        return "";
    }
}
/// <summary>
/// Shows the folder-selection dialog and returns the chosen directory.
/// </summary>
/// <returns>
/// The selected folder path, or an empty string when the dialog was not
/// confirmed with OK.
/// </returns>
private string GetFolder()
{
    FolderBrowserDialog folder = new FolderBrowserDialog("Get Folder");
    if (folder.ShowDialog() == DialogResult.OK)
    {
        return folder.Folder;
    }

    return "";
}

/// <summary>
/// Click handler for the exit button: terminates the application.
/// </summary>
/// <param name="sender">The control that raised the event.</param>
/// <param name="e">Event data (unused).</param>
private void m_btExit_Click(object sender, System.EventArgs e)
{
    Application.Exit();
}
/// <summary>
/// Click handler for the convert button: reads the folder, file pattern,
/// sub-folder option and the two selected code pages from the form, then
/// hands them to the folder converter.
/// </summary>
/// <param name="sender">The control that raised the event.</param>
/// <param name="e">Event data (unused).</param>
private void m_btConvert_Click(object sender, System.EventArgs e)
{
    string sFolder = m_txtFolder.Text;
    string sPattern = m_txtPattern.Text;
    bool bSearchSubFolder = this.m_ckSearchSubFolder.Checked;

    // The combo boxes are filled with CCodePage items, so the selected item
    // carries the Encoding to use.
    Encoding srcEncoding = ((CCodePage)m_comboSrcCodePage.SelectedItem).Encoding;
    Encoding destEncoding = ((CCodePage)m_comboDestCodePage.SelectedItem).Encoding;

    // NOTE(review): the converter class and method are declared outside this
    // block; the casing "CPConvertor.ConvertFolder" is reconstructed from the
    // garbled call site - confirm against the actual declaration.
    CPConvertor.ConvertFolder(sFolder, sPattern, bSearchSubFolder, srcEncoding, destEncoding);
}
/// <summary>
/// TextChanged handler for the folder text box. It performs no action.
/// </summary>
/// <param name="sender">The control that raised the event.</param>
/// <param name="e">Event data (unused).</param>
private void m_txtFolder_TextChanged(object sender, System.EventArgs e)
{
}
/// <summary>
/// Click handler for the browse button: lets the user pick a folder and puts
/// the result (an empty string if nothing was picked) into the folder text box.
/// </summary>
/// <param name="sender">The control that raised the event.</param>
/// <param name="e">Event data (unused).</param>
private void m_btBrowerForder_Click(object sender, System.EventArgs e)
{
    m_txtFolder.Text = GetFolder();
}
/// <summary>
/// CheckedChanged handler for the "search subdirectories" check box. It
/// performs no action; the check box state is read when conversion starts.
/// </summary>
/// <param name="sender">The control that raised the event.</param>
/// <param name="e">Event data (unused).</param>
private void m_ckSearchSubDirectory_CheckedChanged(object sender, System.EventArgs e)
{
}
}
/// <summary>
/// A directory-selection dialog. It derives from FolderNameEditor in order to
/// use that class's nested FolderBrowser type, and wraps one instance of it.
/// </summary>
public class FolderBrowserDialog : System.Windows.Forms.Design.FolderNameEditor
{
    /// <summary>The wrapped folder browser.</summary>
    protected FolderNameEditor.FolderBrowser folderDlg;

    /// <summary>
    /// Creates the dialog, restricted to file-system folders.
    /// </summary>
    /// <param name="description">Text assigned to the browser's Description.</param>
    public FolderBrowserDialog(string description)
    {
        folderDlg = new FolderNameEditor.FolderBrowser();

        // A start location can be set if needed, e.g.:
        //   folderDlg.StartLocation = FolderBrowserFolder.MyDocuments;
        // Other styles that can be used or combined with '|':
        //   BrowseForEverything, BrowseForComputer, RestrictToDomain,
        //   RestrictToSubfolders, ShowTextBox.
        folderDlg.Style = FolderBrowserStyles.RestrictToFilesystem;
        folderDlg.Description = description;
    }

    /// <summary>Shows the dialog.</summary>
    /// <returns>The result reported by the wrapped folder browser.</returns>
    public DialogResult ShowDialog()
    {
        return folderDlg.ShowDialog();
    }

    /// <summary>Gets the directory path held by the wrapped folder browser.</summary>
    public string Folder
    {
        get { return folderDlg.DirectoryPath; }
    }
}
}

/******************************************************
 * cpconvertor.cs
 ******************************************************/
using System;
using System.IO;
using System.Runtime.InteropServices;
using System.Text;
using System.Windows.Forms;
namespace CodePage
{
/// <summary>
/// One selectable code page: its numeric identifier, a name, the text shown
/// in the UI, and the Encoding used to convert with it.
/// </summary>
public class CCodePage
{
    private int m_CodePage;
    private string m_Name;
    private string m_DisplayName;
    private Encoding m_Encoding;

    /// <summary>
    /// Creates an entry for the given code page.
    /// </summary>
    /// <param name="codePage">Code page identifier passed to Encoding.GetEncoding.</param>
    /// <param name="name">Name of the code page.</param>
    public CCodePage(int codePage, string name)
    {
        m_CodePage = codePage;
        m_Name = name;
        // NOTE(review): the separator literal is empty as found in the source;
        // a separating character may have been lost - confirm the intended text.
        m_DisplayName = codePage + "" + name;
        m_Encoding = Encoding.GetEncoding(codePage);

        // Work-around for Portuguese text: converting with code page 860
        // always gave wrong results, while Encoding(860).WindowsCodePage is
        // 1252 and converting with 1252 gave correct results. So whenever the
        // requested code page differs from the encoding's WindowsCodePage,
        // rebuild the encoding from WindowsCodePage.
        // NOTE(review): this applies to every entry, not only 860. If a
        // Unicode encoding such as UTF-8 reports a different WindowsCodePage,
        // it is replaced as well - verify that is intended.
        if (m_CodePage != m_Encoding.WindowsCodePage)
        {
            m_Encoding = Encoding.GetEncoding(m_Encoding.WindowsCodePage);
        }
    }

    /// <summary>Gets or sets the code page identifier given at construction.</summary>
    public int CodePage
    {
        get { return m_CodePage; }
        set { m_CodePage = value; }
    }

    /// <summary>Gets or sets the name of the code page.</summary>
    public string Name
    {
        get { return m_Name; }
        set { m_Name = value; }
    }

    /// <summary>Gets or sets the text returned by ToString.</summary>
    public string DisplayName
    {
        get { return m_DisplayName; }
        set { m_DisplayName = value; }
    }

    /// <summary>Gets or sets the encoding used for conversion.</summary>
    public Encoding Encoding
    {
        get { return m_Encoding; }
        set { m_Encoding = value; }
    }

    /// <summary>Returns the display name.</summary>
    public override string ToString()
    {
        return m_DisplayName;
    }

    /// <summary>
    /// Builds the list of code pages offered by the program.
    /// </summary>
    /// <returns>A new array with one entry per offered code page.</returns>
    public static CCodePage[] GetCodePageList()
    {
        CCodePage[] codePageList = new CCodePage[]
        {
            new CCodePage(65001, "UTF-8"),
            new CCodePage(437, "Default: United States"),
            new CCodePage(936, "Chinese (Simplified)"),
            new CCodePage(950, "Chinese (Taiwan, Hong Kong Traditional)"),
            new CCodePage(949, "Korean"),
            new CCodePage(860, "Portuguese"),
            new CCodePage(932, "Japanese"),
            // Further code pages that are currently not offered:
            //   708 Arabic code page, 737 Greek, 775 Baltic, 850 International,
            //   852 Slavic, 855 Cyrillic, 857 Turkish, 861 Icelandic,
            //   862 Hebrew, 863 Canadian French, 864 Arabic,
            //   865 Norwegian / Danish, 866 Russian, 874 Thai
        };
        return codePageList;
    }
}

///
/// <summary>
/// Converts a file from one code page to another using the Read, M2W, W2M
/// and Save helpers declared elsewhere in this class.
/// </summary>
/// <param name="sSrcFile">Path passed to Read to obtain the input bytes.</param>
/// <param name="srcCodePage">Code page of the input.</param>
/// <param name="sDestFile">Path passed to Save together with the result.</param>
/// <param name="destCodePage">Code page of the output.</param>
/// <returns>Always true.</returns>
public static bool ConvertFileByAPI(string sSrcFile, int srcCodePage, string sDestFile, int destCodePage)
{
    byte[] inBuffer = Read(sSrcFile);
    byte[] outBuffer = null;

    // NOTE(review): when either side is CP_UTF8 only one of the two steps
    // runs, so the UTF-8 side is handled as if it were wide-character data.
    // Presumably M2W/W2M wrap MultiByteToWideChar/WideCharToMultiByte; if so,
    // UTF-8 should probably take the general two-step path - confirm against
    // those helpers before changing.
    if (srcCodePage == (int)EnumCodePage.CP_UTF8)
    {
        outBuffer = W2M(inBuffer, destCodePage);
    }
    else if (destCodePage == (int)EnumCodePage.CP_UTF8)
    {
        outBuffer = M2W(inBuffer, srcCodePage);
    }
    else
    {
        outBuffer = M2W(inBuffer, srcCodePage);
        outBuffer = W2M(outBuffer, destCodePage);
    }

    Save(outBuffer, sDestFile);
    return true;
}

// Corresponding Win32 definitions:
//   #define CP_ACP        0      // default to ANSI code page
//   #define CP_OEMCP      1
//   #define CP_MACCP      2      // default to MAC code page
//   #define CP_THREAD_ACP 3      // current thread's ANSI code page
//   #define CP_SYMBOL     42     // SYMBOL translations
//   #define CP_UTF7       65000  // UTF-7 translation
//   #define CP_UTF8       65001  // UTF-8 translation

/// <summary>Code page identifiers mirroring the Win32 CP_* constants.</summary>
public enum EnumCodePage
{
    CP_ACP,          // 0
    CP_OEMCP,        // 1
    CP_MACCP,        // 2
    CP_THREAD_ACP,   // 3
    CP_SYMBOL = 42,
    CP_UTF7 = 65000,
    CP_UTF8 = 65001
}
/// <summary>
/// P/Invoke declaration of the Win32 WideCharToMultiByte function.
/// </summary>
/// <remarks>
/// lpUsedDefaultChar is a pointer in the native signature but is declared
/// here as a by-value bool, so callers should pass false (a null pointer).
/// </remarks>
[DllImport("kernel32.dll", CharSet = CharSet.Auto, ExactSpelling = true)]
public static extern int WideCharToMultiByte(
    int CodePage,             // code page
    int dwFlags,              // performance and mapping flags
    byte[] lpWideCharStr,     // address of wide-character string
    int cchWideChar,          // number of characters in string
    byte[] lpMultiByteStr,    // address of buffer for new string
    int cchMultiByte,         // size of buffer
    // The native parameter is an ANSI string; without this attribute
    // CharSet.Auto can marshal it as wide text.
    [MarshalAs(UnmanagedType.LPStr)]
    string lpDefaultChar,     // address of default for unmappable characters
    bool lpUsedDefaultChar    // address of flag set when default char used
);

/// <summary>
/// P/Invoke declaration of the Win32 MultiByteToWideChar function.
/// </summary>
[DllImport("kernel32.dll", CharSet = CharSet.Auto, ExactSpelling = true)]
public static extern int MultiByteToWideChar(
    int CodePage,             // code page
    int dwFlags,              // character-type options
    byte[] lpMultiByteStr,    // address of string to map
    int cchMultiByte,         // number of bytes in string
    byte[] lpWideCharStr,     // address of wide-character buffer
    int cchWideChar           // size of buffer
);