C#实现获取文本文件的编码的一个类(区分GB2312和UTF8)
以下是获取文件编码的一个类:
using System;
using System.IO;
using System.Text;

// Helpers for detecting the text encoding of a file.
namespace FileEncoding
{
    /// <summary>
    /// Detects the text encoding of a file by inspecting its raw bytes.
    /// A byte order mark (BOM) is honoured first; without one, the content is
    /// classified as UTF-8 when every byte sequence is structurally valid UTF-8,
    /// and as <see cref="Encoding.Default"/> (e.g. GB2312 on a Chinese system) otherwise.
    /// </summary>
    public class EncodingType
    {
        // Byte order marks. Only these prefix bytes identify the encoding;
        // whatever follows them is ordinary text and must not be inspected.
        private static readonly byte[] Utf8Bom = new byte[] { 0xEF, 0xBB, 0xBF };
        private static readonly byte[] Utf16BigEndianBom = new byte[] { 0xFE, 0xFF };
        private static readonly byte[] Utf16LittleEndianBom = new byte[] { 0xFF, 0xFE };

        /// <summary>
        /// Opens the file at the given path, reads its bytes and determines its encoding.
        /// </summary>
        /// <param name="FILE_NAME">Path of the file to inspect.</param>
        /// <returns>The detected encoding of the file.</returns>
        public static System.Text.Encoding GetType(string FILE_NAME)
        {
            // The using statement guarantees the handle is released even when reading throws.
            using (FileStream fs = new FileStream(FILE_NAME, FileMode.Open, FileAccess.Read))
            {
                return GetType(fs);
            }
        }

        /// <summary>
        /// Determines the encoding of the data readable from the given file stream.
        /// The stream is closed before this method returns.
        /// </summary>
        /// <param name="fs">File stream to inspect; it is read from its current position.</param>
        /// <returns>
        /// <see cref="Encoding.UTF8"/>, <see cref="Encoding.BigEndianUnicode"/> or
        /// <see cref="Encoding.Unicode"/> when detected, otherwise <see cref="Encoding.Default"/>.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="fs"/> is null.</exception>
        public static System.Text.Encoding GetType(FileStream fs)
        {
            if (fs == null)
            {
                throw new ArgumentNullException("fs");
            }

            byte[] data;

            // Disposing the reader also closes fs, so callers that relied on this
            // method closing the stream keep working.
            using (BinaryReader reader = new BinaryReader(fs, System.Text.Encoding.Default))
            {
                // Clamp instead of round-tripping through a string: a failed parse
                // would yield 0 and an over-sized file would be judged on no data.
                long available = fs.Length;
                int count = available > int.MaxValue ? int.MaxValue : (int)available;
                data = reader.ReadBytes(count);
            }

            if (StartsWith(data, Utf8Bom))
            {
                return Encoding.UTF8;
            }

            if (StartsWith(data, Utf16BigEndianBom))
            {
                return Encoding.BigEndianUnicode;
            }

            if (StartsWith(data, Utf16LittleEndianBom))
            {
                return Encoding.Unicode;
            }

            // No BOM: fall back to a structural check (pure ASCII and empty input
            // also count as UTF-8).
            if (IsUTF8Bytes(data))
            {
                return Encoding.UTF8;
            }

            return Encoding.Default;
        }

        /// <summary>
        /// Checks whether <paramref name="data"/> begins with the bytes in <paramref name="prefix"/>.
        /// </summary>
        /// <param name="data">Bytes to examine.</param>
        /// <param name="prefix">Expected leading bytes.</param>
        /// <returns>true when data is at least as long as prefix and starts with it.</returns>
        private static bool StartsWith(byte[] data, byte[] prefix)
        {
            if (data.Length < prefix.Length)
            {
                return false;
            }

            for (int i = 0; i < prefix.Length; i++)
            {
                if (data[i] != prefix[i])
                {
                    return false;
                }
            }

            return true;
        }

        /// <summary>
        /// Checks whether the bytes form structurally valid UTF-8 (used for files without a BOM).
        /// </summary>
        /// <param name="data">Bytes to examine.</param>
        /// <returns>
        /// true when every lead byte is followed by the number of continuation bytes it
        /// announces; false otherwise, including when the data ends in the middle of a
        /// multi-byte sequence.
        /// </returns>
        private static bool IsUTF8Bytes(byte[] data)
        {
            // Number of continuation bytes (10xxxxxx) still expected for the current character.
            int pendingContinuations = 0;

            for (int i = 0; i < data.Length; i++)
            {
                byte current = data[i];

                if (pendingContinuations > 0)
                {
                    // Inside a multi-byte character: the byte must look like 10xxxxxx.
                    if ((current & 0xC0) != 0x80)
                    {
                        return false;
                    }

                    pendingContinuations--;
                    continue;
                }

                if (current < 0x80)
                {
                    // Single-byte (ASCII) character.
                    continue;
                }

                // The number of leading 1 bits in a lead byte is the total sequence length.
                int sequenceLength = 0;
                for (int mask = 0x80; mask != 0 && (current & mask) != 0; mask >>= 1)
                {
                    sequenceLength++;
                }

                // One leading 1 bit is a stray continuation byte; more than six is not a
                // lead byte at all (110xxxxx ... 1111110x are the accepted forms).
                if (sequenceLength < 2 || sequenceLength > 6)
                {
                    return false;
                }

                pendingContinuations = sequenceLength - 1;
            }

            // Data that stops in the middle of a character is not valid UTF-8. Report that
            // instead of throwing, so the caller falls back to the default encoding.
            return pendingContinuations == 0;
        }
    }
}
以下是使用示例:
#region Open button
/// <summary>
/// Handles the "Open" menu click: lets the user pick a text file, detects its
/// encoding with FileEncoding.EncodingType.GetType and shows the decoded content in txtBox.
/// </summary>
/// <param name="sender">Source of the click event.</param>
/// <param name="e">Event data (not used).</param>
private void txtMenuOpen_Click(object sender, EventArgs e)
{
    // OpenFileDialog is disposable; the using statement releases it once the dialog is done.
    using (OpenFileDialog openFileDialog = new OpenFileDialog())
    {
        // Note: when putting a path here, escape the backslash ("c:\\", not "c:\").
        openFileDialog.InitialDirectory = "";
        openFileDialog.Filter = "文本文档|*.txt";
        openFileDialog.RestoreDirectory = true;
        openFileDialog.FilterIndex = 1;

        if (openFileDialog.ShowDialog() != DialogResult.OK)
        {
            // The user cancelled; leave the text box untouched.
            return;
        }

        string fileName = openFileDialog.FileName;

        // Decode with the detected encoding so GB2312 and UTF-8 files both display correctly.
        txtBox.Text = System.IO.File.ReadAllText(
            fileName,
            FileEncoding.EncodingType.GetType(fileName));
    }
}
#endregion