Windows下Java调用OCR进行图片识别
使用 Java 语言,通过调用 Tesseract-OCR 命令行程序对图片进行文字识别。
1.Tesseract-OCR
下载 Windows 版本的 Tesseract-OCR 并安装。
2.程序如下:
a.ImageIOHelper类
packageOCR; importjava.awt.image.BufferedImage; importjava.io.File; importjava.io.IOException; importjava.util.Iterator; importjava.util.Locale; importjavax.imageio.IIOImage; importjavax.imageio.ImageIO; importjavax.imageio.ImageReader; importjavax.imageio.ImageWriteParam; importjavax.imageio.ImageWriter; importjavax.imageio.metadata.IIOMetadata; importjavax.imageio.stream.ImageInputStream; importjavax.imageio.stream.ImageOutputStream; importcom.sun.media.imageio.plugins.tiff.TIFFImageWriteParam; publicclassImageIOHelper{ /** *图片文件转换为tif格式 *@paramimageFile文件路径 *@paramimageFormat文件扩展名 *@return */ publicstaticFilecreateImage(FileimageFile,StringimageFormat){ FiletempFile=null; try{ Iteratorreaders=ImageIO.getImageReadersByFormatName(imageFormat); ImageReaderreader=readers.next(); ImageInputStreamiis=ImageIO.createImageInputStream(imageFile); reader.setInput(iis); //Readthestreammetadata IIOMetadatastreamMetadata=reader.getStreamMetadata(); //SetupthewriteParam TIFFImageWriteParamtiffWriteParam=newTIFFImageWriteParam(Locale.CHINESE); tiffWriteParam.setCompressionMode(ImageWriteParam.MODE_DISABLED); //Gettifwriterandsetoutputtofile Iterator writers=ImageIO.getImageWritersByFormatName("tiff"); ImageWriterwriter=writers.next(); BufferedImagebi=reader.read(0); IIOImageimage=newIIOImage(bi,null,reader.getImageMetadata(0)); tempFile=tempImageFile(imageFile); ImageOutputStreamios=ImageIO.createImageOutputStream(tempFile); writer.setOutput(ios); writer.write(streamMetadata,image,tiffWriteParam); ios.close(); writer.dispose(); reader.dispose(); }catch(IOExceptione){ e.printStackTrace(); } returntempFile; } privatestaticFiletempImageFile(FileimageFile){ Stringpath=imageFile.getPath(); StringBufferstrB=newStringBuffer(path); strB.insert(path.lastIndexOf('.'),0); returnnewFile(strB.toString().replaceFirst("(?<=//.)(//w+)$","tif")); } }
b.OCR核心类
packageOCR; importjava.io.BufferedReader; importjava.io.File; importjava.io.FileInputStream; importjava.io.InputStreamReader; importjava.util.ArrayList; importjava.util.List; importorg.jdesktop.swingx.util.OS; publicclassOCR{ privatefinalStringLANG_OPTION="-l";//英文字母小写l,并非数字1 privatefinalStringEOL=System.getProperty("line.separator"); privateStringtessPath="C://ProgramFiles//Tesseract-OCR"; //privateStringtessPath=newFile("tesseract").getAbsolutePath(); publicStringrecognizeText(FileimageFile,StringimageFormat)throwsException{ FiletempImage=ImageIOHelper.createImage(imageFile,imageFormat); FileoutputFile=newFile(imageFile.getParentFile(),"output"); StringBufferstrB=newStringBuffer(); Listcmd=newArrayList (); if(OS.isWindowsXP()){ cmd.add(tessPath+"//tesseract"); }elseif(OS.isLinux()){ cmd.add("tesseract"); }else{ cmd.add(tessPath+"//tesseract"); } cmd.add(""); cmd.add(outputFile.getName()); //cmd.add(LANG_OPTION); //cmd.add("chi_sim"); //cmd.add("eng"); ProcessBuilderpb=newProcessBuilder(); pb.directory(imageFile.getParentFile()); cmd.set(1,tempImage.getName()); pb.command(cmd); pb.redirectErrorStream(true); Processprocess=pb.start(); //tesseract.exe1.jpg1-lchi_sim intw=process.waitFor(); //删除临时正在工作文件 tempImage.delete(); if(w==0){ BufferedReaderin=newBufferedReader(newInputStreamReader(newFileInputStream(outputFile.getAbsolutePath()+".txt"),"UTF-8")); Stringstr; while((str=in.readLine())!=null){ strB.append(str).append(EOL); } in.close(); }else{ Stringmsg; switch(w){ case1: msg="Errorsaccessingfiles.Theremaybespacesinyourimage'sfilename."; break; case29: msg="Cannotrecongnizetheimageoritsselectedregion."; break; case31: msg="Unsupportedimageformat."; break; default: msg="Errorsoccurred."; } tempImage.delete(); //thrownewRuntimeException(msg); } newFile(outputFile.getAbsolutePath()+".txt").delete(); returnstrB.toString(); } }
c.main
packageOCR; importjava.io.File; importjava.io.IOException; publicclassTestOcr{ /** *@paramargs */ publicstaticvoidmain(String[]args){ //输入图片地址 Stringpath="d://test//test.bmp"; try{ StringvalCode=newOCR().recognizeText(newFile(path),"bmp"); System.out.println(valCode); }catch(IOExceptione){ e.printStackTrace(); }catch(Exceptione){ e.printStackTrace(); } } }
以上就是本文的全部内容,希望对大家的学习有所帮助,也希望大家多多支持毛票票。