Java中的StringBuilder性能测试
在看KMP算法时,想要简单的统计一下执行时间和性能。
得出的结论是:Java的String的indexOf方法性能最好,其次是KMP算法,其次是传统的BF算法,当然,对比有点牵强,SUN的算法也使用Java来实现、用的看着不像是KMP,还需要详细研究一下。
测试代码如下所示:
packagecom.test.test.kmp; importjava.util.Random; publicclassKMPTest{ publicstaticvoidmain(String[]args){ test(); } // publicstaticvoidtest(){ // intallLen=8000000; intsubLen=11; intcharLen=4; Stringcl=buildString(charLen); inttimes=50; // testMatch(allLen,subLen,cl,times); } publicstaticvoidtestMatch(intallLen,intsubLen,Stringcl,inttimes){ intallBF=0; intallString=0; intallKMP=0; intallGC=0; intallbuild=0; intalltoArray=0; longstart=System.currentTimeMillis(); System.out.println("--------------------------"); for(inti=0;i<times;i++){ //1.构造字符串的损耗 longbuildStart=System.currentTimeMillis(); Stringsub=buildString(subLen,cl); Stringall=buildString(allLen,sub)+sub; longbuildEnd=System.currentTimeMillis(); allbuild+=(buildEnd-buildStart); //2.toCharArray的损耗 longtoArrayStart=System.currentTimeMillis(); char[]allArray=all.toCharArray(); char[]subArray=sub.toCharArray(); longtoArrayEnd=System.currentTimeMillis(); alltoArray+=(toArrayEnd-toArrayStart); // longstartBF=System.currentTimeMillis(); intindexBF=bfIndexOf(all,sub); longendBF=System.currentTimeMillis(); // longtimeoutBF=endBF-startBF; allBF+=timeoutBF; System.gc(); allGC+=(System.currentTimeMillis()-endBF); // longstartString=System.currentTimeMillis(); intindexString=stringIndexOf(all,sub); longendString=System.currentTimeMillis(); // longtimeoutString=endString-startString; allString+=timeoutString; System.gc(); allGC+=(System.currentTimeMillis()-endString); // longstartKMP=System.currentTimeMillis(); //intindexKMP=kmpIndexOf(all,sub); intindexKMP=KMP_Index(allArray,subArray); longendKMP=System.currentTimeMillis(); // longtimeoutKMP=endKMP-startKMP; allKMP+=timeoutKMP; System.gc(); allGC+=(System.currentTimeMillis()-endKMP); // //System.out.println("all="+all.substring(0,100)+"......"); //System.out.println("sub="+sub); // //System.out.println("indexBF="+indexBF+";耗时:"+timeoutBF+"ms"); //System.out.println("indexString="+indexString+";耗时:"+timeoutString+"ms"); if(indexBF==indexString&&indexKMP==indexString){ //System.out.println("!!!!对比相等。"); }else{ thrownewRuntimeException("对比出错."); } // if(i%20==10){ System.out.println(i+"次bfIndexOf="+allBF+"ms"); System.out.println(i+"次stringIndexOf="+allString+"ms"); System.out.println(i+"次KMPIndexOf="+allKMP+"ms"); System.out.println(i+"次allbuild="+allbuild+"ms"); System.out.println(i+"次alltoArray="+alltoArray+"ms"); System.out.println(i+"*3次,GC="+allGC+"ms"); longend=System.currentTimeMillis(); longallTime=end-start; longlossTime=allTime-(allBF+allString+allKMP+allGC); System.out.println(i+"次,所有总计耗时:"+(allTime)+"ms;损耗:"+lossTime+"ms;损耗比:"+((0.0+lossTime)/allTime*100)+"%"); System.out.println("--------------------------"); } } // System.out.println(times+"次bfIndexOf;总计耗时:"+allBF+"ms"); System.out.println(times+"次stringIndexOf;总计耗时:"+allString+"ms"); System.out.println(times+"次KMPIndexOf;总计耗时:"+allKMP+"ms"); System.out.println(times+"次allbuild="+allbuild+"ms"); System.out.println(times+"次alltoArray="+alltoArray+"ms"); System.out.println(times+"*3次,GC;总计耗时:"+allGC+"ms"); longend=System.currentTimeMillis(); longallTime=end-start; longlossTime=allTime-(allBF+allString+allKMP+allGC); System.out.println(times+"次,所有总计耗时:"+(allTime)+"ms;损耗:"+lossTime+"ms;损耗比:"+((0.0+lossTime)/allTime*100)+"%"); System.out.println("--------------------------"); } // //构建字符 publicstaticStringbuildString(intlen){ returnbuildString(len,null); } publicstaticStringbuildString(intlen,Stringsub){ // finalintTYPE_NUMBER=0; finalintTYPE_LOWERCASE=1; finalintTYPE_UPPERCASE=2; // longseed=System.nanoTime(); Randomrandom=newRandom(seed); // // StringBuilderbuilder=newStringBuilder(); for(inti=0;i<len;i++){ // inttype=random.nextInt(3);//0-2 intcvalue=0; if(TYPE_NUMBER==type){ cvalue='0'+random.nextInt(10);//0~(n-1) }elseif(TYPE_LOWERCASE==type){ cvalue='a'+random.nextInt(26);//0~(n-1) }elseif(TYPE_UPPERCASE==type){ cvalue='A'+random.nextInt(26);//0~(n-1) }else{ thrownewRuntimeException(Random.class.getName()+"#newxtInt(int);Wrong!"); } // //cvalue='A'+random.nextInt(26);//0~(n-1) // charc=(char)cvalue; if(null!=sub&&sub.length()>1){ intindex=random.nextInt(sub.length()); c=sub.charAt(index); } //Strings=String.valueOf(c); builder.append(c); // } // returnbuilder.toString(); } /** *Java自带的方法,内部实现了 *@paramall *@paramsub *@return */ publicstaticintstringIndexOf(Stringall,Stringsub){ //防御式编程 if(null==all||null==sub){ return-1; } //调用Java的String方法 returnall.indexOf(sub); } /** *BF算法 *@paramall *@paramsub *@return */ publicstaticintbfIndexOf(Stringall,Stringsub){ //防御式编程 if(null==all||null==sub){ return-1; } // intlenAll=all.length(); intlenSub=sub.length(); inti=0; while(i<lenAll){ //从i下标开始对比 intj=0; while(j<lenSub){ // charall_i=all.charAt(i); charsub_j=sub.charAt(j); if(all_i==sub_j){ //相等则继续对比下一个字符 i++; j++;//这个增长很重要 }else{ //否则跳出内层循环 break; } } //如果j增长到和lenSub相等,则匹配成功 if(lenSub==j){ returni-lenSub; }else{ i=1+i-j;//回溯i } } // return-1; } /** *KMP算法 *@paramall *@paramsub *@return */ publicstaticintkmpIndexOf(Stringall,Stringsub){ // char[]allArray=all.toCharArray(); char[]subArray=sub.toCharArray(); // returnKMP_Index(allArray,subArray); } /** *获得字符串的next函数值 * *@paramt *字符串 *@returnnext函数值 */ publicstaticint[]next(char[]t){ int[]next=newint[t.length]; next[0]=-1; inti=0; intj=-1; while(i<t.length-1){ if(j==-1||t[i]==t[j]){ i++; j++; if(t[i]!=t[j]){ next[i]=j; }else{ next[i]=next[j]; } }else{ j=next[j]; } } returnnext; } /** *KMP匹配字符串 * *@paramallArray *主串 *@paramsubArray *模式串 *@return若匹配成功,返回下标,否则返回-1 */ publicstaticintKMP_Index(char[]allArray,char[]subArray){ int[]next=next(subArray); inti=0; intj=0; while(i<=allArray.length-1&&j<=subArray.length-1){ if(j==-1||allArray[i]==subArray[j]){ i++; j++; }else{ j=next[j]; } } if(j<subArray.length){ return-1; }else returni-subArray.length;//返回模式串在主串中的头下标 } }
测试的一个结果如下所示:
-------------------------- 10次bfIndexOf=94ms 10次stringIndexOf=56ms 10次KMPIndexOf=76ms 10次allbuild=5870ms 10次alltoArray=137ms 10*3次,GC=155ms 10次,所有总计耗时:6388ms;损耗:6007ms;损耗比:94.03569192235442% -------------------------- 30次bfIndexOf=365ms 30次stringIndexOf=222ms 30次KMPIndexOf=295ms 30次allbuild=16452ms 30次alltoArray=395ms 30*3次,GC=452ms 30次,所有总计耗时:18184ms;损耗:16850ms;损耗比:92.66388033435987% -------------------------- 50次bfIndexOf;总计耗时:550ms 50次stringIndexOf;总计耗时:335ms 50次KMPIndexOf;总计耗时:450ms 50次allbuild=26501ms 50次alltoArray=637ms 50*3次,GC;总计耗时:734ms 50次,所有总计耗时:29211ms;损耗:27142ms;损耗比:92.91705179555647% --------------------------
从中可以看出来,使用StringBuilder构造字符串,800万*50次append大约消耗了26秒。换算下来每秒钟是1600万次。。。
看来是需要写一个专门计时的函数,本来Junit是有自己的统计的,但是样本不太好做。
如此看来,数据的准备,也就是测试用例的选取很关键,可能需要先生成并写入到文本文件中。