python查找指定具有相同内容文件的方法
本文实例讲述了python查找指定具有相同内容文件的方法。分享给大家供大家参考。具体如下:
下面的Python代码用于在指定目录中查找内容完全相同的文件,可以同时指定多个目录(目录之间用分号分隔)
调用方式:python doublesdetector.py c:\;d:\;e:\ > doubles.txt
# Hello, this script is written in Python - http://www.python.org
# doublesdetector.py 1.0p
#
# Finds files with identical content in one or more directory trees.
# Runs unchanged on Python 2.7 and Python 3.
import hashlib
import os
import os.path
import string  # noqa: F401 -- unused by this script; kept from the original import list
import sys

# Usage text printed when the script is started without arguments.
# Backslashes are escaped so the Windows paths are emitted literally.
message = """
doublesdetector.py 1.0p

This script will search for files that are identical
(whatever their name/date/time).

  Syntax : python %s <directories>

      where <directories> is a directory or a list of directories
      separated by a semicolon (;)

Examples : python %s c:\\windows
           python %s c:\\;d:\\;e:\\ > doubles.txt
           python %s c:\\program files > doubles.txt

This script is public domain. Feel free to reuse and tweak it.
The author of this script Sebastien SAUVAGE <sebsauvage at sebsauvage dot net>
http://sebsauvage.net/python/
""" % ((sys.argv[0],) * 4)

# Number of bytes read per call while hashing, so large files are never
# loaded into memory in one piece.
_CHUNK_SIZE = 65536


def fileSHA(filepath):
    r"""Compute the SHA-1 (Secure Hash Algorithm) digest of a file.

    Input : filepath : full path and name of file
                       (eg. 'c:\windows\emm386.exe')
    Output : string : the hexadecimal representation of the SHA-1 of the
                      file content.
                      Returns '0' if the file could not be read
                      (file not found, no read rights...).
    """
    digest = hashlib.sha1()
    try:
        # 'with' closes the handle even when a read fails half-way.
        with open(filepath, 'rb') as stream:
            while True:
                data = stream.read(_CHUNK_SIZE)
                if not data:
                    break
                digest.update(data)
    except (IOError, OSError):
        # Only I/O failures mean "unreadable"; anything else (for example
        # KeyboardInterrupt) is allowed to propagate.
        return '0'
    return digest.hexdigest()


def _unique(paths):
    """Return paths without repeats, keeping the first-seen order.

    Paths are compared through os.path.normcase so that spellings which
    differ only by case count as the same file on case-insensitive systems.
    """
    seen = set()
    unique = []
    for path in paths:
        key = os.path.normcase(path)
        if key not in seen:
            seen.add(key)
            unique.append(path)
    return unique


def detectDoubles(directories):
    """Find files with identical content under the given directories.

    Input : directories : a directory, or several directories separated
                          by a semicolon (;). Empty entries are ignored.
    Output : dictionary mapping the SHA-1 hex digest of a content to the
             list of full paths of the files having that content. Only
             contents shared by at least two files are present.

    Progress information is written to sys.stderr.
    """
    fileslist = {}
    # Group all files by size (in the fileslist dictionary).
    for directory in directories.split(';'):
        directory = directory.strip()
        if not directory:
            # An empty entry (e.g. a trailing ';') would otherwise resolve
            # to the current working directory through abspath('').
            continue
        directory = os.path.abspath(directory)
        sys.stderr.write('Scanning directory ' + directory + '...')
        for dirpath, _dirnames, filenames in os.walk(directory):
            callback(fileslist, dirpath, filenames)
        sys.stderr.write('\n')

    sys.stderr.write('Comparing files...')
    # Compute the SHA-1 only for files that share their size with at least
    # one other file, and group those files by digest.
    filessha = {}
    for listoffiles in fileslist.values():
        # The same path is collected twice when the given directories
        # repeat or overlap; a file must not be reported as its own double.
        candidates = _unique(listoffiles)
        if len(candidates) < 2:
            continue
        for filepath in candidates:
            sys.stderr.write('.')
            filesha = fileSHA(filepath)
            if filesha == '0':
                # Unreadable file: nothing to compare.
                continue
            filessha.setdefault(filesha, []).append(filepath)
    sys.stderr.write('\n')

    # Keep only the digests that are shared by several files.
    return dict(
        (filesha, listoffiles)
        for filesha, listoffiles in filessha.items()
        if len(listoffiles) > 1
    )


def callback(fileslist, directory, files):
    """Record the regular files of one directory, grouped by size.

    Input : fileslist : dictionary mapping a size in bytes to the list of
                        full paths having that size; updated in place.
            directory : the directory being visited.
            files     : names of the entries found in that directory.

    Entries that are not regular files are skipped. One '.' is written to
    sys.stderr per call as a progress mark.
    """
    sys.stderr.write('.')
    for fileName in files:
        filepath = os.path.join(directory, fileName)
        if not os.path.isfile(filepath):
            continue
        try:
            filesize = os.path.getsize(filepath)
        except OSError:
            # The file vanished or became unreadable since the listing.
            continue
        fileslist.setdefault(filesize, []).append(filepath)


def _main(arguments):
    """Run the command line: print the doubles found, or the usage text."""
    if not arguments:
        print(message)
        return
    # The arguments are joined with a space so that an unquoted path such
    # as c:\program files, split by the shell, is put back together.
    doubles = detectDoubles(" ".join(arguments))
    print('The following files are identical:')
    print('\n'.join(["----\n%s" % '\n'.join(group)
                     for group in doubles.values()]))
    print('----')


if __name__ == '__main__':
    _main(sys.argv[1:])
希望本文所述对大家的Python程序设计有所帮助。