PostgreSQL模糊匹配走索引的操作
场景lower(name)like'pf%'
createtableusers(idintprimarykey,namevarchar(255)); Createorreplacefunctionrandom_string(lengthinteger)returnstextas $$ declare charstext[]:='{0,1,2,3,4,5,6,7,8,9,A,B,C,D,E,F,G,H,I,J,K,L,M,N,O,P,Q,R,S,T,U,V,W,X,Y,Z,a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z}'; resulttext:=''; iinteger:=0; begin iflength<0then raiseexception'Givenlengthcannotbelessthan0'; endif; foriin1..lengthloop result:=result||chars[1+random()*(array_length(chars,1)-1)]; endloop; returnresult; end; $$languageplpgsql; insertintousersvalues(generate_series(1,50000),random_string(15));
普通bt:不走索引
pg_trgm模块提供函数和操作符测定字母数字文本基于三元模型匹配的相似性,还有支持快速搜索相似字符串的索引操作符类。三元模型是一组从一个字符串中获得的三个连续的字符。我们可以通过计数两个字符串共享的三元模型的数量来测量它们的相似性。这个简单的想法证明在测量许多自然语言词汇的相似性时是非常有效的。
CREATEINDEXusers_idx0ONusers(name);
全字匹配查询(走索引)
explainselect*fromuserswherename='pfDNQVmhqDrF1EY'; QUERYPLAN ------------------------------------------------------------------------- IndexScanusingusers_idx0onusers(cost=0.29..8.31rows=1width=20) IndexCond:((name)::text='pfDNQVmhqDrF1EY'::text) (2rows)
加函数全字匹配(不走索引)
explainselect*fromuserswherelower(name)='pfDNQVmhqDrF1EY'; QUERYPLAN ----------------------------------------------------------- SeqScanonusers(cost=0.00..1069.00rows=250width=20) Filter:(lower((name)::text)='pfDNQVmhqDrF1EY'::text) (2rows)
模糊匹配(不走索引)
explainselect*fromuserswherenamelike'pf%'; QUERYPLAN -------------------------------------------------------- SeqScanonusers(cost=0.00..944.00rows=5width=20) Filter:((name)::text~~'pf%'::text)
explainselect*fromuserswherenamelike'pf_'; QUERYPLAN -------------------------------------------------------- SeqScanonusers(cost=0.00..944.00rows=5width=20) Filter:((name)::text~~'pf_'::text)
字段带函数的bt索引:函数走索引
dropindexusers_idx0; CREATEINDEXusers_dex1ONusers(lower(name));
加函数全字匹配(走索引)
explainselect*fromuserswherelower(name)='pfDNQVmhqDrF1EY'; QUERYPLAN --------------------------------------------------------------------------- BitmapHeapScanonusers(cost=6.23..324.34rows=250width=20) RecheckCond:(lower((name)::text)='pfDNQVmhqDrF1EY'::text) ->BitmapIndexScanonusers_dex1(cost=0.00..6.17rows=250width=0) IndexCond:(lower((name)::text)='pfDNQVmhqDrF1EY'::text) (4rows)
模糊匹配(不走索引)
explainselect*fromuserswherelower(name)like'pf%'; QUERYPLAN ----------------------------------------------------------- SeqScanonusers(cost=0.00..1069.00rows=250width=20) Filter:(lower((name)::text)~~'pf%'::text) (2rows)
声明操作符类的bt索引:like走索引
定义索引的同时可以为索引的每个字段声明一个操作符类。
CREATEINDEXnameONtable(columnopclass[sortoptions][,…]);
这个操作符类指明该索引用于该字段时要使用的操作符。
CREATEINDEXusers_dex2ONusers(lower(name)varchar_pattern_ops);
模糊匹配(走索引)
explainselect*fromuserswherelower(name)like'pf%'; QUERYPLAN ------------------------------------------------------------------------------------------------------ BitmapHeapScanonusers(cost=4.82..144.00rows=5width=20) Filter:(lower((name)::text)~~'pf%'::text) ->BitmapIndexScanonusers_dex2(cost=0.00..4.82rows=53width=0) IndexCond:((lower((name)::text)~>=~'pf'::text)AND(lower((name)::text)~<~'pg'::text)) (4rows)
场景2namelike'%pf%'
Createorreplacefunctionrandom_string(lengthinteger)returnstextas $$ declare charstext[]:='{0,1,2,3,4,5,6,7,8,9,A,B,C,D,E,F,G,H,I,J,K,L,M,N,O,P,Q,R,S,T,U,V,W,X,Y,Z,a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z}'; resulttext:=''; iinteger:=0; begin iflength<0then raiseexception'Givenlengthcannotbelessthan0'; endif; foriin1..lengthloop result:=result||chars[1+random()*(array_length(chars,1)-1)]; endloop; returnresult; end; $$languageplpgsql; createtableusers(idintprimarykey,namevarchar(255)); insertintousersvalues(generate_series(1,50000),random_string(15));
声明操作符bt:不走索引
CREATEINDEXidx_nameONusersUSINGbtree(lower(name)varchar_pattern_ops);
explain(analyzetrue,formatyaml,verbosetrue,bufferstrue)select*fromuserswherelower(name)like'%pf%';\ QUERYPLAN ----------------------------------------------------------- -Plan:+ NodeType:"SeqScan"+ ParallelAware:false+ RelationName:"users"+ Schema:"public"+ Alias:"users"+ StartupCost:0.00+ TotalCost:1069.00+ PlanRows:5+ PlanWidth:20+ ActualStartupTime:0.320+ ActualTotalTime:86.841+ ActualRows:710+ ActualLoops:1+ Output:+ -"id"+ -"name"+ Filter:"(lower((users.name)::text)~~'%pf%'::text)"+ RowsRemovedbyFilter:49290+ SharedHitBlocks:319+ SharedReadBlocks:0+ SharedDirtiedBlocks:0+ SharedWrittenBlocks:0+ LocalHitBlocks:0+ LocalReadBlocks:0+ LocalDirtiedBlocks:0+ LocalWrittenBlocks:0+ TempReadBlocks:0+ TempWrittenBlocks:0+ PlanningTime:0.188+ Triggers:+ ExecutionTime:86.975
声明pg_trgm操作符bt:可以走索引
CREATEEXTENSIONpg_trgm; CREATEINDEXidx_users_name_trgm_gistONusersUSINGgist(namegist_trgm_ops);
explain(analyzetrue,verbosetrue,bufferstrue)select*fromuserswherenamelike'%pf%'; QUERYPLAN ------------------------------------------------------------------------------------------------------------------------------------------ BitmapHeapScanonpublic.users(cost=32.19..371.08rows=505width=20)(actualtime=19.314..53.132rows=193loops=1) Output:id,name RecheckCond:((users.name)::text~~'%pf%'::text) RowsRemovedbyIndexRecheck:49807 HeapBlocks:exact=319 Buffers:sharedhit=972 ->BitmapIndexScanonidx_users_name_trgm_gist(cost=0.00..32.06rows=505width=0)(actualtime=19.175..19.175rows=50000loops=1) IndexCond:((users.name)::text~~'%pf%'::text) Buffers:sharedhit=653 Planningtime:0.188ms Executiontime:53.231ms (11rows)
以上为个人经验,希望能给大家一个参考,也希望大家多多支持毛票票。如有错误或未考虑完全的地方,望不吝赐教。