Pandas中DataFrame基本函数整理(小结)
构造函数
DataFrame([data,index,columns,dtype,copy])#构造数据框
属性和数据
DataFrame.axes#index:行标签;columns:列标签 DataFrame.as_matrix([columns])#转换为矩阵 DataFrame.dtypes#返回每一列的数据类型 DataFrame.ftypes#返回每一列的稀疏/稠密类型及数据类型(如float64:dense) DataFrame.get_dtype_counts()#返回数据框中各数据类型的列数 DataFrame.get_ftype_counts()#返回数据框中各ftype(如float64:dense)的列数 DataFrame.select_dtypes([include,exclude])#根据数据类型选取子数据框 DataFrame.values#以Numpy数组的形式返回数据 DataFrame.axes#返回横纵坐标的标签名 DataFrame.ndim#返回数据框的维度 DataFrame.size#返回数据框元素的个数 DataFrame.shape#返回数据框的形状 DataFrame.memory_usage()#返回每一列的内存占用
类型转换
DataFrame.astype(dtype[,copy,errors])#转换数据类型 DataFrame.copy([deep])#deep深度复制数据 DataFrame.isnull()#以布尔的方式返回空值 DataFrame.notnull()#以布尔的方式返回非空值
索引和迭代
DataFrame.head([n])#返回前n行数据 DataFrame.at#快速标签标量访问器 DataFrame.iat#快速整型标量访问器 DataFrame.loc#标签定位,使用名称 DataFrame.iloc#整型定位,使用数字 DataFrame.insert(loc,column,value)#在指定位置loc[数字]插入column[列名]某列数据 DataFrame.__iter__()#Iterate over info axis DataFrame.iteritems()#返回列名和序列的迭代器 DataFrame.iterrows()#返回索引和序列的迭代器 DataFrame.itertuples([index,name])#Iterate over DataFrame rows as namedtuples, with index value as first element of the tuple. DataFrame.lookup(row_labels,col_labels)#Label-based “fancy indexing” function for DataFrame. DataFrame.pop(item)#返回删除的项目 DataFrame.tail([n])#返回最后n行 DataFrame.xs(key[,axis,level,drop_level])#Returns a cross-section (row(s) or column(s)) from the Series/DataFrame. DataFrame.isin(values)#判断数据框中的每个元素是否包含在values中 DataFrame.where(cond[,other,inplace,…])#条件筛选 DataFrame.mask(cond[,other,inplace,…])#Return an object of same shape as self and whose corresponding entries are from self where cond is False and otherwise are from other. DataFrame.query(expr[,inplace])#Query the columns of a frame with a boolean expression.
二元运算
DataFrame.add(other[,axis,fill_value])#加法,逐元素 DataFrame.sub(other[,axis,fill_value])#减法,逐元素 DataFrame.mul(other[,axis,fill_value])#乘法,逐元素 DataFrame.div(other[,axis,fill_value])#浮点除法,逐元素 DataFrame.truediv(other[,axis,level,…])#真除法,逐元素 DataFrame.floordiv(other[,axis,level,…])#向下取整除法,逐元素 DataFrame.mod(other[,axis,fill_value])#模运算,逐元素 DataFrame.pow(other[,axis,fill_value])#幂运算,逐元素 DataFrame.radd(other[,axis,fill_value])#右侧加法,逐元素 DataFrame.rsub(other[,axis,fill_value])#右侧减法,逐元素 DataFrame.rmul(other[,axis,fill_value])#右侧乘法,逐元素 DataFrame.rdiv(other[,axis,fill_value])#右侧浮点除法,逐元素 DataFrame.rtruediv(other[,axis,…])#右侧真除法,逐元素 DataFrame.rfloordiv(other[,axis,…])#右侧向下取整除法,逐元素 DataFrame.rmod(other[,axis,fill_value])#右侧模运算,逐元素 DataFrame.rpow(other[,axis,fill_value])#右侧幂运算,逐元素 DataFrame.lt(other[,axis,level])#类似Array.lt DataFrame.gt(other[,axis,level])#类似Array.gt DataFrame.le(other[,axis,level])#类似Array.le DataFrame.ge(other[,axis,level])#类似Array.ge DataFrame.ne(other[,axis,level])#类似Array.ne DataFrame.eq(other[,axis,level])#类似Array.eq DataFrame.combine(other,func[,fill_value,…])#Add two DataFrame objects and do not propagate NaN values, so if for a (column, time) one frame is missing a value, it will default to the other frame's value (which might be NaN as well) DataFrame.combine_first(other)#Combine two DataFrame objects and default to non-null values in frame calling the method.
函数应用&分组&窗口
DataFrame.apply(func[,axis,broadcast,…])#应用函数 DataFrame.applymap(func)#Apply a function to a DataFrame that is intended to operate elementwise, i.e. like doing map(func, series) for each series in the DataFrame DataFrame.aggregate(func[,axis])#Aggregate using callable, string, dict, or list of string/callables DataFrame.transform(func,*args,**kwargs)#Call function producing a like-indexed NDFrame DataFrame.groupby([by,axis,level,…])#分组 DataFrame.rolling(window[,min_periods,…])#滚动窗口 DataFrame.expanding([min_periods,freq,…])#扩展窗口 DataFrame.ewm([com,span,halflife,…])#指数加权窗口
描述统计学
DataFrame.abs()#返回绝对值 DataFrame.all([axis,bool_only,skipna])#Return whether all elements are True over requested axis DataFrame.any([axis,bool_only,skipna])#Return whether any element is True over requested axis DataFrame.clip([lower,upper,axis])#Trim values at input threshold(s). DataFrame.clip_lower(threshold[,axis])#Return copy of the input with values below given value(s) truncated. DataFrame.clip_upper(threshold[,axis])#Return copy of input with values above given value(s) truncated. DataFrame.corr([method,min_periods])#返回本数据框成对列的相关性系数 DataFrame.corrwith(other[,axis,drop])#返回不同数据框的相关性 DataFrame.count([axis,level,numeric_only])#返回非空元素的个数 DataFrame.cov([min_periods])#计算协方差 DataFrame.cummax([axis,skipna])#Return cumulative max over requested axis. DataFrame.cummin([axis,skipna])#Return cumulative minimum over requested axis. DataFrame.cumprod([axis,skipna])#返回累乘积 DataFrame.cumsum([axis,skipna])#返回累加和 DataFrame.describe([percentiles,include,…])#整体描述数据框 DataFrame.diff([periods,axis])#1st discrete difference of object DataFrame.eval(expr[,inplace])#Evaluate an expression in the context of the calling DataFrame instance. DataFrame.kurt([axis,skipna,level,…])#返回无偏峰度(采用Fisher定义,正态分布的峰度==0.0) DataFrame.mad([axis,skipna,level])#返回平均绝对偏差 DataFrame.max([axis,skipna,level,…])#返回最大值 DataFrame.mean([axis,skipna,level,…])#返回均值 DataFrame.median([axis,skipna,level,…])#返回中位数 DataFrame.min([axis,skipna,level,…])#返回最小值 DataFrame.mode([axis,numeric_only])#返回众数 DataFrame.pct_change([periods,fill_method])#返回百分比变化 DataFrame.prod([axis,skipna,level,…])#返回连乘积 DataFrame.quantile([q,axis,numeric_only])#返回分位数 DataFrame.rank([axis,method,numeric_only])#返回数值的排名 DataFrame.round([decimals])#Round a DataFrame to a variable number of decimal places. DataFrame.sem([axis,skipna,level,ddof])#返回无偏标准误 DataFrame.skew([axis,skipna,level,…])#返回无偏偏度 DataFrame.sum([axis,skipna,level,…])#求和 DataFrame.std([axis,skipna,level,ddof])#返回样本标准差 DataFrame.var([axis,skipna,level,ddof])#返回无偏方差
重新索引&选取&标签操作
DataFrame.add_prefix(prefix)#添加前缀 DataFrame.add_suffix(suffix)#添加后缀 DataFrame.align(other[,join,axis,level])#Align two objects on their axes with the specified join method for each axis Index DataFrame.drop(labels[,axis,level,…])#返回删除指定标签(行或列)后的新对象 DataFrame.drop_duplicates([subset,keep,…])#Return DataFrame with duplicate rows removed, optionally only considering certain columns DataFrame.duplicated([subset,keep])#Return boolean Series denoting duplicate rows, optionally only considering certain columns DataFrame.equals(other)#两个数据框是否相同 DataFrame.filter([items,like,regex,axis])#过滤特定的子数据框 DataFrame.first(offset)#Convenience method for subsetting initial periods of time series data based on a date offset. DataFrame.head([n])#返回前n行 DataFrame.idxmax([axis,skipna])#Return index of first occurrence of maximum over requested axis. DataFrame.idxmin([axis,skipna])#Return index of first occurrence of minimum over requested axis. DataFrame.last(offset)#Convenience method for subsetting final periods of time series data based on a date offset. DataFrame.reindex([index,columns])#Conform DataFrame to new index with optional filling logic, placing NA/NaN in locations having no value in the previous index. DataFrame.reindex_axis(labels[,axis,…])#Conform input object to new index with optional filling logic, placing NA/NaN in locations having no value in the previous index. DataFrame.reindex_like(other[,method,…])#Return an object with matching indices to myself. DataFrame.rename([index,columns])#Alter axes input function or functions. DataFrame.rename_axis(mapper[,axis,copy])#Alter index and/or columns using input function or functions. DataFrame.reset_index([level,drop,…])#For DataFrame with multi-level index, return new DataFrame with labeling information in the columns under the index names, defaulting to ‘level_0’, ‘level_1’, etc. DataFrame.sample([n,frac,replace,…])#返回随机抽样 DataFrame.select(crit[,axis])#Return data corresponding to axis labels matching criteria DataFrame.set_index(keys[,drop,append])#Set the DataFrame index (row labels) using one or more existing columns.
DataFrame.tail([n])#返回最后几行 DataFrame.take(indices[,axis,convert])#Analogoustondarray.take DataFrame.truncate([before,after,axis])#TruncatesasortedNDFramebeforeand/oraftersomeparticularindexvalue.
处理缺失值
DataFrame.dropna([axis,how,thresh,…])#Return object with labels on given axis omitted where alternately any or all of the data are missing DataFrame.fillna([value,method,axis,…])#填充空值 DataFrame.replace([to_replace,value,…])#Replace values given in ‘to_replace’ with ‘value’.
重新定型&排序&转变形态
DataFrame.pivot([index,columns,values])#Reshapedata(producea“pivot”table)basedoncolumnvalues. DataFrame.reorder_levels(order[,axis])#Rearrangeindexlevelsusinginputorder. DataFrame.sort_values(by[,axis,ascending])#Sortbythevaluesalongeitheraxis DataFrame.sort_index([axis,level,…])#Sortobjectbylabels(alonganaxis) DataFrame.nlargest(n,columns[,keep])#GettherowsofaDataFramesortedbythenlargestvaluesofcolumns. DataFrame.nsmallest(n,columns[,keep])#GettherowsofaDataFramesortedbythensmallestvaluesofcolumns. DataFrame.swaplevel([i,j,axis])#SwaplevelsiandjinaMultiIndexonaparticularaxis DataFrame.stack([level,dropna])#Pivotalevelofthe(possiblyhierarchical)columnlabels,returningaDataFrame(orSeriesinthecaseofanobjectwithasinglelevelofcolumnlabels)havingahierarchicalindexwithanewinner-mostlevelofrowlabels. DataFrame.unstack([level,fill_value])#Pivotalevelofthe(necessarilyhierarchical)indexlabels,returningaDataFramehavinganewlevelofcolumnlabelswhoseinner-mostlevelconsistsofthepivotedindexlabels. DataFrame.melt([id_vars,value_vars,…])#“Unpivots”aDataFramefromwideformattolongformat,optionally DataFrame.T#Transposeindexandcolumns DataFrame.to_panel()#Transformlong(stacked)format(DataFrame)intowide(3D,Panel)format. DataFrame.to_xarray()#Returnanxarrayobjectfromthepandasobject. DataFrame.transpose(*args,**kwargs)#Transposeindexandcolumns
Combining&joining&merging
DataFrame.append(other[,ignore_index,…])#追加数据 DataFrame.assign(**kwargs)#Assign new columns to a DataFrame, returning a new object (a copy) with all the original columns in addition to the new ones. DataFrame.join(other[,on,how,lsuffix,…])#Join columns with other DataFrame either on index or on a key column. DataFrame.merge(right[,how,on,left_on,…])#Merge DataFrame objects by performing a database-style join operation by columns or indexes. DataFrame.update(other[,join,overwrite,…])#Modify DataFrame in place using non-NA values from passed DataFrame.
时间序列
DataFrame.asfreq(freq[,method,how,…])#将时间序列转换为特定的频次 DataFrame.asof(where[,subset])#ThelastrowwithoutanyNaNistaken(orthelastrowwithout DataFrame.shift([periods,freq,axis])#Shiftindexbydesirednumberofperiodswithanoptionaltimefreq DataFrame.first_valid_index()#Returnlabelforfirstnon-NA/nullvalue DataFrame.last_valid_index()#Returnlabelforlastnon-NA/nullvalue DataFrame.resample(rule[,how,axis,…])#Conveniencemethodforfrequencyconversionandresamplingoftimeseries. DataFrame.to_period([freq,axis,copy])#ConvertDataFramefromDatetimeIndextoPeriodIndexwithdesired DataFrame.to_timestamp([freq,how,axis])#CasttoDatetimeIndexoftimestamps,atbeginningofperiod DataFrame.tz_convert(tz[,axis,level,copy])#Converttz-awareaxistotargettimezone. DataFrame.tz_localize(tz[,axis,level,…])#Localizetz-naiveTimeSeriestotargettimezone.
作图
DataFrame.plot([x,y,kind,ax,….])#DataFrameplottingaccessorandmethod DataFrame.plot.area([x,y])#面积图Areaplot DataFrame.plot.bar([x,y])#垂直条形图Verticalbarplot DataFrame.plot.barh([x,y])#水平条形图Horizontalbarplot DataFrame.plot.box([by])#箱图Boxplot DataFrame.plot.density(**kwds)#核密度KernelDensityEstimateplot DataFrame.plot.hexbin(x,y[,C,…])#Hexbinplot DataFrame.plot.hist([by,bins])#直方图Histogram DataFrame.plot.kde(**kwds)#核密度KernelDensityEstimateplot DataFrame.plot.line([x,y])#线图Lineplot DataFrame.plot.pie([y])#饼图Piechart DataFrame.plot.scatter(x,y[,s,c])#散点图Scatterplot DataFrame.boxplot([column,by,ax,…])#MakeaboxplotfromDataFramecolumnoptionallygroupedbysomecolumnsor DataFrame.hist(data[,column,by,grid,…])#DrawhistogramoftheDataFrame'sseriesusingmatplotlib/pylab.
转换为其他格式
DataFrame.from_csv(path[,header,sep,…])#ReadCSVfile(DEPRECATED,pleaseusepandas.read_csv()instead). DataFrame.from_dict(data[,orient,dtype])#ConstructDataFramefromdictofarray-likeordicts DataFrame.from_items(items[,columns,orient])#Convert(key,value)pairstoDataFrame. DataFrame.from_records(data[,index,…])#ConvertstructuredorrecordndarraytoDataFrame DataFrame.info([verbose,buf,max_cols,…])#ConcisesummaryofaDataFrame. DataFrame.to_pickle(path[,compression,…])#Pickle(serialize)objecttoinputfilepath. DataFrame.to_csv([path_or_buf,sep,na_rep])#WriteDataFrametoacomma-separatedvalues(csv)file DataFrame.to_hdf(path_or_buf,key,**kwargs)#WritethecontaineddatatoanHDF5fileusingHDFStore. DataFrame.to_sql(name,con[,flavor,…])#WriterecordsstoredinaDataFrametoaSQLdatabase. DataFrame.to_dict([orient,into])#ConvertDataFrametodictionary. DataFrame.to_excel(excel_writer[,…])#WriteDataFrametoanexcelsheet DataFrame.to_json([path_or_buf,orient,…])#ConverttheobjecttoaJSONstring. DataFrame.to_html([buf,columns,col_space])#RenderaDataFrameasanHTMLtable. DataFrame.to_feather(fname)#writeoutthebinaryfeather-formatforDataFrames DataFrame.to_latex([buf,columns,…])#Renderanobjecttoatabularenvironmenttable. DataFrame.to_stata(fname[,convert_dates,…])#AclassforwritingStatabinarydtafilesfromarray-likeobjects DataFrame.to_msgpack([path_or_buf,encoding])#msgpack(serialize)objecttoinputfilepath DataFrame.to_sparse([fill_value,kind])#ConverttoSparseDataFrame DataFrame.to_dense()#ReturndenserepresentationofNDFrame(asopposedtosparse) DataFrame.to_string([buf,columns,…])#RenderaDataFrametoaconsole-friendlytabularoutput. DataFrame.to_clipboard([excel,sep])#AttempttowritetextrepresentationofobjecttothesystemclipboardThiscanbepastedintoExcel,forexample.
到此这篇关于Pandas中DataFrame基本函数整理(小结)的文章就介绍到这了,更多相关Pandas DataFrame基本函数内容,请搜索毛票票以前的文章或继续浏览下面的相关文章,希望大家以后多多支持毛票票!