python pandas 对时间序列文件处理的实例
如下所示:
import copy
import os

import matplotlib.pylab as plt
import numpy as np
import pandas as pd
from numpy import *
def read(filename, shop_id=855, out_path=None, chunk_size=1000000):
    """Count one shop's records per calendar day and write them to a CSV file.

    The input is streamed chunk by chunk, so it does not have to fit in
    memory.  It must provide a shop-id column ``B`` and a timestamp column
    ``C``.  The output has the day in column ``C`` and the number of records
    on that day in column ``D``; any existing output file is overwritten.

    Args:
        filename: Path of the input CSV file.
        shop_id: Value of column ``B`` whose rows are counted.
        out_path: Destination CSV path; defaults to ``'<shop_id>_pay.csv'``.
        chunk_size: Number of rows parsed per chunk.
    """
    if out_path is None:
        out_path = '%s_pay.csv' % shop_id

    partial_counts = []
    for chunk in pd.read_csv(filename, chunksize=chunk_size):
        stamps = pd.to_datetime(chunk.loc[chunk['B'] == shop_id, 'C']).dropna()
        if stamps.empty:
            continue
        # One "1" per record, indexed by its timestamp, so a daily sum is a count.
        ones = pd.Series(1, index=pd.DatetimeIndex(stamps, name='C'), name='D')
        partial_counts.append(ones.resample('D').sum())
    print("Iterationisstopped.")

    if not partial_counts:
        # No matching rows anywhere: there is nothing to save.
        return

    # The same day can appear in several chunks, so the per-chunk counts are
    # merged and re-binned; this also inserts 0 for days missing between chunks.
    daily = pd.concat(partial_counts).sort_index().resample('D').sum()
    daily.fillna(0).to_csv(out_path, header=True)
def read2(filename):
    """Load a CSV file in fixed-size chunks and return it as one DataFrame.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A DataFrame holding all rows of the file with a fresh 0..n-1 index.
    """
    csv_reader = pd.read_csv(filename, iterator=True)
    rows_per_chunk = 100000
    pieces = []
    while True:
        try:
            pieces.append(csv_reader.get_chunk(rows_per_chunk))
        except StopIteration:
            # The reader signals end-of-file by raising StopIteration.
            print("Iterationisstopped.")
            break
    return pd.concat(pieces, ignore_index=True)
def read3save(filename, shop_id=855, out_path=None):
    """Append one shop's per-day record counts from a CSV file to a result CSV.

    The input must provide a shop-id column ``B`` followed by a timestamp
    column ``C``.  The result file gets the day in column ``C`` and the number
    of records on that day in column ``D``.  Results are appended, so the
    function can be called once per input file; the header row is written
    only when the result file is new or empty.

    Args:
        filename: Path of the input CSV file.
        shop_id: Value of column ``B`` whose rows are counted.
        out_path: Result CSV path; defaults to ``'<shop_id>_pay.csv'``.
    """
    if out_path is None:
        out_path = '%s_pay.csv' % shop_id

    dat = pd.read_csv(filename)
    data = dat.loc[:, 'B':'C']
    # Copy the selection so the column assignments below never touch `dat`.
    data = data[data['B'] == shop_id].copy()
    print(data.shape)
    if data.empty:
        return

    data['C'] = pd.to_datetime(data['C'])
    data = data.set_index('C')
    data['D'] = 1  # one per record, so the daily sum is a count
    daily = data['D'].resample('D').sum().fillna(0)

    # A header in the middle of an appended file would corrupt it.
    write_header = not os.path.exists(out_path) or os.path.getsize(out_path) == 0
    daily.to_csv(out_path, mode='a', header=write_header)
def loadDataSet(fileName, delim='\t'):
    """Read a delimited text file of numbers into a NumPy matrix.

    Every non-blank line is stripped, split on ``delim`` and each field is
    converted with ``float``.  Blank lines are skipped.

    Args:
        fileName: Path of the text file to read.
        delim: Field separator used inside each line.

    Returns:
        A ``numpy.matrix`` with one row per non-blank line.

    Raises:
        ValueError: If a field cannot be converted to ``float``.
    """
    with open(fileName) as fr:
        rows = [
            [float(field) for field in line.strip().split(delim)]
            for line in fr
            if line.strip()
        ]
    # `asmatrix` is the function the removed NumPy alias `mat` pointed to.
    return np.asmatrix(rows)
def getShopData(shop_file='shopInfo.txt', pay_pattern="user_pay.001.00%d",
                out_pattern=r'D:\python\data\%s_pay.csv',
                file_numbers=range(1, 9)):
    """Write per-day record counts for every listed shop, one CSV per shop.

    ``shop_file`` holds one integer shop id per line.  Each input file named
    by ``pay_pattern % number`` must provide a shop-id column ``B`` followed
    by a timestamp column ``C``.  For every shop with matching rows the daily
    counts (day in column ``C``, count in column ``D``) are appended to
    ``out_pattern % shop_id``; the header row is written only when that file
    is new or empty.

    Args:
        shop_file: Text file listing the shop ids, one per line.
        pay_pattern: ``%d`` pattern giving the name of each input CSV file.
        out_pattern: ``%s`` pattern giving the result CSV path of each shop.
        file_numbers: Numbers substituted into ``pay_pattern``.
    """
    with open(shop_file) as fr:
        shop_ids = [line.strip() for line in fr if line.strip()]

    for i in file_numbers:
        dat = pd.read_csv(pay_pattern % i)
        data = dat.loc[:, 'B':'C']
        for shop in shop_ids:
            # Filter into a separate frame: `data` must stay intact so the
            # next shop is selected from the full file, not from this subset.
            selected = data[data['B'] == int(shop)].copy()
            print(selected.shape)
            if selected.empty:
                continue
            selected['C'] = pd.to_datetime(selected['C'])
            selected = selected.set_index('C')
            selected['D'] = 1  # one per record, so the daily sum is a count
            daily = selected['D'].resample('D').sum().fillna(0)

            savename = out_pattern % shop
            # A header in the middle of an appended file would corrupt it.
            write_header = (not os.path.exists(savename)
                            or os.path.getsize(savename) == 0)
            daily.to_csv(savename, mode='a', header=write_header)
        # Release this file's rows before the next file is loaded.
        del dat, data
    print("over")
def tset(filename, shop_id=855, out_path=r'D:\python\data\my_pay.csv'):
    """Append one shop's per-day record counts from a CSV file to a result CSV.

    The input must provide a shop-id column ``B`` followed by a timestamp
    column ``C``.  The result file gets the day in column ``C`` and the number
    of records on that day in column ``D``.  Results are appended; the header
    row is written only when the result file is new or empty.

    Args:
        filename: Path of the input CSV file.
        shop_id: Value of column ``B`` whose rows are counted.
        out_path: Result CSV path.
    """
    dat = pd.read_csv(filename)
    data = dat.loc[:, 'B':'C']
    # Copy the selection so the column assignments below never touch `dat`.
    data = data[data['B'] == shop_id].copy()
    print(data.shape)
    if data.empty:
        return

    data['C'] = pd.to_datetime(data['C'])
    data = data.set_index('C')
    data['D'] = 1  # one per record, so the daily sum is a count
    daily = data['D'].resample('D').sum().fillna(0)

    # A header in the middle of an appended file would corrupt it.
    write_header = not os.path.exists(out_path) or os.path.getsize(out_path) == 0
    daily.to_csv(out_path, mode='a', header=write_header)
def getShopData2(filename, out_pattern=r'D:\python\data2\%s_pay.csv',
                 shop_ids=range(1, 2001)):
    """Append per-day record counts of every shop in one CSV to per-shop files.

    The input must provide a shop-id column ``B`` followed by a timestamp
    column ``C``.  For each id in ``shop_ids`` that occurs in the input, the
    daily counts (day in column ``C``, count in column ``D``) are appended to
    ``out_pattern % shop_id``.  The ``C,D`` header row is written only when
    that file is new or empty, so repeated calls keep the file parseable.

    Args:
        filename: Path of the input CSV file.
        out_pattern: ``%s`` pattern giving the result CSV path of each shop.
        shop_ids: Shop ids to export; each id is printed as it is visited.
    """
    dat = pd.read_csv(filename)
    data = dat.loc[:, 'B':'C'].copy()
    data['C'] = pd.to_datetime(data['C'])
    data = data.set_index('C')
    data['D'] = 1  # one per record, so the daily sum is a count

    # Split the rows by shop once instead of re-filtering the whole frame
    # for every candidate id.
    per_shop = data.groupby('B')['D']
    present = per_shop.groups

    for i in shop_ids:
        print(i)
        if i not in present:
            continue
        daily = per_shop.get_group(i).resample('D').sum().fillna(0)

        savename = out_pattern % str(i)
        write_header = (not os.path.exists(savename)
                        or os.path.getsize(savename) == 0)
        daily.to_csv(savename, mode='a', header=write_header)
    print("over")
def formatData(in_pattern=r'D:\python\data2\%s_pay.csv',
               out_pattern=r'D:\python\data3\%s_pay.csv',
               shop_ids=range(1, 2001)):
    """Consolidate each shop's appended daily counts into one clean daily series.

    Every input file must have a date column ``C`` and a count column ``D``.
    Rows for the same day are summed, days without rows get 0, and the result
    is written to ``out_pattern % shop_id``, overwriting any existing file.
    Shops without an input file are skipped.

    Args:
        in_pattern: ``%s`` pattern giving the input CSV path of each shop.
        out_pattern: ``%s`` pattern giving the output CSV path of each shop.
        shop_ids: Shop ids to process; each id is printed as it is visited.
    """
    for i in shop_ids:
        s = str(i)
        print(s)
        name = in_pattern % s
        if not os.path.exists(name):
            continue

        data = pd.read_csv(name)
        # Files built by repeated appends may contain extra header rows;
        # coercing turns them into NaT/NaN so they can be dropped.
        data['C'] = pd.to_datetime(data['C'], errors='coerce')
        data['D'] = pd.to_numeric(data['D'], errors='coerce')
        data = data.dropna(subset=['C'])
        if data.empty:
            continue

        data = data.set_index('C').sort_index()
        data = data.resample('D').sum()
        data = data.fillna(0)
        data.to_csv(out_pattern % s, mode='w')
    print("over")
以上这篇 python pandas 对时间序列文件处理的实例就是小编分享给大家的全部内容了,希望能给大家一个参考,也希望大家多多支持毛票票。