您现在的位置是:门户> 编程语言> Python

python pandas 对时间序列文件处理的实例
2021-06-09 16人围观 0条评论
简介:今天小编就为大家分享一篇 python pandas 对时间序列文件处理的实例,具有很好的参考价值,希望对大家有所帮助。一起跟随小编过来看看吧。

    如下所示:

    import pandas as pd
    from numpy import *
    import matplotlib.pylab as plt
    import copy
    
    def read(filename):
        """Count the rows of shop 855 per calendar day and save them to '855_pay.csv'.

        The CSV is read chunk by chunk.  Only column 'B' (compared against
        855) and column 'C' (parsed as a timestamp) are used.  The saved
        series is named 'D' and indexed by day 'C'; days inside the covered
        range that have no matching row hold 0.

        Args:
            filename: Path of a CSV file containing columns 'B' and 'C'.

        Returns:
            None.  No file is written when no row matches.
        """
        chunk_size = 1000000
        partial_counts = []
        reader = pd.read_csv(filename, iterator=True)
        try:
            while True:
                try:
                    chunk = reader.get_chunk(chunk_size)
                except StopIteration:
                    print("Iteration is stopped.")
                    break
                chunk = chunk.loc[:, 'B':'C']
                matched = chunk[chunk.B == 855]
                if matched.empty:
                    continue
                # assign() builds a new frame, so the filtered slice is never
                # written to in place.
                matched = matched.assign(C=pd.to_datetime(matched['C']), D=1)
                partial_counts.append(
                    matched.set_index('C')['D'].resample('D').sum()
                )
        finally:
            reader.close()

        if not partial_counts:
            return

        # A day can straddle two chunks: add the partial counts per day, then
        # resample once more so days missing between chunks are zero-filled.
        total = pd.concat(partial_counts).groupby(level=0).sum()
        total = total.resample('D').sum()
        total.to_csv('855_pay.csv', header=True)
    
    def read2(filename):
        """Load a CSV file in fixed-size pieces and return it as one DataFrame.

        Args:
            filename: Path of the CSV file to load.

        Returns:
            A DataFrame holding every row of the file, re-indexed from 0.
        """
        rows_per_piece = 100000
        pieces = []
        source = pd.read_csv(filename, iterator=True)
        while True:
            try:
                piece = source.get_chunk(rows_per_piece)
            except StopIteration:
                # The reader signals exhaustion by raising StopIteration.
                print ("Iteration is stopped.")
                break
            pieces.append(piece)
        return pd.concat(pieces, ignore_index=True)
    
    def read3save(filename):
        """Append the per-day row counts of shop 855 to '855_pay.csv'.

        The whole CSV is loaded at once.  Rows whose column 'B' equals 855 are
        counted per calendar day of column 'C'; the counts are appended as a
        series named 'D' indexed by 'C'.

        Args:
            filename: Path of a CSV file containing columns 'B' and 'C'.

        Returns:
            None.  Nothing is written when no row matches.
        """
        import os  # local import: only needed for the header check below

        rows = pd.read_csv(filename).loc[:, 'B':'C']
        rows = rows[rows.B == 855]
        print(rows.shape)
        if rows.empty:
            return

        # assign() returns a new frame, so the filtered slice is never
        # modified in place.
        rows = rows.assign(C=pd.to_datetime(rows['C']), D=1)
        daily = rows.set_index('C')['D'].resample('D').sum()

        # Appending must not repeat the header, otherwise the file stops
        # being a valid CSV after the second call.
        target = '855_pay.csv'
        is_new = not os.path.exists(target) or os.path.getsize(target) == 0
        daily.to_csv(target, mode='a', header=is_new)
    
    def loadDataSet(fileName, delim='\t'):
        """Load a delimited text file of numbers into a NumPy matrix.

        Args:
            fileName: Path of the text file; every non-blank line is one row.
            delim: Field separator used within a line (default: tab).

        Returns:
            A numpy.matrix of floats with one row per non-blank line.

        Raises:
            ValueError: If a field cannot be converted to float.
        """
        # 'with' closes the handle even when a field fails to parse.
        with open(fileName) as fr:
            rows = [
                [float(field) for field in line.strip().split(delim)]
                for line in fr
                if line.strip()  # skip blank lines instead of failing on float('')
            ]
        # asmatrix exists in NumPy 1.x and 2.x; the `mat` alias was removed in 2.0.
        return asmatrix(rows)
    
    def getShopData():
        """Write per-day row counts for every shop listed in 'shopInfo.txt'.

        Shop ids are read one per line from 'shopInfo.txt'.  Each of the eight
        input files 'user_pay.001.001' to 'user_pay.001.008' is loaded in
        turn.  For every listed shop, the rows whose column 'B' equals the
        shop id are counted per calendar day of column 'C' and appended to
        that shop's '<id>_pay.csv' file in the hard-coded output directory.

        Returns:
            None.

        Raises:
            ValueError: If a non-blank line of 'shopInfo.txt' is not an integer.
        """
        import os  # local import: only needed for the header check below

        with open('shopInfo.txt') as fr:
            shop_tokens = [line.strip() for line in fr if line.strip()]

        for i in range(1, 9):
            name = "user_pay.001.00%d" % i
            payments = pd.read_csv(name).loc[:, 'B':'C']
            for token in shop_tokens:
                # Filter from the full table every time so one shop's subset
                # never leaks into the next shop's selection.
                rows = payments[payments.B == int(token)]
                print(rows.shape)
                if rows.empty:
                    continue
                rows = rows.assign(C=pd.to_datetime(rows['C']), D=1)
                daily = rows.set_index('C')['D'].resample('D').sum()
                # Raw string: the backslashes are path separators, not escapes.
                savename = r'D:\python\data\%s_pay.csv' % token
                # The same file is appended to once per input file, so the
                # header may only be written the first time.
                is_new = (not os.path.exists(savename)
                          or os.path.getsize(savename) == 0)
                daily.to_csv(savename, mode='a', header=is_new)
        print("over")
    
    def tset(filename):
        """Append the per-day row counts of shop 855 to the 'my_pay.csv' output file.

        The whole CSV is loaded at once.  Rows whose column 'B' equals 855 are
        counted per calendar day of column 'C'; the counts are appended as a
        series named 'D' indexed by 'C' to 'my_pay.csv' in the hard-coded
        output directory.

        Args:
            filename: Path of a CSV file containing columns 'B' and 'C'.

        Returns:
            None.  Nothing is written when no row matches.
        """
        import os  # local import: only needed for the header check below

        rows = pd.read_csv(filename).loc[:, 'B':'C']
        rows = rows[rows.B == 855]
        print(rows.shape)
        if rows.empty:
            return

        # assign() returns a new frame, so the filtered slice is never
        # modified in place.
        rows = rows.assign(C=pd.to_datetime(rows['C']), D=1)
        daily = rows.set_index('C')['D'].resample('D').sum()

        s = 'my'
        # Raw string: the backslashes are path separators, not escapes.
        savename = r'D:\python\data\%s_pay.csv' % s
        # Write the header only when starting a new file so repeated calls
        # keep the CSV well-formed.
        is_new = not os.path.exists(savename) or os.path.getsize(savename) == 0
        daily.to_csv(savename, mode='a', header=is_new)
      
    def getShopData2(filename):
        """Write one per-day row-count CSV for every shop id from 1 to 2000.

        The whole CSV is loaded at once.  For each value of column 'B' in the
        range 1..2000 that occurs in the file, the rows are counted per
        calendar day of column 'C' and appended to that shop's '<id>_pay.csv'
        file in the hard-coded output directory, with columns 'C' and 'D'.
        Shop ids that do not occur produce no file.

        Args:
            filename: Path of a CSV file containing columns 'B' and 'C'.

        Returns:
            None.
        """
        import os  # local import: only needed for the header check below

        dat = pd.read_csv(filename)
        data = dat.loc[:, 'B':'C'].copy()
        data['C'] = pd.to_datetime(data['C'])
        data = data.set_index('C')
        data['D'] = 1

        # A single grouped pass replaces 2000 separate scans of the table;
        # isin() keeps the 1..2000 shop-id window.
        in_range = data[data.B.isin(list(range(1, 2001)))]
        for shop_id, rows in in_range.groupby('B'):
            s = str(int(shop_id))
            print(s)
            daily = rows['D'].resample('D').sum()
            # Raw string: the backslashes are path separators, not escapes.
            savename = r'D:\python\data2\%s_pay.csv' % s
            # Let to_csv emit the 'C,D' header exactly once per file; a
            # separately written header row would be duplicated.
            is_new = not os.path.exists(savename) or os.path.getsize(savename) == 0
            daily.to_csv(savename, mode='a', header=is_new, index_label='C')
        print("over")
    def formatData():
        """Re-aggregate every per-shop CSV to one row per calendar day.

        For each shop id from 1 to 2000, the '<id>_pay.csv' file in the
        hard-coded 'data2' input directory is read, its column 'C' is parsed
        as a timestamp and used as the index, the remaining columns are
        summed per day, and the result overwrites '<id>_pay.csv' in the
        hard-coded 'data3' output directory.  Shop ids without an input file
        are skipped.

        Returns:
            None.
        """
        import os  # local import: only needed for the existence check below

        for i in range(1, 2001):
            s = str(i)
            # Raw strings: the backslashes are path separators, not escapes.
            name = r'D:\python\data2\%s_pay.csv' % s
            # getShopData2 writes no file for a shop without rows, so a
            # missing input is expected and must not abort the run.
            if not os.path.exists(name):
                continue
            print(s)
            data = pd.read_csv(name)
            data['C'] = pd.to_datetime(data['C'])
            # Summing per day also zero-fills days that have no rows.
            data = data.set_index('C').resample('D').sum()
            savename = r'D:\python\data3\%s_pay.csv' % s
            data.to_csv(savename, mode='w')
        print("over")
    

    以上这篇python pandas 对时间序列文件处理的实例就是小编分享给大家的全部内容了,希望能给大家一个参考,也希望大家多多支持脚本之家。

分享:

文章评论

    • wxpython 学习笔记 第一天
    • python文本数据处理学习笔记详解