每天的数据差不多是这样的:

抓取网页数据,进行保存:

# Download one bandwidth XML report per day from myview.chinanetcenter.com
# and save each response to a local directory, one file per date.
import urllib
import time
import calendar

try:
    # Python 3 moved urlretrieve into urllib.request.
    from urllib.request import urlretrieve
except ImportError:
    # Python 2 exposes it directly on the urllib module.
    from urllib import urlretrieve

# NOTE(review): the account name and password are embedded in this URL and
# therefore live in source control; consider loading them from configuration.
REPORT_URL = "https://myview.chinanetcenter.com/api/bandwidth-channel.action?u=howbuy&p=Howbuy123&date="

# Seconds to wait after every download before requesting the next day.
PAUSE_SECONDS = 5

year_list = [2016]
month_list = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]


def download_day(year, month, day, target_dir):
    """Fetch the report for one calendar day and store it as <date>.xml.

    year, month, day -- integers identifying the day to request.
    target_dir -- directory prefix (including the trailing separator) that
        the file name is appended to.
    """
    # Zero-padded YYYY-MM-DD, used both as the query value and the file name.
    datestring = "%d-%02d-%02d" % (year, month, day)
    urlretrieve(REPORT_URL + datestring,
                filename=target_dir + datestring + ".xml")
    time.sleep(PAUSE_SECONDS)


for year in year_list:
    if year == 2015:
        # 2015: every day from July through December.
        for month in month_list[6:]:
            last_day = calendar.monthrange(year, month)[1]
            for day in range(1, last_day + 1):
                download_day(year, month, day, "E:\\xml\\2015\\")  # modify path
    else:
        # Any other year: only 7 June through the end of June, and the files
        # always go to the 2016 directory.
        last_day = calendar.monthrange(year, 6)[1]
        for day in range(7, last_day + 1):
            download_day(year, 6, day, "E:\\xml\\2016\\")  # modify path

统计每天数据的平均值、最大值、最小值,并对一直大于130的值进行统计处理,最后生成xls表格:

# Summarise every daily bandwidth XML file (maximum and average value) and
# write one row per file into an .xls workbook.
import xml.dom.minidom
import urllib
import time
import calendar
import os
import xlrd
import xlwt
from xlwt import *

XML_ROOT = "E:\\zh\\"  # modify path
REPORT_PATH = 'E:\\mini6.xls'  # modify path


def _day_label(filename):
    """Return filename without a trailing ".xml" extension.

    str.strip(".xml") would remove any of the characters '.', 'x', 'm', 'l'
    from both ends of the name, so the suffix is cut off explicitly instead.
    """
    if filename.endswith(".xml"):
        return filename[:-len(".xml")]
    return filename


# One (label, maximum, average) tuple per processed XML file.
datalist = []
for path, subdirs, filenames in os.walk(XML_ROOT):
    print(filenames)
    print(path)
    print(subdirs)
    for filename in filenames:
        # os.walk also descends into sub-directories, so the file has to be
        # opened relative to the directory currently being visited.
        dom = xml.dom.minidom.parse(os.path.join(path, filename))
        bands = dom.documentElement.getElementsByTagName('bandwidth')
        value_list = [float(band.firstChild.data) for band in bands]
        if not value_list:
            # max() and the average are undefined without any samples.
            print("no bandwidth values in %s, skipped" % filename)
            continue
        average = sum(value_list) / len(value_list)
        datalist.append((_day_label(filename), max(value_list), average))

# The workbook is written once, after all files are read; nothing is written
# when no rows were collected.
if datalist:
    workbook = xlwt.Workbook()
    table = workbook.add_sheet('tongji')
    for row, (daydate, daymax, dayv) in enumerate(datalist):
        table.write(row, 0, daydate)
        table.write(row, 1, daymax)
        table.write(row, 2, dayv)
    workbook.save(REPORT_PATH)
# Dump every individual bandwidth sample from the daily XML files into an
# .xls workbook, one row per sample: (file label, value).
import xml.dom.minidom
import urllib
import time
import calendar
import os
import xlrd
import xlwt
from xlwt import *

XML_ROOT = "E:\\zh\\2016\\"  # modify path
REPORT_PATH = 'E:\\mini14.xls'  # modify path


def _day_label(filename):
    """Return filename without a trailing ".xml" extension.

    str.strip(".xml") would remove any of the characters '.', 'x', 'm', 'l'
    from both ends of the name, so the suffix is cut off explicitly instead.
    """
    if filename.endswith(".xml"):
        return filename[:-len(".xml")]
    return filename


# One (label, value) tuple per <bandwidth> element found.
datalist = []
parsed_files = 0
for path, subdirs, filenames in os.walk(XML_ROOT):
    for filename in filenames:
        # os.walk also descends into sub-directories, so the file has to be
        # opened relative to the directory currently being visited.
        dom = xml.dom.minidom.parse(os.path.join(path, filename))
        parsed_files += 1
        bands = dom.documentElement.getElementsByTagName('bandwidth')
        label = _day_label(filename)
        for band in bands:
            datalist.append((label, float(band.firstChild.data)))

# The workbook is written once, after all files are read; nothing is written
# when no XML file was found.
# NOTE(review): the legacy .xls format limits a sheet to 65536 rows; confirm
# the total number of samples stays below that.
if parsed_files:
    workbook = xlwt.Workbook()
    table = workbook.add_sheet('tongji')
    for row, (daydate, daycount) in enumerate(datalist):
        table.write(row, 0, daydate)
        table.write(row, 1, daycount)
    workbook.save(REPORT_PATH)