python处理excel需要用到xlrd和xlwt这两个库,xlrd是读excel,xlwt是写excel的库。在pycharm里可直接下载。
各种常用函数
table = data.sheets()[0] # 获取一个工作,通过索引获取
table = data.sheet_by_index(0) #此形式也可
table = data.sheet_by_name(u'shheet 1') # 按名字索引
table.row_values(i) # 获取整行的值返回数组
table.col_values(i)# 获取整列的值返回数组
获取行数列数用table.nrows和table.ncols
获取单元格table.cell(0, 0).value
实际代码
#选取特定列
from datetime import date
from xlrd import open_workbook, xldate_as_tuple
from xlwt import Workbook
input_file = 'C:/Users/AwesomeRick/Desktop/Study/机器学习/csv文件/sales_2013.xlsx'
output_file = 'C:/Users/AwesomeRick/Desktop/new 1.xls'
output_workbook = Workbook()
output_worksheet = output_workbook.add_sheet('jan_2013_output')
sale_amount_column_index = 3
with open_workbook(input_file) as workbook:
worksheet = workbook.sheet_by_name('january_2013')
data = []
header = worksheet.row_values(0)
data.append(header)
for row_index in range(1,worksheet.nrows):
row_list = []
sale = worksheet.cell_value(row_index,sale_amount_column_index)
sale_amount = float(str(sale).strip('$').replace(',','')) #去掉$和“,”把剩下的字符串变为浮点型数字
if sale_amount>1400.0:
for column_index in range(worksheet.ncols):
cell_value = worksheet.cell_value(row_index, column_index)
cell_type = worksheet.cell_type(row_index,column_index)
if cell_type == 3:
date_cell = xldate_as_tuple(cell_value,workbook.datemode)
date_cell = date(*date_cell[0:3]).strftime('%m%d%Y')
row_list.append(date_cell)
else:
row_list.append(cell_value)
if row_list:
data.append(row_list)
for list_index, output_list in enumerate(data):
for element_index, element in enumerate(output_list):
output_worksheet.write(list_index, element_index, element)
output_workbook.save(output_file)
其中,cell_type:0 empty,1 string,2 number, 3 date,4 boolean,5 error
即date的cell_type=3,这时需要使用xlrd的xldate_as_tuple来处理为date格式,先判断表格的cell_type=3时date_cell才能开始操作。