因为每个月会分析部门内部各个小团队的平均加班时长,以评估大家的工作量,每次手工分析纯粹是重复逻辑工作,做多了之后便觉得枯燥无味。于是想到了利用python写一个脚本来解放自己。
#coding:utf-8
from openpyxl import Workbook
from openpyxl import load_workbook
from pandas import Series,DataFrame
import pandas as pd
import numpy as np,pandas as pd
import xlrd
df = pd.read_excel(r'E:\pythonstudy\excel\data\sample.xlsx','Worksheet',encoding='gbk')
#print(df[u'加班时长(分)'])
#df01 = pd.read_excel(r'E:\pythonstudy\excel\data\9.et','Worksheet',encoding='gbk')
total_OT = df[u'加班时长(分)'].sum() #计算加班时长列的总和
print(total_OT)
df_name = df[u'姓名'].drop_duplicates() #剔除姓名的重复数据
df_name_total = df_name.count() #统计部门总人数
print('total_num:',df_name_total)
team_dz_cp01 = df[df[u'所在中心'] == u'XX事业部/测试部/XX测试部/XX测试一组'] #按小组筛选数据
team_dz_cp01_OT = team_dz_cp01[u'加班时长(分)'].sum() #产品测试一组加班时长统计
print(team_dz_cp01_OT)
name_dz_cp01 = team_dz_cp01[u'姓名'].drop_duplicates() #XX测试一组按姓名剔除重复数据
name_dz_cp01_num = name_dz_cp01.count() #统计XX测试一组人数
print(name_dz_cp01_num)
team_dz_cp02 = df[df[u'所在中心'] == u'XX事业部/测试部/XX测试部/XX测试二组'] #按小组筛选数据
team_dz_cp02_OT = team_dz_cp02[u'加班时长(分)'].sum() #筛选出的小组数据中,按加班时长列求和,统计加班时长
print(team_dz_cp02_OT)
name_dz_cp02 = team_dz_cp02[u'姓名'].drop_duplicates()
name_dz_cp02_num = name_dz_cp02.count() #统计XX测试二组人数
print(name_dz_cp02_num)
team_dz_yy = df[df[u'所在中心'] == u'XX事业部/测试部/XX测试部/XX测试组']
team_dz_yy_OT = team_dz_yy[u'加班时长(分)'].sum()
print(team_dz_yy_OT)
name_dz_yy = team_dz_yy[u'姓名'].drop_duplicates()
name_dz_yy_num = name_dz_yy.count() - 1 #统计XX测试组人数,减掉一个休产假人员
print('yy:',name_dz_yy_num)
team_dz_xxm = df[df[u'所在中心'] == u'XX事业部/测试部/XX测试部/XX测试组']
team_dz_xxm_OT = team_dz_xxm[u'加班时长(分)'].sum()
print(team_dz_xxm_OT)
name_dz_xxm = team_dz_xxm[u'姓名'].drop_duplicates()
name_dz_xxm_num = name_dz_xxm.count() #统计XX测试组人数
print(name_dz_xxm_num)
team_dz_gj = df[df[u'所在中心'] == u'XX事业部/测试部/XX测试部/XX测试组']
team_dz_gj_OT = team_dz_gj[u'加班时长(分)'].sum()
print(team_dz_gj_OT)
name_dz_gj = team_dz_gj[u'姓名'].drop_duplicates()
name_dz_gj_num = name_dz_gj.count() #统计XX测试组人数
print(name_dz_gj_num)
#XX休产假
team_dz_leader = df[df[u'所在中心'] == u'XX事业部/测试部/XX测试部']
team_dz_leader_OT= team_dz_leader[u'加班时长(分)'].sum() #由于休产假,加班时长为Nan
print('dz leader',team_dz_leader_OT)
name_dz_leader = team_dz_leader[u'姓名'].drop_duplicates()
name_dz_leader_num = name_dz_leader.count() - 1 #休产假,人数减1
print(name_dz_leader_num)
team_dz_OT = sum([team_dz_cp01_OT,team_dz_cp02_OT,team_dz_xxm_OT,team_dz_gj_OT,
team_dz_yy_OT]) #XX测试部加班时长统计
print(team_dz_OT)
team_dz_num = sum([name_dz_gj_num,name_dz_xxm_num,name_dz_yy_num,name_dz_cp02_num,
name_dz_cp01_num],name_dz_leader_num) #XX测试部人数统计
print(team_dz_num)
team_qp_dfqp = df[df[u'所在中心'] == u'XX事业部/测试部/XX测试部/XX测试组']
team_qp_dfqp_OT = team_qp_dfqp[u'加班时长(分)'].sum()
print(team_qp_dfqp_OT)
name_qp_dfqp = team_qp_dfqp[u'姓名'].drop_duplicates()
name_qp_dfqp_num = name_qp_dfqp.count() #统计XX测试组人数
print(name_qp_dfqp_num)
team_qp_jg = df[df[u'所在中心'] == u'XX事业部/测试部/XX测试部/XX测试组']
team_qp_jg_OT = team_qp_jg[u'加班时长(分)'].sum()
print(team_qp_jg_OT)
name_qp_jg = team_qp_jg[u'姓名'].drop_duplicates()
name_qp_jg_num = name_qp_jg.count() #统计XX测试组人数
print(name_qp_jg_num)
team_qp_ljf = df[df[u'所在中心'] == u'XX事业部/测试部/XX测试部/XX测试组']
team_qp_ljf_OT = team_qp_ljf[u'加班时长(分)'].sum()
print(team_qp_ljf_OT)
name_qp_ljf = team_qp_ljf[u'姓名'].drop_duplicates()
name_qp_ljf_num = name_qp_ljf.count() #统计XX测试组人数
print(name_qp_ljf_num)
team_qp_jd = df[df[u'所在中心'] == u'XX事业部/测试部/XX测试部/XX测试组']
team_qp_jd_OT = team_qp_jd[u'加班时长(分)'].sum()
print(team_qp_jd_OT)
name_qp_jd = team_qp_jd[u'姓名'].drop_duplicates()
name_qp_jd_num = name_qp_jd.count() #统计XX测试组人数
print(name_qp_jd_num)
team_qp_gg = df[df[u'所在中心'] == u'XX事业部/测试部/XX测试部/XX测试组']
team_qp_gg_OT = team_qp_gg[u'加班时长(分)'].sum()
print(team_qp_gg_OT)
name_qp_gg = team_qp_gg[u'姓名'].drop_duplicates()
name_qp_gg_num = name_qp_gg.count() #统计XX测试组人数
print(name_qp_gg_num)
team_qp_leader = df[df[u'所在中心'] == u'XX事业部/测试部/XX测试部']
team_qp_leader_OT = team_qp_leader[u'加班时长(分)'].sum()
print(team_qp_leader_OT)
name_qp_leader = team_qp_leader[u'姓名'].drop_duplicates()
name_qp_leader_num = name_qp_leader.count()
print(name_qp_leader_num)
team_qp_OT = sum([team_qp_leader_OT,team_qp_dfqp_OT,team_qp_jg_OT,team_qp_gg_OT,
team_qp_ljf_OT,team_qp_jd_OT]) #XX测试部加班时长总和
print(team_qp_OT)
team_qp_num = sum([name_qp_jg_num,name_qp_gg_num,name_qp_jd_num,name_qp_ljf_num,
name_qp_leader_num,name_qp_dfqp_num]) #XX测试部人数统计
team_zdh = df[df[u'所在中心'] == u'XX事业部/测试部/XX开发组']
team_zdh_OT = team_zdh[u'加班时长(分)'].sum()
print(team_zdh_OT)
name_zdh = team_zdh[u'姓名'].drop_duplicates()
name_zdh_num = name_zdh.count() #统计XX测试组人数
print(name_zdh_num)
date = df[df[u'工作类型'] == u'工作日'] #筛选工作日天数
date01 = date[u'日期'].drop_duplicates() #去掉工作日天数中的重复数据
date_num = date01.count() #工作类型为工作日的数据统计
print(date_num)
#判断当月是否有法定节假日
#re = df[u'加班时长(分)'].isnull().count() #统计备注列表为空值的数量,isnull()函数没有发挥作用(why?)
re = sum(df[u'备注'].isnull()) #统计备注列表为空值的数量
print('re:',re)
re_list = df[u'姓名'].count() #统计任意列的总行数
print('re_list:',re_list)
if re ==re_list: #判断备注列表的空值数量是否等于总行数,是,则没有法定节假日
#re = df[u'备注'].isnull() #统计备注列表为空值的数量
#if True in re: #只能判断备注列表中包含空值,计算结果不准确
date_fd_num = 0 #当月法定节假日为0
else:
date_fd = df[df[u'备注'] == u'法定节假日'] #筛选法定节假日
date_fd01 = date_fd[u'日期'].drop_duplicates()
date_fd_num = date_fd01.count() #工作类型为法定节假日的数据统计
print(date_fd_num)
data_total = date_num - date_fd_num #计算当月的工作日天数
print('workday:',data_total)
wb = load_workbook(r'E:\pythonstudy\excel\data\sample.xlsx') #加载excel表
print wb.get_sheet_names() #打印各表的表名
ws3 = wb.create_sheet("sheet2") #创建新表
ws3["A1"] = '部门'
ws3["B1"] = '加班总和' #单元格赋值
ws3["C1"] = '人数'
ws3["D1"] = '工作日天数'
ws3["E1"] = '平均加班时长(分)'
ws3.column_dimensions['A'].width = 30 #设置列宽
ws3.column_dimensions['B'].width = 10
ws3.column_dimensions['C'].width = 10
ws3.column_dimensions['D'].width = 20
ws3.column_dimensions['E'].width = 20
ws3["A2"] = '测试部'
ws3["A3"] = 'XX测试部'
ws3["A4"] = 'XX测试部\XX测试一组'
ws3["A5"] = 'XX测试部\XX测试二组'
ws3["A6"] = 'XX测试部\XX测试组'
ws3["A7"] = 'XX测试部\XX测试组'
ws3["A8"] = 'XX测试部\XX测试组'
ws3["A9"] = 'XX测试部'
ws3["A10"] = 'XX测试部\XX棋牌测试组'
ws3["A11"] = 'XX测试部\XX测试组'
ws3["A12"] = 'XX测试部\XX测试组'
ws3["A13"] = 'XX测试部\XX测试组'
ws3["A14"] = 'XX测试部\XX测试组'
ws3["A15"] = 'XX开发组'
ws3['B2'] = total_OT
ws3['B3'] = team_dz_OT
ws3['B4'] = team_dz_cp01_OT
ws3['B5'] = team_dz_cp02_OT
ws3['B6'] = team_dz_yy_OT
ws3['B7'] = team_dz_xxm_OT
ws3['B8'] = team_dz_gj_OT
ws3['B9'] = team_qp_OT
ws3['B10'] = team_qp_dfqp_OT
ws3['B11'] = team_qp_jg_OT
ws3['B12'] = team_qp_ljf_OT
ws3['B13'] = team_qp_jd_OT
ws3['B14'] = team_qp_gg_OT
ws3['B15'] = team_zdh_OT
ws3['C2'] = df_name_total
ws3['C3'] = team_dz_num #未去掉休产假人数
ws3['C4'] = name_dz_cp01_num
ws3['C5'] = name_dz_cp02_num
ws3['C6'] = name_dz_yy_num
ws3['C7'] = name_dz_xxm_num
ws3['C8'] = name_dz_gj_num
ws3['C9'] = team_qp_num
ws3['C10'] = name_qp_dfqp_num
ws3['C11'] = name_qp_jg_num
ws3['C12'] = name_qp_ljf_num
ws3['C13'] = name_qp_jd_num
ws3['C14'] = name_qp_gg_num
ws3['C15'] = name_zdh_num
ws3['D2'] = data_total #同一列表赋相同值,需要找寻更简单的方法
ws3['D3'] = data_total
ws3['D4'] = data_total
ws3['D5'] = data_total
ws3['D6'] = data_total
ws3['D7'] = data_total
ws3['D8'] = data_total
ws3['D9'] = data_total
ws3['D10'] = data_total
ws3['D11'] = data_total
ws3['D12'] = data_total
ws3['D13'] = data_total
ws3['D14'] = data_total
ws3['D15'] = data_total
ws3['E2'] = ws3['B2'].value//ws3['C2'].value//ws3['D2'].value #计算平均加班时长,且除取整数
ws3['E3'] = ws3['B3'].value//ws3['C3'].value//ws3['D3'].value
ws3['E4'] = ws3['B4'].value//ws3['C4'].value//ws3['D4'].value
ws3['E5'] = ws3['B5'].value//ws3['C5'].value//ws3['D5'].value
ws3['E6'] = ws3['B6'].value//ws3['C6'].value//ws3['D6'].value
ws3['E7'] = ws3['B7'].value//ws3['C7'].value//ws3['D7'].value
ws3['E8'] = ws3['B8'].value//ws3['C8'].value//ws3['D8'].value
ws3['E9'] = ws3['B9'].value//ws3['C9'].value//ws3['D9'].value
ws3['E10'] = ws3['B10'].value//ws3['C10'].value//ws3['D10'].value
ws3['E11'] = ws3['B11'].value//ws3['C11'].value//ws3['D11'].value
ws3['E12'] = ws3['B12'].value//ws3['C12'].value//ws3['D12'].value
ws3['E13'] = ws3['B13'].value//ws3['C13'].value//ws3['D13'].value
ws3['E14'] = ws3['B14'].value//ws3['C14'].value//ws3['D14'].value
ws3['E15'] = ws3['B15'].value//ws3['C15'].value//ws3['D15'].value
wb.save('E:\pythonstudy\excel\data\sample01.xlsx')