纯代码,也够用。
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from matplotlib import rcParams
import warnings
# Suppress every warning category for the rest of the process.
warnings.filterwarnings('ignore')

# Chinese-capable font fallbacks, an ASCII minus sign, and white backgrounds
# for both the figure and the axes.
plt.rcParams.update({
    'font.sans-serif': ['SimHei', 'Microsoft YaHei', 'Arial Unicode MS'],
    'axes.unicode_minus': False,
    'figure.facecolor': 'white',
    'axes.facecolor': 'white',
})

# Ten-colour palette shared by all charts; also installed as the seaborn
# default palette.
modern_colors = [
    '#FF6B6B', '#4ECDC4', '#45B7D1', '#96CEB4', '#FFEAA7',
    '#DDA0DD', '#98D8C8', '#F7DC6F', '#BB8FCE', '#85C1E9',
]
sns.set_palette(modern_colors)
def read_and_process_data(file_path="历史故障集合.xlsx", sheet_name="历史故障合集"):
    """Load the fault-history worksheet and keep rows with a numeric duration.

    Args:
        file_path: Path of the Excel workbook to read. Defaults to the
            workbook name this function has always used.
        sheet_name: Worksheet to load from the workbook. Defaults to the
            sheet name this function has always used.

    Returns:
        A DataFrame whose '故障时长' column has been converted with
        ``pd.to_numeric(..., errors='coerce')`` and from which rows with a
        missing or unconvertible duration have been dropped. ``None`` is
        returned, after printing a message, when the column is absent or
        when reading/processing raises any exception.
    """
    duration_col = '故障时长'
    try:
        df = pd.read_excel(file_path, sheet_name=sheet_name)
        if duration_col not in df.columns:
            print("❌ 未找到'故障时长'列")
            return None
        # Non-numeric cells become NaN here and are removed just below.
        df[duration_col] = pd.to_numeric(df[duration_col], errors='coerce')
        return df.dropna(subset=[duration_col])
    except Exception as e:
        # Deliberately broad: the contract of this function is "None on any
        # failure", so the error is reported on stdout instead of propagating.
        print(f"❌ 读取文件错误: {e}")
        return None
def _draw_category_duration_bars(ax, top_categories):
    """Draw total fault duration per category as vertical bars on *ax*.

    Each bar is labelled with its total and with its share of the durations
    summed over the categories that are shown.
    """
    positions = range(len(top_categories))
    bars = ax.bar(positions, top_categories['sum'],
                  color=modern_colors[:len(top_categories)], alpha=0.8,
                  edgecolor='white', linewidth=2)
    ax.set_title('系统类别故障时长TOP10', fontsize=14, fontweight='bold', pad=20)
    ax.set_xlabel('系统类别', fontsize=12, fontweight='bold')
    ax.set_ylabel('故障时长(小时)', fontsize=12, fontweight='bold')
    ax.set_xticks(positions)
    ax.set_xticklabels(top_categories.index, rotation=45, ha='right')

    # Value and percentage label just above every bar.
    shown_total = top_categories['sum'].sum()
    for rect in bars:
        value = rect.get_height()
        share = (value / shown_total) * 100
        ax.text(rect.get_x() + rect.get_width() / 2., value + value * 0.01,
                f'{value:.1f}h\n({share:.1f}%)',
                ha='center', va='bottom', fontsize=10, fontweight='bold')

    ax.grid(axis='y', alpha=0.3, linestyle='--')
    for side in ('top', 'right'):
        ax.spines[side].set_visible(False)


def _draw_category_count_pie(ax, top_categories):
    """Draw the number of faults per category as an exploded pie on *ax*."""
    slice_count = len(top_categories)
    wedges, _labels, pct_texts = ax.pie(
        top_categories['count'],
        labels=top_categories.index,
        autopct='%1.1f%%',
        colors=modern_colors[:slice_count],
        startangle=90,
        explode=[0.05] * slice_count,
        shadow=True,
    )
    ax.set_title('系统类别故障次数分布', fontsize=14, fontweight='bold', pad=20)

    # Make the percentage texts readable on the coloured wedges.
    for pct in pct_texts:
        pct.set_color('white')
        pct.set_fontweight('bold')
        pct.set_fontsize(10)

    # The legend carries the absolute counts.
    legend_entries = [
        f'{cat}: {count}次'
        for cat, count in zip(top_categories.index, top_categories['count'])
    ]
    ax.legend(wedges, legend_entries, loc="center left",
              bbox_to_anchor=(1, 0, 0.5, 1))


def _draw_category_mean_bars(ax, top_categories):
    """Draw mean fault duration per category as horizontal bars on *ax*.

    Categories are ordered by ascending mean.
    """
    means = top_categories['mean'].sort_values(ascending=True)
    rows = np.arange(len(means))
    bars = ax.barh(rows, means.values,
                   color=modern_colors[:len(means)], alpha=0.8,
                   edgecolor='white', linewidth=2)
    ax.set_title('系统类别平均故障时长', fontsize=14, fontweight='bold', pad=20)
    ax.set_xlabel('平均故障时长(小时)', fontsize=12, fontweight='bold')
    ax.set_ylabel('系统类别', fontsize=12, fontweight='bold')
    ax.set_yticks(rows)
    ax.set_yticklabels(means.index)

    # Value label to the right of every bar.
    for rect in bars:
        length = rect.get_width()
        ax.text(length + length * 0.02, rect.get_y() + rect.get_height() / 2.,
                f'{length:.2f}h', ha='left', va='center',
                fontsize=10, fontweight='bold')

    ax.grid(axis='x', alpha=0.3, linestyle='--')
    for side in ('top', 'right'):
        ax.spines[side].set_visible(False)


def create_first_chart_group(df_clean):
    """Show the first figure: fault analysis grouped by system category.

    The figure has three panels built from the ten categories with the
    largest total duration: total duration (bars), fault count (pie) and
    mean duration (horizontal bars).

    Args:
        df_clean: DataFrame with the columns '系统类别' and '故障时长'.

    Returns:
        The per-category statistics for all categories: a DataFrame indexed
        by '系统类别' with the columns 'count', 'sum' and 'mean', rounded to
        two decimals.
    """
    fig, axes = plt.subplots(1, 3, figsize=(20, 6))
    fig.suptitle('📊 系统类别故障分析报告', fontsize=18, fontweight='bold', y=1.02)

    # Per-category statistics and the ten largest categories by total duration.
    grouped = df_clean.groupby('系统类别')['故障时长']
    category_stats = grouped.agg(['count', 'sum', 'mean']).round(2)
    top_categories = category_stats.sort_values('sum', ascending=False).head(10)

    duration_ax, count_ax, mean_ax = axes
    _draw_category_duration_bars(duration_ax, top_categories)
    _draw_category_count_pie(count_ax, top_categories)
    _draw_category_mean_bars(mean_ax, top_categories)

    plt.tight_layout()
    plt.show()
    return category_stats
def _draw_system_duration_bars(ax, top_systems):
    """Draw total fault duration per system as horizontal bars on *ax*.

    Each bar is labelled with its total and with its share of the durations
    summed over the systems that are shown.
    """
    rows = np.arange(len(top_systems))
    bars = ax.barh(rows, top_systems['sum'],
                   color=modern_colors[:len(top_systems)], alpha=0.8,
                   edgecolor='white', linewidth=2)
    ax.set_title('系统故障时长TOP10', fontsize=14, fontweight='bold', pad=20)
    ax.set_xlabel('故障时长(小时)', fontsize=12, fontweight='bold')
    ax.set_ylabel('系统', fontsize=12, fontweight='bold')
    ax.set_yticks(rows)
    ax.set_yticklabels(top_systems.index)

    # Value and percentage label to the right of every bar.
    shown_total = top_systems['sum'].sum()
    for rect in bars:
        length = rect.get_width()
        share = (length / shown_total) * 100
        ax.text(length + length * 0.02, rect.get_y() + rect.get_height() / 2.,
                f'{length:.1f}h ({share:.1f}%)',
                ha='left', va='center', fontsize=10, fontweight='bold')

    ax.grid(axis='x', alpha=0.3, linestyle='--')
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)


def _draw_duration_histogram(ax, durations):
    """Draw a 20-bin histogram of *durations* with mean and median markers."""
    counts, _edges, patches = ax.hist(durations, bins=20, alpha=0.7,
                                      color='#4ECDC4', edgecolor='white',
                                      linewidth=2)
    ax.set_title('故障时长分布', fontsize=14, fontweight='bold', pad=20)
    ax.set_xlabel('故障时长(小时)', fontsize=12, fontweight='bold')
    ax.set_ylabel('故障次数', fontsize=12, fontweight='bold')

    # Vertical reference lines for the mean and the median.
    mean_val = durations.mean()
    median_val = durations.median()
    ax.axvline(mean_val, color='red', linestyle='--', linewidth=2, alpha=0.8,
               label=f'平均值: {mean_val:.2f}h')
    ax.axvline(median_val, color='orange', linestyle='--', linewidth=2, alpha=0.8,
               label=f'中位数: {median_val:.2f}h')

    # Count label above every non-empty bin; the vertical offset is the same
    # for all bins, so it is computed once.
    label_offset = counts.max() * 0.01
    for count, patch in zip(counts, patches):
        if count > 0:
            ax.text(patch.get_x() + patch.get_width() / 2.,
                    patch.get_height() + label_offset,
                    f'{int(count)}', ha='center', va='bottom',
                    fontsize=9, fontweight='bold')

    ax.legend()
    ax.grid(axis='y', alpha=0.3, linestyle='--')
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)


def _draw_event_level_combo(ax, df_clean):
    """Draw fault count (bars) and total duration (line) per event level.

    When *df_clean* has no '事件定级' column a placeholder text is drawn
    instead.
    """
    if '事件定级' in df_clean.columns:
        level_counts = df_clean['事件定级'].value_counts()
        level_duration = df_clean.groupby('事件定级')['故障时长'].sum()
        # Durations in the same order as the bars (value_counts order).
        ordered_duration = level_duration.loc[level_counts.index]

        x_pos = range(len(level_counts))
        bars = ax.bar(x_pos, level_counts.values,
                      color=modern_colors[:len(level_counts)], alpha=0.7,
                      edgecolor='white', linewidth=2, label='故障次数')

        # Secondary y-axis for the total duration.
        twin = ax.twinx()
        twin.plot(x_pos, ordered_duration.values,
                  color='red', marker='o', linewidth=3, markersize=8,
                  label='故障时长', alpha=0.8)

        ax.set_title('事件定级分析', fontsize=14, fontweight='bold', pad=20)
        ax.set_xlabel('事件定级', fontsize=12, fontweight='bold')
        ax.set_ylabel('故障次数', fontsize=12, fontweight='bold', color='blue')
        twin.set_ylabel('故障时长(小时)', fontsize=12, fontweight='bold', color='red')
        ax.set_xticks(x_pos)
        ax.set_xticklabels(level_counts.index)

        # Count and percentage label above every bar.
        total_count = level_counts.sum()
        for rect in bars:
            height = rect.get_height()
            share = (height / total_count) * 100
            ax.text(rect.get_x() + rect.get_width() / 2., height + height * 0.02,
                    f'{int(height)}\n({share:.1f}%)',
                    ha='center', va='bottom', fontsize=10, fontweight='bold')

        # Duration label above every line marker. Series.max() is used so an
        # empty series yields NaN instead of raising.
        label_offset = level_duration.max() * 0.02
        for idx, val in enumerate(ordered_duration.values):
            twin.text(idx, val + label_offset, f'{val:.1f}h',
                      ha='center', va='bottom', fontsize=10,
                      fontweight='bold', color='red')

        # One legend combining the handles of both y-axes.
        handles1, labels1 = ax.get_legend_handles_labels()
        handles2, labels2 = twin.get_legend_handles_labels()
        ax.legend(handles1 + handles2, labels1 + labels2, loc='upper right')

        # The twin axes draws its own frame on top of the primary axes, so its
        # top spine has to be hidden as well; its right spine is kept because
        # it carries the duration scale.
        twin.spines['top'].set_visible(False)
    else:
        ax.text(0.5, 0.5, '事件定级数据不可用', ha='center', va='center',
                transform=ax.transAxes, fontsize=12,
                bbox=dict(boxstyle="round,pad=0.3", facecolor="lightgray"))

    ax.grid(axis='y', alpha=0.3, linestyle='--')
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)


def create_second_chart_group(df_clean):
    """Show the second figure: fault analysis per system.

    The figure has three panels: total duration of the ten systems with the
    largest total (horizontal bars), a histogram of all durations, and a
    combined count/duration chart per event level (or a placeholder when the
    '事件定级' column is missing).

    Args:
        df_clean: DataFrame with the columns '系统' and '故障时长';
            '事件定级' is optional.

    Returns:
        The per-system statistics for all systems: a DataFrame indexed by
        '系统' with the columns 'count', 'sum' and 'mean', rounded to two
        decimals.
    """
    fig, axes = plt.subplots(1, 3, figsize=(20, 6))
    fig.suptitle('🔧 系统详细故障分析报告', fontsize=18, fontweight='bold', y=1.02)

    # Per-system statistics and the ten largest systems by total duration.
    system_stats = df_clean.groupby('系统')['故障时长'].agg(['count', 'sum', 'mean']).round(2)
    system_top10 = system_stats.sort_values('sum', ascending=False).head(10)

    _draw_system_duration_bars(axes[0], system_top10)
    _draw_duration_histogram(axes[1], df_clean['故障时长'])
    _draw_event_level_combo(axes[2], df_clean)

    plt.tight_layout()
    plt.show()
    return system_stats
def _print_top3_by_duration(stats, total_duration):
    """Print the three rows of *stats* with the largest 'sum', ranked from 1."""
    top3 = stats.sort_values('sum', ascending=False).head(3)
    for rank, (name, row) in enumerate(top3.iterrows(), 1):
        # A zero total would otherwise print "nan%".
        share = (row['sum'] / total_duration) * 100 if total_duration else 0.0
        # iterrows() returns each row as a single-dtype Series, so an integer
        # count next to float columns comes back as a float; cast it back so
        # it prints as "2" rather than "2.0".
        print(f" {rank}. {name}: {row['sum']:.2f}小时 ({share:.1f}%) - {int(row['count'])}次故障")


def print_summary_statistics(df_clean, category_stats, system_stats):
    """Print a text summary of the fault data to stdout.

    Args:
        df_clean: DataFrame with a numeric '故障时长' column.
        category_stats: Per-category statistics with 'count' and 'sum'
            columns, indexed by category name.
        system_stats: Per-system statistics with 'count' and 'sum' columns,
            indexed by system name.

    Returns:
        None. The report contains the overall totals, mean, median and
        maximum duration, followed by the top three categories and the top
        three systems by total duration, each with its share of the overall
        duration and its fault count.
    """
    durations = df_clean['故障时长']
    total_duration = durations.sum()
    total_incidents = len(df_clean)
    rule = "=" * 80

    print("\n" + rule)
    print("📋 故障分析汇总报告")
    print(rule)

    print("\n🏆 关键指标:")
    print(f" ├─ 故障总时长: {total_duration:.2f} 小时")
    print(f" ├─ 故障总次数: {total_incidents} 次")
    print(f" ├─ 平均故障时长: {durations.mean():.2f} 小时")
    print(f" ├─ 故障时长中位数: {durations.median():.2f} 小时")
    print(f" └─ 最长单次故障: {durations.max():.2f} 小时")

    print("\n🏅 TOP3 系统类别(按故障时长):")
    _print_top3_by_duration(category_stats, total_duration)

    print("\n🔧 TOP3 系统(按故障时长):")
    _print_top3_by_duration(system_stats, total_duration)

    print("\n" + rule)
# Main workflow
def main():
    """Run the whole analysis: load the data, show both figures, print the summary.

    Returns early, without drawing anything, when the data cannot be loaded
    (``read_and_process_data`` returned ``None``) or when no row with a valid
    duration is left after cleaning.
    """
    print("🚀 开始分析历史故障数据...")

    # Load and clean the data.
    df_clean = read_and_process_data()
    if df_clean is None:
        return
    if df_clean.empty:
        # Nothing to plot: avoid opening blank figures and printing NaN statistics.
        print("⚠️ 没有有效的故障时长记录,已跳过图表和汇总统计")
        return
    print(f"✅ 数据加载成功,共 {len(df_clean)} 条有效记录")

    # First figure: analysis by system category.
    print("\n📊 生成第一组图表:系统类别分析...")
    category_stats = create_first_chart_group(df_clean)

    # Second figure: analysis by system.
    print("\n📊 生成第二组图表:系统详细分析...")
    system_stats = create_second_chart_group(df_clean)

    # Text summary.
    print_summary_statistics(df_clean, category_stats, system_stats)
    print("\n🎉 分析完成!")


# Run the main program
if __name__ == "__main__":
    main()
输出样式
image.png