这个分析维度不错,收藏

纯代码,也够用。

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from matplotlib import rcParams
import warnings

# Suppress all warnings for the rest of the process.
warnings.filterwarnings('ignore')

# Plot defaults: fonts that contain CJK glyphs (tried in order), an ASCII
# minus sign so negative tick labels render with those fonts, and a plain
# white background for both the figure and the axes.
plt.rcParams.update({
    'font.sans-serif': ['SimHei', 'Microsoft YaHei', 'Arial Unicode MS'],
    'axes.unicode_minus': False,
    'figure.facecolor': 'white',
    'axes.facecolor': 'white',
})

# Ten-colour palette shared by every chart in this script; also installed as
# the seaborn default palette.
modern_colors = [
    '#FF6B6B', '#4ECDC4', '#45B7D1', '#96CEB4', '#FFEAA7',
    '#DDA0DD', '#98D8C8', '#F7DC6F', '#BB8FCE', '#85C1E9',
]
sns.set_palette(modern_colors)


def read_and_process_data(file_path="历史故障集合.xlsx", sheet_name="历史故障合集"):
    """Load the fault workbook and keep only rows with a numeric duration.

    Args:
        file_path: Path of the Excel workbook to read. Defaults to the
            workbook name this script was originally written for.
        sheet_name: Name of the worksheet to load from that workbook.

    Returns:
        A DataFrame whose '故障时长' column has been converted to numbers,
        with rows dropped where the conversion produced a missing value.
        Returns None (after printing a message) when the workbook cannot be
        read or the '故障时长' column is absent.
    """
    try:
        df = pd.read_excel(file_path, sheet_name=sheet_name)

        if '故障时长' not in df.columns:
            print("❌ 未找到'故障时长'列")
            return None

        # Non-numeric cells become NaN instead of raising, so they can be
        # dropped together with cells that were empty to begin with.
        df['故障时长'] = pd.to_numeric(df['故障时长'], errors='coerce')

        # Hand back an explicit copy so later edits by callers never touch
        # the frame that was read here.
        return df.dropna(subset=['故障时长']).copy()

    except Exception as e:
        # Best-effort loader: any read/parse failure is reported and mapped
        # to None so the caller can stop cleanly.
        print(f"❌ 读取文件错误: {e}")
        return None


def create_first_chart_group(df_clean):
    """Render the first figure: three charts of faults per system category.

    Left to right the figure holds a bar chart of total fault duration for
    the ten categories with the largest totals, a pie chart of the fault
    counts of those same categories, and a horizontal bar chart of their
    mean fault duration. The figure is displayed with ``plt.show()``.

    Args:
        df_clean: DataFrame providing the '系统类别' and '故障时长' columns.

    Returns:
        DataFrame indexed by category with 'count', 'sum' and 'mean'
        columns (rounded to two decimals) covering every category, not only
        the ten that are plotted.
    """
    figure, (duration_ax, count_ax, mean_ax) = plt.subplots(1, 3, figsize=(20, 6))
    figure.suptitle('📊 系统类别故障分析报告', fontsize=18, fontweight='bold', y=1.02)

    # Text styles shared by the three charts.
    title_font = dict(fontsize=14, fontweight='bold', pad=20)
    label_font = dict(fontsize=12, fontweight='bold')
    value_font = dict(fontsize=10, fontweight='bold')

    def finish_axes(ax, grid_axis):
        # Dashed grid along the value axis; hide the top and right frame lines.
        ax.grid(axis=grid_axis, alpha=0.3, linestyle='--')
        for side in ('top', 'right'):
            ax.spines[side].set_visible(False)

    # Per-category aggregates, then the ten categories with the largest total.
    category_stats = (
        df_clean.groupby('系统类别')['故障时长']
        .agg(['count', 'sum', 'mean'])
        .round(2)
    )
    category_top10 = category_stats.sort_values('sum', ascending=False).head(10)
    top_n = len(category_top10)
    palette = modern_colors[:top_n]
    slots = range(top_n)

    # Chart 1: total fault duration per category (vertical bars).
    duration_bars = duration_ax.bar(slots, category_top10['sum'], color=palette,
                                    alpha=0.8, edgecolor='white', linewidth=2)
    duration_ax.set_title('系统类别故障时长TOP10', **title_font)
    duration_ax.set_xlabel('系统类别', **label_font)
    duration_ax.set_ylabel('故障时长(小时)', **label_font)
    duration_ax.set_xticks(slots)
    duration_ax.set_xticklabels(category_top10.index, rotation=45, ha='right')

    # Label each bar with its value and its share of the plotted total.
    total_time = category_top10['sum'].sum()
    for rect in duration_bars:
        height = rect.get_height()
        percentage = (height / total_time) * 100
        duration_ax.text(rect.get_x() + rect.get_width() / 2., height + height * 0.01,
                         f'{height:.1f}h\n({percentage:.1f}%)',
                         ha='center', va='bottom', **value_font)

    finish_axes(duration_ax, 'y')

    # Chart 2: fault count per category (pie).
    wedges, _label_texts, pct_texts = count_ax.pie(
        category_top10['count'],
        labels=category_top10.index,
        autopct='%1.1f%%',
        colors=palette,
        startangle=90,
        explode=[0.05] * top_n,
        shadow=True,
    )
    count_ax.set_title('系统类别故障次数分布', **title_font)

    # Percentage captions inside the wedges: white, bold, size 10.
    for pct in pct_texts:
        pct.set_color('white')
        pct.set_fontweight('bold')
        pct.set_fontsize(10)

    # Legend entries carry the absolute fault count of each category.
    legend_labels = [f'{name}: {hits}次' for name, hits in category_top10['count'].items()]
    count_ax.legend(wedges, legend_labels, loc="center left", bbox_to_anchor=(1, 0, 0.5, 1))

    # Chart 3: mean fault duration per category (horizontal bars, ascending).
    category_avg = category_top10['mean'].sort_values(ascending=True)
    rows = np.arange(len(category_avg))
    mean_bars = mean_ax.barh(rows, category_avg.values, color=palette,
                             alpha=0.8, edgecolor='white', linewidth=2)
    mean_ax.set_title('系统类别平均故障时长', **title_font)
    mean_ax.set_xlabel('平均故障时长(小时)', **label_font)
    mean_ax.set_ylabel('系统类别', **label_font)
    mean_ax.set_yticks(rows)
    mean_ax.set_yticklabels(category_avg.index)

    # Value label just past the end of each bar.
    for rect in mean_bars:
        width = rect.get_width()
        mean_ax.text(width + width * 0.02, rect.get_y() + rect.get_height() / 2.,
                     f'{width:.2f}h', ha='left', va='center', **value_font)

    finish_axes(mean_ax, 'x')

    plt.tight_layout()
    plt.show()

    return category_stats


def create_second_chart_group(df_clean):
    """Render the second figure: per-system, duration and event-level charts.

    Left to right the figure holds a horizontal bar chart of total fault
    duration for the ten systems with the largest totals, a 20-bin histogram
    of all fault durations with mean and median marker lines, and a combined
    bar/line chart of fault count and total duration per event level. When
    the '事件定级' column is missing, the third chart shows a placeholder
    message instead. The figure is displayed with ``plt.show()``.

    Args:
        df_clean: DataFrame providing the '系统' and '故障时长' columns and,
            optionally, '事件定级'.

    Returns:
        DataFrame indexed by system with 'count', 'sum' and 'mean' columns
        (rounded to two decimals) covering every system, not only the ten
        that are plotted.
    """
    figure, (system_ax, hist_ax, level_ax) = plt.subplots(1, 3, figsize=(20, 6))
    figure.suptitle('🔧 系统详细故障分析报告', fontsize=18, fontweight='bold', y=1.02)

    # Text styles shared by the three charts.
    title_font = dict(fontsize=14, fontweight='bold', pad=20)
    label_font = dict(fontsize=12, fontweight='bold')
    value_font = dict(fontsize=10, fontweight='bold')

    def finish_axes(ax, grid_axis):
        # Dashed grid along the value axis; hide the top and right frame lines.
        ax.grid(axis=grid_axis, alpha=0.3, linestyle='--')
        for side in ('top', 'right'):
            ax.spines[side].set_visible(False)

    durations = df_clean['故障时长']

    # Per-system aggregates, then the ten systems with the largest total.
    system_stats = (
        df_clean.groupby('系统')['故障时长']
        .agg(['count', 'sum', 'mean'])
        .round(2)
    )
    system_top10 = system_stats.sort_values('sum', ascending=False).head(10)

    # Chart 1: total fault duration per system (horizontal bars).
    rows = np.arange(len(system_top10))
    system_bars = system_ax.barh(rows, system_top10['sum'],
                                 color=modern_colors[:len(system_top10)],
                                 alpha=0.8, edgecolor='white', linewidth=2)
    system_ax.set_title('系统故障时长TOP10', **title_font)
    system_ax.set_xlabel('故障时长(小时)', **label_font)
    system_ax.set_ylabel('系统', **label_font)
    system_ax.set_yticks(rows)
    system_ax.set_yticklabels(system_top10.index)

    # Label each bar with its value and its share of the plotted total.
    total_system_time = system_top10['sum'].sum()
    for rect in system_bars:
        width = rect.get_width()
        percentage = (width / total_system_time) * 100
        system_ax.text(width + width * 0.02, rect.get_y() + rect.get_height() / 2.,
                       f'{width:.1f}h ({percentage:.1f}%)',
                       ha='left', va='center', **value_font)

    finish_axes(system_ax, 'x')

    # Chart 2: histogram of fault durations.
    bin_counts, _bin_edges, bin_rects = hist_ax.hist(durations, bins=20, alpha=0.7,
                                                     color='#4ECDC4', edgecolor='white',
                                                     linewidth=2)
    hist_ax.set_title('故障时长分布', **title_font)
    hist_ax.set_xlabel('故障时长(小时)', **label_font)
    hist_ax.set_ylabel('故障次数', **label_font)

    # Vertical marker lines for the mean and the median.
    mean_val = durations.mean()
    median_val = durations.median()
    hist_ax.axvline(mean_val, color='red', linestyle='--', linewidth=2, alpha=0.8,
                    label=f'平均值: {mean_val:.2f}h')
    hist_ax.axvline(median_val, color='orange', linestyle='--', linewidth=2, alpha=0.8,
                    label=f'中位数: {median_val:.2f}h')

    # Count label above every non-empty bin; the gap is 1% of the tallest bin.
    label_gap = max(bin_counts) * 0.01
    for bin_count, rect in zip(bin_counts, bin_rects):
        if bin_count > 0:
            hist_ax.text(rect.get_x() + rect.get_width() / 2., rect.get_height() + label_gap,
                         f'{int(bin_count)}', ha='center', va='bottom',
                         fontsize=9, fontweight='bold')

    hist_ax.legend()
    finish_axes(hist_ax, 'y')

    # Chart 3: event-level analysis, or a placeholder when the column is absent.
    if '事件定级' not in df_clean.columns:
        level_ax.text(0.5, 0.5, '事件定级数据不可用', ha='center', va='center',
                      transform=level_ax.transAxes, fontsize=12,
                      bbox=dict(boxstyle="round,pad=0.3", facecolor="lightgray"))
    else:
        level_counts = df_clean['事件定级'].value_counts()
        level_duration = df_clean.groupby('事件定级')['故障时长'].sum()
        # Durations re-ordered to follow the order of level_counts.
        ordered_duration = level_duration[level_counts.index].values

        # Bars on the left axis show how many faults each level had.
        slots = range(len(level_counts))
        level_bars = level_ax.bar(slots, level_counts.values,
                                  color=modern_colors[:len(level_counts)], alpha=0.7,
                                  edgecolor='white', linewidth=2, label='故障次数')

        # A line on a second y-axis shows the total duration per level.
        duration_ax = level_ax.twinx()
        duration_ax.plot(slots, ordered_duration,
                         color='red', marker='o', linewidth=3, markersize=8,
                         label='故障时长', alpha=0.8)

        level_ax.set_title('事件定级分析', **title_font)
        level_ax.set_xlabel('事件定级', **label_font)
        level_ax.set_ylabel('故障次数', color='blue', **label_font)
        duration_ax.set_ylabel('故障时长(小时)', color='red', **label_font)
        level_ax.set_xticks(slots)
        level_ax.set_xticklabels(level_counts.index)

        # Count and share of all counted faults above each bar.
        total_count = level_counts.sum()
        for rect in level_bars:
            height = rect.get_height()
            percentage = (height / total_count) * 100
            level_ax.text(rect.get_x() + rect.get_width() / 2., height + height * 0.02,
                          f'{int(height)}\n({percentage:.1f}%)',
                          ha='center', va='bottom', **value_font)

        # Duration value above each point of the line.
        for slot, val in enumerate(ordered_duration):
            duration_ax.text(slot, val + max(level_duration) * 0.02, f'{val:.1f}h',
                             ha='center', va='bottom', color='red', **value_font)

        # One legend combining the entries of both y-axes.
        bar_handles, bar_labels = level_ax.get_legend_handles_labels()
        line_handles, line_labels = duration_ax.get_legend_handles_labels()
        level_ax.legend(bar_handles + line_handles, bar_labels + line_labels,
                        loc='upper right')

    finish_axes(level_ax, 'y')

    plt.tight_layout()
    plt.show()

    return system_stats


def _print_top3(stats, total_duration):
    """Print the three rows of *stats* with the largest 'sum', one per line."""
    top3 = stats.sort_values('sum', ascending=False).head(3)
    for rank, (name, row) in enumerate(top3.iterrows(), 1):
        # Avoid a nan/inf percentage when the overall duration is zero.
        percentage = (row['sum'] / total_duration) * 100 if total_duration else 0.0
        # iterrows() yields each row as a single-dtype Series, so the integer
        # count comes back as a float; cast it so "5" is printed, not "5.0".
        print(f"   {rank}. {name}: {row['sum']:.2f}小时 ({percentage:.1f}%) - {int(row['count'])}次故障")


def print_summary_statistics(df_clean, category_stats, system_stats):
    """Print a text summary of the fault data to standard output.

    The report lists the overall key figures (total, count, mean, median and
    maximum of '故障时长'), followed by the three system categories and the
    three systems with the largest total fault duration.

    Args:
        df_clean: DataFrame providing the numeric '故障时长' column.
        category_stats: DataFrame indexed by system category with 'count'
            and 'sum' columns.
        system_stats: DataFrame indexed by system with 'count' and 'sum'
            columns.

    Returns:
        None.
    """
    print("\n" + "=" * 80)
    print("📋 故障分析汇总报告")
    print("=" * 80)

    durations = df_clean['故障时长']
    total_duration = durations.sum()
    total_incidents = len(df_clean)

    print("\n🏆 关键指标:")
    print(f"   ├─ 故障总时长: {total_duration:.2f} 小时")
    print(f"   ├─ 故障总次数: {total_incidents} 次")
    print(f"   ├─ 平均故障时长: {durations.mean():.2f} 小时")
    print(f"   ├─ 故障时长中位数: {durations.median():.2f} 小时")
    print(f"   └─ 最长单次故障: {durations.max():.2f} 小时")

    # Shares below are relative to the duration of ALL faults, not only the
    # three rows that are listed.
    print("\n🏅 TOP3 系统类别(按故障时长):")
    _print_top3(category_stats, total_duration)

    print("\n🔧 TOP3 系统(按故障时长):")
    _print_top3(system_stats, total_duration)

    print("\n" + "=" * 80)


# Main execution flow
def main():
    """Run the whole analysis: load data, draw both figures, print the summary.

    Stops right after the loading step when ``read_and_process_data``
    returns None.
    """
    print("🚀 开始分析历史故障数据...")

    # Step 1: load and clean the data; nothing else can run without it.
    records = read_and_process_data()
    if records is None:
        return

    record_count = len(records)
    print(f"✅ 数据加载成功,共 {record_count} 条有效记录")

    # Step 2: first figure (per system category).
    print("\n📊 生成第一组图表:系统类别分析...")
    by_category = create_first_chart_group(records)

    # Step 3: second figure (per system).
    print("\n📊 生成第二组图表:系统详细分析...")
    by_system = create_second_chart_group(records)

    # Step 4: text summary built from the aggregates of both figures.
    print_summary_statistics(records, by_category, by_system)

    print("\n🎉 分析完成!")

# Run the analysis only when this file is executed as a script.
if __name__ == "__main__":
    main()

输出样式

(此处原为输出效果截图:image.png)
©著作权归作者所有,转载或内容合作请联系作者
平台声明:文章内容(如有图片或视频亦包括在内)由作者上传并发布,文章内容仅代表作者本人观点,简书系信息发布平台,仅提供信息存储服务。

推荐阅读更多精彩内容