多图表分析EXCEL

之前做分析时，一直是一个图输出一个表；这次升级为多图表一起输出，作下记录。
代码
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from matplotlib import rcParams
import warnings
import platform

# Install a global "ignore" filter: every warning raised after this point
# (from any library) is suppressed for the rest of the process.
# NOTE(review): presumably meant to keep the printed report clean — it also
# hides deprecation and data-handling warnings, so confirm this is intended.
warnings.filterwarnings('ignore')

def set_chinese_font():
    """Configure matplotlib's sans-serif font list for Chinese text.

    The candidate fonts are chosen from the value of ``platform.system()``:
    one list for Windows, one for macOS ("Darwin"), and a third list for
    every other platform (treated as Linux). ``axes.unicode_minus`` is then
    switched off.

    Returns:
        None. The function only mutates ``plt.rcParams``.
    """
    # Font candidates keyed by the name platform.system() reports.
    fonts_by_os = {
        "Windows": ['SimHei', 'Microsoft YaHei'],
        "Darwin": ['Arial Unicode MS', 'Heiti SC'],  # macOS
    }
    # Anything that is neither Windows nor macOS uses the Linux list.
    fallback_fonts = ['SimHei', 'Noto Sans CJK SC']

    os_name = platform.system()
    plt.rcParams['font.sans-serif'] = fonts_by_os.get(os_name, fallback_fonts)
    # NOTE(review): usually disabled because CJK fonts may lack the Unicode
    # minus glyph, which would otherwise show up as an empty box on axes.
    plt.rcParams['axes.unicode_minus'] = False

# Apply the platform-specific font settings before any figure is created.
set_chinese_font()

# Excel workbook and sheet to analyse.
file_path = "历史故障集合.xlsx"
sheet_name = "历史故障合集"

# The whole report runs inside one try block so the script never ends with a
# traceback. NOTE: the generic handler at the bottom therefore also catches
# errors raised during analysis/plotting, not only errors from reading the file.
try:
    # Load the requested sheet.
    df = pd.read_excel(file_path, sheet_name=sheet_name)

    print("=" * 60)
    print("📊 历史故障数据分析报告")
    print("=" * 60)

    # Basic shape of the data.
    print("📋 数据概览:")
    print(f"   总记录数: {len(df)}")
    print(f"   字段数量: {len(df.columns)}")

    # Everything below needs the fault-duration column.
    if '故障时长' in df.columns:
        # Coerce durations to numbers; unparseable cells become NaN.
        df['故障时长'] = pd.to_numeric(df['故障时长'], errors='coerce')

        # Drop rows without a usable duration. The explicit copy makes
        # df_clean an independent frame, so the columns assigned to it later
        # (时间 / 月份) never write through to a view of df.
        df_clean = df.dropna(subset=['故障时长']).copy()

        # Headline figures, computed once and reused in the summary below.
        durations = df_clean['故障时长']
        total_duration = durations.sum()
        valid_records = len(df_clean)
        mean_duration = durations.mean()
        max_duration = durations.max()

        print("\n⏱️  故障时长统计:")
        print(f"   有效记录数: {valid_records}")
        print(f"   故障总时长: {total_duration:.2f} 小时")
        print(f"   平均故障时长: {mean_duration:.2f} 小时")
        print(f"   最长故障时长: {max_duration:.2f} 小时")

        # One figure holding a 2x3 grid of charts.
        plt.figure(figsize=(20, 16))

        # 1. TOP10 by system category
        if '系统类别' in df.columns:
            category_stats = df_clean.groupby('系统类别')['故障时长'].agg(['count', 'sum', 'mean']).round(2)
            category_top10 = category_stats.sort_values('sum', ascending=False).head(10)

            print("\n🏆 按系统类别故障时长TOP10:")
            print(category_top10)

            # Bar chart: total duration per category (top 10).
            plt.subplot(2, 3, 1)
            bars = plt.bar(range(len(category_top10)), category_top10['sum'],
                           color=sns.color_palette("viridis", len(category_top10)))
            plt.title('系统类别故障时长TOP10', fontsize=14, fontweight='bold', pad=20)
            plt.xlabel('系统类别', fontsize=12)
            plt.ylabel('故障时长(小时)', fontsize=12)
            plt.xticks(range(len(category_top10)), category_top10.index, rotation=45, ha='right')

            # Value label on top of each bar.
            for bar in bars:
                height = bar.get_height()
                plt.text(bar.get_x() + bar.get_width() / 2., height,
                         f'{height:.1f}h', ha='center', va='bottom', fontsize=10)

            # Pie chart: fault counts for the same top-10 categories.
            plt.subplot(2, 3, 2)
            colors = sns.color_palette("Set3", len(category_top10))
            wedges, texts, autotexts = plt.pie(category_top10['count'],
                                               labels=category_top10.index,
                                               autopct='%1.1f%%',
                                               colors=colors,
                                               startangle=90)
            plt.title('系统类别故障次数分布', fontsize=14, fontweight='bold', pad=20)

            # Style the percentage labels inside the wedges.
            for autotext in autotexts:
                autotext.set_color('white')
                autotext.set_fontweight('bold')

        # 2. TOP10 by system
        if '系统' in df.columns:
            system_stats = df_clean.groupby('系统')['故障时长'].agg(['count', 'sum', 'mean']).round(2)
            system_top10 = system_stats.sort_values('sum', ascending=False).head(10)

            print("\n🔧 按系统故障时长TOP10:")
            print(system_top10)

            # Horizontal bar chart: total duration per system (top 10).
            plt.subplot(2, 3, 3)
            y_pos = np.arange(len(system_top10))
            bars = plt.barh(y_pos, system_top10['sum'],
                            color=sns.color_palette("plasma", len(system_top10)))
            plt.title('系统故障时长TOP10', fontsize=14, fontweight='bold', pad=20)
            plt.xlabel('故障时长(小时)', fontsize=12)
            plt.ylabel('系统', fontsize=12)
            plt.yticks(y_pos, system_top10.index)

            # Value label at the end of each bar.
            for bar in bars:
                width = bar.get_width()
                plt.text(width, bar.get_y() + bar.get_height() / 2.,
                         f'{width:.1f}h', ha='left', va='center', fontsize=10)

        # 3. Histogram of fault durations, with the mean marked.
        plt.subplot(2, 3, 4)
        plt.hist(durations, bins=30, alpha=0.7, color='skyblue', edgecolor='black')
        plt.title('故障时长分布', fontsize=14, fontweight='bold', pad=20)
        plt.xlabel('故障时长(小时)', fontsize=12)
        plt.ylabel('频次', fontsize=12)
        plt.axvline(mean_duration, color='red', linestyle='--',
                    label=f'平均值: {mean_duration:.2f}h')
        plt.legend()

        # 4. Event severity distribution (only if the column exists)
        if '事件定级' in df.columns:
            plt.subplot(2, 3, 5)
            level_counts = df_clean['事件定级'].value_counts()
            bars = plt.bar(level_counts.index, level_counts.values,
                           color=sns.color_palette("coolwarm", len(level_counts)))
            plt.title('事件定级分布', fontsize=14, fontweight='bold', pad=20)
            plt.xlabel('事件定级', fontsize=12)
            plt.ylabel('故障次数', fontsize=12)

            # Count label on top of each bar.
            for bar in bars:
                height = bar.get_height()
                plt.text(bar.get_x() + bar.get_width() / 2., height,
                         f'{int(height)}', ha='center', va='bottom', fontsize=10)

        # 5. Monthly trend (only if the time column exists)
        if '时间' in df.columns:
            plt.subplot(2, 3, 6)
            try:
                # Only the conversion/grouping is guarded: these are the steps
                # that fail on malformed time values.
                df_clean['时间'] = pd.to_datetime(df_clean['时间'])
                df_clean['月份'] = df_clean['时间'].dt.to_period('M')
                monthly_stats = df_clean.groupby('月份')['故障时长'].agg(['count', 'sum'])
            except (ValueError, TypeError, AttributeError, OverflowError):
                # Unparseable time data: show a placeholder instead of a chart.
                # (A bare except here would also swallow KeyboardInterrupt.)
                plt.text(0.5, 0.5, '时间数据格式异常\n无法生成趋势图',
                         ha='center', va='center', transform=plt.gca().transAxes,
                         fontsize=12, bbox=dict(boxstyle="round,pad=0.3", facecolor="lightgray"))
            else:
                x_pos = range(len(monthly_stats))
                plt.plot(x_pos, monthly_stats['sum'], marker='o', linewidth=2,
                         markersize=6, color='#e74c3c', label='故障时长')
                plt.title('月度故障时长趋势', fontsize=14, fontweight='bold', pad=20)
                plt.xlabel('月份', fontsize=12)
                plt.ylabel('故障时长(小时)', fontsize=12)
                plt.xticks(x_pos, [str(m) for m in monthly_stats.index], rotation=45)
                plt.grid(True, alpha=0.3)
                plt.legend()

        plt.tight_layout(pad=3.0)
        plt.show()

        # Detailed tables printed after the figure.
        print("\n📈 详细统计分析:")
        print("=" * 60)

        if '系统类别' in df.columns and '系统' in df.columns:
            # Category x system cross table (count and total duration).
            cross_analysis = df_clean.pivot_table(
                values='故障时长',
                index='系统类别',
                columns='系统',
                aggfunc=['count', 'sum'],
                fill_value=0
            )
            print("\n🔍 系统类别与系统交叉分析（故障次数）:")
            print(cross_analysis['count'].head())

        # Key indicators summary.
        print("\n📋 关键指标总结:")
        print(f"   • 故障总时长: {total_duration:.2f} 小时")
        print(f"   • 故障总次数: {valid_records} 次")
        print(f"   • 平均故障时长: {mean_duration:.2f} 小时")
        print(f"   • 故障时长中位数: {durations.median():.2f} 小时")
        print(f"   • 最严重故障: {max_duration:.2f} 小时")

        # The top-10 frames are already sorted by total duration (descending),
        # so their first index entry is the worst offender. The emptiness check
        # avoids an IndexError when no row has a valid duration.
        if '系统类别' in df.columns and not category_top10.empty:
            worst_category = category_top10.index[0]
            print(f"   • 故障最多系统类别: {worst_category}")

        if '系统' in df.columns and not system_top10.empty:
            worst_system = system_top10.index[0]
            print(f"   • 故障最多系统: {worst_system}")

    else:
        print("❌ 未找到'故障时长'列，请检查数据格式")
        print(f"   当前列名: {list(df.columns)}")

except FileNotFoundError:
    print(f"❌ 文件 {file_path} 未找到")
    print("   请确认文件路径正确，文件存在于当前目录中")
except Exception as e:
    print(f"❌ 读取文件时出现错误: {str(e)}")
    print("   请检查文件格式和sheet名称是否正确")
输出
image.png
多图表的多析EXCEL

多图表的多析EXCEL

相关阅读更多精彩内容

友情链接更多精彩内容