# R语言Excel高级操作完整教程
## 目录
1. 高级包介绍与安装
2. 复杂Excel文件处理
3. 动态数据操作
4. 高级格式化与样式
5. 自动化Excel报告生成
6. 性能优化与大数据处理
7. 企业级应用案例
8. 错误处理与调试
## 1. 高级包介绍与安装
### 核心包安装
```r
# Packages used throughout this tutorial.
required_packages <- c(
  "readxl",       # basic reading
  "writexl",      # basic writing
  "openxlsx",     # advanced workbook operations
  "xlsx",         # alternative backend
  "tidyverse",    # data wrangling
  "data.table",   # high-performance data handling
  "DT",           # interactive tables
  "flextable",    # advanced table formatting
  "officer",      # Office document manipulation
  "rmarkdown",    # report generation
  "knitr",        # document knitting
  "plotly",       # interactive charts
  "highcharter",  # advanced charts
  "formattable",  # table formatting
  "reactable",    # modern interactive tables
  "gt",           # elegant tables
  "kableExtra",   # table styling
  "janitor",      # janitor::clean_names() is called by later examples
  "R6",           # R6::R6Class() is called by later examples
  "yaml"          # read_yaml() is called by the financial report example
)
# Install only what is missing, so re-running the script does not reinstall
# every package each time.
missing_packages <- setdiff(required_packages, rownames(installed.packages()))
if (length(missing_packages) > 0) {
  install.packages(missing_packages)
}
# 加载核心包
library(readxl)
library(writexl)
library(openxlsx)
library(tidyverse)
library(data.table)
library(flextable)
library(officer)
library(rmarkdown)
```
## 2. 复杂Excel文件处理
### 2.1 多工作表批量处理
```r
# Run a processing function over every worksheet of an Excel file.
#
# Args:
#   file_path:           path to the workbook.
#   processing_function: called as processing_function(data, sheet_name = <name>)
#                        once per sheet.
# Returns: a list keyed by sheet name holding each call's result.
process_multiple_sheets <- function(file_path, processing_function) {
  sheet_names <- excel_sheets(file_path)
  outputs <- list()
  for (idx in seq_along(sheet_names)) {
    current <- sheet_names[[idx]]
    cat("Processing sheet:", current, "\n")
    # Read eagerly so the file is touched before the callback runs.
    sheet_tbl <- read_excel(file_path, sheet = current)
    outputs[[current]] <- processing_function(sheet_tbl, sheet_name = current)
  }
  outputs
}
# Example processing function: clean one sheet and report basic quality metrics.
#
# Args:
#   data:       data frame read from a worksheet.
#   sheet_name: name of the worksheet (echoed back in the validation summary).
# Returns: list(data = <cleaned data>, validation = <list of metrics>).
clean_and_validate_data <- function(data, sheet_name) {
  cleaned_data <- data %>%
    # Treat empty strings as missing FIRST, so rows/columns that contain only
    # blanks are recognised as empty by the two steps below.
    mutate(across(where(is.character), ~ na_if(., ""))) %>%
    # Drop rows in which every column is NA.
    filter(!if_all(everything(), is.na)) %>%
    # Drop columns in which every value is NA.
    select(where(~ !all(is.na(.)))) %>%
    # Standardise column names.
    janitor::clean_names()

  # Guard the percentage against 0/0 when nothing is left after cleaning.
  total_cells <- nrow(cleaned_data) * ncol(cleaned_data)
  missing_percentage <- if (total_cells > 0) {
    sum(is.na(cleaned_data)) / total_cells * 100
  } else {
    0
  }

  validation_results <- list(
    sheet_name = sheet_name,
    original_rows = nrow(data),
    cleaned_rows = nrow(cleaned_data),
    missing_percentage = missing_percentage,
    duplicate_rows = sum(duplicated(cleaned_data))
  )

  list(
    data = cleaned_data,
    validation = validation_results
  )
}
# Usage example: run clean_and_validate_data over every sheet of the workbook.
file_path <- "complex_data.xlsx"
processed_results <- process_multiple_sheets(file_path, clean_and_validate_data)
```
### 2.2 动态范围读取
```r
# Convert a 1-based column index to Excel column letters
# (1 -> "A", 26 -> "Z", 27 -> "AA", ...).
.excel_col_letters <- function(index) {
  parts <- character(0)
  while (index > 0) {
    remainder <- (index - 1) %% 26
    parts <- c(LETTERS[remainder + 1], parts)
    index <- (index - 1) %/% 26
  }
  paste(parts, collapse = "")
}

# Detect the bounding box of the non-empty cells in a worksheet.
#
# Args:
#   file_path:  path to the workbook.
#   sheet_name: worksheet to inspect.
# Returns: list(range, first_row, last_row, first_col, last_col). `range` is an
#   A1-style string such as "B3:AA120". For a sheet with no data, `range` is
#   NULL and the four indices are NA.
detect_data_range <- function(file_path, sheet_name) {
  # Anchor the read at A1: without an explicit range readxl drops leading
  # empty rows/columns, which would shift every index computed below.
  full_data <- read_excel(
    file_path,
    sheet = sheet_name,
    col_names = FALSE,
    range = cell_limits(c(1, 1), c(NA, NA))
  )

  has_value <- !is.na(as.matrix(full_data))
  filled_rows <- which(rowSums(has_value) > 0)
  filled_cols <- which(colSums(has_value) > 0)

  # Empty sheet: report "no range" instead of building a string from NA/-Inf.
  if (length(filled_rows) == 0 || length(filled_cols) == 0) {
    return(list(
      range = NULL,
      first_row = NA_integer_,
      last_row = NA_integer_,
      first_col = NA_integer_,
      last_col = NA_integer_
    ))
  }

  first_row <- min(filled_rows)
  last_row <- max(filled_rows)
  first_col <- min(filled_cols)
  last_col <- max(filled_cols)

  # LETTERS only covers A-Z; the helper also handles columns past Z.
  range_string <- paste0(
    .excel_col_letters(first_col), first_row, ":",
    .excel_col_letters(last_col), last_row
  )

  list(
    range = range_string,
    first_row = first_row,
    last_row = last_row,
    first_col = first_col,
    last_col = last_col
  )
}
# Read a worksheet using the cell range reported by detect_data_range().
#
# Args:
#   file_path:  path to the workbook.
#   sheet_name: worksheet to read.
# Returns: list(data = <data read from the range>, range_info = <detection result>).
smart_read_excel <- function(file_path, sheet_name) {
  detected <- detect_data_range(file_path, sheet_name)
  sheet_tbl <- read_excel(
    file_path,
    sheet = sheet_name,
    range = detected$range
  )
  list(data = sheet_tbl, range_info = detected)
}
```
## 3. 动态数据操作
### 3.1 实时数据更新
```r
# Keeps an in-memory copy of an Excel file and reloads it when the file's
# modification time changes.
#
# The original `class ExcelDataManager { ... }` form is not valid R; this is the
# same design expressed as an R6 class, so `ExcelDataManager$new(path)` works.
#
# Fields:
#   private_data:  the data most recently read from (or written to) the file.
#   file_path:     path of the managed workbook.
#   last_modified: modification time recorded at the last read/write.
ExcelDataManager <- R6::R6Class("ExcelDataManager",
  public = list(
    private_data = NULL,
    file_path = NULL,
    last_modified = NULL,

    # Remember the path and load the data once.
    initialize = function(file_path) {
      self$file_path <- file_path
      self$update_data()
    },

    # Reload the file if it changed since the last read.
    update_data = function() {
      current_modified <- file.info(self$file_path)$mtime
      # file.info() yields NA for a missing file; fail with a clear message
      # instead of an obscure "missing value where TRUE/FALSE needed".
      if (is.na(current_modified)) {
        stop("File not found: ", self$file_path, call. = FALSE)
      }
      if (is.null(self$last_modified) || current_modified > self$last_modified) {
        cat("Updating data from file...\n")
        self$private_data <- read_excel(self$file_path)
        self$last_modified <- current_modified
      }
      invisible(self)
    },

    # Return the current data, refreshing from disk first if needed.
    get_data = function() {
      self$update_data()
      self$private_data
    },

    # Append rows to the data and persist the result to the file.
    add_data = function(new_data) {
      self$update_data()
      self$private_data <- rbind(self$private_data, new_data)
      self$save_data()
    },

    # Write the in-memory data to the file and record the new modification
    # time so the next update_data() does not re-read our own write.
    save_data = function() {
      write_xlsx(self$private_data, self$file_path)
      self$last_modified <- file.info(self$file_path)$mtime
      invisible(self)
    }
  )
)
# Usage example: create a manager for "dynamic_data.xlsx" and fetch its data.
data_manager <- ExcelDataManager$new("dynamic_data.xlsx")
current_data <- data_manager$get_data()
```
### 3.2 数据验证与约束
```r
# Build a validator object from a named list of rules.
#
# Each rule is a list with:
#   validate: function(data) returning list(valid = <logical>, message = <text>)
#   severity: "error" routes a failure to `errors`; anything else to `warnings`.
#
# Returns a list with:
#   rules:         the rules as passed in (a snapshot; add_rule() does not update it)
#   validate_data: function(data) -> list(valid, errors, warnings); `valid` is
#                  TRUE when no "error"-severity rule failed
#   add_rule:      function(name, rule) registering a rule for later
#                  validate_data() calls
create_data_validator <- function(validation_rules) {
  run_rules <- function(data) {
    failed_errors <- list()
    failed_warnings <- list()
    for (key in names(validation_rules)) {
      current <- validation_rules[[key]]
      outcome <- current$validate(data)
      if (outcome$valid) {
        next
      }
      if (current$severity == "error") {
        failed_errors[[key]] <- outcome$message
      } else {
        failed_warnings[[key]] <- outcome$message
      }
    }
    list(
      valid = length(failed_errors) == 0,
      errors = failed_errors,
      warnings = failed_warnings
    )
  }

  register_rule <- function(name, rule) {
    # Updates the rule set captured by this closure.
    validation_rules[[name]] <<- rule
  }

  list(
    rules = validation_rules,
    validate_data = run_rules,
    add_rule = register_rule
  )
}
# Predefined validation rules. Each entry has a `validate` function returning
# list(valid, message) and a `severity` of "error" or "warning".
validation_rules <- list(
  # Warn when the data contains duplicated rows.
  no_duplicates = list(
    validate = function(data) {
      n_dupes <- sum(duplicated(data))
      list(
        valid = n_dupes == 0,
        message = paste("Found", n_dupes, "duplicate rows")
      )
    },
    severity = "warning"
  ),
  # Error when any of ID / Name / Value is absent.
  required_columns = list(
    validate = function(data) {
      absent <- setdiff(c("ID", "Name", "Value"), names(data))
      list(
        valid = length(absent) == 0,
        message = paste("Missing required columns:", paste(absent, collapse = ", "))
      )
    },
    severity = "error"
  ),
  # Warn when `Value` holds entries outside [0, 1000]; passes if the column
  # is absent.
  numeric_range = list(
    validate = function(data) {
      if (!("Value" %in% names(data))) {
        return(list(valid = TRUE, message = ""))
      }
      n_outside <- sum(data$Value < 0 | data$Value > 1000, na.rm = TRUE)
      list(
        valid = n_outside == 0,
        message = paste("Found", n_outside, "values outside range [0, 1000]")
      )
    },
    severity = "warning"
  )
)
# Usage example: build a validator from the rules and check a data frame.
# NOTE(review): `my_data` is not defined in this snippet; it must exist beforehand.
validator <- create_data_validator(validation_rules)
validation_result <- validator$validate_data(my_data)
# Print the collected error messages when validation did not pass.
if (!validation_result$valid) {
cat("Validation errors found:\n")
print(validation_result$errors)
}
```
## 4. 高级格式化与样式
### 4.1 企业级表格格式化
```r
# Build a styled flextable for reporting.
#
# Args:
#   data:     data frame to render.
#   title:    caption text, passed to set_caption().
#   subtitle: optional text. When non-empty it is added as a header line above
#             the column headers (it used to be accepted but silently ignored).
# Returns: a flextable object.
create_enterprise_table <- function(data, title = "", subtitle = "") {
  ft <- flextable(data)

  # Add the subtitle before theming so it receives the header styling below.
  has_subtitle <- length(subtitle) == 1 && !is.na(subtitle) && nzchar(subtitle)
  if (has_subtitle) {
    ft <- add_header_lines(ft, values = subtitle)
  }

  ft %>%
    # Caption
    set_caption(title) %>%
    # Base theme
    theme_box() %>%
    # Font family and size
    font(fontname = "Times New Roman", part = "all") %>%
    fontsize(size = 10, part = "all") %>%
    # Alignment: centred header, right-aligned body
    align(align = "center", part = "header") %>%
    align(align = "right", part = "body") %>%
    # Borders
    border_outer(border = fp_border(color = "black", width = 2)) %>%
    border_inner_h(border = fp_border(color = "gray", width = 1)) %>%
    border_inner_v(border = fp_border(color = "gray", width = 1)) %>%
    # Header background
    bg(bg = "lightblue", part = "header") %>%
    # Fit column widths to content
    autofit()
}
# Apply background colours to one column of a flextable based on conditions.
#
# Args:
#   ft:         flextable to colour.
#   column:     name of the column to test and colour.
#   conditions: list of list(rule = function(x) <row selector>, color = <colour>);
#               applied in order, so later conditions paint over earlier ones.
# Returns: the flextable with the background colours applied.
apply_conditional_formatting <- function(ft, column, conditions) {
  paint_one <- function(tbl, cond) {
    matching_rows <- cond$rule(tbl$body$dataset[[column]])
    bg(tbl, bg = cond$color, i = matching_rows, j = column)
  }
  Reduce(paint_one, conditions, ft)
}
# Usage example: build the formatted table.
# NOTE(review): `sales_data` is not defined in this snippet; it must exist beforehand
# and contain a "Sales_Amount" column for the conditional formatting below.
formatted_table <- create_enterprise_table(
data = sales_data,
title = "Sales Report Q4 2024",
subtitle = "Monthly breakdown by region"
)
# Conditional formatting: values above 1000 in light green, below 500 in light coral.
conditions <- list(
list(
rule = function(x) x > 1000,
color = "lightgreen"
),
list(
rule = function(x) x < 500,
color = "lightcoral"
)
)
formatted_table <- apply_conditional_formatting(
formatted_table,
"Sales_Amount",
conditions
)
```
### 4.2 动态样式生成
```r
# Derive a per-column style description from the column types of `data`.
#
# Args:
#   data:         data frame to describe.
#   style_config: list providing `numeric_format`, `text_wrap` and `date_format`.
# Returns: a list keyed by column name. Numeric, character and Date columns get
#   an entry; columns of any other type are left out.
create_dynamic_styles <- function(data, style_config) {
  styles <- list()

  for (col in names(data)) {
    col_data <- data[[col]]

    if (is.numeric(col_data)) {
      # Numeric column: number format plus a quantile-based colour scale.
      styles[[col]] <- list(
        type = "numeric",
        format = style_config$numeric_format,
        conditional_colors = generate_color_scale(col_data)
      )
    } else if (is.character(col_data)) {
      # Character column: longest observed string. An all-NA (or empty) column
      # reports 0 rather than the -Inf (plus warning) that max() would give.
      observed_lengths <- nchar(col_data)
      observed_lengths <- observed_lengths[!is.na(observed_lengths)]
      longest <- if (length(observed_lengths) > 0) max(observed_lengths) else 0L
      styles[[col]] <- list(
        type = "character",
        max_length = longest,
        text_wrap = style_config$text_wrap
      )
    } else if (inherits(col_data, "Date")) {
      # Date column: date format only.
      styles[[col]] <- list(
        type = "date",
        format = style_config$date_format
      )
    }
  }

  styles
}
# Build a three-band colour scale from the quartiles of a numeric vector.
#
# Args:
#   values: numeric vector; NA entries are ignored.
# Returns: list(low, medium, high), each list(threshold, color), with thresholds
#   at the 25%, 50% and 75% quantiles; NULL when no non-NA value is present.
generate_color_scale <- function(values) {
  observed <- values[!is.na(values)]
  if (length(observed) == 0) {
    return(NULL)
  }

  cut_points <- quantile(observed, probs = c(0.25, 0.5, 0.75))
  # red, yellow, teal
  band_colors <- c(low = "#FF6B6B", medium = "#FFE66D", high = "#4ECDC4")

  bands <- lapply(seq_along(band_colors), function(k) {
    list(threshold = cut_points[k], color = band_colors[[k]])
  })
  names(bands) <- names(band_colors)
  bands
}
```
## 5. 自动化Excel报告生成
### 5.1 智能报告生成器
```r
# Builds a multi-sheet Excel report with openxlsx.
#
# Typical use: ExcelReportGenerator$new(), then add_section() / add_chart(),
# then generate_report(output_path).
ExcelReportGenerator <- R6::R6Class("ExcelReportGenerator",
  public = list(
    # Create an empty workbook. `template_path` is stored but not read by any
    # method of this class.
    initialize = function(template_path = NULL) {
      private$workbook <- createWorkbook()
      private$template_path <- template_path
      private$sections <- list()
      private$charts <- list()
    },

    # Register a section. `name` becomes the worksheet name; `type` is "data"
    # (content written as-is) or "summary" (title block followed by content).
    add_section = function(name, content, type = "data") {
      private$sections[[name]] <- list(
        content = content,
        type = type
      )
    },

    # Register the data behind a chart. Only the data is written to the
    # "Charts" sheet; `chart_type` is stored but no chart object is drawn.
    add_chart = function(name, chart_data, chart_type = "line") {
      private$charts[[name]] <- list(
        data = chart_data,
        type = chart_type
      )
    },

    # Assemble the workbook and save it to `output_path` (overwriting).
    generate_report = function(output_path) {
      # These helpers live in `private`, so they must be reached through
      # `private$`; `self$add_worksheets` does not exist.
      private$add_worksheets()
      private$write_sections()
      private$add_charts()
      private$apply_styles()
      saveWorkbook(private$workbook, output_path, overwrite = TRUE)
      cat("Report generated successfully:", output_path, "\n")
    }
  ),
  private = list(
    workbook = NULL,
    template_path = NULL,
    sections = list(),
    charts = list(),

    # Create the "Summary" sheet, one sheet per section and, if charts were
    # registered, a "Charts" sheet.
    add_worksheets = function() {
      wanted <- unique(c("Summary", names(private$sections)))
      if (length(private$charts) > 0) {
        wanted <- unique(c(wanted, "Charts"))
      }
      # Skip sheets that already exist: a section may itself be called
      # "Summary", and generate_report() may be called more than once.
      for (sheet_name in setdiff(wanted, names(private$workbook))) {
        addWorksheet(private$workbook, sheet_name)
      }
    },

    # Write every registered section to its worksheet.
    write_sections = function() {
      for (section_name in names(private$sections)) {
        section <- private$sections[[section_name]]
        if (section$type == "data") {
          writeData(private$workbook, section_name, section$content)
        } else if (section$type == "summary") {
          private$write_summary_section(section_name, section$content)
        }
      }
    },

    # Write a title block (rows 1-2) followed by the content from row 3.
    write_summary_section = function(sheet_name, content) {
      writeData(private$workbook, sheet_name,
        data.frame(Title = "Executive Summary"),
        startRow = 1, startCol = 1
      )
      writeData(private$workbook, sheet_name, content,
        startRow = 3, startCol = 1
      )
    },

    # Stack the data of every registered chart on the "Charts" sheet, leaving
    # a gap between consecutive tables.
    add_charts = function() {
      if (length(private$charts) == 0) {
        return(invisible(NULL))
      }
      chart_sheet <- "Charts"
      current_row <- 1
      for (chart_name in names(private$charts)) {
        chart <- private$charts[[chart_name]]
        writeData(private$workbook, chart_sheet,
          chart$data,
          startRow = current_row
        )
        # Actual chart creation could be added here; openxlsx has limited
        # chart support.
        current_row <- current_row + nrow(chart$data) + 3
      }
    },

    # Apply automatic column widths and a header style to row 1 of each sheet.
    apply_styles = function() {
      # Build the style once instead of once per sheet.
      header_style <- createStyle(
        fontSize = 14,
        fontColour = "#000000",
        fgFill = "#D3D3D3",
        halign = "center",
        valign = "center",
        textDecoration = "bold"
      )
      # names() lists the sheets of an in-memory Workbook; getSheetNames()
      # expects a file path and cannot be used on the object.
      for (sheet_name in names(private$workbook)) {
        setColWidths(private$workbook, sheet_name,
          cols = 1:20, widths = "auto"
        )
        addStyle(private$workbook, sheet_name,
          style = header_style,
          rows = 1, cols = 1:20, gridExpand = TRUE
        )
      }
    }
  )
)
# Usage example.
# NOTE(review): `sales_data`, `summary_data` and `chart_data` are not defined in
# this snippet; they must exist beforehand.
report_generator <- ExcelReportGenerator$new()
# Register the data sections.
report_generator$add_section("Sales_Data", sales_data, "data")
report_generator$add_section("Summary", summary_data, "summary")
# Register chart data.
report_generator$add_chart("Sales_Trend", chart_data, "line")
# Generate the report file.
report_generator$generate_report("quarterly_report.xlsx")
```
### 5.2 模板化报告系统
```r
# Build a report template object from a configuration list.
#
# Args:
#   template_config: list with a `sheets` element; each sheet is a list with
#     `name` and optionally `title`, `tab_color`, `start_row`, `start_col`,
#     `data_mapping` (column names to write; NULL writes all columns) and
#     `styles` (list of style specs with font_size, font_color, bg_color,
#     align, valign, decoration, rows, cols).
# Returns: list(config, generate_from_template, apply_template_config,
#   fill_template_data, apply_sheet_styles).
#
# The helpers are closures that call each other directly: a plain list has no
# `self`, so `self$...` calls fail with "object 'self' not found".
create_report_template <- function(template_config) {
  # Apply each style spec to its rows/cols on one sheet.
  apply_sheet_styles <- function(wb, sheet_name, styles) {
    for (style in styles) {
      addStyle(wb, sheet_name,
        style = createStyle(
          fontSize = style$font_size,
          fontColour = style$font_color,
          fgFill = style$bg_color,
          halign = style$align,
          valign = style$valign,
          textDecoration = style$decoration
        ),
        rows = style$rows,
        cols = style$cols
      )
    }
  }

  # Create the configured worksheets and set their tab colours.
  apply_template_config <- function(wb, config) {
    for (sheet_config in config$sheets) {
      addWorksheet(wb, sheet_config$name)
      if (!is.null(sheet_config$tab_color)) {
        setTabColour(wb, sheet_config$name, sheet_config$tab_color)
      }
    }
  }

  # Write title, data and styles for every configured sheet.
  fill_template_data <- function(wb, data, config) {
    for (sheet_config in config$sheets) {
      sheet_name <- sheet_config$name

      # Write the title text itself into row 1. Wrapping it in a data frame
      # would put the column header "Title" in row 1 and the text in row 2,
      # so row-1 styles would miss the title and start_row = 2 would
      # overwrite it.
      if (!is.null(sheet_config$title)) {
        writeData(wb, sheet_name, sheet_config$title,
          startRow = 1, startCol = 1
        )
      }

      # A NULL data_mapping means "use all data".
      mapped_data <- if (is.null(sheet_config$data_mapping)) {
        data
      } else {
        data[sheet_config$data_mapping]
      }
      start_row <- if (is.null(sheet_config$start_row)) 1 else sheet_config$start_row
      start_col <- if (is.null(sheet_config$start_col)) 1 else sheet_config$start_col
      writeData(wb, sheet_name, mapped_data,
        startRow = start_row,
        startCol = start_col
      )

      if (!is.null(sheet_config$styles)) {
        apply_sheet_styles(wb, sheet_name, sheet_config$styles)
      }
    }
  }

  # Create a workbook from the template, fill it and save it (overwriting).
  generate_from_template <- function(data, output_path) {
    wb <- createWorkbook()
    apply_template_config(wb, template_config)
    fill_template_data(wb, data, template_config)
    saveWorkbook(wb, output_path, overwrite = TRUE)
  }

  list(
    config = template_config,
    generate_from_template = generate_from_template,
    apply_template_config = apply_template_config,
    fill_template_data = fill_template_data,
    apply_sheet_styles = apply_sheet_styles
  )
}
# Example template configuration: two sheets, the first with a styled title cell.
template_config <- local({
  # Style for the title cell (row 1, column 1) of the summary sheet.
  heading_style <- list(
    rows = 1,
    cols = 1,
    font_size = 16,
    font_color = "#000000",
    bg_color = "#D3D3D3",
    align = "center",
    valign = "center",
    decoration = "bold"
  )

  summary_sheet <- list(
    name = "Executive_Summary",
    title = "Executive Summary Report",
    tab_color = "#FF6B6B",
    start_row = 3,
    start_col = 1,
    data_mapping = c("Total_Sales", "Growth_Rate", "Top_Product"),
    styles = list(heading_style)
  )

  analysis_sheet <- list(
    name = "Detailed_Analysis",
    title = "Detailed Analysis",
    tab_color = "#4ECDC4",
    start_row = 2,
    start_col = 1,
    # No column subset given (intended to mean "use all data").
    data_mapping = NULL,
    styles = list()
  )

  list(sheets = list(summary_sheet, analysis_sheet))
})
# Usage example: build the template and generate "template_report.xlsx".
# NOTE(review): `analysis_data` is not defined in this snippet; it must exist beforehand.
template <- create_report_template(template_config)
template$generate_from_template(analysis_data, "template_report.xlsx")
```
## 6. 性能优化与大数据处理
### 6.1 大数据Excel处理
```r
# Processes an Excel workbook in row chunks and writes the combined result.
#
# Assumes each sheet has its header in row 1 with data directly below it.
BigDataExcelProcessor <- R6::R6Class("BigDataExcelProcessor",
  public = list(
    # chunk_size: number of data rows handled per chunk.
    initialize = function(chunk_size = 10000) {
      stopifnot(is.numeric(chunk_size), length(chunk_size) == 1, chunk_size >= 1)
      private$chunk_size <- chunk_size
      private$temp_files <- list()
    },

    # Read every sheet of `input_path` chunk by chunk, apply
    # `processing_function` to each chunk, merge the results, write them to
    # `output_path` and return the merged data.
    process_large_file = function(input_path, output_path, processing_function) {
      # Remove temp files even if a chunk fails half-way through.
      on.exit(self$cleanup_temp_files(), add = TRUE)

      file_info <- self$get_file_info(input_path)
      chunks <- self$split_into_chunks(file_info)

      processed_chunks <- vector("list", length(chunks))
      for (i in seq_along(chunks)) {
        cat("Processing chunk", i, "of", length(chunks), "\n")
        chunk_data <- self$read_chunk(input_path, chunks[[i]])
        processed_chunk <- processing_function(chunk_data)
        # `[i] <- list(...)` keeps the slot even when the result is NULL.
        processed_chunks[i] <- list(processed_chunk)
        # Save each processed chunk to a temp file.
        temp_file <- tempfile(fileext = ".rds")
        saveRDS(processed_chunk, temp_file)
        private$temp_files[[i]] <- temp_file
      }

      final_result <- self$merge_chunks(processed_chunks)
      self$write_large_file(final_result, output_path)
      final_result
    },

    # Collect name, row count and column count for every sheet.
    get_file_info = function(file_path) {
      sheets <- excel_sheets(file_path)
      sheet_info <- list()
      for (sheet in sheets) {
        # A small sample is enough to learn the number of columns.
        sample_data <- read_excel(file_path, sheet = sheet, n_max = 100)
        estimated_rows <- self$estimate_total_rows(file_path, sheet)
        sheet_info[[sheet]] <- list(
          name = sheet,
          estimated_rows = estimated_rows,
          cols = ncol(sample_data)
        )
      }
      sheet_info
    },

    # Number of data rows in a sheet. Despite the name this is an exact count:
    # the earlier heuristics (probing at row 100000, file size / 1000) could
    # under-count, and every row beyond the estimate was silently dropped.
    # readxl has no row-count API, so the sheet is read once as text.
    estimate_total_rows = function(file_path, sheet) {
      nrow(read_excel(file_path, sheet = sheet, col_types = "text"))
    },

    # Turn the per-sheet row counts into a list of chunk descriptors
    # (sheet, start_row, end_row), with rows numbered from the first data row.
    split_into_chunks = function(file_info) {
      chunks <- list()
      for (sheet_name in names(file_info)) {
        sheet <- file_info[[sheet_name]]
        total_rows <- sheet$estimated_rows
        num_chunks <- ceiling(total_rows / private$chunk_size)
        # seq_len() yields no iterations for an empty sheet; 1:0 would run twice.
        for (i in seq_len(num_chunks)) {
          start_row <- (i - 1) * private$chunk_size + 1
          end_row <- min(i * private$chunk_size, total_rows)
          chunks[[paste(sheet_name, "chunk", i, sep = "_")]] <- list(
            sheet = sheet_name,
            start_row = start_row,
            end_row = end_row
          )
        }
      }
      chunks
    },

    # Read the data rows start_row..end_row of one sheet.
    read_chunk = function(file_path, chunk_info) {
      # Read the header once and reuse it; otherwise every chunk would consume
      # its own first row as column names, losing a row and renaming columns.
      header <- names(read_excel(file_path, sheet = chunk_info$sheet, n_max = 0))
      if (length(header) == 0) {
        return(read_excel(file_path, sheet = chunk_info$sheet, n_max = 0))
      }
      read_excel(
        file_path,
        sheet = chunk_info$sheet,
        # Skip the header row plus the (start_row - 1) data rows before the chunk.
        skip = chunk_info$start_row,
        n_max = chunk_info$end_row - chunk_info$start_row + 1,
        col_names = header
      )
    },

    # Combine the processed chunks into one table.
    merge_chunks = function(processed_chunks) {
      if (length(processed_chunks) == 1) {
        return(processed_chunks[[1]])
      }
      # data.table gives an efficient row-bind; fill = TRUE tolerates chunks
      # with differing columns.
      dt_list <- lapply(processed_chunks, as.data.table)
      rbindlist(dt_list, fill = TRUE)
    },

    # Write the result, in chunks when it exceeds chunk_size rows.
    write_large_file = function(data, output_path) {
      if (nrow(data) > private$chunk_size) {
        self$write_in_chunks(data, output_path)
      } else {
        write_xlsx(data, output_path)
      }
    },

    # Write `data` to a single "Data" sheet, chunk_size rows at a time.
    write_in_chunks = function(data, output_path) {
      wb <- createWorkbook()
      addWorksheet(wb, "Data")
      num_chunks <- ceiling(nrow(data) / private$chunk_size)
      for (i in seq_len(num_chunks)) {
        start_row <- (i - 1) * private$chunk_size + 1
        end_row <- min(i * private$chunk_size, nrow(data))
        chunk_data <- data[start_row:end_row, ]
        # The header occupies sheet row 1, so data row k belongs on sheet row
        # k + 1. Only the first chunk writes column names; later chunks must
        # not repeat them or overwrite the previous chunk's last row.
        writeData(wb, "Data", chunk_data,
          startRow = if (i == 1) 1 else start_row + 1,
          startCol = 1,
          colNames = (i == 1)
        )
      }
      saveWorkbook(wb, output_path, overwrite = TRUE)
    },

    # Delete the temp files created during processing.
    cleanup_temp_files = function() {
      for (temp_file in private$temp_files) {
        if (file.exists(temp_file)) {
          file.remove(temp_file)
        }
      }
      private$temp_files <- list()
    }
  ),
  private = list(
    chunk_size = 10000,
    temp_files = list()
  )
)
# Usage example: a processor that works in chunks of 5000 rows.
processor <- BigDataExcelProcessor$new(chunk_size = 5000)
# Processing function applied to each chunk.
process_function <- function(data) {
# Drop rows with a missing Value, then add a column scaled by 1.1.
processed_data <- data %>%
filter(!is.na(Value)) %>%
mutate(Processed_Value = Value * 1.1)
return(processed_data)
}
# Process the large file and write the result.
result <- processor$process_large_file(
"large_data.xlsx",
"processed_large_data.xlsx",
process_function
)
```
### 6.2 内存优化策略
```r
# Processes an Excel file chunk by chunk, appending each processed chunk to
# the output file and releasing memory between chunks.
#
# Assumes the (first) sheet has its header in row 1 with data directly below.
MemoryOptimizedProcessor <- R6::R6Class("MemoryOptimizedProcessor",
  public = list(
    initialize = function() {
      private$memory_threshold <- 0.8 # 80% usage threshold
    },

    # Read `input_path` in chunks, apply `processing_function` to each chunk
    # and append the results to `output_path`.
    # `processing_function` must return a data frame; NULL is reserved as the
    # end-of-data signal from process_chunk().
    process_with_memory_management = function(input_path, output_path,
                                              processing_function) {
      self$monitor_memory()
      chunk_size <- self$create_optimal_chunks(input_path)

      # create_optimal_chunks() returns one chunk SIZE, not a list of chunks,
      # so the file is walked by row offset until no rows are left. Iterating
      # over that single number handled only the first chunk.
      rows_done <- 0
      chunk_index <- 0
      repeat {
        if (self$is_memory_high()) {
          self$cleanup_memory()
        }
        chunk_result <- self$process_chunk(
          input_path, chunk_size, processing_function,
          skip_rows = rows_done
        )
        if (is.null(chunk_result)) {
          break
        }
        chunk_index <- chunk_index + 1
        # Write the result immediately; the first chunk creates the file.
        self$append_to_output(chunk_result, output_path, chunk_index == 1)
        rows_done <- rows_done + chunk_size
        rm(chunk_result)
        gc()
      }
      invisible(NULL)
    },

    # Print and return memory usage as a fraction: vector-heap Mb in use
    # divided by the Mb level at which R triggers the next collection.
    # This is R's own heap pressure, not system-wide memory usage.
    monitor_memory = function() {
      memory_info <- gc()
      # Row 2 = Vcells; column 2 = used (Mb); column 4 = gc trigger (Mb).
      memory_usage <- memory_info[2, 2] / memory_info[2, 4]
      cat("Memory usage:", round(memory_usage * 100, 2), "%\n")
      memory_usage
    },

    is_memory_high = function() {
      memory_usage <- self$monitor_memory()
      memory_usage > private$memory_threshold
    },

    cleanup_memory = function() {
      cat("Cleaning up memory...\n")
      gc()
      Sys.sleep(1) # give the system time to reclaim memory
    },

    # Choose a chunk size (in rows) from the available headroom and the
    # per-row size of a 100-row sample. Returns a single number in [100, 10000].
    create_optimal_chunks = function(file_path) {
      available_memory <- self$get_available_memory()
      sample_data <- read_excel(file_path, n_max = 100)
      # max(..., 1) avoids dividing by zero for an empty sample.
      row_size <- as.numeric(object.size(sample_data)) / max(nrow(sample_data), 1)
      optimal_chunk_size <- floor(available_memory * 0.5 / row_size)
      # Keep the chunk size within a sensible range.
      optimal_chunk_size <- max(100, min(optimal_chunk_size, 10000))
      cat("Optimal chunk size:", optimal_chunk_size, "\n")
      optimal_chunk_size
    },

    # Headroom in BYTES between current vector-heap use and the next gc
    # trigger (a simplified proxy for available memory).
    get_available_memory = function() {
      memory_info <- gc()
      (memory_info[2, 4] - memory_info[2, 2]) * 1024^2
    },

    # Read up to `chunk_size` data rows after skipping `skip_rows` data rows,
    # and return processing_function(<rows>).
    # Returns NULL when skip_rows > 0 and no rows remain.
    process_chunk = function(file_path, chunk_size, processing_function,
                             skip_rows = 0) {
      if (skip_rows == 0) {
        data <- read_excel(file_path, n_max = chunk_size)
      } else {
        # Reuse the real header so later chunks keep the column names and do
        # not lose their first row to header detection.
        header <- names(read_excel(file_path, n_max = 0))
        if (length(header) == 0) {
          return(NULL)
        }
        data <- read_excel(
          file_path,
          skip = skip_rows + 1, # header row + rows already processed
          n_max = chunk_size,
          col_names = header
        )
        if (nrow(data) == 0) {
          return(NULL)
        }
      }
      processing_function(data)
    },

    append_to_output = function(data, output_path, is_first = FALSE) {
      if (is_first) {
        # Create a new file.
        write_xlsx(data, output_path)
      } else {
        # Append to the existing file.
        self$append_to_excel(data, output_path)
      }
    },

    # Append by read-combine-rewrite: the whole output file is re-read and
    # re-written for each chunk.
    append_to_excel = function(data, output_path) {
      existing_data <- read_excel(output_path)
      combined_data <- rbind(existing_data, data)
      write_xlsx(combined_data, output_path)
    }
  ),
  private = list(
    memory_threshold = 0.8
  )
)
```
## 7. 企业级应用案例
### 7.1 财务报告自动化系统
```r
# Loads Excel data sources, runs registered calculations and writes a
# financial report workbook.
FinancialReportSystem <- R6::R6Class("FinancialReportSystem",
  public = list(
    # config_path: path to a YAML configuration file. The parsed config is
    # stored but not read by any method of this class.
    initialize = function(config_path) {
      private$config <- self$load_config(config_path)
      private$data_sources <- list()
      private$calculations <- list()
    },

    # Register an Excel file (optionally a specific sheet) under `name`.
    add_data_source = function(name, file_path, sheet_name = NULL) {
      private$data_sources[[name]] <- list(
        file_path = file_path,
        sheet_name = sheet_name
      )
    },

    # Register a calculation: function(data), where `data` is the named list
    # of loaded sources.
    add_calculation = function(name, calculation_function) {
      private$calculations[[name]] <- calculation_function
    },

    # Load data, run the calculations, build the report, save it to
    # `output_path` and return the report list.
    generate_financial_report = function(output_path, report_type = "monthly") {
      data <- self$load_all_data_sources()
      calculations <- self$execute_calculations(data)
      report <- self$create_financial_report(calculations, report_type)
      self$save_report(report, output_path)
      report
    },

    # Parse the YAML configuration file.
    load_config = function(config_path) {
      # Namespaced call: the yaml package is not attached by this tutorial, so
      # a bare read_yaml() fails with "could not find function".
      yaml::read_yaml(config_path)
    },

    # Read every registered source into a named list.
    load_all_data_sources = function() {
      data <- list()
      for (source_name in names(private$data_sources)) {
        source <- private$data_sources[[source_name]]
        if (is.null(source$sheet_name)) {
          data[[source_name]] <- read_excel(source$file_path)
        } else {
          data[[source_name]] <- read_excel(source$file_path,
            sheet = source$sheet_name
          )
        }
      }
      data
    },

    # Run every registered calculation against the loaded data.
    execute_calculations = function(data) {
      calculations <- list()
      for (calc_name in names(private$calculations)) {
        calc_function <- private$calculations[[calc_name]]
        calculations[[calc_name]] <- calc_function(data)
      }
      calculations
    },

    # Assemble the report structure.
    create_financial_report = function(calculations, report_type) {
      list(
        type = report_type,
        timestamp = Sys.time(),
        calculations = calculations,
        summary = self$create_summary(calculations),
        charts = self$create_charts(calculations)
      )
    },

    # Summarise revenue, expenses, net profit and profit margin (percent)
    # from calculations$revenue$amount and calculations$expenses$amount.
    create_summary = function(calculations) {
      # Compute each total once instead of repeating the sums.
      total_revenue <- sum(calculations$revenue$amount, na.rm = TRUE)
      total_expenses <- sum(calculations$expenses$amount, na.rm = TRUE)
      net_profit <- total_revenue - total_expenses
      # A margin is undefined without revenue; report NA rather than NaN/Inf.
      profit_margin <- if (total_revenue == 0) {
        NA_real_
      } else {
        net_profit / total_revenue * 100
      }
      list(
        total_revenue = total_revenue,
        total_expenses = total_expenses,
        net_profit = net_profit,
        profit_margin = profit_margin
      )
    },

    # Select the calculation results used as chart data. An entry is NULL
    # when the matching calculation was not registered.
    create_charts = function(calculations) {
      list(
        revenue_trend = calculations$revenue,
        expense_breakdown = calculations$expenses,
        profit_analysis = calculations$profit
      )
    },

    # Write the report to an Excel workbook at `output_path` (overwriting).
    save_report = function(report, output_path) {
      wb <- createWorkbook()

      # Summary sheet
      addWorksheet(wb, "Executive_Summary")
      summary_data <- data.frame(
        Metric = names(report$summary),
        Value = unlist(report$summary)
      )
      writeData(wb, "Executive_Summary", summary_data)

      # One sheet per calculation
      for (calc_name in names(report$calculations)) {
        addWorksheet(wb, calc_name)
        writeData(wb, calc_name, report$calculations[[calc_name]])
      }

      # Chart data sheet
      addWorksheet(wb, "Charts")
      self$add_charts_to_workbook(wb, report$charts)

      saveWorkbook(wb, output_path, overwrite = TRUE)
    },

    # Stack each chart's title and data on the "Charts" sheet.
    add_charts_to_workbook = function(wb, charts) {
      current_row <- 1
      for (chart_name in names(charts)) {
        chart_data <- charts[[chart_name]]
        # Skip charts with no data (e.g. no "profit" calculation registered);
        # writeData(NULL) and nrow(NULL) would otherwise abort the report.
        if (is.null(chart_data)) {
          next
        }
        # Title block
        writeData(wb, "Charts",
          data.frame(Title = chart_name),
          startRow = current_row
        )
        # Chart data
        writeData(wb, "Charts", chart_data,
          startRow = current_row + 2
        )
        current_row <- current_row + nrow(chart_data) + 5
      }
    }
  ),
  private = list(
    config = NULL,
    data_sources = list(),
    calculations = list()
  )
)
# Usage example: create the system from a YAML config file.
financial_system <- FinancialReportSystem$new("financial_config.yaml")
# Register the data sources (file plus sheet name).
financial_system$add_data_source("revenue", "revenue_data.xlsx", "Monthly")
financial_system$add_data_source("expenses", "expense_data.xlsx", "Details")
# Register the calculations: revenue summed per Month, expenses summed per Category.
financial_system$add_calculation("revenue", function(data) {
return(data$revenue %>%
group_by(Month) %>%
summarise(amount = sum(Revenue, na.rm = TRUE)))
})
financial_system$add_calculation("expenses", function(data) {
return(data$expenses %>%
group_by(Category) %>%
summarise(amount = sum(Amount, na.rm = TRUE)))
})
# Generate the report file.
report <- financial_system$generate_financial_report("financial_report.xlsx")
```
### 7.2 销售分析仪表板
```r
# 销售分析仪表板系统
SalesDashboardSystem <- R6::R6Class("SalesDashboardSystem",
public = list(
initialize = function() {
private$data <- NULL
private$metrics <- list()
private$filters <- list()
},
load_data = function(file_path) {
private$data <- read_excel(file_path)
self$preprocess_data()
},
# Register a named metric: its calculation function plus a description.
# `function` is a reserved word in R, so the element name must be quoted with
# backticks; a bare `function = ...` is a syntax error.
add_metric = function(name, calculation_function, description = "") {
  private$metrics[[name]] <- list(
    `function` = calculation_function,
    description = description
  )
},
add_filter = function(name, filter_function) {
private$filters[[name]] <- filter_function
},
apply_filters = function(filter_values) {
filtered_data <- private$data
for (filter_name in names(filter_values)) {
if (filter_name %in% names(private$filters)) {
filter_function <- private$filters[[filter_name]]
filtered_data <- filter_function(filtered_data, filter_values[[filter_name]])
}
}
return(filtered_data)
},
# Evaluate every registered metric against `filtered_data`, falling back to
# the full data set when it is NULL. Returns a list keyed by metric name.
calculate_metrics = function(filtered_data = NULL) {
  if (is.null(filtered_data)) {
    filtered_data <- private$data
  }
  results <- list()
  for (metric_name in names(private$metrics)) {
    # `function` is a reserved word, so `$function` is a syntax error; the
    # element is fetched by its string name instead.
    metric_function <- private$metrics[[metric_name]][["function"]]
    results[[metric_name]] <- metric_function(filtered_data)
  }
  return(results)
},
generate_dashboard = function(output_path, filter_values = NULL) {
# 应用过滤器
filtered_data <- self$apply_filters(filter_values)
# 计算指标
metrics <- self$calculate_metrics(filtered_data)
# 创建仪表板
dashboard <- self$create_dashboard(filtered_data, metrics)
# 保存仪表板
self$save_dashboard(dashboard, output_path)
return(dashboard)
},
preprocess_data = function() {
# 数据预处理
private$data <- private$data %>%
# 标准化列名
janitor::clean_names() %>%
# 处理日期
mutate(across(contains("date"), as.Date)) %>%
# 处理数值
mutate(across(contains("amount"), as.numeric)) %>%
# 移除重复
distinct()
},
create_dashboard = function(data, metrics) {
# 创建仪表板结构
dashboard <- list(
timestamp = Sys.time(),
data_summary = self$create_data_summary(data),
metrics = metrics,
charts = self$create_dashboard_charts(data),
tables = self$create_dashboard_tables(data)
)
return(dashboard)
},
create_data_summary = function(data) {
summary <- list(
total_records = nrow(data),
date_range = range(data$date, na.rm = TRUE),
total_sales = sum(data$amount, na.rm = TRUE),
unique_customers = n_distinct(data$customer_id),
unique_products = n_distinct(data$product_id)
)
return(summary)
},
create_dashboard_charts = function(data) {
charts <- list(
# 销售趋势
sales_trend = data %>%
group_by(date) %>%
summarise(daily_sales = sum(amount, na.rm = TRUE)) %>%
arrange(date),
# 专业咨询
# www.rdaizuo.com www.rdaima.com
# 专业R语言辅导 | Python编程 | 数据分析 Data analysis | 统计分析 Statistics | 数据挖掘 Data mining | 机器学习 Machine learning | |统计分析 Statistics|STATS 202|STATS 203|STAT 110|STAT 104|STAT 705|STAT 707|STAT4203|STAT4204|STAT4205|STAT4206|STAT 133|STAT 134|STAT 101A|STAT 100A|STAT 581|STAT 520|STAT 521|STAT 4500|STAT 5805|STAT 5806|STAT 4600|STAT30001|STAT3001|STAT3002|STAT3003|STAT3004|STAT3005|STAT3006|STAT5001|STAT5002|STAT5003|STAT5004|
# 专业咨询 www.daixie.it.com www.rcodedaixie.com
# 这个教程涵盖了R语言中Excel文件处理的主要方面,从基本的读写操作到高级的数据分析和格式化。您可以根据具体需求选择合适的方法和包。
# 产品表现
product_performance = data %>%
group_by(product_name) %>%
summarise(total_sales = sum(amount, na.rm = TRUE)) %>%
arrange(desc(total_sales)) %>%
head(10),
# 客户分析
customer_analysis = data %>%
group_by(customer_segment) %>%
summarise(
total_sales = sum(amount, na.rm = TRUE),
customer_count = n_distinct(customer_id)
)
)
return(charts)
},
create_dashboard_tables = function(data) {
tables <- list(
# 顶级产品
top_products = data %>%
group_by(product_name) %>%
summarise(
total_sales = sum(amount, na.rm = TRUE),
units_sold = sum(quantity, na.rm = TRUE),
avg_price = mean(unit_price, na.rm = TRUE)
) %>%
arrange(desc(total_sales)) %>%
head(20),
# 顶级客户
top_customers = data %>%
group_by(customer_name) %>%
summarise(
total_spent = sum(amount, na.rm = TRUE),
order_count = n(),
avg_order_value = mean(amount, na.rm = TRUE)
) %>%
arrange(desc(total_spent)) %>%
head(20)
)
return(tables)
},
save_dashboard = function(dashboard, output_path) {
# 创建Excel工作簿
wb <- createWorkbook()
# 添加概览工作表
addWorksheet(wb, "Overview")
overview_data <- data.frame(
Metric = names(dashboard$data_summary),
Value = unlist(dashboard$data_summary)
)
writeData(wb, "Overview", overview_data)
# 添加指标工作表
addWorksheet(wb, "Metrics")
metrics_data <- data.frame(
Metric = names(dashboard$metrics),
Value = unlist(dashboard$metrics)
)
writeData(wb, "Metrics", metrics_data)
# 添加图表数据工作表
addWorksheet(wb, "Chart_Data")
self$add_chart_data_to_workbook(wb, dashboard$charts)
# 添加表格数据工作表
addWorksheet(wb, "Tables")
self$add_table_data_to_workbook(wb, dashboard$tables)
# 保存文件
saveWorkbook(wb, output_path, overwrite = TRUE)
},
add_chart_data_to_workbook = function(wb, charts) {
current_row <- 1
for (chart_name in names(charts)) {
chart_data <- charts[[chart_name]]
# 写入图表标题
writeData(wb, "Chart_Data",
data.frame(Chart = chart_name),
startRow = current_row)
# 写入图表数据
writeData(wb, "Chart_Data", chart_data,
startRow = current_row + 2)
current_row <- current_row + nrow(chart_data) + 5
}
},
add_table_data_to_workbook = function(wb, tables) {
current_row <- 1
for (table_name in names(tables)) {
table_data <- tables[[table_name]]
# 写入表格标题
writeData(wb, "Tables",