编写两个文件
- pdf_compressor.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
PDF Compressor - A simple utility to reduce PDF file size by lowering image quality
"""
import os
import argparse
import tempfile
import shutil
import logging
from pikepdf import Pdf, PdfImage
def compress_pdf_super_aggressive(input_path, output_path, quality=5):
    """
    Shrink a PDF by redrawing every page onto a new page of half the width and height.

    Each source page is placed, scaled to fit, onto a half-size page of a
    fresh document, which is then saved with PyMuPDF's garbage-collection,
    deflate and clean options.

    Args:
        input_path (str): Path to the input PDF file
        output_path (str): Path to save the compressed PDF file
        quality (int): Accepted for API compatibility with the other
            compress_* functions. This function does not re-encode embedded
            images, so the value does not affect the output.

    Returns:
        tuple: (bool success, str message)
    """
    try:
        import fitz  # PyMuPDF

        save_options = {
            "garbage": 4,  # Garbage collection: clean up unused objects
            "deflate": True,  # Use deflate compression where possible
            "clean": True,  # Clean content streams
        }

        doc = fitz.open(input_path)
        try:
            new_doc = fitz.open()
            try:
                for page_num in range(len(doc)):
                    original_rect = doc[page_num].rect
                    # New page with half the dimensions of the original.
                    new_page = new_doc.new_page(
                        width=original_rect.width / 2,
                        height=original_rect.height / 2,
                    )
                    # show_pdf_page scales the source page to fit the target
                    # rectangle by itself, so no transformation matrix is
                    # needed (its fourth positional parameter is a boolean
                    # flag, not a matrix).
                    new_page.show_pdf_page(new_page.rect, doc, page_num)

                try:
                    # Optimize for web viewing when the library allows it.
                    new_doc.save(output_path, linear=True, **save_options)
                except Exception:
                    # NOTE(review): newer PyMuPDF releases appear to reject
                    # linear=True -- confirm against the installed version.
                    # Retrying without it keeps the tool usable there; a
                    # genuine save error is simply raised again by the retry.
                    new_doc.save(output_path, **save_options)
            finally:
                new_doc.close()
        finally:
            doc.close()

        # Get file sizes for comparison
        original_size = os.path.getsize(input_path)
        compressed_size = os.path.getsize(output_path)
        reduction = (1 - compressed_size / original_size) * 100 if original_size else 0.0
        return True, f"使用超级激进压缩成功!原文件大小: {original_size/1024:.1f} KB, 压缩后大小: {compressed_size/1024:.1f} KB, 减少: {reduction:.1f}%"
    except Exception as e:
        return False, f"超级激进压缩失败: {str(e)}"
def compress_pdf_aggressive(input_path, output_path, quality=10):
    """
    Aggressive method to compress PDF using PyMuPDF with image downsampling.

    Every sufficiently large embedded image is downsampled so that neither
    side exceeds 800 pixels, re-encoded as a JPEG at the given quality and
    swapped in for the original image object. This method focuses on maximum
    size reduction at the cost of some quality.

    Args:
        input_path (str): Path to the input PDF file
        output_path (str): Path to save the compressed PDF file
        quality (int): Quality for images (0-100, lower means smaller file size)

    Returns:
        tuple: (bool success, str message)
    """
    try:
        import io

        import fitz  # PyMuPDF
        from PIL import Image

        min_size = 50  # Images with a side below this many pixels are left alone
        max_size = 800  # Maximum dimension in pixels after downsampling
        save_options = {
            "garbage": 4,  # Garbage collection: clean up unused objects
            "deflate": True,  # Use deflate compression where possible
            "clean": True,  # Clean content streams
        }

        doc = fitz.open(input_path)
        try:
            # An image object can be used on several pages; replacing it is
            # document-wide, so each xref is handled only once.
            handled_xrefs = set()

            for page_num in range(len(doc)):
                page = doc[page_num]
                for img_index, img_info in enumerate(page.get_images(full=True)):
                    try:
                        xref, smask = img_info[0], img_info[1]
                        if xref in handled_xrefs:
                            continue
                        handled_xrefs.add(xref)

                        # Second entry of the image tuple is the xref of a
                        # soft mask (0 if none). Such images are skipped so
                        # their transparency is not lost by the replacement.
                        if smask:
                            continue

                        image_bytes = doc.extract_image(xref)["image"]
                        img = Image.open(io.BytesIO(image_bytes))

                        # Skip very small images
                        if img.width < min_size or img.height < min_size:
                            continue

                        # Downsample large images (reduce resolution)
                        if img.width > max_size or img.height > max_size:
                            ratio = min(max_size / img.width, max_size / img.height)
                            # max(1, ...) keeps extreme aspect ratios from
                            # producing a zero-sized dimension.
                            new_size = (
                                max(1, int(img.width * ratio)),
                                max(1, int(img.height * ratio)),
                            )
                            img = img.resize(new_size, Image.LANCZOS)

                        # Palette, alpha and other modes would make the JPEG
                        # encoder fail, so anything but RGB/greyscale is
                        # converted first.
                        if img.mode not in ("RGB", "L"):
                            img = img.convert("RGB")

                        output_buffer = io.BytesIO()
                        img.save(output_buffer, format="JPEG", quality=quality, optimize=True)
                        jpeg_bytes = output_buffer.getvalue()

                        # Keep the original when re-encoding did not help.
                        # The comparison is against the extracted image
                        # bytes, which only approximates the stored size.
                        if len(jpeg_bytes) >= len(image_bytes):
                            continue

                        # Replace the image object itself. Drawing the new
                        # image on top of the old one would leave both in
                        # the file and make it larger. On PyMuPDF versions
                        # without replace_image the resulting error is
                        # reported below and the image is left untouched.
                        page.replace_image(xref, stream=jpeg_bytes)
                    except Exception as e:
                        print(f"Warning: Could not process image {img_index} on page {page_num+1}: {e}")

            try:
                # Optimize for web viewing when the library allows it.
                doc.save(output_path, linear=True, **save_options)
            except Exception:
                # NOTE(review): newer PyMuPDF releases appear to reject
                # linear=True -- confirm against the installed version.
                # A genuine save error is raised again by the retry.
                doc.save(output_path, **save_options)
        finally:
            doc.close()

        # Get file sizes for comparison
        original_size = os.path.getsize(input_path)
        compressed_size = os.path.getsize(output_path)
        reduction = (1 - compressed_size / original_size) * 100 if original_size else 0.0
        return True, f"使用激进压缩成功!原文件大小: {original_size/1024:.1f} KB, 压缩后大小: {compressed_size/1024:.1f} KB, 减少: {reduction:.1f}%"
    except Exception as e:
        return False, f"激进压缩失败: {str(e)}"
def compress_pdf_with_pypdf2(input_path, output_path, quality=30):
    """
    Simple method to compress PDF using PyPDF2 - most compatible but least effective.

    Pages are copied into a new document after their content streams have
    been compressed; embedded images are not touched.

    Args:
        input_path (str): Path to the input PDF file
        output_path (str): Path to save the compressed PDF file
        quality (int): Not used in this method, kept for API compatibility

    Returns:
        tuple: (bool success, str message)
    """
    try:
        import PyPDF2

        with open(input_path, 'rb') as file:
            pdf_reader = PyPDF2.PdfReader(file)
            pdf_writer = PyPDF2.PdfWriter()

            for page_num, page in enumerate(pdf_reader.pages):
                try:
                    # Lossless: deflates the page's drawing instructions.
                    page.compress_content_streams()
                except Exception as e:
                    # Best effort only; the page is still copied unchanged.
                    print(f"Warning: Could not compress content of page {page_num+1}: {e}")
                pdf_writer.add_page(page)

            # A PDF without document information yields no metadata;
            # handing that to add_metadata would abort the whole run.
            metadata = pdf_reader.metadata
            if metadata:
                pdf_writer.add_metadata(metadata)

            # Written while the input is still open because the reader
            # loads page data lazily from the input file object.
            with open(output_path, 'wb') as output_file:
                pdf_writer.write(output_file)

        # Get file sizes for comparison
        original_size = os.path.getsize(input_path)
        compressed_size = os.path.getsize(output_path)
        reduction = (1 - compressed_size / original_size) * 100 if original_size else 0.0
        return True, f"使用PyPDF2压缩成功!原文件大小: {original_size/1024:.1f} KB, 压缩后大小: {compressed_size/1024:.1f} KB, 减少: {reduction:.1f}%"
    except Exception as e:
        return False, f"PyPDF2压缩失败: {str(e)}"
def compress_pdf_with_pymupdf(input_path, output_path, quality=30):
    """
    Fallback method to compress PDF using PyMuPDF when pikepdf fails.

    The document is re-saved with garbage collection, deflate compression and
    content-stream cleaning. Embedded images are not re-encoded here.

    Args:
        input_path (str): Path to the input PDF file
        output_path (str): Path to save the compressed PDF file
        quality (int): Accepted for API compatibility with the other
            compress_* functions; it does not affect the output because no
            image is re-encoded by this method.

    Returns:
        tuple: (bool success, str message)
    """
    try:
        import fitz  # PyMuPDF

        save_options = {
            "garbage": 4,  # Garbage collection: clean up unused objects
            "deflate": True,  # Use deflate compression where possible
            "clean": True,  # Clean content streams
        }

        doc = fitz.open(input_path)
        try:
            try:
                # Optimize for web viewing when the library allows it.
                doc.save(output_path, linear=True, **save_options)
            except Exception:
                # NOTE(review): newer PyMuPDF releases appear to reject
                # linear=True -- confirm against the installed version.
                # A genuine save error is raised again by the retry.
                doc.save(output_path, **save_options)
        finally:
            doc.close()

        # Get file sizes for comparison
        original_size = os.path.getsize(input_path)
        compressed_size = os.path.getsize(output_path)
        reduction = (1 - compressed_size / original_size) * 100 if original_size else 0.0
        return True, f"使用PyMuPDF压缩成功!原文件大小: {original_size/1024:.1f} KB, 压缩后大小: {compressed_size/1024:.1f} KB, 减少: {reduction:.1f}%"
    except Exception as e:
        return False, f"PyMuPDF压缩失败: {str(e)}"
def _recompress_pikepdf_image(raw_image, quality, max_size=1000):
    """
    Re-encode one pikepdf image stream in place as a (possibly downsampled) JPEG.

    Args:
        raw_image: The pikepdf stream object of the image
        quality (int): JPEG quality (0-100, lower means smaller file size)
        max_size (int): Maximum width/height in pixels after downsampling

    Returns:
        bool: True if the stream was replaced, False if it was left untouched

    Raises:
        Exception: Whatever pikepdf or Pillow raise when the image cannot be
            decoded or encoded; the caller treats that as a skipped image.
    """
    import io

    from pikepdf import Name
    from PIL import Image

    image = PdfImage(raw_image)

    # Indexed-colour images and 1-bit stencil masks are not JPEG compatible.
    if image.indexed or image.image_mask:
        return False
    # A /Decode array is tied to the original colour space; such images are
    # left alone rather than risking a changed rendering.
    if "/Decode" in raw_image:
        return False

    # read_bytes() would only give decoded pixel data, which Pillow cannot
    # open as a file; as_pil_image() builds the image object directly.
    img = image.as_pil_image()

    # Downsample large images (reduce resolution)
    if img.width > max_size or img.height > max_size:
        ratio = min(max_size / img.width, max_size / img.height)
        new_size = (max(1, int(img.width * ratio)), max(1, int(img.height * ratio)))
        img = img.resize(new_size, Image.LANCZOS)

    # JPEG cannot store alpha, palette or bilevel data.
    if img.mode not in ("RGB", "L"):
        img = img.convert("RGB")

    output_buffer = io.BytesIO()
    img.save(output_buffer, format='JPEG', quality=quality, optimize=True)
    jpeg_bytes = output_buffer.getvalue()

    # Keep the original stream when re-encoding would not make it smaller.
    if len(jpeg_bytes) >= len(raw_image.read_raw_bytes()):
        return False

    # Replace the stream data and describe the new encoding in the image
    # dictionary so PDF viewers decode it as a JPEG of the new size.
    raw_image.write(jpeg_bytes, filter=Name("/DCTDecode"))
    raw_image.ColorSpace = Name("/DeviceGray") if img.mode == "L" else Name("/DeviceRGB")
    raw_image.BitsPerComponent = 8
    raw_image.Width, raw_image.Height = img.width, img.height
    return True


def compress_pdf(input_path, output_path, quality=30):
    """
    Compress a PDF file by reducing the quality of embedded images.

    pikepdf is tried first. If it fails, or no image could be recompressed,
    the PyMuPDF method is tried, and finally the PyPDF2 method.

    Args:
        input_path (str): Path to the input PDF file
        output_path (str): Path to save the compressed PDF file
        quality (int): JPEG quality for images (0-100, lower means smaller file size)

    Returns:
        tuple: (bool success, str message)
    """
    try:
        # First try with pikepdf
        try:
            processed_images = 0
            skipped_images = 0
            # An image stream shared between pages must only be re-encoded
            # once, otherwise it would lose quality on every visit.
            seen_objects = set()

            with Pdf.open(input_path) as pdf:
                for page_num, page in enumerate(pdf.pages):
                    # Copy the items to avoid modification during iteration
                    for name, raw_image in list(page.images.items()):
                        try:
                            if raw_image.objgen in seen_objects:
                                continue
                            seen_objects.add(raw_image.objgen)

                            if _recompress_pikepdf_image(raw_image, quality):
                                processed_images += 1
                            else:
                                skipped_images += 1
                        except Exception as e:
                            skipped_images += 1
                            print(f"Warning: Could not process image {name} on page {page_num+1}: {e}")

                # If no image could be processed, raise to try the fallback method
                if processed_images == 0 and skipped_images > 0:
                    raise RuntimeError(f"Could not process any images in the PDF, {skipped_images} images were skipped")

                # Save the compressed PDF
                pdf.save(output_path)

            # Get file sizes for comparison
            original_size = os.path.getsize(input_path)
            compressed_size = os.path.getsize(output_path)
            reduction = (1 - compressed_size / original_size) * 100 if original_size else 0.0
            return True, f"压缩成功!原文件大小: {original_size/1024:.1f} KB, 压缩后大小: {compressed_size/1024:.1f} KB, 减少: {reduction:.1f}%"
        except Exception as e:
            print(f"pikepdf compression failed: {str(e)}")
            print("Trying fallback method with PyMuPDF...")

            # Try fallback method with PyMuPDF
            success, message = compress_pdf_with_pymupdf(input_path, output_path, quality)

            # If PyMuPDF also fails, try the most basic method with PyPDF2
            if not success:
                print(f"PyMuPDF compression failed: {message}")
                print("Trying final fallback method with PyPDF2...")
                return compress_pdf_with_pypdf2(input_path, output_path, quality)
            return success, message
    except Exception as e:
        return False, f"压缩失败: {str(e)}"
def main(argv=None):
    """
    Command-line entry point: parse the arguments and run the chosen compression method.

    Args:
        argv (list[str] | None): Argument list to parse; None makes argparse
            read the process arguments (sys.argv[1:]).

    Raises:
        SystemExit: With code 2 on invalid arguments (raised by argparse) and
            with code 1 when the compression itself reports failure.
    """
    parser = argparse.ArgumentParser(description='压缩PDF文件,降低图像质量以减小文件大小')
    parser.add_argument('input', help='输入PDF文件路径')
    parser.add_argument('-o', '--output', help='输出PDF文件路径 (默认为添加"_compressed"后缀的输入文件)')
    parser.add_argument('-q', '--quality', type=int, default=30, help='JPEG图像质量 (0-100, 默认: 30)')
    parser.add_argument('-m', '--method', choices=['auto', 'pikepdf', 'pymupdf', 'pypdf2', 'aggressive', 'super_aggressive'],
                        default='auto', help='压缩方法 (默认: auto)')
    args = parser.parse_args(argv)

    # Reject values outside the range advertised in the help text up front
    # instead of letting them reach the image encoder.
    if not 0 <= args.quality <= 100:
        parser.error('质量值必须在 0 到 100 之间')

    # Set default output path if not specified
    if not args.output:
        filename, ext = os.path.splitext(args.input)
        args.output = f"{filename}_compressed{ext}"

    print(f"正在压缩 {args.input} 到 {args.output}...")

    # Choose compression method based on argument
    if args.method == 'auto':
        success, message = compress_pdf(args.input, args.output, args.quality)
    elif args.method == 'pikepdf':
        try:
            # Plain re-save with pikepdf; images are not re-encoded here.
            with Pdf.open(args.input) as pdf:
                pdf.save(args.output)
            original_size = os.path.getsize(args.input)
            compressed_size = os.path.getsize(args.output)
            reduction = (1 - compressed_size / original_size) * 100 if original_size else 0.0
            success, message = True, f"使用PikePDF压缩成功!原文件大小: {original_size/1024:.1f} KB, 压缩后大小: {compressed_size/1024:.1f} KB, 减少: {reduction:.1f}%"
        except Exception as e:
            success, message = False, f"PikePDF压缩失败: {str(e)}"
    elif args.method == 'pymupdf':
        success, message = compress_pdf_with_pymupdf(args.input, args.output, args.quality)
    elif args.method == 'pypdf2':
        success, message = compress_pdf_with_pypdf2(args.input, args.output, args.quality)
    elif args.method == 'aggressive':
        # The aggressive methods cap the quality no matter what was requested.
        success, message = compress_pdf_aggressive(args.input, args.output, min(args.quality, 15))
    else:  # 'super_aggressive' -- argparse's choices= rules out anything else
        success, message = compress_pdf_super_aggressive(args.input, args.output, min(args.quality, 5))

    print(message)
    if not success:
        # Non-zero exit status so scripts calling this tool can detect failure.
        raise SystemExit(1)


if __name__ == "__main__":
    main()
- 图形界面程序(需与 pdf_compressor.py 放在同一目录下,运行此程序即可进行操作)
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
PDF Compressor GUI - A simple utility with GUI to reduce PDF file size by lowering image quality
"""
import os
import sys
import tkinter as tk
from tkinter import filedialog, ttk, messagebox
import threading
import traceback
from pdf_compressor import compress_pdf, compress_pdf_with_pymupdf, compress_pdf_with_pypdf2, compress_pdf_aggressive, compress_pdf_super_aggressive
class PDFCompressorApp:
    """Tkinter window that drives the ``pdf_compressor`` compression functions.

    Compression runs on a background thread so the window stays responsive.
    Tkinter objects should only be used from the thread that runs the main
    loop, so the worker never touches widgets or variables directly: it
    queues callables with ``_call_in_ui`` and the main thread executes them
    from ``_drain_ui_queue``.
    """

    # Highest JPEG quality handed to the two aggressive methods.
    _QUALITY_CAPS = {"aggressive": 15, "super_aggressive": 5}
    # Mode names used in the "quality was lowered" note for those methods.
    _MODE_LABELS = {"aggressive": "激进", "super_aggressive": "超级激进"}
    # Interval in milliseconds between checks of the UI call queue.
    _POLL_MS = 50

    def __init__(self, root):
        """Build all widgets inside ``root`` and start polling the UI call queue."""
        import queue  # local import: not part of the module's import block

        self.root = root
        self.root.title("PDF 压缩工具")
        self.root.geometry("600x550")
        self.root.resizable(True, True)

        # (callable, args) pairs queued by the worker thread.
        self._ui_queue = queue.Queue()

        # Set style
        self.style = ttk.Style()
        self.style.configure("TButton", font=("Arial", 10))
        self.style.configure("TLabel", font=("Arial", 10))

        # Create main frame
        main_frame = ttk.Frame(root, padding="20")
        main_frame.pack(fill=tk.BOTH, expand=True)

        # Input file section
        input_frame = ttk.Frame(main_frame)
        input_frame.pack(fill=tk.X, pady=10)
        ttk.Label(input_frame, text="输入PDF文件:").grid(row=0, column=0, sticky=tk.W, pady=5)
        self.input_path_var = tk.StringVar()
        input_entry = ttk.Entry(input_frame, textvariable=self.input_path_var, width=50)
        input_entry.grid(row=0, column=1, padx=5, pady=5)
        browse_btn = ttk.Button(input_frame, text="浏览...", command=self.browse_input)
        browse_btn.grid(row=0, column=2, padx=5, pady=5)

        # Output file section
        output_frame = ttk.Frame(main_frame)
        output_frame.pack(fill=tk.X, pady=10)
        ttk.Label(output_frame, text="输出PDF文件:").grid(row=0, column=0, sticky=tk.W, pady=5)
        self.output_path_var = tk.StringVar()
        output_entry = ttk.Entry(output_frame, textvariable=self.output_path_var, width=50)
        output_entry.grid(row=0, column=1, padx=5, pady=5)
        browse_output_btn = ttk.Button(output_frame, text="浏览...", command=self.browse_output)
        browse_output_btn.grid(row=0, column=2, padx=5, pady=5)

        # Quality slider
        quality_frame = ttk.Frame(main_frame)
        quality_frame.pack(fill=tk.X, pady=10)
        ttk.Label(quality_frame, text="图像质量:").pack(side=tk.LEFT, padx=5)
        self.quality_var = tk.IntVar(value=30)
        # The scale reports fractional positions; the command rounds them so
        # the label next to it shows whole numbers.
        quality_slider = ttk.Scale(quality_frame, from_=5, to=95, orient=tk.HORIZONTAL,
                                   variable=self.quality_var, length=300,
                                   command=self._on_quality_changed)
        quality_slider.pack(side=tk.LEFT, padx=5, fill=tk.X, expand=True)
        quality_label = ttk.Label(quality_frame, textvariable=self.quality_var)
        quality_label.pack(side=tk.LEFT, padx=5)
        ttk.Label(quality_frame, text="(较低的值 = 更小的文件大小)").pack(side=tk.LEFT, padx=5)

        # Compression method frame
        method_frame = ttk.LabelFrame(main_frame, text="压缩方法")
        method_frame.pack(fill=tk.X, pady=10, padx=5)
        self.method_var = tk.StringVar(value="auto")
        ttk.Radiobutton(method_frame, text="自动 (推荐)", variable=self.method_var,
                        value="auto").grid(row=0, column=0, padx=10, pady=5, sticky=tk.W)
        ttk.Radiobutton(method_frame, text="PikePDF (高质量压缩)", variable=self.method_var,
                        value="pikepdf").grid(row=0, column=1, padx=10, pady=5, sticky=tk.W)
        ttk.Radiobutton(method_frame, text="PyMuPDF (中等压缩)", variable=self.method_var,
                        value="pymupdf").grid(row=1, column=0, padx=10, pady=5, sticky=tk.W)
        ttk.Radiobutton(method_frame, text="PyPDF2 (基本压缩,最兼容)", variable=self.method_var,
                        value="pypdf2").grid(row=1, column=1, padx=10, pady=5, sticky=tk.W)
        ttk.Radiobutton(method_frame, text="激进压缩 (最小文件,低质量)", variable=self.method_var,
                        value="aggressive", command=self.show_aggressive_warning).grid(row=2, column=0, padx=10, pady=5, sticky=tk.W)
        ttk.Radiobutton(method_frame, text="超级激进压缩 (页面减半,极低质量)", variable=self.method_var,
                        value="super_aggressive", command=self.show_super_aggressive_warning).grid(row=2, column=1, padx=10, pady=5, sticky=tk.W)

        # Compression button (kept on self so it can be disabled while a job runs)
        self.compress_btn = ttk.Button(main_frame, text="压缩PDF", command=self.start_compression)
        self.compress_btn.pack(pady=15)

        # Progress bar
        self.progress_var = tk.DoubleVar()
        self.progress = ttk.Progressbar(main_frame, variable=self.progress_var, maximum=100)
        self.progress.pack(fill=tk.X, pady=10)

        # Status label
        self.status_var = tk.StringVar(value="准备就绪")
        status_label = ttk.Label(main_frame, textvariable=self.status_var, font=("Arial", 10))
        status_label.pack(pady=5)

        # Results frame
        results_frame = ttk.LabelFrame(main_frame, text="压缩结果")
        results_frame.pack(fill=tk.BOTH, expand=True, pady=10)

        # Add scrollbar to results text
        result_scroll = ttk.Scrollbar(results_frame)
        result_scroll.pack(side=tk.RIGHT, fill=tk.Y)
        self.result_text = tk.Text(results_frame, height=8, width=70, font=("Consolas", 10),
                                   yscrollcommand=result_scroll.set)
        self.result_text.pack(fill=tk.BOTH, expand=True, padx=5, pady=5)
        result_scroll.config(command=self.result_text.yview)

        # Start executing calls queued by the worker thread.
        self.root.after(self._POLL_MS, self._drain_ui_queue)

    # ------------------------------------------------------------------
    # Thread hand-off helpers
    # ------------------------------------------------------------------
    def _call_in_ui(self, func, *args):
        """Queue ``func(*args)`` for execution on the Tk main thread."""
        self._ui_queue.put((func, args))

    def _drain_ui_queue(self):
        """Run every queued UI call, then schedule the next check (main thread)."""
        try:
            while not self._ui_queue.empty():
                func, args = self._ui_queue.get_nowait()
                func(*args)
        finally:
            # Re-arm even if a queued call raised, so later updates still run.
            self.root.after(self._POLL_MS, self._drain_ui_queue)

    def _append_result(self, text):
        """Append ``text`` to the results box and scroll to the end (main thread)."""
        self.result_text.insert(tk.END, text)
        self.result_text.see(tk.END)

    def _log(self, text):
        """Append ``text`` to the results box; safe to call from the worker thread."""
        self._call_in_ui(self._append_result, text)

    def _on_quality_changed(self, value):
        """Round the slider position reported by the scale to a whole number."""
        self.quality_var.set(int(round(float(value))))

    # ------------------------------------------------------------------
    # Dialog callbacks
    # ------------------------------------------------------------------
    def show_aggressive_warning(self):
        """Show the warning for the aggressive compression mode."""
        messagebox.showwarning("激进压缩警告",
                               "激进压缩模式会大幅降低图像质量和分辨率,以获得最小的文件大小。\n\n"
                               "这可能导致图像变得模糊或像素化,但文件大小将显著减小。\n\n"
                               "建议仅在文件大小是最重要因素时使用此模式。")

    def show_super_aggressive_warning(self):
        """Show the warning for the super aggressive compression mode."""
        messagebox.showwarning("超级激进压缩警告",
                               "超级激进压缩模式将:\n\n"
                               "1. 将页面尺寸减小为原始尺寸的一半\n"
                               "2. 使用极低的图像质量设置\n"
                               "3. 将图像分辨率降低到最小\n\n"
                               "这将导致文本变小,图像质量显著降低,但文件大小将极大减小。\n\n"
                               "此模式适用于需要最小文件大小且不太关心视觉质量的情况。")

    def browse_input(self):
        """Ask for the input PDF and pre-fill the output path with a ``_compressed`` suffix."""
        file_path = filedialog.askopenfilename(
            title="选择PDF文件",
            filetypes=[("PDF文件", "*.pdf"), ("所有文件", "*.*")]
        )
        if file_path:
            self.input_path_var.set(file_path)
            # Auto-set output path
            filename, ext = os.path.splitext(file_path)
            self.output_path_var.set(f"{filename}_compressed{ext}")

    def browse_output(self):
        """Ask where the compressed PDF should be saved."""
        file_path = filedialog.asksaveasfilename(
            title="保存压缩后的PDF",
            defaultextension=".pdf",
            filetypes=[("PDF文件", "*.pdf"), ("所有文件", "*.*")]
        )
        if file_path:
            self.output_path_var.set(file_path)

    # ------------------------------------------------------------------
    # Compression workflow
    # ------------------------------------------------------------------
    def start_compression(self):
        """Validate the form and start ``compress_task`` on a background thread."""
        input_path = self.input_path_var.get()
        output_path = self.output_path_var.get()
        quality = int(self.quality_var.get())
        method = self.method_var.get()

        if not input_path:
            messagebox.showerror("错误", "请选择输入PDF文件")
            return
        if not output_path:
            messagebox.showerror("错误", "请选择输出PDF文件位置")
            return
        if not os.path.exists(input_path):
            messagebox.showerror("错误", f"输入文件不存在: {input_path}")
            return
        # The output file is written while the input is being read, so using
        # the same path for both would overwrite the source document.
        if os.path.abspath(input_path) == os.path.abspath(output_path):
            messagebox.showerror("错误", "输出文件不能与输入文件相同")
            return

        # Check if input is actually a PDF
        if not input_path.lower().endswith('.pdf'):
            if not messagebox.askyesno("警告", "输入文件不是PDF格式,是否继续?"):
                return

        # If aggressive mode is selected with high quality, suggest lowering quality
        if method == "aggressive" and quality > 20:
            if messagebox.askyesno("建议", f"激进压缩模式通常使用较低的质量值以获得最佳效果。\n\n您当前设置的质量为 {quality},是否要自动降低到 10?"):
                quality = 10
                self.quality_var.set(10)

        # If super aggressive mode is selected with high quality, suggest lowering quality
        if method == "super_aggressive" and quality > 10:
            if messagebox.askyesno("建议", f"超级激进压缩模式通常使用极低的质量值以获得最佳效果。\n\n您当前设置的质量为 {quality},是否要自动降低到 5?"):
                quality = 5
                self.quality_var.set(5)

        # Clear previous results
        self.result_text.delete(1.0, tk.END)

        # Update UI; the button stays disabled until the job has finished so
        # two jobs cannot write to the same output at the same time.
        self.status_var.set("正在压缩...")
        self.progress_var.set(10)
        self.compress_btn.state(["disabled"])

        # Start compression in a separate thread
        threading.Thread(target=self.compress_task,
                         args=(input_path, output_path, quality, method),
                         daemon=True).start()

    def compress_task(self, input_path, output_path, quality, method):
        """Run the compression on the worker thread and queue all UI updates.

        Nothing in here touches Tk directly; every widget or variable update
        goes through ``_call_in_ui`` / ``_log``.
        """
        try:
            self._call_in_ui(self.progress_var.set, 20)
            self._log("开始压缩...\n")
            self._log(f"输入文件: {input_path}\n")
            self._log(f"输出文件: {output_path}\n")
            self._log(f"质量设置: {quality}\n")
            self._log(f"压缩方法: {method}\n")
            self._log("-" * 50 + "\n")

            # Make sure the output directory exists
            output_dir = os.path.dirname(output_path)
            if output_dir:
                os.makedirs(output_dir, exist_ok=True)

            self._call_in_ui(self.progress_var.set, 30)
            success, message = self._run_method(input_path, output_path, quality, method)

            # File sizes are read here so that a failure is still reported
            # through the error branch below.
            sizes = None
            if success:
                sizes = (os.path.getsize(input_path), os.path.getsize(output_path))
            self._call_in_ui(self._show_outcome, success, message, method, output_path, sizes)
        except Exception as e:
            self._call_in_ui(self.status_var.set, "发生错误")
            self._log(f"错误: {str(e)}\n")
            # Add traceback for debugging
            self._log(traceback.format_exc())
        finally:
            self._call_in_ui(self.compress_btn.state, ["!disabled"])

    def _run_method(self, input_path, output_path, quality, method):
        """Dispatch to the compression function for ``method``; returns (success, message)."""
        if method == "auto":
            return compress_pdf(input_path, output_path, quality)
        if method == "pikepdf":
            return self._resave_with_pikepdf(input_path, output_path)
        if method == "pymupdf":
            return compress_pdf_with_pymupdf(input_path, output_path, quality)
        if method == "pypdf2":
            return compress_pdf_with_pypdf2(input_path, output_path, quality)

        cap = self._QUALITY_CAPS.get(method)
        if cap is None:
            return False, f"未知的压缩方法: {method}"

        # The aggressive modes never run above their quality cap.
        actual_quality = min(quality, cap)
        if actual_quality != quality:
            self._log(f"注意: {self._MODE_LABELS[method]}模式下已将质量值从 {quality} 自动调整为 {actual_quality}\n")
        if method == "aggressive":
            return compress_pdf_aggressive(input_path, output_path, actual_quality)
        return compress_pdf_super_aggressive(input_path, output_path, actual_quality)

    def _resave_with_pikepdf(self, input_path, output_path):
        """Re-save the document with pikepdf without touching its images."""
        try:
            from pikepdf import Pdf
            with Pdf.open(input_path) as pdf:
                pdf.save(output_path)
            original_size = os.path.getsize(input_path)
            compressed_size = os.path.getsize(output_path)
            reduction = (1 - compressed_size / original_size) * 100 if original_size else 0.0
            return True, f"使用PikePDF压缩成功!原文件大小: {original_size/1024:.1f} KB, 压缩后大小: {compressed_size/1024:.1f} KB, 减少: {reduction:.1f}%"
        except Exception as e:
            return False, f"PikePDF压缩失败: {str(e)}"

    def _show_outcome(self, success, message, method, output_path, sizes):
        """Report the result in the window (main thread).

        ``sizes`` is ``(original_size, compressed_size)`` in bytes on success
        and ``None`` on failure.
        """
        self.progress_var.set(100)

        if not success:
            self.status_var.set("压缩失败")
            self._append_result(message + "\n")
            self._append_result("请尝试使用不同的压缩方法或降低质量值。\n")
            # Suggest appropriate fallback methods
            if method not in ("super_aggressive", "aggressive"):
                self._append_result("建议尝试使用'超级激进压缩'方法,它能提供最高的压缩率。\n")
            else:
                self._append_result("建议尝试使用 PyPDF2 方法,它具有最好的兼容性。\n")
            return

        original_size, compressed_size = sizes
        self.status_var.set("压缩完成")
        self._append_result(message + "\n")

        # Calculate compression ratio
        ratio = original_size / compressed_size if compressed_size > 0 else 0
        self._append_result(f"压缩比: {ratio:.2f}x\n")
        if ratio >= 5:
            self._append_result("✓ 已达到目标压缩比 (5倍或更高)\n")
        else:
            self._append_result("⚠ 未达到目标压缩比 (5倍)。\n")
            # Offer a suggestion when the ratio falls short
            if method != "super_aggressive":
                self._append_result("建议: 尝试使用'超级激进压缩'方法以获得更高的压缩比。\n")

        # Check if output file is actually smaller
        if compressed_size >= original_size:
            self._append_result("⚠ 警告: 压缩后的文件比原文件更大或相同大小。\n")
            if method != "super_aggressive":
                self._append_result("建议尝试'超级激进压缩'方法。\n")

        # Ask if user wants to open the file
        if messagebox.askyesno("完成", "压缩完成!是否打开压缩后的文件?"):
            self.open_file(output_path)

    def open_file(self, file_path):
        """Open the file using the default system application"""
        try:
            if sys.platform.startswith("win"):
                # os.startfile only exists on Windows.
                os.startfile(file_path)
            else:
                import subprocess
                opener = "open" if sys.platform == "darwin" else "xdg-open"
                # List form (no shell) so the path is passed through verbatim.
                subprocess.run([opener, file_path], check=True)
        except Exception as e:
            messagebox.showerror("错误", f"无法打开文件: {str(e)}")
def main():
    """Create the Tk root window, attach the compressor UI to it and run the event loop."""
    window = tk.Tk()
    # Bound to a name so the application object stays referenced while the
    # event loop is running.
    application = PDFCompressorApp(window)
    window.mainloop()


if __name__ == "__main__":
    main()