用于压缩 PDF 的 Python 程序

需要编写两个文件：压缩逻辑与命令行入口 pdf_compressor.py，以及一个图形界面程序。

pdf_compressor.py

#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
PDF Compressor - A simple utility to reduce PDF file size by lowering image quality
"""

import os
import argparse
import tempfile
import shutil
import logging
from pikepdf import Pdf, PdfImage

def _recompress_images_super_aggressive(doc, page, page_num, quality, seen_xrefs,
                                        min_side=30, max_side=400):
    """
    Replace the raster images used by one page with downsampled, low-quality JPEGs.

    The replacement is written into ``doc`` in memory through ``Page.replace_image``;
    nothing is saved here. A failure for a single image is reported as a warning
    and leaves that image unchanged.

    Args:
        doc: Open PyMuPDF document that owns ``page``
        page: Page whose images should be recompressed
        page_num (int): Zero-based page index, used only in warning messages
        quality (int): JPEG quality handed to Pillow
        seen_xrefs (set): Image xrefs already handled; updated in place so an image
            shared by several pages is only re-encoded once
        min_side (int): Images narrower or shorter than this are left alone
        max_side (int): Longest side, in pixels, allowed after downsampling
    """
    import io
    from PIL import Image

    for img_index, img_info in enumerate(page.get_images(full=True)):
        xref, smask, colorspace = img_info[0], img_info[1], img_info[5]
        if xref in seen_xrefs:
            continue
        seen_xrefs.add(xref)

        try:
            # A JPEG cannot carry the soft mask, so replacing such an image would
            # drop its transparency. An empty colour space presumably marks a
            # stencil mask, which must not become a picture either.
            if smask or not colorspace:
                continue

            image_bytes = doc.extract_image(xref)["image"]
            img = Image.open(io.BytesIO(image_bytes))

            # Skip very small images
            if img.width < min_side or img.height < min_side:
                continue

            # Downsample large images (reduce resolution aggressively)
            if img.width > max_side or img.height > max_side:
                ratio = min(max_side / img.width, max_side / img.height)
                new_size = (max(1, int(img.width * ratio)), max(1, int(img.height * ratio)))
                img = img.resize(new_size, Image.LANCZOS)

            # JPEG only stores grayscale or RGB reliably (no alpha, no palette)
            if img.mode not in ("RGB", "L"):
                img = img.convert("RGB")

            output_buffer = io.BytesIO()
            img.save(output_buffer, format='JPEG', quality=quality, optimize=True)
            jpeg_bytes = output_buffer.getvalue()

            # Only swap the image in when the re-encoded version is actually smaller
            if len(jpeg_bytes) < len(image_bytes):
                page.replace_image(xref, stream=jpeg_bytes)

        except Exception as e:
            print(f"Warning: Could not process image {img_index} on page {page_num+1}: {e}")


def compress_pdf_super_aggressive(input_path, output_path, quality=5):
    """
    Super aggressive method to compress PDF by halving the page size and using extreme image compression.

    Each source image is re-encoded as a small, low-quality JPEG (in memory), then
    every page is drawn onto a new page with half the width and height.

    Args:
        input_path (str): Path to the input PDF file
        output_path (str): Path to save the compressed PDF file
        quality (int): Quality for images (0-100, lower means smaller file size)

    Returns:
        tuple: (bool success, str message)
    """
    try:
        import fitz  # PyMuPDF

        # Measured up front so the report stays meaningful whatever happens to the paths later
        original_size = os.path.getsize(input_path)

        # Context managers close both documents even when a page fails to process
        with fitz.open(input_path) as doc, fitz.open() as new_doc:
            seen_xrefs = set()

            for page_num in range(len(doc)):
                page = doc[page_num]

                # Shrink the images first so the copy below picks up the small versions
                _recompress_images_super_aggressive(doc, page, page_num, quality, seen_xrefs)

                # Create a new page with half the dimensions
                original_rect = page.rect
                new_page = new_doc.new_page(width=original_rect.width / 2,
                                            height=original_rect.height / 2)

                # show_pdf_page scales the source page to fit the target rectangle,
                # so no explicit transformation matrix is needed
                new_page.show_pdf_page(new_page.rect, doc, page_num)

            # Save with maximum compression options
            save_options = dict(
                garbage=4,  # Garbage collection: clean up unused objects
                deflate=True,  # Use deflate compression where possible
                clean=True,  # Clean content streams
            )
            try:
                new_doc.save(output_path, linear=True, **save_options)  # Optimize for web viewing
            except Exception:
                # NOTE(review): newer PyMuPDF releases reportedly reject linear=True;
                # retry without it. A genuine save error is raised again by this call.
                new_doc.save(output_path, **save_options)

        # Get file sizes for comparison
        compressed_size = os.path.getsize(output_path)
        reduction = (1 - compressed_size / original_size) * 100 if original_size else 0.0

        return True, f"使用超级激进压缩成功！原文件大小: {original_size/1024:.1f} KB, 压缩后大小: {compressed_size/1024:.1f} KB, 减少: {reduction:.1f}%"

    except Exception as e:
        return False, f"超级激进压缩失败: {str(e)}"

def _recompress_images_aggressive(doc, page, page_num, quality, seen_xrefs,
                                  min_side=50, max_side=800):
    """
    Replace the raster images used by one page with downsampled, low-quality JPEGs.

    The replacement is written into ``doc`` in memory through ``Page.replace_image``;
    nothing is saved here. A failure for a single image is reported as a warning
    and leaves that image unchanged.

    Args:
        doc: Open PyMuPDF document that owns ``page``
        page: Page whose images should be recompressed
        page_num (int): Zero-based page index, used only in warning messages
        quality (int): JPEG quality handed to Pillow
        seen_xrefs (set): Image xrefs already handled; updated in place so an image
            shared by several pages is only re-encoded once
        min_side (int): Images narrower or shorter than this are left alone
        max_side (int): Longest side, in pixels, allowed after downsampling
    """
    import io
    from PIL import Image

    for img_index, img_info in enumerate(page.get_images(full=True)):
        xref, smask, colorspace = img_info[0], img_info[1], img_info[5]
        if xref in seen_xrefs:
            continue
        seen_xrefs.add(xref)

        try:
            # A JPEG cannot carry the soft mask, so replacing such an image would
            # drop its transparency. An empty colour space presumably marks a
            # stencil mask, which must not become a picture either.
            if smask or not colorspace:
                continue

            image_bytes = doc.extract_image(xref)["image"]
            img = Image.open(io.BytesIO(image_bytes))

            # Skip very small images
            if img.width < min_side or img.height < min_side:
                continue

            # Downsample large images (reduce resolution)
            if img.width > max_side or img.height > max_side:
                ratio = min(max_side / img.width, max_side / img.height)
                new_size = (max(1, int(img.width * ratio)), max(1, int(img.height * ratio)))
                img = img.resize(new_size, Image.LANCZOS)

            # JPEG only stores grayscale or RGB reliably (no alpha, no palette)
            if img.mode not in ("RGB", "L"):
                img = img.convert("RGB")

            output_buffer = io.BytesIO()
            img.save(output_buffer, format='JPEG', quality=quality, optimize=True)
            jpeg_bytes = output_buffer.getvalue()

            # Replace the image object itself. Inserting a second image on top of
            # the old one would keep the original data and make the file bigger.
            if len(jpeg_bytes) < len(image_bytes):
                page.replace_image(xref, stream=jpeg_bytes)

        except Exception as e:
            print(f"Warning: Could not process image {img_index} on page {page_num+1}: {e}")


def compress_pdf_aggressive(input_path, output_path, quality=10):
    """
    Aggressive method to compress PDF using PyMuPDF with image downsampling.
    This method focuses on maximum size reduction at the cost of some quality.

    Args:
        input_path (str): Path to the input PDF file
        output_path (str): Path to save the compressed PDF file
        quality (int): Quality for images (0-100, lower means smaller file size)

    Returns:
        tuple: (bool success, str message)
    """
    try:
        import fitz  # PyMuPDF

        # Measured up front so the report stays meaningful whatever happens to the paths later
        original_size = os.path.getsize(input_path)

        # The context manager closes the document even when processing fails
        with fitz.open(input_path) as doc:
            seen_xrefs = set()
            for page_num in range(len(doc)):
                _recompress_images_aggressive(doc, doc[page_num], page_num, quality, seen_xrefs)

            # Save with maximum compression options
            save_options = dict(
                garbage=4,  # Garbage collection: clean up unused objects
                deflate=True,  # Use deflate compression where possible
                clean=True,  # Clean content streams
            )
            try:
                doc.save(output_path, linear=True, **save_options)  # Optimize for web viewing
            except Exception:
                # NOTE(review): newer PyMuPDF releases reportedly reject linear=True;
                # retry without it. A genuine save error is raised again by this call.
                doc.save(output_path, **save_options)

        # Get file sizes for comparison
        compressed_size = os.path.getsize(output_path)
        reduction = (1 - compressed_size / original_size) * 100 if original_size else 0.0

        return True, f"使用激进压缩成功！原文件大小: {original_size/1024:.1f} KB, 压缩后大小: {compressed_size/1024:.1f} KB, 减少: {reduction:.1f}%"

    except Exception as e:
        return False, f"激进压缩失败: {str(e)}"

def compress_pdf_with_pypdf2(input_path, output_path, quality=30):
    """
    Simple method to compress PDF using PyPDF2 - most compatible but least effective.

    Pages are copied into a new file and their content streams are losslessly
    compressed; images are not touched.

    Args:
        input_path (str): Path to the input PDF file
        output_path (str): Path to save the compressed PDF file
        quality (int): Not used in this method, kept for API compatibility

    Returns:
        tuple: (bool success, str message)
    """
    try:
        import io
        import PyPDF2

        # Measured before writing so the figure is right even when output_path == input_path
        original_size = os.path.getsize(input_path)

        with open(input_path, 'rb') as file:
            pdf_reader = PyPDF2.PdfReader(file)
            pdf_writer = PyPDF2.PdfWriter()

            # Copy all pages to the writer, compressing their content streams
            for page_num, page in enumerate(pdf_reader.pages):
                try:
                    page.compress_content_streams()
                except Exception as e:
                    # Best effort: an uncompressed page is still a valid page
                    print(f"Warning: Could not compress content of page {page_num+1}: {e}")
                pdf_writer.add_page(page)

            # PDFs without an Info dictionary report no metadata; there is nothing to copy then
            if pdf_reader.metadata:
                pdf_writer.add_metadata(pdf_reader.metadata)

            # Serialize while the source file is still open, but into memory:
            # opening output_path for writing first would truncate the source
            # mid-read whenever both paths name the same file.
            buffer = io.BytesIO()
            pdf_writer.write(buffer)

        # Save the compressed PDF
        with open(output_path, 'wb') as output_file:
            output_file.write(buffer.getvalue())

        # Get file sizes for comparison
        compressed_size = os.path.getsize(output_path)
        reduction = (1 - compressed_size / original_size) * 100 if original_size else 0.0

        return True, f"使用PyPDF2压缩成功！原文件大小: {original_size/1024:.1f} KB, 压缩后大小: {compressed_size/1024:.1f} KB, 减少: {reduction:.1f}%"

    except Exception as e:
        return False, f"PyPDF2压缩失败: {str(e)}"

def _recompress_images_pymupdf(doc, page, page_num, quality, seen_xrefs, min_side=100):
    """
    Replace the raster images used by one page with JPEGs of the requested quality.

    The replacement is written into ``doc`` in memory through ``Page.replace_image``;
    nothing is saved here and the resolution is kept. A failure for a single image
    (including a missing Pillow installation) is reported as a warning and leaves
    that image unchanged.

    Args:
        doc: Open PyMuPDF document that owns ``page``
        page: Page whose images should be recompressed
        page_num (int): Zero-based page index, used only in warning messages
        quality (int): JPEG quality handed to Pillow
        seen_xrefs (set): Image xrefs already handled; updated in place so an image
            shared by several pages is only re-encoded once
        min_side (int): Images narrower or shorter than this are left alone
    """
    for img_index, img_info in enumerate(page.get_images(full=True)):
        xref, smask, colorspace = img_info[0], img_info[1], img_info[5]
        if xref in seen_xrefs:
            continue
        seen_xrefs.add(xref)

        try:
            from PIL import Image
            import io

            # A JPEG cannot carry the soft mask, so replacing such an image would
            # drop its transparency. An empty colour space presumably marks a
            # stencil mask, which must not become a picture either.
            if smask or not colorspace:
                continue

            image_bytes = doc.extract_image(xref)["image"]
            img = Image.open(io.BytesIO(image_bytes))

            # Skip small images
            if img.width < min_side or img.height < min_side:
                continue

            # JPEG only stores grayscale or RGB reliably (no alpha, no palette)
            if img.mode not in ("RGB", "L"):
                img = img.convert("RGB")

            # Save as JPEG with reduced quality
            output_buffer = io.BytesIO()
            img.save(output_buffer, format='JPEG', quality=quality, optimize=True)
            jpeg_bytes = output_buffer.getvalue()

            # Only swap the image in when the re-encoded version is actually smaller
            if len(jpeg_bytes) < len(image_bytes):
                page.replace_image(xref, stream=jpeg_bytes)

        except Exception as e:
            print(f"Warning: Could not process image {img_index} on page {page_num+1}: {e}")


def compress_pdf_with_pymupdf(input_path, output_path, quality=30):
    """
    Fallback method to compress PDF using PyMuPDF when pikepdf fails.

    Args:
        input_path (str): Path to the input PDF file
        output_path (str): Path to save the compressed PDF file
        quality (int): Quality for images (0-100, lower means smaller file size)

    Returns:
        tuple: (bool success, str message)
    """
    try:
        import fitz  # PyMuPDF

        # Measured up front so the report stays meaningful whatever happens to the paths later
        original_size = os.path.getsize(input_path)

        # The context manager closes the document even when processing fails
        with fitz.open(input_path) as doc:
            # Process each page to reduce image quality
            seen_xrefs = set()
            for page_num in range(len(doc)):
                _recompress_images_pymupdf(doc, doc[page_num], page_num, quality, seen_xrefs)

            # Save with compression options
            save_options = dict(
                garbage=4,  # Garbage collection: clean up unused objects
                deflate=True,  # Use deflate compression where possible
                clean=True,  # Clean content streams
            )
            try:
                doc.save(output_path, linear=True, **save_options)  # Optimize for web viewing
            except Exception:
                # NOTE(review): newer PyMuPDF releases reportedly reject linear=True;
                # retry without it. A genuine save error is raised again by this call.
                doc.save(output_path, **save_options)

        # Get file sizes for comparison
        compressed_size = os.path.getsize(output_path)
        reduction = (1 - compressed_size / original_size) * 100 if original_size else 0.0

        return True, f"使用PyMuPDF压缩成功！原文件大小: {original_size/1024:.1f} KB, 压缩后大小: {compressed_size/1024:.1f} KB, 减少: {reduction:.1f}%"

    except Exception as e:
        return False, f"PyMuPDF压缩失败: {str(e)}"

def _recompress_pikepdf_image(raw_image, quality, max_size=1000):
    """
    Re-encode one pikepdf image stream as a JPEG, in place.

    Args:
        raw_image: The pikepdf stream object of the image (a value of ``page.images``)
        quality (int): JPEG quality handed to Pillow
        max_size (int): Longest side, in pixels, allowed after downsampling

    Returns:
        bool: True when the image was handled (rewritten, or left alone because the
        existing data is already smaller); False when it was deliberately skipped
        as unsuitable for JPEG.

    Raises:
        Exception: Whatever pikepdf or Pillow raise when the image cannot be
        decoded or encoded; the caller decides how to report it.
    """
    import io
    from pikepdf import Name

    image = PdfImage(raw_image)

    # Indexed (palette) images and stencil masks do not survive JPEG encoding
    if image.indexed or image.image_mask:
        return False
    # A /Decode array remaps sample values; it would be wrong for the new data
    if raw_image.get("/Decode") is not None:
        return False

    img = image.as_pil_image()

    # Downsample large images (reduce resolution)
    if img.width > max_size or img.height > max_size:
        ratio = min(max_size / img.width, max_size / img.height)
        new_size = (max(1, int(img.width * ratio)), max(1, int(img.height * ratio)))
        img = img.resize(new_size, Image.LANCZOS)

    # JPEG only stores grayscale or RGB reliably (no alpha, no palette)
    if img.mode not in ("RGB", "L"):
        img = img.convert("RGB")

    output_buffer = io.BytesIO()
    img.save(output_buffer, format='JPEG', quality=quality, optimize=True)
    jpeg_bytes = output_buffer.getvalue()

    # Never make the file bigger: keep the existing stream if it is already smaller
    if len(jpeg_bytes) >= len(raw_image.read_raw_bytes()):
        return True

    # Replace the stream data and describe the new encoding in the image dictionary
    raw_image.write(jpeg_bytes, filter=Name("/DCTDecode"))
    raw_image.Width = img.width
    raw_image.Height = img.height
    raw_image.BitsPerComponent = 8
    raw_image.ColorSpace = Name("/DeviceGray") if img.mode == "L" else Name("/DeviceRGB")
    return True


def compress_pdf(input_path, output_path, quality=30):
    """
    Compress a PDF file by reducing the quality of embedded images.

    pikepdf is tried first. If it fails, or cannot handle a single image, the
    PyMuPDF method is tried, and finally the PyPDF2 method.

    Args:
        input_path (str): Path to the input PDF file
        output_path (str): Path to save the compressed PDF file
        quality (int): JPEG quality for images (0-100, lower means smaller file size)

    Returns:
        tuple: (bool success, str message)
    """
    try:
        # First try with pikepdf
        try:
            # Pillow is needed by the helper; import once here rather than per image
            global Image
            from PIL import Image

            # Track if we've processed any images successfully
            processed_images = 0
            skipped_images = 0
            seen_objects = set()  # images shared between pages are handled once

            # The context manager closes the PDF even when processing fails
            with Pdf.open(input_path) as pdf:
                for page_num, page in enumerate(pdf.pages):
                    # Copy the items so the mapping is not modified during iteration
                    for name, raw_image in list(page.images.items()):
                        if raw_image.objgen in seen_objects:
                            continue
                        seen_objects.add(raw_image.objgen)

                        try:
                            if _recompress_pikepdf_image(raw_image, quality):
                                processed_images += 1
                            else:
                                skipped_images += 1
                        except Exception as e:
                            skipped_images += 1
                            print(f"Warning: Could not process image {name} on page {page_num+1}: {e}")

                # If we didn't process any images successfully, raise an exception to try the fallback method
                if processed_images == 0 and skipped_images > 0:
                    raise Exception(f"Could not process any images in the PDF, {skipped_images} images were skipped")

                # Save the compressed PDF
                pdf.save(output_path)

            # Get file sizes for comparison
            original_size = os.path.getsize(input_path)
            compressed_size = os.path.getsize(output_path)
            reduction = (1 - compressed_size / original_size) * 100 if original_size else 0.0

            return True, f"压缩成功！原文件大小: {original_size/1024:.1f} KB, 压缩后大小: {compressed_size/1024:.1f} KB, 减少: {reduction:.1f}%"

        except Exception as e:
            print(f"pikepdf compression failed: {str(e)}")
            print("Trying fallback method with PyMuPDF...")

            # Try fallback method with PyMuPDF
            success, message = compress_pdf_with_pymupdf(input_path, output_path, quality)

            # If PyMuPDF also fails, try the most basic method with PyPDF2
            if not success:
                print(f"PyMuPDF compression failed: {message}")
                print("Trying final fallback method with PyPDF2...")
                return compress_pdf_with_pypdf2(input_path, output_path, quality)

            return success, message

    except Exception as e:
        return False, f"压缩失败: {str(e)}"

def main(argv=None):
    """
    Command-line entry point: parse the arguments and run the chosen compression method.

    Args:
        argv (list[str] | None): Argument list without the program name; when None,
            argparse reads ``sys.argv``.

    Returns:
        int: 0 when compression succeeded, 1 when it failed. Invalid arguments end
        the process through argparse (exit status 2).
    """
    parser = argparse.ArgumentParser(description='压缩PDF文件，降低图像质量以减小文件大小')
    parser.add_argument('input', help='输入PDF文件路径')
    parser.add_argument('-o', '--output', help='输出PDF文件路径 (默认为添加"_compressed"后缀的输入文件)')
    parser.add_argument('-q', '--quality', type=int, default=30, help='JPEG图像质量 (0-100, 默认: 30)')
    parser.add_argument('-m', '--method', choices=['auto', 'pikepdf', 'pymupdf', 'pypdf2', 'aggressive', 'super_aggressive'],
                       default='auto', help='压缩方法 (默认: auto)')

    args = parser.parse_args(argv)

    # Reject values outside the documented range instead of passing them to the encoder
    if not 0 <= args.quality <= 100:
        parser.error('图像质量必须在 0 到 100 之间')

    # Set default output path if not specified
    if not args.output:
        filename, ext = os.path.splitext(args.input)
        args.output = f"{filename}_compressed{ext}"

    print(f"正在压缩 {args.input} 到 {args.output}...")

    # Choose compression method based on argument
    if args.method == 'auto':
        success, message = compress_pdf(args.input, args.output, args.quality)
    elif args.method == 'pikepdf':
        try:
            # Plain pikepdf re-save (no image processing); the context manager closes the file
            with Pdf.open(args.input) as pdf:
                pdf.save(args.output)
            original_size = os.path.getsize(args.input)
            compressed_size = os.path.getsize(args.output)
            reduction = (1 - compressed_size / original_size) * 100
            success, message = True, f"使用PikePDF压缩成功！原文件大小: {original_size/1024:.1f} KB, 压缩后大小: {compressed_size/1024:.1f} KB, 减少: {reduction:.1f}%"
        except Exception as e:
            success, message = False, f"PikePDF压缩失败: {str(e)}"
    elif args.method == 'pymupdf':
        success, message = compress_pdf_with_pymupdf(args.input, args.output, args.quality)
    elif args.method == 'pypdf2':
        success, message = compress_pdf_with_pypdf2(args.input, args.output, args.quality)
    elif args.method == 'aggressive':
        # The aggressive modes cap the quality so they always compress hard
        success, message = compress_pdf_aggressive(args.input, args.output, min(args.quality, 15))
    else:  # 'super_aggressive' is the only remaining choice argparse allows
        success, message = compress_pdf_super_aggressive(args.input, args.output, min(args.quality, 5))

    print(message)
    return 0 if success else 1

if __name__ == "__main__":
    # Propagate success or failure to the shell
    raise SystemExit(main())

图形界面程序（需与 pdf_compressor.py 放在同一目录）：运行此程序即可通过界面进行压缩操作。

#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
PDF Compressor GUI - A simple utility with GUI to reduce PDF file size by lowering image quality
"""

import os
import sys
import tkinter as tk
from tkinter import filedialog, ttk, messagebox
import threading
import traceback
from pdf_compressor import compress_pdf, compress_pdf_with_pymupdf, compress_pdf_with_pypdf2, compress_pdf_aggressive, compress_pdf_super_aggressive

class PDFCompressorApp:
    def __init__(self, root):
        """
        Build the main window: file pickers, quality slider, method selection,
        compress button, progress bar, status line and result log.

        Args:
            root: The Tk root window that hosts the application
        """
        self.root = root
        self.root.title("PDF 压缩工具")
        self.root.geometry("600x550")
        self.root.resizable(True, True)
        
        # Set style
        self.style = ttk.Style()
        self.style.configure("TButton", font=("Arial", 10))
        self.style.configure("TLabel", font=("Arial", 10))
        
        # Create main frame
        main_frame = ttk.Frame(root, padding="20")
        main_frame.pack(fill=tk.BOTH, expand=True)
        
        # Input file section
        input_frame = ttk.Frame(main_frame)
        input_frame.pack(fill=tk.X, pady=10)
        
        ttk.Label(input_frame, text="输入PDF文件:").grid(row=0, column=0, sticky=tk.W, pady=5)
        
        self.input_path_var = tk.StringVar()
        input_entry = ttk.Entry(input_frame, textvariable=self.input_path_var, width=50)
        input_entry.grid(row=0, column=1, padx=5, pady=5)
        
        browse_btn = ttk.Button(input_frame, text="浏览...", command=self.browse_input)
        browse_btn.grid(row=0, column=2, padx=5, pady=5)
        
        # Output file section
        output_frame = ttk.Frame(main_frame)
        output_frame.pack(fill=tk.X, pady=10)
        
        ttk.Label(output_frame, text="输出PDF文件:").grid(row=0, column=0, sticky=tk.W, pady=5)
        
        self.output_path_var = tk.StringVar()
        output_entry = ttk.Entry(output_frame, textvariable=self.output_path_var, width=50)
        output_entry.grid(row=0, column=1, padx=5, pady=5)
        
        browse_output_btn = ttk.Button(output_frame, text="浏览...", command=self.browse_output)
        browse_output_btn.grid(row=0, column=2, padx=5, pady=5)
        
        # Quality slider
        quality_frame = ttk.Frame(main_frame)
        quality_frame.pack(fill=tk.X, pady=10)
        
        ttk.Label(quality_frame, text="图像质量:").pack(side=tk.LEFT, padx=5)
        
        self.quality_var = tk.IntVar(value=30)
        # ttk.Scale stores fractional values in its variable, which the label below
        # would display verbatim; snap every change back to a whole number.
        quality_slider = ttk.Scale(quality_frame, from_=5, to=95, orient=tk.HORIZONTAL, 
                                  variable=self.quality_var, length=300,
                                  command=lambda value: self.quality_var.set(int(round(float(value)))))
        quality_slider.pack(side=tk.LEFT, padx=5, fill=tk.X, expand=True)
        
        quality_label = ttk.Label(quality_frame, textvariable=self.quality_var)
        quality_label.pack(side=tk.LEFT, padx=5)
        
        ttk.Label(quality_frame, text="(较低的值 = 更小的文件大小)").pack(side=tk.LEFT, padx=5)
        
        # Compression method frame
        method_frame = ttk.LabelFrame(main_frame, text="压缩方法")
        method_frame.pack(fill=tk.X, pady=10, padx=5)
        
        self.method_var = tk.StringVar(value="auto")
        ttk.Radiobutton(method_frame, text="自动 (推荐)", variable=self.method_var, 
                       value="auto").grid(row=0, column=0, padx=10, pady=5, sticky=tk.W)
        ttk.Radiobutton(method_frame, text="PikePDF (高质量压缩)", variable=self.method_var, 
                       value="pikepdf").grid(row=0, column=1, padx=10, pady=5, sticky=tk.W)
        ttk.Radiobutton(method_frame, text="PyMuPDF (中等压缩)", variable=self.method_var, 
                       value="pymupdf").grid(row=1, column=0, padx=10, pady=5, sticky=tk.W)
        ttk.Radiobutton(method_frame, text="PyPDF2 (基本压缩，最兼容)", variable=self.method_var, 
                       value="pypdf2").grid(row=1, column=1, padx=10, pady=5, sticky=tk.W)
        # The two lossy modes warn the user as soon as they are selected
        ttk.Radiobutton(method_frame, text="激进压缩 (最小文件，低质量)", variable=self.method_var, 
                       value="aggressive", command=self.show_aggressive_warning).grid(row=2, column=0, padx=10, pady=5, sticky=tk.W)
        ttk.Radiobutton(method_frame, text="超级激进压缩 (页面减半，极低质量)", variable=self.method_var, 
                       value="super_aggressive", command=self.show_super_aggressive_warning).grid(row=2, column=1, padx=10, pady=5, sticky=tk.W)
        
        # Compression button
        compress_btn = ttk.Button(main_frame, text="压缩PDF", command=self.start_compression)
        compress_btn.pack(pady=15)
        
        # Progress bar
        self.progress_var = tk.DoubleVar()
        self.progress = ttk.Progressbar(main_frame, variable=self.progress_var, maximum=100)
        self.progress.pack(fill=tk.X, pady=10)
        
        # Status label
        self.status_var = tk.StringVar(value="准备就绪")
        status_label = ttk.Label(main_frame, textvariable=self.status_var, font=("Arial", 10))
        status_label.pack(pady=5)
        
        # Results frame
        results_frame = ttk.LabelFrame(main_frame, text="压缩结果")
        results_frame.pack(fill=tk.BOTH, expand=True, pady=10)
        
        # Add scrollbar to results text
        result_scroll = ttk.Scrollbar(results_frame)
        result_scroll.pack(side=tk.RIGHT, fill=tk.Y)
        
        self.result_text = tk.Text(results_frame, height=8, width=70, font=("Consolas", 10),
                                 yscrollcommand=result_scroll.set)
        self.result_text.pack(fill=tk.BOTH, expand=True, padx=5, pady=5)
        result_scroll.config(command=self.result_text.yview)
    
    def show_aggressive_warning(self):
        """Show a warning dialog describing the quality loss of the aggressive mode."""
        paragraphs = [
            "激进压缩模式会大幅降低图像质量和分辨率，以获得最小的文件大小。",
            "这可能导致图像变得模糊或像素化，但文件大小将显著减小。",
            "建议仅在文件大小是最重要因素时使用此模式。",
        ]
        # Paragraphs are separated by one blank line in the dialog
        messagebox.showwarning("激进压缩警告", "\n\n".join(paragraphs))
    
    def show_super_aggressive_warning(self):
        """Show a warning dialog listing what the super aggressive mode does to the document."""
        # The numbered steps sit on consecutive lines; the paragraphs are one blank line apart
        steps = "\n".join([
            "1. 将页面尺寸减小为原始尺寸的一半",
            "2. 使用极低的图像质量设置",
            "3. 将图像分辨率降低到最小",
        ])
        body = "\n\n".join([
            "超级激进压缩模式将：",
            steps,
            "这将导致文本变小，图像质量显著降低，但文件大小将极大减小。",
            "此模式适用于需要最小文件大小且不太关心视觉质量的情况。",
        ])
        messagebox.showwarning("超级激进压缩警告", body)
        
        
    def browse_input(self):
        """Ask for the input PDF and pre-fill the output path with a "_compressed" name."""
        pdf_types = [("PDF文件", "*.pdf"), ("所有文件", "*.*")]
        chosen = filedialog.askopenfilename(title="选择PDF文件", filetypes=pdf_types)
        if not chosen:
            # Dialog was cancelled; keep whatever is already in the fields
            return

        self.input_path_var.set(chosen)

        # Derive the suggested output path from the value now stored in the field
        current_input = self.input_path_var.get()
        if not current_input:
            return
        stem, suffix = os.path.splitext(current_input)
        self.output_path_var.set(f"{stem}_compressed{suffix}")
    
    def browse_output(self):
        """Ask where the compressed PDF should be saved and store the chosen path."""
        pdf_types = [("PDF文件", "*.pdf"), ("所有文件", "*.*")]
        target = filedialog.asksaveasfilename(title="保存压缩后的PDF",
                                              defaultextension=".pdf",
                                              filetypes=pdf_types)
        if not target:
            # Dialog was cancelled; leave the current output path untouched
            return
        self.output_path_var.set(target)
    
    def start_compression(self):
        """
        Validate the form, confirm questionable settings with the user and start
        the compression in a background thread.

        Nothing is started when a path is missing, the input file does not exist,
        the output would overwrite the input, or the user declines a confirmation.
        """
        input_path = self.input_path_var.get()
        output_path = self.output_path_var.get()
        quality = self.quality_var.get()
        method = self.method_var.get()
        
        if not input_path:
            messagebox.showerror("错误", "请选择输入PDF文件")
            return
        
        if not output_path:
            messagebox.showerror("错误", "请选择输出PDF文件位置")
            return
        
        if not os.path.exists(input_path):
            messagebox.showerror("错误", f"输入文件不存在: {input_path}")
            return

        # Writing the result over the file that is being read either fails or
        # destroys the source, so refuse it before any work starts
        same_file = (os.path.normcase(os.path.abspath(input_path))
                     == os.path.normcase(os.path.abspath(output_path)))
        if same_file:
            messagebox.showerror("错误", "输出文件不能与输入文件相同，请选择其他输出位置")
            return
            
        # Check if input is actually a PDF
        if not input_path.lower().endswith('.pdf'):
            if not messagebox.askyesno("警告", "输入文件不是PDF格式，是否继续？"):
                return
        
        # If aggressive mode is selected with high quality, suggest lowering quality
        if method == "aggressive" and quality > 20:
            if messagebox.askyesno("建议", f"激进压缩模式通常使用较低的质量值以获得最佳效果。\n\n您当前设置的质量为 {quality}，是否要自动降低到 10？"):
                quality = 10
                self.quality_var.set(10)
        
        # If super aggressive mode is selected with high quality, suggest lowering quality
        if method == "super_aggressive" and quality > 10:
            if messagebox.askyesno("建议", f"超级激进压缩模式通常使用极低的质量值以获得最佳效果。\n\n您当前设置的质量为 {quality}，是否要自动降低到 5？"):
                quality = 5
                self.quality_var.set(5)
        
        # Clear previous results
        self.result_text.delete(1.0, tk.END)
        
        # Update UI
        self.status_var.set("正在压缩...")
        self.progress_var.set(10)
        
        # Start compression in a separate thread so the window stays responsive;
        # daemon=True lets the program exit even if the worker is still running
        threading.Thread(target=self.compress_task, 
                        args=(input_path, output_path, quality, method), 
                        daemon=True).start()
    
    def compress_task(self, input_path, output_path, quality, method):
        """Run one compression job and report its progress and outcome in the UI.

        Dispatches to the compression routine selected by ``method``, then
        writes the result (message, compression ratio, suggestions) to the
        result text widget and updates the status and progress variables.

        Args:
            input_path (str): Path of the PDF to compress.
            output_path (str): Path the compressed PDF is written to; its
                parent directory is created when it does not exist yet.
            quality (int): Requested image quality. It is capped at 15 for
                "aggressive" and at 5 for "super_aggressive".
            method (str): One of "auto", "pikepdf", "pymupdf", "pypdf2",
                "aggressive" or "super_aggressive". Any other value is
                reported as a failure.

        Returns:
            None. Every exception is caught and written to the result widget
            together with its traceback.
        """
        # NOTE(review): the caller starts this method via threading.Thread, yet
        # it updates Tk variables/widgets and opens a message box directly.
        # Tkinter access from non-main threads is not guaranteed to be safe;
        # consider marshalling UI updates through `after()` -- TODO confirm.

        def log(text):
            # Append one chunk of text to the result widget.
            self.result_text.insert(tk.END, text)

        known_methods = (
            "auto", "pikepdf", "pymupdf", "pypdf2",
            "aggressive", "super_aggressive",
        )

        try:
            self.progress_var.set(20)
            log("开始压缩...\n")
            log(f"输入文件: {input_path}\n")
            log(f"输出文件: {output_path}\n")
            log(f"质量设置: {quality}\n")
            log(f"压缩方法: {method}\n")
            log("-" * 50 + "\n")
            self.result_text.see(tk.END)

            # Make sure the output directory exists. exist_ok avoids the
            # check-then-create race of a separate os.path.exists() test.
            output_dir = os.path.dirname(output_path)
            if output_dir:
                os.makedirs(output_dir, exist_ok=True)

            # Only recognised methods advance the progress bar before running.
            if method in known_methods:
                self.progress_var.set(30)

            # Call the appropriate compression function based on method
            if method == "auto":
                success, message = compress_pdf(input_path, output_path, quality)
            elif method == "pikepdf":
                try:
                    from pikepdf import Pdf
                    # Close the input file as soon as it has been re-saved
                    # instead of keeping the handle open until garbage
                    # collection (the method may block on a dialog later).
                    with Pdf.open(input_path) as pdf:
                        pdf.save(output_path)
                    original_size = os.path.getsize(input_path)
                    compressed_size = os.path.getsize(output_path)
                    reduction = (1 - compressed_size / original_size) * 100
                    success, message = True, f"使用PikePDF压缩成功！原文件大小: {original_size/1024:.1f} KB, 压缩后大小: {compressed_size/1024:.1f} KB, 减少: {reduction:.1f}%"
                except Exception as e:
                    success, message = False, f"PikePDF压缩失败: {str(e)}"
            elif method == "pymupdf":
                success, message = compress_pdf_with_pymupdf(input_path, output_path, quality)
            elif method == "pypdf2":
                success, message = compress_pdf_with_pypdf2(input_path, output_path, quality)
            elif method == "aggressive":
                # Aggressive mode works with low quality values: cap at 15.
                actual_quality = min(quality, 15)
                if actual_quality != quality:
                    log(f"注意: 激进模式下已将质量值从 {quality} 自动调整为 {actual_quality}\n")
                success, message = compress_pdf_aggressive(input_path, output_path, actual_quality)
            elif method == "super_aggressive":
                # Super aggressive mode works with very low values: cap at 5.
                actual_quality = min(quality, 5)
                if actual_quality != quality:
                    log(f"注意: 超级激进模式下已将质量值从 {quality} 自动调整为 {actual_quality}\n")
                success, message = compress_pdf_super_aggressive(input_path, output_path, actual_quality)
            else:
                success, message = False, f"未知的压缩方法: {method}"

            self.progress_var.set(100)

            if success:
                self.status_var.set("压缩完成")
                log(message + "\n")

                # Calculate compression ratio (0 when the output is empty,
                # which also avoids dividing by zero).
                original_size = os.path.getsize(input_path)
                compressed_size = os.path.getsize(output_path)
                ratio = original_size / compressed_size if compressed_size > 0 else 0

                log(f"压缩比: {ratio:.2f}x\n")

                if ratio >= 5:
                    log("✓ 已达到目标压缩比 (5倍或更高)\n")
                else:
                    log("⚠ 未达到目标压缩比 (5倍)。\n")

                    # The ratio fell short: point to the strongest method
                    # unless it is the one that was just used.
                    if method != "super_aggressive":
                        log("建议: 尝试使用'超级激进压缩'方法以获得更高的压缩比。\n")

                # Check if output file is actually smaller
                if compressed_size >= original_size:
                    log("⚠ 警告: 压缩后的文件比原文件更大或相同大小。\n")
                    if method != "super_aggressive":
                        log("建议尝试'超级激进压缩'方法。\n")

                # Ask if user wants to open the file
                if messagebox.askyesno("完成", "压缩完成！是否打开压缩后的文件？"):
                    self.open_file(output_path)
            else:
                self.status_var.set("压缩失败")
                log(message + "\n")
                log("请尝试使用不同的压缩方法或降低质量值。\n")

                # Suggest appropriate fallback methods: the strongest method
                # first; after an aggressive variant failed, PyPDF2 instead.
                if method != "super_aggressive" and method != "aggressive":
                    log("建议尝试使用'超级激进压缩'方法，它能提供最高的压缩率。\n")
                else:
                    log("建议尝试使用 PyPDF2 方法，它具有最好的兼容性。\n")

            self.result_text.see(tk.END)

        except Exception as e:
            self.status_var.set("发生错误")
            log(f"错误: {str(e)}\n")
            # Add traceback for debugging
            log(traceback.format_exc())
            self.result_text.see(tk.END)
    
    def open_file(self, file_path):
        """Open ``file_path`` with the platform's default application.

        Uses ``os.startfile`` where it is available (Windows), the ``open``
        command on macOS and ``xdg-open`` on other platforms. Any failure is
        shown to the user in an error dialog instead of being raised.

        Args:
            file_path (str): Path of the file to open.
        """
        try:
            import subprocess
            import sys

            if hasattr(os, "startfile"):
                # os.startfile only exists on Windows.
                os.startfile(file_path)
            elif sys.platform == "darwin":
                subprocess.run(["open", file_path], check=True)
            else:
                # Argument list (no shell) so the path is never interpreted
                # by a shell.
                subprocess.run(["xdg-open", file_path], check=True)
        except Exception as e:
            messagebox.showerror("错误", f"无法打开文件: {str(e)}")

def main():
    """Create the Tk root window, attach the compressor UI and run the event loop."""
    window = tk.Tk()
    # Bind the application object to a local name for the duration of the loop.
    compressor_app = PDFCompressorApp(window)
    window.mainloop()

# Start the GUI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

压缩 PDF 的 python 程序

推荐阅读更多精彩内容