压缩 PDF 的 Python 程序

编写两个文件

  1. pdf_compressor.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
PDF Compressor - A simple utility to reduce PDF file size by lowering image quality
"""

import os
import argparse
import tempfile
import shutil
import logging
from pikepdf import Pdf, PdfImage

def _super_aggressive_shrink_images(doc, page, page_num, quality, done_xrefs):
    """
    Re-encode the images of one source page as small, low-quality JPEGs in place.

    Args:
        doc: The open PyMuPDF document that owns ``page``
        page: The source page whose images should be shrunk
        page_num (int): Zero-based page index, used only in warning messages
        quality (int): JPEG quality handed to Pillow (lower means smaller)
        done_xrefs (set): Image xrefs already handled; updated in place so an
            image shared by several pages is only re-encoded once
    """
    from PIL import Image
    import io

    # Page.replace_image() is missing from older PyMuPDF releases; without it
    # the image data cannot be swapped, so the page is left untouched.
    if not hasattr(page, "replace_image"):
        return

    min_size = 30   # images with a side below this are not worth touching
    max_size = 400  # longest side (in pixels) allowed after downsampling

    for img_index, img_info in enumerate(page.get_images(full=True)):
        xref, smask = img_info[0], img_info[1]
        if xref in done_xrefs:
            continue
        done_xrefs.add(xref)

        # Swapping in a plain JPEG would drop the soft mask (transparency),
        # so masked images are left as they are.
        if smask:
            continue

        try:
            image_bytes = doc.extract_image(xref)["image"]
            img = Image.open(io.BytesIO(image_bytes))

            if img.width < min_size or img.height < min_size:
                continue

            if img.width > max_size or img.height > max_size:
                ratio = min(max_size / img.width, max_size / img.height)
                # max(1, ...) guards against a zero side on extreme aspect ratios
                new_size = (max(1, int(img.width * ratio)), max(1, int(img.height * ratio)))
                img = img.resize(new_size, Image.LANCZOS)

            # JPEG cannot store alpha or palette data
            if img.mode not in ("RGB", "L", "CMYK"):
                img = img.convert("RGB")

            output_buffer = io.BytesIO()
            img.save(output_buffer, format='JPEG', quality=quality, optimize=True)
            jpeg_bytes = output_buffer.getvalue()

            # Only swap the image when the re-encoded version is really smaller
            if len(jpeg_bytes) < len(image_bytes):
                page.replace_image(xref, stream=jpeg_bytes)

        except Exception as e:
            print(f"Warning: Could not process image {img_index} on page {page_num+1}: {e}")


def compress_pdf_super_aggressive(input_path, output_path, quality=5):
    """
    Super aggressive method to compress PDF by reducing page size by half and using extreme image compression.

    Every source page has its images re-encoded as low-quality JPEGs (when the
    installed PyMuPDF supports image replacement) and is then drawn onto a new
    page with half the width and height.

    Args:
        input_path (str): Path to the input PDF file
        output_path (str): Path to save the compressed PDF file
        quality (int): Quality for images (0-100, lower means smaller file size)

    Returns:
        tuple: (bool success, str message)
    """
    try:
        import fitz  # PyMuPDF

        doc = fitz.open(input_path)
        try:
            new_doc = fitz.open()
            try:
                done_xrefs = set()

                for page_num in range(len(doc)):
                    page = doc[page_num]

                    # Shrink the images in the (in-memory) source page first so
                    # the copy made below already carries the small versions.
                    _super_aggressive_shrink_images(doc, page, page_num, quality, done_xrefs)

                    original_rect = page.rect
                    new_page = new_doc.new_page(
                        width=original_rect.width / 2,
                        height=original_rect.height / 2,
                    )

                    # The target rectangle alone determines the 50% scaling;
                    # show_pdf_page() takes no transformation matrix.
                    new_page.show_pdf_page(new_page.rect, doc, page_num)

                # NOTE(review): recent MuPDF releases reportedly dropped
                # linearisation support - confirm that linear=True is accepted
                # by the installed PyMuPDF version.
                new_doc.save(
                    output_path,
                    garbage=4,  # Garbage collection: clean up unused objects
                    deflate=True,  # Use deflate compression where possible
                    clean=True,  # Clean content streams
                    linear=True,  # Optimize for web viewing
                )
            finally:
                new_doc.close()
        finally:
            doc.close()

        # Get file sizes for comparison
        original_size = os.path.getsize(input_path)
        compressed_size = os.path.getsize(output_path)
        reduction = (1 - compressed_size / original_size) * 100

        return True, f"使用超级激进压缩成功!原文件大小: {original_size/1024:.1f} KB, 压缩后大小: {compressed_size/1024:.1f} KB, 减少: {reduction:.1f}%"

    except Exception as e:
        return False, f"超级激进压缩失败: {str(e)}"

def _aggressive_shrink_images(doc, page, page_num, quality, done_xrefs):
    """
    Downsample and re-encode the images of one page as low-quality JPEGs.

    Args:
        doc: The open PyMuPDF document that owns ``page``
        page: The page whose images should be shrunk
        page_num (int): Zero-based page index, used only in warning messages
        quality (int): JPEG quality handed to Pillow (lower means smaller)
        done_xrefs (set): Image xrefs already replaced or rejected; updated in
            place so an image shared by several pages is only handled once
    """
    from PIL import Image
    import io

    min_size = 50   # images with a side below this are not worth touching
    max_size = 800  # longest side (in pixels) allowed after downsampling

    # Page.replace_image() swaps the image data itself. Older PyMuPDF releases
    # lack it; there the smaller image is drawn over the original instead.
    can_replace = hasattr(page, "replace_image")

    for img_index, img_info in enumerate(page.get_images(full=True)):
        xref, smask = img_info[0], img_info[1]

        if can_replace:
            # A replaced xref is shared by every page that references it
            if xref in done_xrefs:
                continue
            done_xrefs.add(xref)

        # A plain JPEG cannot carry the soft mask (transparency)
        if smask:
            continue

        try:
            image_bytes = doc.extract_image(xref)["image"]
            img = Image.open(io.BytesIO(image_bytes))

            if img.width < min_size or img.height < min_size:
                continue

            if img.width > max_size or img.height > max_size:
                ratio = min(max_size / img.width, max_size / img.height)
                # max(1, ...) guards against a zero side on extreme aspect ratios
                new_size = (max(1, int(img.width * ratio)), max(1, int(img.height * ratio)))
                img = img.resize(new_size, Image.LANCZOS)

            # JPEG cannot store alpha or palette data
            if img.mode not in ("RGB", "L", "CMYK"):
                img = img.convert("RGB")

            output_buffer = io.BytesIO()
            img.save(output_buffer, format='JPEG', quality=quality, optimize=True)
            jpeg_bytes = output_buffer.getvalue()

            # Keep the original when re-encoding did not make it smaller
            if len(jpeg_bytes) >= len(image_bytes):
                continue

            if can_replace:
                page.replace_image(xref, stream=jpeg_bytes)
            else:
                # get_image_bbox() expects the image item (or its name),
                # not the loop index.
                rect = page.get_image_bbox(img_info)
                if rect:  # If we can determine the image position
                    page.insert_image(rect, stream=jpeg_bytes)

        except Exception as e:
            print(f"Warning: Could not process image {img_index} on page {page_num+1}: {e}")


def compress_pdf_aggressive(input_path, output_path, quality=10):
    """
    Aggressive method to compress PDF using PyMuPDF with image downsampling.
    This method focuses on maximum size reduction at the cost of some quality.

    Args:
        input_path (str): Path to the input PDF file
        output_path (str): Path to save the compressed PDF file
        quality (int): Quality for images (0-100, lower means smaller file size)

    Returns:
        tuple: (bool success, str message)
    """
    try:
        import fitz  # PyMuPDF

        doc = fitz.open(input_path)
        try:
            done_xrefs = set()
            for page_num in range(len(doc)):
                _aggressive_shrink_images(doc, doc[page_num], page_num, quality, done_xrefs)

            # NOTE(review): recent MuPDF releases reportedly dropped
            # linearisation support - confirm that linear=True is accepted by
            # the installed PyMuPDF version.
            doc.save(
                output_path,
                garbage=4,  # Garbage collection: clean up unused objects
                deflate=True,  # Use deflate compression where possible
                clean=True,  # Clean content streams
                linear=True,  # Optimize for web viewing
            )
        finally:
            # Release the document even when processing or saving failed
            doc.close()

        # Get file sizes for comparison
        original_size = os.path.getsize(input_path)
        compressed_size = os.path.getsize(output_path)
        reduction = (1 - compressed_size / original_size) * 100

        return True, f"使用激进压缩成功!原文件大小: {original_size/1024:.1f} KB, 压缩后大小: {compressed_size/1024:.1f} KB, 减少: {reduction:.1f}%"

    except Exception as e:
        return False, f"激进压缩失败: {str(e)}"

def compress_pdf_with_pypdf2(input_path, output_path, quality=30):
    """
    Simple method to compress PDF using PyPDF2 - most compatible but least effective.

    Page content streams are deflate-compressed on a best-effort basis; images
    are not touched.

    Args:
        input_path (str): Path to the input PDF file
        output_path (str): Path to save the compressed PDF file
        quality (int): Not used in this method, kept for API compatibility

    Returns:
        tuple: (bool success, str message)
    """
    try:
        import PyPDF2

        # The reader loads objects lazily, so the input file has to stay open
        # until the writer has finished writing.
        with open(input_path, 'rb') as file:
            pdf_reader = PyPDF2.PdfReader(file)
            pdf_writer = PyPDF2.PdfWriter()

            for page_num, page in enumerate(pdf_reader.pages):
                try:
                    # Lossless: only re-compresses the page's drawing commands
                    page.compress_content_streams()
                except Exception as e:
                    # Best effort - an uncompressed page is still a valid page
                    print(f"Warning: Could not compress content of page {page_num+1}: {e}")
                pdf_writer.add_page(page)

            # A PDF without an Info dictionary yields no metadata; passing
            # that on to add_metadata() would raise.
            metadata = pdf_reader.metadata
            if metadata:
                pdf_writer.add_metadata(metadata)

            # Save the compressed PDF
            with open(output_path, 'wb') as output_file:
                pdf_writer.write(output_file)

        # Get file sizes for comparison
        original_size = os.path.getsize(input_path)
        compressed_size = os.path.getsize(output_path)
        reduction = (1 - compressed_size / original_size) * 100

        return True, f"使用PyPDF2压缩成功!原文件大小: {original_size/1024:.1f} KB, 压缩后大小: {compressed_size/1024:.1f} KB, 减少: {reduction:.1f}%"

    except Exception as e:
        return False, f"PyPDF2压缩失败: {str(e)}"

def _pymupdf_shrink_images(doc, page, page_num, quality, done_xrefs):
    """
    Re-encode the larger images of one page as JPEGs of the given quality.

    Args:
        doc: The open PyMuPDF document that owns ``page``
        page: The page whose images should be re-encoded
        page_num (int): Zero-based page index, used only in warning messages
        quality (int): JPEG quality handed to Pillow (lower means smaller)
        done_xrefs (set): Image xrefs already handled; updated in place so an
            image shared by several pages is only re-encoded once
    """
    from PIL import Image
    import io

    # Page.replace_image() is missing from older PyMuPDF releases; without it
    # the image data cannot be swapped, so the page is left untouched.
    if not hasattr(page, "replace_image"):
        return

    min_size = 100  # images with a side below this are not worth touching

    for img_index, img_info in enumerate(page.get_images(full=True)):
        xref, smask = img_info[0], img_info[1]
        if xref in done_xrefs:
            continue
        done_xrefs.add(xref)

        # A plain JPEG cannot carry the soft mask (transparency)
        if smask:
            continue

        try:
            image_bytes = doc.extract_image(xref)["image"]
            img = Image.open(io.BytesIO(image_bytes))

            if img.width < min_size or img.height < min_size:
                continue

            # JPEG cannot store alpha or palette data
            if img.mode not in ("RGB", "L", "CMYK"):
                img = img.convert("RGB")

            output_buffer = io.BytesIO()
            img.save(output_buffer, format='JPEG', quality=quality, optimize=True)
            jpeg_bytes = output_buffer.getvalue()

            # Only swap the image when the re-encoded version is really smaller
            if len(jpeg_bytes) < len(image_bytes):
                page.replace_image(xref, stream=jpeg_bytes)

        except Exception as e:
            print(f"Warning: Could not process image {img_index} on page {page_num+1}: {e}")


def compress_pdf_with_pymupdf(input_path, output_path, quality=30):
    """
    Fallback method to compress PDF using PyMuPDF when pikepdf fails.

    Images are re-encoded as JPEG when the installed PyMuPDF supports image
    replacement; the document is then saved with garbage collection and
    deflate compression.

    Args:
        input_path (str): Path to the input PDF file
        output_path (str): Path to save the compressed PDF file
        quality (int): Quality for images (0-100, lower means smaller file size)

    Returns:
        tuple: (bool success, str message)
    """
    try:
        import fitz  # PyMuPDF

        doc = fitz.open(input_path)
        try:
            done_xrefs = set()
            for page_num in range(len(doc)):
                _pymupdf_shrink_images(doc, doc[page_num], page_num, quality, done_xrefs)

            # NOTE(review): recent MuPDF releases reportedly dropped
            # linearisation support - confirm that linear=True is accepted by
            # the installed PyMuPDF version.
            doc.save(
                output_path,
                garbage=4,  # Garbage collection: clean up unused objects
                deflate=True,  # Use deflate compression where possible
                clean=True,  # Clean content streams
                linear=True,  # Optimize for web viewing
            )
        finally:
            # Release the document even when processing or saving failed
            doc.close()

        # Get file sizes for comparison
        original_size = os.path.getsize(input_path)
        compressed_size = os.path.getsize(output_path)
        reduction = (1 - compressed_size / original_size) * 100

        return True, f"使用PyMuPDF压缩成功!原文件大小: {original_size/1024:.1f} KB, 压缩后大小: {compressed_size/1024:.1f} KB, 减少: {reduction:.1f}%"

    except Exception as e:
        return False, f"PyMuPDF压缩失败: {str(e)}"

def compress_pdf(input_path, output_path, quality=30):
    """
    Compress a PDF file by reducing the quality of embedded images.

    pikepdf is tried first: suitable images are downsampled, re-encoded as
    JPEG and written back into their image streams. If that fails, or no image
    could be processed while some were skipped, the PyMuPDF method is tried
    and finally the PyPDF2 method.

    Args:
        input_path (str): Path to the input PDF file
        output_path (str): Path to save the compressed PDF file
        quality (int): JPEG quality for images (0-100, lower means smaller file size)

    Returns:
        tuple: (bool success, str message)
    """
    try:
        # First try with pikepdf
        try:
            from PIL import Image
            import io
            from pikepdf import Name

            max_size = 1000  # Maximum dimension in pixels

            # Track if we've processed any images successfully
            processed_images = 0
            skipped_images = 0
            seen_objects = set()

            with Pdf.open(input_path) as pdf:
                for page_num, page in enumerate(pdf.pages):
                    # Copy the items so the mapping is not walked while streams change
                    for name, raw_image in list(page.images.items()):
                        try:
                            # An image shared by several pages is one object;
                            # re-encode it only once.
                            objgen = raw_image.objgen
                            if objgen in seen_objects:
                                continue
                            seen_objects.add(objgen)

                            image = PdfImage(raw_image)

                            # Palette images, stencil masks and images relying on
                            # /Decode or colour-key /Mask entries do not survive a
                            # lossy JPEG round trip.
                            if (image.indexed or image.image_mask
                                    or raw_image.get("/Decode") is not None
                                    or raw_image.get("/Mask") is not None):
                                skipped_images += 1
                                continue

                            # Decode via pikepdf; the raw stream bytes are not
                            # an image file that Pillow could open directly.
                            img = image.as_pil_image()

                            # Downsample large images (reduce resolution)
                            if img.width > max_size or img.height > max_size:
                                ratio = min(max_size / img.width, max_size / img.height)
                                new_size = (max(1, int(img.width * ratio)), max(1, int(img.height * ratio)))
                                img = img.resize(new_size, Image.LANCZOS)

                            # Convert to RGB if RGBA (to avoid issues with JPEG)
                            if img.mode == 'RGBA':
                                img = img.convert('RGB')

                            # Other modes (CMYK, bilevel, ...) are left alone
                            # rather than risking a colour shift.
                            if img.mode not in ('RGB', 'L'):
                                skipped_images += 1
                                continue

                            output_buffer = io.BytesIO()
                            img.save(output_buffer, format='JPEG', quality=quality, optimize=True)
                            jpeg_bytes = output_buffer.getvalue()

                            # Keep the stored stream when it is already no bigger
                            if len(jpeg_bytes) >= len(raw_image.read_raw_bytes()):
                                continue

                            # Replace the stream data and make the image
                            # dictionary describe the new JPEG.
                            raw_image.write(jpeg_bytes, filter=Name.DCTDecode)
                            raw_image.Width = img.width
                            raw_image.Height = img.height
                            raw_image.BitsPerComponent = 8
                            raw_image.ColorSpace = Name.DeviceGray if img.mode == 'L' else Name.DeviceRGB
                            processed_images += 1

                        except Exception as e:
                            skipped_images += 1
                            print(f"Warning: Could not process image {name} on page {page_num+1}: {e}")

                # If we didn't process any images successfully, raise an exception to try the fallback method
                if processed_images == 0 and skipped_images > 0:
                    raise Exception(f"Could not process any images in the PDF, {skipped_images} images were skipped")

                # Save the compressed PDF
                pdf.save(output_path)

            # Get file sizes for comparison
            original_size = os.path.getsize(input_path)
            compressed_size = os.path.getsize(output_path)
            reduction = (1 - compressed_size / original_size) * 100

            return True, f"压缩成功!原文件大小: {original_size/1024:.1f} KB, 压缩后大小: {compressed_size/1024:.1f} KB, 减少: {reduction:.1f}%"

        except Exception as e:
            print(f"pikepdf compression failed: {str(e)}")
            print("Trying fallback method with PyMuPDF...")

            # Try fallback method with PyMuPDF
            success, message = compress_pdf_with_pymupdf(input_path, output_path, quality)

            # If PyMuPDF also fails, try the most basic method with PyPDF2
            if not success:
                print(f"PyMuPDF compression failed: {message}")
                print("Trying final fallback method with PyPDF2...")
                return compress_pdf_with_pypdf2(input_path, output_path, quality)

            return success, message

    except Exception as e:
        return False, f"压缩失败: {str(e)}"

def main():
    """
    Command-line entry point: parse arguments, run the chosen compression
    method and print its result message.

    Returns:
        int: 0 when compression succeeded, 1 when it failed. Invalid arguments
        make argparse exit with status 2 before any file is touched.
    """
    parser = argparse.ArgumentParser(description='压缩PDF文件,降低图像质量以减小文件大小')
    parser.add_argument('input', help='输入PDF文件路径')
    parser.add_argument('-o', '--output', help='输出PDF文件路径 (默认为添加"_compressed"后缀的输入文件)')
    parser.add_argument('-q', '--quality', type=int, default=30, help='JPEG图像质量 (0-100, 默认: 30)')
    parser.add_argument('-m', '--method', choices=['auto', 'pikepdf', 'pymupdf', 'pypdf2', 'aggressive', 'super_aggressive'], 
                       default='auto', help='压缩方法 (默认: auto)')
    
    args = parser.parse_args()
    
    # Reject values outside the documented JPEG quality range up front
    if not 0 <= args.quality <= 100:
        parser.error('JPEG图像质量必须在 0 到 100 之间')
    
    # Set default output path if not specified
    if not args.output:
        filename, ext = os.path.splitext(args.input)
        args.output = f"{filename}_compressed{ext}"
    
    # Writing onto the input would overwrite the source while it is being read
    if os.path.abspath(args.input) == os.path.abspath(args.output):
        parser.error('输出文件不能与输入文件相同')
    
    print(f"正在压缩 {args.input} 到 {args.output}...")
    
    # Choose compression method based on argument
    if args.method == 'auto':
        success, message = compress_pdf(args.input, args.output, args.quality)
    elif args.method == 'pikepdf':
        try:
            # Plain pikepdf re-save (no image processing); the context manager
            # closes the file even when saving fails.
            with Pdf.open(args.input) as pdf:
                pdf.save(args.output)
            original_size = os.path.getsize(args.input)
            compressed_size = os.path.getsize(args.output)
            reduction = (1 - compressed_size / original_size) * 100
            success, message = True, f"使用PikePDF压缩成功!原文件大小: {original_size/1024:.1f} KB, 压缩后大小: {compressed_size/1024:.1f} KB, 减少: {reduction:.1f}%"
        except Exception as e:
            success, message = False, f"PikePDF压缩失败: {str(e)}"
    elif args.method == 'pymupdf':
        success, message = compress_pdf_with_pymupdf(args.input, args.output, args.quality)
    elif args.method == 'pypdf2':
        success, message = compress_pdf_with_pypdf2(args.input, args.output, args.quality)
    elif args.method == 'aggressive':
        # The aggressive modes cap the quality regardless of the -q value
        success, message = compress_pdf_aggressive(args.input, args.output, min(args.quality, 15))
    else:  # 'super_aggressive' - argparse `choices` rules out anything else
        success, message = compress_pdf_super_aggressive(args.input, args.output, min(args.quality, 5))
    
    print(message)
    return 0 if success else 1

if __name__ == "__main__":
    # Propagate failure to the shell through the exit status
    raise SystemExit(main())


  2. 图形界面程序,启动这个程序进行操作
#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
PDF Compressor GUI - A simple utility with GUI to reduce PDF file size by lowering image quality
"""

import os
import sys
import tkinter as tk
from tkinter import filedialog, ttk, messagebox
import threading
import traceback
from pdf_compressor import compress_pdf, compress_pdf_with_pymupdf, compress_pdf_with_pypdf2, compress_pdf_aggressive, compress_pdf_super_aggressive

class PDFCompressorApp:
    def __init__(self, root):
        """
        Build the main window: input/output file pickers, quality slider,
        compression-method selector, compress button, progress bar, status
        line and the scrollable results log.

        Args:
            root: The Tk window that hosts the application
        """
        self.root = root
        self.root.title("PDF 压缩工具")
        self.root.geometry("600x550")
        self.root.resizable(True, True)
        
        # Set style
        self.style = ttk.Style()
        self.style.configure("TButton", font=("Arial", 10))
        self.style.configure("TLabel", font=("Arial", 10))
        
        # Create main frame
        main_frame = ttk.Frame(root, padding="20")
        main_frame.pack(fill=tk.BOTH, expand=True)
        
        # Input file section
        input_frame = ttk.Frame(main_frame)
        input_frame.pack(fill=tk.X, pady=10)
        
        ttk.Label(input_frame, text="输入PDF文件:").grid(row=0, column=0, sticky=tk.W, pady=5)
        
        self.input_path_var = tk.StringVar()
        input_entry = ttk.Entry(input_frame, textvariable=self.input_path_var, width=50)
        input_entry.grid(row=0, column=1, padx=5, pady=5)
        
        browse_btn = ttk.Button(input_frame, text="浏览...", command=self.browse_input)
        browse_btn.grid(row=0, column=2, padx=5, pady=5)
        
        # Output file section
        output_frame = ttk.Frame(main_frame)
        output_frame.pack(fill=tk.X, pady=10)
        
        ttk.Label(output_frame, text="输出PDF文件:").grid(row=0, column=0, sticky=tk.W, pady=5)
        
        self.output_path_var = tk.StringVar()
        output_entry = ttk.Entry(output_frame, textvariable=self.output_path_var, width=50)
        output_entry.grid(row=0, column=1, padx=5, pady=5)
        
        browse_output_btn = ttk.Button(output_frame, text="浏览...", command=self.browse_output)
        browse_output_btn.grid(row=0, column=2, padx=5, pady=5)
        
        # Quality slider
        quality_frame = ttk.Frame(main_frame)
        quality_frame.pack(fill=tk.X, pady=10)
        
        ttk.Label(quality_frame, text="图像质量:").pack(side=tk.LEFT, padx=5)
        
        self.quality_var = tk.IntVar(value=30)
        # ttk.Scale reports fractional positions; snapping the variable back to
        # a whole number keeps the label next to the slider free of decimals.
        quality_slider = ttk.Scale(quality_frame, from_=5, to=95, orient=tk.HORIZONTAL, 
                                  variable=self.quality_var, length=300,
                                  command=lambda value: self.quality_var.set(round(float(value))))
        quality_slider.pack(side=tk.LEFT, padx=5, fill=tk.X, expand=True)
        
        quality_label = ttk.Label(quality_frame, textvariable=self.quality_var)
        quality_label.pack(side=tk.LEFT, padx=5)
        
        ttk.Label(quality_frame, text="(较低的值 = 更小的文件大小)").pack(side=tk.LEFT, padx=5)
        
        # Compression method frame
        method_frame = ttk.LabelFrame(main_frame, text="压缩方法")
        method_frame.pack(fill=tk.X, pady=10, padx=5)
        
        self.method_var = tk.StringVar(value="auto")
        ttk.Radiobutton(method_frame, text="自动 (推荐)", variable=self.method_var, 
                       value="auto").grid(row=0, column=0, padx=10, pady=5, sticky=tk.W)
        ttk.Radiobutton(method_frame, text="PikePDF (高质量压缩)", variable=self.method_var, 
                       value="pikepdf").grid(row=0, column=1, padx=10, pady=5, sticky=tk.W)
        ttk.Radiobutton(method_frame, text="PyMuPDF (中等压缩)", variable=self.method_var, 
                       value="pymupdf").grid(row=1, column=0, padx=10, pady=5, sticky=tk.W)
        ttk.Radiobutton(method_frame, text="PyPDF2 (基本压缩,最兼容)", variable=self.method_var, 
                       value="pypdf2").grid(row=1, column=1, padx=10, pady=5, sticky=tk.W)
        # The two lossy modes pop up a warning as soon as they are selected
        ttk.Radiobutton(method_frame, text="激进压缩 (最小文件,低质量)", variable=self.method_var, 
                       value="aggressive", command=self.show_aggressive_warning).grid(row=2, column=0, padx=10, pady=5, sticky=tk.W)
        ttk.Radiobutton(method_frame, text="超级激进压缩 (页面减半,极低质量)", variable=self.method_var, 
                       value="super_aggressive", command=self.show_super_aggressive_warning).grid(row=2, column=1, padx=10, pady=5, sticky=tk.W)
        
        # Compression button
        compress_btn = ttk.Button(main_frame, text="压缩PDF", command=self.start_compression)
        compress_btn.pack(pady=15)
        
        # Progress bar
        self.progress_var = tk.DoubleVar()
        self.progress = ttk.Progressbar(main_frame, variable=self.progress_var, maximum=100)
        self.progress.pack(fill=tk.X, pady=10)
        
        # Status label
        self.status_var = tk.StringVar(value="准备就绪")
        status_label = ttk.Label(main_frame, textvariable=self.status_var, font=("Arial", 10))
        status_label.pack(pady=5)
        
        # Results frame
        results_frame = ttk.LabelFrame(main_frame, text="压缩结果")
        results_frame.pack(fill=tk.BOTH, expand=True, pady=10)
        
        # Add scrollbar to results text
        result_scroll = ttk.Scrollbar(results_frame)
        result_scroll.pack(side=tk.RIGHT, fill=tk.Y)
        
        self.result_text = tk.Text(results_frame, height=8, width=70, font=("Consolas", 10),
                                 yscrollcommand=result_scroll.set)
        self.result_text.pack(fill=tk.BOTH, expand=True, padx=5, pady=5)
        result_scroll.config(command=self.result_text.yview)
    
    def show_aggressive_warning(self):
        """Show a warning dialog describing the trade-offs of aggressive compression."""
        dialog_title = "激进压缩警告"
        dialog_text = (
            "激进压缩模式会大幅降低图像质量和分辨率,以获得最小的文件大小。\n\n"
            "这可能导致图像变得模糊或像素化,但文件大小将显著减小。\n\n"
            "建议仅在文件大小是最重要因素时使用此模式。"
        )
        messagebox.showwarning(dialog_title, dialog_text)
    
    def show_super_aggressive_warning(self):
        """Show a warning dialog describing what super aggressive compression does."""
        dialog_title = "超级激进压缩警告"
        dialog_text = (
            "超级激进压缩模式将:\n\n"
            "1. 将页面尺寸减小为原始尺寸的一半\n"
            "2. 使用极低的图像质量设置\n"
            "3. 将图像分辨率降低到最小\n\n"
            "这将导致文本变小,图像质量显著降低,但文件大小将极大减小。\n\n"
            "此模式适用于需要最小文件大小且不太关心视觉质量的情况。"
        )
        messagebox.showwarning(dialog_title, dialog_text)
        
    def browse_input(self):
        """
        Ask the user for the input PDF and pre-fill the output field with the
        same path plus a "_compressed" suffix before the extension.
        """
        pdf_types = [("PDF文件", "*.pdf"), ("所有文件", "*.*")]
        chosen = filedialog.askopenfilename(title="选择PDF文件", filetypes=pdf_types)

        # Dialog cancelled: leave both fields as they are
        if not chosen:
            return

        self.input_path_var.set(chosen)

        # Derive the suggested output path from the value now stored in the field
        current = self.input_path_var.get()
        if not current:
            return
        stem, suffix = os.path.splitext(current)
        self.output_path_var.set(f"{stem}_compressed{suffix}")
    
    def browse_output(self):
        """Ask the user where to save the compressed PDF and store the chosen path."""
        pdf_types = [("PDF文件", "*.pdf"), ("所有文件", "*.*")]
        target = filedialog.asksaveasfilename(
            title="保存压缩后的PDF",
            defaultextension=".pdf",
            filetypes=pdf_types,
        )

        # Dialog cancelled: keep the current output path
        if not target:
            return
        self.output_path_var.set(target)
    
    def start_compression(self):
        """
        Validate the form, optionally lower the quality for the aggressive
        modes, reset the result area and start the compression in a
        background thread.

        Nothing is started when a path is missing, the input file does not
        exist, the output path equals the input path, or the user declines to
        continue with a non-".pdf" input.
        """
        input_path = self.input_path_var.get()
        output_path = self.output_path_var.get()
        quality = self.quality_var.get()
        method = self.method_var.get()
        
        if not input_path:
            messagebox.showerror("错误", "请选择输入PDF文件")
            return
        
        if not output_path:
            messagebox.showerror("错误", "请选择输出PDF文件位置")
            return
        
        if not os.path.exists(input_path):
            messagebox.showerror("错误", f"输入文件不存在: {input_path}")
            return
        
        # Writing onto the input would overwrite the source while it is still
        # being read, which can destroy the original file.
        same_file = (os.path.normcase(os.path.abspath(input_path))
                     == os.path.normcase(os.path.abspath(output_path)))
        if same_file:
            messagebox.showerror("错误", "输出文件不能与输入文件相同")
            return
            
        # Check if input is actually a PDF
        if not input_path.lower().endswith('.pdf'):
            if not messagebox.askyesno("警告", "输入文件不是PDF格式,是否继续?"):
                return
        
        # If aggressive mode is selected with high quality, suggest lowering quality
        if method == "aggressive" and quality > 20:
            if messagebox.askyesno("建议", f"激进压缩模式通常使用较低的质量值以获得最佳效果。\n\n您当前设置的质量为 {quality},是否要自动降低到 10?"):
                quality = 10
                self.quality_var.set(10)
        
        # If super aggressive mode is selected with high quality, suggest lowering quality
        if method == "super_aggressive" and quality > 10:
            if messagebox.askyesno("建议", f"超级激进压缩模式通常使用极低的质量值以获得最佳效果。\n\n您当前设置的质量为 {quality},是否要自动降低到 5?"):
                quality = 5
                self.quality_var.set(5)
        
        # Clear previous results
        self.result_text.delete(1.0, tk.END)
        
        # Update UI
        self.status_var.set("正在压缩...")
        self.progress_var.set(10)
        
        # Run the compression off the UI thread so the window stays responsive;
        # daemon=True lets the application exit even if the worker is still busy.
        threading.Thread(target=self.compress_task, 
                        args=(input_path, output_path, quality, method), 
                        daemon=True).start()
    
    def compress_task(self, input_path, output_path, quality, method):
        """Run one compression job and report progress and results in the UI.

        Dispatches to the compression routine selected by ``method``, logs the
        run to ``self.result_text``, updates ``self.status_var`` and
        ``self.progress_var``, and on success asks whether to open the output.

        Args:
            input_path (str): Path of the PDF to compress.
            output_path (str): Path the compressed PDF is written to; its
                parent directory is created when it does not exist.
            quality (int): Requested image quality. Capped at 15 for
                ``"aggressive"`` and at 5 for ``"super_aggressive"``; not
                used by the ``"pikepdf"`` branch.
            method (str): ``"auto"``, ``"pikepdf"``, ``"pymupdf"``,
                ``"pypdf2"``, ``"aggressive"`` or ``"super_aggressive"``.
                Any other value is reported as a failure.

        Returns:
            None. Exceptions are caught and written to the result pane
            together with a traceback.
        """
        # NOTE(review): this method is launched via threading.Thread, yet it
        # updates Tk variables/widgets and opens a messagebox directly.
        # Tkinter is generally expected to be driven from the main thread;
        # consider marshalling UI updates through `after()`.
        try:
            self.progress_var.set(20)
            header_lines = (
                "开始压缩...\n",
                f"输入文件: {input_path}\n",
                f"输出文件: {output_path}\n",
                f"质量设置: {quality}\n",
                f"压缩方法: {method}\n",
                "-" * 50 + "\n",
            )
            for line in header_lines:
                self.result_text.insert(tk.END, line)
            self.result_text.see(tk.END)

            # Make sure the output directory exists. exist_ok avoids failing
            # when the directory appears between a check and the creation.
            output_dir = os.path.dirname(output_path)
            if output_dir:
                os.makedirs(output_dir, exist_ok=True)

            # Call the appropriate compression function based on method
            if method == "auto":
                self.progress_var.set(30)
                success, message = compress_pdf(input_path, output_path, quality)
            elif method == "pikepdf":
                self.progress_var.set(30)
                try:
                    from pikepdf import Pdf
                    # The context manager closes the source file even when
                    # save() raises, so the input is never left open/locked.
                    with Pdf.open(input_path) as pdf:
                        pdf.save(output_path)
                    original_size = os.path.getsize(input_path)
                    compressed_size = os.path.getsize(output_path)
                    # Guard the division so an empty original cannot raise
                    reduction = (
                        (1 - compressed_size / original_size) * 100
                        if original_size
                        else 0.0
                    )
                    success, message = True, f"使用PikePDF压缩成功!原文件大小: {original_size/1024:.1f} KB, 压缩后大小: {compressed_size/1024:.1f} KB, 减少: {reduction:.1f}%"
                except Exception as e:
                    success, message = False, f"PikePDF压缩失败: {str(e)}"
            elif method == "pymupdf":
                self.progress_var.set(30)
                success, message = compress_pdf_with_pymupdf(input_path, output_path, quality)
            elif method == "pypdf2":
                self.progress_var.set(30)
                success, message = compress_pdf_with_pypdf2(input_path, output_path, quality)
            elif method == "aggressive":
                self.progress_var.set(30)
                # Aggressive mode uses a low quality value, never above 15
                actual_quality = min(quality, 15)
                if actual_quality != quality:
                    self.result_text.insert(tk.END, f"注意: 激进模式下已将质量值从 {quality} 自动调整为 {actual_quality}\n")
                success, message = compress_pdf_aggressive(input_path, output_path, actual_quality)
            elif method == "super_aggressive":
                self.progress_var.set(30)
                # Super aggressive mode uses a very low quality value, never above 5
                actual_quality = min(quality, 5)
                if actual_quality != quality:
                    self.result_text.insert(tk.END, f"注意: 超级激进模式下已将质量值从 {quality} 自动调整为 {actual_quality}\n")
                success, message = compress_pdf_super_aggressive(input_path, output_path, actual_quality)
            else:
                success, message = False, f"未知的压缩方法: {method}"

            self.progress_var.set(100)

            if success:
                self.status_var.set("压缩完成")
                self.result_text.insert(tk.END, message + "\n")

                # Calculate compression ratio (0 when the output is empty)
                original_size = os.path.getsize(input_path)
                compressed_size = os.path.getsize(output_path)
                ratio = original_size / compressed_size if compressed_size > 0 else 0

                self.result_text.insert(tk.END, f"压缩比: {ratio:.2f}x\n")

                if ratio >= 5:
                    self.result_text.insert(tk.END, "✓ 已达到目标压缩比 (5倍或更高)\n")
                else:
                    self.result_text.insert(tk.END, f"⚠ 未达到目标压缩比 (5倍)。\n")

                    # Ratio too low: point at the strongest method if not already used
                    if method != "super_aggressive":
                        self.result_text.insert(tk.END, "建议: 尝试使用'超级激进压缩'方法以获得更高的压缩比。\n")

                # Check if output file is actually smaller
                if compressed_size >= original_size:
                    self.result_text.insert(tk.END, "⚠ 警告: 压缩后的文件比原文件更大或相同大小。\n")
                    if method != "super_aggressive":
                        self.result_text.insert(tk.END, "建议尝试'超级激进压缩'方法。\n")

                # Ask if user wants to open the file
                if messagebox.askyesno("完成", "压缩完成!是否打开压缩后的文件?"):
                    self.open_file(output_path)
            else:
                self.status_var.set("压缩失败")
                self.result_text.insert(tk.END, message + "\n")
                self.result_text.insert(tk.END, "请尝试使用不同的压缩方法或降低质量值。\n")

                # Suggest a fallback: the strongest method first; if one of
                # the aggressive methods itself failed, the PyPDF2 method.
                if method not in ("aggressive", "super_aggressive"):
                    self.result_text.insert(tk.END, "建议尝试使用'超级激进压缩'方法,它能提供最高的压缩率。\n")
                else:
                    self.result_text.insert(tk.END, "建议尝试使用 PyPDF2 方法,它具有最好的兼容性。\n")

            self.result_text.see(tk.END)

        except Exception as e:
            self.status_var.set("发生错误")
            self.result_text.insert(tk.END, f"错误: {str(e)}\n")
            # Add traceback for debugging
            self.result_text.insert(tk.END, traceback.format_exc())
            self.result_text.see(tk.END)
    
    def open_file(self, file_path):
        """Open the file using the default system application.

        Uses ``os.startfile`` where the platform provides it (Windows), the
        ``open`` command on macOS and ``xdg-open`` on other systems. Any
        failure is shown in an error dialog instead of being raised.

        Args:
            file_path (str): Path of the file to open.
        """
        try:
            import subprocess
            import sys

            if hasattr(os, "startfile"):
                # os.startfile only exists on Windows
                os.startfile(file_path)
            elif sys.platform == "darwin":
                subprocess.run(["open", file_path], check=True)
            else:
                # Argument list (no shell) keeps unusual file names safe
                subprocess.run(["xdg-open", file_path], check=True)
        except Exception as e:
            messagebox.showerror("错误", f"无法打开文件: {str(e)}")

def main():
    """Build the Tk root window, attach the compressor UI and run the event loop."""
    window = tk.Tk()
    # The application object is bound to a local while the event loop runs.
    gui = PDFCompressorApp(window)
    window.mainloop()

# Launch the GUI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

©著作权归作者所有,转载或内容合作请联系作者
平台声明:文章内容(如有图片或视频亦包括在内)由作者上传并发布,文章内容仅代表作者本人观点,简书系信息发布平台,仅提供信息存储服务。
  • 序言:七十年代末,一起剥皮案震惊了整个滨河市,随后出现的几起案子,更是在滨河造成了极大的恐慌,老刑警刘岩,带你破解...
    沈念sama阅读 230,527评论 6 544
  • 序言:滨河连续发生了三起死亡事件,死亡现场离奇诡异,居然都是意外死亡,警方通过查阅死者的电脑和手机,发现死者居然都...
    沈念sama阅读 99,687评论 3 429
  • 文/潘晓璐 我一进店门,熙熙楼的掌柜王于贵愁眉苦脸地迎上来,“玉大人,你说我怎么就摊上这事。” “怎么了?”我有些...
    开封第一讲书人阅读 178,640评论 0 383
  • 文/不坏的土叔 我叫张陵,是天一观的道长。 经常有香客问我,道长,这世上最难降的妖魔是什么? 我笑而不...
    开封第一讲书人阅读 63,957评论 1 318
  • 正文 为了忘掉前任,我火速办了婚礼,结果婚礼上,老公的妹妹穿的比我还像新娘。我一直安慰自己,他们只是感情好,可当我...
    茶点故事阅读 72,682评论 6 413
  • 文/花漫 我一把揭开白布。 她就那样静静地躺着,像睡着了一般。 火红的嫁衣衬着肌肤如雪。 梳的纹丝不乱的头发上,一...
    开封第一讲书人阅读 56,011评论 1 329
  • 那天,我揣着相机与录音,去河边找鬼。 笑死,一个胖子当着我的面吹牛,可吹牛的内容都是我干的。 我是一名探鬼主播,决...
    沈念sama阅读 44,009评论 3 449
  • 文/苍兰香墨 我猛地睁开眼,长吁一口气:“原来是场噩梦啊……” “哼!你这毒妇竟也来了?” 一声冷哼从身侧响起,我...
    开封第一讲书人阅读 43,183评论 0 290
  • 序言:老挝万荣一对情侣失踪,失踪者是张志新(化名)和其女友刘颖,没想到半个月后,有当地人在树林里发现了一具尸体,经...
    沈念sama阅读 49,714评论 1 336
  • 正文 独居荒郊野岭守林人离奇死亡,尸身上长有42处带血的脓包…… 初始之章·张勋 以下内容为张勋视角 年9月15日...
    茶点故事阅读 41,435评论 3 359
  • 正文 我和宋清朗相恋三年,在试婚纱的时候发现自己被绿了。 大学时的朋友给我发了我未婚夫和他白月光在一起吃饭的照片。...
    茶点故事阅读 43,665评论 1 374
  • 序言:一个原本活蹦乱跳的男人离奇死亡,死状恐怖,灵堂内的尸体忽然破棺而出,到底是诈尸还是另有隐情,我是刑警宁泽,带...
    沈念sama阅读 39,148评论 5 365
  • 正文 年R本政府宣布,位于F岛的核电站,受9级特大地震影响,放射性物质发生泄漏。R本人自食恶果不足惜,却给世界环境...
    茶点故事阅读 44,838评论 3 350
  • 文/蒙蒙 一、第九天 我趴在偏房一处隐蔽的房顶上张望。 院中可真热闹,春花似锦、人声如沸。这庄子的主人今日做“春日...
    开封第一讲书人阅读 35,251评论 0 28
  • 文/苍兰香墨 我抬头看了看天上的太阳。三九已至,却和暖如春,着一层夹袄步出监牢的瞬间,已是汗流浃背。 一阵脚步声响...
    开封第一讲书人阅读 36,588评论 1 295
  • 我被黑心中介骗来泰国打工, 没想到刚下飞机就差点儿被人妖公主榨干…… 1. 我叫王不留,地道东北人。 一个月前我还...
    沈念sama阅读 52,379评论 3 400
  • 正文 我出身青楼,却偏偏与公主长得像,于是被迫代替她去往敌国和亲。 传闻我的和亲对象是个残疾皇子,可洞房花烛夜当晚...
    茶点故事阅读 48,627评论 2 380

推荐阅读更多精彩内容