flutter开发windows项目之OCR图像识别

flutter开发windows项目之OCR图像识别

在 Dart/Flutter 项目中,有时需要进行图像处理和文字识别,以下介绍两种不同的实现方式:

一、使用Tesseract_OCR识别图像:

这种方式识别准确度不高,适用于对识别准确度要求不高的项目。
1、下载Tesseract-ocr相应版本,并安装到本地:https://github.com/tesseract-ocr/tesseract/releases/tag/5.5.0 (下面的代码使用了 `-l chi_sim`,因此需要确保已安装 chi_sim 简体中文语言数据)
2、使用shell命令行运行tesseract.exe,完成图像识别:

import 'dart:io';
import 'dart:ffi';
import 'package:win32/win32.dart';
import 'package:process_run/process_run.dart';
import 'package:image/image.dart' as img;
/// Runs the Tesseract command-line tool on the image at [imagePath] and
/// returns the recognized text.
///
/// The image is first upscaled to twice its width, converted to grayscale and
/// binarized. The processed copy is written next to the original as
/// `<name>_processed.png`, and Tesseract writes its result to `ocr.txt` in the
/// hard-coded screenshots folder under the user's profile directory.
///
/// Returns `null` when the image cannot be decoded, when `USERPROFILE` is not
/// set, when Tesseract exits with a non-zero code, or when any step throws.
Future<String?> performOCR(String imagePath) async {
  try {
    // Image pre-processing.
    final bytes = await File(imagePath).readAsBytes();
    var image = img.decodeImage(bytes);
    if (image == null) {
      Log.d('图片解码失败');
      return null;
    }
    // Upscale to twice the original width.
    image = img.copyResize(image, width: image.width * 2);
    // Convert to grayscale.
    final grayImage = img.grayscale(image, amount: 1);
    // Binarize. 128 would be the usual midpoint; the original author lowered
    // the threshold to 100 so that more light/whitish text is kept.
    const threshold = 100;
    for (int y = 0; y < grayImage.height; y++) {
      for (int x = 0; x < grayImage.width; x++) {
        final pixel = grayImage.getPixel(x, y);
        final luminance = img.getLuminance(pixel);
        final binaryColor = luminance < threshold
            ? img.ColorUint8.rgb(0, 0, 0) // darker than threshold -> black
            : img.ColorUint8.rgb(255, 255, 255); // otherwise -> white
        grayImage.setPixel(x, y, binaryColor);
      }
    }
    // Optional idea from the original author: add a sharpening step here to
    // strengthen the text edges.
    // image = img.gaussianBlur(image, radius: 1);

    // Save the processed image. Only the file extension is stripped, so a
    // non-.png input is never overwritten and a ".png" occurring elsewhere in
    // the path is left untouched.
    final dotIndex = imagePath.lastIndexOf('.');
    final separatorIndex = imagePath.lastIndexOf(RegExp(r'[\\/]'));
    final stem =
        dotIndex > separatorIndex ? imagePath.substring(0, dotIndex) : imagePath;
    final processedPath = '${stem}_processed.png';
    await File(processedPath).writeAsBytes(img.encodePng(grayImage));

    // Locate the user's profile directory.
    final userHome = Platform.environment['USERPROFILE'];
    if (userHome == null) {
      // Without this guard the path below would start with the text "null".
      Log.d('未找到 USERPROFILE 环境变量');
      return null;
    }
    // Base name of the output file; Tesseract appends ".txt" to it.
    final outputBase =
        '$userHome\\Documents\\leidian9\\Pictures\\Screenshots\\ocr';

    // Invoke the Tesseract command-line tool.
    final result = await Process.run(
      // Replace with your own Tesseract installation path.
      r'C:\Program Files\Tesseract-OCR\tesseract.exe',
      [
        processedPath,
        outputBase,
        '-l', 'chi_sim',
        // Config variables must be introduced with "-c"; a bare trailing
        // argument is interpreted by Tesseract as a config file name.
        '-c',
        'tessedit_char_whitelist=0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ',
      ],
    );

    if (result.exitCode == 0) {
      // Read the text file Tesseract produced.
      final outputFile = File('$outputBase.txt');
      final text = await outputFile.readAsString();
      Log.i('识别结果: $text');
      return text;
    } else {
      Log.d('Error: ${result.stderr}');
      return null;
    }
  } catch (e) {
    Log.d('OCR 预处理失败: $e');
    return null;
  }
}

二、使用百度PaddleOCR识别图像:

使用这种方式,识别准确度大幅提高,但它依赖本地python环境,并且配置复杂度稍高:
1、安装python-3.9.12版本。由于paddle最高支持python-3.10,安装3.9比较合适。下载地址:https://www.python.org/ftp/python/3.9.12/python-3.9.12-amd64.exe ,或者访问 https://www.python.org/downloads/windows/ 自行选择版本下载。
2、安装飞桨:https://www.paddlepaddle.org.cn/install/quick?docurl=/documentation/docs/zh/install/pip/windows-pip.html
选择windows,CPU版本,shell执行:

# 更新pip
python -m pip install --upgrade pip
python -m pip install paddlepaddle==2.6.2 -i https://www.paddlepaddle.org.cn/packages/stable/cpu/

3、安装paddleocr和相应依赖:

# 使用官方源安装
pip install "paddleocr>=2.0.1"
# 使用阿里源
pip install "paddleocr>=2.0.1" -i https://mirrors.aliyun.com/pypi/simple/
# 如果网络不好,增加超时时间
pip install "paddleocr>=2.0.1" -i https://mirrors.aliyun.com/pypi/simple/ --timeout 1000
# 安装依赖
pip install shapely scikit-image imgaug -i https://pypi.tuna.tsinghua.edu.cn/simple

4、安装完成后,进入 Python 交互式环境

python
测试PaddleOCR
>>>from paddleocr import PaddleOCR

# 初始化 PaddleOCR
>>>ocr = PaddleOCR(use_angle_cls=True, lang='ch')  # 使用中文模型

# 进行 OCR 识别(假设有一张名为 'test.png' 的图片)
>>>result = ocr.ocr('test.png', cls=True)

# 打印识别结果
>>>for line in result:
...     print(line) # 注意这里要按 Tab 键或者 4 个空格进行缩进

5、也可以创建一个script.py文件:

from paddleocr import PaddleOCR

# Build the OCR engine with angle classification enabled and the Chinese model
engine = PaddleOCR(use_angle_cls=True, lang='ch')

# Run recognition on the sample image
recognized = engine.ocr('ocr.png', cls=True)

# Print each entry of the result on its own line
for entry in recognized:
    print(entry)

6、运行文件:

python script.py

7、最后给出集成源码:
需要在pubspec.yaml中声明文件:assets/python/ocr_server.py:

import sys
import paddle
from paddleocr import PaddleOCR
import json
import threading
from concurrent.futures import ThreadPoolExecutor
import queue
import logging

# Logging configuration: every record is written to stdout as one JSON line so
# the parent process can tell log lines apart from recognition results.
class JsonLogFormatter(logging.Formatter):
    """Formats a log record as a single-line JSON object.

    The message is serialized with ``json.dumps`` instead of being pasted into
    a format string, so quotes, backslashes and newlines in the message cannot
    produce invalid JSON or split one record across several lines.
    """

    def format(self, record):
        message = record.getMessage()
        if record.exc_info:
            # Keep the traceback inside the JSON string rather than appending
            # it as extra raw lines after the object.
            message = f"{message}\n{self.formatException(record.exc_info)}"
        return json.dumps({"type": "log", "message": message}, ensure_ascii=False)


_log_handler = logging.StreamHandler(sys.stdout)  # standard output, not stderr
_log_handler.setFormatter(JsonLogFormatter())
logging.basicConfig(level=logging.INFO, handlers=[_log_handler])
logger = logging.getLogger(__name__)

# Run Paddle on the GPU when this build was compiled with CUDA, otherwise on the CPU
if paddle.device.is_compiled_with_cuda():
    paddle.device.set_device('gpu')
else:
    paddle.device.set_device('cpu')

# Thread pool used to write result files in the background
file_executor = ThreadPoolExecutor(max_workers=2)

class OCRWorker:
    """Owns one PaddleOCR engine and reports each result as a JSON line on stdout."""

    def __init__(self):
        self.ocr = PaddleOCR(
            use_angle_cls=False,
            lang='ch',
            # Only request the GPU when this Paddle build has CUDA support,
            # consistent with the module-level device selection.
            use_gpu=paddle.device.is_compiled_with_cuda(),
            enable_mkldnn=True,
            cpu_threads=4,
            det_db_thresh=0.3,
            det_db_box_thresh=0.3,
            det_limit_side_len=960,
            show_log=False
        )

    @staticmethod
    def _extract_texts(result):
        """Return the recognized strings from a PaddleOCR result, in order.

        ``result`` holds one entry per image; each entry is a list of
        ``[box, (text, score)]`` items. An entry that is ``None`` or empty
        (nothing detected in that image) contributes no text instead of
        raising.
        """
        texts = []
        for line in result or []:
            if not line:
                continue
            texts.extend(item[1][0] for item in line)
        return texts

    def save_result(self, image_path, text_content):
        """Write ``text_content`` to ``<image path without extension>_result.txt``.

        Failures are reported on stderr and otherwise ignored, because saving
        is a best-effort side task.
        """
        import os  # local import: os is not in this module's import block

        try:
            # Strip only the real extension so a non-.png image is never
            # overwritten and ".png" elsewhere in the path is left alone.
            stem, _ext = os.path.splitext(image_path)
            output_path = stem + '_result.txt'
            with open(output_path, 'w', encoding='utf-8') as f:
                f.write(text_content)
        except Exception as e:
            print(f"Error saving file: {str(e)}", file=sys.stderr)

    def process_image(self, image_path):
        """Recognize ``image_path`` and print a JSON status line to stdout.

        On success prints ``{"status": "success", "text": ...}`` and then
        schedules the text file to be written in the background; on any
        exception prints ``{"status": "error", "message": ...}``.
        """
        try:
            # Run OCR
            result = self.ocr.ocr(image_path, cls=False)

            # Collect the recognized text, one detected line per output line
            text_content = "\n".join(self._extract_texts(result))

            # Reply immediately with the JSON result
            print(json.dumps({
                "status": "success",
                "text": text_content
            }, ensure_ascii=False), flush=True)

            # Save the text file asynchronously
            file_executor.submit(self.save_result, image_path, text_content)

        except Exception as e:
            print(json.dumps({
                "status": "error",
                "message": str(e)
            }, ensure_ascii=False), flush=True)

def worker_thread():
    """Worker loop: take image paths from ``task_queue`` and run OCR on each.

    Each thread creates its own ``OCRWorker``. A ``None`` item is the signal
    to stop. Every item taken from the queue, including the stop signal, is
    marked done.
    """
    recognizer = OCRWorker()
    running = True
    while running:
        try:
            # Take the next job from the queue
            job = task_queue.get()
            if job is not None:
                # Recognize the image
                recognizer.process_image(job)
            else:
                # Stop signal
                running = False
        except Exception as exc:
            failure = {
                "status": "error",
                "message": str(exc)
            }
            print(json.dumps(failure, ensure_ascii=False), flush=True)
        finally:
            task_queue.task_done()

# Shared queue of image paths waiting to be recognized; main() puts paths on it
# and the worker threads take them off. A None item tells a worker to exit.
task_queue = queue.Queue()

def main():
    """Start the worker threads, then feed image paths from stdin to them.

    Each non-empty stdin line is treated as an image path and queued. On
    end-of-input or Ctrl+C every worker is sent a stop signal and joined.
    Any other exception is reported as a JSON error line on stdout.
    """
    # The Dart parent writes and decodes UTF-8, but on Windows Python uses the
    # ANSI code page for piped stdin/stdout by default, which corrupts
    # non-ASCII paths and recognized text. Switch both streams to UTF-8 before
    # anything has been read or written through them here.
    for stream in (sys.stdin, sys.stdout):
        reconfigure = getattr(stream, "reconfigure", None)
        if reconfigure is not None:
            reconfigure(encoding="utf-8")

    # Start the worker threads
    workers = []
    num_workers = 3

    for _ in range(num_workers):
        t = threading.Thread(target=worker_thread)
        t.daemon = True
        t.start()
        workers.append(t)

    # Announce start-up as a JSON line
    print(json.dumps({
        "type": "status",
        "message": "OCR服务已启动"
    }), flush=True)

    try:
        while True:
            image_path = input().strip()
            if not image_path:
                continue
            task_queue.put(image_path)

    except (EOFError, KeyboardInterrupt):
        # One stop signal per worker, then wait for all of them to finish
        for _ in workers:
            task_queue.put(None)
        for t in workers:
            t.join()
        file_executor.shutdown(wait=False)
    except Exception as e:
        print(json.dumps({
            "type": "error",
            "message": str(e)
        }, ensure_ascii=False), flush=True)

if __name__ == "__main__":
    main()

paddle_ocr_process.dart:

import 'dart:async';
import 'dart:convert';
import 'dart:io';
import 'package:path/path.dart' as path;
import '../common/utils/log.dart';

/// Manages a long-running Python PaddleOCR server process.
///
/// Image paths are written to the child's stdin, one per line, and the child
/// answers with one JSON object per line on stdout. Objects carrying a
/// `status` field are recognition replies and are forwarded to [recognize];
/// objects carrying only a `type` field (`status`, `log`, `error`) are
/// logged.
///
/// NOTE(review): replies carry no request id, so overlapping [recognize]
/// calls can receive each other's results. Callers should await one call
/// before issuing the next.
class PaddleOCRProcess {
  /// The underlying Python process.
  final Process process;
  final StreamController<String> _inputController = StreamController();
  final StreamController<String> _outputController =
      StreamController.broadcast();

  /// Maximum time, in seconds, that [recognize] waits for a reply.
  static const int timeoutSeconds = 30;

  PaddleOCRProcess._(this.process) {
    // Stdout arrives in arbitrary chunks: one chunk may hold several JSON
    // lines or only part of one, so split into lines before parsing.
    // Malformed bytes are tolerated so that stray native output cannot put an
    // error on the stream.
    process.stdout
        .transform(const Utf8Decoder(allowMalformed: true))
        .transform(const LineSplitter())
        .listen(_handleStdoutLine);

    // Log whatever the child writes to stderr.
    process.stderr
        .transform(const Utf8Decoder(allowMalformed: true))
        .listen((data) {
      Log.d('Python进程错误: $data');
    });

    // Forward queued requests to the child's stdin, one per line.
    _inputController.stream.listen((data) {
      process.stdin.writeln(data);
    });
  }

  /// Decodes [line] as a JSON object, or returns `null` if it is not one.
  static Map<String, dynamic>? _tryDecodeObject(String line) {
    try {
      final decoded = json.decode(line.trim());
      return decoded is Map<String, dynamic> ? decoded : null;
    } on FormatException {
      return null;
    }
  }

  /// Passes a reply line on to pending [recognize] calls.
  void _forward(String line) {
    if (!_outputController.isClosed) {
      _outputController.add(line);
    }
  }

  /// Routes one stdout line: replies are forwarded, everything else is logged.
  void _handleStdoutLine(String rawLine) {
    final line = rawLine.trim();
    if (line.isEmpty) return;

    final message = _tryDecodeObject(line);
    if (message == null) {
      // Not a JSON object (e.g. a library banner). Log it instead of letting
      // it abort a pending recognition request.
      Log.d('OCR输出: $line');
      return;
    }

    // Recognition replies are identified by "status", not by "type".
    if (message.containsKey('status')) {
      _forward(line);
      return;
    }

    switch (message['type']) {
      case 'status':
        Log.d('OCR服务状态: ${message['message']}');
        break;
      case 'log':
        Log.d('OCR日志: ${message['message']}');
        break;
      case 'error':
        Log.d('OCR错误: ${message['message']}');
        break;
      default:
        _forward(line); // unknown message kinds are forwarded as before
    }
  }

  /// Starts `assets/python/ocr_server.py` with the interpreter at
  /// [pythonPath].
  ///
  /// Returns `null` if the process cannot be started.
  ///
  /// NOTE(review): the script is resolved relative to the current working
  /// directory; confirm that this matches where assets are located in a
  /// packaged build.
  static Future<PaddleOCRProcess?> start(String pythonPath) async {
    try {
      final scriptPath = path.join(
          Directory.current.path, 'assets', 'python', 'ocr_server.py');

      final process = await Process.start(
        pythonPath,
        [scriptPath],
        // Make the child use UTF-8 for stdin/stdout/stderr so that it matches
        // the UTF-8 encoding used on this side of the pipes.
        environment: const {'PYTHONIOENCODING': 'utf-8'},
        runInShell: true,
      );

      return PaddleOCRProcess._(process);
    } catch (e) {
      Log.d('启动OCR进程失败: $e');
      return null;
    }
  }

  /// Sends [imagePath] to the OCR server and returns the recognized text.
  ///
  /// Returns `null` if the file does not exist, the server reports an error,
  /// the output stream ends, no reply arrives within [timeoutSeconds], or
  /// the request cannot be sent.
  Future<String?> recognize(String imagePath) async {
    StreamSubscription<String>? subscription;
    try {
      Log.d('发送识别请求: $imagePath');

      if (!await File(imagePath).exists()) {
        Log.d('图片文件不存在: $imagePath');
        return null;
      }

      final completer = Completer<String?>();

      // Subscribe before sending so the reply cannot be missed.
      subscription = _outputController.stream.listen(
        (data) {
          if (completer.isCompleted) return;
          final reply = _tryDecodeObject(data);
          if (reply == null) return;
          switch (reply['status']) {
            case 'success':
              final text = reply['text'];
              completer.complete(text is String ? text : null);
              break;
            case 'error':
              // Fail fast instead of waiting for the timeout.
              Log.d('OCR识别出错: ${reply['message']}');
              completer.complete(null);
              break;
          }
        },
        onError: (Object error) {
          Log.d('Stream错误: $error');
          if (!completer.isCompleted) completer.complete(null);
        },
        onDone: () {
          if (!completer.isCompleted) completer.complete(null);
        },
      );

      _inputController.add(imagePath);

      return await completer.future.timeout(
        const Duration(seconds: timeoutSeconds),
        onTimeout: () {
          Log.d('OCR识别超时 ($timeoutSeconds 秒)');
          return null;
        },
      );
    } catch (e) {
      Log.d('识别请求失败: $e');
      return null;
    } finally {
      // Always release the listener, whichever way the request ended.
      await subscription?.cancel();
    }
  }

  /// Kills the Python process and closes the internal stream controllers.
  Future<void> dispose() async {
    process.kill();
    await _inputController.close();
    await _outputController.close();
  }
}

image_service.dart

/// Recognizes text in images through a shared PaddleOCR server process.
class ImageService {
  /// The OCR server process, shared by all instances and created on first use.
  static PaddleOCRProcess? _ocrProcess;

  /// Path of the Python interpreter used to run the OCR server.
  ///
  /// Replace with the path on your own machine.
  static const String pythonPath =
      r'C:\Users\YOURNAME\AppData\Local\Programs\Python\Python39\python.exe';

  /// Recognizes the text in the image at [imagePath] with PaddleOCR.
  ///
  /// Returns `null` if the OCR process could not be started, if recognition
  /// yields no result, or if anything throws.
  Future<String?> performPaddleOCR(String imagePath) async {
    try {
      // Start the OCR process lazily on first use.
      var ocr = _ocrProcess;
      if (ocr == null) {
        ocr = await PaddleOCRProcess.start(pythonPath);
        _ocrProcess = ocr;
      }
      if (ocr == null) {
        return null;
      }

      // Send the image path and wait for the recognized text.
      final text = await ocr.recognize(imagePath);
      if (text == null) {
        return null;
      }
      Log.d('识别结果: $text');
      return text;
    } catch (e) {
      Log.d('OCR识别失败: $e');
      return null;
    }
  }

  /// Shuts down the OCR server process, if one is running.
  Future<void> disposeOCRService() async {
    final ocr = _ocrProcess;
    if (ocr != null) {
      await ocr.dispose();
    }
    _ocrProcess = null;
  }
}



最后编辑于
©著作权归作者所有,转载或内容合作请联系作者
平台声明:文章内容(如有图片或视频亦包括在内)由作者上传并发布,文章内容仅代表作者本人观点,简书系信息发布平台,仅提供信息存储服务。

推荐阅读更多精彩内容