flutter开发windows项目之OCR图像识别
在 Dart(Flutter)项目中,如果需要进行图像处理和文字识别,可以采用以下两种方式实现:
一、使用Tesseract_OCR识别图像:
这种方式识别准确度不高,适用于对识别精度要求不高的项目。
1、 下载Tesseract-ocr相应版本,并安装到本地:https://github.com/tesseract-ocr/tesseract/releases/tag/5.5.0
2、使用shell命令行运行tesseract.exe,完成图像识别:
import 'dart:io';
import 'dart:ffi';
import 'package:win32/win32.dart';
import 'package:process_run/process_run.dart';
import 'package:image/image.dart' as img;
/// Recognizes text in the image at [imagePath] with the Tesseract CLI.
///
/// The image is upscaled, converted to grayscale and binarized, written next
/// to the source as `<name>_processed.png`, and then passed to
/// `tesseract.exe`. Tesseract writes its result to `ocr.txt` in a fixed
/// folder below the user's `Documents` directory; that file is read back and
/// returned.
///
/// Returns the recognized text, or `null` when the image cannot be decoded,
/// `USERPROFILE` is not set, Tesseract exits with a non-zero code, or any
/// other error occurs (the failure is logged).
Future<String?> performOCR(String imagePath) async {
  try {
    // Decode the source image; give up if the format is not supported.
    final bytes = await File(imagePath).readAsBytes();
    var image = img.decodeImage(bytes);
    if (image == null) {
      Log.d('图片解码失败');
      return null;
    }

    // Double the width before recognition.
    image = img.copyResize(image, width: image.width * 2);

    // Convert to grayscale.
    final grayImage = img.grayscale(image, amount: 1);

    // Binarize. The threshold is deliberately below the usual midpoint (128)
    // so that more pixels end up white.
    const threshold = 100;
    for (int y = 0; y < grayImage.height; y++) {
      for (int x = 0; x < grayImage.width; x++) {
        final pixel = grayImage.getPixel(x, y);
        final luminance = img.getLuminance(pixel);
        final binaryColor = luminance < threshold
            ? img.ColorUint8.rgb(0, 0, 0) // darker than threshold -> black
            : img.ColorUint8.rgb(255, 255, 255); // otherwise -> white
        grayImage.setPixel(x, y, binaryColor);
      }
    }

    // Optional extra filtering step, left disabled. Note that gaussianBlur
    // smooths the image rather than sharpening it.
    // image = img.gaussianBlur(image, radius: 1);

    // Derive the processed file name from the file-name stem only. A plain
    // replaceAll('.png', ...) would leave non-PNG paths unchanged (so the
    // source image would be overwritten) and would also rewrite '.png'
    // occurring inside directory names.
    final lastDot = imagePath.lastIndexOf('.');
    final lastSeparator = imagePath.lastIndexOf(RegExp(r'[\\/]'));
    final stem = lastDot > lastSeparator + 1
        ? imagePath.substring(0, lastDot)
        : imagePath;
    final processedPath = '${stem}_processed.png';
    await File(processedPath).writeAsBytes(img.encodePng(grayImage));

    // Resolve the user's profile directory; without it the output path would
    // start with the literal text "null".
    final userHome = Platform.environment['USERPROFILE'];
    if (userHome == null || userHome.isEmpty) {
      Log.d('Error: USERPROFILE 环境变量未设置');
      return null;
    }

    // Tesseract appends ".txt" to this base name. Make sure the folder
    // exists, because Tesseract does not create it.
    final outputDir = '$userHome\\Documents\\leidian9\\Pictures\\Screenshots';
    await Directory(outputDir).create(recursive: true);
    final outputBase = '$outputDir\\ocr';

    // Invoke the Tesseract command-line tool.
    final result = await Process.run(
      // Replace with your own Tesseract installation path.
      'C:\\Program Files\\Tesseract-OCR\\tesseract.exe',
      [
        processedPath,
        outputBase,
        '-l', 'chi_sim',
        // Config variables must be introduced with "-c"; a bare VAR=VALUE
        // argument is interpreted as a config file name and ignored.
        '-c',
        'tessedit_char_whitelist=0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ',
      ],
    );

    if (result.exitCode != 0) {
      Log.d('Error: ${result.stderr}');
      return null;
    }

    // Read back the text file Tesseract produced.
    final text = await File('$outputBase.txt').readAsString();
    Log.i('识别结果: $text');
    return text;
  } catch (e) {
    Log.d('OCR 预处理失败: $e');
    return null;
  }
}
二、使用百度PaddleOCR识别图像:
使用这种方式,识别准确度大幅提高,但它依赖本地python环境,并且配置复杂度稍高:
1、安装 python-3.9.12 版本。因为 paddle 最高支持 python-3.10,所以安装 3.9 比较合适。下载地址:https://www.python.org/ftp/python/3.9.12/python-3.9.12-amd64.exe ,或者访问 https://www.python.org/downloads/windows/ 自行选择版本下载。
2、安装飞桨:https://www.paddlepaddle.org.cn/install/quick?docurl=/documentation/docs/zh/install/pip/windows-pip.html
选择windows,CPU版本,shell执行:
# 更新pip
python -m pip install --upgrade pip
python -m pip install paddlepaddle==2.6.2 -i https://www.paddlepaddle.org.cn/packages/stable/cpu/
3、安装paddleocr和相应依赖:
# 使用官方源安装
pip install "paddleocr>=2.0.1"
# 使用阿里源
pip install "paddleocr>=2.0.1" -i https://mirrors.aliyun.com/pypi/simple/
# 如果网络不好,增加超时时间
pip install "paddleocr>=2.0.1" -i https://mirrors.aliyun.com/pypi/simple/ --timeout 1000
# 安装依赖
pip install shapely scikit-image imgaug -i https://pypi.tuna.tsinghua.edu.cn/simple
4、安装完成后,进入 Python 交互式环境
python
测试PaddleOCR
>>>from paddleocr import PaddleOCR
# 初始化 PaddleOCR
>>>ocr = PaddleOCR(use_angle_cls=True, lang='ch') # 使用中文模型
# 进行 OCR 识别(假设有一张名为 'test.png' 的图片)
>>>result = ocr.ocr('test.png', cls=True)
# 打印识别结果
>>>for line in result:
...     print(line)  # 注意这里要按 Tab 键或者 4 个空格进行缩进
5、也可以创建一个script.py文件:
from paddleocr import PaddleOCR

# Build the recognizer with the Chinese model and angle classification on.
engine = PaddleOCR(use_angle_cls=True, lang='ch')

# Run recognition on 'ocr.png' and print every top-level entry of the result.
for entry in engine.ocr('ocr.png', cls=True):
    print(entry)
6、运行文件:
python script.py
7、最后给出集成源码:
需要在 pubspec.yaml 中声明资源文件 assets/python/ocr_server.py,其内容如下:
import sys
import paddle
from paddleocr import PaddleOCR
import json
import threading
from concurrent.futures import ThreadPoolExecutor
import queue
import logging
class _JsonLogFormatter(logging.Formatter):
    """Formats every log record as a single-line JSON object.

    The message is serialized with ``json.dumps`` so that quotes, backslashes
    and newlines inside it cannot break the JSON emitted on stdout.
    """

    def format(self, record):
        return json.dumps(
            {"type": "log", "message": record.getMessage()},
            ensure_ascii=False,
        )


# Route log output to stdout (not stderr) as {"type": "log", ...} objects.
_log_handler = logging.StreamHandler(sys.stdout)
_log_handler.setFormatter(_JsonLogFormatter())
logging.basicConfig(level=logging.INFO, handlers=[_log_handler])
logger = logging.getLogger(__name__)

# Run on the GPU when this Paddle build was compiled with CUDA, else the CPU.
paddle.device.set_device('gpu' if paddle.device.is_compiled_with_cuda() else 'cpu')

# Thread pool used to write result files without blocking recognition.
file_executor = ThreadPoolExecutor(max_workers=2)
class OCRWorker:
    """Owns one PaddleOCR engine and reports recognition results on stdout."""

    def __init__(self):
        self.ocr = PaddleOCR(
            use_angle_cls=False,
            lang='ch',
            # Request the GPU only when this Paddle build has CUDA, matching
            # the module-level device selection.
            use_gpu=paddle.device.is_compiled_with_cuda(),
            enable_mkldnn=True,
            cpu_threads=4,
            det_db_thresh=0.3,
            det_db_box_thresh=0.3,
            det_limit_side_len=960,
            show_log=False
        )

    def save_result(self, image_path, text_content):
        """Writes ``text_content`` to ``<image stem>_result.txt`` as UTF-8.

        Errors are reported on stderr and never raised.
        """
        import os

        try:
            # Swap only the extension. A plain str.replace('.png', ...) leaves
            # non-PNG paths unchanged, which would overwrite the image itself.
            stem, _ = os.path.splitext(image_path)
            output_path = stem + '_result.txt'
            with open(output_path, 'w', encoding='utf-8') as f:
                f.write(text_content)
        except Exception as e:
            print(f"Error saving file: {str(e)}", file=sys.stderr)

    def process_image(self, image_path):
        """Recognizes ``image_path`` and prints one JSON result line.

        On success prints ``{"status": "success", "text": ...}`` and queues
        the text for saving on ``file_executor``; on any exception prints
        ``{"status": "error", "message": ...}``.
        """
        try:
            result = self.ocr.ocr(image_path, cls=False)

            # Collect the text of every detected item. A page entry (or the
            # whole result) can be None when nothing was detected; treat that
            # as "no text" instead of failing.
            texts = [
                item[1][0]
                for page in (result or [])
                if page
                for item in page
            ]
            text_content = "\n".join(texts)

            # Report the result right away.
            print(json.dumps({
                "status": "success",
                "text": text_content
            }, ensure_ascii=False), flush=True)

            # Save the text file in the background.
            file_executor.submit(self.save_result, image_path, text_content)
        except Exception as e:
            print(json.dumps({
                "status": "error",
                "message": str(e)
            }, ensure_ascii=False), flush=True)
def worker_thread():
    """Consumes image paths from ``task_queue`` until a ``None`` sentinel.

    Each thread builds its own ``OCRWorker``. Every item taken from the queue,
    including the sentinel, is acknowledged with ``task_done``. Unexpected
    exceptions are printed as a JSON error line and the loop keeps running.
    """
    engine = OCRWorker()
    running = True
    while running:
        try:
            job = task_queue.get()
            if job is not None:
                engine.process_image(job)
            else:
                # Sentinel received: leave the loop after acknowledging it.
                running = False
        except Exception as exc:
            failure = {
                "status": "error",
                "message": str(exc)
            }
            print(json.dumps(failure, ensure_ascii=False), flush=True)
        finally:
            task_queue.task_done()


# Queue of pending image paths shared by all worker threads.
task_queue = queue.Queue()
def main():
    """Starts the worker threads and feeds them image paths read from stdin.

    One path is expected per input line; blank lines are skipped. On EOF or
    Ctrl+C every worker receives a ``None`` sentinel and is joined before the
    file executor is shut down. Any other exception is printed as a JSON
    error line.
    """
    # The parent process exchanges UTF-8 over the pipes. When stdio is a pipe
    # Python otherwise uses the locale encoding, which on Windows is usually
    # not UTF-8 and would garble non-ASCII paths and recognized text.
    for stream in (sys.stdin, sys.stdout):
        if hasattr(stream, 'reconfigure'):
            stream.reconfigure(encoding='utf-8')

    # Start the worker threads.
    workers = []
    num_workers = 3
    for _ in range(num_workers):
        t = threading.Thread(target=worker_thread)
        t.daemon = True
        t.start()
        workers.append(t)

    # Announce readiness as a JSON status message.
    print(json.dumps({
        "type": "status",
        "message": "OCR服务已启动"
    }), flush=True)

    try:
        while True:
            image_path = input().strip()
            if not image_path:
                continue
            task_queue.put(image_path)
    except (EOFError, KeyboardInterrupt):
        # One sentinel per worker, then wait for all of them to finish.
        for _ in workers:
            task_queue.put(None)
        for t in workers:
            t.join()
        file_executor.shutdown(wait=False)
    except Exception as e:
        print(json.dumps({
            "type": "error",
            "message": str(e)
        }, ensure_ascii=False), flush=True)


if __name__ == "__main__":
    main()
paddle_ocr_process.dart:
import 'dart:async';
import 'dart:convert';
import 'dart:io';
import 'package:path/path.dart' as path;
import '../common/utils/log.dart';
/// Manages a Python OCR server process and talks to it over stdin/stdout.
///
/// Requests are image paths written to the process, one per line. Replies
/// are JSON objects read from the process, one per line.
///
/// NOTE(review): replies carry no request identifier, so when several
/// [recognize] calls are in flight at once each of them completes with the
/// first reply that arrives. Confirm that callers issue requests one at a
/// time or can tolerate this.
class PaddleOCRProcess {
  final Process process;
  final StreamController<String> _inputController = StreamController();
  final StreamController<String> _outputController =
      StreamController.broadcast();

  /// Maximum time, in seconds, that [recognize] waits for a reply.
  static const int timeoutSeconds = 30;

  PaddleOCRProcess._(this.process) {
    // Pipe chunks do not line up with message boundaries, so split the
    // decoded text into lines before parsing. Malformed bytes are replaced
    // instead of raising a stream error.
    process.stdout
        .transform(const Utf8Decoder(allowMalformed: true))
        .transform(const LineSplitter())
        .listen(_handleStdoutLine, onError: (Object error) {
      Log.d('OCR输出流错误: $error');
    });

    // Log everything the process writes to stderr.
    process.stderr
        .transform(const Utf8Decoder(allowMalformed: true))
        .listen((data) {
      Log.d('Python进程错误: $data');
    }, onError: (Object error) {
      Log.d('Python进程错误: $error');
    });

    // Forward queued requests to the process, always encoded as UTF-8.
    _inputController.stream.listen((data) {
      process.stdin.add(utf8.encode('$data\n'));
    });
  }

  /// Handles one line of process output.
  ///
  /// Lines whose JSON `type` is `status`, `log` or `error` are logged. Every
  /// other line, including non-JSON text, is forwarded to [recognize].
  void _handleStdoutLine(String rawLine) {
    final line = rawLine.trim();
    if (line.isEmpty || _outputController.isClosed) return;

    Object? decoded;
    try {
      decoded = json.decode(line);
    } on FormatException {
      // Not JSON: forward it unchanged.
      _outputController.add(line);
      return;
    }
    if (decoded is! Map) {
      _outputController.add(line);
      return;
    }

    switch (decoded['type']) {
      case 'status':
        Log.d('OCR服务状态: ${decoded['message']}');
        break;
      case 'log':
        Log.d('OCR日志: ${decoded['message']}');
        break;
      case 'error':
        Log.d('OCR错误: ${decoded['message']}');
        break;
      default:
        // Recognition replies have no `type`; pass them on.
        _outputController.add(line);
    }
  }

  /// Starts `assets/python/ocr_server.py` with the interpreter at
  /// [pythonPath].
  ///
  /// The script path is resolved relative to the current working directory.
  /// Returns `null` (after logging) when the process cannot be started.
  static Future<PaddleOCRProcess?> start(String pythonPath) async {
    try {
      final scriptPath = path.join(
          Directory.current.path, 'assets', 'python', 'ocr_server.py');
      final process = await Process.start(
        pythonPath,
        [scriptPath],
        // Make Python use UTF-8 on its standard streams regardless of the
        // system code page, matching the decoding done on this side.
        environment: const {'PYTHONIOENCODING': 'utf-8'},
        runInShell: true,
      );
      return PaddleOCRProcess._(process);
    } catch (e) {
      Log.d('启动OCR进程失败: $e');
      return null;
    }
  }

  /// Sends [imagePath] to the OCR process and waits for the recognized text.
  ///
  /// Returns `null` when the file does not exist, the process replies with
  /// `status: error`, the output stream ends or fails, no reply arrives
  /// within [timeoutSeconds], or any other error occurs. Output lines that
  /// are not valid JSON are logged and skipped.
  Future<String?> recognize(String imagePath) async {
    StreamSubscription<String>? subscription;
    try {
      Log.d('发送识别请求: $imagePath');
      if (!await File(imagePath).exists()) {
        Log.d('图片文件不存在: $imagePath');
        return null;
      }

      final completer = Completer<String?>();
      void finish(String? value) {
        if (!completer.isCompleted) completer.complete(value);
      }

      // Listen before sending: a broadcast stream drops events that arrive
      // while nobody is subscribed.
      subscription = _outputController.stream.listen((data) {
        Object? result;
        try {
          result = json.decode(data.trim());
        } on FormatException catch (e) {
          // Stray non-JSON output must not abort the pending request.
          Log.d('解析OCR结果失败: $e');
          return;
        }
        if (result is! Map) return;

        final status = result['status'];
        if (status == 'success') {
          final text = result['text'];
          finish(text is String ? text : null);
        } else if (status == 'error') {
          // Fail right away instead of waiting for the timeout.
          Log.d('OCR错误: ${result['message']}');
          finish(null);
        }
      }, onError: (Object error) {
        Log.d('Stream错误: $error');
        finish(null);
      }, onDone: () => finish(null));

      _inputController.add(imagePath);

      return await completer.future.timeout(
        const Duration(seconds: timeoutSeconds),
        onTimeout: () {
          Log.d('OCR识别超时 ($timeoutSeconds 秒)');
          return null;
        },
      );
    } catch (e) {
      Log.d('识别请求失败: $e');
      return null;
    } finally {
      await subscription?.cancel();
    }
  }

  /// Kills the OCR process and closes both internal stream controllers.
  Future<void> dispose() async {
    process.kill();
    await _inputController.close();
    await _outputController.close();
  }
}
image_service.dart
/// Exposes PaddleOCR text recognition backed by one shared OCR process.
class ImageService {
  static PaddleOCRProcess? _ocrProcess;

  /// Start-up currently in flight, shared by all callers that arrive while
  /// the OCR process is still being launched.
  static Future<PaddleOCRProcess?>? _pendingStart;

  /// Python interpreter used to run the OCR server. Replace with your own
  /// path.
  static const String pythonPath =
      r'C:\Users\YOURNAME\AppData\Local\Programs\Python\Python39\python.exe';

  /// Returns the shared OCR process, starting it on first use.
  ///
  /// Concurrent callers share a single start-up, so only one Python process
  /// is launched. A failed start-up is not cached; the next call retries.
  static Future<PaddleOCRProcess?> _ensureProcess() async {
    final running = _ocrProcess;
    if (running != null) return running;

    final pending = _pendingStart ??= PaddleOCRProcess.start(pythonPath);
    final started = await pending;
    // Only the first caller to resume publishes the result.
    if (identical(_pendingStart, pending)) {
      _pendingStart = null;
      _ocrProcess = started;
    }
    return started;
  }

  /// Recognizes the text in the image at [imagePath] using PaddleOCR.
  ///
  /// Returns the recognized text, or `null` when the OCR process is not
  /// available or recognition fails.
  Future<String?> performPaddleOCR(String imagePath) async {
    try {
      final ocr = await _ensureProcess();
      if (ocr == null) return null;

      final result = await ocr.recognize(imagePath);
      if (result != null) {
        Log.d('识别结果: $result');
      }
      return result;
    } catch (e) {
      Log.d('OCR识别失败: $e');
      return null;
    }
  }

  /// Shuts down the shared OCR process, if one is running.
  Future<void> disposeOCRService() async {
    // Clear the field first so that callers arriving during the shutdown do
    // not pick up a process that is being disposed.
    final ocr = _ocrProcess;
    _ocrProcess = null;
    await ocr?.dispose();
  }
}