tesseract ocr 5.0 Api调用,delphi源码实现--识别率超高速度快
笔者过去使用tesseract-ocr 4.0,一直被识别速度慢和识别率低的问题困扰。最近更新使用了64位的tesseract5.0 dll后识别速度大幅提升,以下是调用DLL的源码和程序说明,供大家参考。
一:下载tesseract DLL和中文字库
方式1,到tesseract官网下载dll和字库,tesseract官网提供源码和编译好的DLL,建议直接使用编译好的DLL,方便省时。以下是64位DLL安装包下载地址:https://github.com/UB-Mannheim/tesseract/wiki 。注意要下载64位版本,笔者测试后发现32位识别率没有64位高。
中文字库下载地址:https://github.com/tesseract-ocr/tessdata 下载后的中文简体字库chi_sim.traineddata和 chi_sim_vert.traineddata放到安装包的tessdata文件夹下。
方式2,直接下载笔者提供的完整64位DLL和中文字库及Delphi调用源码,地址如下: https://www.gaya-soft.cn/download/
二:调用dll实例
此delphi源码是在国外的开源项目TTesseractOCR4(https://github.com/r1me/TTesseractOCR4)基础上完善的,原来只支持Tesseract4.0版本,笔者修改了部分源码使之能适应5.0版本。源码中tesseractocr.capi.pas单元是定义DLL接口的,是最主要的部分。testMain.pas单元调用了4个最主要的函数:TessBaseAPICreate是得到一个API接口,TessBaseAPIInit2加载字库,TessBaseAPISetImage2是加载一个图像,TessBaseAPIRecognize是文字识别。源码下载地址: https://www.gaya-soft.cn/download/ 。以下是界面设计:
代码实现如下:
unit testMain;
interface
uses
Winapi.Windows, Winapi.Messages, System.SysUtils, System.Variants, System.Classes, Vcl.Graphics,
Vcl.Controls, Vcl.Forms, Vcl.Dialogs, Vcl.StdCtrls, Vcl.ExtCtrls, Vcl.Imaging.jpeg, DateUtils,
tesseractocr.capi, tesseractocr.leptonica;
type
// Main form of the demo: the user picks an image, loads the Tesseract
// language data and sees the recognized text in Memo1.
TForm1 = class(TForm)
Panel1: TPanel;
Panel2: TPanel;
// Receives the status message after loading the language data and the
// recognition output (plain text, hOCR text and elapsed milliseconds).
Memo1: TMemo;
Splitter1: TSplitter;
// Recognition mode selector. btInitializeClick maps ItemIndex 2 and 3 to
// 'eng', 1 to 'chi_sim' and anything else to 'chi_sim+eng'; btRecognizeClick
// additionally applies a digits-only whitelist for ItemIndex 3.
ComboBox1: TComboBox;
Label1: TLabel;
// Shows the selected picture; its Graphic is the input of btRecognizeClick.
Image1: TImage;
Label2: TLabel;
// Holds the file name chosen through OpenDialog1.
edtImage: TEdit;
btSelectImg: TButton;
btRecognize: TButton;
OpenDialog1: TOpenDialog;
btInitialize: TButton;
procedure btSelectImgClick(Sender: TObject);
procedure btRecognizeClick(Sender: TObject);
procedure FormCreate(Sender: TObject);
procedure FormDestroy(Sender: TObject);
procedure btInitializeClick(Sender: TObject);
private
// Directory of the running module (with trailing delimiter), set in FormCreate.
FModulePath: string;
// Tesseract API handle obtained from TessBaseAPICreate in FormCreate and
// released in FormDestroy.
PApi: TessBaseAPI;
// Plain-text result of the last RecognizeAsText call (line breaks as CRLF).
FUtf8Text: string;
// hOCR result of the last RecognizeAsText call (line breaks as CRLF).
FHOcrText: string;
// Set to True by btInitializeClick; btRecognizeClick triggers the
// initialization itself while this is still False.
FInitOver: Boolean;
// Returns the parent directory of FModulePath.
function RootPath(): string;
// Converts a UTF-8 C string to a Delphi string, optionally releasing the
// buffer with TessDeleteText.
function PUTF8CharToString(Char: PUTF8Char; DeleteText: Boolean = True): string;
// Loads language data via TessBaseAPIInit2; True when that call returns 0.
function TessInitialize(DataPath, Language: string; Mode: TessOcrEngineMode = OEM_DEFAULT): Boolean;
// Forwards a key/value pair to TessBaseAPISetVariable.
function SetVariable(Key: string; Value: string): Boolean;
// Reads an image from a file (pixRead) and hands it to TessBaseAPISetImage2.
function SetImage(FileName: string): PPix; overload;
// Reads an image from a memory stream (pixReadMem) and hands it to
// TessBaseAPISetImage2.
function SetImage(Stream: TMemoryStream): PPix; overload;
// Runs recognition and fills FUtf8Text / FHOcrText.
procedure RecognizeAsText();
public
{ Public declarations }
end;
var
// Global instance of the main form (presumably created by the project's
// application startup code; that code is not part of this unit).
Form1: TForm1;
// Value returned by CancelCallback to Tesseract during recognition.
// FormCreate resets it to False; nothing in this unit sets it to True.
_CancelRecognize: Boolean;
implementation
{$R *.dfm}
// Form construction handler: records the directory of the running module,
// verifies that the Tesseract DLL was loaded and creates the API handle.
//
// Raises Exception when hTesseractLib is 0 (library not loaded).
procedure TForm1.FormCreate(Sender: TObject);
var
  Buf: array [0 .. MAX_PATH] of Char;
  S: string;
begin
  // GetModuleFileName expects the buffer capacity in characters. SizeOf(Buf)
  // would report bytes, i.e. twice the real capacity when Char is 2 bytes
  // wide, so Length(Buf) is the correct value here.
  SetString(S, Buf, GetModuleFileName(HInstance, Buf, Length(Buf)));
  FModulePath := ExtractFilePath(S);
  if (hTesseractLib = 0) then
    raise Exception.Create('Tesseract library is not loaded');
  // No cancellation requested yet.
  _CancelRecognize := False;
  // Obtain an API handle; language data is loaded later by TessInitialize.
  PApi := TessBaseAPICreate();
  FInitOver := False;
end;
// Form destruction handler: shuts down and frees the Tesseract API handle
// if one was created.
procedure TForm1.FormDestroy(Sender: TObject);
begin
  if not Assigned(PApi) then
    Exit;
  // End the engine first, then delete the handle itself.
  TessBaseAPIEnd(PApi);
  TessBaseAPIDelete(PApi);
end;
// Returns the parent directory of FModulePath, including its trailing
// backslash. When no backslash precedes the last character, FModulePath is
// returned unchanged.
function TForm1.RootPath(): string;
var
  Idx: Integer;
begin
  Result := FModulePath;
  // Start one character before the end so the trailing delimiter is skipped.
  Idx := Length(Result) - 1;
  while (Idx >= 1) and (Result[Idx] <> '\') do
    Dec(Idx);
  if Idx >= 1 then
    Result := Copy(Result, 1, Idx);
end;
// Click handler of btSelectImg: opens the file dialog in the "example"
// folder under RootPath and, when a file is chosen, stores its name in
// edtImage and loads it into Image1.
procedure TForm1.btSelectImgClick(Sender: TObject);
begin
  OpenDialog1.InitialDir := RootPath() + 'example\';
  OpenDialog1.Filter := 'Image File|*.JPG;*.PNG;*.GIF;*.BMP;*.JPEG;';
  // Nothing to do when the dialog is cancelled.
  if not OpenDialog1.Execute() then
    Exit;
  edtImage.Text := OpenDialog1.FileName;
  Image1.Picture.LoadFromFile(edtImage.Text);
end;
// Converts a null-terminated UTF-8 buffer to a Delphi string.
//
// Char       - UTF-8 text, may be nil (an empty string is returned then).
// DeleteText - when True the buffer is released with TessDeleteText after
//              the copy, so the caller must not use it afterwards.
//
// The buffer is released whenever DeleteText is True and Char is not nil,
// including the case of an empty string; skipping the release for empty
// results would leak the buffer.
function TForm1.PUTF8CharToString(Char: PUTF8Char; DeleteText: Boolean = True): string;
var
  UtfStr: UTF8String;
begin
  Result := '';
  if not Assigned(Char) then
    Exit;
  try
    SetString(UtfStr, Char, Length(Char));
    Result := string(UtfStr);
  finally
    // Release even if the conversion raised, and even for zero-length text.
    if DeleteText then
      TessDeleteText(Char);
  end;
end;
// Initializes the recognition language data.
//
// DataPath - location of the tessdata folder, passed to TessBaseAPIInit2.
// Language - language name(s), passed to TessBaseAPIInit2.
// Mode     - OCR engine mode.
//
// Returns True when TessBaseAPIInit2 returns 0, False otherwise or when no
// API handle exists. Also switches the process's current directory to
// FModulePath before the call.
function TForm1.TessInitialize(DataPath, Language: string; Mode: TessOcrEngineMode = OEM_DEFAULT): Boolean;
var
  Utf8Path, Utf8Lang: UTF8String;
begin
  SetCurrentDirectory(PChar(FModulePath));
  Result := False;
  if not Assigned(PApi) then
    Exit;
  // Keep the UTF-8 copies in named locals so the pointers passed to the DLL
  // stay valid for the duration of the call.
  Utf8Path := UTF8Encode(DataPath);
  Utf8Lang := UTF8Encode(Language);
  Result := TessBaseAPIInit2(PApi, PUTF8Char(Utf8Path), PUTF8Char(Utf8Lang), Mode) = 0;
end;
// Cancel callback installed into the recognition monitor by RecognizeAsText
// (FMonitor.cancel). Both parameters are ignored; the function simply returns
// the current value of the unit-level flag _CancelRecognize.
function CancelCallback(cancel_this: Pointer; words: Integer): Boolean; cdecl;
begin
Result := _CancelRecognize;
end;
// Progress callback installed into the recognition monitor by RecognizeAsText
// (FMonitor.progress_callback). All parameters are ignored and the function
// always returns False.
function ProgressCallback(Progress: Integer; Left, Right, Top, Bottom: Integer): Boolean; cdecl;
begin
Result := False;
end;
// Sets a recognition parameter.
//
// Key / Value are converted to UTF-8 and forwarded to TessBaseAPISetVariable;
// the function returns whatever that call returns.
function TForm1.SetVariable(Key: string; Value: string): Boolean;
var
  Utf8Key, Utf8Value: UTF8String;
begin
  Utf8Key := UTF8Encode(Key);
  Utf8Value := UTF8Encode(Value);
  Result := TessBaseAPISetVariable(PApi, PUTF8Char(Utf8Key), PUTF8Char(Utf8Value));
end;
// Recognizes the text of the image previously handed to Tesseract.
//
// FUtf8Text and FHOcrText are cleared first. When TessBaseAPIRecognize
// returns 0 they receive the plain text and the hOCR text (result plus
// position information), with every LF replaced by CRLF; otherwise both
// stay empty.
procedure TForm1.RecognizeAsText();
const
  LF = #10;
  CRLF = #13#10;
var
  Monitor: ETEXT_DESC;
begin
  FUtf8Text := '';
  FHOcrText := '';
  // Zeroed monitor carrying only the cancel and progress callbacks.
  FillChar(Monitor, SizeOf(Monitor), #0);
  Monitor.cancel := @CancelCallback;
  Monitor.progress_callback := @ProgressCallback;
  if TessBaseAPIRecognize(PApi, Monitor) = 0 then begin
    // Recognized text.
    FUtf8Text := StringReplace(PUTF8CharToString(TessBaseAPIGetUTF8Text(PApi)), LF, CRLF, [rfReplaceAll]);
    // Recognition result with position information.
    FHOcrText := StringReplace(PUTF8CharToString(TessBaseAPIGetHOCRText(PApi, 0)), LF, CRLF, [rfReplaceAll]);
  end;
end;
// Loads the image file to recognize.
//
// Reads FileName with pixRead and, on success, passes the image to
// TessBaseAPISetImage2. Returns the PPix produced by pixRead, or nil when
// reading failed.
function TForm1.SetImage(FileName: string): PPix;
var
  Utf8Name: UTF8String;
begin
  Utf8Name := UTF8Encode(FileName);
  Result := pixRead(PUTF8Char(Utf8Name));
  if not Assigned(Result) then
    Exit;
  TessBaseAPISetImage2(PApi, Result);
end;
// Loads the image to recognize from memory.
//
// Rewinds Stream, decodes its whole content with pixReadMem and, on success,
// passes the image to TessBaseAPISetImage2. Returns the PPix produced by
// pixReadMem, or nil when decoding failed.
function TForm1.SetImage(Stream: TMemoryStream): PPix;
begin
  Stream.Position := 0;
  Result := pixReadMem(Stream.Memory, Stream.Size);
  if not Assigned(Result) then
    Exit;
  TessBaseAPISetImage2(PApi, Result);
end;
// Click handler of btInitialize: loads the language data that matches the
// mode selected in ComboBox1 and reports the outcome in Memo1.
//
// FInitOver reflects the actual result of TessInitialize, so a failed load
// is neither reported as success nor remembered as "already initialized".
procedure TForm1.btInitializeClick(Sender: TObject);
var
  Language: string;
begin
  case ComboBox1.ItemIndex of
    2, 3:
      Language := 'eng';
    1:
      Language := 'chi_sim';
  else
    Language := 'chi_sim+eng';
  end;
  // Loading takes a long time; it is best executed only once.
  FInitOver := TessInitialize('tessdata' + PathDelim, Language);
  if FInitOver then
    Memo1.Text := '字库加载完成'
  else
    Memo1.Text := '字库加载失败: ' + Language;
end;
// Click handler of btRecognize: converts the picture shown in Image1 to a
// BMP held in memory, hands it to Tesseract, runs recognition and writes the
// plain text, the hOCR text and the elapsed milliseconds to Memo1.
//
// Differences from a naive implementation:
//  - stops when the language data could not be loaded;
//  - reports a missing picture or a picture that could not be decoded
//    instead of silently doing nothing;
//  - clears the character whitelist when the digits mode is not selected,
//    because a whitelist set by an earlier digits run would otherwise stay
//    active on the same API handle.
procedure TForm1.btRecognizeClick(Sender: TObject);
var
  Stream: TMemoryStream;
  Bitmap: TBitmap;
  ImagePix: PPix;
  T1: TDateTime;
  F: Int64;
begin
  if not FInitOver then
    btInitializeClick(Sender);
  // Initialization may have failed; recognition cannot work without it.
  if not FInitOver then
    Exit;
  if (Image1.Picture.Graphic = nil) or Image1.Picture.Graphic.Empty then begin
    Memo1.Text := '请先选择要识别的图片';
    Exit;
  end;
  //
  T1 := Now();
  //
  Stream := TMemoryStream.Create();
  try
    Bitmap := TBitmap.Create();
    try
      Bitmap.Assign(Image1.Picture.Graphic);
      // Converting to a standard BMP first is preferred; the author reports
      // that letting tesseract load image files often causes memory errors.
      Bitmap.SaveToStream(Stream);
    finally
      Bitmap.Free();
    end;
    // Set the in-memory image; a file loaded into memory could be passed
    // directly as well.
    ImagePix := SetImage(Stream);
  finally
    Stream.Free();
  end;
  //
  if not Assigned(ImagePix) then begin
    Memo1.Text := '图片加载失败';
    Exit;
  end;
  try
    // Digits mode: restrict recognition to a whitelist; any other mode
    // resets the whitelist to empty (no restriction).
    if ComboBox1.ItemIndex = 3 then
      SetVariable('tessedit_char_whitelist', '0123456789')
    else
      SetVariable('tessedit_char_whitelist', '');
    // Start recognition.
    RecognizeAsText();
    // Elapsed milliseconds.
    F := MilliSecondsBetween(Now, T1);
    //
    Memo1.Text := FUtf8Text + #13#10 + //
      '-------------------------------HOCRText-------------------------------' + #13#10 + //
      FHOcrText + #13#10 + Format('%d', [F]);
  finally
    pixDestroy(ImagePix);
  end;
end;
end.
// NOTE(review): the routines from here down to the next "end." are a
// repetition of routines with the same headers that already appear before the
// unit terminator "end." directly above this line. A Delphi compiler ignores
// text that follows the final "end." of a unit, so this copy looks like a
// copy/paste artifact and has no effect on the compiled program - confirm
// before relying on anything below.
//
// Initialize the recognition language data
function TForm1.TessInitialize(DataPath, Language: string; Mode: TessOcrEngineMode = OEM_DEFAULT): Boolean;
begin
SetCurrentDirectory(PChar(FModulePath));
//
if Assigned(PApi) then
Result := TessBaseAPIInit2(PApi, PUTF8Char(UTF8Encode(DataPath)), PUTF8Char(UTF8Encode(Language)), Mode) = 0
else
Result := False;
end;
// Cancel callback: returns the unit-level flag _CancelRecognize
function CancelCallback(cancel_this: Pointer; words: Integer): Boolean; cdecl;
begin
Result := _CancelRecognize;
end;
// Progress callback: always returns False
function ProgressCallback(Progress: Integer; Left, Right, Top, Bottom: Integer): Boolean; cdecl;
begin
Result := False;
end;
// Set a recognition parameter
function TForm1.SetVariable(Key: string; Value: string): Boolean;
begin
Result := TessBaseAPISetVariable(PApi, PUTF8Char(UTF8Encode(Key)), PUTF8Char(UTF8Encode(Value)));
end;
// Recognize the text in the image
procedure TForm1.RecognizeAsText();
var
FMonitor: ETEXT_DESC;
begin
FillChar(FMonitor, SizeOf(FMonitor), #0);
FMonitor.cancel := @CancelCallback;
FMonitor.progress_callback := @ProgressCallback;
FUtf8Text := '';
FHOcrText := '';
//
if TessBaseAPIRecognize(PApi, FMonitor) <> 0 then
Exit;
// Recognized text
FUtf8Text := PUTF8CharToString(TessBaseAPIGetUTF8Text(PApi));
FUtf8Text := StringReplace(FUtf8Text, #10, #13#10, [rfReplaceAll]);
// Recognition result with position information
FHOcrText := PUTF8CharToString(TessBaseAPIGetHOCRText(PApi, 0));
FHOcrText := StringReplace(FHOcrText, #10, #13#10, [rfReplaceAll]);
end;
// Load the image file to recognize
function TForm1.SetImage(FileName: string): PPix;
begin
Result := pixRead(PUTF8Char(UTF8Encode(FileName)));
if Assigned(Result) then
TessBaseAPISetImage2(PApi, Result)
else
Result := nil;
end;
// Load the image to recognize from memory
function TForm1.SetImage(Stream: TMemoryStream): PPix;
begin
Stream.Position := 0;
Result := pixReadMem(Stream.Memory, Stream.Size);
if Assigned(Result) then
TessBaseAPISetImage2(PApi, Result)
else
Result := nil;
end;
// Click handler of btInitialize: picks the language from ComboBox1 and loads it
procedure TForm1.btInitializeClick(Sender: TObject);
var
Language: string;
begin
case ComboBox1.ItemIndex of
3, 2:
Language := 'eng';
1:
Language := 'chi_sim';
else
Language := 'chi_sim+eng';
end;
// Loading takes a long time; best executed only once
TessInitialize('tessdata' + PathDelim, Language);
FInitOver := True;
Memo1.Text := '字库加载完成';
end;
// Click handler of btRecognize: recognizes the picture shown in Image1
procedure TForm1.btRecognizeClick(Sender: TObject);
var
Stream: TMemoryStream;
Bitmap: TBitmap;
ImagePix: PPix;
T1: TDateTime;
F: Int64;
begin
if not FInitOver then
btInitializeClick(Sender);
//
T1 := Now();
//
Stream := TMemoryStream.Create();
try
Bitmap := TBitmap.Create();
try
Bitmap.Assign(Image1.Picture.Graphic);
// Better to convert the image to standard BMP; the author reports that tesseract's own image loading often causes memory errors
Bitmap.SaveToStream(Stream);
finally
Bitmap.Free();
end;
// Set the in-memory image; a file loaded into memory could also be passed directly
ImagePix := SetImage(Stream);
finally
Stream.Free();
end;
//
if Assigned(ImagePix) then begin
try
// Digits mode: set the character whitelist
if ComboBox1.ItemIndex = 3 then
SetVariable('tessedit_char_whitelist', '0123456789');
// Start recognition
RecognizeAsText();
// Elapsed milliseconds
F := MilliSecondsBetween(Now, T1);
//
Memo1.Text := FUtf8Text + #13#10 + //
'-------------------------------HOCRText-------------------------------' + #13#10 + //
FHOcrText + #13#10 + Format('%d', [F]);
finally
pixDestroy(ImagePix);
end;
end;
end;
end.
代码实现中注意以下几点:
1:加载字库需要时间较长,代码实现中最好只调用一次。
2:Tesseract API接口加载图片文件错误比较多,最好是程序自己实现图片转化为Bitmap图像,直接加载到内存为好。
3:图片适当放大,识别效果更好。
4:如果只识别某些字符,比如数字,先调用SetVariable函数加载字符白名单,效果很好。
5:识别结果可以返回文字位置数据,可以根据这些数据定位文字和图片关系。
三:DLL文件及字库
Tesseract API主要DLL文件为liblept-5.dll 和 libtesseract-5.dll,其他的是大量的加载图片使用的DLL,tessdata为字库,如下图:
四:测试效果
一张A4图片的识别时间1.5-3秒左右,中文识别率在96%以上。
笔者还提供PDF图片识别,区域模板识别,请关注。
--------------------------------------------------------------------------------------------------------------
(文中代码来源:北京盖亚软件有限公司,详见 http://www.gaya-soft.cn/ )
***** 敬请关注,欢迎交流 *****
--------------------------------------------------------------------------------------------------------------