一、平台:.NET 6.0 控制台应用程序
二、NuGet 包安装(包名与版本如下)
EPPlus 7.2.1
OpenCvSharp4.runtime.win 4.10.0.20240616
Sdcb.PaddleInference 2.5.0.1
Sdcb.PaddleInference.runtime.win64.mkl 2.6.1
Sdcb.PaddleOCR 2.7.0.3
Sdcb.PaddleOCR.Models.Local 2.7.0
System.Drawing.Common 8.0.7
三、实现代码
using OpenCvSharp;
using Sdcb.PaddleInference;
using Sdcb.PaddleOCR.Models.Local;
using Sdcb.PaddleOCR.Models;
using Sdcb.PaddleOCR;
using System.Diagnostics;
using System;
using System.Drawing;
using System.IO;
using OfficeOpenXml;
namespace ConsoleApp1
{
    /// <summary>
    /// Console tool that walks a folder tree, runs PaddleOCR on every .jpg/.png image and
    /// writes the fields extracted from the recognized text into an Excel workbook
    /// (output.xlsx next to the executable). Images that cannot be processed are listed
    /// on a second worksheet together with the failure reason.
    /// </summary>
    internal class Program
    {
        // Markers searched for in the OCR text. Each one is the second character of a
        // label followed by an ASCII colon.
        private const string TimeMarker = "间:";
        private const string DegreeMarker = "度:";
        private const string AddressMarker = "址:";

        /// <summary>
        /// Prompts for a root folder, scans it recursively and saves the workbook.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            Console.WriteLine("【启动图片扫描程序】");
            Console.WriteLine("请指定一个文件根目录:");

            // ReadLine returns null when stdin is closed; surrounding quotes appear when a
            // path is pasted or dragged into the console window.
            string rootFolder = (Console.ReadLine() ?? string.Empty).Trim().Trim('"');
            if (!Directory.Exists(rootFolder))
            {
                Console.WriteLine("【错误】路径不存在...");
                return;
            }

            try
            {
                Console.WriteLine("开始遍历文件夹...");
                string runDir = AppDomain.CurrentDomain.BaseDirectory;
                string savePath = Path.Combine(runDir, "output.xlsx");
                ExcelPackage.LicenseContext = LicenseContext.NonCommercial;

                using (var package = new ExcelPackage())
                {
                    var sheet1 = package.Workbook.Worksheets.Add("结果表");
                    var sheet2 = package.Workbook.Worksheets.Add("错误表");

                    sheet1.SetValue(1, 1, "文件夹名称");
                    sheet1.SetValue(1, 2, "图片名称");
                    sheet1.SetValue(1, 3, "时间");
                    sheet1.SetValue(1, 4, "经度");
                    sheet1.SetValue(1, 5, "纬度");
                    sheet1.SetValue(1, 6, "地址");
                    sheet1.SetValue(1, 7, "路径");

                    sheet2.SetValue(1, 1, "文件夹名称");
                    sheet2.SetValue(1, 2, "图片名称");
                    sheet2.SetValue(1, 3, "路径");
                    sheet2.SetValue(1, 4, "识别错误原因");

                    // Row 1 holds the headers, so data starts at row 2 on both sheets.
                    TraverseDirectory(rootFolder, sheet1, sheet2, 2, 2);
                    package.SaveAs(savePath);
                }

                Console.WriteLine("识别完成,请查看输出文件!");
            }
            catch (Exception ex)
            {
                // Report the failure instead of printing an empty line.
                Console.WriteLine($"【错误】{ex.Message}");
            }
        }

        /// <summary>
        /// Recursively scans <paramref name="path"/> and appends one row per image to the
        /// result sheet or, on failure, to the error sheet.
        /// </summary>
        /// <param name="path">Directory to scan (including all sub-directories).</param>
        /// <param name="sheet1">Result worksheet.</param>
        /// <param name="sheet2">Error worksheet.</param>
        /// <param name="rowIndex1">First free row on <paramref name="sheet1"/>.</param>
        /// <param name="rowIndex2">First free row on <paramref name="sheet2"/>.</param>
        static void TraverseDirectory(string path, ExcelWorksheet sheet1, ExcelWorksheet sheet2, int rowIndex1, int rowIndex2)
        {
            // Build the OCR engine once for the whole tree instead of once per image.
            FullOcrModel model = LocalFullModels.ChineseV3;
            using (PaddleOcrAll ocr = new PaddleOcrAll(model, PaddleDevice.Mkldnn())
            {
                AllowRotateDetection = false,
                Enable180Classification = false,
            })
            {
                TraverseDirectoryCore(new DirectoryInfo(path), ocr, sheet1, sheet2, ref rowIndex1, ref rowIndex2);
            }
        }

        /// <summary>
        /// Processes the images in <paramref name="dir"/> and then its sub-directories.
        /// The row counters are passed by reference so that rows written for one
        /// sub-directory are not overwritten by the next one.
        /// </summary>
        private static void TraverseDirectoryCore(DirectoryInfo dir, PaddleOcrAll ocr, ExcelWorksheet sheet1, ExcelWorksheet sheet2, ref int rowIndex1, ref int rowIndex2)
        {
            Console.WriteLine($"***【文件夹{dir.Name}】***");

            foreach (FileInfo fileInfo in dir.GetFiles())
            {
                if (!IsSupportedImage(fileInfo))
                {
                    continue;
                }

                Console.WriteLine($"正在识别文件:{fileInfo.Name}");

                if (TryRecognize(ocr, fileInfo, out string[] fields, out string error))
                {
                    sheet1.SetValue(rowIndex1, 1, dir.Name);
                    sheet1.SetValue(rowIndex1, 2, fileInfo.Name);
                    // fields = time, longitude, latitude, address -> columns 3..6.
                    for (int i = 0; i < fields.Length; i++)
                    {
                        sheet1.SetValue(rowIndex1, 3 + i, fields[i]);
                    }
                    sheet1.SetValue(rowIndex1, 7, fileInfo.FullName);
                    rowIndex1 += 1;
                    Console.WriteLine($"识别成功...");
                }
                else
                {
                    sheet2.SetValue(rowIndex2, 1, dir.Name);
                    sheet2.SetValue(rowIndex2, 2, fileInfo.Name);
                    sheet2.SetValue(rowIndex2, 3, fileInfo.FullName);
                    sheet2.SetValue(rowIndex2, 4, error);
                    rowIndex2 += 1;
                    Console.WriteLine($"识别异常:{error}");
                }
            }

            foreach (DirectoryInfo subDir in dir.GetDirectories())
            {
                TraverseDirectoryCore(subDir, ocr, sheet1, sheet2, ref rowIndex1, ref rowIndex2);
            }
        }

        /// <summary>Returns true for .jpg and .png files, ignoring the extension's case.</summary>
        private static bool IsSupportedImage(FileInfo file)
        {
            return string.Equals(file.Extension, ".jpg", StringComparison.OrdinalIgnoreCase)
                || string.Equals(file.Extension, ".png", StringComparison.OrdinalIgnoreCase);
        }

        /// <summary>
        /// Runs OCR on one image and extracts the four text fields.
        /// </summary>
        /// <param name="ocr">Shared OCR engine.</param>
        /// <param name="file">Image file to read.</param>
        /// <param name="fields">On success: time, longitude, latitude, address (in that order).</param>
        /// <param name="error">On failure: a human-readable reason.</param>
        /// <returns>True when all four fields were extracted.</returns>
        private static bool TryRecognize(PaddleOcrAll ocr, FileInfo file, out string[] fields, out string error)
        {
            fields = Array.Empty<string>();
            error = string.Empty;

            try
            {
                using (Mat src = Cv2.ImRead(file.FullName))
                {
                    if (src.Empty())
                    {
                        error = "无法读取图片文件";
                        return false;
                    }

                    PaddleOcrResult result = ocr.Run(src);

                    // Strip line breaks and normalize full-width colons (U+FF1A) to ASCII so
                    // the marker search matches either form. The original replaced ":" with
                    // itself, which had no effect.
                    string text = result.Text
                        .Replace("\r", "")
                        .Replace("\n", "")
                        .Replace("\uFF1A", ":");

                    return TryExtractFields(text, out fields, out error);
                }
            }
            catch (Exception ex)
            {
                // Best effort per image: any OCR/decoding failure is reported on the error
                // sheet and the scan continues with the next file.
                error = ex.Message;
                return false;
            }
        }

        /// <summary>
        /// Splits the OCR text into time, longitude, latitude and address using the
        /// "间:", "度:" (first and last occurrence) and "址:" markers.
        /// </summary>
        /// <param name="text">OCR text with line breaks removed.</param>
        /// <param name="fields">On success: time, longitude, latitude, address.</param>
        /// <param name="error">On failure: which marker was missing or out of order.</param>
        /// <returns>True when every marker was found in the expected order.</returns>
        private static bool TryExtractFields(string text, out string[] fields, out string error)
        {
            fields = Array.Empty<string>();
            error = string.Empty;

            int start = text.IndexOf(TimeMarker, StringComparison.Ordinal);
            if (start < 0)
            {
                // Previously such images were dropped without any record.
                error = "未识别到时间字段";
                return false;
            }

            string tail = text.Substring(start);
            int lonAt = tail.IndexOf(DegreeMarker, StringComparison.Ordinal);
            int latAt = tail.LastIndexOf(DegreeMarker, StringComparison.Ordinal);
            int addrAt = tail.IndexOf(AddressMarker, StringComparison.Ordinal);

            // Two distinct "度:" markers followed by "址:" are required; otherwise the
            // Substring calls below would throw with an unhelpful message.
            if (lonAt < 0 || latAt <= lonAt || addrAt <= latAt)
            {
                error = "经度、纬度或地址字段缺失或顺序异常";
                return false;
            }

            // NOTE(review): each slice ends right before the next marker, i.e. before the
            // second character of the next label, so whatever character precedes that
            // marker stays at the end of the value. Kept as in the original extraction;
            // confirm against real OCR output.
            int markerLength = TimeMarker.Length;
            fields = new[]
            {
                tail.Substring(markerLength, lonAt - markerLength),
                tail.Substring(lonAt + markerLength, latAt - lonAt - markerLength),
                tail.Substring(latAt + markerLength, addrAt - latAt - markerLength),
                tail.Substring(addrAt + markerLength),
            };
            return true;
        }
    }
}
效果:
原图: