You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
170 lines
5.5 KiB
C#
170 lines
5.5 KiB
C#
using System;
|
|
using System.Diagnostics;
|
|
using System.Drawing;
|
|
using System.Drawing.Imaging;
|
|
using System.IO;
|
|
using System.Linq;
|
|
using System.Text;
|
|
|
|
namespace PINBlog
|
|
{
|
|
/// <summary>
/// Languages that can be selected for text recognition.
/// The member name is passed verbatim (via <c>ToString()</c>) to the
/// Tesseract command line as the <c>-l</c> argument.
/// </summary>
public enum LANGUAGE
{
    /// <summary>English. Currently the only member; underlying value is 0.</summary>
    ENG = 0
}
|
|
|
|
/// <summary>
/// Simple mutable container pairing a <see cref="LANGUAGE"/> with a result string.
/// A freshly constructed instance has <see cref="Language"/> set to
/// <see cref="LANGUAGE.ENG"/> and <see cref="Result"/> set to the empty string.
/// </summary>
public class OCRResultInfo
{
    /// <summary>Language associated with this result. Defaults to <see cref="LANGUAGE.ENG"/>.</summary>
    public LANGUAGE Language { get; set; } = LANGUAGE.ENG;

    /// <summary>Result text. Defaults to an empty string (never null on a new instance).</summary>
    public string Result { get; set; } = "";
}
|
|
|
|
/// <summary>
/// Thin wrapper around the Tesseract command-line executable: writes image
/// streams to a temporary directory, runs <c>tesseract.exe</c> on them and
/// returns the recognized text.
/// </summary>
public class Tesseract
{
    // Characters stripped from both ends of the text read back from the output file.
    private static readonly char[] s_TrimChars = { ' ', '\n', '\f' };

    // Chunk size used when copying an input stream to a temp file.
    private const int CopyBufferSize = 81920;

    private readonly string m_TesseractExePath;
    private readonly LANGUAGE m_Language;

    /// <summary>
    /// Initializes a new instance of the <see cref="Tesseract"/> class.
    /// </summary>
    /// <remarks>
    /// The executable is expected at <c>./tessdata/tesseract.exe</c>, resolved
    /// against the current working directory. The constructor also sets the
    /// <c>TESSDATA_PREFIX</c> environment variable of the current process to
    /// that same <c>./tessdata</c> directory.
    /// </remarks>
    /// <param name="language">
    /// The language used to extract text from images. Its name is passed to
    /// Tesseract as the <c>-l</c> argument.
    /// </param>
    public Tesseract(LANGUAGE language = LANGUAGE.ENG)
    {
        // Tesseract configs.
        var dir = Path.Combine(".", "tessdata");
        m_TesseractExePath = Path.Combine(dir, "tesseract.exe");
        m_Language = language;
        Environment.SetEnvironmentVariable("TESSDATA_PREFIX", dir);
    }

    /// <summary>
    /// Encodes an image into a new in-memory stream positioned at its start.
    /// </summary>
    /// <param name="image">The image to encode.</param>
    /// <param name="format">
    /// The encoding to use. When null, the image's own <see cref="Image.RawFormat"/>
    /// is used, except for in-memory bitmaps, which are written as PNG.
    /// </param>
    /// <returns>A <see cref="MemoryStream"/> holding the encoded image; the caller owns it.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="image"/> is null.</exception>
    public Stream ToStream(Image image, ImageFormat format = null)
    {
        if (image == null)
        {
            throw new ArgumentNullException(nameof(image));
        }

        if (format == null)
        {
            // Bitmaps created in memory report ImageFormat.MemoryBmp, which has no
            // encoder to save with; fall back to PNG so Save does not fail.
            format = ImageFormat.MemoryBmp.Equals(image.RawFormat)
                ? ImageFormat.Png
                : image.RawFormat;
        }

        var stream = new MemoryStream();
        image.Save(stream, format);
        stream.Position = 0;
        return stream;
    }

    /// <summary>
    /// Read text from the images streams.
    /// </summary>
    /// <remarks>
    /// Every stream passed in is closed by this method once its content has
    /// been copied to a temporary file. The temporary directory is deleted
    /// before the method returns or throws.
    /// </remarks>
    /// <param name="images">The images streams.</param>
    /// <returns>
    /// The images text with leading/trailing spaces, line feeds and form feeds
    /// removed, or an empty string when no stream is supplied.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="images"/> is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="images"/> contains a null stream.</exception>
    /// <exception cref="InvalidOperationException">
    /// Tesseract exited with a non-zero code; the message is its standard error output.
    /// </exception>
    public string GetText(params Stream[] images)
    {
        if (images == null)
        {
            throw new ArgumentNullException(nameof(images));
        }

        if (images.Any(s => s == null))
        {
            throw new ArgumentException("The image streams must not contain null entries.", nameof(images));
        }

        if (images.Length == 0)
        {
            return string.Empty;
        }

        var tempPath = Path.Combine(Path.GetTempPath(), Guid.NewGuid().ToString());
        Directory.CreateDirectory(tempPath);

        try
        {
            var tempInputFile = NewTempFileName(tempPath);
            var tempOutputFile = NewTempFileName(tempPath);

            WriteInputFiles(images, tempPath, tempInputFile);

            return RunTesseract(tempInputFile, tempOutputFile);
        }
        finally
        {
            Directory.Delete(tempPath, true);
        }
    }

    // Runs tesseract.exe on inputFile and returns the trimmed content of "<outputBase>.txt".
    private string RunTesseract(string inputFile, string outputBase)
    {
        // NOTE(review): the enum name is passed as-is ("ENG"); Tesseract language
        // codes are conventionally lower-case ("eng") - confirm on case-sensitive file systems.
        var info = new ProcessStartInfo
        {
            FileName = m_TesseractExePath,
            // Paths are quoted because the temp directory may contain spaces.
            Arguments = $"\"{inputFile}\" \"{outputBase}\" -l {m_Language}",
            RedirectStandardError = true,
            RedirectStandardOutput = true,
            CreateNoWindow = true,
            UseShellExecute = false
        };

        using (var ps = Process.Start(info))
        {
            // Both pipes are redirected, so both must be drained while the child
            // runs: waiting for exit first can deadlock once a pipe buffer fills.
            // stdout is discarded asynchronously; stderr is kept for the error message.
            ps.BeginOutputReadLine();
            var stderr = ps.StandardError.ReadToEnd();
            ps.WaitForExit();

            if (ps.ExitCode != 0)
            {
                throw new InvalidOperationException(stderr);
            }
        }

        // The result is read from the output base name plus a ".txt" extension.
        return File.ReadAllText(outputBase + ".txt").Trim(s_TrimChars);
    }

    // Materializes the input streams on disk: a single stream becomes the input file
    // itself; several streams become separate files listed, one per line, in the input file.
    private void WriteInputFiles(Stream[] inputStreams, string tempPath, string tempInputFile)
    {
        if (inputStreams.Length <= 1)
        {
            // If there is only one image file, then use the image file as the input file.
            WriteImageFile(inputStreams.First(), tempInputFile);
            return;
        }

        // If there is more than one image file, build the list file using the images as input files.
        var imagesListFileContent = new StringBuilder();

        foreach (var inputStream in inputStreams)
        {
            var imageFile = NewTempFileName(tempPath);
            WriteImageFile(inputStream, imageFile);
            imagesListFileContent.AppendLine(imageFile);
        }

        File.WriteAllText(tempInputFile, imagesListFileContent.ToString());
    }

    // Copies one input stream into a file at the given path.
    private void WriteImageFile(Stream inputStream, string path)
    {
        using (var tempStream = File.OpenWrite(path))
        {
            CopyStream(inputStream, tempStream);
        }
    }

    // Copies all of input (from its start when seekable) to output, then closes input.
    private void CopyStream(Stream input, Stream output)
    {
        try
        {
            if (input.CanSeek)
            {
                input.Seek(0, SeekOrigin.Begin);
            }

            // Read in a loop: a single Read call may return fewer bytes than
            // requested, and Length is not available on non-seekable streams.
            var buffer = new byte[CopyBufferSize];
            int read;
            while ((read = input.Read(buffer, 0, buffer.Length)) > 0)
            {
                output.Write(buffer, 0, read);
            }
        }
        finally
        {
            input.Close();
        }
    }

    // Returns a fresh GUID-named path inside tempPath; no file is created.
    private string NewTempFileName(string tempPath)
    {
        return Path.Combine(tempPath, Guid.NewGuid().ToString());
    }
}
|
|
}
|