using System;
using System.Diagnostics;
using System.Drawing;
using System.Drawing.Imaging;
using System.IO;
using System.Linq;
using System.Text;

namespace PINBlog
{
    /// <summary>
    /// Languages that can be passed to the Tesseract command line.
    /// </summary>
    public enum LANGUAGE
    {
        /// <summary>Sent to Tesseract as the language code "eng".</summary>
        ENG = 0
    }

    /// <summary>
    /// Simple container pairing a <see cref="LANGUAGE"/> with a result string.
    /// NOTE(review): nothing in this file produces or consumes it; presumably callers
    /// use it to carry OCR output - confirm against the call sites.
    /// </summary>
    public class OCRResultInfo
    {
        /// <summary>Language associated with the result. Defaults to <see cref="LANGUAGE.ENG"/>.</summary>
        public LANGUAGE Language { get; set; }

        /// <summary>Result text. Defaults to the empty string.</summary>
        public string Result { get; set; }

        /// <summary>Creates an instance with the default language and an empty result.</summary>
        public OCRResultInfo()
        {
            Language = LANGUAGE.ENG;
            Result = "";
        }
    }

    /// <summary>
    /// Thin wrapper that runs the <c>tesseract.exe</c> command-line tool on one or more
    /// images and returns the text it writes.
    /// </summary>
    public class Tesseract
    {
        // Characters stripped from both ends of the text produced by Tesseract.
        private static readonly char[] s_trimChars = { ' ', '\n', '\f' };

        private readonly string m_TesseractExePath;
        private readonly LANGUAGE m_Language;

        /// <summary>
        /// Initializes a new instance of the <see cref="Tesseract"/> class.
        /// The executable is expected at <c>./tessdata/tesseract.exe</c> (relative to the
        /// current working directory), and the process-wide <c>TESSDATA_PREFIX</c>
        /// environment variable is set to that same <c>./tessdata</c> folder.
        /// Trained models can be downloaded from https://github.com/tesseract-ocr/tessdata_fast
        /// </summary>
        /// <param name="language">The language used to extract text from images.</param>
        public Tesseract(LANGUAGE language = LANGUAGE.ENG)
        {
            // Tesseract configs.
            var dir = Path.Combine(".", "tessdata");
            m_TesseractExePath = Path.Combine(dir, "tesseract.exe");
            m_Language = language;
            Environment.SetEnvironmentVariable("TESSDATA_PREFIX", dir);
        }

        /// <summary>
        /// Saves an image into a new in-memory stream positioned at its start.
        /// </summary>
        /// <param name="image">The image to encode.</param>
        /// <param name="format">
        /// The format to encode with. When <c>null</c>, the image's own
        /// <see cref="Image.RawFormat"/> is used.
        /// </param>
        /// <returns>A stream containing the encoded image, rewound to position 0.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="image"/> is <c>null</c>.</exception>
        public Stream ToStream(Image image, ImageFormat format = null)
        {
            if (image == null)
            {
                throw new ArgumentNullException(nameof(image));
            }

            var targetFormat = format;
            if (targetFormat == null)
            {
                // Bitmaps created in memory report MemoryBmp, which has no encoder, so
                // saving with it fails. Fall back to PNG for that case only; an explicitly
                // requested format is always passed through untouched.
                targetFormat = ImageFormat.MemoryBmp.Equals(image.RawFormat)
                    ? ImageFormat.Png
                    : image.RawFormat;
            }

            var stream = new MemoryStream();
            image.Save(stream, targetFormat);
            stream.Position = 0;
            return stream;
        }

        /// <summary>
        /// Reads text from the image streams by running Tesseract on them.
        /// Each stream is rewound (when seekable), read to its end and then closed.
        /// </summary>
        /// <param name="images">The image streams. <c>null</c> or empty yields an empty string.</param>
        /// <returns>The text written by Tesseract, with surrounding spaces, line feeds and form feeds removed.</returns>
        /// <exception cref="InvalidOperationException">
        /// Tesseract exited with a non-zero code; the message is its standard error output.
        /// </exception>
        public string GetText(params Stream[] images)
        {
            if (images == null || images.Length == 0)
            {
                return string.Empty;
            }

            // All intermediate files live in a private folder that is removed afterwards.
            var tempPath = Path.Combine(Path.GetTempPath(), Guid.NewGuid().ToString());
            Directory.CreateDirectory(tempPath);

            try
            {
                var tempInputFile = NewTempFileName(tempPath);
                var tempOutputFile = NewTempFileName(tempPath);
                WriteInputFiles(images, tempPath, tempInputFile);

                // Language codes are lowercase on the Tesseract side (eng, por, ...).
                var languageCode = m_Language.ToString().ToLowerInvariant();

                var info = new ProcessStartInfo
                {
                    FileName = m_TesseractExePath,
                    // Quote the paths: the temp folder may contain spaces.
                    Arguments = $"\"{tempInputFile}\" \"{tempOutputFile}\" -l {languageCode}",
                    RedirectStandardError = true,
                    RedirectStandardOutput = true,
                    CreateNoWindow = true,
                    UseShellExecute = false
                };

                using (var ps = Process.Start(info))
                {
                    // Both pipes are redirected, so both must be drained BEFORE waiting for
                    // exit; otherwise the child can block on a full pipe and never finish.
                    // Stdout content is not needed, so it is consumed asynchronously.
                    ps.BeginOutputReadLine();
                    var stderr = ps.StandardError.ReadToEnd();
                    ps.WaitForExit();

                    if (ps.ExitCode != 0)
                    {
                        throw new InvalidOperationException(stderr);
                    }

                    // Tesseract appends ".txt" to the output base name it was given.
                    return File.ReadAllText(tempOutputFile + ".txt").Trim(s_trimChars);
                }
            }
            finally
            {
                Directory.Delete(tempPath, true);
            }
        }

        /// <summary>
        /// Materializes the input streams on disk. A single image is written directly to
        /// <paramref name="tempInputFile"/>; several images are written to separate files and
        /// <paramref name="tempInputFile"/> becomes a list file naming them, one per line.
        /// </summary>
        private static void WriteInputFiles(Stream[] inputStreams, string tempPath, string tempInputFile)
        {
            if (inputStreams.Length > 1)
            {
                // More than one image: build the list file using the images as input files.
                var imagesListFileContent = new StringBuilder();
                foreach (var inputStream in inputStreams)
                {
                    var imageFile = NewTempFileName(tempPath);
                    using (var tempStream = File.Create(imageFile))
                    {
                        CopyStream(inputStream, tempStream);
                    }

                    imagesListFileContent.AppendLine(imageFile);
                }

                File.WriteAllText(tempInputFile, imagesListFileContent.ToString());
            }
            else
            {
                // Only one image: use the image file itself as the input file.
                using (var tempStream = File.Create(tempInputFile))
                {
                    CopyStream(inputStreams[0], tempStream);
                }
            }
        }

        /// <summary>
        /// Copies all of <paramref name="input"/> (from its start when seekable) into
        /// <paramref name="output"/>, then closes <paramref name="input"/>.
        /// </summary>
        private static void CopyStream(Stream input, Stream output)
        {
            if (input == null)
            {
                throw new ArgumentNullException(nameof(input));
            }

            if (input.CanSeek)
            {
                input.Seek(0, SeekOrigin.Begin);
            }

            // CopyTo loops until end of stream, so it works for non-seekable streams
            // (no Length needed) and for streams that return short reads.
            input.CopyTo(output);
            input.Close();
        }

        /// <summary>Returns a fresh, GUID-named file path inside <paramref name="tempPath"/>.</summary>
        private static string NewTempFileName(string tempPath)
        {
            return Path.Combine(tempPath, Guid.NewGuid().ToString());
        }
    }
}