使用VS+Tesseract来实现中文和英文的识别:
#include<tesseract/baseapi.h>
#include<opencv2/opencv.hpp>
#include<iostream>
#include<Windows.h>
using namespace cv;
using namespace std;
using namespace tesseract;
#pragma comment(lib, "D:/Program Files (x86)/Tesseract-OCR/lib/libtesseract302.lib")
#pragma comment(lib, "D:/Program Files (x86)/Tesseract-OCR/lib/liblept168.lib")
char* U2G(const char* utf8)
{
int len = MultiByteToWideChar(CP_UTF8, 0, utf8, -1, NULL, 0);
wchar_t* wstr = new wchar_t[len + 1];
memset(wstr, 0, len + 1);
MultiByteToWideChar(CP_UTF8, 0, utf8, -1, wstr, len);
len = WideCharToMultiByte(CP_ACP, 0, wstr, -1, NULL, 0, NULL, NULL);
char* str = new char[len + 1];
memset(str, 0, len + 1);
WideCharToMultiByte(CP_ACP, 0, wstr, -1, str, len, NULL, NULL);
if (wstr) delete[] wstr;
return str;
}
char* G2U(const char* gb2312)
{
int len = MultiByteToWideChar(CP_ACP, 0, gb2312, -1, NULL, 0);
wchar_t* wstr = new wchar_t[len + 1];
memset(wstr, 0, len + 1);
MultiByteToWideChar(CP_ACP, 0, gb2312, -1, wstr, len);
len = WideCharToMultiByte(CP_UTF8, 0, wstr, -1, NULL, 0, NULL, NULL);
char* str = new char[len + 1];
memset(str, 0, len + 1);
WideCharToMultiByte(CP_UTF8, 0, wstr, -1, str, len, NULL, NULL);
if (wstr) delete[] wstr;
return str;
}
void main(){
Mat src = imread("D://a//imgch.jpg",0);
TessBaseAPI orc;
char *langue = "eng+chi_sim";
//char *langue1 = "";
orc.Init(NULL,langue,OEM_TESSERACT_ONLY);
orc.SetVariable("tessedit_write_images", "1");
orc.SetPageSegMode(PSM_SINGLE_BLOCK);
orc.SetImage(src.data, src.cols, src.rows, src.elemSize() ,src.cols);
char *text = orc.GetUTF8Text();//.GetUTF8Text();
String s = U2G(text);
cout << s << endl;
cout << orc.MeanTextConf() << endl << endl;
imshow("src",src);
waitKey(0);
}
测试图片:
测试结果: