diff options
Diffstat (limited to 'pkgs/applications/misc/k2pdfopt/tesseract.patch')
-rw-r--r-- | pkgs/applications/misc/k2pdfopt/tesseract.patch | 675 |
1 files changed, 0 insertions, 675 deletions
diff --git a/pkgs/applications/misc/k2pdfopt/tesseract.patch b/pkgs/applications/misc/k2pdfopt/tesseract.patch deleted file mode 100644 index adfee9ae282f..000000000000 --- a/pkgs/applications/misc/k2pdfopt/tesseract.patch +++ /dev/null @@ -1,675 +0,0 @@ -From 39aa8502eee7bb669a29d1a9b3bfe5c9595ad960 Mon Sep 17 00:00:00 2001 -From: Daniel Fullmer <danielrf12@gmail.com> -Date: Fri, 13 Sep 2019 13:45:05 -0400 -Subject: [PATCH] Willus mod changes from k2pdfopt - ---- - src/api/Makefile.am | 1 + - src/api/baseapi.cpp | 87 +++++++++++ - src/api/baseapi.h | 3 + - src/api/tesscapi.cpp | 311 +++++++++++++++++++++++++++++++++++++ - src/api/tesseract.h | 29 ++++ - src/ccmain/tessedit.cpp | 5 +- - src/ccutil/ccutil.h | 7 + - src/ccutil/genericvector.h | 21 ++- - src/ccutil/mainblk.cpp | 17 +- - src/ccutil/params.cpp | 3 +- - src/ccutil/serialis.cpp | 3 + - src/ccutil/serialis.h | 2 + - src/lstm/input.cpp | 3 + - 13 files changed, 488 insertions(+), 4 deletions(-) - create mode 100644 src/api/tesscapi.cpp - create mode 100644 src/api/tesseract.h - -diff --git a/src/api/Makefile.am b/src/api/Makefile.am -index d9b76eb6..cd2dc30f 100644 ---- a/src/api/Makefile.am -+++ b/src/api/Makefile.am -@@ -39,6 +39,7 @@ libtesseract_api_la_SOURCES += lstmboxrenderer.cpp - libtesseract_api_la_SOURCES += pdfrenderer.cpp - libtesseract_api_la_SOURCES += wordstrboxrenderer.cpp - libtesseract_api_la_SOURCES += renderer.cpp -+libtesseract_api_la_SOURCES += tesscapi.cpp - - lib_LTLIBRARIES += libtesseract.la - libtesseract_la_LDFLAGS = $(LEPTONICA_LIBS) $(OPENCL_LDFLAGS) $(libarchive_LIBS) -diff --git a/src/api/baseapi.cpp b/src/api/baseapi.cpp -index 9245d07c..ea964ee6 100644 ---- a/src/api/baseapi.cpp -+++ b/src/api/baseapi.cpp -@@ -215,6 +215,14 @@ TessBaseAPI::TessBaseAPI() - // Use the current locale if building debug code. - std::locale::global(std::locale("")); - #endif -+ const char *locale; -+ locale = std::setlocale(LC_ALL, nullptr); -+/* willus mod Remove assertions--taken care of in tesscapi.cpp */ -+// ASSERT_HOST(!strcmp(locale, "C")); -+ locale = std::setlocale(LC_CTYPE, nullptr); -+// ASSERT_HOST(!strcmp(locale, "C")); -+ locale = std::setlocale(LC_NUMERIC, nullptr); -+// ASSERT_HOST(!strcmp(locale, "C")); - } - - TessBaseAPI::~TessBaseAPI() { -@@ -1333,6 +1341,85 @@ static void AddBoxToTSV(const PageIterator* it, PageIteratorLevel level, - text->add_str_int("\t", bottom - top); - } - -+/* willus mod */ -+int TessBaseAPI::GetOCRWords(int **x00,int **y00,int **x11,int **y11,int **ybaseline0, -+ char **utf8words) -+ -+ { -+ int iword,nwords,totlen,it8; -+ int *x0,*y0,*x1,*y1,*ybaseline; -+ char *tutf8; -+ -+ ResultIterator *res_it = GetIterator(); -+ /* Count words */ -+ iword=0; -+ totlen=0; -+ while (!res_it->Empty(RIL_BLOCK)) -+ { -+ if (res_it->Empty(RIL_WORD)) -+ { -+ res_it->Next(RIL_WORD); -+ continue; -+ } -+ iword++; -+ STRING textstr=std::unique_ptr<const char[]>(res_it->GetUTF8Text(RIL_WORD)).get(); -+ totlen+=strlen(textstr.string())+1; -+ res_it->Next(RIL_WORD); -+ } -+ nwords=iword; -+/* -+printf("\nnwords=%d, totlen=%d\n",nwords,totlen); -+*/ -+ x0=(*x00)=(int *)malloc(sizeof(int)*5*nwords); -+ y0=(*y00)=&x0[nwords]; -+ x1=(*x11)=&y0[nwords]; -+ y1=(*y11)=&x1[nwords]; -+ ybaseline=(*ybaseline0)=&y1[nwords]; -+ tutf8=(*utf8words)=(char *)malloc(totlen); -+ iword=0; -+ it8=0; -+ res_it->Begin(); -+ while (!res_it->Empty(RIL_BLOCK)) -+ { -+ if (res_it->Empty(RIL_WORD)) -+ { -+ res_it->Next(RIL_WORD); -+ continue; -+ } -+ STRING textstr=std::unique_ptr<const char[]>(res_it->GetUTF8Text(RIL_WORD)).get(); -+ strcpy(&tutf8[it8],textstr.string()); -+ it8 += strlen(&tutf8[it8])+1; -+ /* -+ STRING textstr(""); -+ textstr += std::unique_ptr<const char[]>(res_it->GetUTF8Text(RIL_WORD)).get(); -+ */ -+/* -+printf("Word %d: '%s'\n",iword,textstr.string()); -+*/ -+ int left, top, right, bottom; -+ int u1,v1,u2,v2; -+ res_it->BoundingBox(RIL_WORD, &left, &top, &right, &bottom); -+ res_it->Baseline(RIL_WORD, &u1, &v1, &u2, &v2); -+ x0[iword]=left; -+ x1[iword]=right; -+ y0[iword]=top; -+ y1[iword]=bottom; -+ ybaseline[iword]=(v1+v2)/2; -+ iword++; -+/* -+printf("BB: (%d,%d)-(%d,%d) BL: (%d,%d)-(%d,%d)\n",left,bottom,right,top,x1,y1,x2,y2); -+*/ -+ res_it->Next(RIL_WORD); -+ } -+/* -+printf("iword=%d\n",iword); -+*/ -+ return(iword); -+ } -+ -+/* willus mod */ -+int GetOCRWords(int **x0,int **y0,int **x1,int **y1,int **ybaseline,char **utf8words); -+ - /** - * Make a TSV-formatted string from the internal data structures. - * page_number is 0-based but will appear in the output as 1-based. -diff --git a/src/api/baseapi.h b/src/api/baseapi.h -index 3724dd92..23be5920 100644 ---- a/src/api/baseapi.h -+++ b/src/api/baseapi.h -@@ -575,6 +575,9 @@ class TESS_API TessBaseAPI { - */ - char* GetHOCRText(ETEXT_DESC* monitor, int page_number); - -+/* willus mod */ -+int GetOCRWords(int **x0,int **y0,int **x1,int **y1,int **ybaseline,char **utf8words); -+ - /** - * Make a HTML-formatted string with hOCR markup from the internal - * data structures. -diff --git a/src/api/tesscapi.cpp b/src/api/tesscapi.cpp -new file mode 100644 -index 00000000..1752fafe ---- /dev/null -+++ b/src/api/tesscapi.cpp -@@ -0,0 +1,311 @@ -+/* -+** tesscapi.cpp willus.com attempt at C wrapper for tesseract. -+** (Butchered from tesseractmain.cpp) -+** Last udpated 9-1-12 -+** -+** Copyright (C) 2012 http://willus.com -+** -+** This program is free software: you can redistribute it and/or modify -+** it under the terms of the GNU Affero General Public License as -+** published by the Free Software Foundation, either version 3 of the -+** License, or (at your option) any later version. -+** -+** This program is distributed in the hope that it will be useful, -+** but WITHOUT ANY WARRANTY; without even the implied warranty of -+** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+** GNU Affero General Public License for more details. -+** -+** You should have received a copy of the GNU Affero General Public License -+** along with this program. If not, see <http://www.gnu.org/licenses/>. -+** -+*/ -+ -+/* -+#include "mfcpch.h" -+*/ -+// #define USE_VLD //Uncomment for Visual Leak Detector. -+#if (defined _MSC_VER && defined USE_VLD) -+#include <vld.h> -+#endif -+ -+// Include automatically generated configuration file if running autoconf -+#ifdef HAVE_CONFIG_H -+#include "config_auto.h" -+#endif -+#include <locale.h> -+#ifdef USING_GETTEXT -+#include <libintl.h> -+#define _(x) gettext(x) -+#else -+#define _(x) (x) -+#endif -+ -+#include "allheaders.h" -+#include "baseapi.h" -+#include "strngs.h" -+#include "params.h" -+#include "blobs.h" -+#include "simddetect.h" -+#include "tesseractclass.h" -+/* -+#include "notdll.h" -+*/ -+ -+/* C Wrappers */ -+#include "tesseract.h" -+ -+// static tesseract::TessBaseAPI api[4]; -+ -+/* -+** ocr_type=0: OEM_DEFAULT -+** ocr_type=1: OEM_TESSERACT_ONLY -+** ocr_type=2: OEM_LSTM_ONLY -+** ocr_type=3: OEM_TESSERACT_LSTM_COMBINED -+*/ -+void *tess_capi_init(char *datapath,char *language,int ocr_type,FILE *out, -+ char *initstr,int maxlen,int *status) -+ -+ { -+ char original_locale[256]; -+ tesseract::TessBaseAPI *api = new tesseract::TessBaseAPI; -+/* -+printf("@tess_capi_init\n"); -+printf(" datapath='%s'\n",datapath); -+printf(" language='%s'\n",language); -+printf(" ocr_type=%d\n",ocr_type); -+*/ -+#ifdef USE_NLS -+ setlocale (LC_ALL, ""); -+ bindtextdomain (PACKAGE, LOCALEDIR); -+ textdomain (PACKAGE); -+#endif -+ /* willus mod, 11-24-16 */ -+ /* Tesseract needs "C" locale to correctly parse all data .traineddata files. */ -+/* -+printf("locale='%s'\n",setlocale(LC_ALL,NULL)); -+printf("ctype='%s'\n",setlocale(LC_CTYPE,NULL)); -+printf("numeric='%s'\n",setlocale(LC_NUMERIC,NULL)); -+*/ -+ strncpy(original_locale,setlocale(LC_ALL,NULL),255); -+ original_locale[255]='\0'; -+/* -+printf("original_locale='%s'\n",original_locale); -+*/ -+ setlocale(LC_ALL,"C"); -+/* -+printf("new locale='%s'\n",setlocale(LC_ALL,NULL)); -+printf("new ctype='%s'\n",setlocale(LC_CTYPE,NULL)); -+printf("new numeric='%s'\n",setlocale(LC_NUMERIC,NULL)); -+*/ -+ // fprintf(stderr, "tesseract %s\n", tesseract::TessBaseAPI::Version()); -+ // Make the order of args a bit more forgiving than it used to be. -+ const char* lang = "eng"; -+ tesseract::PageSegMode pagesegmode = tesseract::PSM_SINGLE_BLOCK; -+ if (language!=NULL && language[0]!='\0') -+ lang = language; -+ /* -+ if (output == NULL) -+ { -+ fprintf(stderr, _("Usage:%s imagename outputbase [-l lang] " -+ "[-psm pagesegmode] [configfile...]\n"), argv[0]); -+ fprintf(stderr, -+ _("pagesegmode values are:\n" -+ "0 = Orientation and script detection (OSD) only.\n" -+ "1 = Automatic page segmentation with OSD.\n" -+ "2 = Automatic page segmentation, but no OSD, or OCR\n" -+ "3 = Fully automatic page segmentation, but no OSD. (Default)\n" -+ "4 = Assume a single column of text of variable sizes.\n" -+ "5 = Assume a single uniform block of vertically aligned text.\n" -+ "6 = Assume a single uniform block of text.\n" -+ "7 = Treat the image as a single text line.\n" -+ "8 = Treat the image as a single word.\n" -+ "9 = Treat the image as a single word in a circle.\n" -+ "10 = Treat the image as a single character.\n")); -+ fprintf(stderr, _("-l lang and/or -psm pagesegmode must occur before any" -+ "configfile.\n")); -+ exit(1); -+ } -+ */ -+/* -+printf("SSE = %s\n",SIMDDetect::IsSSEAvailable() ? "AVAILABLE" : "NOT AVAILABLE"); -+printf("AVX = %s\n",SIMDDetect::IsAVXAvailable() ? "AVAILABLE" : "NOT AVAILABLE"); -+*/ -+/* -+v4.00 loads either TESSERACT enginer, LSTM engine, or both. No CUBE. -+*/ -+ ocr_type=0; /* Ignore specified and use default */ -+ api->SetOutputName(NULL); -+ (*status)=api->Init(datapath,lang, -+ ocr_type==0 ? tesseract::OEM_DEFAULT : -+ (ocr_type==1 ? tesseract::OEM_TESSERACT_ONLY : -+ (ocr_type==2 ? tesseract::OEM_LSTM_ONLY : -+ (tesseract::OEM_TESSERACT_LSTM_COMBINED)))); -+ if ((*status)!=0) -+ { -+ /* willus mod, 11-24-16 */ -+ setlocale(LC_ALL,original_locale); -+ api->End(); -+ delete api; -+ return(NULL); -+ } -+ /* -+ api.Init("tesscapi",lang,tesseract::OEM_DEFAULT, -+ &(argv[arg]), argc - arg, NULL, NULL, false); -+ */ -+ // We have 2 possible sources of pagesegmode: a config file and -+ // the command line. For backwards compatability reasons, the -+ // default in tesseract is tesseract::PSM_SINGLE_BLOCK, but the -+ // default for this program is tesseract::PSM_AUTO. We will let -+ // the config file take priority, so the command-line default -+ // can take priority over the tesseract default, so we use the -+ // value from the command line only if the retrieved mode -+ // is still tesseract::PSM_SINGLE_BLOCK, indicating no change -+ // in any config file. Therefore the only way to force -+ // tesseract::PSM_SINGLE_BLOCK is from the command line. -+ // It would be simpler if we could set the value before Init, -+ // but that doesn't work. -+ if (api->GetPageSegMode() == tesseract::PSM_SINGLE_BLOCK) -+ api->SetPageSegMode(pagesegmode); -+ -+ /* -+ ** Initialization message -+ */ -+ { -+ char istr[1024]; -+ int sse,avx; -+ -+// printf("tessedit_ocr_engine_mode = %d\n",tessedit_ocr_engine_mode); -+ sprintf(istr,"%s",api->Version()); -+ sse=tesseract::SIMDDetect::IsSSEAvailable(); -+ avx=tesseract::SIMDDetect::IsAVXAvailable(); -+ if (sse || avx) -+ sprintf(&istr[strlen(istr)]," [%s]",sse&&avx?"SSE+AVX":(sse?"SSE":"AVX")); -+ sprintf(&istr[strlen(istr)],"\n Tesseract data folder = '%s'",datapath==NULL?getenv("TESSDATA_PREFIX"):datapath); -+ strcat(istr,"\n Tesseract languages: "); -+ GenericVector<STRING> languages; -+ api->GetLoadedLanguagesAsVector(&languages); -+/* -+printf("OEM=%d\n",api->oem()); -+printf("Langs='%s'\n",api->GetInitLanguagesAsString()); -+printf("AnyTessLang()=%d\n",(int)api->tesseract()->AnyTessLang()); -+printf("AnyLSTMLang()=%d\n",(int)api->tesseract()->AnyLSTMLang()); -+printf("num_sub_langs()=%d\n",api->tesseract()->num_sub_langs()); -+printf("languages.size()=%d\n",(int)languages.size()); -+*/ -+ -+ for (int i=0;i<=api->tesseract()->num_sub_langs();i++) -+ { -+ tesseract::Tesseract *lang1; -+ int eng; -+ lang1 = i==0 ? api->tesseract() : api->tesseract()->get_sub_lang(i-1); -+ eng=(int)lang1->tessedit_ocr_engine_mode; -+ sprintf(&istr[strlen(istr)],"%s%s [%s]",i==0?"":", ",lang1->lang.string(), -+ eng==2?"LSTM+Tess":(eng==1?"LSTM":"Tess")); -+ } -+/* -+printf("%d. '%s'\n",i+1,languages[i].string()); -+printf(" sublang[%d].oem_engine = %d\n",i+1,(int)api->tesseract()->get_sub_lang(i)->tessedit_ocr_engine_mode); -+*/ -+ -+ /* -+ if (ocr_type==0 || ocr_type==3) -+ sprintf(&istr[strlen(istr)],"[LSTM+] (lang="); -+ else if (ocr_type==2) -+ sprintf(&istr[strlen(istr)],"[LSTM] (lang="); -+ strncpy(&istr[strlen(istr)],language,253-strlen(istr)); -+ istr[253]='\0'; -+ strcat(istr,")"); -+ */ -+ if (out!=NULL) -+ fprintf(out,"%s\n",istr); -+ if (initstr!=NULL) -+ { -+ strncpy(initstr,istr,maxlen-1); -+ initstr[maxlen-1]='\0'; -+ } -+ } -+ -+ -+ /* Turn off LSTM debugging output */ -+ api->SetVariable("lstm_debug_level","0"); -+#if (WILLUSDEBUG & 1) -+ api->SetVariable("lstm_debug_level","9"); -+ api->SetVariable("paragraph_debug_level","9"); -+ api->SetVariable("tessdata_manager_debug_level","9"); -+ api->SetVariable("tosp_debug_level","9"); -+ api->SetVariable("wordrec_debug_level","9"); -+ api->SetVariable("segsearch_debug_level","9"); -+#endif -+ /* willus mod, 11-24-16 */ -+ setlocale(LC_ALL,original_locale); -+ return((void *)api); -+ } -+ -+ -+int tess_capi_get_ocr(void *vapi,PIX *pix,char *outstr,int maxlen,int segmode,FILE *out) -+ -+ { -+ tesseract::TessBaseAPI *api; -+ static int old_segmode=-1; -+ -+ api=(tesseract::TessBaseAPI *)vapi; -+ if (old_segmode != segmode) -+ { -+ old_segmode=segmode; -+ api->SetPageSegMode((tesseract::PageSegMode)segmode); -+ } -+ if (!api->ProcessPage(pix,0,NULL,NULL,0,NULL)) -+ { -+ /* pixDestroy(&pix); */ -+ if (out!=NULL) -+ fprintf(out,"tesscapi: Error during bitmap processing.\n"); -+ api->Clear(); -+ return(-1); -+ } -+ strncpy(outstr,api->GetUTF8Text(),maxlen-1); -+ outstr[maxlen-1]='\0'; -+ api->Clear(); -+ return(0); -+ } -+ -+ -+int tess_capi_get_ocr_multiword(void *vapi,PIX *pix,int segmode, -+ int **left,int **top,int **right,int **bottom, -+ int **ybase,char **text,int *nw, -+ FILE *out) -+ -+ { -+ tesseract::TessBaseAPI *api; -+ static int old_segmode=-1; -+ -+ api=(tesseract::TessBaseAPI *)vapi; -+ if (old_segmode != segmode) -+ { -+ old_segmode=segmode; -+ api->SetPageSegMode((tesseract::PageSegMode)segmode); -+ } -+ if (!api->ProcessPage(pix,0,NULL,NULL,0,NULL)) -+ { -+ if (out!=NULL) -+ fprintf(out,"tesscapi: Error during bitmap processing.\n"); -+ api->Clear(); -+ (*nw)=0; -+ return(-1); -+ } -+ (*nw)=api->GetOCRWords(left,top,right,bottom,ybase,text); -+ api->Clear(); -+ return(0); -+ } -+ -+ -+void tess_capi_end(void *vapi) -+ -+ { -+ tesseract::TessBaseAPI *api; -+ -+ if (vapi==NULL) -+ return; -+ api=(tesseract::TessBaseAPI *)vapi; -+ api->End(); -+ delete api; -+ } -diff --git a/src/api/tesseract.h b/src/api/tesseract.h -new file mode 100644 -index 00000000..575948cc ---- /dev/null -+++ b/src/api/tesseract.h -@@ -0,0 +1,29 @@ -+/* -+** Willus.com's Tesseract C Wrappers -+** -+** 6-8-12 -+** -+*/ -+ -+#ifndef _TESSERACT_H_ -+#define _TESSERACT_H_ -+ -+//#include <leptonica.h> -+#ifdef __cplusplus -+extern "C" { -+#endif -+ -+void *tess_capi_init(char *datapath,char *language,int ocr_type,FILE *out, -+ char *initstr,int maxlen,int *status); -+int tess_capi_get_ocr(void *api,PIX *pix,char *outstr,int maxlen,int segmode,FILE *out); -+int tess_capi_get_ocr_multiword(void *vapi,PIX *pix,int segmode, -+ int **left,int **top,int **right,int **bottom, -+ int **ybase,char **text,int *nw, -+ FILE *out); -+void tess_capi_end(void *api); -+ -+#ifdef __cplusplus -+} -+#endif -+ -+#endif -diff --git a/src/ccmain/tessedit.cpp b/src/ccmain/tessedit.cpp -index 17f0951b..7af94ee2 100644 ---- a/src/ccmain/tessedit.cpp -+++ b/src/ccmain/tessedit.cpp -@@ -101,6 +101,10 @@ bool Tesseract::init_tesseract_lang_data( - " to your \"tessdata\" directory.\n"); - return false; - } -+ /* willus mod */ -+ TFile fp; -+ strncpy(fp.tfile_filename,tessdata_path.string(),511); -+ fp.tfile_filename[511]='\0'; - #ifndef DISABLED_LEGACY_ENGINE - if (oem == OEM_DEFAULT) { - // Set the engine mode from availability, which can then be overridden by -@@ -116,7 +120,6 @@ bool Tesseract::init_tesseract_lang_data( - #endif // ndef DISABLED_LEGACY_ENGINE - - // If a language specific config file (lang.config) exists, load it in. -- TFile fp; - if (mgr->GetComponent(TESSDATA_LANG_CONFIG, &fp)) { - ParamUtils::ReadParamsFromFp(SET_PARAM_CONSTRAINT_NONE, &fp, - this->params()); -diff --git a/src/ccutil/ccutil.h b/src/ccutil/ccutil.h -index 71e89c60..bdeccc14 100644 ---- a/src/ccutil/ccutil.h -+++ b/src/ccutil/ccutil.h -@@ -80,6 +80,13 @@ class CCUtil { - // Member parameters. - // These have to be declared and initialized after params_ member, since - // params_ should be initialized before parameters are added to it. -+/* willus mod */ -+/* -+ #ifdef _WIN32 -+ STRING_VAR_H(tessedit_module_name, WINDLLNAME, -+ "Module colocated with tessdata dir"); -+ #endif -+*/ - INT_VAR_H(ambigs_debug_level, 0, "Debug level for unichar ambiguities"); - BOOL_VAR_H(use_definite_ambigs_for_classifier, false, - "Use definite ambiguities when running character classifier"); -diff --git a/src/ccutil/genericvector.h b/src/ccutil/genericvector.h -index 3556d153..3a5e8662 100644 ---- a/src/ccutil/genericvector.h -+++ b/src/ccutil/genericvector.h -@@ -382,7 +382,26 @@ inline bool LoadDataFromFile(const char* filename, GenericVector<char>* data) { - // reserve an extra byte in case caller wants to append a '\0' character - data->reserve(size + 1); - data->resize_no_init(size); -- result = static_cast<long>(fread(&(*data)[0], 1, size, fp)) == size; -+ /* willus mod Dec 2018--weird issue with Win XP and MinGW gcc 7.3.0 */ -+ /* Can't read entire file at once -- need to break up into smaller blocksize reads */ -+ { -+ int frs,n; -+ int blocksize; -+ blocksize=1024*1024; -+ for (n=0;1;) -+ { -+ int bs; -+ bs= size-n > blocksize ? blocksize : size-n; -+ frs=(int)fread(&(*data)[n],1,bs,fp); -+ n+=frs; -+ if (frs<bs || bs<blocksize || n>=size) -+ break; -+ } -+ result = static_cast<long>((long)n==size); -+ } -+ /* -+ result = static_cast<long>(fread(&(*data)[0], 1, size, fp)) == size; -+ */ - } - fclose(fp); - } -diff --git a/src/ccutil/mainblk.cpp b/src/ccutil/mainblk.cpp -index 52b04b04..80b26044 100644 ---- a/src/ccutil/mainblk.cpp -+++ b/src/ccutil/mainblk.cpp -@@ -55,8 +55,22 @@ void CCUtil::main_setup(const char *argv0, const char *basename) { - #if defined(_WIN32) - } else if (datadir == nullptr || _access(datadir.string(), 0) != 0) { - /* Look for tessdata in directory of executable. */ -+ /* -+ char drive[_MAX_DRIVE]; -+ char dir[_MAX_DIR]; -+ */ - char path[_MAX_PATH]; -- DWORD length = GetModuleFileName(nullptr, path, sizeof(path)); -+ int i; -+ /* DWORD length = */ GetModuleFileName(nullptr, path, sizeof(path)); -+ /* willus mod--avoid _splitpath_s -- not in XP */ -+ for (i=strlen(path)-1;i>=0 && path[i]!='/' && path[i]!='\\';i--); -+ if (i>=0) -+ { -+ path[i]='\0'; -+ datadir=path; -+ datadir += "/tessdata"; -+ } -+ /* - if (length > 0 && length < sizeof(path)) { - char* separator = std::strrchr(path, '\\'); - if (separator != nullptr) { -@@ -65,6 +79,7 @@ void CCUtil::main_setup(const char *argv0, const char *basename) { - datadir += "/tessdata"; - } - } -+ */ - #endif /* _WIN32 */ - #if defined(TESSDATA_PREFIX) - } else { -diff --git a/src/ccutil/params.cpp b/src/ccutil/params.cpp -index 00bf2563..486c5ce0 100644 ---- a/src/ccutil/params.cpp -+++ b/src/ccutil/params.cpp -@@ -82,7 +82,8 @@ bool ParamUtils::ReadParamsFromFp(SetParamConstraint constraint, TFile *fp, - - if (!foundit) { - anyerr = true; // had an error -- tprintf("Warning: Parameter not found: %s\n", line); -+ /* willus mod */ -+ tprintf("Tesseract warning: Parameter %s not found in file %s.\n",line,fp->tfile_filename); - } - } - } -diff --git a/src/ccutil/serialis.cpp b/src/ccutil/serialis.cpp -index 7def011f..6107a494 100644 ---- a/src/ccutil/serialis.cpp -+++ b/src/ccutil/serialis.cpp -@@ -201,6 +201,9 @@ bool TFile::Open(const STRING& filename, FileReader reader) { - offset_ = 0; - is_writing_ = false; - swap_ = false; -+ /* willus mod */ -+ strncpy(tfile_filename,filename.string(),511); -+ tfile_filename[511]='\0'; - if (reader == nullptr) - return LoadDataFromFile(filename, data_); - else -diff --git a/src/ccutil/serialis.h b/src/ccutil/serialis.h -index 095b9227..4cc8251e 100644 ---- a/src/ccutil/serialis.h -+++ b/src/ccutil/serialis.h -@@ -77,6 +77,8 @@ class TFile { - public: - TFile(); - ~TFile(); -+ /* willus mod */ -+ char tfile_filename[512]; - - // All the Open methods load the whole file into memory for reading. - // Opens a file with a supplied reader, or nullptr to use the default. -diff --git a/src/lstm/input.cpp b/src/lstm/input.cpp -index 73b584b3..0b0b54c3 100644 ---- a/src/lstm/input.cpp -+++ b/src/lstm/input.cpp -@@ -93,8 +93,11 @@ Pix* Input::PrepareLSTMInputs(const ImageData& image_data, - return nullptr; - } - if (width < min_width || height < min_width) { -+ /* willus mod -- no warning */ -+ /* - tprintf("Image too small to scale!! (%dx%d vs min width of %d)\n", width, - height, min_width); -+ */ - pixDestroy(&pix); - return nullptr; - } --- -2.22.0 - |