Commit 7fe5248d for tesseract
commit 7fe5248d91bf13cd6f9a3087c17e8c26e3aab416
Author: Stefan Weil <sw@weilnetz.de>
Date: Wed Apr 7 18:41:34 2021 +0200
Format public API files with modified rules for clang-format
Disable clang-format for version.h.in because it destroys
several macro definitions.
Signed-off-by: Stefan Weil <sw@weilnetz.de>
diff --git a/include/tesseract/baseapi.h b/include/tesseract/baseapi.h
index 96f5987c..c30c3c46 100644
--- a/include/tesseract/baseapi.h
+++ b/include/tesseract/baseapi.h
@@ -32,8 +32,8 @@
#include <tesseract/version.h>
#include <cstdio>
-#include <vector> // for std::vector
#include <tuple> // for std::tuple
+#include <vector> // for std::vector
struct Pix;
struct Pixa;
@@ -63,9 +63,10 @@ class Tesseract;
// Returns false on failure.
using FileReader = bool (*)(const char *filename, std::vector<char> *data);
-using DictFunc = int (Dict::*)(void *, const UNICHARSET &, UNICHAR_ID, bool) const;
-using ProbabilityInContextFunc = double (Dict::*)(const char *, const char *, int, const char *,
- int);
+using DictFunc = int (Dict::*)(void *, const UNICHARSET &, UNICHAR_ID,
+ bool) const;
+using ProbabilityInContextFunc = double (Dict::*)(const char *, const char *,
+ int, const char *, int);
/**
* Base class for all tesseract APIs.
@@ -196,21 +197,25 @@ public:
* If set_only_non_debug_params is true, only params that do not contain
* "debug" in the name will be set.
*/
- int Init(const char *datapath, const char *language, OcrEngineMode mode, char **configs,
- int configs_size, const std::vector<std::string> *vars_vec,
- const std::vector<std::string> *vars_values, bool set_only_non_debug_params);
+ int Init(const char *datapath, const char *language, OcrEngineMode mode,
+ char **configs, int configs_size,
+ const std::vector<std::string> *vars_vec,
+ const std::vector<std::string> *vars_values,
+ bool set_only_non_debug_params);
int Init(const char *datapath, const char *language, OcrEngineMode oem) {
return Init(datapath, language, oem, nullptr, 0, nullptr, nullptr, false);
}
int Init(const char *datapath, const char *language) {
- return Init(datapath, language, OEM_DEFAULT, nullptr, 0, nullptr, nullptr, false);
+ return Init(datapath, language, OEM_DEFAULT, nullptr, 0, nullptr, nullptr,
+ false);
}
// In-memory version reads the traineddata file directly from the given
// data[data_size] array, and/or reads data via a FileReader.
- int Init(const char *data, int data_size, const char *language, OcrEngineMode mode,
- char **configs, int configs_size, const std::vector<std::string> *vars_vec,
- const std::vector<std::string> *vars_values, bool set_only_non_debug_params,
- FileReader reader);
+ int Init(const char *data, int data_size, const char *language,
+ OcrEngineMode mode, char **configs, int configs_size,
+ const std::vector<std::string> *vars_vec,
+ const std::vector<std::string> *vars_values,
+ bool set_only_non_debug_params, FileReader reader);
/**
* Returns the languages string used in the last valid initialization.
@@ -285,8 +290,9 @@ public:
* For advanced uses, use SetImage, (optionally) SetRectangle, Recognize,
* and one or more of the Get*Text functions below.
*/
- char *TesseractRect(const unsigned char *imagedata, int bytes_per_pixel, int bytes_per_line,
- int left, int top, int width, int height);
+ char *TesseractRect(const unsigned char *imagedata, int bytes_per_pixel,
+ int bytes_per_line, int left, int top, int width,
+ int height);
/**
* Call between pages or documents etc to free up memory and forget
@@ -309,8 +315,8 @@ public:
* full image, so it may be followed immediately by a GetUTF8Text, and it
* will automatically perform recognition.
*/
- void SetImage(const unsigned char *imagedata, int width, int height, int bytes_per_pixel,
- int bytes_per_line);
+ void SetImage(const unsigned char *imagedata, int width, int height,
+ int bytes_per_pixel, int bytes_per_line);
/**
* Provide an image for Tesseract to recognize. As with SetImage above,
@@ -360,7 +366,8 @@ public:
* nullptr, the paragraph-id of each line within its block is also returned as
* an array of one element per line. delete [] after use.
*/
- Boxa *GetTextlines(bool raw_image, int raw_padding, Pixa **pixa, int **blockids, int **paraids);
+ Boxa *GetTextlines(bool raw_image, int raw_padding, Pixa **pixa,
+ int **blockids, int **paraids);
/*
Helper method to extract from the thresholded image. (most common usage)
*/
@@ -407,12 +414,14 @@ public:
* extracted instead of the thresholded image and padded with raw_padding. If
* text_only is true, then only text components are returned.
*/
- Boxa *GetComponentImages(PageIteratorLevel level, bool text_only, bool raw_image, int raw_padding,
- Pixa **pixa, int **blockids, int **paraids);
+ Boxa *GetComponentImages(PageIteratorLevel level, bool text_only,
+ bool raw_image, int raw_padding, Pixa **pixa,
+ int **blockids, int **paraids);
// Helper function to get binary images with no padding (most common usage).
- Boxa *GetComponentImages(const PageIteratorLevel level, const bool text_only, Pixa **pixa,
- int **blockids) {
- return GetComponentImages(level, text_only, false, 0, pixa, blockids, nullptr);
+ Boxa *GetComponentImages(const PageIteratorLevel level, const bool text_only,
+ Pixa **pixa, int **blockids) {
+ return GetComponentImages(level, text_only, false, 0, pixa, blockids,
+ nullptr);
}
/**
@@ -476,11 +485,11 @@ public:
*
* Returns true if successful, false on error.
*/
- bool ProcessPages(const char *filename, const char *retry_config, int timeout_millisec,
- TessResultRenderer *renderer);
+ bool ProcessPages(const char *filename, const char *retry_config,
+ int timeout_millisec, TessResultRenderer *renderer);
// Does the real work of ProcessPages.
- bool ProcessPagesInternal(const char *filename, const char *retry_config, int timeout_millisec,
- TessResultRenderer *renderer);
+ bool ProcessPagesInternal(const char *filename, const char *retry_config,
+ int timeout_millisec, TessResultRenderer *renderer);
/**
* Turn a single image into symbolic text.
@@ -491,8 +500,9 @@ public:
*
* See ProcessPages for descriptions of other parameters.
*/
- bool ProcessPage(Pix *pix, int page_index, const char *filename, const char *retry_config,
- int timeout_millisec, TessResultRenderer *renderer);
+ bool ProcessPage(Pix *pix, int page_index, const char *filename,
+ const char *retry_config, int timeout_millisec,
+ TessResultRenderer *renderer);
/**
* Get a reading-order iterator to the results of LayoutAnalysis and/or
@@ -519,27 +529,30 @@ public:
* as UTF8 and must be freed with the delete [] operator.
*/
char *GetUTF8Text();
-
+
size_t GetNumberOfTables();
-
+
/// Return the i-th table bounding box coordinates
///
- ///Gives the (top_left.x, top_left.y, bottom_right.x, bottom_right.y)
+ /// Gives the (top_left.x, top_left.y, bottom_right.x, bottom_right.y)
/// coordinates of the i-th table.
- std::tuple<int,int,int,int> GetTableBoundingBox(
- unsigned i///< Index of the table, for upper limit \see GetNumberOfTables()
+ std::tuple<int, int, int, int> GetTableBoundingBox(
+ unsigned
+ i ///< Index of the table, for upper limit \see GetNumberOfTables()
);
-
+
/// Get bounding boxes of the rows of a table
/// return values are (top_left.x, top_left.y, bottom_right.x, bottom_right.y)
- std::vector<std::tuple<int,int,int,int> > GetTableRows(
- unsigned i///< Index of the table, for upper limit \see GetNumberOfTables()
+ std::vector<std::tuple<int, int, int, int> > GetTableRows(
+ unsigned
+ i ///< Index of the table, for upper limit \see GetNumberOfTables()
);
-
+
/// Get bounding boxes of the cols of a table
/// return values are (top_left.x, top_left.y, bottom_right.x, bottom_right.y)
- std::vector<std::tuple<int,int,int,int> > GetTableCols(
- unsigned i///< Index of the table, for upper limit \see GetNumberOfTables()
+ std::vector<std::tuple<int, int, int, int> > GetTableCols(
+ unsigned
+ i ///< Index of the table, for upper limit \see GetNumberOfTables()
);
/**
@@ -621,8 +634,8 @@ public:
* script_conf is confidence level in the script
* Returns true on success and writes values to each parameter as an output
*/
- bool DetectOrientationScript(int *orient_deg, float *orient_conf, const char **script_name,
- float *script_conf);
+ bool DetectOrientationScript(int *orient_deg, float *orient_conf,
+ const char **script_name, float *script_conf);
/**
* The recognized text is returned as a char* which is coded
@@ -710,7 +723,8 @@ public:
* Return text orientation of each block as determined by an earlier run
* of layout analysis.
*/
- void GetBlockTextOrientations(int **block_orientation, bool **vertical_writing);
+ void GetBlockTextOrientations(int **block_orientation,
+ bool **vertical_writing);
/** This method returns the string form of the specified unichar. */
const char *GetUnichar(int unichar_id);
@@ -805,13 +819,16 @@ protected:
private:
// A list of image filenames gets special consideration
- bool ProcessPagesFileList(FILE *fp, std::string *buf, const char *retry_config,
- int timeout_millisec, TessResultRenderer *renderer,
+ bool ProcessPagesFileList(FILE *fp, std::string *buf,
+ const char *retry_config, int timeout_millisec,
+ TessResultRenderer *renderer,
int tessedit_page_number);
// TIFF supports multipage so gets special consideration.
- bool ProcessPagesMultipageTiff(const unsigned char *data, size_t size, const char *filename,
- const char *retry_config, int timeout_millisec,
- TessResultRenderer *renderer, int tessedit_page_number);
+ bool ProcessPagesMultipageTiff(const unsigned char *data, size_t size,
+ const char *filename, const char *retry_config,
+ int timeout_millisec,
+ TessResultRenderer *renderer,
+ int tessedit_page_number);
}; // class TessBaseAPI.
/** Escape a char string - remove &<>"' with HTML codes. */
diff --git a/include/tesseract/capi.h b/include/tesseract/capi.h
index a5003b14..eae114d4 100644
--- a/include/tesseract/capi.h
+++ b/include/tesseract/capi.h
@@ -138,7 +138,8 @@ typedef struct ETEXT_DESC ETEXT_DESC;
#endif
typedef bool (*TessCancelFunc)(void *cancel_this, int words);
-typedef bool (*TessProgressFunc)(ETEXT_DESC *ths, int left, int right, int top, int bottom);
+typedef bool (*TessProgressFunc)(ETEXT_DESC *ths, int left, int right, int top,
+ int bottom);
struct Pix;
struct Boxa;
@@ -154,21 +155,28 @@ TESS_API void TessDeleteIntArray(const int *arr);
/* Renderer API */
TESS_API TessResultRenderer *TessTextRendererCreate(const char *outputbase);
TESS_API TessResultRenderer *TessHOcrRendererCreate(const char *outputbase);
-TESS_API TessResultRenderer *TessHOcrRendererCreate2(const char *outputbase, BOOL font_info);
+TESS_API TessResultRenderer *TessHOcrRendererCreate2(const char *outputbase,
+ BOOL font_info);
TESS_API TessResultRenderer *TessAltoRendererCreate(const char *outputbase);
TESS_API TessResultRenderer *TessTsvRendererCreate(const char *outputbase);
-TESS_API TessResultRenderer *TessPDFRendererCreate(const char *outputbase, const char *datadir,
+TESS_API TessResultRenderer *TessPDFRendererCreate(const char *outputbase,
+ const char *datadir,
BOOL textonly);
TESS_API TessResultRenderer *TessUnlvRendererCreate(const char *outputbase);
TESS_API TessResultRenderer *TessBoxTextRendererCreate(const char *outputbase);
TESS_API TessResultRenderer *TessLSTMBoxRendererCreate(const char *outputbase);
-TESS_API TessResultRenderer *TessWordStrBoxRendererCreate(const char *outputbase);
+TESS_API TessResultRenderer *TessWordStrBoxRendererCreate(
+ const char *outputbase);
TESS_API void TessDeleteResultRenderer(TessResultRenderer *renderer);
-TESS_API void TessResultRendererInsert(TessResultRenderer *renderer, TessResultRenderer *next);
-TESS_API TessResultRenderer *TessResultRendererNext(TessResultRenderer *renderer);
-TESS_API BOOL TessResultRendererBeginDocument(TessResultRenderer *renderer, const char *title);
-TESS_API BOOL TessResultRendererAddImage(TessResultRenderer *renderer, TessBaseAPI *api);
+TESS_API void TessResultRendererInsert(TessResultRenderer *renderer,
+ TessResultRenderer *next);
+TESS_API TessResultRenderer *TessResultRendererNext(
+ TessResultRenderer *renderer);
+TESS_API BOOL TessResultRendererBeginDocument(TessResultRenderer *renderer,
+ const char *title);
+TESS_API BOOL TessResultRendererAddImage(TessResultRenderer *renderer,
+ TessBaseAPI *api);
TESS_API BOOL TessResultRendererEndDocument(TessResultRenderer *renderer);
TESS_API const char *TessResultRendererExtention(TessResultRenderer *renderer);
@@ -193,91 +201,122 @@ TESS_API const char *TessBaseAPIGetDatapath(TessBaseAPI *handle);
TESS_API void TessBaseAPISetOutputName(TessBaseAPI *handle, const char *name);
-TESS_API BOOL TessBaseAPISetVariable(TessBaseAPI *handle, const char *name, const char *value);
-TESS_API BOOL TessBaseAPISetDebugVariable(TessBaseAPI *handle, const char *name, const char *value);
+TESS_API BOOL TessBaseAPISetVariable(TessBaseAPI *handle, const char *name,
+ const char *value);
+TESS_API BOOL TessBaseAPISetDebugVariable(TessBaseAPI *handle, const char *name,
+ const char *value);
-TESS_API BOOL TessBaseAPIGetIntVariable(const TessBaseAPI *handle, const char *name, int *value);
-TESS_API BOOL TessBaseAPIGetBoolVariable(const TessBaseAPI *handle, const char *name, BOOL *value);
-TESS_API BOOL TessBaseAPIGetDoubleVariable(const TessBaseAPI *handle, const char *name,
- double *value);
-TESS_API const char *TessBaseAPIGetStringVariable(const TessBaseAPI *handle, const char *name);
+TESS_API BOOL TessBaseAPIGetIntVariable(const TessBaseAPI *handle,
+ const char *name, int *value);
+TESS_API BOOL TessBaseAPIGetBoolVariable(const TessBaseAPI *handle,
+ const char *name, BOOL *value);
+TESS_API BOOL TessBaseAPIGetDoubleVariable(const TessBaseAPI *handle,
+ const char *name, double *value);
+TESS_API const char *TessBaseAPIGetStringVariable(const TessBaseAPI *handle,
+ const char *name);
TESS_API void TessBaseAPIPrintVariables(const TessBaseAPI *handle, FILE *fp);
-TESS_API BOOL TessBaseAPIPrintVariablesToFile(const TessBaseAPI *handle, const char *filename);
-
-TESS_API int TessBaseAPIInit1(TessBaseAPI *handle, const char *datapath, const char *language,
- TessOcrEngineMode oem, char **configs, int configs_size);
-TESS_API int TessBaseAPIInit2(TessBaseAPI *handle, const char *datapath, const char *language,
- TessOcrEngineMode oem);
-TESS_API int TessBaseAPIInit3(TessBaseAPI *handle, const char *datapath, const char *language);
-
-TESS_API int TessBaseAPIInit4(TessBaseAPI *handle, const char *datapath, const char *language,
- TessOcrEngineMode mode, char **configs, int configs_size,
- char **vars_vec, char **vars_values, size_t vars_vec_size,
+TESS_API BOOL TessBaseAPIPrintVariablesToFile(const TessBaseAPI *handle,
+ const char *filename);
+
+TESS_API int TessBaseAPIInit1(TessBaseAPI *handle, const char *datapath,
+ const char *language, TessOcrEngineMode oem,
+ char **configs, int configs_size);
+TESS_API int TessBaseAPIInit2(TessBaseAPI *handle, const char *datapath,
+ const char *language, TessOcrEngineMode oem);
+TESS_API int TessBaseAPIInit3(TessBaseAPI *handle, const char *datapath,
+ const char *language);
+
+TESS_API int TessBaseAPIInit4(TessBaseAPI *handle, const char *datapath,
+ const char *language, TessOcrEngineMode mode,
+ char **configs, int configs_size, char **vars_vec,
+ char **vars_values, size_t vars_vec_size,
BOOL set_only_non_debug_params);
-TESS_API const char *TessBaseAPIGetInitLanguagesAsString(const TessBaseAPI *handle);
-TESS_API char **TessBaseAPIGetLoadedLanguagesAsVector(const TessBaseAPI *handle);
-TESS_API char **TessBaseAPIGetAvailableLanguagesAsVector(const TessBaseAPI *handle);
+TESS_API const char *TessBaseAPIGetInitLanguagesAsString(
+ const TessBaseAPI *handle);
+TESS_API char **TessBaseAPIGetLoadedLanguagesAsVector(
+ const TessBaseAPI *handle);
+TESS_API char **TessBaseAPIGetAvailableLanguagesAsVector(
+ const TessBaseAPI *handle);
TESS_API int TessBaseAPIInitLangMod(TessBaseAPI *handle, const char *datapath,
const char *language);
TESS_API void TessBaseAPIInitForAnalysePage(TessBaseAPI *handle);
-TESS_API void TessBaseAPIReadConfigFile(TessBaseAPI *handle, const char *filename);
-TESS_API void TessBaseAPIReadDebugConfigFile(TessBaseAPI *handle, const char *filename);
+TESS_API void TessBaseAPIReadConfigFile(TessBaseAPI *handle,
+ const char *filename);
+TESS_API void TessBaseAPIReadDebugConfigFile(TessBaseAPI *handle,
+ const char *filename);
-TESS_API void TessBaseAPISetPageSegMode(TessBaseAPI *handle, TessPageSegMode mode);
+TESS_API void TessBaseAPISetPageSegMode(TessBaseAPI *handle,
+ TessPageSegMode mode);
TESS_API TessPageSegMode TessBaseAPIGetPageSegMode(const TessBaseAPI *handle);
-TESS_API char *TessBaseAPIRect(TessBaseAPI *handle, const unsigned char *imagedata,
- int bytes_per_pixel, int bytes_per_line, int left, int top,
- int width, int height);
+TESS_API char *TessBaseAPIRect(TessBaseAPI *handle,
+ const unsigned char *imagedata,
+ int bytes_per_pixel, int bytes_per_line,
+ int left, int top, int width, int height);
TESS_API void TessBaseAPIClearAdaptiveClassifier(TessBaseAPI *handle);
-TESS_API void TessBaseAPISetImage(TessBaseAPI *handle, const unsigned char *imagedata, int width,
- int height, int bytes_per_pixel, int bytes_per_line);
+TESS_API void TessBaseAPISetImage(TessBaseAPI *handle,
+ const unsigned char *imagedata, int width,
+ int height, int bytes_per_pixel,
+ int bytes_per_line);
TESS_API void TessBaseAPISetImage2(TessBaseAPI *handle, struct Pix *pix);
TESS_API void TessBaseAPISetSourceResolution(TessBaseAPI *handle, int ppi);
-TESS_API void TessBaseAPISetRectangle(TessBaseAPI *handle, int left, int top, int width,
- int height);
+TESS_API void TessBaseAPISetRectangle(TessBaseAPI *handle, int left, int top,
+ int width, int height);
TESS_API struct Pix *TessBaseAPIGetThresholdedImage(TessBaseAPI *handle);
-TESS_API struct Boxa *TessBaseAPIGetRegions(TessBaseAPI *handle, struct Pixa **pixa);
-TESS_API struct Boxa *TessBaseAPIGetTextlines(TessBaseAPI *handle, struct Pixa **pixa,
+TESS_API struct Boxa *TessBaseAPIGetRegions(TessBaseAPI *handle,
+ struct Pixa **pixa);
+TESS_API struct Boxa *TessBaseAPIGetTextlines(TessBaseAPI *handle,
+ struct Pixa **pixa,
int **blockids);
-TESS_API struct Boxa *TessBaseAPIGetTextlines1(TessBaseAPI *handle, BOOL raw_image, int raw_padding,
- struct Pixa **pixa, int **blockids, int **paraids);
-TESS_API struct Boxa *TessBaseAPIGetStrips(TessBaseAPI *handle, struct Pixa **pixa, int **blockids);
-TESS_API struct Boxa *TessBaseAPIGetWords(TessBaseAPI *handle, struct Pixa **pixa);
-TESS_API struct Boxa *TessBaseAPIGetConnectedComponents(TessBaseAPI *handle, struct Pixa **cc);
+TESS_API struct Boxa *TessBaseAPIGetTextlines1(TessBaseAPI *handle,
+ BOOL raw_image, int raw_padding,
+ struct Pixa **pixa,
+ int **blockids, int **paraids);
+TESS_API struct Boxa *TessBaseAPIGetStrips(TessBaseAPI *handle,
+ struct Pixa **pixa, int **blockids);
+TESS_API struct Boxa *TessBaseAPIGetWords(TessBaseAPI *handle,
+ struct Pixa **pixa);
+TESS_API struct Boxa *TessBaseAPIGetConnectedComponents(TessBaseAPI *handle,
+ struct Pixa **cc);
TESS_API struct Boxa *TessBaseAPIGetComponentImages(TessBaseAPI *handle,
- TessPageIteratorLevel level, BOOL text_only,
- struct Pixa **pixa, int **blockids);
-TESS_API struct Boxa *TessBaseAPIGetComponentImages1(TessBaseAPI *handle,
- TessPageIteratorLevel level, BOOL text_only,
- BOOL raw_image, int raw_padding,
- struct Pixa **pixa, int **blockids,
- int **paraids);
-
-TESS_API int TessBaseAPIGetThresholdedImageScaleFactor(const TessBaseAPI *handle);
+ TessPageIteratorLevel level,
+ BOOL text_only,
+ struct Pixa **pixa,
+ int **blockids);
+TESS_API struct Boxa *TessBaseAPIGetComponentImages1(
+ TessBaseAPI *handle, TessPageIteratorLevel level, BOOL text_only,
+ BOOL raw_image, int raw_padding, struct Pixa **pixa, int **blockids,
+ int **paraids);
+
+TESS_API int TessBaseAPIGetThresholdedImageScaleFactor(
+ const TessBaseAPI *handle);
TESS_API TessPageIterator *TessBaseAPIAnalyseLayout(TessBaseAPI *handle);
TESS_API int TessBaseAPIRecognize(TessBaseAPI *handle, ETEXT_DESC *monitor);
TESS_API BOOL TessBaseAPIProcessPages(TessBaseAPI *handle, const char *filename,
- const char *retry_config, int timeout_millisec,
+ const char *retry_config,
+ int timeout_millisec,
TessResultRenderer *renderer);
-TESS_API BOOL TessBaseAPIProcessPage(TessBaseAPI *handle, struct Pix *pix, int page_index,
- const char *filename, const char *retry_config,
- int timeout_millisec, TessResultRenderer *renderer);
+TESS_API BOOL TessBaseAPIProcessPage(TessBaseAPI *handle, struct Pix *pix,
+ int page_index, const char *filename,
+ const char *retry_config,
+ int timeout_millisec,
+ TessResultRenderer *renderer);
TESS_API TessResultIterator *TessBaseAPIGetIterator(TessBaseAPI *handle);
-TESS_API TessMutableIterator *TessBaseAPIGetMutableIterator(TessBaseAPI *handle);
+TESS_API TessMutableIterator *TessBaseAPIGetMutableIterator(
+ TessBaseAPI *handle);
TESS_API char *TessBaseAPIGetUTF8Text(TessBaseAPI *handle);
TESS_API char *TessBaseAPIGetHOCRText(TessBaseAPI *handle, int page_number);
@@ -287,7 +326,8 @@ TESS_API char *TessBaseAPIGetTsvText(TessBaseAPI *handle, int page_number);
TESS_API char *TessBaseAPIGetBoxText(TessBaseAPI *handle, int page_number);
TESS_API char *TessBaseAPIGetLSTMBoxText(TessBaseAPI *handle, int page_number);
-TESS_API char *TessBaseAPIGetWordStrBoxText(TessBaseAPI *handle, int page_number);
+TESS_API char *TessBaseAPIGetWordStrBoxText(TessBaseAPI *handle,
+ int page_number);
TESS_API char *TessBaseAPIGetUNLVText(TessBaseAPI *handle);
TESS_API int TessBaseAPIMeanTextConf(TessBaseAPI *handle);
@@ -295,7 +335,8 @@ TESS_API int TessBaseAPIMeanTextConf(TessBaseAPI *handle);
TESS_API int *TessBaseAPIAllWordConfidences(TessBaseAPI *handle);
#ifndef DISABLED_LEGACY_ENGINE
-TESS_API BOOL TessBaseAPIAdaptToWordStr(TessBaseAPI *handle, TessPageSegMode mode,
+TESS_API BOOL TessBaseAPIAdaptToWordStr(TessBaseAPI *handle,
+ TessPageSegMode mode,
const char *wordstr);
#endif // #ifndef DISABLED_LEGACY_ENGINE
@@ -303,7 +344,8 @@ TESS_API void TessBaseAPIClear(TessBaseAPI *handle);
TESS_API void TessBaseAPIEnd(TessBaseAPI *handle);
TESS_API int TessBaseAPIIsValidWord(TessBaseAPI *handle, const char *word);
-TESS_API BOOL TessBaseAPIGetTextDirection(TessBaseAPI *handle, int *out_offset, float *out_slope);
+TESS_API BOOL TessBaseAPIGetTextDirection(TessBaseAPI *handle, int *out_offset,
+ float *out_slope);
TESS_API const char *TessBaseAPIGetUnichar(TessBaseAPI *handle, int unichar_id);
@@ -313,18 +355,22 @@ TESS_API void TessBaseAPIClearPersistentCache(TessBaseAPI *handle);
// Call TessDeleteText(*best_script_name) to free memory allocated by this
// function
-TESS_API BOOL TessBaseAPIDetectOrientationScript(TessBaseAPI *handle, int *orient_deg,
- float *orient_conf, const char **script_name,
+TESS_API BOOL TessBaseAPIDetectOrientationScript(TessBaseAPI *handle,
+ int *orient_deg,
+ float *orient_conf,
+ const char **script_name,
float *script_conf);
#endif // #ifndef DISABLED_LEGACY_ENGINE
-TESS_API void TessBaseAPISetMinOrientationMargin(TessBaseAPI *handle, double margin);
+TESS_API void TessBaseAPISetMinOrientationMargin(TessBaseAPI *handle,
+ double margin);
TESS_API int TessBaseAPINumDawgs(const TessBaseAPI *handle);
TESS_API TessOcrEngineMode TessBaseAPIOem(const TessBaseAPI *handle);
-TESS_API void TessBaseGetBlockTextOrientations(TessBaseAPI *handle, int **block_orientation,
+TESS_API void TessBaseGetBlockTextOrientations(TessBaseAPI *handle,
+ int **block_orientation,
bool **vertical_writing);
/* Page iterator */
@@ -335,7 +381,8 @@ TESS_API TessPageIterator *TessPageIteratorCopy(const TessPageIterator *handle);
TESS_API void TessPageIteratorBegin(TessPageIterator *handle);
-TESS_API BOOL TessPageIteratorNext(TessPageIterator *handle, TessPageIteratorLevel level);
+TESS_API BOOL TessPageIteratorNext(TessPageIterator *handle,
+ TessPageIteratorLevel level);
TESS_API BOOL TessPageIteratorIsAtBeginningOf(const TessPageIterator *handle,
TessPageIteratorLevel level);
@@ -345,70 +392,86 @@ TESS_API BOOL TessPageIteratorIsAtFinalElement(const TessPageIterator *handle,
TessPageIteratorLevel element);
TESS_API BOOL TessPageIteratorBoundingBox(const TessPageIterator *handle,
- TessPageIteratorLevel level, int *left, int *top,
- int *right, int *bottom);
+ TessPageIteratorLevel level,
+ int *left, int *top, int *right,
+ int *bottom);
-TESS_API TessPolyBlockType TessPageIteratorBlockType(const TessPageIterator *handle);
+TESS_API TessPolyBlockType
+TessPageIteratorBlockType(const TessPageIterator *handle);
-TESS_API struct Pix *TessPageIteratorGetBinaryImage(const TessPageIterator *handle,
- TessPageIteratorLevel level);
+TESS_API struct Pix *TessPageIteratorGetBinaryImage(
+ const TessPageIterator *handle, TessPageIteratorLevel level);
TESS_API struct Pix *TessPageIteratorGetImage(const TessPageIterator *handle,
- TessPageIteratorLevel level, int padding,
- struct Pix *original_image, int *left, int *top);
+ TessPageIteratorLevel level,
+ int padding,
+ struct Pix *original_image,
+ int *left, int *top);
-TESS_API BOOL TessPageIteratorBaseline(const TessPageIterator *handle, TessPageIteratorLevel level,
- int *x1, int *y1, int *x2, int *y2);
+TESS_API BOOL TessPageIteratorBaseline(const TessPageIterator *handle,
+ TessPageIteratorLevel level, int *x1,
+ int *y1, int *x2, int *y2);
-TESS_API void TessPageIteratorOrientation(TessPageIterator *handle, TessOrientation *orientation,
- TessWritingDirection *writing_direction,
- TessTextlineOrder *textline_order, float *deskew_angle);
+TESS_API void TessPageIteratorOrientation(
+ TessPageIterator *handle, TessOrientation *orientation,
+ TessWritingDirection *writing_direction, TessTextlineOrder *textline_order,
+ float *deskew_angle);
-TESS_API void TessPageIteratorParagraphInfo(TessPageIterator *handle,
- TessParagraphJustification *justification,
- BOOL *is_list_item, BOOL *is_crown,
- int *first_line_indent);
+TESS_API void TessPageIteratorParagraphInfo(
+ TessPageIterator *handle, TessParagraphJustification *justification,
+ BOOL *is_list_item, BOOL *is_crown, int *first_line_indent);
/* Result iterator */
TESS_API void TessResultIteratorDelete(TessResultIterator *handle);
-TESS_API TessResultIterator *TessResultIteratorCopy(const TessResultIterator *handle);
-TESS_API TessPageIterator *TessResultIteratorGetPageIterator(TessResultIterator *handle);
+TESS_API TessResultIterator *TessResultIteratorCopy(
+ const TessResultIterator *handle);
+TESS_API TessPageIterator *TessResultIteratorGetPageIterator(
+ TessResultIterator *handle);
TESS_API const TessPageIterator *TessResultIteratorGetPageIteratorConst(
const TessResultIterator *handle);
-TESS_API TessChoiceIterator *TessResultIteratorGetChoiceIterator(const TessResultIterator *handle);
+TESS_API TessChoiceIterator *TessResultIteratorGetChoiceIterator(
+ const TessResultIterator *handle);
-TESS_API BOOL TessResultIteratorNext(TessResultIterator *handle, TessPageIteratorLevel level);
+TESS_API BOOL TessResultIteratorNext(TessResultIterator *handle,
+ TessPageIteratorLevel level);
TESS_API char *TessResultIteratorGetUTF8Text(const TessResultIterator *handle,
TessPageIteratorLevel level);
TESS_API float TessResultIteratorConfidence(const TessResultIterator *handle,
TessPageIteratorLevel level);
-TESS_API const char *TessResultIteratorWordRecognitionLanguage(const TessResultIterator *handle);
-TESS_API const char *TessResultIteratorWordFontAttributes(const TessResultIterator *handle,
- BOOL *is_bold, BOOL *is_italic,
- BOOL *is_underlined, BOOL *is_monospace,
- BOOL *is_serif, BOOL *is_smallcaps,
- int *pointsize, int *font_id);
-
-TESS_API BOOL TessResultIteratorWordIsFromDictionary(const TessResultIterator *handle);
+TESS_API const char *TessResultIteratorWordRecognitionLanguage(
+ const TessResultIterator *handle);
+TESS_API const char *TessResultIteratorWordFontAttributes(
+ const TessResultIterator *handle, BOOL *is_bold, BOOL *is_italic,
+ BOOL *is_underlined, BOOL *is_monospace, BOOL *is_serif, BOOL *is_smallcaps,
+ int *pointsize, int *font_id);
+
+TESS_API BOOL
+TessResultIteratorWordIsFromDictionary(const TessResultIterator *handle);
TESS_API BOOL TessResultIteratorWordIsNumeric(const TessResultIterator *handle);
-TESS_API BOOL TessResultIteratorSymbolIsSuperscript(const TessResultIterator *handle);
-TESS_API BOOL TessResultIteratorSymbolIsSubscript(const TessResultIterator *handle);
-TESS_API BOOL TessResultIteratorSymbolIsDropcap(const TessResultIterator *handle);
+TESS_API BOOL
+TessResultIteratorSymbolIsSuperscript(const TessResultIterator *handle);
+TESS_API BOOL
+TessResultIteratorSymbolIsSubscript(const TessResultIterator *handle);
+TESS_API BOOL
+TessResultIteratorSymbolIsDropcap(const TessResultIterator *handle);
TESS_API void TessChoiceIteratorDelete(TessChoiceIterator *handle);
TESS_API BOOL TessChoiceIteratorNext(TessChoiceIterator *handle);
-TESS_API const char *TessChoiceIteratorGetUTF8Text(const TessChoiceIterator *handle);
+TESS_API const char *TessChoiceIteratorGetUTF8Text(
+ const TessChoiceIterator *handle);
TESS_API float TessChoiceIteratorConfidence(const TessChoiceIterator *handle);
/* Progress monitor */
TESS_API ETEXT_DESC *TessMonitorCreate();
TESS_API void TessMonitorDelete(ETEXT_DESC *monitor);
-TESS_API void TessMonitorSetCancelFunc(ETEXT_DESC *monitor, TessCancelFunc cancelFunc);
+TESS_API void TessMonitorSetCancelFunc(ETEXT_DESC *monitor,
+ TessCancelFunc cancelFunc);
TESS_API void TessMonitorSetCancelThis(ETEXT_DESC *monitor, void *cancelThis);
TESS_API void *TessMonitorGetCancelThis(ETEXT_DESC *monitor);
-TESS_API void TessMonitorSetProgressFunc(ETEXT_DESC *monitor, TessProgressFunc progressFunc);
+TESS_API void TessMonitorSetProgressFunc(ETEXT_DESC *monitor,
+ TessProgressFunc progressFunc);
TESS_API int TessMonitorGetProgress(ETEXT_DESC *monitor);
TESS_API void TessMonitorSetDeadlineMSecs(ETEXT_DESC *monitor, int deadline);
diff --git a/include/tesseract/ltrresultiterator.h b/include/tesseract/ltrresultiterator.h
index 2d304044..1ff45ee4 100644
--- a/include/tesseract/ltrresultiterator.h
+++ b/include/tesseract/ltrresultiterator.h
@@ -60,8 +60,9 @@ public:
// The scaled_yres indicates the effective resolution of the binary image
// that tesseract has been given by the Thresholder.
// After the constructor, Begin has already been called.
- LTRResultIterator(PAGE_RES *page_res, Tesseract *tesseract, int scale, int scaled_yres,
- int rect_left, int rect_top, int rect_width, int rect_height);
+ LTRResultIterator(PAGE_RES *page_res, Tesseract *tesseract, int scale,
+ int scaled_yres, int rect_left, int rect_top,
+ int rect_width, int rect_height);
~LTRResultIterator() override;
@@ -93,7 +94,8 @@ public:
float Confidence(PageIteratorLevel level) const;
// Returns the attributes of the current row.
- void RowAttributes(float *row_height, float *descenders, float *ascenders) const;
+ void RowAttributes(float *row_height, float *descenders,
+ float *ascenders) const;
// ============= Functions that refer to words only ============.
@@ -105,8 +107,9 @@ public:
// the iterator itself, ie rendered invalid by various members of
// TessBaseAPI, including Init, SetImage, End or deleting the TessBaseAPI.
// Pointsize is returned in printers points (1/72 inch.)
- const char *WordFontAttributes(bool *is_bold, bool *is_italic, bool *is_underlined,
- bool *is_monospace, bool *is_serif, bool *is_smallcaps,
+ const char *WordFontAttributes(bool *is_bold, bool *is_italic,
+ bool *is_underlined, bool *is_monospace,
+ bool *is_serif, bool *is_smallcaps,
int *pointsize, int *font_id) const;
// Return the name of the language used to recognize this word.
diff --git a/include/tesseract/ocrclass.h b/include/tesseract/ocrclass.h
index 3832e654..46654c1e 100644
--- a/include/tesseract/ocrclass.h
+++ b/include/tesseract/ocrclass.h
@@ -105,32 +105,36 @@ public:
/** Progress monitor covers word recognition and it does not cover layout
* analysis.
* See Ray comment in https://github.com/tesseract-ocr/tesseract/pull/27 */
- int8_t more_to_come{0}; /// true if not last
- volatile int8_t ocr_alive{0}; /// ocr sets to 1, HP 0
- int8_t err_code{0}; /// for errcode use
- CANCEL_FUNC cancel{nullptr}; /// returns true to cancel
- PROGRESS_FUNC progress_callback{nullptr}; /// called whenever progress increases
- PROGRESS_FUNC2 progress_callback2; /// monitor-aware progress callback
- void *cancel_this{nullptr}; /// this or other data for cancel
+ int8_t more_to_come{0}; /// true if not last
+ volatile int8_t ocr_alive{0}; /// ocr sets to 1, HP 0
+ int8_t err_code{0}; /// for errcode use
+ CANCEL_FUNC cancel{nullptr}; /// returns true to cancel
+ PROGRESS_FUNC progress_callback{
+ nullptr}; /// called whenever progress increases
+ PROGRESS_FUNC2 progress_callback2; /// monitor-aware progress callback
+ void *cancel_this{nullptr}; /// this or other data for cancel
std::chrono::steady_clock::time_point end_time;
/// Time to stop. Expected to be set only
/// by call to set_deadline_msecs().
EANYCODE_CHAR text[1]{}; /// character data
ETEXT_DESC() : progress_callback2(&default_progress_func) {
- end_time = std::chrono::time_point<std::chrono::steady_clock, std::chrono::milliseconds>();
+ end_time = std::chrono::time_point<std::chrono::steady_clock,
+ std::chrono::milliseconds>();
}
// Sets the end time to be deadline_msecs milliseconds from now.
void set_deadline_msecs(int32_t deadline_msecs) {
if (deadline_msecs > 0) {
- end_time = std::chrono::steady_clock::now() + std::chrono::milliseconds(deadline_msecs);
+ end_time = std::chrono::steady_clock::now() +
+ std::chrono::milliseconds(deadline_msecs);
}
}
// Returns false if we've not passed the end_time, or have not set a deadline.
bool deadline_exceeded() const {
- if (end_time.time_since_epoch() == std::chrono::steady_clock::duration::zero()) {
+ if (end_time.time_since_epoch() ==
+ std::chrono::steady_clock::duration::zero()) {
return false;
}
auto now = std::chrono::steady_clock::now();
@@ -138,9 +142,11 @@ public:
}
private:
- static bool default_progress_func(ETEXT_DESC *ths, int left, int right, int top, int bottom) {
+ static bool default_progress_func(ETEXT_DESC *ths, int left, int right,
+ int top, int bottom) {
if (ths->progress_callback != nullptr) {
- return (*(ths->progress_callback))(ths->progress, left, right, top, bottom);
+ return (*(ths->progress_callback))(ths->progress, left, right, top,
+ bottom);
}
return true;
}
diff --git a/include/tesseract/osdetect.h b/include/tesseract/osdetect.h
index 2660628e..8052d2d2 100644
--- a/include/tesseract/osdetect.h
+++ b/include/tesseract/osdetect.h
@@ -38,7 +38,8 @@ class Tesseract;
const int kMaxNumberOfScripts = 116 + 1 + 2 + 1;
struct OSBestResult {
- OSBestResult() : orientation_id(0), script_id(0), sconfidence(0.0), oconfidence(0.0) {}
+ OSBestResult()
+ : orientation_id(0), script_id(0), sconfidence(0.0), oconfidence(0.0) {}
int orientation_id;
int script_id;
float sconfidence;
@@ -83,7 +84,8 @@ struct OSResults {
class OrientationDetector {
public:
- OrientationDetector(const std::vector<int> *allowed_scripts, OSResults *results);
+ OrientationDetector(const std::vector<int> *allowed_scripts,
+ OSResults *results);
bool detect_blob(BLOB_CHOICE_LIST *scores);
int get_orientation();
@@ -116,16 +118,19 @@ private:
const std::vector<int> *allowed_scripts_;
};
-int orientation_and_script_detection(const char *filename, OSResults *, tesseract::Tesseract *);
+int orientation_and_script_detection(const char *filename, OSResults *,
+ tesseract::Tesseract *);
-int os_detect(TO_BLOCK_LIST *port_blocks, OSResults *osr, tesseract::Tesseract *tess);
+int os_detect(TO_BLOCK_LIST *port_blocks, OSResults *osr,
+ tesseract::Tesseract *tess);
-int os_detect_blobs(const std::vector<int> *allowed_scripts, BLOBNBOX_CLIST *blob_list,
- OSResults *osr, tesseract::Tesseract *tess);
-
-bool os_detect_blob(BLOBNBOX *bbox, OrientationDetector *o, ScriptDetector *s, OSResults *,
+int os_detect_blobs(const std::vector<int> *allowed_scripts,
+ BLOBNBOX_CLIST *blob_list, OSResults *osr,
tesseract::Tesseract *tess);
+bool os_detect_blob(BLOBNBOX *bbox, OrientationDetector *o, ScriptDetector *s,
+ OSResults *, tesseract::Tesseract *tess);
+
// Helper method to convert an orientation index to its value in degrees.
// The value represents the amount of clockwise rotation in degrees that must be
// applied for the text to be upright (readable).
diff --git a/include/tesseract/pageiterator.h b/include/tesseract/pageiterator.h
index 8fa1aca0..670314aa 100644
--- a/include/tesseract/pageiterator.h
+++ b/include/tesseract/pageiterator.h
@@ -65,8 +65,9 @@ public:
* that tesseract has been given by the Thresholder.
* After the constructor, Begin has already been called.
*/
- PageIterator(PAGE_RES *page_res, Tesseract *tesseract, int scale, int scaled_yres, int rect_left,
- int rect_top, int rect_width, int rect_height);
+ PageIterator(PAGE_RES *page_res, Tesseract *tesseract, int scale,
+ int scaled_yres, int rect_left, int rect_top, int rect_width,
+ int rect_height);
virtual ~PageIterator();
/**
@@ -153,7 +154,8 @@ public:
* it.IsAtFinalElement(RIL_PARA, RIL_WORD) = true
* it.IsAtFinalElement(RIL_BLOCK, RIL_WORD) = false
*/
- virtual bool IsAtFinalElement(PageIteratorLevel level, PageIteratorLevel element) const;
+ virtual bool IsAtFinalElement(PageIteratorLevel level,
+ PageIteratorLevel element) const;
/**
* Returns whether this iterator is positioned
@@ -185,7 +187,8 @@ public:
* where the placement is obvious, and after recognition, it doesn't make as
* much difference, as the diacritics will already be included in the word.
*/
- void SetBoundingBoxComponents(bool include_upper_dots, bool include_lower_dots) {
+ void SetBoundingBoxComponents(bool include_upper_dots,
+ bool include_lower_dots) {
include_upper_dots_ = include_upper_dots;
include_lower_dots_ = include_lower_dots;
}
@@ -199,16 +202,17 @@ public:
* from a grey image. The padding argument to GetImage can be used to expand
* the image to include more foreground pixels. See GetImage below.
*/
- bool BoundingBox(PageIteratorLevel level, int *left, int *top, int *right, int *bottom) const;
- bool BoundingBox(PageIteratorLevel level, int padding, int *left, int *top, int *right,
+ bool BoundingBox(PageIteratorLevel level, int *left, int *top, int *right,
int *bottom) const;
+ bool BoundingBox(PageIteratorLevel level, int padding, int *left, int *top,
+ int *right, int *bottom) const;
/**
* Returns the bounding rectangle of the object in a coordinate system of the
* working image rectangle having its origin at (rect_left_, rect_top_) with
* respect to the original image and is scaled by a factor scale_.
*/
- bool BoundingBoxInternal(PageIteratorLevel level, int *left, int *top, int *right,
- int *bottom) const;
+ bool BoundingBoxInternal(PageIteratorLevel level, int *left, int *top,
+ int *right, int *bottom) const;
/** Returns whether there is no object of a given level. */
bool Empty(PageIteratorLevel level) const;
@@ -247,7 +251,8 @@ public:
* If you do not supply an original image, you will get a binary one.
* Use pixDestroy to delete the image after use.
*/
- Pix *GetImage(PageIteratorLevel level, int padding, Pix *original_img, int *left, int *top) const;
+ Pix *GetImage(PageIteratorLevel level, int padding, Pix *original_img,
+ int *left, int *top) const;
/**
* Returns the baseline of the current object at the given level.
@@ -255,7 +260,8 @@ public:
* WARNING: with vertical text, baselines may be vertical!
* Returns false if there is no baseline at the current position.
*/
- bool Baseline(PageIteratorLevel level, int *x1, int *y1, int *x2, int *y2) const;
+ bool Baseline(PageIteratorLevel level, int *x1, int *y1, int *x2,
+ int *y2) const;
/**
* Returns orientation for the block the iterator points to.
@@ -267,7 +273,8 @@ public:
*/
void Orientation(tesseract::Orientation *orientation,
tesseract::WritingDirection *writing_direction,
- tesseract::TextlineOrder *textline_order, float *deskew_angle) const;
+ tesseract::TextlineOrder *textline_order,
+ float *deskew_angle) const;
/**
* Returns information about the current paragraph, if available.
@@ -297,8 +304,9 @@ public:
* first_line_indent for subsequent paragraphs in this block
* of text.
*/
- void ParagraphInfo(tesseract::ParagraphJustification *justification, bool *is_list_item,
- bool *is_crown, int *first_line_indent) const;
+ void ParagraphInfo(tesseract::ParagraphJustification *justification,
+ bool *is_list_item, bool *is_crown,
+ int *first_line_indent) const;
// If the current WERD_RES (it_->word()) is not nullptr, sets the BlamerBundle
// of the current word to the given pointer (takes ownership of the pointer)
diff --git a/include/tesseract/publictypes.h b/include/tesseract/publictypes.h
index 25160083..e4821e91 100644
--- a/include/tesseract/publictypes.h
+++ b/include/tesseract/publictypes.h
@@ -75,12 +75,14 @@ inline bool PTIsLineType(PolyBlockType type) {
}
/** Returns true if PolyBlockType is of image type */
inline bool PTIsImageType(PolyBlockType type) {
- return type == PT_FLOWING_IMAGE || type == PT_HEADING_IMAGE || type == PT_PULLOUT_IMAGE;
+ return type == PT_FLOWING_IMAGE || type == PT_HEADING_IMAGE ||
+ type == PT_PULLOUT_IMAGE;
}
/** Returns true if PolyBlockType is of text type */
inline bool PTIsTextType(PolyBlockType type) {
- return type == PT_FLOWING_TEXT || type == PT_HEADING_TEXT || type == PT_PULLOUT_TEXT ||
- type == PT_TABLE || type == PT_VERTICAL_TEXT || type == PT_CAPTION_TEXT ||
+ return type == PT_FLOWING_TEXT || type == PT_HEADING_TEXT ||
+ type == PT_PULLOUT_TEXT || type == PT_TABLE ||
+ type == PT_VERTICAL_TEXT || type == PT_CAPTION_TEXT ||
type == PT_INLINE_EQUATION;
}
// Returns true if PolyBlockType is of pullout(inter-column) type
@@ -155,23 +157,24 @@ enum TextlineOrder {
* so that the inequality test macros below work.
*/
enum PageSegMode {
- PSM_OSD_ONLY = 0, ///< Orientation and script detection only.
- PSM_AUTO_OSD = 1, ///< Automatic page segmentation with orientation and
- ///< script detection. (OSD)
- PSM_AUTO_ONLY = 2, ///< Automatic page segmentation, but no OSD, or OCR.
- PSM_AUTO = 3, ///< Fully automatic page segmentation, but no OSD.
- PSM_SINGLE_COLUMN = 4, ///< Assume a single column of text of variable sizes.
+ PSM_OSD_ONLY = 0, ///< Orientation and script detection only.
+ PSM_AUTO_OSD = 1, ///< Automatic page segmentation with orientation and
+ ///< script detection. (OSD)
+ PSM_AUTO_ONLY = 2, ///< Automatic page segmentation, but no OSD, or OCR.
+ PSM_AUTO = 3, ///< Fully automatic page segmentation, but no OSD.
+ PSM_SINGLE_COLUMN = 4, ///< Assume a single column of text of variable sizes.
PSM_SINGLE_BLOCK_VERT_TEXT = 5, ///< Assume a single uniform block of
///< vertically aligned text.
- PSM_SINGLE_BLOCK = 6, ///< Assume a single uniform block of text. (Default.)
- PSM_SINGLE_LINE = 7, ///< Treat the image as a single text line.
- PSM_SINGLE_WORD = 8, ///< Treat the image as a single word.
- PSM_CIRCLE_WORD = 9, ///< Treat the image as a single word in a circle.
- PSM_SINGLE_CHAR = 10, ///< Treat the image as a single character.
- PSM_SPARSE_TEXT = 11, ///< Find as much text as possible in no particular order.
- PSM_SPARSE_TEXT_OSD = 12, ///< Sparse text with orientation and script det.
- PSM_RAW_LINE = 13, ///< Treat the image as a single text line, bypassing
- ///< hacks that are Tesseract-specific.
+ PSM_SINGLE_BLOCK = 6, ///< Assume a single uniform block of text. (Default.)
+ PSM_SINGLE_LINE = 7, ///< Treat the image as a single text line.
+ PSM_SINGLE_WORD = 8, ///< Treat the image as a single word.
+ PSM_CIRCLE_WORD = 9, ///< Treat the image as a single word in a circle.
+ PSM_SINGLE_CHAR = 10, ///< Treat the image as a single character.
+ PSM_SPARSE_TEXT =
+ 11, ///< Find as much text as possible in no particular order.
+ PSM_SPARSE_TEXT_OSD = 12, ///< Sparse text with orientation and script det.
+ PSM_RAW_LINE = 13, ///< Treat the image as a single text line, bypassing
+ ///< hacks that are Tesseract-specific.
PSM_COUNT ///< Number of enum entries.
};
diff --git a/include/tesseract/renderer.h b/include/tesseract/renderer.h
index 4fa38a74..eaca00cd 100644
--- a/include/tesseract/renderer.h
+++ b/include/tesseract/renderer.h
@@ -215,7 +215,8 @@ class TESS_API TessPDFRenderer : public TessResultRenderer {
public:
// datadir is the location of the TESSDATA. We need it because
// we load a custom PDF font from this location.
- TessPDFRenderer(const char *outputbase, const char *datadir, bool textonly = false);
+ TessPDFRenderer(const char *outputbase, const char *datadir,
+ bool textonly = false);
protected:
bool BeginDocumentHandler() override;
@@ -240,8 +241,9 @@ private:
// Create the /Contents object for an entire page.
char *GetPDFTextObjects(TessBaseAPI *api, double width, double height);
// Turn an image into a PDF object. Only transcode if we have to.
- static bool imageToPDFObj(Pix *pix, const char *filename, long int objnum, char **pdf_object,
- long int *pdf_object_size, int jpg_quality);
+ static bool imageToPDFObj(Pix *pix, const char *filename, long int objnum,
+ char **pdf_object, long int *pdf_object_size,
+ int jpg_quality);
};
/**
diff --git a/include/tesseract/resultiterator.h b/include/tesseract/resultiterator.h
index fc32c2fd..72ec2802 100644
--- a/include/tesseract/resultiterator.h
+++ b/include/tesseract/resultiterator.h
@@ -75,7 +75,8 @@ public:
* For instance, IsAtFinalElement(RIL_PARA, RIL_WORD) returns whether we
* point at the last word in a paragraph. See PageIterator for full comment.
*/
- bool IsAtFinalElement(PageIteratorLevel level, PageIteratorLevel element) const override;
+ bool IsAtFinalElement(PageIteratorLevel level,
+ PageIteratorLevel element) const override;
// ============= Functions that refer to words only ============.
// Returns the number of blanks before the current word.
@@ -94,8 +95,8 @@ public:
*/
virtual std::vector<std::vector<std::vector<std::pair<const char *, float>>>>
*GetRawLSTMTimesteps() const;
- virtual std::vector<std::vector<std::pair<const char *, float>>> *GetBestLSTMSymbolChoices()
- const;
+ virtual std::vector<std::vector<std::pair<const char *, float>>>
+ *GetBestLSTMSymbolChoices() const;
/**
* Return whether the current paragraph's dominant reading direction
@@ -127,9 +128,10 @@ public:
* Left-to-Right except for an RTL phrase in words 2, 3 in an ltr paragraph:
* { 0, 1, kMinorRunStart, 3, 2, kMinorRunEnd, 4 }
*/
- static void CalculateTextlineOrder(bool paragraph_is_ltr,
- const std::vector<StrongScriptDirection> &word_dirs,
- std::vector<int> *reading_order);
+ static void CalculateTextlineOrder(
+ bool paragraph_is_ltr,
+ const std::vector<StrongScriptDirection> &word_dirs,
+ std::vector<int> *reading_order);
static const int kMinorRunStart;
static const int kMinorRunEnd;
@@ -162,10 +164,12 @@ private:
* kComplexWord The previous word contains both left-to-right and
* right-to-left characters and was treated as neutral.
*/
- void CalculateTextlineOrder(bool paragraph_is_ltr, const LTRResultIterator &resit,
+ void CalculateTextlineOrder(bool paragraph_is_ltr,
+ const LTRResultIterator &resit,
std::vector<int> *indices) const;
/** Same as above, but the caller's ssd gets filled in if ssd != nullptr. */
- void CalculateTextlineOrder(bool paragraph_is_ltr, const LTRResultIterator &resit,
+ void CalculateTextlineOrder(bool paragraph_is_ltr,
+ const LTRResultIterator &resit,
std::vector<StrongScriptDirection> *ssd,
std::vector<int> *indices) const;
diff --git a/include/tesseract/version.h.in b/include/tesseract/version.h.in
index f48641a0..b283bc2e 100644
--- a/include/tesseract/version.h.in
+++ b/include/tesseract/version.h.in
@@ -18,11 +18,19 @@
#ifndef TESSERACT_API_VERSION_H_
#define TESSERACT_API_VERSION_H_
+// clang-format off
+
#define TESSERACT_MAJOR_VERSION @GENERIC_MAJOR_VERSION@
#define TESSERACT_MINOR_VERSION @GENERIC_MINOR_VERSION@
#define TESSERACT_MICRO_VERSION @GENERIC_MICRO_VERSION@
-#define TESSERACT_VERSION \
- (TESSERACT_MAJOR_VERSION << 16 | TESSERACT_MINOR_VERSION << 8 | TESSERACT_MICRO_VERSION)
+
+#define TESSERACT_VERSION \
+ (TESSERACT_MAJOR_VERSION << 16 | \
+ TESSERACT_MINOR_VERSION << 8 | \
+ TESSERACT_MICRO_VERSION)
+
#define TESSERACT_VERSION_STR "@PACKAGE_VERSION@"
+// clang-format on
+
#endif // TESSERACT_API_VERSION_H_