Dev news

Commit 21f1b393 for tesseract

commit 21f1b39358929a7b20b47ef39b806dc81aa3b2b4
Author: Viktor Szépe <viktor@szepe.net>
Date:   Thu Jan 1 16:59:05 2026 +0100

    Fix typos (#4497)

diff --git a/src/api/baseapi.cpp b/src/api/baseapi.cpp
index 37020fa9..3b31b65a 100644
--- a/src/api/baseapi.cpp
+++ b/src/api/baseapi.cpp
@@ -1041,7 +1041,7 @@ bool TessBaseAPI::ProcessPagesInternal(const char *filename, const char *retry_c
                                 tesseract_->tessedit_page_number);
   }

-  // At this point we are officially in autodection territory.
+  // At this point we are officially in autodetection territory.
   // That means any data in stdin must be buffered, to make it
   // seekable.
   std::string buf;
diff --git a/src/api/pagerenderer.cpp b/src/api/pagerenderer.cpp
index cd2d0a5e..2ae64d32 100644
--- a/src/api/pagerenderer.cpp
+++ b/src/api/pagerenderer.cpp
@@ -473,7 +473,7 @@ Pta *ClipAndSimplifyBaseline(Pta *bottom_pts, Pta *baseline_pts,
 }

 ///
-/// Fit the baseline points into the existings polygon
+/// Fit the baseline points into the existing polygon
 ///
 Pta *FitBaselineIntoLinePolygon(Pta *bottom_pts, Pta *baseline_pts,
                                 tesseract::WritingDirection writing_direction) {
diff --git a/src/ccmain/equationdetect.cpp b/src/ccmain/equationdetect.cpp
index 7e4bbe6d..6cfa5dcf 100644
--- a/src/ccmain/equationdetect.cpp
+++ b/src/ccmain/equationdetect.cpp
@@ -48,7 +48,7 @@ static BOOL_VAR(equationdetect_save_seed_image, false, "Save the seed image");
 static BOOL_VAR(equationdetect_save_merged_image, false, "Save the merged image");

 ///////////////////////////////////////////////////////////////////////////
-// Utility ColParition sort functions.
+// Utility ColPartition sort functions.
 ///////////////////////////////////////////////////////////////////////////
 static int SortCPByTopReverse(const void *p1, const void *p2) {
   const ColPartition *cp1 = *static_cast<ColPartition *const *>(p1);
@@ -599,7 +599,7 @@ float EquationDetect::ComputeForegroundDensity(const TBOX &tbox) {
 bool EquationDetect::CheckSeedFgDensity(const float density_th, ColPartition *part) {
   ASSERT_HOST(part);

-  // Split part horizontall, and check for each sub part.
+  // Split part horizontally, and check for each sub part.
   std::vector<TBOX> sub_boxes;
   SplitCPHorLite(part, &sub_boxes);
   float parts_passed = 0.0;
@@ -1438,7 +1438,7 @@ void EquationDetect::PrintSpecialBlobsDensity(const ColPartition *part) const {
   ASSERT_HOST(part);
   TBOX box(part->bounding_box());
   int h = pixGetHeight(lang_tesseract_->BestPix());
-  tprintf("Printing special blobs density values for ColParition (t=%d,b=%d) ", h - box.top(),
+  tprintf("Printing special blobs density values for ColPartition (t=%d,b=%d) ", h - box.top(),
           h - box.bottom());
   box.print();
   tprintf("blobs count = %d, density = ", part->boxes_count());
diff --git a/src/ccstruct/coutln.cpp b/src/ccstruct/coutln.cpp
index db2e1cd6..55221d13 100644
--- a/src/ccstruct/coutln.cpp
+++ b/src/ccstruct/coutln.cpp
@@ -577,7 +577,7 @@ int16_t C_OUTLINE::turn_direction() const { // winding number
  * Reverse the direction of an outline.
  */

-void C_OUTLINE::reverse() {      // reverse drection
+void C_OUTLINE::reverse() {      // reverse direction
   DIR128 halfturn = MODULUS / 2; // amount to shift
   DIR128 stepdir;                // direction of step
   int16_t stepindex;             // index to cstep
diff --git a/src/ccstruct/matrix.h b/src/ccstruct/matrix.h
index a97912ad..7fa344e7 100644
--- a/src/ccstruct/matrix.h
+++ b/src/ccstruct/matrix.h
@@ -146,7 +146,7 @@ public:
   }

   // Writes to the given file. Returns false in case of error.
-  // Only works with bitwise-serializeable types!
+  // Only works with bitwise-serializable types!
   bool Serialize(FILE *fp) const {
     if (!SerializeSize(fp)) {
       return false;
@@ -170,7 +170,7 @@ public:
   }

   // Reads from the given file. Returns false in case of error.
-  // Only works with bitwise-serializeable types!
+  // Only works with bitwise-serializable types!
   // If swap is true, assumes a big/little-endian swap is needed.
   bool DeSerialize(bool swap, FILE *fp) {
     if (!DeSerializeSize(swap, fp)) {
diff --git a/src/ccstruct/ratngs.cpp b/src/ccstruct/ratngs.cpp
index 66d032bd..502ec4f4 100644
--- a/src/ccstruct/ratngs.cpp
+++ b/src/ccstruct/ratngs.cpp
@@ -660,7 +660,7 @@ int WERD_CHOICE::GetTopScriptID() const {
   return max_sid;
 }

-// Fixes the state_ for a chop at the given blob_posiiton.
+// Fixes the state_ for a chop at the given blob_position.
 void WERD_CHOICE::UpdateStateForSplit(int blob_position) {
   int total_chunks = 0;
   for (unsigned i = 0; i < length_; ++i) {
diff --git a/src/ccstruct/ratngs.h b/src/ccstruct/ratngs.h
index e9371c9c..cf18a95e 100644
--- a/src/ccstruct/ratngs.h
+++ b/src/ccstruct/ratngs.h
@@ -552,7 +552,7 @@ public:
   // Note that for Japanese, Hiragana and Katakana are simply treated as Han.
   int GetTopScriptID() const;

-  // Fixes the state_ for a chop at the given blob_posiiton.
+  // Fixes the state_ for a chop at the given blob_position.
   void UpdateStateForSplit(int blob_position);

   // Returns the sum of all the state elements, being the total number of blobs.
@@ -569,7 +569,7 @@ public:
   // one displayed) and waits for a click in the window.
   void DisplaySegmentation(TWERD *word);

-  WERD_CHOICE &operator+=(        // concatanate
+  WERD_CHOICE &operator+=(        // concatenate
       const WERD_CHOICE &second); // second on first

   WERD_CHOICE &operator=(const WERD_CHOICE &source);
diff --git a/src/ccstruct/rejctmap.h b/src/ccstruct/rejctmap.h
index beeb5373..fc4b9780 100644
--- a/src/ccstruct/rejctmap.h
+++ b/src/ccstruct/rejctmap.h
@@ -75,7 +75,7 @@ enum REJ_FLAGS {
   /* Reject modes generated after MM_ACCEPT but before QUALITY_ACCEPT */
   R_BAD_QUALITY, // TEMP Quality metrics bad for WERD

-  /* Reject modes generated after QUALITY_ACCEPT but before MINIMAL_REJ accep*/
+  /* Reject modes generated after QUALITY_ACCEPT but before MINIMAL_REJ accept */
   R_DOC_REJ,   // TEMP Document rejection
   R_BLOCK_REJ, // TEMP Block rejection
   R_ROW_REJ,   // TEMP Row rejection
diff --git a/src/ccstruct/werd.cpp b/src/ccstruct/werd.cpp
index 36aa0b60..287c82f2 100644
--- a/src/ccstruct/werd.cpp
+++ b/src/ccstruct/werd.cpp
@@ -513,7 +513,7 @@ void WERD::GetNoiseOutlines(std::vector<C_OUTLINE *> *outlines) {
   }
 }

-// Adds the selected outlines to the indcated real blobs, and puts the rest
+// Adds the selected outlines to the indicated real blobs, and puts the rest
 // back in rej_cblobs where they came from. Where the target_blobs entry is
 // nullptr, a run of wanted outlines is put into a single new blob.
 // Ownership of the outlines is transferred back to the word. (Hence
diff --git a/src/ccstruct/werd.h b/src/ccstruct/werd.h
index 2c19ef1e..6e5b405a 100644
--- a/src/ccstruct/werd.h
+++ b/src/ccstruct/werd.h
@@ -174,7 +174,7 @@ public:
   // Extracts all the noise outlines and stuffs the pointers into the given
   // vector of outlines. Afterwards, the outlines vector owns the pointers.
   void GetNoiseOutlines(std::vector<C_OUTLINE *> *outlines);
-  // Adds the selected outlines to the indcated real blobs, and puts the rest
+  // Adds the selected outlines to the indicated real blobs, and puts the rest
   // back in rej_cblobs where they came from. Where the target_blobs entry is
   // nullptr, a run of wanted outlines is put into a single new blob.
   // Ownership of the outlines is transferred back to the word. (Hence
diff --git a/src/ccutil/unicharset.h b/src/ccutil/unicharset.h
index dd0ff8f3..a37ad5cf 100644
--- a/src/ccutil/unicharset.h
+++ b/src/ccutil/unicharset.h
@@ -569,7 +569,7 @@ public:
   // src_unicharset with ranges in it. The unicharsets don't have to be the
   // same, and graphemes are correctly accounted for.
   void ExpandRangesFromOther(const UNICHARSET &src);
-  // Makes this a copy of src. Clears this completely first, so the automattic
+  // Makes this a copy of src. Clears this completely first, so the automatic
   // ids will not be present in this if not in src.
   void CopyFrom(const UNICHARSET &src);
   // For each id in src, if it does not occur in this, add it, as in
diff --git a/src/classify/intmatcher.h b/src/classify/intmatcher.h
index d46100cb..167e87a5 100644
--- a/src/classify/intmatcher.h
+++ b/src/classify/intmatcher.h
@@ -22,7 +22,7 @@

 namespace tesseract {

-// Character fragments could be present in the trained templaes
+// Character fragments could be present in the trained templates
 // but turned on/off on the language-by-language basis or depending
 // on particular properties of the corpus (e.g. when we expect the
 // images to have low exposure).
diff --git a/src/dict/dict.h b/src/dict/dict.h
index 3290dd0b..51872bcc 100644
--- a/src/dict/dict.h
+++ b/src/dict/dict.h
@@ -280,7 +280,7 @@ public:
   /* dict.cpp ****************************************************************/

   /// Initialize Dict class - load dawgs from [lang].traineddata and
-  /// user-specified wordlist and parttern list.
+  /// user-specified wordlist and pattern list.
   static DawgCache *GlobalDawgCache();
   // Sets up ready for a Load or LoadLSTM.
   void SetupForLoad(DawgCache *dawg_cache);
@@ -423,7 +423,7 @@ public:
   /// For each of the character classes of the given unichar_id (and the
   /// unichar_id itself) finds the corresponding outgoing node or self-loop
   /// in the given dawg and (after checking that it is valid) records it in
-  /// dawg_args->updated_ative_dawgs. Updates current_permuter if any valid
+  /// dawg_args->updated_active_dawgs. Updates current_permuter if any valid
   /// edges were found.
   void ProcessPatternEdges(const Dawg *dawg, const DawgPosition &info, UNICHAR_ID unichar_id,
                            bool word_end, DawgArgs *dawg_args,
diff --git a/src/textord/edgloop.cpp b/src/textord/edgloop.cpp
index 87dab705..bbee5618 100644
--- a/src/textord/edgloop.cpp
+++ b/src/textord/edgloop.cpp
@@ -127,7 +127,7 @@ int16_t loop_bounding_box( // get bounding box
   realstart = start;
   botleft = topright = ICOORD(edgept->pos.x(), edgept->pos.y());
   leftmost = edgept->pos.x();
-  length = 0; // coutn length
+  length = 0; // count length
   do {
     edgept = edgept->next;
     if (edgept->pos.x() < botleft.x()) {
diff --git a/src/textord/makerow.cpp b/src/textord/makerow.cpp
index 3d89b711..ca7aea7b 100644
--- a/src/textord/makerow.cpp
+++ b/src/textord/makerow.cpp
@@ -2078,7 +2078,7 @@ void make_baseline_spline(TO_ROW *row, // row to fit
 bool segment_baseline( // split baseline
     TO_ROW *row,       // row to fit
     TO_BLOCK *block,   // block it came from
-    int32_t &segments, // no fo segments
+    int32_t &segments, // no of segments
     int32_t *xstarts   // coords of segments
 ) {
   bool needs_curve; // needs curved line
@@ -2175,7 +2175,7 @@ bool segment_baseline( // split baseline
 double *linear_spline_baseline( // split baseline
     TO_ROW *row,                // row to fit
     TO_BLOCK *block,            // block it came from
-    int32_t &segments,          // no fo segments
+    int32_t &segments,          // no of segments
     int32_t xstarts[]           // coords of segments
 ) {
   int blobcount;         // no of blobs
diff --git a/src/textord/makerow.h b/src/textord/makerow.h
index cfee8ec6..3987d3f8 100644
--- a/src/textord/makerow.h
+++ b/src/textord/makerow.h
@@ -217,13 +217,13 @@ void make_baseline_spline(TO_ROW *row,      // row to fit
 bool segment_baseline(                      // split baseline
     TO_ROW *row,                            // row to fit
     TO_BLOCK *block,                        // block it came from
-    int32_t &segments,                      // no fo segments
+    int32_t &segments,                      // no of segments
     int32_t *xstarts                        // coords of segments
 );
 double *linear_spline_baseline( // split baseline
     TO_ROW *row,                // row to fit
     TO_BLOCK *block,            // block it came from
-    int32_t &segments,          // no fo segments
+    int32_t &segments,          // no of segments
     int32_t xstarts[]           // coords of segments
 );
 void assign_blobs_to_rows( // find lines
diff --git a/src/textord/pitsync1.cpp b/src/textord/pitsync1.cpp
index 5de3b768..7999558e 100644
--- a/src/textord/pitsync1.cpp
+++ b/src/textord/pitsync1.cpp
@@ -91,7 +91,7 @@ FPSEGPT::FPSEGPT(           // constructor
   double mean;                    // mean pitch
   double total;                   // total dists
   double factor;                  // cost function
-  FPSEGPT_IT pred_it = prev_list; // for previuos segment
+  FPSEGPT_IT pred_it = prev_list; // for previous segment

   cost = FLT_MAX;
   pred = nullptr;
diff --git a/src/textord/tabfind.cpp b/src/textord/tabfind.cpp
index fb6d99ae..45d47e72 100644
--- a/src/textord/tabfind.cpp
+++ b/src/textord/tabfind.cpp
@@ -133,7 +133,7 @@ void TabFind::SetBlockRuleEdges(TO_BLOCK *block) {
 }

 // Sets the left and right rule and crossing_rules for the blobs in the given
-// list by fiding the next outermost tabvectors for each blob.
+// list by finding the next outermost tabvectors for each blob.
 void TabFind::SetBlobRuleEdges(BLOBNBOX_LIST *blobs) {
   BLOBNBOX_IT blob_it(blobs);
   for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {