Commit 70948280 for tesseract
commit 709482807a11e0fe9bd48d91dbec3d23de486542
Author: Stefan Weil <sw@weilnetz.de>
Date: Thu Jun 4 17:46:53 2026 +0200
Replace C-style arrays with std::array
Assisted-by: OpenCode / BigPickle
Signed-off-by: Stefan Weil <sw@weilnetz.de>
diff --git a/src/ccstruct/blobbox.h b/src/ccstruct/blobbox.h
index f25d00fe..497da78b 100644
--- a/src/ccstruct/blobbox.h
+++ b/src/ccstruct/blobbox.h
@@ -35,6 +35,8 @@
#include "werd.h" // for WERD_LIST
#include <cinttypes> // for PRId32
+
+#include <array>
#include <cmath> // for std::sqrt
#include <cstdint> // for int16_t, int32_t
@@ -537,11 +539,11 @@ private:
int32_t line_crossings_; // Number of line intersections touched.
BLOBNBOX *base_char_blob_; // The blob that was the base char.
tesseract::ColPartition *owner_; // Who will delete me when I am not needed
- BLOBNBOX *neighbours_[BND_COUNT];
+ std::array<BLOBNBOX *, BND_COUNT> neighbours_;
float horz_stroke_width_ = 0.0f; // Median horizontal stroke width
float vert_stroke_width_ = 0.0f; // Median vertical stroke width
float area_stroke_width_ = 0.0f; // Stroke width from area/perimeter ratio.
- bool good_stroke_neighbours_[BND_COUNT];
+ std::array<bool, BND_COUNT> good_stroke_neighbours_;
bool horz_possible_; // Could be part of horizontal flow.
bool vert_possible_; // Could be part of vertical flow.
bool leader_on_left_; // There is a leader to the left.
diff --git a/src/ccstruct/coutln.cpp b/src/ccstruct/coutln.cpp
index 55221d13..0e81ad8a 100644
--- a/src/ccstruct/coutln.cpp
+++ b/src/ccstruct/coutln.cpp
@@ -42,7 +42,7 @@
namespace tesseract {
-ICOORD C_OUTLINE::step_coords[4] = {ICOORD(-1, 0), ICOORD(0, -1), ICOORD(1, 0), ICOORD(0, 1)};
+std::array<ICOORD, 4> C_OUTLINE::step_coords = {ICOORD(-1, 0), ICOORD(0, -1), ICOORD(1, 0), ICOORD(0, 1)};
/**
* @name C_OUTLINE::C_OUTLINE
diff --git a/src/ccstruct/coutln.h b/src/ccstruct/coutln.h
index fd08fd6d..043de38e 100644
--- a/src/ccstruct/coutln.h
+++ b/src/ccstruct/coutln.h
@@ -28,6 +28,8 @@
#include <tesseract/export.h> // for DLLSYM
#include <cstdint> // for int16_t, int32_t
+
+#include <array>
#include <bitset> // for std::bitset<16>
struct Pix;
@@ -289,7 +291,7 @@ private:
std::vector<uint8_t> steps; // step array
EdgeOffset *offsets; // Higher precision edge.
C_OUTLINE_LIST children; // child elements
- static ICOORD step_coords[4];
+ static std::array<ICOORD, 4> step_coords;
};
} // namespace tesseract
diff --git a/src/ccstruct/seam.h b/src/ccstruct/seam.h
index 73acaea7..7859780b 100644
--- a/src/ccstruct/seam.h
+++ b/src/ccstruct/seam.h
@@ -26,6 +26,8 @@
#include "blobs.h"
#include "split.h"
+#include <array>
+
namespace tesseract {
using PRIORITY = float; /* PRIORITY */
@@ -196,7 +198,7 @@ private:
// Number of splits_ that are used.
uint8_t num_splits_;
// Set of pairs of points that are the ends of each split in the SEAM.
- SPLIT splits_[kMaxNumSplits];
+ std::array<SPLIT, kMaxNumSplits> splits_;
};
void start_seam_list(TWERD *word, std::vector<SEAM *> *seam_array);
diff --git a/src/ccutil/ambigs.cpp b/src/ccutil/ambigs.cpp
index 5aafedb1..4fff07b6 100644
--- a/src/ccutil/ambigs.cpp
+++ b/src/ccutil/ambigs.cpp
@@ -36,8 +36,8 @@ static const char kIllegalUnicharMsg[] = "Illegal unichar %s in ambiguity specif
// Maximum line size:
// 10 for sizes of ambigs, tabs, ambig type and newline
-// UNICHAR_LEN * (MAX_AMBIG_SIZE + 1) for each part of the ambig
-const int kMaxAmbigStringSize = UNICHAR_LEN * (MAX_AMBIG_SIZE + 1);
+// UNICHAR_LEN * (kMaxAmbigSize + 1) for each part of the ambig
+const int kMaxAmbigStringSize = UNICHAR_LEN * (kMaxAmbigSize + 1);
AmbigSpec::AmbigSpec() : correct_ngram_id(INVALID_UNICHAR_ID), type(NOT_AMBIG), wrong_ngram_size(0) {
wrong_ngram[0] = INVALID_UNICHAR_ID;
@@ -81,7 +81,7 @@ void UnicharAmbigs::LoadUnicharAmbigs(const UNICHARSET &encoder_set, TFile *ambi
const int kBufferSize = 10 + 2 * kMaxAmbigStringSize;
char *buffer = new char[kBufferSize];
char replacement_string[kMaxAmbigStringSize];
- UNICHAR_ID test_unichar_ids[MAX_AMBIG_SIZE + 1];
+ std::array<UNICHAR_ID, kMaxAmbigSize + 1> test_unichar_ids;
int line_num = 0;
int type = NOT_AMBIG;
@@ -101,14 +101,14 @@ void UnicharAmbigs::LoadUnicharAmbigs(const UNICHARSET &encoder_set, TFile *ambi
}
++line_num;
if (!ParseAmbiguityLine(line_num, version, debug_level, encoder_set, buffer,
- &test_ambig_part_size, test_unichar_ids, &replacement_ambig_part_size,
+ &test_ambig_part_size, test_unichar_ids.data(), &replacement_ambig_part_size,
replacement_string, &type)) {
continue;
}
// Construct AmbigSpec and add it to the appropriate AmbigSpec_LIST.
auto *ambig_spec = new AmbigSpec();
if (!InsertIntoTable((type == REPLACE_AMBIG) ? replace_ambigs_ : dang_ambigs_,
- test_ambig_part_size, test_unichar_ids, replacement_ambig_part_size,
+ test_ambig_part_size, test_unichar_ids.data(), replacement_ambig_part_size,
replacement_string, type, ambig_spec, unicharset)) {
continue;
}
@@ -188,9 +188,9 @@ void UnicharAmbigs::LoadUnicharAmbigs(const UNICHARSET &encoder_set, TFile *ambi
for (lst_it.mark_cycle_pt(); !lst_it.cycled_list(); lst_it.forward()) {
AmbigSpec *ambig_spec = lst_it.data();
tprintf("wrong_ngram:");
- UnicharIdArrayUtils::print(ambig_spec->wrong_ngram, *unicharset);
+ UnicharIdArrayUtils::print(ambig_spec->wrong_ngram.data(), *unicharset);
tprintf("correct_fragments:");
- UnicharIdArrayUtils::print(ambig_spec->correct_fragments, *unicharset);
+ UnicharIdArrayUtils::print(ambig_spec->correct_fragments.data(), *unicharset);
}
}
}
@@ -235,7 +235,7 @@ bool UnicharAmbigs::ParseAmbiguityLine(int line_num, int version, int debug_leve
return false;
}
*test_ambig_part_size = unichars.size();
- if (*test_ambig_part_size > MAX_AMBIG_SIZE) {
+ if (*test_ambig_part_size > kMaxAmbigSize) {
if (debug_level) {
tprintf("Too many unichars in ambiguity on line %d\n", line_num);
}
@@ -251,7 +251,7 @@ bool UnicharAmbigs::ParseAmbiguityLine(int line_num, int version, int debug_leve
return false;
}
*replacement_ambig_part_size = unichars.size();
- if (*replacement_ambig_part_size > MAX_AMBIG_SIZE) {
+ if (*replacement_ambig_part_size > kMaxAmbigSize) {
if (debug_level) {
tprintf("Too many unichars in ambiguity on line %d\n", line_num);
}
@@ -276,7 +276,7 @@ bool UnicharAmbigs::ParseAmbiguityLine(int line_num, int version, int debug_leve
}
return false;
}
- if (*test_ambig_part_size > MAX_AMBIG_SIZE) {
+ if (*test_ambig_part_size > kMaxAmbigSize) {
if (debug_level) {
tprintf("Too many unichars in ambiguity on line %d\n", line_num);
}
@@ -304,7 +304,7 @@ bool UnicharAmbigs::ParseAmbiguityLine(int line_num, int version, int debug_leve
}
return false;
}
- if (*replacement_ambig_part_size > MAX_AMBIG_SIZE) {
+ if (*replacement_ambig_part_size > kMaxAmbigSize) {
if (debug_level) {
tprintf("Too many unichars in ambiguity on line %d\n", line_num);
}
@@ -362,7 +362,7 @@ bool UnicharAmbigs::InsertIntoTable(UnicharAmbigsVector &table, int test_ambig_p
}
ambig_spec->wrong_ngram_size =
- UnicharIdArrayUtils::copy(test_unichar_ids, ambig_spec->wrong_ngram);
+ UnicharIdArrayUtils::copy(test_unichar_ids, ambig_spec->wrong_ngram.data());
// Since we need to maintain a constant number of unichar positions in
// order to construct ambig_blob_choices vector in NoDangerousAmbig(), for
diff --git a/src/ccutil/ambigs.h b/src/ccutil/ambigs.h
index effedbf7..b80e7db4 100644
--- a/src/ccutil/ambigs.h
+++ b/src/ccutil/ambigs.h
@@ -31,10 +31,12 @@
# include "tprintf.h"
# include "unicharset.h"
-# define MAX_AMBIG_SIZE 10
+# include <array>
namespace tesseract {
+constexpr int kMaxAmbigSize = 10;
+
using UnicharIdVector = std::vector<UNICHAR_ID>;
enum AmbigType {
@@ -118,15 +120,15 @@ public:
// be sorted by their wrong_ngram arrays. Example of wrong_ngram vectors
// in a sorted AmbigSpec_LIST: [9 1 3], [9 3 4], [9 8], [9 8 1].
static int compare_ambig_specs(const AmbigSpec *s1, const AmbigSpec *s2) {
- int result = UnicharIdArrayUtils::compare(s1->wrong_ngram, s2->wrong_ngram);
+ int result = UnicharIdArrayUtils::compare(s1->wrong_ngram.data(), s2->wrong_ngram.data());
if (result != 0) {
return result;
}
- return UnicharIdArrayUtils::compare(s1->correct_fragments, s2->correct_fragments);
+ return UnicharIdArrayUtils::compare(s1->correct_fragments.data(), s2->correct_fragments.data());
}
- UNICHAR_ID wrong_ngram[MAX_AMBIG_SIZE + 1];
- UNICHAR_ID correct_fragments[MAX_AMBIG_SIZE + 1];
+ std::array<UNICHAR_ID, kMaxAmbigSize + 1> wrong_ngram;
+ std::array<UNICHAR_ID, kMaxAmbigSize + 1> correct_fragments;
UNICHAR_ID correct_ngram_id;
AmbigType type;
int wrong_ngram_size;
diff --git a/src/ccutil/unicharcompress.h b/src/ccutil/unicharcompress.h
index 2e81bbde..45889d26 100644
--- a/src/ccutil/unicharcompress.h
+++ b/src/ccutil/unicharcompress.h
@@ -23,6 +23,9 @@
#include <unordered_map>
#include <vector>
+
+#include <array>
+
#include "serialis.h"
#include "unicharset.h"
@@ -35,7 +38,7 @@ public:
static const int kMaxCodeLen = 9;
RecodedCharID() : self_normalized_(1), length_(0) {
- memset(code_, 0, sizeof(code_));
+ code_.fill(0);
}
void Truncate(int length) {
length_ = length;
@@ -105,7 +108,7 @@ private:
// The number of elements in use in code_;
int32_t length_;
// The re-encoded form of the unichar-id to which this RecodedCharID relates.
- int32_t code_[kMaxCodeLen];
+ std::array<int32_t, kMaxCodeLen> code_;
};
// Class holds a "compression" of a unicharset to simplify the learning problem
diff --git a/src/classify/trainingsample.cpp b/src/classify/trainingsample.cpp
index f08c1a2b..b0f83008 100644
--- a/src/classify/trainingsample.cpp
+++ b/src/classify/trainingsample.cpp
@@ -36,8 +36,8 @@ namespace tesseract {
const int kRandomizingCenter = 128;
// Randomizing factors.
-const int TrainingSample::kYShiftValues[kSampleYShiftSize] = {6, 3, -3, -6, 0};
-const double TrainingSample::kScaleValues[kSampleScaleSize] = {1.0625, 0.9375, 1.0};
+const std::array<int, kSampleYShiftSize> TrainingSample::kYShiftValues = {6, 3, -3, -6, 0};
+const std::array<double, kSampleScaleSize> TrainingSample::kScaleValues = {1.0625, 0.9375, 1.0};
TrainingSample::~TrainingSample() {
delete[] features_;
@@ -77,10 +77,10 @@ bool TrainingSample::Serialize(FILE *fp) const {
num_micro_features_) {
return false;
}
- if (fwrite(cn_feature_, sizeof(*cn_feature_), kNumCNParams, fp) != kNumCNParams) {
+ if (fwrite(cn_feature_.data(), sizeof(cn_feature_[0]), kNumCNParams, fp) != kNumCNParams) {
return false;
}
- if (fwrite(geo_feature_, sizeof(*geo_feature_), GeoCount, fp) != GeoCount) {
+ if (fwrite(geo_feature_.data(), sizeof(geo_feature_[0]), GeoCount, fp) != GeoCount) {
return false;
}
return true;
@@ -145,10 +145,10 @@ bool TrainingSample::DeSerialize(bool swap, FILE *fp) {
num_micro_features_) {
return false;
}
- if (fread(cn_feature_, sizeof(*cn_feature_), kNumCNParams, fp) != kNumCNParams) {
+ if (fread(cn_feature_.data(), sizeof(cn_feature_[0]), kNumCNParams, fp) != kNumCNParams) {
return false;
}
- if (fread(geo_feature_, sizeof(*geo_feature_), GeoCount, fp) != GeoCount) {
+ if (fread(geo_feature_.data(), sizeof(geo_feature_[0]), GeoCount, fp) != GeoCount) {
return false;
}
return true;
@@ -227,8 +227,8 @@ TrainingSample *TrainingSample::Copy() const {
memcpy(sample->micro_features_, micro_features_,
num_micro_features_ * sizeof(micro_features_[0]));
}
- memcpy(sample->cn_feature_, cn_feature_, sizeof(*cn_feature_) * kNumCNParams);
- memcpy(sample->geo_feature_, geo_feature_, sizeof(*geo_feature_) * GeoCount);
+ memcpy(sample->cn_feature_.data(), cn_feature_.data(), sizeof(cn_feature_[0]) * kNumCNParams);
+ memcpy(sample->geo_feature_.data(), geo_feature_.data(), sizeof(geo_feature_[0]) * GeoCount);
return sample;
}
diff --git a/src/classify/trainingsample.h b/src/classify/trainingsample.h
index 211ab669..12cb0fa8 100644
--- a/src/classify/trainingsample.h
+++ b/src/classify/trainingsample.h
@@ -27,6 +27,8 @@
#include "shapetable.h"
#include "unicharset.h"
+#include <array>
+
struct Pix;
namespace tesseract {
@@ -221,10 +223,10 @@ private:
// Array of features.
MicroFeature *micro_features_;
// The one and only CN feature. Indexed by NORM_PARAM_NAME enum.
- float cn_feature_[kNumCNParams];
+ std::array<float, kNumCNParams> cn_feature_;
// The one and only geometric feature. (Aims at replacing cn_feature_).
// Indexed by GeoParams enum in picofeat.h
- int geo_feature_[GeoCount];
+ std::array<int, GeoCount> geo_feature_;
// Non-serialized cache data.
// Weight used for boosting training.
@@ -249,8 +251,8 @@ private:
bool is_error_;
// Randomizing factors.
- static const int kYShiftValues[kSampleYShiftSize];
- static const double kScaleValues[kSampleScaleSize];
+ static const std::array<int, kSampleYShiftSize> kYShiftValues;
+ static const std::array<double, kSampleScaleSize> kScaleValues;
};
ELISTIZEH(TrainingSample)
diff --git a/src/dict/stopper.cpp b/src/dict/stopper.cpp
index a1885daf..590d6cc7 100644
--- a/src/dict/stopper.cpp
+++ b/src/dict/stopper.cpp
@@ -198,7 +198,7 @@ bool Dict::NoDangerousAmbig(WERD_CHOICE *best_choice, DANGERR *fixpt, bool fix_r
ambig_blob_choices.push_back(lst);
}
}
- UNICHAR_ID wrong_ngram[MAX_AMBIG_SIZE + 1];
+ UNICHAR_ID wrong_ngram[kMaxAmbigSize + 1];
int wrong_ngram_index;
int blob_index = 0;
for (unsigned i = 0; i < best_choice->length(); blob_index += best_choice->state(i), ++i) {
@@ -218,12 +218,12 @@ bool Dict::NoDangerousAmbig(WERD_CHOICE *best_choice, DANGERR *fixpt, bool fix_r
for (spec_it.mark_cycle_pt(); !spec_it.cycled_list();) {
const AmbigSpec *ambig_spec = spec_it.data();
wrong_ngram[wrong_ngram_index + 1] = INVALID_UNICHAR_ID;
- int compare = UnicharIdArrayUtils::compare(wrong_ngram, ambig_spec->wrong_ngram);
+ int compare = UnicharIdArrayUtils::compare(wrong_ngram, ambig_spec->wrong_ngram.data());
if (stopper_debug_level > 2) {
tprintf("candidate ngram: ");
UnicharIdArrayUtils::print(wrong_ngram, getUnicharset());
tprintf("current ngram from spec: ");
- UnicharIdArrayUtils::print(ambig_spec->wrong_ngram, getUnicharset());
+ UnicharIdArrayUtils::print(ambig_spec->wrong_ngram.data(), getUnicharset());
tprintf("comparison result: %d\n", compare);
}
if (compare == 0) {
@@ -244,7 +244,7 @@ bool Dict::NoDangerousAmbig(WERD_CHOICE *best_choice, DANGERR *fixpt, bool fix_r
if (stopper_debug_level > 2) {
tprintf("replace ambiguity with %s : ",
getUnicharset().id_to_unichar(ambig_spec->correct_ngram_id));
- UnicharIdArrayUtils::print(ambig_spec->correct_fragments, getUnicharset());
+ UnicharIdArrayUtils::print(ambig_spec->correct_fragments.data(), getUnicharset());
}
ReplaceAmbig(i, ambig_spec->wrong_ngram_size, ambig_spec->correct_ngram_id, best_choice,
ratings);
@@ -252,7 +252,7 @@ bool Dict::NoDangerousAmbig(WERD_CHOICE *best_choice, DANGERR *fixpt, bool fix_r
// We found dang ambig - update ambig_blob_choices.
if (stopper_debug_level > 2) {
tprintf("found ambiguity: ");
- UnicharIdArrayUtils::print(ambig_spec->correct_fragments, getUnicharset());
+ UnicharIdArrayUtils::print(ambig_spec->correct_fragments.data(), getUnicharset());
}
ambigs_found = true;
for (int tmp_index = 0; tmp_index <= wrong_ngram_index; ++tmp_index) {
diff --git a/src/wordrec/lm_consistency.h b/src/wordrec/lm_consistency.h
index e9ae2fc9..3bd30e8d 100644
--- a/src/wordrec/lm_consistency.h
+++ b/src/wordrec/lm_consistency.h
@@ -20,7 +20,9 @@
#ifndef TESSERACT_WORDREC_LM_CONSISTENCY_H_
#define TESSERACT_WORDREC_LM_CONSISTENCY_H_
+#include <array>
#include <cstdint> // for INT16_MAX
+
#include "dawg.h" // for EDGE_REF, NO_EDGE
#include "dict.h" // for XH_GOOD, XH_INCONSISTENT, XHeightConsi...
@@ -128,10 +130,10 @@ struct LMConsistencyInfo {
int script_id;
int num_inconsistent_spaces;
// Metrics clumped by position.
- float xht_lo[kNumPos];
- float xht_hi[kNumPos];
- int16_t xht_count[kNumPos];
- int16_t xht_count_punc[kNumPos];
+ std::array<float, kNumPos> xht_lo;
+ std::array<float, kNumPos> xht_hi;
+ std::array<int16_t, kNumPos> xht_count;
+ std::array<int16_t, kNumPos> xht_count_punc;
int16_t xht_sp;
int16_t xpos_entropy;
bool invalid_punc;