Commit d3ffeeb1 for tesseract
commit d3ffeeb1904db826e3485ec13c0a8dd7c2bf5b99
Author: Stefan Weil <sw@weilnetz.de>
Date: Sun Jun 21 11:49:31 2026 +0200
Remove more unused function parameters
Signed-off-by: Stefan Weil <sw@weilnetz.de>
diff --git a/src/ccmain/pagesegmain.cpp b/src/ccmain/pagesegmain.cpp
index 7e66b150..835dedf8 100644
--- a/src/ccmain/pagesegmain.cpp
+++ b/src/ccmain/pagesegmain.cpp
@@ -278,7 +278,6 @@ ColumnFinder *Tesseract::SetupPageSegAndDetectOrientation(PageSegMode pageseg_mo
int vertical_y = 1;
TabVector_LIST v_lines;
TabVector_LIST h_lines;
- ICOORD bleft(0, 0);
ASSERT_HOST(pix_binary_ != nullptr);
if (tessedit_dump_pageseg_images) {
diff --git a/src/ccmain/tfacepp.cpp b/src/ccmain/tfacepp.cpp
index bfa1b31e..fde4ff10 100644
--- a/src/ccmain/tfacepp.cpp
+++ b/src/ccmain/tfacepp.cpp
@@ -182,7 +182,6 @@ void Tesseract::split_word(WERD_RES *word, unsigned split_pt, WERD_RES **right_p
delete word2->chopped_word;
word2->chopped_word = nullptr;
- const UNICHARSET &unicharset = *word->uch_set;
word->ClearResults();
word2->ClearResults();
word->chopped_word = chopped;
diff --git a/src/textord/makerow.cpp b/src/textord/makerow.cpp
index f156d059..692c646e 100644
--- a/src/textord/makerow.cpp
+++ b/src/textord/makerow.cpp
@@ -2454,7 +2454,6 @@ OVERLAP_STATE most_overlapping_row( // find best row
float overlap; // of blob & row
float bestover; // nearest row
float merge_top, merge_bottom; // size of merged row
- ICOORD testpt; // testing only
TO_ROW *row; // current row
TO_ROW *test_row; // for multiple overlaps
BLOBNBOX_IT blob_it; // for merging rows
diff --git a/src/textord/strokewidth.cpp b/src/textord/strokewidth.cpp
index bbaf6345..a54ae956 100644
--- a/src/textord/strokewidth.cpp
+++ b/src/textord/strokewidth.cpp
@@ -373,10 +373,9 @@ void StrokeWidth::GradeBlobsIntoPartitions(PageSegMode pageseg_mode, const FCOOR
// Clear and re Insert to take advantage of the removed diacritics.
Clear();
InsertBlobs(block);
- FCOORD skew;
FindTextlineFlowDirection(pageseg_mode, true);
PartitionFindResult r = FindInitialPartitions(pageseg_mode, rerotation, true, block,
- diacritic_blobs, part_grid, big_parts, &skew);
+ diacritic_blobs, part_grid, big_parts);
if (r == PFR_NOISE) {
tprintf("Detected %d diacritics\n", diacritic_blobs->length());
// Noise was found, and removed.
@@ -384,7 +383,7 @@ void StrokeWidth::GradeBlobsIntoPartitions(PageSegMode pageseg_mode, const FCOOR
InsertBlobs(block);
FindTextlineFlowDirection(pageseg_mode, true);
r = FindInitialPartitions(pageseg_mode, rerotation, false, block, diacritic_blobs, part_grid,
- big_parts, &skew);
+ big_parts);
}
nontext_map_ = nullptr;
projection_ = nullptr;
@@ -1273,8 +1272,8 @@ void StrokeWidth::SmoothNeighbourTypes(PageSegMode pageseg_mode, bool reset_all,
// called again after cleaning up the partly done work.
PartitionFindResult StrokeWidth::FindInitialPartitions(
PageSegMode pageseg_mode, const FCOORD &rerotation, bool find_problems, TO_BLOCK *block,
- BLOBNBOX_LIST *diacritic_blobs, ColPartitionGrid *part_grid, ColPartition_LIST *big_parts,
- FCOORD *skew_angle) {
+ BLOBNBOX_LIST *diacritic_blobs, ColPartitionGrid *part_grid,
+ ColPartition_LIST *big_parts) {
if (!FindingHorizontalOnly(pageseg_mode)) {
FindVerticalTextChains(part_grid);
}
@@ -1288,10 +1287,6 @@ PartitionFindResult StrokeWidth::FindInitialPartitions(
projection_->DisplayProjection();
}
#endif
- if (find_problems) {
- // TODO(rays) Do something to find skew, set skew_angle and return if there
- // is some.
- }
part_grid->SplitOverlappingPartitions(big_parts);
EasyMerges(part_grid);
RemoveLargeUnusedBlobs(block, big_parts);
diff --git a/src/textord/strokewidth.h b/src/textord/strokewidth.h
index 2e2ca189..e7b5f43e 100644
--- a/src/textord/strokewidth.h
+++ b/src/textord/strokewidth.h
@@ -211,7 +211,7 @@ private:
bool find_problems, TO_BLOCK *block,
BLOBNBOX_LIST *diacritic_blobs,
ColPartitionGrid *part_grid,
- ColPartition_LIST *big_parts, FCOORD *skew_angle);
+ ColPartition_LIST *big_parts);
// Detects noise by a significant increase in partition overlap from
// pre_overlap to now, and removes noise from the union of all the overlapping
// partitions, placing the blobs in diacritic_blobs. Returns true if any noise
diff --git a/src/textord/topitch.cpp b/src/textord/topitch.cpp
index cf906594..d041ad81 100644
--- a/src/textord/topitch.cpp
+++ b/src/textord/topitch.cpp
@@ -75,7 +75,6 @@ static int sort_floats(const void *arg1, const void *arg2) {
void compute_fixed_pitch(ICOORD page_tr, // top right
TO_BLOCK_LIST *port_blocks, // input list
float gradient, // page skew
- FCOORD rotation, // for drawing
bool testing_on) { // correct orientation
TO_BLOCK_IT block_it; // iterator
TO_BLOCK *block; // current block;
@@ -103,9 +102,7 @@ void compute_fixed_pitch(ICOORD page_tr, // top right
block_index = 1;
for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
block = block_it.data();
- if (!try_block_fixed(block)) {
- try_rows_fixed(block, block_index, testing_on);
- }
+ try_rows_fixed(block, block_index, testing_on);
block_index++;
}
}
@@ -487,16 +484,6 @@ bool try_doc_fixed( // determine pitch
return false;
}
-/**********************************************************************
- * try_block_fixed
- *
- * Try to call the entire block fixed.
- **********************************************************************/
-
-bool try_block_fixed(TO_BLOCK *block) {
- return false;
-}
-
/**********************************************************************
* try_rows_fixed
*
diff --git a/src/textord/topitch.h b/src/textord/topitch.h
index 4ef0e558..204759b6 100644
--- a/src/textord/topitch.h
+++ b/src/textord/topitch.h
@@ -37,7 +37,6 @@ extern double_VAR_H(textord_balance_factor);
void compute_fixed_pitch(ICOORD page_tr, // top right
TO_BLOCK_LIST *port_blocks, // input list
float gradient, // page skew
- FCOORD rotation, // for drawing
bool testing_on); // correct orientation
void fix_row_pitch(TO_ROW *bad_row, // row to fix
TO_BLOCK_LIST *blocks, // blocks to scan
@@ -53,8 +52,6 @@ bool compute_rows_pitch( // find line stats
);
// determine pitch
bool try_doc_fixed(TO_BLOCK_LIST *port_blocks, float gradient);
-// find line stats
-bool try_block_fixed(TO_BLOCK *block);
bool try_rows_fixed( // find line stats
TO_BLOCK *block, // block to do
int32_t block_index, // block number
diff --git a/src/textord/wordseg.cpp b/src/textord/wordseg.cpp
index 897359fa..000175d5 100644
--- a/src/textord/wordseg.cpp
+++ b/src/textord/wordseg.cpp
@@ -107,14 +107,14 @@ void make_words(tesseract::Textord *textord,
if (textord->use_cjk_fp_model()) {
compute_fixed_pitch_cjk(page_tr, port_blocks);
} else {
- compute_fixed_pitch(page_tr, port_blocks, gradient, FCOORD(0.0f, -1.0f),
+ compute_fixed_pitch(page_tr, port_blocks, gradient,
!bool(textord_test_landscape));
}
textord->to_spacing(port_blocks);
block_it.set_to_list(port_blocks);
for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
block = block_it.data();
- make_real_words(textord, block, FCOORD(1.0f, 0.0f));
+ make_real_words(textord, block);
}
}
@@ -468,8 +468,7 @@ int32_t row_words2( // compute space size
*/
void make_real_words(tesseract::Textord *textord,
- TO_BLOCK *block, // block to do
- FCOORD rotation // for drawing
+ TO_BLOCK *block // block to do
) {
TO_ROW *row; // current row
TO_ROW_IT row_it = block->get_rows();
diff --git a/src/textord/wordseg.h b/src/textord/wordseg.h
index 27b401dc..71712b98 100644
--- a/src/textord/wordseg.h
+++ b/src/textord/wordseg.h
@@ -51,8 +51,7 @@ int32_t row_words2( // compute space size
bool testing_on // for debug
);
void make_real_words(tesseract::Textord *textord,
- TO_BLOCK *block, // block to do
- FCOORD rotation // for drawing
+ TO_BLOCK *block // block to do
);
ROW *make_rep_words( // make a row
TO_ROW *row, // row to convert
diff --git a/src/training/classifier_tester.cpp b/src/training/classifier_tester.cpp
index 6398dedf..02a707fd 100644
--- a/src/training/classifier_tester.cpp
+++ b/src/training/classifier_tester.cpp
@@ -36,8 +36,7 @@ enum ClassifierName { CN_PRUNER, CN_FULL, CN_COUNT };
static const char *names[] = {"pruner", "full"};
static tesseract::ShapeClassifier *InitializeClassifier(const char *classifier_name,
- const UNICHARSET &unicharset, int argc,
- char **argv, tesseract::TessBaseAPI **api) {
+ tesseract::TessBaseAPI **api) {
// Decode the classifier string.
ClassifierName classifier = CN_COUNT;
for (int c = 0; c < CN_COUNT; ++c) {
@@ -106,7 +105,7 @@ int main(int argc, char **argv) {
tesseract::TessBaseAPI *api;
// Decode the classifier string.
tesseract::ShapeClassifier *shape_classifier =
- InitializeClassifier(FLAGS_classifier.c_str(), trainer->unicharset(), argc, argv, &api);
+ InitializeClassifier(FLAGS_classifier.c_str(), &api);
if (shape_classifier == nullptr) {
fprintf(stderr, "Classifier init failed!:%s\n", FLAGS_classifier.c_str());
return EXIT_FAILURE;
diff --git a/src/training/common/errorcounter.cpp b/src/training/common/errorcounter.cpp
index 5fb83cca..c3dbf070 100644
--- a/src/training/common/errorcounter.cpp
+++ b/src/training/common/errorcounter.cpp
@@ -90,7 +90,7 @@ double ErrorCounter::ComputeErrorRate(ShapeClassifier *classifier, int report_le
++total_samples;
}
// Create the appropriate error report.
- unscaled_error = counter.ReportErrors(report_level, boosting_mode, fontinfo_table, *it,
+ unscaled_error = counter.ReportErrors(report_level, boosting_mode, fontinfo_table,
unichar_error, fonts_report);
if (scaled_error != nullptr) {
*scaled_error = counter.scaled_error_;
@@ -356,7 +356,7 @@ bool ErrorCounter::AccumulateJunk(bool debug, const std::vector<UnicharRating> &
// If not nullptr, the report string is saved in fonts_report.
// (Ignoring report_level).
double ErrorCounter::ReportErrors(int report_level, CountTypes boosting_mode,
- const FontInfoTable &fontinfo_table, const SampleIterator &it,
+ const FontInfoTable &fontinfo_table,
double *unichar_error, std::string *fonts_report) {
// Compute totals over all the fonts and report individual font results
// when required.
diff --git a/src/training/common/errorcounter.h b/src/training/common/errorcounter.h
index c20c222c..a31a2b5f 100644
--- a/src/training/common/errorcounter.h
+++ b/src/training/common/errorcounter.h
@@ -176,7 +176,7 @@ private:
// unichar_error. If not nullptr, the report string is saved in fonts_report.
// (Ignoring report_level).
double ReportErrors(int report_level, CountTypes boosting_mode,
- const FontInfoTable &fontinfo_table, const SampleIterator &it,
+ const FontInfoTable &fontinfo_table,
double *unichar_error, std::string *fonts_report);
// Sets the report string to a combined human and machine-readable report
diff --git a/src/training/common/intfeaturemap.cpp b/src/training/common/intfeaturemap.cpp
index fbbc84c3..4ae209f0 100644
--- a/src/training/common/intfeaturemap.cpp
+++ b/src/training/common/intfeaturemap.cpp
@@ -144,7 +144,7 @@ int IntFeatureMap::FindNZFeatureMapping(SampleIterator *it) {
feature_map_.Setup();
compact_size_ = feature_map_.CompactSize();
mapping_changed_ = true;
- FinalizeMapping(it);
+ FinalizeMapping();
tprintf("%d non-zero features found in %d samples\n", compact_size_, total_samples);
return compact_size_;
}
@@ -152,7 +152,7 @@ int IntFeatureMap::FindNZFeatureMapping(SampleIterator *it) {
// After deleting some features, finish setting up the mapping, and map
// all the samples. Returns the size of the compacted feature space.
-int IntFeatureMap::FinalizeMapping(SampleIterator *it) {
+int IntFeatureMap::FinalizeMapping() {
if (mapping_changed_) {
feature_map_.CompleteMerges();
compact_size_ = feature_map_.CompactSize();
diff --git a/src/training/common/intfeaturemap.h b/src/training/common/intfeaturemap.h
index 330d5c2b..75323745 100644
--- a/src/training/common/intfeaturemap.h
+++ b/src/training/common/intfeaturemap.h
@@ -98,7 +98,7 @@ public:
// After deleting some features, finish setting up the mapping, and map
// all the samples. Returns the size of the compacted feature space.
- int FinalizeMapping(SampleIterator *it);
+ int FinalizeMapping();
// Indexes the given array of features to a vector of sorted indices.
void IndexAndSortFeatures(const INT_FEATURE_STRUCT *features, int num_features,
diff --git a/src/training/common/trainingsampleset.cpp b/src/training/common/trainingsampleset.cpp
index 4f17212f..d05459b0 100644
--- a/src/training/common/trainingsampleset.cpp
+++ b/src/training/common/trainingsampleset.cpp
@@ -405,8 +405,8 @@ float TrainingSampleSet::ClusterDistance(int font_id1, int class_id1, int font_i
float TrainingSampleSet::ComputeClusterDistance(int font_id1, int class_id1, int font_id2,
int class_id2,
const IntFeatureMap &feature_map) const {
- int dist = ReliablySeparable(font_id1, class_id1, font_id2, class_id2, feature_map, false);
- dist += ReliablySeparable(font_id2, class_id2, font_id1, class_id1, feature_map, false);
+ int dist = ReliablySeparable(font_id1, class_id1, font_id2, class_id2, feature_map);
+ dist += ReliablySeparable(font_id2, class_id2, font_id1, class_id1, feature_map);
int denominator = GetCanonicalFeatures(font_id1, class_id1).size();
denominator += GetCanonicalFeatures(font_id2, class_id2).size();
return static_cast<float>(dist) / denominator;
@@ -449,7 +449,7 @@ static void AddNearFeatures(const IntFeatureMap &feature_map, int f, int levels,
// ComputeCanonicalFeatures and ComputeCloudFeatures must have been called
// first, or the results will be nonsense.
int TrainingSampleSet::ReliablySeparable(int font_id1, int class_id1, int font_id2, int class_id2,
- const IntFeatureMap &feature_map, bool thorough) const {
+ const IntFeatureMap &feature_map) const {
int result = 0;
const TrainingSample *sample2 = GetCanonicalSample(font_id2, class_id2);
if (sample2 == nullptr) {
diff --git a/src/training/common/trainingsampleset.h b/src/training/common/trainingsampleset.h
index 0329f68e..03d95656 100644
--- a/src/training/common/trainingsampleset.h
+++ b/src/training/common/trainingsampleset.h
@@ -136,7 +136,7 @@ public:
// ComputeCanonicalFeatures and ComputeCloudFeatures must have been called
// first, or the results will be nonsense.
int ReliablySeparable(int font_id1, int class_id1, int font_id2, int class_id2,
- const IntFeatureMap &feature_map, bool thorough) const;
+ const IntFeatureMap &feature_map) const;
// Returns the total index of the requested sample.
// OrganizeByFontAndClass must have been already called.
diff --git a/src/training/dawg2wordlist.cpp b/src/training/dawg2wordlist.cpp
index e1b9cb31..81586411 100644
--- a/src/training/dawg2wordlist.cpp
+++ b/src/training/dawg2wordlist.cpp
@@ -25,7 +25,7 @@
using namespace tesseract;
-static std::unique_ptr<tesseract::Dawg> LoadSquishedDawg(const UNICHARSET &unicharset, const char *filename) {
+static std::unique_ptr<tesseract::Dawg> LoadSquishedDawg(const char *filename) {
const int kDictDebugLevel = 1;
tesseract::TFile dawg_file;
if (!dawg_file.Open(filename, nullptr)) {
@@ -90,7 +90,7 @@ int main(int argc, char *argv[]) {
tprintf("Error loading unicharset from %s.\n", unicharset_file);
return EXIT_FAILURE;
}
- auto dict = LoadSquishedDawg(unicharset, dawg_file);
+ auto dict = LoadSquishedDawg(dawg_file);
if (dict == nullptr) {
tprintf("Error loading dictionary from %s.\n", dawg_file);
return EXIT_FAILURE;
diff --git a/src/training/lstmeval.cpp b/src/training/lstmeval.cpp
index 89402a07..dc43149b 100644
--- a/src/training/lstmeval.cpp
+++ b/src/training/lstmeval.cpp
@@ -63,8 +63,7 @@ int main(int argc, char **argv) {
tprintf("Failed to load eval data from: %s\n", FLAGS_eval_listfile.c_str());
return EXIT_FAILURE;
}
- double errs = 0.0;
- std::string result = tester.RunEvalSync(0, &errs, mgr,
+ std::string result = tester.RunEvalSync(0, mgr,
/*training_stage (irrelevant)*/ 0, FLAGS_verbosity);
tprintf("%s\n", result.c_str());
return EXIT_SUCCESS;
diff --git a/src/training/mftraining.cpp b/src/training/mftraining.cpp
index c274d0af..030160cb 100644
--- a/src/training/mftraining.cpp
+++ b/src/training/mftraining.cpp
@@ -53,7 +53,7 @@ using namespace tesseract;
Public Code
-----------------------------------------------------------------------------*/
#ifndef GRAPHICS_DISABLED
-static void DisplayProtoList(const char *ch, LIST protolist) {
+static void DisplayProtoList(LIST protolist) {
auto window = std::make_unique<ScrollView>("Char samples", 50, 200, 520, 520, 260, 260, true);
LIST proto = protolist;
iterate(proto) {
@@ -101,7 +101,7 @@ static LIST ClusterOneConfig(int shape_id, const char *class_label, LIST mf_clas
MergeInsignificantProtos(proto_list, class_label, clusterer, &Config);
#ifndef GRAPHICS_DISABLED
if (strcmp(FLAGS_test_ch.c_str(), class_label) == 0) {
- DisplayProtoList(FLAGS_test_ch.c_str(), proto_list);
+ DisplayProtoList(proto_list);
}
#endif // !GRAPHICS_DISABLED
// Delete the protos that will not be used in the inttemp output file.
diff --git a/src/training/unicharset/lstmtester.cpp b/src/training/unicharset/lstmtester.cpp
index 052460cf..33a87a24 100644
--- a/src/training/unicharset/lstmtester.cpp
+++ b/src/training/unicharset/lstmtester.cpp
@@ -77,7 +77,7 @@ std::string LSTMTester::RunEvalAsync(int iteration, const double *training_error
// Runs an evaluation synchronously on the stored data and returns a string
// describing the results.
-std::string LSTMTester::RunEvalSync(int iteration, const double *training_errors,
+std::string LSTMTester::RunEvalSync(int iteration,
const TessdataManager &model_mgr, int training_stage,
int verbosity) {
LSTMTrainer trainer;
@@ -132,7 +132,7 @@ std::string LSTMTester::RunEvalSync(int iteration, const double *training_errors
// it will call UnlockRunning to release the lock after RunEvalSync completes.
void LSTMTester::ThreadFunc() {
test_result_ =
- RunEvalSync(test_iteration_, test_training_errors_, test_model_mgr_, test_training_stage_,
+ RunEvalSync(test_iteration_, test_model_mgr_, test_training_stage_,
/*verbosity*/ 0);
UnlockRunning();
}
diff --git a/src/training/unicharset/lstmtester.h b/src/training/unicharset/lstmtester.h
index b0e3a1cf..e0987538 100644
--- a/src/training/unicharset/lstmtester.h
+++ b/src/training/unicharset/lstmtester.h
@@ -59,7 +59,7 @@ public:
// Runs an evaluation synchronously on the stored eval data and returns a
// string describing the results. Args as RunEvalAsync, except verbosity,
// which outputs errors, if 1, or all results if 2.
- std::string RunEvalSync(int iteration, const double *training_errors, const TessdataManager &model_mgr,
+ std::string RunEvalSync(int iteration, const TessdataManager &model_mgr,
int training_stage, int verbosity);
private:
diff --git a/src/training/unicharset/lstmtrainer.cpp b/src/training/unicharset/lstmtrainer.cpp
index 77105817..d3a97816 100644
--- a/src/training/unicharset/lstmtrainer.cpp
+++ b/src/training/unicharset/lstmtrainer.cpp
@@ -933,7 +933,7 @@ Trainability LSTMTrainer::PrepareForBackward(const ImageData *trainingdata,
targets->Resize(*fwd_outputs, network_->NumOutputs());
LossType loss_type = OutputLossType();
if (loss_type == LT_SOFTMAX) {
- if (!ComputeTextTargets(*fwd_outputs, truth_labels, targets)) {
+ if (!ComputeTextTargets(truth_labels, targets)) {
tprintf("Compute simple targets failed for %s!\n",
trainingdata->imagefilename().c_str());
return UNENCODABLE;
@@ -955,8 +955,7 @@ Trainability LSTMTrainer::PrepareForBackward(const ImageData *trainingdata,
if (loss_type != LT_CTC) {
LabelsFromOutputs(*targets, &truth_labels, &xcoords);
}
- if (!DebugLSTMTraining(inputs, *trainingdata, *fwd_outputs, truth_labels,
- *targets)) {
+ if (!DebugLSTMTraining(inputs, *fwd_outputs, truth_labels, *targets)) {
tprintf("Input width was %d\n", inputs.Width());
return UNENCODABLE;
}
@@ -1131,7 +1130,6 @@ void LSTMTrainer::EmptyConstructor() {
// corresponding x_starts.
// Returns false if the truth string is empty.
bool LSTMTrainer::DebugLSTMTraining(const NetworkIO &inputs,
- const ImageData &trainingdata,
const NetworkIO &fwd_outputs,
const std::vector<int> &truth_labels,
const NetworkIO &outputs) {
@@ -1208,8 +1206,7 @@ void LSTMTrainer::DisplayTargets(const NetworkIO &targets,
// Builds a no-compromises target where the first positions should be the
// truth labels and the rest is padded with the null_char_.
-bool LSTMTrainer::ComputeTextTargets(const NetworkIO &outputs,
- const std::vector<int> &truth_labels,
+bool LSTMTrainer::ComputeTextTargets(const std::vector<int> &truth_labels,
NetworkIO *targets) {
if (truth_labels.size() > targets->Width()) {
tprintf("Error: transcription %s too long to fit into target of width %d\n",
diff --git a/src/training/unicharset/lstmtrainer.h b/src/training/unicharset/lstmtrainer.h
index e10514c6..25bf8990 100644
--- a/src/training/unicharset/lstmtrainer.h
+++ b/src/training/unicharset/lstmtrainer.h
@@ -343,7 +343,7 @@ protected:
// as an image in the given window, and the corresponding labels at the
// corresponding x_starts.
// Returns false if the truth string is empty.
- bool DebugLSTMTraining(const NetworkIO &inputs, const ImageData &trainingdata,
+ bool DebugLSTMTraining(const NetworkIO &inputs,
const NetworkIO &fwd_outputs,
const std::vector<int> &truth_labels,
const NetworkIO &outputs);
@@ -353,8 +353,7 @@ protected:
// Builds a no-compromises target where the first positions should be the
// truth labels and the rest is padded with the null_char_.
- bool ComputeTextTargets(const NetworkIO &outputs,
- const std::vector<int> &truth_labels,
+ bool ComputeTextTargets(const std::vector<int> &truth_labels,
NetworkIO *targets);
// Builds a target using standard CTC. truth_labels should be pre-padded with
diff --git a/src/wordrec/language_model.cpp b/src/wordrec/language_model.cpp
index 5a6c4d44..d4c8c62b 100644
--- a/src/wordrec/language_model.cpp
+++ b/src/wordrec/language_model.cpp
@@ -248,7 +248,7 @@ static bool HasBetterCaseVariant(const UNICHARSET &unicharset, const BLOB_CHOICE
*/
bool LanguageModel::UpdateState(bool just_classified, int curr_col, int curr_row,
BLOB_CHOICE_LIST *curr_list, LanguageModelState *parent_node,
- LMPainPoints *pain_points, WERD_RES *word_res,
+ WERD_RES *word_res,
BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle) {
if (language_model_debug_level > 0) {
tprintf("\nUpdateState: col=%d row=%d %s", curr_col, curr_row,
@@ -337,7 +337,7 @@ bool LanguageModel::UpdateState(bool just_classified, int curr_col, int curr_row
blob_choice_flags |= kLowerCaseFlag;
}
new_changed |= AddViterbiStateEntry(blob_choice_flags, denom, word_end, curr_col, curr_row,
- choice, curr_state, nullptr, pain_points, word_res,
+ choice, curr_state, nullptr, word_res,
best_choice_bundle, blamer_bundle);
} else {
// Get viterbi entries from each parent ViterbiStateEntry.
@@ -367,7 +367,7 @@ bool LanguageModel::UpdateState(bool just_classified, int curr_col, int curr_row
// Create a new ViterbiStateEntry if BLOB_CHOICE in c_it.data()
// looks good according to the Dawgs or character ngram model.
new_changed |= AddViterbiStateEntry(top_choice_flags, denom, word_end, curr_col, curr_row,
- c_it.data(), curr_state, parent_vse, pain_points,
+ c_it.data(), curr_state, parent_vse,
word_res, best_choice_bundle, blamer_bundle);
}
}
@@ -577,7 +577,7 @@ ViterbiStateEntry *LanguageModel::GetNextParentVSE(bool just_classified, bool mi
bool LanguageModel::AddViterbiStateEntry(LanguageModelFlagsType top_choice_flags, float denom,
bool word_end, int curr_col, int curr_row, BLOB_CHOICE *b,
LanguageModelState *curr_state,
- ViterbiStateEntry *parent_vse, LMPainPoints *pain_points,
+ ViterbiStateEntry *parent_vse,
WERD_RES *word_res, BestChoiceBundle *best_choice_bundle,
BlamerBundle *blamer_bundle) {
ViterbiStateEntry_IT vit;
@@ -603,7 +603,7 @@ bool LanguageModel::AddViterbiStateEntry(LanguageModelFlagsType top_choice_flags
}
// Invoke Dawg language model component.
- LanguageModelDawgInfo *dawg_info = GenerateDawgInfo(word_end, curr_col, curr_row, *b, parent_vse);
+ LanguageModelDawgInfo *dawg_info = GenerateDawgInfo(word_end, curr_col, *b, parent_vse);
float outline_length = AssociateUtils::ComputeOutlineLength(rating_cert_scale_, *b);
// Invoke Ngram language model component.
@@ -611,7 +611,7 @@ bool LanguageModel::AddViterbiStateEntry(LanguageModelFlagsType top_choice_flags
if (language_model_ngram_on) {
ngram_info =
GenerateNgramInfo(dict_->getUnicharset().id_to_unichar(b->unichar_id()), b->certainty(),
- denom, curr_col, curr_row, outline_length, parent_vse);
+ denom, outline_length, parent_vse);
ASSERT_HOST(ngram_info != nullptr);
}
bool liked_by_language_model =
@@ -679,7 +679,7 @@ bool LanguageModel::AddViterbiStateEntry(LanguageModelFlagsType top_choice_flags
// Invoke Top Choice language model component to make the final adjustments
// to new_vse->top_choice_flags.
if (!curr_state->viterbi_state_entries.empty() && new_vse->top_choice_flags) {
- GenerateTopChoiceInfo(new_vse, parent_vse, curr_state);
+ GenerateTopChoiceInfo(new_vse, curr_state);
}
// If language model components did not like this unichar - return.
@@ -713,7 +713,7 @@ bool LanguageModel::AddViterbiStateEntry(LanguageModelFlagsType top_choice_flags
// Update best choice if needed.
if (word_end) {
- UpdateBestChoice(new_vse, pain_points, word_res, best_choice_bundle, blamer_bundle);
+ UpdateBestChoice(new_vse, word_res, best_choice_bundle, blamer_bundle);
// Discard the entry if UpdateBestChoice() found flaws in it.
if (new_vse->cost >= WERD_CHOICE::kBadRating && new_vse != best_choice_bundle->best_vse) {
if (language_model_debug_level > 1) {
@@ -774,7 +774,6 @@ bool LanguageModel::AddViterbiStateEntry(LanguageModelFlagsType top_choice_flags
}
void LanguageModel::GenerateTopChoiceInfo(ViterbiStateEntry *new_vse,
- const ViterbiStateEntry *parent_vse,
LanguageModelState *lms) {
ViterbiStateEntry_IT vit(&(lms->viterbi_state_entries));
for (vit.mark_cycle_pt();
@@ -789,7 +788,7 @@ void LanguageModel::GenerateTopChoiceInfo(ViterbiStateEntry *new_vse,
}
}
-LanguageModelDawgInfo *LanguageModel::GenerateDawgInfo(bool word_end, int curr_col, int curr_row,
+LanguageModelDawgInfo *LanguageModel::GenerateDawgInfo(bool word_end, int curr_col,
const BLOB_CHOICE &b,
const ViterbiStateEntry *parent_vse) {
// Initialize active_dawgs from parent_vse if it is not nullptr.
@@ -886,7 +885,7 @@ LanguageModelDawgInfo *LanguageModel::GenerateDawgInfo(bool word_end, int curr_c
}
LanguageModelNgramInfo *LanguageModel::GenerateNgramInfo(const char *unichar, float certainty,
- float denom, int curr_col, int curr_row,
+ float denom,
float outline_length,
const ViterbiStateEntry *parent_vse) {
// Initialize parent context.
@@ -1233,7 +1232,7 @@ float LanguageModel::ComputeAdjustedPathCost(ViterbiStateEntry *vse) {
}
}
-void LanguageModel::UpdateBestChoice(ViterbiStateEntry *vse, LMPainPoints *pain_points,
+void LanguageModel::UpdateBestChoice(ViterbiStateEntry *vse,
WERD_RES *word_res, BestChoiceBundle *best_choice_bundle,
BlamerBundle *blamer_bundle) {
bool truth_path;
diff --git a/src/wordrec/language_model.h b/src/wordrec/language_model.h
index 98f90f74..24e4bbb7 100644
--- a/src/wordrec/language_model.h
+++ b/src/wordrec/language_model.h
@@ -77,8 +77,7 @@ public:
float rating_cert_scale);
// Updates language model state of the given BLOB_CHOICE_LIST (from
- // the ratings matrix) and its parent. Updates pain_points if new
- // problematic points are found in the segmentation graph.
+ // the ratings matrix) and its parent.
//
// At most language_model_viterbi_list_size are kept in each
// LanguageModelState.viterbi_state_entries list.
@@ -89,7 +88,7 @@ public:
// The list ordered by cost that is computed collectively by several
// language model components (currently dawg and ngram components).
bool UpdateState(bool just_classified, int curr_col, int curr_row, BLOB_CHOICE_LIST *curr_list,
- LanguageModelState *parent_node, LMPainPoints *pain_points, WERD_RES *word_res,
+ LanguageModelState *parent_node, WERD_RES *word_res,
BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle);
// Returns true if an acceptable best choice was discovered.
@@ -182,7 +181,7 @@ protected:
bool AddViterbiStateEntry(LanguageModelFlagsType top_choice_flags, float denom, bool word_end,
int curr_col, int curr_row, BLOB_CHOICE *b,
LanguageModelState *curr_state, ViterbiStateEntry *parent_vse,
- LMPainPoints *pain_points, WERD_RES *word_res,
+ WERD_RES *word_res,
BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle);
// Determines whether a potential entry is a true top choice and
@@ -190,7 +189,7 @@ protected:
//
// Note: The function assumes that b, top_choice_flags and changed
// are not nullptr.
- void GenerateTopChoiceInfo(ViterbiStateEntry *new_vse, const ViterbiStateEntry *parent_vse,
+ void GenerateTopChoiceInfo(ViterbiStateEntry *new_vse,
LanguageModelState *lms);
// Calls dict_->LetterIsOk() with DawgArgs initialized from parent_vse and
@@ -198,8 +197,7 @@ protected:
// with updated active dawgs, constraints and permuter.
//
// Note: the caller is responsible for deleting the returned pointer.
- LanguageModelDawgInfo *GenerateDawgInfo(bool word_end, int curr_col, int curr_row,
- const BLOB_CHOICE &b,
+ LanguageModelDawgInfo *GenerateDawgInfo(bool word_end, int curr_col, const BLOB_CHOICE &b,
const ViterbiStateEntry *parent_vse);
// Computes p(unichar | parent context) and records it in ngram_cost.
@@ -210,7 +208,7 @@ protected:
//
// Note: the caller is responsible for deleting the returned pointer.
LanguageModelNgramInfo *GenerateNgramInfo(const char *unichar, float certainty, float denom,
- int curr_col, int curr_row, float outline_length,
+ float outline_length,
const ViterbiStateEntry *parent_vse);
// Computes -(log(prob(classifier)) + log(prob(ngram model)))
@@ -240,8 +238,8 @@ protected:
// constructed WERD_CHOICE is better than the best/raw choice recorded
// in the best_choice_bundle, this function updates the corresponding
// fields and sets best_choice_bunldle->updated to true.
- void UpdateBestChoice(ViterbiStateEntry *vse, LMPainPoints *pain_points, WERD_RES *word_res,
- BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle);
+ void UpdateBestChoice(ViterbiStateEntry *vse, WERD_RES *word_res, BestChoiceBundle *best_choice_bundle,
+ BlamerBundle *blamer_bundle);
// Constructs a WERD_CHOICE by tracing parent pointers starting with
// the given LanguageModelStateEntry. Returns the constructed word.
diff --git a/src/wordrec/lm_pain_points.h b/src/wordrec/lm_pain_points.h
index 77a861ac..40aef5ab 100644
--- a/src/wordrec/lm_pain_points.h
+++ b/src/wordrec/lm_pain_points.h
@@ -64,11 +64,10 @@ public:
return LMPainPointsTypeName[type];
}
- LMPainPoints(int max, float rat, bool fp, const Dict *d, int deb)
+ LMPainPoints(int max, float rat, bool fp, int deb)
: max_heap_size_(max)
, max_char_wh_ratio_(rat)
, fixed_pitch_(fp)
- , dict_(d)
, debug_level_(deb) {}
~LMPainPoints() = default;
@@ -122,8 +121,6 @@ private:
float max_char_wh_ratio_;
// Set to true if fixed pitch should be assumed.
bool fixed_pitch_;
- // Cached pointer to dictionary.
- const Dict *dict_;
// Debug level for print statements.
int debug_level_;
};
diff --git a/src/wordrec/segsearch.cpp b/src/wordrec/segsearch.cpp
index 2735c6dd..5669a323 100644
--- a/src/wordrec/segsearch.cpp
+++ b/src/wordrec/segsearch.cpp
@@ -33,7 +33,7 @@ namespace tesseract {
void Wordrec::SegSearch(WERD_RES *word_res, BestChoiceBundle *best_choice_bundle,
BlamerBundle *blamer_bundle) {
LMPainPoints pain_points(segsearch_max_pain_points, segsearch_max_char_wh_ratio,
- assume_fixed_pitch_char_segment, &getDict(), segsearch_debug_level);
+ assume_fixed_pitch_char_segment, segsearch_debug_level);
// Compute scaling factor that will help us recover blob outline length
// from classifier rating and certainty for the blob.
float rating_cert_scale = -1.0 * getDict().certainty_scale / rating_scale;
@@ -186,7 +186,7 @@ void Wordrec::UpdateSegSearchNodes(float rating_cert_scale, int starting_col,
LanguageModelState *parent_node = col == 0 ? nullptr : best_choice_bundle->beam[col - 1];
if (current_node != nullptr &&
language_model_->UpdateState((*pending)[col].IsRowJustClassified(row), col, row,
- current_node, parent_node, pain_points, word_res,
+ current_node, parent_node, word_res,
best_choice_bundle, blamer_bundle) &&
row + 1 < ratings->dimension()) {
// Since the language model state of this entry changed, process all
diff --git a/src/wordrec/tface.cpp b/src/wordrec/tface.cpp
index 107085d8..c058d4ab 100644
--- a/src/wordrec/tface.cpp
+++ b/src/wordrec/tface.cpp
@@ -59,7 +59,7 @@ void Wordrec::program_editup(const std::string &textbase, TessdataManager *init_
* Cleanup and exit the recog program.
*/
int Wordrec::end_recog() {
- program_editdown(0);
+ program_editdown();
return (0);
}
@@ -70,7 +70,7 @@ int Wordrec::end_recog() {
* This function holds any necessary post processing for the Wise Owl
* program.
*/
-void Wordrec::program_editdown(int32_t elapsed_time) {
+void Wordrec::program_editdown() {
#ifndef DISABLED_LEGACY_ENGINE
EndAdaptiveClassifier();
#endif // ndef DISABLED_LEGACY_ENGINE
diff --git a/src/wordrec/wordrec.h b/src/wordrec/wordrec.h
index f57d6fe9..a50ad871 100644
--- a/src/wordrec/wordrec.h
+++ b/src/wordrec/wordrec.h
@@ -52,7 +52,7 @@ public:
// tface.cpp
void program_editup(const std::string &textbase, TessdataManager *init_classifier,
TessdataManager *init_dict);
- void program_editdown(int32_t elapsed_time);
+ void program_editdown();
int end_recog();
int dict_word(const WERD_CHOICE &word);
@@ -246,7 +246,7 @@ public:
void program_editup(const std::string &textbase, TessdataManager *init_classifier,
TessdataManager *init_dict);
void cc_recog(WERD_RES *word);
- void program_editdown(int32_t elapsed_time);
+ void program_editdown();
void set_pass1();
void set_pass2();
int end_recog();
diff --git a/unittest/intfeaturemap_test.cc b/unittest/intfeaturemap_test.cc
index b422862f..228628ca 100644
--- a/unittest/intfeaturemap_test.cc
+++ b/unittest/intfeaturemap_test.cc
@@ -106,7 +106,7 @@ TEST_F(IntFeatureMapTest, Exhaustive) {
// test again.
map.DeleteMapFeature(0);
map.DeleteMapFeature(total_buckets - 1);
- map.FinalizeMapping(nullptr);
+ map.FinalizeMapping();
map.IndexAndSortFeatures(features.get(), total_size, &index_features);
// Has no effect on index features.
EXPECT_EQ(total_size, index_features.size());