Commit 3796e8ad for tesseract
commit 3796e8adc0d1541c9083d64555819d4f898e9327
Author: Stefan Weil <sw@weilnetz.de>
Date: Thu Jun 4 15:18:11 2026 +0200
Replace `const char *` with `std::string_view` in function arguments
Assisted-by: OpenCode / BigPickle
Signed-off-by: Stefan Weil <sw@weilnetz.de>
diff --git a/src/api/baseapi.cpp b/src/api/baseapi.cpp
index 89e2f669..8dd4ff6f 100644
--- a/src/api/baseapi.cpp
+++ b/src/api/baseapi.cpp
@@ -71,6 +71,7 @@
#include <memory> // for std::unique_ptr
#include <set> // for std::pair
#include <sstream> // for std::stringstream
+#include <string_view>
#include <vector> // for std::vector
#include <allheaders.h> // for pixDestroy, boxCreate, boxaAddBox, box...
@@ -125,18 +126,21 @@ static STRING_VAR(classify_font_name, kUnknownFontName,
// /path/to/dir/[lang].[fontname].exp[num]
// The [lang], [fontname] and [num] fields should not have '.' characters.
// If the global parameter classify_font_name is set, its value is used instead.
-static void ExtractFontName(const char* filename, std::string* fontname) {
+static void ExtractFontName(std::string_view filename, std::string* fontname) {
*fontname = classify_font_name;
if (*fontname == kUnknownFontName) {
// filename is expected to be of the form [lang].[fontname].exp[num]
// The [lang], [fontname] and [num] fields should not have '.' characters.
- const char *basename = strrchr(filename, '/');
- const char *firstdot = strchr(basename ? basename : filename, '.');
- const char *lastdot = strrchr(filename, '.');
- if (firstdot != lastdot && firstdot != nullptr && lastdot != nullptr) {
+ auto basename_pos = filename.find_last_of('/');
+ auto view = (basename_pos != std::string_view::npos)
+ ? filename.substr(basename_pos + 1)
+ : filename;
+ auto firstdot = view.find_first_of('.');
+ auto lastdot = view.find_last_of('.');
+ if (firstdot != lastdot && firstdot != std::string_view::npos &&
+ lastdot != std::string_view::npos) {
++firstdot;
- *fontname = firstdot;
- fontname->resize(lastdot - firstdot);
+ *fontname = view.substr(firstdot, lastdot - firstdot);
}
}
}
@@ -813,7 +817,7 @@ int TessBaseAPI::Recognize(ETEXT_DESC *monitor) {
#ifndef DISABLED_LEGACY_ENGINE
} else if (tesseract_->tessedit_train_from_boxes) {
std::string fontname;
- ExtractFontName(output_file_.c_str(), &fontname);
+ ExtractFontName(output_file_, &fontname);
tesseract_->ApplyBoxTraining(fontname, page_res_);
} else if (tesseract_->tessedit_ambigs_training) {
FILE *training_output_file = tesseract_->init_recog_training(input_file_.c_str());
diff --git a/src/ccstruct/pageres.cpp b/src/ccstruct/pageres.cpp
index 1bc5e163..4b52e76a 100644
--- a/src/ccstruct/pageres.cpp
+++ b/src/ccstruct/pageres.cpp
@@ -40,6 +40,7 @@
#include <cassert> // for assert
#include <cstdint> // for INT32_MAX
#include <cstring> // for strlen
+#include <string_view> // for std::string_view
struct Pix;
@@ -1021,14 +1022,19 @@ void WERD_RES::MergeAdjacentBlobs(unsigned index) {
// Utility function for fix_quotes
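-// Return true if the next character in the string (given the UTF8 length in
-// bytes) is a quote character.
+// Return true if str consists of exactly one quote character in UTF-8
+// encoding (', ` or a curved single quote U+2018 / U+2019).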
-static int is_simple_quote(const char *signed_str, int length) {
- const auto *str = reinterpret_cast<const unsigned char *>(signed_str);
+static int is_simple_quote(std::string_view str) {
// Standard 1 byte quotes.
- return (length == 1 && (*str == '\'' || *str == '`')) ||
- // UTF-8 3 bytes curved quotes.
- (length == 3 &&
- ((*str == 0xe2 && *(str + 1) == 0x80 && *(str + 2) == 0x98) ||
- (*str == 0xe2 && *(str + 1) == 0x80 && *(str + 2) == 0x99)));
+ if (str.size() == 1 && (str[0] == '\'' || str[0] == '`')) {
+ return true;
+ }
+ // UTF-8 3 bytes curved quotes.
+ if (str.size() == 3 && static_cast<unsigned char>(str[0]) == 0xe2 &&
+ static_cast<unsigned char>(str[1]) == 0x80 &&
+ (static_cast<unsigned char>(str[2]) == 0x98 ||
+ static_cast<unsigned char>(str[2]) == 0x99)) {
+ return true;
+ }
+ return false;
}
// Callback helper for fix_quotes returns a double quote if both
@@ -1036,8 +1042,7 @@ static int is_simple_quote(const char *signed_str, int length) {
UNICHAR_ID WERD_RES::BothQuotes(UNICHAR_ID id1, UNICHAR_ID id2) {
const char *ch = uch_set->id_to_unichar(id1);
const char *next_ch = uch_set->id_to_unichar(id2);
- if (is_simple_quote(ch, strlen(ch)) &&
- is_simple_quote(next_ch, strlen(next_ch))) {
+ if (is_simple_quote(ch) && is_simple_quote(next_ch)) {
return uch_set->unichar_to_id("\"");
}
return INVALID_UNICHAR_ID;
diff --git a/src/training/common/commandlineflags.cpp b/src/training/common/commandlineflags.cpp
index 8ecd1cb3..de389293 100644
--- a/src/training/common/commandlineflags.cpp
+++ b/src/training/common/commandlineflags.cpp
@@ -13,12 +13,13 @@
#include <cmath> // for std::isnan, NAN
#include <locale> // for std::locale::classic
#include <sstream> // for std::stringstream
+#include <string_view> // for std::string_view
#include <vector> // for std::vector
#include "errcode.h"
#include "tprintf.h" // for tprintf
namespace tesseract {
-static bool IntFlagExists(const char *flag_name, int32_t *value) {
+static bool IntFlagExists(std::string_view flag_name, int32_t *value) {
std::string full_flag_name("FLAGS_");
full_flag_name += flag_name;
std::vector<IntParam *> empty;
@@ -31,7 +32,7 @@ static bool IntFlagExists(const char *flag_name, int32_t *value) {
return true;
}
-static bool DoubleFlagExists(const char *flag_name, double *value) {
+static bool DoubleFlagExists(std::string_view flag_name, double *value) {
std::string full_flag_name("FLAGS_");
full_flag_name += flag_name;
std::vector<DoubleParam *> empty;
@@ -44,7 +45,7 @@ static bool DoubleFlagExists(const char *flag_name, double *value) {
return true;
}
-static bool BoolFlagExists(const char *flag_name, bool *value) {
+static bool BoolFlagExists(std::string_view flag_name, bool *value) {
std::string full_flag_name("FLAGS_");
full_flag_name += flag_name;
std::vector<BoolParam *> empty;
@@ -57,7 +58,7 @@ static bool BoolFlagExists(const char *flag_name, bool *value) {
return true;
}
-static bool StringFlagExists(const char *flag_name, const char **value) {
+static bool StringFlagExists(std::string_view flag_name, const char **value) {
std::string full_flag_name("FLAGS_");
full_flag_name += flag_name;
std::vector<StringParam *> empty;
@@ -67,7 +68,7 @@ static bool StringFlagExists(const char *flag_name, const char **value) {
return p != nullptr;
}
-static void SetIntFlagValue(const char *flag_name, const int32_t new_val) {
+static void SetIntFlagValue(std::string_view flag_name, const int32_t new_val) {
std::string full_flag_name("FLAGS_");
full_flag_name += flag_name;
std::vector<IntParam *> empty;
@@ -77,7 +78,7 @@ static void SetIntFlagValue(const char *flag_name, const int32_t new_val) {
p->set_value(new_val);
}
-static void SetDoubleFlagValue(const char *flag_name, const double new_val) {
+static void SetDoubleFlagValue(std::string_view flag_name, const double new_val) {
std::string full_flag_name("FLAGS_");
full_flag_name += flag_name;
std::vector<DoubleParam *> empty;
@@ -87,7 +88,7 @@ static void SetDoubleFlagValue(const char *flag_name, const double new_val) {
p->set_value(new_val);
}
-static void SetBoolFlagValue(const char *flag_name, const bool new_val) {
+static void SetBoolFlagValue(std::string_view flag_name, const bool new_val) {
std::string full_flag_name("FLAGS_");
full_flag_name += flag_name;
std::vector<BoolParam *> empty;
@@ -97,7 +98,7 @@ static void SetBoolFlagValue(const char *flag_name, const bool new_val) {
p->set_value(new_val);
}
-static void SetStringFlagValue(const char *flag_name, const char *new_val) {
+static void SetStringFlagValue(std::string_view flag_name, const char *new_val) {
std::string full_flag_name("FLAGS_");
full_flag_name += flag_name;
std::vector<StringParam *> empty;
@@ -218,7 +219,7 @@ void ParseCommandLineFlags(const char *usage, int *argc, char ***argv, const boo
// Find the flag name in the list of global flags.
// int32_t flag
int32_t int_val;
- if (IntFlagExists(lhs.c_str(), &int_val)) {
+ if (IntFlagExists(lhs, &int_val)) {
if (rhs != nullptr) {
if (!strlen(rhs)) {
// Bad input of the format --int_flag=
@@ -242,13 +243,13 @@ void ParseCommandLineFlags(const char *usage, int *argc, char ***argv, const boo
}
}
}
- SetIntFlagValue(lhs.c_str(), int_val);
+ SetIntFlagValue(lhs, int_val);
continue;
}
// double flag
double double_val;
- if (DoubleFlagExists(lhs.c_str(), &double_val)) {
+ if (DoubleFlagExists(lhs, &double_val)) {
if (rhs != nullptr) {
if (!strlen(rhs)) {
// Bad input of the format --double_flag=
@@ -272,14 +273,14 @@ void ParseCommandLineFlags(const char *usage, int *argc, char ***argv, const boo
}
}
}
- SetDoubleFlagValue(lhs.c_str(), double_val);
+ SetDoubleFlagValue(lhs, double_val);
continue;
}
// Bool flag. Allow input forms --flag (equivalent to --flag=true),
// --flag=false, --flag=true, --flag=0 and --flag=1
bool bool_val;
- if (BoolFlagExists(lhs.c_str(), &bool_val)) {
+ if (BoolFlagExists(lhs, &bool_val)) {
if (rhs == nullptr) {
// --flag form
bool_val = true;
@@ -298,13 +299,13 @@ void ParseCommandLineFlags(const char *usage, int *argc, char ***argv, const boo
exit(1);
}
}
- SetBoolFlagValue(lhs.c_str(), bool_val);
+ SetBoolFlagValue(lhs, bool_val);
continue;
}
// string flag
const char *string_val;
- if (StringFlagExists(lhs.c_str(), &string_val)) {
+ if (StringFlagExists(lhs, &string_val)) {
if (rhs != nullptr) {
string_val = rhs;
} else {
@@ -316,7 +317,7 @@ void ParseCommandLineFlags(const char *usage, int *argc, char ***argv, const boo
string_val = (*argv)[++i];
}
}
- SetStringFlagValue(lhs.c_str(), string_val);
+ SetStringFlagValue(lhs, string_val);
continue;
}
diff --git a/src/training/pango/pango_font_info.cpp b/src/training/pango/pango_font_info.cpp
index 610f42cf..e3a681ce 100644
--- a/src/training/pango/pango_font_info.cpp
+++ b/src/training/pango/pango_font_info.cpp
@@ -42,6 +42,7 @@
#include <cstdio>
#include <cstdlib>
#include <cstring>
+#include <string_view>
#ifndef _MSC_VER
# include <sys/param.h>
@@ -535,11 +536,11 @@ bool FontUtils::IsAvailableFont(const char *input_query_desc, std::string *best_
return equal;
}
-static bool ShouldIgnoreFontFamilyName(const char *query) {
+static bool ShouldIgnoreFontFamilyName(std::string_view query) {
static const char *kIgnoredFamilyNames[] = {"Sans", "Serif", "Monospace", nullptr};
const char **list = kIgnoredFamilyNames;
for (; *list != nullptr; ++list) {
- if (!strcmp(*list, query)) {
+ if (query == *list) {
return true;
}
}