Commit 3796e8ad for tesseract

commit 3796e8adc0d1541c9083d64555819d4f898e9327
Author: Stefan Weil <sw@weilnetz.de>
Date:   Thu Jun 4 15:18:11 2026 +0200

    Replace `const char *` with `std::string_view` in arguments

    Assisted-by: OpenCode / BigPickle
    Signed-off-by: Stefan Weil <sw@weilnetz.de>

diff --git a/src/api/baseapi.cpp b/src/api/baseapi.cpp
index 89e2f669..8dd4ff6f 100644
--- a/src/api/baseapi.cpp
+++ b/src/api/baseapi.cpp
@@ -71,6 +71,7 @@
 #include <memory>   // for std::unique_ptr
 #include <set>      // for std::pair
 #include <sstream>  // for std::stringstream
+#include <string_view>
 #include <vector>   // for std::vector

 #include <allheaders.h> // for pixDestroy, boxCreate, boxaAddBox, box...
@@ -125,18 +126,21 @@ static STRING_VAR(classify_font_name, kUnknownFontName,
 // /path/to/dir/[lang].[fontname].exp[num]
 // The [lang], [fontname] and [num] fields should not have '.' characters.
 // If the global parameter classify_font_name is set, its value is used instead.
-static void ExtractFontName(const char* filename, std::string* fontname) {
+static void ExtractFontName(std::string_view filename, std::string* fontname) {
   *fontname = classify_font_name;
   if (*fontname == kUnknownFontName) {
     // filename is expected to be of the form [lang].[fontname].exp[num]
     // The [lang], [fontname] and [num] fields should not have '.' characters.
-    const char *basename = strrchr(filename, '/');
-    const char *firstdot = strchr(basename ? basename : filename, '.');
-    const char *lastdot  = strrchr(filename, '.');
-    if (firstdot != lastdot && firstdot != nullptr && lastdot != nullptr) {
+    auto basename_pos = filename.find_last_of('/');
+    auto view = (basename_pos != std::string_view::npos)
+                    ? filename.substr(basename_pos + 1)
+                    : filename;
+    auto firstdot = view.find_first_of('.');
+    auto lastdot = view.find_last_of('.');
+    if (firstdot != lastdot && firstdot != std::string_view::npos &&
+        lastdot != std::string_view::npos) {
       ++firstdot;
-      *fontname = firstdot;
-      fontname->resize(lastdot - firstdot);
+      *fontname = view.substr(firstdot, lastdot - firstdot);
     }
   }
 }
@@ -813,7 +817,7 @@ int TessBaseAPI::Recognize(ETEXT_DESC *monitor) {
 #ifndef DISABLED_LEGACY_ENGINE
   } else if (tesseract_->tessedit_train_from_boxes) {
     std::string fontname;
-    ExtractFontName(output_file_.c_str(), &fontname);
+    ExtractFontName(output_file_, &fontname);
     tesseract_->ApplyBoxTraining(fontname, page_res_);
   } else if (tesseract_->tessedit_ambigs_training) {
     FILE *training_output_file = tesseract_->init_recog_training(input_file_.c_str());
diff --git a/src/ccstruct/pageres.cpp b/src/ccstruct/pageres.cpp
index 1bc5e163..4b52e76a 100644
--- a/src/ccstruct/pageres.cpp
+++ b/src/ccstruct/pageres.cpp
@@ -40,6 +40,7 @@
 #include <cassert> // for assert
 #include <cstdint> // for INT32_MAX
 #include <cstring> // for strlen
+#include <string_view> // for std::string_view

 struct Pix;

@@ -1021,14 +1022,19 @@ void WERD_RES::MergeAdjacentBlobs(unsigned index) {
 // Utility function for fix_quotes
 // Return true if the next character in the string (given the UTF8 length in
 // bytes) is a quote character.
-static int is_simple_quote(const char *signed_str, int length) {
-  const auto *str = reinterpret_cast<const unsigned char *>(signed_str);
+static int is_simple_quote(std::string_view str) {
   // Standard 1 byte quotes.
-  return (length == 1 && (*str == '\'' || *str == '`')) ||
-         // UTF-8 3 bytes curved quotes.
-         (length == 3 &&
-          ((*str == 0xe2 && *(str + 1) == 0x80 && *(str + 2) == 0x98) ||
-           (*str == 0xe2 && *(str + 1) == 0x80 && *(str + 2) == 0x99)));
+  if (str.size() == 1 && (str[0] == '\'' || str[0] == '`')) {
+    return true;
+  }
+  // UTF-8 3 bytes curved quotes.
+  if (str.size() == 3 && static_cast<unsigned char>(str[0]) == 0xe2 &&
+      static_cast<unsigned char>(str[1]) == 0x80 &&
+      (static_cast<unsigned char>(str[2]) == 0x98 ||
+       static_cast<unsigned char>(str[2]) == 0x99)) {
+    return true;
+  }
+  return false;
 }

 // Callback helper for fix_quotes returns a double quote if both
@@ -1036,8 +1042,7 @@ static int is_simple_quote(const char *signed_str, int length) {
 UNICHAR_ID WERD_RES::BothQuotes(UNICHAR_ID id1, UNICHAR_ID id2) {
   const char *ch = uch_set->id_to_unichar(id1);
   const char *next_ch = uch_set->id_to_unichar(id2);
-  if (is_simple_quote(ch, strlen(ch)) &&
-      is_simple_quote(next_ch, strlen(next_ch))) {
+  if (is_simple_quote(ch) && is_simple_quote(next_ch)) {
     return uch_set->unichar_to_id("\"");
   }
   return INVALID_UNICHAR_ID;
diff --git a/src/training/common/commandlineflags.cpp b/src/training/common/commandlineflags.cpp
index 8ecd1cb3..de389293 100644
--- a/src/training/common/commandlineflags.cpp
+++ b/src/training/common/commandlineflags.cpp
@@ -13,12 +13,13 @@
 #include <cmath>               // for std::isnan, NAN
 #include <locale>              // for std::locale::classic
 #include <sstream>             // for std::stringstream
+#include <string_view>         // for std::string_view
 #include <vector>              // for std::vector
 #include "errcode.h"
 #include "tprintf.h" // for tprintf

 namespace tesseract {
-static bool IntFlagExists(const char *flag_name, int32_t *value) {
+static bool IntFlagExists(std::string_view flag_name, int32_t *value) {
   std::string full_flag_name("FLAGS_");
   full_flag_name += flag_name;
   std::vector<IntParam *> empty;
@@ -31,7 +32,7 @@ static bool IntFlagExists(const char *flag_name, int32_t *value) {
   return true;
 }

-static bool DoubleFlagExists(const char *flag_name, double *value) {
+static bool DoubleFlagExists(std::string_view flag_name, double *value) {
   std::string full_flag_name("FLAGS_");
   full_flag_name += flag_name;
   std::vector<DoubleParam *> empty;
@@ -44,7 +45,7 @@ static bool DoubleFlagExists(const char *flag_name, double *value) {
   return true;
 }

-static bool BoolFlagExists(const char *flag_name, bool *value) {
+static bool BoolFlagExists(std::string_view flag_name, bool *value) {
   std::string full_flag_name("FLAGS_");
   full_flag_name += flag_name;
   std::vector<BoolParam *> empty;
@@ -57,7 +58,7 @@ static bool BoolFlagExists(const char *flag_name, bool *value) {
   return true;
 }

-static bool StringFlagExists(const char *flag_name, const char **value) {
+static bool StringFlagExists(std::string_view flag_name, const char **value) {
   std::string full_flag_name("FLAGS_");
   full_flag_name += flag_name;
   std::vector<StringParam *> empty;
@@ -67,7 +68,7 @@ static bool StringFlagExists(const char *flag_name, const char **value) {
   return p != nullptr;
 }

-static void SetIntFlagValue(const char *flag_name, const int32_t new_val) {
+static void SetIntFlagValue(std::string_view flag_name, const int32_t new_val) {
   std::string full_flag_name("FLAGS_");
   full_flag_name += flag_name;
   std::vector<IntParam *> empty;
@@ -77,7 +78,7 @@ static void SetIntFlagValue(const char *flag_name, const int32_t new_val) {
   p->set_value(new_val);
 }

-static void SetDoubleFlagValue(const char *flag_name, const double new_val) {
+static void SetDoubleFlagValue(std::string_view flag_name, const double new_val) {
   std::string full_flag_name("FLAGS_");
   full_flag_name += flag_name;
   std::vector<DoubleParam *> empty;
@@ -87,7 +88,7 @@ static void SetDoubleFlagValue(const char *flag_name, const double new_val) {
   p->set_value(new_val);
 }

-static void SetBoolFlagValue(const char *flag_name, const bool new_val) {
+static void SetBoolFlagValue(std::string_view flag_name, const bool new_val) {
   std::string full_flag_name("FLAGS_");
   full_flag_name += flag_name;
   std::vector<BoolParam *> empty;
@@ -97,7 +98,7 @@ static void SetBoolFlagValue(const char *flag_name, const bool new_val) {
   p->set_value(new_val);
 }

-static void SetStringFlagValue(const char *flag_name, const char *new_val) {
+static void SetStringFlagValue(std::string_view flag_name, const char *new_val) {
   std::string full_flag_name("FLAGS_");
   full_flag_name += flag_name;
   std::vector<StringParam *> empty;
@@ -218,7 +219,7 @@ void ParseCommandLineFlags(const char *usage, int *argc, char ***argv, const boo
     // Find the flag name in the list of global flags.
     // int32_t flag
     int32_t int_val;
-    if (IntFlagExists(lhs.c_str(), &int_val)) {
+    if (IntFlagExists(lhs, &int_val)) {
       if (rhs != nullptr) {
         if (!strlen(rhs)) {
           // Bad input of the format --int_flag=
@@ -242,13 +243,13 @@ void ParseCommandLineFlags(const char *usage, int *argc, char ***argv, const boo
           }
         }
       }
-      SetIntFlagValue(lhs.c_str(), int_val);
+      SetIntFlagValue(lhs, int_val);
       continue;
     }

     // double flag
     double double_val;
-    if (DoubleFlagExists(lhs.c_str(), &double_val)) {
+    if (DoubleFlagExists(lhs, &double_val)) {
       if (rhs != nullptr) {
         if (!strlen(rhs)) {
           // Bad input of the format --double_flag=
@@ -272,14 +273,14 @@ void ParseCommandLineFlags(const char *usage, int *argc, char ***argv, const boo
           }
         }
       }
-      SetDoubleFlagValue(lhs.c_str(), double_val);
+      SetDoubleFlagValue(lhs, double_val);
       continue;
     }

     // Bool flag. Allow input forms --flag (equivalent to --flag=true),
     // --flag=false, --flag=true, --flag=0 and --flag=1
     bool bool_val;
-    if (BoolFlagExists(lhs.c_str(), &bool_val)) {
+    if (BoolFlagExists(lhs, &bool_val)) {
       if (rhs == nullptr) {
         // --flag form
         bool_val = true;
@@ -298,13 +299,13 @@ void ParseCommandLineFlags(const char *usage, int *argc, char ***argv, const boo
           exit(1);
         }
       }
-      SetBoolFlagValue(lhs.c_str(), bool_val);
+      SetBoolFlagValue(lhs, bool_val);
       continue;
     }

     // string flag
     const char *string_val;
-    if (StringFlagExists(lhs.c_str(), &string_val)) {
+    if (StringFlagExists(lhs, &string_val)) {
       if (rhs != nullptr) {
         string_val = rhs;
       } else {
@@ -316,7 +317,7 @@ void ParseCommandLineFlags(const char *usage, int *argc, char ***argv, const boo
           string_val = (*argv)[++i];
         }
       }
-      SetStringFlagValue(lhs.c_str(), string_val);
+      SetStringFlagValue(lhs, string_val);
       continue;
     }

diff --git a/src/training/pango/pango_font_info.cpp b/src/training/pango/pango_font_info.cpp
index 610f42cf..e3a681ce 100644
--- a/src/training/pango/pango_font_info.cpp
+++ b/src/training/pango/pango_font_info.cpp
@@ -42,6 +42,7 @@
 #include <cstdio>
 #include <cstdlib>
 #include <cstring>
+#include <string_view>

 #ifndef _MSC_VER
 #  include <sys/param.h>
@@ -535,11 +536,11 @@ bool FontUtils::IsAvailableFont(const char *input_query_desc, std::string *best_
   return equal;
 }

-static bool ShouldIgnoreFontFamilyName(const char *query) {
+static bool ShouldIgnoreFontFamilyName(std::string_view query) {
   static const char *kIgnoredFamilyNames[] = {"Sans", "Serif", "Monospace", nullptr};
   const char **list = kIgnoredFamilyNames;
   for (; *list != nullptr; ++list) {
-    if (!strcmp(*list, query)) {
+    if (query == *list) {
       return true;
     }
   }