Commit 962f2264 for tesseract
commit 962f2264fb1b0f58768f84bb4be21e49fbec94e6
Author: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
Date: Sun Feb 8 18:22:09 2026 +0000
Fix malformed PAGE XML: emit the closing </Page> tag after each page instead of once at the end of the document
Co-authored-by: Stefan Weil <sw@weilnetz.de>
diff --git a/.gitignore b/.gitignore
index 81b9995a..bba79ab4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -115,3 +115,7 @@ times.txt
/*.snap
/*_source.tar.bz2
+
+# CodeQL and build artifacts
+_codeql_detected_source_root
+install-sh
diff --git a/src/api/pagerenderer.cpp b/src/api/pagerenderer.cpp
index 2ae64d32..0206a068 100644
--- a/src/api/pagerenderer.cpp
+++ b/src/api/pagerenderer.cpp
@@ -683,7 +683,7 @@ bool TessPAGERenderer::AddImageHandler(TessBaseAPI *api) {
/// Append the PAGE XML for the end of the document
///
bool TessPAGERenderer::EndDocumentHandler() {
- AppendString("\t\t</Page>\n</PcGts>\n");
+ AppendString("</PcGts>\n");
return true;
}
@@ -1123,6 +1123,7 @@ char *TessBaseAPI::GetPAGEText(ETEXT_DESC *monitor, int page_number) {
reading_order_str << page_str.str();
page_str.str("");
+ reading_order_str << "\t</Page>\n";
const std::string &text = reading_order_str.str();
reading_order_str.str("");