Commit 78a8ac88 for tesseract

commit 78a8ac88a39ad975b057209ec42a209d1234c2dc
Author: Copilot <198982749+Copilot@users.noreply.github.com>
Date:   Tue Apr 21 13:29:14 2026 +0200

    Use asciidoctor instead of asciidoc-py for manpage generation (#4534)

    - configure.ac: replace asciidoc+xsltproc checks with asciidoctor check,
      remove HAVE_XML_CATALOG_FILES conditional (not needed with asciidoctor),
      add ASCIIDOCTOR_PDF conditional for the optional asciidoctor-pdf check
    - Makefile.am: use asciidoctor -b manpage for man pages, asciidoctor -b
      html5 for HTML, asciidoctor-pdf for PDF; remove man_xslt variable and
      docbook intermediate steps
    - doc/generate_manpages.sh: replace asciidoc+xsltproc with asciidoctor;
      use separate variable for asciidoctor-pdf; make PDF optional if
      asciidoctor-pdf is not installed
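    - ci: build and upload docs as artifact in autotools workflow
    - nsis/build.sh: install asciidoctor and ruby-asciidoctor-pdf instead of
      asciidoc, xsltproc, docbook-xml and docbook-xsl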

    Co-authored-by: amitdo <13571208+amitdo@users.noreply.github.com>
    Agent-Logs-Url: https://github.com/tesseract-ocr/tesseract/sessions/d1207ba5-e79d-4bc8-ae88-88d96e862581
    Agent-Logs-Url: https://github.com/tesseract-ocr/tesseract/sessions/435d91c8-c6e9-44b4-bc5a-08697f361829
    Agent-Logs-Url: https://github.com/tesseract-ocr/tesseract/sessions/6c63f971-fddc-4768-a958-136fa929ebd0
    Agent-Logs-Url: https://github.com/tesseract-ocr/tesseract/sessions/d4cb7ec7-8c7b-4778-a80b-a7a24e242067
    Agent-Logs-Url: https://github.com/tesseract-ocr/tesseract/sessions/ab3bd2c2-0ec8-465e-a2e1-354dc97879f5
    Agent-Logs-Url: https://github.com/tesseract-ocr/tesseract/sessions/a50bea45-117b-4da7-81af-935c731b02dd
    Agent-Logs-Url: https://github.com/tesseract-ocr/tesseract/sessions/0132497b-7b81-4ee6-849e-69d2451d9521

    Signed-off-by: Stefan Weil <sw@weilnetz.de>

diff --git a/.github/workflows/autotools.yml b/.github/workflows/autotools.yml
index 5250e553..6d108e0f 100644
--- a/.github/workflows/autotools.yml
+++ b/.github/workflows/autotools.yml
@@ -16,7 +16,7 @@ jobs:
         config:
           - { name: ubuntu-22.04-clang-15-autotools, os: ubuntu-22.04, cxx: clang++-15 } #installed

-          - { name: ubuntu-24.04-gcc-14-autotools, os: ubuntu-24.04, cxx: g++-14 } #installed
+          - { name: ubuntu-24.04-gcc-14-autotools, os: ubuntu-24.04, cxx: g++-14, docs: true } #installed
           - { name: ubuntu-22.04-gcc-12-autotools, os: ubuntu-22.04, cxx: g++-12 } #installed
           - { name: ubuntu-22.04-gcc-11-autotools, os: ubuntu-22.04, cxx: g++-11 } #installed

@@ -43,19 +43,40 @@ jobs:
            sudo apt-get install cabextract libarchive-dev -y
            sudo apt-get install libcurl4-openssl-dev libcurl4 curl -y

+    - name: Install documentation tools
+      if: matrix.config.docs
+      run: |
+           sudo apt-get install -y asciidoctor ruby-asciidoctor-pdf
+
     - name: Setup Tesseract
       run: |
            ./autogen.sh

     - name: Configure Tesseract
       run: |
-           ./configure '--disable-shared' '--disable-openmp' '--disable-doc' 'CXX=${{ matrix.config.cxx }}' 'CXXFLAGS=-g -O2'
+           ./configure '--disable-shared' '--disable-openmp' ${{ matrix.config.docs && '--enable-doc' || '--disable-doc' }} 'CXX=${{ matrix.config.cxx }}' 'CXXFLAGS=-g -O2'

     - name: Make and Install Tesseract
       run: |
            make -j 8
            sudo make install install

+    - name: Build documentation
+      if: matrix.config.docs
+      run: |
+           make html pdf
+
+    - name: Upload documentation artifacts
+      if: matrix.config.docs
+      uses: actions/upload-artifact@v4
+      with:
+        name: tesseract-docs
+        path: |
+          doc/*.html
+          doc/*.pdf
+          doc/*.1
+          doc/*.5
+
     - name: Make and Install Training Tools
       run: |
            make training -j 8
diff --git a/Makefile.am b/Makefile.am
index 9f2a367d..f47afec6 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -1027,7 +1027,7 @@ fuzzer-api-512x256: unittest/fuzzers/fuzzer-api.cpp

 CLEANFILES += fuzzer-api fuzzer-api-512x256

-if ASCIIDOC
+if ASCIIDOCTOR

 man_MANS = doc/combine_lang_model.1
 man_MANS += doc/combine_tessdata.1
@@ -1051,30 +1051,27 @@ man_MANS += doc/shapeclustering.1
 man_MANS += doc/unicharambigs.5
 endif

-man_xslt = http://docbook.sourceforge.net/release/xsl/current/manpages/docbook.xsl
-
 EXTRA_DIST += $(man_MANS) doc/Doxyfile

 html: ${man_MANS:%=%.html}
+
+# PDF output needs asciidoctor-pdf, which configure checks for separately
+# (ASCIIDOCTOR_PDF); only define the pdf target when that tool was found.
+if ASCIIDOCTOR_PDF
 pdf: ${man_MANS:%=%.pdf}
+endif

 SUFFIXES = .asc .html .pdf

+# Suffix rules: build each output format directly from the .asc source.
 .asc:
-if HAVE_XML_CATALOG_FILES
-	asciidoc -b docbook -d manpage -o - $< | \
-	XML_CATALOG_FILES=$(XML_CATALOG_FILES) xsltproc --nonet -o $@ $(man_xslt) -
-else
-	asciidoc -b docbook -d manpage -o - $< | \
-	xsltproc --nonet -o $@ $(man_xslt) -
-endif
+	asciidoctor -b manpage -o $@ $<

 .asc.html:
-	asciidoc -b html5 -o $@ $<
+	asciidoctor -b html5 -o $@ $<

 .asc.pdf:
-	asciidoc -b docbook -d manpage -o $*.dbk $<
-	docbook2pdf -o doc $*.dbk
+	asciidoctor-pdf -o $@ $<

 MAINTAINERCLEANFILES = $(man_MANS) Doxyfile

diff --git a/configure.ac b/configure.ac
index 8d026a4a..bfdd810b 100644
--- a/configure.ac
+++ b/configure.ac
@@ -437,36 +437,24 @@ fi
 # Check for programs needed to build documentation.
 # ----------------------------------------

-AM_CONDITIONAL([ASCIIDOC], false)
-AM_CONDITIONAL([HAVE_XML_CATALOG_FILES], false)
+AM_CONDITIONAL([ASCIIDOCTOR], false)
+AM_CONDITIONAL([ASCIIDOCTOR_PDF], false)
 AC_ARG_ENABLE([doc],
               AS_HELP_STRING([--disable-doc], [disable build of documentation])
               [],
               [: m4_divert_text([DEFAULTS], [enable_doc=check])])
 AS_IF([test "$enable_doc" != "no"], [
-  AC_CHECK_PROG([have_asciidoc], asciidoc, true, false)
-  AC_CHECK_PROG([have_xsltproc], xsltproc, true, false)
-  # macOS with Homebrew requires the environment variable
-  # XML_CATALOG_FILES for xsltproc.
-  if $have_asciidoc && $have_xsltproc; then
-    AM_CONDITIONAL([ASCIIDOC], true)
-    XML_CATALOG_FILES=
-    if $have_brew; then
-      catalog_file=$brew_prefix/etc/xml/catalog
-      if test -f $catalog_file; then
-        AM_CONDITIONAL([HAVE_XML_CATALOG_FILES], true)
-        XML_CATALOG_FILES=file:$catalog_file
-      else
-        AC_MSG_WARN([Missing file $catalog_file.])
-      fi
-    fi
-    AC_SUBST([XML_CATALOG_FILES])
+  AC_CHECK_PROG([have_asciidoctor], asciidoctor, true, false)
+  if $have_asciidoctor; then
+    AM_CONDITIONAL([ASCIIDOCTOR], true)
   else
     AS_IF([test "x$enable_doc" != xcheck], [
       AC_MSG_FAILURE(
-        [--enable-doc was given, but test for asciidoc and xsltproc failed])
+        [--enable-doc was given, but test for asciidoctor failed])
     ])
   fi
+  AC_CHECK_PROG([have_asciidoctor_pdf], asciidoctor-pdf, true, false)
+  AM_CONDITIONAL([ASCIIDOCTOR_PDF], $have_asciidoctor_pdf)
 ])

 # ----------------------------------------
@@ -577,13 +565,17 @@ echo "$ sudo make install"
 echo "$ sudo ldconfig"
 echo ""

-AM_COND_IF([ASCIIDOC], [
-  echo "This will also build the documentation."
+AM_COND_IF([ASCIIDOCTOR], [
+  AM_COND_IF([ASCIIDOCTOR_PDF], [
+    echo "This will also build the documentation (including PDF)."
+  ], [
+    echo "This will also build the documentation (PDF skipped: asciidoctor-pdf not found)."
+  ])
 ], [
   AS_IF([test "$enable_doc" = "no"], [
     echo "Documentation will not be built because it was disabled."
   ], [
-    echo "Documentation will not be built because asciidoc or xsltproc is missing."
+    echo "Documentation will not be built because asciidoctor is missing."
   ])
 ])

diff --git a/doc/generate_manpages.sh b/doc/generate_manpages.sh
index 9f808d17..34dfe61d 100755
--- a/doc/generate_manpages.sh
+++ b/doc/generate_manpages.sh
@@ -16,18 +16,20 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-man_xslt=http://docbook.sourceforge.net/release/xsl/current/manpages/docbook.xsl
-asciidoc=$(which asciidoc)
-xsltproc=$(which xsltproc)
-if [[ -z "${asciidoc}" ]] || [[ -z "${xsltproc}" ]]; then
-  echo "Please make sure asciidoc and xsltproc are installed."
+# Locate the tools with the shell builtin `command -v`, which prints nothing
+# when a tool is missing. asciidoctor is required; asciidoctor-pdf is optional.
+asciidoctor=$(command -v asciidoctor)
+asciidoctor_pdf=$(command -v asciidoctor-pdf)
+if [[ -z "${asciidoctor}" ]]; then
+  echo "Please make sure asciidoctor is installed."
   exit 1
 else
   for src in *.asc; do
     pagename=${src/.asc/}
-    (${asciidoc} -d manpage "${src}" &&
-     ${asciidoc} -d manpage -b docbook "${src}" &&
-       ${xsltproc} --nonet ${man_xslt} "${pagename}".xml) ||
+    # Generate the man page and HTML; the PDF only if asciidoctor-pdf was found.
+    ("${asciidoctor}" -b manpage "${src}" -o "${pagename}" &&
+     "${asciidoctor}" -b html5 "${src}" -o "${pagename}".html &&
+     { [[ -z "${asciidoctor_pdf}" ]] || "${asciidoctor_pdf}" "${src}" -o "${pagename}".pdf; }) ||
        echo "Error generating ${pagename}"
   done
 fi
diff --git a/nsis/build.sh b/nsis/build.sh
index 49245a1b..22d0e021 100755
--- a/nsis/build.sh
+++ b/nsis/build.sh
@@ -28,7 +28,7 @@ PKG_ARCH=mingw-w64-${ARCH/_/-}
 # Install packages.
 sudo apt-get update --quiet
 sudo apt-get install --assume-yes --no-install-recommends --quiet \
-  asciidoc curl xsltproc docbook-xml docbook-xsl \
+  asciidoctor ruby-asciidoctor-pdf curl \
   automake dpkg-dev libtool pkg-config default-jdk-headless \
   mingw-w64-tools nsis g++-"$PKG_ARCH" \
   makepkg pacman-package-manager python3-venv unzip