Commit 28a36b3b for tesseract
commit 28a36b3b172e7d0e82a04f27c5c57623538b9207
Author: Stefan Weil <sw@weilnetz.de>
Date: Fri Jun 19 14:47:51 2026 +0200
Remove src/ccstruct/polyaprx.*
Signed-off-by: Stefan Weil <sw@weilnetz.de>
diff --git a/Makefile.am b/Makefile.am
index 56f692c0..8ef761ff 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -294,7 +294,6 @@ noinst_HEADERS += src/ccstruct/otsuthr.h
noinst_HEADERS += src/ccstruct/pageres.h
noinst_HEADERS += src/ccstruct/pdblock.h
noinst_HEADERS += src/ccstruct/points.h
-noinst_HEADERS += src/ccstruct/polyaprx.h
noinst_HEADERS += src/ccstruct/polyblk.h
noinst_HEADERS += src/ccstruct/quadlsq.h
noinst_HEADERS += src/ccstruct/quadratc.h
@@ -334,7 +333,6 @@ libtesseract_la_SOURCES += src/ccstruct/otsuthr.cpp
libtesseract_la_SOURCES += src/ccstruct/pageres.cpp
libtesseract_la_SOURCES += src/ccstruct/pdblock.cpp
libtesseract_la_SOURCES += src/ccstruct/points.cpp
-libtesseract_la_SOURCES += src/ccstruct/polyaprx.cpp
libtesseract_la_SOURCES += src/ccstruct/polyblk.cpp
libtesseract_la_SOURCES += src/ccstruct/quadlsq.cpp
libtesseract_la_SOURCES += src/ccstruct/quspline.cpp
diff --git a/cmake/SourceLists.cmake b/cmake/SourceLists.cmake
index da5982ea..78fc1785 100644
--- a/cmake/SourceLists.cmake
+++ b/cmake/SourceLists.cmake
@@ -109,7 +109,6 @@ set(TESSERACT_SRC_CCSTRUCT
src/ccstruct/params_training_featdef.cpp
src/ccstruct/pdblock.cpp
src/ccstruct/points.cpp
- src/ccstruct/polyaprx.cpp
src/ccstruct/polyblk.cpp
src/ccstruct/quadlsq.cpp
src/ccstruct/quspline.cpp
@@ -412,7 +411,6 @@ set(TESSERACT_HDR_INTERNAL
src/ccstruct/params_training_featdef.h
src/ccstruct/pdblock.h
src/ccstruct/points.h
- src/ccstruct/polyaprx.h
src/ccstruct/polyblk.h
src/ccstruct/quadlsq.h
src/ccstruct/quadratc.h
diff --git a/src/ccstruct/blobs.cpp b/src/ccstruct/blobs.cpp
index 6884f6ee..1d2ec145 100644
--- a/src/ccstruct/blobs.cpp
+++ b/src/ccstruct/blobs.cpp
@@ -29,17 +29,23 @@
#include "ccstruct.h"
#include "clst.h"
+#include "coutln.h" // for C_OUTLINE
+#include "errcode.h" // for ASSERT_HOST
#include "linlsq.h"
+#include "mod128.h" // for DIR128
#include "normalis.h"
#include "ocrblock.h"
#include "ocrrow.h"
-#include "points.h"
-#include "polyaprx.h"
+#include "params.h" // for BoolParam, BOOL_VAR
+#include "points.h" // for ICOORD
+#include "rect.h" // for TBOX
+#include "tprintf.h" // for tprintf
#include "werd.h"
#include "helpers.h"
#include <algorithm>
+#include <cstdint> // for INT16_MAX, int8_t
namespace tesseract {
@@ -52,6 +58,551 @@ const TPOINT kDivisibleVerticalUpright(0, 1);
// so it is only used for ApplyBoxes chopping to get a better segmentation.
const TPOINT kDivisibleVerticalItalic(1, 5);
+// Description: Code for polygonal approximation from old edgeprog.
+
+#define FASTEDGELENGTH 256
+
+static BOOL_VAR(poly_debug, false, "Debug old poly");
+static BOOL_VAR(poly_wide_objects_better, true,
+ "More accurate approx on wide things");
+
+#define fixed_dist 20 // really an int_variable
+#define approx_dist 15 // really an int_variable
+
+const int par1 = 4500 / (approx_dist * approx_dist);
+const int par2 = 6750 / (approx_dist * approx_dist);
+
+/**********************************************************************
+ *cutline(first,last,area) straightens out a line by partitioning
+ *and joining the ends by a straight line*
+ **********************************************************************/
+
+static void cutline( // recursive refine
+ EDGEPT *first, // ends of line
+ EDGEPT *last, int area // area of object
+) {
+ EDGEPT *edge; // current edge
+ TPOINT vecsum; // vector sum
+ int vlen; // approx length of vecsum
+ TPOINT vec; // accumulated vector
+ EDGEPT *maxpoint; // worst point
+ int maxperp; // max deviation
+ int perp; // perp distance
+ int ptcount; // no of points
+ int squaresum; // sum of perps
+
+ edge = first; // start of line
+ if (edge->next == last) {
+ return; // simple line
+ }
+
+ // vector sum
+ vecsum.x = last->pos.x - edge->pos.x;
+ vecsum.y = last->pos.y - edge->pos.y;
+ if (vecsum.x == 0 && vecsum.y == 0) {
+ // special case
+ vecsum.x = -edge->prev->vec.x;
+ vecsum.y = -edge->prev->vec.y;
+ }
+ // absolute value
+ vlen = vecsum.x > 0 ? vecsum.x : -vecsum.x;
+ if (vecsum.y > vlen) {
+ vlen = vecsum.y; // maximum
+ } else if (-vecsum.y > vlen) {
+ vlen = -vecsum.y; // absolute value
+ }
+
+ vec.x = edge->vec.x; // accumulated vector
+ vec.y = edge->vec.y;
+ maxperp = 0; // none yet
+ squaresum = ptcount = 0;
+ edge = edge->next; // move to actual point
+ maxpoint = edge; // in case there isn't one
+ do {
+ perp = vec.cross(vecsum); // get perp distance
+ if (perp != 0) {
+ perp *= perp; // squared deviation
+ }
+ squaresum += perp; // sum squares
+ ptcount++; // count points
+ if (poly_debug) {
+ tprintf("Cutline:Final perp=%d\n", perp);
+ }
+ if (perp > maxperp) {
+ maxperp = perp;
+ maxpoint = edge; // find greatest deviation
+ }
+ vec.x += edge->vec.x; // accumulate vectors
+ vec.y += edge->vec.y;
+ edge = edge->next;
+ } while (edge != last); // test all line
+
+ perp = vecsum.length2();
+ ASSERT_HOST(perp != 0);
+
+ if (maxperp < 256 * INT16_MAX) {
+ maxperp <<= 8;
+ maxperp /= perp; // true max perp
+ } else {
+ maxperp /= perp;
+ maxperp <<= 8; // avoid overflow
+ }
+ if (squaresum < 256 * INT16_MAX) {
+ // mean squared perp
+ perp = (squaresum << 8) / (perp * ptcount);
+ } else {
+ // avoid overflow
+ perp = (squaresum / perp << 8) / ptcount;
+ }
+
+ if (poly_debug) {
+ tprintf("Cutline:A=%d, max=%.2f(%.2f%%), msd=%.2f(%.2f%%)\n", area,
+ maxperp / 256.0, maxperp * 200.0 / area, perp / 256.0,
+ perp * 300.0 / area);
+ }
+ if (maxperp * par1 >= 10 * area || perp * par2 >= 10 * area || vlen >= 126) {
+ maxpoint->fixed = true;
+ // partitions
+ cutline(first, maxpoint, area);
+ cutline(maxpoint, last, area);
+ }
+}
+
+/**********************************************************************
+ * edgesteps_to_edgepts
+ *
+ * Convert a C_OUTLINE to EDGEPTs.
+ **********************************************************************/
+
+static EDGEPT *edgesteps_to_edgepts( // convert outline
+ C_OUTLINE *c_outline, // input
+ EDGEPT edgepts[] // output is array
+) {
+ int32_t length; // steps in path
+ ICOORD pos; // current coords
+ int32_t stepindex; // current step
+ int32_t stepinc; // increment
+ int32_t epindex; // current EDGEPT
+ ICOORD vec; // for this 8 step
+ ICOORD prev_vec;
+ int8_t epdir; // of this step
+ DIR128 prevdir; // previous dir
+ DIR128 dir; // of this step
+
+ pos = c_outline->start_pos(); // start of loop
+ length = c_outline->pathlength();
+ stepindex = 0;
+ epindex = 0;
+ prevdir = -1;
+ // repeated steps
+ uint32_t count = 0;
+ int prev_stepindex = 0;
+ do {
+ dir = c_outline->step_dir(stepindex);
+ vec = c_outline->step(stepindex);
+ if (stepindex < length - 1 &&
+ c_outline->step_dir(stepindex + 1) - dir == -32) {
+ dir += 128 - 16;
+ vec += c_outline->step(stepindex + 1);
+ stepinc = 2;
+ } else {
+ stepinc = 1;
+ }
+ if (count == 0) {
+ prevdir = dir;
+ prev_vec = vec;
+ }
+ if (prevdir.get_dir() != dir.get_dir()) {
+ edgepts[epindex].pos.x = pos.x();
+ edgepts[epindex].pos.y = pos.y();
+ prev_vec *= count;
+ edgepts[epindex].vec.x = prev_vec.x();
+ edgepts[epindex].vec.y = prev_vec.y();
+ pos += prev_vec;
+ edgepts[epindex].runlength = count;
+ edgepts[epindex].prev = &edgepts[epindex - 1];
+ // TODO: reset is_hidden, too?
+ edgepts[epindex].fixed = false;
+ edgepts[epindex].next = &edgepts[epindex + 1];
+ prevdir += 64;
+ epdir = DIR128(0) - prevdir;
+ epdir >>= 4;
+ epdir &= 7;
+ edgepts[epindex].dir = epdir;
+ edgepts[epindex].src_outline = c_outline;
+ edgepts[epindex].start_step = prev_stepindex;
+ edgepts[epindex].step_count = stepindex - prev_stepindex;
+ epindex++;
+ prevdir = dir;
+ prev_vec = vec;
+ count = 1;
+ prev_stepindex = stepindex;
+ } else {
+ count++;
+ }
+ stepindex += stepinc;
+ } while (stepindex < length);
+ edgepts[epindex].pos.x = pos.x();
+ edgepts[epindex].pos.y = pos.y();
+ prev_vec *= count;
+ edgepts[epindex].vec.x = prev_vec.x();
+ edgepts[epindex].vec.y = prev_vec.y();
+ pos += prev_vec;
+ edgepts[epindex].runlength = count;
+ // TODO: reset is_hidden, too?
+ edgepts[epindex].fixed = false;
+ edgepts[epindex].src_outline = c_outline;
+ edgepts[epindex].start_step = prev_stepindex;
+ edgepts[epindex].step_count = stepindex - prev_stepindex;
+ edgepts[epindex].prev = &edgepts[epindex - 1];
+ edgepts[epindex].next = &edgepts[0];
+ prevdir += 64;
+ epdir = DIR128(0) - prevdir;
+ epdir >>= 4;
+ epdir &= 7;
+ edgepts[epindex].dir = epdir;
+ edgepts[0].prev = &edgepts[epindex];
+ ASSERT_HOST(pos.x() == c_outline->start_pos().x() &&
+ pos.y() == c_outline->start_pos().y());
+ return &edgepts[0];
+}
+
+/**********************************************************************
+ *fix2(start,area) fixes points on the outline according to a trial method*
+ **********************************************************************/
+
+static void fix2( // polygonal approx
+ EDGEPT *start, // loop to approximate
+ int area) {
+ EDGEPT *edgept; // current point
+ EDGEPT *edgept1;
+ EDGEPT *loopstart; // modified start of loop
+ EDGEPT *linestart; // start of line segment
+ int fixed_count; // no of fixed points
+ int8_t dir;
+ int d01, d12, d23, gapmin;
+ TPOINT d01vec, d12vec, d23vec;
+ EDGEPT *edgefix, *startfix;
+ EDGEPT *edgefix0, *edgefix1, *edgefix2, *edgefix3;
+
+ edgept = start; // start of loop
+ while (((edgept->dir - edgept->prev->dir + 1) & 7) < 3 &&
+ (dir = (edgept->prev->dir - edgept->next->dir) & 7) != 2 && dir != 6) {
+ edgept = edgept->next; // find suitable start
+ }
+ loopstart = edgept; // remember start
+
+ // completed flag
+ bool stopped = false;
+ edgept->fixed = true; // fix it
+ do {
+ linestart = edgept; // possible start of line
+ auto dir1 = edgept->dir; // first direction
+ // length of dir1
+ auto sum1 = edgept->runlength;
+ edgept = edgept->next;
+ auto dir2 = edgept->dir; // 2nd direction
+ // length in dir2
+ auto sum2 = edgept->runlength;
+ if (((dir1 - dir2 + 1) & 7) < 3) {
+ while (edgept->prev->dir == edgept->next->dir) {
+ edgept = edgept->next; // look at next
+ if (edgept->dir == dir1) {
+ // sum lengths
+ sum1 += edgept->runlength;
+ } else {
+ sum2 += edgept->runlength;
+ }
+ }
+
+ if (edgept == loopstart) {
+ // finished
+ stopped = true;
+ }
+ if (sum2 + sum1 > 2 && linestart->prev->dir == dir2 &&
+ (linestart->prev->runlength > linestart->runlength || sum2 > sum1)) {
+ // start is back one
+ linestart = linestart->prev;
+ linestart->fixed = true;
+ }
+
+ if (((edgept->next->dir - edgept->dir + 1) & 7) >= 3 ||
+ (edgept->dir == dir1 && sum1 >= sum2) ||
+ ((edgept->prev->runlength < edgept->runlength ||
+ (edgept->dir == dir2 && sum2 >= sum1)) &&
+ linestart->next != edgept)) {
+ edgept = edgept->next;
+ }
+ }
+ // sharp bend
+ edgept->fixed = true;
+ }
+ // do whole loop
+ while (edgept != loopstart && !stopped);
+
+ edgept = start;
+ do {
+ if (((edgept->runlength >= 8) && (edgept->dir != 2) &&
+ (edgept->dir != 6)) ||
+ ((edgept->runlength >= 8) &&
+ ((edgept->dir == 2) || (edgept->dir == 6)))) {
+ edgept->fixed = true;
+ edgept1 = edgept->next;
+ edgept1->fixed = true;
+ }
+ edgept = edgept->next;
+ } while (edgept != start);
+
+ edgept = start;
+ do {
+ // single fixed step
+ if (edgept->fixed &&
+ edgept->runlength == 1
+ // and neighbours free
+ && edgept->next->fixed &&
+ !edgept->prev->fixed
+ // same pair of dirs
+ && !edgept->next->next->fixed &&
+ edgept->prev->dir == edgept->next->dir &&
+ edgept->prev->prev->dir == edgept->next->next->dir &&
+ ((edgept->prev->dir - edgept->dir + 1) & 7) < 3) {
+ // unfix it
+ edgept->fixed = false;
+ edgept->next->fixed = false;
+ }
+ edgept = edgept->next; // do all points
+ } while (edgept != start); // until finished
+
+ stopped = false;
+ if (area < 450) {
+ area = 450;
+ }
+
+ gapmin = area * fixed_dist * fixed_dist / 44000;
+
+ edgept = start;
+ fixed_count = 0;
+ do {
+ if (edgept->fixed) {
+ fixed_count++;
+ }
+ edgept = edgept->next;
+ } while (edgept != start);
+ while (!edgept->fixed) {
+ edgept = edgept->next;
+ }
+ edgefix0 = edgept;
+
+ edgept = edgept->next;
+ while (!edgept->fixed) {
+ edgept = edgept->next;
+ }
+ edgefix1 = edgept;
+
+ edgept = edgept->next;
+ while (!edgept->fixed) {
+ edgept = edgept->next;
+ }
+ edgefix2 = edgept;
+
+ edgept = edgept->next;
+ while (!edgept->fixed) {
+ edgept = edgept->next;
+ }
+ edgefix3 = edgept;
+
+ startfix = edgefix2;
+
+ do {
+ if (fixed_count <= 3) {
+ break; // already too few
+ }
+ d12vec.diff(edgefix1->pos, edgefix2->pos);
+ d12 = d12vec.length2();
+ // TODO(rays) investigate this change:
+ // Only unfix a point if it is part of a low-curvature section
+ // of outline and the total angle change of the outlines is
+ // less than 90 degrees, ie the scalar product is positive.
+ // if (d12 <= gapmin && edgefix0->vec.dot(edgefix2->vec) > 0) {
+ if (d12 <= gapmin) {
+ d01vec.diff(edgefix0->pos, edgefix1->pos);
+ d01 = d01vec.length2();
+ d23vec.diff(edgefix2->pos, edgefix3->pos);
+ d23 = d23vec.length2();
+ if (d01 > d23) {
+ edgefix2->fixed = false;
+ fixed_count--;
+ } else {
+ edgefix1->fixed = false;
+ fixed_count--;
+ edgefix1 = edgefix2;
+ }
+ } else {
+ edgefix0 = edgefix1;
+ edgefix1 = edgefix2;
+ }
+ edgefix2 = edgefix3;
+ edgept = edgept->next;
+ while (!edgept->fixed) {
+ if (edgept == startfix) {
+ stopped = true;
+ }
+ edgept = edgept->next;
+ }
+ edgefix3 = edgept;
+ edgefix = edgefix2;
+ } while ((edgefix != startfix) && (!stopped));
+}
+
+/**********************************************************************
+ *poly2(startpt,area,path) applies a second approximation to the outline
+ *using the points which have been fixed by the first approximation*
+ **********************************************************************/
+
+static EDGEPT *poly2( // second poly
+ EDGEPT *startpt, // start of loop
+ int area // area of blob box
+) {
+ EDGEPT *edgept; // current outline point
+ EDGEPT *loopstart; // starting point
+ EDGEPT *linestart; // start of line
+ int edgesum; // correction count
+
+ if (area < 1200) {
+ area = 1200; // minimum value
+ }
+
+ loopstart = nullptr; // not found it yet
+ edgept = startpt; // start of loop
+
+ do {
+ // current point fixed and next not
+ if (edgept->fixed && !edgept->next->fixed) {
+ loopstart = edgept; // start of repoly
+ break;
+ }
+ edgept = edgept->next; // next point
+ } while (edgept != startpt); // until found or finished
+
+ if (loopstart == nullptr && !startpt->fixed) {
+ // fixed start of loop
+ startpt->fixed = true;
+ loopstart = startpt; // or start of loop
+ }
+ if (loopstart) {
+ do {
+ edgept = loopstart; // first to do
+ do {
+ linestart = edgept;
+ edgesum = 0; // sum of lengths
+ do {
+ // sum lengths
+ edgesum += edgept->runlength;
+ edgept = edgept->next; // move on
+ } while (!edgept->fixed && edgept != loopstart && edgesum < 126);
+ if (poly_debug) {
+ tprintf("Poly2:starting at (%d,%d)+%d=(%d,%d),%d to (%d,%d)\n",
+ linestart->pos.x, linestart->pos.y, linestart->dir,
+ linestart->vec.x, linestart->vec.y, edgesum, edgept->pos.x,
+ edgept->pos.y);
+ }
+ // reapproximate
+ cutline(linestart, edgept, area);
+
+ while (edgept->next->fixed && edgept != loopstart) {
+ edgept = edgept->next; // look for next non-fixed
+ }
+ }
+ // do all the loop
+ while (edgept != loopstart);
+ edgesum = 0;
+ do {
+ if (edgept->fixed) {
+ edgesum++;
+ }
+ edgept = edgept->next;
+ }
+ // count fixed pts
+ while (edgept != loopstart);
+ if (edgesum < 3) {
+ area /= 2; // must have 3 pts
+ }
+ } while (edgesum < 3);
+ do {
+ linestart = edgept;
+ do {
+ edgept = edgept->next;
+ } while (!edgept->fixed);
+ linestart->next = edgept;
+ edgept->prev = linestart;
+ linestart->vec.x = edgept->pos.x - linestart->pos.x;
+ linestart->vec.y = edgept->pos.y - linestart->pos.y;
+ } while (edgept != loopstart);
+ } else {
+ edgept = startpt; // start of loop
+ }
+
+ loopstart = edgept; // new start
+ return loopstart; // correct exit
+}
+
+/**********************************************************************
+ * tesspoly_outline
+ *
+ * Approximate an outline from chain codes form using the old tess algorithm.
+ * If allow_detailed_fx is true, the EDGEPTs in the returned TBLOB
+ * contain pointers to the input C_OUTLINEs that enable higher-resolution
+ * feature extraction that does not use the polygonal approximation.
+ **********************************************************************/
+
+static TESSLINE *ApproximateOutline(bool allow_detailed_fx, C_OUTLINE *c_outline) {
+ EDGEPT stack_edgepts[FASTEDGELENGTH]; // converted path
+ EDGEPT *edgepts = stack_edgepts;
+
+ // Use heap memory if the stack buffer is not big enough.
+ if (c_outline->pathlength() > FASTEDGELENGTH) {
+ edgepts = new EDGEPT[c_outline->pathlength()];
+ }
+
+ // bounding box
+ const auto &loop_box = c_outline->bounding_box();
+ int32_t area = loop_box.height();
+ if (!poly_wide_objects_better && loop_box.width() > area) {
+ area = loop_box.width();
+ }
+ area *= area;
+ edgesteps_to_edgepts(c_outline, edgepts);
+ fix2(edgepts, area);
+ EDGEPT *edgept = poly2(edgepts, area); // 2nd approximation.
+ EDGEPT *startpt = edgept;
+ EDGEPT *result = nullptr;
+ EDGEPT *prev_result = nullptr;
+ do {
+ auto *new_pt = new EDGEPT;
+ new_pt->pos = edgept->pos;
+ new_pt->prev = prev_result;
+ if (prev_result == nullptr) {
+ result = new_pt;
+ } else {
+ prev_result->next = new_pt;
+ new_pt->prev = prev_result;
+ }
+ if (allow_detailed_fx) {
+ new_pt->src_outline = edgept->src_outline;
+ new_pt->start_step = edgept->start_step;
+ new_pt->step_count = edgept->step_count;
+ }
+ prev_result = new_pt;
+ edgept = edgept->next;
+ } while (edgept != startpt);
+ prev_result->next = result;
+ result->prev = prev_result;
+ if (edgepts != stack_edgepts) {
+ delete[] edgepts;
+ }
+ return TESSLINE::BuildFromOutlineList(result);
+}
+
/*----------------------------------------------------------------------
F u n c t i o n s
----------------------------------------------------------------------*/
diff --git a/src/ccstruct/polyaprx.cpp b/src/ccstruct/polyaprx.cpp
deleted file mode 100644
index 9492304c..00000000
--- a/src/ccstruct/polyaprx.cpp
+++ /dev/null
@@ -1,577 +0,0 @@
-/**********************************************************************
- * File: polyaprx.cpp
- * Description: Code for polygonal approximation from old edgeprog.
- * Author: Ray Smith
- *
- * (C) Copyright 1993, Hewlett-Packard Ltd.
- ** Licensed under the Apache License, Version 2.0 (the "License");
- ** you may not use this file except in compliance with the License.
- ** You may obtain a copy of the License at
- ** http://www.apache.org/licenses/LICENSE-2.0
- ** Unless required by applicable law or agreed to in writing, software
- ** distributed under the License is distributed on an "AS IS" BASIS,
- ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- ** See the License for the specific language governing permissions and
- ** limitations under the License.
- *
- **********************************************************************/
-
-#include "polyaprx.h"
-
-#include "blobs.h" // for EDGEPT, TPOINT, VECTOR, TESSLINE
-#include "coutln.h" // for C_OUTLINE
-#include "errcode.h" // for ASSERT_HOST
-#include "mod128.h" // for DIR128
-#include "params.h" // for BoolParam, BOOL_VAR
-#include "points.h" // for ICOORD
-#include "rect.h" // for TBOX
-#include "tprintf.h" // for tprintf
-
-#include <cstdint> // for INT16_MAX, int8_t
-
-namespace tesseract {
-
-#define FASTEDGELENGTH 256
-
-static BOOL_VAR(poly_debug, false, "Debug old poly");
-static BOOL_VAR(poly_wide_objects_better, true,
- "More accurate approx on wide things");
-
-#define fixed_dist 20 // really an int_variable
-#define approx_dist 15 // really an int_variable
-
-const int par1 = 4500 / (approx_dist * approx_dist);
-const int par2 = 6750 / (approx_dist * approx_dist);
-
-/**********************************************************************
- *cutline(first,last,area) straightens out a line by partitioning
- *and joining the ends by a straight line*
- **********************************************************************/
-
-static void cutline( // recursive refine
- EDGEPT *first, // ends of line
- EDGEPT *last, int area // area of object
-) {
- EDGEPT *edge; // current edge
- TPOINT vecsum; // vector sum
- int vlen; // approx length of vecsum
- TPOINT vec; // accumulated vector
- EDGEPT *maxpoint; // worst point
- int maxperp; // max deviation
- int perp; // perp distance
- int ptcount; // no of points
- int squaresum; // sum of perps
-
- edge = first; // start of line
- if (edge->next == last) {
- return; // simple line
- }
-
- // vector sum
- vecsum.x = last->pos.x - edge->pos.x;
- vecsum.y = last->pos.y - edge->pos.y;
- if (vecsum.x == 0 && vecsum.y == 0) {
- // special case
- vecsum.x = -edge->prev->vec.x;
- vecsum.y = -edge->prev->vec.y;
- }
- // absolute value
- vlen = vecsum.x > 0 ? vecsum.x : -vecsum.x;
- if (vecsum.y > vlen) {
- vlen = vecsum.y; // maximum
- } else if (-vecsum.y > vlen) {
- vlen = -vecsum.y; // absolute value
- }
-
- vec.x = edge->vec.x; // accumulated vector
- vec.y = edge->vec.y;
- maxperp = 0; // none yet
- squaresum = ptcount = 0;
- edge = edge->next; // move to actual point
- maxpoint = edge; // in case there isn't one
- do {
- perp = vec.cross(vecsum); // get perp distance
- if (perp != 0) {
- perp *= perp; // squared deviation
- }
- squaresum += perp; // sum squares
- ptcount++; // count points
- if (poly_debug) {
- tprintf("Cutline:Final perp=%d\n", perp);
- }
- if (perp > maxperp) {
- maxperp = perp;
- maxpoint = edge; // find greatest deviation
- }
- vec.x += edge->vec.x; // accumulate vectors
- vec.y += edge->vec.y;
- edge = edge->next;
- } while (edge != last); // test all line
-
- perp = vecsum.length2();
- ASSERT_HOST(perp != 0);
-
- if (maxperp < 256 * INT16_MAX) {
- maxperp <<= 8;
- maxperp /= perp; // true max perp
- } else {
- maxperp /= perp;
- maxperp <<= 8; // avoid overflow
- }
- if (squaresum < 256 * INT16_MAX) {
- // mean squared perp
- perp = (squaresum << 8) / (perp * ptcount);
- } else {
- // avoid overflow
- perp = (squaresum / perp << 8) / ptcount;
- }
-
- if (poly_debug) {
- tprintf("Cutline:A=%d, max=%.2f(%.2f%%), msd=%.2f(%.2f%%)\n", area,
- maxperp / 256.0, maxperp * 200.0 / area, perp / 256.0,
- perp * 300.0 / area);
- }
- if (maxperp * par1 >= 10 * area || perp * par2 >= 10 * area || vlen >= 126) {
- maxpoint->fixed = true;
- // partitions
- cutline(first, maxpoint, area);
- cutline(maxpoint, last, area);
- }
-}
-
-/**********************************************************************
- * edgesteps_to_edgepts
- *
- * Convert a C_OUTLINE to EDGEPTs.
- **********************************************************************/
-
-static EDGEPT *edgesteps_to_edgepts( // convert outline
- C_OUTLINE *c_outline, // input
- EDGEPT edgepts[] // output is array
-) {
- int32_t length; // steps in path
- ICOORD pos; // current coords
- int32_t stepindex; // current step
- int32_t stepinc; // increment
- int32_t epindex; // current EDGEPT
- ICOORD vec; // for this 8 step
- ICOORD prev_vec;
- int8_t epdir; // of this step
- DIR128 prevdir; // previous dir
- DIR128 dir; // of this step
-
- pos = c_outline->start_pos(); // start of loop
- length = c_outline->pathlength();
- stepindex = 0;
- epindex = 0;
- prevdir = -1;
- // repeated steps
- uint32_t count = 0;
- int prev_stepindex = 0;
- do {
- dir = c_outline->step_dir(stepindex);
- vec = c_outline->step(stepindex);
- if (stepindex < length - 1 &&
- c_outline->step_dir(stepindex + 1) - dir == -32) {
- dir += 128 - 16;
- vec += c_outline->step(stepindex + 1);
- stepinc = 2;
- } else {
- stepinc = 1;
- }
- if (count == 0) {
- prevdir = dir;
- prev_vec = vec;
- }
- if (prevdir.get_dir() != dir.get_dir()) {
- edgepts[epindex].pos.x = pos.x();
- edgepts[epindex].pos.y = pos.y();
- prev_vec *= count;
- edgepts[epindex].vec.x = prev_vec.x();
- edgepts[epindex].vec.y = prev_vec.y();
- pos += prev_vec;
- edgepts[epindex].runlength = count;
- edgepts[epindex].prev = &edgepts[epindex - 1];
- // TODO: reset is_hidden, too?
- edgepts[epindex].fixed = false;
- edgepts[epindex].next = &edgepts[epindex + 1];
- prevdir += 64;
- epdir = DIR128(0) - prevdir;
- epdir >>= 4;
- epdir &= 7;
- edgepts[epindex].dir = epdir;
- edgepts[epindex].src_outline = c_outline;
- edgepts[epindex].start_step = prev_stepindex;
- edgepts[epindex].step_count = stepindex - prev_stepindex;
- epindex++;
- prevdir = dir;
- prev_vec = vec;
- count = 1;
- prev_stepindex = stepindex;
- } else {
- count++;
- }
- stepindex += stepinc;
- } while (stepindex < length);
- edgepts[epindex].pos.x = pos.x();
- edgepts[epindex].pos.y = pos.y();
- prev_vec *= count;
- edgepts[epindex].vec.x = prev_vec.x();
- edgepts[epindex].vec.y = prev_vec.y();
- pos += prev_vec;
- edgepts[epindex].runlength = count;
- // TODO: reset is_hidden, too?
- edgepts[epindex].fixed = false;
- edgepts[epindex].src_outline = c_outline;
- edgepts[epindex].start_step = prev_stepindex;
- edgepts[epindex].step_count = stepindex - prev_stepindex;
- edgepts[epindex].prev = &edgepts[epindex - 1];
- edgepts[epindex].next = &edgepts[0];
- prevdir += 64;
- epdir = DIR128(0) - prevdir;
- epdir >>= 4;
- epdir &= 7;
- edgepts[epindex].dir = epdir;
- edgepts[0].prev = &edgepts[epindex];
- ASSERT_HOST(pos.x() == c_outline->start_pos().x() &&
- pos.y() == c_outline->start_pos().y());
- return &edgepts[0];
-}
-
-/**********************************************************************
- *fix2(start,area) fixes points on the outline according to a trial method*
- **********************************************************************/
-
-static void fix2( // polygonal approx
- EDGEPT *start, // loop to approximate
- int area) {
- EDGEPT *edgept; // current point
- EDGEPT *edgept1;
- EDGEPT *loopstart; // modified start of loop
- EDGEPT *linestart; // start of line segment
- int fixed_count; // no of fixed points
- int8_t dir;
- int d01, d12, d23, gapmin;
- TPOINT d01vec, d12vec, d23vec;
- EDGEPT *edgefix, *startfix;
- EDGEPT *edgefix0, *edgefix1, *edgefix2, *edgefix3;
-
- edgept = start; // start of loop
- while (((edgept->dir - edgept->prev->dir + 1) & 7) < 3 &&
- (dir = (edgept->prev->dir - edgept->next->dir) & 7) != 2 && dir != 6) {
- edgept = edgept->next; // find suitable start
- }
- loopstart = edgept; // remember start
-
- // completed flag
- bool stopped = false;
- edgept->fixed = true; // fix it
- do {
- linestart = edgept; // possible start of line
- auto dir1 = edgept->dir; // first direction
- // length of dir1
- auto sum1 = edgept->runlength;
- edgept = edgept->next;
- auto dir2 = edgept->dir; // 2nd direction
- // length in dir2
- auto sum2 = edgept->runlength;
- if (((dir1 - dir2 + 1) & 7) < 3) {
- while (edgept->prev->dir == edgept->next->dir) {
- edgept = edgept->next; // look at next
- if (edgept->dir == dir1) {
- // sum lengths
- sum1 += edgept->runlength;
- } else {
- sum2 += edgept->runlength;
- }
- }
-
- if (edgept == loopstart) {
- // finished
- stopped = true;
- }
- if (sum2 + sum1 > 2 && linestart->prev->dir == dir2 &&
- (linestart->prev->runlength > linestart->runlength || sum2 > sum1)) {
- // start is back one
- linestart = linestart->prev;
- linestart->fixed = true;
- }
-
- if (((edgept->next->dir - edgept->dir + 1) & 7) >= 3 ||
- (edgept->dir == dir1 && sum1 >= sum2) ||
- ((edgept->prev->runlength < edgept->runlength ||
- (edgept->dir == dir2 && sum2 >= sum1)) &&
- linestart->next != edgept)) {
- edgept = edgept->next;
- }
- }
- // sharp bend
- edgept->fixed = true;
- }
- // do whole loop
- while (edgept != loopstart && !stopped);
-
- edgept = start;
- do {
- if (((edgept->runlength >= 8) && (edgept->dir != 2) &&
- (edgept->dir != 6)) ||
- ((edgept->runlength >= 8) &&
- ((edgept->dir == 2) || (edgept->dir == 6)))) {
- edgept->fixed = true;
- edgept1 = edgept->next;
- edgept1->fixed = true;
- }
- edgept = edgept->next;
- } while (edgept != start);
-
- edgept = start;
- do {
- // single fixed step
- if (edgept->fixed &&
- edgept->runlength == 1
- // and neighbours free
- && edgept->next->fixed &&
- !edgept->prev->fixed
- // same pair of dirs
- && !edgept->next->next->fixed &&
- edgept->prev->dir == edgept->next->dir &&
- edgept->prev->prev->dir == edgept->next->next->dir &&
- ((edgept->prev->dir - edgept->dir + 1) & 7) < 3) {
- // unfix it
- edgept->fixed = false;
- edgept->next->fixed = false;
- }
- edgept = edgept->next; // do all points
- } while (edgept != start); // until finished
-
- stopped = false;
- if (area < 450) {
- area = 450;
- }
-
- gapmin = area * fixed_dist * fixed_dist / 44000;
-
- edgept = start;
- fixed_count = 0;
- do {
- if (edgept->fixed) {
- fixed_count++;
- }
- edgept = edgept->next;
- } while (edgept != start);
- while (!edgept->fixed) {
- edgept = edgept->next;
- }
- edgefix0 = edgept;
-
- edgept = edgept->next;
- while (!edgept->fixed) {
- edgept = edgept->next;
- }
- edgefix1 = edgept;
-
- edgept = edgept->next;
- while (!edgept->fixed) {
- edgept = edgept->next;
- }
- edgefix2 = edgept;
-
- edgept = edgept->next;
- while (!edgept->fixed) {
- edgept = edgept->next;
- }
- edgefix3 = edgept;
-
- startfix = edgefix2;
-
- do {
- if (fixed_count <= 3) {
- break; // already too few
- }
- d12vec.diff(edgefix1->pos, edgefix2->pos);
- d12 = d12vec.length2();
- // TODO(rays) investigate this change:
- // Only unfix a point if it is part of a low-curvature section
- // of outline and the total angle change of the outlines is
- // less than 90 degrees, ie the scalar product is positive.
- // if (d12 <= gapmin && edgefix0->vec.dot(edgefix2->vec) > 0) {
- if (d12 <= gapmin) {
- d01vec.diff(edgefix0->pos, edgefix1->pos);
- d01 = d01vec.length2();
- d23vec.diff(edgefix2->pos, edgefix3->pos);
- d23 = d23vec.length2();
- if (d01 > d23) {
- edgefix2->fixed = false;
- fixed_count--;
- } else {
- edgefix1->fixed = false;
- fixed_count--;
- edgefix1 = edgefix2;
- }
- } else {
- edgefix0 = edgefix1;
- edgefix1 = edgefix2;
- }
- edgefix2 = edgefix3;
- edgept = edgept->next;
- while (!edgept->fixed) {
- if (edgept == startfix) {
- stopped = true;
- }
- edgept = edgept->next;
- }
- edgefix3 = edgept;
- edgefix = edgefix2;
- } while ((edgefix != startfix) && (!stopped));
-}
-
-/**********************************************************************
- *poly2(startpt,area,path) applies a second approximation to the outline
- *using the points which have been fixed by the first approximation*
- **********************************************************************/
-
-static EDGEPT *poly2( // second poly
- EDGEPT *startpt, // start of loop
- int area // area of blob box
-) {
- EDGEPT *edgept; // current outline point
- EDGEPT *loopstart; // starting point
- EDGEPT *linestart; // start of line
- int edgesum; // correction count
-
- if (area < 1200) {
- area = 1200; // minimum value
- }
-
- loopstart = nullptr; // not found it yet
- edgept = startpt; // start of loop
-
- do {
- // current point fixed and next not
- if (edgept->fixed && !edgept->next->fixed) {
- loopstart = edgept; // start of repoly
- break;
- }
- edgept = edgept->next; // next point
- } while (edgept != startpt); // until found or finished
-
- if (loopstart == nullptr && !startpt->fixed) {
- // fixed start of loop
- startpt->fixed = true;
- loopstart = startpt; // or start of loop
- }
- if (loopstart) {
- do {
- edgept = loopstart; // first to do
- do {
- linestart = edgept;
- edgesum = 0; // sum of lengths
- do {
- // sum lengths
- edgesum += edgept->runlength;
- edgept = edgept->next; // move on
- } while (!edgept->fixed && edgept != loopstart && edgesum < 126);
- if (poly_debug) {
- tprintf("Poly2:starting at (%d,%d)+%d=(%d,%d),%d to (%d,%d)\n",
- linestart->pos.x, linestart->pos.y, linestart->dir,
- linestart->vec.x, linestart->vec.y, edgesum, edgept->pos.x,
- edgept->pos.y);
- }
- // reapproximate
- cutline(linestart, edgept, area);
-
- while (edgept->next->fixed && edgept != loopstart) {
- edgept = edgept->next; // look for next non-fixed
- }
- }
- // do all the loop
- while (edgept != loopstart);
- edgesum = 0;
- do {
- if (edgept->fixed) {
- edgesum++;
- }
- edgept = edgept->next;
- }
- // count fixed pts
- while (edgept != loopstart);
- if (edgesum < 3) {
- area /= 2; // must have 3 pts
- }
- } while (edgesum < 3);
- do {
- linestart = edgept;
- do {
- edgept = edgept->next;
- } while (!edgept->fixed);
- linestart->next = edgept;
- edgept->prev = linestart;
- linestart->vec.x = edgept->pos.x - linestart->pos.x;
- linestart->vec.y = edgept->pos.y - linestart->pos.y;
- } while (edgept != loopstart);
- } else {
- edgept = startpt; // start of loop
- }
-
- loopstart = edgept; // new start
- return loopstart; // correct exit
-}
-
-/**********************************************************************
- * tesspoly_outline
- *
- * Approximate an outline from chain codes form using the old tess algorithm.
- * If allow_detailed_fx is true, the EDGEPTs in the returned TBLOB
- * contain pointers to the input C_OUTLINEs that enable higher-resolution
- * feature extraction that does not use the polygonal approximation.
- **********************************************************************/
-
-TESSLINE *ApproximateOutline(bool allow_detailed_fx, C_OUTLINE *c_outline) {
- EDGEPT stack_edgepts[FASTEDGELENGTH]; // converted path
- EDGEPT *edgepts = stack_edgepts;
-
- // Use heap memory if the stack buffer is not big enough.
- if (c_outline->pathlength() > FASTEDGELENGTH) {
- edgepts = new EDGEPT[c_outline->pathlength()];
- }
-
- // bounding box
- const auto &loop_box = c_outline->bounding_box();
- int32_t area = loop_box.height();
- if (!poly_wide_objects_better && loop_box.width() > area) {
- area = loop_box.width();
- }
- area *= area;
- edgesteps_to_edgepts(c_outline, edgepts);
- fix2(edgepts, area);
- EDGEPT *edgept = poly2(edgepts, area); // 2nd approximation.
- EDGEPT *startpt = edgept;
- EDGEPT *result = nullptr;
- EDGEPT *prev_result = nullptr;
- do {
- auto *new_pt = new EDGEPT;
- new_pt->pos = edgept->pos;
- new_pt->prev = prev_result;
- if (prev_result == nullptr) {
- result = new_pt;
- } else {
- prev_result->next = new_pt;
- new_pt->prev = prev_result;
- }
- if (allow_detailed_fx) {
- new_pt->src_outline = edgept->src_outline;
- new_pt->start_step = edgept->start_step;
- new_pt->step_count = edgept->step_count;
- }
- prev_result = new_pt;
- edgept = edgept->next;
- } while (edgept != startpt);
- prev_result->next = result;
- result->prev = prev_result;
- if (edgepts != stack_edgepts) {
- delete[] edgepts;
- }
- return TESSLINE::BuildFromOutlineList(result);
-}
-
-} // namespace tesseract
diff --git a/src/ccstruct/polyaprx.h b/src/ccstruct/polyaprx.h
deleted file mode 100644
index f2f99ea9..00000000
--- a/src/ccstruct/polyaprx.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/**********************************************************************
- * File: polyaprx.h
- * Description: Code for polygonal approximation from old edgeprog.
- * Author: Ray Smith
- *
- * (C) Copyright 1993, Hewlett-Packard Ltd.
- ** Licensed under the Apache License, Version 2.0 (the "License");
- ** you may not use this file except in compliance with the License.
- ** You may obtain a copy of the License at
- ** http://www.apache.org/licenses/LICENSE-2.0
- ** Unless required by applicable law or agreed to in writing, software
- ** distributed under the License is distributed on an "AS IS" BASIS,
- ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- ** See the License for the specific language governing permissions and
- ** limitations under the License.
- *
- **********************************************************************/
-
-#ifndef POLYAPRX_H
-#define POLYAPRX_H
-
-namespace tesseract {
-
-class C_OUTLINE;
-struct TESSLINE;
-
-// convert a chain-coded input to the old OUTLINE approximation
-TESSLINE *ApproximateOutline(bool allow_detailed_fx, C_OUTLINE *c_outline);
-
-} // namespace tesseract
-
-#endif