/usr/include/tesseract/wordrec.h is part of the package libtesseract-dev 3.02.01-2.
The file is owned by root:root, with mode 0644 (rw-r--r--).
The actual contents of the file are shown below.
///////////////////////////////////////////////////////////////////////
// File: wordrec.h
// Description: wordrec class.
// Author: Samuel Charron
//
// (C) Copyright 2006, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#ifndef TESSERACT_WORDREC_WORDREC_H__
#define TESSERACT_WORDREC_WORDREC_H__
#include "associate.h"
#include "classify.h"
#include "dict.h"
#include "language_model.h"
#include "ratngs.h"
#include "matrix.h"
#include "matchtab.h"
#include "oldheap.h"
#include "gradechop.h"
#include "seam.h"
#include "states.h"
#include "findseam.h"
#include "callcpp.h"
struct CHUNKS_RECORD;
struct SEARCH_RECORD;
class WERD_RES;
// A struct for storing child/parent pairs of the BLOB_CHOICE_LISTs
// to be processed by the segmentation search.
// Work item for the segmentation search: pairs a parent BLOB_CHOICE_LIST
// with the ratings-matrix row of a child whose language model state still
// has to be (re)computed.
struct SEG_SEARCH_PENDING : public ELIST_LINK {
  SEG_SEARCH_PENDING(int child_row_arg,
                     BLOB_CHOICE_LIST *parent_arg,
                     tesseract::LanguageModelFlagsType changed_arg) :
      child_row(child_row_arg), parent(parent_arg), changed(changed_arg) {}

  // Ordering callback for add_sorted(). Two entries compare equal only
  // when both the row and the parent pointer match; otherwise ordering is
  // by child_row (equal rows with distinct parents compare as "greater").
  static int compare(const void *p1, const void *p2) {
    const SEG_SEARCH_PENDING *lhs =
        *static_cast<const SEG_SEARCH_PENDING *const *>(p1);
    const SEG_SEARCH_PENDING *rhs =
        *static_cast<const SEG_SEARCH_PENDING *const *>(p2);
    const bool same_entry = lhs->child_row == rhs->child_row &&
                            lhs->parent == rhs->parent;
    if (same_entry) return 0;
    return (lhs->child_row < rhs->child_row) ? -1 : 1;
  }

  int child_row;             // row of the child in the ratings matrix
  BLOB_CHOICE_LIST *parent;  // pointer to the parent BLOB_CHOICE_LIST
  // Flags naming the language model components that recorded changes on
  // the parent path and must therefore be invoked again for this pending
  // entry. Passed as an argument to LanguageModel::UpdateState() from
  // Wordrec::UpdateSegSearchNodes().
  tesseract::LanguageModelFlagsType changed;
};
ELISTIZEH(SEG_SEARCH_PENDING);
namespace tesseract {
/* ccmain/tstruct.cpp *********************************************************/
// Holds a pair of outline EDGEPTs plus copies of their coordinates;
// presumably delimits a chopped outline fragment (implementation lives in
// ccmain/tstruct.cpp) -- TODO confirm against that file.
class FRAGMENT:public ELIST_LINK
{
public:
FRAGMENT() { // default constructor: members are left uninitialized
}
FRAGMENT(EDGEPT *head_pt, // start point of the fragment
EDGEPT *tail_pt); // end point of the fragment
ICOORD head; //coords of start
ICOORD tail; //coords of end
EDGEPT *headpt; //start point
EDGEPT *tailpt; //end point
};
ELISTIZEH(FRAGMENT)
// Wordrec implements the word-recognition stage on top of Classify:
// blob chopping (chop.cpp / chopper.cpp), seam selection (findseam.cpp /
// gradechop.cpp), the best-first segmentation search (bestfirst.cpp /
// heuristic.cpp) and the newer SegSearch path (segsearch.cpp) that is
// driven by a LanguageModel. Methods below are grouped by the .cpp file
// that defines them.
class Wordrec : public Classify {
public:
// config parameters *******************************************************
BOOL_VAR_H(merge_fragments_in_matrix, TRUE,
"Merge the fragments in the ratings matrix and delete them "
"after merging");
BOOL_VAR_H(wordrec_no_block, FALSE, "Don't output block information");
BOOL_VAR_H(wordrec_enable_assoc, TRUE, "Associator Enable");
BOOL_VAR_H(force_word_assoc, FALSE,
"force associator to run regardless of what enable_assoc is."
"This is used for CJK where component grouping is necessary.");
INT_VAR_H(wordrec_num_seg_states, 30, "Segmentation states");
double_VAR_H(wordrec_worst_state, 1, "Worst segmentation state");
BOOL_VAR_H(fragments_guide_chopper, FALSE,
"Use information from fragments to guide chopping process");
INT_VAR_H(repair_unchopped_blobs, 1, "Fix blobs that aren't chopped");
double_VAR_H(tessedit_certainty_threshold, -2.25, "Good blob limit");
INT_VAR_H(chop_debug, 0, "Chop debug");
BOOL_VAR_H(chop_enable, 1, "Chop enable");
BOOL_VAR_H(chop_vertical_creep, 0, "Vertical creep");
INT_VAR_H(chop_split_length, 10000, "Split Length");
INT_VAR_H(chop_same_distance, 2, "Same distance");
INT_VAR_H(chop_min_outline_points, 6, "Min Number of Points on Outline");
INT_VAR_H(chop_inside_angle, -50, "Min Inside Angle Bend");
INT_VAR_H(chop_min_outline_area, 2000, "Min Outline Area");
double_VAR_H(chop_split_dist_knob, 0.5, "Split length adjustment");
double_VAR_H(chop_overlap_knob, 0.9, "Split overlap adjustment");
double_VAR_H(chop_center_knob, 0.15, "Split center adjustment");
double_VAR_H(chop_sharpness_knob, 0.06, "Split sharpness adjustment");
double_VAR_H(chop_width_change_knob, 5.0, "Width change adjustment");
double_VAR_H(chop_ok_split, 100.0, "OK split limit");
double_VAR_H(chop_good_split, 50.0, "Good split limit");
INT_VAR_H(chop_x_y_weight, 3, "X / Y length weight");
INT_VAR_H(segment_adjust_debug, 0, "Segmentation adjustment debug");
BOOL_VAR_H(assume_fixed_pitch_char_segment, FALSE,
"include fixed-pitch heuristics in char segmentation");
BOOL_VAR_H(use_new_state_cost, FALSE,
"use new state cost heuristics for segmentation state evaluation");
double_VAR_H(heuristic_segcost_rating_base, 1.25,
"base factor for adding segmentation cost into word rating."
"It's a multiplying factor, the larger the value above 1, "
"the bigger the effect of segmentation cost.");
double_VAR_H(heuristic_weight_rating, 1,
"weight associated with char rating in combined cost of state");
double_VAR_H(heuristic_weight_width, 0,
"weight associated with width evidence in combined cost of state");
double_VAR_H(heuristic_weight_seamcut, 0,
"weight associated with seam cut in combined cost of state");
double_VAR_H(heuristic_max_char_wh_ratio, 2.0,
"max char width-to-height ratio allowed in segmentation");
INT_VAR_H(wordrec_debug_level, 0, "Debug level for wordrec");
BOOL_VAR_H(wordrec_debug_blamer, false, "Print blamer debug messages");
BOOL_VAR_H(wordrec_run_blamer, false, "Try to set the blame for errors");
BOOL_VAR_H(enable_new_segsearch, false,
"Enable new segmentation search path.");
INT_VAR_H(segsearch_debug_level, 0, "SegSearch debug level");
INT_VAR_H(segsearch_max_pain_points, 2000,
"Maximum number of pain points stored in the queue");
INT_VAR_H(segsearch_max_futile_classifications, 10,
"Maximum number of pain point classifications per word.");
double_VAR_H(segsearch_max_char_wh_ratio, 2.0,
"Maximum character width-to-height ratio");
double_VAR_H(segsearch_max_fixed_pitch_char_wh_ratio, 2.0,
"Maximum character width-to-height ratio for"
"fixed pitch fonts");
BOOL_VAR_H(save_alt_choices, false,
"Save alternative paths found during chopping "
"and segmentation search");
// methods from wordrec/*.cpp ***********************************************
Wordrec();
virtual ~Wordrec();
// Deep-copies the blob choice lists in "from" into "to".
void CopyCharChoices(const BLOB_CHOICE_LIST_VECTOR &from,
BLOB_CHOICE_LIST_VECTOR *to);
// Returns true if text recorded in choice is the same as truth_text.
bool ChoiceIsCorrect(const UNICHARSET& uni_set,
const WERD_CHOICE *choice,
const GenericVector<STRING> &truth_text);
// Fills word->alt_choices with alternative paths found during
// chopping/segmentation search that are kept in best_choices.
// TODO(antonova): the function currently saves unchar ids, rating and
// certainty information for each alternative choice.
// We might need to add saving blob choices and segmentation state
// associated with each alt choice if needed.
void SaveAltChoices(const LIST &best_choices, WERD_RES *word);
// Fills character choice lattice in the given BlamerBundle
// using the given ratings matrix and best choice list.
void FillLattice(const MATRIX &ratings, const LIST &best_choices,
const UNICHARSET &unicharset, BlamerBundle *blamer_bundle);
// Calls fill_lattice_ member function
// (assumes that fill_lattice_ is not NULL).
void CallFillLattice(const MATRIX &ratings, const LIST &best_choices,
const UNICHARSET &unicharset, BlamerBundle *blamer_bundle) {
(this->*fill_lattice_)(ratings, best_choices, unicharset, blamer_bundle);
}
// tface.cpp
void program_editup(const char *textbase,
bool init_classifier,
bool init_permute);
BLOB_CHOICE_LIST_VECTOR *cc_recog(WERD_RES *word);
void program_editdown(inT32 elasped_time);
void set_pass1();
void set_pass2();
int end_recog();
BLOB_CHOICE_LIST *call_matcher(const DENORM* denorm, TBLOB* blob);
int dict_word(const WERD_CHOICE &word);
// wordclass.cpp
BLOB_CHOICE_LIST *classify_blob(TBLOB *blob,
const DENORM& denorm,
const char *string,
C_COL color,
BlamerBundle *blamer_bundle);
// Builds a single-entry choice list for class_id with the given scores,
// without running the classifier.
BLOB_CHOICE_LIST *fake_classify_blob(UNICHAR_ID class_id,
float rating, float certainty);
void update_blob_classifications(TWERD *word,
const BLOB_CHOICE_LIST_VECTOR &choices);
// bestfirst.cpp
BLOB_CHOICE_LIST_VECTOR *evaluate_chunks(CHUNKS_RECORD *chunks_record,
SEARCH_STATE search_state,
BlamerBundle *blamer_bundle);
void update_ratings(const BLOB_CHOICE_LIST_VECTOR &new_choices,
const CHUNKS_RECORD *chunks_record,
const SEARCH_STATE search_state);
inT16 evaluate_state(CHUNKS_RECORD *chunks_record,
SEARCH_RECORD *the_search,
DANGERR *fixpt,
BlamerBundle *blamer_bundle);
SEARCH_RECORD *new_search(CHUNKS_RECORD *chunks_record,
int num_joints,
BLOB_CHOICE_LIST_VECTOR *best_char_choices,
WERD_CHOICE *best_choice,
WERD_CHOICE *raw_choice,
STATE *state);
void best_first_search(CHUNKS_RECORD *chunks_record,
BLOB_CHOICE_LIST_VECTOR *best_char_choices,
WERD_RES *word,
STATE *state,
DANGERR *fixpt,
STATE *best_state);
void delete_search(SEARCH_RECORD *the_search);
void expand_node(FLOAT32 worst_priority,
CHUNKS_RECORD *chunks_record,
SEARCH_RECORD *the_search);
void replace_char_widths(CHUNKS_RECORD *chunks_record,
SEARCH_STATE state);
// Transfers the given state to the word's output fields: rebuild_word,
// best_state, box_word, and returns the corresponding blob choices.
BLOB_CHOICE_LIST_VECTOR *rebuild_current_state(
WERD_RES *word,
STATE *state,
BLOB_CHOICE_LIST_VECTOR *char_choices,
MATRIX *ratings);
// Creates a fake blob choice from the combination of the given fragments.
// unichar is the class to be made from the combination,
// expanded_fragment_lengths[choice_index] is the number of fragments to use.
// old_choices[choice_index] has the classifier output for each fragment.
// choice index initially indexes the last fragment and should be decremented
// expanded_fragment_lengths[choice_index] times to get the earlier fragments.
// Guarantees to return something non-null, or abort!
BLOB_CHOICE* rebuild_fragments(
const char* unichar,
const char* expanded_fragment_lengths,
int choice_index,
BLOB_CHOICE_LIST_VECTOR *old_choices);
// Creates a joined copy of the blobs between x and y (inclusive) and
// insert into the rebuild_word in word.
// Returns a deep copy of the classifier results for the blob.
BLOB_CHOICE_LIST *join_blobs_and_classify(
WERD_RES* word, int x, int y, int choice_index, MATRIX *ratings,
BLOB_CHOICE_LIST_VECTOR *old_choices);
STATE *pop_queue(HEAP *queue);
void push_queue(HEAP *queue, STATE *state, FLOAT32 worst_priority,
FLOAT32 priority, bool debug);
// segsearch.cpp
// SegSearch works on the lower diagonal matrix of BLOB_CHOICE_LISTs.
// Each entry in the matrix represents the classification choice
// for a chunk, i.e. an entry in row 2, column 1 represents the list
// of ratings for the chunks 1 and 2 classified as a single blob.
// The entries on the diagonal of the matrix are classifier choice lists
// for a single chunk from the maximal segmentation.
//
// The ratings matrix given to SegSearch represents the segmentation
// graph / trellis for the current word. The nodes in the graph are the
// individual BLOB_CHOICEs in each of the BLOB_CHOICE_LISTs in the ratings
// matrix. The children of each node (nodes connected by outgoing links)
// are the entries in the column that is equal to node's row+1. The parents
// (nodes connected by the incoming links) are the entries in the row that
// is equal to the node's column-1. Here is an example ratings matrix:
//
// 0 1 2 3 4
// -------------------------
// 0| c,( |
// 1| d l,1 |
// 2| o |
// 3| c,( |
// 4| g,y l,1 |
// -------------------------
//
// In the example above node "o" has children (outgoing connection to nodes)
// "c","(","g","y" and parents (incoming connections from nodes) "l","1","d".
//
// The objective of the search is to find the least cost path, where the cost
// is determined by the language model components and the properties of the
// cut between the blobs on the path. SegSearch starts by populating the
// matrix with the all the entries that were classified by the chopper and
// finding the initial best path. Based on the classifier ratings, language
// model scores and the properties of each cut, a list of "pain points" is
// constructed - those are the points on the path where the choices do not
// look consistent with the neighboring choices, the cuts look particularly
// problematic, or the certainties of the blobs are low. The most troublesome
// "pain point" is picked from the list and the new entry in the ratings
// matrix corresponding to this "pain point" is filled in. Then the language
// model state is updated to reflect the new classification and the new
// "pain points" are added to the list and the next most troublesome
// "pain point" is determined. This continues until either the word choice
// composed from the best paths in the segmentation graph is "good enough"
// (e.g. above a certain certainty threshold, is an unambiguous dictionary
// word, etc) or there are no more "pain points" to explore.
void SegSearch(CHUNKS_RECORD *chunks_record,
WERD_CHOICE *best_choice,
BLOB_CHOICE_LIST_VECTOR *best_char_choices,
WERD_CHOICE *raw_choice,
STATE *output_best_state,
BlamerBundle *blamer_bundle);
// chop.cpp
PRIORITY point_priority(EDGEPT *point);
void add_point_to_list(POINT_GROUP point_list, EDGEPT *point);
int angle_change(EDGEPT *point1, EDGEPT *point2, EDGEPT *point3);
int is_little_chunk(EDGEPT *point1, EDGEPT *point2);
int is_small_area(EDGEPT *point1, EDGEPT *point2);
EDGEPT *pick_close_point(EDGEPT *critical_point,
EDGEPT *vertical_point,
int *best_dist);
void prioritize_points(TESSLINE *outline, POINT_GROUP points);
void new_min_point(EDGEPT *local_min, POINT_GROUP points);
void new_max_point(EDGEPT *local_max, POINT_GROUP points);
void vertical_projection_point(EDGEPT *split_point, EDGEPT *target_point,
EDGEPT** best_point,
EDGEPT_CLIST *new_points);
// chopper.cpp
SEAM *attempt_blob_chop(TWERD *word, TBLOB *blob, inT32 blob_number,
bool italic_blob, SEAMS seam_list);
SEAM *chop_numbered_blob(TWERD *word, inT32 blob_number,
bool italic_blob, SEAMS seam_list);
SEAM *chop_overlapping_blob(const GenericVector<TBOX>& boxes,
WERD_RES *word_res, inT32 *blob_number,
bool italic_blob, SEAMS seam_list);
bool improve_one_blob(WERD_RES *word_res,
BLOB_CHOICE_LIST_VECTOR *char_choices,
inT32 *blob_number,
SEAMS *seam_list,
DANGERR *fixpt,
bool split_next_to_fragment,
BlamerBundle *blamer_bundle);
void modify_blob_choice(BLOB_CHOICE_LIST *answer,
int chop_index);
bool chop_one_blob(TWERD *word,
BLOB_CHOICE_LIST_VECTOR *char_choices,
inT32 *blob_number,
SEAMS *seam_list,
int *right_chop_index);
bool chop_one_blob2(const GenericVector<TBOX>& boxes,
WERD_RES *word_res, SEAMS *seam_list);
// Top-level entry point for chopping a word into blobs and producing the
// per-blob classifier choices.
BLOB_CHOICE_LIST_VECTOR *chop_word_main(WERD_RES *word);
void improve_by_chopping(WERD_RES *word,
BLOB_CHOICE_LIST_VECTOR *char_choices,
STATE *best_state,
BLOB_CHOICE_LIST_VECTOR *best_char_choices,
DANGERR *fixpt,
bool *updated_best_choice);
// NOTE(review): "matrtix" in the parameter name below is a typo kept for
// source compatibility with the definition in wordrec/*.cpp.
MATRIX *word_associator(bool only_create_ratings_matrtix,
WERD_RES *word,
STATE *state,
BLOB_CHOICE_LIST_VECTOR *best_char_choices,
DANGERR *fixpt,
STATE *best_state);
inT16 select_blob_to_split(const BLOB_CHOICE_LIST_VECTOR &char_choices,
float rating_ceiling,
bool split_next_to_fragment);
void set_chopper_blame(WERD_RES *word);
// findseam.cpp
void junk_worst_seam(SEAM_QUEUE seams, SEAM *new_seam, float new_priority);
void choose_best_seam(SEAM_QUEUE seam_queue,
SEAM_PILE *seam_pile,
SPLIT *split,
PRIORITY priority,
SEAM **seam_result,
TBLOB *blob);
void combine_seam(SEAM_QUEUE seam_queue, SEAM_PILE seam_pile, SEAM *seam);
inT16 constrained_split(SPLIT *split, TBLOB *blob);
void delete_seam_pile(SEAM_PILE seam_pile);
SEAM *pick_good_seam(TBLOB *blob);
PRIORITY seam_priority(SEAM *seam, inT16 xmin, inT16 xmax);
void try_point_pairs (EDGEPT * points[MAX_NUM_POINTS],
inT16 num_points,
SEAM_QUEUE seam_queue,
SEAM_PILE * seam_pile, SEAM ** seam, TBLOB * blob);
void try_vertical_splits(EDGEPT * points[MAX_NUM_POINTS],
inT16 num_points,
EDGEPT_CLIST *new_points,
SEAM_QUEUE seam_queue,
SEAM_PILE * seam_pile, SEAM ** seam, TBLOB * blob);
// gradechop.cpp
PRIORITY full_split_priority(SPLIT *split, inT16 xmin, inT16 xmax);
PRIORITY grade_center_of_blob(register BOUNDS_RECT rect);
PRIORITY grade_overlap(register BOUNDS_RECT rect);
PRIORITY grade_split_length(register SPLIT *split);
PRIORITY grade_sharpness(register SPLIT *split);
PRIORITY grade_width_change(register BOUNDS_RECT rect);
void set_outline_bounds(register EDGEPT *point1,
register EDGEPT *point2,
BOUNDS_RECT rect);
// outlines.cpp
int crosses_outline(EDGEPT *p0, EDGEPT *p1, EDGEPT *outline);
int is_crossed(TPOINT a0, TPOINT a1, TPOINT b0, TPOINT b1);
int is_same_edgept(EDGEPT *p1, EDGEPT *p2);
bool near_point(EDGEPT *point, EDGEPT *line_pt_0, EDGEPT *line_pt_1,
EDGEPT **near_pt);
void reverse_outline(EDGEPT *outline);
// pieces.cpp
virtual BLOB_CHOICE_LIST *classify_piece(TBLOB *pieces,
const DENORM& denorm,
SEAMS seams,
inT16 start,
inT16 end,
BlamerBundle *blamer_bundle);
// Try to merge fragments in the ratings matrix and put the result in
// the corresponding row and column
void merge_fragments(MATRIX *ratings,
inT16 num_blobs);
// Recursively go through the ratings matrix to find lists of fragments
// to be merged in the function merge_and_put_fragment_lists.
// current_frag is the postion of the piece we are looking for.
// current_row is the row in the rating matrix we are currently at.
// start is the row we started initially, so that we can know where
// to append the results to the matrix. num_frag_parts is the total
// number of pieces we are looking for and num_blobs is the size of the
// ratings matrix.
void get_fragment_lists(inT16 current_frag,
inT16 current_row,
inT16 start,
inT16 num_frag_parts,
inT16 num_blobs,
MATRIX *ratings,
BLOB_CHOICE_LIST *choice_lists);
// Merge the fragment lists in choice_lists and append it to the
// ratings matrix
void merge_and_put_fragment_lists(inT16 row,
inT16 column,
inT16 num_frag_parts,
BLOB_CHOICE_LIST *choice_lists,
MATRIX *ratings);
// Filter the fragment list so that the filtered_choices only contain
// fragments that are in the correct position. choices is the list
// that we are going to filter. fragment_pos is the position in the
// fragment that we are looking for and num_frag_parts is the the
// total number of pieces. The result will be appended to
// filtered_choices.
void fill_filtered_fragment_list(BLOB_CHOICE_LIST *choices,
int fragment_pos,
int num_frag_parts,
BLOB_CHOICE_LIST *filtered_choices);
BLOB_CHOICE_LIST *get_piece_rating(MATRIX *ratings,
TBLOB *blobs,
const DENORM& denorm,
SEAMS seams,
inT16 start,
inT16 end,
BlamerBundle *blamer_bundle);
// returns an array of bounding boxes for the given list of blobs.
TBOX *record_blob_bounds(TBLOB *blobs);
MATRIX *record_piece_ratings(TBLOB *blobs);
// heuristic.cpp
WIDTH_RECORD* state_char_widths(WIDTH_RECORD *chunk_widths,
STATE *state,
int num_joints);
FLOAT32 get_width_variance(WIDTH_RECORD *wrec, float norm_height);
FLOAT32 get_gap_variance(WIDTH_RECORD *wrec, float norm_height);
FLOAT32 prioritize_state(CHUNKS_RECORD *chunks_record,
SEARCH_RECORD *the_search);
FLOAT32 width_priority(CHUNKS_RECORD *chunks_record,
STATE *state,
int num_joints);
FLOAT32 seamcut_priority(SEAMS seams,
STATE *state,
int num_joints);
FLOAT32 rating_priority(CHUNKS_RECORD *chunks_record,
STATE *state,
int num_joints);
// Member variables.
// Language model consulted by the segmentation search; SegSearchDone()
// below queries its AcceptableChoiceFound().
LanguageModel *language_model_;
// Search bookkeeping (pass-2 split limits, joint/queue counters,
// match table, last segmentation) -- maintained by the wordrec/*.cpp
// implementations, which are not visible in this header.
PRIORITY pass2_ok_split;
int pass2_seg_states;
int num_joints;
int num_pushed;
int num_popped;
BlobMatchTable blob_match_table;
EVALUATION_ARRAY last_segmentation;
// Stores the best choice for the previous word in the paragraph.
// This variable is modified by PAGE_RES_IT when iterating over
// words to OCR on the page.
WERD_CHOICE *prev_word_best_choice_;
// Sums of blame reasons computed by the blamer.
GenericVector<int> blame_reasons_;
// Function used to fill char choice lattices.
void (Wordrec::*fill_lattice_)(const MATRIX &ratings,
const LIST &best_choices,
const UNICHARSET &unicharset,
BlamerBundle *blamer_bundle);
protected:
// True when the language model has accepted a choice or the budget of
// futile pain-point classifications has been exhausted.
inline bool SegSearchDone(int num_futile_classifications) {
return (language_model_->AcceptableChoiceFound() ||
num_futile_classifications >=
segsearch_max_futile_classifications);
}
// Updates the language model state recorded for the child entries specified
// in pending[starting_col]. Enqueues the children of the updated entries
// into pending and proceeds to update (and remove from pending) all the
// remaining entries in pending[col] (col >= starting_col). Upon termination
// of this function all the pending[col] lists will be empty.
//
// The arguments:
//
// starting_col: index of the column in chunks_record->ratings from
// which the update should be started
//
// pending: list of entries listing chunks_record->ratings entries
// that should be updated
//
// pain_points: priority heap listing the pain points generated by
// the language model
//
// temp_pain_points: temporary storage for tentative pain points generated
// by the language model after a single call to LanguageModel::UpdateState()
// (the argument is passed in rather than created before each
// LanguageModel::UpdateState() call to avoid dynamic memory re-allocation)
//
// best_choice_bundle: a collection of variables that should be updated
// if a new best choice is found
//
void UpdateSegSearchNodes(int starting_col,
SEG_SEARCH_PENDING_LIST *pending[],
BestPathByColumn *best_path_by_column[],
CHUNKS_RECORD *chunks_record,
HEAP *pain_points,
BestChoiceBundle *best_choice_bundle,
BlamerBundle *blamer_bundle);
// Process the given pain point: classify the corresponding blob, enqueue
// new pain points to join the newly classified blob with its neighbors.
void ProcessSegSearchPainPoint(float pain_point_priority,
const MATRIX_COORD &pain_point,
const WERD_CHOICE *best_choice,
SEG_SEARCH_PENDING_LIST *pending[],
CHUNKS_RECORD *chunks_record,
HEAP *pain_points,
BlamerBundle *blamer_bundle);
// Add pain points for classifying blobs on the correct segmentation path
// (so that we can evaluate correct segmentation path and discover the reason
// for incorrect result).
void InitBlamerForSegSearch(const WERD_CHOICE *best_choice,
CHUNKS_RECORD *chunks_record,
HEAP *pain_points,
BlamerBundle *blamer_bundle,
STRING *blamer_debug);
// Analyze the contents of BlamerBundle and set incorrect result reason.
void FinishBlamerForSegSearch(const WERD_CHOICE *best_choice,
BlamerBundle *blamer_bundle,
STRING *blamer_debug);
};
} // namespace tesseract
#endif // TESSERACT_WORDREC_WORDREC_H__
|