"Fossies" - the Fresh Open Source Software Archive  

Source code changes of the file "src/training/unicharset/lstmtrainer.cpp" between
tesseract-5.0.0-rc2.tar.gz and tesseract-5.0.0-rc3.tar.gz

About: Tesseract is an Optical Character Recognition (OCR) engine. Release candidate.

Left column: lstmtrainer.cpp (tesseract-5.0.0-rc2) | Right column: lstmtrainer.cpp (tesseract-5.0.0-rc3)
skipping to change at line 333 skipping to change at line 333
// Reset the inputs, as we have overwritten *this. // Reset the inputs, as we have overwritten *this.
error_rate = CharError(); error_rate = CharError();
iteration = learning_iteration(); iteration = learning_iteration();
PrepareLogMsg(log_msg); PrepareLogMsg(log_msg);
} }
} }
bool result = true; // Something interesting happened. bool result = true; // Something interesting happened.
std::vector<char> rec_model_data; std::vector<char> rec_model_data;
if (error_rate < best_error_rate_) { if (error_rate < best_error_rate_) {
SaveRecognitionDump(&rec_model_data); SaveRecognitionDump(&rec_model_data);
log_msg += " New best char error = " + std::to_string(error_rate); log_msg += " New best BCER = " + std::to_string(error_rate);
log_msg += UpdateErrorGraph(iteration, error_rate, rec_model_data, tester); log_msg += UpdateErrorGraph(iteration, error_rate, rec_model_data, tester);
// If sub_trainer_ is not nullptr, either *this beat it to a new best, or it // If sub_trainer_ is not nullptr, either *this beat it to a new best, or it
// just overwrote *this. In either case, we have finished with it. // just overwrote *this. In either case, we have finished with it.
sub_trainer_.reset(); sub_trainer_.reset();
stall_iteration_ = learning_iteration() + kMinStallIterations; stall_iteration_ = learning_iteration() + kMinStallIterations;
if (TransitionTrainingStage(kStageTransitionThreshold)) { if (TransitionTrainingStage(kStageTransitionThreshold)) {
log_msg += log_msg +=
" Transitioned to stage " + std::to_string(CurrentTrainingStage()); " Transitioned to stage " + std::to_string(CurrentTrainingStage());
} }
SaveTrainingDump(NO_BEST_TRAINER, *this, &best_trainer_); SaveTrainingDump(NO_BEST_TRAINER, *this, &best_trainer_);
skipping to change at line 356 skipping to change at line 356
if (!SaveDataToFile(best_trainer_, best_model_name.c_str())) { if (!SaveDataToFile(best_trainer_, best_model_name.c_str())) {
log_msg += " failed to write best model:"; log_msg += " failed to write best model:";
} else { } else {
log_msg += " wrote best model:"; log_msg += " wrote best model:";
error_rate_of_last_saved_best_ = best_error_rate_; error_rate_of_last_saved_best_ = best_error_rate_;
} }
log_msg += best_model_name; log_msg += best_model_name;
} }
} else if (error_rate > worst_error_rate_) { } else if (error_rate > worst_error_rate_) {
SaveRecognitionDump(&rec_model_data); SaveRecognitionDump(&rec_model_data);
log_msg += " New worst char error = " + std::to_string(error_rate); log_msg += " New worst BCER = " + std::to_string(error_rate);
log_msg += UpdateErrorGraph(iteration, error_rate, rec_model_data, tester); log_msg += UpdateErrorGraph(iteration, error_rate, rec_model_data, tester);
if (worst_error_rate_ > best_error_rate_ + kMinDivergenceRate && if (worst_error_rate_ > best_error_rate_ + kMinDivergenceRate &&
best_error_rate_ < kMinStartedErrorRate && !best_trainer_.empty()) { best_error_rate_ < kMinStartedErrorRate && !best_trainer_.empty()) {
// Error rate has ballooned. Go back to the best model. // Error rate has ballooned. Go back to the best model.
log_msg += "\nDivergence! "; log_msg += "\nDivergence! ";
// Copy best_trainer_ before reading it, as it will get overwritten. // Copy best_trainer_ before reading it, as it will get overwritten.
std::vector<char> revert_data(best_trainer_); std::vector<char> revert_data(best_trainer_);
if (ReadTrainingDump(revert_data, *this)) { if (ReadTrainingDump(revert_data, *this)) {
LogIterations("Reverted to", log_msg); LogIterations("Reverted to", log_msg);
ReduceLearningRates(this, log_msg); ReduceLearningRates(this, log_msg);
skipping to change at line 399 skipping to change at line 399
} }
log_msg += "\n"; log_msg += "\n";
return result; return result;
} }
// Builds a string containing a progress message with current error rates. // Builds a string containing a progress message with current error rates.
void LSTMTrainer::PrepareLogMsg(std::string &log_msg) const { void LSTMTrainer::PrepareLogMsg(std::string &log_msg) const {
LogIterations("At", log_msg); LogIterations("At", log_msg);
log_msg += ", Mean rms=" + std::to_string(error_rates_[ET_RMS]); log_msg += ", Mean rms=" + std::to_string(error_rates_[ET_RMS]);
log_msg += "%, delta=" + std::to_string(error_rates_[ET_DELTA]); log_msg += "%, delta=" + std::to_string(error_rates_[ET_DELTA]);
log_msg += "%, char train=" + std::to_string(error_rates_[ET_CHAR_ERROR]); log_msg += "%, BCER train=" + std::to_string(error_rates_[ET_CHAR_ERROR]);
log_msg += "%, word train=" + std::to_string(error_rates_[ET_WORD_RECERR]); log_msg += "%, BWER train=" + std::to_string(error_rates_[ET_WORD_RECERR]);
log_msg += "%, skip ratio=" + std::to_string(error_rates_[ET_SKIP_RATIO]); log_msg += "%, skip ratio=" + std::to_string(error_rates_[ET_SKIP_RATIO]);
log_msg += "%, "; log_msg += "%, ";
} }
// Appends <intro_str> iteration learning_iteration()/training_iteration()/ // Appends <intro_str> iteration learning_iteration()/training_iteration()/
// sample_iteration() to the log_msg. // sample_iteration() to the log_msg.
void LSTMTrainer::LogIterations(const char *intro_str, void LSTMTrainer::LogIterations(const char *intro_str,
std::string &log_msg) const { std::string &log_msg) const {
log_msg += intro_str; log_msg += intro_str;
log_msg += " iteration " + std::to_string(learning_iteration()); log_msg += " iteration " + std::to_string(learning_iteration());
skipping to change at line 928 skipping to change at line 928
trainingdata->language().c_str()); trainingdata->language().c_str());
return UNENCODABLE; return UNENCODABLE;
} }
bool upside_down = false; bool upside_down = false;
if (randomly_rotate_) { if (randomly_rotate_) {
// This ensures consistent training results. // This ensures consistent training results.
SetRandomSeed(); SetRandomSeed();
upside_down = randomizer_.SignedRand(1.0) > 0.0; upside_down = randomizer_.SignedRand(1.0) > 0.0;
if (upside_down) { if (upside_down) {
// Modify the truth labels to match the rotation: // Modify the truth labels to match the rotation:
// Apart from space and null, increment the label. This is changes the // Apart from space and null, increment the label. This changes the
// script-id to the same script-id but upside-down. // script-id to the same script-id but upside-down.
// The labels need to be reversed in order, as the first is now the last. // The labels need to be reversed in order, as the first is now the last.
for (auto truth_label : truth_labels) { for (auto truth_label : truth_labels) {
if (truth_label != UNICHAR_SPACE && truth_label != null_char_) { if (truth_label != UNICHAR_SPACE && truth_label != null_char_) {
++truth_label; ++truth_label;
} }
} }
std::reverse(truth_labels.begin(), truth_labels.end()); std::reverse(truth_labels.begin(), truth_labels.end());
} }
} }
End of changes. 4 change blocks.
5 lines changed or deleted (left column); 5 lines changed or added (right column).

Home  |  About  |  Features  |  All  |  Newest  |  Dox  |  Diffs  |  RSS Feeds  |  Screenshots  |  Comments  |  Imprint  |  Privacy  |  HTTP(S)