"Fossies" - the Fresh Open Source Software Archive  

Source code changes of the file "qt/src/Recognizer.cc" between
gimagereader-3.3.0.tar.xz and gimagereader-3.3.1.tar.xz

About: gImageReader is a graphical GTK frontend to the tesseract OCR engine (requires Python).

Recognizer.cc  (gimagereader-3.3.0.tar.xz):Recognizer.cc  (gimagereader-3.3.1.tar.xz)
/* -*- Mode: C++; indent-tabs-mode: t; c-basic-offset: 4; tab-width: 4 -*- */ /* -*- Mode: C++; indent-tabs-mode: t; c-basic-offset: 4; tab-width: 4 -*- */
/* /*
* Recognizer.hh * Recognizer.hh
* Copyright (C) 2013-2018 Sandro Mani <manisandro@gmail.com> * Copyright (C) 2013-2019 Sandro Mani <manisandro@gmail.com>
* *
* gImageReader is free software: you can redistribute it and/or modify it * gImageReader is free software: you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the * under the terms of the GNU General Public License as published by the
* Free Software Foundation, either version 3 of the License, or * Free Software Foundation, either version 3 of the License, or
* (at your option) any later version. * (at your option) any later version.
* *
* gImageReader is distributed in the hope that it will be useful, but * gImageReader is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of * WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
* See the GNU General Public License for more details. * See the GNU General Public License for more details.
skipping to change at line 88 skipping to change at line 88
QAction* multiplePagesAction = new QAction(_("Multiple Pages..."), this); QAction* multiplePagesAction = new QAction(_("Multiple Pages..."), this);
multiplePagesAction->setData(static_cast<int>(PageSelection::Multiple)); multiplePagesAction->setData(static_cast<int>(PageSelection::Multiple));
m_menuPages = new QMenu(ui.toolButtonRecognize); m_menuPages = new QMenu(ui.toolButtonRecognize);
m_menuPages->addAction(currentPageAction); m_menuPages->addAction(currentPageAction);
m_menuPages->addAction(multiplePagesAction); m_menuPages->addAction(multiplePagesAction);
m_pagesDialog = new QDialog(MAIN); m_pagesDialog = new QDialog(MAIN);
m_pagesDialogUi.setupUi(m_pagesDialog); m_pagesDialogUi.setupUi(m_pagesDialog);
m_charListDialog = new QDialog(MAIN);
m_charListDialogUi.setupUi(m_charListDialog);
ui.toolButtonRecognize->setText(QString("%1\n%2").arg(m_modeLabel).arg(m_langLabel)); ui.toolButtonRecognize->setText(QString("%1\n%2").arg(m_modeLabel).arg(m_langLabel));
ui.menuLanguages->installEventFilter(this); ui.menuLanguages->installEventFilter(this);
connect(ui.toolButtonRecognize, SIGNAL(clicked()), this, SLOT(recognizeButtonClicked())); connect(ui.toolButtonRecognize, SIGNAL(clicked()), this, SLOT(recognizeButtonClicked()));
connect(currentPageAction, SIGNAL(triggered()), this, SLOT(recognizeCurrentPage())); connect(currentPageAction, SIGNAL(triggered()), this, SLOT(recognizeCurrentPage()));
connect(multiplePagesAction, SIGNAL(triggered()), this, SLOT(recognizeMultiplePages())); connect(multiplePagesAction, SIGNAL(triggered()), this, SLOT(recognizeMultiplePages()));
connect(m_pagesDialogUi.lineEditPageRange, SIGNAL(textChanged(QString)), this, SLOT(clearLineEditPageRangeStyle())); connect(m_pagesDialogUi.lineEditPageRange, SIGNAL(textChanged(QString)), this, SLOT(clearLineEditPageRangeStyle()));
connect(m_charListDialogUi.radioButtonBlacklist, SIGNAL(toggled(bool)), m_charListDialogUi.lineEditBlacklist, SLOT(setEnabled(bool)));
connect(m_charListDialogUi.radioButtonWhitelist, SIGNAL(toggled(bool)), m_charListDialogUi.lineEditWhitelist, SLOT(setEnabled(bool)));
ADD_SETTING(VarSetting<QString>("language", "eng:en_EN")); ADD_SETTING(VarSetting<QString>("language", "eng:en_EN"));
ADD_SETTING(ComboSetting("ocrregionstrategy", m_pagesDialogUi.comboBoxRecognitionArea, 0)); ADD_SETTING(ComboSetting("ocrregionstrategy", m_pagesDialogUi.comboBoxRecognitionArea, 0));
ADD_SETTING(SwitchSetting("ocraddsourcefilename", m_pagesDialogUi.checkBoxPrependFilename)); ADD_SETTING(SwitchSetting("ocraddsourcefilename", m_pagesDialogUi.checkBoxPrependFilename));
ADD_SETTING(SwitchSetting("ocraddsourcepage", m_pagesDialogUi.checkBoxPrependPage)); ADD_SETTING(SwitchSetting("ocraddsourcepage", m_pagesDialogUi.checkBoxPrependPage));
ADD_SETTING(LineEditSetting("ocrcharwhitelist", m_charListDialogUi.lineEditWhitelist));
ADD_SETTING(LineEditSetting("ocrcharblacklist", m_charListDialogUi.lineEditBlacklist));
ADD_SETTING(SwitchSetting("ocrblacklistenabled", m_charListDialogUi.radioButtonBlacklist, true));
ADD_SETTING(SwitchSetting("ocrwhitelistenabled", m_charListDialogUi.radioButtonWhitelist, false));
ADD_SETTING(VarSetting<int>("psm", 6)); ADD_SETTING(VarSetting<int>("psm", 6));
} }
QStringList Recognizer::getAvailableLanguages() const { QStringList Recognizer::getAvailableLanguages() const {
tesseract::TessBaseAPI tess = initTesseract(); tesseract::TessBaseAPI tess = initTesseract();
GenericVector<STRING> availLanguages; GenericVector<STRING> availLanguages;
tess.GetAvailableLanguagesAsVector(&availLanguages); tess.GetAvailableLanguagesAsVector(&availLanguages);
QStringList result; QStringList result;
for(int i = 0; i < availLanguages.size(); ++i) { for(int i = 0; i < availLanguages.size(); ++i) {
result.append(availLanguages[i].string()); result.append(availLanguages[i].string());
skipping to change at line 123 skipping to change at line 132
bool s2Script = s2.startsWith("script") || s2.left(1) == s2.left(1).toUpper(); bool s2Script = s2.startsWith("script") || s2.left(1) == s2.left(1).toUpper();
if(s1Script != s2Script) { if(s1Script != s2Script) {
return !s1Script; return !s1Script;
} else { } else {
return s1 < s2; return s1 < s2;
} }
}); });
return result; return result;
} }
static int g_pipe[2];
static jmp_buf g_restore_point;
static void tessCrashHandler(int /*signal*/) {
fflush(stderr);
char buf[1025];
int bytesRead = 0;
QString captured;
do {
if((bytesRead = read(g_pipe[0], buf, sizeof(buf) - 1)) > 0) {
buf[bytesRead] = 0;
captured += buf;
}
} while(bytesRead == sizeof(buf) - 1);
tesseract::TessBaseAPI tess;
QString errMsg = QString(_("Tesseract crashed with the following message:\n\n"
"%1\n\n"
"This typically happens for one of the following reasons:\n"
"- Outdated traineddata files are used.\n"
"- Auxiliary language data files are missing.\n"
"- Corrupt language data files.\n\n"
"Make sure your language data files are valid and compatible with tesseract %2.")).arg(captured).arg(tess.Version());
QMessageBox::critical(MAIN, _("Error"), errMsg);
longjmp(g_restore_point, SIGSEGV);
}
tesseract::TessBaseAPI Recognizer::initTesseract(const char* language, bool* ok) const { tesseract::TessBaseAPI Recognizer::initTesseract(const char* language, bool* ok) const {
// unfortunately tesseract creates deliberate aborts when an error occurs
std::signal(SIGABRT, MainWindow::tesseractCrash);
QByteArray current = setlocale(LC_ALL, NULL); QByteArray current = setlocale(LC_ALL, NULL);
setlocale(LC_ALL, "C"); setlocale(LC_ALL, "C");
tesseract::TessBaseAPI tess; tesseract::TessBaseAPI tess;
// unfortunately tesseract creates deliberate segfaults when an error occurs int ret = tess.Init(nullptr, language);
std::signal(SIGSEGV, tessCrashHandler);
pipe(g_pipe);
fflush(stderr);
int oldstderr = dup(fileno(stderr));
dup2(g_pipe[1], fileno(stderr));
int ret = -1;
int fault_code = setjmp(g_restore_point);
if(fault_code == 0) {
ret = tess.Init(nullptr, language);
} else {
ret = -1;
}
dup2(oldstderr, fileno(stderr));
std::signal(SIGSEGV, MainWindow::signalHandler);
setlocale(LC_NUMERIC, current.constData()); setlocale(LC_NUMERIC, current.constData());
close(g_pipe[0]);
close(g_pipe[1]);
if(ok) { if(ok) {
*ok = ret != -1; *ok = ret != -1;
} }
return tess; return tess;
} }
void Recognizer::updateLanguagesMenu() { void Recognizer::updateLanguagesMenu() {
ui.menuLanguages->clear(); ui.menuLanguages->clear();
delete m_langMenuRadioGroup; delete m_langMenuRadioGroup;
m_langMenuRadioGroup = new QActionGroup(this); m_langMenuRadioGroup = new QActionGroup(this);
skipping to change at line 330 skipping to change at line 299
item->setData(entry.psmMode); item->setData(entry.psmMode);
item->setEnabled(!entry.requireOsd || haveOsd); item->setEnabled(!entry.requireOsd || haveOsd);
item->setCheckable(true); item->setCheckable(true);
item->setChecked(activePsm == entry.psmMode); item->setChecked(activePsm == entry.psmMode);
m_psmCheckGroup->addAction(item); m_psmCheckGroup->addAction(item);
} }
QAction* psmAction = new QAction(_("Page segmentation mode"), ui.menuLanguages); QAction* psmAction = new QAction(_("Page segmentation mode"), ui.menuLanguages);
psmAction->setMenu(psmMenu); psmAction->setMenu(psmMenu);
ui.menuLanguages->addAction(psmAction); ui.menuLanguages->addAction(psmAction);
ui.menuLanguages->addAction(_("Character whitelist / blacklist..."), this, SLOT(manageCharacterLists()));
// Add installer item // Add installer item
ui.menuLanguages->addSeparator(); ui.menuLanguages->addSeparator();
ui.menuLanguages->addAction(_("Manage languages..."), MAIN, SLOT(manageLanguages())); ui.menuLanguages->addAction(_("Manage languages..."), MAIN, SLOT(manageLanguages()));
} }
void Recognizer::setLanguage() { void Recognizer::setLanguage() {
QAction* item = qobject_cast<QAction*>(QObject::sender()); QAction* item = qobject_cast<QAction*>(QObject::sender());
if(item->isChecked()) { if(item->isChecked()) {
Config::Lang lang = item->data().value<Config::Lang>(); Config::Lang lang = item->data().value<Config::Lang>();
skipping to change at line 384 skipping to change at line 354
} }
void Recognizer::clearLineEditPageRangeStyle() { void Recognizer::clearLineEditPageRangeStyle() {
qobject_cast<QLineEdit*>(QObject::sender())->setStyleSheet(""); qobject_cast<QLineEdit*>(QObject::sender())->setStyleSheet("");
} }
void Recognizer::psmSelected(QAction* action) { void Recognizer::psmSelected(QAction* action) {
ConfigSettings::get<VarSetting<int>>("psm")->setValue(action->data().toInt()); ConfigSettings::get<VarSetting<int>>("psm")->setValue(action->data().toInt());
} }
void Recognizer::manageCharacterLists() {
m_charListDialog->exec();
}
QList<int> Recognizer::selectPages(bool& autodetectLayout) { QList<int> Recognizer::selectPages(bool& autodetectLayout) {
int nPages = MAIN->getDisplayer()->getNPages(); int nPages = MAIN->getDisplayer()->getNPages();
m_pagesDialogUi.lineEditPageRange->setText(QString("1-%1").arg(nPages)); m_pagesDialogUi.lineEditPageRange->setText(QString("1-%1").arg(nPages));
m_pagesDialogUi.lineEditPageRange->setFocus(); m_pagesDialogUi.lineEditPageRange->setFocus();
m_pagesDialogUi.labelRecognitionArea->setVisible(MAIN->getDisplayer()->allowAutodetectOCRAreas()); m_pagesDialogUi.labelRecognitionArea->setVisible(MAIN->getDisplayer()->allowAutodetectOCRAreas());
m_pagesDialogUi.comboBoxRecognitionArea->setVisible(MAIN->getDisplayer()->allowAutodetectOCRAreas()); m_pagesDialogUi.comboBoxRecognitionArea->setVisible(MAIN->getDisplayer()->allowAutodetectOCRAreas());
m_pagesDialogUi.groupBoxPrepend->setVisible(MAIN->getDisplayer()->allowAutodetectOCRAreas()); m_pagesDialogUi.groupBoxPrepend->setVisible(MAIN->getDisplayer()->allowAutodetectOCRAreas());
m_pagesDialogUi.comboBoxRecognitionArea->setItemText(0, MAIN->getDisplayer()->hasMultipleOCRAreas() ? _("Current selection") : _("Entire page")); m_pagesDialogUi.comboBoxRecognitionArea->setItemText(0, MAIN->getDisplayer()->hasMultipleOCRAreas() ? _("Current selection") : _("Entire page"));
skipping to change at line 463 skipping to change at line 437
} }
void Recognizer::recognize(const QList<int>& pages, bool autodetectLayout) { void Recognizer::recognize(const QList<int>& pages, bool autodetectLayout) {
bool prependFile = pages.size() > 1 && ConfigSettings::get<SwitchSetting>("ocraddsourcefilename")->getValue(); bool prependFile = pages.size() > 1 && ConfigSettings::get<SwitchSetting>("ocraddsourcefilename")->getValue();
bool prependPage = pages.size() > 1 && ConfigSettings::get<SwitchSetting>("ocraddsourcepage")->getValue(); bool prependPage = pages.size() > 1 && ConfigSettings::get<SwitchSetting>("ocraddsourcepage")->getValue();
bool ok = false; bool ok = false;
tesseract::TessBaseAPI tess = initTesseract(m_curLang.prefix.toLocal8Bit().constData(), &ok); tesseract::TessBaseAPI tess = initTesseract(m_curLang.prefix.toLocal8Bit().constData(), &ok);
if(ok) { if(ok) {
QString failed; QString failed;
tess.SetPageSegMode(static_cast<tesseract::PageSegMode>(m_psmCheckGroup->checkedAction()->data().toInt())); tess.SetPageSegMode(static_cast<tesseract::PageSegMode>(m_psmCheckGroup->checkedAction()->data().toInt()));
if(m_charListDialogUi.radioButtonWhitelist->isChecked()) {
tess.SetVariable("tessedit_char_whitelist", m_charListDialogUi.lineEditWhitelist->text().toLocal8Bit());
}
if(m_charListDialogUi.radioButtonBlacklist->isChecked()) {
tess.SetVariable("tessedit_char_blacklist", m_charListDialogUi.lineEditBlacklist->text().toLocal8Bit());
}
OutputEditor::ReadSessionData* readSessionData = MAIN->getOutputEditor()->initRead(tess); OutputEditor::ReadSessionData* readSessionData = MAIN->getOutputEditor()->initRead(tess);
ProgressMonitor monitor(pages.size()); ProgressMonitor monitor(pages.size());
MAIN->showProgress(&monitor); MAIN->showProgress(&monitor);
Utils::busyTask([&] { Utils::busyTask([&] {
int npages = pages.size(); int npages = pages.size();
int idx = 0; int idx = 0;
QString prevFile; QString prevFile;
for(int page : pages) { for(int page : pages) {
monitor.desc.progress = 0; monitor.desc.progress = 0;
++idx; ++idx;
 End of changes. 11 change blocks. 
49 lines changed or deleted 32 lines changed or added

Home  |  About  |  Features  |  All  |  Newest  |  Dox  |  Diffs  |  RSS Feeds  |  Screenshots  |  Comments  |  Imprint  |  Privacy  |  HTTP(S)