"Fossies" - the Fresh Open Source Software Archive  

Source code changes of the file "utils/smallut.cpp" between
recoll-1.27.11.tar.gz and recoll-1.27.12.tar.gz

About: Recoll is a personal full text search tool based on Xapian as back-end (with Qt GUI).

smallut.cpp  (recoll-1.27.11):smallut.cpp  (recoll-1.27.12)
/* Copyright (C) 2006-2016 J.F.Dockes /* Copyright (C) 2006-2020 J.F.Dockes
* *
* This library is free software; you can redistribute it and/or * This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public * modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either * License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version. * version 2.1 of the License, or (at your option) any later version.
* *
* This library is distributed in the hope that it will be useful, * This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of * but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details. * Lesser General Public License for more details.
* *
* You should have received a copy of the GNU Lesser General Public * You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software * License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301 USA * 02110-1301 USA
*/ */
#include "smallut.h"
#include <algorithm> #include <algorithm>
#include <cctype>
#include <cerrno>
#include <cinttypes>
#include <cmath>
#include <cstdio> #include <cstdio>
#include <cstdlib> #include <cstdlib>
#include <cinttypes> #include <cstring>
#include <ctime>
#include <iostream>
#include <list>
#include <numeric>
#include <set>
#include <string>
#include <unordered_map>
#include <unordered_set>
#ifdef _WIN32 #ifdef _WIN32
// needed for localtime_r under mingw? // needed for localtime_r under mingw?
#define _POSIX_THREAD_SAFE_FUNCTIONS #define _POSIX_THREAD_SAFE_FUNCTIONS
#ifdef _MSC_VER #ifdef _MSC_VER
#define localtime_r(a,b) localtime_s(b,a) #define localtime_r(a,b) localtime_s(b,a)
#endif /* _MSC_VER */ #endif /* _MSC_VER */
#endif /* _WIN32 */ #endif /* _WIN32 */
#include <ctime>
#include <cctype>
#include <cerrno>
#include <cstring>
#include <cmath>
// Older compilers don't support stdc++ regex, but Windows does not // Older compilers don't support stdc++ regex, but Windows does not
// have the Linux one. Have a simple class to solve the simple cases. // have the Linux one. Have a simple class to solve the simple cases.
#if defined(_WIN32) #if defined(_WIN32)
#define USE_STD_REGEX #define USE_STD_REGEX
#include <regex> #include <regex>
#else #else
#define USE_LINUX_REGEX #define USE_LINUX_REGEX
#include <regex.h> #include <regex.h>
#endif #endif
#include <string>
#include <iostream>
#include <list>
#include <numeric>
#include <unordered_map>
#include <unordered_set>
#include "smallut.h"
using namespace std; using namespace std;
int stringicmp(const string& s1, const string& s2) int stringicmp(const string& s1, const string& s2)
{ {
string::const_iterator it1 = s1.begin(); return strcasecmp(s1.c_str(), s2.c_str());
string::const_iterator it2 = s2.begin();
string::size_type size1 = s1.length(), size2 = s2.length();
char c1, c2;
if (size1 < size2) {
while (it1 != s1.end()) {
c1 = ::toupper(*it1);
c2 = ::toupper(*it2);
if (c1 != c2) {
return c1 > c2 ? 1 : -1;
}
++it1;
++it2;
}
return size1 == size2 ? 0 : -1;
}
while (it2 != s2.end()) {
c1 = ::toupper(*it1);
c2 = ::toupper(*it2);
if (c1 != c2) {
return c1 > c2 ? 1 : -1;
}
++it1;
++it2;
}
return size1 == size2 ? 0 : 1;
} }
void stringtolower(string& io) void stringtolower(string& io)
{ {
std::transform(io.begin(), io.end(), io.begin(), [](unsigned char c) { retur n std::tolower(c); }); std::transform(io.begin(), io.end(), io.begin(), [](unsigned char c) { retur n std::tolower(c); });
} }
string stringtolower(const string& i) string stringtolower(const string& i)
{ {
string o = i; string o = i;
stringtolower(o); stringtolower(o);
return o; return o;
skipping to change at line 113 skipping to change at line 86
std::transform(io.begin(), io.end(), io.begin(), [](unsigned char c) { retur n std::toupper(c); }); std::transform(io.begin(), io.end(), io.begin(), [](unsigned char c) { retur n std::toupper(c); });
} }
string stringtoupper(const string& i) string stringtoupper(const string& i)
{ {
string o = i; string o = i;
stringtoupper(o); stringtoupper(o);
return o; return o;
} }
// Case-insensitive comparison of the tails of two strings.
// Both strings are walked backwards from their last character until one of
// them is exhausted.
// Returns 0 if the shorter string is a case-insensitive suffix of the longer
// one (an empty string therefore matches anything). Otherwise returns 1 or -1
// according to the order of the first differing characters met (upper-cased,
// compared as char).
extern int stringisuffcmp(const std::string& s1, const std::string& s2)
{
    std::string::const_reverse_iterator r1 = s1.rbegin(), re1 = s1.rend(),
        r2 = s2.rbegin(), re2 = s2.rend();
    while (r1 != re1 && r2 != re2) {
        // toupper() must be given a value representable as unsigned char:
        // a negative char (any byte >= 0x80 where char is signed) would be
        // undefined behaviour. The result is converted back to char so that
        // the ordering of the return value stays a char comparison.
        const char c1 = static_cast<char>(
            ::toupper(static_cast<unsigned char>(*r1)));
        const char c2 = static_cast<char>(
            ::toupper(static_cast<unsigned char>(*r2)));
        if (c1 != c2) {
            return c1 > c2 ? 1 : -1;
        }
        ++r1;
        ++r2;
    }
    return 0;
}
// s1 is already lowercase // s1 is already lowercase
int stringlowercmp(const string& s1, const string& s2) int stringlowercmp(const string& s1, const string& s2)
{ {
string::const_iterator it1 = s1.begin(); string::const_iterator it1 = s1.begin();
string::const_iterator it2 = s2.begin(); string::const_iterator it2 = s2.begin();
string::size_type size1 = s1.length(), size2 = s2.length(); string::size_type size1 = s1.length(), size2 = s2.length();
char c2; char c2;
if (size1 < size2) { if (size1 < size2) {
while (it1 != s1.end()) { while (it1 != s1.end()) {
skipping to change at line 196 skipping to change at line 153
++it2; ++it2;
} }
return size1 == size2 ? 0 : 1; return size1 == size2 ? 0 : 1;
} }
bool beginswith(const std::string& big, const std::string& small) bool beginswith(const std::string& big, const std::string& small)
{ {
return big.compare(0, small.size(), small) == 0; return big.compare(0, small.size(), small) == 0;
} }
// Compare charset names, removing the more common spelling variations:
// character case and the '_' and '-' separators are ignored, so that, e.g.,
// "UTF-8", "utf_8" and "utf8" all compare equal.
// Returns true if the two names are the same after normalization.
bool samecharset(const std::string& cs1, const std::string& cs2)
{
    // Normalized form of a name: '_' and '-' dropped, the rest lowercased.
    // The result has to be built as an std::string: accumulating into a
    // const char* would do pointer arithmetic, and the final test would
    // compare addresses instead of contents.
    auto normalize = [](const std::string& cs) {
        std::string out;
        out.reserve(cs.size());
        for (const char c : cs) {
            if (c != '_' && c != '-') {
                // Cast: tolower() is undefined for negative char values.
                out.push_back(static_cast<char>(
                                  ::tolower(static_cast<unsigned char>(c))));
            }
        }
        return out;
    };
    return normalize(cs1) == normalize(cs2);
}
template <class T> bool stringToStrings(const string& s, T& tokens, template <class T> bool stringToStrings(const string& s, T& tokens,
const string& addseps) const string& addseps)
{ {
string current; string current;
tokens.clear(); tokens.clear();
enum states {SPACE, TOKEN, INQUOTE, ESCAPE}; enum states {SPACE, TOKEN, INQUOTE, ESCAPE};
states state = SPACE; states state = SPACE;
for (char i : s) { for (char i : s) {
switch (i) { switch (i) {
case '"': case '"':
skipping to change at line 313 skipping to change at line 262
case TOKEN: case TOKEN:
tokens.insert(tokens.end(), current); tokens.insert(tokens.end(), current);
break; break;
case INQUOTE: case INQUOTE:
case ESCAPE: case ESCAPE:
return false; return false;
} }
return true; return true;
} }
template bool stringToStrings<list<string> >(const string&,
list<string>&, const string&);
template bool stringToStrings<vector<string> >(const string&,
vector<string>&, const string&);
template bool stringToStrings<set<string> >(const string&,
set<string>&, const string&);
template bool stringToStrings<std::unordered_set<string> >
(const string&, std::unordered_set<string>&, const string&);
template <class T> void stringsToString(const T& tokens, string& s) template <class T> void stringsToString(const T& tokens, string& s)
{ {
for (auto it = tokens.begin(); for (auto it = tokens.begin();
it != tokens.end(); it++) { it != tokens.end(); it++) {
bool hasblanks = false; bool hasblanks = false;
if (it->find_first_of(" \t\n") != string::npos) { if (it->find_first_of(" \t\n") != string::npos) {
hasblanks = true; hasblanks = true;
} }
if (it != tokens.begin()) { if (it != tokens.begin()) {
s.append(1, ' '); s.append(1, ' ');
skipping to change at line 350 skipping to change at line 290
s.append(1, car); s.append(1, car);
} else { } else {
s.append(1, car); s.append(1, car);
} }
} }
if (hasblanks) { if (hasblanks) {
s.append(1, '"'); s.append(1, '"');
} }
} }
} }
template void stringsToString<list<string> >(const list<string>&, string&);
template void stringsToString<vector<string> >(const vector<string>&, string&);
template void stringsToString<set<string> >(const set<string>&, string&);
template void stringsToString<unordered_set<string> >(const unordered_set<string
>&, string&);
template <class T> string stringsToString(const T& tokens) template <class T> string stringsToString(const T& tokens)
{ {
string out; string out;
stringsToString<T>(tokens, out); stringsToString<T>(tokens, out);
return out; return out;
} }
template string stringsToString<list<string> >(const list<string>&);
template string stringsToString<vector<string> >(const vector<string>&);
template string stringsToString<set<string> >(const set<string>&);
template string stringsToString<unordered_set<string> >(const unordered_set<stri
ng>&);
template <class T> void stringsToCSV(const T& tokens, string& s, template <class T> void stringsToCSV(const T& tokens, string& s,
char sep) char sep)
{ {
s.erase(); s.erase();
for (auto it = tokens.begin(); for (auto it = tokens.begin();
it != tokens.end(); it++) { it != tokens.end(); it++) {
bool needquotes = false; bool needquotes = false;
if (it->empty() || if (it->empty() ||
it->find_first_of(string(1, sep) + "\"\n") != string::npos) { it->find_first_of(string(1, sep) + "\"\n") != string::npos) {
skipping to change at line 395 skipping to change at line 328
s.append(2, '"'); s.append(2, '"');
} else { } else {
s.append(1, car); s.append(1, car);
} }
} }
if (needquotes) { if (needquotes) {
s.append(1, '"'); s.append(1, '"');
} }
} }
} }
#ifdef SMALLUT_EXTERNAL_INSTANTIATIONS
#include "smallut_instantiate.h"
#else
template bool stringToStrings<list<string> >(const string&,
list<string>&, const string&);
template bool stringToStrings<vector<string> >(const string&,
vector<string>&, const string&);
template bool stringToStrings<set<string> >(const string&,
set<string>&, const string&);
template bool stringToStrings<std::unordered_set<string> >
(const string&, std::unordered_set<string>&, const string&);
template void stringsToString<list<string> >(const list<string>&, string&);
template void stringsToString<vector<string> >(const vector<string>&, string&);
template void stringsToString<set<string> >(const set<string>&, string&);
template void stringsToString<unordered_set<string> >(const unordered_set<string
>&, string&);
template string stringsToString<list<string> >(const list<string>&);
template string stringsToString<vector<string> >(const vector<string>&);
template string stringsToString<set<string> >(const set<string>&);
template string stringsToString<unordered_set<string> >(const unordered_set<stri
ng>&);
template void stringsToCSV<list<string> >(const list<string>&, string&, char); template void stringsToCSV<list<string> >(const list<string>&, string&, char);
template void stringsToCSV<vector<string> >(const vector<string>&, string&, template void stringsToCSV<vector<string> >(const vector<string>&, string&,
char); char);
#endif
void stringToTokens(const string& str, vector<string>& tokens, void stringToTokens(const string& str, vector<string>& tokens,
const string& delims, bool skipinit) const string& delims, bool skipinit)
{ {
string::size_type startPos = 0, pos; string::size_type startPos = 0, pos;
// Skip initial delims, return empty if this eats all. // Skip initial delims, return empty if this eats all.
if (skipinit && if (skipinit &&
(startPos = str.find_first_not_of(delims, 0)) == string::npos) { (startPos = str.find_first_not_of(delims, 0)) == string::npos) {
return; return;
skipping to change at line 1203 skipping to change at line 1157
// In short it's a mess, but thanks to c++ function overloading and smart // In short it's a mess, but thanks to c++ function overloading and smart
// people, we have a solution: // people, we have a solution:
// https://www.zverovich.net/2015/03/13/reliable-detection-of-strerror-variants.html // https://www.zverovich.net/2015/03/13/reliable-detection-of-strerror-variants.html
char errbuf[200]; char errbuf[200];
errbuf[0] = 0; errbuf[0] = 0;
reason->append(_check_strerror_r( reason->append(_check_strerror_r(
strerror_r(_errno, errbuf, sizeof(errbuf)), errbuf)); strerror_r(_errno, errbuf, sizeof(errbuf)), errbuf));
#endif #endif
} }
static const std::unordered_map<string, string> lang_to_code { #ifndef SMALLUT_NO_REGEX
{"be", "cp1251"},
{"bg", "cp1251"},
{"cs", "iso-8859-2"},
{"el", "iso-8859-7"},
{"he", "iso-8859-8"},
{"hr", "iso-8859-2"},
{"hu", "iso-8859-2"},
{"ja", "eucjp"},
{"kk", "pt154"},
{"ko", "euckr"},
{"lt", "iso-8859-13"},
{"lv", "iso-8859-13"},
{"pl", "iso-8859-2"},
{"rs", "iso-8859-2"},
{"ro", "iso-8859-2"},
{"ru", "koi8-r"},
{"sk", "iso-8859-2"},
{"sl", "iso-8859-2"},
{"sr", "iso-8859-2"},
{"th", "iso-8859-11"},
{"tr", "iso-8859-9"},
{"uk", "koi8-u"},
};
// Charset name returned for languages which have no entry in lang_to_code.
static const string cstr_cp1252("CP1252");

// Return the charset name which the lang_to_code table associates with the
// given language code. Languages absent from the table yield CP1252.
string langtocode(const string& lang)
{
    const auto entry = lang_to_code.find(lang);
    return entry != lang_to_code.end() ? entry->second : cstr_cp1252;
}
// Return the language part of the locale named by the LANG environment
// variable. POSIX locale names have the form
// language[_territory][.codeset][@modifier], so that, e.g., "fr_FR.UTF-8"
// yields "fr".
// Returns "en" if LANG is unset or empty, or if its language part is empty
// or designates the C/POSIX locale (this includes values like "C.UTF-8").
std::string localelang()
{
    const char *lang = getenv("LANG");
    if (lang == nullptr || *lang == 0) {
        return "en";
    }
    std::string locale(lang);
    // Cut at the first delimiter: the territory, codeset and modifier are
    // all optional, so the language may be followed by any of '_', '.', '@'.
    const std::string::size_type end = locale.find_first_of("_.@");
    if (end != std::string::npos) {
        locale.erase(end);
    }
    if (locale.empty() || locale == "C" || locale == "POSIX") {
        return "en";
    }
    return locale;
}
#ifdef USE_STD_REGEX #ifdef USE_STD_REGEX
class SimpleRegexp::Internal { class SimpleRegexp::Internal {
public: public:
Internal(const string& exp, int flags, int nm) Internal(const string& exp, int flags, int nm)
: expr(exp, : expr(exp,
basic_regex<char>::flag_type( basic_regex<char>::flag_type(
regex_constants::extended | regex_constants::extended |
((flags&SRE_ICASE) ? int(regex_constants::icase) : 0) | ((flags&SRE_ICASE) ? int(regex_constants::icase) : 0) |
((flags&SRE_NOSUB) ? int(regex_constants::nosubs) : 0) ((flags&SRE_NOSUB) ? int(regex_constants::nosubs) : 0)
skipping to change at line 1282 skipping to change at line 1183
int nmatch; int nmatch;
}; };
bool SimpleRegexp::simpleMatch(const string& val) const bool SimpleRegexp::simpleMatch(const string& val) const
{ {
if (!ok()) if (!ok())
return false; return false;
return regex_search(val, m->res, m->expr); return regex_search(val, m->res, m->expr);
} }
// Substitute one instance of the regular expression: only the first match
// found in 'in' is replaced (format_first_only), 'repl' being processed by
// std::regex_replace() with its default format rules.
// Returns an empty string if the expression is not usable (ok() is false).
std::string SimpleRegexp::simpleSub(
    const std::string& in, const std::string& repl)
{
    if (ok()) {
        const auto first_only = std::regex_constants::format_first_only;
        return regex_replace(in, m->expr, repl, first_only);
    }
    return std::string();
}
string SimpleRegexp::getMatch(const string&, int i) const string SimpleRegexp::getMatch(const string&, int i) const
{ {
return m->res.str(i); return m->res.str(i);
} }
#else // -> !WIN32 #else // -> !WIN32
class SimpleRegexp::Internal { class SimpleRegexp::Internal {
public: public:
Internal(const string& exp, int flags, int nm) : nmatch(nm) { Internal(const string& exp, int flags, int nm) : nmatch(nm) {
skipping to change at line 1308 skipping to change at line 1220
} }
~Internal() { ~Internal() {
regfree(&expr); regfree(&expr);
} }
bool ok; bool ok;
regex_t expr; regex_t expr;
int nmatch; int nmatch;
vector<regmatch_t> matches; vector<regmatch_t> matches;
}; };
// Substitute one instance of the regular expression: the first match found
// in 'in' is replaced by 'repl', which is inserted as is.
// Returns an empty string if the expression is not usable (ok() is false),
// and an unchanged copy of 'in' if regexec() fails or reports no match.
std::string SimpleRegexp::simpleSub(
    const std::string& in, const std::string& repl)
{
    if (!ok()) {
        return std::string();
    }

    const int err = regexec(&m->expr, in.c_str(),
                            m->nmatch + 1, &m->matches[0], 0);
    if (err) {
#if SIMPLESUB_DBG
        // The compiled expression lives in the Internal object (m->expr).
        // The expression source text is not accessible from here, so the
        // input string is what gets printed.
        const int ERRSIZE = 200;
        char errbuf[ERRSIZE + 1];
        regerror(err, &m->expr, errbuf, ERRSIZE);
        std::cerr << "simpleSub: regexec(" << in << ") failed: "
                  << errbuf << "\n";
#endif
        return in;
    }
    if (m->matches[0].rm_so == -1) {
        // No match
        return in;
    }
    // matches[0] delimits the whole match: keep what precedes and follows it.
    std::string out = in.substr(0, m->matches[0].rm_so);
    out += repl;
    out += in.substr(m->matches[0].rm_eo);
    return out;
}
bool SimpleRegexp::simpleMatch(const string& val) const bool SimpleRegexp::simpleMatch(const string& val) const
{ {
if (!ok()) if (!ok())
return false; return false;
return regexec(&m->expr, val.c_str(), m->nmatch + 1, &m->matches[0], 0) == 0 ; return regexec(&m->expr, val.c_str(), m->nmatch + 1, &m->matches[0], 0) == 0 ;
} }
string SimpleRegexp::getMatch(const string& val, int i) const string SimpleRegexp::getMatch(const string& val, int i) const
{ {
if (i > m->nmatch) { if (i > m->nmatch) {
return string(); return string();
} }
return val.substr(m->matches[i].rm_so, return val.substr(m->matches[i].rm_so,
m->matches[i].rm_eo - m->matches[i].rm_so); m->matches[i].rm_eo - m->matches[i].rm_so);
} }
#endif // win/notwinf #endif // !windows, using C regexps
SimpleRegexp::SimpleRegexp(const string& exp, int flags, int nmatch) SimpleRegexp::SimpleRegexp(const string& exp, int flags, int nmatch)
: m(new Internal(exp, flags, nmatch)) : m(new Internal(exp, flags, nmatch))
{ {
} }
SimpleRegexp::~SimpleRegexp() SimpleRegexp::~SimpleRegexp()
{ {
delete m; delete m;
} }
bool SimpleRegexp::ok() const bool SimpleRegexp::ok() const
{ {
return m->ok; return m->ok;
} }
bool SimpleRegexp::operator() (const string& val) const bool SimpleRegexp::operator() (const string& val) const
{ {
return simpleMatch(val); return simpleMatch(val);
} }
#endif // SMALLUT_NO_REGEX
string flagsToString(const vector<CharFlags>& flags, unsigned int val) string flagsToString(const vector<CharFlags>& flags, unsigned int val)
{ {
const char *s; const char *s;
string out; string out;
for (const auto& flag : flags) { for (const auto& flag : flags) {
if ((val & flag.value) == flag.value) { if ((val & flag.value) == flag.value) {
s = flag.yesname; s = flag.yesname;
} else { } else {
s = flag.noname; s = flag.noname;
skipping to change at line 1385 skipping to change at line 1328
} }
} }
{ {
char mybuf[100]; char mybuf[100];
sprintf(mybuf, "Unknown Value 0x%x", val); sprintf(mybuf, "Unknown Value 0x%x", val);
out = mybuf; out = mybuf;
} }
return out; return out;
} }
unsigned int stringToFlags(const vector<CharFlags>& flags,
const string& input, const char *sep)
{
unsigned int out = 0;
vector<string> toks;
stringToTokens(input, toks, sep);
for (auto& tok: toks) {
trimstring(tok);
out = std::accumulate(
flags.begin(), flags.end(), out,
[&](unsigned int o, CharFlags flag) {
return tok == flag.yesname ? o | flag.value : o;
});
}
return out;
}
// Initialization for static stuff to be called from main thread before going // Initialization for static stuff to be called from main thread before going
// multiple // multiple
void smallut_init_mt() void smallut_init_mt()
{ {
// Init langtocode() static table
langtocode("");
} }
 End of changes. 22 change blocks. 
165 lines changed or deleted 86 lines changed or added

Home  |  About  |  Features  |  All  |  Newest  |  Dox  |  Diffs  |  RSS Feeds  |  Screenshots  |  Comments  |  Imprint  |  Privacy  |  HTTP(S)