"Fossies" - the Fresh Open Source Software Archive  

Source code changes of the file "scriptindex.cc" between
xapian-omega-1.4.18.tar.xz and xapian-omega-1.4.19.tar.xz

About: Xapian Omega is an application built on Xapian, consisting of indexers and a CGI search frontend.

scriptindex.cc  (xapian-omega-1.4.18.tar.xz):scriptindex.cc  (xapian-omega-1.4.19.tar.xz)
skipping to change at line 83 skipping to change at line 83
static inline bool static inline bool
prefix_needs_colon(const string & prefix, unsigned ch) prefix_needs_colon(const string & prefix, unsigned ch)
{ {
if (!C_isupper(ch) && ch != ':') return false; if (!C_isupper(ch) && ch != ':') return false;
string::size_type len = prefix.length(); string::size_type len = prefix.length();
return (len > 1 && prefix[len - 1] != ':'); return (len > 1 && prefix[len - 1] != ':');
} }
const char * action_names[] = { const char * action_names[] = {
"bad", "new", // Actions used internally:
"boolean", "date", "field", "gap", "hash", "hextobin", "index", "bad",
"indexnopos", "load", "lower", "parsedate", "spell", "split", "truncate", "new",
"unhtml", "unique", "value", "valuenumeric", "valuepacked", "weight" // Actual actions:
"boolean",
"date",
"field",
"gap",
"hash",
"hextobin",
"index",
"indexnopos",
"load",
"lower",
"ltrim",
"parsedate",
"rtrim",
"spell",
"split",
"squash",
"trim",
"truncate",
"unhtml",
"unique",
"value",
"valuenumeric",
"valuepacked",
"weight"
}; };
// For debugging: // For debugging:
#define DUMP_ACTION(A) cout << action_names[(A).get_action()] << "(" << (A).get_string_arg() << "," << (A).get_num_arg() << ")" << endl #define DUMP_ACTION(A) cout << action_names[(A).get_action()] << "(" << (A).get_string_arg() << "," << (A).get_num_arg() << ")" << endl
class Action { class Action {
public: public:
typedef enum { typedef enum {
BAD, NEW, // Actions used internally:
BOOLEAN, DATE, FIELD, GAP, HASH, HEXTOBIN, INDEX, INDEXNOPOS, LOAD, BAD,
LOWER, PARSEDATE, SPELL, SPLIT, TRUNCATE, UNHTML, UNIQUE, VALUE, NEW,
VALUENUMERIC, VALUEPACKED, WEIGHT // Actual actions:
BOOLEAN,
DATE,
FIELD,
GAP,
HASH,
HEXTOBIN,
INDEX,
INDEXNOPOS,
LOAD,
LOWER,
LTRIM,
PARSEDATE,
RTRIM,
SPELL,
SPLIT,
SQUASH,
TRIM,
TRUNCATE,
UNHTML,
UNIQUE,
VALUE,
VALUENUMERIC,
VALUEPACKED,
WEIGHT
} type; } type;
enum { SPLIT_NONE, SPLIT_DEDUP, SPLIT_SORT, SPLIT_PREFIXES }; enum { SPLIT_NONE, SPLIT_DEDUP, SPLIT_SORT, SPLIT_PREFIXES };
private: private:
type action; type action;
int num_arg; int num_arg;
string string_arg; string string_arg;
// Offset into indexscript line. // Offset into indexscript line.
size_t pos; size_t pos;
public: public:
Action(type action_, size_t pos_) Action(type action_, size_t pos_)
skipping to change at line 138 skipping to change at line 186
inline bool inline bool
operator==(Action::type t, const Action& a) { return a.get_action() == t; } operator==(Action::type t, const Action& a) { return a.get_action() == t; }
inline bool inline bool
operator!=(const Action& a, Action::type t) { return !(a == t); } operator!=(const Action& a, Action::type t) { return !(a == t); }
inline bool inline bool
operator!=(Action::type t, const Action& a) { return !(t == a); } operator!=(Action::type t, const Action& a) { return !(t == a); }
/** Strip leading characters from a string in place.
 *
 *  @param s      String to modify.
 *  @param chars  Set of characters to remove from the start of @a s.
 *
 *  Every character at the front of @a s which occurs in @a chars is
 *  removed.  If @a s contains nothing but such characters it ends up empty.
 */
static void
ltrim(string& s, const string& chars)
{
    const auto first_kept = s.find_first_not_of(chars);
    // Nothing to strip when the very first character is already a keeper.
    if (first_kept == 0) return;
    // When no character is kept, first_kept is npos and this erases
    // everything.
    s.erase(0, first_kept);
}
/** Strip trailing characters from a string in place.
 *
 *  @param s      String to modify.
 *  @param chars  Set of characters to remove from the end of @a s.
 *
 *  Every character at the end of @a s which occurs in @a chars is removed.
 *  If @a s contains nothing but such characters it ends up empty.
 */
static void
rtrim(string& s, const string& chars)
{
    const auto last_kept = s.find_last_not_of(chars);
    if (last_kept == string::npos) {
        // No character survives, so the result is the empty string.
        s.clear();
        return;
    }
    // Drop everything after the last character we're keeping.
    s.erase(last_kept + 1);
}
/** Collapse runs of separator characters in a string in place.
 *
 *  @param s      String to modify.
 *  @param chars  Set of characters treated as separators.
 *
 *  Leading and trailing separators are dropped, and each run of one or more
 *  separators between non-separator text is replaced by a single space
 *  (always ' ', whatever @a chars contains).
 */
static void
squash(string& s, const string& chars)
{
    string squashed;
    squashed.reserve(s.size());
    auto run_start = s.find_first_not_of(chars);
    while (run_start != string::npos) {
        // Each run of kept characters after the first is preceded by a
        // single space.
        if (!squashed.empty()) squashed += ' ';
        const auto run_end = s.find_first_of(chars, run_start);
        if (run_end == string::npos) {
            // Final run extends to the end of the input.
            squashed.append(s, run_start, string::npos);
            break;
        }
        squashed.append(s, run_start, run_end - run_start);
        run_start = s.find_first_not_of(chars, run_end);
    }
    s = std::move(squashed);
}
enum diag_type { DIAG_ERROR, DIAG_WARN, DIAG_NOTE }; enum diag_type { DIAG_ERROR, DIAG_WARN, DIAG_NOTE };
static void static void
report_location(enum diag_type type, report_location(enum diag_type type,
const string& filename, const string& filename,
size_t line = 0, size_t line = 0,
size_t pos = string::npos) size_t pos = string::npos)
{ {
cerr << filename; cerr << filename;
if (line != 0) { if (line != 0) {
skipping to change at line 195 skipping to change at line 271
parse_index_script(const string &filename) parse_index_script(const string &filename)
{ {
ifstream script(filename.c_str()); ifstream script(filename.c_str());
if (!script.is_open()) { if (!script.is_open()) {
report_location(DIAG_ERROR, filename); report_location(DIAG_ERROR, filename);
cerr << strerror(errno) << endl; cerr << strerror(errno) << endl;
exit(1); exit(1);
} }
string line; string line;
size_t line_no = 0; size_t line_no = 0;
bool had_unique = false; // Line number where we saw a `unique` action, or -1 if we haven't.
int unique_line_no = -1;
while (getline(script, line)) { while (getline(script, line)) {
++line_no; ++line_no;
vector<string> fields; vector<string> fields;
vector<Action> actions; vector<Action> actions;
string::const_iterator i, j; string::const_iterator i, j;
const string &s = line; const string &s = line;
i = find_if(s.begin(), s.end(), [](char ch) { return !C_isspace(ch); }); i = find_if(s.begin(), s.end(), [](char ch) { return !C_isspace(ch); });
if (i == s.end() || *i == '#') { if (i == s.end() || *i == '#') {
// Blank line or comment. // Blank line or comment.
continue; continue;
skipping to change at line 290 skipping to change at line 367
} else if (action == "indexnopos") { } else if (action == "indexnopos") {
code = Action::INDEXNOPOS; code = Action::INDEXNOPOS;
max_args = 1; max_args = 1;
} }
break; break;
case 'l': case 'l':
if (action == "lower") { if (action == "lower") {
code = Action::LOWER; code = Action::LOWER;
} else if (action == "load") { } else if (action == "load") {
code = Action::LOAD; code = Action::LOAD;
} else if (action == "ltrim") {
code = Action::LTRIM;
max_args = 1;
} }
break; break;
case 'p': case 'p':
if (action == "parsedate") { if (action == "parsedate") {
code = Action::PARSEDATE; code = Action::PARSEDATE;
min_args = max_args = 1; min_args = max_args = 1;
} }
break; break;
case 'r':
if (action == "rtrim") {
code = Action::RTRIM;
max_args = 1;
}
break;
case 's': case 's':
if (action == "spell") { if (action == "spell") {
code = Action::SPELL; code = Action::SPELL;
} else if (action == "split") { } else if (action == "split") {
code = Action::SPLIT; code = Action::SPLIT;
min_args = 1; min_args = 1;
max_args = 2; max_args = 2;
} else if (action == "squash") {
code = Action::SQUASH;
max_args = 1;
} }
break; break;
case 't': case 't':
if (action == "truncate") { if (action == "truncate") {
code = Action::TRUNCATE; code = Action::TRUNCATE;
min_args = max_args = 1; min_args = max_args = 1;
takes_integer_argument = true; takes_integer_argument = true;
} else if (action == "trim") {
code = Action::TRIM;
max_args = 1;
} }
break; break;
case 'u': case 'u':
if (action == "unhtml") { if (action == "unhtml") {
code = Action::UNHTML; code = Action::UNHTML;
} else if (action == "unique") { } else if (action == "unique") {
code = Action::UNIQUE; code = Action::UNIQUE;
min_args = max_args = 1; min_args = max_args = 1;
} }
break; break;
skipping to change at line 599 skipping to change at line 691
* num_arg n, so that we don't needlessly * num_arg n, so that we don't needlessly
* allocate memory and read data we're just * allocate memory and read data we're just
* going to ignore. * going to ignore.
*/ */
actions.pop_back(); actions.pop_back();
code = Action::LOAD; code = Action::LOAD;
} }
actions.emplace_back(code, action_pos, val); actions.emplace_back(code, action_pos, val);
break; break;
case Action::UNIQUE: case Action::UNIQUE:
if (had_unique) { if (unique_line_no >= 0) {
report_location(DIAG_ERROR, filename, line_no, report_location(DIAG_ERROR, filename, line_no,
action_pos); action_pos);
cerr << "Index action 'unique' used more than once" cerr << "Index action 'unique' used more than once"
<< endl; << endl;
report_location(DIAG_NOTE, filename,
unique_line_no);
cerr << "Previously used here" << endl;
exit(1); exit(1);
} }
had_unique = true; unique_line_no = line_no;
if (boolmap.find(val) == boolmap.end()) if (boolmap.find(val) == boolmap.end())
boolmap[val] = Action::UNIQUE; boolmap[val] = Action::UNIQUE;
actions.emplace_back(code, action_pos, val); actions.emplace_back(code, action_pos, val);
break; break;
case Action::GAP: { case Action::GAP: {
actions.emplace_back(code, action_pos, val); actions.emplace_back(code, action_pos, val);
auto& obj = actions.back(); auto& obj = actions.back();
auto gap_size = obj.get_num_arg(); auto gap_size = obj.get_num_arg();
if (gap_size <= 0) { if (gap_size <= 0) {
report_location(DIAG_ERROR, filename, line_no, report_location(DIAG_ERROR, filename, line_no,
skipping to change at line 637 skipping to change at line 732
auto max_length = obj.get_num_arg(); auto max_length = obj.get_num_arg();
if (max_length < 6) { if (max_length < 6) {
report_location(DIAG_ERROR, filename, line_no, report_location(DIAG_ERROR, filename, line_no,
obj.get_pos() + 4 + 1); obj.get_pos() + 4 + 1);
cerr << "Index action 'hash' takes an integer " cerr << "Index action 'hash' takes an integer "
"argument which must be at least 6" << endl; "argument which must be at least 6" << endl;
exit(1); exit(1);
} }
break; break;
} }
case Action::LTRIM:
case Action::RTRIM:
case Action::SQUASH:
case Action::TRIM:
for (unsigned char ch : val) {
if (ch >= 0x80) {
auto column = actions.back().get_pos() +
strlen(action_names[code]) + 1;
report_location(DIAG_ERROR, filename, line_no,
column);
cerr << "Index action '" << action_names[code]
<< "' only support ASCII characters "
"currently\n";
exit(1);
}
}
actions.emplace_back(code, action_pos, val);
break;
case Action::BOOLEAN: case Action::BOOLEAN:
boolmap[val] = Action::BOOLEAN; boolmap[val] = Action::BOOLEAN;
/* FALLTHRU */ /* FALLTHRU */
default: default:
actions.emplace_back(code, action_pos, val); actions.emplace_back(code, action_pos, val);
} }
i = find_if(i, s.end(), [](char ch) { return !C_isspace(ch); }); i = find_if(i, s.end(), [](char ch) { return !C_isspace(ch); });
} else { } else {
if (min_args > 0) { if (min_args > 0) {
report_location(DIAG_ERROR, filename, line_no, report_location(DIAG_ERROR, filename, line_no,
i_after_action - s.begin()); i_after_action - s.begin());
if (min_args == max_args) { if (min_args == max_args) {
cerr << "Index action '" << action << "' requires " cerr << "Index action '" << action << "' requires "
<< min_args << " arguments" << endl; << min_args << " arguments" << endl;
exit(1); exit(1);
} }
cerr << "Index action '" << action << "' requires at least " cerr << "Index action '" << action << "' requires at least "
<< min_args << " arguments" << endl; << min_args << " arguments" << endl;
exit(1); exit(1);
} }
if (code == Action::INDEX || code == Action::INDEXNOPOS) { switch (code) {
useless_weight_pos = string::npos; case Action::INDEX:
actions.emplace_back(code, action_pos, "", weight); case Action::INDEXNOPOS:
} else if (code == Action::GAP) { useless_weight_pos = string::npos;
actions.emplace_back(code, action_pos, "", 100); actions.emplace_back(code, action_pos, "", weight);
} else if (code == Action::HASH) { break;
actions.emplace_back(code, action_pos, "", case Action::GAP:
MAX_SAFE_TERM_LENGTH - 1); actions.emplace_back(code, action_pos, "", 100);
} else { break;
actions.emplace_back(code, action_pos); case Action::HASH:
actions.emplace_back(code, action_pos, "",
MAX_SAFE_TERM_LENGTH - 1);
break;
case Action::LTRIM:
case Action::RTRIM:
case Action::SQUASH:
case Action::TRIM:
actions.emplace_back(code, action_pos, " \t\f\v\r\n");
break;
default:
actions.emplace_back(code, action_pos);
break;
} }
} }
j = i; j = i;
} }
if (useless_weight_pos != string::npos) { if (useless_weight_pos != string::npos) {
report_useless_action(filename, line_no, useless_weight_pos, report_useless_action(filename, line_no, useless_weight_pos,
"weight"); "weight");
} }
while (!actions.empty()) { while (!actions.empty()) {
bool done = true; bool done = true;
Action::type action = actions.back().get_action(); Action::type action = actions.back().get_action();
switch (action) { switch (action) {
case Action::HASH: case Action::HASH:
case Action::HEXTOBIN: case Action::HEXTOBIN:
case Action::LOWER: case Action::LOWER:
case Action::LTRIM:
case Action::PARSEDATE: case Action::PARSEDATE:
case Action::RTRIM:
case Action::SPELL: case Action::SPELL:
case Action::SQUASH:
case Action::TRIM:
case Action::TRUNCATE: case Action::TRUNCATE:
case Action::UNHTML: case Action::UNHTML:
done = false; done = false;
report_useless_action(filename, line_no, report_useless_action(filename, line_no,
actions.back().get_pos(), actions.back().get_pos(),
action_names[action]); action_names[action]);
actions.pop_back(); actions.pop_back();
break; break;
default: default:
break; break;
skipping to change at line 836 skipping to change at line 965
output.push_back(r); output.push_back(r);
} }
value = std::move(output); value = std::move(output);
} }
badhex: badhex:
break; break;
} }
case Action::LOWER: case Action::LOWER:
value = Xapian::Unicode::tolower(value); value = Xapian::Unicode::tolower(value);
break; break;
case Action::LTRIM:
ltrim(value, action.get_string_arg());
break;
case Action::RTRIM:
rtrim(value, action.get_string_arg());
break;
case Action::TRIM:
rtrim(value, action.get_string_arg());
ltrim(value, action.get_string_arg());
break;
case Action::SQUASH:
squash(value, action.get_string_arg());
break;
case Action::LOAD: { case Action::LOAD: {
// If there's no input, just issue a warning. // If there's no input, just issue a warning.
if (value.empty()) { if (value.empty()) {
report_location(DIAG_WARN, fname, line_no); report_location(DIAG_WARN, fname, line_no);
cerr << "Empty filename in LOAD action" << endl; cerr << "Empty filename in LOAD action" << endl;
break; break;
} }
bool truncated = false; bool truncated = false;
string filename = std::move(value); string filename = std::move(value);
// FIXME: Use NOATIME if we own the file or are root. // FIXME: Use NOATIME if we own the file or are root.
 End of changes. 17 change blocks. 
21 lines changed or deleted 163 lines changed or added

Home  |  About  |  Features  |  All  |  Newest  |  Dox  |  Diffs  |  RSS Feeds  |  Screenshots  |  Comments  |  Imprint  |  Privacy  |  HTTP(S)