"Fossies" - the Fresh Open Source Software Archive  

Source code changes of the file "transform.cc" between
xapian-omega-1.4.19.tar.xz and xapian-omega-1.4.20.tar.xz

About: Xapian Omega is an application built on Xapian, consisting of indexers and a CGI search frontend.

Left column: transform.cc (xapian-omega-1.4.19.tar.xz) | Right column: transform.cc (xapian-omega-1.4.20.tar.xz)
/** @file /** @file
* @brief Implement OmegaScript $transform function. * @brief Implement OmegaScript $transform function.
*/ */
/* Copyright (C) 2003,2009,2015 Olly Betts /* Copyright (C) 2003,2009,2015,2022 Olly Betts
* *
* This program is free software; you can redistribute it and/or modify * This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or * the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version. * (at your option) any later version.
* *
* This program is distributed in the hope that it will be useful, * This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of * but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details. * GNU General Public License for more details.
* *
* You should have received a copy of the GNU General Public License * You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software * along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/ */
#include <config.h> #include <config.h>
#include "transform.h" #include "transform.h"
#include <pcre.h> #define PCRE2_CODE_UNIT_WIDTH 8
#include <pcre2.h>
#include <cstdint>
#include <map> #include <map>
#include <string> #include <string>
#include <vector> #include <vector>
using namespace std; using namespace std;
static map<pair<string, int>, pcre *> re_cache; static map<pair<string, uint32_t>, pcre2_code*> re_cache;
static pcre2_match_data* md = NULL;
static pcre * static pcre2_code*
get_re(const string & pattern, int options) get_re(const string& pattern, uint32_t options)
{ {
pair<string, int> re_key = make_pair(pattern, options); pair<string, uint32_t> re_key = make_pair(pattern, options);
auto re_it = re_cache.find(re_key); auto re_it = re_cache.find(re_key);
if (re_it != re_cache.end()) { if (re_it != re_cache.end()) {
return re_it->second; return re_it->second;
} }
const char *error; if (!md) {
int erroffset; // Create lazily - here is a good point as it's a single place we
pcre * re = // have to pass through before executing a regex.
pcre_compile(pattern.c_str(), options, &error, &erroffset, NULL); md = pcre2_match_data_create(10, NULL);
}
int error_code;
PCRE2_SIZE erroffset;
auto re = pcre2_compile(PCRE2_SPTR8(pattern.data()), pattern.size(),
options, &error_code, &erroffset, NULL);
if (!re) { if (!re) {
string m = "$transform failed to compile its regular expression: "; string m = "$transform failed to compile its regular expression: ";
m += error; // pcre2api(3) says that "a buffer size of 120 code units is ample".
unsigned char buf[120];
pcre2_get_error_message(error_code, buf, sizeof(buf));
m += reinterpret_cast<char*>(buf);
throw m; throw m;
} }
re_cache.insert(make_pair(re_key, re)); re_cache.insert(make_pair(re_key, re));
return re; return re;
} }
void void
omegascript_match(string & value, const vector<string> & args) omegascript_match(string & value, const vector<string> & args)
{ {
int offsets[30]; uint32_t options = PCRE2_UTF;
int options = 0;
if (args.size() > 2) { if (args.size() > 2) {
const string &opts = args[2]; const string &opts = args[2];
for (string::const_iterator i = opts.begin(); i != opts.end(); ++i) { for (char ch : opts) {
switch (*i) { switch (ch) {
case 'i': case 'i':
options |= PCRE_CASELESS; options |= PCRE2_CASELESS;
break; break;
case 'm': case 'm':
options |= PCRE_MULTILINE; options |= PCRE2_MULTILINE;
break; break;
case 's': case 's':
options |= PCRE_DOTALL; options |= PCRE2_DOTALL;
break; break;
case 'x': case 'x':
options |= PCRE_EXTENDED; options |= PCRE2_EXTENDED;
break; break;
default: { default: {
string m = "Unknown $match option character: "; string m = "Unknown $match option character: ";
m += *i; m += ch;
throw m; throw m;
} }
} }
} }
} }
pcre * re = get_re(args[0], options); pcre2_code* re = get_re(args[0], options);
int matches = pcre_exec(re, NULL, args[1].data(), args[1].size(), int matches = pcre2_match(re, PCRE2_SPTR8(args[1].data()), args[1].size(),
0, 0, offsets, 30); 0, 0, md, NULL);
if (matches > 0) { if (matches > 0) {
value += "true"; value += "true";
} }
} }
void void
omegascript_transform(string & value, const vector<string> & args) omegascript_transform(string & value, const vector<string> & args)
{ {
int offsets[30];
bool replace_all = false; bool replace_all = false;
int options = 0; uint32_t options = PCRE2_UTF;
if (args.size() > 3) { if (args.size() > 3) {
const string & opts = args[3]; const string & opts = args[3];
for (string::const_iterator i = opts.begin(); i != opts.end(); ++i) { for (char ch : opts) {
switch (*i) { switch (ch) {
case 'g': case 'g':
replace_all = true; replace_all = true;
break; break;
case 'i': case 'i':
options |= PCRE_CASELESS; options |= PCRE2_CASELESS;
break; break;
case 'm': case 'm':
options |= PCRE_MULTILINE; options |= PCRE2_MULTILINE;
break; break;
case 's': case 's':
options |= PCRE_DOTALL; options |= PCRE2_DOTALL;
break; break;
case 'x': case 'x':
options |= PCRE_EXTENDED; options |= PCRE2_EXTENDED;
break; break;
default: { default: {
string m = "Unknown $transform option character: "; string m = "Unknown $transform option character: ";
m += *i; m += ch;
throw m; throw m;
} }
} }
} }
} }
pcre * re = get_re(args[0], options); pcre2_code* re = get_re(args[0], options);
size_t start = 0; PCRE2_SIZE start = 0;
do { do {
int matches = pcre_exec(re, NULL, args[2].data(), args[2].size(), int matches = pcre2_match(re,
int(start), 0, offsets, 30); PCRE2_SPTR8(args[2].data()), args[2].size(),
start, 0, md, NULL);
if (matches <= 0) { if (matches <= 0) {
// (matches == PCRE_ERROR_NOMATCH) is OK, otherwise this is an // (matches == PCRE_ERROR_NOMATCH) is OK, otherwise this is an
// error. FIXME: should we report this rather than ignoring it? // error. FIXME: should we report this rather than ignoring it?
break; break;
} }
// Substitute \1 ... \9, and \\. // Substitute \1 ... \9, and \\.
string::const_iterator i; PCRE2_SIZE* offsets = pcre2_get_ovector_pointer(md);
value.append(args[2], start, offsets[0] - start); value.append(args[2], start, offsets[0] - start);
for (i = args[1].begin(); i != args[1].end(); ++i) { for (auto i = args[1].begin(); i != args[1].end(); ++i) {
char ch = *i; char ch = *i;
if (ch != '\\') { if (ch != '\\') {
value += ch; value += ch;
continue; continue;
} }
if (rare(++i == args[1].end())) { if (rare(++i == args[1].end())) {
// Trailing single '\'. // Trailing single '\'.
value += ch; value += ch;
break; break;
End of changes. 28 change blocks.
38 lines changed or deleted; 49 lines changed or added.

Home  |  About  |  Features  |  All  |  Newest  |  Dox  |  Diffs  |  RSS Feeds  |  Screenshots  |  Comments  |  Imprint  |  Privacy  |  HTTP(S)