"Fossies" - the Fresh Open Source Software archive 
Member "evlog-1.6.1/user/lib/query/qopt.c" of archive evlog-1.6.1.tar.gz:
/*
* Linux Event Logging for the Enterprise
* Copyright (c) International Business Machines Corp., 2001
*
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
* Please send e-mail to lkessler@users.sourceforge.net if you have
* questions or comments.
*
* Project Website: http://evlog.sourceforge.net/
*
*/
#include <stdlib.h>
#include <malloc.h>
#include <assert.h>
#include "posix_evlog.h"
#include "evl_parse.h"
#include "q.tab.h"
/*
* This file implements query optimization on a normalized query tree.
*
* So far, we do the following optimizations:
*
* 1) Rewrite a query such as 'facility=100 && severity>info && foo=46'
* into a subquery that can be evaluated without referencing templates
* (i.e., 'facility=100 && severity>info') and a subquery that may need
* the template. Evaluate the former first, then the latter only if
* necessary.
*
* 2) Annotate the query with info that allows us to short-circuit the query
* if non-standard attributes mentioned in the query are missing in the
* record we're evaluating. This allows us to know, without populating
* the template (or even loading it, in many cases), that the query 'foo=46'
* will fail for any record whose whose template is lacking, or whose
* template doesn't mention foo.
*
* TODO: Read up on query optimization.
*/
static void divideTree(pnode_t *root);
/*
* If possible, reorder the comparisons in the indicated tree so that the
* cheap comparisons are made first, in hopes that the expensive ones can
* be avoided in many cases.
*
* For now, we just do the following:
* Rewrite a query such as 'facility=100 && severity>info && foo=46'
* into a subquery that can be evaluated without referencing templates
* (i.e., 'facility=100 && severity>info') and a subquery that may need
* the template. Evaluate the former first, then the latter only if
* necessary.
*
* For the above example, we rewrite the query as
* (facility=100 && severity>info) AND_LATER (foo=46)
* The idea is to move as many "pure" comparisons (those that refer only
* to standard attributes) to the left of the AND_LATER (or OR_LATER) node.
* When the query is evaluated, the subtree to the left of the root
* (AND_LATER or OR_LATER) node is evaluated first. If that's not
* sufficient to resolve the truth of the query expression, then we
* bring in the template and evaluate the right subtree.
*
* TODO: Reorder subtrees based on the costs of different kinds of
* comparisons:
* integer (cheapest)
* CONTAINS (more expensive)
* regular expression (even more expensive)
* age comparison (calls time() system call, so also expensive)
*/
void
_evlQReorderTree(pnode_t *root, evl_nonStdAtts_t *nsAtts)
{
if (!nsAtts) {
/*
* For now, we're interested only if we have non-standard
* attributes.
*/
return;
}
divideTree(root);
}
/*** Optimization #1 ***/
/* Information used by divideTree() and its helpers */
typedef struct _div_info {
int diRootOp; /* AND or OR */
evl_list_t *diPureList; /* nodes that don't ref non-std atts */
evl_list_t *diImpureList; /* nodes that ref non-std atts */
evl_list_t *diRecycleList; /* AND/OR nodes to reuse when */
/* rebuilding tree */
} div_info_t;
/*
* Returns 1 (true) if the subtree rooted at node refers to any non-standard
* attributes, or 0 otherwise.
*/
static int
refsNonStdAtts(pnode_t *node)
{
if (!node) {
return 0;
}
switch (node->node_type) {
case nt_op:
switch (node->attr_flag) {
case '!':
return refsNonStdAtts(pnLeft(node));
case AND:
case OR:
return (refsNonStdAtts(pnLeft(node))
|| refsNonStdAtts(pnRight(node)));
default:
/*
* Note that the expression
* data contains "foo"
* tests the contents of the variable portion
* of the (STRING) record, NOT the result after
* the template (if any) is applied.
*/
return (pnLeft(node)->node_type == nt_nsaname);
}
case nt_nsaname:
return 1;
case nt_attname:
return 0;
default:
assert(0);
/* NOTREACHED */
return 0;
}
/* NOTREACHED */
}
/* Helper function for divideTree(), which see. */
static void
diClassify(pnode_t *node, div_info_t *di)
{
if (node->node_type == nt_op && node->attr_flag == di->diRootOp) {
di->diRecycleList = _evlAppendToList(di->diRecycleList, node);
diClassify(pnLeft(node), di);
diClassify(pnRight(node), di);
} else {
if (refsNonStdAtts(node)) {
di->diImpureList = _evlAppendToList(di->diImpureList, node);
} else {
di->diPureList = _evlAppendToList(di->diPureList, node);
}
}
}
/*
* Remove the first node on the list pointed to by plist, and return a pointer
* to that node's data. Update *plist to point to the new (possibly NULL)
* list head.
*/
static pnode_t *
behead(evl_list_t **plist)
{
pnode_t *pnode;
assert(plist != NULL && *plist != NULL);
pnode = (pnode_t*) (*plist)->li_data;
*plist = _evlRemoveNode(*plist, *plist, NULL);
return pnode;
}
/*
* plist points to either di->diPureList or di->diImpureList. Make a subtree
* out of the remaining nodes in the list, depopulating the list as we go.
* For example, given the list { n1, n2, n3, n4 } and di->diRootOp == OR,
* we create (n1 OR (n2 OR (n3 OR n4))). Return a pointer to the subtree's
* root.
*/
static pnode_t *
diRebuildSubtree(evl_list_t **plist, div_info_t *di)
{
pnode_t *node = behead(plist);
if (*plist == NULL) {
/* node is the last in the list. */
return node;
} else {
/* Return (node AND/OR restOfList). */
pnode_t *parent = behead(&di->diRecycleList);
pnLeft(parent) = node;
pnRight(parent) = diRebuildSubtree(plist, di);
return parent;
}
/* NOTREACHED */
}
/*
* Given the query
* expr1 && nexpr1 && expr2 && nexpr2 && nexpr3
* where
* expr1 and expr2 reference only standard attributes
* and
* nexpr1, nexpr2, and nexpr3 reference non-standard attributes,
* rewrite the query as
* (expr1 && expr2) AND_LATER (nexpr1 && (nexpr2 && nexpr3))
* I.e., convert the root (AND) node to an AND_LATER node, and move all the refs
* to non-standard attributes to the right of the AND_LATER node.
*
* Works similarly if the root node is OR -- we convert it to OR_LATER, etc.
*
* If the query cannot be transformed in this way (e.g., all subexpressions
* refer to non-standard attributes), do nothing.
*
* Asserts if the query doesn't reference at least one non-standard attribute.
*/
static void
divideTree(pnode_t *root)
{
int rootOp;
div_info_t *di;
if (root->node_type != nt_op) {
return;
}
rootOp = root->attr_flag;
if (rootOp != AND && rootOp != OR) {
return;
}
di = (div_info_t*) malloc(sizeof(div_info_t));
assert(di != NULL);
di->diRootOp = rootOp;
di->diPureList = NULL;
di->diImpureList = NULL;
di->diRecycleList = NULL;
/*
* Classify the subtrees. Given the aforementioned sample query,
* we add expr1 and expr2 to the pure list; nexpr1, nexpr2, and nexpr3
* to the impure list, and all the indicated AND nodes (except root)
* to the recycle list.
*/
diClassify(pnLeft(root), di);
diClassify(pnRight(root), di);
assert(di->diImpureList != NULL);
if (di->diPureList == NULL) {
/* Can't be reordered. */
goto done;
}
/*
* Rebuild the left subtree from the pure nodes and the right
* subtree from the impure nodes.
*/
pnLeft(root) = diRebuildSubtree(&di->diPureList, di);
pnRight(root) = diRebuildSubtree(&di->diImpureList, di);
assert(di->diPureList == NULL);
assert(di->diImpureList == NULL);
assert(di->diRecycleList == NULL);
root->attr_flag = ((rootOp == AND) ? AND_LATER : OR_LATER);
free(di);
return;
done:
/* _evlFreeList() handles null lists. */
_evlFreeList(di->diPureList, 0);
_evlFreeList(di->diImpureList, 0);
_evlFreeList(di->diRecycleList, 0);
free(di);
}
/*** Optimization #2 ***/
/*
* What would the query expression rooted at root evaluate to if the
* attribute specified by nsa did not have a value? Return evlFuzzyTrue
* if it would always evaluate to true, evlFuzzyFalse if it would always
* evaluate to false, or evlFuzzyMaybe if we can't say for sure.
*
* If nsa==NULL, simulate the absence of all non-standard attributes.
*
* Assumes the query tree is already normalized.
*/
int
simulateMissingNsa(pnode_t *root, evl_nonStdAtt_t *nsa)
{
if (!root) {
return evlFuzzyTrue;
}
switch (root->node_type) {
case nt_attname:
/*
* Name of a standard attribute. All but data are always
* present.
*/
if (root->attr_flag == POSIX_LOG_ENTRY_DATA) {
return evlFuzzyMaybe;
} else {
return evlFuzzyTrue;
}
case nt_nsaname:
/* Name of a non-standard attribute */
if (!nsa || root->attr_nsa == nsa) {
/* By definition, this attribute is missing. */
return evlFuzzyFalse;
} else {
return evlFuzzyMaybe;
}
case nt_op:
switch (root->attr_flag) {
case '!':
switch (simulateMissingNsa(pnLeft(root), nsa)) {
case evlFuzzyFalse: return evlFuzzyTrue;
case evlFuzzyTrue: return evlFuzzyFalse;
default: return evlFuzzyMaybe;
}
case AND:
case AND_LATER:
{
int left = simulateMissingNsa(pnLeft(root), nsa);
int right = simulateMissingNsa(pnRight(root), nsa);
return (left & right);
}
case OR:
case OR_LATER:
{
int left = simulateMissingNsa(pnLeft(root), nsa);
int right = simulateMissingNsa(pnRight(root), nsa);
return (left | right);
}
default:
{
pnode_t *attNode = pnLeft(root);
if (attNode->node_type == nt_nsaname
&& (!nsa || attNode->attr_nsa == nsa)) {
/* Comparison with missing attribute */
return evlFuzzyFalse;
} else {
return evlFuzzyMaybe;
}
}
}
default:
assert(0);
/* NOTREACHED */
return evlFuzzyMaybe;
}
/* NOTREACHED */
}
/*
* Do the two optimizations outlined above:
*
* 1) Rearrange the query tree so that expensive operations can be
* postponed or foregone.
*
* 2) For each attribute in nsAtts, set the nsaResultIfMissing value.
* Also set nsResultIfAllAttsMissing for the query as a whole.
* This info is used to short-circuit evaluation of records that don't
* contain the specified non-standard attributes.
*
* Assumes the query tree is already normalized.
*/
void
_evlQOptimizeTree(pnode_t *qexpr, evl_nonStdAtts_t *nsAtts) {
evl_listnode_t *head, *end, *p;
if (!nsAtts) {
return;
}
// _evlQPrTree(qexpr);
_evlQReorderTree(qexpr, nsAtts);
// _evlQPrTree(qexpr);
head = nsAtts->nsAtts;
for (p=head, end=NULL; p!=end; end=head, p=p->li_next) {
evl_nonStdAtt_t *nsa = (evl_nonStdAtt_t*) p->li_data;
nsa->nsaResultIfMissing = simulateMissingNsa(qexpr, nsa);
}
nsAtts->nsResultIfAllAttsMissing = simulateMissingNsa(qexpr, NULL);
}