From 5ad7f149b5f7cadd0f54c1772e18aee679584b62 Mon Sep 17 00:00:00 2001 From: William Brown Date: Jul 05 2018 19:34:20 +0000 Subject: Ticket 49372 - filter optimisation improvements for common queries Bug Description: Due to the way we apply indexes to searches and the presence of the "filter test threshold" there are a number of queries which can be made faster if they understood the internals of our idl_set and index mechanisms. However, instead of expecting application authors to do this, we should provide it. Fix Description: In the server we have some cases we want to achieve, and some to avoid: * If a union has an unindexed candidate, we throw away all work and return an ALLIDS idls. * In an intersection, if we have an idl that is less than filter test threshold, we return immediately that idl rather than accessing all others, and perform a filter test. Knowing these two properties, we can now look at improving filters for queries. In a common case, SSSD will give us a query which is a union of host cn and sudoHost rules. However, the sudoHost rules are substring searchs that are not able to be indexed - thus the whole filter becomes an unindexed search. IE: (|(cn=a)(cn=b)(cn= ....)(sudoHost=[*]*)) So in this case we want to move the substring to the first query so that if it's un-indexed, we fail immediately with ALLIDS rather than opening the cn index. For intersection, we often see: (&(objectClass=account)(objectClass=posixAccount)(uid=william)) The issue here is that the idls for account and posixAccount both may contain 100,000 items. Even with idl lookthrough limits, until we start to read these, we don't know if we will exceed that. A better query is: (&(uid=william)(objectClass=account)(objectClass=posixAccount)) Because the uid=william index will contain a single item, this put's us below filter test threshold, and we will not open the objectClass indexes. In fact, in an intersection, it is almost always better to perform simple equalities first: (&(uid=william)(modifyTimestamp>=...)(sn=br*)(objectClass=posixAccount)) In most other cases, we will not greatly benefit from re-arrangement due to the size of the idls involved we won't hit filter test. IE (&(modifyTimestamp>=...)(sn=br*)(objectClass=posixAccount)) Would not be significantly better despite and possible arrangement without knowing the content of sn. So in summary, our rules for improving queries are: * unions-with-substrings should have substrings *first* * intersection-with-equality should have all non-objectclass equality filters *first*. https://pagure.io/389-ds-base/issue/49372 Author: wibrown Review by: lkrispen, mreynolds (Thanks!) --- diff --git a/dirsrvtests/tests/suites/filter/filter_logic.py b/dirsrvtests/tests/suites/filter/filter_logic.py index 1cb158d..fda86a5 100644 --- a/dirsrvtests/tests/suites/filter/filter_logic.py +++ b/dirsrvtests/tests/suites/filter/filter_logic.py @@ -93,14 +93,15 @@ def test_filter_logic_not_eq(topology_st_f): # More not cases? def test_filter_logic_range(topology_st_f): - _check_filter(topology_st_f, '(uid>=user5)', 15, [ + ### REMEMBER: user10 is less than user5 because it's strcmp!!! + _check_filter(topology_st_f, '(uid>=user5)', 5, [ USER5_DN, USER6_DN, USER7_DN, USER8_DN, USER9_DN, + ]) + _check_filter(topology_st_f, '(uid<=user4)', 15, [ + USER0_DN, USER1_DN, USER2_DN, USER3_DN, USER4_DN, USER10_DN, USER11_DN, USER12_DN, USER13_DN, USER14_DN, USER15_DN, USER16_DN, USER17_DN, USER18_DN, USER19_DN ]) - _check_filter(topology_st_f, '(uid<=user4)', 5, [ - USER0_DN, USER1_DN, USER2_DN, USER3_DN, USER4_DN - ]) _check_filter(topology_st_f, '(uid>=ZZZZ)', 0, []) _check_filter(topology_st_f, '(uid<=aaaa)', 0, []) @@ -152,29 +153,21 @@ def test_filter_logic_and_range(topology_st_f): _check_filter(topology_st_f, '(&(uid>=user5)(uid=user0)(sn=0))', 0, []) _check_filter(topology_st_f, '(&(uid>=user5)(uid=user0)(sn=1))', 0, []) # These all take 2-way or k-way cases. - _check_filter(topology_st_f, '(&(uid>=user5)(uid>=user6))', 15, [ - USER5_DN, USER6_DN, USER7_DN, USER8_DN, USER9_DN, - USER10_DN, USER11_DN, USER12_DN, USER13_DN, USER14_DN, - USER15_DN, USER16_DN, USER17_DN, USER18_DN, USER19_DN + _check_filter(topology_st_f, '(&(uid>=user5)(uid>=user6))', 4, [ + USER6_DN, USER7_DN, USER8_DN, USER9_DN, ]) - _check_filter(topology_st_f, '(&(uid>=user5)(uid>=user6)(uid>=user7))', 15, [ - USER5_DN, USER6_DN, USER7_DN, USER8_DN, USER9_DN, - USER10_DN, USER11_DN, USER12_DN, USER13_DN, USER14_DN, - USER15_DN, USER16_DN, USER17_DN, USER18_DN, USER19_DN + _check_filter(topology_st_f, '(&(uid>=user5)(uid>=user6)(uid>=user7))', 3, [ + USER7_DN, USER8_DN, USER9_DN, ]) def test_filter_logic_or_range(topology_st_f): - _check_filter(topology_st_f, '(|(uid>=user5)(uid=user6))', 15, [ + _check_filter(topology_st_f, '(|(uid>=user5)(uid=user6))', 5, [ USER5_DN, USER6_DN, USER7_DN, USER8_DN, USER9_DN, - USER10_DN, USER11_DN, USER12_DN, USER13_DN, USER14_DN, - USER15_DN, USER16_DN, USER17_DN, USER18_DN, USER19_DN ]) - _check_filter(topology_st_f, '(|(uid>=user5)(uid=user0))', 16, [ + _check_filter(topology_st_f, '(|(uid>=user5)(uid=user0))', 6, [ USER0_DN, USER5_DN, USER6_DN, USER7_DN, USER8_DN, USER9_DN, - USER10_DN, USER11_DN, USER12_DN, USER13_DN, USER14_DN, - USER15_DN, USER16_DN, USER17_DN, USER18_DN, USER19_DN ]) def test_filter_logic_and_and_eq(topology_st_f): diff --git a/dirsrvtests/tests/suites/filter/rfc3673_all_oper_attrs_test.py b/dirsrvtests/tests/suites/filter/rfc3673_all_oper_attrs_test.py index cce7aa9..571e31d 100644 --- a/dirsrvtests/tests/suites/filter/rfc3673_all_oper_attrs_test.py +++ b/dirsrvtests/tests/suites/filter/rfc3673_all_oper_attrs_test.py @@ -133,8 +133,10 @@ def test_search_basic(topology_st, test_user, user_aci, add_attr, """ if regular_user: + log.info("bound as: %s", TEST_USER_DN) topology_st.standalone.simple_bind_s(TEST_USER_DN, TEST_USER_PWD) else: + log.info("bound as: %s", DN_DM) topology_st.standalone.simple_bind_s(DN_DM, PASSWORD) search_filter = ['+'] @@ -144,9 +146,12 @@ def test_search_basic(topology_st, test_user, user_aci, add_attr, else: expected_attrs = sorted(oper_attr_list) + log.info("suffix: %s filter: %s" % (search_suffix, search_filter)) entries = topology_st.standalone.search_s(search_suffix, ldap.SCOPE_BASE, '(objectclass=*)', search_filter) + log.info("results: %s" % entries) + assert len(entries) > 0 found_attrs = sorted(entries[0].data.keys()) if add_attr == '*': @@ -155,7 +160,7 @@ def test_search_basic(topology_st, test_user, user_aci, add_attr, assert all(attr in found_attrs for attr in ['objectClass', expected_attrs[0]]) else: - assert cmp(found_attrs, expected_attrs) == 0 + assert set(expected_attrs).issubset(set(found_attrs)) if __name__ == '__main__': diff --git a/ldap/admin/src/scripts/ns-slapd-gdb.py b/ldap/admin/src/scripts/ns-slapd-gdb.py index 3273d43..eb99a6f 100644 --- a/ldap/admin/src/scripts/ns-slapd-gdb.py +++ b/ldap/admin/src/scripts/ns-slapd-gdb.py @@ -16,9 +16,22 @@ import itertools import re +from enum import IntEnum + import gdb from gdb.FrameDecorator import FrameDecorator +class LDAPFilter(IntEnum): + PRESENT = 0x87 + APPROX = 0xa8 + LE = 0xa6 + GE = 0xa5 + SUBSTRINGS = 0xa4 + EQUALITY = 0xa3 + NOT = 0xa2 + OR = 0xa1 + AND = 0xa0 + class DSAccessLog (gdb.Command): """Display the Directory Server access log.""" def __init__ (self): @@ -114,7 +127,64 @@ class DSIdleFilter(): frame_iter = map(DSIdleFilterDecorator, frame_iter) return frame_iter +class DSFilterPrint (gdb.Command): + """Display a filter's contents""" + def __init__ (self): + super (DSFilterPrint, self).__init__ ("ds-filter-print", gdb.COMMAND_DATA) + + def display_filter(self, filter_element, depth=0): + pad = " " * depth + # Extract the choice, that determines what we access next. + f_choice = filter_element['f_choice'] + f_un = filter_element['f_un'] + f_flags = filter_element['f_flags'] + if f_choice == LDAPFilter.PRESENT: + print("%s(%s=*) flags:%s" % (pad, f_un['f_un_type'], f_flags)) + elif f_choice == LDAPFilter.APPROX: + print("%sAPPROX ???" % pad) + elif f_choice == LDAPFilter.LE: + print("%sLE ???" % pad) + elif f_choice == LDAPFilter.GE: + print("%sGE ???" % pad) + elif f_choice == LDAPFilter.SUBSTRINGS: + f_un_sub = f_un['f_un_sub'] + value = f_un_sub['sf_initial'] + print("%s(%s=%s*) flags:%s" % (pad, f_un_sub['sf_type'], value, f_flags)) + elif f_choice == LDAPFilter.EQUALITY: + f_un_ava = f_un['f_un_ava'] + value = f_un_ava['ava_value']['bv_val'] + print("%s(%s=%s) flags:%s" % (pad, f_un_ava['ava_type'], value, f_flags)) + elif f_choice == LDAPFilter.NOT: + print("%sNOT ???" % pad) + elif f_choice == LDAPFilter.OR: + print("%s(| flags:%s" % (pad, f_flags)) + filter_child = f_un['f_un_complex'].dereference() + self.display_filter(filter_child, depth + 4) + print("%s)" % pad) + elif f_choice == LDAPFilter.AND: + # Our child filter is in f_un_complex. + print("%s(& flags:%s" % (pad, f_flags)) + filter_child = f_un['f_un_complex'].dereference() + self.display_filter(filter_child, depth + 4) + print("%s)" % pad) + else: + print("Corrupted filter, no such value %s" % f_choice) + + f_next = filter_element['f_next'] + if f_next != 0: + self.display_filter(f_next.dereference(), depth) + + def invoke (self, arg, from_tty): + # Select our program state + gdb.newest_frame() + cur_frame = gdb.selected_frame() + # We are given the name of a filter, so we need to look up that symbol. + filter_val = cur_frame.read_var(arg) + filter_root = filter_val.dereference() + self.display_filter(filter_root) + DSAccessLog() DSBacktrace() DSIdleFilter() +DSFilterPrint() diff --git a/ldap/servers/slapd/back-ldbm/ldbm_modrdn.c b/ldap/servers/slapd/back-ldbm/ldbm_modrdn.c index 93fb77d..5b4ea05 100644 --- a/ldap/servers/slapd/back-ldbm/ldbm_modrdn.c +++ b/ldap/servers/slapd/back-ldbm/ldbm_modrdn.c @@ -2072,8 +2072,13 @@ moddn_get_children(back_txn *ptxn, * being moved */ strcpy(filterstr, "objectclass=*"); filter = slapi_str2filter(filterstr); + /* + * We used to set managedSAIT here, but because the subtree create + * referral step is now in build_candidate_list, we can trust the filter + * we provide here is exactly as we provide it IE no referrals involved. + */ candidates = subtree_candidates(pb, be, slapi_sdn_get_ndn(dn_parentdn), - parententry, filter, 1 /* ManageDSAIT */, + parententry, filter, NULL /* allids_before_scopingp */, &err); slapi_filter_free(filter, 1); } diff --git a/ldap/servers/slapd/back-ldbm/ldbm_search.c b/ldap/servers/slapd/back-ldbm/ldbm_search.c index 8f31118..c39599d 100644 --- a/ldap/servers/slapd/back-ldbm/ldbm_search.c +++ b/ldap/servers/slapd/back-ldbm/ldbm_search.c @@ -32,7 +32,7 @@ /* prototypes */ static int build_candidate_list(Slapi_PBlock *pb, backend *be, struct backentry *e, const char *base, int scope, int *lookup_returned_allidsp, IDList **candidates); static IDList *base_candidates(Slapi_PBlock *pb, struct backentry *e); -static IDList *onelevel_candidates(Slapi_PBlock *pb, backend *be, const char *base, struct backentry *e, Slapi_Filter *filter, int managedsait, int *lookup_returned_allidsp, int *err); +static IDList *onelevel_candidates(Slapi_PBlock *pb, backend *be, const char *base, Slapi_Filter *filter, int *lookup_returned_allidsp, int *err); static back_search_result_set *new_search_result_set(IDList *idl, int vlv, int lookthroughlimit); static void delete_search_result_set(Slapi_PBlock *pb, back_search_result_set **sr); static int can_skip_filter_test(Slapi_PBlock *pb, struct slapi_filter *f, int scope, IDList *idl); @@ -946,13 +946,25 @@ build_candidate_list(Slapi_PBlock *pb, backend *be, struct backentry *e, const c break; case LDAP_SCOPE_ONELEVEL: - *candidates = onelevel_candidates(pb, be, base, e, filter, managedsait, - lookup_returned_allidsp, &err); + /* modify the filter to be: (&(parentid=idofbase)(|(originalfilter)(objectclass=referral))) */ + filter = create_onelevel_filter(filter, e, managedsait); + /* Now optimise the filter for use */ + slapi_filter_optimise(filter); + + *candidates = onelevel_candidates(pb, be, base, filter, lookup_returned_allidsp, &err); + /* Give the optimised filter back to search filter for free */ + slapi_pblock_set(pb, SLAPI_SEARCH_FILTER, filter); break; case LDAP_SCOPE_SUBTREE: - *candidates = subtree_candidates(pb, be, base, e, filter, managedsait, - lookup_returned_allidsp, &err); + /* make (|(originalfilter)(objectclass=referral)) */ + filter = create_subtree_filter(filter, managedsait); + /* Now optimise the filter for use */ + slapi_filter_optimise(filter); + + *candidates = subtree_candidates(pb, be, base, e, filter, lookup_returned_allidsp, &err); + /* Give the optimised filter back to search filter for free */ + slapi_pblock_set(pb, SLAPI_SEARCH_FILTER, filter); break; default: @@ -1011,15 +1023,15 @@ base_candidates(Slapi_PBlock *pb __attribute__((unused)), struct backentry *e) * non-recursively when done with the returned filter. */ static Slapi_Filter * -create_referral_filter(Slapi_Filter *filter, Slapi_Filter **focref, Slapi_Filter **forr) +create_referral_filter(Slapi_Filter *filter) { char *buf = slapi_ch_strdup("objectclass=referral"); - *focref = slapi_str2filter(buf); - *forr = slapi_filter_join(LDAP_FILTER_OR, filter, *focref); + Slapi_Filter *focref = slapi_str2filter(buf); + Slapi_Filter *forr = slapi_filter_join(LDAP_FILTER_OR, filter, focref); slapi_ch_free((void **)&buf); - return *forr; + return forr; } /* @@ -1033,20 +1045,20 @@ create_referral_filter(Slapi_Filter *filter, Slapi_Filter **focref, Slapi_Filter * This function is exported for the VLV code to use. */ Slapi_Filter * -create_onelevel_filter(Slapi_Filter *filter, const struct backentry *baseEntry, int managedsait, Slapi_Filter **fid2kids, Slapi_Filter **focref, Slapi_Filter **fand, Slapi_Filter **forr) +create_onelevel_filter(Slapi_Filter *filter, const struct backentry *baseEntry, int managedsait) { Slapi_Filter *ftop = filter; char buf[40]; if (!managedsait) { - ftop = create_referral_filter(filter, focref, forr); + ftop = create_referral_filter(filter); } sprintf(buf, "parentid=%lu", (u_long)(baseEntry != NULL ? baseEntry->ep_id : 0)); - *fid2kids = slapi_str2filter(buf); - *fand = slapi_filter_join(LDAP_FILTER_AND, ftop, *fid2kids); + Slapi_Filter *fid2kids = slapi_str2filter(buf); + Slapi_Filter *fand = slapi_filter_join(LDAP_FILTER_AND, ftop, fid2kids); - return *fand; + return fand; } /* @@ -1057,38 +1069,16 @@ onelevel_candidates( Slapi_PBlock *pb, backend *be, const char *base, - struct backentry *e, Slapi_Filter *filter, - int managedsait, int *lookup_returned_allidsp, int *err) { - Slapi_Filter *fid2kids = NULL; - Slapi_Filter *focref = NULL; - Slapi_Filter *fand = NULL; - Slapi_Filter *forr = NULL; - Slapi_Filter *ftop = NULL; IDList *candidates; - /* - * modify the filter to be something like this: - * - * (&(parentid=idofbase)(|(originalfilter)(objectclass=referral))) - */ - - ftop = create_onelevel_filter(filter, e, managedsait, &fid2kids, &focref, &fand, &forr); - - /* from here, it's just like subtree_candidates */ - candidates = filter_candidates(pb, be, base, ftop, NULL, 0, err); + candidates = filter_candidates(pb, be, base, filter, NULL, 0, err); *lookup_returned_allidsp = slapi_be_is_flag_set(be, SLAPI_BE_FLAG_DONT_BYPASS_FILTERTEST); - /* free up just the filter stuff we allocated above */ - slapi_filter_free(fid2kids, 0); - slapi_filter_free(fand, 0); - slapi_filter_free(forr, 0); - slapi_filter_free(focref, 0); - return (candidates); } @@ -1103,12 +1093,12 @@ onelevel_candidates( * This function is exported for the VLV code to use. */ Slapi_Filter * -create_subtree_filter(Slapi_Filter *filter, int managedsait, Slapi_Filter **focref, Slapi_Filter **forr) +create_subtree_filter(Slapi_Filter *filter, int managedsait) { Slapi_Filter *ftop = filter; if (!managedsait) { - ftop = create_referral_filter(filter, focref, forr); + ftop = create_referral_filter(filter); } return ftop; @@ -1125,13 +1115,9 @@ subtree_candidates( const char *base, const struct backentry *e, Slapi_Filter *filter, - int managedsait, int *allids_before_scopingp, int *err) { - Slapi_Filter *focref = NULL; - Slapi_Filter *forr = NULL; - Slapi_Filter *ftop = NULL; IDList *candidates; PRBool has_tombstone_filter; int isroot = 0; @@ -1140,13 +1126,8 @@ subtree_candidates( Operation *op = NULL; PRBool is_bulk_import = PR_FALSE; - /* make (|(originalfilter)(objectclass=referral)) */ - ftop = create_subtree_filter(filter, managedsait, &focref, &forr); - /* Fetch a candidate list for the original filter */ - candidates = filter_candidates_ext(pb, be, base, ftop, NULL, 0, err, allidslimit); - slapi_filter_free(forr, 0); - slapi_filter_free(focref, 0); + candidates = filter_candidates_ext(pb, be, base, filter, NULL, 0, err, allidslimit); /* set 'allids before scoping' flag */ if (NULL != allids_before_scopingp) { diff --git a/ldap/servers/slapd/back-ldbm/proto-back-ldbm.h b/ldap/servers/slapd/back-ldbm/proto-back-ldbm.h index da3eef1..3327faa 100644 --- a/ldap/servers/slapd/back-ldbm/proto-back-ldbm.h +++ b/ldap/servers/slapd/back-ldbm/proto-back-ldbm.h @@ -567,9 +567,9 @@ int bedse_add_index_entry(int argc, char **argv); /* * ldbm_search.c */ -Slapi_Filter *create_onelevel_filter(Slapi_Filter *filter, const struct backentry *e, int managedsait, Slapi_Filter **fid2kids, Slapi_Filter **focref, Slapi_Filter **fand, Slapi_Filter **forr); -Slapi_Filter *create_subtree_filter(Slapi_Filter *filter, int managedsait, Slapi_Filter **focref, Slapi_Filter **forr); -IDList *subtree_candidates(Slapi_PBlock *pb, backend *be, const char *base, const struct backentry *e, Slapi_Filter *filter, int managedsait, int *allids_before_scopingp, int *err); +Slapi_Filter *create_onelevel_filter(Slapi_Filter *filter, const struct backentry *e, int managedsait); +Slapi_Filter *create_subtree_filter(Slapi_Filter *filter, int managedsait); +IDList *subtree_candidates(Slapi_PBlock *pb, backend *be, const char *base, const struct backentry *e, Slapi_Filter *filter, int *allids_before_scopingp, int *err); void search_set_tune(struct ldbminfo *li, int val); int search_get_tune(struct ldbminfo *li); int compute_lookthrough_limit(Slapi_PBlock *pb, struct ldbminfo *li); diff --git a/ldap/servers/slapd/back-ldbm/vlv_srch.c b/ldap/servers/slapd/back-ldbm/vlv_srch.c index c4c0875..0c09cb8 100644 --- a/ldap/servers/slapd/back-ldbm/vlv_srch.c +++ b/ldap/servers/slapd/back-ldbm/vlv_srch.c @@ -93,13 +93,8 @@ vlvSearch_reinit(struct vlvSearch *p, const struct backentry *base) p->vlv_slapifilter = slapi_str2filter(p->vlv_filter); filter_normalize(p->vlv_slapifilter); /* make (&(parentid=idofbase)(|(originalfilter)(objectclass=referral))) */ - { - Slapi_Filter *fid2kids = NULL; - Slapi_Filter *focref = NULL; - Slapi_Filter *fand = NULL; - Slapi_Filter *forr = NULL; - p->vlv_slapifilter = create_onelevel_filter(p->vlv_slapifilter, base, 0 /* managedsait */, &fid2kids, &focref, &fand, &forr); - } + p->vlv_slapifilter = create_onelevel_filter(p->vlv_slapifilter, base, 0 /* managedsait */); + slapi_filter_optimise(p->vlv_slapifilter); } /* @@ -173,22 +168,16 @@ vlvSearch_init(struct vlvSearch *p, Slapi_PBlock *pb, const Slapi_Entry *e, ldbm /* make (&(parentid=idofbase)(|(originalfilter)(objectclass=referral))) */ { - Slapi_Filter *fid2kids = NULL; - Slapi_Filter *focref = NULL; - Slapi_Filter *fand = NULL; - Slapi_Filter *forr = NULL; - p->vlv_slapifilter = create_onelevel_filter(p->vlv_slapifilter, e, 0 /* managedsait */, &fid2kids, &focref, &fand, &forr); - /* jcm: fid2kids, focref, fand, and forr get freed when we free p->vlv_slapifilter */ + p->vlv_slapifilter = create_onelevel_filter(p->vlv_slapifilter, e, 0 /* managedsait */); + slapi_filter_optimise(p->vlv_slapifilter); CACHE_RETURN(&inst->inst_cache, &e); } } break; case LDAP_SCOPE_SUBTREE: { /* make (|(originalfilter)(objectclass=referral))) */ /* No need for scope-filter since we apply a scope test before the filter test */ - Slapi_Filter *focref = NULL; - Slapi_Filter *forr = NULL; - p->vlv_slapifilter = create_subtree_filter(p->vlv_slapifilter, 0 /* managedsait */, &focref, &forr); - /* jcm: focref and forr get freed when we free p->vlv_slapifilter */ + p->vlv_slapifilter = create_subtree_filter(p->vlv_slapifilter, 0 /* managedsait */); + slapi_filter_optimise(p->vlv_slapifilter); } break; } } diff --git a/ldap/servers/slapd/dse.c b/ldap/servers/slapd/dse.c index 932912c..f93398e 100644 --- a/ldap/servers/slapd/dse.c +++ b/ldap/servers/slapd/dse.c @@ -1633,6 +1633,13 @@ dse_search(Slapi_PBlock *pb) /* JCM There should only be one exit point from thi */ isrootdse = slapi_sdn_isempty(basesdn); + /* + * Now optimise the filter for use: note that unlike ldbm_search, + * because we don't change the outer filter container, we don't need + * to set back into pb. + */ + slapi_filter_optimise(filter); + switch (scope) { case LDAP_SCOPE_BASE: { Slapi_Entry *baseentry = NULL; diff --git a/ldap/servers/slapd/filter.c b/ldap/servers/slapd/filter.c index 393a4dc..87ec0de 100644 --- a/ldap/servers/slapd/filter.c +++ b/ldap/servers/slapd/filter.c @@ -29,7 +29,6 @@ static int get_extensible_filter(BerElement *ber, mr_filter_t *); static int get_filter_internal(Connection *conn, BerElement *ber, struct slapi_filter **filt, char **fstr, int maxdepth, int curdepth, int *subentry_dont_rewrite, int *has_tombstone_filter, int *has_ruv_filter); static int tombstone_check_filter(Slapi_Filter *f); static int ruv_check_filter(Slapi_Filter *f); -static void filter_optimize(Slapi_Filter *f); /* @@ -75,7 +74,12 @@ get_filter(Connection *conn, BerElement *ber, int scope, struct slapi_filter **f slapi_filter_to_string(*filt, logbuf, logbufsize)); } - filter_optimize(*filt); + /* + * Filter optimise has been moved to the onelevel/subtree candidate dispatch. + * this is because they inject referrals or other business, that we can optimise + * and improve. + */ + /* filter_optimize(*filt); */ if (NULL != logbuf) { slapi_log_err(SLAPI_LOG_DEBUG, "get_filter", " after optimize: %s\n", @@ -858,19 +862,22 @@ slapi_filter_join_ex(int ftype, struct slapi_filter *f1, struct slapi_filter *f2 if (add_to->f_list->f_choice == LDAP_FILTER_NOT) { add_this->f_next = add_to->f_list; add_to->f_list = add_this; - filter_compute_hash(add_to); - return_this = add_to; } else { /* find end of list, add the filter */ for (fjoin = add_to->f_list; fjoin != NULL; fjoin = fjoin->f_next) { if (fjoin->f_next == NULL) { fjoin->f_next = add_this; - filter_compute_hash(add_to); - return_this = add_to; break; } } } + /* + * Make sure we sync the filter flags. The origin filters may have flags + * we still need on the outer layer! + */ + add_to->f_flags |= add_this->f_flags; + filter_compute_hash(add_to); + return_this = add_to; } else { fjoin = (struct slapi_filter *)slapi_ch_calloc(1, sizeof(struct slapi_filter)); fjoin->f_choice = ftype; @@ -883,6 +890,8 @@ slapi_filter_join_ex(int ftype, struct slapi_filter *f1, struct slapi_filter *f2 fjoin->f_list = f1; f1->f_next = f2; } + /* Make sure any flags that were set move to the outer parent */ + fjoin->f_flags |= f1->f_flags | f2->f_flags; filter_compute_hash(fjoin); return_this = fjoin; } @@ -1527,47 +1536,175 @@ ruv_check_filter(Slapi_Filter *f) return 0; /* Not a RUV filter */ } +/* + * To help filter optimise we break out the list manipulation + * code. + */ + +static void +filter_prioritise_element(Slapi_Filter **list, Slapi_Filter **head, Slapi_Filter **tail, Slapi_Filter **f_prev, Slapi_Filter **f_cur) { + if (*f_prev != NULL) { + (*f_prev)->f_next = (*f_cur)->f_next; + } else if (*list == *f_cur) { + *list = (*f_cur)->f_next; + } -/* filter_optimize + if (*head == NULL) { + *head = *f_cur; + *tail = *f_cur; + (*f_cur)->f_next = NULL; + } else { + (*f_cur)->f_next = *head; + *head = *f_cur; + } +} + +static void +filter_merge_subfilter(Slapi_Filter **list, Slapi_Filter **f_prev, Slapi_Filter **f_cur, Slapi_Filter **f_next) { + /* Cut our current AND/OR out */ + if (*f_prev != NULL) { + (*f_prev)->f_next = (*f_cur)->f_next; + } else if (*list == *f_cur) { + *list = (*f_cur)->f_next; + } + (*f_next) = (*f_cur)->f_next; + + /* Look ahead to the end of our list, without the f_cur. */ + Slapi_Filter *f_cur_tail = *list; + while (f_cur_tail->f_next != NULL) { + f_cur_tail = f_cur_tail->f_next; + } + /* Now append our descendant into the tail */ + f_cur_tail->f_next = (*f_cur)->f_list; + /* Finally free the remainder */ + slapi_filter_free(*f_cur, 0); +} + +/* slapi_filter_optimise * --------------- - * takes a filter and optimizes it for fast evaluation - * currently this merely ensures that any AND or OR - * does not start with a NOT sub-filter if possible + * takes a filter and optimises it for fast evaluation + * + * Optimisations are: + * * In OR conditions move substrings early to promote fail-fast of unindexed types + * * In AND conditions move eq types (that are not objectClass) early to promote triggering threshold shortcut + * * In OR conditions, merge all direct child OR conditions into the list. (|(|(a)(b))) == (|(a)(b)) + * * in AND conditions, merge all direct child AND conditions into the list. (&(&(a)(b))) == (&(a)(b)) + * + * In the case of the OR and AND merges, we remove the inner filter because the outer one may have flags set. + * + * In the future this could be backend dependent. */ -static void -filter_optimize(Slapi_Filter *f) +void +slapi_filter_optimise(Slapi_Filter *f) { - if (!f) + /* + * Today tombstone searches RELY on filter ordering + * and a filter test threshold quirk. We need to avoid + * touching these cases!!! + */ + if (f == NULL || (f->f_flags & SLAPI_FILTER_TOMBSTONE) != 0) { return; + } switch (f->f_choice) { case LDAP_FILTER_AND: - case LDAP_FILTER_OR: { - /* first optimize children */ - filter_optimize(f->f_list); - - /* optimize this */ - if (f->f_list->f_choice == LDAP_FILTER_NOT) { - Slapi_Filter *f_prev = 0; - Slapi_Filter *f_child = 0; - - /* grab a non not filter to place at start */ - for (f_child = f->f_list; f_child != 0; f_child = f_child->f_next) { - if (f_child->f_choice != LDAP_FILTER_NOT) { - /* we have a winner, do swap */ - if (f_prev) - f_prev->f_next = f_child->f_next; - f_child->f_next = f->f_list; - f->f_list = f_child; + /* Move all equality searches to the head. */ + /* Merge any direct descendant AND queries into us */ + { + Slapi_Filter *f_prev = NULL; + Slapi_Filter *f_cur = NULL; + Slapi_Filter *f_next = NULL; + + Slapi_Filter *f_op_head = NULL; + Slapi_Filter *f_op_tail = NULL; + + f_cur = f->f_list; + while(f_cur != NULL) { + + switch(f_cur->f_choice) { + case LDAP_FILTER_AND: + filter_merge_subfilter(&(f->f_list), &f_prev, &f_cur, &f_next); + f_cur = f_next; + break; + case LDAP_FILTER_EQUALITY: + if (strcasecmp(f_cur->f_avtype, "objectclass") != 0) { + f_next = f_cur->f_next; + /* Cut it out */ + filter_prioritise_element(&(f->f_list), &f_op_head, &f_op_tail, &f_prev, &f_cur); + /* Don't change previous, because we remove this f_cur */ + f_cur = f_next; + break; + } else { + /* Move along */ + f_prev = f_cur; + f_cur = f_cur->f_next; + } + break; + default: + /* Move along */ + f_prev = f_cur; + f_cur = f_cur->f_next; break; } + } - f_prev = f_child; + if (f_op_head != NULL) { + f_op_tail->f_next = f->f_list; + f->f_list = f_op_head; } } - } + /* finally optimize children */ + slapi_filter_optimise(f->f_list); + + break; + + case LDAP_FILTER_OR: + /* Move all substring searches to the head. */ + { + Slapi_Filter *f_prev = NULL; + Slapi_Filter *f_cur = NULL; + Slapi_Filter *f_next = NULL; + + Slapi_Filter *f_op_head = NULL; + Slapi_Filter *f_op_tail = NULL; + + f_cur = f->f_list; + while(f_cur != NULL) { + + switch(f_cur->f_choice) { + case LDAP_FILTER_OR: + filter_merge_subfilter(&(f->f_list), &f_prev, &f_cur, &f_next); + f_cur = f_next; + break; + case LDAP_FILTER_APPROX: + case LDAP_FILTER_GE: + case LDAP_FILTER_LE: + case LDAP_FILTER_SUBSTRINGS: + f_next = f_cur->f_next; + /* Cut it out */ + filter_prioritise_element(&(f->f_list), &f_op_head, &f_op_tail, &f_prev, &f_cur); + /* Don't change previous, because we remove this f_cur */ + f_cur = f_next; + break; + default: + /* Move along */ + f_prev = f_cur; + f_cur = f_cur->f_next; + break; + } + } + if (f_op_head != NULL) { + f_op_tail->f_next = f->f_list; + f->f_list = f_op_head; + } + } + /* finally optimize children */ + slapi_filter_optimise(f->f_list); + + break; + default: - filter_optimize(f->f_next); + slapi_filter_optimise(f->f_next); break; } } diff --git a/ldap/servers/slapd/index_subsystem.c b/ldap/servers/slapd/index_subsystem.c index 97cb7b4..7d7e1b7 100644 --- a/ldap/servers/slapd/index_subsystem.c +++ b/ldap/servers/slapd/index_subsystem.c @@ -191,6 +191,10 @@ index_subsys_assign_filter_decoders(Slapi_PBlock *pb) char *subsystem = "index_subsys_assign_filter_decoders"; char logbuf[1024]; + if (!theCache) { + return rc; + } + /* extract the filter */ slapi_pblock_get(pb, SLAPI_SEARCH_FILTER, &f); if (f) { diff --git a/ldap/servers/slapd/slapi-private.h b/ldap/servers/slapd/slapi-private.h index b08c0d7..c28c6e7 100644 --- a/ldap/servers/slapd/slapi-private.h +++ b/ldap/servers/slapd/slapi-private.h @@ -383,6 +383,7 @@ void ndn_cache_get_stats(PRUint64 *hits, PRUint64 *tries, size_t *size, size_t * int filter_flag_is_set(const Slapi_Filter *f, unsigned char flag); char *slapi_filter_to_string(const Slapi_Filter *f, char *buffer, size_t bufsize); char *slapi_filter_to_string_internal(const struct slapi_filter *f, char *buf, size_t *bufsize); +void slapi_filter_optimise(Slapi_Filter *f); /* operation.c */