From b751bc74fe681b6b649b0f63a61ba91e6e246c88 Mon Sep 17 00:00:00 2001 From: Dmitrii Dolgov <9erthalion6@gmail.com> Date: Mon, 8 Jun 2020 20:33:56 +0200 Subject: [PATCH v37 3/6] Extend UniqueKeys Prepares index skip scan implementation using UniqueKeys. Allows to specify what are the "requested" keys that should be unique, and add them to necessary Paths to make them useful later. Proposed by David Rowley, contains few bits out of previous version from Jesper Pedersen. --- src/backend/optimizer/path/pathkeys.c | 62 +++++++++++++++++++++++++ src/backend/optimizer/path/uniquekeys.c | 61 ++++++++++++++++++++++++ src/backend/optimizer/plan/planner.c | 36 +++++++++++++- src/backend/optimizer/util/pathnode.c | 32 +++++++++---- src/include/nodes/pathnodes.h | 5 ++ src/include/optimizer/pathnode.h | 1 + src/include/optimizer/paths.h | 8 ++++ 7 files changed, 195 insertions(+), 10 deletions(-) diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c index 7e596d4194..97e1c2de83 100644 --- a/src/backend/optimizer/path/pathkeys.c +++ b/src/backend/optimizer/path/pathkeys.c @@ -29,6 +29,7 @@ #include "utils/lsyscache.h" +static bool pathkey_is_unique(PathKey *new_pathkey, List *pathkeys); static bool pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys); static bool matches_boolean_partition_clause(RestrictInfo *rinfo, RelOptInfo *partrel, @@ -95,6 +96,29 @@ make_canonical_pathkey(PlannerInfo *root, return pk; } +/* + * pathkey_is_unique + * Checks if the new pathkey's equivalence class is the same as that of + * any existing member of the pathkey list. + */ +static bool +pathkey_is_unique(PathKey *new_pathkey, List *pathkeys) +{ + EquivalenceClass *new_ec = new_pathkey->pk_eclass; + ListCell *lc; + + /* If same EC already is already in the list, then not unique */ + foreach(lc, pathkeys) + { + PathKey *old_pathkey = (PathKey *) lfirst(lc); + + if (new_ec == old_pathkey->pk_eclass) + return false; + } + + return true; +} + /* * pathkey_is_redundant * Is a pathkey redundant with one already in the given list? @@ -1151,6 +1175,44 @@ make_pathkeys_for_sortclauses(PlannerInfo *root, return pathkeys; } +/* + * make_pathkeys_for_uniquekeyclauses + * Generate a pathkeys list to be used for uniquekey clauses + */ +List * +make_pathkeys_for_uniquekeys(PlannerInfo *root, + List *sortclauses, + List *tlist) +{ + List *pathkeys = NIL; + ListCell *l; + + foreach(l, sortclauses) + { + SortGroupClause *sortcl = (SortGroupClause *) lfirst(l); + Expr *sortkey; + PathKey *pathkey; + + sortkey = (Expr *) get_sortgroupclause_expr(sortcl, tlist); + Assert(OidIsValid(sortcl->sortop)); + pathkey = make_pathkey_from_sortop(root, + sortkey, + root->nullable_baserels, + sortcl->sortop, + sortcl->nulls_first, + sortcl->tleSortGroupRef, + true); + + if (EC_MUST_BE_REDUNDANT(pathkey->pk_eclass)) + continue; + + if (pathkey_is_unique(pathkey, pathkeys)) + pathkeys = lappend(pathkeys, pathkey); + } + + return pathkeys; +} + /**************************************************************************** * PATHKEYS AND MERGECLAUSES ****************************************************************************/ diff --git a/src/backend/optimizer/path/uniquekeys.c b/src/backend/optimizer/path/uniquekeys.c index c7ad76d28f..988d977715 100644 --- a/src/backend/optimizer/path/uniquekeys.c +++ b/src/backend/optimizer/path/uniquekeys.c @@ -1129,3 +1129,64 @@ add_combined_uniquekey(RelOptInfo *joinrel, } return false; } + +List* +build_uniquekeys(PlannerInfo *root, List *sortclauses) +{ + List *result = NIL; + List *sortkeys; + ListCell *l; + List *exprs = NIL; + + sortkeys = make_pathkeys_for_uniquekeys(root, + sortclauses, + root->processed_tlist); + + /* Create a uniquekey and add it to the list */ + foreach(l, sortkeys) + { + PathKey *pathkey = (PathKey *) lfirst(l); + EquivalenceClass *ec = pathkey->pk_eclass; + EquivalenceMember *mem = (EquivalenceMember*) lfirst(list_head(ec->ec_members)); + exprs = lappend(exprs, mem->em_expr); + } + + result = lappend(result, makeUniqueKey(exprs, false)); + + return result; +} + +bool +query_has_uniquekeys_for(PlannerInfo *root, List *pathuniquekeys, + bool allow_multinulls) +{ + ListCell *lc; + ListCell *lc2; + + /* root->query_uniquekeys are the requested DISTINCT clauses on query level + * pathuniquekeys are the unique keys on current path. + * All requested query_uniquekeys must be satisfied by the pathuniquekeys + */ + foreach(lc, root->query_uniquekeys) + { + UniqueKey *query_ukey = lfirst_node(UniqueKey, lc); + bool satisfied = false; + foreach(lc2, pathuniquekeys) + { + UniqueKey *ukey = lfirst_node(UniqueKey, lc2); + if (ukey->multi_nullvals && !allow_multinulls) + continue; + if (list_length(ukey->exprs) == 0 && + list_length(query_ukey->exprs) != 0) + continue; + if (list_is_subset(ukey->exprs, query_ukey->exprs)) + { + satisfied = true; + break; + } + } + if (!satisfied) + return false; + } + return true; +} diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index f1bbb8c427..7e0714a73e 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -3623,12 +3623,18 @@ standard_qp_callback(PlannerInfo *root, void *extra) if (parse->distinctClause && grouping_is_sortable(parse->distinctClause)) + { root->distinct_pathkeys = make_pathkeys_for_sortclauses(root, parse->distinctClause, tlist); + root->query_uniquekeys = build_uniquekeys(root, parse->distinctClause); + } else + { root->distinct_pathkeys = NIL; + root->query_uniquekeys = NIL; + } root->sort_pathkeys = make_pathkeys_for_sortclauses(root, @@ -4839,13 +4845,19 @@ create_distinct_paths(PlannerInfo *root, Path *path = (Path *) lfirst(lc); if (pathkeys_contained_in(needed_pathkeys, path->pathkeys)) - { add_path(distinct_rel, (Path *) create_upper_unique_path(root, distinct_rel, path, list_length(root->distinct_pathkeys), numDistinctRows)); - } + } + + foreach(lc, input_rel->unique_pathlist) + { + Path *path = (Path *) lfirst(lc); + + if (query_has_uniquekeys_for(root, path->uniquekeys, false)) + add_path(distinct_rel, path); } /* For explicit-sort case, always use the more rigorous clause */ @@ -7489,6 +7501,26 @@ apply_scanjoin_target_to_paths(PlannerInfo *root, } } + foreach(lc, rel->unique_pathlist) + { + Path *subpath = (Path *) lfirst(lc); + + /* Shouldn't have any parameterized paths anymore */ + Assert(subpath->param_info == NULL); + + if (tlist_same_exprs) + subpath->pathtarget->sortgrouprefs = + scanjoin_target->sortgrouprefs; + else + { + Path *newpath; + + newpath = (Path *) create_projection_path(root, rel, subpath, + scanjoin_target); + lfirst(lc) = newpath; + } + } + /* * Now, if final scan/join target contains SRFs, insert ProjectSetPath(s) * atop each existing path. (Note that this function doesn't look at the diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index 5281a2f998..abcfe4d9d7 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -416,10 +416,10 @@ set_cheapest(RelOptInfo *parent_rel) * 'parent_rel' is the relation entry to which the path corresponds. * 'new_path' is a potential path for parent_rel. * - * Returns nothing, but modifies parent_rel->pathlist. + * Returns modified pathlist. */ -void -add_path(RelOptInfo *parent_rel, Path *new_path) +static List * +add_path_to(RelOptInfo *parent_rel, List *pathlist, Path *new_path) { bool accept_new = true; /* unless we find a superior old path */ int insert_at = 0; /* where to insert new item */ @@ -440,7 +440,7 @@ add_path(RelOptInfo *parent_rel, Path *new_path) * for more than one old path to be tossed out because new_path dominates * it. */ - foreach(p1, parent_rel->pathlist) + foreach(p1, pathlist) { Path *old_path = (Path *) lfirst(p1); bool remove_old = false; /* unless new proves superior */ @@ -584,8 +584,7 @@ add_path(RelOptInfo *parent_rel, Path *new_path) */ if (remove_old) { - parent_rel->pathlist = foreach_delete_current(parent_rel->pathlist, - p1); + pathlist = foreach_delete_current(pathlist, p1); /* * Delete the data pointed-to by the deleted cell, if possible @@ -612,8 +611,7 @@ add_path(RelOptInfo *parent_rel, Path *new_path) if (accept_new) { /* Accept the new path: insert it at proper place in pathlist */ - parent_rel->pathlist = - list_insert_nth(parent_rel->pathlist, insert_at, new_path); + pathlist = list_insert_nth(pathlist, insert_at, new_path); } else { @@ -621,6 +619,23 @@ add_path(RelOptInfo *parent_rel, Path *new_path) if (!IsA(new_path, IndexPath)) pfree(new_path); } + + return pathlist; +} + +void +add_path(RelOptInfo *parent_rel, Path *new_path) +{ + parent_rel->pathlist = add_path_to(parent_rel, + parent_rel->pathlist, new_path); +} + +void +add_unique_path(RelOptInfo *parent_rel, Path *new_path) +{ + parent_rel->unique_pathlist = add_path_to(parent_rel, + parent_rel->unique_pathlist, + new_path); } /* @@ -2572,6 +2587,7 @@ create_projection_path(PlannerInfo *root, pathnode->path.pathkeys = subpath->pathkeys; pathnode->subpath = subpath; + pathnode->path.uniquekeys = subpath->uniquekeys; /* * We might not need a separate Result node. If the input plan node type diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h index 5737cd76ce..4a4ae3e926 100644 --- a/src/include/nodes/pathnodes.h +++ b/src/include/nodes/pathnodes.h @@ -295,6 +295,7 @@ struct PlannerInfo List *query_pathkeys; /* desired pathkeys for query_planner() */ + List *query_uniquekeys; /* unique keys required for the query */ List *group_pathkeys; /* groupClause pathkeys, if any */ List *window_pathkeys; /* pathkeys of bottom window, if any */ List *distinct_pathkeys; /* distinctClause pathkeys, if any */ @@ -678,6 +679,7 @@ typedef struct RelOptInfo List *pathlist; /* Path structures */ List *ppilist; /* ParamPathInfos used in pathlist */ List *partial_pathlist; /* partial Paths */ + List *unique_pathlist; /* unique Paths */ struct Path *cheapest_startup_path; struct Path *cheapest_total_path; struct Path *cheapest_unique_path; @@ -865,6 +867,7 @@ struct IndexOptInfo bool amsearchnulls; /* can AM search for NULL/NOT NULL entries? */ bool amhasgettuple; /* does AM have amgettuple interface? */ bool amhasgetbitmap; /* does AM have amgetbitmap interface? */ + bool amcanskip; /* can AM skip duplicate values? */ bool amcanparallel; /* does AM support parallel scan? */ /* Rather than include amapi.h here, we declare amcostestimate like this */ void (*amcostestimate) (); /* AM's cost estimator */ @@ -1181,6 +1184,8 @@ typedef struct Path List *pathkeys; /* sort ordering of path's output */ /* pathkeys is a List of PathKey nodes; see above */ + + List *uniquekeys; /* the unique keys, or NIL if none */ } Path; /* Macro for extracting a path's parameterization relids; beware double eval */ diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h index 715a24ad29..6796ad8cb7 100644 --- a/src/include/optimizer/pathnode.h +++ b/src/include/optimizer/pathnode.h @@ -27,6 +27,7 @@ extern int compare_fractional_path_costs(Path *path1, Path *path2, double fraction); extern void set_cheapest(RelOptInfo *parent_rel); extern void add_path(RelOptInfo *parent_rel, Path *new_path); +extern void add_unique_path(RelOptInfo *parent_rel, Path *new_path); extern bool add_path_precheck(RelOptInfo *parent_rel, Cost startup_cost, Cost total_cost, List *pathkeys, Relids required_outer); diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h index 9217a8d6c6..0cb8030e33 100644 --- a/src/include/optimizer/paths.h +++ b/src/include/optimizer/paths.h @@ -215,6 +215,9 @@ extern List *build_join_pathkeys(PlannerInfo *root, extern List *make_pathkeys_for_sortclauses(PlannerInfo *root, List *sortclauses, List *tlist); +extern List *make_pathkeys_for_uniquekeys(PlannerInfo *root, + List *sortclauses, + List *tlist); extern void initialize_mergeclause_eclasses(PlannerInfo *root, RestrictInfo *restrictinfo); extern void update_mergeclause_eclasses(PlannerInfo *root, @@ -282,6 +285,11 @@ extern bool relation_has_uniquekeys_for(PlannerInfo *root, RelOptInfo *rel, List *exprs, bool allow_multinulls); +extern bool query_has_uniquekeys_for(PlannerInfo *root, + List *exprs, + bool allow_multinulls); extern bool relation_is_onerow(RelOptInfo *rel); +extern List *build_uniquekeys(PlannerInfo *root, List *sortclauses); + #endif /* PATHS_H */ -- 2.21.0