From df1225c9280c0a3b23b84750f1031470f765b640 Mon Sep 17 00:00:00 2001 From: Floris van Nee Date: Sun, 12 Jul 2020 13:36:25 +0200 Subject: [PATCH 7/7] planner fixes --- src/backend/optimizer/path/indxpath.c | 44 +++++++++--- src/backend/optimizer/path/pathkeys.c | 72 +++++++++++++++++++ src/backend/optimizer/path/uniquekeys.c | 65 ++++++++--------- src/backend/optimizer/plan/planner.c | 16 ++--- src/backend/optimizer/util/pathnode.c | 41 ++--------- src/include/optimizer/pathnode.h | 2 +- src/include/optimizer/paths.h | 4 ++ src/test/regress/expected/select_distinct.out | 14 ++-- src/test/regress/expected/sysviews.out | 2 +- 9 files changed, 162 insertions(+), 98 deletions(-) diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c index 340bbfcbb3..9eff71e5d5 100644 --- a/src/backend/optimizer/path/indxpath.c +++ b/src/backend/optimizer/path/indxpath.c @@ -1119,15 +1119,27 @@ build_index_paths(PlannerInfo *root, RelOptInfo *rel, /* Consider index skip scan as well */ if (root->query_uniquekeys != NULL && can_skip && !not_empty_qual) { - ListCell *lc; + int numusefulkeys = list_length(useful_pathkeys); + int numsortkeys = list_length(root->query_pathkeys); - foreach(lc, root->query_uniquekeys) + if (numusefulkeys == numsortkeys) { - UniqueKey *ukey = lfirst_node(UniqueKey, lc); + int prefix; + if (list_length(root->distinct_pathkeys) > 0) + prefix = find_index_prefix_for_pathkey(root, + index, + ForwardScanDirection, + llast_node(PathKey, + root->distinct_pathkeys)); + else + /* all are distinct keys are constant and optimized away. + * skipping with 1 is sufficient as all are constant anyway + */ + prefix = 1; + result = lappend(result, create_skipscan_unique_path(root, index, - (Path *) ipath, - ukey->exprs)); + (Path *) ipath, prefix)); } } @@ -1189,15 +1201,27 @@ build_index_paths(PlannerInfo *root, RelOptInfo *rel, /* Consider index skip scan as well */ if (root->query_uniquekeys != NULL && can_skip && !not_empty_qual) { - ListCell *lc; + int numusefulkeys = list_length(useful_pathkeys); + int numsortkeys = list_length(root->query_pathkeys); - foreach(lc, root->query_uniquekeys) + if (numusefulkeys == numsortkeys) { - UniqueKey *ukey = lfirst_node(UniqueKey, lc); + int prefix; + if (list_length(root->distinct_pathkeys) > 0) + prefix = find_index_prefix_for_pathkey(root, + index, + BackwardScanDirection, + llast_node(PathKey, + root->distinct_pathkeys)); + else + /* all are distinct keys are constant and optimized away. + * skipping with 1 is sufficient as all are constant anyway + */ + prefix = 1; + result = lappend(result, create_skipscan_unique_path(root, index, - (Path *) ipath, - ukey->exprs)); + (Path *) ipath, prefix)); } } diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c index a4fc4f252d..3fa533be95 100644 --- a/src/backend/optimizer/path/pathkeys.c +++ b/src/backend/optimizer/path/pathkeys.c @@ -522,6 +522,78 @@ get_cheapest_parallel_safe_total_inner(List *paths) * NEW PATHKEY FORMATION ****************************************************************************/ +/* + * Find the prefix size for a specific path key in an index. + * For example, an index with (a,b,c) finding path key b will + * return prefix 2. + * Returns 0 when not found. + */ +int +find_index_prefix_for_pathkey(PlannerInfo *root, + IndexOptInfo *index, + ScanDirection scandir, + PathKey *pathkey) +{ + ListCell *lc; + int i; + + i = 0; + foreach(lc, index->indextlist) + { + TargetEntry *indextle = (TargetEntry *) lfirst(lc); + Expr *indexkey; + bool reverse_sort; + bool nulls_first; + PathKey *cpathkey; + + /* + * INCLUDE columns are stored in index unordered, so they don't + * support ordered index scan. + */ + if (i >= index->nkeycolumns) + break; + + /* We assume we don't need to make a copy of the tlist item */ + indexkey = indextle->expr; + + if (ScanDirectionIsBackward(scandir)) + { + reverse_sort = !index->reverse_sort[i]; + nulls_first = !index->nulls_first[i]; + } + else + { + reverse_sort = index->reverse_sort[i]; + nulls_first = index->nulls_first[i]; + } + + /* + * OK, try to make a canonical pathkey for this sort key. Note we're + * underneath any outer joins, so nullable_relids should be NULL. + */ + cpathkey = make_pathkey_from_sortinfo(root, + indexkey, + NULL, + index->sortopfamily[i], + index->opcintype[i], + index->indexcollations[i], + reverse_sort, + nulls_first, + 0, + index->rel->relids, + false); + + if (cpathkey == pathkey) + { + return i + 1; + } + + i++; + } + + return 0; +} + /* * build_index_pathkeys * Build a pathkeys list that describes the ordering induced by an index diff --git a/src/backend/optimizer/path/uniquekeys.c b/src/backend/optimizer/path/uniquekeys.c index 7d9c0aefbf..4bc16ea023 100644 --- a/src/backend/optimizer/path/uniquekeys.c +++ b/src/backend/optimizer/path/uniquekeys.c @@ -1136,6 +1136,7 @@ build_uniquekeys(PlannerInfo *root, List *sortclauses) List *result = NIL; List *sortkeys; ListCell *l; + List *exprs = NIL; sortkeys = make_pathkeys_for_uniquekeys(root, sortclauses, @@ -1146,54 +1147,48 @@ build_uniquekeys(PlannerInfo *root, List *sortclauses) { PathKey *pathkey = (PathKey *) lfirst(l); EquivalenceClass *ec = pathkey->pk_eclass; - ListCell *k; - List *exprs = NIL; - - foreach(k, ec->ec_members) - { - EquivalenceMember *mem = (EquivalenceMember *) lfirst(k); - exprs = lappend(exprs, mem->em_expr); - } - - result = lappend(result, makeUniqueKey(exprs, false, false)); + EquivalenceMember *mem = (EquivalenceMember*) lfirst(list_head(ec->ec_members)); + if (EC_MUST_BE_REDUNDANT(ec)) + continue; + exprs = lappend(exprs, mem->em_expr); } + result = lappend(result, makeUniqueKey(exprs, false)); + return result; } bool -query_has_uniquekeys_for(PlannerInfo *root, List *pathkeys, +query_has_uniquekeys_for(PlannerInfo *root, List *pathuniquekeys, bool allow_multinulls) { ListCell *lc; - List *exprs = NIL; + ListCell *lc2; - /* For UniqueKey->onerow case, the uniquekey->exprs is empty as well - * so we can't rely on list_is_subset to handle this special cases + /* root->query_uniquekeys are the requested DISTINCT clauses on query level + * pathuniquekeys are the unique keys on current path. + * All requested query_uniquekeys must be satisfied by the pathuniquekeys */ - if (pathkeys == NIL) - return false; - - foreach(lc, pathkeys) + foreach(lc, root->query_uniquekeys) { - PathKey *pathkey = (PathKey *) lfirst(lc); - EquivalenceClass *ec = pathkey->pk_eclass; - ListCell *k; - - foreach(k, ec->ec_members) + UniqueKey *query_ukey = lfirst_node(UniqueKey, lc); + bool satisfied = false; + foreach(lc2, pathuniquekeys) { - EquivalenceMember *mem = (EquivalenceMember *) lfirst(k); - exprs = lappend(exprs, mem->em_expr); + UniqueKey *ukey = lfirst_node(UniqueKey, lc2); + if (ukey->multi_nullvals && !allow_multinulls) + continue; + if (list_length(ukey->exprs) == 0 && + list_length(query_ukey->exprs) != 0) + continue; + if (list_is_subset(ukey->exprs, query_ukey->exprs)) + { + satisfied = true; + break; + } } + if (!satisfied) + return false; } - - foreach(lc, root->query_uniquekeys) - { - UniqueKey *ukey = lfirst_node(UniqueKey, lc); - if (ukey->multi_nullvals && !allow_multinulls) - continue; - if (list_is_subset(ukey->exprs, exprs)) - return true; - } - return false; + return true; } diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index a5f3c28fb5..0d986e87e6 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -3627,12 +3627,18 @@ standard_qp_callback(PlannerInfo *root, void *extra) if (parse->distinctClause && grouping_is_sortable(parse->distinctClause)) + { root->distinct_pathkeys = make_pathkeys_for_sortclauses(root, parse->distinctClause, tlist); + root->query_uniquekeys = build_uniquekeys(root, parse->distinctClause); + } else + { root->distinct_pathkeys = NIL; + root->query_uniquekeys = NIL; + } root->sort_pathkeys = make_pathkeys_for_sortclauses(root, @@ -3663,17 +3669,9 @@ standard_qp_callback(PlannerInfo *root, void *extra) root->query_pathkeys = root->window_pathkeys; else if (list_length(root->distinct_pathkeys) > list_length(root->sort_pathkeys)) - { root->query_pathkeys = root->distinct_pathkeys; - root->query_uniquekeys = build_uniquekeys(root, parse->distinctClause); - } else if (root->sort_pathkeys) - { root->query_pathkeys = root->sort_pathkeys; - - if (root->distinct_pathkeys) - root->query_uniquekeys = build_uniquekeys(root, parse->distinctClause); - } else root->query_pathkeys = NIL; } @@ -4834,7 +4832,7 @@ create_distinct_paths(PlannerInfo *root, { Path *path = (Path *) lfirst(lc); - if (query_has_uniquekeys_for(root, needed_pathkeys, false)) + if (query_has_uniquekeys_for(root, path->uniquekeys, false)) add_path(distinct_rel, path); } diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index cac2ca0b0f..3e0d89071f 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -2981,51 +2981,24 @@ create_upper_unique_path(PlannerInfo *root, */ IndexPath * create_skipscan_unique_path(PlannerInfo *root, IndexOptInfo *index, - Path *basepath, List *unique_exprs) + Path *basepath, int prefix) { IndexPath *pathnode = makeNode(IndexPath); int numDistinctRows; - int distinctPrefixKeys; - ListCell *lc; - List *exprs = NIL; - - - distinctPrefixKeys = list_length(unique_exprs); + UniqueKey *ukey; Assert(IsA(basepath, IndexPath)); /* We don't want to modify basepath, so make a copy. */ memcpy(pathnode, basepath, sizeof(IndexPath)); - /* - * Normally we can think about distinctPrefixKeys as just - * a number of distinct keys. But if lets say we have a - * distinct key a, and the index contains b, a in exactly - * this order. In such situation we need to use position - * of a in the index as distinctPrefixKeys, otherwise skip - * will happen only by the first column. - */ - foreach(lc, unique_exprs) - { - Expr *unique_expr = (Expr *) lfirst(lc); - Var *var = (Var *) unique_expr; - - exprs = lappend(exprs, unique_expr); - - for (int i = 0; i < index->ncolumns; i++) - { - if (index->indexkeys[i] == var->varattno) - { - distinctPrefixKeys = Max(i + 1, distinctPrefixKeys); - break; - } - } - } + ukey = linitial_node(UniqueKey, root->query_uniquekeys); - Assert(distinctPrefixKeys > 0); - pathnode->indexskipprefix = distinctPrefixKeys; + Assert(prefix > 0); + pathnode->indexskipprefix = prefix; + pathnode->path.uniquekeys = root->query_uniquekeys; - numDistinctRows = estimate_num_groups(root, exprs, + numDistinctRows = estimate_num_groups(root, ukey->exprs, pathnode->path.rows, NULL); diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h index 705deaf7bd..8ec1780a56 100644 --- a/src/include/optimizer/pathnode.h +++ b/src/include/optimizer/pathnode.h @@ -210,7 +210,7 @@ extern UpperUniquePath *create_upper_unique_path(PlannerInfo *root, extern IndexPath *create_skipscan_unique_path(PlannerInfo *root, IndexOptInfo *index, Path *subpath, - List *unique_exprs); + int prefix); extern AggPath *create_agg_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath, diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h index 0cb8030e33..f934f0011a 100644 --- a/src/include/optimizer/paths.h +++ b/src/include/optimizer/paths.h @@ -198,6 +198,10 @@ extern Path *get_cheapest_fractional_path_for_pathkeys(List *paths, Relids required_outer, double fraction); extern Path *get_cheapest_parallel_safe_total_inner(List *paths); +extern int find_index_prefix_for_pathkey(PlannerInfo *root, + IndexOptInfo *index, + ScanDirection scandir, + PathKey *pathkey); extern List *build_index_pathkeys(PlannerInfo *root, IndexOptInfo *index, ScanDirection scandir); extern List *build_partition_pathkeys(PlannerInfo *root, RelOptInfo *partrel, diff --git a/src/test/regress/expected/select_distinct.out b/src/test/regress/expected/select_distinct.out index b811f7d7a1..7af4636b1b 100644 --- a/src/test/regress/expected/select_distinct.out +++ b/src/test/regress/expected/select_distinct.out @@ -647,14 +647,12 @@ FROM distinct_a ORDER BY a; EXPLAIN (COSTS OFF) SELECT DISTINCT ON (a) a, b, c FROM distinct_a WHERE a = 1 ORDER BY a; - QUERY PLAN ------------------------------------------------------ - Unique - -> Bitmap Heap Scan on distinct_a - Recheck Cond: (a = 1) - -> Bitmap Index Scan on distinct_a_a_b_idx - Index Cond: (a = 1) -(5 rows) + QUERY PLAN +--------------------------------------------------- + Index Scan using distinct_a_a_b_idx on distinct_a + Skip scan: true + Index Cond: (a = 1) +(3 rows) -- check colums order SELECT DISTINCT a FROM distinct_a WHERE b = 2 AND c = 10; diff --git a/src/test/regress/expected/sysviews.out b/src/test/regress/expected/sysviews.out index 4da9594925..e64e20a8cb 100644 --- a/src/test/regress/expected/sysviews.out +++ b/src/test/regress/expected/sysviews.out @@ -91,7 +91,7 @@ select name, setting from pg_settings where name like 'enable%'; enable_seqscan | on enable_sort | on enable_tidscan | on -(20 rows) +(19 rows) -- Test that the pg_timezone_names and pg_timezone_abbrevs views are -- more-or-less working. We can't test their contents in any great detail -- 2.25.0