From c44c8a94a8f91767e97ca90d073be939e33adcd2 Mon Sep 17 00:00:00 2001 From: "dgrowley@gmail.com" Date: Fri, 26 Oct 2018 09:18:09 +1300 Subject: [PATCH v4] Allow Append to be used in place of MergeAppend for some cases For RANGE partitioned tables with no default partition the subpaths of a MergeAppend are always arranged in range order. This means that MergeAppend, when sorting by the partition key or a superset of the partition key, will always output tuples from earlier subpaths before later subpaths. LIST partitioned tables provide the same guarantee if they also don't have a default partition, providing that none of the partitions are defined to allow Datums with values which are interleaved with other partitions. For simplicity and speed of checking we currently just disallow the optimization if any partition allows more than one Datum. We may want to expand this later, but for now, it's a very cheap check to implement. A more thorough check would require performing analysis on the partition bound. 
--- src/backend/nodes/outfuncs.c | 1 + src/backend/optimizer/path/allpaths.c | 144 ++++++++++++++++++-- src/backend/optimizer/path/costsize.c | 51 ++++++- src/backend/optimizer/path/joinrels.c | 2 +- src/backend/optimizer/path/pathkeys.c | 54 ++++++++ src/backend/optimizer/plan/createplan.c | 91 +++++++++---- src/backend/optimizer/plan/planner.c | 3 +- src/backend/optimizer/prep/prepunion.c | 6 +- src/backend/optimizer/util/pathnode.c | 23 +++- src/backend/partitioning/partprune.c | 59 ++++++++ src/backend/utils/cache/partcache.c | 10 +- src/include/nodes/relation.h | 1 + src/include/optimizer/cost.h | 2 +- src/include/optimizer/pathnode.h | 2 +- src/include/optimizer/paths.h | 2 + src/include/partitioning/partprune.h | 1 + src/test/regress/expected/inherit.out | 188 +++++++++++++++++++++++++- src/test/regress/expected/partition_prune.out | 64 ++++----- src/test/regress/sql/inherit.sql | 81 +++++++++++ src/test/regress/sql/partition_prune.sql | 10 +- 20 files changed, 694 insertions(+), 101 deletions(-) diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index 69731ccdea..5597dc6154 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -1939,6 +1939,7 @@ _outAppendPath(StringInfo str, const AppendPath *node) WRITE_NODE_FIELD(partitioned_rels); WRITE_NODE_FIELD(subpaths); WRITE_INT_FIELD(first_partial_path); + WRITE_FLOAT_FIELD(limit_tuples, "%.0f"); } static void diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index 5f74d3b36d..6205828656 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -104,6 +104,7 @@ static Path *get_cheapest_parameterized_child_path(PlannerInfo *root, Relids required_outer); static void accumulate_append_subpath(Path *path, List **subpaths, List **special_subpaths); +static Path *get_singleton_append_subpath(Path *path); static void set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel, Index rti, 
RangeTblEntry *rte); static void set_function_pathlist(PlannerInfo *root, RelOptInfo *rel, @@ -1597,7 +1598,7 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel, */ if (subpaths_valid) add_path(rel, (Path *) create_append_path(root, rel, subpaths, NIL, - NULL, 0, false, + NIL, NULL, 0, false, partitioned_rels, -1)); /* @@ -1639,7 +1640,7 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel, /* Generate a partial append path. */ appendpath = create_append_path(root, rel, NIL, partial_subpaths, - NULL, parallel_workers, + NIL, NULL, parallel_workers, enable_parallel_append, partitioned_rels, -1); @@ -1689,7 +1690,7 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel, appendpath = create_append_path(root, rel, pa_nonpartial_subpaths, pa_partial_subpaths, - NULL, parallel_workers, true, + NIL, NULL, parallel_workers, true, partitioned_rels, partial_rows); add_partial_path(rel, (Path *) appendpath); } @@ -1751,7 +1752,7 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel, if (subpaths_valid) add_path(rel, (Path *) create_append_path(root, rel, subpaths, NIL, - required_outer, 0, false, + NIL, required_outer, 0, false, partitioned_rels, -1)); } } @@ -1786,6 +1787,24 @@ generate_mergeappend_paths(PlannerInfo *root, RelOptInfo *rel, List *partitioned_rels) { ListCell *lcp; + List *partition_pathkeys = NIL; + List *partition_pathkeys_desc = NIL; + + /* + * Some partitioned table setups may allow us to use an Append node + * instead of a MergeAppend. This is possible in cases such as RANGE + * partitioned tables where it's guaranteed that an earlier partition must + * contain rows which come earlier in the sort order. 
+ */ + if (rel->part_scheme != NULL && IS_SIMPLE_REL(rel) && + partitions_are_ordered(root, rel)) + { + partition_pathkeys = build_partition_pathkeys(root, rel, + ForwardScanDirection); + + partition_pathkeys_desc = build_partition_pathkeys(root, rel, + BackwardScanDirection); + } foreach(lcp, all_child_pathkeys) { @@ -1794,6 +1813,20 @@ generate_mergeappend_paths(PlannerInfo *root, RelOptInfo *rel, List *total_subpaths = NIL; bool startup_neq_total = false; ListCell *lcr; + bool partition_order; + bool partition_order_desc; + + /* + * Determine if these pathkeys match the partition order, or reverse + * partition order. It can't match both, so only go to the trouble of + * checking the reverse order when it's not in ascending partition + * order. + */ + partition_order = pathkeys_contained_in(pathkeys, + partition_pathkeys); + partition_order_desc = !partition_order && + pathkeys_contained_in(pathkeys, + partition_pathkeys_desc); /* Select the child paths for this ordering... */ foreach(lcr, live_childrels) @@ -1836,26 +1869,81 @@ generate_mergeappend_paths(PlannerInfo *root, RelOptInfo *rel, if (cheapest_startup != cheapest_total) startup_neq_total = true; - accumulate_append_subpath(cheapest_startup, - &startup_subpaths, NULL); - accumulate_append_subpath(cheapest_total, - &total_subpaths, NULL); + /* + * When in partition order or descending partition order don't + * flatten any sub-partition's paths unless they're an Append or + * MergeAppend with a single subpath. For the descending order + * case we build the path list in reverse so that the Append scan + * correctly scans the partitions in reverse order. 
+ */ + if (partition_order) + { + /* Do the Append/MergeAppend flattening, when possible */ + cheapest_startup = get_singleton_append_subpath(cheapest_startup); + cheapest_total = get_singleton_append_subpath(cheapest_total); + + startup_subpaths = lappend(startup_subpaths, cheapest_startup); + total_subpaths = lappend(total_subpaths, cheapest_total); + } + else if (partition_order_desc) + { + cheapest_startup = get_singleton_append_subpath(cheapest_startup); + cheapest_total = get_singleton_append_subpath(cheapest_total); + + startup_subpaths = lcons(cheapest_startup, startup_subpaths); + total_subpaths = lcons(cheapest_total, total_subpaths); + } + else + { + accumulate_append_subpath(cheapest_startup, + &startup_subpaths, NULL); + accumulate_append_subpath(cheapest_total, + &total_subpaths, NULL); + } } - /* ... and build the MergeAppend paths */ - add_path(rel, (Path *) create_merge_append_path(root, + /* Build a simple Append path if in partition asc/desc order */ + if (partition_order || partition_order_desc) + { + add_path(rel, (Path *) create_append_path(root, rel, startup_subpaths, + NIL, pathkeys, NULL, - partitioned_rels)); - if (startup_neq_total) - add_path(rel, (Path *) create_merge_append_path(root, + 0, + false, + partitioned_rels, + -1)); + if (startup_neq_total) + add_path(rel, (Path *) create_append_path(root, rel, total_subpaths, + NIL, + pathkeys, + NULL, + 0, + false, + partitioned_rels, + -1)); + } + else + { + /* else just build the MergeAppend paths */ + add_path(rel, (Path *) create_merge_append_path(root, + rel, + startup_subpaths, pathkeys, NULL, partitioned_rels)); + if (startup_neq_total) + add_path(rel, (Path *) create_merge_append_path(root, + rel, + total_subpaths, + pathkeys, + NULL, + partitioned_rels)); + } } } @@ -1996,6 +2084,34 @@ accumulate_append_subpath(Path *path, List **subpaths, List **special_subpaths) *subpaths = lappend(*subpaths, path); } +/* + * get_singleton_append_subpath + * Returns the singleton subpath of a 
Append or MergeAppend or + * return 'path' if it's not a single sub-path Append/MergeAppend. + */ +static Path * +get_singleton_append_subpath(Path *path) +{ + if (IsA(path, AppendPath)) + { + AppendPath *apath = (AppendPath *)path; + + Assert(!apath->path.parallel_aware); + + if (list_length(apath->subpaths) == 1) + return (Path *) linitial(apath->subpaths); + } + else if (IsA(path, MergeAppendPath)) + { + MergeAppendPath *mpath = (MergeAppendPath *)path; + + if (list_length(mpath->subpaths) == 1) + return (Path *) linitial(mpath->subpaths); + } + + return path; +} + /* * set_dummy_rel_pathlist * Build a dummy path for a relation that's been excluded by constraints @@ -2016,7 +2132,7 @@ set_dummy_rel_pathlist(RelOptInfo *rel) rel->pathlist = NIL; rel->partial_pathlist = NIL; - add_path(rel, (Path *) create_append_path(NULL, rel, NIL, NIL, NULL, + add_path(rel, (Path *) create_append_path(NULL, rel, NIL, NIL, NIL, NULL, 0, false, NIL, -1)); /* diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index 7bf67a0529..e616bc91a4 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -1837,7 +1837,7 @@ append_nonpartial_cost(List *subpaths, int numpaths, int parallel_workers) * Determines and returns the cost of an Append node. */ void -cost_append(AppendPath *apath) +cost_append(PlannerInfo *root, AppendPath *apath) { ListCell *l; @@ -1849,21 +1849,56 @@ cost_append(AppendPath *apath) if (!apath->path.parallel_aware) { - Path *subpath = (Path *) linitial(apath->subpaths); - + Path *isubpath = (Path *) linitial(apath->subpaths); + List *pathkeys = apath->path.pathkeys; /* * Startup cost of non-parallel-aware Append is the startup cost of - * first subpath. + * first subpath. This may be overwritten below if the initial path + * requires a sort. 
*/ - apath->path.startup_cost = subpath->startup_cost; + apath->path.startup_cost = isubpath->startup_cost; - /* Compute rows and costs as sums of subplan rows and costs. */ + /* + * Compute rows and costs as sums of subplan rows and costs taking + * into account the cost of any sorts which may be required on + * subplans. + */ foreach(l, apath->subpaths) { Path *subpath = (Path *) lfirst(l); apath->path.rows += subpath->rows; - apath->path.total_cost += subpath->total_cost; + + if (pathkeys != NIL && + !pathkeys_contained_in(pathkeys, subpath->pathkeys)) + { + Path sort_path; /* dummy for result of cost_sort */ + + /* + * We'll need to insert a Sort node, so include cost for that + */ + cost_sort(&sort_path, + root, + pathkeys, + subpath->total_cost, + subpath->parent->tuples, + subpath->pathtarget->width, + 0.0, + work_mem, + apath->limit_tuples); + apath->path.total_cost += sort_path.total_cost; + + /* + * When the first subpath needs to be sorted, set the startup cost + * of the sort as the startup cost of the Append + */ + if (subpath == isubpath) + apath->path.startup_cost = sort_path.startup_cost; + } + else + { + apath->path.total_cost += subpath->total_cost; + } } } else /* parallel-aware */ @@ -1871,6 +1906,8 @@ cost_append(AppendPath *apath) int i = 0; double parallel_divisor = get_parallel_divisor(&apath->path); + Assert(apath->path.pathkeys == NIL); + /* Calculate startup cost. 
*/ foreach(l, apath->subpaths) { diff --git a/src/backend/optimizer/path/joinrels.c b/src/backend/optimizer/path/joinrels.c index d3d21fed5d..2f9fc50bf2 100644 --- a/src/backend/optimizer/path/joinrels.c +++ b/src/backend/optimizer/path/joinrels.c @@ -1231,7 +1231,7 @@ mark_dummy_rel(RelOptInfo *rel) rel->partial_pathlist = NIL; /* Set up the dummy path */ - add_path(rel, (Path *) create_append_path(NULL, rel, NIL, NIL, NULL, + add_path(rel, (Path *) create_append_path(NULL, rel, NIL, NIL, NIL, NULL, 0, false, NIL, -1)); /* Set or update cheapest_total_path and related fields */ diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c index ec66cb9c3c..f225541751 100644 --- a/src/backend/optimizer/path/pathkeys.c +++ b/src/backend/optimizer/path/pathkeys.c @@ -25,6 +25,7 @@ #include "optimizer/pathnode.h" #include "optimizer/paths.h" #include "optimizer/tlist.h" +#include "partitioning/partbounds.h" #include "utils/lsyscache.h" @@ -547,6 +548,59 @@ build_index_pathkeys(PlannerInfo *root, return retval; } +/* + * build_partition_pathkeys + * Build a pathkeys list that describes the ordering induced by the + * partitions of 'partrel'. (Callers must ensure that this partitioned + * table guarantees that lower order tuples never will be found in a + * later partition.) + */ +List * +build_partition_pathkeys(PlannerInfo *root, RelOptInfo *partrel, + ScanDirection scandir) +{ + PartitionScheme partscheme; + List *retval = NIL; + int i; + + partscheme = partrel->part_scheme; + + for (i = 0; i < partscheme->partnatts; i++) + { + PathKey *cpathkey; + Expr *keyCol = linitial(partrel->partexprs[i]); + + /* + * OK, try to make a canonical pathkey for this part key. Note we're + * underneath any outer joins, so nullable_relids should be NULL. 
+ */ + cpathkey = make_pathkey_from_sortinfo(root, + keyCol, + NULL, + partscheme->partopfamily[i], + partscheme->partopcintype[i], + partscheme->partcollation[i], + ScanDirectionIsBackward(scandir), + ScanDirectionIsBackward(scandir), + 0, + partrel->relids, + false); + + /* + * When unable to create the pathkey we'll just need to return + * whatever ones we have so far. + */ + if (cpathkey == NULL) + break; + + /* Add it to list, unless it's redundant. */ + if (!pathkey_is_redundant(cpathkey, retval)) + retval = lappend(retval, cpathkey); + } + + return retval; +} + /* * build_expression_pathkey * Build a pathkeys list that describes an ordering by a single expression diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index ae46b0140e..a276b7f3b1 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -201,8 +201,6 @@ static NamedTuplestoreScan *make_namedtuplestorescan(List *qptlist, List *qpqual Index scanrelid, char *enrname); static WorkTableScan *make_worktablescan(List *qptlist, List *qpqual, Index scanrelid, int wtParam); -static Append *make_append(List *appendplans, int first_partial_plan, - List *tlist, PartitionPruneInfo *partpruneinfo); static RecursiveUnion *make_recursive_union(List *tlist, Plan *lefttree, Plan *righttree, @@ -1025,12 +1023,24 @@ create_join_plan(PlannerInfo *root, JoinPath *best_path) static Plan * create_append_plan(PlannerInfo *root, AppendPath *best_path) { - Append *plan; + Append *node = makeNode(Append); + Plan *plan = &node->plan; List *tlist = build_path_tlist(root, &best_path->path); + List *pathkeys = best_path->path.pathkeys; List *subplans = NIL; ListCell *subpaths; RelOptInfo *rel = best_path->path.parent; PartitionPruneInfo *partpruneinfo = NULL; + int nodenumsortkeys; + AttrNumber *nodeSortColIdx; + Oid *nodeSortOperators; + Oid *nodeCollations; + bool *nodeNullsFirst; + + plan->targetlist = tlist; + plan->qual = NIL; + 
plan->lefttree = NULL; + plan->righttree = NULL; /* * The subpaths list could be empty, if every child was proven empty by @@ -1056,6 +1066,23 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path) return plan; } + if (pathkeys != NIL) + { + /* + * Compute sort column info, and adjust the Append's tlist as needed. + * We only need the 'nodeSortColIdx' from all of the output params. + */ + (void) prepare_sort_from_pathkeys(plan, pathkeys, + best_path->path.parent->relids, + NULL, + true, + &nodenumsortkeys, + &nodeSortColIdx, + &nodeSortOperators, + &nodeCollations, + &nodeNullsFirst); + } + /* Build the plan for each child */ foreach(subpaths, best_path->subpaths) { @@ -1065,6 +1092,40 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path) /* Must insist that all children return the same tlist */ subplan = create_plan_recurse(root, subpath, CP_EXACT_TLIST); + /* + * Now, for appends with pathkeys, insert a Sort node if subplan isn't + * sufficiently ordered. + */ + if (pathkeys != NIL) + { + int numsortkeys; + AttrNumber *sortColIdx; + Oid *sortOperators; + Oid *collations; + bool *nullsFirst; + + /* Compute sort column info, and adjust subplan's tlist as needed */ + subplan = prepare_sort_from_pathkeys(subplan, pathkeys, + subpath->parent->relids, + nodeSortColIdx, + false, + &numsortkeys, + &sortColIdx, + &sortOperators, + &collations, + &nullsFirst); + + if (!pathkeys_contained_in(pathkeys, subpath->pathkeys)) + { + + Sort *sort = make_sort(subplan, numsortkeys, + sortColIdx, sortOperators, + collations, nullsFirst); + + label_sort_with_costsize(root, sort, best_path->limit_tuples); + subplan = (Plan *) sort; + } + } subplans = lappend(subplans, subplan); } @@ -1107,10 +1168,11 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path) * parent-rel Vars it'll be asked to emit. 
*/ - plan = make_append(subplans, best_path->first_partial_path, - tlist, partpruneinfo); + node->appendplans = subplans; + node->first_partial_plan = best_path->first_partial_path; + node->part_prune_info = partpruneinfo; - copy_generic_path_info(&plan->plan, (Path *) best_path); + copy_generic_path_info(plan, (Path *) best_path); return (Plan *) plan; } @@ -5340,23 +5402,6 @@ make_foreignscan(List *qptlist, return node; } -static Append * -make_append(List *appendplans, int first_partial_plan, - List *tlist, PartitionPruneInfo *partpruneinfo) -{ - Append *node = makeNode(Append); - Plan *plan = &node->plan; - - plan->targetlist = tlist; - plan->qual = NIL; - plan->lefttree = NULL; - plan->righttree = NULL; - node->appendplans = appendplans; - node->first_partial_plan = first_partial_plan; - node->part_prune_info = partpruneinfo; - return node; -} - static RecursiveUnion * make_recursive_union(List *tlist, Plan *lefttree, diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index c729a99f8b..78b834032d 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -3899,6 +3899,7 @@ create_degenerate_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel, grouped_rel, paths, NIL, + NIL, NULL, 0, false, @@ -6878,7 +6879,7 @@ apply_scanjoin_target_to_paths(PlannerInfo *root, * node, which would cause this relation to stop appearing to be a * dummy rel.) */ - rel->pathlist = list_make1(create_append_path(root, rel, NIL, NIL, + rel->pathlist = list_make1(create_append_path(root, rel, NIL, NIL, NIL, NULL, 0, false, NIL, -1)); rel->partial_pathlist = NIL; diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c index d5720518a8..6d4657a4c1 100644 --- a/src/backend/optimizer/prep/prepunion.c +++ b/src/backend/optimizer/prep/prepunion.c @@ -656,7 +656,7 @@ generate_union_paths(SetOperationStmt *op, PlannerInfo *root, /* * Append the child results together. 
*/ - path = (Path *) create_append_path(root, result_rel, pathlist, NIL, + path = (Path *) create_append_path(root, result_rel, pathlist, NIL, NIL, NULL, 0, false, NIL, -1); /* @@ -711,7 +711,7 @@ generate_union_paths(SetOperationStmt *op, PlannerInfo *root, Assert(parallel_workers > 0); ppath = (Path *) - create_append_path(root, result_rel, NIL, partial_pathlist, + create_append_path(root, result_rel, NIL, partial_pathlist, NIL, NULL, parallel_workers, enable_parallel_append, NIL, -1); ppath = (Path *) @@ -822,7 +822,7 @@ generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root, /* * Append the child results together. */ - path = (Path *) create_append_path(root, result_rel, pathlist, NIL, + path = (Path *) create_append_path(root, result_rel, pathlist, NIL, NIL, NULL, 0, false, NIL, -1); /* Identify the grouping semantics */ diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index d50d86b252..ca021fca8d 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -1219,7 +1219,7 @@ AppendPath * create_append_path(PlannerInfo *root, RelOptInfo *rel, List *subpaths, List *partial_subpaths, - Relids required_outer, + List *pathkeys, Relids required_outer, int parallel_workers, bool parallel_aware, List *partitioned_rels, double rows) { @@ -1253,7 +1253,7 @@ create_append_path(PlannerInfo *root, pathnode->path.parallel_aware = parallel_aware; pathnode->path.parallel_safe = rel->consider_parallel; pathnode->path.parallel_workers = parallel_workers; - pathnode->path.pathkeys = NIL; /* result is always considered unsorted */ + pathnode->path.pathkeys = pathkeys; pathnode->partitioned_rels = list_copy(partitioned_rels); /* @@ -1263,10 +1263,14 @@ create_append_path(PlannerInfo *root, * costs. There may be some paths that require to do startup work by a * single worker. 
In such case, it's better for workers to choose the * expensive ones first, whereas the leader should choose the cheapest - * startup plan. + * startup plan. Note: We mustn't fiddle with the order of subpaths + * when the Append has valid pathkeys. The order they're listed in + * is critical to keeping the pathkeys valid. */ if (pathnode->path.parallel_aware) { + Assert(pathkeys == NIL); + subpaths = list_qsort(subpaths, append_total_cost_compare); partial_subpaths = list_qsort(partial_subpaths, append_startup_cost_compare); @@ -1274,6 +1278,15 @@ create_append_path(PlannerInfo *root, pathnode->first_partial_path = list_length(subpaths); pathnode->subpaths = list_concat(subpaths, partial_subpaths); + /* + * Apply query-wide LIMIT if known and path is for sole base relation. + * (Handling this at this low level is a bit klugy.) + */ + if (root != NULL && bms_equal(rel->relids, root->all_baserels)) + pathnode->limit_tuples = root->limit_tuples; + else + pathnode->limit_tuples = -1.0; + foreach(l, pathnode->subpaths) { Path *subpath = (Path *) lfirst(l); @@ -1287,7 +1300,7 @@ create_append_path(PlannerInfo *root, Assert(!parallel_aware || pathnode->path.parallel_safe); - cost_append(pathnode); + cost_append(root, pathnode); /* If the caller provided a row estimate, override the computed value. 
*/ if (rows >= 0) @@ -3587,7 +3600,7 @@ reparameterize_path(PlannerInfo *root, Path *path, } return (Path *) create_append_path(root, rel, childpaths, partialpaths, - required_outer, + NIL, required_outer, apath->path.parallel_workers, apath->path.parallel_aware, apath->partitioned_rels, diff --git a/src/backend/partitioning/partprune.c b/src/backend/partitioning/partprune.c index d6ca03de4a..d21f3ebdb5 100644 --- a/src/backend/partitioning/partprune.c +++ b/src/backend/partitioning/partprune.c @@ -178,7 +178,66 @@ static bool partkey_datum_from_expr(PartitionPruneContext *context, Expr *expr, int stateidx, Datum *value, bool *isnull); +/* + * partitions_are_ordered + * For the partitioned table given in 'partrel', returns true if the + * partitioned table guarantees that tuples which sort earlier according + * to the partition bound are stored in an earlier partition. Returns + * false if this is not possible, or if we have insufficient means to prove + * it. + * + * This assumes nothing about the order of tuples inside the actual + * partitions. + */ +bool +partitions_are_ordered(PlannerInfo *root, RelOptInfo *partrel) +{ + PartitionBoundInfo boundinfo = partrel->boundinfo; + + Assert(boundinfo != NULL); + + switch (boundinfo->strategy) + { + /* + * RANGE type partitions guarantee that the partitions can be scanned + * in the order that they're defined in the PartitionDesc to provide + * non-overlapping ranges of tuples. We must disallow when a DEFAULT + * partition exists as this could contain tuples from either below or + * above the defined range, or contain tuples belonging to gaps in the + * defined range. + */ + case PARTITION_STRATEGY_RANGE: + if (partition_bound_has_default(boundinfo)) + return false; + break; + + /* + * LIST partitions can also guarantee ordering, but we'd need to + * ensure that partitions don't allow interleaved values. We could + * likely check for this looking at each partition, in order, and + * checking which Datums are accepted. 
If we find a Datum in a + * partition that's greater than one previously already seen, then + * values could become out of order and we'd have to disable the + * optimization. For now, let's just keep it simple and just accept + * LIST partitions without a DEFAULT partition which only accept a + * single Datum per partition. This is cheap as it does not require + * any per-partition processing. Maybe we'd like to handle more + * complex cases in the future. + */ + case PARTITION_STRATEGY_LIST: + if (partition_bound_has_default(boundinfo)) + return false; + if (boundinfo->ndatums + partition_bound_accepts_nulls(boundinfo) != partrel->nparts) + return false; + break; + + default: + return false; + } + + return true; +} /* * make_partition_pruneinfo * Builds a PartitionPruneInfo which can be used in the executor to allow diff --git a/src/backend/utils/cache/partcache.c b/src/backend/utils/cache/partcache.c index 5757301d05..deb205c44f 100644 --- a/src/backend/utils/cache/partcache.c +++ b/src/backend/utils/cache/partcache.c @@ -937,6 +937,8 @@ qsort_partition_hbound_cmp(const void *a, const void *b) * qsort_partition_list_value_cmp * * Compare two list partition bound datums + * + * Note: If changing this, see build_partition_pathkeys() */ static int32 qsort_partition_list_value_cmp(const void *a, const void *b, void *arg) @@ -950,7 +952,13 @@ qsort_partition_list_value_cmp(const void *a, const void *b, void *arg) val1, val2)); } -/* Used when sorting range bounds across all range partitions */ +/* + * qsort_partition_rbound_cmp + * + * Used when sorting range bounds across all range partitions + * + * Note: If changing this, see build_partition_pathkeys() + */ static int32 qsort_partition_rbound_cmp(const void *a, const void *b, void *arg) { diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index 88d37236f7..5a60fb860d 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -1321,6 +1321,7 @@ typedef struct AppendPath /* 
Index of first partial path in subpaths */ int first_partial_path; + double limit_tuples; /* hard limit on output tuples, or -1 */ } AppendPath; #define IS_DUMMY_PATH(p) \ diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h index 77ca7ff837..7cb5644dd3 100644 --- a/src/include/optimizer/cost.h +++ b/src/include/optimizer/cost.h @@ -110,7 +110,7 @@ extern void cost_sort(Path *path, PlannerInfo *root, List *pathkeys, Cost input_cost, double tuples, int width, Cost comparison_cost, int sort_mem, double limit_tuples); -extern void cost_append(AppendPath *path); +extern void cost_append(PlannerInfo *root, AppendPath *path); extern void cost_merge_append(Path *path, PlannerInfo *root, List *pathkeys, int n_streams, Cost input_startup_cost, Cost input_total_cost, diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h index 81abcf53a8..5a790cf6be 100644 --- a/src/include/optimizer/pathnode.h +++ b/src/include/optimizer/pathnode.h @@ -65,7 +65,7 @@ extern BitmapOrPath *create_bitmap_or_path(PlannerInfo *root, extern TidPath *create_tidscan_path(PlannerInfo *root, RelOptInfo *rel, List *tidquals, Relids required_outer); extern AppendPath *create_append_path(PlannerInfo *root, RelOptInfo *rel, - List *subpaths, List *partial_subpaths, + List *subpaths, List *partial_subpaths, List *pathkeys, Relids required_outer, int parallel_workers, bool parallel_aware, List *partitioned_rels, double rows); diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h index cafde307ad..ee958a0f07 100644 --- a/src/include/optimizer/paths.h +++ b/src/include/optimizer/paths.h @@ -201,6 +201,8 @@ extern Path *get_cheapest_fractional_path_for_pathkeys(List *paths, extern Path *get_cheapest_parallel_safe_total_inner(List *paths); extern List *build_index_pathkeys(PlannerInfo *root, IndexOptInfo *index, ScanDirection scandir); +extern List *build_partition_pathkeys(PlannerInfo *root, RelOptInfo *partrel, + ScanDirection scandir); extern 
List *build_expression_pathkey(PlannerInfo *root, Expr *expr, Relids nullable_relids, Oid opno, Relids rel, bool create_it); diff --git a/src/include/partitioning/partprune.h b/src/include/partitioning/partprune.h index e07aaaf798..bc02b1bacb 100644 --- a/src/include/partitioning/partprune.h +++ b/src/include/partitioning/partprune.h @@ -71,6 +71,7 @@ typedef struct PartitionPruneContext #define PruneCxtStateIdx(partnatts, step_id, keyno) \ ((partnatts) * (step_id) + (keyno)) +extern bool partitions_are_ordered(PlannerInfo *root, RelOptInfo *partrel); extern PartitionPruneInfo *make_partition_pruneinfo(PlannerInfo *root, RelOptInfo *parentrel, List *subpaths, diff --git a/src/test/regress/expected/inherit.out b/src/test/regress/expected/inherit.out index 4f29d9f891..a0ef0e18b3 100644 --- a/src/test/regress/expected/inherit.out +++ b/src/test/regress/expected/inherit.out @@ -2032,7 +2032,187 @@ explain (costs off) select * from mcrparted where a = 20 and c > 20; -- scans mc Filter: ((c > 20) AND (a = 20)) (9 rows) +-- Test code that uses Append nodes in place of MergeAppend when the +-- partitions guarantee earlier partitions means lower sort order of the +-- tuples contained within. 
+create index mcrparted_a_abs_c_idx on mcrparted (a, abs(b), c); +-- check MergeAppend is uses when a default partition exists +explain (costs off) select * from mcrparted order by a, abs(b), c; + QUERY PLAN +------------------------------------------------------------------- + Merge Append + Sort Key: mcrparted0.a, (abs(mcrparted0.b)), mcrparted0.c + -> Index Scan using mcrparted0_a_abs_c_idx on mcrparted0 + -> Index Scan using mcrparted1_a_abs_c_idx on mcrparted1 + -> Index Scan using mcrparted2_a_abs_c_idx on mcrparted2 + -> Index Scan using mcrparted3_a_abs_c_idx on mcrparted3 + -> Index Scan using mcrparted4_a_abs_c_idx on mcrparted4 + -> Index Scan using mcrparted5_a_abs_c_idx on mcrparted5 + -> Index Scan using mcrparted_def_a_abs_c_idx on mcrparted_def +(9 rows) + +drop table mcrparted_def; +-- check Append is used for RANGE partitioned table with no default and no subpartitions +explain (costs off) select * from mcrparted order by a, abs(b), c; + QUERY PLAN +------------------------------------------------------------- + Append + -> Index Scan using mcrparted0_a_abs_c_idx on mcrparted0 + -> Index Scan using mcrparted1_a_abs_c_idx on mcrparted1 + -> Index Scan using mcrparted2_a_abs_c_idx on mcrparted2 + -> Index Scan using mcrparted3_a_abs_c_idx on mcrparted3 + -> Index Scan using mcrparted4_a_abs_c_idx on mcrparted4 + -> Index Scan using mcrparted5_a_abs_c_idx on mcrparted5 +(7 rows) + +-- check Append is used with subpaths in reverse order with backwards index scans. 
+explain (costs off) select * from mcrparted order by a desc, abs(b) desc, c desc; + QUERY PLAN +---------------------------------------------------------------------- + Append + -> Index Scan Backward using mcrparted5_a_abs_c_idx on mcrparted5 + -> Index Scan Backward using mcrparted4_a_abs_c_idx on mcrparted4 + -> Index Scan Backward using mcrparted3_a_abs_c_idx on mcrparted3 + -> Index Scan Backward using mcrparted2_a_abs_c_idx on mcrparted2 + -> Index Scan Backward using mcrparted1_a_abs_c_idx on mcrparted1 + -> Index Scan Backward using mcrparted0_a_abs_c_idx on mcrparted0 +(7 rows) + +-- check that Append plan is used containing a MergeAppend for sub-partitions +-- that are unordered. +drop table mcrparted5; +create table mcrparted5 partition of mcrparted for values from (20, 20, 20) to (maxvalue, maxvalue, maxvalue) partition by list (a); +create table mcrparted5a partition of mcrparted5 for values in(20); +create table mcrparted5_def partition of mcrparted5 default; +explain (costs off) select * from mcrparted order by a, abs(b), c; + QUERY PLAN +--------------------------------------------------------------------------- + Append + -> Index Scan using mcrparted0_a_abs_c_idx on mcrparted0 + -> Index Scan using mcrparted1_a_abs_c_idx on mcrparted1 + -> Index Scan using mcrparted2_a_abs_c_idx on mcrparted2 + -> Index Scan using mcrparted3_a_abs_c_idx on mcrparted3 + -> Index Scan using mcrparted4_a_abs_c_idx on mcrparted4 + -> Merge Append + Sort Key: mcrparted5a.a, (abs(mcrparted5a.b)), mcrparted5a.c + -> Index Scan using mcrparted5a_a_abs_c_idx on mcrparted5a + -> Index Scan using mcrparted5_def_a_abs_c_idx on mcrparted5_def +(10 rows) + +drop table mcrparted5_def; +-- check that an Append plan is used and the sub-partitions are flattened +-- into the main Append when the sub-partition is unordered but contains +-- just a single sub-partition. 
+explain (costs off) select a, abs(b) from mcrparted order by a, abs(b), c; + QUERY PLAN +--------------------------------------------------------------- + Append + -> Index Scan using mcrparted0_a_abs_c_idx on mcrparted0 + -> Index Scan using mcrparted1_a_abs_c_idx on mcrparted1 + -> Index Scan using mcrparted2_a_abs_c_idx on mcrparted2 + -> Index Scan using mcrparted3_a_abs_c_idx on mcrparted3 + -> Index Scan using mcrparted4_a_abs_c_idx on mcrparted4 + -> Index Scan using mcrparted5a_a_abs_c_idx on mcrparted5a +(7 rows) + +-- check that Append is used when the sub-partitioned tables are pruned during planning. +explain (costs off) select * from mcrparted where a < 20 order by a, abs(b), c; + QUERY PLAN +------------------------------------------------------------- + Append + -> Index Scan using mcrparted0_a_abs_c_idx on mcrparted0 + Index Cond: (a < 20) + -> Index Scan using mcrparted1_a_abs_c_idx on mcrparted1 + Index Cond: (a < 20) + -> Index Scan using mcrparted2_a_abs_c_idx on mcrparted2 + Index Cond: (a < 20) + -> Index Scan using mcrparted3_a_abs_c_idx on mcrparted3 + Index Cond: (a < 20) +(9 rows) + +create table mclparted (a int) partition by list(a); +create table mclparted1 partition of mclparted for values in(1); +create table mclparted2 partition of mclparted for values in(2); +create index on mclparted (a); +-- Ensure an Append is used to for a list partition with an order by. +explain (costs off) select * from mclparted order by a; + QUERY PLAN +------------------------------------------------------------ + Append + -> Index Only Scan using mclparted1_a_idx on mclparted1 + -> Index Only Scan using mclparted2_a_idx on mclparted2 +(3 rows) + +-- Ensure a MergeAppend is used when a partition exists with interleaved +-- datums in the partition bound. 
+create table mclparted3_5 partition of mclparted for values in(3,5); +create table mclparted4 partition of mclparted for values in(4); +explain (costs off) select * from mclparted order by a; + QUERY PLAN +---------------------------------------------------------------- + Merge Append + Sort Key: mclparted1.a + -> Index Only Scan using mclparted1_a_idx on mclparted1 + -> Index Only Scan using mclparted2_a_idx on mclparted2 + -> Index Only Scan using mclparted3_5_a_idx on mclparted3_5 + -> Index Only Scan using mclparted4_a_idx on mclparted4 +(6 rows) + +drop table mclparted; +-- Ensure subplans which don't have a path with the correct pathkeys get +-- sorted correctly. +drop index mcrparted_a_abs_c_idx; +create index on mcrparted1 (a, abs(b), c); +create index on mcrparted2 (a, abs(b), c); +create index on mcrparted3 (a, abs(b), c); +create index on mcrparted4 (a, abs(b), c); +set enable_seqscan = 0; +explain (costs off) select * from mcrparted where a < 20 order by a, abs(b), c; + QUERY PLAN +------------------------------------------------------------------- + Append + -> Sort + Sort Key: mcrparted0.a, (abs(mcrparted0.b)), mcrparted0.c + -> Seq Scan on mcrparted0 + Filter: (a < 20) + -> Index Scan using mcrparted1_a_abs_c_idx on mcrparted1 + Index Cond: (a < 20) + -> Index Scan using mcrparted2_a_abs_c_idx on mcrparted2 + Index Cond: (a < 20) + -> Index Scan using mcrparted3_a_abs_c_idx on mcrparted3 + Index Cond: (a < 20) +(11 rows) + +reset enable_seqscan; +set enable_bitmapscan = 0; +-- Ensure Append node can be used when the partition is ordered by some +-- pathkeys which were deemed redundant. 
+explain (costs off) select * from mcrparted where a = 10 order by a, abs(b), c; + QUERY PLAN +------------------------------------------------------------- + Append + -> Index Scan using mcrparted1_a_abs_c_idx on mcrparted1 + Index Cond: (a = 10) + -> Index Scan using mcrparted2_a_abs_c_idx on mcrparted2 + Index Cond: (a = 10) +(5 rows) + +reset enable_bitmapscan; drop table mcrparted; +-- Ensure LIST partitions allow an Append to be used instead of a MergeAppend +create table bool_rp (b bool) partition by list(b); +create table bool_rp_true partition of bool_rp for values in(true); +create table bool_rp_false partition of bool_rp for values in(false); +create index on bool_rp (b); +explain (costs off) select * from bool_rp order by b; + QUERY PLAN +------------------------------------------------------------------ + Append + -> Index Only Scan using bool_rp_false_b_idx on bool_rp_false + -> Index Only Scan using bool_rp_true_b_idx on bool_rp_true +(3 rows) + +drop table bool_rp; -- check that partitioned table Appends cope with being referenced in -- subplans create table parted_minmax (a int, b varchar(16)) partition by range (a); @@ -2045,17 +2225,15 @@ explain (costs off) select min(a), max(a) from parted_minmax where b = '12345'; Result InitPlan 1 (returns $0) -> Limit - -> Merge Append - Sort Key: parted_minmax1.a + -> Append -> Index Only Scan using parted_minmax1i on parted_minmax1 Index Cond: ((a IS NOT NULL) AND (b = '12345'::text)) InitPlan 2 (returns $1) -> Limit - -> Merge Append - Sort Key: parted_minmax1_1.a DESC + -> Append -> Index Only Scan Backward using parted_minmax1i on parted_minmax1 parted_minmax1_1 Index Cond: ((a IS NOT NULL) AND (b = '12345'::text)) -(13 rows) +(11 rows) select min(a), max(a) from parted_minmax where b = '12345'; min | max diff --git a/src/test/regress/expected/partition_prune.out b/src/test/regress/expected/partition_prune.out index 24313e8c78..d7c268c5af 100644 --- a/src/test/regress/expected/partition_prune.out +++ 
b/src/test/regress/expected/partition_prune.out @@ -3013,14 +3013,14 @@ drop table boolp; -- set enable_seqscan = off; set enable_sort = off; -create table ma_test (a int) partition by range (a); +create table ma_test (a int, b int) partition by range (a); create table ma_test_p1 partition of ma_test for values from (0) to (10); create table ma_test_p2 partition of ma_test for values from (10) to (20); create table ma_test_p3 partition of ma_test for values from (20) to (30); -insert into ma_test select x from generate_series(0,29) t(x); -create index on ma_test (a); +insert into ma_test select x,x from generate_series(0,29) t(x); +create index on ma_test (b); analyze ma_test; -prepare mt_q1 (int) as select * from ma_test where a >= $1 and a % 10 = 5 order by a; +prepare mt_q1 (int) as select a from ma_test where a >= $1 and a % 10 = 5 order by b; -- Execute query 5 times to allow choose_custom_plan -- to start considering a generic plan. execute mt_q1(0); @@ -3067,17 +3067,15 @@ explain (analyze, costs off, summary off, timing off) execute mt_q1(15); QUERY PLAN ------------------------------------------------------------------------------- Merge Append (actual rows=2 loops=1) - Sort Key: ma_test_p2.a + Sort Key: ma_test_p2.b Subplans Removed: 1 - -> Index Scan using ma_test_p2_a_idx on ma_test_p2 (actual rows=1 loops=1) - Index Cond: (a >= $1) - Filter: ((a % 10) = 5) - Rows Removed by Filter: 4 - -> Index Scan using ma_test_p3_a_idx on ma_test_p3 (actual rows=1 loops=1) - Index Cond: (a >= $1) - Filter: ((a % 10) = 5) + -> Index Scan using ma_test_p2_b_idx on ma_test_p2 (actual rows=1 loops=1) + Filter: ((a >= $1) AND ((a % 10) = 5)) Rows Removed by Filter: 9 -(11 rows) + -> Index Scan using ma_test_p3_b_idx on ma_test_p3 (actual rows=1 loops=1) + Filter: ((a >= $1) AND ((a % 10) = 5)) + Rows Removed by Filter: 9 +(9 rows) execute mt_q1(15); a @@ -3090,13 +3088,12 @@ explain (analyze, costs off, summary off, timing off) execute mt_q1(25); QUERY PLAN 
------------------------------------------------------------------------------- Merge Append (actual rows=1 loops=1) - Sort Key: ma_test_p3.a + Sort Key: ma_test_p3.b Subplans Removed: 2 - -> Index Scan using ma_test_p3_a_idx on ma_test_p3 (actual rows=1 loops=1) - Index Cond: (a >= $1) - Filter: ((a % 10) = 5) - Rows Removed by Filter: 4 -(7 rows) + -> Index Scan using ma_test_p3_b_idx on ma_test_p3 (actual rows=1 loops=1) + Filter: ((a >= $1) AND ((a % 10) = 5)) + Rows Removed by Filter: 9 +(6 rows) execute mt_q1(25); a @@ -3109,12 +3106,11 @@ explain (analyze, costs off, summary off, timing off) execute mt_q1(35); QUERY PLAN ------------------------------------------------------------------------ Merge Append (actual rows=0 loops=1) - Sort Key: ma_test_p1.a + Sort Key: ma_test_p1.b Subplans Removed: 2 - -> Index Scan using ma_test_p1_a_idx on ma_test_p1 (never executed) - Index Cond: (a >= $1) - Filter: ((a % 10) = 5) -(6 rows) + -> Index Scan using ma_test_p1_b_idx on ma_test_p1 (never executed) + Filter: ((a >= $1) AND ((a % 10) = 5)) +(5 rows) execute mt_q1(35); a @@ -3123,23 +3119,23 @@ execute mt_q1(35); deallocate mt_q1; -- ensure initplan params properly prune partitions -explain (analyze, costs off, summary off, timing off) select * from ma_test where a >= (select min(a) from ma_test_p2) order by a; +explain (analyze, costs off, summary off, timing off) select * from ma_test where a >= (select min(b) from ma_test_p2) order by b; QUERY PLAN ------------------------------------------------------------------------------------------------------------ Merge Append (actual rows=20 loops=1) - Sort Key: ma_test_p1.a + Sort Key: ma_test_p1.b InitPlan 2 (returns $1) -> Result (actual rows=1 loops=1) InitPlan 1 (returns $0) -> Limit (actual rows=1 loops=1) - -> Index Scan using ma_test_p2_a_idx on ma_test_p2 ma_test_p2_1 (actual rows=1 loops=1) - Index Cond: (a IS NOT NULL) - -> Index Scan using ma_test_p1_a_idx on ma_test_p1 (never executed) - Index Cond: (a >= 
$1) - -> Index Scan using ma_test_p2_a_idx on ma_test_p2 (actual rows=10 loops=1) - Index Cond: (a >= $1) - -> Index Scan using ma_test_p3_a_idx on ma_test_p3 (actual rows=10 loops=1) - Index Cond: (a >= $1) + -> Index Scan using ma_test_p2_b_idx on ma_test_p2 ma_test_p2_1 (actual rows=1 loops=1) + Index Cond: (b IS NOT NULL) + -> Index Scan using ma_test_p1_b_idx on ma_test_p1 (never executed) + Filter: (a >= $1) + -> Index Scan using ma_test_p2_b_idx on ma_test_p2 (actual rows=10 loops=1) + Filter: (a >= $1) + -> Index Scan using ma_test_p3_b_idx on ma_test_p3 (actual rows=10 loops=1) + Filter: (a >= $1) (14 rows) reset enable_seqscan; diff --git a/src/test/regress/sql/inherit.sql b/src/test/regress/sql/inherit.sql index a6e541d4da..a1416b2240 100644 --- a/src/test/regress/sql/inherit.sql +++ b/src/test/regress/sql/inherit.sql @@ -721,8 +721,89 @@ explain (costs off) select * from mcrparted where abs(b) = 5; -- scans all parti explain (costs off) select * from mcrparted where a > -1; -- scans all partitions explain (costs off) select * from mcrparted where a = 20 and abs(b) = 10 and c > 10; -- scans mcrparted4 explain (costs off) select * from mcrparted where a = 20 and c > 20; -- scans mcrparted3, mcrparte4, mcrparte5, mcrparted_def + +-- Test code that uses Append nodes in place of MergeAppend when the +-- partitions guarantee earlier partitions means lower sort order of the +-- tuples contained within. +create index mcrparted_a_abs_c_idx on mcrparted (a, abs(b), c); + +-- check MergeAppend is uses when a default partition exists +explain (costs off) select * from mcrparted order by a, abs(b), c; + +drop table mcrparted_def; + +-- check Append is used for RANGE partitioned table with no default and no subpartitions +explain (costs off) select * from mcrparted order by a, abs(b), c; + +-- check Append is used with subpaths in reverse order with backwards index scans. 
+explain (costs off) select * from mcrparted order by a desc, abs(b) desc, c desc; + +-- check that Append plan is used containing a MergeAppend for sub-partitions +-- that are unordered. +drop table mcrparted5; +create table mcrparted5 partition of mcrparted for values from (20, 20, 20) to (maxvalue, maxvalue, maxvalue) partition by list (a); +create table mcrparted5a partition of mcrparted5 for values in(20); +create table mcrparted5_def partition of mcrparted5 default; + +explain (costs off) select * from mcrparted order by a, abs(b), c; + +drop table mcrparted5_def; + +-- check that an Append plan is used and the sub-partitions are flattened +-- into the main Append when the sub-partition is unordered but contains +-- just a single sub-partition. +explain (costs off) select a, abs(b) from mcrparted order by a, abs(b), c; + +-- check that Append is used when the sub-partitioned tables are pruned during planning. +explain (costs off) select * from mcrparted where a < 20 order by a, abs(b), c; + +create table mclparted (a int) partition by list(a); +create table mclparted1 partition of mclparted for values in(1); +create table mclparted2 partition of mclparted for values in(2); +create index on mclparted (a); + +-- Ensure an Append is used to for a list partition with an order by. +explain (costs off) select * from mclparted order by a; + +-- Ensure a MergeAppend is used when a partition exists with interleaved +-- datums in the partition bound. +create table mclparted3_5 partition of mclparted for values in(3,5); +create table mclparted4 partition of mclparted for values in(4); + +explain (costs off) select * from mclparted order by a; + +drop table mclparted; + +-- Ensure subplans which don't have a path with the correct pathkeys get +-- sorted correctly. 
+drop index mcrparted_a_abs_c_idx; +create index on mcrparted1 (a, abs(b), c); +create index on mcrparted2 (a, abs(b), c); +create index on mcrparted3 (a, abs(b), c); +create index on mcrparted4 (a, abs(b), c); + +set enable_seqscan = 0; +explain (costs off) select * from mcrparted where a < 20 order by a, abs(b), c; +reset enable_seqscan; + +set enable_bitmapscan = 0; +-- Ensure Append node can be used when the partition is ordered by some +-- pathkeys which were deemed redundant. +explain (costs off) select * from mcrparted where a = 10 order by a, abs(b), c; +reset enable_bitmapscan; + drop table mcrparted; +-- Ensure LIST partitions allow an Append to be used instead of a MergeAppend +create table bool_rp (b bool) partition by list(b); +create table bool_rp_true partition of bool_rp for values in(true); +create table bool_rp_false partition of bool_rp for values in(false); +create index on bool_rp (b); + +explain (costs off) select * from bool_rp order by b; + +drop table bool_rp; + -- check that partitioned table Appends cope with being referenced in -- subplans create table parted_minmax (a int, b varchar(16)) partition by range (a); diff --git a/src/test/regress/sql/partition_prune.sql b/src/test/regress/sql/partition_prune.sql index eca1a7c5ac..a834afd572 100644 --- a/src/test/regress/sql/partition_prune.sql +++ b/src/test/regress/sql/partition_prune.sql @@ -740,15 +740,15 @@ drop table boolp; -- set enable_seqscan = off; set enable_sort = off; -create table ma_test (a int) partition by range (a); +create table ma_test (a int, b int) partition by range (a); create table ma_test_p1 partition of ma_test for values from (0) to (10); create table ma_test_p2 partition of ma_test for values from (10) to (20); create table ma_test_p3 partition of ma_test for values from (20) to (30); -insert into ma_test select x from generate_series(0,29) t(x); -create index on ma_test (a); +insert into ma_test select x,x from generate_series(0,29) t(x); +create index on ma_test 
(b); analyze ma_test; -prepare mt_q1 (int) as select * from ma_test where a >= $1 and a % 10 = 5 order by a; +prepare mt_q1 (int) as select a from ma_test where a >= $1 and a % 10 = 5 order by b; -- Execute query 5 times to allow choose_custom_plan -- to start considering a generic plan. @@ -769,7 +769,7 @@ execute mt_q1(35); deallocate mt_q1; -- ensure initplan params properly prune partitions -explain (analyze, costs off, summary off, timing off) select * from ma_test where a >= (select min(a) from ma_test_p2) order by a; +explain (analyze, costs off, summary off, timing off) select * from ma_test where a >= (select min(b) from ma_test_p2) order by b; reset enable_seqscan; reset enable_sort; -- 2.16.2.windows.1