From 32dd4d7cd0306371a0b3116008aa783db5e472ca Mon Sep 17 00:00:00 2001 From: David Rowley Date: Fri, 2 Mar 2018 01:46:33 +1300 Subject: [PATCH v14 5/5] Allow MergeAppend's subnodes to be pruned during execution Already supported for Append nodes, this commit allows partition pruning to occur in MergeAppend using values which are only known during execution. --- src/backend/executor/nodeMergeAppend.c | 52 ++++++++-- src/backend/nodes/copyfuncs.c | 1 + src/backend/nodes/outfuncs.c | 2 + src/backend/nodes/readfuncs.c | 1 + src/backend/optimizer/plan/createplan.c | 34 +++++++ src/include/nodes/execnodes.h | 6 ++ src/include/nodes/plannodes.h | 4 + src/test/regress/expected/partition_prune.out | 137 ++++++++++++++++++++++++++ src/test/regress/sql/partition_prune.sql | 38 +++++++ 9 files changed, 269 insertions(+), 6 deletions(-) diff --git a/src/backend/executor/nodeMergeAppend.c b/src/backend/executor/nodeMergeAppend.c index 118f4ef..eb55596 100644 --- a/src/backend/executor/nodeMergeAppend.c +++ b/src/backend/executor/nodeMergeAppend.c @@ -39,6 +39,7 @@ #include "postgres.h" #include "executor/execdebug.h" +#include "executor/execPartition.h" #include "executor/nodeMergeAppend.h" #include "lib/binaryheap.h" #include "miscadmin.h" @@ -100,9 +101,6 @@ ExecInitMergeAppend(MergeAppend *node, EState *estate, int eflags) /* * Miscellaneous initialization - * - * MergeAppend plans don't have expression contexts because they never - * call ExecQual or ExecProject. */ /* @@ -153,6 +151,28 @@ ExecInitMergeAppend(MergeAppend *node, EState *estate, int eflags) PrepareSortSupportFromOrderingOp(node->sortOperators[i], sortKey); } + if (node->part_prune_infos != NIL) + { + ExecAssignExprContext(estate, &mergestate->ps); + + /* + * When run-time partition pruning is enabled, setup the pruning data + * structure. + */ + mergestate->partition_pruning = + ExecSetupPartitionPruning(&mergestate->ps, + node->part_prune_infos); + } + else + { + /* + * When run-time partition pruning is not enabled we can just mark + * all subplans as valid. + */ + mergestate->ms_valid_subplans = bms_add_range(NULL, 0, nplans - 1); + mergestate->partition_pruning = NULL; + } + /* * initialize to show we have not run the subplans yet */ @@ -178,11 +198,18 @@ ExecMergeAppend(PlanState *pstate) if (!node->ms_initialized) { + + /* Determine minimum set of matching partitions, if not already set */ + if (node->ms_valid_subplans == NULL) + node->ms_valid_subplans = + ExecFindMatchingSubPlans(node->partition_pruning); + /* - * First time through: pull the first tuple from each subplan, and set - * up the heap. + * First time through: pull the first tuple from each valid subplan, + * and set up the heap. */ - for (i = 0; i < node->ms_nplans; i++) + i = -1; + while ((i = bms_next_member(node->ms_valid_subplans, i)) >= 0) { node->ms_slots[i] = ExecProcNode(node->mergeplans[i]); if (!TupIsNull(node->ms_slots[i])) @@ -295,6 +322,19 @@ ExecReScanMergeAppend(MergeAppendState *node) { int i; + /* + * If any of the parameters being used for partition pruning have changed, + * then we'd better unset the valid subplans so that they are reselected + * for the new parameter values. + */ + if (node->partition_pruning && + bms_overlap(node->ps.chgParam, + node->partition_pruning->prune_param_ids)) + { + bms_free(node->ms_valid_subplans); + node->ms_valid_subplans = NULL; + } + for (i = 0; i < node->ms_nplans; i++) { PlanState *subnode = node->mergeplans[i]; diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index 65e02a2..80cc265 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -272,6 +272,7 @@ _copyMergeAppend(const MergeAppend *from) COPY_POINTER_FIELD(sortOperators, from->numCols * sizeof(Oid)); COPY_POINTER_FIELD(collations, from->numCols * sizeof(Oid)); COPY_POINTER_FIELD(nullsFirst, from->numCols * sizeof(bool)); + COPY_NODE_FIELD(part_prune_infos); return newnode; } diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index a827cc6..1ada14d 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -433,6 +433,8 @@ _outMergeAppend(StringInfo str, const MergeAppend *node) appendStringInfoString(str, " :nullsFirst"); for (i = 0; i < node->numCols; i++) appendStringInfo(str, " %s", booltostr(node->nullsFirst[i])); + + WRITE_NODE_FIELD(part_prune_infos); } static void diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c index fa869a0..f403231 100644 --- a/src/backend/nodes/readfuncs.c +++ b/src/backend/nodes/readfuncs.c @@ -1645,6 +1645,7 @@ _readMergeAppend(void) READ_OID_ARRAY(sortOperators, local_node->numCols); READ_OID_ARRAY(collations, local_node->numCols); READ_BOOL_ARRAY(nullsFirst, local_node->numCols); + READ_NODE_FIELD(part_prune_infos); READ_DONE(); } diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index 1a788eb..eba525b 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -1125,6 +1125,8 @@ create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path) List *pathkeys = best_path->path.pathkeys; List *subplans = NIL; ListCell *subpaths; + RelOptInfo *rel = best_path->path.parent; + List *partpruneinfos = NIL; /* * We don't have the actual creation of the MergeAppend node split out @@ -1210,8 +1212,40 @@ create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path) subplans = lappend(subplans, subplan); } + if (rel->reloptkind == RELOPT_BASEREL && + best_path->partitioned_rels != NIL) + { + List *prunequal; + + prunequal = extract_actual_clauses(rel->baserestrictinfo, false); + + if (best_path->path.param_info) + { + + List *prmquals = best_path->path.param_info->ppi_clauses; + + prmquals = extract_actual_clauses(prmquals, false); + prmquals = (List *) replace_nestloop_params(root, + (Node *) prmquals); + + prunequal = list_concat(prunequal, prmquals); + } + + /* + * If any quals exist, then these may be useful to allow us to perform + * further partition pruning during execution. We'll generate a + * PartitionPruneInfo for each partitioned rel to store these quals + * and allow translation of partition indexes into subpath indexes. + */ + if (prunequal != NIL) + partpruneinfos = make_partition_pruneinfo(root, rel, + best_path->partitioned_rels, + best_path->subpaths, prunequal); + } + node->partitioned_rels = best_path->partitioned_rels; node->mergeplans = subplans; + node->part_prune_infos = partpruneinfos; return (Plan *) node; } diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index 4141f34..70e6db7 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -1046,6 +1046,10 @@ struct AppendState * slots current output tuple of each subplan * heap heap of active tuples * initialized true if we have fetched first tuple from each subplan + * partition_pruning details required to allow partitions to be + * eliminated from the scan, or NULL if not possible. + * ms_valid_subplans for runtime pruning, valid mergeplans indexes to + * scan. * ---------------- */ typedef struct MergeAppendState @@ -1058,6 +1062,8 @@ typedef struct MergeAppendState TupleTableSlot **ms_slots; /* array of length ms_nplans */ struct binaryheap *ms_heap; /* binary heap of slot indices */ bool ms_initialized; /* are subplans started? */ + PartitionPruning *partition_pruning; + Bitmapset *ms_valid_subplans; } MergeAppendState; /* ---------------- diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index c94937edae..25e9574 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -274,6 +274,10 @@ typedef struct MergeAppend Oid *sortOperators; /* OIDs of operators to sort them by */ Oid *collations; /* OIDs of collations */ bool *nullsFirst; /* NULLS FIRST/LAST directions */ + /* + * Mapping details for run-time subplan pruning, one per partitioned_rels + */ + List *part_prune_infos; } MergeAppend; /* ---------------- diff --git a/src/test/regress/expected/partition_prune.out b/src/test/regress/expected/partition_prune.out index 424b3a7..1875628 100644 --- a/src/test/regress/expected/partition_prune.out +++ b/src/test/regress/expected/partition_prune.out @@ -2590,3 +2590,140 @@ select * from boolp where a = (select value from boolvalues where not value); Filter: (a = $0) (9 rows) +-- +-- Test run-time pruning of MergeAppend subnodes +-- +set enable_seqscan = off; +set enable_sort = off; +create table ma_test (a int) partition by range (a); +create table ma_test_p1 partition of ma_test for values from (0) to (10); +create table ma_test_p2 partition of ma_test for values from (10) to (20); +create table ma_test_p3 partition of ma_test for values from (20) to (30); +insert into ma_test select x from generate_series(0,29) t(x); +create index on ma_test (a); +analyze ma_test; +prepare mt_q1 (int) as select * from ma_test where a >= $1 and a % 10 = 5 order by a; +-- Execute query 5 times to allow choose_custom_plan +-- to start considering a generic plan. +execute mt_q1(0); + a +---- + 5 + 15 + 25 +(3 rows) + +execute mt_q1(0); + a +---- + 5 + 15 + 25 +(3 rows) + +execute mt_q1(0); + a +---- + 5 + 15 + 25 +(3 rows) + +execute mt_q1(0); + a +---- + 5 + 15 + 25 +(3 rows) + +execute mt_q1(0); + a +---- + 5 + 15 + 25 +(3 rows) + +explain (analyze, costs off, summary off, timing off) execute mt_q1(15); + QUERY PLAN +------------------------------------------------------------------------------------ + Merge Append (actual rows=2 loops=1) + Sort Key: ma_test_p1.a + -> Index Only Scan using ma_test_p1_a_idx on ma_test_p1 (never executed) + Index Cond: (a >= $1) + Filter: ((a % 10) = 5) + Heap Fetches: 0 + -> Index Only Scan using ma_test_p2_a_idx on ma_test_p2 (actual rows=1 loops=1) + Index Cond: (a >= $1) + Filter: ((a % 10) = 5) + Rows Removed by Filter: 4 + Heap Fetches: 5 + -> Index Only Scan using ma_test_p3_a_idx on ma_test_p3 (actual rows=1 loops=1) + Index Cond: (a >= $1) + Filter: ((a % 10) = 5) + Rows Removed by Filter: 9 + Heap Fetches: 10 +(16 rows) + +execute mt_q1(15); + a +---- + 15 + 25 +(2 rows) + +explain (analyze, costs off, summary off, timing off) execute mt_q1(25); + QUERY PLAN +------------------------------------------------------------------------------------ + Merge Append (actual rows=1 loops=1) + Sort Key: ma_test_p1.a + -> Index Only Scan using ma_test_p1_a_idx on ma_test_p1 (never executed) + Index Cond: (a >= $1) + Filter: ((a % 10) = 5) + Heap Fetches: 0 + -> Index Only Scan using ma_test_p2_a_idx on ma_test_p2 (never executed) + Index Cond: (a >= $1) + Filter: ((a % 10) = 5) + Heap Fetches: 0 + -> Index Only Scan using ma_test_p3_a_idx on ma_test_p3 (actual rows=1 loops=1) + Index Cond: (a >= $1) + Filter: ((a % 10) = 5) + Rows Removed by Filter: 4 + Heap Fetches: 5 +(15 rows) + +execute mt_q1(25); + a +---- + 25 +(1 row) + +deallocate mt_q1; +-- ensure initplan params properly prune partitions +explain (analyze, costs off, summary off, timing off) select * from ma_test where a >= (select min(a) from ma_test_p2) order by a; + QUERY PLAN +----------------------------------------------------------------------------------------------------------------- + Merge Append (actual rows=20 loops=1) + Sort Key: ma_test_p1.a + InitPlan 2 (returns $1) + -> Result (actual rows=1 loops=1) + InitPlan 1 (returns $0) + -> Limit (actual rows=1 loops=1) + -> Index Only Scan using ma_test_p2_a_idx on ma_test_p2 ma_test_p2_1 (actual rows=1 loops=1) + Index Cond: (a IS NOT NULL) + Heap Fetches: 1 + -> Index Only Scan using ma_test_p1_a_idx on ma_test_p1 (never executed) + Index Cond: (a >= $1) + Heap Fetches: 0 + -> Index Only Scan using ma_test_p2_a_idx on ma_test_p2 (actual rows=10 loops=1) + Index Cond: (a >= $1) + Heap Fetches: 10 + -> Index Only Scan using ma_test_p3_a_idx on ma_test_p3 (actual rows=10 loops=1) + Index Cond: (a >= $1) + Heap Fetches: 10 +(18 rows) + +reset enable_seqscan; +reset enable_sort; +drop table ma_test; diff --git a/src/test/regress/sql/partition_prune.sql b/src/test/regress/sql/partition_prune.sql index 73b4d10..1f31204 100644 --- a/src/test/regress/sql/partition_prune.sql +++ b/src/test/regress/sql/partition_prune.sql @@ -565,3 +565,41 @@ select * from boolp where a = (select value from boolvalues where value); explain (analyze, costs off, summary off, timing off) select * from boolp where a = (select value from boolvalues where not value); + +-- +-- Test run-time pruning of MergeAppend subnodes +-- +set enable_seqscan = off; +set enable_sort = off; +create table ma_test (a int) partition by range (a); +create table ma_test_p1 partition of ma_test for values from (0) to (10); +create table ma_test_p2 partition of ma_test for values from (10) to (20); +create table ma_test_p3 partition of ma_test for values from (20) to (30); +insert into ma_test select x from generate_series(0,29) t(x); +create index on ma_test (a); + +analyze ma_test; +prepare mt_q1 (int) as select * from ma_test where a >= $1 and a % 10 = 5 order by a; + +-- Execute query 5 times to allow choose_custom_plan +-- to start considering a generic plan. +execute mt_q1(0); +execute mt_q1(0); +execute mt_q1(0); +execute mt_q1(0); +execute mt_q1(0); + +explain (analyze, costs off, summary off, timing off) execute mt_q1(15); +execute mt_q1(15); +explain (analyze, costs off, summary off, timing off) execute mt_q1(25); +execute mt_q1(25); + +deallocate mt_q1; + +-- ensure initplan params properly prune partitions +explain (analyze, costs off, summary off, timing off) select * from ma_test where a >= (select min(a) from ma_test_p2) order by a; + +reset enable_seqscan; +reset enable_sort; + +drop table ma_test; -- 1.9.5.msysgit.1