From 066f0c0b60a738cb6766542b91c894905b672af2 Mon Sep 17 00:00:00 2001 From: Antonin Houska Date: Mon, 18 May 2020 14:28:31 +0200 Subject: [PATCH 3/3] Use also partial paths as the input for grouped paths. --- src/backend/optimizer/path/allpaths.c | 46 +++++- src/backend/optimizer/util/relnode.c | 46 +++--- src/test/regress/expected/agg_pushdown.out | 157 +++++++++++++++++++++ src/test/regress/sql/agg_pushdown.sql | 66 +++++++++ 4 files changed, 285 insertions(+), 30 deletions(-) diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index 84a918dc58..e0b5418de4 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -128,7 +128,7 @@ static void set_worktable_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte); static void add_grouped_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath, AggStrategy aggstrategy, - RelAggInfo *agg_info); + RelAggInfo *agg_info, bool partial); static RelOptInfo *make_rel_from_joinlist(PlannerInfo *root, List *joinlist); static bool subquery_is_pushdown_safe(Query *subquery, Query *topquery, pushdown_safety_info *safetyInfo); @@ -3021,6 +3021,7 @@ generate_grouping_paths(PlannerInfo *root, RelOptInfo *rel_grouped, RelOptInfo *rel_plain, RelAggInfo *agg_info) { ListCell *lc; + Path *path; if (IS_DUMMY_REL(rel_plain)) { @@ -3030,7 +3031,7 @@ generate_grouping_paths(PlannerInfo *root, RelOptInfo *rel_grouped, foreach(lc, rel_plain->pathlist) { - Path *path = (Path *) lfirst(lc); + path = (Path *) lfirst(lc); /* * Since the path originates from the non-grouped relation which is @@ -3044,7 +3045,8 @@ generate_grouping_paths(PlannerInfo *root, RelOptInfo *rel_grouped, * add_grouped_path() will check whether the path has suitable * pathkeys. */ - add_grouped_path(root, rel_grouped, path, AGG_SORTED, agg_info); + add_grouped_path(root, rel_grouped, path, AGG_SORTED, agg_info, + false); /* * Repeated creation of hash table (for new parameter values) should @@ -3052,12 +3054,38 @@ generate_grouping_paths(PlannerInfo *root, RelOptInfo *rel_grouped, * efficiency. */ if (path->param_info == NULL) - add_grouped_path(root, rel_grouped, path, AGG_HASHED, agg_info); + add_grouped_path(root, rel_grouped, path, AGG_HASHED, agg_info, + false); } /* Could not generate any grouped paths? */ if (rel_grouped->pathlist == NIL) + { mark_dummy_rel(rel_grouped); + return; + } + + /* + * Almost the same for partial paths. + * + * The difference is that parameterized paths are never created, see + * add_partial_path() for explanation. + */ + foreach(lc, rel_plain->partial_pathlist) + { + path = (Path *) lfirst(lc); + + if (path->param_info != NULL) + continue; + + path = (Path *) create_projection_path(root, rel_grouped, path, + agg_info->agg_input); + + add_grouped_path(root, rel_grouped, path, AGG_SORTED, agg_info, + true); + add_grouped_path(root, rel_grouped, path, AGG_HASHED, agg_info, + true); + } } /* @@ -3065,7 +3093,8 @@ generate_grouping_paths(PlannerInfo *root, RelOptInfo *rel_grouped, */ static void add_grouped_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath, - AggStrategy aggstrategy, RelAggInfo *agg_info) + AggStrategy aggstrategy, RelAggInfo *agg_info, + bool partial) { Path *agg_path; @@ -3081,7 +3110,12 @@ add_grouped_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath, /* Add the grouped path to the list of grouped base paths. */ if (agg_path != NULL) - add_path(rel, (Path *) agg_path); + { + if (!partial) + add_path(rel, (Path *) agg_path); + else + add_partial_path(rel, (Path *) agg_path); + } } /* diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c index 7460794b46..4164f4b0e4 100644 --- a/src/backend/optimizer/util/relnode.c +++ b/src/backend/optimizer/util/relnode.c @@ -958,33 +958,12 @@ build_join_rel(PlannerInfo *root, build_joinrel_partition_info(joinrel, outer_rel, inner_rel, restrictlist, sjinfo->jointype); + /* + * Set estimates of the joinrel's size. + */ if (!grouped) - { - /* - * Set estimates of the joinrel's size. - */ set_joinrel_size_estimates(root, joinrel, outer_rel, inner_rel, sjinfo, restrictlist); - - /* - * Set the consider_parallel flag if this joinrel could potentially be - * scanned within a parallel worker. If this flag is false for either - * inner_rel or outer_rel, then it must be false for the joinrel also. - * Even if both are true, there might be parallel-restricted - * expressions in the targetlist or quals. - * - * Note that if there are more than two rels in this relation, they - * could be divided between inner_rel and outer_rel in any arbitrary - * way. We assume this doesn't matter, because we should hit all the - * same baserels and joinclauses while building up to this joinrel no - * matter which we take; therefore, we should make the same decision - * here however we get here. - */ - if (inner_rel->consider_parallel && outer_rel->consider_parallel && - is_parallel_safe(root, (Node *) restrictlist) && - is_parallel_safe(root, (Node *) joinrel->reltarget->exprs)) - joinrel->consider_parallel = true; - } else { /* @@ -1000,6 +979,25 @@ build_join_rel(PlannerInfo *root, agg_info->input_rows, NULL); } + /* + * Set the consider_parallel flag if this joinrel could potentially be + * scanned within a parallel worker. If this flag is false for either + * inner_rel or outer_rel, then it must be false for the joinrel also. + * Even if both are true, there might be parallel-restricted expressions + * in the targetlist or quals. + * + * Note that if there are more than two rels in this relation, they could + * be divided between inner_rel and outer_rel in any arbitrary way. We + * assume this doesn't matter, because we should hit all the same baserels + * and joinclauses while building up to this joinrel no matter which we + * take; therefore, we should make the same decision here however we get + * here. + */ + if (inner_rel->consider_parallel && outer_rel->consider_parallel && + is_parallel_safe(root, (Node *) restrictlist) && + is_parallel_safe(root, (Node *) joinrel->reltarget->exprs)) + joinrel->consider_parallel = true; + /* Add the joinrel to the PlannerInfo. */ if (!grouped) add_join_rel(root, joinrel); diff --git a/src/test/regress/expected/agg_pushdown.out b/src/test/regress/expected/agg_pushdown.out index 413abc147e..66d36d122e 100644 --- a/src/test/regress/expected/agg_pushdown.out +++ b/src/test/regress/expected/agg_pushdown.out @@ -91,6 +91,7 @@ AS c1 ON c1.parent = p.i GROUP BY p.i; -> Seq Scan on agg_pushdown_child1 c1 (12 rows) +-- Restore the default values. SET enable_nestloop TO on; SET enable_hashjoin TO on; -- Scan index on agg_pushdown_child1(parent) column and aggregate the result @@ -213,3 +214,159 @@ c2.parent = p.i WHERE c1.j = c2.k GROUP BY p.i; -> Index Only Scan using agg_pushdown_parent_pkey on agg_pushdown_parent p (13 rows) +-- Most of the tests above with parallel query processing enforced. +SET min_parallel_index_scan_size = 0; +SET min_parallel_table_scan_size = 0; +SET parallel_setup_cost = 0; +SET parallel_tuple_cost = 0; +-- Partially aggregate a single relation. +-- +-- Nestloop join. +SET enable_nestloop TO on; +SET enable_hashjoin TO off; +SET enable_mergejoin TO off; +EXPLAIN (COSTS off) +SELECT p.x, avg(c1.v) FROM agg_pushdown_parent AS p JOIN agg_pushdown_child1 +AS c1 ON c1.parent = p.i GROUP BY p.i; + QUERY PLAN +------------------------------------------------------------------------------------------------------------ + Finalize GroupAggregate + Group Key: p.i + -> Gather Merge + Workers Planned: 1 + -> Nested Loop + -> Partial GroupAggregate + Group Key: c1.parent + -> Parallel Index Scan using agg_pushdown_child1_parent_idx on agg_pushdown_child1 c1 + -> Index Scan using agg_pushdown_parent_pkey on agg_pushdown_parent p + Index Cond: (i = c1.parent) +(10 rows) + +-- Hash join. +SET enable_nestloop TO off; +SET enable_hashjoin TO on; +EXPLAIN (COSTS off) +SELECT p.i, avg(c1.v) FROM agg_pushdown_parent AS p JOIN agg_pushdown_child1 +AS c1 ON c1.parent = p.i GROUP BY p.i; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------ + Finalize GroupAggregate + Group Key: p.i + -> Sort + Sort Key: p.i + -> Gather + Workers Planned: 1 + -> Parallel Hash Join + Hash Cond: (c1.parent = p.i) + -> Partial GroupAggregate + Group Key: c1.parent + -> Parallel Index Scan using agg_pushdown_child1_parent_idx on agg_pushdown_child1 c1 + -> Parallel Hash + -> Parallel Index Only Scan using agg_pushdown_parent_pkey on agg_pushdown_parent p +(13 rows) + +-- Merge join. +SET enable_hashjoin TO off; +SET enable_mergejoin TO on; +EXPLAIN (COSTS off) +SELECT p.i, avg(c1.v) FROM agg_pushdown_parent AS p JOIN agg_pushdown_child1 +AS c1 ON c1.parent = p.i GROUP BY p.i; + QUERY PLAN +------------------------------------------------------------------------------------------------------------ + Finalize GroupAggregate + Group Key: p.i + -> Gather Merge + Workers Planned: 1 + -> Merge Join + Merge Cond: (c1.parent = p.i) + -> Partial GroupAggregate + Group Key: c1.parent + -> Parallel Index Scan using agg_pushdown_child1_parent_idx on agg_pushdown_child1 c1 + -> Index Only Scan using agg_pushdown_parent_pkey on agg_pushdown_parent p +(10 rows) + +SET enable_nestloop TO on; +SET enable_hashjoin TO on; +-- Perform nestloop join between agg_pushdown_child1 and agg_pushdown_child2 +-- and aggregate the result. +SET enable_nestloop TO on; +SET enable_hashjoin TO off; +SET enable_mergejoin TO off; +EXPLAIN (COSTS off) +SELECT p.i, avg(c1.v + c2.v) FROM agg_pushdown_parent AS p JOIN +agg_pushdown_child1 AS c1 ON c1.parent = p.i JOIN agg_pushdown_child2 AS c2 ON +c2.parent = p.i WHERE c1.j = c2.k GROUP BY p.i; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------ + Finalize GroupAggregate + Group Key: p.i + -> Gather Merge + Workers Planned: 2 + -> Sort + Sort Key: p.i + -> Nested Loop + -> Partial HashAggregate + Group Key: c1.parent + -> Nested Loop + -> Parallel Index Scan using agg_pushdown_child1_pkey on agg_pushdown_child1 c1 + -> Index Scan using agg_pushdown_child2_pkey on agg_pushdown_child2 c2 + Index Cond: ((k = c1.j) AND (parent = c1.parent)) + -> Index Only Scan using agg_pushdown_parent_pkey on agg_pushdown_parent p + Index Cond: (i = c1.parent) +(15 rows) + +-- The same for hash join. +SET enable_nestloop TO off; +SET enable_hashjoin TO on; +EXPLAIN (COSTS off) +SELECT p.i, avg(c1.v + c2.v) FROM agg_pushdown_parent AS p JOIN +agg_pushdown_child1 AS c1 ON c1.parent = p.i JOIN agg_pushdown_child2 AS c2 ON +c2.parent = p.i WHERE c1.j = c2.k GROUP BY p.i; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------ + Finalize GroupAggregate + Group Key: p.i + -> Gather Merge + Workers Planned: 1 + -> Sort + Sort Key: p.i + -> Parallel Hash Join + Hash Cond: (c1.parent = p.i) + -> Partial HashAggregate + Group Key: c1.parent + -> Parallel Hash Join + Hash Cond: ((c1.parent = c2.parent) AND (c1.j = c2.k)) + -> Parallel Index Scan using agg_pushdown_child1_parent_idx on agg_pushdown_child1 c1 + -> Parallel Hash + -> Parallel Index Scan using agg_pushdown_child2_pkey on agg_pushdown_child2 c2 + -> Parallel Hash + -> Parallel Index Only Scan using agg_pushdown_parent_pkey on agg_pushdown_parent p +(17 rows) + +-- The same for merge join. +SET enable_hashjoin TO off; +SET enable_mergejoin TO on; +SET enable_seqscan TO off; +EXPLAIN (COSTS off) +SELECT p.i, avg(c1.v + c2.v) FROM agg_pushdown_parent AS p JOIN +agg_pushdown_child1 AS c1 ON c1.parent = p.i JOIN agg_pushdown_child2 AS c2 ON +c2.parent = p.i WHERE c1.j = c2.k GROUP BY p.i; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------ + Finalize GroupAggregate + Group Key: p.i + -> Gather Merge + Workers Planned: 2 + -> Merge Join + Merge Cond: (c1.parent = p.i) + -> Sort + Sort Key: c1.parent + -> Partial HashAggregate + Group Key: c1.parent + -> Merge Join + Merge Cond: ((c1.j = c2.k) AND (c1.parent = c2.parent)) + -> Parallel Index Scan using agg_pushdown_child1_pkey on agg_pushdown_child1 c1 + -> Index Scan using agg_pushdown_child2_pkey on agg_pushdown_child2 c2 + -> Index Only Scan using agg_pushdown_parent_pkey on agg_pushdown_parent p +(15 rows) + diff --git a/src/test/regress/sql/agg_pushdown.sql b/src/test/regress/sql/agg_pushdown.sql index 6bbbc7a8a1..49ba6dd67c 100644 --- a/src/test/regress/sql/agg_pushdown.sql +++ b/src/test/regress/sql/agg_pushdown.sql @@ -61,6 +61,7 @@ EXPLAIN (COSTS off) SELECT p.i, avg(c1.v) FROM agg_pushdown_parent AS p JOIN agg_pushdown_child1 AS c1 ON c1.parent = p.i GROUP BY p.i; +-- Restore the default values. SET enable_nestloop TO on; SET enable_hashjoin TO on; @@ -112,3 +113,68 @@ EXPLAIN (COSTS off) SELECT p.i, avg(c1.v + c2.v) FROM agg_pushdown_parent AS p JOIN agg_pushdown_child1 AS c1 ON c1.parent = p.i JOIN agg_pushdown_child2 AS c2 ON c2.parent = p.i WHERE c1.j = c2.k GROUP BY p.i; + +-- Most of the tests above with parallel query processing enforced. +SET min_parallel_index_scan_size = 0; +SET min_parallel_table_scan_size = 0; +SET parallel_setup_cost = 0; +SET parallel_tuple_cost = 0; + +-- Partially aggregate a single relation. +-- +-- Nestloop join. +SET enable_nestloop TO on; +SET enable_hashjoin TO off; +SET enable_mergejoin TO off; +EXPLAIN (COSTS off) +SELECT p.x, avg(c1.v) FROM agg_pushdown_parent AS p JOIN agg_pushdown_child1 +AS c1 ON c1.parent = p.i GROUP BY p.i; + +-- Hash join. +SET enable_nestloop TO off; +SET enable_hashjoin TO on; + +EXPLAIN (COSTS off) +SELECT p.i, avg(c1.v) FROM agg_pushdown_parent AS p JOIN agg_pushdown_child1 +AS c1 ON c1.parent = p.i GROUP BY p.i; + +-- Merge join. +SET enable_hashjoin TO off; +SET enable_mergejoin TO on; + +EXPLAIN (COSTS off) +SELECT p.i, avg(c1.v) FROM agg_pushdown_parent AS p JOIN agg_pushdown_child1 +AS c1 ON c1.parent = p.i GROUP BY p.i; + +SET enable_nestloop TO on; +SET enable_hashjoin TO on; + +-- Perform nestloop join between agg_pushdown_child1 and agg_pushdown_child2 +-- and aggregate the result. +SET enable_nestloop TO on; +SET enable_hashjoin TO off; +SET enable_mergejoin TO off; + +EXPLAIN (COSTS off) +SELECT p.i, avg(c1.v + c2.v) FROM agg_pushdown_parent AS p JOIN +agg_pushdown_child1 AS c1 ON c1.parent = p.i JOIN agg_pushdown_child2 AS c2 ON +c2.parent = p.i WHERE c1.j = c2.k GROUP BY p.i; + +-- The same for hash join. +SET enable_nestloop TO off; +SET enable_hashjoin TO on; + +EXPLAIN (COSTS off) +SELECT p.i, avg(c1.v + c2.v) FROM agg_pushdown_parent AS p JOIN +agg_pushdown_child1 AS c1 ON c1.parent = p.i JOIN agg_pushdown_child2 AS c2 ON +c2.parent = p.i WHERE c1.j = c2.k GROUP BY p.i; + +-- The same for merge join. +SET enable_hashjoin TO off; +SET enable_mergejoin TO on; +SET enable_seqscan TO off; + +EXPLAIN (COSTS off) +SELECT p.i, avg(c1.v + c2.v) FROM agg_pushdown_parent AS p JOIN +agg_pushdown_child1 AS c1 ON c1.parent = p.i JOIN agg_pushdown_child2 AS c2 ON +c2.parent = p.i WHERE c1.j = c2.k GROUP BY p.i; -- 2.20.1