diff --git a/src/backend/optimizer/plan/analyzejoins.c b/src/backend/optimizer/plan/analyzejoins.c index 511603b..223dfe1 100644 --- a/src/backend/optimizer/plan/analyzejoins.c +++ b/src/backend/optimizer/plan/analyzejoins.c @@ -150,10 +150,12 @@ clause_sides_match_join(RestrictInfo *rinfo, Relids outerrelids, * it will just duplicate its left input. * * This is true for a left join for which the join condition cannot match - * more than one inner-side row. (There are other possibly interesting - * cases, but we don't have the infrastructure to prove them.) We also - * have to check that the inner side doesn't generate any variables needed - * above the join. + * more than one inner-side row. We can also allow removal of joins to + * relations that may match more than one inner-side row if a DISTINCT or + * GROUP BY clause would subsequently remove any duplicates caused by the + * join. (There are other possibly interesting cases, but we don't have the + * infrastructure to prove them.) We also have to check that the inner side + * doesn't generate any variables needed above the join. */ static bool join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo) @@ -237,6 +239,22 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo) } /* + * When a DISTINCT or GROUP BY clause is present, the unreferenced + * relation's join has no ability to duplicate rows in result set, as any + * duplicate rows would been removed by the DISTINCT/GROUP BY clause + * anyway. However, we're unable to apply this when aggregate functions + * are present as we must aggregate any duplicated rows. We needn't bother + * checking the actual distinct/grouping exprs here, as we already know + * from the above checks that no Var is present from the relation we're + * trying to remove. + */ + if (root->parse->distinctClause != NIL) + return true; + + if (root->parse->groupClause != NIL && !root->parse->hasAggs) + return true; + + /* * Search for mergejoinable clauses that constrain the inner rel against * either the outer rel or a pseudoconstant. If an operator is * mergejoinable then it behaves like equality for some btree opclass, so @@ -597,15 +615,25 @@ rel_supports_distinctness(PlannerInfo *root, RelOptInfo *rel) if (rel->rtekind == RTE_RELATION) { /* - * For a plain relation, we only know how to prove uniqueness by - * reference to unique indexes. Make sure there's at least one - * suitable unique index. It must be immediately enforced, and if - * it's a partial index, it must match the query. (Keep these - * conditions in sync with relation_has_unique_index_for!) + * For a plain relation, we can make use of DISTINCT or GROUP BY + * clauses as unique proofs. We also know how to prove uniqueness by + * reference to unique indexes. */ ListCell *lc; - foreach(lc, rel->indexlist) + if (root->parse->distinctClause != NIL) + return true; + + if (root->parse->groupClause != NIL && !root->parse->hasAggs) + return true; + + /* + * Make sure there's at least one suitable unique index. It must be + * immediately enforced, and if it's a partial index, it must match + * the query. (Keep these conditions in sync with + * relation_has_unique_index_for!) + */ + foreach(lc, rel->indexlist) { IndexOptInfo *ind = (IndexOptInfo *) lfirst(lc); diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index f47449b..cd25b45 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -4051,6 +4051,59 @@ select d.* from d left join (select id from a union select id from b) s Seq Scan on d (1 row) +-- check join removal works without a unique index on the left joined table +-- when a DISTINCT or GROUP BY is present which would remove row duplication +explain (costs off) +select distinct a.*,b.* from a left join b on a.b_id = b.id left join d + on a.b_id = d.a; + QUERY PLAN +----------------------------------------- + HashAggregate + Group Key: a.id, a.b_id, b.id, b.c_id + -> Hash Left Join + Hash Cond: (a.b_id = b.id) + -> Seq Scan on a + -> Hash + -> Seq Scan on b +(7 rows) + +-- as above but with GROUP BY +explain (costs off) +select a.id,b.id from a left join b on a.b_id = b.id left join d + on a.b_id = d.a group by a.id,b.id; + QUERY PLAN +------------------------------------ + HashAggregate + Group Key: a.id, b.id + -> Hash Left Join + Hash Cond: (a.b_id = b.id) + -> Seq Scan on a + -> Hash + -> Seq Scan on b +(7 rows) + +-- ensure no join removal when we must aggregate any duplicated rows +explain (costs off) +select a.id,b.id,count(*) from a left join b on a.b_id = b.id left join d + on a.b_id = d.a group by a.id,b.id; + QUERY PLAN +------------------------------------------------ + HashAggregate + Group Key: a.id, b.id + -> Merge Left Join + Merge Cond: (a.b_id = d.a) + -> Sort + Sort Key: a.b_id + -> Hash Left Join + Hash Cond: (a.b_id = b.id) + -> Seq Scan on a + -> Hash + -> Seq Scan on b + -> Sort + Sort Key: d.a + -> Seq Scan on d +(14 rows) + -- check join removal with a cross-type comparison operator explain (costs off) select i8.* from int8_tbl i8 left join (select f1 from int4_tbl group by f1) i4 diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql index d847d53..f14e2e8 100644 --- a/src/test/regress/sql/join.sql +++ b/src/test/regress/sql/join.sql @@ -1331,6 +1331,22 @@ explain (costs off) select d.* from d left join (select id from a union select id from b) s on d.a = s.id; +-- check join removal works without a unique index on the left joined table +-- when a DISTINCT or GROUP BY is present which would remove row duplication +explain (costs off) +select distinct a.*,b.* from a left join b on a.b_id = b.id left join d + on a.b_id = d.a; + +-- as above but with GROUP BY +explain (costs off) +select a.id,b.id from a left join b on a.b_id = b.id left join d + on a.b_id = d.a group by a.id,b.id; + +-- ensure no join removal when we must aggregate any duplicated rows +explain (costs off) +select a.id,b.id,count(*) from a left join b on a.b_id = b.id left join d + on a.b_id = d.a group by a.id,b.id; + -- check join removal with a cross-type comparison operator explain (costs off) select i8.* from int8_tbl i8 left join (select f1 from int4_tbl group by f1) i4