From d720c0b37b28fea3afa66233ab467afeabe9dd77 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=B8=80=E6=8C=83?= Date: Wed, 6 May 2020 16:04:10 +0800 Subject: [PATCH v7 4/6] Remove distinct node & AggNode if the input is unique already. For the AggNode removal, currently we have to check if there is no agg function in targetlist. --- src/backend/optimizer/plan/planner.c | 30 +- src/test/regress/expected/aggregates.out | 73 ++-- src/test/regress/expected/select_distinct.out | 335 ++++++++++++++++++ src/test/regress/sql/aggregates.sql | 6 +- src/test/regress/sql/select_distinct.sql | 118 ++++++ 5 files changed, 512 insertions(+), 50 deletions(-) diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 654fa1fe35..52fa7c5a45 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -3812,7 +3812,27 @@ create_grouping_paths(PlannerInfo *root, { Query *parse = root->parse; RelOptInfo *grouped_rel; + bool group_unique_input = false; RelOptInfo *partially_grouped_rel; + List *groupExprs = NIL; + + if (root->parse->groupingSets == NIL) + { + groupExprs = get_sortgrouplist_exprs(parse->groupClause, + parse->targetList); + /* + * If the groupby clauses is unique already, groupping node is not necessary + * if there is no aggregation functions. + */ + group_unique_input = relation_has_uniquekeys_for(root, input_rel, + groupExprs, false); + if (group_unique_input && + groupExprs != NIL && + !parse->hasAggs && + !parse->hasWindowFuncs && + parse->havingQual == NULL) + return input_rel; + } /* * Create grouping relation to hold fully aggregated grouping and/or @@ -4739,6 +4759,12 @@ create_distinct_paths(PlannerInfo *root, bool allow_hash; Path *path; ListCell *lc; + List *distinctExprs = get_sortgrouplist_exprs(parse->distinctClause, + parse->targetList); + + /* If the result is unique already, we just return the input_rel directly */ + if (relation_has_uniquekeys_for(root, input_rel, distinctExprs, false)) + return input_rel; /* For now, do all work in the (DISTINCT, NULL) upperrel */ distinct_rel = fetch_upper_rel(root, UPPERREL_DISTINCT, NULL); @@ -4776,10 +4802,6 @@ create_distinct_paths(PlannerInfo *root, /* * Otherwise, the UNIQUE filter has effects comparable to GROUP BY. */ - List *distinctExprs; - - distinctExprs = get_sortgrouplist_exprs(parse->distinctClause, - parse->targetList); numDistinctRows = estimate_num_groups(root, distinctExprs, cheapest_input_path->rows, NULL); diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out index 14cdcfcca6..42bd180895 100644 --- a/src/test/regress/expected/aggregates.out +++ b/src/test/regress/expected/aggregates.out @@ -870,14 +870,12 @@ explain (costs off) select distinct max(unique2) from tenk1; QUERY PLAN --------------------------------------------------------------------- - HashAggregate - Group Key: $0 + Result InitPlan 1 (returns $0) -> Limit -> Index Only Scan Backward using tenk1_unique2 on tenk1 Index Cond: (unique2 IS NOT NULL) - -> Result -(7 rows) +(5 rows) select distinct max(unique2) from tenk1; max @@ -1036,7 +1034,7 @@ explain (costs off) select distinct min(f1), max(f1) from minmaxtest; QUERY PLAN --------------------------------------------------------------------------------------------- - Unique + Result InitPlan 1 (returns $0) -> Limit -> Merge Append @@ -1059,10 +1057,7 @@ explain (costs off) -> Index Only Scan using minmaxtest2i on minmaxtest2 minmaxtest_8 Index Cond: (f1 IS NOT NULL) -> Index Only Scan Backward using minmaxtest3i on minmaxtest3 minmaxtest_9 - -> Sort - Sort Key: ($0), ($1) - -> Result -(26 rows) +(23 rows) select distinct min(f1), max(f1) from minmaxtest; min | max @@ -1092,12 +1087,10 @@ create temp table t2 (x int, y int, z int, primary key (x, y)); create temp table t3 (a int, b int, c int, primary key(a, b) deferrable); -- Non-primary-key columns can be removed from GROUP BY explain (costs off) select * from t1 group by a,b,c,d; - QUERY PLAN ----------------------- - HashAggregate - Group Key: a, b - -> Seq Scan on t1 -(3 rows) + QUERY PLAN +---------------- + Seq Scan on t1 +(1 row) -- No removal can happen if the complete PK is not present in GROUP BY explain (costs off) select a,c from t1 group by a,c,d; @@ -1109,29 +1102,27 @@ explain (costs off) select a,c from t1 group by a,c,d; (3 rows) -- Test removal across multiple relations -explain (costs off) select * +explain (costs off) select t2.* from t1 inner join t2 on t1.a = t2.x and t1.b = t2.y -group by t1.a,t1.b,t1.c,t1.d,t2.x,t2.y,t2.z; - QUERY PLAN ------------------------------------------------------- - HashAggregate - Group Key: t1.a, t1.b, t2.x, t2.y - -> Hash Join - Hash Cond: ((t2.x = t1.a) AND (t2.y = t1.b)) - -> Seq Scan on t2 - -> Hash - -> Seq Scan on t1 -(7 rows) +group by t1.a,t1.c,t1.d,t2.x,t2.y,t2.z; + QUERY PLAN +------------------------------------------------ + Hash Join + Hash Cond: ((t2.x = t1.a) AND (t2.y = t1.b)) + -> Seq Scan on t2 + -> Hash + -> Seq Scan on t1 +(5 rows) -- Test case where t1 can be optimized but not t2 explain (costs off) select t1.*,t2.x,t2.z -from t1 inner join t2 on t1.a = t2.x and t1.b = t2.y +from t1 right join t2 on t1.a = t2.x and t1.b = t2.y group by t1.a,t1.b,t1.c,t1.d,t2.x,t2.z; QUERY PLAN ------------------------------------------------------ HashAggregate Group Key: t1.a, t1.b, t2.x, t2.z - -> Hash Join + -> Hash Left Join Hash Cond: ((t2.x = t1.a) AND (t2.y = t1.b)) -> Seq Scan on t2 -> Hash @@ -1161,12 +1152,10 @@ explain (costs off) select * from t1 group by a,b,c,d; -- Okay to remove columns if we're only querying the parent. explain (costs off) select * from only t1 group by a,b,c,d; - QUERY PLAN ----------------------- - HashAggregate - Group Key: a, b - -> Seq Scan on t1 -(3 rows) + QUERY PLAN +---------------- + Seq Scan on t1 +(1 row) create temp table p_t1 ( a int, @@ -1179,14 +1168,12 @@ create temp table p_t1_1 partition of p_t1 for values in(1); create temp table p_t1_2 partition of p_t1 for values in(2); -- Ensure we can remove non-PK columns for partitioned tables. explain (costs off) select * from p_t1 group by a,b,c,d; - QUERY PLAN --------------------------------- - HashAggregate - Group Key: p_t1.a, p_t1.b - -> Append - -> Seq Scan on p_t1_1 - -> Seq Scan on p_t1_2 -(5 rows) + QUERY PLAN +-------------------------- + Append + -> Seq Scan on p_t1_1 + -> Seq Scan on p_t1_2 +(3 rows) drop table t1 cascade; NOTICE: drop cascades to table t1c diff --git a/src/test/regress/expected/select_distinct.out b/src/test/regress/expected/select_distinct.out index 11c6f50fbf..eeee17b468 100644 --- a/src/test/regress/expected/select_distinct.out +++ b/src/test/regress/expected/select_distinct.out @@ -306,3 +306,338 @@ SELECT null IS NOT DISTINCT FROM null as "yes"; t (1 row) +CREATE TABLE uqk1(a int, pk int primary key, c int, d int); +CREATE TABLE uqk2(a int, pk int primary key, c int, d int); +INSERT INTO uqk1 VALUES(1, 1, 1, 1), (2, 2, 2, 2), (3, 3, 3, 3); +INSERT INTO uqk2 VALUES(1, 1, 1, 1), (4, 4, 4, 4), (5, 5, 5, 5); +ANALYZE uqk1; +ANALYZE uqk2; +-- Test single table +EXPLAIN (COSTS OFF) SELECT DISTINCT * FROM uqk1; + QUERY PLAN +------------------ + Seq Scan on uqk1 +(1 row) + +EXPLAIN (COSTS OFF) SELECT DISTINCT c, d FROM uqk1; + QUERY PLAN +------------------------------ + Unique + -> Sort + Sort Key: c, d + -> Seq Scan on uqk1 +(4 rows) + +CREATE UNIQUE INDEX uqk1_ukcd ON uqk1(c, d); +EXPLAIN (COSTS OFF) SELECT DISTINCT c, d FROM uqk1; + QUERY PLAN +------------------------------ + Unique + -> Sort + Sort Key: c, d + -> Seq Scan on uqk1 +(4 rows) + +EXPLAIN (COSTS OFF) SELECT DISTINCT c, d FROM uqk1 WHERE c is NOT NULL; + QUERY PLAN +--------------------------------------- + Unique + -> Sort + Sort Key: c, d + -> Seq Scan on uqk1 + Filter: (c IS NOT NULL) +(5 rows) + +ALTER TABLE uqk1 ALTER COLUMN d SET NOT NULL; +EXPLAIN (COSTS OFF) SELECT DISTINCT c, d FROM uqk1 WHERE c is NOT NULL; + QUERY PLAN +--------------------------- + Seq Scan on uqk1 + Filter: (c IS NOT NULL) +(2 rows) + +EXPLAIN (COSTS OFF) SELECT DISTINCT d FROM uqk1 WHERE c is NOT NULL; + QUERY PLAN +--------------------------------- + HashAggregate + Group Key: d + -> Seq Scan on uqk1 + Filter: (c IS NOT NULL) +(4 rows) + +EXPLAIN (COSTS OFF) SELECT DISTINCT a FROM uqk1 WHERE pk = 1; + QUERY PLAN +-------------------- + Seq Scan on uqk1 + Filter: (pk = 1) +(2 rows) + +-- Test join +-- both uqk1 (c, d) and uqk2(pk) are unique key, so distinct is not needed. +EXPLAIN (COSTS OFF) SELECT DISTINCT uqk1.c, uqk1.d FROM uqk1, uqk2 +WHERE uqk1.c is NOT NULL AND uqk1.a = uqk2.pk; + QUERY PLAN +--------------------------------- + Hash Join + Hash Cond: (uqk1.a = uqk2.pk) + -> Seq Scan on uqk1 + Filter: (c IS NOT NULL) + -> Hash + -> Seq Scan on uqk2 +(6 rows) + +-- Distinct is needed since the outer join allows multi null values. +EXPLAIN (COSTS OFF) SELECT DISTINCT uqk1.pk FROM uqk1 RIGHT JOIN uqk2 ON (uqk1.a = uqk2.pk) +order BY 1; + QUERY PLAN +--------------------------------------------- + Unique + -> Sort + Sort Key: uqk1.pk + -> Hash Right Join + Hash Cond: (uqk1.a = uqk2.pk) + -> Seq Scan on uqk1 + -> Hash + -> Seq Scan on uqk2 +(8 rows) + +SELECT DISTINCT uqk1.pk FROM uqk1 RIGHT JOIN uqk2 ON (uqk1.a = uqk2.pk) order BY 1; + pk +---- + 1 + +(2 rows) + +-- Distinct is not needed since uqk1 is the left table in outer join +EXPLAIN (COSTS OFF) SELECT DISTINCT uqk1.c, uqk1.d FROM uqk1 LEFT JOIN uqk2 ON (uqk1.a = uqk2.pk) +WHERE uqk1.c is NOT NULL order BY 1, 2; + QUERY PLAN +--------------------------------- + Sort + Sort Key: uqk1.c, uqk1.d + -> Seq Scan on uqk1 + Filter: (c IS NOT NULL) +(4 rows) + +SELECT DISTINCT uqk1.c, uqk1.d FROM uqk1 LEFT JOIN uqk2 ON (uqk1.a = uqk2.pk) +WHERE uqk1.c is NOT NULL order BY 1, 2; + c | d +---+--- + 1 | 1 + 2 | 2 + 3 | 3 +(3 rows) + +-- The combination of UniqueKey is still unique no mather the join clause +EXPLAIN (COSTS OFF) SELECT DISTINCT * FROM uqk1, uqk2; + QUERY PLAN +------------------------------ + Nested Loop + -> Seq Scan on uqk1 + -> Materialize + -> Seq Scan on uqk2 +(4 rows) + +SELECT DISTINCT * FROM uqk1, uqk2 order BY 1, 2, 3, 4, 5, 6; + a | pk | c | d | a | pk | c | d +---+----+---+---+---+----+---+--- + 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 + 1 | 1 | 1 | 1 | 4 | 4 | 4 | 4 + 1 | 1 | 1 | 1 | 5 | 5 | 5 | 5 + 2 | 2 | 2 | 2 | 1 | 1 | 1 | 1 + 2 | 2 | 2 | 2 | 4 | 4 | 4 | 4 + 2 | 2 | 2 | 2 | 5 | 5 | 5 | 5 + 3 | 3 | 3 | 3 | 1 | 1 | 1 | 1 + 3 | 3 | 3 | 3 | 4 | 4 | 4 | 4 + 3 | 3 | 3 | 3 | 5 | 5 | 5 | 5 +(9 rows) + +-- Test Semi/Anti JOIN +EXPLAIN (COSTS OFF) SELECT DISTINCT pk FROM uqk1 WHERE d in (SELECT d FROM uqk2); + QUERY PLAN +-------------------------------- + Hash Semi Join + Hash Cond: (uqk1.d = uqk2.d) + -> Seq Scan on uqk1 + -> Hash + -> Seq Scan on uqk2 +(5 rows) + +EXPLAIN (COSTS OFF) SELECT DISTINCT pk FROM uqk1 WHERE d NOT in (SELECT d FROM uqk2); + QUERY PLAN +------------------------------------ + Seq Scan on uqk1 + Filter: (NOT (hashed SubPlan 1)) + SubPlan 1 + -> Seq Scan on uqk2 +(4 rows) + +-- Test Unique Key FOR one-row case, DISTINCT is NOT needed as well. +-- uqk1.d is the a uniquekey due to onerow rule. uqk2.pk is pk +EXPLAIN (COSTS OFF) SELECT DISTINCT uqk1.d FROM uqk1, uqk2 WHERE uqk1.pk = 2 AND uqk1.d = uqk2.pk; + QUERY PLAN +----------------------------------- + Nested Loop + Join Filter: (uqk1.d = uqk2.pk) + -> Seq Scan on uqk1 + Filter: (pk = 2) + -> Seq Scan on uqk2 +(5 rows) + +SELECT DISTINCT uqk1.d FROM uqk1, uqk2 WHERE uqk1.pk = 2 AND uqk1.d = uqk2.pk order BY 1; + d +--- +(0 rows) + +-- Both uqk1.d AND uqk2.c are the a uniquekey due to onerow rule +EXPLAIN (COSTS OFF) SELECT DISTINCT uqk1.d FROM uqk1, uqk2 WHERE uqk1.pk = 2 +AND uqk2.pk = 1 AND uqk1.d = uqk2.d ; + QUERY PLAN +---------------------------------- + Nested Loop + Join Filter: (uqk1.d = uqk2.d) + -> Seq Scan on uqk1 + Filter: (pk = 2) + -> Seq Scan on uqk2 + Filter: (pk = 1) +(6 rows) + +SELECT DISTINCT uqk1.d FROM uqk1, uqk2 WHERE uqk1.pk = 2 AND uqk2.pk = 1 +AND uqk1.d = uqk2.d order BY 1; + d +--- +(0 rows) + +-- Both UniqueKey in targetList, so distinct is not needed. +EXPLAIN (COSTS OFF) SELECT DISTINCT uqk1.c, uqk2.c FROM uqk1, uqk2 WHERE uqk1.pk = 2 AND uqk2.pk = 1; + QUERY PLAN +-------------------------- + Nested Loop + -> Seq Scan on uqk1 + Filter: (pk = 2) + -> Seq Scan on uqk2 + Filter: (pk = 1) +(5 rows) + +SELECT DISTINCT uqk1.c, uqk2.c FROM uqk1, uqk2 WHERE uqk1.pk = 2 AND uqk2.pk = 1 order BY 1, 2; + c | c +---+--- + 2 | 1 +(1 row) + +-- Test SubQuery +-- t2(a, c) is UNIQUE key because of group by +EXPLAIN (COSTS OFF) SELECT DISTINCT t2.a, t2.c FROM uqk1 t1 inner JOIN +(SELECT a, c, sum(pk) as t2b FROM uqk2 group BY a, c) t2 +ON (t2.t2b = t1.pk); + QUERY PLAN +--------------------------------------- + Hash Join + Hash Cond: ((sum(uqk2.pk)) = t1.pk) + -> HashAggregate + Group Key: uqk2.a, uqk2.c + -> Seq Scan on uqk2 + -> Hash + -> Seq Scan on uqk1 t1 +(7 rows) + +-- Test Partition TABLE +-- Test partitioned TABLE +CREATE TABLE dist_p (a int, b int NOT NULL, c int NOT NULL, d int) PARTITION BY RANGE (b); +CREATE TABLE dist_p0 PARTITION OF dist_p FOR VALUES FROM (1) to (10); +CREATE TABLE dist_p1 PARTITION OF dist_p FOR VALUES FROM (11) to (20); +-- CREATE unqiue INDEX ON dist_p +CREATE UNIQUE INDEX dist_p_uk_b_c ON dist_p(b, c); +EXPLAIN (COSTS OFF) SELECT DISTINCT * FROM dist_p; + QUERY PLAN +------------------------------------ + Append + -> Seq Scan on dist_p0 dist_p_1 + -> Seq Scan on dist_p1 dist_p_2 +(3 rows) + +DROP INDEX dist_p_uk_b_c; +-- we also support CREATE unqiue INDEX ON each child tables +CREATE UNIQUE INDEX dist_p0_uk_bc ON dist_p0(b, c); +-- NOT ok, since dist_p1 have no such INDEX +EXPLAIN (COSTS OFF) SELECT DISTINCT * FROM dist_p; + QUERY PLAN +----------------------------------------------------- + HashAggregate + Group Key: dist_p.a, dist_p.b, dist_p.c, dist_p.d + -> Append + -> Seq Scan on dist_p0 dist_p_1 + -> Seq Scan on dist_p1 dist_p_2 +(5 rows) + +CREATE UNIQUE INDEX dist_p1_uk_bc ON dist_p1(b, c); +-- OK now +EXPLAIN (COSTS OFF) SELECT DISTINCT * FROM dist_p; + QUERY PLAN +------------------------------------ + Append + -> Seq Scan on dist_p0 dist_p_1 + -> Seq Scan on dist_p1 dist_p_2 +(3 rows) + +DROP INDEX dist_p0_uk_bc; +DROP INDEX dist_p1_uk_bc; +-- uk is same ON all child tables, however it doesn't include the partkey, so NOT ok as well. +CREATE UNIQUE INDEX dist_p0_uk_c ON dist_p0(c); +CREATE UNIQUE INDEX dist_p1_uk_c ON dist_p1(c); +EXPLAIN (COSTS OFF) SELECT DISTINCT * FROM dist_p; + QUERY PLAN +----------------------------------------------------- + HashAggregate + Group Key: dist_p.a, dist_p.b, dist_p.c, dist_p.d + -> Append + -> Seq Scan on dist_p0 dist_p_1 + -> Seq Scan on dist_p1 dist_p_2 +(5 rows) + +DROP TABLE dist_p; +-- Test some VIEW +CREATE VIEW distinct_v1 as SELECT DISTINCT c, d FROM uqk1 WHERE c is NOT NULL; +EXPLAIN (COSTS OFF) SELECT DISTINCT * FROM distinct_v1; + QUERY PLAN +--------------------------- + Seq Scan on uqk1 + Filter: (c IS NOT NULL) +(2 rows) + +ALTER TABLE uqk1 ALTER COLUMN d DROP NOT NULL; +EXPLAIN (COSTS OFF) SELECT DISTINCT * FROM distinct_v1; + QUERY PLAN +--------------------------------------- + Unique + -> Sort + Sort Key: uqk1.c, uqk1.d + -> Seq Scan on uqk1 + Filter: (c IS NOT NULL) +(5 rows) + +-- Test generic plan +ALTER TABLE uqk1 ALTER COLUMN d SET NOT NULL; +prepare pt as SELECT * FROM distinct_v1; +EXPLAIN (COSTS OFF) execute pt; + QUERY PLAN +--------------------------- + Seq Scan on uqk1 + Filter: (c IS NOT NULL) +(2 rows) + +ALTER TABLE uqk1 ALTER COLUMN d DROP NOT NULL; +EXPLAIN (COSTS OFF) execute pt; + QUERY PLAN +--------------------------------------- + Unique + -> Sort + Sort Key: uqk1.c, uqk1.d + -> Seq Scan on uqk1 + Filter: (c IS NOT NULL) +(5 rows) + +DEALLOCATE pt; +DROP VIEW distinct_v1; +DROP TABLE uqk1; +DROP TABLE uqk2; diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql index 9480abd577..3446c3e9fd 100644 --- a/src/test/regress/sql/aggregates.sql +++ b/src/test/regress/sql/aggregates.sql @@ -394,13 +394,13 @@ explain (costs off) select * from t1 group by a,b,c,d; explain (costs off) select a,c from t1 group by a,c,d; -- Test removal across multiple relations -explain (costs off) select * +explain (costs off) select t2.* from t1 inner join t2 on t1.a = t2.x and t1.b = t2.y -group by t1.a,t1.b,t1.c,t1.d,t2.x,t2.y,t2.z; +group by t1.a,t1.c,t1.d,t2.x,t2.y,t2.z; -- Test case where t1 can be optimized but not t2 explain (costs off) select t1.*,t2.x,t2.z -from t1 inner join t2 on t1.a = t2.x and t1.b = t2.y +from t1 right join t2 on t1.a = t2.x and t1.b = t2.y group by t1.a,t1.b,t1.c,t1.d,t2.x,t2.z; -- Cannot optimize when PK is deferrable diff --git a/src/test/regress/sql/select_distinct.sql b/src/test/regress/sql/select_distinct.sql index 33102744eb..19e8a3e4be 100644 --- a/src/test/regress/sql/select_distinct.sql +++ b/src/test/regress/sql/select_distinct.sql @@ -135,3 +135,121 @@ SELECT 1 IS NOT DISTINCT FROM 2 as "no"; SELECT 2 IS NOT DISTINCT FROM 2 as "yes"; SELECT 2 IS NOT DISTINCT FROM null as "no"; SELECT null IS NOT DISTINCT FROM null as "yes"; + + +CREATE TABLE uqk1(a int, pk int primary key, c int, d int); +CREATE TABLE uqk2(a int, pk int primary key, c int, d int); +INSERT INTO uqk1 VALUES(1, 1, 1, 1), (2, 2, 2, 2), (3, 3, 3, 3); +INSERT INTO uqk2 VALUES(1, 1, 1, 1), (4, 4, 4, 4), (5, 5, 5, 5); +ANALYZE uqk1; +ANALYZE uqk2; + +-- Test single table +EXPLAIN (COSTS OFF) SELECT DISTINCT * FROM uqk1; +EXPLAIN (COSTS OFF) SELECT DISTINCT c, d FROM uqk1; + +CREATE UNIQUE INDEX uqk1_ukcd ON uqk1(c, d); + +EXPLAIN (COSTS OFF) SELECT DISTINCT c, d FROM uqk1; +EXPLAIN (COSTS OFF) SELECT DISTINCT c, d FROM uqk1 WHERE c is NOT NULL; +ALTER TABLE uqk1 ALTER COLUMN d SET NOT NULL; +EXPLAIN (COSTS OFF) SELECT DISTINCT c, d FROM uqk1 WHERE c is NOT NULL; +EXPLAIN (COSTS OFF) SELECT DISTINCT d FROM uqk1 WHERE c is NOT NULL; +EXPLAIN (COSTS OFF) SELECT DISTINCT a FROM uqk1 WHERE pk = 1; + + +-- Test join +-- both uqk1 (c, d) and uqk2(pk) are unique key, so distinct is not needed. + +EXPLAIN (COSTS OFF) SELECT DISTINCT uqk1.c, uqk1.d FROM uqk1, uqk2 +WHERE uqk1.c is NOT NULL AND uqk1.a = uqk2.pk; + +-- Distinct is needed since the outer join allows multi null values. +EXPLAIN (COSTS OFF) SELECT DISTINCT uqk1.pk FROM uqk1 RIGHT JOIN uqk2 ON (uqk1.a = uqk2.pk) +order BY 1; + +SELECT DISTINCT uqk1.pk FROM uqk1 RIGHT JOIN uqk2 ON (uqk1.a = uqk2.pk) order BY 1; + +-- Distinct is not needed since uqk1 is the left table in outer join +EXPLAIN (COSTS OFF) SELECT DISTINCT uqk1.c, uqk1.d FROM uqk1 LEFT JOIN uqk2 ON (uqk1.a = uqk2.pk) +WHERE uqk1.c is NOT NULL order BY 1, 2; + +SELECT DISTINCT uqk1.c, uqk1.d FROM uqk1 LEFT JOIN uqk2 ON (uqk1.a = uqk2.pk) +WHERE uqk1.c is NOT NULL order BY 1, 2; + +-- The combination of UniqueKey is still unique no mather the join clause +EXPLAIN (COSTS OFF) SELECT DISTINCT * FROM uqk1, uqk2; +SELECT DISTINCT * FROM uqk1, uqk2 order BY 1, 2, 3, 4, 5, 6; + +-- Test Semi/Anti JOIN +EXPLAIN (COSTS OFF) SELECT DISTINCT pk FROM uqk1 WHERE d in (SELECT d FROM uqk2); +EXPLAIN (COSTS OFF) SELECT DISTINCT pk FROM uqk1 WHERE d NOT in (SELECT d FROM uqk2); + +-- Test Unique Key FOR one-row case, DISTINCT is NOT needed as well. +-- uqk1.d is the a uniquekey due to onerow rule. uqk2.pk is pk +EXPLAIN (COSTS OFF) SELECT DISTINCT uqk1.d FROM uqk1, uqk2 WHERE uqk1.pk = 2 AND uqk1.d = uqk2.pk; +SELECT DISTINCT uqk1.d FROM uqk1, uqk2 WHERE uqk1.pk = 2 AND uqk1.d = uqk2.pk order BY 1; + +-- Both uqk1.d AND uqk2.c are the a uniquekey due to onerow rule +EXPLAIN (COSTS OFF) SELECT DISTINCT uqk1.d FROM uqk1, uqk2 WHERE uqk1.pk = 2 +AND uqk2.pk = 1 AND uqk1.d = uqk2.d ; + +SELECT DISTINCT uqk1.d FROM uqk1, uqk2 WHERE uqk1.pk = 2 AND uqk2.pk = 1 +AND uqk1.d = uqk2.d order BY 1; + +-- Both UniqueKey in targetList, so distinct is not needed. +EXPLAIN (COSTS OFF) SELECT DISTINCT uqk1.c, uqk2.c FROM uqk1, uqk2 WHERE uqk1.pk = 2 AND uqk2.pk = 1; +SELECT DISTINCT uqk1.c, uqk2.c FROM uqk1, uqk2 WHERE uqk1.pk = 2 AND uqk2.pk = 1 order BY 1, 2; + +-- Test SubQuery +-- t2(a, c) is UNIQUE key because of group by +EXPLAIN (COSTS OFF) SELECT DISTINCT t2.a, t2.c FROM uqk1 t1 inner JOIN +(SELECT a, c, sum(pk) as t2b FROM uqk2 group BY a, c) t2 +ON (t2.t2b = t1.pk); + +-- Test Partition TABLE +-- Test partitioned TABLE +CREATE TABLE dist_p (a int, b int NOT NULL, c int NOT NULL, d int) PARTITION BY RANGE (b); +CREATE TABLE dist_p0 PARTITION OF dist_p FOR VALUES FROM (1) to (10); +CREATE TABLE dist_p1 PARTITION OF dist_p FOR VALUES FROM (11) to (20); + +-- CREATE unqiue INDEX ON dist_p +CREATE UNIQUE INDEX dist_p_uk_b_c ON dist_p(b, c); +EXPLAIN (COSTS OFF) SELECT DISTINCT * FROM dist_p; +DROP INDEX dist_p_uk_b_c; + +-- we also support CREATE unqiue INDEX ON each child tables +CREATE UNIQUE INDEX dist_p0_uk_bc ON dist_p0(b, c); +-- NOT ok, since dist_p1 have no such INDEX +EXPLAIN (COSTS OFF) SELECT DISTINCT * FROM dist_p; +CREATE UNIQUE INDEX dist_p1_uk_bc ON dist_p1(b, c); +-- OK now +EXPLAIN (COSTS OFF) SELECT DISTINCT * FROM dist_p; + +DROP INDEX dist_p0_uk_bc; +DROP INDEX dist_p1_uk_bc; + +-- uk is same ON all child tables, however it doesn't include the partkey, so NOT ok as well. +CREATE UNIQUE INDEX dist_p0_uk_c ON dist_p0(c); +CREATE UNIQUE INDEX dist_p1_uk_c ON dist_p1(c); +EXPLAIN (COSTS OFF) SELECT DISTINCT * FROM dist_p; + +DROP TABLE dist_p; + +-- Test some VIEW +CREATE VIEW distinct_v1 as SELECT DISTINCT c, d FROM uqk1 WHERE c is NOT NULL; +EXPLAIN (COSTS OFF) SELECT DISTINCT * FROM distinct_v1; +ALTER TABLE uqk1 ALTER COLUMN d DROP NOT NULL; +EXPLAIN (COSTS OFF) SELECT DISTINCT * FROM distinct_v1; + +-- Test generic plan +ALTER TABLE uqk1 ALTER COLUMN d SET NOT NULL; +prepare pt as SELECT * FROM distinct_v1; +EXPLAIN (COSTS OFF) execute pt; +ALTER TABLE uqk1 ALTER COLUMN d DROP NOT NULL; +EXPLAIN (COSTS OFF) execute pt; +DEALLOCATE pt; + +DROP VIEW distinct_v1; +DROP TABLE uqk1; +DROP TABLE uqk2; -- 2.21.0