From aa39394a639efbffcb80c7da632e75e92feb2a9a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=B8=80=E6=8C=83?= Date: Fri, 3 Apr 2020 09:55:22 +0800 Subject: [PATCH v4] Maintain UniqueKey at each RelOptInfo, this information can be used to erasing distinct/group path if we are sure the result is unique already. And rel_is_unique_for is modified to use UniqueKey to detect more cases. --- .../postgres_fdw/expected/postgres_fdw.out | 32 +- contrib/postgres_fdw/sql/postgres_fdw.sql | 1 + src/backend/nodes/list.c | 31 + src/backend/nodes/makefuncs.c | 16 + src/backend/optimizer/path/Makefile | 3 +- src/backend/optimizer/path/allpaths.c | 25 +- src/backend/optimizer/path/joinrels.c | 2 + src/backend/optimizer/path/uniquekeys.c | 1110 +++++++++++++++++ src/backend/optimizer/plan/analyzejoins.c | 143 +-- src/backend/optimizer/plan/initsplan.c | 10 + src/backend/optimizer/plan/planmain.c | 13 - src/backend/optimizer/plan/planner.c | 37 +- src/backend/optimizer/prep/prepunion.c | 2 + src/backend/optimizer/util/plancat.c | 10 + src/include/nodes/makefuncs.h | 3 + src/include/nodes/nodes.h | 1 + src/include/nodes/pathnodes.h | 32 + src/include/nodes/pg_list.h | 2 + src/include/optimizer/paths.h | 34 + src/test/regress/expected/aggregates.out | 73 +- src/test/regress/expected/join.out | 55 +- src/test/regress/expected/select_distinct.out | 335 +++++ src/test/regress/sql/aggregates.sql | 6 +- src/test/regress/sql/join.sql | 12 +- src/test/regress/sql/select_distinct.sql | 118 ++ 25 files changed, 1874 insertions(+), 232 deletions(-) create mode 100644 src/backend/optimizer/path/uniquekeys.c diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out index 62c2697920..88441568b7 100644 --- a/contrib/postgres_fdw/expected/postgres_fdw.out +++ b/contrib/postgres_fdw/expected/postgres_fdw.out @@ -1378,6 +1378,7 @@ SELECT t1.c1, ss.a, ss.b FROM (SELECT c1 FROM ft4 WHERE c1 between 50 and 60) t1 (8 rows) -- d. 
test deparsing rowmarked relations as subqueries +-- YYY: The inner table in the plan is "T 3", which has a primary key on c1, that's why we have the new added "Inner Unique: true". EXPLAIN (VERBOSE, COSTS OFF) SELECT t1.c1, ss.a, ss.b FROM (SELECT c1 FROM "S 1"."T 3" WHERE c1 = 50) t1 INNER JOIN (SELECT t2.c1, t3.c1 FROM (SELECT c1 FROM ft4 WHERE c1 between 50 and 60) t2 FULL JOIN (SELECT c1 FROM ft5 WHERE c1 between 50 and 60) t3 ON (t2.c1 = t3.c1) WHERE t2.c1 IS NULL OR t2.c1 IS NOT NULL) ss(a, b) ON (TRUE) ORDER BY t1.c1, ss.a, ss.b FOR UPDATE OF t1; QUERY PLAN @@ -1386,6 +1387,7 @@ SELECT t1.c1, ss.a, ss.b FROM (SELECT c1 FROM "S 1"."T 3" WHERE c1 = 50) t1 INNE Output: "T 3".c1, ft4.c1, ft5.c1, "T 3".ctid, ft4.*, ft5.* -> Nested Loop Output: "T 3".c1, ft4.c1, ft5.c1, "T 3".ctid, ft4.*, ft5.* + Inner Unique: true -> Foreign Scan Output: ft4.c1, ft4.*, ft5.c1, ft5.* Relations: (public.ft4) FULL JOIN (public.ft5) @@ -1410,7 +1412,7 @@ SELECT t1.c1, ss.a, ss.b FROM (SELECT c1 FROM "S 1"."T 3" WHERE c1 = 50) t1 INNE -> Seq Scan on "S 1"."T 3" Output: "T 3".c1, "T 3".ctid Filter: ("T 3".c1 = 50) -(28 rows) +(29 rows) SELECT t1.c1, ss.a, ss.b FROM (SELECT c1 FROM "S 1"."T 3" WHERE c1 = 50) t1 INNER JOIN (SELECT t2.c1, t3.c1 FROM (SELECT c1 FROM ft4 WHERE c1 between 50 and 60) t2 FULL JOIN (SELECT c1 FROM ft5 WHERE c1 between 50 and 60) t3 ON (t2.c1 = t3.c1) WHERE t2.c1 IS NULL OR t2.c1 IS NOT NULL) ss(a, b) ON (TRUE) ORDER BY t1.c1, ss.a, ss.b FOR UPDATE OF t1; c1 | a | b @@ -2902,22 +2904,20 @@ select sum(c1%3), sum(distinct c1%3 order by c1%3) filter (where c1%3 < 2), c2 f -- Outer query is aggregation query explain (verbose, costs off) select distinct (select count(*) filter (where t2.c2 = 6 and t2.c1 < 10) from ft1 t1 where t1.c1 = 6) from ft2 t2 where t2.c2 % 6 = 0 order by 1; - QUERY PLAN ------------------------------------------------------------------------------------------------------------------------------- - Unique + QUERY PLAN 
+------------------------------------------------------------------------------------------------------------------------ + Sort Output: ((SubPlan 1)) - -> Sort - Output: ((SubPlan 1)) - Sort Key: ((SubPlan 1)) - -> Foreign Scan - Output: (SubPlan 1) - Relations: Aggregate on (public.ft2 t2) - Remote SQL: SELECT count(*) FILTER (WHERE ((c2 = 6) AND ("C 1" < 10))) FROM "S 1"."T 1" WHERE (((c2 % 6) = 0)) - SubPlan 1 - -> Foreign Scan on public.ft1 t1 - Output: (count(*) FILTER (WHERE ((t2.c2 = 6) AND (t2.c1 < 10)))) - Remote SQL: SELECT NULL FROM "S 1"."T 1" WHERE (("C 1" = 6)) -(13 rows) + Sort Key: ((SubPlan 1)) + -> Foreign Scan + Output: (SubPlan 1) + Relations: Aggregate on (public.ft2 t2) + Remote SQL: SELECT count(*) FILTER (WHERE ((c2 = 6) AND ("C 1" < 10))) FROM "S 1"."T 1" WHERE (((c2 % 6) = 0)) + SubPlan 1 + -> Foreign Scan on public.ft1 t1 + Output: (count(*) FILTER (WHERE ((t2.c2 = 6) AND (t2.c1 < 10)))) + Remote SQL: SELECT NULL FROM "S 1"."T 1" WHERE (("C 1" = 6)) +(11 rows) select distinct (select count(*) filter (where t2.c2 = 6 and t2.c1 < 10) from ft1 t1 where t1.c1 = 6) from ft2 t2 where t2.c2 % 6 = 0 order by 1; count diff --git a/contrib/postgres_fdw/sql/postgres_fdw.sql b/contrib/postgres_fdw/sql/postgres_fdw.sql index 83971665e3..a42cfa134d 100644 --- a/contrib/postgres_fdw/sql/postgres_fdw.sql +++ b/contrib/postgres_fdw/sql/postgres_fdw.sql @@ -453,6 +453,7 @@ EXPLAIN (VERBOSE, COSTS OFF) SELECT t1.c1, ss.a, ss.b FROM (SELECT c1 FROM ft4 WHERE c1 between 50 and 60) t1 FULL JOIN (SELECT t2.c1, t3.c1 FROM (SELECT c1 FROM ft4 WHERE c1 between 50 and 60) t2 FULL JOIN (SELECT c1 FROM ft5 WHERE c1 between 50 and 60) t3 ON (t2.c1 = t3.c1) WHERE t2.c1 IS NULL OR t2.c1 IS NOT NULL) ss(a, b) ON (t1.c1 = ss.a) ORDER BY t1.c1, ss.a, ss.b; SELECT t1.c1, ss.a, ss.b FROM (SELECT c1 FROM ft4 WHERE c1 between 50 and 60) t1 FULL JOIN (SELECT t2.c1, t3.c1 FROM (SELECT c1 FROM ft4 WHERE c1 between 50 and 60) t2 FULL JOIN (SELECT c1 FROM ft5 WHERE c1 between 50 
and 60) t3 ON (t2.c1 = t3.c1) WHERE t2.c1 IS NULL OR t2.c1 IS NOT NULL) ss(a, b) ON (t1.c1 = ss.a) ORDER BY t1.c1, ss.a, ss.b; -- d. test deparsing rowmarked relations as subqueries +-- YYY: The inner table in the plan is "T 3", which has a primary key on c1, that's why we have the new added "Inner Unique: true". EXPLAIN (VERBOSE, COSTS OFF) SELECT t1.c1, ss.a, ss.b FROM (SELECT c1 FROM "S 1"."T 3" WHERE c1 = 50) t1 INNER JOIN (SELECT t2.c1, t3.c1 FROM (SELECT c1 FROM ft4 WHERE c1 between 50 and 60) t2 FULL JOIN (SELECT c1 FROM ft5 WHERE c1 between 50 and 60) t3 ON (t2.c1 = t3.c1) WHERE t2.c1 IS NULL OR t2.c1 IS NOT NULL) ss(a, b) ON (TRUE) ORDER BY t1.c1, ss.a, ss.b FOR UPDATE OF t1; SELECT t1.c1, ss.a, ss.b FROM (SELECT c1 FROM "S 1"."T 3" WHERE c1 = 50) t1 INNER JOIN (SELECT t2.c1, t3.c1 FROM (SELECT c1 FROM ft4 WHERE c1 between 50 and 60) t2 FULL JOIN (SELECT c1 FROM ft5 WHERE c1 between 50 and 60) t3 ON (t2.c1 = t3.c1) WHERE t2.c1 IS NULL OR t2.c1 IS NOT NULL) ss(a, b) ON (TRUE) ORDER BY t1.c1, ss.a, ss.b FOR UPDATE OF t1; diff --git a/src/backend/nodes/list.c b/src/backend/nodes/list.c index bd0c58cd81..889eb2940e 100644 --- a/src/backend/nodes/list.c +++ b/src/backend/nodes/list.c @@ -688,6 +688,37 @@ list_member_oid(const List *list, Oid datum) return false; } +/* + * Return true iff there is an equal member in target for every + * member in members + */ +bool +list_is_subset(const List *members, const List *target) +{ + const ListCell *lc1, *lc2; + + Assert(IsPointerList(members)); + Assert(IsPointerList(target)); + check_list_invariants(members); + check_list_invariants(target); + + foreach(lc1, members) + { + bool found = false; + foreach(lc2, target) + { + if (equal(lfirst(lc1), lfirst(lc2))) + { + found = true; + break; + } + } + if (!found) + return false; + } + return true; +} + /* * Delete the n'th cell (counting from 0) in list. 
* diff --git a/src/backend/nodes/makefuncs.c b/src/backend/nodes/makefuncs.c index e8cdc90c31..e79757cb43 100644 --- a/src/backend/nodes/makefuncs.c +++ b/src/backend/nodes/makefuncs.c @@ -809,3 +809,19 @@ makeVacuumRelation(RangeVar *relation, Oid oid, List *va_cols) v->va_cols = va_cols; return v; } + + +/* + * makeUniqueKey + */ +UniqueKey* +makeUniqueKey(List *exprs, List* positions, bool multi_nullvals, bool onerow) +{ + UniqueKey * ukey = makeNode(UniqueKey); + Assert(list_length(exprs) == list_length(positions)); + ukey->exprs = exprs; + ukey->positions = positions; + ukey->multi_nullvals = multi_nullvals; + ukey->onerow = onerow; + return ukey; +} diff --git a/src/backend/optimizer/path/Makefile b/src/backend/optimizer/path/Makefile index 1e199ff66f..7b9820c25f 100644 --- a/src/backend/optimizer/path/Makefile +++ b/src/backend/optimizer/path/Makefile @@ -21,6 +21,7 @@ OBJS = \ joinpath.o \ joinrels.o \ pathkeys.o \ - tidpath.o + tidpath.o \ + uniquekeys.o include $(top_srcdir)/src/backend/common.mk diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index 905bbe77d8..d6f1a45f06 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -39,6 +39,7 @@ #include "optimizer/pathnode.h" #include "optimizer/paths.h" #include "optimizer/plancat.h" +#include "optimizer/planmain.h" #include "optimizer/planner.h" #include "optimizer/restrictinfo.h" #include "optimizer/tlist.h" @@ -222,13 +223,24 @@ make_one_rel(PlannerInfo *root, List *joinlist) set_base_rel_pathlists(root); /* - * Generate access paths for the entire join tree. + * Remove any useless outer joins. 
Ideally this would be done during + * jointree preprocessing, but the necessary information isn't available + * until we've built baserel data structures, classified qual clauses + * and uniquekeys */ - rel = make_rel_from_joinlist(root, joinlist); + joinlist = remove_useless_joins(root, joinlist); + + /* + * Also, reduce any semijoins with unique inner rels to plain inner joins. + * Likewise, this can't be done until now for lack of needed info. + */ + reduce_unique_semijoins(root); /* - * The result should join all and only the query's base rels. + * Generate access paths for the entire join tree. */ + rel = make_rel_from_joinlist(root, joinlist); + Assert(bms_equal(rel->relids, root->all_baserels)); return rel; @@ -786,6 +798,9 @@ set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) /* Consider TID scans */ create_tidscan_paths(root, rel); + + /* Set UniqueKeys for this relation */ + populate_baserel_uniquekeys(root, rel, rel->indexlist); } /* @@ -1276,6 +1291,8 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, /* Add paths to the append relation. */ add_paths_to_append_rel(root, rel, live_childrels); + if (IS_PARTITIONED_REL(rel)) + populate_partitionedrel_uniquekeys(root, rel, live_childrels); } @@ -2349,6 +2366,8 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel, pathkeys, required_outer)); } + convert_subquery_uniquekeys(root, rel, sub_final_rel); + /* If outer rel allows parallelism, do same for partial paths. */ if (rel->consider_parallel && bms_is_empty(required_outer)) { diff --git a/src/backend/optimizer/path/joinrels.c b/src/backend/optimizer/path/joinrels.c index a21c295b99..c6799aa48c 100644 --- a/src/backend/optimizer/path/joinrels.c +++ b/src/backend/optimizer/path/joinrels.c @@ -920,6 +920,8 @@ populate_joinrel_with_paths(PlannerInfo *root, RelOptInfo *rel1, /* Apply partitionwise join technique, if possible. 
*/ try_partitionwise_join(root, rel1, rel2, joinrel, sjinfo, restrictlist); + + populate_joinrel_uniquekeys(root, joinrel, rel1, rel2, restrictlist, sjinfo->jointype); } diff --git a/src/backend/optimizer/path/uniquekeys.c b/src/backend/optimizer/path/uniquekeys.c new file mode 100644 index 0000000000..4cba7c9698 --- /dev/null +++ b/src/backend/optimizer/path/uniquekeys.c @@ -0,0 +1,1110 @@ +/*------------------------------------------------------------------------- + * + * uniquekeys.c + * Utilities for matching and building unique keys + * + * Portions Copyright (c) 2020, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/backend/optimizer/path/uniquekeys.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "nodes/makefuncs.h" +#include "nodes/nodeFuncs.h" +#include "optimizer/pathnode.h" +#include "optimizer/paths.h" +#include "optimizer/optimizer.h" +#include "rewrite/rewriteManip.h" + + +/* + * This struct is used to help populate_joinrel_uniquekeys, + * Set added_to_joinrel to true if a uniquekey has been added to joinrel. + * For a joinrel, if both sides have UniqueKey, then the combine of them + * must be unique for the joinrel as well, But we don't need to add it if + * either of them has been added to joinrel already. We use this struct to + * maintain such info. 
+ */
+typedef struct UniqueKeyContextData
+{
+	/* The UniqueKey of the input relation that this entry tracks */
+	UniqueKey  *uniquekey;
+	/* Set to true if the unique key has been added to joinrel->uniquekeys */
+	bool		added_to_joinrel;
+	/* If this uniquekey is still useful after join */
+	bool		useful;
+} *UniqueKeyContext;
+
+
+static List *gather_mergeable_baserestrictlist(RelOptInfo *rel);
+static List *gather_mergeable_joinclauses(RelOptInfo *joinrel,
+										  RelOptInfo *rel1,
+										  RelOptInfo *rel2,
+										  List *restrictlist,
+										  JoinType jointype);
+static bool match_index_to_baserestrictinfo(IndexOptInfo *unique_ind,
+											List *restrictlist);
+static List *initililze_uniquecontext_for_joinrel(RelOptInfo *joinrel,
+												  RelOptInfo *inputrel);
+
+static bool innerrel_keeps_unique(PlannerInfo *root,
+								  RelOptInfo *outerrel,
+								  RelOptInfo *innerrel,
+								  List *restrictlist,
+								  bool reverse);
+static bool clause_sides_match_join(RestrictInfo *rinfo,
+									Relids outerrelids,
+									Relids innerrelids);
+static void add_uniquekey_from_index(RelOptInfo *rel,
+									 IndexOptInfo *unique_index);
+static void add_uniquekey_for_onerow(RelOptInfo *rel);
+
+/* Used for unique indexes checking for partitioned table */
+static bool index_constains_partkey(RelOptInfo *partrel, IndexOptInfo *ind);
+static IndexOptInfo *simple_copy_indexinfo_to_parent(RelOptInfo *parentrel,
+													 IndexOptInfo *from);
+static bool simple_indexinfo_equal(IndexOptInfo *ind1, IndexOptInfo *ind2);
+static void adjust_partition_unique_indexlist(RelOptInfo *parentrel,
+											  RelOptInfo *childrel,
+											  List **global_unique_index);
+/* Helper function for grouprel/distinctrel */
+static void add_uniquekey_from_sortgroups(PlannerInfo *root,
+										  RelOptInfo *rel,
+										  List *sortgroups);
+
+/*
+ * populate_baserel_uniquekeys
+ *		Populate 'baserel' uniquekeys from the unique indexes in 'indexlist'
+ *		combined with the rel's "var = const" restriction clauses.
+ *
+ * Nothing is recorded when the query's targetlist has set-returning
+ * functions or when 'baserel' is an "other member" rel.  If some usable
+ * unique index is matched on every column by the restriction clauses, the
+ * rel is recorded as returning one row; otherwise one UniqueKey is built
+ * per usable unique index that has no expression columns.
+ */
+void
+populate_baserel_uniquekeys(PlannerInfo *root,
+							RelOptInfo *baserel,
+							List *indexlist)
+{
+	List	   *eq_const_quals = gather_mergeable_baserestrictlist(baserel);
+	List	   *candidate_indexes = NIL;
+	ListCell   *cell;
+
+	Assert(baserel->rtekind == RTE_RELATION);
+
+	if (root->parse->hasTargetSRFs)
+		return;
+
+	/*
+	 * Set UniqueKey on member rel is useless, we have to recompute it at
+	 * upper level, see populate_partitionedrel_uniquekeys for reference
+	 */
+	if (baserel->reloptkind == RELOPT_OTHER_MEMBER_REL)
+		return;
+
+	foreach(cell, indexlist)
+	{
+		IndexOptInfo *index = (IndexOptInfo *) lfirst(cell);
+		bool		usable;
+
+		/* Only immediate unique indexes whose predicate (if any) is proven */
+		usable = index->unique && index->immediate &&
+			(index->indpred == NIL || index->predOK);
+		if (!usable)
+			continue;
+
+		/* Every index column pinned by a restriction clause: one row */
+		if (match_index_to_baserestrictinfo(index, eq_const_quals))
+		{
+			add_uniquekey_for_onerow(baserel);
+			return;
+		}
+
+		/* We can't guarantee if an expression returns a NULL value, so ignore it */
+		if (index->indexprs == NIL)
+			candidate_indexes = lappend(candidate_indexes, index);
+	}
+
+	foreach(cell, candidate_indexes)
+		add_uniquekey_from_index(baserel, lfirst_node(IndexOptInfo, cell));
+}
+
+
+/*
+ * populate_partitionedrel_uniquekeys
+ * The unique index can be used for UniqueKey based on:
+ * 1). It must include partition keys
+ * 2). All the childrels must have the same indexes.
+ */
+void
+populate_partitionedrel_uniquekeys(PlannerInfo *root,
+								   RelOptInfo *rel,
+								   List *childrels)
+{
+	List	   *shared_indexes = NIL;
+	RelOptInfo *first_child;
+	ListCell   *cell;
+
+	Assert(IS_PARTITIONED_REL(rel));
+
+	if (root->parse->hasTargetSRFs)
+		return;
+
+	if (childrels == NIL)
+		return;
+
+	/* Seed the candidate list from the first child's usable unique indexes */
+	first_child = linitial_node(RelOptInfo, childrels);
+	foreach(cell, first_child->indexlist)
+	{
+		IndexOptInfo *child_index = lfirst(cell);
+		IndexOptInfo *parent_index;
+
+		if (!child_index->unique || !child_index->immediate)
+			continue;
+		if (child_index->indpred != NIL && !child_index->predOK)
+			continue;
+
+		parent_index = simple_copy_indexinfo_to_parent(rel, child_index);
+
+		/*
+		 * If the unique index doesn't contain partkey, then it is unique
+		 * on this partition only, so it is useless for us.
+		 */
+		if (index_constains_partkey(rel, parent_index))
+			shared_indexes = lappend(shared_indexes, parent_index);
+	}
+
+	/* Fast path */
+	if (shared_indexes == NIL)
+		return;
+
+	/* Reconcile the candidates with every child after the first one */
+	foreach(cell, childrels)
+	{
+		if (cell == list_head(childrels))
+			continue;
+		adjust_partition_unique_indexlist(rel,
+										  (RelOptInfo *) lfirst(cell),
+										  &shared_indexes);
+	}
+
+	/*
+	 * Now we have the unique index list which is exactly the same on all
+	 * childrels; set the UniqueKeys just as for a non-partitioned table.
+	 */
+	populate_baserel_uniquekeys(root, rel, shared_indexes);
+}
+
+
+/*
+ * populate_distinctrel_uniquekeys
+ *		Start from a copy of the input rel's unique keys, then add whatever
+ *		add_uniquekey_from_sortgroups derives from the DISTINCT clause.
+ */
+void
+populate_distinctrel_uniquekeys(PlannerInfo *root,
+								RelOptInfo *inputrel,
+								RelOptInfo *distinctrel)
+{
+	List	   *distinct_clause = root->parse->distinctClause;
+
+	/* The unique key before the distinct is still valid */
+	distinctrel->uniquekeys = list_copy(inputrel->uniquekeys);
+	add_uniquekey_from_sortgroups(root, distinctrel, distinct_clause);
+}
+
+/*
+ * populate_grouprel_uniquekeys
+ *		Record unique keys for a grouping rel: from the GROUP BY clause when
+ *		there is one, otherwise mark the rel as one-row.  Nothing is recorded
+ *		for targetlist SRFs or grouping sets.
+ */
+void
+populate_grouprel_uniquekeys(PlannerInfo *root,
+							 RelOptInfo *grouprel)
+{
+	Query	   *parse = root->parse;
+
+	if (parse->hasTargetSRFs || parse->groupingSets)
+		return;
+
+	if (!parse->groupClause)
+	{
+		/* it has aggregation but without a group by, so must be one line return */
+		add_uniquekey_for_onerow(grouprel);
+		return;
+	}
+
+	/* A Normal group by without grouping set */
+	add_uniquekey_from_sortgroups(root, grouprel, root->parse->groupClause);
+}
+
+/*
+ * simple_copy_uniquekeys
+ * Using a function for this one-line assignment makes it easy to find every
+ * place where we simply copied the uniquekeys.
+ */
+void
+simple_copy_uniquekeys(RelOptInfo *oldrel,
+					   RelOptInfo *newrel)
+{
+	/* The list pointer itself is shared; no cells are copied */
+	List	   *shared_keys = oldrel->uniquekeys;
+
+	newrel->uniquekeys = shared_keys;
+}
+
+/*
+ * populate_unionrel_uniquiekeys
+ *		Record a single UniqueKey covering every expression of the union
+ *		rel's target list, in target order, with 1-based positions.  The key
+ *		is created with multi_nullvals = true and onerow = false.
+ */
+void
+populate_unionrel_uniquiekeys(PlannerInfo *root,
+							  RelOptInfo *unionrel)
+{
+	List	   *key_exprs = NIL;
+	List	   *key_positions = NIL;
+	UniqueKey  *all_columns_key;
+	ListCell   *cell;
+	int			colno = 0;
+
+	Assert(unionrel->uniquekeys == NIL);
+
+	foreach(cell, unionrel->reltarget->exprs)
+	{
+		colno++;
+		key_exprs = lappend(key_exprs, lfirst(cell));
+		key_positions = lappend_int(key_positions, colno);
+	}
+
+	all_columns_key = makeUniqueKey(key_exprs, key_positions, true, false);
+	unionrel->uniquekeys = lappend(unionrel->uniquekeys, all_columns_key);
+}
+
+/*
+ * populate_joinrel_uniquekeys
+ *
+ * populate uniquekeys for joinrel. We will check each relation to see if its
+ * UniqueKey is still valid via innerrel_keeps_unique, if so, we add it to
+ * joinrel.  The multi_nullvals field will be changed from false to true
+ * for some outer join cases.
+ *
+ * For the uniquekey in either baserel which can't be unique after join, we still
+ * check to see if combination of UniqueKeys from both side is still useful for us.
+ * if yes, we add it to joinrel as well. multi_nullvals is set to true if either
+ * side has multi_nullvals equal to true.
+ */
+/*
+ * Parameters:
+ *	joinrel			the join relation whose uniquekeys list is appended to
+ *	outerrel/innerrel	the two input relations of this join
+ *	restrictlist	join clauses; only the mergejoinable ones that relate the
+ *					two inputs are used (see gather_mergeable_joinclauses)
+ *	jointype		join type; SEMI/ANTI only propagate outerrel's keys
+ *
+ * Keys are appended to joinrel->uniquekeys without de-duplication.
+ * NOTE(review): if this is invoked once per input pairing of the same
+ * joinrel, equal keys may be appended repeatedly -- confirm with the caller.
+ */
+void
+populate_joinrel_uniquekeys(PlannerInfo *root, RelOptInfo *joinrel,
+							RelOptInfo *outerrel, RelOptInfo *innerrel,
+							List *restrictlist, JoinType jointype)
+{
+	ListCell   *lc,
+			   *lc2;
+	List	   *clause_list = NIL;
+	List	   *outerrel_ukey_ctx;
+	List	   *innerrel_ukey_ctx;
+	bool		outer_is_onerow,
+				inner_is_onerow;
+
+	if (root->parse->hasTargetSRFs)
+		return;
+
+	/* Care about the outerrel relation only for SEMI/ANTI join */
+	if (jointype == JOIN_SEMI || jointype == JOIN_ANTI)
+	{
+		foreach(lc, outerrel->uniquekeys)
+		{
+			UniqueKey  *uniquekey = lfirst_node(UniqueKey, lc);
+
+			/* Keep the key only if all of its exprs survive into the join output */
+			if (list_is_subset(uniquekey->exprs, joinrel->reltarget->exprs))
+				joinrel->uniquekeys = lappend(joinrel->uniquekeys, uniquekey);
+		}
+		return;
+	}
+
+	/* Fast path */
+	if (innerrel->uniquekeys == NIL || outerrel->uniquekeys == NIL)
+		return;
+
+	outer_is_onerow = relation_is_onerow(outerrel);
+	inner_is_onerow = relation_is_onerow(innerrel);
+
+	outerrel_ukey_ctx = initililze_uniquecontext_for_joinrel(joinrel, outerrel);
+	innerrel_ukey_ctx = initililze_uniquecontext_for_joinrel(joinrel, innerrel);
+
+	clause_list = gather_mergeable_joinclauses(joinrel, outerrel, innerrel,
+											   restrictlist, jointype);
+
+	/* First, the inner side: do innerrel's keys stay unique in the join? */
+	if (innerrel_keeps_unique(root, outerrel, innerrel, clause_list, false))
+	{
+		foreach(lc, innerrel_ukey_ctx)
+		{
+			UniqueKeyContext ctx = (UniqueKeyContext) lfirst(lc);
+
+			if (!list_is_subset(ctx->uniquekey->exprs, joinrel->reltarget->exprs))
+			{
+				/* The UniqueKey on baserel is not useful on the joinrel */
+				ctx->useful = false;
+				continue;
+			}
+			if ((jointype == JOIN_LEFT || jointype == JOIN_FULL) &&
+				!ctx->uniquekey->multi_nullvals)
+			{
+				/*
+				 * The inner side is nullable here, so its columns can be
+				 * null-extended for many outer rows: change multi_nullvals
+				 * to true.  added_to_joinrel is deliberately left alone, as
+				 * in the original code, so the combined keys below are still
+				 * considered.
+				 */
+				joinrel->uniquekeys = lappend(joinrel->uniquekeys,
+											  makeUniqueKey(ctx->uniquekey->exprs,
+															ctx->uniquekey->positions,
+															true,
+															false));
+			}
+			else if (inner_is_onerow)
+			{
+				/*
+				 * Since rows in innerrel can't be duplicated AND innerrel is
+				 * onerow, the join result will be onerow as well.  Note:
+				 * onerow implies multi_nullvals = false.
+				 */
+				add_uniquekey_for_onerow(joinrel);
+				return;
+			}
+			else
+			{
+				joinrel->uniquekeys = lappend(joinrel->uniquekeys, ctx->uniquekey);
+				ctx->added_to_joinrel = true;
+			}
+		}
+	}
+
+	/* Then the mirror image: do outerrel's keys stay unique in the join? */
+	if (innerrel_keeps_unique(root, innerrel, outerrel, clause_list, true))
+	{
+		foreach(lc, outerrel_ukey_ctx)
+		{
+			UniqueKeyContext ctx = (UniqueKeyContext) lfirst(lc);
+
+			if (!list_is_subset(ctx->uniquekey->exprs, joinrel->reltarget->exprs))
+			{
+				ctx->useful = false;
+				continue;
+			}
+
+			/*
+			 * NULL values in outer rel can be duplicated under JOIN_FULL
+			 * only: every unmatched inner row null-extends the outer
+			 * columns.  So a key that did not yet allow multiple NULLs must
+			 * be re-created with multi_nullvals = true, exactly as is done
+			 * for the inner side above.  (The test used to lack the '!',
+			 * which let a multi_nullvals = false key through unchanged.)
+			 */
+			if (jointype == JOIN_FULL && !ctx->uniquekey->multi_nullvals)
+			{
+				joinrel->uniquekeys = lappend(joinrel->uniquekeys,
+											  makeUniqueKey(ctx->uniquekey->exprs,
+															ctx->uniquekey->positions,
+															true,
+															false));
+			}
+			else if (outer_is_onerow)
+			{
+				add_uniquekey_for_onerow(joinrel);
+				return;
+			}
+			else
+			{
+				joinrel->uniquekeys = lappend(joinrel->uniquekeys, ctx->uniquekey);
+				ctx->added_to_joinrel = true;
+			}
+		}
+	}
+
+	/*
+	 * The combination of the UniqueKey from both sides is unique as well
+	 * regardless of join type, but don't bother to add it if its subset has
+	 * been added already.
+	 */
+	foreach(lc, outerrel_ukey_ctx)
+	{
+		UniqueKeyContext ctx1 = (UniqueKeyContext) lfirst(lc);
+
+		if (ctx1->added_to_joinrel || !ctx1->useful)
+			continue;
+		foreach(lc2, innerrel_ukey_ctx)
+		{
+			UniqueKeyContext ctx2 = (UniqueKeyContext) lfirst(lc2);
+			List	   *exprs = NIL,
+					   *colnos = NIL;
+			bool		multi_nullvals;
+
+			if (ctx2->added_to_joinrel || !ctx2->useful)
+				continue;
+
+			/* Copy the outer lists first so the input keys are not modified */
+			exprs = list_copy(ctx1->uniquekey->exprs);
+			colnos = list_copy(ctx1->uniquekey->positions);
+			exprs = list_concat(exprs, ctx2->uniquekey->exprs);
+			colnos = list_concat(colnos, ctx2->uniquekey->positions);
+
+			multi_nullvals = ctx1->uniquekey->multi_nullvals ||
+				ctx2->uniquekey->multi_nullvals;
+			joinrel->uniquekeys = lappend(joinrel->uniquekeys,
+										  makeUniqueKey(exprs,
+														colnos,
+														multi_nullvals,
+														/* All onerow cases have been handled above */
+														false));
+		}
+	}
+}
+
+
+/*
+ * Used to avoid multi scan of rel->reltarget->exprs, see
+ * convert_subquery_uniquekeys
+ */
+typedef struct SubqueryUniqueKeyData
+{
+	/*
+	 * Only the Var reference to subquery's unique is unique as well, we can't
+	 * guarantee others
+	 */
+	Var		   *var;
+
+	/* The position of the var in the rel->reltarget */
+	int			pos;
+} *SubqueryUniqueKeyContext;
+
+/*
+ * convert_subquery_uniquekeys
+ *
+ * currel is the RelOptInfo in current level, sub_final_rel is obtained from fetch_upper_rel;
+ * we need to convert the UniqueKey from sub_final_rel to currel via the positions info in
+ * UniqueKey. Example:
+ *
+ * select t2.colx from t1, (select max(y), colx from t3 group by colx) t2 where ..
+ * The UniqueKey in sub_final_rel is Var(varno=1, varattrno=N), position=2.
+ * the UniqueKey in currel will be Var(varno=2, varattrno=2), position=1
+ */
+void
+convert_subquery_uniquekeys(PlannerInfo *root,
+							RelOptInfo *currel,
+							RelOptInfo *sub_final_rel)
+{
+	SubqueryUniqueKeyContext *ctx_array;
+	SubqueryUniqueKeyContext ctx;
+	Index		max_colno_subq = 0;
+	ListCell   *lc,
+			   *lc2;
+	int			pos = 0;
+
+	if (sub_final_rel->uniquekeys == NIL)
+		/* This should be a common case */
+		return;
+
+	/* A one-row subquery makes the subquery rel one-row too */
+	if (relation_is_onerow(sub_final_rel))
+	{
+		add_uniquekey_for_onerow(currel);
+		return;
+	}
+
+	/*
+	 * Calculate max_colno in subquery. In fact we can check this with
+	 * list_length(sub_final_rel->reltarget->exprs), However, reltarget
+	 * is not set on UPPERREL_FINAL relation, so do it this way
+	 */
+	foreach(lc, sub_final_rel->uniquekeys)
+	{
+		UniqueKey  *ukey = lfirst_node(UniqueKey, lc);
+
+		foreach(lc2, ukey->positions)
+		{
+			Index		colno = lfirst_int(lc2);
+
+			if (max_colno_subq < colno)
+				max_colno_subq = colno;
+		}
+	}
+
+	Assert(max_colno_subq > 0);
+	/* One slot per subquery column number; slot 0 is never looked up */
+	ctx_array = palloc0(sizeof(SubqueryUniqueKeyContext) * (max_colno_subq + 1));
+
+	/*
+	 * Create an array for each expr in currel->reltarget->exprs, the array index
+	 * is the colno in subquery, so that we can get the expr quickly given a colno_subq
+	 */
+	foreach(lc, currel->reltarget->exprs)
+	{
+		Var		   *var;
+		int			colno_subq;
+
+		pos++;
+		if (!IsA(lfirst(lc), Var))
+			continue;
+
+		var = lfirst_node(Var, lc);
+		colno_subq = var->varattno;
+
+		/*
+		 * Only column numbers 1..max_colno_subq can appear in a key's
+		 * positions.  The lower bound is spelled out instead of relying on
+		 * the int-to-Index conversion in the upper-bound comparison to
+		 * reject negative values.
+		 */
+		if (colno_subq <= 0 || (Index) colno_subq > max_colno_subq)
+			continue;
+		ctx_array[colno_subq] = palloc0(sizeof(struct SubqueryUniqueKeyData));
+		ctx = ctx_array[colno_subq];	/* corresponding to subquery's uniquekey->positions[x] */
+		ctx->pos = pos;			/* the position in current targetlist, will be used to set UniqueKey */
+		ctx->var = var;
+	}
+
+	/* Convert the UniqueKey from sub_final_rel to currel */
+	foreach(lc, sub_final_rel->uniquekeys)
+	{
+		UniqueKey  *ukey = lfirst_node(UniqueKey, lc);
+		bool		uniquekey_useful = true;
+		List	   *exprs = NIL;
+		List	   *colnos = NIL;
+
+		foreach(lc2, ukey->positions)
+		{
+			Index		sub_colno = lfirst_int(lc2);
+
+			ctx = ctx_array[sub_colno];
+			if (ctx == NULL)
+			{
+				/* The column is not used outside */
+				uniquekey_useful = false;
+				break;
+			}
+			exprs = lappend(exprs, ctx->var);
+			colnos = lappend_int(colnos, ctx->pos);
+		}
+		if (uniquekey_useful)
+			currel->uniquekeys = lappend(currel->uniquekeys,
+										 makeUniqueKey(exprs,
+													   colnos,
+													   ukey->multi_nullvals,
+													   ukey->onerow));
+	}
+}
+
+
+/*
+ * innerrel_keeps_unique
+ *
+ * Check if Unique key of the innerrel is valid after join. innerrel's UniqueKey
+ * will be still valid if some uniquekey of outerrel has every one of its exprs
+ * appearing as the outerrel-side operand of a clause in clause_list, because
+ * then each innerrel row can pair with at most one outerrel row.
+ *
+ * 'reverse' must be true when the caller passes the join's real inner rel as
+ * 'outerrel' (and vice versa); it flips which operand outer_is_left selects.
+ *
+ * Note: the clause_list must be a list of mergeable restrictinfo already.
+ */
+static bool
+innerrel_keeps_unique(PlannerInfo *root,
+					  RelOptInfo *outerrel,
+					  RelOptInfo *innerrel,
+					  List *clause_list,
+					  bool reverse)
+{
+	ListCell   *lc,
+			   *lc2,
+			   *lc3;
+
+	if (outerrel->uniquekeys == NIL || innerrel->uniquekeys == NIL)
+		return false;
+
+	/*
+	 * If outerrel returns a single row, no innerrel row can be repeated by
+	 * the join, whatever the clauses are.  The shortcut must be taken on
+	 * outerrel, not innerrel: a one-row innerrel is still emitted once per
+	 * matching outerrel row, so testing innerrel here made the caller treat
+	 * the whole join as one-row.
+	 */
+	if (relation_is_onerow(outerrel))
+		return true;
+
+	foreach(lc, outerrel->uniquekeys)
+	{
+		List	   *outer_uq_exprs = lfirst_node(UniqueKey, lc)->exprs;
+		bool		clauselist_matchs_all_exprs = true;
+
+		foreach(lc2, outer_uq_exprs)
+		{
+			Node	   *outer_uq_expr = lfirst(lc2);
+			bool		find_uq_expr_in_clauselist = false;
+
+			foreach(lc3, clause_list)
+			{
+				RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc3);
+				Node	   *outer_expr;
+
+				/* Pick the operand that belongs to our 'outerrel' argument */
+				if (reverse)
+					outer_expr = rinfo->outer_is_left ? get_rightop(rinfo->clause) : get_leftop(rinfo->clause);
+				else
+					outer_expr = rinfo->outer_is_left ? get_leftop(rinfo->clause) : get_rightop(rinfo->clause);
+				if (equal(outer_expr, outer_uq_expr))
+				{
+					find_uq_expr_in_clauselist = true;
+					break;
+				}
+			}
+			if (!find_uq_expr_in_clauselist)
+			{
+				/* No need to check the next exprs in the current uniquekey */
+				clauselist_matchs_all_exprs = false;
+				break;
+			}
+		}
+
+		if (clauselist_matchs_all_exprs)
+
+			/*
+			 * If the clauselist match any uk from outerrel, the innerrel will
+			 * be unique based on the fact that innerrel->uniquekeys != NIL
+			 * which is checked at the beginning
+			 */
+			return true;
+	}
+	return false;
+}
+
+
+/*
+ * relation_is_onerow
+ * Check if it is a one-row relation by checking UniqueKey
+ *
+ * Only the first UniqueKey is inspected; a one-row rel is expected to carry
+ * exactly one key with no exprs/positions (checked by the assertions).
+ */
+bool
+relation_is_onerow(RelOptInfo *rel)
+{
+	UniqueKey  *ukey;
+
+	if (rel->uniquekeys == NIL)
+		return false;
+	ukey = linitial_node(UniqueKey, rel->uniquekeys);
+	if (ukey->onerow)
+	{
+		/* Some helpful tiny check for UniqueKey */
+
+		/* 1. We will only store one UniqueKey for this rel */
+		Assert(list_length(rel->uniquekeys) == 1);
+		/* 2. multi_nullvals must be false */
+		Assert(!ukey->multi_nullvals);
+		/* 3. exprs & positions must be NIL */
+		Assert(ukey->exprs == NIL);
+		Assert(ukey->positions == NIL);
+	}
+	return ukey->onerow;
+}
+
+/*
+ * relation_has_uniquekeys_for
+ *		Returns true if we have proofs that 'rel' cannot return multiple rows with
+ *		the same values in each of 'exprs'.  Otherwise returns false.
+ */ +bool +relation_has_uniquekeys_for(PlannerInfo *root, RelOptInfo *rel, List *exprs) +{ + ListCell *lc; + + foreach(lc, rel->uniquekeys) + { + UniqueKey *ukey = lfirst_node(UniqueKey, lc); + if (ukey->multi_nullvals) + continue; + if (list_is_subset(ukey->exprs, exprs)) + return true; + } + return false; +} + + +/* + * Examine the rel's restriction clauses for usable var = const clauses + */ +static List* +gather_mergeable_baserestrictlist(RelOptInfo *rel) +{ + List *restrictlist = NIL; + ListCell *lc; + foreach(lc, rel->baserestrictinfo) + { + RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(lc); + + /* + * Note: can_join won't be set for a restriction clause, but + * mergeopfamilies will be if it has a mergejoinable operator and + * doesn't contain volatile functions. + */ + if (restrictinfo->mergeopfamilies == NIL) + continue; /* not mergejoinable */ + + /* + * The clause certainly doesn't refer to anything but the given rel. + * If either side is pseudoconstant then we can use it. 
+ */ + if (bms_is_empty(restrictinfo->left_relids)) + { + /* righthand side is inner */ + restrictinfo->outer_is_left = true; + } + else if (bms_is_empty(restrictinfo->right_relids)) + { + /* lefthand side is inner */ + restrictinfo->outer_is_left = false; + } + else + continue; + + /* OK, add to list */ + restrictlist = lappend(restrictlist, restrictinfo); + } + return restrictlist; +} + + +/* + * gather_mergeable_joinclauses + */ +static List* +gather_mergeable_joinclauses(RelOptInfo *joinrel, + RelOptInfo *outerrel, + RelOptInfo *innerrel, + List *restrictlist, + JoinType jointype) +{ + List *clause_list = NIL; + ListCell *lc; + foreach(lc, restrictlist) + { + RestrictInfo *restrictinfo = (RestrictInfo *)lfirst(lc); + if (IS_OUTER_JOIN(jointype) && + RINFO_IS_PUSHED_DOWN(restrictinfo, joinrel->relids)) + continue; + + /* Ignore if it's not a mergejoinable clause */ + if (!restrictinfo->can_join || + restrictinfo->mergeopfamilies == NIL) + continue; /* not mergejoinable */ + + /* + * Check if clause has the form "outer op inner" or "inner op outer", + * and if so mark which side is inner. 
+ */ + if (!clause_sides_match_join(restrictinfo, outerrel->relids, innerrel->relids)) + continue; /* no good for these input relations */ + + /* OK, add to list */ + clause_list = lappend(clause_list, restrictinfo); + } + return clause_list; +} + + +/* + * Return true if uk = Const in the restrictlist + */ +static bool +match_index_to_baserestrictinfo(IndexOptInfo *unique_ind, List *restrictlist) +{ + int c = 0; + + /* A fast path to avoid the 2 loop scan */ + if (list_length(restrictlist) < unique_ind->ncolumns) + return false; + + for(c = 0; c < unique_ind->ncolumns; c++) + { + ListCell *lc; + bool found_in_restrictinfo = false; + foreach(lc, restrictlist) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); + Node *rexpr; + + /* + * The condition's equality operator must be a member of the + * index opfamily, else it is not asserting the right kind of + * equality behavior for this index. We check this first + * since it's probably cheaper than match_index_to_operand(). + */ + if (!list_member_oid(rinfo->mergeopfamilies, unique_ind->opfamily[c])) + continue; + + /* + * XXX at some point we may need to check collations here too. + * For the moment we assume all collations reduce to the same + * notion of equality. + */ + + /* OK, see if the condition operand matches the index key */ + if (rinfo->outer_is_left) + rexpr = get_rightop(rinfo->clause); + else + rexpr = get_leftop(rinfo->clause); + + if (match_index_to_operand(rexpr, c, unique_ind)) + { + found_in_restrictinfo = true; + break; + } + } + if (!found_in_restrictinfo) + return false; + } + return true; +} + +/* + * add_uniquekey_from_index + * We only add the Index Vars whose expr exists in rel->reltarget + */ +static void +add_uniquekey_from_index(RelOptInfo *rel, IndexOptInfo *unique_index) +{ + int pos; + List *exprs = NIL; + List *positions = NIL; + bool multi_nullvals = false; + + /* Fast path. Check if the indexed columns are used in this relation + * If not, return fast. 
+     */
+    for(pos = 0; pos < unique_index->ncolumns; pos++)
+    {
+        int attno = unique_index->indexkeys[pos] - rel->min_attr;    /* offset into attr_needed[] */
+        if (bms_is_empty(rel->attr_needed[attno]))
+            return;
+    }
+    /* NOTE(review): both loops run over ncolumns, so INCLUDE columns are required too; nkeycolumns may be intended -- confirm */
+    /* We still need to check the rel->reltarget->exprs to get the exprs and positions */
+    for(pos = 0; pos < unique_index->ncolumns; pos++)
+    {
+        ListCell *lc;
+        bool find_in_exprs = false;
+
+        foreach(lc, rel->reltarget->exprs)
+        {
+            Var *var;
+            if (!IsA(lfirst(lc), Var))
+                continue;
+            var = lfirst_node(Var, lc);
+            if (match_index_to_operand((Node *)lfirst(lc), pos, unique_index))
+            {
+                find_in_exprs = true;
+                exprs = lappend(exprs, lfirst(lc));
+                positions = lappend_int(positions, pos+1);    /* NOTE(review): index column number, not the reltarget position -- confirm */
+                if (!bms_is_member(var->varattno - FirstLowInvalidHeapAttributeNumber,
+                                   rel->notnullattrs))
+                    multi_nullvals = true;    /* column not in notnullattrs */
+                break;
+            }
+        }
+        if (!find_in_exprs)
+            return;    /* an index column is missing from reltarget: no key */
+    }
+
+    if (exprs != NIL)
+    {
+        rel->uniquekeys = lappend(rel->uniquekeys,
+                                  makeUniqueKey(exprs, positions, multi_nullvals, false));
+    }
+}
+
+
+/*
+ * add_uniquekey_for_onerow
+ * If we are sure about the relation only returns one row, then all the columns
+ * are unique.
There is no need to create a UniqueKey for every expr; setting
+ * UniqueKey->onerow to true is enough.
+ */
+void
+add_uniquekey_for_onerow(RelOptInfo *rel)
+{
+    /* exprs and positions stay NIL; a one-row rel cannot have multi_nullvals */
+    UniqueKey  *onerow_key = makeUniqueKey(NIL, NIL, false, true);
+
+    rel->uniquekeys = list_make1(onerow_key);
+
+}
+
+/*
+ * initililze_uniquecontext_for_joinrel
+ * Return a List of UniqueKeyContext, one per uniquekey of inputrel. Nothing is
+ * filtered here: every context starts out useful and not yet added to the joinrel.
+ */
+static List *
+initililze_uniquecontext_for_joinrel(RelOptInfo *joinrel, RelOptInfo *inputrel)
+{
+    List       *contexts = NIL;
+    ListCell   *cell;
+    foreach(cell, inputrel->uniquekeys)
+    {
+        /* wrap the input key in a freshly allocated context */
+        UniqueKeyContext ctx = palloc(sizeof(struct UniqueKeyContextData));
+        ctx->uniquekey = lfirst_node(UniqueKey, cell);
+        ctx->useful = true;
+        ctx->added_to_joinrel = false;
+        contexts = lappend(contexts, ctx);
+    }
+    return contexts;
+}
+
+/*
+ * clause_sides_match_join
+ *    Determine whether a join clause is of the right form to use in this join.
+ *
+ * We already know that the clause is a binary opclause referencing only the
+ * rels in the current join.  The point here is to check whether it has the
+ * form "outerrel_expr op innerrel_expr" or "innerrel_expr op outerrel_expr",
+ * rather than mixing outer and inner vars on either side.  If it matches,
+ * we set the transient flag outer_is_left to identify which side is which.
+ */
+static bool
+clause_sides_match_join(RestrictInfo *rinfo, Relids outerrelids,
+                        Relids innerrelids)
+{
+    /* "outer op inner": the lefthand operand comes from the outer side */
+    if (bms_is_subset(rinfo->left_relids, outerrelids) &&
+        bms_is_subset(rinfo->right_relids, innerrelids))
+    {
+        rinfo->outer_is_left = true;
+        return true;
+    }
+    /* "inner op outer": the righthand operand comes from the outer side */
+    if (bms_is_subset(rinfo->left_relids, innerrelids) &&
+        bms_is_subset(rinfo->right_relids, outerrelids))
+    {
+        rinfo->outer_is_left = false;
+        return true;
+    }
+    return false;                /* no good for these input relations */
+}
+
+
+/*
+ * Partitioned table Unique Keys.
+ * The partitioned table unique key is maintained as:
+ * 1. The index must be unique as usual.
+ * 2. The index must contain the partition key.
+ * 3. The index must exist on all the child rels. See simple_indexinfo_equal for
+ * how we compare it.
+ */
+
+/* index_constains_partkey
+ * return true if the index contains every partition key column.
+ */
+static bool
+index_constains_partkey(RelOptInfo *partrel, IndexOptInfo *ind)
+{
+    ListCell   *cell;
+    int         keyno;
+    Assert(IS_PARTITIONED_REL(partrel));
+
+    for(keyno = 0; keyno < partrel->part_scheme->partnatts; keyno++)
+    {
+        Node       *wanted = linitial(partrel->partexprs[keyno]);
+        bool        matched = false;
+        foreach(cell, ind->indextlist)
+        {
+            /* compare the indexed expression with the partition key expression */
+            TargetEntry *tle = lfirst_node(TargetEntry, cell);
+            matched = equal(tle->expr, wanted);
+            if (matched)
+                break;
+        }
+        /* one partition key column missing from the index is enough to fail */
+        if (!matched)
+            return false;
+    }
+    return true;
+}
+
+/*
+ * simple_indexinfo_equal
+ *
+ * Used to check if the 2 index is same as each other.
The index here
+ * is COPIED from childrel and did some tiny changes(see simple_copy_indexinfo_to_parent)
+ * The per-key arrays (opfamily, opcintype, sortopfamily) are compared over nkeycolumns entries only.
+ */
+static bool
+simple_indexinfo_equal(IndexOptInfo *ind1, IndexOptInfo *ind2)
+{
+    Size oid_cmp_len = sizeof(Oid) * ind1->nkeycolumns;    /* INCLUDE columns have no entry in the Oid arrays */
+    return ind1->ncolumns == ind2->ncolumns && ind1->nkeycolumns == ind2->nkeycolumns &&
+        ind1->unique == ind2->unique &&
+        memcmp(ind1->indexkeys, ind2->indexkeys, sizeof(int) * ind1->ncolumns) == 0 &&
+        memcmp(ind1->opfamily, ind2->opfamily, oid_cmp_len) == 0 &&
+        memcmp(ind1->opcintype, ind2->opcintype, oid_cmp_len) == 0 &&
+        memcmp(ind1->sortopfamily, ind2->sortopfamily, oid_cmp_len) == 0 &&
+        equal(ind1->indextlist, ind2->indextlist);
+}
+
+/*
+ * These macros are copied from copyfuncs.c: simple_copy_indexinfo_to_parent is
+ * such a customized copy that it is not worth making public.
+ */
+
+#define COPY_POINTER_FIELD(fldname, sz) \
+    do { \
+        Size    _size = (sz); \
+        newnode->fldname = palloc(_size); \
+        memcpy(newnode->fldname, from->fldname, _size); \
+    } while (0)
+
+#define COPY_NODE_FIELD(fldname) \
+    (newnode->fldname = copyObjectImpl(from->fldname))
+
+#define COPY_SCALAR_FIELD(fldname) \
+    (newnode->fldname = from->fldname)
+
+
+/*
+ * simple_copy_indexinfo_to_parent
+ * Copy the IndexInfo from child index info to parent, which will be used to
+ * 1. Test if the same index exists in all the childrels.
+ * 2. if the parentrel->reltarget/basicrestrict info matches this index.
+ * The copied and modified index is just used in this scope.
+ */
+static IndexOptInfo *
+simple_copy_indexinfo_to_parent(RelOptInfo *parentrel,
+                                IndexOptInfo *from)
+{
+    IndexOptInfo *newnode = makeNode(IndexOptInfo);
+
+    COPY_SCALAR_FIELD(ncolumns);
+    COPY_SCALAR_FIELD(nkeycolumns);
+    COPY_SCALAR_FIELD(unique);
+    COPY_SCALAR_FIELD(immediate);
+    /* We just need to know if it is NIL or not */
+    COPY_SCALAR_FIELD(indpred);
+    COPY_SCALAR_FIELD(predOK);
+    COPY_POINTER_FIELD(indexkeys, from->ncolumns * sizeof(int));    /* one entry per column, key or included */
+    COPY_POINTER_FIELD(indexcollations, from->nkeycolumns * sizeof(Oid));    /* key columns only, like the arrays below */
+    COPY_POINTER_FIELD(opfamily, from->nkeycolumns * sizeof(Oid));
+    COPY_POINTER_FIELD(opcintype, from->nkeycolumns * sizeof(Oid));
+    COPY_POINTER_FIELD(sortopfamily, from->nkeycolumns * sizeof(Oid));
+    COPY_NODE_FIELD(indextlist);
+    COPY_NODE_FIELD(indexprs);    /* match_index_to_operand() walks this for expression columns */
+
+    /*
+     * Renumber the copied Vars from the child rel to the parent rel so that later
+     * index matching (like pk = 1) against the parent's reltarget and quals works.
+     */
+    ChangeVarNodes((Node*) newnode->indextlist, from->rel->relid, parentrel->relid, 0);
+    ChangeVarNodes((Node*) newnode->indexprs, from->rel->relid, parentrel->relid, 0);
+    newnode->rel = parentrel;
+    return newnode;
+}
+
+/*
+ * adjust_partition_unique_indexlist
+ *
+ * Check the current known global_unique_indexes to see if every index here
+ * all exists in the given childrel, if not, it will be removed from
+ * the list
+ */
+static void
+adjust_partition_unique_indexlist(RelOptInfo *parentrel,
+                                  RelOptInfo *childrel,
+                                  List **global_unique_indexes)
+{
+    ListCell    *lc, *lc2;
+    foreach(lc, *global_unique_indexes)
+    {
+        IndexOptInfo    *g_ind = lfirst_node(IndexOptInfo, lc);
+        bool found_in_child = false;
+
+        foreach(lc2, childrel->indexlist)
+        {
+            IndexOptInfo   *p_ind = lfirst_node(IndexOptInfo, lc2);
+            IndexOptInfo   *p_ind_copy;
+            if (!p_ind->unique || !p_ind->immediate ||
+                (p_ind->indpred != NIL && !p_ind->predOK))
+                continue;
+            p_ind_copy = simple_copy_indexinfo_to_parent(parentrel, p_ind);
+            if (simple_indexinfo_equal(p_ind_copy, g_ind))
+            {
+                found_in_child = true;
+                break;
+            }
+        }
+
+        if (!found_in_child)
+            /* There is no same index on other childrel, remove it */
+            *global_unique_indexes = 
foreach_delete_current(*global_unique_indexes, lc);
+    }
+}
+
+/* Helper for grouprel/distinctrel: add one UniqueKey built from the target entries that sortgroups references */
+static void
+add_uniquekey_from_sortgroups(PlannerInfo *root, RelOptInfo *rel, List *sortgroups)
+{
+    Query    *parse = root->parse;
+    ListCell *lc, *lc2;
+    List    *exprs = NIL, *colnos = NIL;
+
+    /* XXX: If there are some vars which is not in current levelsup, the semantic is
+     * imprecise, should we avoid it? levelsup = 1 is just a demo, maybe we need to
+     * check every level other than 0, if so, we need write another pull_var_walker.
+     */
+    List    *upper_vars = pull_vars_of_level((Node*)sortgroups, 1);
+    /* NOTE(review): sortgroups holds SortGroupClause nodes, not expressions, so this check may never fire -- confirm */
+    if (upper_vars != NIL)
+        return;
+
+    foreach(lc, sortgroups)
+    {
+        Index sortref = lfirst_node(SortGroupClause, lc)->tleSortGroupRef;
+        int c = 1;    /* 1-based position in parse->targetList */
+        foreach(lc2, parse->targetList)    /* own cursor: must not clobber the outer loop's lc */
+        {
+            TargetEntry *tle = lfirst_node(TargetEntry, lc2);
+            if (tle->ressortgroupref == sortref)
+            {
+                exprs = lappend(exprs, tle->expr);
+                colnos = lappend_int(colnos, c);
+            }
+            ++c;
+        }
+    }
+    rel->uniquekeys = lappend(rel->uniquekeys,
+                              makeUniqueKey(exprs,
+                                            colnos,
+                                            false, /* sortgroupclause can't be multi_nullvals */
+                                            relation_is_onerow(rel) /* should be always false */
+                                  ));
+}
diff --git a/src/backend/optimizer/plan/analyzejoins.c b/src/backend/optimizer/plan/analyzejoins.c
index d0ff660284..f8491e2bcf 100644
--- a/src/backend/optimizer/plan/analyzejoins.c
+++ b/src/backend/optimizer/plan/analyzejoins.c
@@ -439,6 +439,8 @@ remove_rel_from_query(PlannerInfo *root, int relid, Relids joinrelids)
      * There may be references to the rel in root->fkey_list, but if so,
      * match_foreign_keys_to_quals() will get rid of them.
      */
+
+    root->all_baserels = bms_del_member(root->all_baserels, relid);
 }
 
 /*
@@ -574,49 +576,11 @@ reduce_unique_semijoins(PlannerInfo *root)
  * rel_supports_distinctness
  *        Could the relation possibly be proven distinct on some set of columns?
  *
- * This is effectively a pre-checking function for rel_is_distinct_for().
- * It must return true if rel_is_distinct_for() could possibly return true - * with this rel, but it should not expend a lot of cycles. The idea is - * that callers can avoid doing possibly-expensive processing to compute - * rel_is_distinct_for()'s argument lists if the call could not possibly - * succeed. */ static bool rel_supports_distinctness(PlannerInfo *root, RelOptInfo *rel) { - /* We only know about baserels ... */ - if (rel->reloptkind != RELOPT_BASEREL) - return false; - if (rel->rtekind == RTE_RELATION) - { - /* - * For a plain relation, we only know how to prove uniqueness by - * reference to unique indexes. Make sure there's at least one - * suitable unique index. It must be immediately enforced, and if - * it's a partial index, it must match the query. (Keep these - * conditions in sync with relation_has_unique_index_for!) - */ - ListCell *lc; - - foreach(lc, rel->indexlist) - { - IndexOptInfo *ind = (IndexOptInfo *) lfirst(lc); - - if (ind->unique && ind->immediate && - (ind->indpred == NIL || ind->predOK)) - return true; - } - } - else if (rel->rtekind == RTE_SUBQUERY) - { - Query *subquery = root->simple_rte_array[rel->relid]->subquery; - - /* Check if the subquery has any qualities that support distinctness */ - if (query_supports_distinctness(subquery)) - return true; - } - /* We have no proof rules for any other rtekinds. */ - return false; + return rel->uniquekeys != NIL; } /* @@ -640,83 +604,33 @@ rel_supports_distinctness(PlannerInfo *root, RelOptInfo *rel) static bool rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, List *clause_list) { - /* - * We could skip a couple of tests here if we assume all callers checked - * rel_supports_distinctness first, but it doesn't seem worth taking any - * risk for. - */ - if (rel->reloptkind != RELOPT_BASEREL) - return false; - if (rel->rtekind == RTE_RELATION) - { - /* - * Examine the indexes to see if we have a matching unique index. 
- * relation_has_unique_index_for automatically adds any usable - * restriction clauses for the rel, so we needn't do that here. - */ - if (relation_has_unique_index_for(root, rel, clause_list, NIL, NIL)) - return true; - } - else if (rel->rtekind == RTE_SUBQUERY) - { - Index relid = rel->relid; - Query *subquery = root->simple_rte_array[relid]->subquery; - List *colnos = NIL; - List *opids = NIL; - ListCell *l; - /* - * Build the argument lists for query_is_distinct_for: a list of - * output column numbers that the query needs to be distinct over, and - * a list of equality operators that the output columns need to be - * distinct according to. - * - * (XXX we are not considering restriction clauses attached to the - * subquery; is that worth doing?) - */ - foreach(l, clause_list) + ListCell *lc1, *lc2, *lc3; + foreach(lc1, rel->uniquekeys) + { + UniqueKey *uqk = lfirst_node(UniqueKey, lc1); + bool all_uqk_exprs_found = true; + foreach(lc2, uqk->exprs) { - RestrictInfo *rinfo = lfirst_node(RestrictInfo, l); - Oid op; - Var *var; - - /* - * Get the equality operator we need uniqueness according to. - * (This might be a cross-type operator and thus not exactly the - * same operator the subquery would consider; that's all right - * since query_is_distinct_for can resolve such cases.) The - * caller's mergejoinability test should have selected only - * OpExprs. - */ - op = castNode(OpExpr, rinfo->clause)->opno; - - /* caller identified the inner side for us */ - if (rinfo->outer_is_left) - var = (Var *) get_rightop(rinfo->clause); - else - var = (Var *) get_leftop(rinfo->clause); - - /* - * We may ignore any RelabelType node above the operand. (There - * won't be more than one, since eval_const_expressions() has been - * applied already.) - */ - if (var && IsA(var, RelabelType)) - var = (Var *) ((RelabelType *) var)->arg; - - /* - * If inner side isn't a Var referencing a subquery output column, - * this clause doesn't help us. 
- */ - if (!var || !IsA(var, Var) || - var->varno != relid || var->varlevelsup != 0) - continue; - - colnos = lappend_int(colnos, var->varattno); - opids = lappend_oid(opids, op); + Node *uq_expr = lfirst(lc2); + bool find_uq_exprs_in_clause_list = false; + foreach(lc3, clause_list) + { + RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc3); + Node *clause_expr = rinfo->outer_is_left ? get_rightop(rinfo->clause): get_leftop(rinfo->clause) ; + if (equal(uq_expr, clause_expr)) + { + find_uq_exprs_in_clause_list = true; + break; + } + } + if (!find_uq_exprs_in_clause_list) + { + all_uqk_exprs_found = false; + break; + } } - - if (query_is_distinct_for(subquery, colnos, opids)) + if (all_uqk_exprs_found) return true; } return false; @@ -972,6 +886,9 @@ innerrel_is_unique(PlannerInfo *root, MemoryContext old_context; ListCell *lc; + if (relation_is_onerow(innerrel)) + return true; + /* Certainly can't prove uniqueness when there are no joinclauses */ if (restrictlist == NIL) return false; diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c index e978b491f6..ca6303915c 100644 --- a/src/backend/optimizer/plan/initsplan.c +++ b/src/backend/optimizer/plan/initsplan.c @@ -830,6 +830,16 @@ deconstruct_recurse(PlannerInfo *root, Node *jtnode, bool below_outer_join, { Node *qual = (Node *) lfirst(l); + /* set the not null info now */ + ListCell *lc; + List *non_nullable_vars = find_nonnullable_vars(qual); + foreach(lc, non_nullable_vars) + { + Var *var = lfirst_node(Var, lc); + RelOptInfo *rel = root->simple_rel_array[var->varno]; + rel->notnullattrs = bms_add_member(rel->notnullattrs, + var->varattno - FirstLowInvalidHeapAttributeNumber); + } distribute_qual_to_rels(root, qual, false, below_outer_join, JOIN_INNER, root->qual_security_level, diff --git a/src/backend/optimizer/plan/planmain.c b/src/backend/optimizer/plan/planmain.c index 62dfc6d44a..6ad73cb57b 100644 --- a/src/backend/optimizer/plan/planmain.c +++ 
b/src/backend/optimizer/plan/planmain.c @@ -213,19 +213,6 @@ query_planner(PlannerInfo *root, */ fix_placeholder_input_needed_levels(root); - /* - * Remove any useless outer joins. Ideally this would be done during - * jointree preprocessing, but the necessary information isn't available - * until we've built baserel data structures and classified qual clauses. - */ - joinlist = remove_useless_joins(root, joinlist); - - /* - * Also, reduce any semijoins with unique inner rels to plain inner joins. - * Likewise, this can't be done until now for lack of needed info. - */ - reduce_unique_semijoins(root); - /* * Now distribute "placeholders" to base rels as needed. This has to be * done after join removal because removal could change whether a diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index b65abf6046..4fff019b52 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -2386,6 +2386,8 @@ grouping_planner(PlannerInfo *root, bool inheritance_update, add_path(final_rel, path); } + simple_copy_uniquekeys(current_rel, final_rel); + /* * Generate partial paths for final_rel, too, if outer query levels might * be able to make use of them. 
@@ -3810,6 +3812,23 @@ create_grouping_paths(PlannerInfo *root, Query *parse = root->parse; RelOptInfo *grouped_rel; RelOptInfo *partially_grouped_rel; + List *groupExprs = NIL; + + if (root->parse->groupingSets == NIL) + { + groupExprs = get_sortgrouplist_exprs(parse->groupClause, + parse->targetList); + /* + * If the groupby clauses is unique already, groupping node is not necessary + * if there is no aggreation functions + */ + if (groupExprs != NIL && + !parse->hasAggs && + !parse->hasWindowFuncs && + parse->havingQual == NULL && + relation_has_uniquekeys_for(root, input_rel, groupExprs)) + return input_rel; + } /* * Create grouping relation to hold fully aggregated grouping and/or @@ -3898,6 +3917,8 @@ create_grouping_paths(PlannerInfo *root, } set_cheapest(grouped_rel); + + populate_grouprel_uniquekeys(root, grouped_rel); return grouped_rel; } @@ -4615,7 +4636,7 @@ create_window_paths(PlannerInfo *root, /* Now choose the best path(s) */ set_cheapest(window_rel); - + simple_copy_uniquekeys(input_rel, window_rel); return window_rel; } @@ -4734,6 +4755,12 @@ create_distinct_paths(PlannerInfo *root, bool allow_hash; Path *path; ListCell *lc; + List *distinctExprs = get_sortgrouplist_exprs(parse->distinctClause, + parse->targetList); + + /* If the result is unique already, we just return the input_rel directly */ + if (relation_has_uniquekeys_for(root, input_rel, distinctExprs)) + return input_rel; /* For now, do all work in the (DISTINCT, NULL) upperrel */ distinct_rel = fetch_upper_rel(root, UPPERREL_DISTINCT, NULL); @@ -4771,10 +4798,6 @@ create_distinct_paths(PlannerInfo *root, /* * Otherwise, the UNIQUE filter has effects comparable to GROUP BY. 
*/ - List *distinctExprs; - - distinctExprs = get_sortgrouplist_exprs(parse->distinctClause, - parse->targetList); numDistinctRows = estimate_num_groups(root, distinctExprs, cheapest_input_path->rows, NULL); @@ -4912,7 +4935,7 @@ create_distinct_paths(PlannerInfo *root, /* Now choose the best path(s) */ set_cheapest(distinct_rel); - + populate_distinctrel_uniquekeys(root, input_rel, distinct_rel); return distinct_rel; } @@ -5060,6 +5083,8 @@ create_ordered_paths(PlannerInfo *root, */ Assert(ordered_rel->pathlist != NIL); + simple_copy_uniquekeys(input_rel, ordered_rel); + return ordered_rel; } diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c index 951aed80e7..8aa4d24cb0 100644 --- a/src/backend/optimizer/prep/prepunion.c +++ b/src/backend/optimizer/prep/prepunion.c @@ -689,6 +689,8 @@ generate_union_paths(SetOperationStmt *op, PlannerInfo *root, /* Undo effects of possibly forcing tuple_fraction to 0 */ root->tuple_fraction = save_fraction; + /* Add the UniqueKeys */ + populate_unionrel_uniquiekeys(root, result_rel); return result_rel; } diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c index d82fc5ab8b..2373e39acf 100644 --- a/src/backend/optimizer/util/plancat.c +++ b/src/backend/optimizer/util/plancat.c @@ -117,6 +117,7 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, Relation relation; bool hasindex; List *indexinfos = NIL; + int i; /* * We need not lock the relation since it was already locked, either by @@ -460,6 +461,15 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, if (inhparent && relation->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) set_relation_partition_info(root, rel, relation); + Assert(rel->notnullattrs == NULL); + for(i = 0; i < relation->rd_att->natts; i++) + { + FormData_pg_attribute attr = relation->rd_att->attrs[i]; + if (attr.attnotnull) + rel->notnullattrs = bms_add_member(rel->notnullattrs, + 
attr.attnum - FirstLowInvalidHeapAttributeNumber); + } + table_close(relation, NoLock); /* diff --git a/src/include/nodes/makefuncs.h b/src/include/nodes/makefuncs.h index 31d9aedeeb..fd050249d2 100644 --- a/src/include/nodes/makefuncs.h +++ b/src/include/nodes/makefuncs.h @@ -16,6 +16,7 @@ #include "nodes/execnodes.h" #include "nodes/parsenodes.h" +#include "nodes/pathnodes.h" extern A_Expr *makeA_Expr(A_Expr_Kind kind, List *name, @@ -105,4 +106,6 @@ extern GroupingSet *makeGroupingSet(GroupingSetKind kind, List *content, int loc extern VacuumRelation *makeVacuumRelation(RangeVar *relation, Oid oid, List *va_cols); +extern UniqueKey* makeUniqueKey(List *exprs, List *positions, + bool multi_nullvals, bool onerow); #endif /* MAKEFUNC_H */ diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h index 8a76afe8cc..679cc4cc9c 100644 --- a/src/include/nodes/nodes.h +++ b/src/include/nodes/nodes.h @@ -261,6 +261,7 @@ typedef enum NodeTag T_EquivalenceMember, T_PathKey, T_PathTarget, + T_UniqueKey, T_RestrictInfo, T_IndexClause, T_PlaceHolderVar, diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h index 0ceb809644..d777d896d2 100644 --- a/src/include/nodes/pathnodes.h +++ b/src/include/nodes/pathnodes.h @@ -687,6 +687,8 @@ typedef struct RelOptInfo PlannerInfo *subroot; /* if subquery */ List *subplan_params; /* if subquery */ int rel_parallel_workers; /* wanted number of parallel workers */ + /* Not null attrs, start from -FirstLowInvalidHeapAttributeNumber */ + Bitmapset *notnullattrs; /* Information about foreign tables and foreign joins */ Oid serverid; /* identifies server for the table or join */ @@ -706,6 +708,7 @@ typedef struct RelOptInfo QualCost baserestrictcost; /* cost of evaluating the above */ Index baserestrict_min_security; /* min security_level found in * baserestrictinfo */ + List *uniquekeys; /* List of UniqueKey */ List *joininfo; /* RestrictInfo structures for join clauses * involving this rel */ bool 
has_eclass_joins; /* T means joininfo is incomplete */ @@ -1017,6 +1020,35 @@ typedef struct PathKey } PathKey; +/* + * UniqueKey + * + * Represents the unique properties held by a RelOptInfo. + * + * exprs is a list of exprs which is unique on current RelOptInfo. + * positions is a list of position where the corresponding exprs's location in + * current reloptinfo->reltarget. It will be used when we translate the UniqueKey + * in subquery. + * multi_nullvals: true means multi null values may exists in these exprs, so the + * uniqueness is not guaranteed in this case. This field is necessary for + * remove_useless_join & reduce_unique_semijoins where we don't mind these + * duplicated NULL values. It is set to true for 2 cases. One is a unique key + * from a unique index but the related column is nullable. The other one is for + * outer join. see populate_joinrel_uniquekeys for detail. + * onerow means the related relation return 1 row only. Like filter with unique + * index, aggregate without group node, join 2 1-row relations. An optimization + * is if the onerow is set to true, we will set not record every expr as a UniqueKey, + * we store exprs as a NIL. 
+ */ +typedef struct UniqueKey +{ + NodeTag type; + List *exprs; + List *positions; + bool multi_nullvals; + bool onerow; +} UniqueKey; + /* * PathTarget * diff --git a/src/include/nodes/pg_list.h b/src/include/nodes/pg_list.h index 14ea2766ad..621f54a9f8 100644 --- a/src/include/nodes/pg_list.h +++ b/src/include/nodes/pg_list.h @@ -528,6 +528,8 @@ extern bool list_member_ptr(const List *list, const void *datum); extern bool list_member_int(const List *list, int datum); extern bool list_member_oid(const List *list, Oid datum); +extern bool list_is_subset(const List *members, const List *target); + extern List *list_delete(List *list, void *datum); extern List *list_delete_ptr(List *list, void *datum); extern List *list_delete_int(List *list, int datum); diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h index 9ab73bd20c..534cf04616 100644 --- a/src/include/optimizer/paths.h +++ b/src/include/optimizer/paths.h @@ -240,4 +240,38 @@ extern PathKey *make_canonical_pathkey(PlannerInfo *root, extern void add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel, List *live_childrels); +/* + * uniquekeys.c + * Utilities for matching and building unique keys + */ +extern void populate_baserel_uniquekeys(PlannerInfo *root, + RelOptInfo *baserel, + List* unique_index_list); +extern void populate_partitionedrel_uniquekeys(PlannerInfo *root, + RelOptInfo *rel, + List *childrels); +extern void populate_distinctrel_uniquekeys(PlannerInfo *root, + RelOptInfo *inputrel, + RelOptInfo *distinctrel); +extern void populate_grouprel_uniquekeys(PlannerInfo *root, + RelOptInfo *grouprel); +extern void populate_unionrel_uniquiekeys(PlannerInfo *root, + RelOptInfo *unionrel); +extern void simple_copy_uniquekeys(RelOptInfo *oldrel, + RelOptInfo *newrel); +extern void convert_subquery_uniquekeys(PlannerInfo *root, + RelOptInfo *currel, + RelOptInfo *sub_final_rel); +extern void populate_joinrel_uniquekeys(PlannerInfo *root, + RelOptInfo *joinrel, + RelOptInfo *rel1, 
+ RelOptInfo *rel2, + List *restrictlist, + JoinType jointype); + +extern bool relation_has_uniquekeys_for(PlannerInfo *root, + RelOptInfo *rel, + List *exprs); +extern bool relation_is_onerow(RelOptInfo *rel); + #endif /* PATHS_H */ diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out index 14cdcfcca6..42bd180895 100644 --- a/src/test/regress/expected/aggregates.out +++ b/src/test/regress/expected/aggregates.out @@ -870,14 +870,12 @@ explain (costs off) select distinct max(unique2) from tenk1; QUERY PLAN --------------------------------------------------------------------- - HashAggregate - Group Key: $0 + Result InitPlan 1 (returns $0) -> Limit -> Index Only Scan Backward using tenk1_unique2 on tenk1 Index Cond: (unique2 IS NOT NULL) - -> Result -(7 rows) +(5 rows) select distinct max(unique2) from tenk1; max @@ -1036,7 +1034,7 @@ explain (costs off) select distinct min(f1), max(f1) from minmaxtest; QUERY PLAN --------------------------------------------------------------------------------------------- - Unique + Result InitPlan 1 (returns $0) -> Limit -> Merge Append @@ -1059,10 +1057,7 @@ explain (costs off) -> Index Only Scan using minmaxtest2i on minmaxtest2 minmaxtest_8 Index Cond: (f1 IS NOT NULL) -> Index Only Scan Backward using minmaxtest3i on minmaxtest3 minmaxtest_9 - -> Sort - Sort Key: ($0), ($1) - -> Result -(26 rows) +(23 rows) select distinct min(f1), max(f1) from minmaxtest; min | max @@ -1092,12 +1087,10 @@ create temp table t2 (x int, y int, z int, primary key (x, y)); create temp table t3 (a int, b int, c int, primary key(a, b) deferrable); -- Non-primary-key columns can be removed from GROUP BY explain (costs off) select * from t1 group by a,b,c,d; - QUERY PLAN ----------------------- - HashAggregate - Group Key: a, b - -> Seq Scan on t1 -(3 rows) + QUERY PLAN +---------------- + Seq Scan on t1 +(1 row) -- No removal can happen if the complete PK is not present in GROUP BY explain (costs off) select 
a,c from t1 group by a,c,d; @@ -1109,29 +1102,27 @@ explain (costs off) select a,c from t1 group by a,c,d; (3 rows) -- Test removal across multiple relations -explain (costs off) select * +explain (costs off) select t2.* from t1 inner join t2 on t1.a = t2.x and t1.b = t2.y -group by t1.a,t1.b,t1.c,t1.d,t2.x,t2.y,t2.z; - QUERY PLAN ------------------------------------------------------- - HashAggregate - Group Key: t1.a, t1.b, t2.x, t2.y - -> Hash Join - Hash Cond: ((t2.x = t1.a) AND (t2.y = t1.b)) - -> Seq Scan on t2 - -> Hash - -> Seq Scan on t1 -(7 rows) +group by t1.a,t1.c,t1.d,t2.x,t2.y,t2.z; + QUERY PLAN +------------------------------------------------ + Hash Join + Hash Cond: ((t2.x = t1.a) AND (t2.y = t1.b)) + -> Seq Scan on t2 + -> Hash + -> Seq Scan on t1 +(5 rows) -- Test case where t1 can be optimized but not t2 explain (costs off) select t1.*,t2.x,t2.z -from t1 inner join t2 on t1.a = t2.x and t1.b = t2.y +from t1 right join t2 on t1.a = t2.x and t1.b = t2.y group by t1.a,t1.b,t1.c,t1.d,t2.x,t2.z; QUERY PLAN ------------------------------------------------------ HashAggregate Group Key: t1.a, t1.b, t2.x, t2.z - -> Hash Join + -> Hash Left Join Hash Cond: ((t2.x = t1.a) AND (t2.y = t1.b)) -> Seq Scan on t2 -> Hash @@ -1161,12 +1152,10 @@ explain (costs off) select * from t1 group by a,b,c,d; -- Okay to remove columns if we're only querying the parent. explain (costs off) select * from only t1 group by a,b,c,d; - QUERY PLAN ----------------------- - HashAggregate - Group Key: a, b - -> Seq Scan on t1 -(3 rows) + QUERY PLAN +---------------- + Seq Scan on t1 +(1 row) create temp table p_t1 ( a int, @@ -1179,14 +1168,12 @@ create temp table p_t1_1 partition of p_t1 for values in(1); create temp table p_t1_2 partition of p_t1 for values in(2); -- Ensure we can remove non-PK columns for partitioned tables. 
explain (costs off) select * from p_t1 group by a,b,c,d; - QUERY PLAN --------------------------------- - HashAggregate - Group Key: p_t1.a, p_t1.b - -> Append - -> Seq Scan on p_t1_1 - -> Seq Scan on p_t1_2 -(5 rows) + QUERY PLAN +-------------------------- + Append + -> Seq Scan on p_t1_1 + -> Seq Scan on p_t1_2 +(3 rows) drop table t1 cascade; NOTICE: drop cascades to table t1c diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index 761376b007..54d987405a 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -4349,11 +4349,11 @@ reset enable_nestloop; -- begin; CREATE TEMP TABLE a (id int PRIMARY KEY, b_id int); -CREATE TEMP TABLE b (id int PRIMARY KEY, c_id int); +CREATE TEMP TABLE b (id int PRIMARY KEY, c_id int, d int); CREATE TEMP TABLE c (id int PRIMARY KEY); CREATE TEMP TABLE d (a int, b int); INSERT INTO a VALUES (0, 0), (1, NULL); -INSERT INTO b VALUES (0, 0), (1, NULL); +INSERT INTO b VALUES (0, 0, 1), (1, NULL, 1); INSERT INTO c VALUES (0), (1); INSERT INTO d VALUES (1,3), (2,2), (3,1); -- all three cases should be optimizable into a simple seqscan @@ -4415,36 +4415,35 @@ select d.* from d left join (select distinct * from b) s -- primary key and so drop b.c_id from the GROUP BY of the resulting plan; -- but this happens too late for join removal in the outer plan level.) 
explain (costs off) -select d.* from d left join (select * from b group by b.id, b.c_id) s - on d.a = s.id; - QUERY PLAN ------------------------------------------- - Merge Right Join - Merge Cond: (b.id = d.a) - -> Group - Group Key: b.id - -> Index Scan using b_pkey on b - -> Sort - Sort Key: d.a - -> Seq Scan on d +select d.* from d left join (select d, c_id from b group by b.d, b.c_id) s + on d.a = s.d; + QUERY PLAN +-------------------------------------------- + Hash Left Join + Hash Cond: (d.a = s.d) + -> Seq Scan on d + -> Hash + -> Subquery Scan on s + -> HashAggregate + Group Key: b.d, b.c_id + -> Seq Scan on b (8 rows) -- similarly, but keying off a DISTINCT clause explain (costs off) -select d.* from d left join (select distinct * from b) s - on d.a = s.id; - QUERY PLAN --------------------------------------- - Merge Right Join - Merge Cond: (b.id = d.a) - -> Unique - -> Sort - Sort Key: b.id, b.c_id - -> Seq Scan on b - -> Sort - Sort Key: d.a - -> Seq Scan on d -(9 rows) +select d.* from d left join (select distinct c_id, d from b) s + on d.a = s.d; + QUERY PLAN +-------------------------------------------- + Hash Left Join + Hash Cond: (d.a = s.d) + -> Seq Scan on d + -> Hash + -> Subquery Scan on s + -> HashAggregate + Group Key: b.c_id, b.d + -> Seq Scan on b +(8 rows) -- check join removal works when uniqueness of the join condition is enforced -- by a UNION diff --git a/src/test/regress/expected/select_distinct.out b/src/test/regress/expected/select_distinct.out index 11c6f50fbf..227bc27af4 100644 --- a/src/test/regress/expected/select_distinct.out +++ b/src/test/regress/expected/select_distinct.out @@ -306,3 +306,338 @@ SELECT null IS NOT DISTINCT FROM null as "yes"; t (1 row) +CREATE TABLE uqk1(a int, pk int primary key, c int, d int); +CREATE TABLE uqk2(a int, pk int primary key, c int, d int); +INSERT INTO uqk1 VALUES(1, 1, 1, 1), (2, 2, 2, 2), (3, 3, 3, 3); +INSERT INTO uqk2 VALUES(1, 1, 1, 1), (4, 4, 4, 4), (5, 5, 5, 5); +ANALYZE uqk1; 
+ANALYZE uqk2; +-- Test single table +EXPLAIN (COSTS OFF) SELECT DISTINCT * FROM uqk1; + QUERY PLAN +------------------ + Seq Scan on uqk1 +(1 row) + +EXPLAIN (COSTS OFF) SELECT DISTINCT c, d FROM uqk1; + QUERY PLAN +------------------------------ + Unique + -> Sort + Sort Key: c, d + -> Seq Scan on uqk1 +(4 rows) + +CREATE UNIQUE INDEX uqk1_ukcd ON uqk1(c, d); +EXPLAIN (COSTS OFF) SELECT DISTINCT c, d FROM uqk1; + QUERY PLAN +------------------------------ + Unique + -> Sort + Sort Key: c, d + -> Seq Scan on uqk1 +(4 rows) + +EXPLAIN (COSTS OFF) SELECT DISTINCT c, d FROM uqk1 WHERE c is NOT NULL; + QUERY PLAN +--------------------------------------- + Unique + -> Sort + Sort Key: c, d + -> Seq Scan on uqk1 + Filter: (c IS NOT NULL) +(5 rows) + +ALTER TABLE uqk1 ALTER COLUMN d SET NOT NULL; +EXPLAIN (COSTS OFF) SELECT DISTINCT c, d FROM uqk1 WHERE c is NOT NULL; + QUERY PLAN +--------------------------- + Seq Scan on uqk1 + Filter: (c IS NOT NULL) +(2 rows) + +EXPLAIN (COSTS OFF) SELECT DISTINCT d FROM uqk1 WHERE c is NOT NULL; + QUERY PLAN +--------------------------------- + HashAggregate + Group Key: d + -> Seq Scan on uqk1 + Filter: (c IS NOT NULL) +(4 rows) + +EXPLAIN (COSTS OFF) SELECT DISTINCT a FROM uqk1 WHERE pk = 1; + QUERY PLAN +-------------------- + Seq Scan on uqk1 + Filter: (pk = 1) +(2 rows) + +-- Test join +-- both uqk1 (c, d) and uqk2(pk) are unique key, so distinct is not needed. 
+EXPLAIN (COSTS OFF) SELECT DISTINCT uqk1.c, uqk1.d FROM uqk1, uqk2 +WHERE uqk1.c is NOT NULL AND uqk1.a = uqk2.pk; + QUERY PLAN +--------------------------------- + Hash Join + Hash Cond: (uqk1.a = uqk2.pk) + -> Seq Scan on uqk1 + Filter: (c IS NOT NULL) + -> Hash + -> Seq Scan on uqk2 +(6 rows) + +-- Distinct is needed since the outer join +EXPLAIN (COSTS OFF) SELECT DISTINCT uqk1.pk FROM uqk1 RIGHT JOIN uqk2 ON (uqk1.a = uqk2.pk) +order BY 1; + QUERY PLAN +--------------------------------------------- + Unique + -> Sort + Sort Key: uqk1.pk + -> Hash Right Join + Hash Cond: (uqk1.a = uqk2.pk) + -> Seq Scan on uqk1 + -> Hash + -> Seq Scan on uqk2 +(8 rows) + +SELECT DISTINCT uqk1.pk FROM uqk1 RIGHT JOIN uqk2 ON (uqk1.a = uqk2.pk) order BY 1; + pk +---- + 1 + +(2 rows) + +-- Distinct is not needed since uqk1 is the left table in outer join +EXPLAIN (COSTS OFF) SELECT DISTINCT uqk1.c, uqk1.d FROM uqk1 LEFT JOIN uqk2 ON (uqk1.a = uqk2.pk) +WHERE uqk1.c is NOT NULL order BY 1, 2; + QUERY PLAN +--------------------------------- + Sort + Sort Key: uqk1.c, uqk1.d + -> Seq Scan on uqk1 + Filter: (c IS NOT NULL) +(4 rows) + +SELECT DISTINCT uqk1.c, uqk1.d FROM uqk1 LEFT JOIN uqk2 ON (uqk1.a = uqk2.pk) +WHERE uqk1.c is NOT NULL order BY 1, 2; + c | d +---+--- + 1 | 1 + 2 | 2 + 3 | 3 +(3 rows) + +-- Distinct is ok even with NOT clause-list both UNIQUE keys shown in targetlist +EXPLAIN (COSTS OFF) SELECT DISTINCT * FROM uqk1, uqk2; + QUERY PLAN +------------------------------ + Nested Loop + -> Seq Scan on uqk1 + -> Materialize + -> Seq Scan on uqk2 +(4 rows) + +SELECT DISTINCT * FROM uqk1, uqk2 order BY 1, 2, 3, 4, 5, 6; + a | pk | c | d | a | pk | c | d +---+----+---+---+---+----+---+--- + 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 + 1 | 1 | 1 | 1 | 4 | 4 | 4 | 4 + 1 | 1 | 1 | 1 | 5 | 5 | 5 | 5 + 2 | 2 | 2 | 2 | 1 | 1 | 1 | 1 + 2 | 2 | 2 | 2 | 4 | 4 | 4 | 4 + 2 | 2 | 2 | 2 | 5 | 5 | 5 | 5 + 3 | 3 | 3 | 3 | 1 | 1 | 1 | 1 + 3 | 3 | 3 | 3 | 4 | 4 | 4 | 4 + 3 | 3 | 3 | 3 | 5 | 5 | 5 | 5 +(9 
rows) + +-- Test Semi/Anti JOIN +EXPLAIN (COSTS OFF) SELECT DISTINCT pk FROM uqk1 WHERE d in (SELECT d FROM uqk2); + QUERY PLAN +-------------------------------- + Hash Semi Join + Hash Cond: (uqk1.d = uqk2.d) + -> Seq Scan on uqk1 + -> Hash + -> Seq Scan on uqk2 +(5 rows) + +EXPLAIN (COSTS OFF) SELECT DISTINCT pk FROM uqk1 WHERE d NOT in (SELECT d FROM uqk2); + QUERY PLAN +------------------------------------ + Seq Scan on uqk1 + Filter: (NOT (hashed SubPlan 1)) + SubPlan 1 + -> Seq Scan on uqk2 +(4 rows) + +-- Test Unique Key FOR one-row case, DISTINCT is NOT needed as well. +-- uqk1.d is a uniquekey due to onerow rule. uqk2.pk is pk +EXPLAIN (COSTS OFF) SELECT DISTINCT uqk1.d FROM uqk1, uqk2 WHERE uqk1.pk = 2 AND uqk1.d = uqk2.pk; + QUERY PLAN +----------------------------------- + Nested Loop + Join Filter: (uqk1.d = uqk2.pk) + -> Seq Scan on uqk1 + Filter: (pk = 2) + -> Seq Scan on uqk2 +(5 rows) + +SELECT DISTINCT uqk1.d FROM uqk1, uqk2 WHERE uqk1.pk = 2 AND uqk1.d = uqk2.pk order BY 1; + d +--- +(0 rows) + +-- Both uqk1.d AND uqk2.c are uniquekeys due to onerow rule +EXPLAIN (COSTS OFF) SELECT DISTINCT uqk1.d FROM uqk1, uqk2 WHERE uqk1.pk = 2 +AND uqk2.pk = 1 AND uqk1.d = uqk2.d ; + QUERY PLAN +---------------------------------- + Nested Loop + Join Filter: (uqk1.d = uqk2.d) + -> Seq Scan on uqk1 + Filter: (pk = 2) + -> Seq Scan on uqk2 + Filter: (pk = 1) +(6 rows) + +SELECT DISTINCT uqk1.d FROM uqk1, uqk2 WHERE uqk1.pk = 2 AND uqk2.pk = 1 +AND uqk1.d = uqk2.d order BY 1; + d +--- +(0 rows) + +-- Both UniqueKey in targetList +EXPLAIN (COSTS OFF) SELECT DISTINCT uqk1.c, uqk2.c FROM uqk1, uqk2 WHERE uqk1.pk = 2 AND uqk2.pk = 1; + QUERY PLAN +-------------------------- + Nested Loop + -> Seq Scan on uqk1 + Filter: (pk = 2) + -> Seq Scan on uqk2 + Filter: (pk = 1) +(5 rows) + +SELECT DISTINCT uqk1.c, uqk2.c FROM uqk1, uqk2 WHERE uqk1.pk = 2 AND uqk2.pk = 1 order BY 1, 2; + c | c +---+--- + 2 | 1 +(1 row) + +-- Test SubQuery +-- t2(a, c) is UNIQUE key
because OF group BY +EXPLAIN (COSTS OFF) SELECT DISTINCT t2.a, t2.c FROM uqk1 t1 inner JOIN +(SELECT a, c, sum(pk) as t2b FROM uqk2 group BY a, c) t2 +ON (t2.t2b = t1.pk); + QUERY PLAN +--------------------------------------- + Hash Join + Hash Cond: ((sum(uqk2.pk)) = t1.pk) + -> HashAggregate + Group Key: uqk2.a, uqk2.c + -> Seq Scan on uqk2 + -> Hash + -> Seq Scan on uqk1 t1 +(7 rows) + +-- Test Partition TABLE +-- Test partitioned TABLE +CREATE TABLE dist_p (a int, b int NOT NULL, c int NOT NULL, d int) PARTITION BY RANGE (b); +CREATE TABLE dist_p0 PARTITION OF dist_p FOR VALUES FROM (1) to (10); +CREATE TABLE dist_p1 PARTITION OF dist_p FOR VALUES FROM (11) to (20); +-- CREATE unique INDEX ON dist_p +CREATE UNIQUE INDEX dist_p_uk_b_c ON dist_p(b, c); +EXPLAIN (COSTS OFF) SELECT DISTINCT * FROM dist_p; + QUERY PLAN +------------------------------------ + Append + -> Seq Scan on dist_p0 dist_p_1 + -> Seq Scan on dist_p1 dist_p_2 +(3 rows) + +DROP INDEX dist_p_uk_b_c; +-- we also support CREATE unique INDEX ON each child table +CREATE UNIQUE INDEX dist_p0_uk_bc ON dist_p0(b, c); +-- NOT ok, since dist_p1 has no such INDEX +EXPLAIN (COSTS OFF) SELECT DISTINCT * FROM dist_p; + QUERY PLAN +----------------------------------------------------- + HashAggregate + Group Key: dist_p.a, dist_p.b, dist_p.c, dist_p.d + -> Append + -> Seq Scan on dist_p0 dist_p_1 + -> Seq Scan on dist_p1 dist_p_2 +(5 rows) + +CREATE UNIQUE INDEX dist_p1_uk_bc ON dist_p1(b, c); +-- OK now +EXPLAIN (COSTS OFF) SELECT DISTINCT * FROM dist_p; + QUERY PLAN +------------------------------------ + Append + -> Seq Scan on dist_p0 dist_p_1 + -> Seq Scan on dist_p1 dist_p_2 +(3 rows) + +DROP INDEX dist_p0_uk_bc; +DROP INDEX dist_p1_uk_bc; +-- uk is same ON all child tables, however it doesn't include the partkey, so NOT ok as well.
+CREATE UNIQUE INDEX dist_p0_uk_c ON dist_p0(c); +CREATE UNIQUE INDEX dist_p1_uk_c ON dist_p1(c); +EXPLAIN (COSTS OFF) SELECT DISTINCT * FROM dist_p; + QUERY PLAN +----------------------------------------------------- + HashAggregate + Group Key: dist_p.a, dist_p.b, dist_p.c, dist_p.d + -> Append + -> Seq Scan on dist_p0 dist_p_1 + -> Seq Scan on dist_p1 dist_p_2 +(5 rows) + +DROP TABLE dist_p; +-- Test some VIEW +CREATE VIEW distinct_v1 as SELECT DISTINCT c, d FROM uqk1 WHERE c is NOT NULL; +EXPLAIN (COSTS OFF) SELECT DISTINCT * FROM distinct_v1; + QUERY PLAN +--------------------------- + Seq Scan on uqk1 + Filter: (c IS NOT NULL) +(2 rows) + +ALTER TABLE uqk1 ALTER COLUMN d DROP NOT NULL; +EXPLAIN (COSTS OFF) SELECT DISTINCT * FROM distinct_v1; + QUERY PLAN +--------------------------------------- + Unique + -> Sort + Sort Key: uqk1.c, uqk1.d + -> Seq Scan on uqk1 + Filter: (c IS NOT NULL) +(5 rows) + +-- Test generic plan +ALTER TABLE uqk1 ALTER COLUMN d SET NOT NULL; +prepare pt as SELECT * FROM distinct_v1; +EXPLAIN (COSTS OFF) execute pt; + QUERY PLAN +--------------------------- + Seq Scan on uqk1 + Filter: (c IS NOT NULL) +(2 rows) + +ALTER TABLE uqk1 ALTER COLUMN d DROP NOT NULL; +EXPLAIN (COSTS OFF) execute pt; + QUERY PLAN +--------------------------------------- + Unique + -> Sort + Sort Key: uqk1.c, uqk1.d + -> Seq Scan on uqk1 + Filter: (c IS NOT NULL) +(5 rows) + +DEALLOCATE pt; +DROP VIEW distinct_v1; +DROP TABLE uqk1; +DROP TABLE uqk2; diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql index 9480abd577..3446c3e9fd 100644 --- a/src/test/regress/sql/aggregates.sql +++ b/src/test/regress/sql/aggregates.sql @@ -394,13 +394,13 @@ explain (costs off) select * from t1 group by a,b,c,d; explain (costs off) select a,c from t1 group by a,c,d; -- Test removal across multiple relations -explain (costs off) select * +explain (costs off) select t2.* from t1 inner join t2 on t1.a = t2.x and t1.b = t2.y -group by
t1.a,t1.b,t1.c,t1.d,t2.x,t2.y,t2.z; +group by t1.a,t1.c,t1.d,t2.x,t2.y,t2.z; -- Test case where t1 can be optimized but not t2 explain (costs off) select t1.*,t2.x,t2.z -from t1 inner join t2 on t1.a = t2.x and t1.b = t2.y +from t1 right join t2 on t1.a = t2.x and t1.b = t2.y group by t1.a,t1.b,t1.c,t1.d,t2.x,t2.z; -- Cannot optimize when PK is deferrable diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql index 5fc6617369..d52a6052de 100644 --- a/src/test/regress/sql/join.sql +++ b/src/test/regress/sql/join.sql @@ -1479,11 +1479,11 @@ reset enable_nestloop; begin; CREATE TEMP TABLE a (id int PRIMARY KEY, b_id int); -CREATE TEMP TABLE b (id int PRIMARY KEY, c_id int); +CREATE TEMP TABLE b (id int PRIMARY KEY, c_id int, d int); CREATE TEMP TABLE c (id int PRIMARY KEY); CREATE TEMP TABLE d (a int, b int); INSERT INTO a VALUES (0, 0), (1, NULL); -INSERT INTO b VALUES (0, 0), (1, NULL); +INSERT INTO b VALUES (0, 0, 1), (1, NULL, 1); INSERT INTO c VALUES (0), (1); INSERT INTO d VALUES (1,3), (2,2), (3,1); @@ -1516,13 +1516,13 @@ select d.* from d left join (select distinct * from b) s -- primary key and so drop b.c_id from the GROUP BY of the resulting plan; -- but this happens too late for join removal in the outer plan level.)
explain (costs off) -select d.* from d left join (select * from b group by b.id, b.c_id) s - on d.a = s.id; +select d.* from d left join (select d, c_id from b group by b.d, b.c_id) s + on d.a = s.d; -- similarly, but keying off a DISTINCT clause explain (costs off) -select d.* from d left join (select distinct * from b) s - on d.a = s.id; +select d.* from d left join (select distinct c_id, d from b) s + on d.a = s.d; -- check join removal works when uniqueness of the join condition is enforced -- by a UNION diff --git a/src/test/regress/sql/select_distinct.sql b/src/test/regress/sql/select_distinct.sql index 33102744eb..72716217c0 100644 --- a/src/test/regress/sql/select_distinct.sql +++ b/src/test/regress/sql/select_distinct.sql @@ -135,3 +135,121 @@ SELECT 1 IS NOT DISTINCT FROM 2 as "no"; SELECT 2 IS NOT DISTINCT FROM 2 as "yes"; SELECT 2 IS NOT DISTINCT FROM null as "no"; SELECT null IS NOT DISTINCT FROM null as "yes"; + + +CREATE TABLE uqk1(a int, pk int primary key, c int, d int); +CREATE TABLE uqk2(a int, pk int primary key, c int, d int); +INSERT INTO uqk1 VALUES(1, 1, 1, 1), (2, 2, 2, 2), (3, 3, 3, 3); +INSERT INTO uqk2 VALUES(1, 1, 1, 1), (4, 4, 4, 4), (5, 5, 5, 5); +ANALYZE uqk1; +ANALYZE uqk2; + +-- Test single table +EXPLAIN (COSTS OFF) SELECT DISTINCT * FROM uqk1; +EXPLAIN (COSTS OFF) SELECT DISTINCT c, d FROM uqk1; + +CREATE UNIQUE INDEX uqk1_ukcd ON uqk1(c, d); + +EXPLAIN (COSTS OFF) SELECT DISTINCT c, d FROM uqk1; +EXPLAIN (COSTS OFF) SELECT DISTINCT c, d FROM uqk1 WHERE c is NOT NULL; +ALTER TABLE uqk1 ALTER COLUMN d SET NOT NULL; +EXPLAIN (COSTS OFF) SELECT DISTINCT c, d FROM uqk1 WHERE c is NOT NULL; +EXPLAIN (COSTS OFF) SELECT DISTINCT d FROM uqk1 WHERE c is NOT NULL; +EXPLAIN (COSTS OFF) SELECT DISTINCT a FROM uqk1 WHERE pk = 1; + + +-- Test join +-- both uqk1 (c, d) and uqk2(pk) are unique key, so distinct is not needed.
+ +EXPLAIN (COSTS OFF) SELECT DISTINCT uqk1.c, uqk1.d FROM uqk1, uqk2 +WHERE uqk1.c is NOT NULL AND uqk1.a = uqk2.pk; + +-- Distinct is needed since the outer join +EXPLAIN (COSTS OFF) SELECT DISTINCT uqk1.pk FROM uqk1 RIGHT JOIN uqk2 ON (uqk1.a = uqk2.pk) +order BY 1; + +SELECT DISTINCT uqk1.pk FROM uqk1 RIGHT JOIN uqk2 ON (uqk1.a = uqk2.pk) order BY 1; + +-- Distinct is not needed since uqk1 is the left table in outer join +EXPLAIN (COSTS OFF) SELECT DISTINCT uqk1.c, uqk1.d FROM uqk1 LEFT JOIN uqk2 ON (uqk1.a = uqk2.pk) +WHERE uqk1.c is NOT NULL order BY 1, 2; + +SELECT DISTINCT uqk1.c, uqk1.d FROM uqk1 LEFT JOIN uqk2 ON (uqk1.a = uqk2.pk) +WHERE uqk1.c is NOT NULL order BY 1, 2; + +-- Distinct is ok even with NOT clause-list both UNIQUE keys shown in targetlist +EXPLAIN (COSTS OFF) SELECT DISTINCT * FROM uqk1, uqk2; +SELECT DISTINCT * FROM uqk1, uqk2 order BY 1, 2, 3, 4, 5, 6; + +-- Test Semi/Anti JOIN +EXPLAIN (COSTS OFF) SELECT DISTINCT pk FROM uqk1 WHERE d in (SELECT d FROM uqk2); +EXPLAIN (COSTS OFF) SELECT DISTINCT pk FROM uqk1 WHERE d NOT in (SELECT d FROM uqk2); + +-- Test Unique Key FOR one-row case, DISTINCT is NOT needed as well. +-- uqk1.d is a uniquekey due to onerow rule.
uqk2.pk is pk +EXPLAIN (COSTS OFF) SELECT DISTINCT uqk1.d FROM uqk1, uqk2 WHERE uqk1.pk = 2 AND uqk1.d = uqk2.pk; +SELECT DISTINCT uqk1.d FROM uqk1, uqk2 WHERE uqk1.pk = 2 AND uqk1.d = uqk2.pk order BY 1; + +-- Both uqk1.d AND uqk2.c are uniquekeys due to onerow rule +EXPLAIN (COSTS OFF) SELECT DISTINCT uqk1.d FROM uqk1, uqk2 WHERE uqk1.pk = 2 +AND uqk2.pk = 1 AND uqk1.d = uqk2.d ; + +SELECT DISTINCT uqk1.d FROM uqk1, uqk2 WHERE uqk1.pk = 2 AND uqk2.pk = 1 +AND uqk1.d = uqk2.d order BY 1; + +-- Both UniqueKey in targetList +EXPLAIN (COSTS OFF) SELECT DISTINCT uqk1.c, uqk2.c FROM uqk1, uqk2 WHERE uqk1.pk = 2 AND uqk2.pk = 1; +SELECT DISTINCT uqk1.c, uqk2.c FROM uqk1, uqk2 WHERE uqk1.pk = 2 AND uqk2.pk = 1 order BY 1, 2; + +-- Test SubQuery +-- t2(a, c) is UNIQUE key because OF group BY +EXPLAIN (COSTS OFF) SELECT DISTINCT t2.a, t2.c FROM uqk1 t1 inner JOIN +(SELECT a, c, sum(pk) as t2b FROM uqk2 group BY a, c) t2 +ON (t2.t2b = t1.pk); + +-- Test Partition TABLE +-- Test partitioned TABLE +CREATE TABLE dist_p (a int, b int NOT NULL, c int NOT NULL, d int) PARTITION BY RANGE (b); +CREATE TABLE dist_p0 PARTITION OF dist_p FOR VALUES FROM (1) to (10); +CREATE TABLE dist_p1 PARTITION OF dist_p FOR VALUES FROM (11) to (20); + +-- CREATE unique INDEX ON dist_p +CREATE UNIQUE INDEX dist_p_uk_b_c ON dist_p(b, c); +EXPLAIN (COSTS OFF) SELECT DISTINCT * FROM dist_p; +DROP INDEX dist_p_uk_b_c; + +-- we also support CREATE unique INDEX ON each child table +CREATE UNIQUE INDEX dist_p0_uk_bc ON dist_p0(b, c); +-- NOT ok, since dist_p1 has no such INDEX +EXPLAIN (COSTS OFF) SELECT DISTINCT * FROM dist_p; +CREATE UNIQUE INDEX dist_p1_uk_bc ON dist_p1(b, c); +-- OK now +EXPLAIN (COSTS OFF) SELECT DISTINCT * FROM dist_p; + +DROP INDEX dist_p0_uk_bc; +DROP INDEX dist_p1_uk_bc; + +-- uk is same ON all child tables, however it doesn't include the partkey, so NOT ok as well.
+CREATE UNIQUE INDEX dist_p0_uk_c ON dist_p0(c); +CREATE UNIQUE INDEX dist_p1_uk_c ON dist_p1(c); +EXPLAIN (COSTS OFF) SELECT DISTINCT * FROM dist_p; + +DROP TABLE dist_p; + +-- Test some VIEW +CREATE VIEW distinct_v1 as SELECT DISTINCT c, d FROM uqk1 WHERE c is NOT NULL; +EXPLAIN (COSTS OFF) SELECT DISTINCT * FROM distinct_v1; +ALTER TABLE uqk1 ALTER COLUMN d DROP NOT NULL; +EXPLAIN (COSTS OFF) SELECT DISTINCT * FROM distinct_v1; + +-- Test generic plan +ALTER TABLE uqk1 ALTER COLUMN d SET NOT NULL; +prepare pt as SELECT * FROM distinct_v1; +EXPLAIN (COSTS OFF) execute pt; +ALTER TABLE uqk1 ALTER COLUMN d DROP NOT NULL; +EXPLAIN (COSTS OFF) execute pt; +DEALLOCATE pt; + +DROP VIEW distinct_v1; +DROP TABLE uqk1; +DROP TABLE uqk2; -- 2.21.0