From cd431def79c379143572c71ebb2082b669470415 Mon Sep 17 00:00:00 2001 From: =?utf-8?q?=E4=B8=80=E6=8C=83?= Date: Mon, 11 May 2020 15:50:52 +0800 Subject: [PATCH v37 2/6] Introduce UniqueKey attributes on RelOptInfo struct. UniqueKey is a set of exprs on RelOptInfo which represents the exprs will be unique on the given RelOptInfo. You can see README.uniquekey for more information. --- src/backend/nodes/copyfuncs.c | 13 + src/backend/nodes/list.c | 31 + src/backend/nodes/makefuncs.c | 13 + src/backend/nodes/outfuncs.c | 11 + src/backend/nodes/readfuncs.c | 10 + src/backend/optimizer/path/Makefile | 3 +- src/backend/optimizer/path/README.uniquekey | 131 +++ src/backend/optimizer/path/allpaths.c | 10 + src/backend/optimizer/path/joinpath.c | 9 +- src/backend/optimizer/path/joinrels.c | 2 + src/backend/optimizer/path/pathkeys.c | 3 +- src/backend/optimizer/path/uniquekeys.c | 1131 +++++++++++++++++++ src/backend/optimizer/plan/planner.c | 13 +- src/backend/optimizer/prep/prepunion.c | 2 + src/backend/optimizer/util/appendinfo.c | 44 + src/backend/optimizer/util/inherit.c | 16 +- src/include/nodes/makefuncs.h | 3 + src/include/nodes/nodes.h | 1 + src/include/nodes/pathnodes.h | 29 +- src/include/nodes/pg_list.h | 2 + src/include/optimizer/appendinfo.h | 3 + src/include/optimizer/optimizer.h | 2 + src/include/optimizer/paths.h | 43 + 23 files changed, 1502 insertions(+), 23 deletions(-) create mode 100644 src/backend/optimizer/path/README.uniquekey create mode 100644 src/backend/optimizer/path/uniquekeys.c diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index 2b4d7654cc..34392f5553 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -2271,6 +2271,16 @@ _copyPathKey(const PathKey *from) return newnode; } +static UniqueKey * +_copyUniqueKey(const UniqueKey *from) +{ + UniqueKey *newnode = makeNode(UniqueKey); + + COPY_NODE_FIELD(exprs); + COPY_SCALAR_FIELD(multi_nullvals); + + return newnode; +} /* * 
_copyRestrictInfo */ @@ -5149,6 +5159,9 @@ copyObjectImpl(const void *from) case T_PathKey: retval = _copyPathKey(from); break; + case T_UniqueKey: + retval = _copyUniqueKey(from); + break; case T_RestrictInfo: retval = _copyRestrictInfo(from); break; diff --git a/src/backend/nodes/list.c b/src/backend/nodes/list.c index efa44342c4..20daf4a9fd 100644 --- a/src/backend/nodes/list.c +++ b/src/backend/nodes/list.c @@ -687,6 +687,37 @@ list_member_oid(const List *list, Oid datum) return false; } +/* + * Return true iff every entry in the "members" list is also present + * in the "target" list. Entries are compared with equal(); if "members" has no entries the loop is skipped and the result is true. + */ +bool +list_is_subset(const List *members, const List *target) +{ + const ListCell *lc1, *lc2; + + Assert(IsPointerList(members)); + Assert(IsPointerList(target)); + check_list_invariants(members); + check_list_invariants(target); + + foreach(lc1, members) + { + bool found = false; + foreach(lc2, target) /* linear search of target for this member */ + { + if (equal(lfirst(lc1), lfirst(lc2))) + { + found = true; + break; + } + } + if (!found) + return false; + } + return true; +} + /* * Delete the n'th cell (counting from 0) in list. 
* diff --git a/src/backend/nodes/makefuncs.c b/src/backend/nodes/makefuncs.c index 49de285f01..646cf7c9a1 100644 --- a/src/backend/nodes/makefuncs.c +++ b/src/backend/nodes/makefuncs.c @@ -814,3 +814,16 @@ makeVacuumRelation(RangeVar *relation, Oid oid, List *va_cols) v->va_cols = va_cols; return v; } + + +/* + * makeUniqueKey - build a UniqueKey node from the given exprs list and multi_nullvals flag; the exprs list is stored as passed, not copied + */ +UniqueKey* +makeUniqueKey(List *exprs, bool multi_nullvals) +{ + UniqueKey * ukey = makeNode(UniqueKey); + ukey->exprs = exprs; + ukey->multi_nullvals = multi_nullvals; + return ukey; +} diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index 08a049232e..53cf4fcfa1 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -2426,6 +2426,14 @@ _outPathKey(StringInfo str, const PathKey *node) WRITE_BOOL_FIELD(pk_nulls_first); } +static void +_outUniqueKey(StringInfo str, const UniqueKey *node) +{ + WRITE_NODE_TYPE("UNIQUEKEY"); + WRITE_NODE_FIELD(exprs); + WRITE_BOOL_FIELD(multi_nullvals); +} + static void _outPathTarget(StringInfo str, const PathTarget *node) { @@ -4125,6 +4133,9 @@ outNode(StringInfo str, const void *obj) case T_PathKey: _outPathKey(str, obj); break; + case T_UniqueKey: + _outUniqueKey(str, obj); + break; case T_PathTarget: _outPathTarget(str, obj); break; diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c index ab7b535caa..7b9e8c3292 100644 --- a/src/backend/nodes/readfuncs.c +++ b/src/backend/nodes/readfuncs.c @@ -452,6 +452,14 @@ _readSetOperationStmt(void) READ_DONE(); } +static UniqueKey * +_readUniqueKey(void) +{ + READ_LOCALS(UniqueKey); + READ_NODE_FIELD(exprs); + READ_BOOL_FIELD(multi_nullvals); + READ_DONE(); +} /* * Stuff from primnodes.h. 
@@ -2654,6 +2662,8 @@ parseNodeString(void) return_value = _readCommonTableExpr(); else if (MATCH("SETOPERATIONSTMT", 16)) return_value = _readSetOperationStmt(); + else if (MATCH("UNIQUEKEY", 9)) + return_value = _readUniqueKey(); else if (MATCH("ALIAS", 5)) return_value = _readAlias(); else if (MATCH("RANGEVAR", 8)) diff --git a/src/backend/optimizer/path/Makefile b/src/backend/optimizer/path/Makefile index 1e199ff66f..7b9820c25f 100644 --- a/src/backend/optimizer/path/Makefile +++ b/src/backend/optimizer/path/Makefile @@ -21,6 +21,7 @@ OBJS = \ joinpath.o \ joinrels.o \ pathkeys.o \ - tidpath.o + tidpath.o \ + uniquekeys.o include $(top_srcdir)/src/backend/common.mk diff --git a/src/backend/optimizer/path/README.uniquekey b/src/backend/optimizer/path/README.uniquekey new file mode 100644 index 0000000000..5eac761995 --- /dev/null +++ b/src/backend/optimizer/path/README.uniquekey @@ -0,0 +1,131 @@ +1. What is UniqueKey? +We can think of a UniqueKey as a set of exprs for a RelOptInfo which we are sure +never yields the same result for two different rows. The simplest UniqueKey +is a primary key. + +We define the UniqueKey as below. + +typedef struct UniqueKey +{ + NodeTag type; + List *exprs; + bool multi_nullvals; +} UniqueKey; + +exprs is a list of exprs which is unique on the current RelOptInfo. exprs = NIL +is a special case of UniqueKey, which means there is only one row in that +relation. It has stronger semantics than the others. For example, in SELECT uk FROM t, uk is a +normal unique key and may have different values; in SELECT colx FROM t WHERE uk = +const, colx is unique AND we have only 1 value. This field can be used for +innerrel_is_unique. This logic is handled specially in the add_uniquekey_for_onerow +function. + +multi_nullvals: true means multiple null values may exist in these exprs, so the +uniqueness is not guaranteed in this case. This field is necessary for +remove_useless_join & reduce_unique_semijoins where we don't mind these +duplicated NULL values. 
It is set to true in 2 cases. One is a unique key +from a unique index whose related column is nullable. The other one is for +outer joins; see populate_joinrel_uniquekeys for details. + + +The UniqueKey can be used in at least the following cases: +1. remove_useless_joins. +2. reduce_unique_semijoins. +3. remove the distinct node if the distinct clause is unique. +4. remove the aggnode if the group by clause is unique. +5. Index Skip Scan (WIP) +6. Aggregation Push Down without 2 phase aggregation if the join can't + duplicate the aggregated rows. (work in progress feature) + +2. How is it maintained? + +We have a set of populate_xxx_uniquekeys functions to maintain the uniquekey in +various cases. xxx includes baserel, joinrel, partitionedrel, distinctrel, +grouprel, unionrel. We also need to convert the uniquekey from a subquery +to the outer relation, which is what convert_subquery_uniquekeys does. + +1. The first part is about baserel. We handle 3 cases. Suppose we have a unique +index on (a, b). + +1. SELECT a, b FROM t. UniqueKey (a, b) +2. SELECT a FROM t WHERE b = 1; UniqueKey (a) +3. SELECT .. FROM t WHERE a = 1 AND b = 1; UniqueKey (NIL). onerow case, every + column is unique. + +2. The next part is joinrel. This part is the most error-prone, so we simplified the rules +as below: +1. If the relation's UniqueKey can't be duplicated after the join, then it will + still be valid for the join rel. The function we use here is + innerrel_keeps_unique. The basic idea is innerrel.any_col = outer.uk. + +2. If the UniqueKey can't be kept valid via rule 1, the combination of the + UniqueKeys from both sides is valid for sure. We can prove this as: if the + unique exprs from rel1 are duplicated by rel2, the duplicated rows must + contain different unique exprs from rel2. + +More considerations about onerow: +1. If a relation has one row and it can't be duplicated, it may still + contain multi_nullvals after an outer join. +2. 
If either UniqueKey can be duplicated after the join, we can get one row + only when both sides are one row AND there is no outer join. +3. Whenever the onerow UniqueKey is not valid any more, we need to convert the one + row UniqueKey to normal unique keys since we don't store exprs for a one-row + relation. get_exprs_from_uniquekey will be used here. + + +More considerations about multi_nullvals after join: +1. If the original UniqueKey has multi_nullvals, the final UniqueKey will have + multi_nullvals in any case. +2. If a unique key doesn't allow multi_nullvals, after some outer joins it + may allow multi_nullvals. + + +3. When it comes to subqueries, we need convert_subquery_uniquekeys, just like +convert_subquery_pathkeys. Only a UniqueKey inside the subquery that is referenced as +a Var in the outer relation will be reused. The relationship between the outerrel.Var +and subquery.exprs is built with outerrel->subroot->processed_tlist. + + +4. As for SRFs, they break the uniqueness of a uniquekey. However, that +is handled in adjust_paths_for_srfs, which happens after query_planner, so +we maintain the UniqueKey until then and reset it to NIL at that +place. This can't help the distinct/group by elimination cases but probably helps +in some other cases, like reduce_unique_semijoins/remove_useless_joins, and it is +semantically correct. + + +5. As for inheritance tables, we first maintain the UniqueKey on childrel as well. But for +a partitioned table we need to maintain 2 different kinds of +UniqueKey: 1). UniqueKey on the parent relation 2). UniqueKey on the child +relation for partition-wise queries. + +Example: +CREATE TABLE p (a int not null, b int not null) partition by list (a); +CREATE TABLE p0 partition of p for values in (1); +CREATE TABLE p1 partition of p for values in (2); + +create unique index p0_b on p0(b); +create unique index p1_b on p1(b); + +Now b is only unique on partition level, so the distinct can't be removed in +the following case. 
SELECT DISTINCT b FROM p; + +Another example is SELECT DISTINCT a, b FROM p WHERE a = 1; Since only one +partition is chosen, the UniqueKey on the child relation is the same as the UniqueKey on +the parent relation. + +Another use of UniqueKey on partition level is that it is helpful for +partition-wise join. + +As for the UniqueKey on parent table level, it comes in 2 different ways: +1). the UniqueKey is also derived from a unique index, but the index must be the same +in all the related child relations and the unique index must contain the +partition key. Example: + +CREATE UNIQUE INDEX p_ab ON p(a, b); -- where a is the partition key. + +-- Query +SELECT a, b FROM p; the (a, b) is a UniqueKey of p. + +2). If the parent relation has only one childrel, the UniqueKey on childrel is + the UniqueKey on parent as well. diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index 754f6d64f6..66d246fa1a 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -579,6 +579,12 @@ set_plain_rel_size(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) */ check_index_predicates(root, rel); + /* + * Now that we've marked which partial indexes are suitable, we can now + * build the relation's unique keys. + */ + populate_baserel_uniquekeys(root, rel, rel->indexlist); + /* Mark rel with estimated output rows, width, etc */ set_baserel_size_estimates(root, rel); } @@ -1314,6 +1320,8 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, /* Add paths to the append relation. */ add_paths_to_append_rel(root, rel, live_childrels); + if (IS_PARTITIONED_REL(rel)) + populate_partitionedrel_uniquekeys(root, rel, live_childrels); } @@ -2387,6 +2395,8 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel, pathkeys, required_outer)); } + convert_subquery_uniquekeys(root, rel, sub_final_rel); + /* If outer rel allows parallelism, do same for partial paths. 
*/ if (rel->consider_parallel && bms_is_empty(required_outer)) { diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c index 4a35903b29..f41be18e82 100644 --- a/src/backend/optimizer/path/joinpath.c +++ b/src/backend/optimizer/path/joinpath.c @@ -71,13 +71,6 @@ static void consider_parallel_mergejoin(PlannerInfo *root, static void hash_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, JoinType jointype, JoinPathExtraData *extra); -static List *select_mergejoin_clauses(PlannerInfo *root, - RelOptInfo *joinrel, - RelOptInfo *outerrel, - RelOptInfo *innerrel, - List *restrictlist, - JoinType jointype, - bool *mergejoin_allowed); static void generate_mergejoin_paths(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *innerrel, @@ -1927,7 +1920,7 @@ hash_inner_and_outer(PlannerInfo *root, * if it is mergejoinable and involves vars from the two sub-relations * currently of interest. */ -static List * +List * select_mergejoin_clauses(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *outerrel, diff --git a/src/backend/optimizer/path/joinrels.c b/src/backend/optimizer/path/joinrels.c index 2d343cd293..b9163ee8ff 100644 --- a/src/backend/optimizer/path/joinrels.c +++ b/src/backend/optimizer/path/joinrels.c @@ -924,6 +924,8 @@ populate_joinrel_with_paths(PlannerInfo *root, RelOptInfo *rel1, /* Apply partitionwise join technique, if possible. 
*/ try_partitionwise_join(root, rel1, rel2, joinrel, sjinfo, restrictlist); + + populate_joinrel_uniquekeys(root, joinrel, rel1, rel2, restrictlist, sjinfo->jointype); } diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c index ce9bf87e9b..7e596d4194 100644 --- a/src/backend/optimizer/path/pathkeys.c +++ b/src/backend/optimizer/path/pathkeys.c @@ -33,7 +33,6 @@ static bool pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys); static bool matches_boolean_partition_clause(RestrictInfo *rinfo, RelOptInfo *partrel, int partkeycol); -static Var *find_var_for_subquery_tle(RelOptInfo *rel, TargetEntry *tle); static bool right_merge_direction(PlannerInfo *root, PathKey *pathkey); @@ -1035,7 +1034,7 @@ convert_subquery_pathkeys(PlannerInfo *root, RelOptInfo *rel, * We need this to ensure that we don't return pathkeys describing values * that are unavailable above the level of the subquery scan. */ -static Var * +Var * find_var_for_subquery_tle(RelOptInfo *rel, TargetEntry *tle) { ListCell *lc; diff --git a/src/backend/optimizer/path/uniquekeys.c b/src/backend/optimizer/path/uniquekeys.c new file mode 100644 index 0000000000..c7ad76d28f --- /dev/null +++ b/src/backend/optimizer/path/uniquekeys.c @@ -0,0 +1,1131 @@ +/*------------------------------------------------------------------------- + * + * uniquekeys.c + * Utilities for matching and building unique keys + * + * Portions Copyright (c) 2020, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/backend/optimizer/path/uniquekeys.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "nodes/makefuncs.h" +#include "nodes/nodeFuncs.h" +#include "optimizer/pathnode.h" +#include "optimizer/paths.h" +#include "optimizer/appendinfo.h" +#include "optimizer/optimizer.h" +#include "optimizer/tlist.h" +#include "rewrite/rewriteManip.h" + + +/* + * This struct is used to help populate_joinrel_uniquekeys. 
+ * + * added_to_joinrel is true if a uniquekey (from outerrel or innerrel) + * has been added to joinrel. + * useful is true if the exprs of the uniquekey still appears in joinrel. + */ +typedef struct UniqueKeyContextData +{ + UniqueKey *uniquekey; + bool added_to_joinrel; + bool useful; +} *UniqueKeyContext; + +static List *initililze_uniquecontext_for_joinrel(RelOptInfo *inputrel); +static bool innerrel_keeps_unique(PlannerInfo *root, + RelOptInfo *outerrel, + RelOptInfo *innerrel, + List *restrictlist, + bool reverse); + +static List *get_exprs_from_uniqueindex(IndexOptInfo *unique_index, + List *const_exprs, + List *const_expr_opfamilies, + Bitmapset *used_varattrs, + bool *useful, + bool *multi_nullvals); +static List *get_exprs_from_uniquekey(RelOptInfo *joinrel, + RelOptInfo *rel1, + UniqueKey *ukey); +static void add_uniquekey_for_onerow(RelOptInfo *rel); +static bool add_combined_uniquekey(RelOptInfo *joinrel, + RelOptInfo *outer_rel, + RelOptInfo *inner_rel, + UniqueKey *outer_ukey, + UniqueKey *inner_ukey, + JoinType jointype); + +/* Used for unique indexes checking for partitioned table */ +static bool index_constains_partkey(RelOptInfo *partrel, IndexOptInfo *ind); +static IndexOptInfo *simple_copy_indexinfo_to_parent(PlannerInfo *root, + RelOptInfo *parentrel, + IndexOptInfo *from); +static bool simple_indexinfo_equal(IndexOptInfo *ind1, IndexOptInfo *ind2); +static void adjust_partition_unique_indexlist(PlannerInfo *root, + RelOptInfo *parentrel, + RelOptInfo *childrel, + List **global_unique_index); + +/* Helper function for grouped relation and distinct relation. 
*/ +static void add_uniquekey_from_sortgroups(PlannerInfo *root, + RelOptInfo *rel, + List *sortgroups); + +/* + * populate_baserel_uniquekeys + * Populate 'baserel' uniquekeys list by looking at the rel's unique index + * and baserestrictinfo + */ +void +populate_baserel_uniquekeys(PlannerInfo *root, + RelOptInfo *baserel, + List *indexlist) +{ + ListCell *lc; + List *matched_uniq_indexes = NIL; + + /* Attrs appears in rel->reltarget->exprs. */ + Bitmapset *used_attrs = NULL; + + List *const_exprs = NIL; + List *expr_opfamilies = NIL; + + Assert(baserel->rtekind == RTE_RELATION); + + foreach(lc, indexlist) + { + IndexOptInfo *ind = (IndexOptInfo *) lfirst(lc); + if (!ind->unique || !ind->immediate || + (ind->indpred != NIL && !ind->predOK)) + continue; + matched_uniq_indexes = lappend(matched_uniq_indexes, ind); + } + + if (matched_uniq_indexes == NIL) + return; + + /* Check which attrs is used in baserel->reltarget */ + pull_varattnos((Node *)baserel->reltarget->exprs, baserel->relid, &used_attrs); + + /* Check which attrno is used at a mergeable const filter */ + foreach(lc, baserel->baserestrictinfo) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); + + if (rinfo->mergeopfamilies == NIL) + continue; + + if (bms_is_empty(rinfo->left_relids)) + { + const_exprs = lappend(const_exprs, get_rightop(rinfo->clause)); + } + else if (bms_is_empty(rinfo->right_relids)) + { + const_exprs = lappend(const_exprs, get_leftop(rinfo->clause)); + } + else + continue; + + expr_opfamilies = lappend(expr_opfamilies, rinfo->mergeopfamilies); + } + + foreach(lc, matched_uniq_indexes) + { + bool multi_nullvals, useful; + List *exprs = get_exprs_from_uniqueindex(lfirst_node(IndexOptInfo, lc), + const_exprs, + expr_opfamilies, + used_attrs, + &useful, + &multi_nullvals); + if (useful) + { + if (exprs == NIL) + { + /* All the columns in Unique Index matched with a restrictinfo */ + add_uniquekey_for_onerow(baserel); + return; + } + baserel->uniquekeys = lappend(baserel->uniquekeys, 
+ makeUniqueKey(exprs, multi_nullvals)); + } + } +} + + +/* + * populate_partitionedrel_uniquekeys + * The UniqueKey on partitionrel comes from 2 cases: + * 1). Only one partition is involved in this query, the unique key can be + * copied to parent rel from childrel. + * 2). There are some unique index which includes partition key and exists + * in all the related partitions. + * We never mind rule 2 if we hit rule 1. + */ + +void +populate_partitionedrel_uniquekeys(PlannerInfo *root, + RelOptInfo *rel, + List *childrels) +{ + ListCell *lc; + List *global_uniq_indexlist = NIL; + RelOptInfo *childrel; + bool is_first = true; + + Assert(IS_PARTITIONED_REL(rel)); + + if (childrels == NIL) + return; + + /* + * If there is only one partition used in this query, the UniqueKey in childrel is + * still valid in parent level, but we need convert the format from child expr to + * parent expr. + */ + if (list_length(childrels) == 1) + { + /* Check for Rule 1 */ + RelOptInfo *childrel = linitial_node(RelOptInfo, childrels); + ListCell *lc; + Assert(childrel->reloptkind == RELOPT_OTHER_MEMBER_REL); + if (relation_is_onerow(childrel)) + { + add_uniquekey_for_onerow(rel); + return; + } + + foreach(lc, childrel->uniquekeys) + { + UniqueKey *ukey = lfirst_node(UniqueKey, lc); + AppendRelInfo *appinfo = find_appinfo_by_child(root, childrel->relid); + List *parent_exprs = NIL; + bool can_reuse = true; + ListCell *lc2; + foreach(lc2, ukey->exprs) + { + Var *var = (Var *)lfirst(lc2); + /* + * If the expr comes from a expression, it is hard to build the expression + * in parent so ignore that case for now. 
+ */ + if(!IsA(var, Var)) + { + can_reuse = false; + break; + } + /* Convert it to parent var */ + parent_exprs = lappend(parent_exprs, find_parent_var(appinfo, var)); + } + if (can_reuse) + rel->uniquekeys = lappend(rel->uniquekeys, + makeUniqueKey(parent_exprs, + ukey->multi_nullvals)); + } + } + else + { + /* Check for rule 2 */ + childrel = linitial_node(RelOptInfo, childrels); + foreach(lc, childrel->indexlist) + { + IndexOptInfo *ind = lfirst(lc); + IndexOptInfo *modified_index; + if (!ind->unique || !ind->immediate || + (ind->indpred != NIL && !ind->predOK)) + continue; + + /* + * During simple_copy_indexinfo_to_parent, we need to convert var from + * child var to parent var, index on expression is too complex to handle. + * so ignore it for now. + */ + if (ind->indexprs != NIL) + continue; + + modified_index = simple_copy_indexinfo_to_parent(root, rel, ind); + /* + * If the unique index doesn't contain partkey, then it is unique + * on this partition only, so it is useless for us. + */ + if (!index_constains_partkey(rel, modified_index)) + continue; + + global_uniq_indexlist = lappend(global_uniq_indexlist, modified_index); + } + + if (global_uniq_indexlist != NIL) + { + foreach(lc, childrels) + { + RelOptInfo *child = lfirst(lc); + if (is_first) + { + is_first = false; + continue; + } + adjust_partition_unique_indexlist(root, rel, child, &global_uniq_indexlist); + } + /* Now we have a list of unique index which are exactly same on all childrels, + * Set the UniqueKey just like it is non-partition table + */ + populate_baserel_uniquekeys(root, rel, global_uniq_indexlist); + } + } +} + + +/* + * populate_distinctrel_uniquekeys + */ +void +populate_distinctrel_uniquekeys(PlannerInfo *root, + RelOptInfo *inputrel, + RelOptInfo *distinctrel) +{ + /* The unique key before the distinct is still valid. 
*/ + distinctrel->uniquekeys = list_copy(inputrel->uniquekeys); + add_uniquekey_from_sortgroups(root, distinctrel, root->parse->distinctClause); +} + +/* + * populate_grouprel_uniquekeys - set the uniquekeys of grouprel from inputrel and the query GROUP BY clause + */ +void +populate_grouprel_uniquekeys(PlannerInfo *root, + RelOptInfo *grouprel, + RelOptInfo *inputrel) + +{ + Query *parse = root->parse; + bool input_ukey_added = false; + ListCell *lc; + + if (relation_is_onerow(inputrel)) + { + add_uniquekey_for_onerow(grouprel); + return; + } + if (parse->groupingSets) /* no uniquekeys are derived when grouping sets are present */ + return; + + /* A normal GROUP BY without grouping sets. */ + if (parse->groupClause) + { + /* + * Even when the GROUP BY clause is already unique, a query with aggregates + * still needs a grouprel. To keep the UniqueKey list short, check whether each + * UniqueKey of inputrel is still valid for grouprel; if so, reuse it. + */ + foreach(lc, inputrel->uniquekeys) + { + UniqueKey *ukey = lfirst_node(UniqueKey, lc); + if (list_is_subset(ukey->exprs, grouprel->reltarget->exprs)) + { + grouprel->uniquekeys = lappend(grouprel->uniquekeys, + ukey); + input_ukey_added = true; + } + } + if (!input_ukey_added) + /* + * The GROUP BY clause must be a superset of grouprel->reltarget->exprs except the + * aggregate exprs, so if some of those exprs are unique already, don't bother + * generating a new uniquekey for the GROUP BY exprs. + */ + add_uniquekey_from_sortgroups(root, + grouprel, + root->parse->groupClause); + } + else + /* Aggregation without a GROUP BY, so only one row is returned */ + add_uniquekey_for_onerow(grouprel); +} + +/* + * simple_copy_uniquekeys + * Using a function for the one-line code makes us easy to check where we simply + * copied the uniquekey. 
+ */ +void +simple_copy_uniquekeys(RelOptInfo *oldrel, + RelOptInfo *newrel) +{ + newrel->uniquekeys = oldrel->uniquekeys; /* both rels now point at the same List; no copy is made */ +} + +/* + * populate_unionrel_uniquekeys - record all reltarget exprs of unionrel as a single UniqueKey (multi_nullvals = false) + */ +void +populate_unionrel_uniquekeys(PlannerInfo *root, + RelOptInfo *unionrel) +{ + ListCell *lc; + List *exprs = NIL; + + Assert(unionrel->uniquekeys == NIL); + + foreach(lc, unionrel->reltarget->exprs) + { + exprs = lappend(exprs, lfirst(lc)); + } + + if (exprs == NIL) + /* SQL: select union select; is valid and has no target exprs, so treat it as the one-row case. */ + add_uniquekey_for_onerow(unionrel); + else + unionrel->uniquekeys = lappend(unionrel->uniquekeys, + makeUniqueKey(exprs,false)); + +} + +/* + * populate_joinrel_uniquekeys + * + * populate uniquekeys for joinrel. We will check each relation to see if its + * UniqueKey is still valid via innerrel_keeps_unique, if so, we add it to + * joinrel. The multi_nullvals field will be changed to true for some outer + * join cases and one-row UniqueKey needs to be converted to normal UniqueKey + * for the same case as well. + * For the uniquekey in either baserel which can't be unique after join, we still + * check to see if combination of UniqueKeys from both side is still useful for us. + * if yes, we add it to joinrel as well. 
+ */ +void +populate_joinrel_uniquekeys(PlannerInfo *root, RelOptInfo *joinrel, + RelOptInfo *outerrel, RelOptInfo *innerrel, + List *restrictlist, JoinType jointype) +{ + ListCell *lc, *lc2; + List *clause_list = NIL; + List *outerrel_ukey_ctx; + List *innerrel_ukey_ctx; + bool inner_onerow, outer_onerow; + bool mergejoin_allowed; + + /* Care about the outerrel relation only for SEMI/ANTI join */ + if (jointype == JOIN_SEMI || jointype == JOIN_ANTI) + { + foreach(lc, outerrel->uniquekeys) + { + UniqueKey *uniquekey = lfirst_node(UniqueKey, lc); + if (list_is_subset(uniquekey->exprs, joinrel->reltarget->exprs)) + joinrel->uniquekeys = lappend(joinrel->uniquekeys, uniquekey); + } + return; + } + + Assert(jointype == JOIN_LEFT || jointype == JOIN_FULL || jointype == JOIN_INNER); + + /* Fast path */ + if (innerrel->uniquekeys == NIL || outerrel->uniquekeys == NIL) + return; + + inner_onerow = relation_is_onerow(innerrel); + outer_onerow = relation_is_onerow(outerrel); + + outerrel_ukey_ctx = initililze_uniquecontext_for_joinrel(outerrel); + innerrel_ukey_ctx = initililze_uniquecontext_for_joinrel(innerrel); + + clause_list = select_mergejoin_clauses(root, joinrel, outerrel, innerrel, + restrictlist, jointype, + &mergejoin_allowed); + + if (innerrel_keeps_unique(root, innerrel, outerrel, clause_list, true /* reverse */)) + { + bool outer_impact = jointype == JOIN_FULL; + foreach(lc, outerrel_ukey_ctx) + { + UniqueKeyContext ctx = (UniqueKeyContext)lfirst(lc); + + if (!list_is_subset(ctx->uniquekey->exprs, joinrel->reltarget->exprs)) + { + ctx->useful = false; + continue; + } + + /* Outer relation has one row, and the unique key is not duplicated after join, + * the joinrel will still has one row unless the jointype == JOIN_FULL. + */ + if (outer_onerow && !outer_impact) + { + add_uniquekey_for_onerow(joinrel); + return; + } + else if (outer_onerow) + { + /* + * The onerow outerrel becomes multi rows and multi_nullvals + * will be changed to true. 
We also need to set the exprs correctly since it + * can't be NIL any more. + */ + ListCell *lc2; + foreach(lc2, get_exprs_from_uniquekey(joinrel, outerrel, NULL)) + { + joinrel->uniquekeys = lappend(joinrel->uniquekeys, + makeUniqueKey(lfirst(lc2), true)); + } + } + else + { + if (!ctx->uniquekey->multi_nullvals && outer_impact) + /* Change multi_nullvals to true due to the full join. */ + joinrel->uniquekeys = lappend(joinrel->uniquekeys, + makeUniqueKey(ctx->uniquekey->exprs, true)); + else + /* Just reuse it */ + joinrel->uniquekeys = lappend(joinrel->uniquekeys, + ctx->uniquekey); + } + ctx->added_to_joinrel = true; + } + } + + if (innerrel_keeps_unique(root, outerrel, innerrel, clause_list, false)) + { + bool outer_impact = jointype == JOIN_FULL || jointype == JOIN_LEFT;; + + foreach(lc, innerrel_ukey_ctx) + { + UniqueKeyContext ctx = (UniqueKeyContext)lfirst(lc); + + if (!list_is_subset(ctx->uniquekey->exprs, joinrel->reltarget->exprs)) + { + ctx->useful = false; + continue; + } + + if (inner_onerow && !outer_impact) + { + add_uniquekey_for_onerow(joinrel); + return; + } + else if (inner_onerow) + { + ListCell *lc2; + foreach(lc2, get_exprs_from_uniquekey(joinrel, innerrel, NULL)) + { + joinrel->uniquekeys = lappend(joinrel->uniquekeys, + makeUniqueKey(lfirst(lc2), true)); + } + } + else + { + if (!ctx->uniquekey->multi_nullvals && outer_impact) + /* Need to change multi_nullvals to true due to the outer join. */ + joinrel->uniquekeys = lappend(joinrel->uniquekeys, + makeUniqueKey(ctx->uniquekey->exprs, + true)); + else + joinrel->uniquekeys = lappend(joinrel->uniquekeys, + ctx->uniquekey); + + } + ctx->added_to_joinrel = true; + } + } + + /* + * The combination of the UniqueKey from both sides is unique as well regardless + * of join type, but no bother to add it if its subset has been added to joinrel + * already or it is not useful for the joinrel. 
+ */ + foreach(lc, outerrel_ukey_ctx) + { + UniqueKeyContext ctx1 = (UniqueKeyContext) lfirst(lc); + if (ctx1->added_to_joinrel || !ctx1->useful) + continue; + foreach(lc2, innerrel_ukey_ctx) + { + UniqueKeyContext ctx2 = (UniqueKeyContext) lfirst(lc2); + if (ctx2->added_to_joinrel || !ctx2->useful) + continue; + if (add_combined_uniquekey(joinrel, outerrel, innerrel, + ctx1->uniquekey, ctx2->uniquekey, + jointype)) + /* If we set a onerow UniqueKey to joinrel, we don't need other. */ + return; + } + } +} + + +/* + * convert_subquery_uniquekeys + * + * Covert the UniqueKey in subquery to outer relation. + */ +void convert_subquery_uniquekeys(PlannerInfo *root, + RelOptInfo *currel, + RelOptInfo *sub_final_rel) +{ + ListCell *lc; + + if (sub_final_rel->uniquekeys == NIL) + return; + + if (relation_is_onerow(sub_final_rel)) + { + add_uniquekey_for_onerow(currel); + return; + } + + Assert(currel->subroot != NULL); + + foreach(lc, sub_final_rel->uniquekeys) + { + UniqueKey *ukey = lfirst_node(UniqueKey, lc); + ListCell *lc; + List *exprs = NIL; + bool ukey_useful = true; + + /* One row case is handled above */ + Assert(ukey->exprs != NIL); + foreach(lc, ukey->exprs) + { + Var *var; + TargetEntry *tle = tlist_member(lfirst(lc), + currel->subroot->processed_tlist); + if (tle == NULL) + { + ukey_useful = false; + break; + } + var = find_var_for_subquery_tle(currel, tle); + if (var == NULL) + { + ukey_useful = false; + break; + } + exprs = lappend(exprs, var); + } + + if (ukey_useful) + currel->uniquekeys = lappend(currel->uniquekeys, + makeUniqueKey(exprs, + ukey->multi_nullvals)); + + } +} + +/* + * innerrel_keeps_unique + * + * Check if Unique key of the innerrel is valid after join. innerrel's UniqueKey + * will be still valid if innerrel's any-column mergeop outrerel's uniquekey + * exists in clause_list. + * + * Note: the clause_list must be a list of mergeable restrictinfo already. 
+ */ +static bool +innerrel_keeps_unique(PlannerInfo *root, + RelOptInfo *outerrel, + RelOptInfo *innerrel, + List *clause_list, + bool reverse) +{ + ListCell *lc, *lc2, *lc3; + + if (outerrel->uniquekeys == NIL || innerrel->uniquekeys == NIL) + return false; + + /* Look for an outerrel uniquekey whose every expr appears as an operand in clause_list. */ + foreach(lc, outerrel->uniquekeys) + { + List *outer_uq_exprs = lfirst_node(UniqueKey, lc)->exprs; + bool clauselist_matchs_all_exprs = true; + foreach(lc2, outer_uq_exprs) + { + Node *outer_uq_expr = lfirst(lc2); + bool find_uq_expr_in_clauselist = false; + foreach(lc3, clause_list) + { + RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc3); + Node *outer_expr; + if (reverse) /* reverse: pick the operand on the opposite side from what outer_is_left indicates */ + outer_expr = rinfo->outer_is_left ? get_rightop(rinfo->clause) : get_leftop(rinfo->clause); + else + outer_expr = rinfo->outer_is_left ? get_leftop(rinfo->clause) : get_rightop(rinfo->clause); + if (equal(outer_expr, outer_uq_expr)) + { + find_uq_expr_in_clauselist = true; + break; + } + } + if (!find_uq_expr_in_clauselist) + { + /* This uniquekey is not fully matched; skip its remaining exprs */ + clauselist_matchs_all_exprs = false; + break; + } + } + + if (clauselist_matchs_all_exprs) /* also true when the exprs list is empty, since the inner loop never runs */ + return true; + } + return false; +} + + +/* + * relation_is_onerow + * A rel is treated as one-row when its uniquekeys list holds exactly one UniqueKey whose exprs is NIL. + */ +bool +relation_is_onerow(RelOptInfo *rel) +{ + UniqueKey *ukey; + if (rel->uniquekeys == NIL) + return false; + ukey = linitial_node(UniqueKey, rel->uniquekeys); + return ukey->exprs == NIL && list_length(rel->uniquekeys) == 1; +} + +/* + * relation_has_uniquekeys_for + * Returns true if we have proofs that 'rel' cannot return multiple rows with + * the same values in each of 'exprs'. Otherwise returns false. 
+ */
+bool
+relation_has_uniquekeys_for(PlannerInfo *root, RelOptInfo *rel,
+							List *exprs, bool allow_multinulls)
+{
+	ListCell   *lc;
+
+	/*
+	 * For UniqueKey->onerow case, the uniquekey->exprs is empty as well
+	 * so we can't rely on list_is_subset to handle this special case.
+	 */
+	if (exprs == NIL)
+		return false;
+
+	foreach(lc, rel->uniquekeys)
+	{
+		UniqueKey  *ukey = lfirst_node(UniqueKey, lc);
+
+		/* Skip keys that may hold duplicated NULLs unless the caller allows it */
+		if (ukey->multi_nullvals && !allow_multinulls)
+			continue;
+		if (list_is_subset(ukey->exprs, exprs))
+			return true;
+	}
+	return false;
+}
+
+
+/*
+ * get_exprs_from_uniqueindex
+ *
+ * Return the list of key-column exprs of 'unique_index' that make up a
+ * UniqueKey.
+ *
+ * A key column that is restricted by "index_col = Const" (matched against
+ * const_exprs / const_expr_opfamilies) is left out of the result.
+ *
+ * *useful is set to false, and the scan stops, as soon as a key column is
+ * found that the relation does not use (not in used_varattrs for a plain
+ * column, not in the rel's reltarget for an expression).  The list built so
+ * far is still returned in that case, so callers must check *useful first.
+ *
+ * *multi_nullvals is set to true if any returned column may be NULL.
+ */
+static List *
+get_exprs_from_uniqueindex(IndexOptInfo *unique_index,
+						   List *const_exprs,
+						   List *const_expr_opfamilies,
+						   Bitmapset *used_varattrs,
+						   bool *useful,
+						   bool *multi_nullvals)
+{
+	List	   *exprs = NIL;
+	ListCell   *indexpr_item;
+	int			c;
+
+	*useful = true;
+	*multi_nullvals = false;
+
+	indexpr_item = list_head(unique_index->indexprs);
+	for (c = 0; c < unique_index->nkeycolumns; c++)
+	{
+		int			attr = unique_index->indexkeys[c];
+		Expr	   *expr = NULL;
+		bool		matched_const = false;
+		ListCell   *lc1,
+				   *lc2;
+
+		if (attr > 0)
+		{
+			expr = list_nth_node(TargetEntry, unique_index->indextlist, c)->expr;
+		}
+		else if (attr == 0)
+		{
+			/* Expression index */
+			if (indexpr_item == NULL)
+				elog(ERROR, "wrong number of index expressions");
+			expr = lfirst(indexpr_item);
+			indexpr_item = lnext(unique_index->indexprs, indexpr_item);
+		}
+		else /* attr < 0 */
+		{
+			/*
+			 * Index on system column is not supported.  Fail hard instead of
+			 * only asserting, so that non-assert builds never go on with an
+			 * unset expr.
+			 */
+			elog(ERROR, "unique index on system column is not supported");
+		}
+
+		/*
+		 * If the column is restricted by "index_col = Const" under one of the
+		 * given opfamilies, it does not need to appear in the final
+		 * UniqueKey->exprs.
+		 */
+		forboth(lc1, const_exprs, lc2, const_expr_opfamilies)
+		{
+			if (list_member_oid((List *)lfirst(lc2), unique_index->opfamily[c])
+				&& match_index_to_operand((Node *) lfirst(lc1), c, unique_index))
+			{
+				matched_const = true;
+				break;
+			}
+		}
+
+		if (matched_const)
+			continue;
+
+		if (attr > 0)
+		{
+			/* Bitmapset members are offset attribute numbers */
+			int			varattr = attr - FirstLowInvalidHeapAttributeNumber;
+
+			/*
+			 * Normal indexed column: if the column is not used, the index is
+			 * useless for uniquekey.
+			 */
+			if (!bms_is_member(varattr, used_varattrs))
+			{
+				*useful = false;
+				break;
+			}
+
+			/*
+			 * Nullable unless the column itself, or the offset entry for
+			 * attribute 0 (presumably "whole row"), is in notnullattrs.
+			 */
+			if (!bms_is_member(varattr, unique_index->rel->notnullattrs)
+				&& !bms_is_member(0 - FirstLowInvalidHeapAttributeNumber,
+								  unique_index->rel->notnullattrs))
+			{
+				*multi_nullvals = true;
+			}
+		}
+		else
+		{
+			if (!list_member(unique_index->rel->reltarget->exprs, expr))
+			{
+				/* Expression index but the expression is not used in rel */
+				*useful = false;
+				break;
+			}
+
+			/* We never know if an expression yields null or not */
+			*multi_nullvals = true;
+		}
+
+		exprs = lappend(exprs, expr);
+	}
+	return exprs;
+}
+
+
+/*
+ * add_uniquekey_for_onerow
+ * If we are sure that the relation only returns one row, then all the columns
+ * are unique. However we don't need to create UniqueKey for every column, we
+ * just set exprs = NIL and overwrites all the other UniqueKey on this RelOptInfo
+ * since this one has strongest semantics.
+ */
+void
+add_uniquekey_for_onerow(RelOptInfo *rel)
+{
+	/*
+	 * We overwrite the previous UniqueKey on purpose since this one has the
+	 * strongest semantic.
+	 */
+	rel->uniquekeys = list_make1(makeUniqueKey(NIL, false));
+}
+
+
+/*
+ * initililze_uniquecontext_for_joinrel
+ *		Build one UniqueKeyContext per UniqueKey of 'inputrel'.
+ *
+ * Every context starts with added_to_joinrel = false and useful = true.
+ */
+static List *
+initililze_uniquecontext_for_joinrel(RelOptInfo *inputrel)
+{
+	List	   *contexts = NIL;
+	ListCell   *cell;
+
+	foreach(cell, inputrel->uniquekeys)
+	{
+		UniqueKeyContext ctx = palloc(sizeof(struct UniqueKeyContextData));
+
+		ctx->uniquekey = lfirst_node(UniqueKey, cell);
+		ctx->added_to_joinrel = false;
+		ctx->useful = true;
+		contexts = lappend(contexts, ctx);
+	}
+	return contexts;
+}
+
+
+/*
+ * get_exprs_from_uniquekey
+ *		Give one-row and normal UniqueKeys the same shape: a List whose
+ *		elements are themselves Lists of exprs, each being a unique key.
+ *
+ * For a one-row 'rel1', every expr of joinrel's target list that only
+ * references rel1 is a unique key of its own, so the result holds one
+ * single-expr list per such expr.  Otherwise the result is a one-element
+ * list holding ukey->exprs.
+ *
+ * rel1: The relation which you want to get the exprs.
+ * ukey: The UniqueKey you want to get the exprs.
+ */
+static List *
+get_exprs_from_uniquekey(RelOptInfo *joinrel, RelOptInfo *rel1, UniqueKey *ukey)
+{
+	bool		onerow = rel1 != NULL && relation_is_onerow(rel1);
+	List	   *result = NIL;
+	ListCell   *cell;
+
+	Assert(onerow || ukey);
+
+	if (!onerow)
+		return list_make1(ukey->exprs);
+
+	/* Only the exprs that still exist in joinrel matter */
+	foreach(cell, joinrel->reltarget->exprs)
+	{
+		Bitmapset  *relids = pull_varnos(lfirst(cell));
+
+		if (!bms_is_subset(relids, rel1->relids))
+			continue;
+		result = lappend(result, list_make1(lfirst(cell)));
+	}
+	return result;
+}
+
+/*
+ * Partitioned table Unique Keys.
+ * The partition table unique key is maintained as:
+ * 1. The index must be unique as usual.
+ * 2. The index must contain the partition key.
+ * 3. The index must exist on all the child rels. See simple_indexinfo_equal
+ *    for how we compare it.
+ */
+
+/*
+ * index_constains_partkey
+ * return true if the index contains the partition key.
+ *
+ * Only the key columns of the index are considered: INCLUDE columns are
+ * stored in the index but take no part in its uniqueness, so a partition key
+ * found only there proves nothing.  The first nkeycolumns entries of
+ * ind->indextlist are the key columns.
+ */
+static bool
+index_constains_partkey(RelOptInfo *partrel, IndexOptInfo *ind)
+{
+	int			i;
+
+	Assert(IS_PARTITIONED_REL(partrel));
+	Assert(partrel->part_scheme->partnatts > 0);
+
+	for (i = 0; i < partrel->part_scheme->partnatts; i++)
+	{
+		Node	   *part_expr = linitial(partrel->partexprs[i]);
+		bool		found_in_index = false;
+		int			colno = 0;
+		ListCell   *lc;
+
+		foreach(lc, ind->indextlist)
+		{
+			Expr	   *index_expr;
+
+			/* Stop at the first INCLUDE column */
+			if (colno++ >= ind->nkeycolumns)
+				break;
+
+			index_expr = lfirst_node(TargetEntry, lc)->expr;
+			if (equal(index_expr, part_expr))
+			{
+				found_in_index = true;
+				break;
+			}
+		}
+		if (!found_in_index)
+			return false;
+	}
+	return true;
+}
+
+/*
+ * simple_indexinfo_equal
+ *
+ * Used to check if the 2 indexes are the same as each other. The index here
+ * is COPIED from childrel with some tiny changes (see
+ * simple_copy_indexinfo_to_parent).
+ *
+ * indexkeys has one entry per index column (ncolumns), while opfamily,
+ * opcintype and sortopfamily have one entry per key column (nkeycolumns),
+ * so each array is compared over its own length.  Two indexes that differ in
+ * the number of key columns are never equal.
+ */
+static bool
+simple_indexinfo_equal(IndexOptInfo *ind1, IndexOptInfo *ind2)
+{
+	Size		attno_cmp_len;
+	Size		oid_cmp_len;
+
+	if (ind1->ncolumns != ind2->ncolumns ||
+		ind1->nkeycolumns != ind2->nkeycolumns ||
+		ind1->unique != ind2->unique)
+		return false;
+
+	attno_cmp_len = sizeof(int) * ind1->ncolumns;
+	oid_cmp_len = sizeof(Oid) * ind1->nkeycolumns;
+
+	if (memcmp(ind1->indexkeys, ind2->indexkeys, attno_cmp_len) != 0 ||
+		memcmp(ind1->opfamily, ind2->opfamily, oid_cmp_len) != 0 ||
+		memcmp(ind1->opcintype, ind2->opcintype, oid_cmp_len) != 0 ||
+		memcmp(ind1->sortopfamily, ind2->sortopfamily, oid_cmp_len) != 0)
+		return false;
+
+	return equal(get_tlist_exprs(ind1->indextlist, true),
+				 get_tlist_exprs(ind2->indextlist, true));
+}
+
+
+/*
+ * The below macros are used for simple_copy_indexinfo_to_parent which is so
+ * customized that I don't want to put it to copyfuncs.c. So copy it here.
+ */
+#define COPY_POINTER_FIELD(fldname, sz) \
+	do { \
+		Size	_size = (sz); \
+		newnode->fldname = palloc(_size); \
+		memcpy(newnode->fldname, from->fldname, _size); \
+	} while (0)
+
+#define COPY_NODE_FIELD(fldname) \
+	(newnode->fldname = copyObjectImpl(from->fldname))
+
+#define COPY_SCALAR_FIELD(fldname) \
+	(newnode->fldname = from->fldname)
+
+
+/*
+ * simple_copy_indexinfo_to_parent (from partition)
+ * Copy the IndexInfo from child relation to parent relation with some modification,
+ * which is used to test:
+ * 1. If the same index exists in all the childrels.
+ * 2. If the parentrel->reltarget/basicrestrict info matches this index.
+ *
+ * Only the fields needed for those tests are copied.  The Vars in indextlist
+ * and the attribute numbers in indexkeys are translated from the child to
+ * the parent through the child's AppendRelInfo, and the copy's rel is set to
+ * 'parentrel'.
+ *
+ * indexkeys has one entry per index column (ncolumns); indexcollations,
+ * opfamily, opcintype and sortopfamily have one entry per key column
+ * (nkeycolumns) and must be copied with that length.
+ *
+ * NOTE(review): every indextlist entry is asserted to be a Var, so callers
+ * are presumably expected to filter out expression indexes - confirm.
+ */
+static IndexOptInfo *
+simple_copy_indexinfo_to_parent(PlannerInfo *root,
+								RelOptInfo *parentrel,
+								IndexOptInfo *from)
+{
+	IndexOptInfo *newnode = makeNode(IndexOptInfo);
+	AppendRelInfo *appinfo = find_appinfo_by_child(root, from->rel->relid);
+	ListCell   *lc;
+	int			idx = 0;
+
+	COPY_SCALAR_FIELD(ncolumns);
+	COPY_SCALAR_FIELD(nkeycolumns);
+	COPY_SCALAR_FIELD(unique);
+	COPY_SCALAR_FIELD(immediate);
+	/* We just need to know if it is NIL or not */
+	COPY_SCALAR_FIELD(indpred);
+	COPY_SCALAR_FIELD(predOK);
+	COPY_POINTER_FIELD(indexkeys, from->ncolumns * sizeof(int));
+	COPY_POINTER_FIELD(indexcollations, from->nkeycolumns * sizeof(Oid));
+	COPY_POINTER_FIELD(opfamily, from->nkeycolumns * sizeof(Oid));
+	COPY_POINTER_FIELD(opcintype, from->nkeycolumns * sizeof(Oid));
+	COPY_POINTER_FIELD(sortopfamily, from->nkeycolumns * sizeof(Oid));
+	COPY_NODE_FIELD(indextlist);
+
+	/* Convert index exprs on child expr to expr on parent */
+	foreach(lc, newnode->indextlist)
+	{
+		TargetEntry *tle = lfirst_node(TargetEntry, lc);
+
+		/* Index on expression is ignored */
+		Assert(IsA(tle->expr, Var));
+		tle->expr = (Expr *) find_parent_var(appinfo, (Var *) tle->expr);
+		/* Keep indexkeys in step with the translated target list */
+		newnode->indexkeys[idx] = castNode(Var, tle->expr)->varattno;
+		idx++;
+	}
+	newnode->rel = parentrel;
+	return newnode;
+}
+
+/*
+ * adjust_partition_unique_indexlist
+ *
+ * 
global_unique_indexes: At the beginning, it contains the copy & modified
+ * unique index from the first partition. And then check if each index in it still
+ * exists in the following partitions. If no, remove it. At last, it has an
+ * index list which exists in all the partitions.
+ *
+ * A child index is only considered if it is unique, immediate and, when it
+ * has a predicate, the predicate is known to hold (predOK).
+ */
+static void
+adjust_partition_unique_indexlist(PlannerInfo *root,
+								  RelOptInfo *parentrel,
+								  RelOptInfo *childrel,
+								  List **global_unique_indexes)
+{
+	ListCell   *lc,
+			   *lc2;
+
+	foreach(lc, *global_unique_indexes)
+	{
+		IndexOptInfo *g_ind = lfirst_node(IndexOptInfo, lc);
+		bool		found_in_child = false;
+
+		foreach(lc2, childrel->indexlist)
+		{
+			IndexOptInfo *p_ind = lfirst_node(IndexOptInfo, lc2);
+			IndexOptInfo *p_ind_copy;
+
+			if (!p_ind->unique || !p_ind->immediate ||
+				(p_ind->indpred != NIL && !p_ind->predOK))
+				continue;
+			p_ind_copy = simple_copy_indexinfo_to_parent(root, parentrel, p_ind);
+			if (simple_indexinfo_equal(p_ind_copy, g_ind))
+			{
+				found_in_child = true;
+				break;
+			}
+		}
+		if (!found_in_child)
+			/* The index doesn't exist in childrel, remove it from global_unique_indexes */
+			*global_unique_indexes = foreach_delete_current(*global_unique_indexes, lc);
+	}
+}
+
+/*
+ * Helper function for grouprel/distinctrel
+ *
+ * Add the exprs of 'sortgroups' (resolved against the query's target list)
+ * as a UniqueKey of 'rel', unless they reference Vars of the immediately
+ * enclosing query level.
+ */
+static void
+add_uniquekey_from_sortgroups(PlannerInfo *root, RelOptInfo *rel, List *sortgroups)
+{
+	Query	   *parse = root->parse;
+	List	   *exprs;
+	List	   *upper_vars;
+
+	exprs = get_sortgrouplist_exprs(sortgroups, parse->targetList);
+
+	/*
+	 * XXX: If there are some vars which is not in current levelsup, the semantic is
+	 * imprecise, should we avoid it or not? levelsup = 1 is just a demo, maybe we need to
+	 * check every level other than 0, if so, looks we have to write another
+	 * pull_var_walker.
+	 *
+	 * The check must run on the resolved exprs: the sort/group clauses
+	 * themselves only refer to target list entries and contain no Vars, so
+	 * scanning them would never find anything.
+	 */
+	upper_vars = pull_vars_of_level((Node *) exprs, 1);
+
+	if (upper_vars != NIL)
+		return;
+
+	rel->uniquekeys = lappend(rel->uniquekeys,
+							  makeUniqueKey(exprs,
+											false /* sortgroupclause can't be multi_nullvals */));
+}
+
+
+/*
+ * add_combined_uniquekey
+ * The combination of both UniqueKeys is a valid UniqueKey for joinrel no matter
+ * the jointype.
+ *
+ * Returns true only when joinrel was marked as a one-row relation (both
+ * inputs are one-row and the join is an inner join); the caller can stop
+ * adding UniqueKeys then.  Otherwise one UniqueKey is appended to joinrel
+ * for every pairing of an outer expr list with an inner expr list, and
+ * false is returned.
+ */
+bool
+add_combined_uniquekey(RelOptInfo *joinrel,
+					   RelOptInfo *outer_rel,
+					   RelOptInfo *inner_rel,
+					   UniqueKey *outer_ukey,
+					   UniqueKey *inner_ukey,
+					   JoinType jointype)
+{
+	ListCell   *lc1,
+			   *lc2;
+	List	   *outer_exprs_list;
+	List	   *inner_exprs_list;
+
+	/*
+	 * Either side has multi_nullvals or we have outer join,
+	 * the combined UniqueKey has multi_nullvals
+	 */
+	bool		multi_nullvals = outer_ukey->multi_nullvals ||
+		inner_ukey->multi_nullvals || IS_OUTER_JOIN(jointype);
+
+	/* The only case we can get onerow joinrel after join */
+	if (relation_is_onerow(outer_rel)
+		&& relation_is_onerow(inner_rel)
+		&& jointype == JOIN_INNER)
+	{
+		add_uniquekey_for_onerow(joinrel);
+		return true;
+	}
+
+	/* Build both lists once; neither depends on the loops below */
+	outer_exprs_list = get_exprs_from_uniquekey(joinrel, outer_rel, outer_ukey);
+	inner_exprs_list = get_exprs_from_uniquekey(joinrel, inner_rel, inner_ukey);
+
+	foreach(lc1, outer_exprs_list)
+	{
+		foreach(lc2, inner_exprs_list)
+		{
+			List	   *exprs = list_concat_copy(lfirst_node(List, lc1), lfirst_node(List, lc2));
+
+			joinrel->uniquekeys = lappend(joinrel->uniquekeys,
+										  makeUniqueKey(exprs,
+														multi_nullvals));
+		}
+	}
+	return false;
+}
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 986d7a52e3..f1bbb8c427 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -2385,6 +2385,8 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
 		add_path(final_rel, path);
 	}
 
+	simple_copy_uniquekeys(current_rel, final_rel);
+
 	/*
 	 * Generate partial paths for final_rel, too, if outer query levels might
 	 * be able to make use of them.
@@ -3895,6 +3897,8 @@ create_grouping_paths(PlannerInfo *root, } set_cheapest(grouped_rel); + + populate_grouprel_uniquekeys(root, grouped_rel, input_rel); return grouped_rel; } @@ -4615,7 +4619,7 @@ create_window_paths(PlannerInfo *root, /* Now choose the best path(s) */ set_cheapest(window_rel); - + simple_copy_uniquekeys(input_rel, window_rel); return window_rel; } @@ -4929,7 +4933,7 @@ create_distinct_paths(PlannerInfo *root, /* Now choose the best path(s) */ set_cheapest(distinct_rel); - + populate_distinctrel_uniquekeys(root, input_rel, distinct_rel); return distinct_rel; } @@ -5190,6 +5194,8 @@ create_ordered_paths(PlannerInfo *root, */ Assert(ordered_rel->pathlist != NIL); + simple_copy_uniquekeys(input_rel, ordered_rel); + return ordered_rel; } @@ -6067,6 +6073,9 @@ adjust_paths_for_srfs(PlannerInfo *root, RelOptInfo *rel, if (list_length(targets) == 1) return; + /* UniqueKey is not valid after handling the SRF. */ + rel->uniquekeys = NIL; + /* * Stack SRF-evaluation nodes atop each path for the rel. 
*
diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c
index 745f443e5c..ce290cb97b 100644
--- a/src/backend/optimizer/prep/prepunion.c
+++ b/src/backend/optimizer/prep/prepunion.c
@@ -689,6 +689,8 @@ generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
 	/* Undo effects of possibly forcing tuple_fraction to 0 */
 	root->tuple_fraction = save_fraction;
 
+	/* Add the UniqueKeys */
+	populate_unionrel_uniquekeys(root, result_rel);
 	return result_rel;
 }
 
diff --git a/src/backend/optimizer/util/appendinfo.c b/src/backend/optimizer/util/appendinfo.c
index d722063cf3..44c37ecffc 100644
--- a/src/backend/optimizer/util/appendinfo.c
+++ b/src/backend/optimizer/util/appendinfo.c
@@ -746,3 +746,60 @@ find_appinfos_by_relids(PlannerInfo *root, Relids relids, int *nappinfos)
 	}
 	return appinfos;
 }
+
+/*
+ * find_appinfo_by_child
+ *		Return the AppendRelInfo whose child_relid equals 'child_index'.
+ *
+ * Scans root->append_rel_list and raises an ERROR if no entry matches, so the
+ * result is never NULL.
+ */
+AppendRelInfo *
+find_appinfo_by_child(PlannerInfo *root, Index child_index)
+{
+	ListCell   *lc;
+
+	foreach(lc, root->append_rel_list)
+	{
+		AppendRelInfo *appinfo = lfirst_node(AppendRelInfo, lc);
+
+		if (appinfo->child_relid == child_index)
+			return appinfo;
+	}
+	elog(ERROR, "parent relation cant be found");
+	return NULL;				/* keep compiler quiet */
+}
+
+/*
+ * find_parent_var
+ *		Translate a Var of a child relation into the matching parent Var.
+ *
+ * The position of 'child_var' within appinfo->translated_vars gives the
+ * parent attribute number (1-based).  The result is a copy of 'child_var'
+ * with varno/varattno rewritten to the parent; an ERROR is raised if the
+ * Var is not found.
+ */
+Var *
+find_parent_var(AppendRelInfo *appinfo, Var *child_var)
+{
+	ListCell   *lc;
+	Index		attno = 1;
+
+	foreach(lc, appinfo->translated_vars)
+	{
+		Node	   *child_node = lfirst(lc);
+
+		if (equal(child_node, child_var))
+		{
+			Var		   *res = copyObject(child_var);
+
+			res->varattno = attno;
+			res->varno = appinfo->parent_relid;
+			/* First match wins; no need to scan the remaining columns. */
+			return res;
+		}
+		attno++;
+	}
+	elog(ERROR, "parent var can't be found.");
+	return NULL;				/* keep compiler quiet */
+}
diff --git a/src/backend/optimizer/util/inherit.c b/src/backend/optimizer/util/inherit.c
index 3132fd35a5..d66b40ec50 100644
--- a/src/backend/optimizer/util/inherit.c
+++ b/src/backend/optimizer/util/inherit.c
@@ -736,6 +736,7 @@ apply_child_basequals(PlannerInfo *root, RelOptInfo *parentrel,
 		{
 			Node	   *onecq = (Node *) 
lfirst(lc2); bool pseudoconstant; + RestrictInfo *child_rinfo; /* check for pseudoconstant (no Vars or volatile functions) */ pseudoconstant = @@ -747,13 +748,14 @@ apply_child_basequals(PlannerInfo *root, RelOptInfo *parentrel, root->hasPseudoConstantQuals = true; } /* reconstitute RestrictInfo with appropriate properties */ - childquals = lappend(childquals, - make_restrictinfo((Expr *) onecq, - rinfo->is_pushed_down, - rinfo->outerjoin_delayed, - pseudoconstant, - rinfo->security_level, - NULL, NULL, NULL)); + child_rinfo = make_restrictinfo((Expr *) onecq, + rinfo->is_pushed_down, + rinfo->outerjoin_delayed, + pseudoconstant, + rinfo->security_level, + NULL, NULL, NULL); + child_rinfo->mergeopfamilies = rinfo->mergeopfamilies; + childquals = lappend(childquals, child_rinfo); /* track minimum security level among child quals */ cq_min_security = Min(cq_min_security, rinfo->security_level); } diff --git a/src/include/nodes/makefuncs.h b/src/include/nodes/makefuncs.h index 31d9aedeeb..c83f17acb7 100644 --- a/src/include/nodes/makefuncs.h +++ b/src/include/nodes/makefuncs.h @@ -16,6 +16,7 @@ #include "nodes/execnodes.h" #include "nodes/parsenodes.h" +#include "nodes/pathnodes.h" extern A_Expr *makeA_Expr(A_Expr_Kind kind, List *name, @@ -105,4 +106,6 @@ extern GroupingSet *makeGroupingSet(GroupingSetKind kind, List *content, int loc extern VacuumRelation *makeVacuumRelation(RangeVar *relation, Oid oid, List *va_cols); +extern UniqueKey* makeUniqueKey(List *exprs, bool multi_nullvals); + #endif /* MAKEFUNC_H */ diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h index 7ddd8c011b..2bfbd353c7 100644 --- a/src/include/nodes/nodes.h +++ b/src/include/nodes/nodes.h @@ -263,6 +263,7 @@ typedef enum NodeTag T_EquivalenceMember, T_PathKey, T_PathTarget, + T_UniqueKey, T_RestrictInfo, T_IndexClause, T_PlaceHolderVar, diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h index 925f2eac3f..5737cd76ce 100644 --- 
a/src/include/nodes/pathnodes.h +++ b/src/include/nodes/pathnodes.h @@ -729,6 +729,7 @@ typedef struct RelOptInfo QualCost baserestrictcost; /* cost of evaluating the above */ Index baserestrict_min_security; /* min security_level found in * baserestrictinfo */ + List *uniquekeys; /* List of UniqueKey */ List *joininfo; /* RestrictInfo structures for join clauses * involving this rel */ bool has_eclass_joins; /* T means joininfo is incomplete */ @@ -1046,6 +1047,28 @@ typedef struct PathKey } PathKey; +/* + * UniqueKey + * + * Represents the unique properties held by a RelOptInfo. + * + * exprs is a list of exprs which is unique on current RelOptInfo. exprs = NIL + * is a special case of UniqueKey, which means there is only 1 row in that + * relation. + * multi_nullvals: true means multi null values may exist in these exprs, so the + * uniqueness is not guaranteed in this case. This field is necessary for + * remove_useless_join & reduce_unique_semijoins where we don't mind these + * duplicated NULL values. It is set to true for 2 cases. One is a unique key + * from a unique index but the related column is nullable. The other one is for + * outer join. see populate_joinrel_uniquekeys for detail. + */ +typedef struct UniqueKey +{ + NodeTag type; + List *exprs; + bool multi_nullvals; +} UniqueKey; + /* * PathTarget * @@ -2472,7 +2495,7 @@ typedef enum * * flags indicating what kinds of grouping are possible. * partial_costs_set is true if the agg_partial_costs and agg_final_costs - * have been initialized. + * have been initialized. * agg_partial_costs gives partial aggregation costs. * agg_final_costs gives finalization costs. * target_parallel_safe is true if target is parallel safe. @@ -2502,8 +2525,8 @@ typedef struct * limit_tuples is an estimated bound on the number of output tuples, * or -1 if no LIMIT or couldn't estimate. 
* count_est and offset_est are the estimated values of the LIMIT and OFFSET - * expressions computed by preprocess_limit() (see comments for - * preprocess_limit() for more information). + * expressions computed by preprocess_limit() (see comments for + * preprocess_limit() for more information). */ typedef struct { diff --git a/src/include/nodes/pg_list.h b/src/include/nodes/pg_list.h index ec231010ce..a1e279815c 100644 --- a/src/include/nodes/pg_list.h +++ b/src/include/nodes/pg_list.h @@ -543,6 +543,8 @@ extern bool list_member_ptr(const List *list, const void *datum); extern bool list_member_int(const List *list, int datum); extern bool list_member_oid(const List *list, Oid datum); +extern bool list_is_subset(const List *members, const List *target); + extern List *list_delete(List *list, void *datum); extern List *list_delete_ptr(List *list, void *datum); extern List *list_delete_int(List *list, int datum); diff --git a/src/include/optimizer/appendinfo.h b/src/include/optimizer/appendinfo.h index d6a27a60dd..e87c92a054 100644 --- a/src/include/optimizer/appendinfo.h +++ b/src/include/optimizer/appendinfo.h @@ -32,4 +32,7 @@ extern Relids adjust_child_relids_multilevel(PlannerInfo *root, Relids relids, extern AppendRelInfo **find_appinfos_by_relids(PlannerInfo *root, Relids relids, int *nappinfos); +extern AppendRelInfo *find_appinfo_by_child(PlannerInfo *root, Index child_index); +extern Var *find_parent_var(AppendRelInfo *appinfo, Var *child_var); + #endif /* APPENDINFO_H */ diff --git a/src/include/optimizer/optimizer.h b/src/include/optimizer/optimizer.h index 3e4171056e..9445141263 100644 --- a/src/include/optimizer/optimizer.h +++ b/src/include/optimizer/optimizer.h @@ -23,6 +23,7 @@ #define OPTIMIZER_H #include "nodes/parsenodes.h" +#include "nodes/pathnodes.h" /* * We don't want to include nodes/pathnodes.h here, because non-planner @@ -156,6 +157,7 @@ extern TargetEntry *get_sortgroupref_tle(Index sortref, List *targetList); extern TargetEntry 
*get_sortgroupclause_tle(SortGroupClause *sgClause, List *targetList); +extern Var *find_var_for_subquery_tle(RelOptInfo *rel, TargetEntry *tle); extern Node *get_sortgroupclause_expr(SortGroupClause *sgClause, List *targetList); extern List *get_sortgrouplist_exprs(List *sgClauses, diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h index 10b6e81079..9217a8d6c6 100644 --- a/src/include/optimizer/paths.h +++ b/src/include/optimizer/paths.h @@ -240,5 +240,48 @@ extern PathKey *make_canonical_pathkey(PlannerInfo *root, int strategy, bool nulls_first); extern void add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel, List *live_childrels); +extern List *select_mergejoin_clauses(PlannerInfo *root, + RelOptInfo *joinrel, + RelOptInfo *outerrel, + RelOptInfo *innerrel, + List *restrictlist, + JoinType jointype, + bool *mergejoin_allowed); + +/* + * uniquekeys.c + * Utilities for matching and building unique keys + */ +extern void populate_baserel_uniquekeys(PlannerInfo *root, + RelOptInfo *baserel, + List* unique_index_list); +extern void populate_partitionedrel_uniquekeys(PlannerInfo *root, + RelOptInfo *rel, + List *childrels); +extern void populate_distinctrel_uniquekeys(PlannerInfo *root, + RelOptInfo *inputrel, + RelOptInfo *distinctrel); +extern void populate_grouprel_uniquekeys(PlannerInfo *root, + RelOptInfo *grouprel, + RelOptInfo *inputrel); +extern void populate_unionrel_uniquekeys(PlannerInfo *root, + RelOptInfo *unionrel); +extern void simple_copy_uniquekeys(RelOptInfo *oldrel, + RelOptInfo *newrel); +extern void convert_subquery_uniquekeys(PlannerInfo *root, + RelOptInfo *currel, + RelOptInfo *sub_final_rel); +extern void populate_joinrel_uniquekeys(PlannerInfo *root, + RelOptInfo *joinrel, + RelOptInfo *rel1, + RelOptInfo *rel2, + List *restrictlist, + JoinType jointype); + +extern bool relation_has_uniquekeys_for(PlannerInfo *root, + RelOptInfo *rel, + List *exprs, + bool allow_multinulls); +extern bool 
relation_is_onerow(RelOptInfo *rel); #endif /* PATHS_H */ -- 2.21.0