diff --git a/doc/src/sgml/ddl.sgml b/doc/src/sgml/ddl.sgml
index b05a9c2..5a436a1 100644
--- a/doc/src/sgml/ddl.sgml
+++ b/doc/src/sgml/ddl.sgml
@@ -2993,6 +2993,11 @@ VALUES ('Albany', NULL, NULL, 'NY');
foreign table partitions.
+
+ Updating the partition key of a row might cause it to be moved into a
+ different partition whose partition constraint the new row satisfies.
+
+
Example
@@ -3285,9 +3290,20 @@ ALTER TABLE measurement ATTACH PARTITION measurement_y2008m02
- An UPDATE> that causes a row to move from one partition to
- another fails, because the new value of the row fails to satisfy the
- implicit partition constraint of the original partition.
+ When an <command>UPDATE</command> causes a row to move from one partition
+ to another, there is a chance that a concurrent <command>UPDATE</command>
+ or <command>DELETE</command> misses this row. Suppose that, during the row
+ movement, the row is still visible to a concurrent session that is about
+ to perform an <command>UPDATE</command> or <command>DELETE</command> on
+ the same row. That operation can silently miss the row if the first
+ session deletes it from the partition as part of its
+ <command>UPDATE</command> row movement. In such a case, the concurrent
+ <command>UPDATE</command>/<command>DELETE</command>, being unaware of the
+ row movement, concludes that the row has just been deleted and that there
+ is nothing to be done for it. By contrast, in the usual case where the
+ table is not partitioned, or where there is no row movement, the second
+ session would have identified the newly updated row and carried out its
+ <command>UPDATE</command>/<command>DELETE</command> on this new row version.
diff --git a/doc/src/sgml/ref/update.sgml b/doc/src/sgml/ref/update.sgml
index 8a1619f..28cfc1a 100644
--- a/doc/src/sgml/ref/update.sgml
+++ b/doc/src/sgml/ref/update.sgml
@@ -282,10 +282,17 @@ UPDATE count
In the case of a partitioned table, updating a row might cause it to no
- longer satisfy the partition constraint. Since there is no provision to
- move the row to the partition appropriate to the new value of its
- partitioning key, an error will occur in this case. This can also happen
- when updating a partition directly.
+ longer satisfy the partition constraint of the containing partition. In that
+ case, if some other partition in the partition tree has a partition
+ constraint that the row satisfies, the row is moved to that partition;
+ otherwise an error occurs. An error also occurs when a partition is updated
+ directly and the row no longer satisfies that partition's own constraint.
+ Behind the scenes, the row movement is actually a <command>DELETE</command>
+ and <command>INSERT</command> operation. However, a concurrent
+ <command>UPDATE</command> or <command>DELETE</command> on the same row may miss
+ this row. For details see
+ <xref linkend="ddl-partitioning-declarative-limitations">.
+
diff --git a/doc/src/sgml/trigger.sgml b/doc/src/sgml/trigger.sgml
index 950245d..72300a0 100644
--- a/doc/src/sgml/trigger.sgml
+++ b/doc/src/sgml/trigger.sgml
@@ -160,6 +160,29 @@
+ If an <command>UPDATE</command> on a partitioned table causes a row to
+ move to another partition, it will be performed as a
+ <command>DELETE</command> from the original partition followed by an
+ <command>INSERT</command> into the new partition. In this case, all
+ row-level <literal>BEFORE</literal> <command>UPDATE</command> triggers and all
+ row-level <literal>BEFORE</literal> <command>DELETE</command> triggers are fired
+ on the original partition. Then all row-level <literal>BEFORE</literal>
+ <command>INSERT</command> triggers are fired on the destination partition.
+ The possibility of surprising outcomes should be considered when all these
+ triggers affect the row being moved. As far as <literal>AFTER ROW</literal>
+ triggers are concerned, <literal>AFTER</literal> <command>DELETE</command> and
+ <literal>AFTER</literal> <command>INSERT</command> triggers are applied; but
+ <literal>AFTER</literal> <command>UPDATE</command> triggers are not applied
+ because the <command>UPDATE</command> has been converted to a
+ <command>DELETE</command> and an <command>INSERT</command>. As far as
+ statement-level triggers are concerned, none of the
+ <command>DELETE</command> or <command>INSERT</command> triggers are fired,
+ even if row movement occurs; only the <command>UPDATE</command> triggers
+ defined on the target table used in the <command>UPDATE</command> statement
+ will be fired.
+
+
+
Trigger functions invoked by per-statement triggers should always
return NULL. Trigger functions invoked by per-row
triggers can return a table row (a value of
diff --git a/src/backend/catalog/partition.c b/src/backend/catalog/partition.c
index f8c55b1..c9f5dd6 100644
--- a/src/backend/catalog/partition.c
+++ b/src/backend/catalog/partition.c
@@ -921,7 +921,8 @@ get_qual_from_partbound(Relation rel, Relation parent,
/*
* map_partition_varattnos - maps varattno of any Vars in expr from the
- * parent attno to partition attno.
+ * attnos of the 'from_rel' partition to the attnos of the 'to_rel' partition.
+ * Each rel can be either a leaf partition or a partitioned table.
*
* We must allow for cases where physical attnos of a partition can be
* different from the parent's.
@@ -931,8 +932,8 @@ get_qual_from_partbound(Relation rel, Relation parent,
* are working on Lists, so it's less messy to do the casts internally.
*/
List *
-map_partition_varattnos(List *expr, int target_varno,
- Relation partrel, Relation parent)
+map_partition_varattnos(List *expr, int fromrel_varno,
+ Relation to_rel, Relation from_rel)
{
AttrNumber *part_attnos;
bool found_whole_row;
@@ -940,13 +941,13 @@ map_partition_varattnos(List *expr, int target_varno,
if (expr == NIL)
return NIL;
- part_attnos = convert_tuples_by_name_map(RelationGetDescr(partrel),
- RelationGetDescr(parent),
+ part_attnos = convert_tuples_by_name_map(RelationGetDescr(to_rel),
+ RelationGetDescr(from_rel),
gettext_noop("could not convert row type"));
expr = (List *) map_variable_attnos((Node *) expr,
- target_varno, 0,
+ fromrel_varno, 0,
part_attnos,
- RelationGetDescr(parent)->natts,
+ RelationGetDescr(from_rel)->natts,
&found_whole_row);
/* There can never be a whole-row reference here */
if (found_whole_row)
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index f391828..2706af2 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -168,7 +168,7 @@ typedef struct CopyStateData
PartitionDispatch *partition_dispatch_info;
int num_dispatch; /* Number of entries in the above array */
int num_partitions; /* Number of members in the following arrays */
- ResultRelInfo *partitions; /* Per partition result relation */
+ ResultRelInfo **partitions; /* Per partition result relation pointers */
TupleConversionMap **partition_tupconv_maps;
TupleTableSlot *partition_tuple_slot;
TransitionCaptureState *transition_capture;
@@ -1426,13 +1426,13 @@ BeginCopy(ParseState *pstate,
if (is_from && rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
{
PartitionDispatch *partition_dispatch_info;
- ResultRelInfo *partitions;
+ ResultRelInfo **partitions;
TupleConversionMap **partition_tupconv_maps;
TupleTableSlot *partition_tuple_slot;
int num_parted,
num_partitions;
- ExecSetupPartitionTupleRouting(rel,
+ ExecSetupPartitionTupleRouting(rel, NULL, 0,
&partition_dispatch_info,
&partitions,
&partition_tupconv_maps,
@@ -1461,7 +1461,7 @@ BeginCopy(ParseState *pstate,
for (i = 0; i < cstate->num_partitions; ++i)
{
cstate->transition_tupconv_maps[i] =
- convert_tuples_by_name(RelationGetDescr(cstate->partitions[i].ri_RelationDesc),
+ convert_tuples_by_name(RelationGetDescr(cstate->partitions[i]->ri_RelationDesc),
RelationGetDescr(rel),
gettext_noop("could not convert row type"));
}
@@ -2608,7 +2608,7 @@ CopyFrom(CopyState cstate)
* to the selected partition.
*/
saved_resultRelInfo = resultRelInfo;
- resultRelInfo = cstate->partitions + leaf_part_index;
+ resultRelInfo = cstate->partitions[leaf_part_index];
/* We do not yet have a way to insert into a foreign partition */
if (resultRelInfo->ri_FdwRoutine)
@@ -2717,7 +2717,7 @@ CopyFrom(CopyState cstate)
/* Check the constraints of the tuple */
if (cstate->rel->rd_att->constr || check_partition_constr)
- ExecConstraints(resultRelInfo, slot, estate);
+ ExecConstraints(resultRelInfo, slot, estate, true);
if (useHeapMultiInsert)
{
@@ -2837,7 +2837,7 @@ CopyFrom(CopyState cstate)
}
for (i = 0; i < cstate->num_partitions; i++)
{
- ResultRelInfo *resultRelInfo = cstate->partitions + i;
+ ResultRelInfo *resultRelInfo = cstate->partitions[i];
ExecCloseIndices(resultRelInfo);
heap_close(resultRelInfo->ri_RelationDesc, NoLock);
diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c
index 0f08283..e448d18 100644
--- a/src/backend/executor/execMain.c
+++ b/src/backend/executor/execMain.c
@@ -64,6 +64,18 @@
#include "utils/snapmgr.h"
#include "utils/tqual.h"
+/*
+ * Entry of the temporary hash table built by ExecSetupPartitionTupleRouting()
+ * for UPDATE tuple routing. It maps the OID of each UPDATE per-subplan result
+ * rel (ModifyTableState->resultRelInfo[]) to its existing ResultRelInfo, so
+ * that a leaf partition already present there can be re-used as is.
+ */
+typedef struct ResultRelOidsEntry
+{
+ Oid rel_oid; /* hash key: OID of a subplan result relation */
+ ResultRelInfo *resultRelInfo; /* that relation's per-subplan ResultRelInfo */
+} ResultRelOidsEntry;
+
/* Hooks for plugins to get control in ExecutorStart/Run/Finish/End */
ExecutorStart_hook_type ExecutorStart_hook = NULL;
@@ -103,8 +115,6 @@ static char *ExecBuildSlotPartitionKeyDescription(Relation rel,
int maxfieldlen);
static void EvalPlanQualStart(EPQState *epqstate, EState *parentestate,
Plan *planTree);
-static void ExecPartitionCheck(ResultRelInfo *resultRelInfo,
- TupleTableSlot *slot, EState *estate);
/*
* Note that GetUpdatedColumns() also exists in commands/trigger.c. There does
@@ -1823,15 +1833,10 @@ ExecRelCheck(ResultRelInfo *resultRelInfo,
/*
* ExecPartitionCheck --- check that tuple meets the partition constraint.
*/
-static void
+bool
ExecPartitionCheck(ResultRelInfo *resultRelInfo, TupleTableSlot *slot,
EState *estate)
{
- Relation rel = resultRelInfo->ri_RelationDesc;
- TupleDesc tupdesc = RelationGetDescr(rel);
- Bitmapset *modifiedCols;
- Bitmapset *insertedCols;
- Bitmapset *updatedCols;
ExprContext *econtext;
/*
@@ -1859,51 +1864,65 @@ ExecPartitionCheck(ResultRelInfo *resultRelInfo, TupleTableSlot *slot,
* As in case of the catalogued constraints, we treat a NULL result as
* success here, not a failure.
*/
- if (!ExecCheck(resultRelInfo->ri_PartitionCheckExpr, econtext))
- {
- char *val_desc;
- Relation orig_rel = rel;
+ return ExecCheck(resultRelInfo->ri_PartitionCheckExpr, econtext);
+}
+
+/*
+ * ExecPartitionCheckEmitError - Form and emit an error message after a failed
+ * partition constraint check.
+ */
+void
+ExecPartitionCheckEmitError(ResultRelInfo *resultRelInfo,
+ TupleTableSlot *slot,
+ EState *estate)
+{
+ Relation rel = resultRelInfo->ri_RelationDesc;
+ Relation orig_rel = rel;
+ TupleDesc tupdesc = RelationGetDescr(rel);
+ char *val_desc;
+ Bitmapset *modifiedCols;
+ Bitmapset *insertedCols;
+ Bitmapset *updatedCols;
- /* See the comment above. */
- if (resultRelInfo->ri_PartitionRoot)
+ /* See the comments in ExecConstraints. */
+ if (resultRelInfo->ri_PartitionRoot)
+ {
+ HeapTuple tuple = ExecFetchSlotTuple(slot);
+ TupleDesc old_tupdesc = RelationGetDescr(rel);
+ TupleConversionMap *map;
+
+ rel = resultRelInfo->ri_PartitionRoot;
+ tupdesc = RelationGetDescr(rel);
+ /* a reverse map */
+ map = convert_tuples_by_name(old_tupdesc, tupdesc,
+ gettext_noop("could not convert row type"));
+ if (map != NULL)
{
- HeapTuple tuple = ExecFetchSlotTuple(slot);
- TupleDesc old_tupdesc = RelationGetDescr(rel);
- TupleConversionMap *map;
-
- rel = resultRelInfo->ri_PartitionRoot;
- tupdesc = RelationGetDescr(rel);
- /* a reverse map */
- map = convert_tuples_by_name(old_tupdesc, tupdesc,
- gettext_noop("could not convert row type"));
- if (map != NULL)
- {
- tuple = do_convert_tuple(tuple, map);
- ExecStoreTuple(tuple, slot, InvalidBuffer, false);
- }
+ tuple = do_convert_tuple(tuple, map);
+ ExecStoreTuple(tuple, slot, InvalidBuffer, false);
}
-
- insertedCols = GetInsertedColumns(resultRelInfo, estate);
- updatedCols = GetUpdatedColumns(resultRelInfo, estate);
- modifiedCols = bms_union(insertedCols, updatedCols);
- val_desc = ExecBuildSlotValueDescription(RelationGetRelid(rel),
- slot,
- tupdesc,
- modifiedCols,
- 64);
- ereport(ERROR,
- (errcode(ERRCODE_CHECK_VIOLATION),
- errmsg("new row for relation \"%s\" violates partition constraint",
- RelationGetRelationName(orig_rel)),
- val_desc ? errdetail("Failing row contains %s.", val_desc) : 0));
}
+
+ insertedCols = GetInsertedColumns(resultRelInfo, estate);
+ updatedCols = GetUpdatedColumns(resultRelInfo, estate);
+ modifiedCols = bms_union(insertedCols, updatedCols);
+ val_desc = ExecBuildSlotValueDescription(RelationGetRelid(rel),
+ slot,
+ tupdesc,
+ modifiedCols,
+ 64);
+ ereport(ERROR,
+ (errcode(ERRCODE_CHECK_VIOLATION),
+ errmsg("new row for relation \"%s\" violates partition constraint",
+ RelationGetRelationName(orig_rel)),
+ val_desc ? errdetail("Failing row contains %s.", val_desc) : 0));
}
/*
* ExecConstraints - check constraints of the tuple in 'slot'
*
- * This checks the traditional NOT NULL and check constraints, as well as
- * the partition constraint, if any.
+ * This checks the traditional NOT NULL and check constraints, and if requested,
+ * checks the partition constraint.
*
* Note: 'slot' contains the tuple to check the constraints of, which may
* have been converted from the original input tuple after tuple routing.
@@ -1911,7 +1930,8 @@ ExecPartitionCheck(ResultRelInfo *resultRelInfo, TupleTableSlot *slot,
*/
void
ExecConstraints(ResultRelInfo *resultRelInfo,
- TupleTableSlot *slot, EState *estate)
+ TupleTableSlot *slot, EState *estate,
+ bool check_partition_constraint)
{
Relation rel = resultRelInfo->ri_RelationDesc;
TupleDesc tupdesc = RelationGetDescr(rel);
@@ -2024,8 +2044,9 @@ ExecConstraints(ResultRelInfo *resultRelInfo,
}
}
- if (resultRelInfo->ri_PartitionCheck)
- ExecPartitionCheck(resultRelInfo, slot, estate);
+ if (check_partition_constraint && resultRelInfo->ri_PartitionCheck &&
+ !ExecPartitionCheck(resultRelInfo, slot, estate))
+ ExecPartitionCheckEmitError(resultRelInfo, slot, estate);
}
@@ -3190,10 +3211,14 @@ EvalPlanQualEnd(EPQState *epqstate)
* ExecSetupPartitionTupleRouting - set up information needed during
* tuple routing for partitioned tables
*
+ * 'update_rri' has the UPDATE per-subplan result rels.
+ * 'num_update_rri' : number of UPDATE per-subplan result rels. For INSERT,
+ * this is 0.
+ *
* Output arguments:
* 'pd' receives an array of PartitionDispatch objects with one entry for
* every partitioned table in the partition tree
- * 'partitions' receives an array of ResultRelInfo objects with one entry for
+ * 'partitions' receives an array of ResultRelInfo* objects with one entry for
* every leaf partition in the partition tree
* 'tup_conv_maps' receives an array of TupleConversionMap objects with one
* entry for every leaf partition (required to convert input tuple based
@@ -3213,8 +3238,10 @@ EvalPlanQualEnd(EPQState *epqstate)
*/
void
ExecSetupPartitionTupleRouting(Relation rel,
+ ResultRelInfo *update_rri,
+ int num_update_rri,
PartitionDispatch **pd,
- ResultRelInfo **partitions,
+ ResultRelInfo ***partitions,
TupleConversionMap ***tup_conv_maps,
TupleTableSlot **partition_tuple_slot,
int *num_parted, int *num_partitions)
@@ -3223,18 +3250,60 @@ ExecSetupPartitionTupleRouting(Relation rel,
List *leaf_parts;
ListCell *cell;
int i;
- ResultRelInfo *leaf_part_rri;
+ HTAB *result_rel_oids = NULL;
+ HASHCTL ctl;
+ ResultRelOidsEntry *hash_entry;
+ ResultRelInfo *leaf_part_arr;
/* Get the tuple-routing information and lock partitions */
*pd = RelationGetPartitionDispatchInfo(rel, RowExclusiveLock, num_parted,
&leaf_parts);
*num_partitions = list_length(leaf_parts);
- *partitions = (ResultRelInfo *) palloc(*num_partitions *
- sizeof(ResultRelInfo));
+ *partitions = (ResultRelInfo **) palloc(*num_partitions *
+ sizeof(ResultRelInfo*));
*tup_conv_maps = (TupleConversionMap **) palloc0(*num_partitions *
sizeof(TupleConversionMap *));
/*
+ * For Updates, if the leaf partition is already present in the per-subplan
+ * result rels, we re-use that rather than initialize a new result rel. So
+ * to find whether a given leaf partition already has a resultRel, we build
+ * the hash table for searching each of the leaf partitions by oid.
+ */
+ if (num_update_rri != 0)
+ {
+ ResultRelInfo *resultRelInfo;
+
+ memset(&ctl, 0, sizeof(ctl));
+ ctl.keysize = sizeof(Oid);
+ ctl.entrysize = sizeof(ResultRelOidsEntry);
+ ctl.hcxt = CurrentMemoryContext;
+ result_rel_oids = hash_create("result_rel_oids temporary hash",
+ 32, /* start small and extend */
+ &ctl,
+ HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
+
+ resultRelInfo = update_rri;
+ for (i = 0; i < num_update_rri; i++, resultRelInfo++)
+ {
+ Oid reloid = RelationGetRelid(resultRelInfo->ri_RelationDesc);
+
+ hash_entry = hash_search(result_rel_oids, &reloid,
+ HASH_ENTER, NULL);
+ hash_entry->resultRelInfo = resultRelInfo;
+ }
+ }
+ else
+ {
+ /*
+ * For inserts, we need to create all new result rels, so avoid repeated
+ * pallocs by allocating memory for all the result rels in bulk.
+ */
+ leaf_part_arr = (ResultRelInfo *) palloc0(*num_partitions *
+ sizeof(ResultRelInfo));
+ }
+
+ /*
* Initialize an empty slot that will be used to manipulate tuples of any
* given partition's rowtype. It is attached to the caller-specified node
* (such as ModifyTableState) and released when the node finishes
@@ -3242,23 +3311,65 @@ ExecSetupPartitionTupleRouting(Relation rel,
*/
*partition_tuple_slot = MakeTupleTableSlot();
- leaf_part_rri = *partitions;
i = 0;
foreach(cell, leaf_parts)
{
- Relation partrel;
+ ResultRelInfo *leaf_part_rri;
+ Relation partrel = NULL;
TupleDesc part_tupdesc;
+ Oid leaf_oid = lfirst_oid(cell);
+
+ if (num_update_rri != 0)
+ {
+ /*
+ * If this leaf partition is already present in the per-subplan
+ * resultRelInfos, re-use that resultRelInfo along with its
+ * already-opened relation; otherwise create a new result rel.
+ */
+ hash_entry = hash_search(result_rel_oids, &leaf_oid,
+ HASH_FIND, NULL);
+ if (hash_entry != NULL)
+ {
+ leaf_part_rri = hash_entry->resultRelInfo;
+ partrel = leaf_part_rri->ri_RelationDesc;
+
+ /*
+ * This is required when converting tuple as per root partition
+ * tuple descriptor. When generating the update plans, this was
+ * not set.
+ */
+ leaf_part_rri->ri_PartitionRoot = rel;
+ }
+ else
+ leaf_part_rri = (ResultRelInfo *) palloc0(sizeof(ResultRelInfo));
+ }
+ else
+ {
+ /* For INSERTs, we already have an array of result rels allocated */
+ leaf_part_rri = leaf_part_arr + i;
+ }
/*
- * We locked all the partitions above including the leaf partitions.
- * Note that each of the relations in *partitions are eventually
- * closed by the caller.
+ * If we didn't open the partition rel, it means we haven't initialized
+ * the result rel as well.
*/
- partrel = heap_open(lfirst_oid(cell), NoLock);
+ if (!partrel)
+ {
+ /*
+ * We locked all the partitions above including the leaf partitions.
+ * Note that each of the newly opened relations in *partitions are
+ * eventually closed by the caller.
+ */
+ partrel = heap_open(leaf_oid, NoLock);
+ InitResultRelInfo(leaf_part_rri, partrel, 1 /* dummy */, rel, 0);
+ }
+
part_tupdesc = RelationGetDescr(partrel);
/*
- * Verify result relation is a valid target for the current operation.
+ * Verify result relation is a valid target for an INSERT operation.
+ * Even for UPDATE, tuples reach these partitions through tuple routing,
+ * i.e. as inserts, so it is INSERT validity that must be checked.
*/
CheckValidResultRel(partrel, CMD_INSERT);
@@ -3269,12 +3380,6 @@ ExecSetupPartitionTupleRouting(Relation rel,
(*tup_conv_maps)[i] = convert_tuples_by_name(tupDesc, part_tupdesc,
gettext_noop("could not convert row type"));
- InitResultRelInfo(leaf_part_rri,
- partrel,
- 1, /* dummy */
- rel,
- 0);
-
/*
* Open partition indices (remember we do not support ON CONFLICT in
* case of partitioned tables, so we do not need support information
@@ -3284,9 +3389,12 @@ ExecSetupPartitionTupleRouting(Relation rel,
leaf_part_rri->ri_IndexRelationDescs == NULL)
ExecOpenIndices(leaf_part_rri, false);
- leaf_part_rri++;
+ (*partitions)[i] = leaf_part_rri;
i++;
}
+
+ if (result_rel_oids != NULL)
+ hash_destroy(result_rel_oids);
}
/*
@@ -3312,8 +3420,9 @@ ExecFindPartition(ResultRelInfo *resultRelInfo, PartitionDispatch *pd,
* First check the root table's partition constraint, if any. No point in
* routing the tuple it if it doesn't belong in the root table itself.
*/
- if (resultRelInfo->ri_PartitionCheck)
- ExecPartitionCheck(resultRelInfo, slot, estate);
+ if (resultRelInfo->ri_PartitionCheck &&
+ !ExecPartitionCheck(resultRelInfo, slot, estate))
+ ExecPartitionCheckEmitError(resultRelInfo, slot, estate);
result = get_partition_for_tuple(pd, slot, estate,
&failed_at, &failed_slot);
diff --git a/src/backend/executor/execReplication.c b/src/backend/executor/execReplication.c
index bc53d07..eca60f2 100644
--- a/src/backend/executor/execReplication.c
+++ b/src/backend/executor/execReplication.c
@@ -402,7 +402,7 @@ ExecSimpleRelationInsert(EState *estate, TupleTableSlot *slot)
/* Check the constraints of the tuple */
if (rel->rd_att->constr)
- ExecConstraints(resultRelInfo, slot, estate);
+ ExecConstraints(resultRelInfo, slot, estate, true);
/* Store the slot into tuple that we can inspect. */
tuple = ExecMaterializeSlot(slot);
@@ -467,7 +467,7 @@ ExecSimpleRelationUpdate(EState *estate, EPQState *epqstate,
/* Check the constraints of the tuple */
if (rel->rd_att->constr)
- ExecConstraints(resultRelInfo, slot, estate);
+ ExecConstraints(resultRelInfo, slot, estate, true);
/* Store the slot into tuple that we can write. */
tuple = ExecMaterializeSlot(slot);
diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c
index 8d17425..51931f4 100644
--- a/src/backend/executor/nodeModifyTable.c
+++ b/src/backend/executor/nodeModifyTable.c
@@ -45,6 +45,7 @@
#include "foreign/fdwapi.h"
#include "miscadmin.h"
#include "nodes/nodeFuncs.h"
+#include "optimizer/var.h"
#include "parser/parsetree.h"
#include "storage/bufmgr.h"
#include "storage/lmgr.h"
@@ -53,6 +54,8 @@
#include "utils/rel.h"
#include "utils/tqual.h"
+#define GetUpdatedColumns(relinfo, estate) \
+ (rt_fetch((relinfo)->ri_RangeTableIndex, (estate)->es_range_table)->updatedCols) /* updatedCols of relinfo's range table entry */
static bool ExecOnConflictUpdate(ModifyTableState *mtstate,
ResultRelInfo *resultRelInfo,
@@ -239,6 +242,34 @@ ExecCheckTIDVisible(EState *estate,
ReleaseBuffer(buffer);
}
+/*
+ * ConvertPartitionTupleSlot -- run 'tuple' through 'map' and park the result
+ * in mtstate->mt_partition_tuple_slot, pointing *p_slot at that slot.
+ *
+ * A NULL 'map' makes this a no-op: 'tuple' comes back as-is and *p_slot is
+ * left untouched. Otherwise the converted tuple is returned.
+ */
+static HeapTuple
+ConvertPartitionTupleSlot(ModifyTableState *mtstate, TupleConversionMap *map,
+						  HeapTuple tuple, TupleTableSlot **p_slot)
+{
+	HeapTuple	converted = tuple;
+
+	if (map != NULL)
+	{
+		TupleTableSlot *part_slot = mtstate->mt_partition_tuple_slot;
+
+		/* Hand the caller the dedicated slot, retyped to the map's output. */
+		converted = do_convert_tuple(tuple, map);
+		*p_slot = part_slot;
+		Assert(part_slot != NULL);
+		ExecSetSlotDescriptor(part_slot, map->outdesc);
+		ExecStoreTuple(converted, part_slot, InvalidBuffer, true);
+	}
+
+	return converted;
+}
+
/* ----------------------------------------------------------------
* ExecInsert
*
@@ -280,7 +311,38 @@ ExecInsert(ModifyTableState *mtstate,
if (mtstate->mt_partition_dispatch_info)
{
int leaf_part_index;
- TupleConversionMap *map;
+ ResultRelInfo *rootResultRelInfo;
+
+ /*
+ * If the original operation is UPDATE, the root partition rel needs to
+ * be fetched from mtstate->rootResultRelInfo.
+ */
+ rootResultRelInfo = (mtstate->rootResultRelInfo ?
+ mtstate->rootResultRelInfo : resultRelInfo);
+
+ /*
+ * If the resultRelInfo is not the root partitioned table (which happens
+ * for UPDATE), we must first convert the tuple to the root's tuple
+ * descriptor, since ExecFindPartition() starts its search from the root.
+ * The tuple conversion maps are stored in the same order as
+ * mtstate->resultRelInfo[], so to retrieve the one for this resultRel,
+ * we need to know the position of the resultRel in
+ * mtstate->resultRelInfo[]. Note: we assume that if the resultRelInfo
+ * does not belong to the subplans, then the tuple already matches the
+ * root tuple descriptor, although there is no known scenario in which
+ * this could happen.
+ */
+ if (rootResultRelInfo != resultRelInfo &&
+ mtstate->mt_resultrel_maps != NULL &&
+ resultRelInfo >= mtstate->resultRelInfo &&
+ resultRelInfo <= mtstate->resultRelInfo + mtstate->mt_nplans-1)
+ {
+ int map_index = resultRelInfo - mtstate->resultRelInfo;
+
+ tuple = ConvertPartitionTupleSlot(mtstate,
+ mtstate->mt_resultrel_maps[map_index],
+ tuple, &slot);
+ }
/*
* Away we go ... If we end up not finding a partition after all,
@@ -290,7 +352,7 @@ ExecInsert(ModifyTableState *mtstate,
* the ResultRelInfo and TupleConversionMap for the partition,
* respectively.
*/
- leaf_part_index = ExecFindPartition(resultRelInfo,
+ leaf_part_index = ExecFindPartition(rootResultRelInfo,
mtstate->mt_partition_dispatch_info,
slot,
estate);
@@ -302,7 +364,7 @@ ExecInsert(ModifyTableState *mtstate,
* the selected partition.
*/
saved_resultRelInfo = resultRelInfo;
- resultRelInfo = mtstate->mt_partitions + leaf_part_index;
+ resultRelInfo = mtstate->mt_partitions[leaf_part_index];
/* We do not yet have a way to insert into a foreign partition */
if (resultRelInfo->ri_FdwRoutine)
@@ -347,23 +409,9 @@ ExecInsert(ModifyTableState *mtstate,
* We might need to convert from the parent rowtype to the partition
* rowtype.
*/
- map = mtstate->mt_partition_tupconv_maps[leaf_part_index];
- if (map)
- {
- Relation partrel = resultRelInfo->ri_RelationDesc;
-
- tuple = do_convert_tuple(tuple, map);
-
- /*
- * We must use the partition's tuple descriptor from this point
- * on, until we're finished dealing with the partition. Use the
- * dedicated slot for that.
- */
- slot = mtstate->mt_partition_tuple_slot;
- Assert(slot != NULL);
- ExecSetSlotDescriptor(slot, RelationGetDescr(partrel));
- ExecStoreTuple(tuple, slot, InvalidBuffer, true);
- }
+ tuple = ConvertPartitionTupleSlot(mtstate,
+ mtstate->mt_partition_tupconv_maps[leaf_part_index],
+ tuple, &slot);
}
resultRelationDesc = resultRelInfo->ri_RelationDesc;
@@ -481,7 +529,7 @@ ExecInsert(ModifyTableState *mtstate,
/* Check the constraints of the tuple */
if (resultRelationDesc->rd_att->constr || check_partition_constr)
- ExecConstraints(resultRelInfo, slot, estate);
+ ExecConstraints(resultRelInfo, slot, estate, true);
if (onconflict != ONCONFLICT_NONE && resultRelInfo->ri_NumIndices > 0)
{
@@ -673,6 +721,8 @@ ExecDelete(ModifyTableState *mtstate,
TupleTableSlot *planSlot,
EPQState *epqstate,
EState *estate,
+ bool *concurrently_deleted,
+ bool process_returning,
bool canSetTag)
{
ResultRelInfo *resultRelInfo;
@@ -681,6 +731,9 @@ ExecDelete(ModifyTableState *mtstate,
HeapUpdateFailureData hufd;
TupleTableSlot *slot = NULL;
+ if (concurrently_deleted)
+ *concurrently_deleted = false;
+
/*
* get information on the (current) result relation
*/
@@ -824,6 +877,8 @@ ldelete:;
}
}
/* tuple already deleted; nothing to do */
+ if (concurrently_deleted)
+ *concurrently_deleted = true;
return NULL;
default:
@@ -848,8 +903,8 @@ ldelete:;
ExecARDeleteTriggers(estate, resultRelInfo, tupleid, oldtuple,
mtstate->mt_transition_capture);
- /* Process RETURNING if present */
- if (resultRelInfo->ri_projectReturning)
+ /* Process RETURNING if present and if requested */
+ if (process_returning && resultRelInfo->ri_projectReturning)
{
/*
* We have to put the target tuple into a slot, which means first we
@@ -942,6 +997,8 @@ ExecUpdate(ModifyTableState *mtstate,
HTSU_Result result;
HeapUpdateFailureData hufd;
List *recheckIndexes = NIL;
+ bool partition_check_passed = true;
+ bool has_br_trigger;
/*
* abort the operation if not running transactions
@@ -962,16 +1019,56 @@ ExecUpdate(ModifyTableState *mtstate,
resultRelationDesc = resultRelInfo->ri_RelationDesc;
/* BEFORE ROW UPDATE Triggers */
- if (resultRelInfo->ri_TrigDesc &&
- resultRelInfo->ri_TrigDesc->trig_update_before_row)
+ has_br_trigger = (resultRelInfo->ri_TrigDesc &&
+ resultRelInfo->ri_TrigDesc->trig_update_before_row);
+
+ if (has_br_trigger)
{
- slot = ExecBRUpdateTriggers(estate, epqstate, resultRelInfo,
- tupleid, oldtuple, slot);
+ TupleTableSlot *trig_slot;
- if (slot == NULL) /* "do nothing" */
+ trig_slot = ExecBRUpdateTriggers(estate, epqstate, resultRelInfo,
+ tupleid, oldtuple, slot);
+
+ if (trig_slot == NULL) /* "do nothing" */
return NULL;
+ if (resultRelInfo->ri_PartitionCheck)
+ {
+ bool partition_check_passed_with_trig_tuple;
+
+ partition_check_passed =
+ (resultRelInfo->ri_PartitionCheck &&
+ ExecPartitionCheck(resultRelInfo, slot, estate));
+
+ partition_check_passed_with_trig_tuple =
+ (resultRelInfo->ri_PartitionCheck &&
+ ExecPartitionCheck(resultRelInfo, trig_slot, estate));
+
+ if (partition_check_passed)
+ {
+ /*
+ * If it's the trigger that is causing partition constraint
+ * violation, abort. We don't want a trigger to cause tuple
+ * routing.
+ */
+ if (!partition_check_passed_with_trig_tuple)
+ ExecPartitionCheckEmitError(resultRelInfo,
+ trig_slot, estate);
+ }
+ else
+ {
+ /*
+ * Partition constraint failed with the original NEW tuple. But the
+ * trigger might have modified the tuple such that it fits back into
+ * the partition. So the partition constraint check should be based
+ * on the *final* NEW tuple.
+ */
+ partition_check_passed = partition_check_passed_with_trig_tuple;
+ }
+ }
+
/* trigger might have changed tuple */
+ slot = trig_slot;
tuple = ExecMaterializeSlot(slot);
}
@@ -1038,12 +1135,60 @@ lreplace:;
resultRelInfo, slot, estate);
/*
+ * If a partition check fails, try to move the row into the right
+ * partition. With a BR trigger, the tuple has already gone through EPQ
+ * and has been locked; so it won't change again. So, avoid an extra
+ * partition check if we already did it above in the presence of BR
+ * triggers.
+ */
+ if (!has_br_trigger)
+ {
+ partition_check_passed =
+ (!resultRelInfo->ri_PartitionCheck ||
+ ExecPartitionCheck(resultRelInfo, slot, estate));
+ }
+
+ if (!partition_check_passed)
+ {
+ bool concurrently_deleted;
+
+ /*
+ * When an UPDATE is run directly on a leaf partition, no partition
+ * tuple routing has been set up. In that case, fail with a partition
+ * constraint violation error.
+ */
+ if (mtstate->mt_partition_dispatch_info == NULL)
+ ExecPartitionCheckEmitError(resultRelInfo, slot, estate);
+
+ /* Do the row movement. */
+
+ /*
+ * Skip RETURNING processing for DELETE. We want to return rows
+ * from INSERT.
+ */
+ ExecDelete(mtstate, tupleid, oldtuple, planSlot, epqstate, estate,
+ &concurrently_deleted, false, false);
+
+ /*
+ * Nothing to move if a concurrent DELETE already removed the row.
+ * NOTE(review): a delete suppressed by a BR trigger is not flagged - confirm.
+ */
+ if (concurrently_deleted)
+ return NULL;
+
+ return ExecInsert(mtstate, slot, planSlot, NULL,
+ ONCONFLICT_NONE, estate, canSetTag);
+ }
+
+ /*
* Check the constraints of the tuple. Note that we pass the same
* slot for the orig_slot argument, because unlike ExecInsert(), no
* tuple-routing is performed here, hence the slot remains unchanged.
+ * We have already checked partition constraints above, so skip them
+ * below.
*/
- if (resultRelationDesc->rd_att->constr || resultRelInfo->ri_PartitionCheck)
- ExecConstraints(resultRelInfo, slot, estate);
+ if (resultRelationDesc->rd_att->constr)
+ ExecConstraints(resultRelInfo, slot, estate, false);
/*
* replace the heap tuple
@@ -1462,6 +1607,36 @@ fireASTriggers(ModifyTableState *node)
}
/*
+ * Check whether partition key is modified for any of the relations.
+ */
+static bool
+IsPartitionKeyUpdate(EState *estate, ResultRelInfo *result_rels, int num_rels)
+{
+	int			relno = 0;
+
+	/*
+	 * updatedCols is kept per result relation, numbered by that relation's
+	 * own column order. Hence each relation is examined on its own: gather
+	 * the attnos referenced by its partition quals and see whether any of
+	 * them is among the columns being updated.
+	 */
+	while (relno < num_rels)
+	{
+		ResultRelInfo *rri = &result_rels[relno++];
+		Bitmapset  *updated_attrs = GetUpdatedColumns(rri, estate);
+		Bitmapset  *qual_attrs = NULL;
+
+		pull_varattnos((Node *) rri->ri_RelationDesc->rd_partcheck, 1, &qual_attrs);
+
+		/* The two sets share the FirstLowInvalidHeapAttributeNumber offset. */
+		if (bms_overlap(qual_attrs, updated_attrs))
+			return true;
+	}
+
+	return false;
+}
+
+/*
* Set up the state needed for collecting transition tuples for AFTER
* triggers.
*/
@@ -1482,23 +1657,22 @@ ExecSetupTransitionCaptureState(ModifyTableState *mtstate, EState *estate)
*/
if (mtstate->mt_transition_capture != NULL)
{
- ResultRelInfo *resultRelInfos;
+ ResultRelInfo *resultRelInfo;
int numResultRelInfos;
+ bool tuple_routing = (mtstate->mt_partition_dispatch_info != NULL);
/* Find the set of partitions so that we can find their TupleDescs. */
- if (mtstate->mt_partition_dispatch_info != NULL)
+ if (tuple_routing)
{
/*
* For INSERT via partitioned table, so we need TupleDescs based
* on the partition routing table.
*/
- resultRelInfos = mtstate->mt_partitions;
numResultRelInfos = mtstate->mt_num_partitions;
}
else
{
/* Otherwise we need the ResultRelInfo for each subplan. */
- resultRelInfos = mtstate->resultRelInfo;
numResultRelInfos = mtstate->mt_nplans;
}
@@ -1512,8 +1686,15 @@ ExecSetupTransitionCaptureState(ModifyTableState *mtstate, EState *estate)
palloc0(sizeof(TupleConversionMap *) * numResultRelInfos);
for (i = 0; i < numResultRelInfos; ++i)
{
+ /*
+ * As noted above, the source rel of the mapping depends on whether
+ * tuple routing is set up: the partition, or else the subplan rel.
+ */
+ resultRelInfo = (tuple_routing ?
+ mtstate->mt_partitions[i] : &mtstate->resultRelInfo[i]);
+
mtstate->mt_transition_tupconv_maps[i] =
- convert_tuples_by_name(RelationGetDescr(resultRelInfos[i].ri_RelationDesc),
+ convert_tuples_by_name(RelationGetDescr(resultRelInfo->ri_RelationDesc),
RelationGetDescr(targetRelInfo->ri_RelationDesc),
gettext_noop("could not convert row type"));
}
@@ -1746,7 +1927,8 @@ ExecModifyTable(ModifyTableState *node)
break;
case CMD_DELETE:
slot = ExecDelete(node, tupleid, oldtuple, planSlot,
- &node->mt_epqstate, estate, node->canSetTag);
+ &node->mt_epqstate, estate,
+ NULL, true, node->canSetTag);
break;
default:
elog(ERROR, "unknown operation");
@@ -1786,11 +1968,14 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
{
ModifyTableState *mtstate;
CmdType operation = node->operation;
+ bool is_partitionkey_update = false;
int nplans = list_length(node->plans);
ResultRelInfo *saved_resultRelInfo;
ResultRelInfo *resultRelInfo;
TupleDesc tupDesc;
Plan *subplan;
+ int firstVarno = 0;
+ Relation firstResultRel = NULL;
ListCell *l;
int i;
Relation rel;
@@ -1902,18 +2087,30 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
else
rel = mtstate->resultRelInfo->ri_RelationDesc;
- /* Build state for INSERT tuple routing */
- if (operation == CMD_INSERT &&
- rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+ /* Remember whether it is going to be an update of partition key. */
+ is_partitionkey_update =
+ (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE &&
+ operation == CMD_UPDATE &&
+ IsPartitionKeyUpdate(estate, mtstate->resultRelInfo, nplans));
+
+ /*
+ * Build state for tuple routing if it's an INSERT or if it's an UPDATE of
+ * partition key.
+ */
+ if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE &&
+ (operation == CMD_INSERT || is_partitionkey_update))
{
PartitionDispatch *partition_dispatch_info;
- ResultRelInfo *partitions;
+ ResultRelInfo **partitions;
TupleConversionMap **partition_tupconv_maps;
TupleTableSlot *partition_tuple_slot;
int num_parted,
num_partitions;
ExecSetupPartitionTupleRouting(rel,
+ (operation == CMD_UPDATE ?
+ mtstate->resultRelInfo : NULL),
+ (operation == CMD_UPDATE ? nplans : 0),
&partition_dispatch_info,
&partitions,
&partition_tupconv_maps,
@@ -1925,6 +2122,43 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
mtstate->mt_num_partitions = num_partitions;
mtstate->mt_partition_tupconv_maps = partition_tupconv_maps;
mtstate->mt_partition_tuple_slot = partition_tuple_slot;
+
+ /*
+ * The following are needed as reference objects for mapping partition
+ * attnos in expressions such as WCO and RETURNING.
+ */
+ firstVarno = mtstate->resultRelInfo[0].ri_RangeTableIndex;
+ firstResultRel = mtstate->resultRelInfo[0].ri_RelationDesc;
+ }
+
+ /*
+ * Construct a mapping from the attnos of each resultRelInfo to the root
+ * attnos. This is needed when, during UPDATE row movement, the tuple
+ * descriptor of a source partition does not match the root partition's
+ * descriptor. In that case we need to convert tuples to the root
+ * partition's tuple descriptor, because the search for the destination
+ * partition starts from the root. Skip this setup if it's not a partition
+ * key update or if there are no partitions below this partitioned table.
+ */
+ if (is_partitionkey_update && mtstate->mt_num_partitions > 0)
+ {
+ TupleConversionMap **tup_conv_maps;
+ TupleDesc outdesc;
+
+ mtstate->mt_resultrel_maps =
+ (TupleConversionMap **) palloc0(sizeof(TupleConversionMap*) * nplans);
+
+ /* Get tuple descriptor of the root partition. */
+ outdesc = RelationGetDescr(mtstate->mt_partition_dispatch_info[0]->reldesc);
+
+ resultRelInfo = mtstate->resultRelInfo;
+ tup_conv_maps = mtstate->mt_resultrel_maps;
+ for (i = 0; i < nplans; i++)
+ {
+ TupleDesc indesc = RelationGetDescr(resultRelInfo[i].ri_RelationDesc);
+ tup_conv_maps[i] = convert_tuples_by_name(indesc, outdesc,
+ gettext_noop("could not convert row type"));
+ }
}
/* Build state for collecting transition tuples */
@@ -1960,50 +2194,52 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
* Build WITH CHECK OPTION constraints for each leaf partition rel. Note
* that we didn't build the withCheckOptionList for each partition within
* the planner, but simple translation of the varattnos for each partition
- * will suffice. This only occurs for the INSERT case; UPDATE/DELETE
- * cases are handled above.
+ * will suffice. This only occurs for the INSERT case or for UPDATE
+ * row movement. DELETEs and local UPDATEs are handled above.
*/
if (node->withCheckOptionLists != NIL && mtstate->mt_num_partitions > 0)
{
- List *wcoList;
- PlanState *plan;
+ List *firstWco;
/*
* In case of INSERT on partitioned tables, there is only one plan.
* Likewise, there is only one WITH CHECK OPTIONS list, not one per
- * partition. We make a copy of the WCO qual for each partition; note
- * that, if there are SubPlans in there, they all end up attached to
- * the one parent Plan node.
+ * partition. Whereas for UPDATE, there are as many WCOs as there are
+ * plans. So in either case, use the WCO expression of the first
+ * resultRelInfo as a reference to calculate attno's for the WCO
+ * expression of each of the partitions. We make a copy of the WCO qual
+ * for each partition. Note that, if there are SubPlans in there, they
+ * all end up attached to the one parent Plan node.
*/
- Assert(operation == CMD_INSERT &&
+ Assert(is_partitionkey_update ||
+ (operation == CMD_INSERT &&
list_length(node->withCheckOptionLists) == 1 &&
- mtstate->mt_nplans == 1);
- wcoList = linitial(node->withCheckOptionLists);
- plan = mtstate->mt_plans[0];
- resultRelInfo = mtstate->mt_partitions;
+ mtstate->mt_nplans == 1));
+
+ firstWco = linitial(node->withCheckOptionLists);
for (i = 0; i < mtstate->mt_num_partitions; i++)
{
- Relation partrel = resultRelInfo->ri_RelationDesc;
- List *mapped_wcoList;
+ Relation partrel;
+ List *mappedWco;
List *wcoExprs = NIL;
ListCell *ll;
- /* varno = node->nominalRelation */
- mapped_wcoList = map_partition_varattnos(wcoList,
- node->nominalRelation,
- partrel, rel);
- foreach(ll, mapped_wcoList)
+ resultRelInfo = mtstate->mt_partitions[i];
+
+ partrel = resultRelInfo->ri_RelationDesc;
+ mappedWco = map_partition_varattnos(firstWco, firstVarno,
+ partrel, firstResultRel);
+ foreach(ll, mappedWco)
{
WithCheckOption *wco = castNode(WithCheckOption, lfirst(ll));
ExprState *wcoExpr = ExecInitQual(castNode(List, wco->qual),
- plan);
+ &mtstate->ps);
wcoExprs = lappend(wcoExprs, wcoExpr);
}
- resultRelInfo->ri_WithCheckOptions = mapped_wcoList;
+ resultRelInfo->ri_WithCheckOptions = mappedWco;
resultRelInfo->ri_WithCheckOptionExprs = wcoExprs;
- resultRelInfo++;
}
}
@@ -2014,7 +2250,7 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
{
TupleTableSlot *slot;
ExprContext *econtext;
- List *returningList;
+ List *firstReturningList;
/*
* Initialize result tuple slot and assign its rowtype using the first
@@ -2051,20 +2287,25 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
* Build a projection for each leaf partition rel. Note that we
* didn't build the returningList for each partition within the
* planner, but simple translation of the varattnos for each partition
- * will suffice. This only occurs for the INSERT case; UPDATE/DELETE
- * are handled above.
+ * will suffice. This only occurs for the INSERT case or for UPDATE
+ * row movement. DELETEs and local UPDATEs are handled above.
*/
- resultRelInfo = mtstate->mt_partitions;
- returningList = linitial(node->returningLists);
+ firstReturningList = linitial(node->returningLists);
for (i = 0; i < mtstate->mt_num_partitions; i++)
{
- Relation partrel = resultRelInfo->ri_RelationDesc;
+ Relation partrel;
List *rlist;
- /* varno = node->nominalRelation */
- rlist = map_partition_varattnos(returningList,
- node->nominalRelation,
- partrel, rel);
+ resultRelInfo = mtstate->mt_partitions[i];
+ partrel = resultRelInfo->ri_RelationDesc;
+
+ /*
+ * Use the returning expression of the first resultRelInfo as a
+ * reference to calculate attno's for the returning expression of
+ * each of the partitions.
+ */
+ rlist = map_partition_varattnos(firstReturningList, firstVarno,
+ partrel, firstResultRel);
resultRelInfo->ri_projectReturning =
ExecBuildProjectionInfo(rlist, econtext, slot, &mtstate->ps,
resultRelInfo->ri_RelationDesc->rd_att);
@@ -2307,6 +2548,7 @@ void
ExecEndModifyTable(ModifyTableState *node)
{
int i;
+ CmdType operation = node->operation;
/* Free transition tables */
if (node->mt_transition_capture != NULL)
@@ -2343,7 +2585,17 @@ ExecEndModifyTable(ModifyTableState *node)
}
for (i = 0; i < node->mt_num_partitions; i++)
{
- ResultRelInfo *resultRelInfo = node->mt_partitions + i;
+ ResultRelInfo *resultRelInfo = node->mt_partitions[i];
+
+ /*
+ * If this result rel is one of the subplan result rels, let
+ * ExecEndPlan() close it. For INSERTs, this does not apply because
+ * all leaf partition result rels are anyway newly allocated.
+ */
+ if (operation == CMD_UPDATE &&
+ resultRelInfo >= node->resultRelInfo &&
+ resultRelInfo < node->resultRelInfo + node->mt_nplans)
+ continue;
ExecCloseIndices(resultRelInfo);
heap_close(resultRelInfo->ri_RelationDesc, NoLock);
diff --git a/src/include/catalog/partition.h b/src/include/catalog/partition.h
index f10879a..b1a60c2 100644
--- a/src/include/catalog/partition.h
+++ b/src/include/catalog/partition.h
@@ -79,8 +79,8 @@ extern void check_new_partition_bound(char *relname, Relation parent,
extern Oid get_partition_parent(Oid relid);
extern List *get_qual_from_partbound(Relation rel, Relation parent,
PartitionBoundSpec *spec);
-extern List *map_partition_varattnos(List *expr, int target_varno,
- Relation partrel, Relation parent);
+extern List *map_partition_varattnos(List *expr, int fromrel_varno,
+ Relation to_rel, Relation from_rel);
extern List *RelationGetPartitionQual(Relation rel);
extern Expr *get_partition_qual_relid(Oid relid);
diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h
index e25cfa3..ea4205d 100644
--- a/src/include/executor/executor.h
+++ b/src/include/executor/executor.h
@@ -187,7 +187,10 @@ extern ResultRelInfo *ExecGetTriggerResultRel(EState *estate, Oid relid);
extern void ExecCleanUpTriggerState(EState *estate);
extern bool ExecContextForcesOids(PlanState *planstate, bool *hasoids);
extern void ExecConstraints(ResultRelInfo *resultRelInfo,
- TupleTableSlot *slot, EState *estate);
+ TupleTableSlot *slot, EState *estate,
+ bool check_partition_constraint);
+extern void ExecPartitionCheckEmitError(ResultRelInfo *resultRelInfo,
+ TupleTableSlot *slot, EState *estate);
extern void ExecWithCheckOptions(WCOKind kind, ResultRelInfo *resultRelInfo,
TupleTableSlot *slot, EState *estate);
extern LockTupleMode ExecUpdateLockMode(EState *estate, ResultRelInfo *relinfo);
@@ -207,8 +210,10 @@ extern void EvalPlanQualSetTuple(EPQState *epqstate, Index rti,
HeapTuple tuple);
extern HeapTuple EvalPlanQualGetTuple(EPQState *epqstate, Index rti);
extern void ExecSetupPartitionTupleRouting(Relation rel,
+ ResultRelInfo *update_rri,
+ int num_update_rri,
PartitionDispatch **pd,
- ResultRelInfo **partitions,
+ ResultRelInfo ***partitions,
TupleConversionMap ***tup_conv_maps,
TupleTableSlot **partition_tuple_slot,
int *num_parted, int *num_partitions);
@@ -216,6 +221,8 @@ extern int ExecFindPartition(ResultRelInfo *resultRelInfo,
PartitionDispatch *pd,
TupleTableSlot *slot,
EState *estate);
+extern bool ExecPartitionCheck(ResultRelInfo *resultRelInfo,
+ TupleTableSlot *slot, EState *estate);
#define EvalPlanQualSetSlot(epqstate, slot) ((epqstate)->origslot = (slot))
extern void EvalPlanQualFetchRowMarks(EPQState *epqstate);
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 85fac8a..276b65b 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -959,9 +959,13 @@ typedef struct ModifyTableState
int mt_num_dispatch; /* Number of entries in the above array */
int mt_num_partitions; /* Number of members in the following
* arrays */
- ResultRelInfo *mt_partitions; /* Per partition result relation */
- TupleConversionMap **mt_partition_tupconv_maps;
+ ResultRelInfo **mt_partitions; /* Per partition result relation pointers */
+
/* Per partition tuple conversion map */
+ TupleConversionMap **mt_partition_tupconv_maps;
+ /* Per resultRelInfo conversion map to convert tuples to root partition */
+ TupleConversionMap **mt_resultrel_maps;
+
TupleTableSlot *mt_partition_tuple_slot;
struct TransitionCaptureState *mt_transition_capture;
/* controls transition table population */
diff --git a/src/test/regress/expected/update.out b/src/test/regress/expected/update.out
index 9366f04..f3c03a7 100644
--- a/src/test/regress/expected/update.out
+++ b/src/test/regress/expected/update.out
@@ -198,25 +198,189 @@ INSERT INTO upsert_test VALUES (1, 'Bat') ON CONFLICT(a)
DROP TABLE update_test;
DROP TABLE upsert_test;
--- update to a partition should check partition bound constraint for the new tuple
-create table range_parted (
+-- update to a partition should check partition bound constraint for the new tuple.
+-- If partition key is updated, the row should be moved to the appropriate
+-- partition. updatable views using partitions should enforce the check options
+-- for the rows that have been moved.
+create table mintab(c1 int);
+insert into mintab values (120);
+CREATE TABLE range_parted (
a text,
- b int
+ b int,
+ c int
) partition by range (a, b);
+CREATE VIEW upview AS SELECT * FROM range_parted WHERE (select c > c1 from mintab) WITH CHECK OPTION;
create table part_a_1_a_10 partition of range_parted for values from ('a', 1) to ('a', 10);
create table part_a_10_a_20 partition of range_parted for values from ('a', 10) to ('a', 20);
create table part_b_1_b_10 partition of range_parted for values from ('b', 1) to ('b', 10);
-create table part_b_10_b_20 partition of range_parted for values from ('b', 10) to ('b', 20);
+create table part_b_10_b_20 partition of range_parted for values from ('b', 10) to ('b', 20) partition by range (c);
+-- This tests partition-key UPDATE on a partitioned table that does not have any child partitions
+update part_b_10_b_20 set b = b - 6;
+create table part_c_1_100 (b int, c int, a text);
+alter table part_b_10_b_20 attach partition part_c_1_100 for values from (1) to (100);
+create table part_c_100_200 (c int, a text, b int);
+alter table part_b_10_b_20 attach partition part_c_100_200 for values from (100) to (200);
insert into part_a_1_a_10 values ('a', 1);
-insert into part_b_10_b_20 values ('b', 10);
--- fail
-update part_a_1_a_10 set a = 'b' where a = 'a';
-ERROR: new row for relation "part_a_1_a_10" violates partition constraint
-DETAIL: Failing row contains (b, 1).
-update range_parted set b = b - 1 where b = 10;
+insert into part_a_10_a_20 values ('a', 10, 200);
+insert into part_c_1_100 (a, b, c) values ('b', 12, 96);
+insert into part_c_1_100 (a, b, c) values ('b', 13, 97);
+insert into part_c_100_200 (a, b, c) values ('b', 15, 105);
+insert into part_c_100_200 (a, b, c) values ('b', 17, 105);
+-- fail (row movement happens only within the partition subtree) :
+update part_c_1_100 set c = c + 20 where c = 96;
+ERROR: new row for relation "part_c_1_100" violates partition constraint
+DETAIL: Failing row contains (12, 116, b).
+-- No row found :
+update part_c_1_100 set c = c + 20 where c = 98;
+-- ok (row movement)
+update part_b_10_b_20 set c = c + 20 returning c, b, a;
+ c | b | a
+-----+----+---
+ 116 | 12 | b
+ 117 | 13 | b
+ 125 | 15 | b
+ 125 | 17 | b
+(4 rows)
+
+select a, b, c from part_c_1_100 order by 1, 2, 3;
+ a | b | c
+---+---+---
+(0 rows)
+
+select a, b, c from part_c_100_200 order by 1, 2, 3;
+ a | b | c
+---+----+-----
+ b | 12 | 116
+ b | 13 | 117
+ b | 15 | 125
+ b | 17 | 125
+(4 rows)
+
+-- fail (row movement happens only within the partition subtree) :
+update part_b_10_b_20 set b = b - 6 where c > 116 returning *;
ERROR: new row for relation "part_b_10_b_20" violates partition constraint
-DETAIL: Failing row contains (b, 9).
--- ok
-update range_parted set b = b + 1 where b = 10;
+DETAIL: Failing row contains (b, 7, 117).
+-- ok (row movement, with subset of rows moved into different partition)
+update range_parted set b = b - 6 where c > 116 returning a, b + c;
+ a | ?column?
+---+----------
+ a | 204
+ b | 124
+ b | 134
+ b | 136
+(4 rows)
+
+select tableoid::regclass partname, * from range_parted order by 1, 2, 3, 4;
+ partname | a | b | c
+----------------+---+----+-----
+ part_a_1_a_10 | a | 1 |
+ part_a_1_a_10 | a | 4 | 200
+ part_b_1_b_10 | b | 7 | 117
+ part_b_1_b_10 | b | 9 | 125
+ part_c_100_200 | b | 11 | 125
+ part_c_100_200 | b | 12 | 116
+(6 rows)
+
+-- update partition key using updatable view.
+-- succeeds
+update upview set c = 199 where b = 4;
+-- fail, check option violation
+update upview set c = 120 where b = 4;
+ERROR: new row violates check option for view "upview"
+DETAIL: Failing row contains (a, 4, 120).
+-- fail, row movement with check option violation
+update upview set a = 'b', b = 15, c = 120 where b = 4;
+ERROR: new row violates check option for view "upview"
+DETAIL: Failing row contains (120, b, 15).
+-- succeeds, row movement , check option passes
+update upview set a = 'b', b = 15 where b = 4;
+select tableoid::regclass partname, * from range_parted order by 1, 2, 3, 4;
+ partname | a | b | c
+----------------+---+----+-----
+ part_a_1_a_10 | a | 1 |
+ part_b_1_b_10 | b | 7 | 117
+ part_b_1_b_10 | b | 9 | 125
+ part_c_100_200 | b | 11 | 125
+ part_c_100_200 | b | 12 | 116
+ part_c_100_200 | b | 15 | 199
+(6 rows)
+
-- cleanup
-drop table range_parted;
+drop view upview;
+drop table mintab, range_parted;
+--------------
+-- UPDATE with
+-- partition key or non-partition columns, with different column ordering,
+-- triggers.
+--------------
+-- Setup
+--------
+create table list_parted (a int, b int, c int) partition by list (a);
+create table sub_parted partition of list_parted for values in (1) partition by list (b);
+create table sub_part1(b int, c int, a int);
+alter table sub_parted attach partition sub_part1 for values in (1);
+create table sub_part2(b int, c int, a int);
+alter table sub_parted attach partition sub_part2 for values in (2);
+create table list_part1(a int, b int, c int);
+alter table list_parted attach partition list_part1 for values in (2,3);
+insert into list_parted values (2,5,50);
+insert into list_parted values (3,6,60);
+insert into sub_parted values (1,1,60);
+insert into sub_parted values (1,2,10);
+-- Test partition constraint violation when intermediate ancestor is used and
+-- constraint is inherited from upper root.
+update sub_parted set a = 2 where c = 10;
+ERROR: new row for relation "sub_parted" violates partition constraint
+DETAIL: Failing row contains (2, 2, 10).
+-- UPDATE which does not modify partition key of partitions that are chosen for update.
+select tableoid::regclass , * from list_parted where a = 2 order by 1;
+ tableoid | a | b | c
+------------+---+---+----
+ list_part1 | 2 | 5 | 50
+(1 row)
+
+update list_parted set b = c + a where a = 2;
+select tableoid::regclass , * from list_parted where a = 2 order by 1;
+ tableoid | a | b | c
+------------+---+----+----
+ list_part1 | 2 | 52 | 50
+(1 row)
+
+-----------
+-- Triggers should not be allowed to initiate the update row movement
+-----------
+create function func_parted_mod_b() returns trigger as $$
+begin
+ NEW.b = 2; -- THis is changing partition key column.
+ return NEW;
+end $$ language plpgsql;
+create trigger parted_mod_b before update on sub_part1
+ for each row execute procedure func_parted_mod_b();
+select tableoid::regclass , * from list_parted order by 1, 2, 3, 4;
+ tableoid | a | b | c
+------------+---+----+----
+ sub_part1 | 1 | 1 | 60
+ sub_part2 | 1 | 2 | 10
+ list_part1 | 2 | 52 | 50
+ list_part1 | 3 | 6 | 60
+(4 rows)
+
+-- This should fail because trigger on sub_part1 would change column 'b' which
+-- would violate "b in (1)" constraint.
+update list_parted set c = 70 where b = 1 ;
+ERROR: new row for relation "sub_part1" violates partition constraint
+DETAIL: Failing row contains (2, 70, 1).
+drop trigger parted_mod_b ON sub_part1 ;
+-- Now that the trigger is dropped, the same update should succeed
+update list_parted set c = 70 where b = 1 ;
+select tableoid::regclass , * from list_parted order by 1, 2, 3, 4;
+ tableoid | a | b | c
+------------+---+----+----
+ sub_part1 | 1 | 1 | 70
+ sub_part2 | 1 | 2 | 10
+ list_part1 | 2 | 52 | 50
+ list_part1 | 3 | 6 | 60
+(4 rows)
+
+drop function func_parted_mod_b ( ) ;
+drop table list_parted;
diff --git a/src/test/regress/sql/update.sql b/src/test/regress/sql/update.sql
index 6637119..0113c7d 100644
--- a/src/test/regress/sql/update.sql
+++ b/src/test/regress/sql/update.sql
@@ -107,23 +107,128 @@ INSERT INTO upsert_test VALUES (1, 'Bat') ON CONFLICT(a)
DROP TABLE update_test;
DROP TABLE upsert_test;
--- update to a partition should check partition bound constraint for the new tuple
-create table range_parted (
+-- update to a partition should check partition bound constraint for the new tuple.
+-- If partition key is updated, the row should be moved to the appropriate
+-- partition. updatable views using partitions should enforce the check options
+-- for the rows that have been moved.
+create table mintab(c1 int);
+insert into mintab values (120);
+CREATE TABLE range_parted (
a text,
- b int
+ b int,
+ c int
) partition by range (a, b);
+CREATE VIEW upview AS SELECT * FROM range_parted WHERE (select c > c1 from mintab) WITH CHECK OPTION;
+
create table part_a_1_a_10 partition of range_parted for values from ('a', 1) to ('a', 10);
create table part_a_10_a_20 partition of range_parted for values from ('a', 10) to ('a', 20);
create table part_b_1_b_10 partition of range_parted for values from ('b', 1) to ('b', 10);
-create table part_b_10_b_20 partition of range_parted for values from ('b', 10) to ('b', 20);
-insert into part_a_1_a_10 values ('a', 1);
-insert into part_b_10_b_20 values ('b', 10);
+create table part_b_10_b_20 partition of range_parted for values from ('b', 10) to ('b', 20) partition by range (c);
+
+-- This tests partition-key UPDATE on a partitioned table that does not have any child partitions
+update part_b_10_b_20 set b = b - 6;
--- fail
-update part_a_1_a_10 set a = 'b' where a = 'a';
-update range_parted set b = b - 1 where b = 10;
--- ok
-update range_parted set b = b + 1 where b = 10;
+create table part_c_1_100 (b int, c int, a text);
+alter table part_b_10_b_20 attach partition part_c_1_100 for values from (1) to (100);
+create table part_c_100_200 (c int, a text, b int);
+alter table part_b_10_b_20 attach partition part_c_100_200 for values from (100) to (200);
+
+insert into part_a_1_a_10 values ('a', 1);
+insert into part_a_10_a_20 values ('a', 10, 200);
+insert into part_c_1_100 (a, b, c) values ('b', 12, 96);
+insert into part_c_1_100 (a, b, c) values ('b', 13, 97);
+insert into part_c_100_200 (a, b, c) values ('b', 15, 105);
+insert into part_c_100_200 (a, b, c) values ('b', 17, 105);
+
+-- fail (row movement happens only within the partition subtree) :
+update part_c_1_100 set c = c + 20 where c = 96;
+-- No row found :
+update part_c_1_100 set c = c + 20 where c = 98;
+-- ok (row movement)
+update part_b_10_b_20 set c = c + 20 returning c, b, a;
+select a, b, c from part_c_1_100 order by 1, 2, 3;
+select a, b, c from part_c_100_200 order by 1, 2, 3;
+
+-- fail (row movement happens only within the partition subtree) :
+update part_b_10_b_20 set b = b - 6 where c > 116 returning *;
+-- ok (row movement, with subset of rows moved into different partition)
+update range_parted set b = b - 6 where c > 116 returning a, b + c;
+
+select tableoid::regclass partname, * from range_parted order by 1, 2, 3, 4;
+
+-- update partition key using updatable view.
+
+-- succeeds
+update upview set c = 199 where b = 4;
+-- fail, check option violation
+update upview set c = 120 where b = 4;
+-- fail, row movement with check option violation
+update upview set a = 'b', b = 15, c = 120 where b = 4;
+-- succeeds, row movement , check option passes
+update upview set a = 'b', b = 15 where b = 4;
+
+select tableoid::regclass partname, * from range_parted order by 1, 2, 3, 4;
-- cleanup
-drop table range_parted;
+drop view upview;
+drop table mintab, range_parted;
+
+
+
+--------------
+-- UPDATE with
+-- partition key or non-partition columns, with different column ordering,
+-- triggers.
+--------------
+
+-- Setup
+--------
+create table list_parted (a int, b int, c int) partition by list (a);
+create table sub_parted partition of list_parted for values in (1) partition by list (b);
+
+create table sub_part1(b int, c int, a int);
+alter table sub_parted attach partition sub_part1 for values in (1);
+create table sub_part2(b int, c int, a int);
+alter table sub_parted attach partition sub_part2 for values in (2);
+
+create table list_part1(a int, b int, c int);
+alter table list_parted attach partition list_part1 for values in (2,3);
+
+insert into list_parted values (2,5,50);
+insert into list_parted values (3,6,60);
+insert into sub_parted values (1,1,60);
+insert into sub_parted values (1,2,10);
+
+-- Test partition constraint violation when intermediate ancestor is used and
+-- constraint is inherited from upper root.
+update sub_parted set a = 2 where c = 10;
+
+-- UPDATE which does not modify partition key of partitions that are chosen for update.
+select tableoid::regclass , * from list_parted where a = 2 order by 1;
+update list_parted set b = c + a where a = 2;
+select tableoid::regclass , * from list_parted where a = 2 order by 1;
+
+
+-----------
+-- Triggers should not be allowed to initiate the update row movement
+-----------
+create function func_parted_mod_b() returns trigger as $$
+begin
+ NEW.b = 2; -- THis is changing partition key column.
+ return NEW;
+end $$ language plpgsql;
+create trigger parted_mod_b before update on sub_part1
+ for each row execute procedure func_parted_mod_b();
+
+select tableoid::regclass , * from list_parted order by 1, 2, 3, 4;
+
+-- This should fail because trigger on sub_part1 would change column 'b' which
+-- would violate "b in (1)" constraint.
+update list_parted set c = 70 where b = 1 ;
+drop trigger parted_mod_b ON sub_part1 ;
+-- Now that the trigger is dropped, the same update should succeed
+update list_parted set c = 70 where b = 1 ;
+select tableoid::regclass , * from list_parted order by 1, 2, 3, 4;
+
+drop function func_parted_mod_b ( ) ;
+drop table list_parted;