From fa8b918516f9151e105a2ad0dc0e2f16705a3eff Mon Sep 17 00:00:00 2001 From: amit Date: Tue, 30 Jul 2019 10:51:35 +0900 Subject: [PATCH v4 3/4] Revise child-to-root tuple conversion map management Transition tuple capture requires to convert child tuples to the inheritance root table format because that's the format the transition tuplestore stores tuple in. For INSERTs into partitioned tables, the conversion is handled by tuple routing code which constructs the map for a given partition only if the partition is targeted, but for UPDATE and DELETE, maps for all result relations are made and stored in an array in ModifyTableState during ExecInitModifyTable, which requires their ResultRelInfos to have been already built. During execution, map for the currently active result relation is set in TransitionCaptureState.tcs_map. This commit removes TransitionCaptureMap.tcs_map in favor a new map field in ResultRelInfo named ri_ChildToRootMap that is initialized when the ResultRelInfo for a given result relation is. This way is less confusing and less bug-prone than setting and resetting tcs_map. Also, this will also allow us to delay creating the map for a given result relation to when that relation is actually processed during execution. --- src/backend/commands/copy.c | 30 +---- src/backend/commands/trigger.c | 9 +- src/backend/executor/execPartition.c | 20 +++- src/backend/executor/nodeModifyTable.c | 203 ++++++++------------------------- src/include/commands/trigger.h | 10 +- src/include/nodes/execnodes.h | 11 +- 6 files changed, 85 insertions(+), 198 deletions(-) diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c index db7d24a..155ac5b 100644 --- a/src/backend/commands/copy.c +++ b/src/backend/commands/copy.c @@ -3117,32 +3117,14 @@ CopyFrom(CopyState cstate) estate->es_result_relation_info = resultRelInfo; /* - * If we're capturing transition tuples, we might need to convert - * from the partition rowtype to root rowtype. + * If we're capturing transition tuples and there are no BEFORE + * triggers on the partition which may change the tuple, we can + * just remember the original unconverted tuple to avoid a + * needless round trip conversion. */ if (cstate->transition_capture != NULL) - { - if (has_before_insert_row_trig) - { - /* - * If there are any BEFORE triggers on the partition, - * we'll have to be ready to convert their result back to - * tuplestore format. - */ - cstate->transition_capture->tcs_original_insert_tuple = NULL; - cstate->transition_capture->tcs_map = - resultRelInfo->ri_PartitionInfo->pi_PartitionToRootMap; - } - else - { - /* - * Otherwise, just remember the original unconverted - * tuple, to avoid a needless round trip conversion. - */ - cstate->transition_capture->tcs_original_insert_tuple = myslot; - cstate->transition_capture->tcs_map = NULL; - } - } + cstate->transition_capture->tcs_original_insert_tuple = + !has_before_insert_row_trig ? myslot : NULL; /* * We might need to convert from the root rowtype to the partition diff --git a/src/backend/commands/trigger.c b/src/backend/commands/trigger.c index 672fccf..d1b5a03 100644 --- a/src/backend/commands/trigger.c +++ b/src/backend/commands/trigger.c @@ -35,6 +35,7 @@ #include "commands/defrem.h" #include "commands/trigger.h" #include "executor/executor.h" +#include "executor/execPartition.h" #include "miscadmin.h" #include "nodes/bitmapset.h" #include "nodes/makefuncs.h" @@ -4293,8 +4294,8 @@ GetAfterTriggersTableData(Oid relid, CmdType cmdType) * tables, then return NULL. * * The resulting object can be passed to the ExecAR* functions. The caller - * should set tcs_map or tcs_original_insert_tuple as appropriate when dealing - * with child tables. + * should set tcs_original_insert_tuple as appropriate when dealing with child + * tables * * Note that we copy the flags from a parent table into this struct (rather * than subsequently using the relation's TriggerDesc directly) so that we can @@ -5389,7 +5390,9 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo, if (row_trigger && transition_capture != NULL) { TupleTableSlot *original_insert_tuple = transition_capture->tcs_original_insert_tuple; - TupleConversionMap *map = transition_capture->tcs_map; + PartitionRoutingInfo *pinfo = relinfo->ri_PartitionInfo; + TupleConversionMap *map = pinfo ? pinfo->pi_PartitionToRootMap : + relinfo->ri_ChildToRootMap; bool delete_old_table = transition_capture->tcs_delete_old_table; bool update_old_table = transition_capture->tcs_update_old_table; bool update_new_table = transition_capture->tcs_update_new_table; diff --git a/src/backend/executor/execPartition.c b/src/backend/executor/execPartition.c index fe02238..2ab41f1 100644 --- a/src/backend/executor/execPartition.c +++ b/src/backend/executor/execPartition.c @@ -926,9 +926,23 @@ ExecInitRoutingInfo(ModifyTableState *mtstate, if (mtstate && (mtstate->mt_transition_capture || mtstate->mt_oc_transition_capture)) { - partrouteinfo->pi_PartitionToRootMap = - convert_tuples_by_name(RelationGetDescr(partRelInfo->ri_RelationDesc), - RelationGetDescr(partRelInfo->ri_PartitionRoot)); + ModifyTable *node = (ModifyTable *) mtstate->ps.plan; + + /* + * If the partition appears to be a reused UPDATE result relation, the + * necessary map would already have been set in ri_ChildToRootMap by + * ExecInitModifyTable(), so use that one instead of building one from + * scratch. One can tell if it's actually a reused UPDATE result + * relation by looking at its ri_RangeTableIndex which must be + * different from the root RT index. + */ + if (node && node->operation == CMD_UPDATE && + node->rootRelation != partRelInfo->ri_RangeTableIndex) + partrouteinfo->pi_PartitionToRootMap = partRelInfo->ri_ChildToRootMap; + else + partrouteinfo->pi_PartitionToRootMap = + convert_tuples_by_name(RelationGetDescr(partRelInfo->ri_RelationDesc), + RelationGetDescr(partRelInfo->ri_PartitionRoot)); } else partrouteinfo->pi_PartitionToRootMap = NULL; diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c index 09a9871..f8f4254 100644 --- a/src/backend/executor/nodeModifyTable.c +++ b/src/backend/executor/nodeModifyTable.c @@ -72,9 +72,6 @@ static TupleTableSlot *ExecPrepareTupleRouting(ModifyTableState *mtstate, ResultRelInfo *targetRelInfo, TupleTableSlot *slot); static ResultRelInfo *getTargetResultRelInfo(ModifyTableState *node); -static void ExecSetupChildParentMapForSubplan(ModifyTableState *mtstate); -static TupleConversionMap *tupconv_map_for_subplan(ModifyTableState *node, - int whichplan); /* * Verify that the tuples to be produced by INSERT or UPDATE match the @@ -1080,7 +1077,6 @@ ExecUpdate(ModifyTableState *mtstate, TM_Result result; TM_FailureData tmfd; List *recheckIndexes = NIL; - TupleConversionMap *saved_tcs_map = NULL; /* * abort the operation if not running transactions @@ -1205,7 +1201,6 @@ lreplace:; TupleTableSlot *ret_slot; TupleTableSlot *epqslot = NULL; PartitionTupleRouting *proute = mtstate->mt_partition_tuple_routing; - int map_index; TupleConversionMap *tupconv_map; /* @@ -1275,26 +1270,11 @@ lreplace:; } /* - * Updates set the transition capture map only when a new subplan - * is chosen. But for inserts, it is set for each row. So after - * INSERT, we need to revert back to the map created for UPDATE; - * otherwise the next UPDATE will incorrectly use the one created - * for INSERT. So first save the one created for UPDATE. - */ - if (mtstate->mt_transition_capture) - saved_tcs_map = mtstate->mt_transition_capture->tcs_map; - - /* * resultRelInfo is one of the per-subplan resultRelInfos. So we * should convert the tuple into root's tuple descriptor, since - * ExecInsert() starts the search from root. The tuple conversion - * map list is in the order of mtstate->resultRelInfo[], so to - * retrieve the one for this resultRel, we need to know the - * position of the resultRel in mtstate->resultRelInfo[]. + * ExecInsert() starts the search from root. */ - map_index = resultRelInfo - mtstate->resultRelInfo; - Assert(map_index >= 0 && map_index < mtstate->mt_nplans); - tupconv_map = tupconv_map_for_subplan(mtstate, map_index); + tupconv_map = resultRelInfo->ri_ChildToRootMap; if (tupconv_map != NULL) slot = execute_attr_map_slot(tupconv_map->attrMap, slot, @@ -1313,11 +1293,13 @@ lreplace:; /* Revert ExecPrepareTupleRouting's node change. */ estate->es_result_relation_info = resultRelInfo; + + /* + * Reset the transition state that may possibly have been written + * by INSERT. + */ if (mtstate->mt_transition_capture) - { mtstate->mt_transition_capture->tcs_original_insert_tuple = NULL; - mtstate->mt_transition_capture->tcs_map = saved_tcs_map; - } return ret_slot; } @@ -1837,28 +1819,6 @@ ExecSetupTransitionCaptureState(ModifyTableState *mtstate, EState *estate) MakeTransitionCaptureState(targetRelInfo->ri_TrigDesc, RelationGetRelid(targetRelInfo->ri_RelationDesc), CMD_UPDATE); - - /* - * If we found that we need to collect transition tuples then we may also - * need tuple conversion maps for any children that have TupleDescs that - * aren't compatible with the tuplestores. (We can share these maps - * between the regular and ON CONFLICT cases.) - */ - if (mtstate->mt_transition_capture != NULL || - mtstate->mt_oc_transition_capture != NULL) - { - ExecSetupChildParentMapForSubplan(mtstate); - - /* - * Install the conversion map for the first plan for UPDATE and DELETE - * operations. It will be advanced each time we switch to the next - * plan. (INSERT operations set it every time, so we need not update - * mtstate->mt_oc_transition_capture here.) - */ - if (mtstate->mt_transition_capture && mtstate->operation != CMD_INSERT) - mtstate->mt_transition_capture->tcs_map = - tupconv_map_for_subplan(mtstate, 0); - } } /* @@ -1882,6 +1842,7 @@ ExecPrepareTupleRouting(ModifyTableState *mtstate, ResultRelInfo *partrel; PartitionRoutingInfo *partrouteinfo; TupleConversionMap *map; + bool has_before_insert_row_trig; /* * Lookup the target partition's ResultRelInfo. If ExecFindPartition does @@ -1900,37 +1861,15 @@ ExecPrepareTupleRouting(ModifyTableState *mtstate, estate->es_result_relation_info = partrel; /* - * If we're capturing transition tuples, we might need to convert from the - * partition rowtype to root partitioned table's rowtype. + * If we're capturing transition tuples and there are no BEFORE triggers + * on the partition which may change the tuple, we can just remember the + * original unconverted tuple to avoid a needless round trip conversion. */ + has_before_insert_row_trig = (partrel->ri_TrigDesc && + partrel->ri_TrigDesc->trig_insert_before_row); if (mtstate->mt_transition_capture != NULL) - { - if (partrel->ri_TrigDesc && - partrel->ri_TrigDesc->trig_insert_before_row) - { - /* - * If there are any BEFORE triggers on the partition, we'll have - * to be ready to convert their result back to tuplestore format. - */ - mtstate->mt_transition_capture->tcs_original_insert_tuple = NULL; - mtstate->mt_transition_capture->tcs_map = - partrouteinfo->pi_PartitionToRootMap; - } - else - { - /* - * Otherwise, just remember the original unconverted tuple, to - * avoid a needless round trip conversion. - */ - mtstate->mt_transition_capture->tcs_original_insert_tuple = slot; - mtstate->mt_transition_capture->tcs_map = NULL; - } - } - if (mtstate->mt_oc_transition_capture != NULL) - { - mtstate->mt_oc_transition_capture->tcs_map = - partrouteinfo->pi_PartitionToRootMap; - } + mtstate->mt_transition_capture->tcs_original_insert_tuple = + !has_before_insert_row_trig ? slot : NULL; /* * Convert the tuple, if necessary. @@ -1946,58 +1885,6 @@ ExecPrepareTupleRouting(ModifyTableState *mtstate, return slot; } -/* - * Initialize the child-to-root tuple conversion map array for UPDATE subplans. - * - * This map array is required to convert the tuple from the subplan result rel - * to the target table descriptor. This requirement arises for two independent - * scenarios: - * 1. For update-tuple-routing. - * 2. For capturing tuples in transition tables. - */ -static void -ExecSetupChildParentMapForSubplan(ModifyTableState *mtstate) -{ - ResultRelInfo *targetRelInfo = getTargetResultRelInfo(mtstate); - ResultRelInfo *resultRelInfos = mtstate->resultRelInfo; - TupleDesc outdesc; - int numResultRelInfos = mtstate->mt_nplans; - int i; - - /* - * Build array of conversion maps from each child's TupleDesc to the one - * used in the target relation. The map pointers may be NULL when no - * conversion is necessary, which is hopefully a common case. - */ - - /* Get tuple descriptor of the target rel. */ - outdesc = RelationGetDescr(targetRelInfo->ri_RelationDesc); - - mtstate->mt_per_subplan_tupconv_maps = (TupleConversionMap **) - palloc(sizeof(TupleConversionMap *) * numResultRelInfos); - - for (i = 0; i < numResultRelInfos; ++i) - { - mtstate->mt_per_subplan_tupconv_maps[i] = - convert_tuples_by_name(RelationGetDescr(resultRelInfos[i].ri_RelationDesc), - outdesc); - } -} - -/* - * For a given subplan index, get the tuple conversion map. - */ -static TupleConversionMap * -tupconv_map_for_subplan(ModifyTableState *mtstate, int whichplan) -{ - /* If nobody else set the per-subplan array of maps, do so ourselves. */ - if (mtstate->mt_per_subplan_tupconv_maps == NULL) - ExecSetupChildParentMapForSubplan(mtstate); - - Assert(whichplan >= 0 && whichplan < mtstate->mt_nplans); - return mtstate->mt_per_subplan_tupconv_maps[whichplan]; -} - /* ---------------------------------------------------------------- * ExecModifyTable * @@ -2107,17 +1994,6 @@ ExecModifyTable(PlanState *pstate) estate->es_result_relation_info = resultRelInfo; EvalPlanQualSetPlan(&node->mt_epqstate, subplanstate->plan, node->mt_arowmarks[node->mt_whichplan]); - /* Prepare to convert transition tuples from this child. */ - if (node->mt_transition_capture != NULL) - { - node->mt_transition_capture->tcs_map = - tupconv_map_for_subplan(node, node->mt_whichplan); - } - if (node->mt_oc_transition_capture != NULL) - { - node->mt_oc_transition_capture->tcs_map = - tupconv_map_for_subplan(node, node->mt_whichplan); - } continue; } else @@ -2298,6 +2174,7 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) int i; Relation rel; bool update_tuple_routing_needed = node->partColsUpdated; + ResultRelInfo *rootResultRel; /* check for unsupported flags */ Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK))); @@ -2320,8 +2197,13 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) /* If modifying a partitioned table, initialize the root table info */ if (node->rootResultRelIndex >= 0) + { mtstate->rootResultRelInfo = estate->es_root_result_relations + node->rootResultRelIndex; + rootResultRel = mtstate->rootResultRelInfo; + } + else + rootResultRel = mtstate->resultRelInfo; mtstate->mt_arowmarks = (List **) palloc0(sizeof(List *) * nplans); mtstate->mt_nplans = nplans; @@ -2331,6 +2213,13 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) mtstate->fireBSTriggers = true; /* + * Build state for collecting transition tuples. This requires having a + * valid trigger query context, so skip it in explain-only mode. + */ + if (!(eflags & EXEC_FLAG_EXPLAIN_ONLY)) + ExecSetupTransitionCaptureState(mtstate, estate); + + /* * call ExecInitNode on each of the plans to be executed and save the * results into the array "mt_plans". This is also a convenient place to * verify that the proposed target relations are valid and open their @@ -2403,6 +2292,22 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) eflags); } + /* + * If needed, initialize a map to convert tuples in the child format + * to the format of the table mentioned in the query (root relation). + * It's needed for update tuple routing, because the routing starts + * from the root relation. It's also needed for capturing transition + * tuples, because the transition tuple store can only store tuples + * in the root table format. During INSERT, partition tuples to + * store into the transition tuple store are converted using + * PartitionToRoot map in the partition's PartitionRoutingInfo. + */ + if (update_tuple_routing_needed || + (mtstate->mt_transition_capture && + mtstate->operation != CMD_INSERT)) + resultRelInfo->ri_ChildToRootMap = + convert_tuples_by_name(RelationGetDescr(resultRelInfo->ri_RelationDesc), + RelationGetDescr(rootResultRel->ri_RelationDesc)); resultRelInfo++; i++; } @@ -2429,26 +2334,12 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) ExecSetupPartitionTupleRouting(estate, mtstate, rel); /* - * Build state for collecting transition tuples. This requires having a - * valid trigger query context, so skip it in explain-only mode. - */ - if (!(eflags & EXEC_FLAG_EXPLAIN_ONLY)) - ExecSetupTransitionCaptureState(mtstate, estate); - - /* - * Construct mapping from each of the per-subplan partition attnos to the - * root attno. This is required when during update row movement the tuple - * descriptor of a source partition does not match the root partitioned - * table descriptor. In such a case we need to convert tuples to the root - * tuple descriptor, because the search for destination partition starts - * from the root. We'll also need a slot to store these converted tuples. - * We can skip this setup if it's not a partition key update. + * For update row movement we'll need a dedicated slot to store the + * tuples that have been converted from partition format to the root + * table format. */ if (update_tuple_routing_needed) - { - ExecSetupChildParentMapForSubplan(mtstate); mtstate->mt_root_tuple_slot = table_slot_create(rel, NULL); - } /* * Initialize any WITH CHECK OPTION constraints if needed. diff --git a/src/include/commands/trigger.h b/src/include/commands/trigger.h index a40ddf5..e38d732 100644 --- a/src/include/commands/trigger.h +++ b/src/include/commands/trigger.h @@ -46,7 +46,7 @@ typedef struct TriggerData * The state for capturing old and new tuples into transition tables for a * single ModifyTable node (or other operation source, e.g. copy.c). * - * This is per-caller to avoid conflicts in setting tcs_map or + * This is per-caller to avoid conflicts in setting * tcs_original_insert_tuple. Note, however, that the pointed-to * private data may be shared across multiple callers. */ @@ -66,14 +66,6 @@ typedef struct TransitionCaptureState bool tcs_insert_new_table; /* - * For UPDATE and DELETE, AfterTriggerSaveEvent may need to convert the - * new and old tuples from a child table's format to the format of the - * relation named in a query so that it is compatible with the transition - * tuplestores. The caller must store the conversion map here if so. - */ - TupleConversionMap *tcs_map; - - /* * For INSERT and COPY, it would be wasteful to convert tuples from child * format to parent format after they have already been converted in the * opposite direction during routing. In that case we bypass conversion diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index cf832d7..647bb79 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -491,6 +491,14 @@ typedef struct ResultRelInfo /* For use by copy.c when performing multi-inserts */ struct CopyMultiInsertBuffer *ri_CopyMultiInsertBuffer; + + /* + * Map to convert child result relation tuples to the format of the + * table actually mentioned in the query (called "root"). Set only + * if either transition tuple capture or update partition row + * movement is active. + */ + TupleConversionMap *ri_ChildToRootMap; } ResultRelInfo; /* ---------------- @@ -1192,9 +1200,6 @@ typedef struct ModifyTableState /* controls transition table population for INSERT...ON CONFLICT UPDATE */ struct TransitionCaptureState *mt_oc_transition_capture; - - /* Per plan map for tuple conversion from child to root */ - TupleConversionMap **mt_per_subplan_tupconv_maps; } ModifyTableState; /* ---------------- -- 1.8.3.1