From: Amit Khandekar Date: Fri, 22 Sep 2017 09:54:15 +0530 Subject: [PATCH 1/2] Re-use UPDATE result rels created in InitPlan. For UPDATE tuple routing, we need to have result rels for the leaf partitions. Since we already have at least a subset of those result rels in the form of UPDATE per-subplan result rels, arrange for re-using them instead of creating new ones for all of the leaf partitions. For this, the mtstate->mt_partitions needs to be an array of ResultRelInfo * rather than an array of ResultRelInfo. This way, when a leaf partition already has a result rel allocated in the mtstate->resultRelInfo, the mt_partitions array element would point to this allocated structure. --- src/backend/commands/copy.c | 12 ++-- src/backend/executor/execMain.c | 125 ++++++++++++++++++++++++++++----- src/backend/executor/nodeModifyTable.c | 75 ++++++++++++-------- src/include/executor/executor.h | 4 +- src/include/nodes/execnodes.h | 2 +- 5 files changed, 163 insertions(+), 55 deletions(-) diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c index c6fa445..098bc66 100644 --- a/src/backend/commands/copy.c +++ b/src/backend/commands/copy.c @@ -168,7 +168,7 @@ typedef struct CopyStateData PartitionDispatch *partition_dispatch_info; int num_dispatch; /* Number of entries in the above array */ int num_partitions; /* Number of members in the following arrays */ - ResultRelInfo *partitions; /* Per partition result relation */ + ResultRelInfo **partitions; /* Per partition result relation pointers */ TupleConversionMap **partition_tupconv_maps; TupleTableSlot *partition_tuple_slot; TransitionCaptureState *transition_capture; @@ -2451,13 +2451,15 @@ CopyFrom(CopyState cstate) if (cstate->rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) { PartitionDispatch *partition_dispatch_info; - ResultRelInfo *partitions; + ResultRelInfo **partitions; TupleConversionMap **partition_tupconv_maps; TupleTableSlot *partition_tuple_slot; int num_parted, num_partitions; 
ExecSetupPartitionTupleRouting(cstate->rel, + NULL, + 0, 1, estate, &partition_dispatch_info, @@ -2487,7 +2489,7 @@ CopyFrom(CopyState cstate) for (i = 0; i < cstate->num_partitions; ++i) { cstate->transition_tupconv_maps[i] = - convert_tuples_by_name(RelationGetDescr(cstate->partitions[i].ri_RelationDesc), + convert_tuples_by_name(RelationGetDescr(cstate->partitions[i]->ri_RelationDesc), RelationGetDescr(cstate->rel), gettext_noop("could not convert row type")); } @@ -2618,7 +2620,7 @@ CopyFrom(CopyState cstate) * to the selected partition. */ saved_resultRelInfo = resultRelInfo; - resultRelInfo = cstate->partitions + leaf_part_index; + resultRelInfo = cstate->partitions[leaf_part_index]; /* We do not yet have a way to insert into a foreign partition */ if (resultRelInfo->ri_FdwRoutine) @@ -2848,7 +2850,7 @@ CopyFrom(CopyState cstate) } for (i = 0; i < cstate->num_partitions; i++) { - ResultRelInfo *resultRelInfo = cstate->partitions + i; + ResultRelInfo *resultRelInfo = cstate->partitions[i]; ExecCloseIndices(resultRelInfo); heap_close(resultRelInfo->ri_RelationDesc, NoLock); diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c index 62fb05e..b31ab36 100644 --- a/src/backend/executor/execMain.c +++ b/src/backend/executor/execMain.c @@ -3243,10 +3243,14 @@ EvalPlanQualEnd(EPQState *epqstate) * ExecSetupPartitionTupleRouting - set up information needed during * tuple routing for partitioned tables * + * 'update_rri' has the UPDATE per-subplan result rels. + * 'num_update_rri' : number of UPDATE per-subplan result rels. For INSERT, + * this is 0. 
+ * * Output arguments: * 'pd' receives an array of PartitionDispatch objects with one entry for * every partitioned table in the partition tree - * 'partitions' receives an array of ResultRelInfo objects with one entry for + * 'partitions' receives an array of ResultRelInfo* objects with one entry for * every leaf partition in the partition tree * 'tup_conv_maps' receives an array of TupleConversionMap objects with one * entry for every leaf partition (required to convert input tuple based @@ -3266,10 +3270,12 @@ EvalPlanQualEnd(EPQState *epqstate) */ void ExecSetupPartitionTupleRouting(Relation rel, + ResultRelInfo *update_rri, + int num_update_rri, Index resultRTindex, EState *estate, PartitionDispatch **pd, - ResultRelInfo **partitions, + ResultRelInfo ***partitions, TupleConversionMap ***tup_conv_maps, TupleTableSlot **partition_tuple_slot, int *num_parted, int *num_partitions) @@ -3278,7 +3284,9 @@ ExecSetupPartitionTupleRouting(Relation rel, List *leaf_parts; ListCell *cell; int i; - ResultRelInfo *leaf_part_rri; + ResultRelInfo *leaf_part_arr; + ResultRelInfo *cur_update_rri; + Oid cur_reloid = InvalidOid; /* * Get the information about the partition tree after locking all the @@ -3287,11 +3295,38 @@ ExecSetupPartitionTupleRouting(Relation rel, (void) find_all_inheritors(RelationGetRelid(rel), RowExclusiveLock, NULL); *pd = RelationGetPartitionDispatchInfo(rel, num_parted, &leaf_parts); *num_partitions = list_length(leaf_parts); - *partitions = (ResultRelInfo *) palloc(*num_partitions * - sizeof(ResultRelInfo)); + *partitions = (ResultRelInfo **) palloc(*num_partitions * + sizeof(ResultRelInfo *)); *tup_conv_maps = (TupleConversionMap **) palloc0(*num_partitions * sizeof(TupleConversionMap *)); + if (num_update_rri != 0) + { + /* + * For Updates, if the leaf partition is already present in the + * per-subplan result rels, we re-use that rather than initialize a + * new result rel. 
The per-subplan resultrels and the resultrels of + * the leaf partitions are both in the same canonical order. So while + * going through the leaf partition oids, we need to keep track of the + * next per-subplan result rel to be looked for in the leaf partition + * resultrels. So, set the position of cur_update_rri to the first + * per-subplan result rel, and then shift it as we find them one by + * one while scanning the leaf partition oids. + */ + cur_update_rri = update_rri; + cur_reloid = RelationGetRelid(cur_update_rri->ri_RelationDesc); + } + else + { + /* + * For inserts, we need to create all new result rels, so avoid + * repeated pallocs by allocating memory for all the result rels in + * bulk. + */ + leaf_part_arr = (ResultRelInfo *) palloc0(*num_partitions * + sizeof(ResultRelInfo)); + } + /* * Initialize an empty slot that will be used to manipulate tuples of any * given partition's rowtype. It is attached to the caller-specified node @@ -3300,19 +3335,70 @@ ExecSetupPartitionTupleRouting(Relation rel, */ *partition_tuple_slot = MakeTupleTableSlot(); - leaf_part_rri = *partitions; i = 0; foreach(cell, leaf_parts) { - Relation partrel; + ResultRelInfo *leaf_part_rri; + Relation partrel = NULL; TupleDesc part_tupdesc; + Oid leaf_oid = lfirst_oid(cell); + + if (num_update_rri != 0) + { + /* Is this leaf partition among the UPDATE result rels? */ + if (cur_reloid == leaf_oid) + { + Assert(cur_update_rri <= update_rri + num_update_rri - 1); + + leaf_part_rri = cur_update_rri; + partrel = leaf_part_rri->ri_RelationDesc; + + /* + * This is required when converting tuple as per root + * partition tuple descriptor. When generating the update + * plans, this was not set. + */ + leaf_part_rri->ri_PartitionRoot = rel; + + cur_update_rri++; + + /* + * If this was the last UPDATE resultrel, indicate that by + * invalidating the cur_reloid. 
+ */ + if (cur_update_rri == update_rri + num_update_rri) + cur_reloid = InvalidOid; + else + cur_reloid = RelationGetRelid(cur_update_rri->ri_RelationDesc); + } + else + leaf_part_rri = (ResultRelInfo *) palloc0(sizeof(ResultRelInfo)); + } + else + { + /* For INSERTs, we already have an array of result rels allocated */ + leaf_part_rri = leaf_part_arr + i; + } /* - * We locked all the partitions above including the leaf partitions. - * Note that each of the relations in *partitions are eventually - * closed by the caller. + * If we didn't open the partition rel, it means we haven't + * initialized the result rel either. */ - partrel = heap_open(lfirst_oid(cell), NoLock); + if (!partrel) + { + /* + * We locked all the partitions above including the leaf + * partitions. Note that each of the newly opened relations in + * *partitions is eventually closed by the caller. + */ + partrel = heap_open(leaf_oid, NoLock); + InitResultRelInfo(leaf_part_rri, + partrel, + resultRTindex, + rel, + estate->es_instrument); + } + part_tupdesc = RelationGetDescr(partrel); /* @@ -3322,12 +3408,6 @@ ExecSetupPartitionTupleRouting(Relation rel, (*tup_conv_maps)[i] = convert_tuples_by_name(tupDesc, part_tupdesc, gettext_noop("could not convert row type")); - InitResultRelInfo(leaf_part_rri, - partrel, - resultRTindex, - rel, - estate->es_instrument); - /* * Verify result relation is a valid target for INSERT. */ @@ -3345,9 +3425,18 @@ ExecSetupPartitionTupleRouting(Relation rel, estate->es_leaf_result_relations = lappend(estate->es_leaf_result_relations, leaf_part_rri); - leaf_part_rri++; + (*partitions)[i] = leaf_part_rri; i++; } + + /* + * For UPDATE, we should have found all the per-subplan resultrels in the + * leaf partitions; so cur_update_rri should be positioned just next to + * the last per-subplan resultrel. 
+ */ + Assert(num_update_rri == 0 || + (cur_reloid == InvalidOid && + cur_update_rri == update_rri + num_update_rri)); } /* diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c index 845c409..a64b477 100644 --- a/src/backend/executor/nodeModifyTable.c +++ b/src/backend/executor/nodeModifyTable.c @@ -303,7 +303,7 @@ ExecInsert(ModifyTableState *mtstate, * the selected partition. */ saved_resultRelInfo = resultRelInfo; - resultRelInfo = mtstate->mt_partitions + leaf_part_index; + resultRelInfo = mtstate->mt_partitions[leaf_part_index]; /* We do not yet have a way to insert into a foreign partition */ if (resultRelInfo->ri_FdwRoutine) @@ -1498,25 +1498,11 @@ ExecSetupTransitionCaptureState(ModifyTableState *mtstate, EState *estate) if (mtstate->mt_transition_capture != NULL || mtstate->mt_oc_transition_capture != NULL) { - ResultRelInfo *resultRelInfos; int numResultRelInfos; - /* Find the set of partitions so that we can find their TupleDescs. */ - if (mtstate->mt_partition_dispatch_info != NULL) - { - /* - * For INSERT via partitioned table, so we need TupleDescs based - * on the partition routing table. - */ - resultRelInfos = mtstate->mt_partitions; - numResultRelInfos = mtstate->mt_num_partitions; - } - else - { - /* Otherwise we need the ResultRelInfo for each subplan. */ - resultRelInfos = mtstate->resultRelInfo; - numResultRelInfos = mtstate->mt_nplans; - } + numResultRelInfos = (mtstate->mt_partition_tuple_slot != NULL ? 
+ mtstate->mt_num_partitions : + mtstate->mt_nplans); /* * Build array of conversion maps from each child's TupleDesc to the @@ -1526,12 +1512,36 @@ ExecSetupTransitionCaptureState(ModifyTableState *mtstate, EState *estate) */ mtstate->mt_transition_tupconv_maps = (TupleConversionMap **) palloc0(sizeof(TupleConversionMap *) * numResultRelInfos); - for (i = 0; i < numResultRelInfos; ++i) + + /* Choose the right set of partitions */ + if (mtstate->mt_partition_dispatch_info != NULL) + { + /* + * For tuple routing among partitions, we need TupleDescs based + * on the partition routing table. + */ + ResultRelInfo **resultRelInfos = mtstate->mt_partitions; + + for (i = 0; i < numResultRelInfos; ++i) + { + mtstate->mt_transition_tupconv_maps[i] = + convert_tuples_by_name(RelationGetDescr(resultRelInfos[i]->ri_RelationDesc), + RelationGetDescr(targetRelInfo->ri_RelationDesc), + gettext_noop("could not convert row type")); + } + } + else { - mtstate->mt_transition_tupconv_maps[i] = - convert_tuples_by_name(RelationGetDescr(resultRelInfos[i].ri_RelationDesc), - RelationGetDescr(targetRelInfo->ri_RelationDesc), - gettext_noop("could not convert row type")); + /* Otherwise we need the ResultRelInfo for each subplan. 
*/ + ResultRelInfo *resultRelInfos = mtstate->resultRelInfo; + + for (i = 0; i < numResultRelInfos; ++i) + { + mtstate->mt_transition_tupconv_maps[i] = + convert_tuples_by_name(RelationGetDescr(resultRelInfos[i].ri_RelationDesc), + RelationGetDescr(targetRelInfo->ri_RelationDesc), + gettext_noop("could not convert row type")); + } } /* @@ -1935,13 +1945,15 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) { PartitionDispatch *partition_dispatch_info; - ResultRelInfo *partitions; + ResultRelInfo **partitions; TupleConversionMap **partition_tupconv_maps; TupleTableSlot *partition_tuple_slot; int num_parted, num_partitions; ExecSetupPartitionTupleRouting(rel, + NULL, + 0, node->nominalRelation, estate, &partition_dispatch_info, @@ -2014,14 +2026,16 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) mtstate->mt_nplans == 1); wcoList = linitial(node->withCheckOptionLists); plan = mtstate->mt_plans[0]; - resultRelInfo = mtstate->mt_partitions; for (i = 0; i < mtstate->mt_num_partitions; i++) { - Relation partrel = resultRelInfo->ri_RelationDesc; + Relation partrel; List *mapped_wcoList; List *wcoExprs = NIL; ListCell *ll; + resultRelInfo = mtstate->mt_partitions[i]; + partrel = resultRelInfo->ri_RelationDesc; + /* varno = node->nominalRelation */ mapped_wcoList = map_partition_varattnos(wcoList, node->nominalRelation, @@ -2037,7 +2051,6 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) resultRelInfo->ri_WithCheckOptions = mapped_wcoList; resultRelInfo->ri_WithCheckOptionExprs = wcoExprs; - resultRelInfo++; } } @@ -2088,13 +2101,15 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) * will suffice. This only occurs for the INSERT case; UPDATE/DELETE * are handled above. 
*/ - resultRelInfo = mtstate->mt_partitions; returningList = linitial(node->returningLists); for (i = 0; i < mtstate->mt_num_partitions; i++) { - Relation partrel = resultRelInfo->ri_RelationDesc; + Relation partrel; List *rlist; + resultRelInfo = mtstate->mt_partitions[i]; + partrel = resultRelInfo->ri_RelationDesc; + /* varno = node->nominalRelation */ rlist = map_partition_varattnos(returningList, node->nominalRelation, @@ -2376,7 +2391,7 @@ ExecEndModifyTable(ModifyTableState *node) } for (i = 0; i < node->mt_num_partitions; i++) { - ResultRelInfo *resultRelInfo = node->mt_partitions + i; + ResultRelInfo *resultRelInfo = node->mt_partitions[i]; ExecCloseIndices(resultRelInfo); heap_close(resultRelInfo->ri_RelationDesc, NoLock); diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h index 7708818..cc1cc2a 100644 --- a/src/include/executor/executor.h +++ b/src/include/executor/executor.h @@ -207,10 +207,12 @@ extern void EvalPlanQualSetTuple(EPQState *epqstate, Index rti, HeapTuple tuple); extern HeapTuple EvalPlanQualGetTuple(EPQState *epqstate, Index rti); extern void ExecSetupPartitionTupleRouting(Relation rel, + ResultRelInfo *update_rri, + int num_update_rri, Index resultRTindex, EState *estate, PartitionDispatch **pd, - ResultRelInfo **partitions, + ResultRelInfo ***partitions, TupleConversionMap ***tup_conv_maps, TupleTableSlot **partition_tuple_slot, int *num_parted, int *num_partitions); diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index c6d3021..9187f7a 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -978,7 +978,7 @@ typedef struct ModifyTableState int mt_num_dispatch; /* Number of entries in the above array */ int mt_num_partitions; /* Number of members in the following * arrays */ - ResultRelInfo *mt_partitions; /* Per partition result relation */ + ResultRelInfo **mt_partitions; /* Per partition result relation pointers */ TupleConversionMap 
**mt_partition_tupconv_maps; /* Per partition tuple conversion map */ TupleTableSlot *mt_partition_tuple_slot; -- 2.1.4