From 0be881f06d87f1a3e65ebe085ccdf3fa845a4042 Mon Sep 17 00:00:00 2001 From: Greg Nancarrow Date: Mon, 7 Dec 2020 17:40:28 +1100 Subject: [PATCH v9 1/4] Enable parallel SELECT for "INSERT INTO ... SELECT ...", where it is safe to do so. Parallel SELECT can't be utilized for INSERT in the following cases: - INSERT statement uses the ON CONFLICT DO UPDATE clause - Target table is a foreign or temporary table - Target table has a parallel-unsafe trigger, index expression, column default expression or check constraint - Target table is a partitioned table with a parallel-unsafe partition key expression or support function The planner is updated to perform additional parallel-safety checks for the cases listed above, for determining whether it is safe to run INSERT in parallel-mode with an underlying parallel SELECT. The planner is further updated to consider using parallel SELECT for "INSERT INTO ... SELECT ...", provided nothing unsafe is found from the additional parallel-safety checks, or from the existing parallel-safety checks for SELECT that it currently performs. Prior to entering parallel-mode for execution of INSERT with parallel SELECT, a TransactionId is acquired and assigned to the current transaction state which is then serialized in the parallel DSM for the parallel workers to use. Discussion: https://postgr.es/m/CAJcOf-cXnB5cnMKqWEp2E2z7Mvcd04iLVmV=qpFJrR3AcrTS3g@mail.gmail.com --- src/backend/access/transam/xact.c | 22 ++ src/backend/executor/execMain.c | 3 + src/backend/optimizer/plan/planner.c | 33 ++- src/backend/optimizer/util/clauses.c | 436 +++++++++++++++++++++++++++++++++++ src/include/access/xact.h | 15 ++ src/include/optimizer/clauses.h | 1 + 6 files changed, 500 insertions(+), 10 deletions(-) diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c index 9cd0b7c..0c28a46 100644 --- a/src/backend/access/transam/xact.c +++ b/src/backend/access/transam/xact.c @@ -41,6 +41,7 @@ #include "libpq/be-fsstubs.h" #include "libpq/pqsignal.h" #include "miscadmin.h" +#include "optimizer/optimizer.h" #include "pg_trace.h" #include "pgstat.h" #include "replication/logical.h" @@ -1015,6 +1016,27 @@ IsInParallelMode(void) } /* + * PrepareParallelMode + * + * Prepare for entering parallel mode, based on command-type. + */ +void +PrepareParallelMode(CmdType commandType) +{ + Assert(!IsInParallelMode() || force_parallel_mode != FORCE_PARALLEL_OFF); + + if (IsModifySupportedInParallelMode(commandType)) + { + /* + * Prepare for entering parallel mode by assigning a + * FullTransactionId, to be included in the transaction state that is + * serialized in the parallel DSM. + */ + (void) GetCurrentTransactionId(); + } +} + +/* * CommandCounterIncrement */ void diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c index 7179f58..2bb74d4 100644 --- a/src/backend/executor/execMain.c +++ b/src/backend/executor/execMain.c @@ -1526,7 +1526,10 @@ ExecutePlan(EState *estate, estate->es_use_parallel_mode = use_parallel_mode; if (use_parallel_mode) + { + PrepareParallelMode(estate->es_plannedstmt->commandType); EnterParallelMode(); + } /* * Loop until we've processed the proper number of tuples from the plan. diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 1a94b58..00e7388 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -316,16 +316,16 @@ standard_planner(Query *parse, const char *query_string, int cursorOptions, /* * Assess whether it's feasible to use parallel mode for this query. We * can't do this in a standalone backend, or if the command will try to - * modify any data, or if this is a cursor operation, or if GUCs are set - * to values that don't permit parallelism, or if parallel-unsafe - * functions are present in the query tree. + * modify any data using a CTE, or if this is a cursor operation, or if + * GUCs are set to values that don't permit parallelism, or if + * parallel-unsafe functions are present in the query tree. * - * (Note that we do allow CREATE TABLE AS, SELECT INTO, and CREATE - * MATERIALIZED VIEW to use parallel plans, but as of now, only the leader - * backend writes into a completely new table. In the future, we can - * extend it to allow workers to write into the table. However, to allow - * parallel updates and deletes, we have to solve other problems, - * especially around combo CIDs.) + * (Note that we do allow CREATE TABLE AS, INSERT INTO...SELECT, SELECT + * INTO, and CREATE MATERIALIZED VIEW to use parallel plans. However, as + * of now, only the leader backend writes into a completely new table. In + * the future, we can extend it to allow workers to write into the table. + * However, to allow parallel updates and deletes, we have to solve other + * problems, especially around combo CIDs.) * * For now, we don't try to use parallel mode if we're running inside a * parallel worker. We might eventually be able to relax this @@ -334,7 +334,8 @@ standard_planner(Query *parse, const char *query_string, int cursorOptions, */ if ((cursorOptions & CURSOR_OPT_PARALLEL_OK) != 0 && IsUnderPostmaster && - parse->commandType == CMD_SELECT && + (parse->commandType == CMD_SELECT || + IsModifySupportedInParallelMode(parse->commandType)) && !parse->hasModifyingCTE && max_parallel_workers_per_gather > 0 && !IsParallelWorker()) @@ -342,6 +343,18 @@ standard_planner(Query *parse, const char *query_string, int cursorOptions, /* all the cheap tests pass, so scan the query tree */ glob->maxParallelHazard = max_parallel_hazard(parse); glob->parallelModeOK = (glob->maxParallelHazard != PROPARALLEL_UNSAFE); + + /* + * Additional parallel-mode safety checks are required in order to + * allow an underlying parallel query to be used for a + * table-modification command that is supported in parallel-mode. + */ + if (glob->parallelModeOK && + IsModifySupportedInParallelMode(parse->commandType)) + { + glob->maxParallelHazard = max_parallel_hazard_for_modify(parse, &glob->maxParallelHazard); + glob->parallelModeOK = (glob->maxParallelHazard != PROPARALLEL_UNSAFE); + } } else { diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c index 587d494..5b57786 100644 --- a/src/backend/optimizer/util/clauses.c +++ b/src/backend/optimizer/util/clauses.c @@ -19,13 +19,19 @@ #include "postgres.h" +#include "access/genam.h" #include "access/htup_details.h" +#include "access/table.h" +#include "catalog/index.h" +#include "catalog/indexing.h" #include "catalog/pg_aggregate.h" #include "catalog/pg_class.h" +#include "catalog/pg_constraint.h" #include "catalog/pg_language.h" #include "catalog/pg_operator.h" #include "catalog/pg_proc.h" #include "catalog/pg_type.h" +#include "commands/trigger.h" #include "executor/executor.h" #include "executor/functions.h" #include "funcapi.h" @@ -42,7 +48,11 @@ #include "parser/parse_agg.h" #include "parser/parse_coerce.h" #include "parser/parse_func.h" +#include "parser/parsetree.h" +#include "partitioning/partdesc.h" +#include "rewrite/rewriteHandler.h" #include "rewrite/rewriteManip.h" +#include "storage/lmgr.h" #include "tcop/tcopprot.h" #include "utils/acl.h" #include "utils/builtins.h" @@ -50,6 +60,8 @@ #include "utils/fmgroids.h" #include "utils/lsyscache.h" #include "utils/memutils.h" +#include "utils/partcache.h" +#include "utils/rel.h" #include "utils/syscache.h" #include "utils/typcache.h" @@ -147,6 +159,15 @@ static Query *substitute_actual_srf_parameters(Query *expr, static Node *substitute_actual_srf_parameters_mutator(Node *node, substitute_actual_srf_parameters_context *context); +static char trigger_max_parallel_hazard_for_modify(TriggerDesc *trigdesc, + max_parallel_hazard_context *context); +static char index_expr_max_parallel_hazard_for_modify(Relation rel, + max_parallel_hazard_context *context); +static char domain_max_parallel_hazard_for_modify(Oid typid, max_parallel_hazard_context *context); +static char rel_max_parallel_hazard_for_modify(Oid relid, + CmdType command_type, + max_parallel_hazard_context *context, + LOCKMODE lockmode); /***************************************************************************** * Aggregate-function clause manipulation @@ -785,6 +806,421 @@ max_parallel_hazard_walker(Node *node, max_parallel_hazard_context *context) context); } +/* + * trigger_max_parallel_hazard_for_modify + * + * Finds the maximum parallel-mode hazard level for the specified trigger data. + */ +static char +trigger_max_parallel_hazard_for_modify(TriggerDesc *trigdesc, + max_parallel_hazard_context *context) +{ + int i; + + for (i = 0; i < trigdesc->numtriggers; i++) + { + Trigger *trigger = &trigdesc->triggers[i]; + int trigtype; + + if (max_parallel_hazard_test(func_parallel(trigger->tgfoid), context)) + break; + + /* + * If the trigger type is RI_TRIGGER_FK, this indicates a FK exists in + * the relation, and this would result in creation of new CommandIds + * on insert/update/delete and this isn't supported in a parallel + * worker (but is safe in the parallel leader). + */ + trigtype = RI_FKey_trigger_type(trigger->tgfoid); + if (trigtype == RI_TRIGGER_FK) + { + context->max_hazard = PROPARALLEL_RESTRICTED; + + /* + * As we're looking for the max parallel hazard, we don't break + * here; examine any further triggers ... + */ + } + } + + return context->max_hazard; +} + +/* + * index_expr_max_parallel_hazard_for_modify + * + * Finds the maximum parallel-mode hazard level for any existing index + * expressions of a specified relation. + */ +static char +index_expr_max_parallel_hazard_for_modify(Relation rel, + max_parallel_hazard_context *context) +{ + List *index_oid_list; + ListCell *lc; + LOCKMODE lockmode = AccessShareLock; + + index_oid_list = RelationGetIndexList(rel); + foreach(lc, index_oid_list) + { + Oid index_oid = lfirst_oid(lc); + Relation index_rel; + IndexInfo *index_info; + + if (ConditionalLockRelationOid(index_oid, lockmode)) + { + index_rel = index_open(index_oid, NoLock); + } + else + { + context->max_hazard = PROPARALLEL_UNSAFE; + return context->max_hazard; + } + + index_info = BuildIndexInfo(index_rel); + + if (index_info->ii_Expressions != NIL) + { + int i; + ListCell *index_expr_item = list_head(index_info->ii_Expressions); + + for (i = 0; i < index_info->ii_NumIndexAttrs; i++) + { + int keycol = index_info->ii_IndexAttrNumbers[i]; + + if (keycol == 0) + { + /* Found an index expression */ + + Node *index_expr; + + if (index_expr_item == NULL) /* shouldn't happen */ + elog(ERROR, "too few entries in indexprs list"); + + index_expr = (Node *) lfirst(index_expr_item); + index_expr = (Node *) expression_planner((Expr *) index_expr); + + if (max_parallel_hazard_walker(index_expr, context)) + { + index_close(index_rel, lockmode); + return context->max_hazard; + } + + index_expr_item = lnext(index_info->ii_Expressions, index_expr_item); + } + } + } + index_close(index_rel, lockmode); + } + + return context->max_hazard; +} + +/* + * domain_max_parallel_hazard_for_modify + * + * Finds the maximum parallel-mode hazard level for the specified DOMAIN type. + * Only any CHECK expressions are examined for parallel safety. + * DEFAULT values of DOMAIN-type columns in the target-list are already + * being checked for parallel-safety in the max_parallel_hazard() scan of the + * query tree in standard_planner(). + * + */ +static char +domain_max_parallel_hazard_for_modify(Oid typid, max_parallel_hazard_context *context) +{ + Relation con_rel; + ScanKeyData key[1]; + SysScanDesc scan; + HeapTuple tup; + + LOCKMODE lockmode = AccessShareLock; + + con_rel = table_open(ConstraintRelationId, lockmode); + + ScanKeyInit(&key[0], + Anum_pg_constraint_contypid, BTEqualStrategyNumber, + F_OIDEQ, ObjectIdGetDatum(typid)); + scan = systable_beginscan(con_rel, ConstraintTypidIndexId, true, + NULL, 1, key); + + while (HeapTupleIsValid((tup = systable_getnext(scan)))) + { + Form_pg_constraint con = (Form_pg_constraint) GETSTRUCT(tup); + + if (con->contype == CONSTRAINT_CHECK) + { + char *conbin; + Datum val; + bool isnull; + Expr *check_expr; + + val = SysCacheGetAttr(CONSTROID, tup, + Anum_pg_constraint_conbin, &isnull); + if (isnull) + { + /* + * This shouldn't ever happen, but if it does, log a WARNING + * and return UNSAFE, rather than erroring out. + */ + elog(WARNING, "null conbin for constraint %u", con->oid); + context->max_hazard = PROPARALLEL_UNSAFE; + break; + } + conbin = TextDatumGetCString(val); + check_expr = stringToNode(conbin); + if (max_parallel_hazard_walker((Node *) check_expr, context)) + { + break; + } + } + } + + systable_endscan(scan); + table_close(con_rel, lockmode); + return context->max_hazard; +} + +/* + * rel_max_parallel_hazard_for_modify + * + * Determines the maximum parallel-mode hazard level for modification + * of a specified relation. + */ +static char +rel_max_parallel_hazard_for_modify(Oid relid, + CmdType command_type, + max_parallel_hazard_context *context, + LOCKMODE lockmode) +{ + Relation rel; + TupleDesc tupdesc; + int attnum; + + /* Currently only CMD_INSERT is supported */ + Assert(command_type == CMD_INSERT); + + if (lockmode == NoLock) + { + rel = table_open(relid, NoLock); + } + else + { + /* + * It's possible that this relation is locked for exclusive access in + * another concurrent transaction (e.g. as a result of a ALTER TABLE + * ... operation) until that transaction completes. If a share-lock + * can't be acquired on it now, we have to assume this could be the + * worst-case, so to avoid blocking here until that transaction + * completes, conditionally try to acquire the lock and assume and + * return UNSAFE on failure. + */ + if (ConditionalLockRelationOid(relid, lockmode)) + { + rel = table_open(relid, NoLock); + } + else + { + context->max_hazard = PROPARALLEL_UNSAFE; + return context->max_hazard; + } + } + + /* + * We can't support table modification in parallel-mode if it's a foreign + * table/partition (no FDW API for supporting parallel access) or a + * temporary table. + */ + if (rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE || + RelationUsesLocalBuffers(rel)) + { + table_close(rel, lockmode); + context->max_hazard = PROPARALLEL_UNSAFE; + return context->max_hazard; + } + + /* + * If a partitioned table, check that each partition is safe for + * modification in parallel-mode. + */ + if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + { + int i; + PartitionDesc pdesc; + PartitionKey pkey; + ListCell *partexprs_item; + int partnatts; + List *partexprs; + + pkey = RelationGetPartitionKey(rel); + + partnatts = get_partition_natts(pkey); + partexprs = get_partition_exprs(pkey); + + partexprs_item = list_head(partexprs); + for (i = 0; i < partnatts; i++) + { + /* Check parallel-safety of partition key support functions */ + if (OidIsValid(pkey->partsupfunc[i].fn_oid)) + { + if (max_parallel_hazard_test(func_parallel(pkey->partsupfunc[i].fn_oid), context)) + { + table_close(rel, lockmode); + return context->max_hazard; + } + } + + /* Check parallel-safety of any expressions in the partition key */ + if (get_partition_col_attnum(pkey, i) == 0) + { + Node *check_expr = (Node *) lfirst(partexprs_item); + + if (max_parallel_hazard_walker(check_expr, context)) + { + table_close(rel, lockmode); + return context->max_hazard; + } + + partexprs_item = lnext(partexprs, partexprs_item); + } + } + + /* Recursively check each partition ... */ + pdesc = RelationGetPartitionDesc(rel); + for (i = 0; i < pdesc->nparts; i++) + { + if (rel_max_parallel_hazard_for_modify(pdesc->oids[i], + command_type, + context, + AccessShareLock) == PROPARALLEL_UNSAFE) + { + table_close(rel, lockmode); + return context->max_hazard; + } + } + } + + /* + * If there are any index expressions, check that they are parallel-mode + * safe. + */ + if (index_expr_max_parallel_hazard_for_modify(rel, context) == PROPARALLEL_UNSAFE) + { + table_close(rel, lockmode); + return context->max_hazard; + } + + /* + * If any triggers exist, check that they are parallel safe. + */ + if (rel->trigdesc != NULL && + trigger_max_parallel_hazard_for_modify(rel->trigdesc, context) == PROPARALLEL_UNSAFE) + { + table_close(rel, lockmode); + return context->max_hazard; + } + + /* + * Column default expressions and check constraints are only applicable to + * INSERT and UPDATE, but since only INSERT is currently supported, only + * command_type==CMD_INSERT is checked here. + */ + if (command_type == CMD_INSERT) + { + /* + * Column default expressions for columns in the target-list are + * already being checked for parallel-safety in the + * max_parallel_hazard() scan of the query tree in standard_planner(). + */ + + tupdesc = RelationGetDescr(rel); + for (attnum = 0; attnum < tupdesc->natts; attnum++) + { + Form_pg_attribute att = TupleDescAttr(tupdesc, attnum); + + /* We don't need info for dropped or generated attributes */ + if (att->attisdropped || att->attgenerated) + continue; + + /* + * If the column is of a DOMAIN type, determine whether that + * domain has any CHECK expressions that are not parallel-mode + * safe. + */ + if (get_typtype(att->atttypid) == TYPTYPE_DOMAIN) + { + if (domain_max_parallel_hazard_for_modify(att->atttypid, context) == PROPARALLEL_UNSAFE) + { + table_close(rel, lockmode); + return context->max_hazard; + } + } + } + + /* + * Check if there are any CHECK constraints which are not + * parallel-safe. + */ + if (tupdesc->constr != NULL && tupdesc->constr->num_check > 0) + { + int i; + + ConstrCheck *check = tupdesc->constr->check; + + for (i = 0; i < tupdesc->constr->num_check; i++) + { + Expr *check_expr = stringToNode(check->ccbin); + + if (max_parallel_hazard_walker((Node *) check_expr, context)) + { + table_close(rel, lockmode); + return context->max_hazard; + } + } + } + } + + table_close(rel, lockmode); + return context->max_hazard; +} + +/* + * max_parallel_hazard_for_modify + * + * Determines the worst parallel-mode hazard level for the specified + * table-modification statement, based on the statement attributes and + * target table. An initial max parallel hazard level may optionally be + * supplied. The search returns the earliest in the following list: + * PROPARALLEL_UNSAFE, PROPARALLEL_RESTRICTED, PROPARALLEL_SAFE + */ +char +max_parallel_hazard_for_modify(Query *parse, const char *initial_max_parallel_hazard) +{ + RangeTblEntry *rte; + max_parallel_hazard_context context; + + + /* + * UPDATE is not currently supported in parallel-mode, so prohibit + * INSERT...ON CONFLICT...DO UPDATE... + */ + if (parse->onConflict != NULL && parse->onConflict->action == ONCONFLICT_UPDATE) + return PROPARALLEL_UNSAFE; + + /* + * Setup the context used in finding the max parallel-mode hazard. + */ + Assert(initial_max_parallel_hazard == NULL || + *initial_max_parallel_hazard == PROPARALLEL_SAFE || + *initial_max_parallel_hazard == PROPARALLEL_RESTRICTED); + context.max_hazard = initial_max_parallel_hazard == NULL ? + PROPARALLEL_SAFE : *initial_max_parallel_hazard; + context.max_interesting = PROPARALLEL_UNSAFE; + context.safe_param_ids = NIL; + + rte = rt_fetch(parse->resultRelation, parse->rtable); + return (rel_max_parallel_hazard_for_modify(rte->relid, parse->commandType, &context, NoLock)); +} /***************************************************************************** * Check clauses for nonstrict functions diff --git a/src/include/access/xact.h b/src/include/access/xact.h index 7320de3..591672c 100644 --- a/src/include/access/xact.h +++ b/src/include/access/xact.h @@ -466,5 +466,20 @@ extern void ParsePrepareRecord(uint8 info, xl_xact_prepare *xlrec, xl_xact_parse extern void EnterParallelMode(void); extern void ExitParallelMode(void); extern bool IsInParallelMode(void); +extern void PrepareParallelMode(CmdType commandType); + +/* + * IsModifySupportedInParallelMode + * + * Indicates whether execution of the specified table-modification command + * (INSERT/UPDATE/DELETE) in parallel-mode is supported, subject to certain + * parallel-safety conditions. + */ +static inline bool +IsModifySupportedInParallelMode(CmdType commandType) +{ + /* Currently only INSERT is supported */ + return (commandType == CMD_INSERT); +} #endif /* XACT_H */ diff --git a/src/include/optimizer/clauses.h b/src/include/optimizer/clauses.h index 68855d0..34698f7 100644 --- a/src/include/optimizer/clauses.h +++ b/src/include/optimizer/clauses.h @@ -52,5 +52,6 @@ extern void CommuteOpExpr(OpExpr *clause); extern Query *inline_set_returning_function(PlannerInfo *root, RangeTblEntry *rte); +extern char max_parallel_hazard_for_modify(Query *parse, const char *initial_max_parallel_hazard); #endif /* CLAUSES_H */ -- 1.8.3.1