From 0ae7b130d7ded5765b30cc6c6a37d8b882809a8c Mon Sep 17 00:00:00 2001 From: Hari Babu Kommi Date: Tue, 29 Aug 2017 10:34:34 +1000 Subject: [PATCH] Make parallel eligible for utility commands underneath queries This functionality adds the parallel query support to the queries that run as part of some of the utility commands such as CREATE MATERIALIZED VIEW, CREATE TABLE AS and etc. --- doc/src/sgml/parallel.sgml | 32 +++++++---- src/backend/access/heap/heapam.c | 10 ++-- src/backend/commands/createas.c | 4 +- src/backend/commands/explain.c | 4 +- src/backend/commands/matview.c | 2 +- src/backend/executor/execMain.c | 6 +-- src/test/regress/expected/write_parallel.out | 80 ++++++++++++++++++++++++++++ src/test/regress/parallel_schedule | 1 + src/test/regress/serial_schedule | 1 + src/test/regress/sql/write_parallel.sql | 43 +++++++++++++++ 10 files changed, 158 insertions(+), 25 deletions(-) create mode 100644 src/test/regress/expected/write_parallel.out create mode 100644 src/test/regress/sql/write_parallel.sql diff --git a/doc/src/sgml/parallel.sgml b/doc/src/sgml/parallel.sgml index 2a25f21..85f1d38 100644 --- a/doc/src/sgml/parallel.sgml +++ b/doc/src/sgml/parallel.sgml @@ -243,15 +243,6 @@ EXPLAIN SELECT * FROM pgbench_accounts WHERE filler LIKE '%x%'; - A prepared statement is executed using a CREATE TABLE .. AS - EXECUTE .. statement. This construct converts what otherwise - would have been a read-only operation into a read-write operation, - making it ineligible for parallel query. - - - - - The transaction isolation level is serializable. This situation does not normally arise, because parallel query plans are not generated when the transaction isolation level is serializable. @@ -391,7 +382,30 @@ EXPLAIN SELECT * FROM pgbench_accounts WHERE filler LIKE '%x%'; + + + Write Operations Using Parallel Plan + + Currently there are limited number of write operations + that uses benefits of parallelism. 
Those are: + + + + + + A utility statement that is used to create a table or materialized view + such as CREATE TABLE .. AS or CREATE + MATERIALIZED VIEW .. AS, is supported. + These write operations are not performed concurrently by the parallel workers, + but the underlying query that is used by these operations is eligible + for parallel plans. + + + + + + Parallel Plan Tips diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index e29c5ad..47de10e 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -2595,15 +2595,13 @@ heap_prepare_insert(Relation relation, HeapTuple tup, TransactionId xid, CommandId cid, int options) { /* - * For now, parallel operations are required to be strictly read-only. - * Unlike heap_update() and heap_delete(), an insert should never create a - * combo CID, so it might be possible to relax this restriction, but not - * without more thought and testing. + * For now, parallel operations are required to be strictly read-only in + * a parallel worker.
*/ - if (IsInParallelMode()) + if (IsParallelWorker()) ereport(ERROR, (errcode(ERRCODE_INVALID_TRANSACTION_STATE), - errmsg("cannot insert tuples during a parallel operation"))); + errmsg("cannot insert tuples in a parallel worker"))); if (relation->rd_rel->relhasoids) { diff --git a/src/backend/commands/createas.c b/src/backend/commands/createas.c index e60210c..4d77411 100644 --- a/src/backend/commands/createas.c +++ b/src/backend/commands/createas.c @@ -326,8 +326,8 @@ ExecCreateTableAs(CreateTableAsStmt *stmt, const char *queryString, query = linitial_node(Query, rewritten); Assert(query->commandType == CMD_SELECT); - /* plan the query --- note we disallow parallelism */ - plan = pg_plan_query(query, 0, params); + /* plan the query */ + plan = pg_plan_query(query, CURSOR_OPT_PARALLEL_OK, params); /* * Use a snapshot with an updated command ID to ensure this query sees diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c index 4cee357..c13efc9 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -400,8 +400,6 @@ ExplainOneUtility(Node *utilityStmt, IntoClause *into, ExplainState *es, * We have to rewrite the contained SELECT and then pass it back to * ExplainOneQuery. It's probably not really necessary to copy the * contained parsetree another time, but let's be safe. - * - * Like ExecCreateTableAs, disallow parallelism in the plan. 
*/ CreateTableAsStmt *ctas = (CreateTableAsStmt *) utilityStmt; List *rewritten; @@ -409,7 +407,7 @@ ExplainOneUtility(Node *utilityStmt, IntoClause *into, ExplainState *es, rewritten = QueryRewrite(castNode(Query, copyObject(ctas->query))); Assert(list_length(rewritten) == 1); ExplainOneQuery(linitial_node(Query, rewritten), - 0, ctas->into, es, + CURSOR_OPT_PARALLEL_OK, ctas->into, es, queryString, params, queryEnv); } else if (IsA(utilityStmt, DeclareCursorStmt)) diff --git a/src/backend/commands/matview.c b/src/backend/commands/matview.c index d2e0376..b6fa062 100644 --- a/src/backend/commands/matview.c +++ b/src/backend/commands/matview.c @@ -404,7 +404,7 @@ refresh_matview_datafill(DestReceiver *dest, Query *query, CHECK_FOR_INTERRUPTS(); /* Plan the query which will generate data for the refresh. */ - plan = pg_plan_query(query, 0, NULL); + plan = pg_plan_query(query, CURSOR_OPT_PARALLEL_OK, NULL); /* * Use a snapshot with an updated command ID to ensure this query sees diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c index 2946a0e..6458f0b 100644 --- a/src/backend/executor/execMain.c +++ b/src/backend/executor/execMain.c @@ -1689,11 +1689,9 @@ ExecutePlan(EState *estate, /* * If the plan might potentially be executed multiple times, we must force - * it to run without parallelism, because we might exit early. Also - * disable parallelism when writing into a relation, because no database - * changes are allowed in parallel mode. + * it to run without parallelism, because we might exit early. 
*/ - if (!execute_once || dest->mydest == DestIntoRel) + if (!execute_once) use_parallel_mode = false; if (use_parallel_mode) diff --git a/src/test/regress/expected/write_parallel.out b/src/test/regress/expected/write_parallel.out new file mode 100644 index 0000000..e549cc2 --- /dev/null +++ b/src/test/regress/expected/write_parallel.out @@ -0,0 +1,80 @@ +-- +-- PARALLEL +-- +-- Serializable isolation would disable parallel query, so explicitly use an +-- arbitrary other level. +begin isolation level repeatable read; +-- encourage use of parallel plans +set parallel_setup_cost=0; +set parallel_tuple_cost=0; +set min_parallel_table_scan_size=0; +set max_parallel_workers_per_gather=4; +-- +-- Test write operations that have an underlying query that is eligible +-- for parallel plans +-- +explain (costs off) create table parallel_write as + select length(stringu1) from tenk1 group by length(stringu1); + QUERY PLAN +--------------------------------------------------- + Finalize HashAggregate + Group Key: (length((stringu1)::text)) + -> Gather + Workers Planned: 4 + -> Partial HashAggregate + Group Key: length((stringu1)::text) + -> Parallel Seq Scan on tenk1 +(7 rows) + +create table parallel_write as + select length(stringu1) from tenk1 group by length(stringu1); +drop table parallel_write; +explain (costs off) select length(stringu1) into parallel_write + from tenk1 group by length(stringu1); + QUERY PLAN +--------------------------------------------------- + Finalize HashAggregate + Group Key: (length((stringu1)::text)) + -> Gather + Workers Planned: 4 + -> Partial HashAggregate + Group Key: length((stringu1)::text) + -> Parallel Seq Scan on tenk1 +(7 rows) + +select length(stringu1) into parallel_write + from tenk1 group by length(stringu1); +drop table parallel_write; +explain (costs off) create materialized view parallel_mat_view as + select length(stringu1) from tenk1 group by length(stringu1); + QUERY PLAN +--------------------------------------------------- +
Finalize HashAggregate + Group Key: (length((stringu1)::text)) + -> Gather + Workers Planned: 4 + -> Partial HashAggregate + Group Key: length((stringu1)::text) + -> Parallel Seq Scan on tenk1 +(7 rows) + +create materialized view parallel_mat_view as + select length(stringu1) from tenk1 group by length(stringu1); +refresh materialized view parallel_mat_view; +drop materialized view parallel_mat_view; +prepare prep_stmt as select length(stringu1) from tenk1 group by length(stringu1); +explain (costs off) create table parallel_write as execute prep_stmt; + QUERY PLAN +--------------------------------------------------- + Finalize HashAggregate + Group Key: (length((stringu1)::text)) + -> Gather + Workers Planned: 4 + -> Partial HashAggregate + Group Key: length((stringu1)::text) + -> Parallel Seq Scan on tenk1 +(7 rows) + +create table parallel_write as execute prep_stmt; +drop table parallel_write; +rollback; diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule index 2fd3f2b..860e8ab 100644 --- a/src/test/regress/parallel_schedule +++ b/src/test/regress/parallel_schedule @@ -96,6 +96,7 @@ test: rules psql_crosstab amutils # run by itself so it can run parallel workers test: select_parallel +test: write_parallel # no relation related tests can be put in this group test: publication subscription diff --git a/src/test/regress/serial_schedule b/src/test/regress/serial_schedule index 76b0de3..ef275d0 100644 --- a/src/test/regress/serial_schedule +++ b/src/test/regress/serial_schedule @@ -134,6 +134,7 @@ test: stats_ext test: rules test: psql_crosstab test: select_parallel +test: write_parallel test: publication test: subscription test: amutils diff --git a/src/test/regress/sql/write_parallel.sql b/src/test/regress/sql/write_parallel.sql new file mode 100644 index 0000000..00f9156 --- /dev/null +++ b/src/test/regress/sql/write_parallel.sql @@ -0,0 +1,43 @@ +-- +-- PARALLEL +-- + +-- Serializable isolation would disable parallel query, so
explicitly use an +-- arbitrary other level. +begin isolation level repeatable read; + +-- encourage use of parallel plans +set parallel_setup_cost=0; +set parallel_tuple_cost=0; +set min_parallel_table_scan_size=0; +set max_parallel_workers_per_gather=4; + +-- +-- Test write operations that have an underlying query that is eligible +-- for parallel plans +-- +explain (costs off) create table parallel_write as + select length(stringu1) from tenk1 group by length(stringu1); +create table parallel_write as + select length(stringu1) from tenk1 group by length(stringu1); +drop table parallel_write; + +explain (costs off) select length(stringu1) into parallel_write + from tenk1 group by length(stringu1); +select length(stringu1) into parallel_write + from tenk1 group by length(stringu1); +drop table parallel_write; + +explain (costs off) create materialized view parallel_mat_view as + select length(stringu1) from tenk1 group by length(stringu1); +create materialized view parallel_mat_view as + select length(stringu1) from tenk1 group by length(stringu1); +refresh materialized view parallel_mat_view; +drop materialized view parallel_mat_view; + +prepare prep_stmt as select length(stringu1) from tenk1 group by length(stringu1); +explain (costs off) create table parallel_write as execute prep_stmt; +create table parallel_write as execute prep_stmt; +drop table parallel_write; + +rollback; -- 1.8.3.1