From b30bdbcb49ea0dfdb2b5780edb42796cc6f66de2 Mon Sep 17 00:00:00 2001 From: Bharath Rupireddy Date: Mon, 21 Dec 2020 11:52:22 +0530 Subject: [PATCH v3] Fail Fast In CTAS/CMV If Relation Already Exists Currently, for CTAS or CREATE MATERIALIZED VIEW (CMV) without an IF NOT EXISTS clause, the existence of the relation (either table or materialized view) gets checked during execution and an error is thrown there. By then, all the rewriting and planning for the SELECT part of the query has already been done unnecessarily, only to fail later. However, if an IF NOT EXISTS clause is present, a notice is issued and the command returns immediately, without any rewriting or planning. This seems somewhat inconsistent. This patch proposes to check for the relation's existence early in ExecCreateTableAs() as well as in ExplainOneUtility(), and to throw an error if it already exists, to avoid unnecessary rewriting, planning and execution of the SELECT part. --- src/backend/commands/createas.c | 70 ++++++++++++++++++----- src/backend/commands/explain.c | 12 ++++ src/include/commands/createas.h | 2 + src/test/regress/expected/matview.out | 52 +++++++++++++++++ src/test/regress/expected/select_into.out | 49 ++++++++++++++++ src/test/regress/sql/matview.sql | 54 +++++++++++++++++ src/test/regress/sql/select_into.sql | 51 +++++++++++++++++ 7 files changed, 276 insertions(+), 14 deletions(-) diff --git a/src/backend/commands/createas.c b/src/backend/commands/createas.c index 6bf6c5a310..e22e6ee620 100644 --- a/src/backend/commands/createas.c +++ b/src/backend/commands/createas.c @@ -238,22 +238,21 @@ ExecCreateTableAs(ParseState *pstate, CreateTableAsStmt *stmt, List *rewritten; PlannedStmt *plan; QueryDesc *queryDesc; + bool rel_exists; - if (stmt->if_not_exists) - { - Oid nspid; - - nspid = RangeVarGetCreationNamespace(stmt->into->rel); + /* + * Check if the relation that is going to get created exists already. 
If + * yes and there is no if-not-exists clause, then emit error early so that + * we can avoid planning for the SELECT part unnecessarily which happens + * otherwise only to fail later. + * + * If there is if-not-exists clause, we issue a NOTICE and return from here + * with invalid object address. + */ + rel_exists = CheckRelExistenceInCTAS(stmt, false); - if (get_relname_relid(stmt->into->rel->relname, nspid)) - { - ereport(NOTICE, - (errcode(ERRCODE_DUPLICATE_TABLE), - errmsg("relation \"%s\" already exists, skipping", - stmt->into->rel->relname))); - return InvalidObjectAddress; - } - } + if (stmt->if_not_exists && rel_exists) + return InvalidObjectAddress; /* * Create the tuple receiver object and insert info it will need @@ -400,6 +399,49 @@ GetIntoRelEFlags(IntoClause *intoClause) return flags; } +/* + * CheckRelExistenceInCTAS --- check whether a specified relation in CTAS + * already exists. + * + * We do this before any planning occurs for the SELECT part to avoid planning + * cost. If the relation exists: 1) issue NOTICE and return true for plain i.e. + * non-explain queries with if-not-exists clause. 2) emit an ERROR for both + * plain queries without if-not-exists clause and explain queries irrespective + * of if-not-exists clause. Return false if the relation does not exist. + */ +bool +CheckRelExistenceInCTAS(CreateTableAsStmt *ctas, bool is_explain) +{ + Oid nspid; + + nspid = RangeVarGetCreationNamespace(ctas->into->rel); + + if (get_relname_relid(ctas->into->rel->relname, nspid)) + { + /* + * Issue notice in case if-not-exists clause is present, otherwise an + * error is emitted. However for explains we throw error irrespective + * of if-not-exists clause, as we do not want to show a notice and an + * empty plan in the output which can happen if a notice is issued. 
+ */ + if (!is_explain && ctas->if_not_exists) + ereport(NOTICE, + (errcode(ERRCODE_DUPLICATE_TABLE), + errmsg("relation \"%s\" already exists, skipping", + ctas->into->rel->relname))); + else + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_TABLE), + errmsg("relation \"%s\" already exists", + ctas->into->rel->relname))); + + return true; + } + + /* Relation does not exist. */ + return false; +} + /* * CreateIntoRelDestReceiver -- create a suitable DestReceiver object * diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c index 43f9b01e83..9238a4a3a6 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -435,6 +435,18 @@ ExplainOneUtility(Node *utilityStmt, IntoClause *into, ExplainState *es, CreateTableAsStmt *ctas = (CreateTableAsStmt *) utilityStmt; List *rewritten; + /* + * Check if the relation that is going to get created exists already. + * If yes, then emit error early so that we can avoid planning for + * the SELECT part unnecessarily which happens otherwise only to fail + * later. + * + * We intentionally ignore the return value of CheckRelExistenceInCTAS + * as it returns for explains only when the relation does not exist + * otherwise it throws an error and does not come here. 
+ */ + (void) CheckRelExistenceInCTAS(ctas, true); + rewritten = QueryRewrite(castNode(Query, copyObject(ctas->query))); Assert(list_length(rewritten) == 1); ExplainOneQuery(linitial_node(Query, rewritten), diff --git a/src/include/commands/createas.h b/src/include/commands/createas.h index 7629230254..33d4d9701f 100644 --- a/src/include/commands/createas.h +++ b/src/include/commands/createas.h @@ -29,4 +29,6 @@ extern int GetIntoRelEFlags(IntoClause *intoClause); extern DestReceiver *CreateIntoRelDestReceiver(IntoClause *intoClause); +extern bool CheckRelExistenceInCTAS(CreateTableAsStmt *ctas, bool is_explain); + #endif /* CREATEAS_H */ diff --git a/src/test/regress/expected/matview.out b/src/test/regress/expected/matview.out index 2c0760404d..d1c79bec4e 100644 --- a/src/test/regress/expected/matview.out +++ b/src/test/regress/expected/matview.out @@ -630,3 +630,55 @@ drop cascades to materialized view matview_schema.mv_withdata2 drop cascades to materialized view matview_schema.mv_nodata1 drop cascades to materialized view matview_schema.mv_nodata2 DROP USER regress_matview_user; +-- Tests for CMV output behaviour in case the target relation already exists. 
+CREATE MATERIALIZED VIEW re_mv1 AS SELECT * FROM generate_series (1,5); +CREATE MATERIALIZED VIEW re_mv2 AS SELECT * FROM re_mv1; +-- error out +CREATE MATERIALIZED VIEW re_mv2 AS SELECT * FROM re_mv1; +ERROR: relation "re_mv2" already exists +-- issue notice +CREATE MATERIALIZED VIEW IF NOT EXISTS re_mv2 AS SELECT * FROM re_mv1; +NOTICE: relation "re_mv2" already exists, skipping +-- error out +CREATE MATERIALIZED VIEW re_mv2 AS SELECT * FROM re_mv1 WITH NO DATA; +ERROR: relation "re_mv2" already exists +-- issue notice +CREATE MATERIALIZED VIEW IF NOT EXISTS re_mv2 AS + SELECT * FROM re_mv1 WITH NO DATA; +NOTICE: relation "re_mv2" already exists, skipping +-- error out +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +CREATE MATERIALIZED VIEW re_mv2 AS SELECT * FROM re_mv1; +ERROR: relation "re_mv2" already exists +-- error out +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +CREATE MATERIALIZED VIEW IF NOT EXISTS re_mv2 AS SELECT * FROM re_mv1; +ERROR: relation "re_mv2" already exists +-- error out +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +CREATE MATERIALIZED VIEW re_mv2 AS SELECT * FROM re_mv1 WITH NO DATA; +ERROR: relation "re_mv2" already exists +-- error out +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +CREATE MATERIALIZED VIEW IF NOT EXISTS re_mv2 AS + SELECT * FROM re_mv1 WITH NO DATA; +ERROR: relation "re_mv2" already exists +-- error out +EXPLAIN (COSTS OFF, SUMMARY OFF, TIMING OFF) +CREATE MATERIALIZED VIEW re_mv2 AS SELECT * FROM re_mv1; +ERROR: relation "re_mv2" already exists +-- error out +EXPLAIN (COSTS OFF, SUMMARY OFF, TIMING OFF) +CREATE MATERIALIZED VIEW IF NOT EXISTS re_mv2 AS SELECT * FROM re_mv1; +ERROR: relation "re_mv2" already exists +-- error out +EXPLAIN (COSTS OFF, SUMMARY OFF, TIMING OFF) +CREATE MATERIALIZED VIEW re_mv2 AS SELECT * FROM re_mv1 WITH NO DATA; +ERROR: relation "re_mv2" already exists +-- error out +EXPLAIN (COSTS OFF, SUMMARY OFF, TIMING OFF) +CREATE MATERIALIZED VIEW IF NOT EXISTS 
re_mv2 AS + SELECT * FROM re_mv1 WITH NO DATA; +ERROR: relation "re_mv2" already exists +DROP MATERIALIZED VIEW re_mv2; +DROP MATERIALIZED VIEW re_mv1; diff --git a/src/test/regress/expected/select_into.out b/src/test/regress/expected/select_into.out index bf5c6bea04..22646b0bda 100644 --- a/src/test/regress/expected/select_into.out +++ b/src/test/regress/expected/select_into.out @@ -178,3 +178,52 @@ INSERT INTO b SELECT 1 INTO f; ERROR: SELECT ... INTO is not allowed here LINE 1: INSERT INTO b SELECT 1 INTO f; ^ +-- Tests for CTAS output behaviour in case the target relation already exists. +CREATE TABLE re_tbl1 AS SELECT * FROM generate_series (1,5); +CREATE TABLE re_tbl2 AS SELECT * FROM re_tbl1; +-- error out +CREATE TABLE re_tbl2 AS SELECT * FROM re_tbl1; +ERROR: relation "re_tbl2" already exists +-- issue notice +CREATE TABLE IF NOT EXISTS re_tbl2 AS SELECT * FROM re_tbl1; +NOTICE: relation "re_tbl2" already exists, skipping +-- error out +CREATE TABLE re_tbl2 AS SELECT * FROM re_tbl1 WITH NO DATA; +ERROR: relation "re_tbl2" already exists +-- issue notice +CREATE TABLE IF NOT EXISTS re_tbl2 AS SELECT * FROM re_tbl1 WITH NO DATA; +NOTICE: relation "re_tbl2" already exists, skipping +-- error out +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +CREATE TABLE re_tbl2 AS SELECT * FROM re_tbl1; +ERROR: relation "re_tbl2" already exists +-- error out +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +CREATE TABLE IF NOT EXISTS re_tbl2 AS SELECT * FROM re_tbl1; +ERROR: relation "re_tbl2" already exists +-- error out +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +CREATE TABLE re_tbl2 AS SELECT * FROM re_tbl1 WITH NO DATA; +ERROR: relation "re_tbl2" already exists +-- error out +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +CREATE TABLE IF NOT EXISTS re_tbl2 AS SELECT * FROM re_tbl1 WITH NO DATA; +ERROR: relation "re_tbl2" already exists +-- error out +EXPLAIN (COSTS OFF, SUMMARY OFF, TIMING OFF) +CREATE TABLE re_tbl2 AS SELECT * FROM 
re_tbl1; +ERROR: relation "re_tbl2" already exists +-- error out +EXPLAIN (COSTS OFF, SUMMARY OFF, TIMING OFF) +CREATE TABLE IF NOT EXISTS re_tbl2 AS SELECT * FROM re_tbl1; +ERROR: relation "re_tbl2" already exists +-- error out +EXPLAIN (COSTS OFF, SUMMARY OFF, TIMING OFF) +CREATE TABLE re_tbl2 AS SELECT * FROM re_tbl1 WITH NO DATA; +ERROR: relation "re_tbl2" already exists +-- error out +EXPLAIN (COSTS OFF, SUMMARY OFF, TIMING OFF) +CREATE TABLE IF NOT EXISTS re_tbl2 AS SELECT * FROM re_tbl1 WITH NO DATA; +ERROR: relation "re_tbl2" already exists +DROP TABLE re_tbl1; +DROP TABLE re_tbl2; diff --git a/src/test/regress/sql/matview.sql b/src/test/regress/sql/matview.sql index 70c4954d89..5109125429 100644 --- a/src/test/regress/sql/matview.sql +++ b/src/test/regress/sql/matview.sql @@ -264,3 +264,57 @@ ALTER DEFAULT PRIVILEGES FOR ROLE regress_matview_user DROP SCHEMA matview_schema CASCADE; DROP USER regress_matview_user; + +-- Tests for CMV output behaviour in case the target relation already exists. 
+CREATE MATERIALIZED VIEW re_mv1 AS SELECT * FROM generate_series (1,5); +CREATE MATERIALIZED VIEW re_mv2 AS SELECT * FROM re_mv1; + +-- error out +CREATE MATERIALIZED VIEW re_mv2 AS SELECT * FROM re_mv1; + +-- issue notice +CREATE MATERIALIZED VIEW IF NOT EXISTS re_mv2 AS SELECT * FROM re_mv1; + +-- error out +CREATE MATERIALIZED VIEW re_mv2 AS SELECT * FROM re_mv1 WITH NO DATA; + +-- issue notice +CREATE MATERIALIZED VIEW IF NOT EXISTS re_mv2 AS + SELECT * FROM re_mv1 WITH NO DATA; + +-- error out +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +CREATE MATERIALIZED VIEW re_mv2 AS SELECT * FROM re_mv1; + +-- error out +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +CREATE MATERIALIZED VIEW IF NOT EXISTS re_mv2 AS SELECT * FROM re_mv1; + +-- error out +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +CREATE MATERIALIZED VIEW re_mv2 AS SELECT * FROM re_mv1 WITH NO DATA; + +-- error out +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +CREATE MATERIALIZED VIEW IF NOT EXISTS re_mv2 AS + SELECT * FROM re_mv1 WITH NO DATA; + +-- error out +EXPLAIN (COSTS OFF, SUMMARY OFF, TIMING OFF) +CREATE MATERIALIZED VIEW re_mv2 AS SELECT * FROM re_mv1; + +-- error out +EXPLAIN (COSTS OFF, SUMMARY OFF, TIMING OFF) +CREATE MATERIALIZED VIEW IF NOT EXISTS re_mv2 AS SELECT * FROM re_mv1; + +-- error out +EXPLAIN (COSTS OFF, SUMMARY OFF, TIMING OFF) +CREATE MATERIALIZED VIEW re_mv2 AS SELECT * FROM re_mv1 WITH NO DATA; + +-- error out +EXPLAIN (COSTS OFF, SUMMARY OFF, TIMING OFF) +CREATE MATERIALIZED VIEW IF NOT EXISTS re_mv2 AS + SELECT * FROM re_mv1 WITH NO DATA; + +DROP MATERIALIZED VIEW re_mv2; +DROP MATERIALIZED VIEW re_mv1; diff --git a/src/test/regress/sql/select_into.sql b/src/test/regress/sql/select_into.sql index 6c170ef968..db997dbcef 100644 --- a/src/test/regress/sql/select_into.sql +++ b/src/test/regress/sql/select_into.sql @@ -115,3 +115,54 @@ COPY (SELECT 1 INTO frak UNION SELECT 2) TO 'blob'; SELECT * FROM (SELECT 1 INTO f) bar; CREATE VIEW foo 
AS SELECT 1 INTO b; INSERT INTO b SELECT 1 INTO f; + +-- Tests for CTAS output behaviour in case the target relation already exists. +CREATE TABLE re_tbl1 AS SELECT * FROM generate_series (1,5); +CREATE TABLE re_tbl2 AS SELECT * FROM re_tbl1; + +-- error out +CREATE TABLE re_tbl2 AS SELECT * FROM re_tbl1; + +-- issue notice +CREATE TABLE IF NOT EXISTS re_tbl2 AS SELECT * FROM re_tbl1; + +-- error out +CREATE TABLE re_tbl2 AS SELECT * FROM re_tbl1 WITH NO DATA; + +-- issue notice +CREATE TABLE IF NOT EXISTS re_tbl2 AS SELECT * FROM re_tbl1 WITH NO DATA; + +-- error out +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +CREATE TABLE re_tbl2 AS SELECT * FROM re_tbl1; + +-- error out +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +CREATE TABLE IF NOT EXISTS re_tbl2 AS SELECT * FROM re_tbl1; + +-- error out +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +CREATE TABLE re_tbl2 AS SELECT * FROM re_tbl1 WITH NO DATA; + +-- error out +EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) +CREATE TABLE IF NOT EXISTS re_tbl2 AS SELECT * FROM re_tbl1 WITH NO DATA; + +-- error out +EXPLAIN (COSTS OFF, SUMMARY OFF, TIMING OFF) +CREATE TABLE re_tbl2 AS SELECT * FROM re_tbl1; + +-- error out +EXPLAIN (COSTS OFF, SUMMARY OFF, TIMING OFF) +CREATE TABLE IF NOT EXISTS re_tbl2 AS SELECT * FROM re_tbl1; + +-- error out +EXPLAIN (COSTS OFF, SUMMARY OFF, TIMING OFF) +CREATE TABLE re_tbl2 AS SELECT * FROM re_tbl1 WITH NO DATA; + +-- error out +EXPLAIN (COSTS OFF, SUMMARY OFF, TIMING OFF) +CREATE TABLE IF NOT EXISTS re_tbl2 AS SELECT * FROM re_tbl1 WITH NO DATA; + +DROP TABLE re_tbl1; +DROP TABLE re_tbl2; -- 2.25.1