From 17a14e5ad128024b05ae288a5096148b3f114c98 Mon Sep 17 00:00:00 2001 From: Justin Pryzby Date: Sat, 6 Jun 2020 17:42:23 -0500 Subject: [PATCH v6 1/2] Allow CREATE INDEX CONCURRENTLY on partitioned table Note, this effectively reverts 050098b14, so take care to not reintroduce the bug it fixed. --- doc/src/sgml/ref/create_index.sgml | 9 -- src/backend/commands/indexcmds.c | 118 ++++++++++++++++++++----- src/test/regress/expected/indexing.out | 60 ++++++++++++- src/test/regress/sql/indexing.sql | 18 +++- 4 files changed, 165 insertions(+), 40 deletions(-) diff --git a/doc/src/sgml/ref/create_index.sgml b/doc/src/sgml/ref/create_index.sgml index 33aa64e81d..c780dc9547 100644 --- a/doc/src/sgml/ref/create_index.sgml +++ b/doc/src/sgml/ref/create_index.sgml @@ -657,15 +657,6 @@ Indexes: cannot. - - Concurrent builds for indexes on partitioned tables are currently not - supported. However, you may concurrently build the index on each - partition individually and then finally create the partitioned index - non-concurrently in order to reduce the time where writes to the - partitioned table will be locked out. In this case, building the - partitioned index is a metadata only operation. - - diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c index f1b5f87e6a..61d0c4914c 100644 --- a/src/backend/commands/indexcmds.c +++ b/src/backend/commands/indexcmds.c @@ -665,17 +665,6 @@ DefineIndex(Oid relationId, partitioned = rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE; if (partitioned) { - /* - * Note: we check 'stmt->concurrent' rather than 'concurrent', so that - * the error is thrown also for temporary tables. Seems better to be - * consistent, even though we could do it on temporary table because - * we're not actually doing it concurrently. - */ - if (stmt->concurrent) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("cannot create index on partitioned table \"%s\" concurrently", - RelationGetRelationName(rel)))); if (stmt->excludeOpNames) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), @@ -1110,6 +1099,11 @@ DefineIndex(Oid relationId, if (pd->nparts != 0) flags |= INDEX_CREATE_INVALID; } + else if (concurrent && OidIsValid(parentIndexId)) + { + /* If concurrent, initial build of index partitions as "invalid" */ + flags |= INDEX_CREATE_INVALID; + } if (stmt->deferrable) constr_flags |= INDEX_CONSTR_CREATE_DEFERRABLE; @@ -1162,6 +1156,14 @@ DefineIndex(Oid relationId, */ if (!stmt->relation || stmt->relation->inh) { + /* + * Need to close the relation before recursing into children, so + * copy needed data into a longlived context. + */ + + MemoryContext ind_context = AllocSetContextCreate(PortalContext, "CREATE INDEX", + ALLOCSET_DEFAULT_SIZES); + MemoryContext oldcontext = MemoryContextSwitchTo(ind_context); PartitionDesc partdesc = RelationGetPartitionDesc(rel); int nparts = partdesc->nparts; Oid *part_oids = palloc(sizeof(Oid) * nparts); @@ -1173,8 +1175,10 @@ DefineIndex(Oid relationId, nparts); memcpy(part_oids, partdesc->oids, sizeof(Oid) * nparts); + parentDesc = CreateTupleDescCopy(RelationGetDescr(rel)); + table_close(rel, NoLock); + MemoryContextSwitchTo(oldcontext); - parentDesc = RelationGetDescr(rel); opfamOids = palloc(sizeof(Oid) * numberOfKeyAttributes); for (i = 0; i < numberOfKeyAttributes; i++) opfamOids[i] = get_opclass_family(classObjectId[i]); @@ -1217,10 +1221,12 @@ DefineIndex(Oid relationId, continue; } + oldcontext = MemoryContextSwitchTo(ind_context); childidxs = RelationGetIndexList(childrel); attmap = build_attrmap_by_name(RelationGetDescr(childrel), parentDesc); + MemoryContextSwitchTo(oldcontext); foreach(cell, childidxs) { @@ -1291,10 +1297,14 @@ DefineIndex(Oid relationId, */ if (!found) { - IndexStmt *childStmt = copyObject(stmt); + IndexStmt *childStmt; bool found_whole_row; ListCell *lc; + oldcontext = MemoryContextSwitchTo(ind_context); + childStmt = copyObject(stmt); + MemoryContextSwitchTo(oldcontext); + /* * We can't use the same index name for the child index, * so clear idxname to let the recursive invocation choose @@ -1306,8 +1316,8 @@ DefineIndex(Oid relationId, childStmt->relation = NULL; childStmt->indexOid = InvalidOid; childStmt->oldNode = InvalidOid; - childStmt->oldCreateSubid = InvalidSubTransactionId; - childStmt->oldFirstRelfilenodeSubid = InvalidSubTransactionId; + // childStmt->oldCreateSubid = InvalidSubTransactionId; + // childStmt->oldFirstRelfilenodeSubid = InvalidSubTransactionId; /* * Adjust any Vars (both in expressions and in the index's @@ -1346,10 +1356,18 @@ DefineIndex(Oid relationId, createdConstraintId, is_alter_table, check_rights, check_not_in_use, skip_build, quiet); + if (concurrent) + { + PopActiveSnapshot(); + PushActiveSnapshot(GetTransactionSnapshot()); + invalidate_parent = true; + } } - pgstat_progress_update_param(PROGRESS_CREATEIDX_PARTITIONS_DONE, - i + 1); + /* For concurrent build, this is a catalog-only stage */ + if (!concurrent) + pgstat_progress_update_param(PROGRESS_CREATEIDX_PARTITIONS_DONE, + i + 1); free_attrmap(attmap); } @@ -1376,34 +1394,86 @@ DefineIndex(Oid relationId, table_close(pg_index, RowExclusiveLock); heap_freetuple(newtup); } + } else + table_close(rel, NoLock); + + if (concurrent) + { + List *childs; + ListCell *lc; + int npart = 0; + + /* Reindex invalid child indexes created earlier */ + MemoryContext ind_context = AllocSetContextCreate(PortalContext, "CREATE INDEX", + ALLOCSET_DEFAULT_SIZES); + MemoryContext oldcontext = MemoryContextSwitchTo(ind_context); + childs = find_inheritance_children(indexRelationId, NoLock); + MemoryContextSwitchTo(oldcontext); + + /* Make the catalog changes visible to get_partition_parent */ + PopActiveSnapshot(); + CommandCounterIncrement(); + + foreach (lc, childs) + { + Oid indexrelid = lfirst_oid(lc); + + if (!OidIsValid(parentIndexId)) + pgstat_progress_update_param(PROGRESS_CREATEIDX_PARTITIONS_DONE, + npart++); + + CommandCounterIncrement(); + PushActiveSnapshot(GetTransactionSnapshot()); + + if (get_index_isvalid(indexrelid)) + { + PopActiveSnapshot(); + continue; + } + + /* This may be a partitioned index, which is fine too */ + ReindexRelationConcurrently(indexrelid, 0); + } + + /* + * CIC needs to mark a partitioned index as VALID, which itself + * requires setting READY, which is unset for CIC (even though + * it's meaningless for an index without storage). + */ + CommitTransactionCommand(); + StartTransactionCommand(); + PushActiveSnapshot(GetTransactionSnapshot()); + index_set_state_flags(indexRelationId, INDEX_CREATE_SET_READY); + CommandCounterIncrement(); + index_set_state_flags(indexRelationId, INDEX_CREATE_SET_VALID); } /* * Indexes on partitioned tables are not themselves built, so we're * done here. */ - table_close(rel, NoLock); if (!OidIsValid(parentIndexId)) pgstat_progress_end_command(); return address; } - if (!concurrent) + table_close(rel, NoLock); + if (!concurrent || OidIsValid(parentIndexId)) { - /* Close the heap and we're done, in the non-concurrent case */ - table_close(rel, NoLock); + /* + * We're done if this is the top-level index, + * or the catalog-only phase of a partition built concurrently + */ - /* If this is the top-level index, we're done. */ if (!OidIsValid(parentIndexId)) pgstat_progress_end_command(); return address; } - /* save lockrelid and locktag for below, then close rel */ + /* save lockrelid and locktag for below */ heaprelid = rel->rd_lockInfo.lockRelId; SET_LOCKTAG_RELATION(heaplocktag, heaprelid.dbId, heaprelid.relId); - table_close(rel, NoLock); /* * For a concurrent build, it's important to make the catalog entries diff --git a/src/test/regress/expected/indexing.out b/src/test/regress/expected/indexing.out index 7e78a07af8..f6eba3dd40 100644 --- a/src/test/regress/expected/indexing.out +++ b/src/test/regress/expected/indexing.out @@ -50,11 +50,63 @@ select relname, relkind, relhassubclass, inhparent::regclass (8 rows) drop table idxpart; --- Some unsupported features +-- CIC on partitioned table create table idxpart (a int, b int, c text) partition by range (a); -create table idxpart1 partition of idxpart for values from (0) to (10); -create index concurrently on idxpart (a); -ERROR: cannot create index on partitioned table "idxpart" concurrently +create table idxpart1 partition of idxpart for values from (0) to (10) partition by range(a); +create table idxpart11 partition of idxpart1 for values from (0) to (10) partition by range(a); +create table idxpart111 partition of idxpart11 default partition by range(a); +create table idxpart1111 partition of idxpart111 default partition by range(a); +create table idxpart2 partition of idxpart for values from (10) to (20); +insert into idxpart2 values(10),(10); -- not unique +create index concurrently on idxpart (a); -- partitioned +create index concurrently on idxpart1 (a); -- partitioned and partition +create index concurrently on idxpart11 (a); -- partitioned and partition, with no leaves +create index concurrently on idxpart2 (a); -- leaf +create unique index concurrently on idxpart (a); -- partitioned, unique failure +ERROR: could not create unique index "idxpart2_a_idx2_ccnew" +DETAIL: Key (a)=(10) is duplicated. +\d idxpart + Partitioned table "public.idxpart" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | integer | | | + b | integer | | | + c | text | | | +Partition key: RANGE (a) +Indexes: + "idxpart_a_idx" btree (a) + "idxpart_a_idx1" UNIQUE, btree (a) INVALID +Number of partitions: 2 (Use \d+ to list them.) + +\d idxpart1 + Partitioned table "public.idxpart1" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | integer | | | + b | integer | | | + c | text | | | +Partition of: idxpart FOR VALUES FROM (0) TO (10) +Partition key: RANGE (a) +Indexes: + "idxpart1_a_idx" btree (a) + "idxpart1_a_idx1" btree (a) + "idxpart1_a_idx2" UNIQUE, btree (a) +Number of partitions: 1 (Use \d+ to list them.) + +\d idxpart2 + Table "public.idxpart2" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | integer | | | + b | integer | | | + c | text | | | +Partition of: idxpart FOR VALUES FROM (10) TO (20) +Indexes: + "idxpart2_a_idx" btree (a) + "idxpart2_a_idx1" btree (a) + "idxpart2_a_idx2" UNIQUE, btree (a) INVALID + "idxpart2_a_idx2_ccnew" UNIQUE, btree (a) INVALID + drop table idxpart; -- Verify bugfix with query on indexed partitioned table with no partitions -- https://postgr.es/m/20180124162006.pmapfiznhgngwtjf@alvherre.pgsql diff --git a/src/test/regress/sql/indexing.sql b/src/test/regress/sql/indexing.sql index 42f398b67c..3d4b6e9bc9 100644 --- a/src/test/regress/sql/indexing.sql +++ b/src/test/regress/sql/indexing.sql @@ -29,10 +29,22 @@ select relname, relkind, relhassubclass, inhparent::regclass where relname like 'idxpart%' order by relname; drop table idxpart; --- Some unsupported features +-- CIC on partitioned table create table idxpart (a int, b int, c text) partition by range (a); -create table idxpart1 partition of idxpart for values from (0) to (10); -create index concurrently on idxpart (a); +create table idxpart1 partition of idxpart for values from (0) to (10) partition by range(a); +create table idxpart11 partition of idxpart1 for values from (0) to (10) partition by range(a); +create table idxpart111 partition of idxpart11 default partition by range(a); +create table idxpart1111 partition of idxpart111 default partition by range(a); +create table idxpart2 partition of idxpart for values from (10) to (20); +insert into idxpart2 values(10),(10); -- not unique +create index concurrently on idxpart (a); -- partitioned +create index concurrently on idxpart1 (a); -- partitioned and partition +create index concurrently on idxpart11 (a); -- partitioned and partition, with no leaves +create index concurrently on idxpart2 (a); -- leaf +create unique index concurrently on idxpart (a); -- partitioned, unique failure +\d idxpart +\d idxpart1 +\d idxpart2 drop table idxpart; -- Verify bugfix with query on indexed partitioned table with no partitions -- 2.17.0