From a40c1bc3d63323a9417374bb7467b8c04d011912 Mon Sep 17 00:00:00 2001 From: Justin Pryzby Date: Sat, 6 Jun 2020 17:42:23 -0500 Subject: [PATCH v5 2/3] Allow CREATE INDEX CONCURRENTLY on partitioned table Note, this effectively reverts 050098b14, so take care to not reintroduce the bug it fixed. --- doc/src/sgml/ref/create_index.sgml | 9 --- src/backend/commands/indexcmds.c | 104 +++++++++++++++++++------ src/test/regress/expected/indexing.out | 57 +++++++++++++- src/test/regress/sql/indexing.sql | 16 +++- 4 files changed, 148 insertions(+), 38 deletions(-) diff --git a/doc/src/sgml/ref/create_index.sgml b/doc/src/sgml/ref/create_index.sgml index 33aa64e81d..c780dc9547 100644 --- a/doc/src/sgml/ref/create_index.sgml +++ b/doc/src/sgml/ref/create_index.sgml @@ -657,15 +657,6 @@ Indexes: cannot. - - Concurrent builds for indexes on partitioned tables are currently not - supported. However, you may concurrently build the index on each - partition individually and then finally create the partitioned index - non-concurrently in order to reduce the time where writes to the - partitioned table will be locked out. In this case, building the - partitioned index is a metadata only operation. - - diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c index 68d75916ae..cdf31a47d8 100644 --- a/src/backend/commands/indexcmds.c +++ b/src/backend/commands/indexcmds.c @@ -655,17 +655,6 @@ DefineIndex(Oid relationId, partitioned = rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE; if (partitioned) { - /* - * Note: we check 'stmt->concurrent' rather than 'concurrent', so that - * the error is thrown also for temporary tables. Seems better to be - * consistent, even though we could do it on temporary table because - * we're not actually doing it concurrently. - */ - if (stmt->concurrent) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("cannot create index on partitioned table \"%s\" concurrently", - RelationGetRelationName(rel)))); if (stmt->excludeOpNames) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), @@ -1100,6 +1089,9 @@ DefineIndex(Oid relationId, if (pd->nparts != 0) flags |= INDEX_CREATE_INVALID; } + else if (concurrent && OidIsValid(parentIndexId)) + /* Initial concurrent build of index partition is invalid */ + flags |= INDEX_CREATE_INVALID; if (stmt->deferrable) constr_flags |= INDEX_CONSTR_CREATE_DEFERRABLE; @@ -1152,8 +1144,17 @@ DefineIndex(Oid relationId, */ if (!stmt->relation || stmt->relation->inh) { + /* + * Need to close the relation before recursing into children, so + * copy needed data into a longlived context. + */ + + MemoryContext ind_context = AllocSetContextCreate(PortalContext, "CREATE INDEX", + ALLOCSET_DEFAULT_SIZES); + MemoryContext oldcontext = MemoryContextSwitchTo(ind_context); PartitionDesc partdesc = RelationGetPartitionDesc(rel); int nparts = partdesc->nparts; + char *relname = pstrdup(RelationGetRelationName(rel)); Oid *part_oids = palloc(sizeof(Oid) * nparts); bool invalidate_parent = false; TupleDesc parentDesc; @@ -1163,8 +1164,10 @@ DefineIndex(Oid relationId, nparts); memcpy(part_oids, partdesc->oids, sizeof(Oid) * nparts); + parentDesc = CreateTupleDescCopy(RelationGetDescr(rel)); + table_close(rel, NoLock); + MemoryContextSwitchTo(oldcontext); - parentDesc = RelationGetDescr(rel); opfamOids = palloc(sizeof(Oid) * numberOfKeyAttributes); for (i = 0; i < numberOfKeyAttributes; i++) opfamOids[i] = get_opclass_family(classObjectId[i]); @@ -1199,18 +1202,20 @@ DefineIndex(Oid relationId, ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("cannot create unique index on partitioned table \"%s\"", - RelationGetRelationName(rel)), + relname), errdetail("Table \"%s\" contains partitions that are foreign tables.", - RelationGetRelationName(rel)))); + relname))); table_close(childrel, lockmode); continue; } + oldcontext = MemoryContextSwitchTo(ind_context); childidxs = RelationGetIndexList(childrel); attmap = build_attrmap_by_name(RelationGetDescr(childrel), parentDesc); + MemoryContextSwitchTo(oldcontext); foreach(cell, childidxs) { @@ -1336,10 +1341,17 @@ DefineIndex(Oid relationId, createdConstraintId, is_alter_table, check_rights, check_not_in_use, skip_build, quiet); + if (concurrent) + { + PushActiveSnapshot(GetTransactionSnapshot()); + invalidate_parent = true; + } } - pgstat_progress_update_param(PROGRESS_CREATEIDX_PARTITIONS_DONE, - i + 1); + /* For concurrent build, this is a catalog-only stage */ + if (!concurrent) + pgstat_progress_update_param(PROGRESS_CREATEIDX_PARTITIONS_DONE, + i + 1); free_attrmap(attmap); } @@ -1366,23 +1378,72 @@ DefineIndex(Oid relationId, table_close(pg_index, RowExclusiveLock); heap_freetuple(newtup); } + } else + table_close(rel, NoLock); + + if (concurrent) + { + List *childs; + ListCell *lc; + int npart = 0; + + /* Reindex invalid child indexes created earlier */ + MemoryContext ind_context = AllocSetContextCreate(PortalContext, "CREATE INDEX", + ALLOCSET_DEFAULT_SIZES); + MemoryContext oldcontext = MemoryContextSwitchTo(ind_context); + childs = find_inheritance_children(indexRelationId, NoLock); + MemoryContextSwitchTo(oldcontext); + + /* Make the catalog changes visible to get_partition_parent */ + CommandCounterIncrement(); + + foreach (lc, childs) + { + Oid indexrelid = lfirst_oid(lc); + Relation cldidx; + bool isvalid; + + if (!OidIsValid(parentIndexId)) + pgstat_progress_update_param(PROGRESS_CREATEIDX_PARTITIONS_DONE, + npart++); + + cldidx = index_open(indexrelid, ShareUpdateExclusiveLock); + isvalid = cldidx->rd_index->indisvalid; + index_close(cldidx, NoLock); + if (isvalid) + continue; + + /* This may be a partitioned index, which is fine too */ + ReindexRelationConcurrently(indexrelid, 0); + PushActiveSnapshot(GetTransactionSnapshot()); + } + + PopActiveSnapshot(); + CommitTransactionCommand(); + StartTransactionCommand(); + + /* + * CIC needs to mark a partitioned index as VALID, which itself + * requires setting READY, which is unset for CIC (even though + * it's meaningless for an index without storage). + */ + index_set_state_flags(indexRelationId, INDEX_CREATE_SET_READY); + CommandCounterIncrement(); + index_set_state_flags(indexRelationId, INDEX_CREATE_SET_VALID); } /* * Indexes on partitioned tables are not themselves built, so we're * done here. */ - table_close(rel, NoLock); if (!OidIsValid(parentIndexId)) pgstat_progress_end_command(); return address; } + table_close(rel, NoLock); if (!concurrent) { - /* Close the heap and we're done, in the non-concurrent case */ - table_close(rel, NoLock); - /* If this is the top-level index, we're done. */ if (!OidIsValid(parentIndexId)) pgstat_progress_end_command(); @@ -1390,10 +1451,9 @@ DefineIndex(Oid relationId, return address; } - /* save lockrelid and locktag for below, then close rel */ + /* save lockrelid and locktag for below */ heaprelid = rel->rd_lockInfo.lockRelId; SET_LOCKTAG_RELATION(heaplocktag, heaprelid.dbId, heaprelid.relId); - table_close(rel, NoLock); /* * For a concurrent build, it's important to make the catalog entries diff --git a/src/test/regress/expected/indexing.out b/src/test/regress/expected/indexing.out index f78865ef81..1ae58e110f 100644 --- a/src/test/regress/expected/indexing.out +++ b/src/test/regress/expected/indexing.out @@ -50,11 +50,60 @@ select relname, relkind, relhassubclass, inhparent::regclass (8 rows) drop table idxpart; --- Some unsupported features +-- CIC on partitioned table create table idxpart (a int, b int, c text) partition by range (a); -create table idxpart1 partition of idxpart for values from (0) to (10); -create index concurrently on idxpart (a); -ERROR: cannot create index on partitioned table "idxpart" concurrently +create table idxpart1 partition of idxpart for values from (0) to (10) partition by range(a); +create table idxpart11 partition of idxpart1 for values from (0) to (10) partition by range(a); +create table idxpart2 partition of idxpart for values from (10) to (20); +insert into idxpart2 values(10),(10); -- not unique +create index concurrently on idxpart (a); -- partitioned +create index concurrently on idxpart1 (a); -- partitioned and partition +create index concurrently on idxpart11 (a); -- partitioned and partition, with no leaves +create index concurrently on idxpart2 (a); -- leaf +create unique index concurrently on idxpart (a); -- partitioned, unique failure +ERROR: could not create unique index "idxpart2_a_idx2" +DETAIL: Key (a)=(10) is duplicated. +\d idxpart + Partitioned table "public.idxpart" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | integer | | | + b | integer | | | + c | text | | | +Partition key: RANGE (a) +Indexes: + "idxpart_a_idx" btree (a) + "idxpart_a_idx1" UNIQUE, btree (a) INVALID +Number of partitions: 2 (Use \d+ to list them.) + +\d idxpart1 + Partitioned table "public.idxpart1" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | integer | | | + b | integer | | | + c | text | | | +Partition of: idxpart FOR VALUES FROM (0) TO (10) +Partition key: RANGE (a) +Indexes: + "idxpart1_a_idx" btree (a) + "idxpart1_a_idx1" btree (a) + "idxpart1_a_idx2" UNIQUE, btree (a) +Number of partitions: 1 (Use \d+ to list them.) + +\d idxpart2 + Table "public.idxpart2" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + a | integer | | | + b | integer | | | + c | text | | | +Partition of: idxpart FOR VALUES FROM (10) TO (20) +Indexes: + "idxpart2_a_idx" btree (a) + "idxpart2_a_idx1" btree (a) + "idxpart2_a_idx2" UNIQUE, btree (a) INVALID + drop table idxpart; -- Verify bugfix with query on indexed partitioned table with no partitions -- https://postgr.es/m/20180124162006.pmapfiznhgngwtjf@alvherre.pgsql diff --git a/src/test/regress/sql/indexing.sql b/src/test/regress/sql/indexing.sql index 35d159f41b..d908ee6d17 100644 --- a/src/test/regress/sql/indexing.sql +++ b/src/test/regress/sql/indexing.sql @@ -29,10 +29,20 @@ select relname, relkind, relhassubclass, inhparent::regclass where relname like 'idxpart%' order by relname; drop table idxpart; --- Some unsupported features +-- CIC on partitioned table create table idxpart (a int, b int, c text) partition by range (a); -create table idxpart1 partition of idxpart for values from (0) to (10); -create index concurrently on idxpart (a); +create table idxpart1 partition of idxpart for values from (0) to (10) partition by range(a); +create table idxpart11 partition of idxpart1 for values from (0) to (10) partition by range(a); +create table idxpart2 partition of idxpart for values from (10) to (20); +insert into idxpart2 values(10),(10); -- not unique +create index concurrently on idxpart (a); -- partitioned +create index concurrently on idxpart1 (a); -- partitioned and partition +create index concurrently on idxpart11 (a); -- partitioned and partition, with no leaves +create index concurrently on idxpart2 (a); -- leaf +create unique index concurrently on idxpart (a); -- partitioned, unique failure +\d idxpart +\d idxpart1 +\d idxpart2 drop table idxpart; -- Verify bugfix with query on indexed partitioned table with no partitions -- 2.17.0