From 25cac32ee1561c27a0a5eee5ff929d9c44bb1ffc Mon Sep 17 00:00:00 2001
From: Kirk Jamison
Date: Wed, 2 Sep 2020 02:57:26 +0000
Subject: [PATCH] Speed up dropping of relation buffers during recovery

---
 src/backend/storage/buffer/bufmgr.c | 148 ++++++++++++++++++++++++++++--------
 src/backend/storage/smgr/smgr.c     |   2 +-
 src/include/storage/bufmgr.h        |   2 +-
 3 files changed, 119 insertions(+), 33 deletions(-)

diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index a2a963b..2b3f08c 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -70,6 +70,8 @@
 
 #define RELS_BSEARCH_THRESHOLD      20
 
+#define BUF_DROP_FULLSCAN_THRESHOLD     (NBuffers / 2)  /* relation size threshold for full buffer-pool scan */
+
 typedef struct PrivateRefCountEntry
 {
     Buffer      buffer;
@@ -2979,11 +2981,17 @@ BufferGetLSNAtomic(Buffer buffer)
  *      --------------------------------------------------------------------
  */
 void
-DropRelFileNodeBuffers(RelFileNodeBackend rnode, ForkNumber *forkNum,
+DropRelFileNodeBuffers(SMgrRelation smgr_reln, ForkNumber *forkNum,
                        int nforks, BlockNumber *firstDelBlock)
 {
     int         i;
     int         j;
+    int         k;
+    RelFileNodeBackend rnode;
+    BufferDesc *bufHdr;
+    uint32      buf_state;
+
+    rnode = smgr_reln->smgr_rnode;
 
     /* If it's a local relation, it's localbuf.c's problem. */
     if (RelFileNodeBackendIsTemp(rnode))
@@ -2997,44 +3005,122 @@ DropRelFileNodeBuffers(RelFileNodeBackend rnode, ForkNumber *forkNum,
         return;
     }
 
-    for (i = 0; i < NBuffers; i++)
+    /*
+     * Proceed to the normal buffer invalidation process.  We only speed up
+     * this path during recovery, because that is the only time we can rely
+     * on a cached value for the relation's number of blocks.  See the
+     * comment in smgrnblocks() in smgr.c.
+     */
+    if (!InRecovery)
     {
-        BufferDesc *bufHdr = GetBufferDescriptor(i);
-        uint32      buf_state;
+        for (i = 0; i < NBuffers; i++)
+        {
+            bufHdr = GetBufferDescriptor(i);
 
-        /*
-         * We can make this a tad faster by prechecking the buffer tag before
-         * we attempt to lock the buffer; this saves a lot of lock
-         * acquisitions in typical cases.  It should be safe because the
-         * caller must have AccessExclusiveLock on the relation, or some other
-         * reason to be certain that no one is loading new pages of the rel
-         * into the buffer pool.  (Otherwise we might well miss such pages
-         * entirely.)  Therefore, while the tag might be changing while we
-         * look at it, it can't be changing *to* a value we care about, only
-         * *away* from such a value.  So false negatives are impossible, and
-         * false positives are safe because we'll recheck after getting the
-         * buffer lock.
-         *
-         * We could check forkNum and blockNum as well as the rnode, but the
-         * incremental win from doing so seems small.
-         */
-        if (!RelFileNodeEquals(bufHdr->tag.rnode, rnode.node))
-            continue;
+            /*
+             * We can make this a tad faster by prechecking the buffer tag before
+             * we attempt to lock the buffer; this saves a lot of lock
+             * acquisitions in typical cases.  It should be safe because the
+             * caller must have AccessExclusiveLock on the relation, or some other
+             * reason to be certain that no one is loading new pages of the rel
+             * into the buffer pool.  (Otherwise we might well miss such pages
+             * entirely.)  Therefore, while the tag might be changing while we
+             * look at it, it can't be changing *to* a value we care about, only
+             * *away* from such a value.  So false negatives are impossible, and
+             * false positives are safe because we'll recheck after getting the
+             * buffer lock.
+             *
+             * We could check forkNum and blockNum as well as the rnode, but the
+             * incremental win from doing so seems small.
+             */
+            if (!RelFileNodeEquals(bufHdr->tag.rnode, rnode.node))
+                continue;
 
-        buf_state = LockBufHdr(bufHdr);
+            buf_state = LockBufHdr(bufHdr);
+
+            for (j = 0; j < nforks; j++)
+            {
+                if (RelFileNodeEquals(bufHdr->tag.rnode, rnode.node) &&
+                    bufHdr->tag.forkNum == forkNum[j] &&
+                    bufHdr->tag.blockNum >= firstDelBlock[j])
+                {
+                    InvalidateBuffer(bufHdr);   /* releases spinlock */
+                    break;
+                }
+            }
+            if (j >= nforks)
+                UnlockBufHdr(bufHdr, buf_state);
+        }
+    }
+    else
+    {
+        BufferTag   newTag;         /* identity of requested block */
+        uint32      newHash;        /* hash value for newTag */
+        LWLock     *newPartitionLock;   /* buffer partition lock for it */
+        BlockNumber reln_nblocks;
 
-        for (j = 0; j < nforks; j++)
+        for (i = 0; i < nforks; i++)
         {
-            if (RelFileNodeEquals(bufHdr->tag.rnode, rnode.node) &&
-                bufHdr->tag.forkNum == forkNum[j] &&
-                bufHdr->tag.blockNum >= firstDelBlock[j])
+            /* Get the number of blocks for the supplied relation fork */
+            reln_nblocks = smgrnblocks(smgr_reln, forkNum[i]);
+
+            /* create a tag so we can look up the buffer */
+            INIT_BUFFERTAG(newTag, rnode.node, forkNum[i], reln_nblocks);
+
+            /* determine its hash code and partition lock ID */
+            newHash = BufTableHashCode(&newTag);
+            newPartitionLock = BufMappingPartitionLock(newHash);
+
+            if (((int) reln_nblocks) < BUF_DROP_FULLSCAN_THRESHOLD)
             {
-                InvalidateBuffer(bufHdr);   /* releases spinlock */
-                break;
+                for (j = 0; j < reln_nblocks; j++)
+                {
+                    int         buf_id;
+
+                    /* Check that it is in the buffer pool */
+                    LWLockAcquire(newPartitionLock, LW_SHARED);
+                    buf_id = BufTableLookup(&newTag, newHash);
+                    LWLockRelease(newPartitionLock);
+
+                    bufHdr = GetBufferDescriptor(buf_id);
+
+                    if (!RelFileNodeEquals(bufHdr->tag.rnode, rnode.node))
+                        continue;
+
+                    buf_state = LockBufHdr(bufHdr);
+
+                    if (RelFileNodeEquals(bufHdr->tag.rnode, rnode.node) &&
+                        bufHdr->tag.forkNum == forkNum[i] &&
+                        bufHdr->tag.blockNum >= firstDelBlock[i])
+                        InvalidateBuffer(bufHdr);   /* releases spinlock */
+                    else
+                        UnlockBufHdr(bufHdr, buf_state);
+                }
             }
+            else
+            {
+                for (j = 0; j < NBuffers; j++)
+                {
+                    bufHdr = GetBufferDescriptor(j);
+
+                    if (!RelFileNodeEquals(bufHdr->tag.rnode, rnode.node))
+                        continue;
+
+                    buf_state = LockBufHdr(bufHdr);
+
+                    if (RelFileNodeEquals(bufHdr->tag.rnode, rnode.node) &&
+                        bufHdr->tag.forkNum == forkNum[i] &&
+                        bufHdr->tag.blockNum >= firstDelBlock[i])
+                    {
+                        InvalidateBuffer(bufHdr);   /* releases spinlock */
+                        break;
+                    }
+                }
+            }
+
+            if (i >= nforks)
+                UnlockBufHdr(bufHdr, buf_state);
         }
-        if (j >= nforks)
-            UnlockBufHdr(bufHdr, buf_state);
     }
 }
 
diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c
index dcc09df..5238c6c 100644
--- a/src/backend/storage/smgr/smgr.c
+++ b/src/backend/storage/smgr/smgr.c
@@ -583,7 +583,7 @@ smgrtruncate(SMgrRelation reln, ForkNumber *forknum, int nforks, BlockNumber *nb
      * Get rid of any buffers for the about-to-be-deleted blocks.  bufmgr will
      * just drop them without bothering to write the contents.
      */
-    DropRelFileNodeBuffers(reln->smgr_rnode, forknum, nforks, nblocks);
+    DropRelFileNodeBuffers(reln, forknum, nforks, nblocks);
 
     /*
      * Send a shared-inval message to force other backends to close any smgr
diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h
index ee91b8f..056f65e 100644
--- a/src/include/storage/bufmgr.h
+++ b/src/include/storage/bufmgr.h
@@ -203,7 +203,7 @@ extern void FlushOneBuffer(Buffer buffer);
 extern void FlushRelationBuffers(Relation rel);
 extern void FlushRelationsAllBuffers(struct SMgrRelationData **smgrs, int nrels);
 extern void FlushDatabaseBuffers(Oid dbid);
-extern void DropRelFileNodeBuffers(RelFileNodeBackend rnode, ForkNumber *forkNum,
+extern void DropRelFileNodeBuffers(struct SMgrRelationData *smgr_reln, ForkNumber *forkNum,
                                    int nforks, BlockNumber *firstDelBlock);
 extern void DropRelFileNodesAllBuffers(RelFileNodeBackend *rnodes, int nnodes);
 extern void DropDatabaseBuffers(Oid dbid);
-- 
1.8.3.1
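
Note for reviewers (not part of the patch): the recovery branch above is presumably meant to probe the
buffer mapping table once per to-be-truncated block instead of scanning all of NBuffers. A minimal sketch
of that per-block lookup is given below, written against the same bufmgr.c internals the patch already uses
(INIT_BUFFERTAG, BufTableHashCode, BufMappingPartitionLock, BufTableLookup, LockBufHdr, InvalidateBuffer,
UnlockBufHdr, smgrnblocks). The helper name drop_fork_blocks_by_lookup and its placement inside bufmgr.c
(InvalidateBuffer() is local to that file) are assumptions for illustration; the per-block tag rebuild, the
check for a negative BufTableLookup() result, and the unlock on a failed recheck reflect how I read the
intent, not code taken from the patch.

/*
 * Illustrative sketch only: invalidate the buffers of one relation fork from
 * firstDelBlock onwards by looking each block up in the buffer mapping table,
 * assuming smgrnblocks() returns a cached value during recovery.
 */
#include "postgres.h"

#include "storage/buf_internals.h"
#include "storage/bufmgr.h"
#include "storage/lwlock.h"
#include "storage/smgr.h"

static void
drop_fork_blocks_by_lookup(SMgrRelation smgr_reln, ForkNumber forkNum,
                           BlockNumber firstDelBlock)
{
    RelFileNodeBackend rnode = smgr_reln->smgr_rnode;
    BlockNumber nblocks = smgrnblocks(smgr_reln, forkNum);
    BlockNumber blkno;

    for (blkno = firstDelBlock; blkno < nblocks; blkno++)
    {
        BufferTag   tag;            /* identity of the candidate block */
        uint32      hash;           /* hash value for tag */
        LWLock     *partitionLock;  /* buffer partition lock for it */
        int         buf_id;
        BufferDesc *bufHdr;
        uint32      buf_state;

        /* create a tag so we can look up the buffer */
        INIT_BUFFERTAG(tag, rnode.node, forkNum, blkno);

        /* determine its hash code and partition lock ID */
        hash = BufTableHashCode(&tag);
        partitionLock = BufMappingPartitionLock(hash);

        /* see whether the block is in the buffer pool at all */
        LWLockAcquire(partitionLock, LW_SHARED);
        buf_id = BufTableLookup(&tag, hash);
        LWLockRelease(partitionLock);

        if (buf_id < 0)
            continue;           /* not cached, nothing to invalidate */

        bufHdr = GetBufferDescriptor(buf_id);

        /* recheck under the buffer header spinlock before invalidating */
        buf_state = LockBufHdr(bufHdr);
        if (RelFileNodeEquals(bufHdr->tag.rnode, rnode.node) &&
            bufHdr->tag.forkNum == forkNum &&
            bufHdr->tag.blockNum >= firstDelBlock)
            InvalidateBuffer(bufHdr);   /* releases spinlock */
        else
            UnlockBufHdr(bufHdr, buf_state);
    }
}

The partition lock is only taken in shared mode for the lookup; correctness still rests on the recheck under
the buffer header spinlock, which is the same ordering the existing full-scan path in DropRelFileNodeBuffers
relies on.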