From 50f78225366c1ea34e81e6441baa22109b7cb5d1 Mon Sep 17 00:00:00 2001
From: Kirk Jamison
Date: Fri, 16 Oct 2020 02:23:05 +0000
Subject: [PATCH v26 3/4] Optimize DropRelFileNodeBuffers() during recovery.

The recovery path of DropRelFileNodeBuffers() is optimized so that
scanning of the whole buffer pool is avoided when the relation is small
enough, or the total number of blocks to be invalidated is below the
threshold for a full scan.

During recovery, we can get a reliable cached value of nblocks for the
supplied relation's fork, and it is safe because no process other than
the startup process changes the relation size during recovery.
Otherwise, or if not in recovery, fall back to a sequential search of
the whole buffer pool.
---
 src/backend/storage/buffer/bufmgr.c | 94 +++++++++++++++++++++++++++++++++----
 src/backend/storage/smgr/smgr.c     |  2 +-
 src/include/storage/bufmgr.h        |  2 +-
 3 files changed, 87 insertions(+), 11 deletions(-)

diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index ae001fe..fb6ba94 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -70,6 +70,8 @@
 
 #define RELS_BSEARCH_THRESHOLD		20
 
+#define BUF_DROP_FULL_SCAN_THRESHOLD	(uint32)(NBuffers / 512)
+
 typedef struct PrivateRefCountEntry
 {
 	Buffer		buffer;
@@ -2965,18 +2967,28 @@ BufferGetLSNAtomic(Buffer buffer)
  *		that no other process could be trying to load more pages of the
  *		relation into buffers.
  *
- *		XXX currently it sequentially searches the buffer pool, should be
- *		changed to more clever ways of searching.  However, this routine
- *		is used only in code paths that aren't very performance-critical,
- *		and we shouldn't slow down the hot paths to make it faster ...
+ *		If the expected maximum number of buffers to be dropped is small
+ *		enough, individual buffers are located by BufTableLookup().  Otherwise,
+ *		the buffer pool is sequentially scanned.  Since buffers must not be
+ *		left behind, the latter way is taken unless the sizes of all the
+ *		involved forks are known to be accurate.  See smgrnblocks() for
+ *		more details.
  * --------------------------------------------------------------------
  */
 void
-DropRelFileNodeBuffers(RelFileNodeBackend rnode, ForkNumber *forkNum,
+DropRelFileNodeBuffers(SMgrRelation smgr_reln, ForkNumber *forkNum,
 					   int nforks, BlockNumber *firstDelBlock)
 {
 	int			i;
 	int			j;
+	RelFileNodeBackend rnode;
+	bool		accurate;
+	BlockNumber nForkBlocks[MAX_FORKNUM];
+	BlockNumber nBlocksToInvalidate = 0;
+	BufferDesc *bufHdr;
+	uint32		buf_state;
+
+	rnode = smgr_reln->smgr_rnode;
 
 	/* If it's a local relation, it's localbuf.c's problem. */
 	if (RelFileNodeBackendIsTemp(rnode))
@@ -2990,10 +3002,75 @@ DropRelFileNodeBuffers(RelFileNodeBackend rnode, ForkNumber *forkNum,
 		return;
 	}
 
+	/*
+	 * Get the total number of to-be-invalidated blocks of the relation as
+	 * well as the total nblocks for each fork.  The cached value returned
+	 * by smgrnblocks could be smaller than the actual number of existing
+	 * blocks of the file.  This is caused by buggy Linux kernels that might
+	 * not have accounted for the recent write.  Give up the optimization if
+	 * the block count of any fork cannot be trusted.
+	 */
+	for (i = 0; i < nforks; i++)
+	{
+		/* Get the total nblocks for a relation's fork */
+		nForkBlocks[i] = smgrnblocks(smgr_reln, forkNum[i], &accurate);
+
+		if (!accurate)
+			break;
+
+		nBlocksToInvalidate += (nForkBlocks[i] - firstDelBlock[i]);
+	}
+
+	/*
+	 * Look up the buffers in the hashtable if the block counts are known
+	 * to be accurate and the total number of blocks to be invalidated is
+	 * below the full scan threshold.  Otherwise, give up the optimization.
+	 */
+	if (accurate && nBlocksToInvalidate < BUF_DROP_FULL_SCAN_THRESHOLD)
+	{
+		for (j = 0; j < nforks; j++)
+		{
+			BlockNumber curBlock;
+
+			for (curBlock = firstDelBlock[j]; curBlock < nForkBlocks[j]; curBlock++)
+			{
+				uint32		bufHash;	/* hash value for tag */
+				BufferTag	bufTag;		/* identity of requested block */
+				LWLock	   *bufPartitionLock;	/* buffer partition lock for it */
+				int			buf_id;
+
+				/* create a tag so we can look up the buffer */
+				INIT_BUFFERTAG(bufTag, rnode.node, forkNum[j], curBlock);
+
+				/* determine its hash code and partition lock ID */
+				bufHash = BufTableHashCode(&bufTag);
+				bufPartitionLock = BufMappingPartitionLock(bufHash);
+
+				/* Check that it is in the buffer pool.  If not, do nothing. */
+				LWLockAcquire(bufPartitionLock, LW_SHARED);
+				buf_id = BufTableLookup(&bufTag, bufHash);
+				LWLockRelease(bufPartitionLock);
+
+				if (buf_id < 0)
+					continue;
+
+				bufHdr = GetBufferDescriptor(buf_id);
+
+				buf_state = LockBufHdr(bufHdr);
+
+				if (RelFileNodeEquals(bufHdr->tag.rnode, rnode.node) &&
+					bufHdr->tag.forkNum == forkNum[j] &&
+					bufHdr->tag.blockNum >= firstDelBlock[j])
+					InvalidateBuffer(bufHdr);	/* releases spinlock */
+				else
+					UnlockBufHdr(bufHdr, buf_state);
+			}
+		}
+		return;
+	}
 
 	for (i = 0; i < NBuffers; i++)
 	{
-		BufferDesc *bufHdr = GetBufferDescriptor(i);
-		uint32		buf_state;
+		bufHdr = GetBufferDescriptor(i);
 
 		/*
 		 * We can make this a tad faster by prechecking the buffer tag before
@@ -3244,8 +3321,7 @@ PrintPinnedBufs(void)
  *		XXX currently it sequentially searches the buffer pool, should be
  *		changed to more clever ways of searching.  This routine is not
  *		used in any performance-critical code paths, so it's not worth
- *		adding additional overhead to normal paths to make it go faster;
- *		but see also DropRelFileNodeBuffers.
+ *		adding additional overhead to normal paths to make it go faster.
  * --------------------------------------------------------------------
  */
 void
diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c
index aad6e5d..6b8528e 100644
--- a/src/backend/storage/smgr/smgr.c
+++ b/src/backend/storage/smgr/smgr.c
@@ -601,7 +601,7 @@ smgrtruncate(SMgrRelation reln, ForkNumber *forknum, int nforks, BlockNumber *nb
 	 * Get rid of any buffers for the about-to-be-deleted blocks.  bufmgr will
 	 * just drop them without bothering to write the contents.
	 */
-	DropRelFileNodeBuffers(reln->smgr_rnode, forknum, nforks, nblocks);
+	DropRelFileNodeBuffers(reln, forknum, nforks, nblocks);
 
 	/*
 	 * Send a shared-inval message to force other backends to close any smgr
diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h
index ee91b8f..056f65e 100644
--- a/src/include/storage/bufmgr.h
+++ b/src/include/storage/bufmgr.h
@@ -203,7 +203,7 @@ extern void FlushOneBuffer(Buffer buffer);
 extern void FlushRelationBuffers(Relation rel);
 extern void FlushRelationsAllBuffers(struct SMgrRelationData **smgrs, int nrels);
 extern void FlushDatabaseBuffers(Oid dbid);
-extern void DropRelFileNodeBuffers(RelFileNodeBackend rnode, ForkNumber *forkNum,
+extern void DropRelFileNodeBuffers(struct SMgrRelationData *smgr_reln, ForkNumber *forkNum,
 								   int nforks, BlockNumber *firstDelBlock);
 extern void DropRelFileNodesAllBuffers(RelFileNodeBackend *rnodes, int nnodes);
 extern void DropDatabaseBuffers(Oid dbid);
-- 
1.8.3.1
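
A quick aid for reviewers, not part of the patch: the standalone sketch
below models only the decision rule the patch adds to
DropRelFileNodeBuffers().  The helper names (fork_nblocks,
use_block_lookup), the made-up fork sizes, and the example buffer-pool
size are illustrative stand-ins for smgrnblocks() and the real buffer
manager machinery.

/*
 * Simplified, self-contained illustration of the lookup-vs-scan decision.
 * fork_nblocks() and use_block_lookup() are hypothetical stand-ins, not
 * functions from the patch.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

typedef uint32_t BlockNumber;

/* Pretend cached fork sizes; "accurate" is always true here (recovery case). */
static BlockNumber
fork_nblocks(int forknum, bool *accurate)
{
	static const BlockNumber sizes[] = {1000, 10, 3};

	*accurate = true;
	return sizes[forknum];
}

/* Returns true when the per-block hash-table lookup path would be taken. */
static bool
use_block_lookup(int nforks, const BlockNumber *firstDelBlock, int nbuffers)
{
	BlockNumber nBlocksToInvalidate = 0;
	bool		accurate = false;
	int			i;

	for (i = 0; i < nforks; i++)
	{
		BlockNumber nblocks = fork_nblocks(i, &accurate);

		/* a single untrusted fork size forces the full buffer pool scan */
		if (!accurate)
			return false;
		nBlocksToInvalidate += nblocks - firstDelBlock[i];
	}

	/* same shape as BUF_DROP_FULL_SCAN_THRESHOLD: NBuffers / 512 */
	return nBlocksToInvalidate < (BlockNumber) (nbuffers / 512);
}

int
main(void)
{
	BlockNumber firstDelBlock[] = {950, 0, 0};

	/*
	 * With 2097152 buffers (16GB of shared_buffers) the threshold is 4096
	 * blocks; truncating 50 + 10 + 3 = 63 blocks takes the lookup path.
	 */
	printf("lookup path: %s\n",
		   use_block_lookup(3, firstDelBlock, 2097152) ? "yes" : "no");
	return 0;
}

The idea behind the NBuffers / 512 cutoff, as I read the patch, is that
each per-block lookup acquires a buffer-mapping partition lock, so once
the number of blocks to invalidate approaches a sizeable fraction of the
buffer pool, the single sequential pass over all descriptors is expected
to be cheaper.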