From 806bee1efdde958bb3d819626e0cfaf624cf2055 Mon Sep 17 00:00:00 2001 From: Thomas Munro Date: Thu, 9 Aug 2018 16:57:57 +0530 Subject: [PATCH 2/4] Fix deadlock in AbsorbAllFsyncRequests(). --- src/backend/postmaster/checkpointer.c | 54 +++++++++++++-------------- 1 file changed, 25 insertions(+), 29 deletions(-) diff --git a/src/backend/postmaster/checkpointer.c b/src/backend/postmaster/checkpointer.c index 9ef56db97bc..6250cb21946 100644 --- a/src/backend/postmaster/checkpointer.c +++ b/src/backend/postmaster/checkpointer.c @@ -112,6 +112,7 @@ typedef struct ForkNumber forknum; BlockNumber segno; /* see md.c for special values */ bool contains_fd; + int ckpt_started; /* might add a real request-type field later; not needed yet */ } CheckpointerRequest; @@ -169,9 +170,6 @@ static double ckpt_cached_elapsed; static pg_time_t last_checkpoint_time; static pg_time_t last_xlog_switch_time; -static BlockNumber next_syn_rqst; -static BlockNumber received_syn_rqst; - /* Prototypes for private functions */ static void CheckArchiveTimeout(void); @@ -179,7 +177,7 @@ static bool IsCheckpointOnSchedule(double progress); static bool ImmediateCheckpointRequested(void); static void UpdateSharedMemoryConfig(void); static void SendFsyncRequest(CheckpointerRequest *request, int fd); -static bool AbsorbFsyncRequest(void); +static bool AbsorbFsyncRequest(bool stop_at_current_cycle); /* Signal handlers */ @@ -1105,6 +1103,11 @@ RequestCheckpoint(int flags) * is theoretically possible a backend fsync might still be necessary, if * the queue is full and contains no duplicate entries. In that case, we * let the backend know by returning false. + * + * We add the cycle counter to the message. That is an unsynchronized read + * of the shared memory counter, but it doesn't matter if it is arbitrarily + * old since it is only used to limit unnecessary extra queue draining in + * AbsorbAllFsyncRequests(). */ void ForwardFsyncRequest(RelFileNode rnode, ForkNumber forknum, BlockNumber segno, @@ -1124,6 +1127,15 @@ ForwardFsyncRequest(RelFileNode rnode, ForkNumber forknum, BlockNumber segno, request.segno = segno; request.contains_fd = file != -1; + /* + * We read ckpt_started without synchronization. It is used to prevent + * AbsorbAllFsyncRequests() from reading new values from after a + * checkpoint began. A slightly out-of-date value here will only cause + * it to do a little bit more work than strictly necessary, but that's + * OK. + */ + request.ckpt_started = CheckpointerShmem->ckpt_started; + SendFsyncRequest(&request, request.contains_fd ? FileGetRawDesc(file) : -1); } @@ -1152,7 +1164,7 @@ AbsorbFsyncRequests(void) if (!FlushFsyncRequestQueueIfNecessary()) break; - if (!AbsorbFsyncRequest()) + if (!AbsorbFsyncRequest(false)) break; } } @@ -1170,8 +1182,6 @@ AbsorbFsyncRequests(void) void AbsorbAllFsyncRequests(void) { - CheckpointerRequest request = {0}; - if (!AmCheckpointerProcess()) return; @@ -1181,22 +1191,12 @@ AbsorbAllFsyncRequests(void) BgWriterStats.m_buf_fsync_backend += pg_atomic_exchange_u32(&CheckpointerShmem->num_backend_fsync, 0); - /* - * For mdsync()'s guarantees to work, all pending fsync requests need to - * be executed. But we don't want to absorb requests till the queue is - * empty, as that could take a long while. So instead we enqueue - */ - request.type = CKPT_REQUEST_SYN; - request.segno = ++next_syn_rqst; - SendFsyncRequest(&request, -1); - - received_syn_rqst = next_syn_rqst + 1; - while (received_syn_rqst != request.segno) + for (;;) { if (!FlushFsyncRequestQueueIfNecessary()) elog(FATAL, "may not happen"); - if (!AbsorbFsyncRequest()) + if (!AbsorbFsyncRequest(true)) break; } } @@ -1206,7 +1206,7 @@ AbsorbAllFsyncRequests(void) * Retrieve one queued fsync request and pass them to local smgr. */ static bool -AbsorbFsyncRequest(void) +AbsorbFsyncRequest(bool stop_at_current_cycle) { CheckpointerRequest req; int fd; @@ -1229,17 +1229,13 @@ AbsorbFsyncRequest(void) elog(FATAL, "message should have fd associated, but doesn't"); } - if (req.type == CKPT_REQUEST_SYN) - { - received_syn_rqst = req.segno; - Assert(fd == -1); - } - else - { - RememberFsyncRequest(req.rnode, req.forknum, req.segno, fd); - } + RememberFsyncRequest(req.rnode, req.forknum, req.segno, fd); END_CRIT_SECTION(); + if (stop_at_current_cycle && + req.ckpt_started == CheckpointerShmem->ckpt_started) + return false; + return true; } -- 2.17.0