From 73a20b949c3d1365163c702b1cf947fb54680efd Mon Sep 17 00:00:00 2001 From: Amul Sul Date: Fri, 15 May 2020 06:39:43 -0400 Subject: [PATCH v1 4/6] Use checkpointer to make system READ-ONLY or READ-WRITE Until the previous commit, the backend used to do this itself, but now the backend requests the checkpointer to do it. The checkpointer, noticing that the current state has the WALPROHIBIT_TRANSITION_IN_PROGRESS flag set, performs the barrier request, and then acknowledges back to the backend that requested the state change. Note that this commit also enables ALTER SYSTEM READ WRITE support and makes the WAL prohibited state persistent across system restarts. --- src/backend/access/transam/walprohibit.c | 26 ++++-- src/backend/access/transam/xlog.c | 71 ++++++++++++++-- src/backend/postmaster/checkpointer.c | 100 +++++++++++++++++++++-- src/backend/postmaster/pgstat.c | 3 + src/bin/pg_controldata/pg_controldata.c | 2 + src/include/access/walprohibit.h | 11 +++ src/include/access/xlog.h | 3 +- src/include/catalog/pg_control.h | 3 + src/include/pgstat.h | 1 + src/include/postmaster/bgwriter.h | 2 + 10 files changed, 202 insertions(+), 20 deletions(-) diff --git a/src/backend/access/transam/walprohibit.c b/src/backend/access/transam/walprohibit.c index df97596ddf9..a8cda2fafbc 100644 --- a/src/backend/access/transam/walprohibit.c +++ b/src/backend/access/transam/walprohibit.c @@ -30,6 +30,8 @@ ProcessBarrierWALProhibit(void) */ if (FullTransactionIdIsValid(GetTopFullTransactionIdIfAny())) { + Assert(GetWALProhibitState() & WALPROHIBIT_STATE_READ_ONLY); + /* * XXX: Kill off the whole session by throwing FATAL instead of killing * transaction by throwing ERROR due to following reasons that need be @@ -64,6 +66,8 @@ ProcessBarrierWALProhibit(void) void AlterSystemSetWALProhibitState(AlterSystemWALProhibitState *stmt) { + uint32 state; + if (!superuser()) ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), @@ -72,10 +76,22 @@ AlterSystemSetWALProhibitState(AlterSystemWALProhibitState 
*stmt) /* Alter WAL prohibit state not allowed during recovery */ PreventCommandDuringRecovery("ALTER SYSTEM"); - /* Yet to add ALTER SYTEM READ WRITE support */ - if (!stmt->WALProhibited) - elog(ERROR, "XXX: Yet to implement"); + /* Requested state */ + state = stmt->WALProhibited ? + WALPROHIBIT_STATE_READ_ONLY : WALPROHIBIT_STATE_READ_WRITE; + + /* + * Since we have yet to convey this WAL prohibit state to all backends, + * mark it in-progress. + */ + state |= WALPROHIBIT_TRANSITION_IN_PROGRESS; + + if (!SetWALProhibitState(state)) + return; /* server is already in the desired state */ - MakeReadOnlyXLOG(); - WaitForProcSignalBarrier(EmitProcSignalBarrier(PROCSIGNAL_BARRIER_WALPROHIBIT)); + /* + * Signal the checkpointer to do the actual state transition, and wait for + * the state change to occur. + */ + WALProhibitRequest(); } diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 83919bdb1f0..ded36113d1a 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -31,6 +31,7 @@ #include "access/timeline.h" #include "access/transam.h" #include "access/twophase.h" +#include "access/walprohibit.h" #include "access/xact.h" #include "access/xlog_internal.h" #include "access/xlogarchive.h" @@ -661,10 +662,10 @@ typedef struct XLogCtlData RecoveryState SharedRecoveryState; /* - * WALProhibited indicates if we have stopped allowing WAL writes. + * SharedWALProhibitState indicates the current WAL prohibit state. * Protected by info_lck. */ - bool WALProhibited; + uint32 SharedWALProhibitState; /* * SharedHotStandbyActive indicates if we allow hot standby queries to be @@ -7964,14 +7965,70 @@ StartupXLOG(void) */ if (fast_promoted) RequestCheckpoint(CHECKPOINT_FORCE); + + /* + * Update the WAL prohibit state in shared memory, which decides whether + * further WAL inserts are allowed or not. 
+ * + * XXX: What if the previous recovery checkpoint was not ok? It might never + * happen even if the system is later put back into read-write mode. What + * to do then? + */ + SpinLockAcquire(&XLogCtl->info_lck); + XLogCtl->SharedWALProhibitState = ControlFile->wal_prohibited ? + WALPROHIBIT_STATE_READ_ONLY : WALPROHIBIT_STATE_READ_WRITE; + SpinLockRelease(&XLogCtl->info_lck); + ResetLocalXLogInsertAllowed(); /* return to "check" state */ } -void -MakeReadOnlyXLOG(void) +/* Atomically return the current server WAL prohibited state */ +uint32 +GetWALProhibitState(void) +{ + uint32 state; + + SpinLockAcquire(&XLogCtl->info_lck); + state = XLogCtl->SharedWALProhibitState; + SpinLockRelease(&XLogCtl->info_lck); + + return state; +} + +/* + * SetWALProhibitState: Change current WAL prohibit state to the input state. + * + * If the server is already completely moved to the requested WAL prohibit + * state, or if the desired state is the same as the current state, return + * false, indicating that the server state did not change. Else return true. 
+ */ +bool +SetWALProhibitState(uint32 new_state) { + uint32 cur_state; + + cur_state = GetWALProhibitState(); + + if (new_state == cur_state || + new_state == (cur_state | WALPROHIBIT_TRANSITION_IN_PROGRESS)) + return false; + + /* Update new state in shared memory */ SpinLockAcquire(&XLogCtl->info_lck); - XLogCtl->WALProhibited = true; + XLogCtl->SharedWALProhibitState = new_state; SpinLockRelease(&XLogCtl->info_lck); + + /* Update control file if it is the final state */ + if (!(new_state & WALPROHIBIT_TRANSITION_IN_PROGRESS)) + { + bool wal_prohibited = (new_state & WALPROHIBIT_STATE_READ_ONLY) != 0; + + LWLockAcquire(ControlFileLock, LW_EXCLUSIVE); + ControlFile->wal_prohibited = wal_prohibited; + UpdateControlFile(); + LWLockRelease(ControlFileLock); + } + + return true; } /* @@ -7980,9 +8037,7 @@ MakeReadOnlyXLOG(void) bool IsWALProhibited(void) { - volatile XLogCtlData *xlogctl = XLogCtl; - - return xlogctl->WALProhibited; + return (GetWALProhibitState() & WALPROHIBIT_STATE_READ_ONLY) != 0; } /* diff --git a/src/backend/postmaster/checkpointer.c b/src/backend/postmaster/checkpointer.c index 5e5e56d4eec..1e80d53c18a 100644 --- a/src/backend/postmaster/checkpointer.c +++ b/src/backend/postmaster/checkpointer.c @@ -39,6 +39,7 @@ #include #include +#include "access/walprohibit.h" #include "access/xlog.h" #include "access/xlog_internal.h" #include "libpq/pqsignal.h" @@ -127,6 +128,8 @@ typedef struct ConditionVariable start_cv; /* signaled when ckpt_started advances */ ConditionVariable done_cv; /* signaled when ckpt_done advances */ + ConditionVariable readonly_cv; /* signaled when WAL prohibit state changes */ + uint32 num_backend_writes; /* counts user backend buffer writes */ uint32 num_backend_fsync; /* counts user backend fsync calls */ @@ -168,6 +171,7 @@ static bool IsCheckpointOnSchedule(double progress); static bool ImmediateCheckpointRequested(void); static bool CompactCheckpointerRequestQueue(void); static void UpdateSharedMemoryConfig(void); +static void 
performWALProhibitStateChange(uint32 wal_state); /* Signal handlers */ static void ReqCheckpointHandler(SIGNAL_ARGS); @@ -332,6 +336,7 @@ CheckpointerMain(void) pg_time_t now; int elapsed_secs; int cur_timeout; + uint32 wal_state; /* Clear any already-pending wakeups */ ResetLatch(MyLatch); @@ -342,18 +347,28 @@ CheckpointerMain(void) AbsorbSyncRequests(); HandleCheckpointerInterrupts(); - /* - * If the server is in WAL-Prohibited state then don't do anything until - * someone wakes us up. E.g. a backend might later on request us to put - * the system back to read-write. - */ - if (IsWALProhibited()) + wal_state = GetWALProhibitState(); + + if (wal_state & WALPROHIBIT_TRANSITION_IN_PROGRESS) + { + /* Complete WAL prohibit state change request */ + performWALProhibitStateChange(wal_state); + continue; + } + else if (wal_state & WALPROHIBIT_STATE_READ_ONLY) { + /* + * Don't do anything until someone wakes us up. For example, a + * backend might later on request us to put the system back to + * the read-write WAL prohibit state. + */ (void) WaitLatch(MyLatch, WL_LATCH_SET | WL_EXIT_ON_PM_DEATH, -1, WAIT_EVENT_CHECKPOINTER_MAIN); continue; } + Assert(wal_state == WALPROHIBIT_STATE_READ_WRITE); + /* * Detect a pending checkpoint request by checking whether the flags * word in shared memory is nonzero. We shouldn't need to acquire the @@ -891,6 +906,7 @@ CheckpointerShmemInit(void) CheckpointerShmem->max_requests = NBuffers; ConditionVariableInit(&CheckpointerShmem->start_cv); ConditionVariableInit(&CheckpointerShmem->done_cv); + ConditionVariableInit(&CheckpointerShmem->readonly_cv); } } @@ -1121,6 +1137,78 @@ ForwardSyncRequest(const FileTag *ftag, SyncRequestType type) return true; } +/* + * WALProhibitRequest: Request the checkpointer to complete the pending WAL + * prohibit state transition and wait until it is done. 
+ */ +void +WALProhibitRequest(void) +{ + /* Must not be called from checkpointer */ + Assert(!AmCheckpointerProcess()); + Assert(GetWALProhibitState() & WALPROHIBIT_TRANSITION_IN_PROGRESS); + + if (CheckpointerShmem->checkpointer_pid == 0) + elog(ERROR, "checkpointer is not running"); + + if (kill(CheckpointerShmem->checkpointer_pid, SIGINT) != 0) + elog(ERROR, "could not signal checkpointer: %m"); + + /* Wait for the requested state transition to complete */ + ConditionVariablePrepareToSleep(&CheckpointerShmem->readonly_cv); + for (;;) + { + /* We'll be done once the in-progress flag bit is cleared */ + if (!(GetWALProhibitState() & WALPROHIBIT_TRANSITION_IN_PROGRESS)) + break; + + elog(DEBUG1, "WALProhibitRequest: Waiting for checkpointer"); + ConditionVariableSleep(&CheckpointerShmem->readonly_cv, + WAIT_EVENT_SYSTEM_WALPROHIBIT_STATE_CHANGE); + } + ConditionVariableCancelSleep(); + elog(DEBUG1, "Done WALProhibitRequest"); +} + +/* + * performWALProhibitStateChange: checkpointer will call this to complete + * the requested WAL prohibit state transition. + */ +static void +performWALProhibitStateChange(uint32 wal_state) +{ + uint64 barrierGeneration; + + /* Must be called from checkpointer */ + Assert(AmCheckpointerProcess()); + Assert(wal_state & WALPROHIBIT_TRANSITION_IN_PROGRESS); + + /* + * WAL prohibit state change is initiated. We need to complete the state + * transition by setting the requested WAL prohibit state in all backends. + */ + elog(DEBUG1, "Checkpointer: waiting for backends to adopt requested WAL prohibit state"); + + /* Emit global barrier */ + barrierGeneration = EmitProcSignalBarrier(PROCSIGNAL_BARRIER_WALPROHIBIT); + WaitForProcSignalBarrier(barrierGeneration); + + /* And flush all writes. 
*/ + XLogFlush(GetXLogWriteRecPtr()); + + /* Set final state by clearing in-progress flag bit */ + if (SetWALProhibitState(wal_state & ~(WALPROHIBIT_TRANSITION_IN_PROGRESS))) + { + if ((wal_state & WALPROHIBIT_STATE_READ_ONLY) != 0) + ereport(LOG, (errmsg("system is now read only"))); + else + ereport(LOG, (errmsg("system is now read write"))); + } + + /* Wake up the backend who requested the state change */ + ConditionVariableBroadcast(&CheckpointerShmem->readonly_cv); +} + /* * CompactCheckpointerRequestQueue * Remove duplicates from the request queue to avoid backend fsyncs. diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c index e96134dac8a..b5d85d35938 100644 --- a/src/backend/postmaster/pgstat.c +++ b/src/backend/postmaster/pgstat.c @@ -4054,6 +4054,9 @@ pgstat_get_wait_io(WaitEventIO w) case WAIT_EVENT_REPLICATION_SLOT_WRITE: event_name = "ReplicationSlotWrite"; break; + case WAIT_EVENT_SYSTEM_WALPROHIBIT_STATE_CHANGE: + event_name = "SystemWALProhibitStateChange"; + break; case WAIT_EVENT_SLRU_FLUSH_SYNC: event_name = "SLRUFlushSync"; break; diff --git a/src/bin/pg_controldata/pg_controldata.c b/src/bin/pg_controldata/pg_controldata.c index e73639df744..9594df76946 100644 --- a/src/bin/pg_controldata/pg_controldata.c +++ b/src/bin/pg_controldata/pg_controldata.c @@ -290,6 +290,8 @@ main(int argc, char *argv[]) (uint32) ControlFile->backupEndPoint); printf(_("End-of-backup record required: %s\n"), ControlFile->backupEndRequired ? _("yes") : _("no")); + printf(_("WAL write prohibited: %s\n"), + ControlFile->wal_prohibited ? 
_("yes") : _("no")); printf(_("wal_level setting: %s\n"), wal_level_str(ControlFile->wal_level)); printf(_("wal_log_hints setting: %s\n"), diff --git a/src/include/access/walprohibit.h b/src/include/access/walprohibit.h index 619c33cd780..163fe0d2fce 100644 --- a/src/include/access/walprohibit.h +++ b/src/include/access/walprohibit.h @@ -18,4 +18,15 @@ extern bool ProcessBarrierWALProhibit(void); extern void AlterSystemSetWALProhibitState(AlterSystemWALProhibitState *stmt); +/* WAL Prohibit States */ +#define WALPROHIBIT_STATE_READ_WRITE 0x0000 +#define WALPROHIBIT_STATE_READ_ONLY 0x0001 + +/* + * This bit is used during the transition from one state to another. When this + * bit is set, the state indicated by the 0th position bit is yet to be + * confirmed. + */ +#define WALPROHIBIT_TRANSITION_IN_PROGRESS 0x0002 + #endif /* WALPROHIBIT_H */ diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h index ca7ae766e3f..060bfa4acf3 100644 --- a/src/include/access/xlog.h +++ b/src/include/access/xlog.h @@ -324,7 +324,8 @@ extern void XLOGShmemInit(void); extern void BootStrapXLOG(void); extern void LocalProcessControlFile(bool reset); extern void StartupXLOG(void); -extern void MakeReadOnlyXLOG(void); +extern uint32 GetWALProhibitState(void); +extern bool SetWALProhibitState(uint32 new_state); extern void ShutdownXLOG(int code, Datum arg); extern void InitXLOGAccess(void); extern void CreateCheckPoint(int flags); diff --git a/src/include/catalog/pg_control.h b/src/include/catalog/pg_control.h index de5670e5382..b32c7723275 100644 --- a/src/include/catalog/pg_control.h +++ b/src/include/catalog/pg_control.h @@ -182,6 +182,9 @@ typedef struct ControlFileData int max_locks_per_xact; bool track_commit_timestamp; + /* wal_prohibited determines whether WAL insertion is allowed or not. */ + bool wal_prohibited; + /* * This data is used to check for hardware-architecture compatibility of * the database and the backend executable. 
We need not check endianness diff --git a/src/include/pgstat.h b/src/include/pgstat.h index c55dc1481ca..4bd0193e035 100644 --- a/src/include/pgstat.h +++ b/src/include/pgstat.h @@ -954,6 +954,7 @@ typedef enum WAIT_EVENT_REPLICATION_SLOT_RESTORE_SYNC, WAIT_EVENT_REPLICATION_SLOT_SYNC, WAIT_EVENT_REPLICATION_SLOT_WRITE, + WAIT_EVENT_SYSTEM_WALPROHIBIT_STATE_CHANGE, WAIT_EVENT_SLRU_FLUSH_SYNC, WAIT_EVENT_SLRU_READ, WAIT_EVENT_SLRU_SYNC, diff --git a/src/include/postmaster/bgwriter.h b/src/include/postmaster/bgwriter.h index 0a5708b32e6..e8271b49f6d 100644 --- a/src/include/postmaster/bgwriter.h +++ b/src/include/postmaster/bgwriter.h @@ -35,6 +35,8 @@ extern void CheckpointWriteDelay(int flags, double progress); extern bool ForwardSyncRequest(const FileTag *ftag, SyncRequestType type); +extern void WALProhibitRequest(void); + extern void AbsorbSyncRequests(void); extern Size CheckpointerShmemSize(void); -- 2.18.0