From deb5ca23539bd1bd8c19ec1620909ff118b350f7 Mon Sep 17 00:00:00 2001 From: Amul Sul Date: Tue, 16 Jun 2020 06:32:01 -0400 Subject: [PATCH v1 3/6] Implement ALTER SYSTEM READ ONLY using global barrier. Implementation: 1. When a user tried to change server state to WAL-Prohibited using ALTER SYSTEM READ ONLY command; AlterSystemSetWALProhibitState() will emit PROCSIGNAL_BARRIER_WAL_PROHIBIT_STATE_CHANGE barrier and will wait until the barrier has been absorbed by all the backends. 2. When a backend receives the WAL-Prohibited barrier, at that moment if it is already in a transaction and the transaction already assigned XID, then the backend will be killed by throwing FATAL(XXX: need more discussion on this) 3. Otherwise, if that backend running transaction which yet to get XID assigned we don't need to do anything special, simply call ResetLocalXLogInsertAllowed() so that any future WAL insert in will check XLogInsertAllowed() first which set ready only state appropriately. 4. A new transaction (from existing or new backend) starts as a read-only transaction. 5. Auxiliary processes like autovacuum launcher, background writer, checkpointer and walwriter will don't do anything in WAL-Prohibited server state until someone wakes us up. E.g. a backend might later on request us to put the system back to read-write. 6. At shutdown in WAL-Prohibited mode, we'll skip shutdown checkpoint and xlog rotation. Starting up again will perform crash recovery(XXX: need some discussion on this as well) 7. ALTER SYSTEM READ ONLY/WRITE is restricted on standby server. 8. Only super user can toggle WAL-Prohibit state. 9. Add system_is_read_only GUC show the system state -- will true when system is wal prohibited or in recovery. --- src/backend/access/transam/Makefile | 1 + src/backend/access/transam/walprohibit.c | 81 ++++++++++++++++++++++++ src/backend/access/transam/xact.c | 49 ++++++++------ src/backend/access/transam/xlog.c | 72 ++++++++++++++++++--- src/backend/postmaster/autovacuum.c | 4 ++ src/backend/postmaster/bgwriter.c | 2 +- src/backend/postmaster/checkpointer.c | 12 ++++ src/backend/storage/ipc/procsignal.c | 26 ++------ src/backend/tcop/utility.c | 14 +--- src/backend/utils/misc/guc.c | 26 ++++++++ src/include/access/walprohibit.h | 21 ++++++ src/include/access/xlog.h | 3 + src/include/storage/procsignal.h | 7 +- 13 files changed, 246 insertions(+), 72 deletions(-) create mode 100644 src/backend/access/transam/walprohibit.c create mode 100644 src/include/access/walprohibit.h diff --git a/src/backend/access/transam/Makefile b/src/backend/access/transam/Makefile index 595e02de722..b5322a69954 100644 --- a/src/backend/access/transam/Makefile +++ b/src/backend/access/transam/Makefile @@ -26,6 +26,7 @@ OBJS = \ twophase.o \ twophase_rmgr.o \ varsup.o \ + walprohibit.o \ xact.o \ xlog.o \ xlogarchive.o \ diff --git a/src/backend/access/transam/walprohibit.c b/src/backend/access/transam/walprohibit.c new file mode 100644 index 00000000000..df97596ddf9 --- /dev/null +++ b/src/backend/access/transam/walprohibit.c @@ -0,0 +1,81 @@ +/*------------------------------------------------------------------------- + * + * walprohibit.c + * PostgreSQL write-ahead log prohibit states + * + * + * Portions Copyright (c) 2020, PostgreSQL Global Development Group + * + * src/backend/access/transam/walprohibit.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/walprohibit.h" +#include "postmaster/bgwriter.h" +#include "storage/procsignal.h" + +/* + * ProcessBarrierWALProhibit() + * + * Handle WAL prohibit state change request. + */ +bool +ProcessBarrierWALProhibit(void) +{ + /* + * Kill off any transactions that have an XID *before* allowing the system + * to go WAL prohibit state. + */ + if (FullTransactionIdIsValid(GetTopFullTransactionIdIfAny())) + { + /* + * XXX: Kill off the whole session by throwing FATAL instead of killing + * transaction by throwing ERROR due to following reasons that need be + * thought: + * + * 1. Due to some presents challenges with the wire protocol, we could + * not simply kill of idle transaction. + * + * 2. If we are here in subtransaction then the ERROR will kill the + * current subtransaction only. In the case of invalidations, that + * might be good enough, but for XID assignment it's not, because + * assigning an XID to a subtransaction also causes higher + * sub-transaction levels and the parent transaction to get XIDs. + */ + ereport(FATAL, + (errcode(ERRCODE_ACTIVE_SQL_TRANSACTION), + errmsg("system is now read only"), + errhint("Cannot continue a transaction if it has performed writes while system is read only."))); + } + + /* Return to "check" state */ + ResetLocalXLogInsertAllowed(); + + return true; +} + +/* + * AlterSystemSetWALProhibitState + * + * Execute ALTER SYSTEM READ { ONLY | WRITE } statement. + */ +void +AlterSystemSetWALProhibitState(AlterSystemWALProhibitState *stmt) +{ + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser to execute ALTER SYSTEM command"))); + + /* Alter WAL prohibit state not allowed during recovery */ + PreventCommandDuringRecovery("ALTER SYSTEM"); + + /* Yet to add ALTER SYTEM READ WRITE support */ + if (!stmt->WALProhibited) + elog(ERROR, "XXX: Yet to implement"); + + MakeReadOnlyXLOG(); + WaitForProcSignalBarrier(EmitProcSignalBarrier(PROCSIGNAL_BARRIER_WALPROHIBIT)); +} diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c index cd30b62d365..cf15eca53ef 100644 --- a/src/backend/access/transam/xact.c +++ b/src/backend/access/transam/xact.c @@ -1935,23 +1935,28 @@ StartTransaction(void) Assert(s->prevSecContext == 0); /* - * Make sure we've reset xact state variables + * Reset xact state variables. * - * If recovery is still in progress, mark this transaction as read-only. - * We have lower level defences in XLogInsert and elsewhere to stop us - * from modifying data during recovery, but this gives the normal - * indication to the user that the transaction is read-only. - */ - if (RecoveryInProgress()) - { - s->startedInRecovery = true; - XactReadOnly = true; - } - else - { - s->startedInRecovery = false; - XactReadOnly = DefaultXactReadOnly; - } + * If it is not currently possible to insert write-ahead log records, + * either because we are still in recovery or because ALTER SYSTEM READ + * ONLY has been executed, force this to be a read-only transaction. + * We have lower level defences in XLogBeginInsert() and elsewhere to stop + * us from modifying data during recovery when !XLogInsertAllowed(), but + * this gives the normal indication to the user that the transaction is + * read-only. + * + * On the other hand, we only need to set the startedInRecovery flag when + * the transaction started during recovery, and not when WAL is otherwise + * prohibited. This information is used by RelationGetIndexScan() to + * decide whether to permit (1) relying on existing killed-tuple markings + * and (2) further killing of index tuples. Even when WAL is prohibited + * on the master, it's still the master, so the former is OK; and since + * killing index tuples doesn't generate WAL, the latter is also OK. + * See comments in RelationGetIndexScan() and MarkBufferDirtyHint(). + */ + XactReadOnly = DefaultXactReadOnly || !XLogInsertAllowed(); + s->startedInRecovery = RecoveryInProgress(); + XactDeferrable = DefaultXactDeferrable; XactIsoLevel = DefaultXactIsoLevel; forceSyncCommit = false; @@ -4873,9 +4878,11 @@ CommitSubTransaction(void) /* * We need to restore the upper transaction's read-only state, in case the * upper is read-write while the child is read-only; GUC will incorrectly - * think it should leave the child state in place. + * think it should leave the child state in place. Note that the upper + * transaction will be a force to ready-only irrespective of its previous + * status if the server state is WAL prohibited. */ - XactReadOnly = s->prevXactReadOnly; + XactReadOnly = s->prevXactReadOnly || !XLogInsertAllowed(); CurrentResourceOwner = s->parent->curTransactionOwner; CurTransactionResourceOwner = s->parent->curTransactionOwner; @@ -5031,9 +5038,11 @@ AbortSubTransaction(void) /* * Restore the upper transaction's read-only state, too. This should be * redundant with GUC's cleanup but we may as well do it for consistency - * with the commit case. + * with the commit case. Note that the upper transaction will be a force + * to ready-only irrespective of its previous status if the server state is + * WAL prohibited. */ - XactReadOnly = s->prevXactReadOnly; + XactReadOnly = s->prevXactReadOnly || !XLogInsertAllowed(); RESUME_INTERRUPTS(); } diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 55cac186dc7..83919bdb1f0 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -245,9 +245,10 @@ static bool LocalPromoteIsTriggered = false; * 0: unconditionally not allowed to insert XLOG * -1: must check RecoveryInProgress(); disallow until it is false * Most processes start with -1 and transition to 1 after seeing that recovery - * is not in progress. But we can also force the value for special cases. - * The coding in XLogInsertAllowed() depends on the first two of these states - * being numerically the same as bool true and false. + * is not in progress or the server state is not a WAL prohibited state. But + * we can also force the value for special cases. The coding in + * XLogInsertAllowed() depends on the first two of these states being + * numerically the same as bool true and false. */ static int LocalXLogInsertAllowed = -1; @@ -659,6 +660,12 @@ typedef struct XLogCtlData */ RecoveryState SharedRecoveryState; + /* + * WALProhibited indicates if we have stopped allowing WAL writes. + * Protected by info_lck. + */ + bool WALProhibited; + /* * SharedHotStandbyActive indicates if we allow hot standby queries to be * run. Protected by info_lck. @@ -7959,6 +7966,25 @@ StartupXLOG(void) RequestCheckpoint(CHECKPOINT_FORCE); } +void +MakeReadOnlyXLOG(void) +{ + SpinLockAcquire(&XLogCtl->info_lck); + XLogCtl->WALProhibited = true; + SpinLockRelease(&XLogCtl->info_lck); +} + +/* + * Is the system still in WAL prohibited state? + */ +bool +IsWALProhibited(void) +{ + volatile XLogCtlData *xlogctl = XLogCtl; + + return xlogctl->WALProhibited; +} + /* * Checks if recovery has reached a consistent state. When consistency is * reached and we have a valid starting standby snapshot, tell postmaster @@ -8174,9 +8200,9 @@ HotStandbyActiveInReplay(void) /* * Is this process allowed to insert new WAL records? * - * Ordinarily this is essentially equivalent to !RecoveryInProgress(). - * But we also have provisions for forcing the result "true" or "false" - * within specific processes regardless of the global state. + * Ordinarily this is essentially equivalent to !RecoveryInProgress() and + * !IsWALProhibited(). But we also have provisions for forcing the result + * "true" or "false" within specific processes regardless of the global state. */ bool XLogInsertAllowed(void) @@ -8190,14 +8216,25 @@ XLogInsertAllowed(void) return (bool) LocalXLogInsertAllowed; /* - * Else, must check to see if we're still in recovery. + * Else, must check to see if we're still in recovery */ if (RecoveryInProgress()) return false; + /* Or, in WAL prohibited state */ + if (IsWALProhibited()) + { + /* + * Set it to "unconditionally false" to avoid checking until it gets + * reset. + */ + LocalXLogInsertAllowed = 0; + return false; + } + /* - * On exit from recovery, reset to "unconditionally true", since there is - * no need to keep checking. + * On exit from recovery or WAL prohibited state, reset to "unconditionally + * true", since there is no need to keep checking. */ LocalXLogInsertAllowed = 1; return true; @@ -8213,12 +8250,20 @@ static void LocalSetXLogInsertAllowed(void) { Assert(LocalXLogInsertAllowed == -1); + Assert(!IsWALProhibited()); + LocalXLogInsertAllowed = 1; /* Initialize as RecoveryInProgress() would do when switching state */ InitXLOGAccess(); } +void +ResetLocalXLogInsertAllowed(void) +{ + LocalXLogInsertAllowed = -1; +} + /* * Subroutine to try to fetch and validate a prior checkpoint record. * @@ -8509,7 +8554,10 @@ ShutdownXLOG(int code, Datum arg) if (RecoveryInProgress()) CreateRestartPoint(CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_IMMEDIATE); - else + /* + * Can't perform checkpoint or xlog rotation without writing WAL. + */ + else if (XLogInsertAllowed()) { /* * If archiving is enabled, rotate the last XLOG file so that all the @@ -8522,6 +8570,10 @@ ShutdownXLOG(int code, Datum arg) CreateCheckPoint(CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_IMMEDIATE); } + else + ereport(LOG, + (errmsg("skipping shutdown checkpoint because the system is read only"))); + ShutdownCLOG(); ShutdownCommitTs(); ShutdownSUBTRANS(); diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c index 9c7d4b0c60e..f83f86994db 100644 --- a/src/backend/postmaster/autovacuum.c +++ b/src/backend/postmaster/autovacuum.c @@ -652,6 +652,10 @@ AutoVacLauncherMain(int argc, char *argv[]) HandleAutoVacLauncherInterrupts(); + /* If the server is read only just go back to sleep. */ + if (!XLogInsertAllowed()) + continue; + /* * a worker finished, or postmaster signaled failure to start a worker */ diff --git a/src/backend/postmaster/bgwriter.c b/src/backend/postmaster/bgwriter.c index 069e27e427f..6c6ff7dc3af 100644 --- a/src/backend/postmaster/bgwriter.c +++ b/src/backend/postmaster/bgwriter.c @@ -268,7 +268,7 @@ BackgroundWriterMain(void) * Checkpointer, when active, is barely ever in its mainloop and thus * makes it hard to log regularly. */ - if (XLogStandbyInfoActive() && !RecoveryInProgress()) + if (XLogStandbyInfoActive() && XLogInsertAllowed()) { TimestampTz timeout = 0; TimestampTz now = GetCurrentTimestamp(); diff --git a/src/backend/postmaster/checkpointer.c b/src/backend/postmaster/checkpointer.c index 624a3238b80..5e5e56d4eec 100644 --- a/src/backend/postmaster/checkpointer.c +++ b/src/backend/postmaster/checkpointer.c @@ -342,6 +342,18 @@ CheckpointerMain(void) AbsorbSyncRequests(); HandleCheckpointerInterrupts(); + /* + * If the server is in WAL-Prohibited state then don't do anything until + * someone wakes us up. E.g. a backend might later on request us to put + * the system back to read-write. + */ + if (IsWALProhibited()) + { + (void) WaitLatch(MyLatch, WL_LATCH_SET | WL_EXIT_ON_PM_DEATH, -1, + WAIT_EVENT_CHECKPOINTER_MAIN); + continue; + } + /* * Detect a pending checkpoint request by checking whether the flags * word in shared memory is nonzero. We shouldn't need to acquire the diff --git a/src/backend/storage/ipc/procsignal.c b/src/backend/storage/ipc/procsignal.c index 27eebdafda1..0bc9f778909 100644 --- a/src/backend/storage/ipc/procsignal.c +++ b/src/backend/storage/ipc/procsignal.c @@ -18,6 +18,7 @@ #include #include "access/parallel.h" +#include "access/walprohibit.h" #include "commands/async.h" #include "miscadmin.h" #include "pgstat.h" @@ -96,7 +97,6 @@ static volatile ProcSignalSlot *MyProcSignalSlot = NULL; static bool CheckProcSignal(ProcSignalReason reason); static void CleanupProcSignalState(int status, Datum arg); -static bool ProcessBarrierPlaceholder(void); /* * ProcSignalShmemSize @@ -471,9 +471,9 @@ ProcessProcSignalBarrier(void) * unconditionally, but it's more efficient to call only the ones * that might need us to do something based on the flags. */ - if (BARRIER_SHOULD_CHECK(flags, PROCSIGNAL_BARRIER_PLACEHOLDER) - && ProcessBarrierPlaceholder()) - BARRIER_CLEAR_BIT(flags, PROCSIGNAL_BARRIER_PLACEHOLDER); + if (BARRIER_SHOULD_CHECK(flags, PROCSIGNAL_BARRIER_WALPROHIBIT) + && ProcessBarrierWALProhibit()) + BARRIER_CLEAR_BIT(flags, PROCSIGNAL_BARRIER_WALPROHIBIT); } PG_CATCH(); { @@ -515,24 +515,6 @@ ProcessProcSignalBarrier(void) pg_atomic_write_u64(&MyProcSignalSlot->pss_barrierGeneration, generation); } -static bool -ProcessBarrierPlaceholder(void) -{ - /* - * XXX. This is just a placeholder until the first real user of this - * machinery gets committed. Rename PROCSIGNAL_BARRIER_PLACEHOLDER to - * PROCSIGNAL_BARRIER_SOMETHING_ELSE where SOMETHING_ELSE is something - * appropriately descriptive. Get rid of this function and instead have - * ProcessBarrierSomethingElse. Most likely, that function should live in - * the file pertaining to that subsystem, rather than here. - * - * The return value should be 'true' if the barrier was successfully - * absorbed and 'false' if not. Note that returning 'false' can lead to - * very frequent retries, so try hard to make that an uncommon case. - */ - return true; -} - /* * CheckProcSignal - check to see if a particular reason has been * signaled, and clear the signal flag. Should be called after receiving diff --git a/src/backend/tcop/utility.c b/src/backend/tcop/utility.c index 900088a2209..2767cf18c68 100644 --- a/src/backend/tcop/utility.c +++ b/src/backend/tcop/utility.c @@ -19,6 +19,7 @@ #include "access/htup_details.h" #include "access/reloptions.h" #include "access/twophase.h" +#include "access/walprohibit.h" #include "access/xact.h" #include "access/xlog.h" #include "catalog/catalog.h" @@ -85,7 +86,6 @@ static void ProcessUtilitySlow(ParseState *pstate, DestReceiver *dest, QueryCompletion *qc); static void ExecDropStmt(DropStmt *stmt, bool isTopLevel); -static void AlterSystemSetWALProhibitState(AlterSystemWALProhibitState *stmt); /* * CommandIsReadOnly: is an executable query read-only? @@ -3644,15 +3644,3 @@ GetCommandLogLevel(Node *parsetree) return lev; } - -/* - * AlterSystemSetWALProhibitState - * - * Execute ALTER SYSTEM READ { ONLY | WRITE } statement. - */ -static void -AlterSystemSetWALProhibitState(AlterSystemWALProhibitState *stmt) -{ - /* some code */ - elog(INFO, "AlterSystemSetWALProhibitState() called"); -} diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index 75fc6f11d6a..58b56eac21f 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -221,6 +221,7 @@ static bool check_recovery_target_lsn(char **newval, void **extra, GucSource sou static void assign_recovery_target_lsn(const char *newval, void *extra); static bool check_primary_slot_name(char **newval, void **extra, GucSource source); static bool check_default_with_oids(bool *newval, void **extra, GucSource source); +static const char *show_system_is_read_only(void); /* Private functions in guc-file.l that need to be called from guc.c */ static ConfigVariable *ProcessConfigFileInternal(GucContext context, @@ -610,6 +611,7 @@ static char *recovery_target_string; static char *recovery_target_xid_string; static char *recovery_target_name_string; static char *recovery_target_lsn_string; +static bool system_is_read_only; /* should be static, but commands/variable.c needs to get at this */ @@ -2041,6 +2043,18 @@ static struct config_bool ConfigureNamesBool[] = NULL, NULL, NULL }, + { + /* Not for general use */ + {"system_is_read_only", PGC_INTERNAL, WAL, + gettext_noop("Shows whether the system is read only."), + NULL, + GUC_NO_RESET_ALL | GUC_NOT_IN_SAMPLE | GUC_DISALLOW_IN_FILE + }, + &system_is_read_only, + false, + NULL, NULL, show_system_is_read_only + }, + /* End-of-list marker */ { {NULL, 0, 0, NULL, NULL}, NULL, false, NULL, NULL, NULL @@ -11998,4 +12012,16 @@ check_default_with_oids(bool *newval, void **extra, GucSource source) return true; } +/* + * NB: The return string should be the same as the _ShowOption() for boolean + * type. + */ + static const char * + show_system_is_read_only(void) +{ + if (!XLogInsertAllowed()) + return "on"; + return "off"; +} + #include "guc-file.c" diff --git a/src/include/access/walprohibit.h b/src/include/access/walprohibit.h new file mode 100644 index 00000000000..619c33cd780 --- /dev/null +++ b/src/include/access/walprohibit.h @@ -0,0 +1,21 @@ +/* + * walprohibit.h + * + * PostgreSQL write-ahead log prohibit states + * + * Portions Copyright (c) 2020, PostgreSQL Global Development Group + * + * src/include/access/walprohibit.h + */ +#ifndef WALPROHIBIT_H +#define WALPROHIBIT_H + +#include "access/xact.h" +#include "access/xlog.h" +#include "miscadmin.h" +#include "nodes/parsenodes.h" + +extern bool ProcessBarrierWALProhibit(void); +extern void AlterSystemSetWALProhibitState(AlterSystemWALProhibitState *stmt); + +#endif /* WALPROHIBIT_H */ diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h index e917dfe92d8..ca7ae766e3f 100644 --- a/src/include/access/xlog.h +++ b/src/include/access/xlog.h @@ -298,11 +298,13 @@ extern const char *xlog_identify(uint8 info); extern void issue_xlog_fsync(int fd, XLogSegNo segno); +extern bool IsWALProhibited(void); extern bool RecoveryInProgress(void); extern RecoveryState GetRecoveryState(void); extern bool HotStandbyActive(void); extern bool HotStandbyActiveInReplay(void); extern bool XLogInsertAllowed(void); +extern void ResetLocalXLogInsertAllowed(void); extern void GetXLogReceiptTime(TimestampTz *rtime, bool *fromStream); extern XLogRecPtr GetXLogReplayRecPtr(TimeLineID *replayTLI); extern XLogRecPtr GetXLogInsertRecPtr(void); @@ -322,6 +324,7 @@ extern void XLOGShmemInit(void); extern void BootStrapXLOG(void); extern void LocalProcessControlFile(bool reset); extern void StartupXLOG(void); +extern void MakeReadOnlyXLOG(void); extern void ShutdownXLOG(int code, Datum arg); extern void InitXLOGAccess(void); extern void CreateCheckPoint(int flags); diff --git a/src/include/storage/procsignal.h b/src/include/storage/procsignal.h index a0c0bc3ce55..c425f1ccf48 100644 --- a/src/include/storage/procsignal.h +++ b/src/include/storage/procsignal.h @@ -47,12 +47,7 @@ typedef enum typedef enum { - /* - * XXX. PROCSIGNAL_BARRIER_PLACEHOLDER should be replaced when the first - * real user of the ProcSignalBarrier mechanism is added. It's just here - * for now because we can't have an empty enum. - */ - PROCSIGNAL_BARRIER_PLACEHOLDER = 0 + PROCSIGNAL_BARRIER_WALPROHIBIT = 0 } ProcSignalBarrierType; /* -- 2.18.0