From 74fb205fc87397671392f4f877b2466d1d19869c Mon Sep 17 00:00:00 2001 From: Kyotaro Horiguchi Date: Mon, 15 Jun 2020 16:18:23 +0900 Subject: [PATCH] Some fixes of pg_replication_slots.wal_status The colums is shown as "lost" when the segment at the given LSN has been removed by a checkpoint. But in certain situation the state can come back to "normal" state. To avoid that transition, a new state "being lost" is added, which means "the slot is nearly losing required WAL segments, but not definitely yet." Along with that change, other status words are changed as "normal"->"reserved" and "reserved"->"extended" to clarify the meaning of each word. This also fixes the state recognition logic so that the "lost" state persists after slot invalidation happens. --- doc/src/sgml/catalogs.sgml | 24 +++++++--- src/backend/access/transam/xlog.c | 58 +++++++++++++---------- src/backend/replication/slot.c | 2 + src/backend/replication/slotfuncs.c | 38 ++++++++++++--- src/include/access/xlog.h | 7 +-- src/include/replication/slot.h | 3 ++ src/test/recovery/t/019_replslot_limit.pl | 25 +++++----- 7 files changed, 103 insertions(+), 54 deletions(-) diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml index 5a66115df1..2c0b51bbd8 100644 --- a/doc/src/sgml/catalogs.sgml +++ b/doc/src/sgml/catalogs.sgml @@ -11239,19 +11239,29 @@ SELECT * FROM pg_locks pl LEFT JOIN pg_prepared_xacts ppx Possible values are: - normal means that the claimed files + reserved means that the claimed files are within max_wal_size. - reserved means + extended means that max_wal_size is exceeded but the files are - still held, either by some replication slot or - by wal_keep_segments. + still retained, either by some replication slot or + by wal_keep_segments. + - lost means that some WAL files are - definitely lost and this slot cannot be used to resume replication - anymore. + being lost means that the slot no longer + retains all required WAL files and some of them are to be removed at + the next checkpoint. This state can return + to reserved or extended + states. + + + + + lost means that some required WAL files have + been removed and this slot is no longer usable. + The last two states are seen only when diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index a1256a103b..9d94d21d5e 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -9485,15 +9485,20 @@ CreateRestartPoint(int flags) * (typically a slot's restart_lsn) * * Returns one of the following enum values: - * * WALAVAIL_NORMAL means targetLSN is available because it is in the range - * of max_wal_size. * - * * WALAVAIL_PRESERVED means it is still available by preserving extra + * * WALAVAIL_RESERVED means targetLSN is available and it is in the range of + * max_wal_size. + * + * * WALAVAIL_EXTENDED means it is still available by preserving extra * segments beyond max_wal_size. If max_slot_wal_keep_size is smaller * than max_wal_size, this state is not returned. * - * * WALAVAIL_REMOVED means it is definitely lost. A replication stream on - * a slot with this LSN cannot continue. + * * WALAVAIL_BEING_REMOVED means it is being lost and the next checkpoint will + * remove reserved segments. The walsender using this slot may return to the + * above. + * + * * WALAVAIL_REMOVED means it has been removed. A replication stream on + * a slot with this LSN cannot continue after a restart. * * * WALAVAIL_INVALID_LSN means the slot hasn't been set to reserve WAL. */ @@ -9509,13 +9514,19 @@ GetWALAvailability(XLogRecPtr targetLSN) * slot */ uint64 keepSegs; - /* slot does not reserve WAL. Either deactivated, or has never been active */ + /* + * slot does not reserve WAL. Either deactivated, or has never been + * active + */ if (XLogRecPtrIsInvalid(targetLSN)) return WALAVAIL_INVALID_LSN; currpos = GetXLogWriteRecPtr(); - /* calculate oldest segment currently needed by slots */ + /* + * calculate the oldest segment currently reserved by all slots, + * considering wal_keep_segments and max_slot_wal_keep_size + */ XLByteToSeg(targetLSN, targetSeg, wal_segment_size); KeepLogSeg(currpos, &oldestSlotSeg); @@ -9526,10 +9537,9 @@ GetWALAvailability(XLogRecPtr targetLSN) */ oldestSeg = XLogGetLastRemovedSegno() + 1; - /* calculate oldest segment by max_wal_size and wal_keep_segments */ + /* calculate oldest segment by max_wal_size */ XLByteToSeg(currpos, currSeg, wal_segment_size); - keepSegs = ConvertToXSegs(Max(max_wal_size_mb, wal_keep_segments), - wal_segment_size) + 1; + keepSegs = ConvertToXSegs(max_wal_size_mb, wal_segment_size) + 1; if (currSeg > keepSegs) oldestSegMaxWalSize = currSeg - keepSegs; @@ -9537,27 +9547,23 @@ GetWALAvailability(XLogRecPtr targetLSN) oldestSegMaxWalSize = 1; /* - * If max_slot_wal_keep_size has changed after the last call, the segment - * that would been kept by the current setting might have been lost by the - * previous setting. No point in showing normal or keeping status values - * if the targetSeg is known to be lost. + * No point in returning reserved or extended status values if the + * targetSeg is known to be lost. */ - if (targetSeg >= oldestSeg) + if (targetSeg >= oldestSlotSeg) { - /* - * show "normal" when targetSeg is within max_wal_size, even if - * max_slot_wal_keep_size is smaller than max_wal_size. - */ - if ((max_slot_wal_keep_size_mb <= 0 || - max_slot_wal_keep_size_mb >= max_wal_size_mb) && - oldestSegMaxWalSize <= targetSeg) - return WALAVAIL_NORMAL; - - /* being retained by slots */ - if (oldestSlotSeg <= targetSeg) + /* show "reserved" when targetSeg is within max_wal_size */ + if (targetSeg >= oldestSegMaxWalSize) return WALAVAIL_RESERVED; + + /* being retained by slots exceeding max_wal_size */ + return WALAVAIL_EXTENDED; } + /* WAL segments are no longer retained but haven't been removed yet */ + if (targetSeg >= oldestSeg) + return WALAVAIL_BEING_REMOVED; + /* Definitely lost */ return WALAVAIL_REMOVED; } diff --git a/src/backend/replication/slot.c b/src/backend/replication/slot.c index a7bbcf3499..a1786489ad 100644 --- a/src/backend/replication/slot.c +++ b/src/backend/replication/slot.c @@ -288,6 +288,7 @@ ReplicationSlotCreate(const char *name, bool db_specific, slot->candidate_xmin_lsn = InvalidXLogRecPtr; slot->candidate_restart_valid = InvalidXLogRecPtr; slot->candidate_restart_lsn = InvalidXLogRecPtr; + slot->last_invalidated_lsn = InvalidXLogRecPtr; /* * Create the slot on disk. We haven't actually marked the slot allocated @@ -1226,6 +1227,7 @@ restart: (uint32) restart_lsn))); SpinLockAcquire(&s->mutex); + s->last_invalidated_lsn = s->data.restart_lsn; s->data.restart_lsn = InvalidXLogRecPtr; SpinLockRelease(&s->mutex); ReplicationSlotRelease(); diff --git a/src/backend/replication/slotfuncs.c b/src/backend/replication/slotfuncs.c index 06e4955de7..3deccf3448 100644 --- a/src/backend/replication/slotfuncs.c +++ b/src/backend/replication/slotfuncs.c @@ -283,6 +283,7 @@ pg_get_replication_slots(PG_FUNCTION_ARGS) bool nulls[PG_GET_REPLICATION_SLOTS_COLS]; WALAvailability walstate; XLogSegNo last_removed_seg; + XLogRecPtr targetLSN; int i; if (!slot->in_use) @@ -342,7 +343,13 @@ pg_get_replication_slots(PG_FUNCTION_ARGS) else nulls[i++] = true; - walstate = GetWALAvailability(slot_contents.data.restart_lsn); + /* use last_invalidated_lsn when the slot is invalidated */ + if (XLogRecPtrIsInvalid(slot_contents.data.restart_lsn)) + targetLSN = slot_contents.last_invalidated_lsn; + else + targetLSN = slot_contents.data.restart_lsn; + + walstate = GetWALAvailability(targetLSN); switch (walstate) { @@ -350,16 +357,33 @@ pg_get_replication_slots(PG_FUNCTION_ARGS) nulls[i++] = true; break; - case WALAVAIL_NORMAL: - values[i++] = CStringGetTextDatum("normal"); - break; - case WALAVAIL_RESERVED: values[i++] = CStringGetTextDatum("reserved"); break; + case WALAVAIL_EXTENDED: + values[i++] = CStringGetTextDatum("extended"); + break; + + case WALAVAIL_BEING_REMOVED: + values[i++] = CStringGetTextDatum("being lost"); + break; + case WALAVAIL_REMOVED: - values[i++] = CStringGetTextDatum("lost"); + /* + * If the segment that walsender is currently reading has been + * just removed, but the walsender goes into the next segment + * just after, the state goes back to "reserved" or + * "extended". We regard that state as "being lost", rather + * than "lost" since the slot has not definitely dead yet. The + * same can happen when walsender is immediately restarted + * after invalidation. + */ + if (slot_contents.active_pid != 0) + values[i++] = CStringGetTextDatum("being lost"); + else + values[i++] = CStringGetTextDatum("lost"); + break; default: @@ -367,7 +391,7 @@ pg_get_replication_slots(PG_FUNCTION_ARGS) } if (max_slot_wal_keep_size_mb >= 0 && - (walstate == WALAVAIL_NORMAL || walstate == WALAVAIL_RESERVED) && + (walstate == WALAVAIL_RESERVED || walstate == WALAVAIL_EXTENDED) && ((last_removed_seg = XLogGetLastRemovedSegno()) != 0)) { XLogRecPtr min_safe_lsn; diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h index 347a38f57c..85c1f67e57 100644 --- a/src/include/access/xlog.h +++ b/src/include/access/xlog.h @@ -270,8 +270,9 @@ extern CheckpointStatsData CheckpointStats; typedef enum WALAvailability { WALAVAIL_INVALID_LSN, /* parameter error */ - WALAVAIL_NORMAL, /* WAL segment is within max_wal_size */ - WALAVAIL_RESERVED, /* WAL segment is reserved by a slot */ + WALAVAIL_RESERVED, /* WAL segment is within max_wal_size */ + WALAVAIL_EXTENDED, /* WAL segment is reserved by a slot */ + WALAVAIL_BEING_REMOVED, /* WAL segment is being removed */ WALAVAIL_REMOVED /* WAL segment has been removed */ } WALAvailability; @@ -326,7 +327,7 @@ extern void ShutdownXLOG(int code, Datum arg); extern void InitXLOGAccess(void); extern void CreateCheckPoint(int flags); extern bool CreateRestartPoint(int flags); -extern WALAvailability GetWALAvailability(XLogRecPtr restart_lsn); +extern WALAvailability GetWALAvailability(XLogRecPtr targetLSN); extern XLogRecPtr CalculateMaxmumSafeLSN(void); extern void XLogPutNextOid(Oid nextOid); extern XLogRecPtr XLogRestorePoint(const char *rpName); diff --git a/src/include/replication/slot.h b/src/include/replication/slot.h index 917876010e..8090ca81fe 100644 --- a/src/include/replication/slot.h +++ b/src/include/replication/slot.h @@ -156,6 +156,9 @@ typedef struct ReplicationSlot XLogRecPtr candidate_xmin_lsn; XLogRecPtr candidate_restart_valid; XLogRecPtr candidate_restart_lsn; + + /* restart_lsn is copied here when the slot is invalidated */ + XLogRecPtr last_invalidated_lsn; } ReplicationSlot; #define SlotIsPhysical(slot) ((slot)->data.database == InvalidOid) diff --git a/src/test/recovery/t/019_replslot_limit.pl b/src/test/recovery/t/019_replslot_limit.pl index cba7df920c..ac0059bc7e 100644 --- a/src/test/recovery/t/019_replslot_limit.pl +++ b/src/test/recovery/t/019_replslot_limit.pl @@ -56,7 +56,7 @@ $node_standby->stop; $result = $node_master->safe_psql('postgres', "SELECT wal_status, min_safe_lsn is NULL FROM pg_replication_slots WHERE slot_name = 'rep1'" ); -is($result, "normal|t", 'check the catching-up state'); +is($result, "reserved|t", 'check the catching-up state'); # Advance WAL by five segments (= 5MB) on master advance_wal($node_master, 1); @@ -66,7 +66,7 @@ $node_master->safe_psql('postgres', "CHECKPOINT;"); $result = $node_master->safe_psql('postgres', "SELECT wal_status, min_safe_lsn is NULL FROM pg_replication_slots WHERE slot_name = 'rep1'" ); -is($result, "normal|t", 'check that it is safe if WAL fits in max_wal_size'); +is($result, "reserved|t", 'check that it is safe if WAL fits in max_wal_size'); advance_wal($node_master, 4); $node_master->safe_psql('postgres', "CHECKPOINT;"); @@ -75,7 +75,7 @@ $node_master->safe_psql('postgres', "CHECKPOINT;"); $result = $node_master->safe_psql('postgres', "SELECT wal_status, min_safe_lsn is NULL FROM pg_replication_slots WHERE slot_name = 'rep1'" ); -is($result, "normal|t", 'check that slot is working'); +is($result, "reserved|t", 'check that slot is working'); # The standby can reconnect to master $node_standby->start; @@ -99,7 +99,7 @@ $node_master->reload; $result = $node_master->safe_psql('postgres', "SELECT wal_status FROM pg_replication_slots WHERE slot_name = 'rep1'"); -is($result, "normal", 'check that max_slot_wal_keep_size is working'); +is($result, "reserved", 'check that max_slot_wal_keep_size is working'); # Advance WAL again then checkpoint, reducing remain by 2 MB. advance_wal($node_master, 2); @@ -108,7 +108,7 @@ $node_master->safe_psql('postgres', "CHECKPOINT;"); # The slot is still working $result = $node_master->safe_psql('postgres', "SELECT wal_status FROM pg_replication_slots WHERE slot_name = 'rep1'"); -is($result, "normal", +is($result, "reserved", 'check that min_safe_lsn gets close to the current LSN'); # The standby can reconnect to master @@ -125,7 +125,7 @@ advance_wal($node_master, 6); $result = $node_master->safe_psql('postgres', "SELECT wal_status as remain FROM pg_replication_slots WHERE slot_name = 'rep1'" ); -is($result, "normal", +is($result, "extended", 'check that wal_keep_segments overrides max_slot_wal_keep_size'); # restore wal_keep_segments $result = $node_master->safe_psql('postgres', @@ -138,12 +138,15 @@ $node_master->wait_for_catchup($node_standby, 'replay', $start_lsn); $node_standby->stop; # Advance WAL again without checkpoint, reducing remain by 6 MB. +$result = $node_master->safe_psql('postgres', + "SELECT wal_status, restart_lsn, min_safe_lsn FROM pg_replication_slots WHERE slot_name = 'rep1'"); +print $result, "\n"; advance_wal($node_master, 6); # Slot gets into 'reserved' state $result = $node_master->safe_psql('postgres', "SELECT wal_status FROM pg_replication_slots WHERE slot_name = 'rep1'"); -is($result, "reserved", 'check that the slot state changes to "reserved"'); +is($result, "extended", 'check that the slot state changes to "extended"'); # do checkpoint so that the next checkpoint runs too early $node_master->safe_psql('postgres', "CHECKPOINT;"); @@ -151,11 +154,11 @@ $node_master->safe_psql('postgres', "CHECKPOINT;"); # Advance WAL again without checkpoint; remain goes to 0. advance_wal($node_master, 1); -# Slot gets into 'lost' state +# Slot gets into 'being lost' state $result = $node_master->safe_psql('postgres', "SELECT wal_status, min_safe_lsn is NULL FROM pg_replication_slots WHERE slot_name = 'rep1'" ); -is($result, "lost|t", 'check that the slot state changes to "lost"'); +is($result, "being lost|t", 'check that the slot state changes to "being lost"'); # The standby still can connect to master before a checkpoint $node_standby->start; @@ -186,7 +189,7 @@ ok( find_in_log( $result = $node_master->safe_psql('postgres', "SELECT slot_name, active, restart_lsn IS NULL, wal_status, min_safe_lsn FROM pg_replication_slots WHERE slot_name = 'rep1'" ); -is($result, "rep1|f|t||", 'check that the slot became inactive'); +is($result, "rep1|f|t|lost|", 'check that the slot became inactive and the state "lost" persists'); # The standby no longer can connect to the master $logstart = get_log_size($node_standby); @@ -259,7 +262,7 @@ sub advance_wal for (my $i = 0; $i < $n; $i++) { $node->safe_psql('postgres', - "CREATE TABLE t (); DROP TABLE t; SELECT pg_switch_wal();"); + "CREATE TABLE t (); DROP TABLE t; SELECT pg_switch_wal();"); } return; } -- 2.18.4