From 810b8b881fae4574d654455b757c40c5622d4b6e Mon Sep 17 00:00:00 2001 From: Mark Dilger Date: Wed, 21 Oct 2020 20:26:43 -0700 Subject: [PATCH v20 4/5] Using non-throwing clog interface from amcheck Converting the heap checking functions to use the recently introduced non-throwing interface to clog when checking transaction commit status, and adding corruption reports about missing clog rather than aborting. --- contrib/amcheck/verify_heapam.c | 72 ++++++++----- contrib/pg_amcheck/t/006_clog_truncation.pl | 111 ++++++++++++++++++++ src/tools/pgindent/typedefs.list | 1 - 3 files changed, 154 insertions(+), 30 deletions(-) create mode 100644 contrib/pg_amcheck/t/006_clog_truncation.pl diff --git a/contrib/amcheck/verify_heapam.c b/contrib/amcheck/verify_heapam.c index 0156c1e74a..a42b74ed46 100644 --- a/contrib/amcheck/verify_heapam.c +++ b/contrib/amcheck/verify_heapam.c @@ -10,6 +10,7 @@ */ #include "postgres.h" +#include "access/clogdefs.h" #include "access/detoast.h" #include "access/genam.h" #include "access/heapam.h" @@ -43,13 +44,6 @@ typedef enum XidBoundsViolation XID_BOUNDS_OK } XidBoundsViolation; -typedef enum XidCommitStatus -{ - XID_COMMITTED, - XID_IN_PROGRESS, - XID_ABORTED -} XidCommitStatus; - typedef enum SkipPages { SKIP_PAGES_ALL_FROZEN, @@ -83,7 +77,7 @@ typedef struct HeapCheckContext * Cached copies of the most recently checked xid and its status. */ TransactionId cached_xid; - XidCommitStatus cached_status; + XidStatus cached_status; /* Values concerning the heap relation being checked */ Relation rel; @@ -147,7 +141,7 @@ static XidBoundsViolation check_mxid_valid_in_rel(MultiXactId mxid, HeapCheckContext *ctx); static XidBoundsViolation get_xid_status(TransactionId xid, HeapCheckContext *ctx, - XidCommitStatus *status); + XidStatus *status); /* * Scan and report corruption in heap pages, optionally reconciling toasted @@ -631,7 +625,7 @@ check_tuple_header_and_visibilty(HeapTupleHeader tuphdr, HeapCheckContext *ctx) else if (infomask & HEAP_MOVED_OFF || infomask & HEAP_MOVED_IN) { - XidCommitStatus status; + XidStatus status; TransactionId xvac = HeapTupleHeaderGetXvac(tuphdr); switch (get_xid_status(xvac, ctx, &status)) @@ -666,17 +660,25 @@ check_tuple_header_and_visibilty(HeapTupleHeader tuphdr, HeapCheckContext *ctx) case XID_BOUNDS_OK: switch (status) { - case XID_IN_PROGRESS: + case TRANSACTION_STATUS_IN_PROGRESS: return true; /* HEAPTUPLE_DELETE_IN_PROGRESS */ - case XID_COMMITTED: - case XID_ABORTED: + case TRANSACTION_STATUS_COMMITTED: + case TRANSACTION_STATUS_ABORTED: return false; /* HEAPTUPLE_DEAD */ + case TRANSACTION_STATUS_UNKNOWN: + report_corruption(ctx, + psprintf("old-style VACUUM FULL transaction ID %u transaction status is lost", + xvac)); + return false; /* corruption */ + case TRANSACTION_STATUS_SUB_COMMITTED: + elog(ERROR, "get_xid_status failed to resolve parent transaction status"); + return false; /* not reached */ } } } else { - XidCommitStatus status; + XidStatus status; switch (get_xid_status(raw_xmin, ctx, &status)) { @@ -708,12 +710,20 @@ check_tuple_header_and_visibilty(HeapTupleHeader tuphdr, HeapCheckContext *ctx) case XID_BOUNDS_OK: switch (status) { - case XID_COMMITTED: + case TRANSACTION_STATUS_COMMITTED: break; - case XID_IN_PROGRESS: + case TRANSACTION_STATUS_IN_PROGRESS: return true; /* insert or delete in progress */ - case XID_ABORTED: + case TRANSACTION_STATUS_ABORTED: return false; /* HEAPTUPLE_DEAD */ + case TRANSACTION_STATUS_UNKNOWN: + report_corruption(ctx, + psprintf("raw xmin %u transaction status is lost", + raw_xmin)); + return false; /* corruption */ + case TRANSACTION_STATUS_SUB_COMMITTED: + elog(ERROR, "get_xid_status failed to resolve parent transaction status"); + return false; /* not reached */ } } } @@ -723,7 +733,7 @@ check_tuple_header_and_visibilty(HeapTupleHeader tuphdr, HeapCheckContext *ctx) { if (infomask & HEAP_XMAX_IS_MULTI) { - XidCommitStatus status; + XidStatus status; TransactionId xmax = HeapTupleGetUpdateXid(tuphdr); switch (get_xid_status(xmax, ctx, &status)) @@ -757,12 +767,20 @@ check_tuple_header_and_visibilty(HeapTupleHeader tuphdr, HeapCheckContext *ctx) case XID_BOUNDS_OK: switch (status) { - case XID_IN_PROGRESS: + case TRANSACTION_STATUS_IN_PROGRESS: return true; /* HEAPTUPLE_DELETE_IN_PROGRESS */ - case XID_COMMITTED: - case XID_ABORTED: + case TRANSACTION_STATUS_COMMITTED: + case TRANSACTION_STATUS_ABORTED: return false; /* HEAPTUPLE_RECENTLY_DEAD or * HEAPTUPLE_DEAD */ + case TRANSACTION_STATUS_UNKNOWN: + report_corruption(ctx, + psprintf("xmax %u transaction status is lost", + xmax)); + return false; /* corruption */ + case TRANSACTION_STATUS_SUB_COMMITTED: + elog(ERROR, "get_xid_status failed to resolve parent transaction status"); + return false; /* not reached */ } } @@ -1373,7 +1391,7 @@ check_mxid_valid_in_rel(MultiXactId mxid, HeapCheckContext *ctx) */ static XidBoundsViolation get_xid_status(TransactionId xid, HeapCheckContext *ctx, - XidCommitStatus *status) + XidStatus *status) { XidBoundsViolation result; FullTransactionId fxid; @@ -1424,7 +1442,7 @@ get_xid_status(TransactionId xid, HeapCheckContext *ctx, return result; } - *status = XID_COMMITTED; + *status = TRANSACTION_STATUS_COMMITTED; LWLockAcquire(XactTruncationLock, LW_SHARED); clog_horizon = FullTransactionIdFromXidAndCtx(ShmemVariableCache->oldestClogXid, @@ -1432,13 +1450,9 @@ get_xid_status(TransactionId xid, HeapCheckContext *ctx, if (FullTransactionIdPrecedesOrEquals(clog_horizon, fxid)) { if (TransactionIdIsCurrentTransactionId(xid)) - *status = XID_IN_PROGRESS; - else if (TransactionIdDidCommit(xid)) - *status = XID_COMMITTED; - else if (TransactionIdDidAbort(xid)) - *status = XID_ABORTED; + *status = TRANSACTION_STATUS_IN_PROGRESS; else - *status = XID_IN_PROGRESS; + *status = TransactionIdResolveStatus(xid, false); } LWLockRelease(XactTruncationLock); ctx->cached_xid = xid; diff --git a/contrib/pg_amcheck/t/006_clog_truncation.pl b/contrib/pg_amcheck/t/006_clog_truncation.pl new file mode 100644 index 0000000000..f205ae7ede --- /dev/null +++ b/contrib/pg_amcheck/t/006_clog_truncation.pl @@ -0,0 +1,111 @@ +# This regression test checks the behavior of the heap validation in the +# presence of clog corruption. + +use strict; +use warnings; + +use PostgresNode; +use TestLib; + +use Test::More tests => 3; + +my ($node, $pgdata, $clogdir); + +sub count_clog_files +{ + my $result = 0; + opendir(DIR, $clogdir) or die "Cannot opendir $clogdir: $!"; + while (my $fname = readdir(DIR)) + { + $result++ if (-f "$clogdir/$fname"); + } + closedir(DIR); + return $result; +} + +# Burn through enough xids that at least three clog files exists in pg_xact/ +sub create_three_clog_files +{ + print STDERR "Generating clog entries....\n"; + + $node->safe_psql('postgres', q( + CREATE PROCEDURE burn_xids () + LANGUAGE plpgsql + AS $$ + DECLARE + loopcnt BIGINT; + BEGIN + FOR loopcnt IN 1..32768 + LOOP + PERFORM txid_current(); + COMMIT; + END LOOP; + END; + $$; + )); + + do { + $node->safe_psql('postgres', 'INSERT INTO test_0 (i) VALUES (0)'); + $node->safe_psql('postgres', 'CALL burn_xids()'); + print STDERR "Burned transaction ids...\n"; + $node->safe_psql('postgres', 'INSERT INTO test_1 (i) VALUES (1)'); + } while (count_clog_files() < 3); +} + +# Of the clog files in pg_xact, remove the second one, sorted by name order. +# This function, used along with create_three_clog_files(), is intended to +# remove neither the newest nor the oldest clog file. Experimentation shows +# that removing the newest clog file works ok, but for future-proofing, remove +# one less likely to be checked at server startup. +sub unlink_second_clog_file +{ + my @paths; + opendir(DIR, $clogdir) or die "Cannot opendir $clogdir: $!"; + while (my $fname = readdir(DIR)) + { + my $path = "$clogdir/$fname"; + next unless -f $path; + push @paths, $path; + } + closedir(DIR); + + my @ordered = sort { $a cmp $b } @paths; + unlink $ordered[1]; +} + +# Set umask so test directories and files are created with default permissions +umask(0077); + +# Set up the node. Once we corrupt clog, autovacuum workers visiting tables +# could crash the backend. Disable autovacuum so that won't happen. +$node = get_new_node('test'); +$node->init; +$node->append_conf('postgresql.conf', 'autovacuum=off'); +$pgdata = $node->data_dir; +$clogdir = join('/', $pgdata, 'pg_xact'); +$node->start; +$node->safe_psql('postgres', "CREATE EXTENSION amcheck"); +$node->safe_psql('postgres', "CREATE TABLE test_0 (i INTEGER)"); +$node->safe_psql('postgres', "CREATE TABLE test_1 (i INTEGER)"); +$node->safe_psql('postgres', "VACUUM FREEZE"); + +create_three_clog_files(); + +# Corruptly delete a clog file +$node->stop; +unlink_second_clog_file(); +$node->start; + +my $port = $node->port; + +# Run pg_amcheck against the corrupt database, looking for clog related +# corruption messages +$node->command_checks_all( + ['pg_amcheck', '--check-toast', '--skip-indexes', '-p', $port, 'postgres'], + 0, + [ qr/transaction status is lost/ ], + [ qr/^$/ ], + 'Expected corruption message output'); + +$node->teardown_node; +$node->clean_node; diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index 2408bb2bf6..c1144c1a92 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -2796,7 +2796,6 @@ XactCallbackItem XactEvent XactLockTableWaitInfo XidBoundsViolation -XidCommitStatus XidHorizonPrefetchState XidStatus XmlExpr -- 2.21.1 (Apple Git-122.3)