diff --git a/src/include/storage/bufpage.h b/src/include/storage/bufpage.h index 51b8f994ac..1a28ee1130 100644 --- a/src/include/storage/bufpage.h +++ b/src/include/storage/bufpage.h @@ -404,26 +404,33 @@ do { \ * extern declarations * ---------------------------------------------------------------- */ + +/* flags for PageAddItemExtended() */ #define PAI_OVERWRITE (1 << 0) #define PAI_IS_HEAP (1 << 1) +/* flags for PageIsVerifiedExtended() */ +#define PIV_LOG_WARNING (1 << 0) +#define PIV_REPORT_STAT (1 << 1) + #define PageAddItem(page, item, size, offsetNumber, overwrite, is_heap) \ PageAddItemExtended(page, item, size, offsetNumber, \ ((overwrite) ? PAI_OVERWRITE : 0) | \ ((is_heap) ? PAI_IS_HEAP : 0)) /* - * Check that BLCKSZ is a multiple of sizeof(size_t). In PageIsVerified(), - * it is much faster to check if a page is full of zeroes using the native - * word size. Note that this assertion is kept within a header to make - * sure that StaticAssertDecl() works across various combinations of - * platforms and compilers. + * Check that BLCKSZ is a multiple of sizeof(size_t). In + * PageIsVerifiedExtended(), it is much faster to check if a page is + * full of zeroes using the native word size. Note that this assertion + * is kept within a header to make sure that StaticAssertDecl() works + * across various combinations of platforms and compilers. 
*/ StaticAssertDecl(BLCKSZ == ((BLCKSZ / sizeof(size_t)) * sizeof(size_t)), "BLCKSZ has to be a multiple of sizeof(size_t)"); extern void PageInit(Page page, Size pageSize, Size specialSize); extern bool PageIsVerified(Page page, BlockNumber blkno); +extern bool PageIsVerifiedExtended(Page page, BlockNumber blkno, int flags); extern OffsetNumber PageAddItemExtended(Page page, Item item, Size size, OffsetNumber offsetNumber, int flags); extern Page PageGetTempPage(Page page); diff --git a/src/backend/catalog/storage.c b/src/backend/catalog/storage.c index dbbd3aa31f..d538f25726 100644 --- a/src/backend/catalog/storage.c +++ b/src/backend/catalog/storage.c @@ -443,7 +443,8 @@ RelationCopyStorage(SMgrRelation src, SMgrRelation dst, smgrread(src, forkNum, blkno, buf.data); - if (!PageIsVerified(page, blkno)) + if (!PageIsVerifiedExtended(page, blkno, + PIV_LOG_WARNING | PIV_REPORT_STAT)) ereport(ERROR, (errcode(ERRCODE_DATA_CORRUPTED), errmsg("invalid page in block %u of relation %s", diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c index b89df01fa7..6f717c8956 100644 --- a/src/backend/replication/basebackup.c +++ b/src/backend/replication/basebackup.c @@ -1569,21 +1569,24 @@ sendFile(const char *readfilename, const char *tarfilename, { int fd; BlockNumber blkno = 0; - bool block_retry = false; + int block_attempts = 0; char buf[TAR_SEND_SIZE]; - uint16 checksum; int checksum_failures = 0; off_t cnt; int i; pgoff_t len = 0; char *page; size_t pad; - PageHeader phdr; int segmentno = 0; char *segmentpath; bool verify_checksum = false; pg_checksum_context checksum_ctx; + /* Maximum number of checksum failures reported for this file */ +#define CHECKSUM_REPORT_THRESHOLD 5 + /* maximum number of retries done for a single page */ +#define PAGE_RETRY_THRESHOLD 5 + pg_checksum_init(&checksum_ctx, manifest->checksum_type); fd = OpenTransientFile(readfilename, O_RDONLY | PG_BINARY); @@ -1661,9 +1664,7 @@ sendFile(const char *readfilename, 
const char *tarfilename, if (verify_checksum && (cnt % BLCKSZ != 0)) { ereport(WARNING, - (errmsg("could not verify checksum in file \"%s\", block " - "%d: read buffer size %d and page size %d " - "differ", + (errmsg("could not verify checksum in file \"%s\", block %u: read buffer size %d and page size %d differ", readfilename, blkno, (int) cnt, BLCKSZ))); verify_checksum = false; } @@ -1675,78 +1676,75 @@ sendFile(const char *readfilename, const char *tarfilename, page = buf + BLCKSZ * i; /* - * Only check pages which have not been modified since the - * start of the base backup. Otherwise, they might have been - * written only halfway and the checksum would not be valid. - * However, replaying WAL would reinstate the correct page in - * this case. We also skip completely new pages, since they - * don't have a checksum yet. + * Check on-disk pages with the same set of verification + * conditions used before loading pages into shared buffers. + * Note that PageIsVerifiedExtended() considers pages filled + * only with zeros as valid, since they don't have a checksum + * yet. Failures are not reported to pgstat yet, as these are + * reported in a single batch once a file is completed. It + * is possible that a page is written halfway while + * doing this check, causing a false positive. If that + * happens, the page is retried up to PAGE_RETRY_THRESHOLD + * times, with a failure reported if all the attempts fail. 
*/ - if (!PageIsNew(page) && PageGetLSN(page) < startptr) + if (PageIsVerifiedExtended(page, blkno, 0)) { - checksum = pg_checksum_page((char *) page, blkno + segmentno * RELSEG_SIZE); - phdr = (PageHeader) page; - if (phdr->pd_checksum != checksum) + /* success, move on to the next block */ + blkno++; + block_attempts = 0; + continue; + } + + /* Verification of the page failed, retry up to PAGE_RETRY_THRESHOLD times */ + if (block_attempts < PAGE_RETRY_THRESHOLD) + { + int reread_cnt; + + /* Reread the failed block */ + reread_cnt = + basebackup_read_file(fd, buf + BLCKSZ * i, + BLCKSZ, len + BLCKSZ * i, + readfilename, + false); + if (reread_cnt == 0) { /* - * Retry the block on the first failure. It's - * possible that we read the first 4K page of the - * block just before postgres updated the entire block - * so it ends up looking torn to us. We only need to - * retry once because the LSN should be updated to - * something we can ignore on the next pass. If the - * error happens again then it is a true validation - * failure. + * If we hit end-of-file, a concurrent truncation must + * have occurred, so break out of this loop just as if + * the initial fread() returned 0. We'll drop through + * to the same code that handles that case. (We must + * fix up cnt first, though.) 
- */ - cnt = BLCKSZ * i; - break; - } - - /* Set flag so we know a retry was attempted */ - block_retry = true; - - /* Reset loop to validate the block again */ - i--; - continue; - } - - checksum_failures++; - - if (checksum_failures <= 5) - ereport(WARNING, - (errmsg("checksum verification failed in " - "file \"%s\", block %d: calculated " - "%X but expected %X", - readfilename, blkno, checksum, - phdr->pd_checksum))); - if (checksum_failures == 5) - ereport(WARNING, - (errmsg("further checksum verification " - "failures in file \"%s\" will not " - "be reported", readfilename))); + cnt = BLCKSZ * i; + break; } + + /* Count one more attempt done on this block */ + block_attempts++; + + /* + * Wait for 100 ms to give some room to any parallel page + * write that may have caused this retry to finish. + */ + pg_usleep(100000L); + + /* Reset loop to validate the block again */ + i--; + continue; } - block_retry = false; + + checksum_failures++; + if (checksum_failures <= CHECKSUM_REPORT_THRESHOLD) + ereport(WARNING, + (errmsg("checksum verification failed in block %u of file \"%s\"", + blkno, readfilename))); + if (checksum_failures == CHECKSUM_REPORT_THRESHOLD) + ereport(WARNING, + (errmsg("further checksum verification failures in file \"%s\" will not be reported", + readfilename))); + + /* move to next block */ + block_attempts = 0; blkno++; } } diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index e549fa1d30..3eee86afe5 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -625,7 +625,8 @@ ReadBuffer(Relation reln, BlockNumber blockNum) * * In RBM_NORMAL mode, the page is read from disk, and the page header is * validated. An error is thrown if the page header is not valid. (But - * note that an all-zero page is considered "valid"; see PageIsVerified().) + * note that an all-zero page is considered "valid"; see + * PageIsVerifiedExtended().) 
* * RBM_ZERO_ON_ERROR is like the normal mode, but if the page header is not * valid, the page is zeroed instead of throwing an error. This is intended @@ -917,7 +918,8 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, } /* check for garbage data */ - if (!PageIsVerified((Page) bufBlock, blockNum)) + if (!PageIsVerifiedExtended((Page) bufBlock, blockNum, + PIV_LOG_WARNING | PIV_REPORT_STAT)) { if (mode == RBM_ZERO_ON_ERROR || zero_damaged_pages) { diff --git a/src/backend/storage/page/bufpage.c b/src/backend/storage/page/bufpage.c index 4bc2bf955d..3a27dff04d 100644 --- a/src/backend/storage/page/bufpage.c +++ b/src/backend/storage/page/bufpage.c @@ -62,6 +62,18 @@ PageInit(Page page, Size pageSize, Size specialSize) /* * PageIsVerified + * Utility wrapper for PageIsVerifiedExtended(). + */ +bool +PageIsVerified(Page page, BlockNumber blkno) +{ + return PageIsVerifiedExtended(page, blkno, + PIV_LOG_WARNING | PIV_REPORT_STAT); +} + + +/* + * PageIsVerifiedExtended * Check that the page header and checksum (if any) appear valid. * * This is called when a page has just been read in from disk. The idea is @@ -77,9 +89,15 @@ PageInit(Page page, Size pageSize, Size specialSize) * allow zeroed pages here, and are careful that the page access macros * treat such a page as empty and without free space. Eventually, VACUUM * will clean up such a page and make it usable. + * + * If flag PIV_LOG_WARNING is set, a WARNING is logged in the event of + * a checksum failure. + * + * If flag PIV_REPORT_STAT is set, a checksum failure is reported directly + * to pgstat. 
*/ bool -PageIsVerified(Page page, BlockNumber blkno) +PageIsVerifiedExtended(Page page, BlockNumber blkno, int flags) { PageHeader p = (PageHeader) page; size_t *pagebytes; @@ -140,12 +158,14 @@ PageIsVerified(Page page, BlockNumber blkno) */ if (checksum_failure) { - ereport(WARNING, - (errcode(ERRCODE_DATA_CORRUPTED), - errmsg("page verification failed, calculated checksum %u but expected %u", - checksum, p->pd_checksum))); + if ((flags & PIV_LOG_WARNING) != 0) + ereport(WARNING, + (errcode(ERRCODE_DATA_CORRUPTED), + errmsg("page verification failed, calculated checksum %u but expected %u", + checksum, p->pd_checksum))); - pgstat_report_checksum_failure(); + if ((flags & PIV_REPORT_STAT) != 0) + pgstat_report_checksum_failure(); if (header_sane && ignore_checksum_failure) return true; diff --git a/src/bin/pg_basebackup/t/010_pg_basebackup.pl b/src/bin/pg_basebackup/t/010_pg_basebackup.pl index f674a7c94e..a46a9ece42 100644 --- a/src/bin/pg_basebackup/t/010_pg_basebackup.pl +++ b/src/bin/pg_basebackup/t/010_pg_basebackup.pl @@ -6,7 +6,7 @@ use File::Basename qw(basename dirname); use File::Path qw(rmtree); use PostgresNode; use TestLib; -use Test::More tests => 109; +use Test::More tests => 112; program_help_ok('pg_basebackup'); program_version_ok('pg_basebackup'); @@ -528,6 +528,24 @@ $node->command_checks_all( 'pg_basebackup reports checksum mismatch'); rmtree("$tempdir/backup_corrupt"); +# Create a new type of corruption and zero out one page header +# completely. 
+system_or_bail 'pg_ctl', '-D', $pgdata, 'stop';
+open $file, '+<', "$pgdata/$file_corrupt1";
+seek($file, 0, 0);
+my $zero_data = "\0" x $pageheader_size;
+syswrite($file, $zero_data);
+close $file;
+system_or_bail 'pg_ctl', '-D', $pgdata, 'start';
+
+$node->command_checks_all(
+	[ 'pg_basebackup', '-D', "$tempdir/backup_corrupt1a" ],
+	1,
+	[qr{^$}],
+	[qr/^WARNING.*checksum verification failed/s],
+	"pg_basebackup reports checksum mismatch for zeroed page headers");
+rmtree("$tempdir/backup_corrupt1a");
+
 # induce further corruption in 5 more blocks
 system_or_bail 'pg_ctl', '-D', $pgdata, 'stop';
 open $file, '+<', "$pgdata/$file_corrupt1";