diff --git a/contrib/worker_spi/worker_spi.c b/contrib/worker_spi/worker_spi.c index 328c722..a9e4d43 100644 --- a/contrib/worker_spi/worker_spi.c +++ b/contrib/worker_spi/worker_spi.c @@ -180,6 +180,13 @@ worker_spi_main(Datum main_arg) /* We're now ready to receive signals */ BackgroundWorkerUnblockSignals(); + /* + * Allocate memory to store compressed and uncompressed backup blocks + * This comes of use if backup blocks are to be compressed + * at the time of writing FPW in WAL. + */ + CompressBackupBlocksPagesAlloc(); + /* Connect to our database */ BackgroundWorkerInitializeConnection("postgres", NULL); @@ -243,6 +250,13 @@ worker_spi_main(Datum main_arg) { got_sighup = false; ProcessConfigFile(PGC_SIGHUP); + + /* + * Allocate memory to store compressed and uncompressed backup blocks + * This comes of use if backup blocks need to be compressed + * at the time of writing FPW in WAL. + */ + CompressBackupBlocksPagesAlloc(); } /* diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index 47b1192..f5f8cbe 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -2181,14 +2181,14 @@ include_dir 'conf.d' - full_page_writes (boolean) + full_page_writes (enum) full_page_writes configuration parameter - - When this parameter is on, the PostgreSQL server + When this parameter is on or compress, + the PostgreSQL server writes the entire content of each disk page to WAL during the first modification of that page after a checkpoint. This is needed because @@ -2206,6 +2206,11 @@ include_dir 'conf.d' + Valid values are on, compress, and off. + The default is on. + + + Turning this parameter off speeds normal operation, but might lead to either unrecoverable data corruption, or silent data corruption, after a system failure. The risks are similar to turning off @@ -2220,9 +2225,13 @@ include_dir 'conf.d' + Setting this parameter to compress compresses + the full page image to reduce the amount of WAL data. 
+ + + This parameter can only be set in the postgresql.conf file or on the server command line. - The default is on. diff --git a/src/backend/access/rmgrdesc/xlogdesc.c b/src/backend/access/rmgrdesc/xlogdesc.c index e0957ff..2e59db6 100644 --- a/src/backend/access/rmgrdesc/xlogdesc.c +++ b/src/backend/access/rmgrdesc/xlogdesc.c @@ -31,6 +31,21 @@ const struct config_enum_entry wal_level_options[] = { {NULL, 0, false} }; +static const char * +full_page_writes_str(FullPageWritesLevel level) +{ + switch (level) + { + case FULL_PAGE_WRITES_ON: + return "true"; + case FULL_PAGE_WRITES_COMPRESS: + return "compress"; + case FULL_PAGE_WRITES_OFF: + return "false"; + } + return "unrecognized"; +} + void xlog_desc(StringInfo buf, XLogRecord *record) { @@ -49,7 +64,7 @@ xlog_desc(StringInfo buf, XLogRecord *record) (uint32) (checkpoint->redo >> 32), (uint32) checkpoint->redo, checkpoint->ThisTimeLineID, checkpoint->PrevTimeLineID, - checkpoint->fullPageWrites ? "true" : "false", + full_page_writes_str(checkpoint->fullPageWrites), checkpoint->nextXidEpoch, checkpoint->nextXid, checkpoint->nextOid, checkpoint->nextMulti, @@ -118,10 +133,10 @@ xlog_desc(StringInfo buf, XLogRecord *record) } else if (info == XLOG_FPW_CHANGE) { - bool fpw; + int fpw; - memcpy(&fpw, rec, sizeof(bool)); - appendStringInfo(buf, "%s", fpw ? 
"true" : "false"); + memcpy(&fpw, rec, sizeof(int)); + appendStringInfo(buf, "full_page_writes: %s", full_page_writes_str(fpw)); } else if (info == XLOG_END_OF_RECOVERY) { diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 99f702c..62412fc 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -62,6 +62,7 @@ #include "utils/builtins.h" #include "utils/guc.h" #include "utils/memutils.h" +#include "utils/pg_lzcompress.h" #include "utils/ps_status.h" #include "utils/relmapper.h" #include "utils/snapmgr.h" @@ -85,7 +86,7 @@ int XLogArchiveTimeout = 0; bool XLogArchiveMode = false; char *XLogArchiveCommand = NULL; bool EnableHotStandby = false; -bool fullPageWrites = true; +int fullPageWrites = FULL_PAGE_WRITES_ON; bool wal_log_hints = false; bool log_checkpoints = false; int sync_method = DEFAULT_SYNC_METHOD; @@ -179,7 +180,7 @@ static TimeLineID receiveTLI = 0; * that the recovery starting checkpoint record indicates, and then updated * each time XLOG_FPW_CHANGE record is replayed. */ -static bool lastFullPageWrites; +static int lastFullPageWrites; /* * Local copy of SharedRecoveryInProgress variable. True actually means "not @@ -464,7 +465,7 @@ typedef struct XLogCtlInsert */ XLogRecPtr RedoRecPtr; /* current redo point for insertions */ bool forcePageWrites; /* forcing full-page writes for PITR? 
*/ - bool fullPageWrites; + int fullPageWrites; /* * exclusiveBackup is true if a backup started with pg_start_backup() is @@ -947,7 +948,7 @@ XLogInsertRecord(XLogRecData *rdata, XLogRecPtr fpw_lsn) Assert(RedoRecPtr < Insert->RedoRecPtr); RedoRecPtr = Insert->RedoRecPtr; } - doPageWrites = (Insert->fullPageWrites || Insert->forcePageWrites); + doPageWrites = (Insert->fullPageWrites != FULL_PAGE_WRITES_OFF || Insert->forcePageWrites); if (fpw_lsn != InvalidXLogRecPtr && fpw_lsn <= RedoRecPtr && doPageWrites) { @@ -5588,6 +5589,7 @@ StartupXLOG(void) (errmsg("database system was interrupted; last known up at %s", str_time(ControlFile->time)))); + /* This is just to allow attaching to startup process with a debugger */ #ifdef XLOG_REPLAY_DELAY if (ControlFile->state != DB_SHUTDOWNED) @@ -5601,6 +5603,9 @@ StartupXLOG(void) */ ValidateXLOGDirectoryStructure(); + /* Allocate memory to store compressed and uncompressed backup blocks */ + CompressBackupBlocksPagesAlloc(); + /* * Clear out any old relcache cache files. This is *necessary* if we do * any WAL replay, since that would probably result in the cache files @@ -7192,6 +7197,9 @@ InitXLOGAccess(void) (void) GetRedoRecPtr(); /* Also update our copy of doPageWrites. */ doPageWrites = (Insert->fullPageWrites || Insert->forcePageWrites); + + /* Allocate memory to store compressed backup blocks */ + CompressBackupBlocksPagesAlloc(); } /* @@ -7235,6 +7243,15 @@ GetFullPageWriteInfo(XLogRecPtr *RedoRecPtr_p, bool *doPageWrites_p) } /* + * Return value of full page writes GUC from shared memory + */ +void +GetFullPageWriteGUC(int *fpw) +{ + XLogCtlInsert *Insert = &XLogCtl->Insert; + *fpw = Insert->fullPageWrites; +} +/* * GetInsertRecPtr -- Returns the current insert position. * * NOTE: The value *actually* returned is the position of the last full @@ -8471,10 +8488,10 @@ UpdateFullPageWrites(void) * setting it to false, first write the WAL record and then set the global * flag. 
*/ - if (fullPageWrites) + if (fullPageWrites != FULL_PAGE_WRITES_OFF) { WALInsertLockAcquireExclusive(); - Insert->fullPageWrites = true; + Insert->fullPageWrites = fullPageWrites; WALInsertLockRelease(); } @@ -8487,17 +8504,17 @@ UpdateFullPageWrites(void) XLogRecData rdata; rdata.data = (char *) (&fullPageWrites); - rdata.len = sizeof(bool); + rdata.len = sizeof(int); rdata.buffer = InvalidBuffer; rdata.next = NULL; XLogInsert(RM_XLOG_ID, XLOG_FPW_CHANGE, &rdata); } - if (!fullPageWrites) + if (fullPageWrites == FULL_PAGE_WRITES_OFF) { WALInsertLockAcquireExclusive(); - Insert->fullPageWrites = false; + Insert->fullPageWrites = fullPageWrites; WALInsertLockRelease(); } END_CRIT_SECTION(); @@ -8840,16 +8857,16 @@ xlog_redo(XLogRecPtr lsn, XLogRecord *record) } else if (info == XLOG_FPW_CHANGE) { - bool fpw; + int fpw; - memcpy(&fpw, XLogRecGetData(record), sizeof(bool)); + memcpy(&fpw, XLogRecGetData(record), sizeof(int)); /* * Update the LSN of the last replayed XLOG_FPW_CHANGE record so that * do_pg_start_backup() and do_pg_stop_backup() can check whether * full_page_writes has been disabled during online backup. */ - if (!fpw) + if (fpw == FULL_PAGE_WRITES_OFF) { SpinLockAcquire(&XLogCtl->info_lck); if (XLogCtl->lastFpwDisableRecPtr < ReadRecPtr) @@ -9192,7 +9209,7 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p, do { - bool checkpointfpw; + int checkpointfpw; /* * Force a CHECKPOINT. 
Aside from being necessary to prevent torn @@ -9241,7 +9258,7 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p, recptr = XLogCtl->lastFpwDisableRecPtr; SpinLockRelease(&XLogCtl->info_lck); - if (!checkpointfpw || startpoint <= recptr) + if (checkpointfpw == FULL_PAGE_WRITES_OFF || startpoint <= recptr) ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), errmsg("WAL generated with full_page_writes=off was replayed " diff --git a/src/backend/access/transam/xloginsert.c b/src/backend/access/transam/xloginsert.c index b83343b..2b7be7a 100644 --- a/src/backend/access/transam/xloginsert.c +++ b/src/backend/access/transam/xloginsert.c @@ -23,6 +23,7 @@ #include "storage/proc.h" #include "utils/memutils.h" #include "pg_trace.h" +#include "utils/pg_lzcompress.h" static XLogRecData *XLogRecordAssemble(RmgrId rmid, uint8 info, XLogRecData *rdata, @@ -30,6 +31,14 @@ static XLogRecData *XLogRecordAssemble(RmgrId rmid, uint8 info, XLogRecPtr *fpw_lsn, XLogRecData **rdt_lastnormal); static void XLogFillBkpBlock(Buffer buffer, bool buffer_std, BkpBlock *bkpb); +static char *CompressBackupBlocks(char *page, uint32 orig_len, char *dest, uint32 *len); +void CompressBackupBlocksPagesAlloc(void); + +/* For storing backup blocks before and after compression */ +static char *compressedPages; +static char *uncompressedPages; +static bool outOfMem = 0; + /* * Insert an XLOG record having the specified RMID and info bytes, * with the body of the record being the data chunk(s) described by @@ -146,6 +155,12 @@ XLogRecordAssemble(RmgrId rmid, uint8 info, XLogRecData *rdata, total_len; unsigned i; + int fpw; + char *compressed_blocks; + uint32 compressed_len = 0; + uint32 orig_len = 0; + bool compressed = 0; + BkpBlock *bkpb; /* * These need to be static because they are returned to the caller as part * of the XLogRecData chain. 
@@ -188,6 +203,7 @@ XLogRecordAssemble(RmgrId rmid, uint8 info, XLogRecData *rdata, *fpw_lsn = InvalidXLogRecPtr; len = 0; + GetFullPageWriteGUC(&fpw); for (rdt = rdata;;) { if (rdt->buffer == InvalidBuffer) @@ -333,6 +349,49 @@ XLogRecordAssemble(RmgrId rmid, uint8 info, XLogRecData *rdata, } /* + * If compression is set on replace the rdata nodes of backup blocks added in the loop + * above by single rdata node that contains compressed backup blocks and their headers + * except the header of first block which is used to store the information about compression. + */ + if ((fpw == FULL_PAGE_WRITES_OFF || fpw == FULL_PAGE_WRITES_COMPRESS) && !outOfMem && (*rdt_lastnormal)->next != NULL) + { + rdt = (*rdt_lastnormal)->next; + rdt = rdt->next; + for (; rdt != NULL; rdt = rdt->next) + { + memcpy(uncompressedPages + orig_len, rdt->data, rdt->len); + orig_len += rdt->len; + } + if(orig_len) + { + /* Compress the backup blocks before including it in rdata chain */ + compressed_blocks = CompressBackupBlocks(uncompressedPages, orig_len, + compressedPages, &(compressed_len)); + if (compressed_blocks != NULL) + { + /* + * total_len is the length of compressed block and its varlena + * header + */ + rdt = ((*rdt_lastnormal)->next)->next; + rdt->data = compressed_blocks; + rdt->len = compressed_len; + total_len = SizeOfXLogRecord + len; + total_len = total_len + sizeof(BkpBlock); + total_len += rdt->len; + rdt->next = NULL; + compressed = 1; + } + } + /* Adding information about compression in backup block header */ + bkpb = (BkpBlock *)(*rdt_lastnormal)->next->data; + if (!compressed) + bkpb->flag_compress = BKPBLOCKS_UNCOMPRESSED; + else + bkpb->flag_compress = BKPBLOCKS_COMPRESSED; + } + + /* * We disallow len == 0 because it provides a useful bit of extra error * checking in ReadRecord. This means that all callers of XLogInsert * must supply at least some not-in-a-buffer data. 
However, we make an @@ -630,4 +689,82 @@ XLogFillBkpBlock(Buffer buffer, bool buffer_std, BkpBlock *bkpb) bkpb->hole_offset = 0; bkpb->hole_length = 0; } + bkpb->flag_compress = BKPBLOCKS_UNCOMPRESSED; +} + +/* + * Compress the 'orig_len' bytes of concatenated backup block data at 'page' + * into 'dest'. If successful, return 'dest' and set '*len' to the size of the + * compressed result. Otherwise (ie, the data is incompressible or nothing + * would be saved), return NULL and leave '*len' untouched. + */ +static char * +CompressBackupBlocks(char *page, uint32 orig_len, char *dest, uint32 *len) +{ + struct varlena *buf = (struct varlena *) dest; + bool ret; + ret = pglz_compress(page, orig_len, + (PGLZ_Header *) buf, PGLZ_strategy_default); + + /* Zero is returned for incompressible data */ + if (!ret) + return NULL; + /* + * We recheck the actual size even if pglz_compress() reports success, + * because it might be satisfied with having saved as little as one byte + * in the compressed data --- which could turn into a net loss once you + * consider header and alignment padding. So we insist on a savings of + * more than 2 bytes to ensure we have a gain. The test is written as an + * addition on the left-hand side because orig_len is unsigned: computing + * "orig_len - 2" would wrap around for orig_len < 2 and accept any + * output as a gain. + */ + if (VARSIZE(buf) + 2 >= orig_len) + { + return NULL; + } + *len = VARSIZE(buf); + return (char *) buf; +} + +/* + * Allocate pages to store compressed backup blocks once per backend. + * Size of pages depend on the compression algorithm used. These pages + * persist till the end of the backend process. If memory allocation + * fails we disable compression of backup blocks entirely. 
+ */ +void +CompressBackupBlocksPagesAlloc(void) +{ + /* + * The buffers are only needed while this backend's full_page_writes + * setting is something other than plain 'on'; release them otherwise. + */ + if (fullPageWrites == FULL_PAGE_WRITES_ON) + { + free(compressedPages); + compressedPages = NULL; + free(uncompressedPages); + uncompressedPages = NULL; + } + else + { + if (compressedPages == NULL) + { + /* Worst-case pglz output for a full set of blocks, plus a varlena header */ + size_t buffer_size = VARHDRSZ + PGLZ_MAX_OUTPUT(XLR_TOTAL_BLCKSZ); + compressedPages = (char *) malloc(buffer_size); + } + /* Staging area where the backup blocks of one record are concatenated */ + if (uncompressedPages == NULL) + uncompressedPages = (char *) malloc(XLR_TOTAL_BLCKSZ); + } + + /* + * Recompute the flag from the buffers themselves on every call, so that + * an earlier allocation failure does not disable compression for good + * and so that the flag is never clear while a buffer is missing (as is + * the case after the buffers have been released above). + */ + outOfMem = (compressedPages == NULL || uncompressedPages == NULL); } diff --git a/src/backend/access/transam/xlogreader.c b/src/backend/access/transam/xlogreader.c index 7d573cc..96a098a 100644 --- a/src/backend/access/transam/xlogreader.c +++ b/src/backend/access/transam/xlogreader.c @@ -665,6 +665,7 @@ ValidXLogRecordHeader(XLogReaderState *state, XLogRecPtr RecPtr, * record's header, which means in particular that xl_tot_len is at least * SizeOfXlogRecord, so it is safe to fetch xl_len. */ + static bool ValidXLogRecord(XLogReaderState *state, XLogRecord *record, XLogRecPtr recptr) { @@ -674,9 +675,11 @@ ValidXLogRecord(XLogReaderState *state, XLogRecord *record, XLogRecPtr recptr) BkpBlock bkpb; char *blk; size_t remaining = record->xl_tot_len; + struct varlena *tmp; + uint32 b_tot_len; /* First the rmgr data */ - if (remaining < SizeOfXLogRecord + len) + if (remaining < SizeOfXLogRecord + len) { /* ValidXLogRecordHeader() should've caught this already... 
*/ report_invalid_record(state, "invalid record length at %X/%X", @@ -689,52 +692,82 @@ ValidXLogRecord(XLogReaderState *state, XLogRecord *record, XLogRecPtr recptr) /* Add in the backup blocks, if any */ blk = (char *) XLogRecGetData(record) + len; - for (i = 0; i < XLR_MAX_BKP_BLOCKS; i++) + if (remaining != 0) { - uint32 blen; - - if (!(record->xl_info & XLR_BKP_BLOCK(i))) - continue; - if (remaining < sizeof(BkpBlock)) { - report_invalid_record(state, - "invalid backup block size in record at %X/%X", - (uint32) (recptr >> 32), (uint32) recptr); + report_invalid_record(state,"invalid backup block size in record at %X/%X", + (uint32) (recptr >> 32), (uint32) recptr); return false; } memcpy(&bkpb, blk, sizeof(BkpBlock)); - if (bkpb.hole_offset + bkpb.hole_length > BLCKSZ) + if (bkpb.flag_compress == BKPBLOCKS_UNCOMPRESSED) { - report_invalid_record(state, - "incorrect hole size in record at %X/%X", - (uint32) (recptr >> 32), (uint32) recptr); - return false; + for (i = 0; i < XLR_MAX_BKP_BLOCKS; i++) + { + uint32 blen; + + if (!(record->xl_info & XLR_BKP_BLOCK(i))) + continue; + + if (remaining < sizeof(BkpBlock)) + { + report_invalid_record(state, + "invalid backup block size in record at %X/%X", + (uint32) (recptr >> 32), (uint32) recptr); + return false; + } + memcpy(&bkpb, blk, sizeof(BkpBlock)); + + if (bkpb.hole_offset + bkpb.hole_length > BLCKSZ) + { + report_invalid_record(state, + "incorrect hole size in record at %X/%X", + (uint32) (recptr >> 32), (uint32) recptr); + return false; + } + + blen = sizeof(BkpBlock) + BLCKSZ - bkpb.hole_length; + + if (remaining < blen) + { + report_invalid_record(state, + "invalid backup block size in record at %X/%X", + (uint32) (recptr >> 32), (uint32) recptr); + return false; + } + remaining -= blen; + COMP_CRC32C(crc, blk, blen); + blk += blen; + } + /* Check that xl_tot_len agrees with our calculation */ + if (remaining != 0) + { + report_invalid_record(state, + "incorrect total length in record at %X/%X", + (uint32) 
(recptr >> 32), (uint32) recptr); + return false; + } } - blen = sizeof(BkpBlock) + BLCKSZ - bkpb.hole_length; - - if (remaining < blen) + else { - report_invalid_record(state, - "invalid backup block size in record at %X/%X", - (uint32) (recptr >> 32), (uint32) recptr); - return false; - } - remaining -= blen; - COMP_CRC32C(crc, blk, blen); - blk += blen; - } + tmp = blk + sizeof(BkpBlock); + b_tot_len = VARSIZE(tmp); - /* Check that xl_tot_len agrees with our calculation */ - if (remaining != 0) - { - report_invalid_record(state, - "incorrect total length in record at %X/%X", - (uint32) (recptr >> 32), (uint32) recptr); - return false; + /* + * Check to ensure that the total length of compressed blocks stored as varlena + * agrees with the xl_tot_len stored in XLogRecord + */ + if ((remaining - sizeof(BkpBlock)) != b_tot_len) + { + report_invalid_record(state,"invalid backup block size in record at %X/%X", + (uint32) (recptr >> 32), (uint32) recptr); + return false; + } + COMP_CRC32C(crc, blk, remaining); + } } - /* Finally include the record header */ COMP_CRC32C(crc, (char *) record, offsetof(XLogRecord, xl_crc)); FIN_CRC32C(crc); diff --git a/src/backend/access/transam/xlogutils.c b/src/backend/access/transam/xlogutils.c index 1a21dac..7efe5f5 100644 --- a/src/backend/access/transam/xlogutils.c +++ b/src/backend/access/transam/xlogutils.c @@ -24,6 +24,7 @@ #include "utils/guc.h" #include "utils/hsearch.h" #include "utils/rel.h" +#include "utils/pg_lzcompress.h" /* @@ -495,17 +496,28 @@ RestoreBackupBlock(XLogRecPtr lsn, XLogRecord *record, int block_index, BkpBlock bkpb; char *blk; int i; + char *uncompressedPages; + + uncompressedPages = (char *)palloc(XLR_TOTAL_BLCKSZ); /* Locate requested BkpBlock in the record */ blk = (char *) XLogRecGetData(record) + record->xl_len; + + memcpy(&bkpb, blk, sizeof(BkpBlock)); + blk = blk + sizeof(BkpBlock); + + /* Check if blocks in WAL record are compressed */ + if (bkpb.flag_compress == BKPBLOCKS_COMPRESSED) + { + /* 
Checks to see if decompression is successful is made inside the function */ + pglz_decompress((PGLZ_Header *) blk, uncompressedPages); + blk = uncompressedPages; + } for (i = 0; i < XLR_MAX_BKP_BLOCKS; i++) { if (!(record->xl_info & XLR_BKP_BLOCK(i))) continue; - memcpy(&bkpb, blk, sizeof(BkpBlock)); - blk += sizeof(BkpBlock); - if (i == block_index) { /* Found it, apply the update */ @@ -514,6 +526,8 @@ RestoreBackupBlock(XLogRecPtr lsn, XLogRecord *record, int block_index, } blk += BLCKSZ - bkpb.hole_length; + memcpy(&bkpb, blk, sizeof(BkpBlock)); + blk += sizeof(BkpBlock); } /* Caller specified a bogus block_index */ diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c index 1d6e3f3..87ea741 100644 --- a/src/backend/postmaster/autovacuum.c +++ b/src/backend/postmaster/autovacuum.c @@ -633,7 +633,6 @@ AutoVacLauncherMain(int argc, char *argv[]) { got_SIGHUP = false; ProcessConfigFile(PGC_SIGHUP); - /* shutdown requested in config file? */ if (!AutoVacuumingActive()) break; @@ -1395,7 +1394,25 @@ avl_sigterm_handler(SIGNAL_ARGS) errno = save_errno; } +/* + * SIGHUP handler for autovacuum workers: reload the config file, then allocate + * or free the FPW compression buffers to match the new full_page_writes value. + */ +static void +avw_sighup_handler(SIGNAL_ARGS) +{ + int save_errno = errno; + ProcessConfigFile(PGC_SIGHUP); + /* + * NOTE(review): the call below reaches malloc()/free(), which are not + * async-signal-safe, and all of this runs in signal-handler context. + * Consider only setting a flag here and doing the work in the main loop. + */ + CompressBackupBlocksPagesAlloc(); + + errno = save_errno; +} /******************************************************************** * AUTOVACUUM WORKER CODE ********************************************************************/ @@ -1516,7 +1533,7 @@ AutoVacWorkerMain(int argc, char *argv[]) * Currently, we don't pay attention to postgresql.conf changes that * happen during a single daemon iteration, so we can ignore SIGHUP. 
*/ - pqsignal(SIGHUP, SIG_IGN); + pqsignal(SIGHUP, avw_sighup_handler); /* * SIGINT is used to signal canceling the current table's vacuum; SIGTERM diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c index 61f17bf..7b89303 100644 --- a/src/backend/tcop/postgres.c +++ b/src/backend/tcop/postgres.c @@ -199,6 +199,9 @@ static void drop_unnamed_stmt(void); static void SigHupHandler(SIGNAL_ARGS); static void log_disconnections(int code, Datum arg); +/*----------------------------------------------------------------- + */ +extern void CompressBackupBlocksPagesAlloc(); /* ---------------------------------------------------------------- * routines to obtain user input @@ -3983,6 +3986,8 @@ PostgresMain(int argc, char *argv[], { got_SIGHUP = false; ProcessConfigFile(PGC_SIGHUP); + /* Allocate memory to store compressed and uncompressed backup blocks */ + CompressBackupBlocksPagesAlloc(); } /* diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index aca4243..1073f47 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -418,6 +418,23 @@ static const struct config_enum_entry row_security_options[] = { }; /* + * Although only "on", "off", and "compress" are documented, we + * accept all the likely variants of "on" and "off". 
+ */ +static const struct config_enum_entry full_page_writes_options[] = { + {"compress", FULL_PAGE_WRITES_COMPRESS, false}, + {"on", FULL_PAGE_WRITES_ON, false}, + {"off", FULL_PAGE_WRITES_OFF, false}, + {"true", FULL_PAGE_WRITES_ON, true}, + {"false", FULL_PAGE_WRITES_OFF, true}, + {"yes", FULL_PAGE_WRITES_ON, true}, + {"no", FULL_PAGE_WRITES_OFF, true}, + {"1", FULL_PAGE_WRITES_ON, true}, + {"0", FULL_PAGE_WRITES_OFF, true}, + {NULL, 0, false} +}; + +/* * Options for enum values stored in other modules */ extern const struct config_enum_entry wal_level_options[]; @@ -893,20 +910,6 @@ static struct config_bool ConfigureNamesBool[] = NULL, NULL, NULL }, { - {"full_page_writes", PGC_SIGHUP, WAL_SETTINGS, - gettext_noop("Writes full pages to WAL when first modified after a checkpoint."), - gettext_noop("A page write in process during an operating system crash might be " - "only partially written to disk. During recovery, the row changes " - "stored in WAL are not enough to recover. This option writes " - "pages when first modified after a checkpoint to WAL so full recovery " - "is possible.") - }, - &fullPageWrites, - true, - NULL, NULL, NULL - }, - - { {"wal_log_hints", PGC_POSTMASTER, WAL_SETTINGS, gettext_noop("Writes full pages to WAL when first modified after a checkpoint, even for a non-critical modifications"), NULL @@ -3423,6 +3426,20 @@ static struct config_enum ConfigureNamesEnum[] = }, { + {"full_page_writes", PGC_SIGHUP, WAL_SETTINGS, + gettext_noop("Writes full pages to WAL when first modified after a checkpoint."), + gettext_noop("A page write in process during an operating system crash might be " + "only partially written to disk. During recovery, the row changes " + "stored in WAL are not enough to recover. 
This option writes " + "pages when first modified after a checkpoint to WAL so full recovery " + "is possible.") + }, + &fullPageWrites, + FULL_PAGE_WRITES_ON, full_page_writes_options, + NULL, NULL, NULL + }, + + { {"trace_recovery_messages", PGC_SIGHUP, DEVELOPER_OPTIONS, gettext_noop("Enables logging of recovery-related debugging information."), gettext_noop("Each level includes all the levels that follow it. The later" diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index dac6776..9f51e30 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -185,7 +185,8 @@ # fsync # fsync_writethrough # open_sync -#full_page_writes = on # recover from partial page writes +#full_page_writes = on # recover from partial page writes; + # off, compress, or on #wal_log_hints = off # also do full page writes of non-critical updates #wal_buffers = -1 # min 32kB, -1 sets based on shared_buffers # (change requires restart) diff --git a/src/bin/pg_controldata/pg_controldata.c b/src/bin/pg_controldata/pg_controldata.c index b2e0793..e250ee0 100644 --- a/src/bin/pg_controldata/pg_controldata.c +++ b/src/bin/pg_controldata/pg_controldata.c @@ -250,7 +250,7 @@ main(int argc, char *argv[]) printf(_("Latest checkpoint's PrevTimeLineID: %u\n"), ControlFile.checkPointCopy.PrevTimeLineID); printf(_("Latest checkpoint's full_page_writes: %s\n"), - ControlFile.checkPointCopy.fullPageWrites ? 
_("on") : _("off")); + FullPageWritesStr(ControlFile.checkPointCopy.fullPageWrites)); printf(_("Latest checkpoint's NextXID: %u/%u\n"), ControlFile.checkPointCopy.nextXidEpoch, ControlFile.checkPointCopy.nextXid); diff --git a/src/bin/pg_resetxlog/pg_resetxlog.c b/src/bin/pg_resetxlog/pg_resetxlog.c index 2ba9946..79b387d 100644 --- a/src/bin/pg_resetxlog/pg_resetxlog.c +++ b/src/bin/pg_resetxlog/pg_resetxlog.c @@ -517,7 +517,7 @@ GuessControlValues(void) ControlFile.checkPointCopy.redo = SizeOfXLogLongPHD; ControlFile.checkPointCopy.ThisTimeLineID = 1; ControlFile.checkPointCopy.PrevTimeLineID = 1; - ControlFile.checkPointCopy.fullPageWrites = false; + ControlFile.checkPointCopy.fullPageWrites = FULL_PAGE_WRITES_OFF; ControlFile.checkPointCopy.nextXidEpoch = 0; ControlFile.checkPointCopy.nextXid = FirstNormalTransactionId; ControlFile.checkPointCopy.nextOid = FirstBootstrapObjectId; @@ -601,7 +601,7 @@ PrintControlValues(bool guessed) printf(_("Latest checkpoint's TimeLineID: %u\n"), ControlFile.checkPointCopy.ThisTimeLineID); printf(_("Latest checkpoint's full_page_writes: %s\n"), - ControlFile.checkPointCopy.fullPageWrites ? 
_("on") : _("off")); + FullPageWritesStr(ControlFile.checkPointCopy.fullPageWrites)); printf(_("Latest checkpoint's NextXID: %u/%u\n"), ControlFile.checkPointCopy.nextXidEpoch, ControlFile.checkPointCopy.nextXid); diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h index 6f8b5f4..97be041 100644 --- a/src/include/access/xlog.h +++ b/src/include/access/xlog.h @@ -96,7 +96,6 @@ extern int XLogArchiveTimeout; extern bool XLogArchiveMode; extern char *XLogArchiveCommand; extern bool EnableHotStandby; -extern bool fullPageWrites; extern bool wal_log_hints; extern bool log_checkpoints; @@ -110,6 +109,22 @@ typedef enum WalLevel } WalLevel; extern int wal_level; +/* + * Settings of the full_page_writes GUC. FULL_PAGE_WRITES_OFF must remain + * zero: some callers still test the value as a plain boolean. + */ +typedef enum FullPageWritesLevel +{ + FULL_PAGE_WRITES_OFF = 0, + FULL_PAGE_WRITES_COMPRESS, + FULL_PAGE_WRITES_ON +} FullPageWritesLevel; +extern int fullPageWrites; +/* Display name of a FullPageWritesLevel value; 'fpw' is evaluated twice. */ +#define FullPageWritesStr(fpw) \ + ((fpw) == FULL_PAGE_WRITES_ON ? _("on") : \ + ((fpw) == FULL_PAGE_WRITES_OFF ? _("off") : _("compress"))) + #define XLogArchivingActive() (XLogArchiveMode && wal_level >= WAL_LEVEL_ARCHIVE) #define XLogArchiveCommandSet() (XLogArchiveCommand[0] != '\0') @@ -234,6 +249,7 @@ extern void XLogPutNextOid(Oid nextOid); extern XLogRecPtr XLogRestorePoint(const char *rpName); extern void UpdateFullPageWrites(void); extern void GetFullPageWriteInfo(XLogRecPtr *RedoRecPtr_p, bool *doPageWrites_p); +extern void GetFullPageWriteGUC(int * fpw); extern XLogRecPtr GetRedoRecPtr(void); extern XLogRecPtr GetInsertRecPtr(void); extern XLogRecPtr GetFlushRecPtr(void); diff --git a/src/include/access/xloginsert.h b/src/include/access/xloginsert.h index 30c2e84..4e74557 100644 --- a/src/include/access/xloginsert.h +++ b/src/include/access/xloginsert.h @@ -62,5 +62,8 @@ extern XLogRecPtr log_newpage(RelFileNode *rnode, ForkNumber forkNum, extern XLogRecPtr log_newpage_buffer(Buffer buffer, bool page_std); extern XLogRecPtr XLogSaveBufferForHint(Buffer buffer, bool buffer_std); extern bool XLogCheckBufferNeedsBackup(Buffer 
buffer); +extern void CompressBackupBlocksPagesAlloc(void); +/* Worst-case total size of the backup blocks of one XLOG record, block headers included; parenthesized so it can be used inside larger expressions */ +#define XLR_TOTAL_BLCKSZ (XLR_MAX_BKP_BLOCKS * BLCKSZ + XLR_MAX_BKP_BLOCKS * sizeof(BkpBlock)) #endif /* XLOGINSERT_H */ diff --git a/src/include/access/xlogrecord.h b/src/include/access/xlogrecord.h index ab0fb1c..7f2b4ca 100644 --- a/src/include/access/xlogrecord.h +++ b/src/include/access/xlogrecord.h @@ -91,10 +91,13 @@ typedef struct BkpBlock RelFileNode node; /* relation containing block */ ForkNumber fork; /* fork within the relation */ BlockNumber block; /* block number */ - uint16 hole_offset; /* number of bytes before "hole" */ - uint16 hole_length; /* number of bytes in "hole" */ + unsigned hole_offset:15, /* number of bytes before "hole" */ + flag_compress:2,/* flag to store compression information */ + hole_length:15; /* number of bytes in "hole" */ /* ACTUAL BLOCK DATA FOLLOWS AT END OF STRUCT */ } BkpBlock; +#define BKPBLOCKS_UNCOMPRESSED 0 /* uncompressed */ +#define BKPBLOCKS_COMPRESSED 1 /* compressed */ #endif /* XLOGRECORD_H */ diff --git a/src/include/catalog/pg_control.h b/src/include/catalog/pg_control.h index ba79d25..6a536fc 100644 --- a/src/include/catalog/pg_control.h +++ b/src/include/catalog/pg_control.h @@ -35,7 +35,7 @@ typedef struct CheckPoint TimeLineID ThisTimeLineID; /* current TLI */ TimeLineID PrevTimeLineID; /* previous TLI, if this record begins a new * timeline (equals ThisTimeLineID otherwise) */ - bool fullPageWrites; /* current full_page_writes */ + int fullPageWrites; /* current full_page_writes */ uint32 nextXidEpoch; /* higher-order bits of nextXid */ TransactionId nextXid; /* next free XID */ Oid nextOid; /* next free OID */