From 931ab8fa7e9181f6b69601ad279e0ee5acb103d4 Mon Sep 17 00:00:00 2001 From: Takashi Menjo Date: Wed, 24 Jun 2020 15:07:56 +0900 Subject: [PATCH v3 1/5] Support GUCs for external WAL buffer To implement non-volatile WAL buffer, we add two new GUCs nvwal_path and nvwal_size. Now postgres maps a file at that path onto memory to use it as WAL buffer. Note that the buffer is still volatile for now. --- configure | 262 ++++++++++++++++++ configure.in | 43 +++ src/backend/access/transam/Makefile | 3 +- src/backend/access/transam/nv_xlog_buffer.c | 95 +++++++ src/backend/access/transam/xlog.c | 164 ++++++++++- src/backend/utils/misc/guc.c | 23 +- src/backend/utils/misc/postgresql.conf.sample | 2 + src/bin/initdb/initdb.c | 93 ++++++- src/include/access/nv_xlog_buffer.h | 71 +++++ src/include/access/xlog.h | 2 + src/include/pg_config.h.in | 6 + src/include/utils/guc.h | 4 + 12 files changed, 747 insertions(+), 21 deletions(-) create mode 100644 src/backend/access/transam/nv_xlog_buffer.c create mode 100644 src/include/access/nv_xlog_buffer.h diff --git a/configure b/configure index 2feff37fe3..3f16feeb54 100755 --- a/configure +++ b/configure @@ -866,6 +866,7 @@ with_libxml with_libxslt with_system_tzdata with_zlib +with_nvwal with_gnu_ld enable_largefile ' @@ -1570,6 +1571,7 @@ Optional Packages: --with-system-tzdata=DIR use system time zone data in DIR --without-zlib do not use Zlib + --with-nvwal use non-volatile WAL buffer (NVWAL) --with-gnu-ld assume the C compiler uses GNU ld [default=no] Some influential environment variables: @@ -8504,6 +8506,203 @@ fi +# +# Non-volatile WAL buffer (NVWAL) +# +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether to build with non-volatile WAL buffer (NVWAL)" >&5 +$as_echo_n "checking whether to build with non-volatile WAL buffer (NVWAL)... " >&6; } + + + +# Check whether --with-nvwal was given. 
+if test "${with_nvwal+set}" = set; then : + withval=$with_nvwal; + case $withval in + yes) + +$as_echo "#define USE_NVWAL 1" >>confdefs.h + + ;; + no) + : + ;; + *) + as_fn_error $? "no argument expected for --with-nvwal option" "$LINENO" 5 + ;; + esac + +else + with_nvwal=no + +fi + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $with_nvwal" >&5 +$as_echo "$with_nvwal" >&6; } + +# +# Elf +# + +# Assume system is ELF if it predefines __ELF__ as 1, +# otherwise believe host_os based default. +case $host_os in + freebsd1*|freebsd2*) elf=no;; + freebsd3*|freebsd4*) elf=yes;; +esac + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for grep that handles long lines and -e" >&5 +$as_echo_n "checking for grep that handles long lines and -e... " >&6; } +if ${ac_cv_path_GREP+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -z "$GREP"; then + ac_path_GREP_found=false + # Loop through the user's path and test for each of PROGNAME-LIST + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_prog in grep ggrep; do + for ac_exec_ext in '' $ac_executable_extensions; do + ac_path_GREP="$as_dir/$ac_prog$ac_exec_ext" + as_fn_executable_p "$ac_path_GREP" || continue +# Check for GNU ac_path_GREP and select it if it is found. 
+ # Check for GNU $ac_path_GREP +case `"$ac_path_GREP" --version 2>&1` in +*GNU*) + ac_cv_path_GREP="$ac_path_GREP" ac_path_GREP_found=:;; +*) + ac_count=0 + $as_echo_n 0123456789 >"conftest.in" + while : + do + cat "conftest.in" "conftest.in" >"conftest.tmp" + mv "conftest.tmp" "conftest.in" + cp "conftest.in" "conftest.nl" + $as_echo 'GREP' >> "conftest.nl" + "$ac_path_GREP" -e 'GREP$' -e '-(cannot match)-' < "conftest.nl" >"conftest.out" 2>/dev/null || break + diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break + as_fn_arith $ac_count + 1 && ac_count=$as_val + if test $ac_count -gt ${ac_path_GREP_max-0}; then + # Best one so far, save it but keep looking for a better one + ac_cv_path_GREP="$ac_path_GREP" + ac_path_GREP_max=$ac_count + fi + # 10*(2^10) chars as input seems more than enough + test $ac_count -gt 10 && break + done + rm -f conftest.in conftest.tmp conftest.nl conftest.out;; +esac + + $ac_path_GREP_found && break 3 + done + done + done +IFS=$as_save_IFS + if test -z "$ac_cv_path_GREP"; then + as_fn_error $? "no acceptable grep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5 + fi +else + ac_cv_path_GREP=$GREP +fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_GREP" >&5 +$as_echo "$ac_cv_path_GREP" >&6; } + GREP="$ac_cv_path_GREP" + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for egrep" >&5 +$as_echo_n "checking for egrep... " >&6; } +if ${ac_cv_path_EGREP+:} false; then : + $as_echo_n "(cached) " >&6 +else + if echo a | $GREP -E '(a|b)' >/dev/null 2>&1 + then ac_cv_path_EGREP="$GREP -E" + else + if test -z "$EGREP"; then + ac_path_EGREP_found=false + # Loop through the user's path and test for each of PROGNAME-LIST + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_prog in egrep; do + for ac_exec_ext in '' $ac_executable_extensions; do + ac_path_EGREP="$as_dir/$ac_prog$ac_exec_ext" + as_fn_executable_p "$ac_path_EGREP" || continue +# Check for GNU ac_path_EGREP and select it if it is found. + # Check for GNU $ac_path_EGREP +case `"$ac_path_EGREP" --version 2>&1` in +*GNU*) + ac_cv_path_EGREP="$ac_path_EGREP" ac_path_EGREP_found=:;; +*) + ac_count=0 + $as_echo_n 0123456789 >"conftest.in" + while : + do + cat "conftest.in" "conftest.in" >"conftest.tmp" + mv "conftest.tmp" "conftest.in" + cp "conftest.in" "conftest.nl" + $as_echo 'EGREP' >> "conftest.nl" + "$ac_path_EGREP" 'EGREP$' < "conftest.nl" >"conftest.out" 2>/dev/null || break + diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break + as_fn_arith $ac_count + 1 && ac_count=$as_val + if test $ac_count -gt ${ac_path_EGREP_max-0}; then + # Best one so far, save it but keep looking for a better one + ac_cv_path_EGREP="$ac_path_EGREP" + ac_path_EGREP_max=$ac_count + fi + # 10*(2^10) chars as input seems more than enough + test $ac_count -gt 10 && break + done + rm -f conftest.in conftest.tmp conftest.nl conftest.out;; +esac + + $ac_path_EGREP_found && break 3 + done + done + done +IFS=$as_save_IFS + if test -z "$ac_cv_path_EGREP"; then + as_fn_error $? "no acceptable egrep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5 + fi +else + ac_cv_path_EGREP=$EGREP +fi + + fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_EGREP" >&5 +$as_echo "$ac_cv_path_EGREP" >&6; } + EGREP="$ac_cv_path_EGREP" + + +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +#if __ELF__ + yes +#endif + +_ACEOF +if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | + $EGREP "yes" >/dev/null 2>&1; then : + ELF_SYS=true +else + if test "X$elf" = "Xyes" ; then + ELF_SYS=true +else + ELF_SYS= +fi +fi +rm -f conftest* + + + # # Assignments # @@ -12861,6 +13060,57 @@ fi fi +# for non-volatile WAL buffer (NVWAL) +if test "$with_nvwal" = yes; then + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for pmem_map_file in -lpmem" >&5 +$as_echo_n "checking for pmem_map_file in -lpmem... " >&6; } +if ${ac_cv_lib_pmem_pmem_map_file+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lpmem $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char pmem_map_file (); +int +main () +{ +return pmem_map_file (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_lib_pmem_pmem_map_file=yes +else + ac_cv_lib_pmem_pmem_map_file=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_pmem_pmem_map_file" >&5 +$as_echo "$ac_cv_lib_pmem_pmem_map_file" >&6; } +if test "x$ac_cv_lib_pmem_pmem_map_file" = xyes; then : + cat >>confdefs.h <<_ACEOF +#define HAVE_LIBPMEM 1 +_ACEOF + + LIBS="-lpmem $LIBS" + +else + as_fn_error $? "library 'libpmem' is required for non-volatile WAL buffer (NVWAL)" "$LINENO" 5 +fi + +fi + ## ## Header files @@ -13540,6 +13790,18 @@ fi done +fi + +# for non-volatile WAL buffer (NVWAL) +if test "$with_nvwal" = yes ; then + ac_fn_c_check_header_mongrel "$LINENO" "libpmem.h" "ac_cv_header_libpmem_h" "$ac_includes_default" +if test "x$ac_cv_header_libpmem_h" = xyes; then : + +else + as_fn_error $? 
"header file <libpmem.h> is required for non-volatile WAL buffer (NVWAL)" "$LINENO" 5 +fi + + fi if test "$PORTNAME" = "win32" ; then diff --git a/configure.in b/configure.in index 0188c6ff07..a5f9c9fb9d 100644 --- a/configure.in +++ b/configure.in @@ -992,6 +992,38 @@ PGAC_ARG_BOOL(with, zlib, yes, [do not use Zlib]) AC_SUBST(with_zlib) +# +# Non-volatile WAL buffer (NVWAL) +# +AC_MSG_CHECKING([whether to build with non-volatile WAL buffer (NVWAL)]) +PGAC_ARG_BOOL(with, nvwal, no, [use non-volatile WAL buffer (NVWAL)], + [AC_DEFINE([USE_NVWAL], 1, [Define to 1 to use non-volatile WAL buffer (NVWAL). (--with-nvwal)])]) +AC_MSG_RESULT([$with_nvwal]) + +# +# Elf +# + +# Assume system is ELF if it predefines __ELF__ as 1, +# otherwise believe host_os based default. +case $host_os in + freebsd1*|freebsd2*) elf=no;; + freebsd3*|freebsd4*) elf=yes;; +esac + +AC_EGREP_CPP(yes, +[#if __ELF__ + yes +#endif +], +[ELF_SYS=true], +[if test "X$elf" = "Xyes" ; then + ELF_SYS=true +else + ELF_SYS= +fi]) +AC_SUBST(ELF_SYS) + # # Assignments # @@ -1293,6 +1325,12 @@ elif test "$with_uuid" = ossp ; then fi AC_SUBST(UUID_LIBS) +# for non-volatile WAL buffer (NVWAL) +if test "$with_nvwal" = yes; then + AC_CHECK_LIB(pmem, pmem_map_file, [], + [AC_MSG_ERROR([library 'libpmem' is required for non-volatile WAL buffer (NVWAL)])]) +fi + ## ## Header files @@ -1470,6 +1508,11 @@ elif test "$with_uuid" = ossp ; then [AC_MSG_ERROR([header file <ossp/uuid.h> or <uuid.h> is required for OSSP UUID])])]) fi +# for non-volatile WAL buffer (NVWAL) +if test "$with_nvwal" = yes ; then + AC_CHECK_HEADER(libpmem.h, [], [AC_MSG_ERROR([header file <libpmem.h> is required for non-volatile WAL buffer (NVWAL)])]) +fi + if test "$PORTNAME" = "win32" ; then AC_CHECK_HEADERS(crtdefs.h) fi diff --git a/src/backend/access/transam/Makefile b/src/backend/access/transam/Makefile index 595e02de72..b41a710e7e 100644 --- a/src/backend/access/transam/Makefile +++ b/src/backend/access/transam/Makefile @@ -32,7 +32,8 @@ OBJS = \ xlogfuncs.o \ xloginsert.o \ 
xlogreader.o \ - xlogutils.o + xlogutils.o \ + nv_xlog_buffer.o include $(top_srcdir)/src/backend/common.mk diff --git a/src/backend/access/transam/nv_xlog_buffer.c b/src/backend/access/transam/nv_xlog_buffer.c new file mode 100644 index 0000000000..cfc6a6376b --- /dev/null +++ b/src/backend/access/transam/nv_xlog_buffer.c @@ -0,0 +1,107 @@ +/*------------------------------------------------------------------------- + * + * nv_xlog_buffer.c + * PostgreSQL non-volatile WAL buffer + * + * + * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/backend/access/transam/nv_xlog_buffer.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#ifdef USE_NVWAL + +#include <libpmem.h> +#include "access/nv_xlog_buffer.h" + +#include "miscadmin.h" /* IsBootstrapProcessingMode */ +#include "common/file_perm.h" /* pg_file_create_mode */ + +/* + * Maps non-volatile WAL buffer on shared memory. + * + * fname is the path of the file backing the buffer and fsize is its expected + * size in bytes. In bootstrap mode the file is newly created with + * PMEM_FILE_CREATE|PMEM_FILE_EXCL; otherwise an existing file is mapped. + * + * Returns the mapped address on success; PANICs and never returns if the + * file cannot be mapped, if the mapped length differs from fsize, or if the + * file is not on persistent memory. + */ +void * +MapNonVolatileXLogBuffer(const char *fname, Size fsize) +{ + void *addr; + size_t map_len = 0; + int is_pmem = 0; + + Assert(fname != NULL); + Assert(fsize > 0); + + if (IsBootstrapProcessingMode()) + { + /* + * Create and map a new file if we are in bootstrap mode (typically + * executed by initdb). + */ + addr = pmem_map_file(fname, fsize, PMEM_FILE_CREATE|PMEM_FILE_EXCL, + pg_file_create_mode, &map_len, &is_pmem); + } + else + { + /* + * Map an existing file. The second argument (len) should be zero, + * the third argument (flags) should have neither PMEM_FILE_CREATE nor + * PMEM_FILE_EXCL, and the fourth argument (mode) will be ignored. 
+ */ + addr = pmem_map_file(fname, 0, 0, 0, &map_len, &is_pmem); + } + + if (addr == NULL) + elog(PANIC, "could not map non-volatile WAL buffer '%s': %m", fname); + + if (map_len != fsize) + elog(PANIC, "size of non-volatile WAL buffer '%s' is invalid; " + "expected %zu; actual %zu", + fname, fsize, map_len); + + if (!is_pmem) + elog(PANIC, "non-volatile WAL buffer '%s' is not on persistent memory", + fname); + + /* + * Assert page boundary alignment (8KiB as default). It should pass because + * PMDK considers hugepage boundary alignment (2MiB or 1GiB on x64). + */ + Assert((uintptr_t) addr % XLOG_BLCKSZ == 0); + + elog(LOG, "non-volatile WAL buffer '%s' is mapped on [%p-%p)", + fname, addr, (char *) addr + map_len); + return addr; +} + +/* + * Unmaps non-volatile WAL buffer mapped by MapNonVolatileXLogBuffer. + * + * addr and fsize are passed to pmem_unmap as they are. A failure to unmap + * is only reported as a WARNING. + */ +void +UnmapNonVolatileXLogBuffer(void *addr, Size fsize) +{ + Assert(addr != NULL); + + if (pmem_unmap(addr, fsize) < 0) + { + elog(WARNING, "could not unmap non-volatile WAL buffer: %m"); + return; + } + + elog(LOG, "non-volatile WAL buffer unmapped"); +} + +#endif diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index a1256a103b..0681ba1262 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -37,6 +37,7 @@ #include "access/xloginsert.h" #include "access/xlogreader.h" #include "access/xlogutils.h" +#include "access/nv_xlog_buffer.h" #include "catalog/catversion.h" #include "catalog/pg_control.h" #include "catalog/pg_database.h" @@ -873,6 +874,12 @@ static bool InRedo = false; /* Have we launched bgwriter during recovery? 
*/ static bool bgwriterLaunched = false; +/* For non-volatile WAL buffer (NVWAL) */ +char *NvwalPath = NULL; /* a GUC parameter */ +int NvwalSizeMB = 1024; /* a direct GUC parameter */ +static Size NvwalSize = 0; /* an indirect GUC parameter */ +static bool NvwalAvail = false; + /* For WALInsertLockAcquire/Release functions */ static int MyLockNo = 0; static bool holdingAllLocks = false; @@ -5014,6 +5021,98 @@ check_wal_buffers(int *newval, void **extra, GucSource source) return true; } +/* + * GUC check_hook for nvwal_path. + */ +bool +check_nvwal_path(char **newval, void **extra, GucSource source) +{ +#ifndef USE_NVWAL + Assert(!NvwalAvail); + + if (**newval != '\0') + { + GUC_check_errcode(ERRCODE_INVALID_PARAMETER_VALUE); + GUC_check_errmsg("nvwal_path is invalid parameter without NVWAL"); + return false; + } +#endif + + return true; +} + +/* + * GUC assign_hook for nvwal_path. + * + * Records in NvwalAvail whether a path is given, that is, not empty. + */ +void +assign_nvwal_path(const char *newval, void *extra) +{ + /* true if not empty; false if empty */ + NvwalAvail = (bool) (*newval != '\0'); +} + +/* + * GUC check_hook for nvwal_size. + * + * It checks the boundary only (including that the size in bytes fits in + * size_t) and DOES NOT check if the size is a multiple of wal_segment_size + * because the segment size (probably stored in the control file) has not + * been set properly here yet. + * + * See XLOGShmemSize for more validation. 
+ */ +bool +check_nvwal_size(int *newval, void **extra, GucSource source) +{ +#ifdef USE_NVWAL + uint64 buf_size; + uint64 npages; + + Assert(*newval > 0); + + /* Compute in 64 bits so that the product cannot wrap a 32-bit Size */ + buf_size = (uint64) (*newval) * 1024 * 1024; + + if (buf_size > (uint64) SIZE_MAX) + { + GUC_check_errcode(ERRCODE_INVALID_PARAMETER_VALUE); + GUC_check_errmsg("invalid value for nvwal_size (%dMB): " + "the size in bytes does not fit in size_t", + *newval); + return false; + } + + npages = buf_size / XLOG_BLCKSZ; + Assert(npages > 0); + + if (npages > INT_MAX) + { + /* XLOG_BLCKSZ could be so small that npages exceeds INT_MAX */ + GUC_check_errcode(ERRCODE_INVALID_PARAMETER_VALUE); + GUC_check_errmsg("invalid value for nvwal_size (%dMB): " + "the number of WAL pages too large; " + "buf_size %zu; XLOG_BLCKSZ %d", + *newval, (Size) buf_size, (int) XLOG_BLCKSZ); + return false; + } +#endif + + return true; +} + +/* + * GUC assign_hook for nvwal_size. + * + * Converts the value given in megabytes into NvwalSize in bytes. + */ +void +assign_nvwal_size(int newval, void *extra) +{ + NvwalSize = (Size) newval * 1024 * 1024; +} + /* * Read the control file, set respective GUCs. * @@ -5042,13 +5119,49 @@ XLOGShmemSize(void) { Size size; + /* + * If we use non-volatile WAL buffer, we don't use the given wal_buffers. + * Instead, we set it the value based on the size of the file for the + * buffer. This should be done here because of xlblocks array calculation. + */ + if (NvwalAvail) + { + char buf[32]; + int64 npages; + + Assert(NvwalSizeMB > 0); + Assert(NvwalSize > 0); + Assert(wal_segment_size > 0); + Assert(wal_segment_size % XLOG_BLCKSZ == 0); + + /* + * At last, we can check if the size of non-volatile WAL buffer + * (nvwal_size) is multiple of WAL segment size. + * + * Note that NvwalSize has already been calculated in assign_nvwal_size. 
+ */ + if (NvwalSize % wal_segment_size != 0) + { + elog(PANIC, + "invalid value for nvwal_size (%dMB): " + "it should be multiple of WAL segment size; " + "NvwalSize %zu; wal_segment_size %d", + NvwalSizeMB, NvwalSize, wal_segment_size); + } + + npages = (int64) NvwalSize / XLOG_BLCKSZ; + Assert(npages > 0 && npages <= INT_MAX); + + snprintf(buf, sizeof(buf), "%d", (int) npages); + SetConfigOption("wal_buffers", buf, PGC_POSTMASTER, PGC_S_OVERRIDE); + } /* * If the value of wal_buffers is -1, use the preferred auto-tune value. * This isn't an amazingly clean place to do this, but we must wait till * NBuffers has received its final value, and must do it before using the * value of XLOGbuffers to do anything important. */ - if (XLOGbuffers == -1) + else if (XLOGbuffers == -1) { char buf[32]; @@ -5064,10 +5177,13 @@ XLOGShmemSize(void) size = add_size(size, mul_size(sizeof(WALInsertLockPadded), NUM_XLOGINSERT_LOCKS + 1)); /* xlblocks array */ size = add_size(size, mul_size(sizeof(XLogRecPtr), XLOGbuffers)); - /* extra alignment padding for XLOG I/O buffers */ - size = add_size(size, XLOG_BLCKSZ); - /* and the buffers themselves */ - size = add_size(size, mul_size(XLOG_BLCKSZ, XLOGbuffers)); + if (!NvwalAvail) + { + /* extra alignment padding for XLOG I/O buffers */ + size = add_size(size, XLOG_BLCKSZ); + /* and the buffers themselves */ + size = add_size(size, mul_size(XLOG_BLCKSZ, XLOGbuffers)); + } /* * Note: we don't count ControlFileData, it comes out of the "slop factor" @@ -5161,13 +5277,32 @@ XLOGShmemInit(void) } /* - * Align the start of the page buffers to a full xlog block size boundary. - * This simplifies some calculations in XLOG insertion. It is also - * required for O_DIRECT. + * Open and memory-map a file for non-volatile XLOG buffer. The PMDK will + * align the start of the buffer to 2-MiB boundary if the size of the + * buffer is larger than or equal to 4 MiB. 
*/ - allocptr = (char *) TYPEALIGN(XLOG_BLCKSZ, allocptr); - XLogCtl->pages = allocptr; - memset(XLogCtl->pages, 0, (Size) XLOG_BLCKSZ * XLOGbuffers); + if (NvwalAvail) + { + /* Logging and error-handling should be done in the function */ + XLogCtl->pages = MapNonVolatileXLogBuffer(NvwalPath, NvwalSize); + + /* + * Do not memset non-volatile XLOG buffer (XLogCtl->pages) here + * because it would contain records for recovery. We should do so in + * checkpoint after the recovery completes successfully. + */ + } + else + { + /* + * Align the start of the page buffers to a full xlog block size + * boundary. This simplifies some calculations in XLOG insertion. It + * is also required for O_DIRECT. + */ + allocptr = (char *) TYPEALIGN(XLOG_BLCKSZ, allocptr); + XLogCtl->pages = allocptr; + memset(XLogCtl->pages, 0, (Size) XLOG_BLCKSZ * XLOGbuffers); + } /* * Do basic initialization of XLogCtl shared data. (StartupXLOG will fill @@ -8522,6 +8657,13 @@ ShutdownXLOG(int code, Datum arg) CreateCheckPoint(CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_IMMEDIATE); } + + /* + * If we use non-volatile XLOG buffer, unmap it. 
+ */ + if (NvwalAvail) + UnmapNonVolatileXLogBuffer(XLogCtl->pages, NvwalSize); + ShutdownCLOG(); ShutdownCommitTs(); ShutdownSUBTRANS(); diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index 75fc6f11d6..140a99faee 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -2707,7 +2707,7 @@ static struct config_int ConfigureNamesInt[] = GUC_UNIT_XBLOCKS }, &XLOGbuffers, - -1, -1, (INT_MAX / XLOG_BLCKSZ), + -1, -1, INT_MAX, check_wal_buffers, NULL, NULL }, @@ -3381,6 +3381,17 @@ static struct config_int ConfigureNamesInt[] = NULL, assign_tcp_user_timeout, show_tcp_user_timeout }, + { + {"nvwal_size", PGC_POSTMASTER, WAL_SETTINGS, + gettext_noop("Size of non-volatile WAL buffer (NVWAL)."), + NULL, + GUC_UNIT_MB + }, + &NvwalSizeMB, + 1024, 1, INT_MAX, + check_nvwal_size, assign_nvwal_size, NULL + }, + /* End-of-list marker */ { {NULL, 0, 0, NULL, NULL}, NULL, 0, 0, 0, NULL, NULL, NULL @@ -4419,6 +4430,16 @@ static struct config_string ConfigureNamesString[] = check_backtrace_functions, assign_backtrace_functions, NULL }, + { + {"nvwal_path", PGC_POSTMASTER, WAL_SETTINGS, + gettext_noop("Path to file for non-volatile WAL buffer (NVWAL)."), + NULL + }, + &NvwalPath, + "", + check_nvwal_path, assign_nvwal_path, NULL + }, + /* End-of-list marker */ { {NULL, 0, 0, NULL, NULL}, NULL, NULL, NULL, NULL, NULL diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index 3a25287a39..866f77828d 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -226,6 +226,8 @@ #checkpoint_timeout = 5min # range 30s-1d #max_wal_size = 1GB #min_wal_size = 80MB +#nvwal_path = '/path/to/nvwal' +#nvwal_size = 1GB #checkpoint_completion_target = 0.5 # checkpoint target duration, 0.0 - 1.0 #checkpoint_flush_after = 0 # measured in pages, 0 disables #checkpoint_warning = 30s # 0 disables diff --git a/src/bin/initdb/initdb.c 
b/src/bin/initdb/initdb.c index 786672b1b6..1b18097580 100644 --- a/src/bin/initdb/initdb.c +++ b/src/bin/initdb/initdb.c @@ -144,7 +144,10 @@ static bool show_setting = false; static bool data_checksums = false; static char *xlog_dir = NULL; static char *str_wal_segment_size_mb = NULL; +static char *nvwal_path = NULL; +static char *str_nvwal_size_mb = NULL; static int wal_segment_size_mb; +static int nvwal_size_mb; /* internal vars */ @@ -1109,14 +1112,78 @@ setup_config(void) conflines = replace_token(conflines, "#port = 5432", repltok); #endif - /* set default max_wal_size and min_wal_size */ - snprintf(repltok, sizeof(repltok), "min_wal_size = %s", - pretty_wal_size(DEFAULT_MIN_WAL_SEGS)); - conflines = replace_token(conflines, "#min_wal_size = 80MB", repltok); + if (nvwal_path != NULL) + { + int nr_segs; + + if (str_nvwal_size_mb == NULL) + nvwal_size_mb = 1024; + else + { + char *endptr; + + /* check that the argument is a number */ + nvwal_size_mb = strtol(str_nvwal_size_mb, &endptr, 10); + + /* verify that the size of non-volatile WAL buffer is valid */ + if (endptr == str_nvwal_size_mb || *endptr != '\0') + { + pg_log_error("argument of --nvwal-size must be a number; " + "str_nvwal_size_mb '%s'", + str_nvwal_size_mb); + exit(1); + } + if (nvwal_size_mb <= 0) + { + pg_log_error("argument of --nvwal-size must be a positive number; " + "str_nvwal_size_mb '%s'; nvwal_size_mb %d", + str_nvwal_size_mb, nvwal_size_mb); + exit(1); + } + if (nvwal_size_mb % wal_segment_size_mb != 0) + { + pg_log_error("argument of --nvwal-size must be multiple of WAL segment size; " + "str_nvwal_size_mb '%s'; nvwal_size_mb %d; wal_segment_size_mb %d", + str_nvwal_size_mb, nvwal_size_mb, wal_segment_size_mb); + exit(1); + } + } + + /* + * XXX We set {min_,max_,nv}wal_size to the same value. Note that + * postgres might bootstrap and run if the three config does not have + * the same value, but have not been tested yet. 
+ */ + nr_segs = nvwal_size_mb / wal_segment_size_mb; - snprintf(repltok, sizeof(repltok), "max_wal_size = %s", - pretty_wal_size(DEFAULT_MAX_WAL_SEGS)); - conflines = replace_token(conflines, "#max_wal_size = 1GB", repltok); + snprintf(repltok, sizeof(repltok), "min_wal_size = %s", + pretty_wal_size(nr_segs)); + conflines = replace_token(conflines, "#min_wal_size = 80MB", repltok); + + snprintf(repltok, sizeof(repltok), "max_wal_size = %s", + pretty_wal_size(nr_segs)); + conflines = replace_token(conflines, "#max_wal_size = 1GB", repltok); + + snprintf(repltok, sizeof(repltok), "nvwal_path = '%s'", + escape_quotes(nvwal_path)); /* quote-escaped like lc_messages */ + conflines = replace_token(conflines, + "#nvwal_path = '/path/to/nvwal'", repltok); + + snprintf(repltok, sizeof(repltok), "nvwal_size = %s", + pretty_wal_size(nr_segs)); + conflines = replace_token(conflines, "#nvwal_size = 1GB", repltok); + } + else + { + /* set default max_wal_size and min_wal_size */ + snprintf(repltok, sizeof(repltok), "min_wal_size = %s", + pretty_wal_size(DEFAULT_MIN_WAL_SEGS)); + conflines = replace_token(conflines, "#min_wal_size = 80MB", repltok); + + snprintf(repltok, sizeof(repltok), "max_wal_size = %s", + pretty_wal_size(DEFAULT_MAX_WAL_SEGS)); + conflines = replace_token(conflines, "#max_wal_size = 1GB", repltok); + } snprintf(repltok, sizeof(repltok), "lc_messages = '%s'", escape_quotes(lc_messages)); @@ -2321,6 +2388,8 @@ usage(const char *progname) printf(_(" -W, --pwprompt prompt for a password for the new superuser\n")); printf(_(" -X, --waldir=WALDIR location for the write-ahead log directory\n")); printf(_(" --wal-segsize=SIZE size of WAL segments, in megabytes\n")); + printf(_(" -P, --nvwal-path=FILE path to file for non-volatile WAL buffer (NVWAL)\n")); + printf(_(" -Q, --nvwal-size=SIZE size of NVWAL, in megabytes\n")); printf(_("\nLess commonly used options:\n")); printf(_(" -d, --debug generate lots of debugging output\n")); printf(_(" -k, --data-checksums use data page checksums\n")); @@ -2989,6 +3058,8 @@ 
main(int argc, char *argv[]) {"sync-only", no_argument, NULL, 'S'}, {"waldir", required_argument, NULL, 'X'}, {"wal-segsize", required_argument, NULL, 12}, + {"nvwal-path", required_argument, NULL, 'P'}, + {"nvwal-size", required_argument, NULL, 'Q'}, {"data-checksums", no_argument, NULL, 'k'}, {"allow-group-access", no_argument, NULL, 'g'}, {NULL, 0, NULL, 0} @@ -3032,7 +3103,7 @@ main(int argc, char *argv[]) /* process command-line options */ - while ((c = getopt_long(argc, argv, "dD:E:kL:nNU:WA:sST:X:g", long_options, &option_index)) != -1) + while ((c = getopt_long(argc, argv, "dD:E:kL:nNU:WA:sST:X:P:Q:g", long_options, &option_index)) != -1) { switch (c) { @@ -3126,6 +3197,12 @@ main(int argc, char *argv[]) case 12: str_wal_segment_size_mb = pg_strdup(optarg); break; + case 'P': + nvwal_path = pg_strdup(optarg); + break; + case 'Q': + str_nvwal_size_mb = pg_strdup(optarg); + break; case 'g': SetDataDirectoryCreatePerm(PG_DIR_MODE_GROUP); break; diff --git a/src/include/access/nv_xlog_buffer.h b/src/include/access/nv_xlog_buffer.h new file mode 100644 index 0000000000..b58878c92b --- /dev/null +++ b/src/include/access/nv_xlog_buffer.h @@ -0,0 +1,78 @@ +/* + * nv_xlog_buffer.h + * + * PostgreSQL non-volatile WAL buffer + * + * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/access/nv_xlog_buffer.h + */ +#ifndef NV_XLOG_BUFFER_H +#define NV_XLOG_BUFFER_H + +#ifdef USE_NVWAL +#include <libpmem.h> + +/* Implemented in src/backend/access/transam/nv_xlog_buffer.c */ +extern void *MapNonVolatileXLogBuffer(const char *fname, Size fsize); +extern void UnmapNonVolatileXLogBuffer(void *addr, Size fsize); + +#define nv_memset_persist pmem_memset_persist +#define nv_memcpy_nodrain pmem_memcpy_nodrain +#define nv_flush pmem_flush +#define nv_drain pmem_drain +#define nv_persist pmem_persist + +#else + +/* + * Stubs for builds without USE_NVWAL. They are static inline so that this + * header can be included from more than one source file without producing + * duplicate external definitions at link time. + */ +static inline void * +MapNonVolatileXLogBuffer(const char *fname, Size fsize) +{ + return NULL; +} + +static inline void +UnmapNonVolatileXLogBuffer(void *addr, Size 
fsize) +{ + return; +} + +static inline void * +nv_memset_persist(void *pmemdest, int c, size_t len) +{ + return NULL; +} + +static inline void * +nv_memcpy_nodrain(void *pmemdest, const void *src, + size_t len) +{ + return NULL; +} + +static inline void +nv_flush(void *pmemdest, size_t len) +{ + return; +} + +static inline void +nv_drain(void) +{ + return; +} + +static inline void +nv_persist(const void *addr, size_t len) +{ + return; +} + +#endif /* USE_NVWAL */ +#endif /* NV_XLOG_BUFFER_H */ diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h index 347a38f57c..0a05e79524 100644 --- a/src/include/access/xlog.h +++ b/src/include/access/xlog.h @@ -131,6 +131,8 @@ extern int recovery_min_apply_delay; extern char *PrimaryConnInfo; extern char *PrimarySlotName; extern bool wal_receiver_create_temp_slot; +extern char *NvwalPath; +extern int NvwalSizeMB; /* indirectly set via GUC system */ extern TransactionId recoveryTargetXid; diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in index c199cd46d2..90d23b46d1 100644 --- a/src/include/pg_config.h.in +++ b/src/include/pg_config.h.in @@ -325,6 +325,9 @@ /* Define to 1 if you have the `pam' library (-lpam). */ #undef HAVE_LIBPAM +/* Define to 1 if you have the `pmem' library (-lpmem). */ +#undef HAVE_LIBPMEM + /* Define if you have a function readline library */ #undef HAVE_LIBREADLINE @@ -880,6 +883,9 @@ /* Define to select named POSIX semaphores. */ #undef USE_NAMED_POSIX_SEMAPHORES +/* Define to 1 to use non-volatile WAL buffer (NVWAL). (--with-nvwal) */ +#undef USE_NVWAL + /* Define to build with OpenSSL support. 
(--with-openssl) */ #undef USE_OPENSSL diff --git a/src/include/utils/guc.h b/src/include/utils/guc.h index 2819282181..d941a76d43 100644 --- a/src/include/utils/guc.h +++ b/src/include/utils/guc.h @@ -438,6 +438,10 @@ extern void assign_search_path(const char *newval, void *extra); /* in access/transam/xlog.c */ extern bool check_wal_buffers(int *newval, void **extra, GucSource source); +extern bool check_nvwal_path(char **newval, void **extra, GucSource source); +extern void assign_nvwal_path(const char *newval, void *extra); +extern bool check_nvwal_size(int *newval, void **extra, GucSource source); +extern void assign_nvwal_size(int newval, void *extra); extern void assign_xlog_sync_method(int new_sync_method, void *extra); #endif /* GUC_H */ -- 2.17.1