diff --git a/configure b/configure index 19a3cd09a0..6a31eb9f9f 100755 --- a/configure +++ b/configure @@ -699,6 +699,7 @@ with_gnu_ld LD LDFLAGS_SL LDFLAGS_EX +with_zstd with_zlib with_system_tzdata with_libxslt @@ -867,6 +868,7 @@ with_libxml with_libxslt with_system_tzdata with_zlib +with_zstd with_gnu_ld enable_largefile ' @@ -1571,6 +1573,7 @@ Optional Packages: --with-system-tzdata=DIR use system time zone data in DIR --without-zlib do not use Zlib + --with-zstd build with zstd support --with-gnu-ld assume the C compiler uses GNU ld [default=no] Some influential environment variables: @@ -8601,6 +8604,41 @@ fi +# +# zstd +# +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether to build with zstd support" >&5 +$as_echo_n "checking whether to build with zstd support... " >&6; } + + + +# Check whether --with-zstd was given. +if test "${with_zstd+set}" = set; then : + withval=$with_zstd; + case $withval in + yes) + +$as_echo "#define USE_ZSTD 1" >>confdefs.h + + ;; + no) + : + ;; + *) + as_fn_error $? "no argument expected for --with-zstd option" "$LINENO" 5 + ;; + esac + +else + with_zstd=no + +fi + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $with_zstd" >&5 +$as_echo "$with_zstd" >&6; } + + # # Assignments # @@ -12092,6 +12130,56 @@ fi fi +if test "$with_zstd" = yes; then + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for ZSTD_compress in -lzstd" >&5 +$as_echo_n "checking for ZSTD_compress in -lzstd... " >&6; } +if ${ac_cv_lib_zstd_ZSTD_compress+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lzstd $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +#ifdef __cplusplus +extern "C" +#endif +char ZSTD_compress (); +int +main () +{ +return ZSTD_compress (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_lib_zstd_ZSTD_compress=yes +else + ac_cv_lib_zstd_ZSTD_compress=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_zstd_ZSTD_compress" >&5 +$as_echo "$ac_cv_lib_zstd_ZSTD_compress" >&6; } +if test "x$ac_cv_lib_zstd_ZSTD_compress" = xyes; then : + cat >>confdefs.h <<_ACEOF +#define HAVE_LIBZSTD 1 +_ACEOF + + LIBS="-lzstd $LIBS" + +else + as_fn_error $? "library 'zstd' is required for zstd support" "$LINENO" 5 +fi + +fi + if test "$enable_spinlocks" = yes; then $as_echo "#define HAVE_SPINLOCKS 1" >>confdefs.h @@ -13295,6 +13383,36 @@ Use --without-zlib to disable zlib support." "$LINENO" 5 fi +fi + +if test "$with_zstd" = yes; then + for ac_header in zstd.h +do : + ac_fn_c_check_header_mongrel "$LINENO" "zstd.h" "ac_cv_header_zstd_h" "$ac_includes_default" +if test "x$ac_cv_header_zstd_h" = xyes; then : + cat >>confdefs.h <<_ACEOF +#define HAVE_ZSTD_H 1 +_ACEOF + +else + for ac_header in zstd.h +do : + ac_fn_c_check_header_mongrel "$LINENO" "zstd.h" "ac_cv_header_zstd_h" "$ac_includes_default" +if test "x$ac_cv_header_zstd_h" = xyes; then : + cat >>confdefs.h <<_ACEOF +#define HAVE_ZSTD_H 1 +_ACEOF + +else + as_fn_error $? "zstd.h header file is required for zstd" "$LINENO" 5 +fi + +done + +fi + +done + fi if test "$with_gssapi" = yes ; then diff --git a/configure.ac b/configure.ac index 6b9d0487a8..c8e5b81233 100644 --- a/configure.ac +++ b/configure.ac @@ -999,6 +999,16 @@ PGAC_ARG_BOOL(with, zlib, yes, [do not use Zlib]) AC_SUBST(with_zlib) +# +# zstd +# +AC_MSG_CHECKING([whether to build with zstd support]) +PGAC_ARG_BOOL(with, zstd, no, + [build with zstd support], + [AC_DEFINE([USE_ZSTD], 1, [Define to 1 to build with zstd support. 
(--with-zstd)])]) +AC_MSG_RESULT([$with_zstd]) +AC_SUBST(with_zstd) + # # Assignments # @@ -1186,6 +1196,10 @@ failure. It is possible the compiler isn't looking in the proper directory. Use --without-zlib to disable zlib support.])]) fi +if test "$with_zstd" = yes; then + AC_CHECK_LIB(zstd, ZSTD_compress, [], [AC_MSG_ERROR([library 'zstd' is required for zstd support])]) +fi + if test "$enable_spinlocks" = yes; then AC_DEFINE(HAVE_SPINLOCKS, 1, [Define to 1 if you have spinlocks.]) else @@ -1400,6 +1414,11 @@ failure. It is possible the compiler isn't looking in the proper directory. Use --without-zlib to disable zlib support.])]) fi +if test "$with_zstd" = yes; then + AC_CHECK_HEADERS(zstd.h, [], + [AC_CHECK_HEADERS(zstd.h, [], [AC_MSG_ERROR([zstd.h header file is required for zstd])])]) +fi + if test "$with_gssapi" = yes ; then AC_CHECK_HEADERS(gssapi/gssapi.h, [], [AC_CHECK_HEADERS(gssapi.h, [], [AC_MSG_ERROR([gssapi.h header file is required for GSSAPI])])]) diff --git a/contrib/pageinspect/Makefile b/contrib/pageinspect/Makefile index d9d8177116..279cb84297 100644 --- a/contrib/pageinspect/Makefile +++ b/contrib/pageinspect/Makefile @@ -9,10 +9,11 @@ OBJS = \ ginfuncs.o \ hashfuncs.o \ heapfuncs.o \ + pcfuncs.o \ rawpage.o EXTENSION = pageinspect -DATA = pageinspect--1.7--1.8.sql pageinspect--1.6--1.7.sql \ +DATA = pageinspect--1.8--1.9.sql pageinspect--1.7--1.8.sql pageinspect--1.6--1.7.sql \ pageinspect--1.5.sql pageinspect--1.5--1.6.sql \ pageinspect--1.4--1.5.sql pageinspect--1.3--1.4.sql \ pageinspect--1.2--1.3.sql pageinspect--1.1--1.2.sql \ diff --git a/contrib/pageinspect/expected/page.out b/contrib/pageinspect/expected/page.out index b6aea0124b..f25294da45 100644 --- a/contrib/pageinspect/expected/page.out +++ b/contrib/pageinspect/expected/page.out @@ -211,3 +211,57 @@ select tuple_data_split('test8'::regclass, t_data, t_infomask, t_infomask2, t_bi (1 row) drop table test8; +-- check functions for compressed relation +CREATE TABLE test_compressed(a 
int, b int) WITH(compresstype=pglz,compress_chunk_size=1024,compress_prealloc_chunks=4); +INSERT INTO test_compressed SELECT id,id FROM generate_series(1,1000)id; +SELECT nblocks, allocated_chunks, chunk_size, algorithm FROM get_compress_address_header('test_compressed',0); + nblocks | allocated_chunks | chunk_size | algorithm +---------+------------------+------------+----------- + 5 | 20 | 1024 | 1 +(1 row) + +SELECT nblocks, allocated_chunks, chunk_size, algorithm FROM get_compress_address_header('xxx',0); --fail +ERROR: relation "xxx" does not exist +SELECT nblocks, allocated_chunks, chunk_size, algorithm FROM get_compress_address_header('test_compressed',1); --fail +ERROR: could not open file "base/16384/16434.1_pca": No such file or directory +SELECT * FROM get_compress_address_items('test_compressed',0); + blkno | nchunks | allocated_chunks | chunknos +-------+---------+------------------+--------------- + 0 | 0 | 4 | {1,2,3,4} + 1 | 0 | 4 | {5,6,7,8} + 2 | 0 | 4 | {9,10,11,12} + 3 | 0 | 4 | {13,14,15,16} + 4 | 0 | 4 | {17,18,19,20} +(5 rows) + +SELECT * FROM get_compress_address_items('xxx',0); --fail +ERROR: relation "xxx" does not exist +SELECT * FROM get_compress_address_items('test_compressed',1); --fail +ERROR: could not open file "base/16384/16434.1_pca": No such file or directory +-- VACUUM does not reclaim address +DELETE FROM test_compressed WHERE ctid >= '(1,0)'::tid; +VACUUM test_compressed; +SELECT nblocks, allocated_chunks, chunk_size, algorithm FROM get_compress_address_header('test_compressed',0); + nblocks | allocated_chunks | chunk_size | algorithm +---------+------------------+------------+----------- + 1 | 20 | 1024 | 1 +(1 row) + +SELECT * FROM get_compress_address_items('test_compressed',0); + blkno | nchunks | allocated_chunks | chunknos +-------+---------+------------------+--------------- + 0 | 0 | 4 | {1,2,3,4} + 1 | 0 | 4 | {5,6,7,8} + 2 | 0 | 4 | {9,10,11,12} + 3 | 0 | 4 | {13,14,15,16} + 4 | 0 | 4 | {17,18,19,20} +(5 rows) + 
+SELECT octet_length(page_compress(get_raw_page('test_compressed', 'main', 0), 'pglz', 0)) > 0 AS page_compress_test;
+ page_compress_test 
+--------------------
+ t
+(1 row)
+
+SELECT octet_length(page_compress(get_raw_page('test_compressed', 'main', 0), 'xxx', 0)) > 0 AS page_compress_test; --fail
+ERROR: unrecognized compression algorithm xxx
diff --git a/contrib/pageinspect/pageinspect--1.8--1.9.sql b/contrib/pageinspect/pageinspect--1.8--1.9.sql
new file mode 100644
index 0000000000..cc099e2a15
--- /dev/null
+++ b/contrib/pageinspect/pageinspect--1.8--1.9.sql
@@ -0,0 +1,41 @@
+/* contrib/pageinspect/pageinspect--1.8--1.9.sql */
+
+-- complain if script is sourced in psql, rather than via ALTER EXTENSION
+\echo Use "ALTER EXTENSION pageinspect UPDATE TO '1.9'" to load this file. \quit
+
+--
+-- get_compress_address_header()
+--
+CREATE FUNCTION get_compress_address_header(IN relname text, IN segno integer,
+ OUT nblocks integer,
+ OUT allocated_chunks integer,
+ OUT chunk_size integer,
+ OUT algorithm integer,
+ OUT last_synced_nblocks integer,
+ OUT last_synced_allocated_chunks integer)
+RETURNS record
+AS 'MODULE_PATHNAME', 'get_compress_address_header'
+LANGUAGE C STRICT PARALLEL SAFE;
+
+--
+-- get_compress_address_items()
+--
+CREATE FUNCTION get_compress_address_items(IN relname text, IN segno integer,
+ OUT blkno integer,
+ OUT nchunks integer,
+ OUT allocated_chunks integer,
+ OUT chunknos integer[])
+RETURNS SETOF record
+AS 'MODULE_PATHNAME', 'get_compress_address_items'
+LANGUAGE C STRICT PARALLEL SAFE;
+
+--
+-- page_compress()
+--
+CREATE FUNCTION page_compress(
+ page bytea,
+ algorithm text,
+ level integer)
+RETURNS bytea
+AS 'MODULE_PATHNAME', 'page_compress'
+LANGUAGE C STRICT PARALLEL SAFE;
diff --git a/contrib/pageinspect/pageinspect.control b/contrib/pageinspect/pageinspect.control
index f8cdf526c6..bd716769a1 100644
--- a/contrib/pageinspect/pageinspect.control
+++ b/contrib/pageinspect/pageinspect.control
@@ -1,5 +1,5 @@
 # pageinspect extension
 comment = 
'inspect the contents of database pages at a low level'
-default_version = '1.8'
+default_version = '1.9'
 module_pathname = '$libdir/pageinspect'
 relocatable = true
diff --git a/contrib/pageinspect/pcfuncs.c b/contrib/pageinspect/pcfuncs.c
new file mode 100644
index 0000000000..36dc1b83fe
--- /dev/null
+++ b/contrib/pageinspect/pcfuncs.c
@@ -0,0 +1,367 @@
+/*-------------------------------------------------------------------------
+ *
+ * pcfuncs.c
+ * Functions to investigate the content of address file of compressed relation
+ *
+ * Access-method specific inspection functions are in separate files.
+ *
+ * Copyright (c) 2007-2020, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * contrib/pageinspect/pcfuncs.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include <sys/mman.h>
+
+#include "postgres.h"
+
+#include "access/htup_details.h"
+#include "access/relation.h"
+#include "catalog/namespace.h"
+#include "catalog/pg_type.h"
+#include "funcapi.h"
+#include "miscadmin.h"
+#include "pageinspect.h"
+#include "utils/builtins.h"
+#include "utils/rel.h"
+#include "utils/varlena.h"
+
+#include "storage/page_compression.h"
+
+
+static PageCompressHeader *get_compress_address_contents_internal(text *relname, int segno);
+
+
+/*
+ * get_compress_address_header
+ *
+ */
+PG_FUNCTION_INFO_V1(get_compress_address_header);
+
+Datum
+get_compress_address_header(PG_FUNCTION_ARGS)
+{
+ text *relname = PG_GETARG_TEXT_PP(0);
+ uint32 segno = PG_GETARG_UINT32(1);
+ Datum result;
+ HeapTuple tuple;
+ TupleDesc tupleDesc;
+ Datum values[6];
+ bool nulls[6];
+ PageCompressHeader *pcMap;
+
+ if (!superuser())
+ ereport(ERROR,
+ (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+ errmsg("must be superuser to use raw page functions")));
+
+ /* Build a tuple descriptor for our result type */
+ if (get_call_result_type(fcinfo, NULL, &tupleDesc) != TYPEFUNC_COMPOSITE)
+ elog(ERROR, "return type must be a row type");
+
+ pcMap = 
get_compress_address_contents_internal(relname, segno); + if(pcMap == NULL) + PG_RETURN_NULL(); + + values[0] = UInt32GetDatum(pg_atomic_read_u32(&pcMap->nblocks)); + values[1] = UInt32GetDatum(pg_atomic_read_u32(&pcMap->allocated_chunks)); + values[2] = UInt32GetDatum(pcMap->chunk_size); + values[3] = UInt32GetDatum(pcMap->algorithm); + values[4] = UInt32GetDatum(pg_atomic_read_u32(&pcMap->last_synced_nblocks)); + values[5] = UInt32GetDatum(pg_atomic_read_u32(&pcMap->last_synced_allocated_chunks)); + + pfree(pcMap); + + memset(nulls, 0, sizeof(nulls)); + + /* Build and return the result tuple */ + tuple = heap_form_tuple(tupleDesc, values, nulls); + result = HeapTupleGetDatum(tuple); + + PG_RETURN_DATUM(result); +} + +/* + * get_compress_address_items + * + * Allows inspection of compress address contents of a compressed relation. + */ +PG_FUNCTION_INFO_V1(get_compress_address_items); + +typedef struct compress_address_items_state +{ + TupleDesc tupd; + uint32 blkno; + PageCompressHeader *pcMap; +} compress_address_items_state; + +Datum +get_compress_address_items(PG_FUNCTION_ARGS) +{ + text *relname = PG_GETARG_TEXT_PP(0); + uint32 segno = PG_GETARG_UINT32(1); + compress_address_items_state *inter_call_data = NULL; + FuncCallContext *fctx; + PageCompressHeader *pcMap; + PageCompressAddr *pcAddr; + + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser to use raw page functions"))); + + if (SRF_IS_FIRSTCALL()) + { + TupleDesc tupdesc; + MemoryContext mctx; + uint32 blkno; + + fctx = SRF_FIRSTCALL_INIT(); + mctx = MemoryContextSwitchTo(fctx->multi_call_memory_ctx); + + inter_call_data = palloc(sizeof(compress_address_items_state)); + + /* Build a tuple descriptor for our result type */ + if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) + elog(ERROR, "return type must be a row type"); + + inter_call_data->tupd = tupdesc; + inter_call_data->blkno = 0; + + pcMap = 
get_compress_address_contents_internal(relname, segno); + + inter_call_data->pcMap = pcMap; + if(pcMap) + { + /* find the largest page with a non-empty address */ + fctx->max_calls = pg_atomic_read_u32(&pcMap->nblocks); + for(blkno = fctx->max_calls; blkno < RELSEG_SIZE; blkno++) + { + pcAddr = GetPageCompressAddr(pcMap, + pcMap->chunk_size, + blkno); + if(pcAddr->allocated_chunks != 0) + fctx->max_calls = blkno + 1; + } + } + else + fctx->max_calls = 0; + + fctx->user_fctx = inter_call_data; + + MemoryContextSwitchTo(mctx); + } + + fctx = SRF_PERCALL_SETUP(); + inter_call_data = fctx->user_fctx; + + if (fctx->call_cntr < fctx->max_calls) + { + Datum result; + HeapTuple tuple; + int i, len, chunk_count; + char *values[5]; + char buf[256]; + char *p; + + pcMap = inter_call_data->pcMap; + pcAddr = GetPageCompressAddr(pcMap, + pcMap->chunk_size, + inter_call_data->blkno); + + /* Extract information from the compress address */ + + p = buf; + snprintf(p,sizeof(buf) - (p - buf), "{"); + p++; + + /* skip invalid chunkno at tail */ + chunk_count = BLCKSZ / pcMap->chunk_size; + while(pcAddr->chunknos[chunk_count - 1] == 0) + chunk_count --; + + for(i=0; i< chunk_count; i++) + { + if(i==0) + len = snprintf(p, sizeof(buf) - (p - buf), "%d", pcAddr->chunknos[i]); + else + len = snprintf(p, sizeof(buf) - (p - buf), ",%d", pcAddr->chunknos[i]); + p += len; + } + snprintf(p, sizeof(buf) - (p - buf), "}"); + + values[0] = psprintf("%d", inter_call_data->blkno); + values[1] = psprintf("%d", pcAddr->nchunks); + values[2] = psprintf("%d", pcAddr->allocated_chunks); + values[3] = psprintf("%s", buf); + + /* Build and return the result tuple. 
*/ + tuple = BuildTupleFromCStrings(TupleDescGetAttInMetadata(inter_call_data->tupd), + values); + + result = HeapTupleGetDatum(tuple); + + inter_call_data->blkno++; + + SRF_RETURN_NEXT(fctx, result); + } + else + SRF_RETURN_DONE(fctx); +} + +/* + * get_compress_address_contents_internal + * + * returns raw compress address file contents + */ +static PageCompressHeader * +get_compress_address_contents_internal(text *relname, int segno) +{ + RangeVar *relrv; + Relation rel; + PageCompressHeader *pcMap; + PageCompressHeader *result = NULL; + int i; + + relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname)); + rel = relation_openrv(relrv, AccessShareLock); + + /* Check that this relation support compression */ + if (rel->rd_rel->relkind != RELKIND_RELATION && rel->rd_rel->relkind != RELKIND_INDEX) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot get compress address contents from relation \"%s\", only support table and index", + RelationGetRelationName(rel)))); + + /* + * Reject attempts to read non-local temporary relations; we would be + * likely to get wrong data since we have no visibility into the owning + * session's local buffers. 
+ */ + if (RELATION_IS_OTHER_TEMP(rel)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot access temporary tables of other sessions"))); + + if(rel->rd_node.compress_chunk_size != 0 && rel->rd_node.compress_algorithm != 0) + { + /* Get file path */ + char *path, *pca_path; + int file, chunk_size; + + path = relpathbackend(rel->rd_node, rel->rd_backend, MAIN_FORKNUM); + if (segno > 0) + pca_path = psprintf("%s.%u_pca", path, segno); + else + pca_path = psprintf("%s_pca", path); + pfree(path); + + file = open(pca_path, PG_BINARY | O_RDONLY, 0); + + if (file < 0) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not open file \"%s\": %m", pca_path))); + + chunk_size = rel->rd_node.compress_chunk_size; + pcMap = pc_mmap(file, chunk_size, true); + if(pcMap == MAP_FAILED) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_RESOURCES), + errmsg("Failed to mmap page compression address file %s: %m", + pca_path))); + + result = (PageCompressHeader *)palloc(SizeofPageCompressAddrFile(chunk_size)); + + /* Make a member-by-member copy to ensure that each member is read atomically */ + for(i=0; i < SizeofPageCompressAddrFile(chunk_size)/sizeof(int); i++) + ((int *)result)[i] = ((int *)pcMap)[i]; + + pc_munmap(pcMap); + close(file); + pfree(pca_path); + } + + relation_close(rel, AccessShareLock); + + return result; +} + + +/* + * page_compress + * + * compress one page + */ + +PG_FUNCTION_INFO_V1(page_compress); + +Datum +page_compress(PG_FUNCTION_ARGS) +{ + bytea *raw_page = PG_GETARG_BYTEA_P(0); + char *algorithm_name = text_to_cstring(PG_GETARG_TEXT_PP(1)); + int32 level = PG_GETARG_INT32(2); + char *work_buffer; + int raw_page_size; + int work_buffer_size; + int8 algorithm; + bytea *result; + int nbytes; + PageHeader page; + + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser to use raw page functions"))); + + raw_page_size = VARSIZE(raw_page) - VARHDRSZ; + + /* + * Check that 
enough data was supplied, so that we don't try to access + * fields outside the supplied buffer. + */ + if (raw_page_size < BLCKSZ) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("input page too small (%d bytes)", raw_page_size))); + + page = (PageHeader) VARDATA(raw_page); + + /* compress page */ + if (strcmp(algorithm_name, "pglz") == 0) + algorithm = COMPRESS_ALGORITHM_PGLZ; + else if (strcmp(algorithm_name, "zstd") == 0) + algorithm = COMPRESS_ALGORITHM_ZSTD; + else + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("unrecognized compression algorithm %s", + algorithm_name))); + + work_buffer_size = compress_page_buffer_bound(algorithm); + if(work_buffer_size < 0) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("unrecognized compression algorithm %d", + algorithm))); + work_buffer = palloc(work_buffer_size); + + nbytes = compress_page((char *)page, work_buffer, work_buffer_size, algorithm, level); + + if(nbytes < 0) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("unrecognized compression algorithm %d", + algorithm))); + + result = (bytea *) palloc(nbytes + VARHDRSZ); + SET_VARSIZE(result, nbytes + VARHDRSZ); + memcpy(VARDATA(result), work_buffer, nbytes); + + pfree(work_buffer); + + PG_RETURN_BYTEA_P(result); +} diff --git a/contrib/pageinspect/sql/page.sql b/contrib/pageinspect/sql/page.sql index bd049aeb24..cb10e4b349 100644 --- a/contrib/pageinspect/sql/page.sql +++ b/contrib/pageinspect/sql/page.sql @@ -86,3 +86,24 @@ select t_bits, t_data from heap_page_items(get_raw_page('test8', 0)); select tuple_data_split('test8'::regclass, t_data, t_infomask, t_infomask2, t_bits) from heap_page_items(get_raw_page('test8', 0)); drop table test8; + +-- check functions for compressed relation +CREATE TABLE test_compressed(a int, b int) WITH(compresstype=pglz,compress_chunk_size=1024,compress_prealloc_chunks=4); +INSERT INTO test_compressed SELECT id,id FROM generate_series(1,1000)id; + 
+SELECT nblocks, allocated_chunks, chunk_size, algorithm FROM get_compress_address_header('test_compressed',0); +SELECT nblocks, allocated_chunks, chunk_size, algorithm FROM get_compress_address_header('xxx',0); --fail +SELECT nblocks, allocated_chunks, chunk_size, algorithm FROM get_compress_address_header('test_compressed',1); --fail + +SELECT * FROM get_compress_address_items('test_compressed',0); +SELECT * FROM get_compress_address_items('xxx',0); --fail +SELECT * FROM get_compress_address_items('test_compressed',1); --fail + +-- VACUUM does not reclaim address +DELETE FROM test_compressed WHERE ctid >= '(1,0)'::tid; +VACUUM test_compressed; +SELECT nblocks, allocated_chunks, chunk_size, algorithm FROM get_compress_address_header('test_compressed',0); +SELECT * FROM get_compress_address_items('test_compressed',0); + +SELECT octet_length(page_compress(get_raw_page('test_compressed', 'main', 0), 'pglz', 0)) > 0 AS page_compress_test; +SELECT octet_length(page_compress(get_raw_page('test_compressed', 'main', 0), 'xxx', 0)) > 0 AS page_compress_test; --fail diff --git a/src/Makefile.global.in b/src/Makefile.global.in index 7ca1e9aac5..6734c32c8e 100644 --- a/src/Makefile.global.in +++ b/src/Makefile.global.in @@ -196,6 +196,7 @@ with_llvm = @with_llvm@ with_system_tzdata = @with_system_tzdata@ with_uuid = @with_uuid@ with_zlib = @with_zlib@ +with_zstd = @with_zstd@ enable_rpath = @enable_rpath@ enable_nls = @enable_nls@ enable_debug = @enable_debug@ diff --git a/src/backend/access/common/reloptions.c b/src/backend/access/common/reloptions.c index 8ccc228a8c..242b343bdf 100644 --- a/src/backend/access/common/reloptions.c +++ b/src/backend/access/common/reloptions.c @@ -382,7 +382,60 @@ static relopt_int intRelOpts[] = }, -1, 0, 1024 }, - + { + { + "compresslevel", + "Level of page compression.", + RELOPT_KIND_HEAP | RELOPT_KIND_BTREE | RELOPT_KIND_HASH | RELOPT_KIND_GIN | RELOPT_KIND_GIST | RELOPT_KIND_SPGIST, + ShareUpdateExclusiveLock + }, + 0, -127, 127 + }, + { 
+ { + "compress_chunk_size", + "Size of chunk to store compressed page.", + RELOPT_KIND_HEAP | RELOPT_KIND_BTREE | RELOPT_KIND_HASH | RELOPT_KIND_GIN | RELOPT_KIND_GIST | RELOPT_KIND_SPGIST, + AccessExclusiveLock + }, + BLCKSZ / 2, BLCKSZ / 8, BLCKSZ / 2 + }, + { + { + "compress_prealloc_chunks", + "Number of prealloced chunks for each block.", + RELOPT_KIND_HEAP | RELOPT_KIND_BTREE | RELOPT_KIND_HASH | RELOPT_KIND_GIN | RELOPT_KIND_GIST | RELOPT_KIND_SPGIST, + ShareUpdateExclusiveLock + }, + 0, 0, 7 + }, + { + { + "default_compresslevel", + "Default level of page compression.", + RELOPT_KIND_TABLESPACE, + ShareUpdateExclusiveLock + }, + 0, -127, 127 + }, + { + { + "default_compress_chunk_size", + "Default size of chunk to store compressed page.", + RELOPT_KIND_TABLESPACE, + ShareUpdateExclusiveLock + }, + BLCKSZ / 2, BLCKSZ / 8, BLCKSZ / 2 + }, + { + { + "default_compress_prealloc_chunks", + "Default number of prealloced chunks for each block.", + RELOPT_KIND_TABLESPACE, + ShareUpdateExclusiveLock + }, + 0, 0, 7 + }, /* list terminator */ {{NULL}} }; @@ -492,6 +545,16 @@ relopt_enum_elt_def viewCheckOptValues[] = {(const char *) NULL} /* list terminator */ }; +/* values from compressTypeOption */ +relopt_enum_elt_def compressTypeOptValues[] = +{ + /* no value for NOT_SET */ + {"none", COMPRESS_TYPE_NONE}, + {"pglz", COMPRESS_TYPE_PGLZ}, + {"zstd", COMPRESS_TYPE_ZSTD}, + {(const char *) NULL} /* list terminator */ +}; + static relopt_enum enumRelOpts[] = { { @@ -516,6 +579,28 @@ static relopt_enum enumRelOpts[] = VIEW_OPTION_CHECK_OPTION_NOT_SET, gettext_noop("Valid values are \"local\" and \"cascaded\".") }, + { + { + "compresstype", + "Compression type (none, pglz or zstd).", + RELOPT_KIND_HEAP | RELOPT_KIND_BTREE | RELOPT_KIND_HASH | RELOPT_KIND_GIN | RELOPT_KIND_GIST | RELOPT_KIND_SPGIST, + AccessExclusiveLock + }, + compressTypeOptValues, + COMPRESS_TYPE_NONE, + gettext_noop("Valid values are \"none\", \"pglz\" and \"zstd\".") + }, + { + { + 
"default_compresstype", + "Default compression type (none, pglz or zstd).", + RELOPT_KIND_TABLESPACE, + ShareUpdateExclusiveLock + }, + compressTypeOptValues, + COMPRESS_TYPE_NONE, + gettext_noop("Valid values are \"none\", \"pglz\" and \"zstd\".") + }, /* list terminator */ {{NULL}} }; @@ -1859,7 +1944,15 @@ default_reloptions(Datum reloptions, bool validate, relopt_kind kind) {"vacuum_index_cleanup", RELOPT_TYPE_BOOL, offsetof(StdRdOptions, vacuum_index_cleanup)}, {"vacuum_truncate", RELOPT_TYPE_BOOL, - offsetof(StdRdOptions, vacuum_truncate)} + offsetof(StdRdOptions, vacuum_truncate)}, + {"compresstype", RELOPT_TYPE_ENUM, + offsetof(StdRdOptions, compress) + offsetof(PageCompressOpts, compresstype)}, + {"compresslevel", RELOPT_TYPE_INT, + offsetof(StdRdOptions, compress) + offsetof(PageCompressOpts, compresslevel)}, + {"compress_chunk_size", RELOPT_TYPE_INT, + offsetof(StdRdOptions, compress) + offsetof(PageCompressOpts, compress_chunk_size)}, + {"compress_prealloc_chunks", RELOPT_TYPE_INT, + offsetof(StdRdOptions, compress) + offsetof(PageCompressOpts, compress_prealloc_chunks)} }; return (bytea *) build_reloptions(reloptions, validate, kind, @@ -2066,7 +2159,11 @@ tablespace_reloptions(Datum reloptions, bool validate) {"random_page_cost", RELOPT_TYPE_REAL, offsetof(TableSpaceOpts, random_page_cost)}, {"seq_page_cost", RELOPT_TYPE_REAL, offsetof(TableSpaceOpts, seq_page_cost)}, {"effective_io_concurrency", RELOPT_TYPE_INT, offsetof(TableSpaceOpts, effective_io_concurrency)}, - {"maintenance_io_concurrency", RELOPT_TYPE_INT, offsetof(TableSpaceOpts, maintenance_io_concurrency)} + {"maintenance_io_concurrency", RELOPT_TYPE_INT, offsetof(TableSpaceOpts, maintenance_io_concurrency)}, + {"default_compresstype", RELOPT_TYPE_ENUM, offsetof(TableSpaceOpts, compress) + offsetof(PageCompressOpts, compresstype)}, + {"default_compresslevel", RELOPT_TYPE_INT, offsetof(TableSpaceOpts, compress) + offsetof(PageCompressOpts, compresslevel)}, + {"default_compress_chunk_size", 
RELOPT_TYPE_INT, offsetof(TableSpaceOpts, compress) + offsetof(PageCompressOpts, compress_chunk_size)}, + {"default_compress_prealloc_chunks", RELOPT_TYPE_INT, offsetof(TableSpaceOpts, compress) + offsetof(PageCompressOpts, compress_prealloc_chunks)} }; return (bytea *) build_reloptions(reloptions, validate, diff --git a/src/backend/access/gin/ginutil.c b/src/backend/access/gin/ginutil.c index ef9b56fd36..52da88d77f 100644 --- a/src/backend/access/gin/ginutil.c +++ b/src/backend/access/gin/ginutil.c @@ -611,7 +611,15 @@ ginoptions(Datum reloptions, bool validate) static const relopt_parse_elt tab[] = { {"fastupdate", RELOPT_TYPE_BOOL, offsetof(GinOptions, useFastUpdate)}, {"gin_pending_list_limit", RELOPT_TYPE_INT, offsetof(GinOptions, - pendingListCleanupSize)} + pendingListCleanupSize)}, + {"compresstype", RELOPT_TYPE_ENUM, + offsetof(GinOptions, compress) + offsetof(PageCompressOpts, compresstype)}, + {"compresslevel", RELOPT_TYPE_INT, + offsetof(GinOptions, compress) + offsetof(PageCompressOpts, compresslevel)}, + {"compress_chunk_size", RELOPT_TYPE_INT, + offsetof(GinOptions, compress) + offsetof(PageCompressOpts, compress_chunk_size)}, + {"compress_prealloc_chunks", RELOPT_TYPE_INT, + offsetof(GinOptions, compress) + offsetof(PageCompressOpts, compress_prealloc_chunks)} }; return (bytea *) build_reloptions(reloptions, validate, diff --git a/src/backend/access/gist/gistutil.c b/src/backend/access/gist/gistutil.c index 615b5ade23..a47d9acd37 100644 --- a/src/backend/access/gist/gistutil.c +++ b/src/backend/access/gist/gistutil.c @@ -924,7 +924,15 @@ gistoptions(Datum reloptions, bool validate) { static const relopt_parse_elt tab[] = { {"fillfactor", RELOPT_TYPE_INT, offsetof(GiSTOptions, fillfactor)}, - {"buffering", RELOPT_TYPE_ENUM, offsetof(GiSTOptions, buffering_mode)} + {"buffering", RELOPT_TYPE_ENUM, offsetof(GiSTOptions, buffering_mode)}, + {"compresstype", RELOPT_TYPE_ENUM, + offsetof(GiSTOptions, compress) + offsetof(PageCompressOpts, compresstype)}, + 
{"compresslevel", RELOPT_TYPE_INT, + offsetof(GiSTOptions, compress) + offsetof(PageCompressOpts, compresslevel)}, + {"compress_chunk_size", RELOPT_TYPE_INT, + offsetof(GiSTOptions, compress) + offsetof(PageCompressOpts, compress_chunk_size)}, + {"compress_prealloc_chunks", RELOPT_TYPE_INT, + offsetof(GiSTOptions, compress) + offsetof(PageCompressOpts, compress_prealloc_chunks)} }; return (bytea *) build_reloptions(reloptions, validate, diff --git a/src/backend/access/hash/hashutil.c b/src/backend/access/hash/hashutil.c index eb510be332..c5e8d2d251 100644 --- a/src/backend/access/hash/hashutil.c +++ b/src/backend/access/hash/hashutil.c @@ -277,6 +277,14 @@ hashoptions(Datum reloptions, bool validate) { static const relopt_parse_elt tab[] = { {"fillfactor", RELOPT_TYPE_INT, offsetof(HashOptions, fillfactor)}, + {"compresstype", RELOPT_TYPE_ENUM, + offsetof(HashOptions, compress) + offsetof(PageCompressOpts, compresstype)}, + {"compresslevel", RELOPT_TYPE_INT, + offsetof(HashOptions, compress) + offsetof(PageCompressOpts, compresslevel)}, + {"compress_chunk_size", RELOPT_TYPE_INT, + offsetof(HashOptions, compress) + offsetof(PageCompressOpts, compress_chunk_size)}, + {"compress_prealloc_chunks", RELOPT_TYPE_INT, + offsetof(HashOptions, compress) + offsetof(PageCompressOpts, compress_prealloc_chunks)} }; return (bytea *) build_reloptions(reloptions, validate, diff --git a/src/backend/access/nbtree/nbtutils.c b/src/backend/access/nbtree/nbtutils.c index 81589b9056..ba9ac09c70 100644 --- a/src/backend/access/nbtree/nbtutils.c +++ b/src/backend/access/nbtree/nbtutils.c @@ -2107,8 +2107,15 @@ btoptions(Datum reloptions, bool validate) {"vacuum_cleanup_index_scale_factor", RELOPT_TYPE_REAL, offsetof(BTOptions, vacuum_cleanup_index_scale_factor)}, {"deduplicate_items", RELOPT_TYPE_BOOL, - offsetof(BTOptions, deduplicate_items)} - + offsetof(BTOptions, deduplicate_items)}, + {"compresstype", RELOPT_TYPE_ENUM, + offsetof(BTOptions, compress) + offsetof(PageCompressOpts, 
compresstype)}, + {"compresslevel", RELOPT_TYPE_INT, + offsetof(BTOptions, compress) + offsetof(PageCompressOpts, compresslevel)}, + {"compress_chunk_size", RELOPT_TYPE_INT, + offsetof(BTOptions, compress) + offsetof(PageCompressOpts, compress_chunk_size)}, + {"compress_prealloc_chunks", RELOPT_TYPE_INT, + offsetof(BTOptions, compress) + offsetof(PageCompressOpts, compress_prealloc_chunks)} }; return (bytea *) build_reloptions(reloptions, validate, diff --git a/src/backend/access/spgist/spgutils.c b/src/backend/access/spgist/spgutils.c index 64d3ba8288..ba01ad7d36 100644 --- a/src/backend/access/spgist/spgutils.c +++ b/src/backend/access/spgist/spgutils.c @@ -593,6 +593,14 @@ spgoptions(Datum reloptions, bool validate) { static const relopt_parse_elt tab[] = { {"fillfactor", RELOPT_TYPE_INT, offsetof(SpGistOptions, fillfactor)}, + {"compresstype", RELOPT_TYPE_ENUM, + offsetof(SpGistOptions, compress) + offsetof(PageCompressOpts, compresstype)}, + {"compresslevel", RELOPT_TYPE_INT, + offsetof(SpGistOptions, compress) + offsetof(PageCompressOpts, compresslevel)}, + {"compress_chunk_size", RELOPT_TYPE_INT, + offsetof(SpGistOptions, compress) + offsetof(PageCompressOpts, compress_chunk_size)}, + {"compress_prealloc_chunks", RELOPT_TYPE_INT, + offsetof(SpGistOptions, compress) + offsetof(PageCompressOpts, compress_prealloc_chunks)} }; return (bytea *) build_reloptions(reloptions, validate, diff --git a/src/backend/bootstrap/bootparse.y b/src/backend/bootstrap/bootparse.y index 6bb0c6ed1e..f76f2db139 100644 --- a/src/backend/bootstrap/bootparse.y +++ b/src/backend/bootstrap/bootparse.y @@ -210,6 +210,7 @@ Boot_CreateStmt: RELPERSISTENCE_PERMANENT, shared_relation, mapped_relation, + (Datum) 0, true, &relfrozenxid, &relminmxid); diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c index 67144aa3c9..17b3f7b8b5 100644 --- a/src/backend/catalog/heap.c +++ b/src/backend/catalog/heap.c @@ -302,6 +302,7 @@ heap_create(const char *relname, char relpersistence, 
bool shared_relation, bool mapped_relation, + Datum reloptions, bool allow_system_table_mods, TransactionId *relfrozenxid, MultiXactId *relminmxid) @@ -402,7 +403,9 @@ heap_create(const char *relname, shared_relation, mapped_relation, relpersistence, - relkind); + relkind, + reloptions); + /* * Have the storage manager create the relation's disk file, if needed. @@ -1289,6 +1292,7 @@ heap_create_with_catalog(const char *relname, relpersistence, shared_relation, mapped_relation, + reloptions, allow_system_table_mods, &relfrozenxid, &relminmxid); diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c index 0974f3e23a..0d79135f71 100644 --- a/src/backend/catalog/index.c +++ b/src/backend/catalog/index.c @@ -923,6 +923,7 @@ index_create(Relation heapRelation, relpersistence, shared_relation, mapped_relation, + reloptions, allow_system_table_mods, &relfrozenxid, &relminmxid); diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c index 75552c64ed..d39a5ef1f2 100644 --- a/src/backend/commands/indexcmds.c +++ b/src/backend/commands/indexcmds.c @@ -63,7 +63,9 @@ #include "utils/partcache.h" #include "utils/pg_rusage.h" #include "utils/regproc.h" +#include "utils/rel.h" #include "utils/snapmgr.h" +#include "utils/spccache.h" #include "utils/syscache.h" @@ -534,6 +536,7 @@ DefineIndex(Oid relationId, Snapshot snapshot; int save_nestlevel = -1; int i; + Datum defaultReloptions = (Datum) 0; /* * Some callers need us to run with an empty default_tablespace; this is a @@ -843,10 +846,29 @@ DefineIndex(Oid relationId, if (stmt->whereClause) CheckPredicate((Expr *) stmt->whereClause); + /* + * Get default compression options from tablespace. 
+ */ + if(rel->rd_rel->relpersistence == RELPERSISTENCE_PERMANENT) + { + if(accessMethodId == BTREE_AM_OID || + accessMethodId == HASH_AM_OID || + accessMethodId == GIN_AM_OID || + accessMethodId == GIST_AM_OID || + accessMethodId == SPGIST_AM_OID) + { + PageCompressOpts * pcOpt = + get_tablespace_compression_option(tablespaceId ? tablespaceId : MyDatabaseTableSpace); + + if(pcOpt) + defaultReloptions = buildCompressReloptions(pcOpt); + } + } + /* * Parse AM-specific options, convert to text array form, validate. */ - reloptions = transformRelOptions((Datum) 0, stmt->options, + reloptions = transformRelOptions(defaultReloptions, stmt->options, NULL, NULL, false, false); (void) index_reloptions(amoptions, reloptions, true); diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index e0ac4e05e5..c2a9a85b1a 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -14,13 +14,20 @@ */ #include "postgres.h" + + #include "access/attmap.h" #include "access/genam.h" +#include "access/gin_private.h" +#include "access/gist_private.h" +#include "access/hash.h" #include "access/heapam.h" #include "access/heapam_xlog.h" #include "access/multixact.h" +#include "access/nbtree.h" #include "access/reloptions.h" #include "access/relscan.h" +#include "access/spgist_private.h" #include "access/sysattr.h" #include "access/tableam.h" #include "access/xact.h" @@ -83,6 +90,7 @@ #include "storage/bufmgr.h" #include "storage/lmgr.h" #include "storage/lock.h" +#include "storage/page_compression.h" #include "storage/predicate.h" #include "storage/smgr.h" #include "tcop/utility.h" @@ -96,6 +104,7 @@ #include "utils/relcache.h" #include "utils/ruleutils.h" #include "utils/snapmgr.h" +#include "utils/spccache.h" #include "utils/syscache.h" #include "utils/timestamp.h" #include "utils/typcache.h" @@ -604,6 +613,7 @@ DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId, LOCKMODE parentLockmode; const char *accessMethod = NULL; Oid 
accessMethodId = InvalidOid; + Datum defaultReloptions = (Datum) 0; /* * Truncate relname to appropriate length (probably a waste of time, as @@ -743,10 +753,25 @@ DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId, if (!OidIsValid(ownerId)) ownerId = GetUserId(); + /* + * Get default compression options from tablespace. + */ + if(relkind == RELKIND_RELATION && stmt->relation->relpersistence == RELPERSISTENCE_PERMANENT) + { + if(stmt->accessMethod == NULL || strcmp(stmt->accessMethod, "heap") == 0) + { + PageCompressOpts * pcOpt = + get_tablespace_compression_option(tablespaceId ? tablespaceId : MyDatabaseTableSpace); + + if(pcOpt) + defaultReloptions = buildCompressReloptions(pcOpt); + } + } + /* * Parse and validate reloptions, if any. */ - reloptions = transformRelOptions((Datum) 0, stmt->options, NULL, validnsps, + reloptions = transformRelOptions(defaultReloptions, stmt->options, NULL, validnsps, true, false); switch (relkind) @@ -12927,6 +12952,8 @@ ATExecSetRelOptions(Relation rel, List *defList, AlterTableType operation, Datum repl_val[Natts_pg_class]; bool repl_null[Natts_pg_class]; bool repl_repl[Natts_pg_class]; + bytea *byteaOpts; + PageCompressOpts *newPcOpts = NULL; static char *validnsps[] = HEAP_RELOPT_NAMESPACES; if (defList == NIL && operation != AT_ReplaceRelOptions) @@ -12967,7 +12994,9 @@ ATExecSetRelOptions(Relation rel, List *defList, AlterTableType operation, case RELKIND_RELATION: case RELKIND_TOASTVALUE: case RELKIND_MATVIEW: - (void) heap_reloptions(rel->rd_rel->relkind, newOptions, true); + byteaOpts = heap_reloptions(rel->rd_rel->relkind, newOptions, true); + if(byteaOpts) + newPcOpts = &((StdRdOptions *)byteaOpts)->compress; break; case RELKIND_PARTITIONED_TABLE: (void) partitioned_table_reloptions(newOptions, true); @@ -12977,7 +13006,35 @@ ATExecSetRelOptions(Relation rel, List *defList, AlterTableType operation, break; case RELKIND_INDEX: case RELKIND_PARTITIONED_INDEX: - (void) index_reloptions(rel->rd_indam->amoptions, 
newOptions, true); + byteaOpts = index_reloptions(rel->rd_indam->amoptions, newOptions, true); + if(byteaOpts) + { + switch(rel->rd_rel->relam) + { + case BTREE_AM_OID: + newPcOpts = &((BTOptions *)byteaOpts)->compress; + break; + + case HASH_AM_OID: + newPcOpts = &((HashOptions *)byteaOpts)->compress; + break; + + case GIN_AM_OID: + newPcOpts = &((GinOptions *)byteaOpts)->compress; + break; + + case GIST_AM_OID: + newPcOpts = &((GiSTOptions *)byteaOpts)->compress; + break; + + case SPGIST_AM_OID: + newPcOpts = &((SpGistOptions *)byteaOpts)->compress; + break; + + default: + break; + } + } break; default: ereport(ERROR, @@ -13020,6 +13077,26 @@ ATExecSetRelOptions(Relation rel, List *defList, AlterTableType operation, } } + /* check if changed page compression store format */ + if(newPcOpts != NULL) + { + if(newPcOpts->compresstype != rel->rd_node.compress_algorithm) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("change compresstype OPTION is not supported"))); + + if(rel->rd_node.compress_algorithm != COMPRESS_TYPE_NONE && + newPcOpts->compress_chunk_size != rel->rd_node.compress_chunk_size) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("change compress_chunk_size OPTION is not supported"))); + }else{ + if(rel->rd_node.compress_algorithm != COMPRESS_TYPE_NONE) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("change compresstype OPTION is not supported"))); + } + /* * All we need do here is update the pg_class row; the new options will be * propagated into relcaches during post-commit cache inval. 
diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c
index 5294c78549..ab1ee6c6a8 100644
--- a/src/backend/postmaster/pgstat.c
+++ b/src/backend/postmaster/pgstat.c
@@ -4163,7 +4163,13 @@ pgstat_get_wait_io(WaitEventIO w)
 			break;
 		case WAIT_EVENT_WAL_WRITE:
 			event_name = "WALWrite";
+			break;
+		case WAIT_EVENT_COMPRESS_ADDRESS_FILE_FLUSH:
+			event_name = "CompressAddressFileFlush";
+			break;
+		case WAIT_EVENT_COMPRESS_ADDRESS_FILE_SYNC:
+			event_name = "CompressAddressFileSync";
 			break;
 		case WAIT_EVENT_LOGICAL_CHANGES_READ:
 			event_name = "LogicalChangesRead";
 			break;
diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c
index b89df01fa7..a60e60c254 100644
--- a/src/backend/replication/basebackup.c
+++ b/src/backend/replication/basebackup.c
@@ -1525,6 +1525,7 @@ is_checksummed_file(const char *fullpath, const char *filename)
 		strncmp(fullpath, "/", 1) == 0)
 	{
 		int			excludeIdx;
+		size_t		filenameLen;
 
 		/* Compare file against noChecksumFiles skip list */
 		for (excludeIdx = 0; noChecksumFiles[excludeIdx].name != NULL; excludeIdx++)
@@ -1538,6 +1539,17 @@ is_checksummed_file(const char *fullpath, const char *filename)
 				return false;
 		}
 
+		/* Skip compressed page files. Compressed pages may be stored in
+		 * multiple non-continuous chunks, and cannot perform checksum
+		 * while transferring blocks like ordinary data files.
+		 
*/ + filenameLen = strlen(filename); + if(filenameLen >= 4) + { + if(strncmp(filename + filenameLen - 4, "_pca", 4) == 0 || + strncmp(filename + filenameLen - 4, "_pcd", 4) == 0) + return false; + } + return true; } else diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c index bd72a87ee3..c4c46810dc 100644 --- a/src/backend/storage/file/fd.c +++ b/src/backend/storage/file/fd.c @@ -97,6 +97,7 @@ #include "storage/ipc.h" #include "utils/guc.h" #include "utils/resowner_private.h" +#include "storage/page_compression.h" /* Define PG_FLUSH_DATA_WORKS if we have an implementation for pg_flush_data */ #if defined(HAVE_SYNC_FILE_RANGE) @@ -196,6 +197,8 @@ typedef struct vfd /* NB: fileName is malloc'd, and must be free'd when closing the VFD */ int fileFlags; /* open(2) flags for (re)opening the file */ mode_t fileMode; /* mode to pass to open(2) */ + bool with_pcmap; /* is page compression relation */ + PageCompressHeader *pcmap; /* memory map of page compression address file */ } Vfd; /* @@ -1157,6 +1160,17 @@ LruDelete(File file) vfdP = &VfdCache[file]; + if (vfdP->with_pcmap && vfdP->pcmap != NULL) + { + if (pc_munmap(vfdP->pcmap) != 0) + ereport(vfdP->fdstate & FD_TEMP_FILE_LIMIT ? LOG : data_sync_elevel(LOG), + (errcode_for_file_access(), + errmsg("could not munmap file \"%s\": %m", + vfdP->fileName))); + + vfdP->pcmap = NULL; + } + /* * Close the file. We aren't expecting this to fail; if it does, better * to leak the FD than to mess up our internal state. @@ -1836,6 +1850,18 @@ FileClose(File file) if (!FileIsNotOpen(file)) { + /* close the pcmap */ + if (vfdP->with_pcmap && vfdP->pcmap != NULL) + { + if (pc_munmap(vfdP->pcmap)) + ereport(vfdP->fdstate & FD_TEMP_FILE_LIMIT ? 
LOG : data_sync_elevel(LOG), + (errcode_for_file_access(), + errmsg("could not munmap file \"%s\": %m", + vfdP->fileName))); + + vfdP->pcmap = NULL; + } + /* close the file */ if (close(vfdP->fd) != 0) { @@ -2190,6 +2216,96 @@ FileTruncate(File file, off_t offset, uint32 wait_event_info) return returnCode; } +/* + * initialize page compress memory map. + * + */ +void +SetupPageCompressMemoryMap(File file, int chunk_size, uint8 algorithm) +{ + int returnCode; + Vfd *vfdP; + PageCompressHeader *map; + + Assert(FileIsValid(file)); + + vfdP = &VfdCache[file]; + + returnCode = FileAccess(file); + if (returnCode < 0) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_RESOURCES), + errmsg("Failed to open file %s: %m", + vfdP->fileName))); + + map = pc_mmap(vfdP->fd, chunk_size, false); + if(map == MAP_FAILED) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_RESOURCES), + errmsg("Failed to mmap page compression address file %s: %m", + vfdP->fileName))); + + /* initialize page compress header */ + if(map->chunk_size == 0 && map->algorithm == 0) + { + map->chunk_size = chunk_size; + map->algorithm = algorithm; + + if(pc_msync(map) != 0) + ereport(data_sync_elevel(ERROR), + (errcode_for_file_access(), + errmsg("could not msync file \"%s\": %m", + vfdP->fileName))); + } + + if(InRecovery) + { + check_and_repair_compress_address(map, chunk_size, algorithm, vfdP->fileName); + } + + vfdP->with_pcmap=true; + vfdP->pcmap = map; +} + +/* + * Return the page compress memory map. 
+ * + */ +void * +GetPageCompressMemoryMap(File file, int chunk_size) +{ + int returnCode; + Vfd *vfdP; + PageCompressHeader *map; + + Assert(FileIsValid(file)); + + vfdP = &VfdCache[file]; + + returnCode = FileAccess(file); + if (returnCode < 0) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_RESOURCES), + errmsg("Failed to open file %s: %m", + vfdP->fileName))); + + Assert(vfdP->with_pcmap); + + if(vfdP->pcmap == NULL) + { + map = pc_mmap(vfdP->fd, chunk_size, false); + if(map == MAP_FAILED) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_RESOURCES), + errmsg("Failed to mmap page compression address file %s: %m", + vfdP->fileName))); + + vfdP->pcmap = map; + } + + return vfdP->pcmap; +} + /* * Return the pathname associated with an open file. * diff --git a/src/backend/storage/smgr/Makefile b/src/backend/storage/smgr/Makefile index 596b564656..ba7a9aad35 100644 --- a/src/backend/storage/smgr/Makefile +++ b/src/backend/storage/smgr/Makefile @@ -14,6 +14,7 @@ include $(top_builddir)/src/Makefile.global OBJS = \ md.o \ - smgr.o + smgr.o \ + page_compression.o include $(top_srcdir)/src/backend/common.mk diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c index 1d4aa482cc..9f3728f0cd 100644 --- a/src/backend/storage/smgr/md.c +++ b/src/backend/storage/smgr/md.c @@ -24,6 +24,7 @@ #include #include #include +#include #include "access/xlog.h" #include "access/xlogutils.h" @@ -40,6 +41,7 @@ #include "storage/sync.h" #include "utils/hsearch.h" #include "utils/memutils.h" +#include "storage/page_compression.h" /* * The magnetic disk storage manager keeps track of open file @@ -82,6 +84,8 @@ typedef struct _MdfdVec { File mdfd_vfd; /* fd number in fd.c's pool */ + File mdfd_vfd_pca; /* page compression address file 's fd number in fd.c's pool */ + File mdfd_vfd_pcd; /* page compression data file 's fd number in fd.c's pool */ BlockNumber mdfd_segno; /* segment number, from 0 */ } MdfdVec; @@ -117,6 +121,13 @@ static MemoryContext MdCxt; /* context 
for all MdfdVec objects */ */ #define EXTENSION_DONT_CHECK_SIZE (1 << 4) +#define IS_COMPRESSED_MAINFORK(reln, forkNum) \ + (reln->smgr_rnode.node.compress_algorithm != COMPRESS_TYPE_NONE && forkNum == MAIN_FORKNUM) + +#define PAGE_COMPRESS_ALGORITHM(reln) (reln->smgr_rnode.node.compress_algorithm) +#define PAGE_COMPRESS_LEVEL(reln) (reln->smgr_rnode.node.compresslevel) +#define PAGE_COMPRESS_CHUNK_SIZE(reln) (reln->smgr_rnode.node.compress_chunk_size) +#define PAGE_COMPRESS_PREALLOC_CHUNKS(reln) (reln->smgr_rnode.node.compress_prealloc_chunks) /* local routines */ static void mdunlinkfork(RelFileNodeBackend rnode, ForkNumber forkNum, @@ -140,6 +151,7 @@ static MdfdVec *_mdfd_getseg(SMgrRelation reln, ForkNumber forkno, static BlockNumber _mdnblocks(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg); +static int sync_pcmap(PageCompressHeader *pcMap, uint32 wait_event_info); /* * mdinit() -- Initialize private state for magnetic disk storage manager. @@ -179,7 +191,8 @@ mdcreate(SMgrRelation reln, ForkNumber forkNum, bool isRedo) { MdfdVec *mdfd; char *path; - File fd; + char *pcfile_path; + File fd,fd_pca,fd_pcd; if (isRedo && reln->md_num_open_segs[forkNum] > 0) return; /* created and opened already... 
*/ @@ -219,11 +232,66 @@ mdcreate(SMgrRelation reln, ForkNumber forkNum, bool isRedo) } } + fd_pca = -1; + fd_pcd = -1; + if(IS_COMPRESSED_MAINFORK(reln,forkNum)) + { + /* close main fork file */ + FileClose(fd); + fd = -1; + + /* open page compress address file */ + pcfile_path = psprintf("%s_pca", path); + fd_pca = PathNameOpenFile(pcfile_path, O_RDWR | O_CREAT | O_EXCL | PG_BINARY); + + if (fd_pca < 0) + { + int save_errno = errno; + + if (isRedo) + fd_pca = PathNameOpenFile(pcfile_path, O_RDWR | PG_BINARY); + if (fd_pca < 0) + { + /* be sure to report the error reported by create, not open */ + errno = save_errno; + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not create file \"%s\": %m", pcfile_path))); + } + } + pfree(pcfile_path); + + /* open page compress data file */ + pcfile_path = psprintf("%s_pcd", path); + fd_pcd = PathNameOpenFile(pcfile_path, O_RDWR | O_CREAT | O_EXCL | PG_BINARY); + + if (fd_pcd < 0) + { + int save_errno = errno; + + if (isRedo) + fd_pcd = PathNameOpenFile(pcfile_path, O_RDWR | PG_BINARY); + if (fd_pcd < 0) + { + /* be sure to report the error reported by create, not open */ + errno = save_errno; + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not create file \"%s\": %m", pcfile_path))); + } + } + pfree(pcfile_path); + + SetupPageCompressMemoryMap(fd_pca, PAGE_COMPRESS_CHUNK_SIZE(reln), PAGE_COMPRESS_ALGORITHM(reln)); + } + pfree(path); _fdvec_resize(reln, forkNum, 1); mdfd = &reln->md_seg_fds[forkNum][0]; mdfd->mdfd_vfd = fd; + mdfd->mdfd_vfd_pca = fd_pca; + mdfd->mdfd_vfd_pcd = fd_pcd; mdfd->mdfd_segno = 0; } @@ -309,6 +377,28 @@ mdunlinkfork(RelFileNodeBackend rnode, ForkNumber forkNum, bool isRedo) ereport(WARNING, (errcode_for_file_access(), errmsg("could not remove file \"%s\": %m", path))); + + if(rnode.node.compress_algorithm != COMPRESS_TYPE_NONE && + forkNum == MAIN_FORKNUM) + { + char *pcfile_path; + + pcfile_path = psprintf("%s_pca", path); + ret = unlink(pcfile_path); + if (ret < 0 && 
errno != ENOENT)
+			ereport(WARNING,
+					(errcode_for_file_access(),
+					 errmsg("could not remove file \"%s\": %m", pcfile_path)));
+		pfree(pcfile_path);
+
+		pcfile_path = psprintf("%s_pcd", path);
+		ret = unlink(pcfile_path);
+		if (ret < 0 && errno != ENOENT)
+			ereport(WARNING,
+					(errcode_for_file_access(),
+					 errmsg("could not remove file \"%s\": %m", pcfile_path)));
+		pfree(pcfile_path);
+	}
 	}
 	else
 	{
@@ -332,6 +422,52 @@
 					(errcode_for_file_access(),
 					 errmsg("could not truncate file \"%s\": %m", path)));
 
+		if(rnode.node.compress_algorithm != COMPRESS_TYPE_NONE &&
+			forkNum == MAIN_FORKNUM)
+		{
+			char *pcfile_path;
+
+			/* clear page compression address file */
+			pcfile_path = psprintf("%s_pca", path);
+			fd = OpenTransientFile(pcfile_path, O_RDWR | PG_BINARY);
+			if (fd >= 0)
+			{
+				int save_errno;
+
+				ret = ftruncate(fd, 0);
+				save_errno = errno;
+				CloseTransientFile(fd);
+				errno = save_errno;
+			}
+			else
+				ret = -1;
+			if (ret < 0 && errno != ENOENT)
+				ereport(WARNING,
+						(errcode_for_file_access(),
+						 errmsg("could not truncate file \"%s\": %m", pcfile_path)));
+			pfree(pcfile_path);
+
+			/* truncate page compression data file */
+			pcfile_path = psprintf("%s_pcd", path);
+			fd = OpenTransientFile(pcfile_path, O_RDWR | PG_BINARY);
+			if (fd >= 0)
+			{
+				int save_errno;
+
+				ret = ftruncate(fd, 0);
+				save_errno = errno;
+				CloseTransientFile(fd);
+				errno = save_errno;
+			}
+			else
+				ret = -1;
+			if (ret < 0 && errno != ENOENT)
+				ereport(WARNING,
+						(errcode_for_file_access(),
+						 errmsg("could not truncate file \"%s\": %m", pcfile_path)));
+			pfree(pcfile_path);
+		}
+
 		/* Register request to unlink first segment later */
 		register_unlink_segment(rnode, forkNum, 0 /* first seg */ );
 	}
@@ -367,6 +503,36 @@
 					 errmsg("could not remove file \"%s\": %m", segpath)));
 			break;
 		}
+
+		if((rnode.node.compress_algorithm != COMPRESS_TYPE_NONE &&
+			forkNum == MAIN_FORKNUM))
+		
{ + char *pcfile_segpath; + + pcfile_segpath = psprintf("%s_pca", segpath); + if (unlink(pcfile_segpath) < 0) + { + /* ENOENT is expected after the last segment... */ + if (errno != ENOENT) + ereport(WARNING, + (errcode_for_file_access(), + errmsg("could not remove file \"%s\": %m", pcfile_segpath))); + break; + } + pfree(pcfile_segpath); + + pcfile_segpath = psprintf("%s_pcd", segpath); + if (unlink(pcfile_segpath) < 0) + { + /* ENOENT is expected after the last segment... */ + if (errno != ENOENT) + ereport(WARNING, + (errcode_for_file_access(), + errmsg("could not remove file \"%s\": %m", pcfile_segpath))); + break; + } + pfree(pcfile_segpath); + } } pfree(segpath); } @@ -374,6 +540,220 @@ mdunlinkfork(RelFileNodeBackend rnode, ForkNumber forkNum, bool isRedo) pfree(path); } +/* + * mdextend_pc() -- Add a block to the specified page compressed relation. + * + */ +static void +mdextend_pc(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, + char *buffer, bool skipFsync) +{ + off_t seekpos; + int nbytes; + MdfdVec *v; + char *work_buffer,*buffer_pos; + int i; + int prealloc_chunks,need_chunks,chunk_size,nchunks,range,write_amount; + pc_chunk_number_t chunkno; + PageCompressHeader *pcMap; + PageCompressAddr *pcAddr; + uint8 algorithm; + int8 level; + + /* This assert is too expensive to have on normally ... */ +#ifdef CHECK_WRITE_VS_EXTEND + Assert(blocknum >= mdnblocks(reln, forknum)); +#endif + + Assert(IS_COMPRESSED_MAINFORK(reln,forknum)); + + /* + * If a relation manages to grow to 2^32-1 blocks, refuse to extend it any + * more --- we mustn't create a block whose number actually is + * InvalidBlockNumber. 
+ */ + if (blocknum == InvalidBlockNumber) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("cannot extend file \"%s\" beyond %u blocks", + relpath(reln->smgr_rnode, forknum), + InvalidBlockNumber))); + + v = _mdfd_getseg(reln, MAIN_FORKNUM, blocknum, skipFsync, EXTENSION_CREATE); + + chunk_size = PAGE_COMPRESS_CHUNK_SIZE(reln); + algorithm = PAGE_COMPRESS_ALGORITHM(reln); + level = PAGE_COMPRESS_LEVEL(reln); + prealloc_chunks = PAGE_COMPRESS_PREALLOC_CHUNKS(reln); + if(prealloc_chunks > BLCKSZ / chunk_size -1) + prealloc_chunks = BLCKSZ / chunk_size -1; + + pcMap = (PageCompressHeader *)GetPageCompressMemoryMap(v->mdfd_vfd_pca, chunk_size); + pcAddr = GetPageCompressAddr(pcMap, chunk_size, blocknum); + + Assert(blocknum % RELSEG_SIZE >= pg_atomic_read_u32(&pcMap->nblocks)); + + /* check allocated chunk number */ + if(pcAddr->allocated_chunks > BLCKSZ / chunk_size) + ereport(ERROR, + (errcode(ERRCODE_DATA_CORRUPTED), + errmsg("invalid chunks %u of block %u in file \"%s\"", + pcAddr->allocated_chunks, blocknum, FilePathName(v->mdfd_vfd_pca)))); + + for(i=0; i< pcAddr->allocated_chunks; i++) + { + if(pcAddr->chunknos[i] <= 0 || pcAddr->chunknos[i] > (BLCKSZ / chunk_size) * RELSEG_SIZE) + ereport(ERROR, + (errcode(ERRCODE_DATA_CORRUPTED), + errmsg("invalid chunk number %u of block %u in file \"%s\"", + pcAddr->chunknos[i], blocknum, FilePathName(v->mdfd_vfd_pca)))); + } + + nchunks = 0; + work_buffer = NULL; + + /* compress page only for initialized page */ + if(!PageIsNew(buffer)) + { + int work_buffer_size, compressed_page_size; + + work_buffer_size = compress_page_buffer_bound(algorithm); + if(work_buffer_size < 0) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("unrecognized compression algorithm %d", + algorithm))); + work_buffer = palloc(work_buffer_size); + + compressed_page_size = compress_page(buffer, work_buffer, work_buffer_size, algorithm, level); + + if(compressed_page_size < 0) + ereport(ERROR, + 
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					 errmsg("unrecognized compression algorithm %d",
+							algorithm)));
+
+		nchunks = (compressed_page_size + chunk_size - 1) / chunk_size;
+
+		if(chunk_size * nchunks >= BLCKSZ)
+		{
+			/* store original page if can not save space ?TODO? */
+			pfree(work_buffer);
+			work_buffer = buffer;
+			nchunks = BLCKSZ / chunk_size;
+		}
+		else
+		{
+			/* fill zero in the last chunk */
+			if(compressed_page_size < chunk_size * nchunks)
+				memset(work_buffer + compressed_page_size, 0x00, chunk_size * nchunks - compressed_page_size);
+		}
+	}
+
+	need_chunks = prealloc_chunks > nchunks ? prealloc_chunks : nchunks;
+	if(pcAddr->allocated_chunks < need_chunks)
+	{
+		chunkno = (pc_chunk_number_t)pg_atomic_fetch_add_u32(&pcMap->allocated_chunks, need_chunks - pcAddr->allocated_chunks) + 1;
+		for(i = pcAddr->allocated_chunks ;i<need_chunks; i++,chunkno++)
+		{
+			pcAddr->chunknos[i] = chunkno;
+		}
+		pcAddr->allocated_chunks = need_chunks;
+
+		if(compress_address_flush_chunks > 0 &&
+			pg_atomic_read_u32(&pcMap->allocated_chunks) - pg_atomic_read_u32(&pcMap->last_synced_allocated_chunks) > compress_address_flush_chunks)
+		{
+			if(sync_pcmap(pcMap, WAIT_EVENT_COMPRESS_ADDRESS_FILE_FLUSH) != 0)
+				ereport(data_sync_elevel(ERROR),
+						(errcode_for_file_access(),
+						 errmsg("could not msync file \"%s\": %m",
+								FilePathName(v->mdfd_vfd_pca))));
+		}
+	}
+
+	/* write chunks of compressed page */
+	for(i=0; i < nchunks;i++)
+	{
+		buffer_pos = work_buffer + chunk_size * i;
+		seekpos = (off_t) OffsetOfPageCompressChunk(chunk_size, pcAddr->chunknos[i]);
+		range = 1;
+		while(i < nchunks -1 && pcAddr->chunknos[i+1] == pcAddr->chunknos[i] + 1)
+		{
+			range++;
+			i++;
+		}
+		write_amount = chunk_size * range;
+
+		if ((nbytes = FileWrite(v->mdfd_vfd_pcd, buffer_pos, write_amount, seekpos, WAIT_EVENT_DATA_FILE_EXTEND)) != write_amount)
+		{
+			if (nbytes < 0)
+				ereport(ERROR,
+						(errcode_for_file_access(),
+						 errmsg("could not extend file \"%s\": %m",
+								FilePathName(v->mdfd_vfd_pcd)),
+						 errhint("Check free disk space.")));
+			/* short 
write: complain appropriately */ + ereport(ERROR, + (errcode(ERRCODE_DISK_FULL), + errmsg("could not extend file \"%s\": wrote only %d of %d bytes at block %u", + FilePathName(v->mdfd_vfd_pcd), + nbytes, write_amount, blocknum), + errhint("Check free disk space."))); + } + } + + /* write preallocated chunks */ + if(need_chunks > nchunks) + { + char *zero_buffer = palloc0(chunk_size * (need_chunks - nchunks)); + + for(i=nchunks; i < need_chunks;i++) + { + seekpos = (off_t) OffsetOfPageCompressChunk(chunk_size, pcAddr->chunknos[i]); + range = 1; + while(i < nchunks -1 && pcAddr->chunknos[i+1] == pcAddr->chunknos[i] + 1) + { + range++; + i++; + } + write_amount = chunk_size * range; + + if ((nbytes = FileWrite(v->mdfd_vfd_pcd, zero_buffer, write_amount, seekpos, WAIT_EVENT_DATA_FILE_EXTEND)) != write_amount) + { + if (nbytes < 0) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not extend file \"%s\": %m", + FilePathName(v->mdfd_vfd_pcd)), + errhint("Check free disk space."))); + /* short write: complain appropriately */ + ereport(ERROR, + (errcode(ERRCODE_DISK_FULL), + errmsg("could not extend file \"%s\": wrote only %d of %d bytes at block %u", + FilePathName(v->mdfd_vfd_pcd), + nbytes, write_amount, blocknum), + errhint("Check free disk space."))); + } + } + pfree(zero_buffer); + } + + /* finally update size of this page and global nblocks */ + if(pcAddr->nchunks != nchunks) + pcAddr->nchunks = nchunks; + + if(pg_atomic_read_u32(&pcMap->nblocks) < blocknum % RELSEG_SIZE + 1) + pg_atomic_write_u32(&pcMap->nblocks, blocknum % RELSEG_SIZE + 1); + + if(work_buffer != NULL && work_buffer != buffer) + pfree(work_buffer); + + if (!skipFsync && !SmgrIsTemp(reln)) + register_dirty_segment(reln, forknum, v); + + Assert(_mdnblocks(reln, forknum, v) <= ((BlockNumber) RELSEG_SIZE)); +} + /* * mdextend() -- Add a block to the specified relation. 
* @@ -396,6 +776,9 @@ mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, Assert(blocknum >= mdnblocks(reln, forknum)); #endif + if(IS_COMPRESSED_MAINFORK(reln,forknum)) + return mdextend_pc(reln, forknum, blocknum, buffer, skipFsync); + /* * If a relation manages to grow to 2^32-1 blocks, refuse to extend it any * more --- we mustn't create a block whose number actually is @@ -419,16 +802,16 @@ mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, if (nbytes < 0) ereport(ERROR, (errcode_for_file_access(), - errmsg("could not extend file \"%s\": %m", + errmsg("could not extend file \"%s\": %m", FilePathName(v->mdfd_vfd)), - errhint("Check free disk space."))); + errhint("Check free disk space."))); /* short write: complain appropriately */ ereport(ERROR, (errcode(ERRCODE_DISK_FULL), - errmsg("could not extend file \"%s\": wrote only %d of %d bytes at block %u", + errmsg("could not extend file \"%s\": wrote only %d of %d bytes at block %u", FilePathName(v->mdfd_vfd), nbytes, BLCKSZ, blocknum), - errhint("Check free disk space."))); + errhint("Check free disk space."))); } if (!skipFsync && !SmgrIsTemp(reln)) @@ -452,27 +835,79 @@ mdopenfork(SMgrRelation reln, ForkNumber forknum, int behavior) { MdfdVec *mdfd; char *path; - File fd; + File fd,fd_pca,fd_pcd; /* No work if already open */ if (reln->md_num_open_segs[forknum] > 0) return &reln->md_seg_fds[forknum][0]; - path = relpath(reln->smgr_rnode, forknum); + fd = -1; + fd_pca = -1; + fd_pcd = -1; + if(IS_COMPRESSED_MAINFORK(reln,forknum)) + { + char *pcfile_path; - fd = PathNameOpenFile(path, O_RDWR | PG_BINARY); + path = relpath(reln->smgr_rnode, forknum); - if (fd < 0) + /* open page compression address file */ + pcfile_path = psprintf("%s_pca", path); + fd_pca = PathNameOpenFile(pcfile_path, O_RDWR | PG_BINARY); + + if (fd_pca < 0) + { + if ((behavior & EXTENSION_RETURN_NULL) && + FILE_POSSIBLY_DELETED(errno)) + { + pfree(path); + pfree(pcfile_path); + return NULL; + } + 
ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not open file \"%s\": %m", pcfile_path))); + } + pfree(pcfile_path); + + /* open page compression data file */ + pcfile_path = psprintf("%s_pcd", path); + fd_pcd = PathNameOpenFile(pcfile_path, O_RDWR | PG_BINARY); + + if (fd_pcd < 0) + { + if ((behavior & EXTENSION_RETURN_NULL) && + FILE_POSSIBLY_DELETED(errno)) + { + pfree(path); + pfree(pcfile_path); + return NULL; + } + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not open file \"%s\": %m", pcfile_path))); + } + pfree(pcfile_path); + + SetupPageCompressMemoryMap(fd_pca, PAGE_COMPRESS_CHUNK_SIZE(reln), PAGE_COMPRESS_ALGORITHM(reln)); + } + else { - if ((behavior & EXTENSION_RETURN_NULL) && - FILE_POSSIBLY_DELETED(errno)) + path = relpath(reln->smgr_rnode, forknum); + + fd = PathNameOpenFile(path, O_RDWR | PG_BINARY); + + if (fd < 0) { - pfree(path); - return NULL; + if ((behavior & EXTENSION_RETURN_NULL) && + FILE_POSSIBLY_DELETED(errno)) + { + pfree(path); + return NULL; + } + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not open file \"%s\": %m", path))); } - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not open file \"%s\": %m", path))); } pfree(path); @@ -480,6 +915,8 @@ mdopenfork(SMgrRelation reln, ForkNumber forknum, int behavior) _fdvec_resize(reln, forknum, 1); mdfd = &reln->md_seg_fds[forknum][0]; mdfd->mdfd_vfd = fd; + mdfd->mdfd_vfd_pca = fd_pca; + mdfd->mdfd_vfd_pcd = fd_pcd; mdfd->mdfd_segno = 0; Assert(_mdnblocks(reln, forknum, mdfd) <= ((BlockNumber) RELSEG_SIZE)); @@ -515,7 +952,13 @@ mdclose(SMgrRelation reln, ForkNumber forknum) { MdfdVec *v = &reln->md_seg_fds[forknum][nopensegs - 1]; - FileClose(v->mdfd_vfd); + if(IS_COMPRESSED_MAINFORK(reln,forknum)) + { + FileClose(v->mdfd_vfd_pca); + FileClose(v->mdfd_vfd_pcd); + } + else + FileClose(v->mdfd_vfd); _fdvec_resize(reln, forknum, nopensegs - 1); nopensegs--; } @@ -536,11 +979,61 @@ mdprefetch(SMgrRelation reln, ForkNumber forknum, 
BlockNumber blocknum) if (v == NULL) return false; - seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE)); + if(IS_COMPRESSED_MAINFORK(reln,forknum)) + { + int chunk_size,i,range; + PageCompressHeader *pcMap; + PageCompressAddr *pcAddr; + + chunk_size = PAGE_COMPRESS_CHUNK_SIZE(reln); + pcMap = (PageCompressHeader *)GetPageCompressMemoryMap(v->mdfd_vfd_pca, chunk_size); + pcAddr = GetPageCompressAddr(pcMap, chunk_size, blocknum); + + /* check chunk number */ + if(pcAddr->nchunks < 0 || pcAddr->nchunks > BLCKSZ / chunk_size) + { + if (zero_damaged_pages || InRecovery) + return true; + else + ereport(ERROR, + (errcode(ERRCODE_DATA_CORRUPTED), + errmsg("invalid chunks %u of block %u in file \"%s\"", + pcAddr->nchunks, blocknum, FilePathName(v->mdfd_vfd_pca)))); + } - Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE); + for(i=0; i< pcAddr->nchunks; i++) + { + if(pcAddr->chunknos[i] <= 0 || pcAddr->chunknos[i] > (BLCKSZ / chunk_size) * RELSEG_SIZE) + { + if (zero_damaged_pages || InRecovery) + return true; + else + ereport(ERROR, + (errcode(ERRCODE_DATA_CORRUPTED), + errmsg("invalid chunk number %u of block %u in file \"%s\"", + pcAddr->chunknos[i], blocknum, FilePathName(v->mdfd_vfd_pca)))); + } - (void) FilePrefetch(v->mdfd_vfd, seekpos, BLCKSZ, WAIT_EVENT_DATA_FILE_PREFETCH); + seekpos = (off_t) OffsetOfPageCompressChunk(chunk_size, pcAddr->chunknos[i]); + range = 1; + while(i < pcAddr->nchunks - 1 && + pcAddr->chunknos[i + 1] == pcAddr->chunknos[i] + 1) + { + range++; + i++; + } + + (void) FilePrefetch(v->mdfd_vfd_pcd, seekpos, chunk_size * range, WAIT_EVENT_DATA_FILE_PREFETCH); + } + } + else + { + seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE)); + + Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE); + + (void) FilePrefetch(v->mdfd_vfd, seekpos, BLCKSZ, WAIT_EVENT_DATA_FILE_PREFETCH); + } #endif /* USE_PREFETCH */ return true; @@ -586,16 +1079,237 @@ mdwriteback(SMgrRelation reln, ForkNumber forknum, if (segnum_start != segnum_end) 
nflush = RELSEG_SIZE - (blocknum % ((BlockNumber) RELSEG_SIZE)); - Assert(nflush >= 1); - Assert(nflush <= nblocks); + Assert(nflush >= 1); + Assert(nflush <= nblocks); + + if(IS_COMPRESSED_MAINFORK(reln,forknum)) + { + int i,chunk_size; + PageCompressHeader *pcMap; + PageCompressAddr *pcAddr; + BlockNumber iblock; + pc_chunk_number_t seekpos_chunk,last_chunk,nchunks; + + chunk_size = PAGE_COMPRESS_CHUNK_SIZE(reln); + pcMap = (PageCompressHeader *)GetPageCompressMemoryMap(v->mdfd_vfd_pca, chunk_size); + + seekpos_chunk = -1; + last_chunk = -1; + for(iblock = 0; iblock < nflush; iblock++) + { + /* flush one block */ + pcAddr = GetPageCompressAddr(pcMap, chunk_size, blocknum + iblock); + + for(i=0; i < pcAddr->nchunks; i++) + { + if(seekpos_chunk == -1) + { + seekpos_chunk = pcAddr->chunknos[i]; + last_chunk = seekpos_chunk; + } + else if(pcAddr->chunknos[i] == last_chunk + 1) + { + last_chunk++; + } + else + { + /* from here the chunks is discontinuous, flush previous chuncks range */ + seekpos = (off_t) OffsetOfPageCompressChunk(chunk_size, seekpos_chunk); + nchunks = 1 + last_chunk - seekpos_chunk; + + FileWriteback(v->mdfd_vfd_pcd, seekpos, (off_t) chunk_size * nchunks, WAIT_EVENT_DATA_FILE_FLUSH); + + seekpos_chunk = pcAddr->chunknos[i]; + last_chunk = seekpos_chunk; + } + } + } + + /* flush the rest chuncks */ + if(seekpos_chunk != -1) + { + seekpos = (off_t) OffsetOfPageCompressChunk(chunk_size, seekpos_chunk); + nchunks = 1 + last_chunk - seekpos_chunk; + + FileWriteback(v->mdfd_vfd_pcd, seekpos, (off_t) chunk_size * nchunks, WAIT_EVENT_DATA_FILE_FLUSH); + } + } + else + { + seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE)); + + FileWriteback(v->mdfd_vfd, seekpos, (off_t) BLCKSZ * nflush, WAIT_EVENT_DATA_FILE_FLUSH); + } + + nblocks -= nflush; + blocknum += nflush; + } +} + +/* + * mdread_pc() -- Read the specified block from a page compressed relation. 
+ */ +static void +mdread_pc(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, + char *buffer) +{ + off_t seekpos; + int nbytes,chunk_size,i,read_amount,range,nchunks; + MdfdVec *v; + PageCompressHeader *pcMap; + PageCompressAddr *pcAddr; + char *compress_buffer,*buffer_pos; + + Assert(IS_COMPRESSED_MAINFORK(reln,forkNum)); + + v = _mdfd_getseg(reln, forknum, blocknum, false, + EXTENSION_FAIL | EXTENSION_CREATE_RECOVERY); + + chunk_size = PAGE_COMPRESS_CHUNK_SIZE(reln); + pcMap = (PageCompressHeader *)GetPageCompressMemoryMap(v->mdfd_vfd_pca, chunk_size); + pcAddr = GetPageCompressAddr(pcMap, chunk_size, blocknum); + + nchunks = pcAddr->nchunks; + if(nchunks == 0) + { + MemSet(buffer, 0, BLCKSZ); + return; + } + + /* check chunk number */ + if(nchunks > BLCKSZ / chunk_size) + { + if (zero_damaged_pages || InRecovery) + { + MemSet(buffer, 0, BLCKSZ); + return; + } + else + ereport(ERROR, + (errcode(ERRCODE_DATA_CORRUPTED), + errmsg("invalid chunks %u of block %u in file \"%s\"", + nchunks, blocknum, FilePathName(v->mdfd_vfd_pca)))); + } + + for(i=0; i< nchunks; i++) + { + if(pcAddr->chunknos[i] <= 0 || pcAddr->chunknos[i] > MAX_CHUNK_NUMBER(chunk_size)) + { + if (zero_damaged_pages || InRecovery) + { + MemSet(buffer, 0, BLCKSZ); + return; + } + else + ereport(ERROR, + (errcode(ERRCODE_DATA_CORRUPTED), + errmsg("invalid chunk number %u of block %u in file \"%s\"", + pcAddr->chunknos[i], blocknum, FilePathName(v->mdfd_vfd_pca)))); + } + } + + /* read chunk data */ + compress_buffer = palloc(chunk_size * nchunks); + for(i=0; i< nchunks; i++) + { + buffer_pos = compress_buffer + chunk_size * i; + seekpos = (off_t) OffsetOfPageCompressChunk(chunk_size, pcAddr->chunknos[i]); + range = 1; + while(ichunknos[i+1] == pcAddr->chunknos[i]+1) + { + range++; + i++; + } + read_amount = chunk_size * range; + + TRACE_POSTGRESQL_SMGR_MD_READ_START(forknum, blocknum, + reln->smgr_rnode.node.spcNode, + reln->smgr_rnode.node.dbNode, + reln->smgr_rnode.node.relNode, + 
reln->smgr_rnode.backend); + + nbytes = FileRead(v->mdfd_vfd_pcd, buffer_pos, read_amount, seekpos, WAIT_EVENT_DATA_FILE_READ); + + TRACE_POSTGRESQL_SMGR_MD_READ_DONE(forknum, blocknum, + reln->smgr_rnode.node.spcNode, + reln->smgr_rnode.node.dbNode, + reln->smgr_rnode.node.relNode, + reln->smgr_rnode.backend, + nbytes, + read_amount); + + if (nbytes != read_amount) + { + if (nbytes < 0) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not read block %u in file \"%s\": %m", + blocknum, FilePathName(v->mdfd_vfd_pcd)))); - seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE)); + /* + * Short read: we are at or past EOF, or we read a partial block at + * EOF. Normally this is an error; upper levels should never try to + * read a nonexistent block. However, if zero_damaged_pages is ON or + * we are InRecovery, we should instead return zeroes without + * complaining. This allows, for example, the case of trying to + * update a block that was later truncated away. + */ + if (zero_damaged_pages || InRecovery) + { + pfree(compress_buffer); + MemSet(buffer, 0, BLCKSZ); + return; + } + else + ereport(ERROR, + (errcode(ERRCODE_DATA_CORRUPTED), + errmsg("could not read block %u in file \"%s\": read only %d of %d bytes", + blocknum, FilePathName(v->mdfd_vfd_pcd), + nbytes, read_amount))); + } + } - FileWriteback(v->mdfd_vfd, seekpos, (off_t) BLCKSZ * nflush, WAIT_EVENT_DATA_FILE_FLUSH); + /* decompress chunk data */ + if(pcAddr->nchunks == BLCKSZ / chunk_size) + { + memcpy(buffer, compress_buffer, BLCKSZ); + } + else + { + nbytes = decompress_page(compress_buffer, buffer, PAGE_COMPRESS_ALGORITHM(reln) ); + if (nbytes != BLCKSZ) + { + if(nbytes == -2) + ereport(ERROR, + (errcode(ERRCODE_DATA_CORRUPTED), + errmsg("could not recognized compression algorithm %d for file \"%s\"", + PAGE_COMPRESS_ALGORITHM(reln), + FilePathName(v->mdfd_vfd_pcd)))); - nblocks -= nflush; - blocknum += nflush; + /* + * Short read: we are at or past EOF, or we read a partial 
block at + * EOF. Normally this is an error; upper levels should never try to + * read a nonexistent block. However, if zero_damaged_pages is ON or + * we are InRecovery, we should instead return zeroes without + * complaining. This allows, for example, the case of trying to + * update a block that was later truncated away. + */ + if (zero_damaged_pages || InRecovery) + { + pfree(compress_buffer); + MemSet(buffer, 0, BLCKSZ); + return; + } + else + ereport(ERROR, + (errcode(ERRCODE_DATA_CORRUPTED), + errmsg("could not decompress block %u in file \"%s\": decompress %d of %d bytes", + blocknum, FilePathName(v->mdfd_vfd_pcd), + nbytes, BLCKSZ))); + } } + + pfree(compress_buffer); } /* @@ -609,6 +1323,9 @@ mdread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, int nbytes; MdfdVec *v; + if(IS_COMPRESSED_MAINFORK(reln,forknum)) + return mdread_pc(reln, forknum, blocknum, buffer); + TRACE_POSTGRESQL_SMGR_MD_READ_START(forknum, blocknum, reln->smgr_rnode.node.spcNode, reln->smgr_rnode.node.dbNode, @@ -616,7 +1333,7 @@ mdread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, reln->smgr_rnode.backend); v = _mdfd_getseg(reln, forknum, blocknum, false, - EXTENSION_FAIL | EXTENSION_CREATE_RECOVERY); + EXTENSION_FAIL | EXTENSION_CREATE_RECOVERY); seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE)); @@ -625,40 +1342,267 @@ mdread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, nbytes = FileRead(v->mdfd_vfd, buffer, BLCKSZ, seekpos, WAIT_EVENT_DATA_FILE_READ); TRACE_POSTGRESQL_SMGR_MD_READ_DONE(forknum, blocknum, - reln->smgr_rnode.node.spcNode, - reln->smgr_rnode.node.dbNode, - reln->smgr_rnode.node.relNode, - reln->smgr_rnode.backend, - nbytes, - BLCKSZ); + reln->smgr_rnode.node.spcNode, + reln->smgr_rnode.node.dbNode, + reln->smgr_rnode.node.relNode, + reln->smgr_rnode.backend, + nbytes, + BLCKSZ); if (nbytes != BLCKSZ) { if (nbytes < 0) ereport(ERROR, (errcode_for_file_access(), - errmsg("could not read block %u in file 
\"%s\": %m", + errmsg("could not read block %u in file \"%s\": %m", blocknum, FilePathName(v->mdfd_vfd)))); /* - * Short read: we are at or past EOF, or we read a partial block at - * EOF. Normally this is an error; upper levels should never try to - * read a nonexistent block. However, if zero_damaged_pages is ON or - * we are InRecovery, we should instead return zeroes without - * complaining. This allows, for example, the case of trying to - * update a block that was later truncated away. - */ + * Short read: we are at or past EOF, or we read a partial block at + * EOF. Normally this is an error; upper levels should never try to + * read a nonexistent block. However, if zero_damaged_pages is ON or + * we are InRecovery, we should instead return zeroes without + * complaining. This allows, for example, the case of trying to + * update a block that was later truncated away. + */ if (zero_damaged_pages || InRecovery) MemSet(buffer, 0, BLCKSZ); else ereport(ERROR, (errcode(ERRCODE_DATA_CORRUPTED), - errmsg("could not read block %u in file \"%s\": read only %d of %d bytes", + errmsg("could not read block %u in file \"%s\": read only %d of %d bytes", blocknum, FilePathName(v->mdfd_vfd), nbytes, BLCKSZ))); } } +/* + * mdwrite_pc() -- Write the supplied block at the appropriate location for page compressed relation. + * + * This is to be used only for updating already-existing blocks of a + * relation (ie, those before the current EOF). To extend a relation, + * use mdextend(). 
+ */ +static void +mdwrite_pc(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, + char *buffer, bool skipFsync) +{ + off_t seekpos; + int nbytes; + MdfdVec *v; + char *work_buffer,*buffer_pos; + int i,work_buffer_size, compressed_page_size; + int prealloc_chunks,chunk_size,nchunks,need_chunks,range,write_amount; + pc_chunk_number_t chunkno; + PageCompressHeader *pcMap; + PageCompressAddr *pcAddr; + uint8 algorithm; + int8 level; + + /* This assert is too expensive to have on normally ... */ +#ifdef CHECK_WRITE_VS_EXTEND + Assert(blocknum < mdnblocks(reln, forknum)); +#endif + + Assert(IS_COMPRESSED_MAINFORK(reln,forkNum)); + + v = _mdfd_getseg(reln, forknum, blocknum, skipFsync, + EXTENSION_FAIL | EXTENSION_CREATE_RECOVERY); + + chunk_size = PAGE_COMPRESS_CHUNK_SIZE(reln); + algorithm = PAGE_COMPRESS_ALGORITHM(reln); + level = PAGE_COMPRESS_LEVEL(reln); + prealloc_chunks = PAGE_COMPRESS_PREALLOC_CHUNKS(reln); + if(prealloc_chunks > BLCKSZ / chunk_size -1) + prealloc_chunks = BLCKSZ / chunk_size -1; + + pcMap = (PageCompressHeader *)GetPageCompressMemoryMap(v->mdfd_vfd_pca, chunk_size); + pcAddr = GetPageCompressAddr(pcMap, chunk_size, blocknum); + + Assert(blocknum % RELSEG_SIZE < pg_atomic_read_u32(&pcMap->nblocks)); + + /* check allocated chunk number */ + if(pcAddr->allocated_chunks > BLCKSZ / chunk_size) + ereport(ERROR, + (errcode(ERRCODE_DATA_CORRUPTED), + errmsg("invalid chunks %u of block %u in file \"%s\"", + pcAddr->allocated_chunks, blocknum, FilePathName(v->mdfd_vfd_pca)))); + + for(i=0; i< pcAddr->allocated_chunks; i++) + { + if(pcAddr->chunknos[i] <= 0 || pcAddr->chunknos[i] > MAX_CHUNK_NUMBER(chunk_size)) + ereport(ERROR, + (errcode(ERRCODE_DATA_CORRUPTED), + errmsg("invalid chunk number %u of block %u in file \"%s\"", + pcAddr->chunknos[i], blocknum, FilePathName(v->mdfd_vfd_pca)))); + } + + /* compress page */ + work_buffer_size = compress_page_buffer_bound(algorithm); + if(work_buffer_size < 0) + ereport(ERROR, + 
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("unrecognized compression algorithm %d", + algorithm))); + work_buffer = palloc(work_buffer_size); + + compressed_page_size = compress_page(buffer, work_buffer, work_buffer_size, algorithm, level); + + if(compressed_page_size < 0) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("unrecognized compression algorithm %d", + algorithm))); + + nchunks = (compressed_page_size + chunk_size - 1) / chunk_size; + + if(chunk_size * nchunks >= BLCKSZ) + { + /* store original page if can not save space ?TODO? */ + pfree(work_buffer); + work_buffer = buffer; + nchunks = BLCKSZ / chunk_size; + } + else + { + /* fill zero in the last chunk */ + if(compressed_page_size < chunk_size * nchunks) + memset(work_buffer + compressed_page_size, 0x00, chunk_size * nchunks - compressed_page_size); + } + + need_chunks = prealloc_chunks > nchunks ? prealloc_chunks : nchunks; + + /* allocate chunks needed */ + if(pcAddr->allocated_chunks < need_chunks) + { + chunkno = (pc_chunk_number_t)pg_atomic_fetch_add_u32(&pcMap->allocated_chunks, need_chunks - pcAddr->allocated_chunks) + 1; + for(i = pcAddr->allocated_chunks ;ichunknos[i] = chunkno; + } + pcAddr->allocated_chunks = need_chunks; + + if(compress_address_flush_chunks > 0 && + pg_atomic_read_u32(&pcMap->allocated_chunks) - pg_atomic_read_u32(&pcMap->last_synced_allocated_chunks) > compress_address_flush_chunks) + { + if(sync_pcmap(pcMap, WAIT_EVENT_COMPRESS_ADDRESS_FILE_FLUSH) != 0) + ereport(data_sync_elevel(ERROR), + (errcode_for_file_access(), + errmsg("could not msync file \"%s\": %m", + FilePathName(v->mdfd_vfd_pca)))); + } + } + + /* write chunks of compressed page */ + for(i=0; i < nchunks;i++) + { + buffer_pos = work_buffer + chunk_size * i; + seekpos = (off_t) OffsetOfPageCompressChunk(chunk_size, pcAddr->chunknos[i]); + range = 1; + while(i < nchunks -1 && pcAddr->chunknos[i+1] == pcAddr->chunknos[i] + 1) + { + range++; + i++; + } + write_amount = chunk_size * 
range; + + TRACE_POSTGRESQL_SMGR_MD_WRITE_START(forknum, blocknum, + reln->smgr_rnode.node.spcNode, + reln->smgr_rnode.node.dbNode, + reln->smgr_rnode.node.relNode, + reln->smgr_rnode.backend); + + nbytes = FileWrite(v->mdfd_vfd_pcd, buffer_pos, write_amount, seekpos, WAIT_EVENT_DATA_FILE_EXTEND); + + TRACE_POSTGRESQL_SMGR_MD_WRITE_DONE(forknum, blocknum, + reln->smgr_rnode.node.spcNode, + reln->smgr_rnode.node.dbNode, + reln->smgr_rnode.node.relNode, + reln->smgr_rnode.backend, + nbytes, + write_amount); + + if (nbytes != write_amount) + { + if (nbytes < 0) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not write block %u in file \"%s\": %m", + blocknum, FilePathName(v->mdfd_vfd_pcd)))); + /* short write: complain appropriately */ + ereport(ERROR, + (errcode(ERRCODE_DISK_FULL), + errmsg("could not write block %u in file \"%s\": wrote only %d of %d bytes", + blocknum, + FilePathName(v->mdfd_vfd_pcd), + nbytes, write_amount), + errhint("Check free disk space."))); + } + } + + /* write preallocated chunks */ + if(need_chunks > nchunks) + { + char *zero_buffer = palloc0(chunk_size * (need_chunks - nchunks)); + + for(i=nchunks; i < need_chunks;i++) + { + seekpos = (off_t) OffsetOfPageCompressChunk(chunk_size, pcAddr->chunknos[i]); + range = 1; + while(i < nchunks -1 && pcAddr->chunknos[i+1] == pcAddr->chunknos[i] + 1) + { + range++; + i++; + } + write_amount = chunk_size * range; + + TRACE_POSTGRESQL_SMGR_MD_WRITE_START(forknum, blocknum, + reln->smgr_rnode.node.spcNode, + reln->smgr_rnode.node.dbNode, + reln->smgr_rnode.node.relNode, + reln->smgr_rnode.backend); + + nbytes = FileWrite(v->mdfd_vfd_pcd, zero_buffer, write_amount, seekpos, WAIT_EVENT_DATA_FILE_EXTEND); + + TRACE_POSTGRESQL_SMGR_MD_WRITE_DONE(forknum, blocknum, + reln->smgr_rnode.node.spcNode, + reln->smgr_rnode.node.dbNode, + reln->smgr_rnode.node.relNode, + reln->smgr_rnode.backend, + nbytes, + write_amount); + + if (nbytes != write_amount) + { + if (nbytes < 0) + ereport(ERROR, + 
(errcode_for_file_access(), + errmsg("could not extend file \"%s\": %m", + FilePathName(v->mdfd_vfd_pcd)), + errhint("Check free disk space."))); + /* short write: complain appropriately */ + ereport(ERROR, + (errcode(ERRCODE_DISK_FULL), + errmsg("could not extend file \"%s\": wrote only %d of %d bytes at block %u", + FilePathName(v->mdfd_vfd_pcd), + nbytes, write_amount, blocknum), + errhint("Check free disk space."))); + } + } + pfree(zero_buffer); + } + + /* finally update size of this page and global nblocks */ + if(pcAddr->nchunks != nchunks) + pcAddr->nchunks = nchunks; + + if(work_buffer != buffer) + pfree(work_buffer); + + if (!skipFsync && !SmgrIsTemp(reln)) + register_dirty_segment(reln, forknum, v); +} + /* * mdwrite() -- Write the supplied block at the appropriate location. * @@ -679,6 +1623,9 @@ mdwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, Assert(blocknum < mdnblocks(reln, forknum)); #endif + if(IS_COMPRESSED_MAINFORK(reln,forknum)) + return mdwrite_pc(reln, forknum, blocknum, buffer, skipFsync); + TRACE_POSTGRESQL_SMGR_MD_WRITE_START(forknum, blocknum, reln->smgr_rnode.node.spcNode, reln->smgr_rnode.node.dbNode, @@ -793,7 +1740,10 @@ mdtruncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks) { BlockNumber curnblk; BlockNumber priorblocks; - int curopensegs; + BlockNumber blk; + int curopensegs, chunk_size, i; + PageCompressHeader *pcMap; + PageCompressAddr *pcAddr; /* * NOTE: mdnblocks makes sure we have opened all active segments, so that @@ -832,11 +1782,36 @@ mdtruncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks) * This segment is no longer active. We truncate the file, but do * not delete it, for reasons explained in the header comments. 
*/ - if (FileTruncate(v->mdfd_vfd, 0, WAIT_EVENT_DATA_FILE_TRUNCATE) < 0) - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not truncate file \"%s\": %m", - FilePathName(v->mdfd_vfd)))); + if(IS_COMPRESSED_MAINFORK(reln,forknum)) + { + chunk_size = PAGE_COMPRESS_CHUNK_SIZE(reln); + pcMap = (PageCompressHeader *)GetPageCompressMemoryMap(v->mdfd_vfd_pca, chunk_size); + + pg_atomic_write_u32(&pcMap->nblocks, 0); + pg_atomic_write_u32(&pcMap->allocated_chunks, 0); + memset((char *)pcMap + SizeOfPageCompressHeaderData, + 0x00, + SizeofPageCompressAddrFile(chunk_size) - SizeOfPageCompressHeaderData); + + if(sync_pcmap(pcMap, WAIT_EVENT_COMPRESS_ADDRESS_FILE_SYNC) != 0) + ereport(data_sync_elevel(ERROR), + (errcode_for_file_access(), + errmsg("could not msync file \"%s\": %m", + FilePathName(v->mdfd_vfd_pca)))); + + if (FileTruncate(v->mdfd_vfd_pcd, 0, WAIT_EVENT_DATA_FILE_TRUNCATE) < 0) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not truncate file \"%s\": %m", + FilePathName(v->mdfd_vfd_pcd)))); + } + else{ + if (FileTruncate(v->mdfd_vfd, 0, WAIT_EVENT_DATA_FILE_TRUNCATE) < 0) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not truncate file \"%s\": %m", + FilePathName(v->mdfd_vfd)))); + } if (!SmgrIsTemp(reln)) register_dirty_segment(reln, forknum, v); @@ -844,26 +1819,97 @@ mdtruncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks) /* we never drop the 1st segment */ Assert(v != &reln->md_seg_fds[forknum][0]); - FileClose(v->mdfd_vfd); + if(IS_COMPRESSED_MAINFORK(reln,forknum)) + { + FileClose(v->mdfd_vfd_pca); + FileClose(v->mdfd_vfd_pcd); + } + else + FileClose(v->mdfd_vfd); + _fdvec_resize(reln, forknum, curopensegs - 1); } else if (priorblocks + ((BlockNumber) RELSEG_SIZE) > nblocks) { - /* - * This is the last segment we want to keep. Truncate the file to - * the right length. NOTE: if nblocks is exactly a multiple K of - * RELSEG_SIZE, we will truncate the K+1st segment to 0 length but - * keep it. 
This adheres to the invariant given in the header - * comments. - */ - BlockNumber lastsegblocks = nblocks - priorblocks; + if(IS_COMPRESSED_MAINFORK(reln,forknum)) + { + pc_chunk_number_t max_used_chunkno = (pc_chunk_number_t) 0; + BlockNumber lastsegblocks = nblocks - priorblocks; + uint32 allocated_chunks; - if (FileTruncate(v->mdfd_vfd, (off_t) lastsegblocks * BLCKSZ, WAIT_EVENT_DATA_FILE_TRUNCATE) < 0) - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not truncate file \"%s\" to %u blocks: %m", - FilePathName(v->mdfd_vfd), - nblocks))); + chunk_size = PAGE_COMPRESS_CHUNK_SIZE(reln); + pcMap = (PageCompressHeader *)GetPageCompressMemoryMap(v->mdfd_vfd_pca, chunk_size); + + for(blk = lastsegblocks; blk < RELSEG_SIZE; blk++) + { + pcAddr = GetPageCompressAddr(pcMap, chunk_size, blk); + pcAddr->nchunks = 0; + } + + pg_atomic_write_u32(&pcMap->nblocks, lastsegblocks); + + if(sync_pcmap(pcMap, WAIT_EVENT_COMPRESS_ADDRESS_FILE_SYNC) != 0) + ereport(data_sync_elevel(ERROR), + (errcode_for_file_access(), + errmsg("could not msync file \"%s\": %m", + FilePathName(v->mdfd_vfd_pca)))); + + allocated_chunks = pg_atomic_read_u32(&pcMap->allocated_chunks); + + /* find the max used chunkno */ + for(blk = (BlockNumber)0; blk < (BlockNumber)lastsegblocks; blk++) + { + pcAddr = GetPageCompressAddr(pcMap, chunk_size, blk); + + /* check allocated_chunks for one page */ + if(pcAddr->allocated_chunks > BLCKSZ / chunk_size) + { + ereport(ERROR, + (errcode(ERRCODE_DATA_CORRUPTED), + errmsg("invalid chunks %u of block %u in file \"%s\"", + pcAddr->allocated_chunks, blk, FilePathName(v->mdfd_vfd_pca)))); + } + + /* check chunknos for one page */ + for(i = 0; i< pcAddr->allocated_chunks; i++) + { + if(pcAddr->chunknos[i] == 0 || pcAddr->chunknos[i] > allocated_chunks) + { + ereport(ERROR, + (errcode(ERRCODE_DATA_CORRUPTED), + errmsg("invalid chunk number %u of block %u in file \"%s\"", + pcAddr->chunknos[i], blk, FilePathName(v->mdfd_vfd_pca)))); + } + + 
if(pcAddr->chunknos[i] > max_used_chunkno ) + max_used_chunkno = pcAddr->chunknos[i]; + } + } + + if (FileTruncate(v->mdfd_vfd_pcd, max_used_chunkno * chunk_size, WAIT_EVENT_DATA_FILE_TRUNCATE) < 0) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not truncate file \"%s\": %m", + FilePathName(v->mdfd_vfd_pcd)))); + } + else + { + /* + * This is the last segment we want to keep. Truncate the file to + * the right length. NOTE: if nblocks is exactly a multiple K of + * RELSEG_SIZE, we will truncate the K+1st segment to 0 length but + * keep it. This adheres to the invariant given in the header + * comments. + */ + BlockNumber lastsegblocks = nblocks - priorblocks; + + if (FileTruncate(v->mdfd_vfd, (off_t) lastsegblocks * BLCKSZ, WAIT_EVENT_DATA_FILE_TRUNCATE) < 0) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not truncate file \"%s\" to %u blocks: %m", + FilePathName(v->mdfd_vfd), + nblocks))); + } if (!SmgrIsTemp(reln)) register_dirty_segment(reln, forknum, v); } @@ -917,16 +1963,43 @@ mdimmedsync(SMgrRelation reln, ForkNumber forknum) { MdfdVec *v = &reln->md_seg_fds[forknum][segno - 1]; - if (FileSync(v->mdfd_vfd, WAIT_EVENT_DATA_FILE_IMMEDIATE_SYNC) < 0) - ereport(data_sync_elevel(ERROR), - (errcode_for_file_access(), - errmsg("could not fsync file \"%s\": %m", - FilePathName(v->mdfd_vfd)))); + if(IS_COMPRESSED_MAINFORK(reln,forknum)) + { + PageCompressHeader *pcMap; + + pcMap = (PageCompressHeader *)GetPageCompressMemoryMap(v->mdfd_vfd_pca, PAGE_COMPRESS_CHUNK_SIZE(reln)); + + if(sync_pcmap(pcMap, WAIT_EVENT_COMPRESS_ADDRESS_FILE_SYNC) != 0) + ereport(data_sync_elevel(ERROR), + (errcode_for_file_access(), + errmsg("could not msync file \"%s\": %m", + FilePathName(v->mdfd_vfd_pca)))); + + if (FileSync(v->mdfd_vfd_pcd, WAIT_EVENT_DATA_FILE_IMMEDIATE_SYNC) < 0) + ereport(data_sync_elevel(ERROR), + (errcode_for_file_access(), + errmsg("could not fsync file \"%s\": %m", + FilePathName(v->mdfd_vfd_pcd)))); + } + else + { + if 
(FileSync(v->mdfd_vfd, WAIT_EVENT_DATA_FILE_IMMEDIATE_SYNC) < 0) + ereport(data_sync_elevel(ERROR), + (errcode_for_file_access(), + errmsg("could not fsync file \"%s\": %m", + FilePathName(v->mdfd_vfd)))); + } /* Close inactive segments immediately */ if (segno > min_inactive_seg) { - FileClose(v->mdfd_vfd); + if(IS_COMPRESSED_MAINFORK(reln,forknum)) + { + FileClose(v->mdfd_vfd_pca); + FileClose(v->mdfd_vfd_pcd); + } + else + FileClose(v->mdfd_vfd); _fdvec_resize(reln, forknum, segno - 1); } @@ -958,11 +2031,31 @@ register_dirty_segment(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg) ereport(DEBUG1, (errmsg("could not forward fsync request because request queue is full"))); - if (FileSync(seg->mdfd_vfd, WAIT_EVENT_DATA_FILE_SYNC) < 0) - ereport(data_sync_elevel(ERROR), - (errcode_for_file_access(), - errmsg("could not fsync file \"%s\": %m", - FilePathName(seg->mdfd_vfd)))); + if(IS_COMPRESSED_MAINFORK(reln,forknum)) + { + PageCompressHeader *pcMap; + + pcMap = (PageCompressHeader *)GetPageCompressMemoryMap(seg->mdfd_vfd_pca, PAGE_COMPRESS_CHUNK_SIZE(reln)); + + if(sync_pcmap(pcMap, WAIT_EVENT_COMPRESS_ADDRESS_FILE_SYNC) != 0) + ereport(data_sync_elevel(ERROR), + (errcode_for_file_access(), + errmsg("could not msync file \"%s\": %m", + FilePathName(seg->mdfd_vfd_pca)))); + + if (FileSync(seg->mdfd_vfd_pcd, WAIT_EVENT_DATA_FILE_SYNC) < 0) + ereport(data_sync_elevel(ERROR), + (errcode_for_file_access(), + errmsg("could not fsync file \"%s\": %m", + FilePathName(seg->mdfd_vfd_pcd)))); + }else + { + if (FileSync(seg->mdfd_vfd, WAIT_EVENT_DATA_FILE_SYNC) < 0) + ereport(data_sync_elevel(ERROR), + (errcode_for_file_access(), + errmsg("could not fsync file \"%s\": %m", + FilePathName(seg->mdfd_vfd)))); + } } } @@ -1116,18 +2209,52 @@ _mdfd_openseg(SMgrRelation reln, ForkNumber forknum, BlockNumber segno, int oflags) { MdfdVec *v; - File fd; - char *fullpath; + File fd,fd_pca,fd_pcd; + char *fullpath,*pcfile_path; fullpath = _mdfd_segpath(reln, forknum, segno); /* open 
the file */ fd = PathNameOpenFile(fullpath, O_RDWR | PG_BINARY | oflags); - pfree(fullpath); - if (fd < 0) + { + pfree(fullpath); return NULL; + } + + fd_pca = -1; + fd_pcd = -1; + if(IS_COMPRESSED_MAINFORK(reln,forknum)) + { + /* open page compress address file */ + pcfile_path = psprintf("%s_pca", fullpath); + fd_pca = PathNameOpenFile(pcfile_path, O_RDWR | PG_BINARY | oflags); + + pfree(pcfile_path); + + if (fd_pca < 0) + { + pfree(fullpath); + return NULL; + } + + /* open page compress data file */ + pcfile_path = psprintf("%s_pcd", fullpath); + fd_pcd = PathNameOpenFile(pcfile_path, O_RDWR | PG_BINARY | oflags); + + pfree(pcfile_path); + + if (fd_pcd < 0) + { + pfree(fullpath); + return NULL; + } + + SetupPageCompressMemoryMap(fd_pca, PAGE_COMPRESS_CHUNK_SIZE(reln), PAGE_COMPRESS_ALGORITHM(reln)); + } + + pfree(fullpath); /* * Segments are always opened in order from lowest to highest, so we must @@ -1140,6 +2267,8 @@ _mdfd_openseg(SMgrRelation reln, ForkNumber forknum, BlockNumber segno, /* fill the entry */ v = &reln->md_seg_fds[forknum][segno]; v->mdfd_vfd = fd; + v->mdfd_vfd_pca = fd_pca; + v->mdfd_vfd_pcd = fd_pcd; v->mdfd_segno = segno; Assert(_mdnblocks(reln, forknum, v) <= ((BlockNumber) RELSEG_SIZE)); @@ -1285,6 +2414,13 @@ static BlockNumber _mdnblocks(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg) { off_t len; + PageCompressHeader *pcMap; + + if(IS_COMPRESSED_MAINFORK(reln,forknum)) + { + pcMap = (PageCompressHeader *)GetPageCompressMemoryMap(seg->mdfd_vfd_pca, PAGE_COMPRESS_CHUNK_SIZE(reln)); + return (BlockNumber) pg_atomic_read_u32(&pcMap->nblocks); + } len = FileSize(seg->mdfd_vfd); if (len < 0) @@ -1311,6 +2447,82 @@ mdsyncfiletag(const FileTag *ftag, char *path) int result, save_errno; + if(IS_COMPRESSED_MAINFORK(reln,ftag->forknum)) + { + PageCompressHeader *pcMap; + + /* sync page compression address file */ + /* See if we already have the file open, or need to open it. 
*/ + if (ftag->segno < reln->md_num_open_segs[ftag->forknum]) + { + file = reln->md_seg_fds[ftag->forknum][ftag->segno].mdfd_vfd_pca; + strlcpy(path, FilePathName(file), MAXPGPATH); + need_to_close = false; + } + else + { + char *p; + + p = _mdfd_segpath(reln, ftag->forknum, ftag->segno); + snprintf(path, MAXPGPATH, "%s_pca", p); + pfree(p); + + file = PathNameOpenFile(path, O_RDWR | PG_BINARY); + if (file < 0) + return -1; + + need_to_close = true; + + SetupPageCompressMemoryMap(file, PAGE_COMPRESS_CHUNK_SIZE(reln), PAGE_COMPRESS_ALGORITHM(reln)); + } + + pcMap = (PageCompressHeader *)GetPageCompressMemoryMap(file, PAGE_COMPRESS_CHUNK_SIZE(reln)); + result = sync_pcmap(pcMap, WAIT_EVENT_COMPRESS_ADDRESS_FILE_SYNC); + save_errno = errno; + + if (need_to_close) + FileClose(file); + + if(result != 0) + { + errno = save_errno; + return result; + } + + /* sync page compression data file */ + /* See if we already have the file open, or need to open it. */ + if (ftag->segno < reln->md_num_open_segs[ftag->forknum]) + { + file = reln->md_seg_fds[ftag->forknum][ftag->segno].mdfd_vfd_pcd; + strlcpy(path, FilePathName(file), MAXPGPATH); + need_to_close = false; + } + else + { + char *p; + + p = _mdfd_segpath(reln, ftag->forknum, ftag->segno); + snprintf(path, MAXPGPATH, "%s_pcd", p); + pfree(p); + + file = PathNameOpenFile(path, O_RDWR | PG_BINARY); + if (file < 0) + return -1; + + need_to_close = true; + } + + /* Sync the page compression data file. */ + result = FileSync(file, WAIT_EVENT_DATA_FILE_SYNC); + save_errno = errno; + + if (need_to_close) + FileClose(file); + + errno = save_errno; + return result; + } + /* See if we already have the file open, or need to open it. */ if (ftag->segno < reln->md_num_open_segs[ftag->forknum]) { @@ -1352,15 +2564,33 @@ mdsyncfiletag(const FileTag *ftag, char *path) int mdunlinkfiletag(const FileTag *ftag, char *path) { + SMgrRelation reln = smgropen(ftag->rnode, InvalidBackendId); char *p; + int ret; /* Compute the path. 
*/ p = relpathperm(ftag->rnode, MAIN_FORKNUM); strlcpy(path, p, MAXPGPATH); - pfree(p); /* Try to unlink the file. */ - return unlink(path); + ret = unlink(path); + + if((ret == 0 || errno == ENOENT) && + IS_COMPRESSED_MAINFORK(reln,ftag->forknum)) + { + snprintf(path, MAXPGPATH, "%s_pca", p); + ret = unlink(path); + + if(ret == 0 || errno == ENOENT) + { + snprintf(path, MAXPGPATH, "%s_pcd", p); + ret = unlink(path); + } + } + + pfree(p); + + return ret; } /* @@ -1379,3 +2609,30 @@ mdfiletagmatches(const FileTag *ftag, const FileTag *candidate) */ return ftag->rnode.dbNode == candidate->rnode.dbNode; } + +static int +sync_pcmap(PageCompressHeader * pcMap, uint32 wait_event_info) +{ + int returnCode; + uint32 nblocks, allocated_chunks, last_synced_nblocks, last_synced_allocated_chunks; + + nblocks = pg_atomic_read_u32(&pcMap->nblocks); + allocated_chunks = pg_atomic_read_u32(&pcMap->allocated_chunks); + last_synced_nblocks = pg_atomic_read_u32(&pcMap->last_synced_nblocks); + last_synced_allocated_chunks = pg_atomic_read_u32(&pcMap->last_synced_allocated_chunks); + + pgstat_report_wait_start(wait_event_info); + returnCode = pc_msync(pcMap); + pgstat_report_wait_end(); + + if(returnCode == 0) + { + if(last_synced_nblocks != nblocks) + pg_atomic_write_u32(&pcMap->last_synced_nblocks, nblocks); + + if(last_synced_allocated_chunks != allocated_chunks) + pg_atomic_write_u32(&pcMap->last_synced_allocated_chunks, allocated_chunks); + } + + return returnCode; +} \ No newline at end of file diff --git a/src/backend/storage/smgr/page_compression.c b/src/backend/storage/smgr/page_compression.c new file mode 100644 index 0000000000..86cb3eefc4 --- /dev/null +++ b/src/backend/storage/smgr/page_compression.c @@ -0,0 +1,399 @@ +/* + * page_compression.c + * Routines for page compression + * + * There are two implementations at the moment: zstd, and the Postgres + * pg_lzcompress(). zstd support requires that the server was compiled + * with --with-zstd. 
+ * + * Copyright (c) 2020, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/backend/storage/smgr/page_compression.c + */ +#include "postgres.h" +#include "miscadmin.h" +#include "catalog/pg_type.h" +#include "utils/array.h" +#include "utils/datum.h" +#include "utils/relcache.h" + +#include "utils/timestamp.h" +#include "storage/bufmgr.h" +#include "storage/page_compression.h" +#include "storage/page_compression_impl.h" + +int compress_address_flush_chunks; + +/** + * buildCompressReloptions() -- build compression option array from PageCompressOpts + * + */ +Datum +buildCompressReloptions(PageCompressOpts *pcOpt) +{ + Datum result; + ArrayBuildState *astate; + text *t; + char *value; + Size len; + + /* We build new array using accumArrayResult */ + astate = NULL; + + /* compresstype */ + value = NULL; + switch (pcOpt->compresstype) + { + case COMPRESS_TYPE_PGLZ: + value = "pglz"; + break; + + case COMPRESS_TYPE_ZSTD: + value = "zstd"; + break; + + default: + break; + } + + if(value != NULL) + { + len = VARHDRSZ + strlen("compresstype") + 1 + strlen(value); + + /* +1 leaves room for sprintf's trailing null */ + t = (text *) palloc(len + 1); + SET_VARSIZE(t, len); + sprintf(VARDATA(t), "compresstype=%s", value); + + astate = accumArrayResult(astate, PointerGetDatum(t), + false, TEXTOID, + CurrentMemoryContext); + } + + /* compresslevel */ + if(pcOpt->compresslevel != 0) + { + value = psprintf("%d",pcOpt->compresslevel); + + len = VARHDRSZ + strlen("compresslevel") + 1 + strlen(value); + + /* +1 leaves room for sprintf's trailing null */ + t = (text *) palloc(len + 1); + SET_VARSIZE(t, len); + sprintf(VARDATA(t), "compresslevel=%s", value); + pfree(value); + + astate = accumArrayResult(astate, PointerGetDatum(t), + false, TEXTOID, + CurrentMemoryContext); + } + + /* compresslevel */ + if(pcOpt->compress_chunk_size != BLCKSZ / 2) + { + value = psprintf("%d",pcOpt->compress_chunk_size); + + len = VARHDRSZ + strlen("compress_chunk_size") + 1 + 
strlen(value); + + /* +1 leaves room for sprintf's trailing null */ + t = (text *) palloc(len + 1); + SET_VARSIZE(t, len); + sprintf(VARDATA(t), "compress_chunk_size=%s", value); + pfree(value); + + astate = accumArrayResult(astate, PointerGetDatum(t), + false, TEXTOID, + CurrentMemoryContext); + } + + /* compresslevel */ + if(pcOpt->compress_prealloc_chunks != 0) + { + value = psprintf("%d",pcOpt->compress_prealloc_chunks); + + len = VARHDRSZ + strlen("compress_prealloc_chunks") + 1 + strlen(value); + + /* +1 leaves room for sprintf's trailing null */ + t = (text *) palloc(len + 1); + SET_VARSIZE(t, len); + sprintf(VARDATA(t), "compress_prealloc_chunks=%s", value); + pfree(value); + + astate = accumArrayResult(astate, PointerGetDatum(t), + false, TEXTOID, + CurrentMemoryContext); + } + + if (astate) + result = makeArrayResult(astate, CurrentMemoryContext); + else + result = (Datum) 0; + + return result; +} + +void +check_and_repair_compress_address(PageCompressHeader *pcMap, uint16 chunk_size, uint8 algorithm, const char *path) +{ + int i, unused_chunks; + BlockNumber blocknum, max_blocknum, max_nonzero_blocknum; + uint32 nblocks, allocated_chunks; + pc_chunk_number_t max_allocated_chunkno; + BlockNumber *global_chunknos; + char last_recovery_start_time_buf[sizeof(TimestampTz)]; + char start_time_buf[sizeof(TimestampTz)]; + bool need_check = false; + + unused_chunks = 0; + max_blocknum = (BlockNumber) -1; + max_nonzero_blocknum = (BlockNumber) -1; + max_allocated_chunkno = (pc_chunk_number_t) 0; + + /* if the relation had been checked in this startup, skip */ + memcpy(last_recovery_start_time_buf, &pcMap->last_recovery_start_time,sizeof(TimestampTz)); + memcpy(start_time_buf, &PgStartTime,sizeof(TimestampTz)); + for(i=0; i < sizeof(TimestampTz); i++) + { + if(start_time_buf[i] != last_recovery_start_time_buf[i]) + { + need_check = true; + break; + } + } + if(!need_check) + return; + + /* check head of compress address file */ + if(pcMap->chunk_size != chunk_size 
|| pcMap->algorithm != algorithm) + { + /* reinitialize compress head if it's invalid and zero_damaged_pages is on */ + if(zero_damaged_pages) + { + ereport(WARNING, + (errcode(ERRCODE_DATA_CORRUPTED), + errmsg("invalid chunk_size %u or algorithm %u in head of compress relation address file \"%s\", and reinitialized it.", + pcMap->chunk_size, pcMap->algorithm, path))); + + pcMap->algorithm = algorithm; + pg_atomic_write_u32(&pcMap->nblocks, RELSEG_SIZE); + pg_atomic_write_u32(&pcMap->allocated_chunks, 0); + pg_atomic_write_u32(&pcMap->last_synced_allocated_chunks, 0); + pcMap->chunk_size = chunk_size; + } + else + ereport(ERROR, + (errcode(ERRCODE_DATA_CORRUPTED), + errmsg("invalid chunk_size %u or algorithm %u in head of compress relation address file \"%s\"", + pcMap->chunk_size, pcMap->algorithm, path))); + } + + nblocks = pg_atomic_read_u32(&pcMap->nblocks); + allocated_chunks = pg_atomic_read_u32(&pcMap->allocated_chunks); + global_chunknos = palloc0(sizeof(BlockNumber) * MAX_CHUNK_NUMBER(chunk_size)); + + /* check compress address of every page */ + for(blocknum = 0; blocknum < (BlockNumber)RELSEG_SIZE; blocknum++) + { + PageCompressAddr *pcAddr = GetPageCompressAddr(pcMap, chunk_size, blocknum); + + /* skip when found first zero filled block after nblocks */ + /*if(blocknum >= (BlockNumber)nblocks && pcAddr->allocated_chunks == 0) + break;*/ + + /* check allocated_chunks for one page */ + if(pcAddr->allocated_chunks > BLCKSZ / chunk_size) + { + if(zero_damaged_pages) + { + /* report the corrupt value before MemSet zeroes it */ + ereport(WARNING, + (errcode(ERRCODE_DATA_CORRUPTED), + errmsg("invalid allocated_chunks %u of block %u in file \"%s\", and zero this block", + pcAddr->allocated_chunks, blocknum, path))); + MemSet(pcAddr, 0, SizeOfPageCompressAddr(chunk_size)); + continue; + } + else + { + pfree(global_chunknos); + ereport(ERROR, + (errcode(ERRCODE_DATA_CORRUPTED), + errmsg("invalid allocated_chunks %u of block %u in file \"%s\"", + pcAddr->allocated_chunks, blocknum, path))); + } + } + + /* check chunknos for one page */ + 
for(i = 0; i< pcAddr->allocated_chunks; i++) + { + /* check for invalid chunkno */ + if(pcAddr->chunknos[i] == 0 || pcAddr->chunknos[i] > MAX_CHUNK_NUMBER(chunk_size)) + { + if (zero_damaged_pages) + { + /* report the corrupt value before MemSet zeroes it */ + ereport(WARNING, + (errcode(ERRCODE_DATA_CORRUPTED), + errmsg("invalid chunk number %u of block %u in file \"%s\", and zero this block", + pcAddr->chunknos[i], blocknum, path))); + MemSet(pcAddr, 0, SizeOfPageCompressAddr(chunk_size)); + continue; + } + else + { + pfree(global_chunknos); + ereport(ERROR, + (errcode(ERRCODE_DATA_CORRUPTED), + errmsg("invalid chunk number %u of block %u in file \"%s\"", + pcAddr->chunknos[i], blocknum, path))); + } + } + + /* check for duplicate chunkno */ + if(global_chunknos[pcAddr->chunknos[i] - 1] !=0 ) + { + if (zero_damaged_pages) + { + /* report the corrupt value before MemSet zeroes it */ + ereport(WARNING, + (errcode(ERRCODE_DATA_CORRUPTED), + errmsg("chunk number %u of block %u duplicate with block %u in file \"%s\", and zero this block", + pcAddr->chunknos[i], blocknum, global_chunknos[pcAddr->chunknos[i] - 1], path))); + MemSet(pcAddr, 0, SizeOfPageCompressAddr(chunk_size)); + continue; + } + else + { + pfree(global_chunknos); + ereport(ERROR, + (errcode(ERRCODE_DATA_CORRUPTED), + errmsg("chunk number %u of block %u duplicate with block %u in file \"%s\"", + pcAddr->chunknos[i], blocknum, global_chunknos[pcAddr->chunknos[i] - 1], path))); + } + } + } + + /* clean chunknos beyond allocated_chunks for one page */ + for(i = pcAddr->allocated_chunks; i< BLCKSZ / chunk_size; i++) + { + if(pcAddr->chunknos[i] != 0) + { + /* report the stale value, then clear it */ + ereport(WARNING, + (errcode(ERRCODE_DATA_CORRUPTED), + errmsg("clear chunk number %u beyond allocated_chunks %u of block %u in file \"%s\"", + pcAddr->chunknos[i], pcAddr->allocated_chunks, blocknum, path))); + pcAddr->chunknos[i] = 0; + } + } + + /* check nchunks for one page */ + if(pcAddr->nchunks > pcAddr->allocated_chunks) + { + if(zero_damaged_pages) + { + MemSet(pcAddr, 0, SizeOfPageCompressAddr(chunk_size)); + ereport(WARNING, + (errcode(ERRCODE_DATA_CORRUPTED), 
+ errmsg("nchunks %u exceeds allocated_chunks %u of block %u in file \"%s\", and zero this block", + pcAddr->nchunks, pcAddr->allocated_chunks, blocknum, path))); + continue; + } + else + { + pfree(global_chunknos); + ereport(ERROR, + (errcode(ERRCODE_DATA_CORRUPTED), + errmsg("nchunks %u exceeds allocated_chunks %u of block %u in file \"%s\"", + pcAddr->nchunks, pcAddr->allocated_chunks, blocknum, path))); + } + } + + max_blocknum = blocknum; + if(pcAddr->nchunks > 0) + max_nonzero_blocknum = blocknum; + + for(i = 0; i < pcAddr->allocated_chunks; i++) + { + global_chunknos[pcAddr->chunknos[i] -1 ] = blocknum + 1; + if(pcAddr->chunknos[i] > max_allocated_chunkno) + max_allocated_chunkno = pcAddr->chunknos[i]; + } + } + + /* check for holes in allocated chunks*/ + for(i = 0; i < max_allocated_chunkno; i++) + if(global_chunknos[i] == 0) + unused_chunks ++; + + if(unused_chunks > 0) + ereport(WARNING, + (errcode(ERRCODE_DATA_CORRUPTED), + errmsg("there are %u chunks of total allocated chunks %u that cannot be used in file \"%s\"", + unused_chunks, max_allocated_chunkno, path), + errhint("You may need to run VACUUM FULL to optimize space allocation, or run REINDEX if it is an index."))); + + /* update nblocks in head of compressed file */ + if(nblocks < max_nonzero_blocknum + 1) + { + pg_atomic_write_u32(&pcMap->nblocks, max_nonzero_blocknum + 1); + pg_atomic_write_u32(&pcMap->last_synced_nblocks, max_nonzero_blocknum + 1); + + ereport(WARNING, + (errcode(ERRCODE_DATA_CORRUPTED), + errmsg("update nblocks in head of compressed file \"%s\". 
old: %u, new: %u", + path, nblocks, max_nonzero_blocknum + 1))); + } + + /* update allocated_chunks in head of compress file */ + if(allocated_chunks != max_allocated_chunkno) + { + pg_atomic_write_u32(&pcMap->allocated_chunks, max_allocated_chunkno); + pg_atomic_write_u32(&pcMap->last_synced_allocated_chunks, max_allocated_chunkno); + + ereport(WARNING, + (errcode(ERRCODE_DATA_CORRUPTED), + errmsg("update allocated_chunks in head of compressed file \"%s\". old: %u, new: %u", + path, allocated_chunks, max_allocated_chunkno))); + } + + /* clean compress address after max_blocknum + 1 */ + for(blocknum = max_blocknum + 1; blocknum < (BlockNumber)RELSEG_SIZE; blocknum++) + { + char buf[128], *p; + PageCompressAddr *pcAddr = GetPageCompressAddr(pcMap, chunk_size, blocknum); + + /* skip zero block */ + if(pcAddr->allocated_chunks == 0 && pcAddr->nchunks == 0) + continue; + + /* clean compress address and output content of the address */ + MemSet(buf, 0, sizeof(buf)); + p = buf; + + for(i = 0; i< pcAddr->allocated_chunks; i++) + { + if(pcAddr->chunknos[i]) + { + if(i==0) + snprintf(p, (sizeof(buf) - (p - buf)), "%u", pcAddr->chunknos[i]); + else + snprintf(p, (sizeof(buf) - (p - buf)), ",%u", pcAddr->chunknos[i]); + p += strlen(p); + } + } + + MemSet(pcAddr, 0, SizeOfPageCompressAddr(chunk_size)); + ereport(WARNING, + (errcode(ERRCODE_DATA_CORRUPTED), + errmsg("clean unused compress address of block %u in file \"%s\", old allocated_chunks/nchunks/chunknos: %u/%u/{%s}", + blocknum, path, pcAddr->allocated_chunks, pcAddr->nchunks, buf))); + } + + pfree(global_chunknos); + + if(pc_msync(pcMap) != 0) + ereport(data_sync_elevel(ERROR), + (errcode_for_file_access(), + errmsg("could not msync file \"%s\": %m", + path))); + + pcMap->last_recovery_start_time = PgStartTime; +} \ No newline at end of file diff --git a/src/backend/utils/adt/dbsize.c b/src/backend/utils/adt/dbsize.c index 3319e9761e..bbffde6239 100644 --- a/src/backend/utils/adt/dbsize.c +++ 
b/src/backend/utils/adt/dbsize.c @@ -292,16 +292,58 @@ calculate_relation_size(RelFileNode *rfn, BackendId backend, ForkNumber forknum) snprintf(pathname, MAXPGPATH, "%s.%u", relationpath, segcount); - if (stat(pathname, &fst) < 0) + if(rfn->compress_algorithm != COMPRESS_TYPE_NONE && + forknum == MAIN_FORKNUM) { - if (errno == ENOENT) - break; - else - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not stat file \"%s\": %m", pathname))); + char *pcfile_segpath; + + /* calculate size of page compression address file */ + pcfile_segpath = psprintf("%s_pca", pathname); + if (stat(pcfile_segpath, &fst) < 0) + { + if (errno == ENOENT) + { + pfree(pcfile_segpath); + break; + } + else + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not stat file \"%s\": %m", pcfile_segpath))); + } + totalsize += fst.st_size; + pfree(pcfile_segpath); + + /* calculate size of page compression data file */ + pcfile_segpath = psprintf("%s_pcd", pathname); + if (stat(pcfile_segpath, &fst) < 0) + { + if (errno == ENOENT) + { + pfree(pcfile_segpath); + break; + } + else + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not stat file \"%s\": %m", pcfile_segpath))); + } + totalsize += fst.st_size; + pfree(pcfile_segpath); + } + else + { + if (stat(pathname, &fst) < 0) + { + if (errno == ENOENT) + break; + else + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not stat file \"%s\": %m", pathname))); + } + totalsize += fst.st_size; } - totalsize += fst.st_size; } return totalsize; diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c index 9061af81a3..772d558b83 100644 --- a/src/backend/utils/cache/relcache.c +++ b/src/backend/utils/cache/relcache.c @@ -30,6 +30,10 @@ #include #include +#include "access/hash.h" +#include "access/gin_private.h" +#include "access/gist_private.h" +#include "access/spgist_private.h" #include "access/htup_details.h" #include "access/multixact.h" #include "access/nbtree.h" @@ -74,6 
+78,7 @@ #include "rewrite/rewriteDefine.h" #include "rewrite/rowsecurity.h" #include "storage/lmgr.h" +#include "storage/page_compression.h" #include "storage/smgr.h" #include "utils/array.h" #include "utils/builtins.h" @@ -295,6 +300,7 @@ static OpClassCacheEnt *LookupOpclassInfo(Oid operatorClassOid, StrategyNumber numSupport); static void RelationCacheInitFileRemoveInDir(const char *tblspcpath); static void unlink_initfile(const char *initfilename, int elevel); +static void SetupPageCompressForRelation(Relation relation, PageCompressOpts *compress_options); /* @@ -1337,6 +1343,40 @@ RelationInitPhysicalAddr(Relation relation) else relation->rd_firstRelfilenodeSubid = InvalidSubTransactionId; } + + /* setup page compression option */ + if(relation->rd_options) + { + switch(relation->rd_rel->relam) + { + case HEAP_TABLE_AM_OID: + SetupPageCompressForRelation(relation, &((StdRdOptions *)(relation->rd_options))->compress); + break; + + case BTREE_AM_OID: + SetupPageCompressForRelation(relation, &((BTOptions *)(relation->rd_options))->compress); + break; + + case HASH_AM_OID: + SetupPageCompressForRelation(relation, &((HashOptions *)(relation->rd_options))->compress); + break; + + case GIN_AM_OID: + SetupPageCompressForRelation(relation, &((GinOptions *)(relation->rd_options))->compress); + break; + + case GIST_AM_OID: + SetupPageCompressForRelation(relation, &((GiSTOptions *)(relation->rd_options))->compress); + break; + + case SPGIST_AM_OID: + SetupPageCompressForRelation(relation, &((SpGistOptions *)(relation->rd_options))->compress); + break; + + default: + break; + } + } } /* @@ -3350,7 +3390,8 @@ RelationBuildLocalRelation(const char *relname, bool shared_relation, bool mapped_relation, char relpersistence, - char relkind) + char relkind, + Datum reloptions) { Relation rel; MemoryContext oldcxt; @@ -3527,6 +3568,16 @@ RelationBuildLocalRelation(const char *relname, RelationInitPhysicalAddr(rel); + /* setup page compress option */ + if (reloptions && + (relkind 
== RELKIND_RELATION || + relkind == RELKIND_MATVIEW || + relkind == RELKIND_INDEX)) + { + StdRdOptions *options = (StdRdOptions *)default_reloptions(reloptions, false, RELOPT_KIND_HEAP); + SetupPageCompressForRelation(rel, &options->compress); + } + rel->rd_rel->relam = accessmtd; if (relkind == RELKIND_RELATION || @@ -6404,3 +6455,46 @@ unlink_initfile(const char *initfilename, int elevel) initfilename))); } } + +/* setup page compress options for relation */ +static void +SetupPageCompressForRelation(Relation relation, PageCompressOpts *compress_options) +{ + if(compress_options->compresstype == COMPRESS_TYPE_NONE) + { + relation->rd_node.compress_algorithm = COMPRESS_TYPE_NONE; + relation->rd_node.compresslevel = 0; + relation->rd_node.compress_chunk_size = 0; + relation->rd_node.compress_prealloc_chunks = 0; + } + else + { + if(!SUPPORT_PAGE_COMPRESSION) + elog(ERROR, "unsupported page compression on this platform"); + +#ifndef USE_ZSTD + if(compress_options->compresstype == COMPRESS_TYPE_ZSTD) + elog(ERROR, "unsupported compression algorithm %s","zstd"); +#endif + + relation->rd_node.compress_algorithm = compress_options->compresstype; + relation->rd_node.compresslevel = compress_options->compresslevel; + + if(compress_options->compress_chunk_size != BLCKSZ / 2 && + compress_options->compress_chunk_size != BLCKSZ / 4 && + compress_options->compress_chunk_size != BLCKSZ / 8) + { + elog(ERROR, "invalid compress_chunk_size %d , must be one of %d, %d or %d for %s", + compress_options->compress_chunk_size, + BLCKSZ / 8, BLCKSZ / 4, BLCKSZ / 2, + RelationGetRelationName(relation)); + } + + relation->rd_node.compress_chunk_size = compress_options->compress_chunk_size; + + if(compress_options->compress_prealloc_chunks >= BLCKSZ / compress_options->compress_chunk_size) + relation->rd_node.compress_prealloc_chunks = (uint8)(BLCKSZ / compress_options->compress_chunk_size - 1); + else + relation->rd_node.compress_prealloc_chunks = 
(uint8)(compress_options->compress_prealloc_chunks); + } +} \ No newline at end of file diff --git a/src/backend/utils/cache/spccache.c b/src/backend/utils/cache/spccache.c index e0c3c1b1c1..fed9074a36 100644 --- a/src/backend/utils/cache/spccache.c +++ b/src/backend/utils/cache/spccache.c @@ -235,3 +235,18 @@ get_tablespace_maintenance_io_concurrency(Oid spcid) else return spc->opts->maintenance_io_concurrency; } + +/* + * get_tablespace_compression_option + * + */ +PageCompressOpts * +get_tablespace_compression_option(Oid spcid) +{ + TableSpaceCacheEntry *spc = get_tablespace(spcid); + + if (!spc->opts) + return NULL; + else + return &spc->opts->compress; +} diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index 596bcb7b84..065c306b7c 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -80,6 +80,7 @@ #include "storage/fd.h" #include "storage/large_object.h" #include "storage/pg_shmem.h" +#include "storage/page_compression.h" #include "storage/predicate.h" #include "storage/proc.h" #include "storage/standby.h" @@ -759,6 +760,8 @@ const char *const config_group_names[] = gettext_noop("Client Connection Defaults / Other Defaults"), /* LOCK_MANAGEMENT */ gettext_noop("Lock Management"), + /* COMPRESS_OPTIONS */ + gettext_noop("Page Compression"), /* COMPAT_OPTIONS */ gettext_noop("Version and Platform Compatibility"), /* COMPAT_OPTIONS_PREVIOUS */ @@ -3399,6 +3402,16 @@ static struct config_int ConfigureNamesInt[] = check_huge_page_size, NULL, NULL }, + { + {"compress_address_flush_chunks", PGC_SIGHUP, COMPRESS_OPTIONS, + gettext_noop("Set the maximum number of chunks allocated between two flush compressed address files."), + NULL + }, + &compress_address_flush_chunks, + 5000, 0, INT_MAX, + NULL, NULL, NULL + }, + /* End-of-list marker */ { {NULL, 0, 0, NULL, NULL}, NULL, 0, 0, 0, NULL, NULL, NULL diff --git a/src/bin/pg_basebackup/t/010_pg_basebackup.pl b/src/bin/pg_basebackup/t/010_pg_basebackup.pl index 
f674a7c94e..afd9ea5395 100644 --- a/src/bin/pg_basebackup/t/010_pg_basebackup.pl +++ b/src/bin/pg_basebackup/t/010_pg_basebackup.pl @@ -6,7 +6,7 @@ use File::Basename qw(basename dirname); use File::Path qw(rmtree); use PostgresNode; use TestLib; -use Test::More tests => 109; +use Test::More tests => 115; program_help_ok('pg_basebackup'); program_version_ok('pg_basebackup'); @@ -88,6 +88,17 @@ my $baseUnloggedPath = $node->safe_psql('postgres', ok(-f "$pgdata/${baseUnloggedPath}_init", 'unlogged init fork in base'); ok(-f "$pgdata/$baseUnloggedPath", 'unlogged main fork in base'); +# Create an compressed table to test that pca and pcd file are copied. +$node->safe_psql('postgres', 'CREATE TABLE base_compressed (id int) WITH(compresstype=pglz)'); + +my $baseCompressedPath = $node->safe_psql('postgres', + q{select pg_relation_filepath('base_compressed')}); + +# Make sure compressed relation's address and data files exist +ok(-f "$pgdata/$baseCompressedPath", 'compressed main fork in base'); +ok(-f "$pgdata/${baseCompressedPath}_pca", 'compressed address file in base'); +ok(-f "$pgdata/${baseCompressedPath}_pcd", 'compressed data file in base'); + # Create files that look like temporary relations to ensure they are ignored. my $postgresOid = $node->safe_psql('postgres', q{select oid from pg_database where datname = 'postgres'}); @@ -147,6 +158,14 @@ ok(-f "$tempdir/backup/${baseUnloggedPath}_init", ok( !-f "$tempdir/backup/$baseUnloggedPath", 'unlogged main fork not in backup'); +# Compressed relation fork's address and data should be copied +ok(-f "$tempdir/backup/${baseCompressedPath}", + 'compressed main fork in backup'); +ok(-f "$tempdir/backup/${baseCompressedPath}_pca", + 'compressed address file in backup'); +ok(-f "$tempdir/backup/${baseCompressedPath}_pcd", + 'compressed data file in backup'); + # Temp relations should not be copied. 
foreach my $filename (@tempRelationFiles) { diff --git a/src/bin/pg_checksums/pg_checksums.c b/src/bin/pg_checksums/pg_checksums.c index ffdc23945c..e3837a8461 100644 --- a/src/bin/pg_checksums/pg_checksums.c +++ b/src/bin/pg_checksums/pg_checksums.c @@ -29,6 +29,8 @@ #include "storage/bufpage.h" #include "storage/checksum.h" #include "storage/checksum_impl.h" +#include "storage/page_compression.h" +#include "storage/page_compression_impl.h" static int64 files = 0; @@ -188,18 +190,70 @@ skipfile(const char *fn) } static void -scan_file(const char *fn, BlockNumber segmentno) +scan_file(const char *fn, BlockNumber segmentno, bool is_compressed_datafile) { PGAlignedBlock buf; PageHeader header = (PageHeader) buf.data; - int f; + int f, f_pca; BlockNumber blockno; int flags; + char fn_pca[MAXPGPATH]; + PageCompressHeader *pcMap; Assert(mode == PG_MODE_ENABLE || mode == PG_MODE_CHECK); + /* init memory map of compression page address file */ + if(is_compressed_datafile) + { + int r; + PageCompressHeader pchdr; + + strlcpy(fn_pca, fn, sizeof(fn_pca)); + fn_pca[strlen(fn_pca) -1] = 'a'; + + f_pca = open(fn_pca, PG_BINARY | O_RDONLY, 0); + + if (f_pca < 0) + { + pg_log_error("could not open file \"%s\": %m", fn_pca); + exit(1); + } + + r = read(f_pca, &pchdr, sizeof(PageCompressHeader)); + if(r != sizeof(PageCompressHeader)) + { + if (r < 0) + pg_log_error("could not read page compression address file head in file \"%s\": %m", + fn_pca); + else + pg_log_error("could not read page compression address file head in file \"%s\": read %d of %d", + fn_pca, r, (int)sizeof(PageCompressHeader)); + exit(1); + } + + if(pchdr.chunk_size != BLCKSZ / 2 && + pchdr.chunk_size != BLCKSZ / 4 && + pchdr.chunk_size != BLCKSZ / 8) + { + pg_log_error("the page compression address file head is corrupted in file \"%s\"", + fn_pca); + exit(1); + } + + pcMap = pc_mmap(f_pca, pchdr.chunk_size, true); + if(pcMap == MAP_FAILED) + { + pg_log_error("Failed to mmap page compression address file %s: %m", 
+ fn_pca); + exit(1); + } + + close(f_pca); + } + flags = (mode == PG_MODE_ENABLE) ? O_RDWR : O_RDONLY; + f = open(fn, PG_BINARY | flags, 0); if (f < 0) @@ -213,19 +267,120 @@ scan_file(const char *fn, BlockNumber segmentno) for (blockno = 0;; blockno++) { uint16 csum; - int r = read(f, buf.data, BLCKSZ); + int r; + off_t first_pc_chunk_pos; - if (r == 0) - break; - if (r != BLCKSZ) + if(is_compressed_datafile) { - if (r < 0) - pg_log_error("could not read block %u in file \"%s\": %m", - blockno, fn); - else - pg_log_error("could not read block %u in file \"%s\": read %d of %d", - blockno, fn, r, BLCKSZ); - exit(1); + PGAlignedBlock pcd_buf; + int nbytes,i,read_amount,range; + off_t seekpos; + char *buffer_pos; + PageCompressAddr *pcAddr; + bool is_compressed_block = true; + + if(blockno >= pcMap->nblocks) + break; + + pcAddr = GetPageCompressAddr(pcMap, pcMap->chunk_size, blockno); + + /* New pages have no checksum yet */ + if(pcAddr->nchunks == 0) + { + blocks++; + continue; + } + + if(pcAddr->nchunks == BLCKSZ / pcMap->chunk_size) + is_compressed_block = false; + + for(i=0; i< pcAddr->nchunks; i++) + { + if(is_compressed_block) + buffer_pos = pcd_buf.data + pcMap->chunk_size * i; + else + buffer_pos = buf.data + pcMap->chunk_size * i; + + seekpos = (off_t) OffsetOfPageCompressChunk(pcMap->chunk_size, pcAddr->chunknos[i]); + if(i == 0) + first_pc_chunk_pos = seekpos; + + range = 1; + while(i < pcAddr->nchunks - 1 && pcAddr->chunknos[i + 1] == pcAddr->chunknos[i] + 1) + { + range++; + i++; + } + read_amount = pcMap->chunk_size * range; + + if (lseek(f, seekpos, SEEK_SET) < 0) + { + pg_log_error("seek failed for block %u in file \"%s\": %m", blockno, fn); + exit(1); + } + + nbytes = read(f, buffer_pos, read_amount); + + if (nbytes != read_amount) + { + if (nbytes < 0) + pg_log_error("could not read block %u in file \"%s\": %m", + blockno, fn); + else + pg_log_error("could not read block %u in file \"%s\": read %d of %d", + blockno, fn, nbytes, read_amount); + 
exit(1); + } + } + + r = pcMap->chunk_size * pcAddr->nchunks; + + /* decompress chunk data */ + if(is_compressed_block) + { + nbytes = decompress_page(pcd_buf.data, buf.data, pcMap->algorithm); + if (nbytes != BLCKSZ) + { + if(nbytes == -2) + { + pg_log_error("could not recognized compression algorithm %d for file \"%s\"", + pcMap->algorithm, fn); + exit(1); + } + else + { + pg_log_error("could not decompress block %u in file \"%s\": decompress %d of %d bytes", + blockno, fn, nbytes, BLCKSZ); + + if(mode == PG_MODE_ENABLE) + exit(1); + else + { + blocks++; + badblocks++; + current_size += r; + continue; + } + } + } + } + } + else + { + r = read(f, buf.data, BLCKSZ); + + if (r == 0) + break; + if (r != BLCKSZ) + { + if (r < 0) + pg_log_error("could not read block %u in file \"%s\": %m", + blockno, fn); + else + pg_log_error("could not read block %u in file \"%s\": read %d of %d", + blockno, fn, r, BLCKSZ); + exit(1); + } } blocks++; @@ -252,24 +407,49 @@ scan_file(const char *fn, BlockNumber segmentno) /* Set checksum in page header */ header->pd_checksum = csum; - /* Seek back to beginning of block */ - if (lseek(f, -BLCKSZ, SEEK_CUR) < 0) + if(is_compressed_datafile) { - pg_log_error("seek failed for block %u in file \"%s\": %m", blockno, fn); - exit(1); - } + /* Seek back to beginning of first chunk */ + if (lseek(f, first_pc_chunk_pos, SEEK_SET) < 0) + { + pg_log_error("seek failed for block %u in file \"%s\": %m", blockno, fn); + exit(1); + } - /* Write block with checksum */ - w = write(f, buf.data, BLCKSZ); - if (w != BLCKSZ) + /* Write block with checksum */ + w = write(f, header, sizeof(PageHeaderData)); + if (w != sizeof(PageHeaderData)) + { + if (w < 0) + pg_log_error("could not write block %u in file \"%s\": %m", + blockno, fn); + else + pg_log_error("could not write block %u in file \"%s\": wrote %d of %d", + blockno, fn, w, (int)sizeof(PageHeaderData)); + exit(1); + } + } + else { - if (w < 0) - pg_log_error("could not write block %u in file \"%s\": 
%m", - blockno, fn); - else - pg_log_error("could not write block %u in file \"%s\": wrote %d of %d", - blockno, fn, w, BLCKSZ); - exit(1); + /* Seek back to beginning of block */ + if (lseek(f, -BLCKSZ, SEEK_CUR) < 0) + { + pg_log_error("seek failed for block %u in file \"%s\": %m", blockno, fn); + exit(1); + } + + /* Write block with checksum */ + w = write(f, buf.data, BLCKSZ); + if (w != BLCKSZ) + { + if (w < 0) + pg_log_error("could not write block %u in file \"%s\": %m", + blockno, fn); + else + pg_log_error("could not write block %u in file \"%s\": wrote %d of %d", + blockno, fn, w, BLCKSZ); + exit(1); + } } } @@ -286,6 +466,8 @@ scan_file(const char *fn, BlockNumber segmentno) } close(f); + if(is_compressed_datafile) + pc_munmap(pcMap); } /* @@ -343,17 +525,32 @@ scan_directory(const char *basedir, const char *subdir, bool sizeonly) char *forkpath, *segmentpath; BlockNumber segmentno = 0; + size_t filenamelength; + bool is_compressed_datafile = false; + if (skipfile(de->d_name)) continue; + /* check if is compressed page file */ + strlcpy(fnonly, de->d_name, sizeof(fnonly)); + filenamelength = strlen(fnonly); + if(filenamelength >= 4) + { + if(strncmp(fnonly + filenamelength - 4, "_pca", 4) == 0) + continue; + else if(strncmp(fnonly + filenamelength - 4, "_pcd", 4) == 0) + { + is_compressed_datafile = true; + fnonly[filenamelength - 4] = '\0'; + } + } /* * Cut off at the segment boundary (".") to get the segment number * in order to mix it into the checksum. Then also cut off at the * fork boundary, to get the filenode the file belongs to for * filtering. */ - strlcpy(fnonly, de->d_name, sizeof(fnonly)); segmentpath = strchr(fnonly, '.'); if (segmentpath != NULL) { @@ -382,7 +579,7 @@ scan_directory(const char *basedir, const char *subdir, bool sizeonly) * the items in the data folder. 
*/ if (!sizeonly) - scan_file(fn, segmentno); + scan_file(fn, segmentno, is_compressed_datafile); } #ifndef WIN32 else if (S_ISDIR(st.st_mode) || S_ISLNK(st.st_mode)) diff --git a/src/bin/pg_checksums/t/002_actions.pl b/src/bin/pg_checksums/t/002_actions.pl index 4e4934532a..35f484ee2f 100644 --- a/src/bin/pg_checksums/t/002_actions.pl +++ b/src/bin/pg_checksums/t/002_actions.pl @@ -5,7 +5,7 @@ use strict; use warnings; use PostgresNode; use TestLib; -use Test::More tests => 63; +use Test::More tests => 79; # Utility routine to create and check a table with corrupted checksums @@ -86,6 +86,84 @@ sub check_relation_corruption return; } +# Utility routine to create and check a compressed table with corrupted checksums +# on a wanted tablespace. Note that this stops and starts the node +# multiple times to perform the checks, leaving the node started +# at the end. +sub check_compressed_relation_corruption +{ + my $node = shift; + my $table = shift; + my $tablespace = shift; + my $pgdata = $node->data_dir; + + $node->safe_psql( + 'postgres', + "CREATE TABLE $table (a int) WITH(compresstype=pglz, autovacuum_enabled=false); + INSERT INTO $table SELECT a FROM generate_series(1,10000)a;"); + + $node->safe_psql('postgres', + "ALTER TABLE " . $table . " SET TABLESPACE " . $tablespace . ";"); + + my $file_corrupted = + $node->safe_psql('postgres', "SELECT pg_relation_filepath('$table');") . "_pcd"; + my $relfilenode_corrupted = $node->safe_psql('postgres', + "SELECT relfilenode FROM pg_class WHERE relname = '$table';"); + + # Set page header and block size + my $pageheader_size = 24; + my $block_size = $node->safe_psql('postgres', 'SHOW block_size;'); + $node->stop; + + # Checksums are correct for single relfilenode as the table is not + # corrupted yet. 
+ command_ok( + [ + 'pg_checksums', '--check', + '-D', $pgdata, + '--filenode', $relfilenode_corrupted + ], + "succeeds for single relfilenode on tablespace $tablespace with offline cluster" + ); + + # Time to create some corruption + open my $file, '+<', "$pgdata/$file_corrupted"; + seek($file, $pageheader_size, 0); + syswrite($file, "\0\0\0\0\0\0\0\0\0"); + close $file; + + # Checksum checks on single relfilenode fail + $node->command_checks_all( + [ + 'pg_checksums', '--check', + '-D', $pgdata, + '--filenode', $relfilenode_corrupted + ], + 1, + [qr/Bad checksums:.*1/], + [qr/could not decompress block/], + "fails with corrupted data for single relfilenode on tablespace $tablespace" + ); + + # Global checksum checks fail as well + $node->command_checks_all( + [ 'pg_checksums', '--check', '-D', $pgdata ], + 1, + [qr/Bad checksums:.*1/], + [qr/could not decompress block/], + "fails with corrupted data on tablespace $tablespace"); + + # Drop corrupted table again and make sure there is no more corruption. + $node->start; + $node->safe_psql('postgres', "DROP TABLE $table;"); + $node->stop; + $node->command_ok([ 'pg_checksums', '--check', '-D', $pgdata ], + "succeeds again after table drop on tablespace $tablespace"); + + $node->start; + return; +} + # Initialize node with checksums disabled. my $node = get_new_node('node_checksum'); $node->init(); @@ -115,6 +193,15 @@ append_to_file "$pgdata/global/pgsql_tmp/1.1", "foo"; append_to_file "$pgdata/global/pg_internal.init", "foo"; append_to_file "$pgdata/global/pg_internal.init.123", "foo"; +# Create a compressed table and index before vertify enable checksums +$node->start; +$node->safe_psql( + 'postgres', + "CREATE TABLE compressed_table(a int) WITH(compresstype=pglz, autovacuum_enabled=false); + INSERT INTO compressed_table SELECT a FROM generate_series(1,10000)a; + CREATE INDEX ON compressed_table(a) WITH(compresstype=pglz);"); +$node->stop; + # Enable checksums. 
command_ok([ 'pg_checksums', '--enable', '--no-sync', '-D', $pgdata ], "checksums successfully enabled in cluster"); @@ -180,6 +267,7 @@ command_fails([ 'pg_checksums', '--check', '-D', $pgdata ], # Check corruption of table on default tablespace. check_relation_corruption($node, 'corrupt1', 'pg_default'); +check_compressed_relation_corruption($node, 'compressed_corrupt1', 'pg_default'); # Create tablespace to check corruptions in a non-default tablespace. my $basedir = $node->basedir; @@ -189,6 +277,7 @@ $tablespace_dir = TestLib::perl2host($tablespace_dir); $node->safe_psql('postgres', "CREATE TABLESPACE ts_corrupt LOCATION '$tablespace_dir';"); check_relation_corruption($node, 'corrupt2', 'ts_corrupt'); +check_compressed_relation_corruption($node, 'compressed_corrupt2', 'ts_corrupt'); # Utility routine to check that pg_checksums is able to detect # correctly-named relation files filled with some corrupted data. diff --git a/src/bin/pg_rewind/copy_fetch.c b/src/bin/pg_rewind/copy_fetch.c index 1edab5f186..cb95088045 100644 --- a/src/bin/pg_rewind/copy_fetch.c +++ b/src/bin/pg_rewind/copy_fetch.c @@ -14,17 +14,24 @@ #include #include +#include "common/string.h" #include "datapagemap.h" #include "fetch.h" #include "file_ops.h" #include "filemap.h" #include "pg_rewind.h" +#include "storage/page_compression.h" static void recurse_dir(const char *datadir, const char *path, process_file_callback_t callback); -static void execute_pagemap(datapagemap_t *pagemap, const char *path); - +static void execute_pagemap(datapagemap_t *pagemap, const char *path, + bool iscompressedrel, int chunk_size, + int prealloc_chunks, compressedpagemap_t *first_compressedpagemap); +static void rewind_copy_compressed_relation_range(const char *path, int chunk_size, + BlockNumber blocknum, int nblocks, + int prealloc_chunks, + compressedpagemap_t *first_compressedpagemap); /* * Traverse through all files in a data directory, calling 'callback' * for each file. 
@@ -95,10 +102,32 @@ recurse_dir(const char *datadir, const char *parentpath, snprintf(path, sizeof(path), "%s", xlde->d_name); if (S_ISREG(fst.st_mode)) - callback(path, FILE_TYPE_REGULAR, fst.st_size, NULL); + { + if(pg_str_endswith(path, "_pca")) + { + int fd, ret; + PageCompressHeader pchdr; + + /* read header of compressed relation address file */ + fd = open(fullpath, O_RDONLY | PG_BINARY, 0); + if (fd < 0) + pg_fatal("could not open file \"%s\": %m", + fullpath); + + ret = read(fd, &pchdr, sizeof(PageCompressHeader)); + if(ret == sizeof(PageCompressHeader)) + callback(path, FILE_TYPE_REGULAR, fst.st_size, NULL, &pchdr); + else + callback(path, FILE_TYPE_REGULAR, fst.st_size, NULL, NULL); + + close(fd); + } + else + callback(path, FILE_TYPE_REGULAR, fst.st_size, NULL, NULL); + } else if (S_ISDIR(fst.st_mode)) { - callback(path, FILE_TYPE_DIRECTORY, 0, NULL); + callback(path, FILE_TYPE_DIRECTORY, 0, NULL, NULL); /* recurse to handle subdirectories */ recurse_dir(datadir, path, callback); } @@ -121,7 +150,7 @@ recurse_dir(const char *datadir, const char *parentpath, fullpath); link_target[len] = '\0'; - callback(path, FILE_TYPE_SYMLINK, 0, link_target); + callback(path, FILE_TYPE_SYMLINK, 0, link_target, NULL); /* * If it's a symlink within pg_tblspc, we need to recurse into it, @@ -147,6 +176,46 @@ recurse_dir(const char *datadir, const char *parentpath, fullparentpath); } +void +local_fetchCompressedRelationAddress(filemap_t *map) +{ + char srcpath[MAXPGPATH]; + file_entry_t *entry; + int chunk_size; + int fd, i; + PageCompressHeader *pcMap; + + for (i = 0; i < map->narray; i++) + { + entry = map->array[i]; + + if(entry->type != FILE_TYPE_COMPRESSED_REL) + continue; + + chunk_size = entry->chunk_size; + + /* read page compression file header of source relation */ + snprintf(srcpath, sizeof(srcpath), "%s/%s", datadir_source, entry->pca->path); + fd = open(srcpath, O_RDONLY | PG_BINARY, 0); + if (fd < 0) + pg_fatal("could not open source file \"%s\": %m", + 
srcpath); + + pcMap = pc_mmap(fd, chunk_size, true); + if(pcMap == MAP_FAILED) + pg_fatal("Failed to mmap page compression address file %s: %m", + srcpath); + + if (close(fd) != 0) + pg_fatal("could not close file \"%s\": %m", srcpath); + + fill_compressed_relation_address(entry, NULL, pcMap); + + if (pc_munmap(pcMap) != 0) + pg_fatal("could not munmap file \"%s\": %m", srcpath); + } +} + /* * Copy a file from source to target, between 'begin' and 'end' offsets. * @@ -197,6 +266,89 @@ rewind_copy_file_range(const char *path, off_t begin, off_t end, bool trunc) pg_fatal("could not close file \"%s\": %m", srcpath); } +/* + * Copy a file from source to target, between 'begin' and 'end' offsets. + * + * If 'trunc' is true, any existing file with the same name is truncated. + */ +static void +rewind_copy_compressed_relation_range(const char *path, int chunk_size, + BlockNumber blocknum, int nblocks, + int prealloc_chunks, + compressedpagemap_t *first_compressedpagemap) +{ + PGAlignedBlock buf; + char srcpath[MAXPGPATH]; + int fd, i; + compressedpagemap_t *compressedpagemap = first_compressedpagemap; + + /* open source compressed relation data file */ + snprintf(srcpath, sizeof(srcpath), "%s/%s_pcd", datadir_source, path); + + fd = open(srcpath, O_RDONLY | PG_BINARY, 0); + if (fd < 0) + pg_fatal("could not open source file \"%s\": %m", + srcpath); + + /* copy blocks from source to target */ + open_target_compressed_relation(path); + + for(i=0; i < nblocks; i++) + { + int j; + BlockNumber blkno = blocknum + i; + + while(compressedpagemap != NULL) + { + if(compressedpagemap->blkno == blkno) + break; + compressedpagemap = compressedpagemap->next; + } + + if(compressedpagemap == NULL || compressedpagemap->blkno != blkno) + pg_fatal("could not find compressedpagemap for block %d of file \"%s\"", + blkno, path); + + for(j=0; j < compressedpagemap->nchunks; j++) + { + int readlen; + int seekpos; + int length = chunk_size; + int chunkindex = j; + pc_chunk_number_t chunkno = 
compressedpagemap->chunknos[j]; + + seekpos = OffsetOfPageCompressChunk(chunk_size, chunkno); + + while(j + 1 < compressedpagemap->nchunks && + compressedpagemap->chunknos[j + 1]== compressedpagemap->chunknos[j] + 1) + { + length += chunk_size; + j++; + } + + if (lseek(fd, seekpos, SEEK_SET) == -1) + pg_fatal("could not seek in source file: %m"); + + readlen = read(fd, buf.data, length); + if (readlen != length) + { + if (readlen < 0) + pg_fatal("could not read file \"%s\": %m", srcpath); + else + pg_fatal("could not read file \"%s\": read %d of %zu", + srcpath, readlen, (Size) length); + } + + write_target_compressed_relation_chunk(buf.data, readlen, blkno, chunkindex, + compressedpagemap->nchunks, prealloc_chunks); + } + } + + if (close(fd) != 0) + pg_fatal("could not close file \"%s\": %m", srcpath); +} + + /* * Copy all relation data files from datadir_source to datadir_target, which * are marked in the given data page map. @@ -209,8 +361,15 @@ copy_executeFileMap(filemap_t *map) for (i = 0; i < map->narray; i++) { + bool iscompressedrel = false; + entry = map->array[i]; - execute_pagemap(&entry->pagemap, entry->path); + + if(entry->type == FILE_TYPE_COMPRESSED_REL) + iscompressedrel = true; + + execute_pagemap(&entry->pagemap, entry->path, iscompressedrel, entry->chunk_size, + entry->prealloc_chunks, entry->first_compressedpagemap); switch (entry->action) { @@ -223,12 +382,23 @@ copy_executeFileMap(filemap_t *map) break; case FILE_ACTION_TRUNCATE: - truncate_target_file(entry->path, entry->newsize); + if(iscompressedrel) + truncate_target_compressed_relation(entry->path, entry->newblocks); + else + truncate_target_file(entry->path, entry->newsize); break; case FILE_ACTION_COPY_TAIL: - rewind_copy_file_range(entry->path, entry->oldsize, - entry->newsize, false); + if(iscompressedrel) + rewind_copy_compressed_relation_range(entry->path, + entry->chunk_size, + entry->oldblocks, + entry->newblocks - entry->oldblocks, + entry->prealloc_chunks, + 
entry->first_compressedpagemap); + else + rewind_copy_file_range(entry->path, entry->oldsize, + entry->newsize, false); break; case FILE_ACTION_CREATE: @@ -245,7 +415,9 @@ copy_executeFileMap(filemap_t *map) } static void -execute_pagemap(datapagemap_t *pagemap, const char *path) +execute_pagemap(datapagemap_t *pagemap, const char *path, + bool iscompressedrel, int chunk_size, + int prealloc_chunks, compressedpagemap_t *first_compressedpagemap) { datapagemap_iterator_t *iter; BlockNumber blkno; @@ -254,8 +426,14 @@ execute_pagemap(datapagemap_t *pagemap, const char *path) iter = datapagemap_iterate(pagemap); while (datapagemap_next(iter, &blkno)) { - offset = blkno * BLCKSZ; - rewind_copy_file_range(path, offset, offset + BLCKSZ, false); + if(iscompressedrel) + rewind_copy_compressed_relation_range(path, chunk_size, blkno, 1, + prealloc_chunks, first_compressedpagemap); + else + { + offset = blkno * BLCKSZ; + rewind_copy_file_range(path, offset, offset + BLCKSZ, false); + } /* Ok, this block has now been copied from new data dir to old */ } pg_free(iter); diff --git a/src/bin/pg_rewind/fetch.c b/src/bin/pg_rewind/fetch.c index f18fe5386e..f66bc2b31c 100644 --- a/src/bin/pg_rewind/fetch.c +++ b/src/bin/pg_rewind/fetch.c @@ -33,6 +33,15 @@ fetchSourceFileList(void) libpqProcessFileList(); } +void +fetchCompressedRelationAddress() +{ + if (datadir_source) + local_fetchCompressedRelationAddress(filemap); + else + libpq_fetchCompressedRelationAddress(filemap); +} + /* * Fetch all relation data files that are marked in the given data page map. */ diff --git a/src/bin/pg_rewind/fetch.h b/src/bin/pg_rewind/fetch.h index 7cf8b6ea09..cdecdae917 100644 --- a/src/bin/pg_rewind/fetch.h +++ b/src/bin/pg_rewind/fetch.h @@ -18,27 +18,32 @@ #include "access/xlogdefs.h" #include "filemap.h" +#include "storage/page_compression.h" /* * Common interface. Calls the copy or libpq method depending on global * config options. 
*/ extern void fetchSourceFileList(void); +extern void fetchCompressedRelationAddress(void); extern char *fetchFile(const char *filename, size_t *filesize); extern void executeFileMap(void); /* in libpq_fetch.c */ extern void libpqProcessFileList(void); extern char *libpqGetFile(const char *filename, size_t *filesize); +extern void libpq_fetchCompressedRelationAddress(filemap_t *map); extern void libpq_executeFileMap(filemap_t *map); extern void libpqConnect(const char *connstr); extern XLogRecPtr libpqGetCurrentXlogInsertLocation(void); /* in copy_fetch.c */ +extern void local_fetchCompressedRelationAddress(filemap_t *map); extern void copy_executeFileMap(filemap_t *map); -typedef void (*process_file_callback_t) (const char *path, file_type_t type, size_t size, const char *link_target); +typedef void (*process_file_callback_t) (const char *path, file_type_t type, size_t size, const char *link_target, + const PageCompressHeader *pchdr); extern void traverse_datadir(const char *datadir, process_file_callback_t callback); #endif /* FETCH_H */ diff --git a/src/bin/pg_rewind/file_ops.c b/src/bin/pg_rewind/file_ops.c index b3bf091c54..28de652409 100644 --- a/src/bin/pg_rewind/file_ops.c +++ b/src/bin/pg_rewind/file_ops.c @@ -22,12 +22,17 @@ #include "file_ops.h" #include "filemap.h" #include "pg_rewind.h" +#include "storage/page_compression.h" +#include "storage/page_compression_impl.h" /* * Currently open target file. 
*/ static int dstfd = -1; static char dstpath[MAXPGPATH] = ""; +static PageCompressHeader *dstpcmap = NULL; +static char dstpcapath[MAXPGPATH] = ""; +static int chunk_size = 0; static void create_target_dir(const char *path); static void remove_target_dir(const char *path); @@ -77,6 +82,14 @@ close_target_file(void) dstpath); dstfd = -1; + + if(dstpcmap != NULL) + { + if (pc_munmap(dstpcmap) != 0) + pg_fatal("could not munmap file \"%s\": %m", dstpcapath); + + dstpcmap = NULL; + } } void @@ -139,6 +152,10 @@ remove_target(file_entry_t *entry) case FILE_TYPE_SYMLINK: remove_target_symlink(entry->path); break; + case FILE_TYPE_COMPRESSED_REL: + /* can't happen. */ + pg_fatal("invalid action (REMOVE) for compressed relation file"); + break; } } @@ -158,6 +175,7 @@ create_target(file_entry_t *entry) break; case FILE_TYPE_REGULAR: + case FILE_TYPE_COMPRESSED_REL: /* can't happen. Regular files are created with open_target_file. */ pg_fatal("invalid action (CREATE) for regular file"); break; @@ -323,3 +341,189 @@ slurpFile(const char *datadir, const char *path, size_t *filesize) *filesize = len; return buffer; } + + +/* + * Open a target compressed relation for writing. If 'trunc' is true and the + * file already exists, it will be truncated. 
+ */ +void +open_target_compressed_relation(const char *path) +{ + char localpath[MAXPGPATH]; + int mode; + int fd, ret; + PageCompressHeader pchdr; + + if (dry_run) + return; + + snprintf(localpath, sizeof(localpath), "%s/%s_pcd", datadir_target, path); + + if (dstfd != -1 && + strcmp(localpath, dstpath) == 0) + return; /* already open */ + + close_target_file(); + + /* read page compression file header of target relation */ + snprintf(dstpcapath, sizeof(dstpcapath), "%s/%s_pca", datadir_target, path); + + fd = open(dstpcapath, O_RDWR | PG_BINARY, 0); + if (fd < 0) + pg_fatal("could not open target file \"%s\": %m", + dstpcapath); + + ret = read(fd, &pchdr, sizeof(PageCompressHeader)); + if(ret != sizeof(PageCompressHeader)) + pg_fatal("could not read compressed page address file \"%s\"", + dstpcapath); + + dstpcmap = pc_mmap(fd, pchdr.chunk_size, false); + if(dstpcmap == MAP_FAILED) + pg_fatal("Failed to mmap page compression address file %s: %m", + dstpcapath); + + if (close(fd) != 0) + pg_fatal("could not close file \"%s\": %m", dstpcapath); + + /* open page compression data file */ + snprintf(dstpath, sizeof(dstpath), "%s/%s_pcd", datadir_target, path); + + mode = O_WRONLY | O_CREAT | PG_BINARY; + dstfd = open(dstpath, mode, pg_file_create_mode); + if (dstfd < 0) + pg_fatal("could not open target file \"%s\": %m", + dstpath); + + chunk_size = pchdr.chunk_size; +} + + +void +truncate_target_compressed_relation(const char *path, int newblocks) +{ + char dstpath[MAXPGPATH]; + int fd, ret; + PageCompressHeader pchdr, *dstpcmap; + + if (dry_run) + return; + + /* read page compression file header of target relation */ + snprintf(dstpath, sizeof(dstpath), "%s/%s_pca", datadir_target, path); + + fd = open(dstpath, O_RDWR | PG_BINARY, pg_file_create_mode); + if (fd < 0) + pg_fatal("could not open target file \"%s\": %m", + dstpath); + + ret = read(fd, &pchdr, sizeof(PageCompressHeader)); + if(ret != sizeof(PageCompressHeader)) + pg_fatal("could not read compressed page 
address file \"%s\"", + dstpath); + + dstpcmap = pc_mmap(fd, pchdr.chunk_size, false); + if(dstpcmap == MAP_FAILED) + pg_fatal("Failed to mmap page compression address file %s: %m", + dstpath); + + if (close(fd) != 0) + pg_fatal("could not close file \"%s\": %m", dstpcapath); + + /* resize target relation */ + if(dstpcmap->nblocks > newblocks) + { + int blk; + for(blk = newblocks; blk < dstpcmap->nblocks; blk++) + { + PageCompressAddr *pcAddr = GetPageCompressAddr(dstpcmap, pchdr.chunk_size, blk); + pcAddr->nchunks = 0; + } + dstpcmap->nblocks = newblocks; + } + + if (pc_munmap(dstpcmap) != 0) + pg_fatal("could not munmap file \"%s\": %m", dstpath); +} + + +void +write_target_compressed_relation_chunk(char *buf, size_t size, int blocknum, int chunkindex, int nchunks, int prealloc_chunks) +{ + PageCompressAddr *pcAddr; + int i; + int writeleft; + int writed = 0; + char *p = buf; + int seekpos; + int allocated_chunks = nchunks; + + /* update progress report */ + fetch_done += size; + progress_report(false); + + if (dry_run) + return; + + if(size <= 0) + return; + + pcAddr = GetPageCompressAddr(dstpcmap, chunk_size, blocknum); + + /* allocate chunks */ + if(allocated_chunks < prealloc_chunks) + allocated_chunks = prealloc_chunks; + + if(pcAddr->allocated_chunks < allocated_chunks) + { + for(i = pcAddr->allocated_chunks; i< allocated_chunks; i++) + { + pcAddr->chunknos[i] = dstpcmap->allocated_chunks + 1; + dstpcmap->allocated_chunks++; + } + pcAddr->allocated_chunks = allocated_chunks; + } + + if(dstpcmap->nblocks <= blocknum) + dstpcmap->nblocks = blocknum + 1; + + if(pcAddr->nchunks != nchunks) + pcAddr->nchunks = nchunks; + + while(writed < size) + { + /* write one chunk to target */ + + writeleft = size - writed; + if(writeleft > chunk_size) + writeleft = chunk_size; + + seekpos = OffsetOfPageCompressChunk(chunk_size, pcAddr->chunknos[chunkindex + writed / chunk_size]); + + if (lseek(dstfd, seekpos, SEEK_SET) == -1) + pg_fatal("could not seek in target file 
\"%s\": %m", + dstpath); + + while (writeleft > 0) + { + int writelen; + + errno = 0; + writelen = write(dstfd, p, writeleft); + if (writelen < 0) + { + /* if write didn't set errno, assume problem is no disk space */ + if (errno == 0) + errno = ENOSPC; + pg_fatal("could not write file \"%s\": %m", + dstpath); + } + + p += writelen; + writeleft -= writelen; + writed += writelen; + } + } + /* keep the file open, in case we need to copy more blocks in it */ +} diff --git a/src/bin/pg_rewind/file_ops.h b/src/bin/pg_rewind/file_ops.h index 025f24141c..31e2d7a058 100644 --- a/src/bin/pg_rewind/file_ops.h +++ b/src/bin/pg_rewind/file_ops.h @@ -22,4 +22,9 @@ extern void remove_target(file_entry_t *t); extern char *slurpFile(const char *datadir, const char *path, size_t *filesize); +extern void open_target_compressed_relation(const char *path); +extern void write_target_compressed_relation_chunk(char *buf, size_t size, int blocknum, int chunkindex, + int nchunks, int prealloc_chunks); +extern void truncate_target_compressed_relation(const char *path, int newblocks); + #endif /* FILE_OPS_H */ diff --git a/src/bin/pg_rewind/filemap.c b/src/bin/pg_rewind/filemap.c index 1abc257177..f63bb7a352 100644 --- a/src/bin/pg_rewind/filemap.c +++ b/src/bin/pg_rewind/filemap.c @@ -19,6 +19,7 @@ #include "filemap.h" #include "pg_rewind.h" #include "storage/fd.h" +#include "storage/page_compression.h" filemap_t *filemap = NULL; @@ -155,7 +156,7 @@ filemap_create(void) */ void process_source_file(const char *path, file_type_t type, size_t newsize, - const char *link_target) + const char *link_target, const PageCompressHeader *pchdr) { bool exists; char localpath[MAXPGPATH]; @@ -312,6 +313,10 @@ process_source_file(const char *path, file_type_t type, size_t newsize, action = FILE_ACTION_NONE; } break; + case FILE_TYPE_COMPRESSED_REL: + /* can't happen. 
*/ + Assert(false); + break; } /* Create a new entry for this file */ @@ -326,6 +331,17 @@ process_source_file(const char *path, file_type_t type, size_t newsize, entry->pagemap.bitmap = NULL; entry->pagemap.bitmapsize = 0; entry->isrelfile = isRelDataFile(path); + entry->prealloc_chunks = 0; + entry->target_pchdr = NULL; + entry->first_compressedpagemap = NULL; + entry->last_compressedpagemap = NULL; + if(pchdr != NULL) + { + entry->source_pchdr = pg_malloc(sizeof(PageCompressHeader)); + memcpy(entry->source_pchdr, pchdr, sizeof(PageCompressHeader)); + } + else + entry->source_pchdr = NULL; if (map->last) { @@ -346,11 +362,13 @@ process_source_file(const char *path, file_type_t type, size_t newsize, */ void process_target_file(const char *path, file_type_t type, size_t oldsize, - const char *link_target) + const char *link_target, const PageCompressHeader *pchdr) { - bool exists; + char localpath[MAXPGPATH]; + struct stat statbuf; file_entry_t key; file_entry_t *key_ptr; + file_entry_t **e; filemap_t *map = filemap; file_entry_t *entry; @@ -360,6 +378,14 @@ process_target_file(const char *path, file_type_t type, size_t oldsize, * the source data folder when processing the source files. */ + snprintf(localpath, sizeof(localpath), "%s/%s", datadir_target, path); + if (lstat(localpath, &statbuf) < 0) + { + if (errno != ENOENT) + pg_fatal("could not stat file \"%s\": %m", + localpath); + } + if (map->array == NULL) { /* on first call, initialize lookup array */ @@ -384,11 +410,11 @@ process_target_file(const char *path, file_type_t type, size_t oldsize, key.path = (char *) path; key_ptr = &key; - exists = (bsearch(&key_ptr, map->array, map->narray, sizeof(file_entry_t *), - path_cmp) != NULL); + e = bsearch(&key_ptr, map->array, map->narray, sizeof(file_entry_t *), + path_cmp); /* Remove any file or folder that doesn't exist in the source system. 
*/ - if (!exists) + if (e == NULL) { entry = pg_malloc(sizeof(file_entry_t)); entry->path = pg_strdup(path); @@ -401,6 +427,17 @@ process_target_file(const char *path, file_type_t type, size_t oldsize, entry->pagemap.bitmap = NULL; entry->pagemap.bitmapsize = 0; entry->isrelfile = isRelDataFile(path); + entry->prealloc_chunks = 0; + entry->source_pchdr = NULL; + entry->first_compressedpagemap = NULL; + entry->last_compressedpagemap = NULL; + if(pchdr != NULL) + { + entry->target_pchdr = pg_malloc(sizeof(PageCompressHeader)); + memcpy(entry->target_pchdr, pchdr, sizeof(PageCompressHeader)); + } + else + entry->target_pchdr = NULL; if (map->last == NULL) map->first = entry; @@ -413,8 +450,114 @@ process_target_file(const char *path, file_type_t type, size_t oldsize, { /* * We already handled all files that exist in the source system in - * process_source_file(). + * process_source_file(). The only thing should to do is setting + * target_pchdr. */ + entry = *e; + if(pchdr != NULL) + { + entry->target_pchdr = pg_malloc(sizeof(PageCompressHeader)); + memcpy(entry->target_pchdr, pchdr, sizeof(PageCompressHeader)); + } + else + entry->target_pchdr = NULL; + } +} + +/* + * + * + */ +void +process_compressed_relation(void) +{ + char path[MAXPGPATH]; + file_entry_t key; + file_entry_t *key_ptr; + file_entry_t *entry; + file_entry_t **e; + filemap_t *map = filemap; + int i; + + Assert(map->array); + + for(i = 0; i < map->narray; i++) + { + entry = map->array[i]; + if(entry->isrelfile) + { + bool iscompressedrel = false; + file_entry_t *pca_entry, *pcd_entry; + uint16 chunk_size; + + if(entry->action != FILE_ACTION_COPY_TAIL && + entry->action != FILE_ACTION_TRUNCATE && + entry->action != FILE_ACTION_NONE) + continue; + + /* check is this a compressed relation file */ + snprintf(path, sizeof(path), "%s_pca", entry->path); + key.path = path; + key_ptr = &key; + e = bsearch(&key_ptr, map->array, map->narray, sizeof(file_entry_t *), + path_cmp); + if(e == NULL) + continue; + 
pca_entry = *e; + + snprintf(path, sizeof(path), "%s_pcd", entry->path); + key.path = path; + key_ptr = &key; + e = bsearch(&key_ptr, map->array, map->narray, sizeof(file_entry_t *), + path_cmp); + + /* check is this a valid compressed relation file */ + if(e && pca_entry->source_pchdr && pca_entry->target_pchdr && + entry->newsize == 0 && entry->oldsize == 0) + { + pcd_entry = *e; + chunk_size = pca_entry->source_pchdr->chunk_size; + + if((chunk_size == BLCKSZ / 2 || chunk_size == BLCKSZ / 4 || chunk_size == BLCKSZ / 8) && + pca_entry->target_pchdr->algorithm == pca_entry->source_pchdr->algorithm && + pca_entry->target_pchdr->chunk_size == chunk_size) + { + iscompressedrel = true; + } + } + + if(iscompressedrel) + { + int action; + uint32 oldblocks = pca_entry->target_pchdr->nblocks; + uint32 newblocks = pca_entry->source_pchdr->nblocks; + + if (oldblocks < newblocks) + action = FILE_ACTION_COPY_TAIL; + else if (oldblocks > newblocks) + action = FILE_ACTION_TRUNCATE; + else + action = FILE_ACTION_NONE; + + entry->type = FILE_TYPE_COMPRESSED_REL; + entry->action = action; + entry->chunk_size = chunk_size; + entry->oldblocks = oldblocks; + entry->newblocks = newblocks; + entry->pca = pca_entry; + entry->pcd = pcd_entry; + + /* we copy data when processing the main fork file, and do not need to separately + copy the pca and pcd file */ + pca_entry->action = FILE_ACTION_NONE; + pcd_entry->action = FILE_ACTION_NONE; + } + else + { + /* copy the entire file for invalid compressed relation file */ + entry->action = FILE_ACTION_COPY; + } + } } } @@ -455,6 +598,7 @@ process_block_change(ForkNumber forknum, RelFileNode rnode, BlockNumber blkno) if (entry) { + size_t newsize, oldsize; Assert(entry->isrelfile); switch (entry->action) @@ -462,7 +606,11 @@ process_block_change(ForkNumber forknum, RelFileNode rnode, BlockNumber blkno) case FILE_ACTION_NONE: case FILE_ACTION_TRUNCATE: /* skip if we're truncating away the modified block anyway */ - if ((blkno_inseg + 1) * BLCKSZ 
<= entry->newsize) + if(entry->type == FILE_TYPE_COMPRESSED_REL) + newsize = BLCKSZ * entry->newblocks; + else + newsize = entry->newsize; + if ((blkno_inseg + 1) * BLCKSZ <= newsize) datapagemap_add(&entry->pagemap, blkno_inseg); break; @@ -472,7 +620,11 @@ process_block_change(ForkNumber forknum, RelFileNode rnode, BlockNumber blkno) * skip the modified block if it is part of the "tail" that * we're copying anyway. */ - if ((blkno_inseg + 1) * BLCKSZ <= entry->oldsize) + if(entry->type == FILE_TYPE_COMPRESSED_REL) + oldsize = BLCKSZ * entry->oldblocks; + else + oldsize = entry->oldsize; + if ((blkno_inseg + 1) * BLCKSZ <= oldsize) datapagemap_add(&entry->pagemap, blkno_inseg); break; @@ -616,6 +768,9 @@ action_to_str(file_action_t action) /* * Calculate the totals needed for progress reports. + * + * For compressed relation, just returns an estimate and don't consider + * compressed relation address file. */ void calculate_totals(void) @@ -631,7 +786,7 @@ calculate_totals(void) { entry = map->array[i]; - if (entry->type != FILE_TYPE_REGULAR) + if (entry->type != FILE_TYPE_REGULAR && entry->type != FILE_TYPE_COMPRESSED_REL) continue; map->total_size += entry->newsize; @@ -642,19 +797,33 @@ calculate_totals(void) continue; } - if (entry->action == FILE_ACTION_COPY_TAIL) - map->fetch_size += (entry->newsize - entry->oldsize); + if(entry->type == FILE_TYPE_COMPRESSED_REL) + { + compressedpagemap_t *compressedpagemap; - if (entry->pagemap.bitmapsize > 0) + compressedpagemap = entry->first_compressedpagemap; + while(compressedpagemap != NULL) + { + map->fetch_size += entry->chunk_size * compressedpagemap->nchunks; + compressedpagemap = compressedpagemap->next; + } + } + else { - datapagemap_iterator_t *iter; - BlockNumber blk; + if (entry->action == FILE_ACTION_COPY_TAIL) + map->fetch_size += (entry->newsize - entry->oldsize); + + if (entry->pagemap.bitmapsize > 0) + { + datapagemap_iterator_t *iter; + BlockNumber blk; - iter = datapagemap_iterate(&entry->pagemap); - 
while (datapagemap_next(iter, &blk)) - map->fetch_size += BLCKSZ; + iter = datapagemap_iterate(&entry->pagemap); + while (datapagemap_next(iter, &blk)) + map->fetch_size += BLCKSZ; - pg_free(iter); + pg_free(iter); + } } } } @@ -825,3 +994,119 @@ final_filemap_cmp(const void *a, const void *b) else return strcmp(fa->path, fb->path); } + +void +fill_compressed_relation_address(file_entry_t *entry, const char *path, PageCompressHeader *pcMap) +{ + compressedpagemap_t *compressedpagemap; + PageCompressAddr *pcAddr; + int min_allocated_chunks = 0; + datapagemap_iterator_t *iter; + BlockNumber blkno; + int i; + filemap_t *map = filemap; + + if(entry == NULL) + { + file_entry_t key; + file_entry_t *key_ptr; + file_entry_t **e; + + key.path = (char *) path; + key_ptr = &key; + e = bsearch(&key_ptr, map->array, map->narray, sizeof(file_entry_t *), + path_cmp); + + if(e == NULL) + pg_fatal("can not find compressed relation file \"%s\" in filemap", path); + + entry = *e; + } + + if(entry->type != FILE_TYPE_COMPRESSED_REL) + pg_fatal("unexpected type of compressed relation file \"%s\"", path); + + /* fill compressed relation address for blocks in pagemap */ + iter = datapagemap_iterate(&entry->pagemap); + while (datapagemap_next(iter, &blkno)) + { + pcAddr = GetPageCompressAddr(pcMap, entry->chunk_size, blkno); + compressedpagemap = pg_malloc(SizeOfCompressedPageMapHeaderData + sizeof(pc_chunk_number_t) * pcAddr->nchunks); + compressedpagemap->blkno = blkno; + compressedpagemap->next = NULL; + compressedpagemap->nchunks = pcAddr->nchunks; + + /* if number of chunks of this block changed while reading compressed relation address file, + use the smaller one. 
*/ + for(i = 0; i< compressedpagemap->nchunks; i++) + { + if(pcAddr->chunknos[i] == 0) + { + compressedpagemap->nchunks = i; + break; + } + } + + if(compressedpagemap->nchunks > 0) + memcpy(compressedpagemap->chunknos, pcAddr->chunknos, sizeof(pc_chunk_number_t) * compressedpagemap->nchunks); + + if(entry->first_compressedpagemap == NULL) + { + entry->first_compressedpagemap = compressedpagemap; + entry->last_compressedpagemap = compressedpagemap; + } + else + { + entry->last_compressedpagemap->next = compressedpagemap; + entry->last_compressedpagemap = compressedpagemap; + } + + if(min_allocated_chunks == 0 || pcAddr->allocated_chunks < min_allocated_chunks) + min_allocated_chunks = pcAddr->allocated_chunks; + } + pg_free(iter); + + /* fill compressed relation address for blocks in tail of source file */ + if(entry->action == FILE_ACTION_COPY_TAIL) + { + for(blkno = (BlockNumber)entry->oldblocks; blkno < (BlockNumber)entry->newblocks; blkno++) + { + pcAddr = GetPageCompressAddr(pcMap, entry->chunk_size, blkno); + compressedpagemap = pg_malloc(SizeOfCompressedPageMapHeaderData + sizeof(pc_chunk_number_t) * pcAddr->nchunks); + compressedpagemap->blkno = blkno; + compressedpagemap->next = NULL; + compressedpagemap->nchunks = pcAddr->nchunks; + + /* if number of chunks of this block changed while reading compressed relation address file, + use the smaller one. 
*/ + for(i = 0; i< compressedpagemap->nchunks; i++) + { + if(pcAddr->chunknos[i] == 0) + { + compressedpagemap->nchunks = i; + break; + } + } + + if(compressedpagemap->nchunks > 0) + memcpy(compressedpagemap->chunknos, pcAddr->chunknos, sizeof(pc_chunk_number_t) * compressedpagemap->nchunks); + + if(entry->first_compressedpagemap == NULL) + { + entry->first_compressedpagemap = compressedpagemap; + entry->last_compressedpagemap = compressedpagemap; + } + else + { + entry->last_compressedpagemap->next = compressedpagemap; + entry->last_compressedpagemap = compressedpagemap; + } + + if(min_allocated_chunks == 0 || pcAddr->allocated_chunks < min_allocated_chunks) + min_allocated_chunks = pcAddr->allocated_chunks; + } + } + + /* we can not get prealloc_chunks guc of this relation, instead use the min allocated_chunks of source */ + entry->prealloc_chunks = min_allocated_chunks; +} \ No newline at end of file diff --git a/src/bin/pg_rewind/filemap.h b/src/bin/pg_rewind/filemap.h index 0cb7425170..334dd31567 100644 --- a/src/bin/pg_rewind/filemap.h +++ b/src/bin/pg_rewind/filemap.h @@ -11,6 +11,7 @@ #include "datapagemap.h" #include "storage/block.h" #include "storage/relfilenode.h" +#include "storage/page_compression.h" /* * For every file found in the local or remote system, we have a file entry @@ -35,9 +36,20 @@ typedef enum { FILE_TYPE_REGULAR, FILE_TYPE_DIRECTORY, - FILE_TYPE_SYMLINK + FILE_TYPE_SYMLINK, + FILE_TYPE_COMPRESSED_REL } file_type_t; +typedef struct compressedpagemap_t +{ + struct compressedpagemap_t *next; + BlockNumber blkno; + uint8 nchunks; + pc_chunk_number_t chunknos[FLEXIBLE_ARRAY_MEMBER]; +}compressedpagemap_t; + +#define SizeOfCompressedPageMapHeaderData offsetof(compressedpagemap_t, chunknos) + typedef struct file_entry_t { char *path; @@ -50,6 +62,21 @@ typedef struct file_entry_t size_t newsize; bool isrelfile; /* is it a relation data file? 
*/ + /* for a compressed relation file */ + struct file_entry_t *pca; + struct file_entry_t *pcd; + uint32 oldblocks; + uint32 newblocks; + uint16 chunk_size; + uint8 prealloc_chunks; + /* compressedpagemap is a link ordered by blkno */ + compressedpagemap_t *first_compressedpagemap; + compressedpagemap_t *last_compressedpagemap; + + /* for a compressed relation address file */ + PageCompressHeader *source_pchdr; + PageCompressHeader *target_pchdr; + datapagemap_t pagemap; /* for a symlink */ @@ -94,11 +121,15 @@ extern void print_filemap(void); /* Functions for populating the filemap */ extern void process_source_file(const char *path, file_type_t type, - size_t newsize, const char *link_target); + size_t newsize, const char *link_target, + const PageCompressHeader *pchdr); extern void process_target_file(const char *path, file_type_t type, - size_t newsize, const char *link_target); + size_t newsize, const char *link_target, + const PageCompressHeader *pchdr); +extern void process_compressed_relation(void); extern void process_block_change(ForkNumber forknum, RelFileNode rnode, BlockNumber blkno); +extern void fill_compressed_relation_address(file_entry_t *entry, const char *path, PageCompressHeader *pcMap); extern void filemap_finalize(void); #endif /* FILEMAP_H */ diff --git a/src/bin/pg_rewind/libpq_fetch.c b/src/bin/pg_rewind/libpq_fetch.c index bf4dfc23b9..327af2f828 100644 --- a/src/bin/pg_rewind/libpq_fetch.c +++ b/src/bin/pg_rewind/libpq_fetch.c @@ -22,9 +22,18 @@ #include "filemap.h" #include "pg_rewind.h" #include "port/pg_bswap.h" +#include "storage/page_compression.h" -PGconn *conn = NULL; +typedef union PageCompressAddrBuffer +{ + char data[MAX_PAGE_COMPRESS_ADDRESS_FILE_SIZE]; + double force_align_d; + int64 force_align_i64; +} PageCompressAddrBuffer; + +PGconn *conn = NULL; +static PageCompressAddrBuffer pca_buffer; /* * Files are fetched max CHUNKSIZE bytes at a time. 
* @@ -35,9 +44,16 @@ PGconn *conn = NULL; #define CHUNKSIZE 1000000 static void receiveFileChunks(const char *sql); -static void execute_pagemap(datapagemap_t *pagemap, const char *path); +static void receiveCompressedRelationAddressFileChunks(filemap_t *map, const char *sql); +static void execute_pagemap(datapagemap_t *pagemap, const char *path, + bool iscompressedrel, int chunk_size, + int prealloc_chunks, compressedpagemap_t *first_compressedpagemap); static char *run_simple_query(const char *sql); static void run_simple_command(const char *sql); +static void fetch_compressed_relation_range(const char *path, int chunk_size, + BlockNumber blocknum, int nblocks, + int prealloc_chunks, + compressedpagemap_t *first_compressedpagemap); void libpqConnect(const char *connstr) @@ -169,6 +185,7 @@ libpqGetCurrentXlogInsertLocation(void) void libpqProcessFileList(void) { + char sqlbuf[1024]; PGresult *res; const char *sql; int i; @@ -197,18 +214,22 @@ libpqProcessFileList(void) " WHERE parent.isdir = 't'\n" ")\n" "SELECT path || filename, size, isdir,\n" - " pg_tablespace_location(pg_tablespace.oid) AS link_target\n" + " pg_tablespace_location(pg_tablespace.oid) AS link_target,\n" + " CASE WHEN filename ~ '_pca$' THEN pg_read_binary_file(path || filename, 0, %d, true)\n" + " ELSE NULL\n" + " END AS pchdr\n" "FROM files\n" "LEFT OUTER JOIN pg_tablespace ON files.path = 'pg_tblspc/'\n" " AND oid::text = files.filename\n"; - res = PQexec(conn, sql); + snprintf(sqlbuf, sizeof(sqlbuf), sql, sizeof(PageCompressHeader)); + res = PQexec(conn, sqlbuf); if (PQresultStatus(res) != PGRES_TUPLES_OK) pg_fatal("could not fetch file list: %s", PQresultErrorMessage(res)); /* sanity check the result set */ - if (PQnfields(res) != 4) + if (PQnfields(res) != 5) pg_fatal("unexpected result set while fetching file list"); /* Read result to local variables */ @@ -241,7 +262,24 @@ libpqProcessFileList(void) else type = FILE_TYPE_REGULAR; - process_source_file(path, type, filesize, link_target); 
+ if(!PQgetisnull(res, i, 4)) + { + PageCompressHeader *pchdr; + size_t length = 0; + char *strvalue = PQgetvalue(res, i, 4); + + pchdr = (PageCompressHeader *)PQunescapeBytea((const unsigned char*)strvalue, &length); + + if(length == sizeof(PageCompressHeader)) + process_source_file(path, type, filesize, link_target, pchdr); + else + process_source_file(path, type, filesize, link_target, NULL); + + if(pchdr) + PQfreemem(pchdr); + } + else + process_source_file(path, type, filesize, link_target, NULL); } PQclear(res); } @@ -257,14 +295,14 @@ libpqProcessFileList(void) *---- */ static void -receiveFileChunks(const char *sql) +receiveCompressedRelationAddressFileChunks(filemap_t *map, const char *sql) { PGresult *res; if (PQsendQueryParams(conn, sql, 0, NULL, NULL, NULL, NULL, 1) != 1) pg_fatal("could not send query: %s", PQerrorMessage(conn)); - pg_log_debug("getting file chunks"); + pg_log_debug("getting compressed relation address file chunks"); if (PQsetSingleRowMode(conn) != 1) pg_fatal("could not set libpq connection to single row mode"); @@ -274,6 +312,7 @@ receiveFileChunks(const char *sql) char *filename; int filenamelen; int64 chunkoff; + char chunkoff_str[32]; int chunksize; char *chunk; @@ -343,23 +382,219 @@ receiveFileChunks(const char *sql) { pg_log_debug("received null value for chunk for file \"%s\", file has been deleted", filename); - remove_target_file(filename, true); + // TODO remove_target_file(filename, true); + pg_free(filename); + PQclear(res); + continue; + } + + /* + * Separate step to keep platform-dependent format code out of + * translatable strings. 
+ */ + snprintf(chunkoff_str, sizeof(chunkoff_str), INT64_FORMAT, chunkoff); + pg_log_debug("received chunk for file \"%s\", offset %s, size %d", + filename, chunkoff_str, chunksize); + + /* fill compressed relation address in filemap */ + filename[strlen(filename) - strlen("_pca")] = '\0'; + + + memset(pca_buffer.data, 0x00, sizeof(PageCompressAddrBuffer)); + memcpy(pca_buffer.data + chunkoff, chunk, chunksize); + + fill_compressed_relation_address(NULL, filename, (PageCompressHeader *)pca_buffer.data); + + pg_free(filename); + + PQclear(res); + } +} + + +/*---- + * Runs a query, which returns pieces of files from the remote source data + * directory, and overwrites the corresponding parts of target files with + * the received parts. The result set is expected to be of format: + * + * path text -- path in the data directory, e.g "base/1/123" + * begin int8 -- offset within the file + * chunk bytea -- file content + * iscompressedchunk bool -- if this is a chunk for compressed relation data file + * blocknum int4 -- block number of this compressed chunk + * chunkindex int4 -- chunk index of this compressed chunk + * nchunks int4 -- number of chunks for this block + * prealloc_chunks int4 -- prealloc_chunks for this relation + *---- + */ +static void +receiveFileChunks(const char *sql) +{ + PGresult *res; + + if (PQsendQueryParams(conn, sql, 0, NULL, NULL, NULL, NULL, 1) != 1) + pg_fatal("could not send query: %s", PQerrorMessage(conn)); + + pg_log_debug("getting file chunks"); + + if (PQsetSingleRowMode(conn) != 1) + pg_fatal("could not set libpq connection to single row mode"); + + while ((res = PQgetResult(conn)) != NULL) + { + char *filename; + int filenamelen; + int64 chunkoff; + int chunksize; + char *chunk; + bool iscompressedchunk; + + switch (PQresultStatus(res)) + { + case PGRES_SINGLE_TUPLE: + break; + + case PGRES_TUPLES_OK: + PQclear(res); + continue; /* final zero-row result */ + + default: + pg_fatal("unexpected result while fetching remote files: 
%s", + PQresultErrorMessage(res)); + } + + /* sanity check the result set */ + if (PQnfields(res) != 8 || PQntuples(res) != 1) + pg_fatal("unexpected result set size while fetching remote files"); + + if (PQftype(res, 0) != TEXTOID || + PQftype(res, 1) != INT8OID || + PQftype(res, 2) != BYTEAOID || + PQftype(res, 3) != BOOLOID || + PQftype(res, 4) != INT4OID || + PQftype(res, 5) != INT4OID || + PQftype(res, 6) != INT4OID || + PQftype(res, 7) != INT4OID) + { + pg_fatal("unexpected data types in result set while fetching remote files: %u %u %u", + PQftype(res, 0), PQftype(res, 1), PQftype(res, 2)); + } + + if (PQfformat(res, 0) != 1 && + PQfformat(res, 1) != 1 && + PQfformat(res, 2) != 1 && + PQfformat(res, 3) != 1 && + PQfformat(res, 4) != 1 && + PQfformat(res, 5) != 1 && + PQfformat(res, 6) != 1 && + PQfformat(res, 7) != 1) + { + pg_fatal("unexpected result format while fetching remote files"); + } + + if (PQgetisnull(res, 0, 0) || + PQgetisnull(res, 0, 1) || + PQgetisnull(res, 0, 3) || + PQgetisnull(res, 0, 4) || + PQgetisnull(res, 0, 5) || + PQgetisnull(res, 0, 6) || + PQgetisnull(res, 0, 7)) + { + pg_fatal("unexpected null values in result while fetching remote files"); + } + + if (PQgetlength(res, 0, 1) != sizeof(int64) || + PQgetlength(res, 0, 3) != sizeof(bool) || + PQgetlength(res, 0, 4) != sizeof(int32) || + PQgetlength(res, 0, 5) != sizeof(int32) || + PQgetlength(res, 0, 6) != sizeof(int32) || + PQgetlength(res, 0, 7) != sizeof(int32)) + { + pg_fatal("unexpected result length while fetching remote files"); + } + + /* Read result set to local variables */ + memcpy(&chunkoff, PQgetvalue(res, 0, 1), sizeof(int64)); + chunkoff = pg_ntoh64(chunkoff); + chunksize = PQgetlength(res, 0, 2); + + filenamelen = PQgetlength(res, 0, 0); + filename = pg_malloc(filenamelen + 1); + memcpy(filename, PQgetvalue(res, 0, 0), filenamelen); + filename[filenamelen] = '\0'; + + chunk = PQgetvalue(res, 0, 2); + + memcpy(&iscompressedchunk, PQgetvalue(res, 0, 3), sizeof(bool)); + 
iscompressedchunk = (iscompressedchunk != 0 ? true : false); + + /* + * If a file has been deleted on the source, remove it on the target + * as well. Note that multiple unlink() calls may happen on the same + * file if multiple data chunks are associated with it, hence ignore + * unconditionally anything missing. If this file is not a relation + * data file, then it has been already truncated when creating the + * file chunk list at the previous execution of the filemap. + * + * For compressed relation, directly deleting the target file may cause + * errors in the subsequent processing of the tuples corresponding to + * the same file, so skip it simply here. This relation can be deleted + * when WAL is applied later. + */ + if (PQgetisnull(res, 0, 2)) + { + pg_log_debug("received null value for chunk for file \"%s\", file has been deleted", + filename); + /* TODO compressed relation*/ + if(!iscompressedchunk) + remove_target_file(filename, true); pg_free(filename); PQclear(res); continue; } - pg_log_debug("received chunk for file \"%s\", offset %lld, size %d", - filename, (long long int) chunkoff, chunksize); + if(iscompressedchunk) + { + int blocknum, chunkindex, nchunks, prealloc_chunks; - open_target_file(filename, false); + memcpy(&blocknum, PQgetvalue(res, 0, 4), sizeof(int32)); + blocknum = pg_ntoh32(blocknum); - write_target_range(chunk, chunkoff, chunksize); + memcpy(&chunkindex, PQgetvalue(res, 0, 5), sizeof(int32)); + chunkindex = pg_ntoh32(chunkindex); + + memcpy(&nchunks, PQgetvalue(res, 0, 6), sizeof(int32)); + nchunks = pg_ntoh32(nchunks); + + memcpy(&prealloc_chunks, PQgetvalue(res, 0, 7), sizeof(int32)); + prealloc_chunks = pg_ntoh32(prealloc_chunks); + + pg_log_debug("received compressed chunk for file \"%s\", offset %lld, size %d," + " blocknum %d, chunkindex %d, nchunks %d, prealloc_chunks %d", + filename, (long long int) chunkoff, chunksize, + blocknum, chunkindex, nchunks, prealloc_chunks); + + filename[filenamelen - 4] = '\0'; + 
open_target_compressed_relation(filename); + + write_target_compressed_relation_chunk(chunk, chunksize, blocknum, chunkindex, nchunks, prealloc_chunks); + } + else + { + pg_log_debug("received chunk for file \"%s\", offset %lld, size %d", + filename, (long long int) chunkoff, chunksize); + + open_target_file(filename, false); + + write_target_range(chunk, chunkoff, chunksize); + } pg_free(filename); PQclear(res); } + close_target_file(); + } /* @@ -424,7 +659,7 @@ fetch_file_range(const char *path, uint64 begin, uint64 end) else len = (unsigned int) (end - begin); - snprintf(linebuf, sizeof(linebuf), "%s\t" UINT64_FORMAT "\t%u\n", path, begin, len); + snprintf(linebuf, sizeof(linebuf), "%s\t" UINT64_FORMAT "\t%u\tfalse\t0\t0\t0\t0\n", path, begin, len); if (PQputCopyData(conn, linebuf, strlen(linebuf)) != 1) pg_fatal("could not send COPY data: %s", @@ -434,6 +669,178 @@ fetch_file_range(const char *path, uint64 begin, uint64 end) } } +/* + * Write a compressed relation block range to a temporary table in the server. + * + * The range is sent to the server as a COPY formatted line, to be inserted + * into the 'fetchchunks' temporary table. It is used in receiveFileChunks() + * function to actually fetch the data. 
+ */ +static void +fetch_compressed_relation_range(const char *path, int chunk_size, BlockNumber blocknum, int nblocks, + int prealloc_chunks, compressedpagemap_t *first_compressedpagemap) +{ + char linebuf[MAXPGPATH + 23]; + int i; + compressedpagemap_t *compressedpagemap = first_compressedpagemap; + + /* read blocks */ + for(i=0; i < nblocks; i++) + { + int j; + BlockNumber blkno = blocknum + i; + + while(compressedpagemap != NULL) + { + if(compressedpagemap->blkno == blkno) + break; + compressedpagemap = compressedpagemap->next; + } + + if(compressedpagemap == NULL) + pg_fatal("could not find compressedpagemap for block %d of file \"%s\"", + blkno, path); + + for(j=0; j < compressedpagemap->nchunks; j++) + { + uint64 begin; + int length = chunk_size; + int chunkindex = j; + pc_chunk_number_t chunkno = compressedpagemap->chunknos[j]; + + begin = OffsetOfPageCompressChunk(chunk_size, chunkno); + + while(j + 1 < compressedpagemap->nchunks && + compressedpagemap->chunknos[j + 1]== compressedpagemap->chunknos[j] + 1) + { + length += chunk_size; + j++; + } + + snprintf(linebuf, sizeof(linebuf), "%s_pcd\t" UINT64_FORMAT "\t%u\ttrue\t%u\t%u\t%u\t%u\n", + path, begin, length, blkno, chunkindex, + compressedpagemap->nchunks, prealloc_chunks); + + if (PQputCopyData(conn, linebuf, strlen(linebuf)) != 1) + pg_fatal("could not send COPY data: %s", + PQerrorMessage(conn)); + } + } +} + +/* + * Fetch all address of changed compressed blocks from remote source data directory. + */ +void +libpq_fetchCompressedRelationAddress(filemap_t *map) +{ + char linebuf[MAXPGPATH + 23]; + const char *sql; + PGresult *res; + int i; + + /* + * First create a temporary table, and load it with the blocks that we + * need to fetch. 
 + */
+	sql = "CREATE TEMPORARY TABLE fetchchunks_pca(path text, begin int8, len int4)";
+	run_simple_command(sql);
+
+	sql = "COPY fetchchunks_pca FROM STDIN";
+	res = PQexec(conn, sql);
+
+	if (PQresultStatus(res) != PGRES_COPY_IN)
+		pg_fatal("could not send file list: %s",
+				 PQresultErrorMessage(res));
+	PQclear(res);
+
+	for (i = 0; i < map->narray; i++)
+	{
+		file_entry_t *entry;
+		int		chunk_size, len;
+		datapagemap_iterator_t *iter;
+		BlockNumber blkno;
+		off_t	range_start = MAX_PAGE_COMPRESS_ADDRESS_FILE_SIZE;
+		off_t	range_end = 0;
+
+		entry = map->array[i];
+
+		if(entry->type != FILE_TYPE_COMPRESSED_REL)
+			continue;
+
+		chunk_size = entry->chunk_size;
+		/* add changed blocks in pagemap */
+		iter = datapagemap_iterate(&entry->pagemap);
+		while (datapagemap_next(iter, &blkno))
+		{
+			off_t	start, end;
+
+			/* calculate position of this block in compressed relation address file */
+
+			start = OffsetOfPageCompressAddr(chunk_size, blkno);
+			if(start < range_start)
+				range_start = start;
+
+			end = OffsetOfPageCompressAddr(chunk_size, blkno + 1);
+			if(end > range_end)
+				range_end = end;
+		}
+		pg_free(iter);
+
+		/* add blocks range in source's tail */
+		if(entry->action == FILE_ACTION_COPY_TAIL)
+		{
+			off_t	start, end;
+
+			/* calculate position of tail range in compressed relation address file */
+
+			//TODO blkno = (BlockNumber)(entry->oldsize / BLCKSZ);
+			start = OffsetOfPageCompressAddr(chunk_size, entry->oldblocks - 1);
+			if(start < range_start)
+				range_start = start;
+
+			//blkno = (BlockNumber)(entry->newsize / BLCKSZ);
+			end = OffsetOfPageCompressAddr(chunk_size, entry->newblocks);
+			if(end > range_end)
+				range_end = end;
+		}
+
+		len = range_end - range_start;
+
+		if(len > 0)
+		{
+			snprintf(linebuf, sizeof(linebuf), "%s\t" UINT64_FORMAT "\t%u\n", entry->pca->path, range_start, len);
+
+			if (PQputCopyData(conn, linebuf, strlen(linebuf)) != 1)
+				pg_fatal("could not send COPY data: %s",
+						 PQerrorMessage(conn));
+		}
+	}
+
+	if (PQputCopyEnd(conn, NULL) != 1)
+		
pg_fatal("could not send end-of-COPY: %s", + PQerrorMessage(conn)); + + while ((res = PQgetResult(conn)) != NULL) + { + if (PQresultStatus(res) != PGRES_COMMAND_OK) + pg_fatal("unexpected result while sending file list: %s", + PQresultErrorMessage(res)); + PQclear(res); + } + + /* + * We've now copied the list of file ranges that we need to fetch to the + * temporary table. Now, actually fetch all of those ranges. + */ + sql = + "SELECT path, begin,\n" + " pg_read_binary_file(path, begin, len, true) AS chunk\n" + "FROM fetchchunks_pca\n"; + + receiveCompressedRelationAddressFileChunks(map, sql); +} + /* * Fetch all changed blocks from remote source data directory. */ @@ -449,7 +856,9 @@ libpq_executeFileMap(filemap_t *map) * First create a temporary table, and load it with the blocks that we * need to fetch. */ - sql = "CREATE TEMPORARY TABLE fetchchunks(path text, begin int8, len int4);"; + sql = "CREATE TEMPORARY TABLE fetchchunks(path text, begin int8, len int4," + " iscompressedchunk bool, blocknum int4, chunkindex int4," + " nchunks int4, prealloc_chunks int4);"; run_simple_command(sql); sql = "COPY fetchchunks FROM STDIN"; @@ -462,10 +871,15 @@ libpq_executeFileMap(filemap_t *map) for (i = 0; i < map->narray; i++) { + bool iscompressedrel = false; entry = map->array[i]; + if(entry->type == FILE_TYPE_COMPRESSED_REL) + iscompressedrel = true; + /* If this is a relation file, copy the modified blocks */ - execute_pagemap(&entry->pagemap, entry->path); + execute_pagemap(&entry->pagemap, entry->path, iscompressedrel, entry->chunk_size, + entry->prealloc_chunks, entry->first_compressedpagemap); switch (entry->action) { @@ -480,11 +894,20 @@ libpq_executeFileMap(filemap_t *map) break; case FILE_ACTION_TRUNCATE: - truncate_target_file(entry->path, entry->newsize); + if(iscompressedrel) + truncate_target_compressed_relation(entry->path, entry->newblocks); + else + truncate_target_file(entry->path, entry->newsize); break; case FILE_ACTION_COPY_TAIL: - 
fetch_file_range(entry->path, entry->oldsize, entry->newsize); + if(iscompressedrel) + fetch_compressed_relation_range(entry->path, entry->chunk_size, entry->oldblocks, + entry->newblocks - entry->oldblocks, + entry->prealloc_chunks, + entry->first_compressedpagemap); + else + fetch_file_range(entry->path, entry->oldsize, entry->newsize); break; case FILE_ACTION_REMOVE: @@ -515,14 +938,16 @@ libpq_executeFileMap(filemap_t *map) */ sql = "SELECT path, begin,\n" - " pg_read_binary_file(path, begin, len, true) AS chunk\n" + " pg_read_binary_file(path, begin, len, true) AS chunk,\n" + " iscompressedchunk, blocknum, chunkindex, nchunks, prealloc_chunks\n" "FROM fetchchunks\n"; receiveFileChunks(sql); } static void -execute_pagemap(datapagemap_t *pagemap, const char *path) +execute_pagemap(datapagemap_t *pagemap, const char *path, bool iscompressedrel, int chunk_size, + int prealloc_chunks, compressedpagemap_t *first_compressedpagemap) { datapagemap_iterator_t *iter; BlockNumber blkno; @@ -531,9 +956,13 @@ execute_pagemap(datapagemap_t *pagemap, const char *path) iter = datapagemap_iterate(pagemap); while (datapagemap_next(iter, &blkno)) { - offset = blkno * BLCKSZ; - - fetch_file_range(path, offset, offset + BLCKSZ); + if(iscompressedrel) + fetch_compressed_relation_range(path, chunk_size, blkno, 1, prealloc_chunks, first_compressedpagemap); + else + { + offset = blkno * BLCKSZ; + fetch_file_range(path, offset, offset + BLCKSZ); + } } pg_free(iter); } diff --git a/src/bin/pg_rewind/pg_rewind.c b/src/bin/pg_rewind/pg_rewind.c index 0ec52cb032..a3fd8bc566 100644 --- a/src/bin/pg_rewind/pg_rewind.c +++ b/src/bin/pg_rewind/pg_rewind.c @@ -380,6 +380,7 @@ main(int argc, char **argv) if (showprogress) pg_log_info("reading target file list"); traverse_datadir(datadir_target, &process_target_file); + process_compressed_relation(); /* * Read the target WAL from last checkpoint before the point of fork, to @@ -392,6 +393,8 @@ main(int argc, char **argv) pg_log_info("reading WAL 
in target"); extractPageMap(datadir_target, chkptrec, lastcommontliIndex, ControlFile_target.checkPoint, restore_command); + + fetchCompressedRelationAddress(); filemap_finalize(); if (showprogress) diff --git a/src/bin/pg_rewind/pg_rewind.h b/src/bin/pg_rewind/pg_rewind.h index 8a9319ed67..9c6d5decae 100644 --- a/src/bin/pg_rewind/pg_rewind.h +++ b/src/bin/pg_rewind/pg_rewind.h @@ -32,6 +32,7 @@ extern int targetNentries; /* general state */ extern PGconn *conn; +extern PGconn *pca_conn; /* Progress counters */ extern uint64 fetch_size; diff --git a/src/bin/pg_rewind/t/007_page_compression.pl b/src/bin/pg_rewind/t/007_page_compression.pl new file mode 100644 index 0000000000..d5320c15f7 --- /dev/null +++ b/src/bin/pg_rewind/t/007_page_compression.pl @@ -0,0 +1,199 @@ +use strict; +use warnings; +use TestLib; +use Test::More tests => 26; + +use FindBin; +use lib $FindBin::RealBin; + +use RewindTest; + +sub run_test +{ + my $test_mode = shift; + + RewindTest::setup_cluster($test_mode); + RewindTest::start_primary(); + + # Create a test table and insert a row in primary. + primary_psql("CREATE TABLE tbl1 (d text) WITH(compresstype=pglz, compress_chunk_size=1024, compress_prealloc_chunks=2)"); + primary_psql("INSERT INTO tbl1 VALUES ('in primary')"); + + # This test table will be used to test truncation, i.e. the table + # is extended in the old primary after promotion + primary_psql("CREATE TABLE trunc_tbl (d text) WITH(compresstype=pglz, compress_chunk_size=1024, compress_prealloc_chunks=2)"); + primary_psql("INSERT INTO trunc_tbl VALUES ('in primary')"); + + # This test table will be used to test the "copy-tail" case, i.e. the + # table is truncated in the old primary after promotion + primary_psql("CREATE TABLE tail_tbl (id integer, d text) WITH(compresstype=pglz, compress_chunk_size=1024, compress_prealloc_chunks=2)"); + primary_psql("INSERT INTO tail_tbl VALUES (0, 'in primary')"); + + # This test table is dropped in the old primary after promotion. 
+	primary_psql("CREATE TABLE drop_tbl (d text) WITH(compresstype=pglz, compress_chunk_size=1024, compress_prealloc_chunks=2)");
+	primary_psql("INSERT INTO drop_tbl VALUES ('in primary')");
+
+	primary_psql("CHECKPOINT");
+
+	RewindTest::create_standby($test_mode);
+
+	# Insert additional data on primary that will be replicated to standby
+	primary_psql("INSERT INTO tbl1 values ('in primary, before promotion')");
+	primary_psql(
+		"INSERT INTO trunc_tbl values ('in primary, before promotion')");
+	primary_psql(
+		"INSERT INTO tail_tbl SELECT g, 'in primary, before promotion: ' || g FROM generate_series(1, 10000) g"
+	);
+
+	primary_psql('CHECKPOINT');
+
+	RewindTest::promote_standby();
+
+	# Insert a row in the old primary. This causes the primary and standby
+	# to have "diverged", it's no longer possible to just apply the
+	# standby's logs over primary directory - you need to rewind.
+	primary_psql("INSERT INTO tbl1 VALUES ('in primary, after promotion')");
+
+	# Also insert a new row in the standby, which won't be present in the
+	# old primary.
+	standby_psql("INSERT INTO tbl1 VALUES ('in standby, after promotion')");
+
+	# Insert enough rows to trunc_tbl to extend the file. pg_rewind should
+	# truncate it back to the old size.
+	primary_psql(
+		"INSERT INTO trunc_tbl SELECT 'in primary, after promotion: ' || g FROM generate_series(1, 10000) g"
+	);
+
+	# Truncate tail_tbl. pg_rewind should copy back the truncated part
+	# (We cannot use an actual TRUNCATE command here, as that creates a
+	# whole new relfilenode)
+	primary_psql("DELETE FROM tail_tbl WHERE id > 10");
+	primary_psql("VACUUM tail_tbl");
+
+	# Drop drop_tbl. pg_rewind should copy it back.
+	primary_psql("DROP TABLE drop_tbl");
+
+	# Create new_tbl in standby. pg_rewind should copy it from source.
+ standby_psql("CREATE TABLE new_tbl (d text) WITH(compresstype=pglz, compress_chunk_size=1024, compress_prealloc_chunks=2)"); + standby_psql("INSERT INTO new_tbl VALUES ('in standby')"); + + # Before running pg_rewind, do a couple of extra tests with several + # option combinations. As the code paths taken by those tests + # do not change for the "local" and "remote" modes, just run them + # in "local" mode for simplicity's sake. + if ($test_mode eq 'local') + { + my $primary_pgdata = $node_primary->data_dir; + my $standby_pgdata = $node_standby->data_dir; + + # First check that pg_rewind fails if the target cluster is + # not stopped as it fails to start up for the forced recovery + # step. + command_fails( + [ + 'pg_rewind', '--debug', + '--source-pgdata', $standby_pgdata, + '--target-pgdata', $primary_pgdata, + '--no-sync' + ], + 'pg_rewind with running target'); + + # Again with --no-ensure-shutdown, which should equally fail. + # This time pg_rewind complains without attempting to perform + # recovery once. + command_fails( + [ + 'pg_rewind', '--debug', + '--source-pgdata', $standby_pgdata, + '--target-pgdata', $primary_pgdata, + '--no-sync', '--no-ensure-shutdown' + ], + 'pg_rewind --no-ensure-shutdown with running target'); + + # Stop the target, and attempt to run with a local source + # still running. This fails as pg_rewind requires to have + # a source cleanly stopped. + $node_primary->stop; + command_fails( + [ + 'pg_rewind', '--debug', + '--source-pgdata', $standby_pgdata, + '--target-pgdata', $primary_pgdata, + '--no-sync', '--no-ensure-shutdown' + ], + 'pg_rewind with unexpected running source'); + + # Stop the target cluster cleanly, and run again pg_rewind + # with --dry-run mode. If anything gets generated in the data + # folder, the follow-up run of pg_rewind will most likely fail, + # so keep this test as the last one of this subset. 
+ $node_standby->stop; + command_ok( + [ + 'pg_rewind', '--debug', + '--source-pgdata', $standby_pgdata, + '--target-pgdata', $primary_pgdata, + '--no-sync', '--dry-run' + ], + 'pg_rewind --dry-run'); + + # Both clusters need to be alive moving forward. + $node_standby->start; + $node_primary->start; + } + + RewindTest::run_pg_rewind($test_mode); + + check_query( + 'SELECT * FROM tbl1', + qq(in primary +in primary, before promotion +in standby, after promotion +), + 'table content'); + + check_query( + 'SELECT * FROM trunc_tbl', + qq(in primary +in primary, before promotion +), + 'truncation'); + + check_query( + 'SELECT count(*) FROM tail_tbl', + qq(10001 +), + 'tail-copy'); + + check_query( + 'SELECT * FROM drop_tbl', + qq(in primary +), + 'drop'); + + check_query( + 'SELECT * FROM new_tbl', + qq(in standby +), + 'new'); + + # Permissions on PGDATA should be default + SKIP: + { + skip "unix-style permissions not supported on Windows", 1 + if ($windows_os); + + ok(check_mode_recursive($node_primary->data_dir(), 0700, 0600), + 'check PGDATA permissions'); + } + + RewindTest::clean_rewind_test(); + return; +} + +# Run the test in both modes +run_test('local'); +run_test('remote'); +run_test('archive'); + +exit(0); diff --git a/src/bin/psql/tab-complete.c b/src/bin/psql/tab-complete.c index 24c7b414cf..41af84b234 100644 --- a/src/bin/psql/tab-complete.c +++ b/src/bin/psql/tab-complete.c @@ -1088,6 +1088,10 @@ static const char *const table_storage_parameters[] = { "autovacuum_vacuum_insert_threshold", "autovacuum_vacuum_scale_factor", "autovacuum_vacuum_threshold", + "compress_chunk_size", + "compresslevel", + "compress_prealloc_chunks", + "compresstype", "fillfactor", "log_autovacuum_min_duration", "parallel_workers", @@ -1749,6 +1753,7 @@ psql_completion(const char *text, int start, int end) /* ALTER INDEX SET|RESET ( */ else if (Matches("ALTER", "INDEX", MatchAny, "RESET", "(")) COMPLETE_WITH("fillfactor", + 
"compresstype","compresslevel","compress_chunk_size","compress_prealloc_chunks", "vacuum_cleanup_index_scale_factor", "deduplicate_items", /* BTREE */ "fastupdate", "gin_pending_list_limit", /* GIN */ "buffering", /* GiST */ @@ -1756,6 +1761,7 @@ psql_completion(const char *text, int start, int end) ); else if (Matches("ALTER", "INDEX", MatchAny, "SET", "(")) COMPLETE_WITH("fillfactor =", + "compresstype =", "compresslevel =", "compress_chunk_size =", "compress_prealloc_chunks =", "vacuum_cleanup_index_scale_factor =", "deduplicate_items =", /* BTREE */ "fastupdate =", "gin_pending_list_limit =", /* GIN */ "buffering =", /* GiST */ @@ -2154,7 +2160,9 @@ psql_completion(const char *text, int start, int end) /* ALTER TABLESPACE SET|RESET ( */ else if (Matches("ALTER", "TABLESPACE", MatchAny, "SET|RESET", "(")) COMPLETE_WITH("seq_page_cost", "random_page_cost", - "effective_io_concurrency", "maintenance_io_concurrency"); + "effective_io_concurrency", "maintenance_io_concurrency", + "default_compresstype", "default_compresslevel", + "default_compress_chunk_size", "default_compress_prealloc_chunks"); /* ALTER TEXT SEARCH */ else if (Matches("ALTER", "TEXT", "SEARCH")) diff --git a/src/include/access/gin_private.h b/src/include/access/gin_private.h index 5cb2f72e4c..4c71973a34 100644 --- a/src/include/access/gin_private.h +++ b/src/include/access/gin_private.h @@ -18,6 +18,7 @@ #include "fmgr.h" #include "lib/rbtree.h" #include "storage/bufmgr.h" +#include "utils/rel.h" /* * Storage type for GIN's reloptions @@ -27,6 +28,7 @@ typedef struct GinOptions int32 vl_len_; /* varlena header (do not touch directly!) */ bool useFastUpdate; /* use fast updates? 
*/ int pendingListCleanupSize; /* maximum size of pending list */ + PageCompressOpts compress; /* page compress related options */ } GinOptions; #define GIN_DEFAULT_USE_FASTUPDATE true diff --git a/src/include/access/gist_private.h b/src/include/access/gist_private.h index b68c01a5f2..1705bfed2e 100644 --- a/src/include/access/gist_private.h +++ b/src/include/access/gist_private.h @@ -21,6 +21,7 @@ #include "storage/bufmgr.h" #include "storage/buffile.h" #include "utils/hsearch.h" +#include "utils/rel.h" #include "access/genam.h" /* @@ -396,6 +397,7 @@ typedef struct GiSTOptions int32 vl_len_; /* varlena header (do not touch directly!) */ int fillfactor; /* page fill factor in percent (0..100) */ GistOptBufferingMode buffering_mode; /* buffering build mode */ + PageCompressOpts compress; /* page compress related options */ } GiSTOptions; /* gist.c */ diff --git a/src/include/access/hash.h b/src/include/access/hash.h index bab4d9f1b0..cac5087797 100644 --- a/src/include/access/hash.h +++ b/src/include/access/hash.h @@ -26,6 +26,7 @@ #include "storage/bufmgr.h" #include "storage/lockdefs.h" #include "utils/hsearch.h" +#include "utils/rel.h" #include "utils/relcache.h" /* @@ -268,6 +269,7 @@ typedef struct HashOptions { int32 varlena_header_; /* varlena header (do not touch directly!) 
*/ int fillfactor; /* page fill factor in percent (0..100) */ + PageCompressOpts compress; /* page compress related options */ } HashOptions; #define HashGetFillFactor(relation) \ diff --git a/src/include/access/nbtree.h b/src/include/access/nbtree.h index 65d9698b89..9ed89cbff9 100644 --- a/src/include/access/nbtree.h +++ b/src/include/access/nbtree.h @@ -23,6 +23,7 @@ #include "lib/stringinfo.h" #include "storage/bufmgr.h" #include "storage/shm_toc.h" +#include "utils/rel.h" /* There's room for a 16-bit vacuum cycle ID in BTPageOpaqueData */ typedef uint16 BTCycleId; @@ -963,6 +964,7 @@ typedef struct BTOptions /* fraction of newly inserted tuples prior to trigger index cleanup */ float8 vacuum_cleanup_index_scale_factor; bool deduplicate_items; /* Try to deduplicate items? */ + PageCompressOpts compress; /* page compress related options */ } BTOptions; #define BTGetFillFactor(relation) \ diff --git a/src/include/access/spgist_private.h b/src/include/access/spgist_private.h index 00b98ec6a0..36d9a6e088 100644 --- a/src/include/access/spgist_private.h +++ b/src/include/access/spgist_private.h @@ -20,6 +20,7 @@ #include "nodes/tidbitmap.h" #include "storage/buf.h" #include "utils/geo_decls.h" +#include "utils/rel.h" #include "utils/relcache.h" @@ -27,6 +28,7 @@ typedef struct SpGistOptions { int32 varlena_header_; /* varlena header (do not touch directly!) 
*/ int fillfactor; /* page fill factor in percent (0..100) */ + PageCompressOpts compress; /* page compress related options */ } SpGistOptions; #define SpGistGetFillFactor(relation) \ diff --git a/src/include/catalog/heap.h b/src/include/catalog/heap.h index d31141c1a2..e22308e221 100644 --- a/src/include/catalog/heap.h +++ b/src/include/catalog/heap.h @@ -57,6 +57,7 @@ extern Relation heap_create(const char *relname, char relpersistence, bool shared_relation, bool mapped_relation, + Datum reloptions, bool allow_system_table_mods, TransactionId *relfrozenxid, MultiXactId *relminmxid); diff --git a/src/include/commands/tablespace.h b/src/include/commands/tablespace.h index fd1b28fca2..3069cd383c 100644 --- a/src/include/commands/tablespace.h +++ b/src/include/commands/tablespace.h @@ -18,6 +18,8 @@ #include "catalog/objectaddress.h" #include "lib/stringinfo.h" #include "nodes/parsenodes.h" +#include "storage/page_compression.h" +#include "utils/rel.h" /* XLOG stuff */ #define XLOG_TBLSPC_CREATE 0x00 @@ -41,6 +43,7 @@ typedef struct TableSpaceOpts float8 seq_page_cost; int effective_io_concurrency; int maintenance_io_concurrency; + PageCompressOpts compress; /* page compress related options */ } TableSpaceOpts; extern Oid CreateTableSpace(CreateTableSpaceStmt *stmt); diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in index fb270df678..f335c399cb 100644 --- a/src/include/pg_config.h.in +++ b/src/include/pg_config.h.in @@ -346,6 +346,9 @@ /* Define to 1 if you have the `z' library (-lz). */ #undef HAVE_LIBZ +/* Define to 1 if you have the `zstd' library (-lzstd). */ +#undef HAVE_LIBZSTD + /* Define to 1 if you have the `link' function. */ #undef HAVE_LINK @@ -686,6 +689,9 @@ /* Define to 1 if the assembler supports X86_64's POPCNTQ instruction. */ #undef HAVE_X86_64_POPCNTQ +/* Define to 1 if you have the header file. */ +#undef HAVE_ZSTD_H + /* Define to 1 if the system has the type `_Bool'. 
*/ #undef HAVE__BOOL @@ -923,6 +929,9 @@ /* Define to select Win32-style shared memory. */ #undef USE_WIN32_SHARED_MEMORY +/* Define to 1 to build with zstd support. (--with-zstd) */ +#undef USE_ZSTD + /* Define to 1 if `wcstombs_l' requires . */ #undef WCSTOMBS_L_IN_XLOCALE diff --git a/src/include/pgstat.h b/src/include/pgstat.h index 343eef507e..a5a4de4e9d 100644 --- a/src/include/pgstat.h +++ b/src/include/pgstat.h @@ -1008,7 +1008,9 @@ typedef enum WAIT_EVENT_LOGICAL_CHANGES_READ, WAIT_EVENT_LOGICAL_CHANGES_WRITE, WAIT_EVENT_LOGICAL_SUBXACT_READ, - WAIT_EVENT_LOGICAL_SUBXACT_WRITE + WAIT_EVENT_LOGICAL_SUBXACT_WRITE, + WAIT_EVENT_COMPRESS_ADDRESS_FILE_FLUSH, + WAIT_EVENT_COMPRESS_ADDRESS_FILE_SYNC } WaitEventIO; /* ---------- diff --git a/src/include/storage/checksum_impl.h b/src/include/storage/checksum_impl.h index 364acfafd2..402011900a 100644 --- a/src/include/storage/checksum_impl.h +++ b/src/include/storage/checksum_impl.h @@ -143,7 +143,7 @@ do { \ * (at least on 4-byte boundary). */ static uint32 -pg_checksum_block(const PGChecksummablePage *page) +pg_checksum_block(const PGChecksummablePage *page, size_t size) { uint32 sums[N_SUMS]; uint32 result = 0; @@ -157,7 +157,7 @@ pg_checksum_block(const PGChecksummablePage *page) memcpy(sums, checksumBaseOffsets, sizeof(checksumBaseOffsets)); /* main checksum calculation */ - for (i = 0; i < (uint32) (BLCKSZ / (sizeof(uint32) * N_SUMS)); i++) + for (i = 0; i < (uint32) (size / (sizeof(uint32) * N_SUMS)); i++) for (j = 0; j < N_SUMS; j++) CHECKSUM_COMP(sums[j], page->data[i][j]); @@ -201,7 +201,7 @@ pg_checksum_page(char *page, BlockNumber blkno) */ save_checksum = cpage->phdr.pd_checksum; cpage->phdr.pd_checksum = 0; - checksum = pg_checksum_block(cpage); + checksum = pg_checksum_block(cpage, BLCKSZ); cpage->phdr.pd_checksum = save_checksum; /* Mix in the block number to detect transposed pages */ @@ -212,4 +212,4 @@ pg_checksum_page(char *page, BlockNumber blkno) * one. 
That avoids checksums of zero, which seems like a good idea. */ return (uint16) ((checksum % 65535) + 1); -} +} \ No newline at end of file diff --git a/src/include/storage/fd.h b/src/include/storage/fd.h index e209f047e8..9fa562f3d1 100644 --- a/src/include/storage/fd.h +++ b/src/include/storage/fd.h @@ -132,6 +132,10 @@ extern void ReleaseExternalFD(void); /* Make a directory with default permissions */ extern int MakePGDirectory(const char *directoryName); +/* Page compression support routines */ +extern void SetupPageCompressMemoryMap(File file, int chunk_size, uint8 algorithm); +extern void *GetPageCompressMemoryMap(File file, int chunk_size); + /* Miscellaneous support routines */ extern void InitFileAccess(void); extern void set_max_safe_fds(void); diff --git a/src/include/storage/page_compression.h b/src/include/storage/page_compression.h new file mode 100644 index 0000000000..153e595b1f --- /dev/null +++ b/src/include/storage/page_compression.h @@ -0,0 +1,140 @@ +/* + * page_compression.h + * internal declarations for page compression + * + * Copyright (c) 2020, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/include/storage/page_compression.h + */ + +#ifndef PAGE_COMPRESSION_H +#define PAGE_COMPRESSION_H + +#include + +#include "storage/bufpage.h" +//#include "utils/rel.h" +#include "datatype/timestamp.h" + +#ifdef FRONTEND +typedef uint32 pg_atomic_uint32; +#else +#include "port/atomics.h" +#include "utils/rel.h" + +/* The page compression feature relies on native atomic operation support. + * On platforms that do not support native atomic operations, the members + * of pg_atomic_uint32 contain semaphore objects, which will affect the + * persistence of compressed page address files. 
+ */ +#define SUPPORT_PAGE_COMPRESSION (sizeof(pg_atomic_uint32) == sizeof(uint32)) +#endif + +/* In order to avoid the inconsistency of address metadata data when the server + * is down, it is necessary to prevent the address metadata of one data block + * from crossing two storage device blocks. The block size of ordinary storage + * devices is a multiple of 512, so 512 is used as the block size of the + * compressed address file. + */ +#define COMPRESS_ADDR_BLCKSZ 512 + +/* COMPRESS_ALGORITHM_XXX must be the same as COMPRESS_TYPE_XXX */ +#define COMPRESS_ALGORITHM_PGLZ 1 +#define COMPRESS_ALGORITHM_ZSTD 2 + +typedef uint32 pc_chunk_number_t; + +/* + * layout of files for Page Compress: + * + * 1. page compression address file(_pca) + * - PageCompressHeader + * - PageCompressAddr[] + * + * 2. page compression data file(_pcd) + * - PageCompressData[] + * + */ + +typedef struct PageCompressHeader +{ + pg_atomic_uint32 nblocks; /* number of total blocks in this segment */ + pg_atomic_uint32 allocated_chunks; /* number of total allocated chunks in data area */ + uint16 chunk_size; /* size of each chunk, must be 1/2 1/4 or 1/8 of BLCKSZ */ + uint8 algorithm; /* compress algorithm, 1=pglz, 2=zstd */ + pg_atomic_uint32 last_synced_nblocks; /* last synced nblocks */ + pg_atomic_uint32 last_synced_allocated_chunks; /* last synced allocated_chunks */ + TimestampTz last_recovery_start_time; /* postmaster start time of last recovery */ +} PageCompressHeader; + +typedef struct PageCompressAddr +{ + volatile uint8 nchunks; /* number of chunks for this block */ + volatile uint8 allocated_chunks; /* number of allocated chunks for this block */ + + /* variable-length fields, 1 based chunk no array for this block, size of the array must be 2, 4 or 8 */ + pc_chunk_number_t chunknos[FLEXIBLE_ARRAY_MEMBER]; +} PageCompressAddr; + +typedef struct PageCompressData +{ + char page_header[SizeOfPageHeaderData]; /* page header */ + uint32 size; /* size of compressed data */ + char
data[FLEXIBLE_ARRAY_MEMBER]; /* compressed page, except for the page header */ +} PageCompressData; + + +#define SizeOfPageCompressHeaderData sizeof(PageCompressHeader) +#define SizeOfPageCompressAddrHeaderData offsetof(PageCompressAddr, chunknos) +#define SizeOfPageCompressDataHeaderData offsetof(PageCompressData, data) + +#define SizeOfPageCompressAddr(chunk_size) \ + (SizeOfPageCompressAddrHeaderData + sizeof(pc_chunk_number_t) * (BLCKSZ / (chunk_size))) + +#define NumberOfPageCompressAddrPerBlock(chunk_size) \ + (COMPRESS_ADDR_BLCKSZ / SizeOfPageCompressAddr(chunk_size)) + +#define OffsetOfPageCompressAddr(chunk_size, blockno) \ + (COMPRESS_ADDR_BLCKSZ * (1 + (blockno) / NumberOfPageCompressAddrPerBlock(chunk_size)) \ + + SizeOfPageCompressAddr(chunk_size) * ((blockno) % NumberOfPageCompressAddrPerBlock(chunk_size))) + +#define GetPageCompressAddr(pcbuffer, chunk_size, blockno) \ + (PageCompressAddr *)((char *)(pcbuffer) + OffsetOfPageCompressAddr((chunk_size),(blockno) % RELSEG_SIZE)) + +#define SizeofPageCompressAddrFile(chunk_size) \ + OffsetOfPageCompressAddr((chunk_size), RELSEG_SIZE) + +#define OffsetOfPageCompressChunk(chunk_size, chunkno) \ + ((chunk_size) * ((chunkno) - 1)) + +#define MAX_PAGE_COMPRESS_ADDRESS_FILE_SIZE SizeofPageCompressAddrFile(BLCKSZ / 8) + +/* Abnormal scenarios may cause holes in the space allocation of data files, + * causing data file expansion. Usually the holes are not too big, so the definition + * allows a maximum of 10,000 chunks for holes. If allocated_chunks exceeds this value, + * VACUUM FULL needs to be executed to reclaim space. 
+ */ +#define MAX_CHUNK_NUMBER(chunk_size) (RELSEG_SIZE * (BLCKSZ / (chunk_size)) + 10000) + +/* Compress function */ +extern int compress_page_buffer_bound(uint8 algorithm); +extern int compress_page(const char *src, char *dst, int dst_size, uint8 algorithm, int8 level); +extern int decompress_page(const char * src, char *dst, uint8 algorithm); + +/* Memory mapping function */ +extern PageCompressHeader * pc_mmap(int fd, int chunk_size, bool readonly); +extern int pc_munmap(PageCompressHeader * map); +extern int pc_msync(PageCompressHeader * map); + + +#ifndef FRONTEND +extern int compress_address_flush_chunks; + +/* compression options function */ +extern Datum buildCompressReloptions(PageCompressOpts *pcOpt); + +extern void check_and_repair_compress_address(PageCompressHeader *pcMap, uint16 chunk_size, uint8 algorithm, const char *path); +#endif + +#endif /* PAGE_COMPRESSION_H */ diff --git a/src/include/storage/page_compression_impl.h b/src/include/storage/page_compression_impl.h new file mode 100644 index 0000000000..35eefd30ee --- /dev/null +++ b/src/include/storage/page_compression_impl.h @@ -0,0 +1,238 @@ +#include +#include +#include +#include +#include +#include +#include + +#include "common/pg_lzcompress.h" + +#ifdef USE_ZSTD +#include + +#define DEFAULT_ZSTD_COMPRESSION_LEVEL 1 +#define MIN_ZSTD_COMPRESSION_LEVEL ZSTD_minCLevel() +#define MAX_ZSTD_COMPRESSION_LEVEL ZSTD_maxCLevel() +#endif + + +/** + * compress_page_buffer_bound() + * -- Get the destination buffer boundary to compress one page. + * + * Return needed destination buffer size for compress one page or + * -1 for unrecognized compression algorithm + * + */ +int +compress_page_buffer_bound(uint8 algorithm) +{ + switch(algorithm) + { + case COMPRESS_ALGORITHM_PGLZ: + return BLCKSZ + 4; +#ifdef USE_ZSTD + case COMPRESS_ALGORITHM_ZSTD: + return ZSTD_compressBound(BLCKSZ - SizeOfPageHeaderData); +#endif + default: + return -1; + break; + } +} + +/** + * compress_page() -- Compress one page. 
+ * + * Only the parts other than the page header will be compressed. The + * compressed data is rounded by chunk_size. The insufficient part is + * filled with zero. Compression needs to be able to save at least one + * chunk of space, otherwise it fails. + * This function returns the size of compressed data or + * -1 for compression fail + * -2 for unrecognized compression algorithm + */ +int +compress_page(const char *src, char *dst, int dst_size, uint8 algorithm, int8 level) +{ + int compressed_size; + PageCompressData *pcdptr; + + pcdptr = (PageCompressData *)dst; + + switch(algorithm) + { + case COMPRESS_ALGORITHM_PGLZ: + compressed_size = pglz_compress(src + SizeOfPageHeaderData, + BLCKSZ - SizeOfPageHeaderData, + pcdptr->data, + PGLZ_strategy_always); + break; +#ifdef USE_ZSTD + case COMPRESS_ALGORITHM_ZSTD: + { + if(level == 0 || + level < MIN_ZSTD_COMPRESSION_LEVEL || + level > MAX_ZSTD_COMPRESSION_LEVEL) + level = DEFAULT_ZSTD_COMPRESSION_LEVEL; + + compressed_size = ZSTD_compress(pcdptr->data, + dst_size, + src + SizeOfPageHeaderData, + BLCKSZ - SizeOfPageHeaderData, + level); + + if (ZSTD_isError(compressed_size)) + { + return -1; + } + break; + } +#endif + default: + return -2; + break; + } + + if(compressed_size < 0) + return -1; + + memcpy(pcdptr->page_header, src, SizeOfPageHeaderData); + pcdptr->size = compressed_size; + + return SizeOfPageCompressDataHeaderData + compressed_size; +} + +/** + * decompress_page() -- Decompress one compressed page. + * return size of decompressed page which should be BLCKSZ or + * -1 for decompress error + * -2 for unrecognized compression algorithm + * + * note: The size of dst must be greater than or equal to BLCKSZ.
+ */ +int +decompress_page(const char * src, char *dst, uint8 algorithm) +{ + int decompressed_size; + PageCompressData *pcdptr; + + pcdptr = (PageCompressData *)src; + + memcpy(dst, src, SizeOfPageHeaderData); + + switch(algorithm) + { + case COMPRESS_ALGORITHM_PGLZ: + decompressed_size = pglz_decompress(pcdptr->data, + pcdptr->size, + dst + SizeOfPageHeaderData, + BLCKSZ - SizeOfPageHeaderData, + false); + break; + +#ifdef USE_ZSTD + case COMPRESS_ALGORITHM_ZSTD: + decompressed_size = ZSTD_decompress(dst + SizeOfPageHeaderData, + BLCKSZ - SizeOfPageHeaderData, + pcdptr->data, + pcdptr->size); + + if (ZSTD_isError(decompressed_size)) + { + return -1; + } + + break; +#endif + + default: + return -2; + break; + + } + + return SizeOfPageHeaderData + decompressed_size; +} + + +/** + * pc_mmap() -- create memory map for page compress file's address area. + * + */ +PageCompressHeader * +pc_mmap(int fd, int chunk_size, bool readonly) +{ + PageCompressHeader *map; + int file_size,pc_memory_map_size; + + pc_memory_map_size = SizeofPageCompressAddrFile(chunk_size); + + file_size = lseek(fd, 0, SEEK_END); + if(file_size != pc_memory_map_size) + { + if (ftruncate(fd, pc_memory_map_size) != 0) + return (PageCompressHeader *) MAP_FAILED; + } + +#ifdef WIN32 + { + HANDLE mh; + if(readonly) + mh = CreateFileMapping((HANDLE)_get_osfhandle(fd), NULL, PAGE_READONLY, + 0, (DWORD) pc_memory_map_size, NULL); + else + mh = CreateFileMapping((HANDLE)_get_osfhandle(fd), NULL, PAGE_READWRITE, + 0, (DWORD) pc_memory_map_size, NULL); + + if (mh == NULL) + return (PageCompressHeader *) MAP_FAILED; + + map = (PageCompressHeader *) MapViewOfFile(mh, FILE_MAP_ALL_ACCESS, 0, 0, 0); + CloseHandle(mh); + } + if (map == NULL) + return (PageCompressHeader *) MAP_FAILED; + +#else + if(readonly) + map = (PageCompressHeader *) mmap(NULL, pc_memory_map_size, PROT_READ, MAP_SHARED, fd, 0); + else + map = (PageCompressHeader *) mmap(NULL, pc_memory_map_size, PROT_WRITE | PROT_READ, MAP_SHARED, fd,
0); +#endif + return map; +} + +/** + * pc_munmap() -- release memory map of page compress file. + * + */ +int +pc_munmap(PageCompressHeader * map) +{ +#ifdef WIN32 + return UnmapViewOfFile(map) ? 0 : -1; +#else + return munmap(map, SizeofPageCompressAddrFile(map->chunk_size)); +#endif +} + +/** + * pc_msync() -- sync memory map of page compress file. + * + */ +int +pc_msync(PageCompressHeader *map) +{ +#ifndef FRONTEND + if (!enableFsync) + return 0; +#endif + +#ifdef WIN32 + return FlushViewOfFile(map, SizeofPageCompressAddrFile(map->chunk_size)) ? 0 : -1; +#else + return msync(map, SizeofPageCompressAddrFile(map->chunk_size), MS_SYNC); +#endif +} \ No newline at end of file diff --git a/src/include/storage/relfilenode.h b/src/include/storage/relfilenode.h index 4de9fc1e69..008b97755b 100644 --- a/src/include/storage/relfilenode.h +++ b/src/include/storage/relfilenode.h @@ -59,6 +59,10 @@ typedef struct RelFileNode Oid spcNode; /* tablespace */ Oid dbNode; /* database */ Oid relNode; /* relation */ + uint32 compress_chunk_size; /* chunk size of compressed data */ + uint8 compress_algorithm; /* compress algorithm */ + int8 compresslevel; /* compress level */ + uint8 compress_prealloc_chunks; /* prealloced chunks to store compressed data */ } RelFileNode; /* diff --git a/src/include/utils/guc_tables.h b/src/include/utils/guc_tables.h index 04431d0eb2..7dfb4cf7b0 100644 --- a/src/include/utils/guc_tables.h +++ b/src/include/utils/guc_tables.h @@ -96,6 +96,7 @@ enum config_group CLIENT_CONN_PRELOAD, CLIENT_CONN_OTHER, LOCK_MANAGEMENT, + COMPRESS_OPTIONS, COMPAT_OPTIONS, COMPAT_OPTIONS_PREVIOUS, COMPAT_OPTIONS_CLIENT, diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h index 0b5957ba02..e36c793fca 100644 --- a/src/include/utils/rel.h +++ b/src/include/utils/rel.h @@ -294,6 +294,23 @@ typedef struct AutoVacOpts float8 analyze_scale_factor; } AutoVacOpts; +/* PageCompressOpts->compresstype values */ +typedef enum compressTypeOption +{ + COMPRESS_TYPE_NONE = 
0, + COMPRESS_TYPE_PGLZ = 1, + COMPRESS_TYPE_ZSTD = 2 +} compressTypeOption; + + /* page compress related reloptions. */ +typedef struct PageCompressOpts +{ + compressTypeOption compresstype; /* compress algorithm */ + int compresslevel; /* compress level */ + int compress_chunk_size; /* chunk size of compressed data */ + int compress_prealloc_chunks; /* prealloced chunks to store compressed data */ +} PageCompressOpts; + typedef struct StdRdOptions { int32 vl_len_; /* varlena header (do not touch directly!) */ @@ -305,6 +322,7 @@ typedef struct StdRdOptions int parallel_workers; /* max number of parallel workers */ bool vacuum_index_cleanup; /* enables index vacuuming and cleanup */ bool vacuum_truncate; /* enables vacuum to truncate a relation */ + PageCompressOpts compress; /* page compress related reloptions. */ } StdRdOptions; #define HEAP_MIN_FILLFACTOR 10 diff --git a/src/include/utils/relcache.h b/src/include/utils/relcache.h index 9a85b7dd57..259ec2a753 100644 --- a/src/include/utils/relcache.h +++ b/src/include/utils/relcache.h @@ -106,7 +106,8 @@ extern Relation RelationBuildLocalRelation(const char *relname, bool shared_relation, bool mapped_relation, char relpersistence, - char relkind); + char relkind, + Datum reloptions); /* * Routines to manage assignment of new relfilenode to a relation diff --git a/src/include/utils/spccache.h b/src/include/utils/spccache.h index 7e4ec69aa2..a33acf02f3 100644 --- a/src/include/utils/spccache.h +++ b/src/include/utils/spccache.h @@ -13,9 +13,12 @@ #ifndef SPCCACHE_H #define SPCCACHE_H +#include "utils/rel.h" + void get_tablespace_page_costs(Oid spcid, float8 *spc_random_page_cost, float8 *spc_seq_page_cost); int get_tablespace_io_concurrency(Oid spcid); int get_tablespace_maintenance_io_concurrency(Oid spcid); +PageCompressOpts *get_tablespace_compression_option(Oid spcid); #endif /* SPCCACHE_H */ diff --git a/src/test/Makefile b/src/test/Makefile index efb206aa75..35ef7ccac8 100644 --- a/src/test/Makefile +++ 
b/src/test/Makefile @@ -14,6 +14,10 @@ include $(top_builddir)/src/Makefile.global SUBDIRS = perl regress isolation modules authentication recovery subscription +ifeq ($(with_zstd),yes) +SUBDIRS += zstd +endif + # Test suites that are not safe by default but can be run if selected # by the user via the whitespace-separated list in variable # PG_TEST_EXTRA: diff --git a/src/test/README b/src/test/README index b5ccfc0cf6..63ad9903fc 100644 --- a/src/test/README +++ b/src/test/README @@ -45,3 +45,6 @@ subscription/ thread/ A thread-safety-testing utility used by configure + +zstd/ + Tests for page compression with zstd diff --git a/src/test/recovery/t/001_stream_rep.pl b/src/test/recovery/t/001_stream_rep.pl index 9e31a53de7..1e094896de 100644 --- a/src/test/recovery/t/001_stream_rep.pl +++ b/src/test/recovery/t/001_stream_rep.pl @@ -3,7 +3,7 @@ use strict; use warnings; use PostgresNode; use TestLib; -use Test::More tests => 36; +use Test::More tests => 38; # Initialize primary node my $node_primary = get_new_node('primary'); @@ -43,6 +43,9 @@ $node_standby_2->start; $node_primary->safe_psql('postgres', "CREATE TABLE tab_int AS SELECT generate_series(1,1002) AS a"); +$node_primary->safe_psql('postgres', + "CREATE TABLE tab_int_compressed WITH(compresstype=pglz)AS SELECT generate_series(1,1002) AS a"); + # Wait for standbys to catch up $node_primary->wait_for_catchup($node_standby_1, 'replay', $node_primary->lsn('insert')); @@ -54,11 +57,21 @@ my $result = print "standby 1: $result\n"; is($result, qq(1002), 'check streamed content on standby 1'); +my $result = + $node_standby_1->safe_psql('postgres', "SELECT count(*) FROM tab_int_compressed"); +print "standby 1: $result\n"; +is($result, qq(1002), 'check streamed content for compressed table on standby 1'); + $result = $node_standby_2->safe_psql('postgres', "SELECT count(*) FROM tab_int"); print "standby 2: $result\n"; is($result, qq(1002), 'check streamed content on standby 2'); +$result = + 
$node_standby_2->safe_psql('postgres', "SELECT count(*) FROM tab_int_compressed"); +print "standby 2: $result\n"; +is($result, qq(1002), 'check streamed content for compressed table on standby 2'); + # Check that only READ-only queries can run on standbys is($node_standby_1->psql('postgres', 'INSERT INTO tab_int VALUES (1)'), 3, 'read-only queries on standby 1'); diff --git a/src/test/regress/expected/page_compression.out b/src/test/regress/expected/page_compression.out new file mode 100644 index 0000000000..b877310c95 --- /dev/null +++ b/src/test/regress/expected/page_compression.out @@ -0,0 +1,1292 @@ +-- +-- Page compression tests +-- +-- +-- create compressed table +-- +CREATE TABLE tbl_pc(id int, c1 text) WITH(compresstype=pglz); +\d+ tbl_pc + Table "public.tbl_pc" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+---------+----------+--------------+------------- + id | integer | | | | plain | | + c1 | text | | | | extended | | +Options: compresstype=pglz + +DROP TABLE tbl_pc; +CREATE TABLE tbl_pc(id int, c1 text) WITH(compresstype=pglz, compress_chunk_size=1024); +\d+ tbl_pc + Table "public.tbl_pc" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+---------+----------+--------------+------------- + id | integer | | | | plain | | + c1 | text | | | | extended | | +Options: compresstype=pglz, compress_chunk_size=1024 + +DROP TABLE tbl_pc; +CREATE TABLE tbl_pc(id int, c1 text) WITH(compresstype=pglz, compress_chunk_size=2048); +\d+ tbl_pc + Table "public.tbl_pc" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+---------+----------+--------------+------------- + id | integer | | | | plain | | + c1 | text | | | | extended | | +Options: compresstype=pglz, compress_chunk_size=2048 + +DROP TABLE tbl_pc; +CREATE TABLE tbl_pc(id 
int, c1 text) WITH(compresstype=pglz, compresslevel=0, compress_chunk_size=4096, compress_prealloc_chunks=0); +\d+ tbl_pc + Table "public.tbl_pc" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+---------+----------+--------------+------------- + id | integer | | | | plain | | + c1 | text | | | | extended | | +Options: compresstype=pglz, compresslevel=0, compress_chunk_size=4096, compress_prealloc_chunks=0 + +DROP TABLE tbl_pc; +CREATE TABLE tbl_pc(id int PRIMARY KEY, c1 text) WITH(compresstype=pglz, compresslevel=1, compress_chunk_size=1024, compress_prealloc_chunks=2); +\d+ tbl_pc + Table "public.tbl_pc" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+---------+----------+--------------+------------- + id | integer | | not null | | plain | | + c1 | text | | | | extended | | +Indexes: + "tbl_pc_pkey" PRIMARY KEY, btree (id) +Options: compresstype=pglz, compresslevel=1, compress_chunk_size=1024, compress_prealloc_chunks=2 + +DROP TABLE tbl_pc; +CREATE TABLE tbl_pc(id int PRIMARY KEY, c1 text) WITH(compresstype=pglz, compresslevel=-1, compress_chunk_size=1024, compress_prealloc_chunks=7); +\d+ tbl_pc + Table "public.tbl_pc" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+---------+----------+--------------+------------- + id | integer | | not null | | plain | | + c1 | text | | | | extended | | +Indexes: + "tbl_pc_pkey" PRIMARY KEY, btree (id) +Options: compresstype=pglz, compresslevel=-1, compress_chunk_size=1024, compress_prealloc_chunks=7 + +DROP TABLE tbl_pc; +CREATE TABLE tbl_pc WITH(compresstype=pglz, compresslevel=1, compress_chunk_size=1024, compress_prealloc_chunks=7) + AS SELECT id, id::text c1 FROM generate_series(1,1000)id; +\d+ tbl_pc + Table "public.tbl_pc" + Column | Type | Collation | Nullable | Default | 
Storage | Stats target | Description +--------+---------+-----------+----------+---------+----------+--------------+------------- + id | integer | | | | plain | | + c1 | text | | | | extended | | +Options: compresstype=pglz, compresslevel=1, compress_chunk_size=1024, compress_prealloc_chunks=7 + +DROP TABLE tbl_pc; +CREATE TABLE tbl_pc(id int PRIMARY KEY WITH(compresstype=pglz, compresslevel=1, compress_chunk_size=1024, compress_prealloc_chunks=2), c1 text) WITH(compresstype=pglz, compresslevel=1, compress_chunk_size=1024, compress_prealloc_chunks=2); +CREATE TABLE tbl_pc2(LIKE tbl_pc INCLUDING ALL); +\d+ tbl_pc + Table "public.tbl_pc" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+---------+----------+--------------+------------- + id | integer | | not null | | plain | | + c1 | text | | | | extended | | +Indexes: + "tbl_pc_pkey" PRIMARY KEY, btree (id) WITH (compresstype=pglz, compresslevel='1', compress_chunk_size='1024', compress_prealloc_chunks='2') +Options: compresstype=pglz, compresslevel=1, compress_chunk_size=1024, compress_prealloc_chunks=2 + +DROP TABLE tbl_pc; +DROP TABLE tbl_pc2; +CREATE TABLE tbl_pc(id int PRIMARY KEY, c1 text) WITH(compresstype=none); +\d+ tbl_pc + Table "public.tbl_pc" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+---------+----------+--------------+------------- + id | integer | | not null | | plain | | + c1 | text | | | | extended | | +Indexes: + "tbl_pc_pkey" PRIMARY KEY, btree (id) +Options: compresstype=none + +DROP TABLE tbl_pc; +-- invalid storage parameter +CREATE TABLE tbl_pc_error(id int, c1 text) WITH(compresstype=xyz); -- fail +ERROR: invalid value for enum option "compresstype": xyz +DETAIL: Valid values are "none", "pglz" and "zstd". 
+CREATE TABLE tbl_pc_error(id int, c1 text) WITH(compresstype=pglz, compresslevel=xyz); -- fail +ERROR: invalid value for integer option "compresslevel": xyz +CREATE TABLE tbl_pc_error(id int, c1 text) WITH(compresstype=pglz, compress_chunk_size=1025); -- fail +ERROR: invalid compress_chunk_size 1025 , must be one of 1024, 2048 or 4096 for tbl_pc_error +CREATE TABLE tbl_pc_error(id int, c1 text) WITH(compresstype=pglz, compress_prealloc_chunks=8); -- fail +ERROR: value 8 out of bounds for option "compress_prealloc_chunks" +DETAIL: Valid values are between "0" and "7". +-- +-- create compressed index +-- +SET enable_seqscan = OFF; +CREATE TABLE tbl_pc(id int PRIMARY KEY WITH(compresstype=pglz, compresslevel=1, compress_chunk_size=1024, compress_prealloc_chunks=2), c1 text) WITH(compresstype=pglz, compresslevel=1, compress_chunk_size=1024, compress_prealloc_chunks=2); +CREATE INDEX tbl_pc_idx1 on tbl_pc(c1) WITH(compresstype=pglz, compresslevel=1, compress_chunk_size=1024, compress_prealloc_chunks=2); +\d+ tbl_pc + Table "public.tbl_pc" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+---------+----------+--------------+------------- + id | integer | | not null | | plain | | + c1 | text | | | | extended | | +Indexes: + "tbl_pc_pkey" PRIMARY KEY, btree (id) WITH (compresstype=pglz, compresslevel='1', compress_chunk_size='1024', compress_prealloc_chunks='2') + "tbl_pc_idx1" btree (c1) WITH (compresstype=pglz, compresslevel='1', compress_chunk_size='1024', compress_prealloc_chunks='2') +Options: compresstype=pglz, compresslevel=1, compress_chunk_size=1024, compress_prealloc_chunks=2 + +INSERT INTO tbl_pc SELECT id, id::text FROM generate_series(1,1000)id; +-- call CHECKPOINT to flush shared buffer to compressed relation file +CHECKPOINT; +-- run ANALYZE REINDEX VACUUM and CLUSTER on compressed table and index +ANALYZE tbl_pc; +SELECT count(*) FROM tbl_pc; + count +------- + 1000 +(1 row) + 
+SELECT * FROM tbl_pc WHERE c1='100'; + id | c1 +-----+----- + 100 | 100 +(1 row) + +EXPLAIN(COSTS off) SELECT * FROM tbl_pc WHERE c1='100'; + QUERY PLAN +---------------------------------------- + Index Scan using tbl_pc_idx1 on tbl_pc + Index Cond: (c1 = '100'::text) +(2 rows) + +REINDEX INDEX tbl_pc_idx1; +CHECKPOINT; +SELECT * FROM tbl_pc WHERE c1='100'; + id | c1 +-----+----- + 100 | 100 +(1 row) + +REINDEX TABLE tbl_pc; +CHECKPOINT; +SELECT * FROM tbl_pc WHERE c1='100'; + id | c1 +-----+----- + 100 | 100 +(1 row) + +VACUUM tbl_pc; +CHECKPOINT; +SELECT count(*) FROM tbl_pc; + count +------- + 1000 +(1 row) + +SELECT * FROM tbl_pc WHERE c1='100'; + id | c1 +-----+----- + 100 | 100 +(1 row) + +VACUUM FULL tbl_pc; +CHECKPOINT; +SELECT count(*) FROM tbl_pc; + count +------- + 1000 +(1 row) + +SELECT * FROM tbl_pc WHERE c1='100'; + id | c1 +-----+----- + 100 | 100 +(1 row) + +CLUSTER tbl_pc USING tbl_pc_idx1; +CHECKPOINT; +SELECT count(*) FROM tbl_pc; + count +------- + 1000 +(1 row) + +SELECT * FROM tbl_pc WHERE c1='100'; + id | c1 +-----+----- + 100 | 100 +(1 row) + +DROP INDEX tbl_pc_idx1; +-- check usage of compressed index with data +CREATE INDEX tbl_pc_idx1 on tbl_pc USING hash(c1) WITH(compresstype=pglz, compresslevel=1, compress_chunk_size=1024, compress_prealloc_chunks=2); +CHECKPOINT; +SELECT * FROM tbl_pc WHERE c1='100'; + id | c1 +-----+----- + 100 | 100 +(1 row) + +EXPLAIN(COSTS off) SELECT * FROM tbl_pc WHERE c1='100'; + QUERY PLAN +---------------------------------------- + Index Scan using tbl_pc_idx1 on tbl_pc + Index Cond: (c1 = '100'::text) +(2 rows) + +DROP INDEX tbl_pc_idx1; +CREATE INDEX tbl_pc_idx1 on tbl_pc USING gin((ARRAY[id])) WITH(compresstype=pglz, compresslevel=1, compress_chunk_size=1024, compress_prealloc_chunks=2); +CHECKPOINT; +SELECT * FROM tbl_pc WHERE ARRAY[id] @> ARRAY[100]; + id | c1 +-----+----- + 100 | 100 +(1 row) + +EXPLAIN(COSTS off) SELECT * FROM tbl_pc WHERE ARRAY[id] @> ARRAY[100]; + QUERY PLAN 
+------------------------------------------------------- + Bitmap Heap Scan on tbl_pc + Recheck Cond: (ARRAY[id] @> '{100}'::integer[]) + -> Bitmap Index Scan on tbl_pc_idx1 + Index Cond: (ARRAY[id] @> '{100}'::integer[]) +(4 rows) + +DROP INDEX tbl_pc_idx1; +CREATE INDEX tbl_pc_idx1 on tbl_pc USING gist((point(id,id))) WITH(compresstype=pglz, compresslevel=1, compress_chunk_size=1024, compress_prealloc_chunks=2); +CHECKPOINT; +SELECT * FROM tbl_pc ORDER BY point(id,id) <-> point(100,100) limit 1; + id | c1 +-----+----- + 100 | 100 +(1 row) + +EXPLAIN(COSTS off) SELECT * FROM tbl_pc ORDER BY point(id,id) <-> point(100,100) limit 1; + QUERY PLAN +-------------------------------------------------------------------------------------------------- + Limit + -> Index Scan using tbl_pc_idx1 on tbl_pc + Order By: (point((id)::double precision, (id)::double precision) <-> '(100,100)'::point) +(3 rows) + +DROP INDEX tbl_pc_idx1; +CREATE INDEX tbl_pc_idx1 on tbl_pc USING spgist(c1) WITH(compresstype=pglz, compresslevel=1, compress_chunk_size=1024, compress_prealloc_chunks=2); +CHECKPOINT; +SELECT * FROM tbl_pc WHERE c1='100'; + id | c1 +-----+----- + 100 | 100 +(1 row) + +EXPLAIN(COSTS off) SELECT * FROM tbl_pc WHERE c1='100'; + QUERY PLAN +---------------------------------------- + Index Scan using tbl_pc_idx1 on tbl_pc + Index Cond: (c1 = '100'::text) +(2 rows) + +DROP INDEX tbl_pc_idx1; +-- brin index does not support compression +CREATE INDEX tbl_pc_idx1 on tbl_pc USING brin(c1) WITH(compresstype=pglz, compresslevel=1, compress_chunk_size=1024, compress_prealloc_chunks=2); -- fail +ERROR: unrecognized parameter "compresstype" +DROP TABLE tbl_pc; +RESET enable_seqscan; +-- +-- alter table and index +-- +-- ALTER TABLE +-- ALTER compresstype and compress_chunk_size currently is not supported +CREATE TABLE tbl_pc(id int, c1 text); +ALTER TABLE tbl_pc SET(compresstype=pglz); -- fail +ERROR: change compresstype OPTION is not supported +DROP TABLE tbl_pc; +CREATE TABLE 
tbl_pc(id int PRIMARY KEY, c1 text) WITH(compresstype=pglz, compresslevel=1, compress_chunk_size=1024, compress_prealloc_chunks=2); +ALTER TABLE tbl_pc SET(compresstype=none); -- fail +ERROR: change compresstype OPTION is not supported +ALTER TABLE tbl_pc SET(compress_chunk_size=2048); -- fail +ERROR: change compress_chunk_size OPTION is not supported +ALTER TABLE tbl_pc SET(compress_prealloc_chunks=8); -- fail +ERROR: value 8 out of bounds for option "compress_prealloc_chunks" +DETAIL: Valid values are between "0" and "7". +ALTER TABLE tbl_pc SET(compresslevel=2, compress_prealloc_chunks=0); -- ok +\d+ tbl_pc + Table "public.tbl_pc" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+---------+----------+--------------+------------- + id | integer | | not null | | plain | | + c1 | text | | | | extended | | +Indexes: + "tbl_pc_pkey" PRIMARY KEY, btree (id) +Options: compresstype=pglz, compress_chunk_size=1024, compresslevel=2, compress_prealloc_chunks=0 + +ALTER TABLE tbl_pc RESET(compresstype); -- fail +ERROR: change compresstype OPTION is not supported +ALTER TABLE tbl_pc RESET(compress_chunk_size); -- fail +ERROR: change compress_chunk_size OPTION is not supported +ALTER TABLE tbl_pc RESET(compresslevel); -- ok +ALTER TABLE tbl_pc RESET(compress_prealloc_chunks); -- ok +\d+ tbl_pc + Table "public.tbl_pc" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+---------+----------+--------------+------------- + id | integer | | not null | | plain | | + c1 | text | | | | extended | | +Indexes: + "tbl_pc_pkey" PRIMARY KEY, btree (id) +Options: compresstype=pglz, compress_chunk_size=1024 + +CHECKPOINT; +SELECT count(*) FROM tbl_pc; + count +------- + 0 +(1 row) + +-- ALTER INDEX +-- ALTER compresstype and compress_chunk_size currently is not supported +CREATE INDEX tbl_pc_idx1 on tbl_pc USING btree(c1); +ALTER 
INDEX tbl_pc_idx1 SET(compresstype=pglz); -- fail +ERROR: change compresstype OPTION is not supported +DROP INDEX tbl_pc_idx1; +CREATE INDEX tbl_pc_idx1 on tbl_pc USING btree(c1) WITH(compresstype=pglz, compresslevel=1, compress_chunk_size=1024, compress_prealloc_chunks=2); +ALTER INDEX tbl_pc_idx1 SET(compresstype=none); -- fail +ERROR: change compresstype OPTION is not supported +ALTER INDEX tbl_pc_idx1 SET(compress_chunk_size=2048); -- fail +ERROR: change compress_chunk_size OPTION is not supported +ALTER INDEX tbl_pc_idx1 SET(compress_prealloc_chunks=8); -- fail +ERROR: value 8 out of bounds for option "compress_prealloc_chunks" +DETAIL: Valid values are between "0" and "7". +ALTER INDEX tbl_pc_idx1 SET(compresslevel=2, compress_prealloc_chunks=0); -- ok +\d+ tbl_pc + Table "public.tbl_pc" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+---------+----------+--------------+------------- + id | integer | | not null | | plain | | + c1 | text | | | | extended | | +Indexes: + "tbl_pc_pkey" PRIMARY KEY, btree (id) + "tbl_pc_idx1" btree (c1) WITH (compresstype=pglz, compress_chunk_size='1024', compresslevel='2', compress_prealloc_chunks='0') +Options: compresstype=pglz, compress_chunk_size=1024 + +ALTER INDEX tbl_pc_idx1 RESET(compresstype); -- fail +ERROR: change compresstype OPTION is not supported +ALTER INDEX tbl_pc_idx1 RESET(compress_chunk_size); -- fail +ERROR: change compress_chunk_size OPTION is not supported +ALTER INDEX tbl_pc_idx1 RESET(compresslevel); -- ok +ALTER INDEX tbl_pc_idx1 RESET(compress_prealloc_chunks); -- ok +\d+ tbl_pc + Table "public.tbl_pc" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+---------+----------+--------------+------------- + id | integer | | not null | | plain | | + c1 | text | | | | extended | | +Indexes: + "tbl_pc_pkey" PRIMARY KEY, btree (id) + "tbl_pc_idx1" 
btree (c1) WITH (compresstype=pglz, compress_chunk_size='1024') +Options: compresstype=pglz, compress_chunk_size=1024 + +CHECKPOINT; +SELECT * FROM tbl_pc WHERE c1='100'; + id | c1 +----+---- +(0 rows) + +EXPLAIN(COSTS off) SELECT * FROM tbl_pc WHERE c1='100'; + QUERY PLAN +---------------------------------------- + Bitmap Heap Scan on tbl_pc + Recheck Cond: (c1 = '100'::text) + -> Bitmap Index Scan on tbl_pc_idx1 + Index Cond: (c1 = '100'::text) +(4 rows) + +-- alter hash index +CREATE INDEX tbl_pc_idx_hash on tbl_pc USING hash(c1) WITH(compresstype=pglz, compresslevel=1, compress_chunk_size=1024, compress_prealloc_chunks=2); +ALTER INDEX tbl_pc_idx_hash SET(compresstype=none); -- fail +ERROR: change compresstype OPTION is not supported +ALTER INDEX tbl_pc_idx_hash SET(compress_chunk_size=2048); -- fail +ERROR: change compress_chunk_size OPTION is not supported +ALTER INDEX tbl_pc_idx_hash SET(compress_prealloc_chunks=8); -- fail +ERROR: value 8 out of bounds for option "compress_prealloc_chunks" +DETAIL: Valid values are between "0" and "7". +ALTER INDEX tbl_pc_idx_hash SET(compresslevel=2, compress_prealloc_chunks=0); -- ok +\d+ tbl_pc_idx_hash + Index "public.tbl_pc_idx_hash" + Column | Type | Key? | Definition | Storage | Stats target +--------+---------+------+------------+---------+-------------- + c1 | integer | yes | c1 | plain | +hash, for table "public.tbl_pc" +Options: compresstype=pglz, compress_chunk_size=1024, compresslevel=2, compress_prealloc_chunks=0 + +ALTER INDEX tbl_pc_idx_hash RESET(compresstype); -- fail +ERROR: change compresstype OPTION is not supported +ALTER INDEX tbl_pc_idx_hash RESET(compress_chunk_size); -- fail +ERROR: change compress_chunk_size OPTION is not supported +ALTER INDEX tbl_pc_idx_hash RESET(compresslevel); -- ok +ALTER INDEX tbl_pc_idx_hash RESET(compress_prealloc_chunks); -- ok +\d+ tbl_pc_idx_hash + Index "public.tbl_pc_idx_hash" + Column | Type | Key? 
| Definition | Storage | Stats target +--------+---------+------+------------+---------+-------------- + c1 | integer | yes | c1 | plain | +hash, for table "public.tbl_pc" +Options: compresstype=pglz, compress_chunk_size=1024 + +-- alter gin index +CREATE INDEX tbl_pc_idx_gin on tbl_pc USING gin((ARRAY[id])) WITH(compresstype=pglz, compresslevel=1, compress_chunk_size=1024, compress_prealloc_chunks=2); +ALTER INDEX tbl_pc_idx_gin SET(compresstype=none); -- fail +ERROR: change compresstype OPTION is not supported +ALTER INDEX tbl_pc_idx_gin SET(compress_chunk_size=2048); -- fail +ERROR: change compress_chunk_size OPTION is not supported +ALTER INDEX tbl_pc_idx_gin SET(compress_prealloc_chunks=8); -- fail +ERROR: value 8 out of bounds for option "compress_prealloc_chunks" +DETAIL: Valid values are between "0" and "7". +ALTER INDEX tbl_pc_idx_gin SET(compresslevel=2, compress_prealloc_chunks=0); -- ok +\d+ tbl_pc_idx_gin + Index "public.tbl_pc_idx_gin" + Column | Type | Key? | Definition | Storage | Stats target +--------+---------+------+-------------+---------+-------------- + array | integer | yes | (ARRAY[id]) | plain | +gin, for table "public.tbl_pc" +Options: compresstype=pglz, compress_chunk_size=1024, compresslevel=2, compress_prealloc_chunks=0 + +ALTER INDEX tbl_pc_idx_gin RESET(compresstype); -- fail +ERROR: change compresstype OPTION is not supported +ALTER INDEX tbl_pc_idx_gin RESET(compress_chunk_size); -- fail +ERROR: change compress_chunk_size OPTION is not supported +ALTER INDEX tbl_pc_idx_gin RESET(compresslevel); -- ok +ALTER INDEX tbl_pc_idx_gin RESET(compress_prealloc_chunks); -- ok +\d+ tbl_pc_idx_gin + Index "public.tbl_pc_idx_gin" + Column | Type | Key? 
| Definition | Storage | Stats target +--------+---------+------+-------------+---------+-------------- + array | integer | yes | (ARRAY[id]) | plain | +gin, for table "public.tbl_pc" +Options: compresstype=pglz, compress_chunk_size=1024 + +-- alter gist index +CREATE INDEX tbl_pc_idx_gist on tbl_pc USING gist((point(id,id))) WITH(compresstype=pglz, compresslevel=1, compress_chunk_size=1024, compress_prealloc_chunks=2); +ALTER INDEX tbl_pc_idx_gist SET(compresstype=none); -- fail +ERROR: change compresstype OPTION is not supported +ALTER INDEX tbl_pc_idx_gist SET(compress_chunk_size=2048); -- fail +ERROR: change compress_chunk_size OPTION is not supported +ALTER INDEX tbl_pc_idx_gist SET(compress_prealloc_chunks=8); -- fail +ERROR: value 8 out of bounds for option "compress_prealloc_chunks" +DETAIL: Valid values are between "0" and "7". +ALTER INDEX tbl_pc_idx_gist SET(compresslevel=2, compress_prealloc_chunks=0); -- ok +\d+ tbl_pc_idx_gist + Index "public.tbl_pc_idx_gist" + Column | Type | Key? | Definition | Storage | Stats target +--------+------+------+---------------------------------------------------+---------+-------------- + point | box | yes | point(id::double precision, id::double precision) | plain | +gist, for table "public.tbl_pc" +Options: compresstype=pglz, compress_chunk_size=1024, compresslevel=2, compress_prealloc_chunks=0 + +ALTER INDEX tbl_pc_idx_gist RESET(compresstype); -- fail +ERROR: change compresstype OPTION is not supported +ALTER INDEX tbl_pc_idx_gist RESET(compress_chunk_size); -- fail +ERROR: change compress_chunk_size OPTION is not supported +ALTER INDEX tbl_pc_idx_gist RESET(compresslevel); -- ok +ALTER INDEX tbl_pc_idx_gist RESET(compress_prealloc_chunks); -- ok +\d+ tbl_pc_idx_gist + Index "public.tbl_pc_idx_gist" + Column | Type | Key? 
| Definition | Storage | Stats target +--------+------+------+---------------------------------------------------+---------+-------------- + point | box | yes | point(id::double precision, id::double precision) | plain | +gist, for table "public.tbl_pc" +Options: compresstype=pglz, compress_chunk_size=1024 + +-- alter spgist index +CREATE INDEX tbl_pc_idx_spgist on tbl_pc USING spgist(c1) WITH(compresstype=pglz, compresslevel=1, compress_chunk_size=1024, compress_prealloc_chunks=2); +ALTER INDEX tbl_pc_idx_spgist SET(compresstype=none); -- fail +ERROR: change compresstype OPTION is not supported +ALTER INDEX tbl_pc_idx_spgist SET(compress_chunk_size=2048); -- fail +ERROR: change compress_chunk_size OPTION is not supported +ALTER INDEX tbl_pc_idx_spgist SET(compress_prealloc_chunks=8); -- fail +ERROR: value 8 out of bounds for option "compress_prealloc_chunks" +DETAIL: Valid values are between "0" and "7". +ALTER INDEX tbl_pc_idx_spgist SET(compresslevel=2, compress_prealloc_chunks=0); -- ok +\d+ tbl_pc_idx_spgist + Index "public.tbl_pc_idx_spgist" + Column | Type | Key? | Definition | Storage | Stats target +--------+------+------+------------+----------+-------------- + c1 | text | yes | c1 | extended | +spgist, for table "public.tbl_pc" +Options: compresstype=pglz, compress_chunk_size=1024, compresslevel=2, compress_prealloc_chunks=0 + +ALTER INDEX tbl_pc_idx_spgist RESET(compresstype); -- fail +ERROR: change compresstype OPTION is not supported +ALTER INDEX tbl_pc_idx_spgist RESET(compress_chunk_size); -- fail +ERROR: change compress_chunk_size OPTION is not supported +ALTER INDEX tbl_pc_idx_spgist RESET(compresslevel); -- ok +ALTER INDEX tbl_pc_idx_spgist RESET(compress_prealloc_chunks); -- ok +\d+ tbl_pc_idx_spgist + Index "public.tbl_pc_idx_spgist" + Column | Type | Key? 
| Definition | Storage | Stats target +--------+------+------+------------+----------+-------------- + c1 | text | yes | c1 | extended | +spgist, for table "public.tbl_pc" +Options: compresstype=pglz, compress_chunk_size=1024 + +-- alter brin index (do not support compression) +CREATE INDEX tbl_pc_idx_brin on tbl_pc USING brin(c1); +ALTER INDEX tbl_pc_idx_brin SET(compress_prealloc_chunks=3); -- fail +ERROR: unrecognized parameter "compress_prealloc_chunks" +DROP TABLE tbl_pc; +-- +-- partitioned table and index +-- +-- partition table does not support compression, but index of partition table and its child tables can use compression +CREATE TABLE tbl_pc_part (id int, c1 text) PARTITION BY RANGE (id) WITH(compresstype=pglz); -- fail +ERROR: unrecognized parameter "compresstype" +CREATE TABLE tbl_pc_part (id int, c1 text) PARTITION BY RANGE (id); +CREATE TABLE tbl_pc_part_1 PARTITION OF tbl_pc_part FOR VALUES FROM (1) TO (1001); +CREATE TABLE tbl_pc_part_2 PARTITION OF tbl_pc_part FOR VALUES FROM (1001) TO (2001) WITH(compresstype=pglz, compresslevel=1, compress_chunk_size=1024, compress_prealloc_chunks=2); +CREATE INDEX part_id_idx ON tbl_pc_part(id) WITH(compresstype=pglz, compresslevel=2, compress_chunk_size=1024, compress_prealloc_chunks=2); +CREATE TABLE tbl_pc_part_3 PARTITION OF tbl_pc_part FOR VALUES FROM (2001) TO (3001); +CREATE INDEX part3_id_idx1 ON tbl_pc_part_3(id) WITH(compresstype=pglz, compresslevel=2, compress_chunk_size=1024, compress_prealloc_chunks=2); +\d+ tbl_pc_part + Partitioned table "public.tbl_pc_part" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+---------+----------+--------------+------------- + id | integer | | | | plain | | + c1 | text | | | | extended | | +Partition key: RANGE (id) +Indexes: + "part_id_idx" btree (id) WITH (compresstype=pglz, compresslevel='2', compress_chunk_size='1024', compress_prealloc_chunks='2') +Partitions: tbl_pc_part_1 FOR 
VALUES FROM (1) TO (1001), + tbl_pc_part_2 FOR VALUES FROM (1001) TO (2001), + tbl_pc_part_3 FOR VALUES FROM (2001) TO (3001) + +\d+ part_id_idx + Partitioned index "public.part_id_idx" + Column | Type | Key? | Definition | Storage | Stats target +--------+---------+------+------------+---------+-------------- + id | integer | yes | id | plain | +btree, for table "public.tbl_pc_part" +Partitions: tbl_pc_part_1_id_idx, + tbl_pc_part_2_id_idx, + tbl_pc_part_3_id_idx +Options: compresstype=pglz, compresslevel=2, compress_chunk_size=1024, compress_prealloc_chunks=2 + +\d+ tbl_pc_part_1 + Table "public.tbl_pc_part_1" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+---------+----------+--------------+------------- + id | integer | | | | plain | | + c1 | text | | | | extended | | +Partition of: tbl_pc_part FOR VALUES FROM (1) TO (1001) +Partition constraint: ((id IS NOT NULL) AND (id >= 1) AND (id < 1001)) +Indexes: + "tbl_pc_part_1_id_idx" btree (id) WITH (compresstype=pglz, compresslevel='2', compress_chunk_size='1024', compress_prealloc_chunks='2') + +\d+ tbl_pc_part_2 + Table "public.tbl_pc_part_2" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+---------+----------+--------------+------------- + id | integer | | | | plain | | + c1 | text | | | | extended | | +Partition of: tbl_pc_part FOR VALUES FROM (1001) TO (2001) +Partition constraint: ((id IS NOT NULL) AND (id >= 1001) AND (id < 2001)) +Indexes: + "tbl_pc_part_2_id_idx" btree (id) WITH (compresstype=pglz, compresslevel='2', compress_chunk_size='1024', compress_prealloc_chunks='2') +Options: compresstype=pglz, compresslevel=1, compress_chunk_size=1024, compress_prealloc_chunks=2 + +\d+ tbl_pc_part_3 + Table "public.tbl_pc_part_3" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description 
+--------+---------+-----------+----------+---------+----------+--------------+------------- + id | integer | | | | plain | | + c1 | text | | | | extended | | +Partition of: tbl_pc_part FOR VALUES FROM (2001) TO (3001) +Partition constraint: ((id IS NOT NULL) AND (id >= 2001) AND (id < 3001)) +Indexes: + "part3_id_idx1" btree (id) WITH (compresstype=pglz, compresslevel='2', compress_chunk_size='1024', compress_prealloc_chunks='2') + "tbl_pc_part_3_id_idx" btree (id) WITH (compresstype=pglz, compresslevel='2', compress_chunk_size='1024', compress_prealloc_chunks='2') + +INSERT INTO tbl_pc_part SELECT id, id::text FROM generate_series(1,3000)id; +CHECKPOINT; +SELECT count(*) FROM tbl_pc_part; + count +------- + 3000 +(1 row) + +SELECT * FROM tbl_pc_part WHERE id=100; + id | c1 +-----+----- + 100 | 100 +(1 row) + +SELECT * FROM tbl_pc_part WHERE id=1100; + id | c1 +------+------ + 1100 | 1100 +(1 row) + +SELECT * FROM tbl_pc_part WHERE id=2100; + id | c1 +------+------ + 2100 | 2100 +(1 row) + +ALTER TABLE tbl_pc_part SET(compresstype=pglz); -- fail +ERROR: unrecognized parameter "compresstype" +ALTER TABLE tbl_pc_part_1 SET(compresstype=pglz); -- fail +ERROR: change compresstype OPTION is not supported +ALTER TABLE tbl_pc_part_2 SET(compresstype=none); -- fail +ERROR: change compresstype OPTION is not supported +ALTER TABLE tbl_pc_part_2 SET(compress_chunk_size=2048); -- fail +ERROR: change compress_chunk_size OPTION is not supported +ALTER TABLE tbl_pc_part_2 SET(compress_prealloc_chunks=8); -- fail +ERROR: value 8 out of bounds for option "compress_prealloc_chunks" +DETAIL: Valid values are between "0" and "7". 
+ALTER TABLE tbl_pc_part_2 SET(compresslevel=2, compress_prealloc_chunks=0); -- ok +\d+ tbl_pc_part_2 + Table "public.tbl_pc_part_2" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+---------+----------+--------------+------------- + id | integer | | | | plain | | + c1 | text | | | | extended | | +Partition of: tbl_pc_part FOR VALUES FROM (1001) TO (2001) +Partition constraint: ((id IS NOT NULL) AND (id >= 1001) AND (id < 2001)) +Indexes: + "tbl_pc_part_2_id_idx" btree (id) WITH (compresstype=pglz, compresslevel='2', compress_chunk_size='1024', compress_prealloc_chunks='2') +Options: compresstype=pglz, compress_chunk_size=1024, compresslevel=2, compress_prealloc_chunks=0 + +ALTER INDEX part3_id_idx1 SET(compresstype=none); -- fail +ERROR: change compresstype OPTION is not supported +ALTER INDEX part3_id_idx1 SET(compress_chunk_size=2048); -- fail +ERROR: change compress_chunk_size OPTION is not supported +ALTER INDEX part3_id_idx1 SET(compress_prealloc_chunks=8); -- fail +ERROR: value 8 out of bounds for option "compress_prealloc_chunks" +DETAIL: Valid values are between "0" and "7". +ALTER INDEX part3_id_idx1 SET(compresslevel=2, compress_prealloc_chunks=0); -- ok +\d+ part3_id_idx1 + Index "public.part3_id_idx1" + Column | Type | Key? 
| Definition | Storage | Stats target +--------+---------+------+------------+---------+-------------- + id | integer | yes | id | plain | +btree, for table "public.tbl_pc_part_3" +Options: compresstype=pglz, compress_chunk_size=1024, compresslevel=2, compress_prealloc_chunks=0 + +ALTER INDEX part_id_idx SET(compresstype=pglz); -- fail +ERROR: "part_id_idx" is not a table, view, materialized view, or index +ALTER INDEX part_id_idx SET(compress_chunk_size=2048); -- fail +ERROR: "part_id_idx" is not a table, view, materialized view, or index +ALTER INDEX part_id_idx SET(compress_prealloc_chunks=8); -- fail +ERROR: "part_id_idx" is not a table, view, materialized view, or index +ALTER INDEX part_id_idx SET(compresslevel=2, compress_prealloc_chunks=0); -- fail +ERROR: "part_id_idx" is not a table, view, materialized view, or index +\d+ tbl_pc_part + Partitioned table "public.tbl_pc_part" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+---------+----------+--------------+------------- + id | integer | | | | plain | | + c1 | text | | | | extended | | +Partition key: RANGE (id) +Indexes: + "part_id_idx" btree (id) WITH (compresstype=pglz, compresslevel='2', compress_chunk_size='1024', compress_prealloc_chunks='2') +Partitions: tbl_pc_part_1 FOR VALUES FROM (1) TO (1001), + tbl_pc_part_2 FOR VALUES FROM (1001) TO (2001), + tbl_pc_part_3 FOR VALUES FROM (2001) TO (3001) + +INSERT INTO tbl_pc_part SELECT id, id::text FROM generate_series(1,3000)id; +CHECKPOINT; +SELECT count(*) FROM tbl_pc_part; + count +------- + 6000 +(1 row) + +SELECT * FROM tbl_pc_part WHERE id=100; + id | c1 +-----+----- + 100 | 100 + 100 | 100 +(2 rows) + +SELECT * FROM tbl_pc_part WHERE id=1100; + id | c1 +------+------ + 1100 | 1100 + 1100 | 1100 +(2 rows) + +SELECT * FROM tbl_pc_part WHERE id=2100; + id | c1 +------+------ + 2100 | 2100 + 2100 | 2100 +(2 rows) + +DROP TABLE tbl_pc_part; +-- +-- default tablespace 
store parameter +-- +-- can not use compression on global tablespace +ALTER TABLESPACE pg_default SET(default_compresstype=xxx); -- fail +ERROR: invalid value for enum option "default_compresstype": xxx +DETAIL: Valid values are "none", "pglz" and "zstd". +ALTER TABLESPACE pg_default SET(default_compress_chunk_size=1023); -- fail +ERROR: value 1023 out of bounds for option "default_compress_chunk_size" +DETAIL: Valid values are between "1024" and "4096". +ALTER TABLESPACE pg_default SET(default_compress_chunk_size=4097); -- fail +ERROR: value 4097 out of bounds for option "default_compress_chunk_size" +DETAIL: Valid values are between "1024" and "4096". +ALTER TABLESPACE pg_default SET(default_compress_prealloc_chunks=-1); -- fail +ERROR: value -1 out of bounds for option "default_compress_prealloc_chunks" +DETAIL: Valid values are between "0" and "7". +ALTER TABLESPACE pg_default SET(default_compress_prealloc_chunks=8); -- fail +ERROR: value 8 out of bounds for option "default_compress_prealloc_chunks" +DETAIL: Valid values are between "0" and "7". 
+ALTER TABLESPACE pg_default SET(default_compresstype=pglz, default_compresslevel=2, default_compress_chunk_size=1024, default_compress_prealloc_chunks=2); + -- ok +-- table and index(btree,hash,gin,gist,spgist) inherit default compression options from it's tablespace +CREATE TABLE tbl_pc(id int PRIMARY KEY, c1 text); +CREATE INDEX tbl_pc_idx_btree on tbl_pc(c1); +CREATE INDEX tbl_pc_idx_hash on tbl_pc USING hash(c1); +CREATE INDEX tbl_pc_idx_gin on tbl_pc USING gin((ARRAY[id])); +CREATE INDEX tbl_pc_idx_gist on tbl_pc USING gist((point(id,id))); +CREATE INDEX tbl_pc_idx_spgist on tbl_pc USING spgist(c1); +CREATE INDEX tbl_pc_idx_brin on tbl_pc USING brin(c1); +\d+ tbl_pc + Table "public.tbl_pc" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+---------+----------+--------------+------------- + id | integer | | not null | | plain | | + c1 | text | | | | extended | | +Indexes: + "tbl_pc_pkey" PRIMARY KEY, btree (id) WITH (compresstype=pglz, compresslevel='2', compress_chunk_size='1024', compress_prealloc_chunks='2') + "tbl_pc_idx_brin" brin (c1) + "tbl_pc_idx_btree" btree (c1) WITH (compresstype=pglz, compresslevel='2', compress_chunk_size='1024', compress_prealloc_chunks='2') + "tbl_pc_idx_gin" gin ((ARRAY[id])) WITH (compresstype=pglz, compresslevel='2', compress_chunk_size='1024', compress_prealloc_chunks='2') + "tbl_pc_idx_gist" gist (point(id::double precision, id::double precision)) WITH (compresstype=pglz, compresslevel='2', compress_chunk_size='1024', compress_prealloc_chunks='2') + "tbl_pc_idx_hash" hash (c1) WITH (compresstype=pglz, compresslevel='2', compress_chunk_size='1024', compress_prealloc_chunks='2') + "tbl_pc_idx_spgist" spgist (c1) WITH (compresstype=pglz, compresslevel='2', compress_chunk_size='1024', compress_prealloc_chunks='2') +Options: compresstype=pglz, compresslevel=2, compress_chunk_size=1024, compress_prealloc_chunks=2 + +SELECT size FROM 
pg_stat_file(pg_relation_filepath('tbl_pc') || '_pca'); + size +--------- + 4794000 +(1 row) + +SELECT size FROM pg_stat_file(pg_relation_filepath('tbl_pc_idx_btree') || '_pca'); + size +--------- + 4794000 +(1 row) + +SELECT size FROM pg_stat_file(pg_relation_filepath('tbl_pc_idx_hash') || '_pca'); + size +--------- + 4794000 +(1 row) + +SELECT size FROM pg_stat_file(pg_relation_filepath('tbl_pc_idx_gin') || '_pca'); + size +--------- + 4794000 +(1 row) + +SELECT size FROM pg_stat_file(pg_relation_filepath('tbl_pc_idx_gist') || '_pca'); + size +--------- + 4794000 +(1 row) + +SELECT size FROM pg_stat_file(pg_relation_filepath('tbl_pc_idx_spgist') || '_pca'); + size +--------- + 4794000 +(1 row) + +SELECT size FROM pg_stat_file(pg_relation_filepath('tbl_pc_idx_brin') || '_pca', true); + size +------ + +(1 row) + +-- toast relation will not be compressed +SELECT reltoastrelid FROM pg_class WHERE oid='tbl_pc'::regclass \gset +SELECT reloptions FROM pg_class where oid=:reltoastrelid; + reloptions +------------ + +(1 row) + +SELECT size FROM pg_stat_file(pg_relation_filepath(:reltoastrelid) || '_pca', true); + size +------ + +(1 row) + +CREATE TABLE tbl_pc1 AS SELECT * FROM tbl_pc; +\d+ tbl_pc1 + Table "public.tbl_pc1" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+---------+----------+--------------+------------- + id | integer | | | | plain | | + c1 | text | | | | extended | | +Options: compresstype=pglz, compresslevel=2, compress_chunk_size=1024, compress_prealloc_chunks=2 + +ALTER TABLESPACE pg_default RESET(default_compresstype, default_compresslevel, default_compress_chunk_size, default_compress_prealloc_chunks); +CREATE INDEX tbl_pc_idx2 on tbl_pc(c1); +\d+ tbl_pc + Table "public.tbl_pc" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+---------+----------+--------------+------------- + id | integer 
| | not null | | plain | | + c1 | text | | | | extended | | +Indexes: + "tbl_pc_pkey" PRIMARY KEY, btree (id) WITH (compresstype=pglz, compresslevel='2', compress_chunk_size='1024', compress_prealloc_chunks='2') + "tbl_pc_idx2" btree (c1) + "tbl_pc_idx_brin" brin (c1) + "tbl_pc_idx_btree" btree (c1) WITH (compresstype=pglz, compresslevel='2', compress_chunk_size='1024', compress_prealloc_chunks='2') + "tbl_pc_idx_gin" gin ((ARRAY[id])) WITH (compresstype=pglz, compresslevel='2', compress_chunk_size='1024', compress_prealloc_chunks='2') + "tbl_pc_idx_gist" gist (point(id::double precision, id::double precision)) WITH (compresstype=pglz, compresslevel='2', compress_chunk_size='1024', compress_prealloc_chunks='2') + "tbl_pc_idx_hash" hash (c1) WITH (compresstype=pglz, compresslevel='2', compress_chunk_size='1024', compress_prealloc_chunks='2') + "tbl_pc_idx_spgist" spgist (c1) WITH (compresstype=pglz, compresslevel='2', compress_chunk_size='1024', compress_prealloc_chunks='2') +Options: compresstype=pglz, compresslevel=2, compress_chunk_size=1024, compress_prealloc_chunks=2 + +CREATE TABLE tbl_pc2(LIKE tbl_pc); +\d+ tbl_pc2 + Table "public.tbl_pc2" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+---------+----------+--------------+------------- + id | integer | | not null | | plain | | + c1 | text | | | | extended | | + +CREATE TABLE tbl_pc3(LIKE tbl_pc INCLUDING ALL); +\d+ tbl_pc3 + Table "public.tbl_pc3" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+---------+----------+--------------+------------- + id | integer | | not null | | plain | | + c1 | text | | | | extended | | +Indexes: + "tbl_pc3_pkey" PRIMARY KEY, btree (id) WITH (compresstype=pglz, compresslevel='2', compress_chunk_size='1024', compress_prealloc_chunks='2') + "tbl_pc3_array_idx" gin ((ARRAY[id])) WITH (compresstype=pglz, 
compresslevel='2', compress_chunk_size='1024', compress_prealloc_chunks='2') + "tbl_pc3_c1_idx" btree (c1) WITH (compresstype=pglz, compresslevel='2', compress_chunk_size='1024', compress_prealloc_chunks='2') + "tbl_pc3_c1_idx1" hash (c1) WITH (compresstype=pglz, compresslevel='2', compress_chunk_size='1024', compress_prealloc_chunks='2') + "tbl_pc3_c1_idx2" spgist (c1) WITH (compresstype=pglz, compresslevel='2', compress_chunk_size='1024', compress_prealloc_chunks='2') + "tbl_pc3_c1_idx3" brin (c1) + "tbl_pc3_c1_idx4" btree (c1) + "tbl_pc3_point_idx" gist (point(id::double precision, id::double precision)) WITH (compresstype=pglz, compresslevel='2', compress_chunk_size='1024', compress_prealloc_chunks='2') + +CREATE TABLE tbl_pc4 AS SELECT * FROM tbl_pc; +\d+ tbl_pc4 + Table "public.tbl_pc4" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+---------+----------+--------------+------------- + id | integer | | | | plain | | + c1 | text | | | | extended | | + +DROP TABLE tbl_pc; +DROP TABLE tbl_pc1; +DROP TABLE tbl_pc2; +DROP TABLE tbl_pc3; +DROP TABLE tbl_pc4; +ALTER TABLESPACE pg_default SET(default_compresstype=pglz, default_compresslevel=2, default_compress_chunk_size=1024, default_compress_prealloc_chunks=2); +CREATE TABLE tbl_pc(id int PRIMARY KEY, c1 text) WITH(compresstype=none); +CREATE INDEX tbl_pc_idx1 on tbl_pc(c1) WITH(compresstype=none); +\d+ tbl_pc + Table "public.tbl_pc" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+---------+----------+--------------+------------- + id | integer | | not null | | plain | | + c1 | text | | | | extended | | +Indexes: + "tbl_pc_pkey" PRIMARY KEY, btree (id) WITH (compresstype=pglz, compresslevel='2', compress_chunk_size='1024', compress_prealloc_chunks='2') + "tbl_pc_idx1" btree (c1) WITH (compresslevel='2', compress_chunk_size='1024', 
compress_prealloc_chunks='2', compresstype='none') +Options: compresslevel=2, compress_chunk_size=1024, compress_prealloc_chunks=2, compresstype=none + +INSERT INTO tbl_pc SELECT id, id::text FROM generate_series(1,1000)id; +CHECKPOINT; +SELECT count(*) FROM tbl_pc; + count +------- + 1000 +(1 row) + +SELECT * FROM tbl_pc WHERE c1='100'; + id | c1 +-----+----- + 100 | 100 +(1 row) + +DROP TABLE tbl_pc; +-- tablespace & partitioned table +CREATE TABLE tbl_pc_part (id int, c1 text) PARTITION BY RANGE (id); +CREATE TABLE tbl_pc_part_1 PARTITION OF tbl_pc_part FOR VALUES FROM (1) TO (1001); +CREATE TABLE tbl_pc_part_2 PARTITION OF tbl_pc_part FOR VALUES FROM (1001) TO (2001) WITH(compresstype=pglz); +CREATE INDEX part_id_idx ON tbl_pc_part(id) WITH(compresstype=pglz, compresslevel=1, compress_chunk_size=1024); +CREATE TABLE tbl_pc_part_3 PARTITION OF tbl_pc_part FOR VALUES FROM (2001) TO (3001) WITH(compresstype=none); +CREATE INDEX part3_id_idx1 ON tbl_pc_part_3(id) WITH(compresstype=pglz, compress_chunk_size=1024, compress_prealloc_chunks=3); +\d+ tbl_pc_part + Partitioned table "public.tbl_pc_part" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+---------+----------+--------------+------------- + id | integer | | | | plain | | + c1 | text | | | | extended | | +Partition key: RANGE (id) +Indexes: + "part_id_idx" btree (id) WITH (compress_prealloc_chunks='2', compresstype=pglz, compresslevel='1', compress_chunk_size='1024') +Partitions: tbl_pc_part_1 FOR VALUES FROM (1) TO (1001), + tbl_pc_part_2 FOR VALUES FROM (1001) TO (2001), + tbl_pc_part_3 FOR VALUES FROM (2001) TO (3001) + +\d+ tbl_pc_part_1 + Table "public.tbl_pc_part_1" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+---------+----------+--------------+------------- + id | integer | | | | plain | | + c1 | text | | | | extended | | +Partition of: 
tbl_pc_part FOR VALUES FROM (1) TO (1001) +Partition constraint: ((id IS NOT NULL) AND (id >= 1) AND (id < 1001)) +Indexes: + "tbl_pc_part_1_id_idx" btree (id) WITH (compress_prealloc_chunks='2', compresstype=pglz, compresslevel='1', compress_chunk_size='1024') +Options: compresstype=pglz, compresslevel=2, compress_chunk_size=1024, compress_prealloc_chunks=2 + +\d+ tbl_pc_part_2 + Table "public.tbl_pc_part_2" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+---------+----------+--------------+------------- + id | integer | | | | plain | | + c1 | text | | | | extended | | +Partition of: tbl_pc_part FOR VALUES FROM (1001) TO (2001) +Partition constraint: ((id IS NOT NULL) AND (id >= 1001) AND (id < 2001)) +Indexes: + "tbl_pc_part_2_id_idx" btree (id) WITH (compress_prealloc_chunks='2', compresstype=pglz, compresslevel='1', compress_chunk_size='1024') +Options: compresslevel=2, compress_chunk_size=1024, compress_prealloc_chunks=2, compresstype=pglz + +\d+ tbl_pc_part_3 + Table "public.tbl_pc_part_3" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+---------+----------+--------------+------------- + id | integer | | | | plain | | + c1 | text | | | | extended | | +Partition of: tbl_pc_part FOR VALUES FROM (2001) TO (3001) +Partition constraint: ((id IS NOT NULL) AND (id >= 2001) AND (id < 3001)) +Indexes: + "part3_id_idx1" btree (id) WITH (compresslevel='2', compresstype=pglz, compress_chunk_size='1024', compress_prealloc_chunks='3') + "tbl_pc_part_3_id_idx" btree (id) WITH (compress_prealloc_chunks='2', compresstype=pglz, compresslevel='1', compress_chunk_size='1024') +Options: compresslevel=2, compress_chunk_size=1024, compress_prealloc_chunks=2, compresstype=none + +DROP TABLE tbl_pc_part; +-- tablespace & unlogged relation +CREATE UNLOGGED TABLE tbl_pc_unlogged(id int PRIMARY KEY, c1 text); +CREATE INDEX 
tbl_pc_idx1_unlogged ON tbl_pc_unlogged(c1); +\d+ tbl_pc_unlogged + Unlogged table "public.tbl_pc_unlogged" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+---------+----------+--------------+------------- + id | integer | | not null | | plain | | + c1 | text | | | | extended | | +Indexes: + "tbl_pc_unlogged_pkey" PRIMARY KEY, btree (id) + "tbl_pc_idx1_unlogged" btree (c1) + +ALTER TABLE tbl_pc_unlogged SET LOGGED; +DROP TABLE tbl_pc_unlogged; +CREATE UNLOGGED TABLE tbl_pc_unlogged(id int PRIMARY KEY, c1 text) WITH(compresstype=pglz, compresslevel=1, compress_chunk_size=1024, compress_prealloc_chunks=0); +CREATE INDEX tbl_pc_idx1_unlogged ON tbl_pc_unlogged(c1) WITH(compresstype=pglz, compresslevel=1, compress_chunk_size=1024, compress_prealloc_chunks=1); +\d+ tbl_pc_unlogged + Unlogged table "public.tbl_pc_unlogged" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+---------+----------+--------------+------------- + id | integer | | not null | | plain | | + c1 | text | | | | extended | | +Indexes: + "tbl_pc_unlogged_pkey" PRIMARY KEY, btree (id) + "tbl_pc_idx1_unlogged" btree (c1) WITH (compresstype=pglz, compresslevel='1', compress_chunk_size='1024', compress_prealloc_chunks='1') +Options: compresstype=pglz, compresslevel=1, compress_chunk_size=1024, compress_prealloc_chunks=0 + +ALTER TABLE tbl_pc_unlogged SET(compresslevel=2, compress_prealloc_chunks=1); +ALTER INDEX tbl_pc_idx1_unlogged SET(compresslevel=2, compress_prealloc_chunks=1); +\d+ tbl_pc_unlogged + Unlogged table "public.tbl_pc_unlogged" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+---------+----------+--------------+------------- + id | integer | | not null | | plain | | + c1 | text | | | | extended | | +Indexes: + "tbl_pc_unlogged_pkey" PRIMARY KEY, 
btree (id) + "tbl_pc_idx1_unlogged" btree (c1) WITH (compresstype=pglz, compress_chunk_size='1024', compresslevel='2', compress_prealloc_chunks='1') +Options: compresstype=pglz, compress_chunk_size=1024, compresslevel=2, compress_prealloc_chunks=1 + +INSERT INTO tbl_pc_unlogged SELECT id, id::text FROM generate_series(1,1000)id; +CHECKPOINT; +SELECT count(*) FROM tbl_pc_unlogged; + count +------- + 1000 +(1 row) + +SELECT * FROM tbl_pc_unlogged WHERE c1='100'; + id | c1 +-----+----- + 100 | 100 +(1 row) + +ALTER TABLE tbl_pc_unlogged SET LOGGED; +INSERT INTO tbl_pc_unlogged SELECT id, id::text FROM generate_series(1001,2000)id; +CHECKPOINT; +SELECT count(*) FROM tbl_pc_unlogged; + count +------- + 2000 +(1 row) + +SELECT * FROM tbl_pc_unlogged WHERE c1='100'; + id | c1 +-----+----- + 100 | 100 +(1 row) + +DROP TABLE tbl_pc_unlogged; +-- tablespace & temp relation +CREATE TEMP TABLE tbl_pc_tmp(id int PRIMARY KEY, c1 text); +CREATE INDEX tbl_pc_idx1_tmp ON tbl_pc_tmp(c1); +SELECT reloptions FROM pg_class WHERE oid='tbl_pc_tmp'::regclass; + reloptions +------------ + +(1 row) + +SELECT reloptions FROM pg_class WHERE oid='tbl_pc_idx1_tmp'::regclass; + reloptions +------------ + +(1 row) + +DROP TABLE tbl_pc_tmp; +CREATE TEMP TABLE tbl_pc_tmp(id int PRIMARY KEY, c1 text) WITH(compresstype=pglz, compresslevel=1, compress_chunk_size=2048, compress_prealloc_chunks=2); +CREATE INDEX tbl_pc_idx1_tmp ON tbl_pc_tmp(c1) WITH(compresstype=pglz, compresslevel=1, compress_chunk_size=2048, compress_prealloc_chunks=1); +SELECT reloptions FROM pg_class WHERE oid='tbl_pc_tmp'::regclass; + reloptions +----------------------------------------------------------------------------------------- + {compresstype=pglz,compresslevel=1,compress_chunk_size=2048,compress_prealloc_chunks=2} +(1 row) + +SELECT reloptions FROM pg_class WHERE oid='tbl_pc_idx1_tmp'::regclass; + reloptions +----------------------------------------------------------------------------------------- + 
{compresstype=pglz,compresslevel=1,compress_chunk_size=2048,compress_prealloc_chunks=1} +(1 row) + +ALTER TABLE tbl_pc_tmp SET(compresslevel=2, compress_prealloc_chunks=1); +ALTER INDEX tbl_pc_idx1_tmp SET(compresslevel=2, compress_prealloc_chunks=1); +SELECT reloptions FROM pg_class WHERE oid='tbl_pc_tmp'::regclass; + reloptions +----------------------------------------------------------------------------------------- + {compresstype=pglz,compress_chunk_size=2048,compresslevel=2,compress_prealloc_chunks=1} +(1 row) + +SELECT reloptions FROM pg_class WHERE oid='tbl_pc_idx1_tmp'::regclass; + reloptions +----------------------------------------------------------------------------------------- + {compresstype=pglz,compress_chunk_size=2048,compresslevel=2,compress_prealloc_chunks=1} +(1 row) + +INSERT INTO tbl_pc_tmp SELECT id, id::text FROM generate_series(1,1000)id; +CHECKPOINT; +SELECT count(*) FROM tbl_pc_tmp; + count +------- + 1000 +(1 row) + +SELECT * FROM tbl_pc_tmp WHERE c1='100'; + id | c1 +-----+----- + 100 | 100 +(1 row) + +DROP TABLE tbl_pc_tmp; +-- tablespace & materialized view +CREATE MATERIALIZED VIEW mv_pc AS SELECT id, id::text c1 FROM generate_series(1,1000)id; +\d+ mv_pc + Materialized view "public.mv_pc" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+---------+----------+--------------+------------- + id | integer | | | | plain | | + c1 | text | | | | extended | | +View definition: + SELECT id.id, + id.id::text AS c1 + FROM generate_series(1, 1000) id(id); + +DROP MATERIALIZED VIEW mv_pc; +CREATE MATERIALIZED VIEW mv_pc WITH(compresstype=pglz, compresslevel=1, compress_chunk_size=2048, compress_prealloc_chunks=1) + AS SELECT id, id::text c1 FROM generate_series(1,1000)id; +CREATE INDEX mv_pc_idx ON mv_pc(c1) WITH(compresstype=pglz, compresslevel=1, compress_chunk_size=1024, compress_prealloc_chunks=2); +\d+ mv_pc + Materialized view "public.mv_pc" + Column | Type | 
Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+---------+----------+--------------+------------- + id | integer | | | | plain | | + c1 | text | | | | extended | | +Indexes: + "mv_pc_idx" btree (c1) WITH (compresstype=pglz, compresslevel='1', compress_chunk_size='1024', compress_prealloc_chunks='2') +View definition: + SELECT id.id, + id.id::text AS c1 + FROM generate_series(1, 1000) id(id); +Options: compresstype=pglz, compresslevel=1, compress_chunk_size=2048, compress_prealloc_chunks=1 + +ALTER MATERIALIZED VIEW mv_pc SET(compresstype=none); -- fail +ERROR: change compresstype OPTION is not supported +ALTER MATERIALIZED VIEW mv_pc SET(compress_chunk_size=1024); -- fail +ERROR: change compress_chunk_size OPTION is not supported +ALTER MATERIALIZED VIEW mv_pc SET(compress_prealloc_chunks=8); -- fail +ERROR: value 8 out of bounds for option "compress_prealloc_chunks" +DETAIL: Valid values are between "0" and "7". +ALTER MATERIALIZED VIEW mv_pc SET(compresslevel=2, compress_prealloc_chunks=0); -- ok +\d+ mv_pc + Materialized view "public.mv_pc" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+---------+----------+--------------+------------- + id | integer | | | | plain | | + c1 | text | | | | extended | | +Indexes: + "mv_pc_idx" btree (c1) WITH (compresstype=pglz, compresslevel='1', compress_chunk_size='1024', compress_prealloc_chunks='2') +View definition: + SELECT id.id, + id.id::text AS c1 + FROM generate_series(1, 1000) id(id); +Options: compresstype=pglz, compress_chunk_size=2048, compresslevel=2, compress_prealloc_chunks=0 + +ALTER MATERIALIZED VIEW mv_pc RESET(compresstype); -- fail +ERROR: change compresstype OPTION is not supported +ALTER MATERIALIZED VIEW mv_pc RESET(compress_chunk_size); -- fail +ERROR: change compress_chunk_size OPTION is not supported +ALTER MATERIALIZED VIEW mv_pc RESET(compresslevel); -- 
ok +ALTER MATERIALIZED VIEW mv_pc RESET(compress_prealloc_chunks); -- ok +\d+ mv_pc + Materialized view "public.mv_pc" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+---------+----------+--------------+------------- + id | integer | | | | plain | | + c1 | text | | | | extended | | +Indexes: + "mv_pc_idx" btree (c1) WITH (compresstype=pglz, compresslevel='1', compress_chunk_size='1024', compress_prealloc_chunks='2') +View definition: + SELECT id.id, + id.id::text AS c1 + FROM generate_series(1, 1000) id(id); +Options: compresstype=pglz, compress_chunk_size=2048 + +ALTER INDEX mv_pc_idx SET(compresstype=none); -- fail +ERROR: change compresstype OPTION is not supported +ALTER INDEX mv_pc_idx SET(compress_chunk_size=2048); -- fail +ERROR: change compress_chunk_size OPTION is not supported +ALTER INDEX mv_pc_idx SET(compress_prealloc_chunks=8); -- fail +ERROR: value 8 out of bounds for option "compress_prealloc_chunks" +DETAIL: Valid values are between "0" and "7". +ALTER INDEX mv_pc_idx SET(compresslevel=2, compress_prealloc_chunks=0); -- ok +\d+ mv_pc_idx + Index "public.mv_pc_idx" + Column | Type | Key? | Definition | Storage | Stats target +--------+------+------+------------+----------+-------------- + c1 | text | yes | c1 | extended | +btree, for table "public.mv_pc" +Options: compresstype=pglz, compress_chunk_size=1024, compresslevel=2, compress_prealloc_chunks=0 + +ALTER INDEX mv_pc_idx RESET(compresstype); -- fail +ERROR: change compresstype OPTION is not supported +ALTER INDEX mv_pc_idx RESET(compress_chunk_size); -- fail +ERROR: change compress_chunk_size OPTION is not supported +ALTER INDEX mv_pc_idx RESET(compresslevel); -- ok +ALTER INDEX mv_pc_idx RESET(compress_prealloc_chunks); -- ok +\d+ mv_pc_idx + Index "public.mv_pc_idx" + Column | Type | Key? 
| Definition | Storage | Stats target +--------+------+------+------------+----------+-------------- + c1 | text | yes | c1 | extended | +btree, for table "public.mv_pc" +Options: compresstype=pglz, compress_chunk_size=1024 + +CHECKPOINT; +SELECT count(*) FROM mv_pc; + count +------- + 1000 +(1 row) + +SELECT count(*) FROM mv_pc WHERE c1 = '100'; + count +------- + 1 +(1 row) + +REFRESH MATERIALIZED VIEW mv_pc; +CHECKPOINT; +SELECT count(*) FROM mv_pc; + count +------- + 1000 +(1 row) + +SELECT count(*) FROM mv_pc WHERE c1 = '100'; + count +------- + 1 +(1 row) + +DROP MATERIALIZED VIEW mv_pc; +ALTER TABLESPACE pg_default RESET(default_compresstype, default_compresslevel, default_compress_chunk_size, default_compress_prealloc_chunks); +-- +-- recycling space with vacuum +-- +CREATE TABLE tbl_pc(id int PRIMARY KEY, c1 text) WITH(compresstype=pglz, compress_chunk_size=2048, compress_prealloc_chunks=0); +CHECKPOINT; +SELECT pg_relation_size('tbl_pc') size_0 \gset +INSERT INTO tbl_pc SELECT id, id::text FROM generate_series(1,1000)id; +CHECKPOINT; +SELECT pg_relation_size('tbl_pc') size_1000 \gset +SELECT :size_1000 > :size_0; -- true + ?column? +---------- + t +(1 row) + +DELETE FROM tbl_pc WHERE id > 500; +VACUUM tbl_pc; +SELECT pg_relation_size('tbl_pc') size_500 \gset +SELECT count(*) FROM tbl_pc; + count +------- + 500 +(1 row) + +SELECT :size_500 < :size_1000; -- true + ?column? +---------- + t +(1 row) + +DELETE FROM tbl_pc WHERE id < 500; +VACUUM tbl_pc; +SELECT pg_relation_size('tbl_pc') size_1 \gset +SELECT count(*) FROM tbl_pc; + count +------- + 1 +(1 row) + +SELECT :size_1 = :size_500; -- true + ?column? +---------- + t +(1 row) + +DELETE FROM tbl_pc; +VACUUM tbl_pc; +SELECT pg_relation_size('tbl_pc') size_0_2 \gset +SELECT :size_0_2 = :size_0; + ?column? 
+---------- + t +(1 row) + +DROP TABLE tbl_pc; diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule index 026ea880cd..e10e0cb73f 100644 --- a/src/test/regress/parallel_schedule +++ b/src/test/regress/parallel_schedule @@ -121,3 +121,6 @@ test: fast_default # run stats by itself because its delay may be insufficient under heavy load test: stats + +# run page_compression by itself temporarily +test: page_compression \ No newline at end of file diff --git a/src/test/regress/serial_schedule b/src/test/regress/serial_schedule index 979d926119..e3710e7532 100644 --- a/src/test/regress/serial_schedule +++ b/src/test/regress/serial_schedule @@ -201,3 +201,4 @@ test: explain test: event_trigger test: fast_default test: stats +test: page_compression diff --git a/src/test/regress/sql/page_compression.sql b/src/test/regress/sql/page_compression.sql new file mode 100644 index 0000000000..5ed02804b2 --- /dev/null +++ b/src/test/regress/sql/page_compression.sql @@ -0,0 +1,525 @@ +-- +-- Page compression tests +-- + +-- +-- create compressed table +-- +CREATE TABLE tbl_pc(id int, c1 text) WITH(compresstype=pglz); +\d+ tbl_pc +DROP TABLE tbl_pc; + +CREATE TABLE tbl_pc(id int, c1 text) WITH(compresstype=pglz, compress_chunk_size=1024); +\d+ tbl_pc +DROP TABLE tbl_pc; + +CREATE TABLE tbl_pc(id int, c1 text) WITH(compresstype=pglz, compress_chunk_size=2048); +\d+ tbl_pc +DROP TABLE tbl_pc; + +CREATE TABLE tbl_pc(id int, c1 text) WITH(compresstype=pglz, compresslevel=0, compress_chunk_size=4096, compress_prealloc_chunks=0); +\d+ tbl_pc +DROP TABLE tbl_pc; + +CREATE TABLE tbl_pc(id int PRIMARY KEY, c1 text) WITH(compresstype=pglz, compresslevel=1, compress_chunk_size=1024, compress_prealloc_chunks=2); +\d+ tbl_pc +DROP TABLE tbl_pc; + +CREATE TABLE tbl_pc(id int PRIMARY KEY, c1 text) WITH(compresstype=pglz, compresslevel=-1, compress_chunk_size=1024, compress_prealloc_chunks=7); +\d+ tbl_pc +DROP TABLE tbl_pc; + +CREATE TABLE tbl_pc 
WITH(compresstype=pglz, compresslevel=1, compress_chunk_size=1024, compress_prealloc_chunks=7) + AS SELECT id, id::text c1 FROM generate_series(1,1000)id; +\d+ tbl_pc +DROP TABLE tbl_pc; + +CREATE TABLE tbl_pc(id int PRIMARY KEY WITH(compresstype=pglz, compresslevel=1, compress_chunk_size=1024, compress_prealloc_chunks=2), c1 text) WITH(compresstype=pglz, compresslevel=1, compress_chunk_size=1024, compress_prealloc_chunks=2); +CREATE TABLE tbl_pc2(LIKE tbl_pc INCLUDING ALL); +\d+ tbl_pc +DROP TABLE tbl_pc; +DROP TABLE tbl_pc2; + +CREATE TABLE tbl_pc(id int PRIMARY KEY, c1 text) WITH(compresstype=none); +\d+ tbl_pc +DROP TABLE tbl_pc; + +-- invalid storage parameter +CREATE TABLE tbl_pc_error(id int, c1 text) WITH(compresstype=xyz); -- fail +CREATE TABLE tbl_pc_error(id int, c1 text) WITH(compresstype=pglz, compresslevel=xyz); -- fail +CREATE TABLE tbl_pc_error(id int, c1 text) WITH(compresstype=pglz, compress_chunk_size=1025); -- fail +CREATE TABLE tbl_pc_error(id int, c1 text) WITH(compresstype=pglz, compress_prealloc_chunks=8); -- fail + + +-- +-- create compressed index +-- +SET enable_seqscan = OFF; + +CREATE TABLE tbl_pc(id int PRIMARY KEY WITH(compresstype=pglz, compresslevel=1, compress_chunk_size=1024, compress_prealloc_chunks=2), c1 text) WITH(compresstype=pglz, compresslevel=1, compress_chunk_size=1024, compress_prealloc_chunks=2); + +CREATE INDEX tbl_pc_idx1 on tbl_pc(c1) WITH(compresstype=pglz, compresslevel=1, compress_chunk_size=1024, compress_prealloc_chunks=2); + +\d+ tbl_pc + +INSERT INTO tbl_pc SELECT id, id::text FROM generate_series(1,1000)id; + +-- call CHECKPOINT to flush shared buffer to compressed relation file +CHECKPOINT; + +-- run ANALYZE REINDEX VACUUM and CLUSTER on compressed table and index +ANALYZE tbl_pc; + +SELECT count(*) FROM tbl_pc; +SELECT * FROM tbl_pc WHERE c1='100'; +EXPLAIN(COSTS off) SELECT * FROM tbl_pc WHERE c1='100'; + +REINDEX INDEX tbl_pc_idx1; +CHECKPOINT; +SELECT * FROM tbl_pc WHERE c1='100'; + +REINDEX TABLE 
tbl_pc; +CHECKPOINT; +SELECT * FROM tbl_pc WHERE c1='100'; + +VACUUM tbl_pc; +CHECKPOINT; +SELECT count(*) FROM tbl_pc; +SELECT * FROM tbl_pc WHERE c1='100'; + +VACUUM FULL tbl_pc; +CHECKPOINT; +SELECT count(*) FROM tbl_pc; +SELECT * FROM tbl_pc WHERE c1='100'; + +CLUSTER tbl_pc USING tbl_pc_idx1; +CHECKPOINT; +SELECT count(*) FROM tbl_pc; +SELECT * FROM tbl_pc WHERE c1='100'; + +DROP INDEX tbl_pc_idx1; + +-- check usage of compressed index with data +CREATE INDEX tbl_pc_idx1 on tbl_pc USING hash(c1) WITH(compresstype=pglz, compresslevel=1, compress_chunk_size=1024, compress_prealloc_chunks=2); +CHECKPOINT; +SELECT * FROM tbl_pc WHERE c1='100'; +EXPLAIN(COSTS off) SELECT * FROM tbl_pc WHERE c1='100'; +DROP INDEX tbl_pc_idx1; + +CREATE INDEX tbl_pc_idx1 on tbl_pc USING gin((ARRAY[id])) WITH(compresstype=pglz, compresslevel=1, compress_chunk_size=1024, compress_prealloc_chunks=2); +CHECKPOINT; +SELECT * FROM tbl_pc WHERE ARRAY[id] @> ARRAY[100]; +EXPLAIN(COSTS off) SELECT * FROM tbl_pc WHERE ARRAY[id] @> ARRAY[100]; +DROP INDEX tbl_pc_idx1; + +CREATE INDEX tbl_pc_idx1 on tbl_pc USING gist((point(id,id))) WITH(compresstype=pglz, compresslevel=1, compress_chunk_size=1024, compress_prealloc_chunks=2); +CHECKPOINT; +SELECT * FROM tbl_pc ORDER BY point(id,id) <-> point(100,100) limit 1; +EXPLAIN(COSTS off) SELECT * FROM tbl_pc ORDER BY point(id,id) <-> point(100,100) limit 1; +DROP INDEX tbl_pc_idx1; + +CREATE INDEX tbl_pc_idx1 on tbl_pc USING spgist(c1) WITH(compresstype=pglz, compresslevel=1, compress_chunk_size=1024, compress_prealloc_chunks=2); +CHECKPOINT; +SELECT * FROM tbl_pc WHERE c1='100'; +EXPLAIN(COSTS off) SELECT * FROM tbl_pc WHERE c1='100'; +DROP INDEX tbl_pc_idx1; + +-- brin index does not support compression +CREATE INDEX tbl_pc_idx1 on tbl_pc USING brin(c1) WITH(compresstype=pglz, compresslevel=1, compress_chunk_size=1024, compress_prealloc_chunks=2); -- fail + +DROP TABLE tbl_pc; +RESET enable_seqscan; + +-- +-- alter table and index +-- + +-- ALTER 
TABLE +-- ALTER compresstype and compress_chunk_size currently is not supported +CREATE TABLE tbl_pc(id int, c1 text); +ALTER TABLE tbl_pc SET(compresstype=pglz); -- fail +DROP TABLE tbl_pc; + +CREATE TABLE tbl_pc(id int PRIMARY KEY, c1 text) WITH(compresstype=pglz, compresslevel=1, compress_chunk_size=1024, compress_prealloc_chunks=2); +ALTER TABLE tbl_pc SET(compresstype=none); -- fail +ALTER TABLE tbl_pc SET(compress_chunk_size=2048); -- fail +ALTER TABLE tbl_pc SET(compress_prealloc_chunks=8); -- fail +ALTER TABLE tbl_pc SET(compresslevel=2, compress_prealloc_chunks=0); -- ok +\d+ tbl_pc +ALTER TABLE tbl_pc RESET(compresstype); -- fail +ALTER TABLE tbl_pc RESET(compress_chunk_size); -- fail +ALTER TABLE tbl_pc RESET(compresslevel); -- ok +ALTER TABLE tbl_pc RESET(compress_prealloc_chunks); -- ok +\d+ tbl_pc +CHECKPOINT; +SELECT count(*) FROM tbl_pc; + +-- ALTER INDEX +-- ALTER compresstype and compress_chunk_size currently is not supported +CREATE INDEX tbl_pc_idx1 on tbl_pc USING btree(c1); +ALTER INDEX tbl_pc_idx1 SET(compresstype=pglz); -- fail +DROP INDEX tbl_pc_idx1; + +CREATE INDEX tbl_pc_idx1 on tbl_pc USING btree(c1) WITH(compresstype=pglz, compresslevel=1, compress_chunk_size=1024, compress_prealloc_chunks=2); +ALTER INDEX tbl_pc_idx1 SET(compresstype=none); -- fail +ALTER INDEX tbl_pc_idx1 SET(compress_chunk_size=2048); -- fail +ALTER INDEX tbl_pc_idx1 SET(compress_prealloc_chunks=8); -- fail +ALTER INDEX tbl_pc_idx1 SET(compresslevel=2, compress_prealloc_chunks=0); -- ok +\d+ tbl_pc +ALTER INDEX tbl_pc_idx1 RESET(compresstype); -- fail +ALTER INDEX tbl_pc_idx1 RESET(compress_chunk_size); -- fail +ALTER INDEX tbl_pc_idx1 RESET(compresslevel); -- ok +ALTER INDEX tbl_pc_idx1 RESET(compress_prealloc_chunks); -- ok +\d+ tbl_pc +CHECKPOINT; +SELECT * FROM tbl_pc WHERE c1='100'; +EXPLAIN(COSTS off) SELECT * FROM tbl_pc WHERE c1='100'; + +-- alter hash index +CREATE INDEX tbl_pc_idx_hash on tbl_pc USING hash(c1) WITH(compresstype=pglz, compresslevel=1, 
compress_chunk_size=1024, compress_prealloc_chunks=2); + +ALTER INDEX tbl_pc_idx_hash SET(compresstype=none); -- fail +ALTER INDEX tbl_pc_idx_hash SET(compress_chunk_size=2048); -- fail +ALTER INDEX tbl_pc_idx_hash SET(compress_prealloc_chunks=8); -- fail +ALTER INDEX tbl_pc_idx_hash SET(compresslevel=2, compress_prealloc_chunks=0); -- ok +\d+ tbl_pc_idx_hash +ALTER INDEX tbl_pc_idx_hash RESET(compresstype); -- fail +ALTER INDEX tbl_pc_idx_hash RESET(compress_chunk_size); -- fail +ALTER INDEX tbl_pc_idx_hash RESET(compresslevel); -- ok +ALTER INDEX tbl_pc_idx_hash RESET(compress_prealloc_chunks); -- ok +\d+ tbl_pc_idx_hash + +-- alter gin index +CREATE INDEX tbl_pc_idx_gin on tbl_pc USING gin((ARRAY[id])) WITH(compresstype=pglz, compresslevel=1, compress_chunk_size=1024, compress_prealloc_chunks=2); + +ALTER INDEX tbl_pc_idx_gin SET(compresstype=none); -- fail +ALTER INDEX tbl_pc_idx_gin SET(compress_chunk_size=2048); -- fail +ALTER INDEX tbl_pc_idx_gin SET(compress_prealloc_chunks=8); -- fail +ALTER INDEX tbl_pc_idx_gin SET(compresslevel=2, compress_prealloc_chunks=0); -- ok +\d+ tbl_pc_idx_gin +ALTER INDEX tbl_pc_idx_gin RESET(compresstype); -- fail +ALTER INDEX tbl_pc_idx_gin RESET(compress_chunk_size); -- fail +ALTER INDEX tbl_pc_idx_gin RESET(compresslevel); -- ok +ALTER INDEX tbl_pc_idx_gin RESET(compress_prealloc_chunks); -- ok +\d+ tbl_pc_idx_gin + +-- alter gist index +CREATE INDEX tbl_pc_idx_gist on tbl_pc USING gist((point(id,id))) WITH(compresstype=pglz, compresslevel=1, compress_chunk_size=1024, compress_prealloc_chunks=2); + +ALTER INDEX tbl_pc_idx_gist SET(compresstype=none); -- fail +ALTER INDEX tbl_pc_idx_gist SET(compress_chunk_size=2048); -- fail +ALTER INDEX tbl_pc_idx_gist SET(compress_prealloc_chunks=8); -- fail +ALTER INDEX tbl_pc_idx_gist SET(compresslevel=2, compress_prealloc_chunks=0); -- ok +\d+ tbl_pc_idx_gist +ALTER INDEX tbl_pc_idx_gist RESET(compresstype); -- fail +ALTER INDEX tbl_pc_idx_gist RESET(compress_chunk_size); -- fail +ALTER 
INDEX tbl_pc_idx_gist RESET(compresslevel); -- ok +ALTER INDEX tbl_pc_idx_gist RESET(compress_prealloc_chunks); -- ok +\d+ tbl_pc_idx_gist + +-- alter spgist index +CREATE INDEX tbl_pc_idx_spgist on tbl_pc USING spgist(c1) WITH(compresstype=pglz, compresslevel=1, compress_chunk_size=1024, compress_prealloc_chunks=2); + +ALTER INDEX tbl_pc_idx_spgist SET(compresstype=none); -- fail +ALTER INDEX tbl_pc_idx_spgist SET(compress_chunk_size=2048); -- fail +ALTER INDEX tbl_pc_idx_spgist SET(compress_prealloc_chunks=8); -- fail +ALTER INDEX tbl_pc_idx_spgist SET(compresslevel=2, compress_prealloc_chunks=0); -- ok +\d+ tbl_pc_idx_spgist +ALTER INDEX tbl_pc_idx_spgist RESET(compresstype); -- fail +ALTER INDEX tbl_pc_idx_spgist RESET(compress_chunk_size); -- fail +ALTER INDEX tbl_pc_idx_spgist RESET(compresslevel); -- ok +ALTER INDEX tbl_pc_idx_spgist RESET(compress_prealloc_chunks); -- ok +\d+ tbl_pc_idx_spgist + +-- alter brin index (do not support compression) +CREATE INDEX tbl_pc_idx_brin on tbl_pc USING brin(c1); +ALTER INDEX tbl_pc_idx_brin SET(compress_prealloc_chunks=3); -- fail + +DROP TABLE tbl_pc; + +-- +-- partitioned table and index +-- + +-- partition table does not support compression, but index of partition table and its child tables can use compression +CREATE TABLE tbl_pc_part (id int, c1 text) PARTITION BY RANGE (id) WITH(compresstype=pglz); -- fail + +CREATE TABLE tbl_pc_part (id int, c1 text) PARTITION BY RANGE (id); +CREATE TABLE tbl_pc_part_1 PARTITION OF tbl_pc_part FOR VALUES FROM (1) TO (1001); +CREATE TABLE tbl_pc_part_2 PARTITION OF tbl_pc_part FOR VALUES FROM (1001) TO (2001) WITH(compresstype=pglz, compresslevel=1, compress_chunk_size=1024, compress_prealloc_chunks=2); + +CREATE INDEX part_id_idx ON tbl_pc_part(id) WITH(compresstype=pglz, compresslevel=2, compress_chunk_size=1024, compress_prealloc_chunks=2); + +CREATE TABLE tbl_pc_part_3 PARTITION OF tbl_pc_part FOR VALUES FROM (2001) TO (3001); + +CREATE INDEX part3_id_idx1 ON tbl_pc_part_3(id) 
WITH(compresstype=pglz, compresslevel=2, compress_chunk_size=1024, compress_prealloc_chunks=2); + +\d+ tbl_pc_part +\d+ part_id_idx +\d+ tbl_pc_part_1 +\d+ tbl_pc_part_2 +\d+ tbl_pc_part_3 + +INSERT INTO tbl_pc_part SELECT id, id::text FROM generate_series(1,3000)id; +CHECKPOINT; +SELECT count(*) FROM tbl_pc_part; +SELECT * FROM tbl_pc_part WHERE id=100; +SELECT * FROM tbl_pc_part WHERE id=1100; +SELECT * FROM tbl_pc_part WHERE id=2100; + +ALTER TABLE tbl_pc_part SET(compresstype=pglz); -- fail +ALTER TABLE tbl_pc_part_1 SET(compresstype=pglz); -- fail + +ALTER TABLE tbl_pc_part_2 SET(compresstype=none); -- fail +ALTER TABLE tbl_pc_part_2 SET(compress_chunk_size=2048); -- fail +ALTER TABLE tbl_pc_part_2 SET(compress_prealloc_chunks=8); -- fail +ALTER TABLE tbl_pc_part_2 SET(compresslevel=2, compress_prealloc_chunks=0); -- ok +\d+ tbl_pc_part_2 + +ALTER INDEX part3_id_idx1 SET(compresstype=none); -- fail +ALTER INDEX part3_id_idx1 SET(compress_chunk_size=2048); -- fail +ALTER INDEX part3_id_idx1 SET(compress_prealloc_chunks=8); -- fail +ALTER INDEX part3_id_idx1 SET(compresslevel=2, compress_prealloc_chunks=0); -- ok +\d+ part3_id_idx1 + +ALTER INDEX part_id_idx SET(compresstype=pglz); -- fail +ALTER INDEX part_id_idx SET(compress_chunk_size=2048); -- fail +ALTER INDEX part_id_idx SET(compress_prealloc_chunks=8); -- fail +ALTER INDEX part_id_idx SET(compresslevel=2, compress_prealloc_chunks=0); -- fail +\d+ tbl_pc_part + +INSERT INTO tbl_pc_part SELECT id, id::text FROM generate_series(1,3000)id; +CHECKPOINT; +SELECT count(*) FROM tbl_pc_part; +SELECT * FROM tbl_pc_part WHERE id=100; +SELECT * FROM tbl_pc_part WHERE id=1100; +SELECT * FROM tbl_pc_part WHERE id=2100; + +DROP TABLE tbl_pc_part; + +-- +-- default tablespace store parameter +-- + +-- can not use compression on global tablespace +ALTER TABLESPACE pg_default SET(default_compresstype=xxx); -- fail +ALTER TABLESPACE pg_default SET(default_compress_chunk_size=1023); -- fail +ALTER TABLESPACE pg_default 
SET(default_compress_chunk_size=4097); -- fail +ALTER TABLESPACE pg_default SET(default_compress_prealloc_chunks=-1); -- fail +ALTER TABLESPACE pg_default SET(default_compress_prealloc_chunks=8); -- fail + +ALTER TABLESPACE pg_default SET(default_compresstype=pglz, default_compresslevel=2, default_compress_chunk_size=1024, default_compress_prealloc_chunks=2); + -- ok + +-- table and index(btree,hash,gin,gist,spgist) inherit default compression options from it's tablespace +CREATE TABLE tbl_pc(id int PRIMARY KEY, c1 text); +CREATE INDEX tbl_pc_idx_btree on tbl_pc(c1); +CREATE INDEX tbl_pc_idx_hash on tbl_pc USING hash(c1); +CREATE INDEX tbl_pc_idx_gin on tbl_pc USING gin((ARRAY[id])); +CREATE INDEX tbl_pc_idx_gist on tbl_pc USING gist((point(id,id))); +CREATE INDEX tbl_pc_idx_spgist on tbl_pc USING spgist(c1); +CREATE INDEX tbl_pc_idx_brin on tbl_pc USING brin(c1); +\d+ tbl_pc + +SELECT size FROM pg_stat_file(pg_relation_filepath('tbl_pc') || '_pca'); +SELECT size FROM pg_stat_file(pg_relation_filepath('tbl_pc_idx_btree') || '_pca'); +SELECT size FROM pg_stat_file(pg_relation_filepath('tbl_pc_idx_hash') || '_pca'); +SELECT size FROM pg_stat_file(pg_relation_filepath('tbl_pc_idx_gin') || '_pca'); +SELECT size FROM pg_stat_file(pg_relation_filepath('tbl_pc_idx_gist') || '_pca'); +SELECT size FROM pg_stat_file(pg_relation_filepath('tbl_pc_idx_spgist') || '_pca'); +SELECT size FROM pg_stat_file(pg_relation_filepath('tbl_pc_idx_brin') || '_pca', true); + +-- toast relation will not be compressed +SELECT reltoastrelid FROM pg_class WHERE oid='tbl_pc'::regclass \gset +SELECT reloptions FROM pg_class where oid=:reltoastrelid; +SELECT size FROM pg_stat_file(pg_relation_filepath(:reltoastrelid) || '_pca', true); + +CREATE TABLE tbl_pc1 AS SELECT * FROM tbl_pc; +\d+ tbl_pc1 + +ALTER TABLESPACE pg_default RESET(default_compresstype, default_compresslevel, default_compress_chunk_size, default_compress_prealloc_chunks); +CREATE INDEX tbl_pc_idx2 on tbl_pc(c1); +\d+ tbl_pc + 
+CREATE TABLE tbl_pc2(LIKE tbl_pc); +\d+ tbl_pc2 + +CREATE TABLE tbl_pc3(LIKE tbl_pc INCLUDING ALL); +\d+ tbl_pc3 + +CREATE TABLE tbl_pc4 AS SELECT * FROM tbl_pc; +\d+ tbl_pc4 + +DROP TABLE tbl_pc; +DROP TABLE tbl_pc1; +DROP TABLE tbl_pc2; +DROP TABLE tbl_pc3; +DROP TABLE tbl_pc4; + +ALTER TABLESPACE pg_default SET(default_compresstype=pglz, default_compresslevel=2, default_compress_chunk_size=1024, default_compress_prealloc_chunks=2); + +CREATE TABLE tbl_pc(id int PRIMARY KEY, c1 text) WITH(compresstype=none); +CREATE INDEX tbl_pc_idx1 on tbl_pc(c1) WITH(compresstype=none); +\d+ tbl_pc + +INSERT INTO tbl_pc SELECT id, id::text FROM generate_series(1,1000)id; +CHECKPOINT; +SELECT count(*) FROM tbl_pc; +SELECT * FROM tbl_pc WHERE c1='100'; + +DROP TABLE tbl_pc; + +-- tablespace & partitioned table +CREATE TABLE tbl_pc_part (id int, c1 text) PARTITION BY RANGE (id); +CREATE TABLE tbl_pc_part_1 PARTITION OF tbl_pc_part FOR VALUES FROM (1) TO (1001); +CREATE TABLE tbl_pc_part_2 PARTITION OF tbl_pc_part FOR VALUES FROM (1001) TO (2001) WITH(compresstype=pglz); + +CREATE INDEX part_id_idx ON tbl_pc_part(id) WITH(compresstype=pglz, compresslevel=1, compress_chunk_size=1024); + +CREATE TABLE tbl_pc_part_3 PARTITION OF tbl_pc_part FOR VALUES FROM (2001) TO (3001) WITH(compresstype=none); +CREATE INDEX part3_id_idx1 ON tbl_pc_part_3(id) WITH(compresstype=pglz, compress_chunk_size=1024, compress_prealloc_chunks=3); + +\d+ tbl_pc_part +\d+ tbl_pc_part_1 +\d+ tbl_pc_part_2 +\d+ tbl_pc_part_3 + +DROP TABLE tbl_pc_part; + +-- tablespace & unlogged relation +CREATE UNLOGGED TABLE tbl_pc_unlogged(id int PRIMARY KEY, c1 text); +CREATE INDEX tbl_pc_idx1_unlogged ON tbl_pc_unlogged(c1); +\d+ tbl_pc_unlogged + +ALTER TABLE tbl_pc_unlogged SET LOGGED; +DROP TABLE tbl_pc_unlogged; + +CREATE UNLOGGED TABLE tbl_pc_unlogged(id int PRIMARY KEY, c1 text) WITH(compresstype=pglz, compresslevel=1, compress_chunk_size=1024, compress_prealloc_chunks=0); +CREATE INDEX tbl_pc_idx1_unlogged ON 
tbl_pc_unlogged(c1) WITH(compresstype=pglz, compresslevel=1, compress_chunk_size=1024, compress_prealloc_chunks=1); +\d+ tbl_pc_unlogged + +ALTER TABLE tbl_pc_unlogged SET(compresslevel=2, compress_prealloc_chunks=1); +ALTER INDEX tbl_pc_idx1_unlogged SET(compresslevel=2, compress_prealloc_chunks=1); +\d+ tbl_pc_unlogged + +INSERT INTO tbl_pc_unlogged SELECT id, id::text FROM generate_series(1,1000)id; +CHECKPOINT; +SELECT count(*) FROM tbl_pc_unlogged; +SELECT * FROM tbl_pc_unlogged WHERE c1='100'; + +ALTER TABLE tbl_pc_unlogged SET LOGGED; + +INSERT INTO tbl_pc_unlogged SELECT id, id::text FROM generate_series(1001,2000)id; +CHECKPOINT; +SELECT count(*) FROM tbl_pc_unlogged; +SELECT * FROM tbl_pc_unlogged WHERE c1='100'; + +DROP TABLE tbl_pc_unlogged; + +-- tablespace & temp relation +CREATE TEMP TABLE tbl_pc_tmp(id int PRIMARY KEY, c1 text); +CREATE INDEX tbl_pc_idx1_tmp ON tbl_pc_tmp(c1); +SELECT reloptions FROM pg_class WHERE oid='tbl_pc_tmp'::regclass; +SELECT reloptions FROM pg_class WHERE oid='tbl_pc_idx1_tmp'::regclass; + +DROP TABLE tbl_pc_tmp; + +CREATE TEMP TABLE tbl_pc_tmp(id int PRIMARY KEY, c1 text) WITH(compresstype=pglz, compresslevel=1, compress_chunk_size=2048, compress_prealloc_chunks=2); +CREATE INDEX tbl_pc_idx1_tmp ON tbl_pc_tmp(c1) WITH(compresstype=pglz, compresslevel=1, compress_chunk_size=2048, compress_prealloc_chunks=1); +SELECT reloptions FROM pg_class WHERE oid='tbl_pc_tmp'::regclass; +SELECT reloptions FROM pg_class WHERE oid='tbl_pc_idx1_tmp'::regclass; + +ALTER TABLE tbl_pc_tmp SET(compresslevel=2, compress_prealloc_chunks=1); +ALTER INDEX tbl_pc_idx1_tmp SET(compresslevel=2, compress_prealloc_chunks=1); +SELECT reloptions FROM pg_class WHERE oid='tbl_pc_tmp'::regclass; +SELECT reloptions FROM pg_class WHERE oid='tbl_pc_idx1_tmp'::regclass; + +INSERT INTO tbl_pc_tmp SELECT id, id::text FROM generate_series(1,1000)id; +CHECKPOINT; +SELECT count(*) FROM tbl_pc_tmp; +SELECT * FROM tbl_pc_tmp WHERE c1='100'; + +DROP TABLE tbl_pc_tmp; + 
+-- tablespace & materialized view + +CREATE MATERIALIZED VIEW mv_pc AS SELECT id, id::text c1 FROM generate_series(1,1000)id; +\d+ mv_pc +DROP MATERIALIZED VIEW mv_pc; + +CREATE MATERIALIZED VIEW mv_pc WITH(compresstype=pglz, compresslevel=1, compress_chunk_size=2048, compress_prealloc_chunks=1) + AS SELECT id, id::text c1 FROM generate_series(1,1000)id; + +CREATE INDEX mv_pc_idx ON mv_pc(c1) WITH(compresstype=pglz, compresslevel=1, compress_chunk_size=1024, compress_prealloc_chunks=2); +\d+ mv_pc + +ALTER MATERIALIZED VIEW mv_pc SET(compresstype=none); -- fail +ALTER MATERIALIZED VIEW mv_pc SET(compress_chunk_size=1024); -- fail +ALTER MATERIALIZED VIEW mv_pc SET(compress_prealloc_chunks=8); -- fail +ALTER MATERIALIZED VIEW mv_pc SET(compresslevel=2, compress_prealloc_chunks=0); -- ok +\d+ mv_pc + +ALTER MATERIALIZED VIEW mv_pc RESET(compresstype); -- fail +ALTER MATERIALIZED VIEW mv_pc RESET(compress_chunk_size); -- fail +ALTER MATERIALIZED VIEW mv_pc RESET(compresslevel); -- ok +ALTER MATERIALIZED VIEW mv_pc RESET(compress_prealloc_chunks); -- ok +\d+ mv_pc + +ALTER INDEX mv_pc_idx SET(compresstype=none); -- fail +ALTER INDEX mv_pc_idx SET(compress_chunk_size=2048); -- fail +ALTER INDEX mv_pc_idx SET(compress_prealloc_chunks=8); -- fail +ALTER INDEX mv_pc_idx SET(compresslevel=2, compress_prealloc_chunks=0); -- ok +\d+ mv_pc_idx + +ALTER INDEX mv_pc_idx RESET(compresstype); -- fail +ALTER INDEX mv_pc_idx RESET(compress_chunk_size); -- fail +ALTER INDEX mv_pc_idx RESET(compresslevel); -- ok +ALTER INDEX mv_pc_idx RESET(compress_prealloc_chunks); -- ok +\d+ mv_pc_idx + +CHECKPOINT; +SELECT count(*) FROM mv_pc; +SELECT count(*) FROM mv_pc WHERE c1 = '100'; + +REFRESH MATERIALIZED VIEW mv_pc; +CHECKPOINT; +SELECT count(*) FROM mv_pc; +SELECT count(*) FROM mv_pc WHERE c1 = '100'; + +DROP MATERIALIZED VIEW mv_pc; + +ALTER TABLESPACE pg_default RESET(default_compresstype, default_compresslevel, default_compress_chunk_size, default_compress_prealloc_chunks); + +-- +-- 
recycling space with vacuum +-- +CREATE TABLE tbl_pc(id int PRIMARY KEY, c1 text) WITH(compresstype=pglz, compress_chunk_size=2048, compress_prealloc_chunks=0); +CHECKPOINT; +SELECT pg_relation_size('tbl_pc') size_0 \gset + +INSERT INTO tbl_pc SELECT id, id::text FROM generate_series(1,1000)id; +CHECKPOINT; +SELECT pg_relation_size('tbl_pc') size_1000 \gset + +SELECT :size_1000 > :size_0; -- true + +DELETE FROM tbl_pc WHERE id > 500; +VACUUM tbl_pc; +SELECT pg_relation_size('tbl_pc') size_500 \gset +SELECT count(*) FROM tbl_pc; +SELECT :size_500 < :size_1000; -- true + + +DELETE FROM tbl_pc WHERE id < 500; +VACUUM tbl_pc; +SELECT pg_relation_size('tbl_pc') size_1 \gset +SELECT count(*) FROM tbl_pc; +SELECT :size_1 = :size_500; -- true + +DELETE FROM tbl_pc; +VACUUM tbl_pc; +SELECT pg_relation_size('tbl_pc') size_0_2 \gset + +SELECT :size_0_2 = :size_0; + +DROP TABLE tbl_pc; diff --git a/src/test/zstd/.gitignore b/src/test/zstd/.gitignore new file mode 100644 index 0000000000..5dcb3ff972 --- /dev/null +++ b/src/test/zstd/.gitignore @@ -0,0 +1,4 @@ +# Generated subdirectories +/log/ +/results/ +/tmp_check/ diff --git a/src/test/zstd/Makefile b/src/test/zstd/Makefile new file mode 100644 index 0000000000..0f6e446b9e --- /dev/null +++ b/src/test/zstd/Makefile @@ -0,0 +1,14 @@ +# src/test/zstd/Makefile + +REGRESS = page_compression_with_zstd + +ifdef USE_PGXS +PG_CONFIG = pg_config +PGXS := $(shell $(PG_CONFIG) --pgxs) +include $(PGXS) +else +subdir = src/test/zstd +top_builddir = ../../.. +include $(top_builddir)/src/Makefile.global +include $(top_srcdir)/contrib/contrib-global.mk +endif diff --git a/src/test/zstd/README b/src/test/zstd/README new file mode 100644 index 0000000000..b8b7c592d7 --- /dev/null +++ b/src/test/zstd/README @@ -0,0 +1,3 @@ +This directory contains tests for page compression with ztsd. 
+ +The premise of running this test is to enable the '-with-zstd' option when running configure diff --git a/src/test/zstd/expected/page_compression_with_zstd.out b/src/test/zstd/expected/page_compression_with_zstd.out new file mode 100644 index 0000000000..b1f36cdaec --- /dev/null +++ b/src/test/zstd/expected/page_compression_with_zstd.out @@ -0,0 +1,1292 @@ +-- +-- Page compression tests +-- +-- +-- create compressed table +-- +CREATE TABLE tbl_pc(id int, c1 text) WITH(compresstype=zstd); +\d+ tbl_pc + Table "public.tbl_pc" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+---------+----------+--------------+------------- + id | integer | | | | plain | | + c1 | text | | | | extended | | +Options: compresstype=zstd + +DROP TABLE tbl_pc; +CREATE TABLE tbl_pc(id int, c1 text) WITH(compresstype=zstd, compress_chunk_size=1024); +\d+ tbl_pc + Table "public.tbl_pc" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+---------+----------+--------------+------------- + id | integer | | | | plain | | + c1 | text | | | | extended | | +Options: compresstype=zstd, compress_chunk_size=1024 + +DROP TABLE tbl_pc; +CREATE TABLE tbl_pc(id int, c1 text) WITH(compresstype=zstd, compress_chunk_size=2048); +\d+ tbl_pc + Table "public.tbl_pc" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+---------+----------+--------------+------------- + id | integer | | | | plain | | + c1 | text | | | | extended | | +Options: compresstype=zstd, compress_chunk_size=2048 + +DROP TABLE tbl_pc; +CREATE TABLE tbl_pc(id int, c1 text) WITH(compresstype=zstd, compresslevel=0, compress_chunk_size=4096, compress_prealloc_chunks=0); +\d+ tbl_pc + Table "public.tbl_pc" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description 
+--------+---------+-----------+----------+---------+----------+--------------+------------- + id | integer | | | | plain | | + c1 | text | | | | extended | | +Options: compresstype=zstd, compresslevel=0, compress_chunk_size=4096, compress_prealloc_chunks=0 + +DROP TABLE tbl_pc; +CREATE TABLE tbl_pc(id int PRIMARY KEY, c1 text) WITH(compresstype=zstd, compresslevel=1, compress_chunk_size=1024, compress_prealloc_chunks=2); +\d+ tbl_pc + Table "public.tbl_pc" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+---------+----------+--------------+------------- + id | integer | | not null | | plain | | + c1 | text | | | | extended | | +Indexes: + "tbl_pc_pkey" PRIMARY KEY, btree (id) +Options: compresstype=zstd, compresslevel=1, compress_chunk_size=1024, compress_prealloc_chunks=2 + +DROP TABLE tbl_pc; +CREATE TABLE tbl_pc(id int PRIMARY KEY, c1 text) WITH(compresstype=zstd, compresslevel=-1, compress_chunk_size=1024, compress_prealloc_chunks=7); +\d+ tbl_pc + Table "public.tbl_pc" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+---------+----------+--------------+------------- + id | integer | | not null | | plain | | + c1 | text | | | | extended | | +Indexes: + "tbl_pc_pkey" PRIMARY KEY, btree (id) +Options: compresstype=zstd, compresslevel=-1, compress_chunk_size=1024, compress_prealloc_chunks=7 + +DROP TABLE tbl_pc; +CREATE TABLE tbl_pc WITH(compresstype=zstd, compresslevel=1, compress_chunk_size=1024, compress_prealloc_chunks=7) + AS SELECT id, id::text c1 FROM generate_series(1,1000)id; +\d+ tbl_pc + Table "public.tbl_pc" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+---------+----------+--------------+------------- + id | integer | | | | plain | | + c1 | text | | | | extended | | +Options: compresstype=zstd, 
compresslevel=1, compress_chunk_size=1024, compress_prealloc_chunks=7 + +DROP TABLE tbl_pc; +CREATE TABLE tbl_pc(id int PRIMARY KEY WITH(compresstype=zstd, compresslevel=1, compress_chunk_size=1024, compress_prealloc_chunks=2), c1 text) WITH(compresstype=zstd, compresslevel=1, compress_chunk_size=1024, compress_prealloc_chunks=2); +CREATE TABLE tbl_pc2(LIKE tbl_pc INCLUDING ALL); +\d+ tbl_pc + Table "public.tbl_pc" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+---------+----------+--------------+------------- + id | integer | | not null | | plain | | + c1 | text | | | | extended | | +Indexes: + "tbl_pc_pkey" PRIMARY KEY, btree (id) WITH (compresstype=zstd, compresslevel='1', compress_chunk_size='1024', compress_prealloc_chunks='2') +Options: compresstype=zstd, compresslevel=1, compress_chunk_size=1024, compress_prealloc_chunks=2 + +DROP TABLE tbl_pc; +DROP TABLE tbl_pc2; +CREATE TABLE tbl_pc(id int PRIMARY KEY, c1 text) WITH(compresstype=none); +\d+ tbl_pc + Table "public.tbl_pc" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+---------+----------+--------------+------------- + id | integer | | not null | | plain | | + c1 | text | | | | extended | | +Indexes: + "tbl_pc_pkey" PRIMARY KEY, btree (id) +Options: compresstype=none + +DROP TABLE tbl_pc; +-- invalid storage parameter +CREATE TABLE tbl_pc_error(id int, c1 text) WITH(compresstype=xyz); -- fail +ERROR: invalid value for enum option "compresstype": xyz +DETAIL: Valid values are "none", "pglz" and "zstd". 
+CREATE TABLE tbl_pc_error(id int, c1 text) WITH(compresstype=zstd, compresslevel=xyz); -- fail +ERROR: invalid value for integer option "compresslevel": xyz +CREATE TABLE tbl_pc_error(id int, c1 text) WITH(compresstype=zstd, compress_chunk_size=1025); -- fail +ERROR: invalid compress_chunk_size 1025 , must be one of 1024, 2048 or 4096 for tbl_pc_error +CREATE TABLE tbl_pc_error(id int, c1 text) WITH(compresstype=zstd, compress_prealloc_chunks=8); -- fail +ERROR: value 8 out of bounds for option "compress_prealloc_chunks" +DETAIL: Valid values are between "0" and "7". +-- +-- create compressed index +-- +SET enable_seqscan = OFF; +CREATE TABLE tbl_pc(id int PRIMARY KEY WITH(compresstype=zstd, compresslevel=1, compress_chunk_size=1024, compress_prealloc_chunks=2), c1 text) WITH(compresstype=zstd, compresslevel=1, compress_chunk_size=1024, compress_prealloc_chunks=2); +CREATE INDEX tbl_pc_idx1 on tbl_pc(c1) WITH(compresstype=zstd, compresslevel=1, compress_chunk_size=1024, compress_prealloc_chunks=2); +\d+ tbl_pc + Table "public.tbl_pc" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+---------+----------+--------------+------------- + id | integer | | not null | | plain | | + c1 | text | | | | extended | | +Indexes: + "tbl_pc_pkey" PRIMARY KEY, btree (id) WITH (compresstype=zstd, compresslevel='1', compress_chunk_size='1024', compress_prealloc_chunks='2') + "tbl_pc_idx1" btree (c1) WITH (compresstype=zstd, compresslevel='1', compress_chunk_size='1024', compress_prealloc_chunks='2') +Options: compresstype=zstd, compresslevel=1, compress_chunk_size=1024, compress_prealloc_chunks=2 + +INSERT INTO tbl_pc SELECT id, id::text FROM generate_series(1,1000)id; +-- call CHECKPOINT to flush shared buffer to compressed relation file +CHECKPOINT; +-- run ANALYZE REINDEX VACUUM and CLUSTER on compressed table and index +ANALYZE tbl_pc; +SELECT count(*) FROM tbl_pc; + count +------- + 1000 +(1 row) + 
+SELECT * FROM tbl_pc WHERE c1='100'; + id | c1 +-----+----- + 100 | 100 +(1 row) + +EXPLAIN(COSTS off) SELECT * FROM tbl_pc WHERE c1='100'; + QUERY PLAN +---------------------------------------- + Index Scan using tbl_pc_idx1 on tbl_pc + Index Cond: (c1 = '100'::text) +(2 rows) + +REINDEX INDEX tbl_pc_idx1; +CHECKPOINT; +SELECT * FROM tbl_pc WHERE c1='100'; + id | c1 +-----+----- + 100 | 100 +(1 row) + +REINDEX TABLE tbl_pc; +CHECKPOINT; +SELECT * FROM tbl_pc WHERE c1='100'; + id | c1 +-----+----- + 100 | 100 +(1 row) + +VACUUM tbl_pc; +CHECKPOINT; +SELECT count(*) FROM tbl_pc; + count +------- + 1000 +(1 row) + +SELECT * FROM tbl_pc WHERE c1='100'; + id | c1 +-----+----- + 100 | 100 +(1 row) + +VACUUM FULL tbl_pc; +CHECKPOINT; +SELECT count(*) FROM tbl_pc; + count +------- + 1000 +(1 row) + +SELECT * FROM tbl_pc WHERE c1='100'; + id | c1 +-----+----- + 100 | 100 +(1 row) + +CLUSTER tbl_pc USING tbl_pc_idx1; +CHECKPOINT; +SELECT count(*) FROM tbl_pc; + count +------- + 1000 +(1 row) + +SELECT * FROM tbl_pc WHERE c1='100'; + id | c1 +-----+----- + 100 | 100 +(1 row) + +DROP INDEX tbl_pc_idx1; +-- check usage of compressed index with data +CREATE INDEX tbl_pc_idx1 on tbl_pc USING hash(c1) WITH(compresstype=zstd, compresslevel=1, compress_chunk_size=1024, compress_prealloc_chunks=2); +CHECKPOINT; +SELECT * FROM tbl_pc WHERE c1='100'; + id | c1 +-----+----- + 100 | 100 +(1 row) + +EXPLAIN(COSTS off) SELECT * FROM tbl_pc WHERE c1='100'; + QUERY PLAN +---------------------------------------- + Index Scan using tbl_pc_idx1 on tbl_pc + Index Cond: (c1 = '100'::text) +(2 rows) + +DROP INDEX tbl_pc_idx1; +CREATE INDEX tbl_pc_idx1 on tbl_pc USING gin((ARRAY[id])) WITH(compresstype=zstd, compresslevel=1, compress_chunk_size=1024, compress_prealloc_chunks=2); +CHECKPOINT; +SELECT * FROM tbl_pc WHERE ARRAY[id] @> ARRAY[100]; + id | c1 +-----+----- + 100 | 100 +(1 row) + +EXPLAIN(COSTS off) SELECT * FROM tbl_pc WHERE ARRAY[id] @> ARRAY[100]; + QUERY PLAN 
+------------------------------------------------------- + Bitmap Heap Scan on tbl_pc + Recheck Cond: (ARRAY[id] @> '{100}'::integer[]) + -> Bitmap Index Scan on tbl_pc_idx1 + Index Cond: (ARRAY[id] @> '{100}'::integer[]) +(4 rows) + +DROP INDEX tbl_pc_idx1; +CREATE INDEX tbl_pc_idx1 on tbl_pc USING gist((point(id,id))) WITH(compresstype=zstd, compresslevel=1, compress_chunk_size=1024, compress_prealloc_chunks=2); +CHECKPOINT; +SELECT * FROM tbl_pc ORDER BY point(id,id) <-> point(100,100) limit 1; + id | c1 +-----+----- + 100 | 100 +(1 row) + +EXPLAIN(COSTS off) SELECT * FROM tbl_pc ORDER BY point(id,id) <-> point(100,100) limit 1; + QUERY PLAN +-------------------------------------------------------------------------------------------------- + Limit + -> Index Scan using tbl_pc_idx1 on tbl_pc + Order By: (point((id)::double precision, (id)::double precision) <-> '(100,100)'::point) +(3 rows) + +DROP INDEX tbl_pc_idx1; +CREATE INDEX tbl_pc_idx1 on tbl_pc USING spgist(c1) WITH(compresstype=zstd, compresslevel=1, compress_chunk_size=1024, compress_prealloc_chunks=2); +CHECKPOINT; +SELECT * FROM tbl_pc WHERE c1='100'; + id | c1 +-----+----- + 100 | 100 +(1 row) + +EXPLAIN(COSTS off) SELECT * FROM tbl_pc WHERE c1='100'; + QUERY PLAN +---------------------------------------- + Index Scan using tbl_pc_idx1 on tbl_pc + Index Cond: (c1 = '100'::text) +(2 rows) + +DROP INDEX tbl_pc_idx1; +-- brin index does not support compression +CREATE INDEX tbl_pc_idx1 on tbl_pc USING brin(c1) WITH(compresstype=zstd, compresslevel=1, compress_chunk_size=1024, compress_prealloc_chunks=2); -- fail +ERROR: unrecognized parameter "compresstype" +DROP TABLE tbl_pc; +RESET enable_seqscan; +-- +-- alter table and index +-- +-- ALTER TABLE +-- ALTER compresstype and compress_chunk_size currently is not supported +CREATE TABLE tbl_pc(id int, c1 text); +ALTER TABLE tbl_pc SET(compresstype=zstd); -- fail +ERROR: change compresstype OPTION is not supported +DROP TABLE tbl_pc; +CREATE TABLE 
tbl_pc(id int PRIMARY KEY, c1 text) WITH(compresstype=zstd, compresslevel=1, compress_chunk_size=1024, compress_prealloc_chunks=2); +ALTER TABLE tbl_pc SET(compresstype=none); -- fail +ERROR: change compresstype OPTION is not supported +ALTER TABLE tbl_pc SET(compress_chunk_size=2048); -- fail +ERROR: change compress_chunk_size OPTION is not supported +ALTER TABLE tbl_pc SET(compress_prealloc_chunks=8); -- fail +ERROR: value 8 out of bounds for option "compress_prealloc_chunks" +DETAIL: Valid values are between "0" and "7". +ALTER TABLE tbl_pc SET(compresslevel=2, compress_prealloc_chunks=0); -- ok +\d+ tbl_pc + Table "public.tbl_pc" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+---------+----------+--------------+------------- + id | integer | | not null | | plain | | + c1 | text | | | | extended | | +Indexes: + "tbl_pc_pkey" PRIMARY KEY, btree (id) +Options: compresstype=zstd, compress_chunk_size=1024, compresslevel=2, compress_prealloc_chunks=0 + +ALTER TABLE tbl_pc RESET(compresstype); -- fail +ERROR: change compresstype OPTION is not supported +ALTER TABLE tbl_pc RESET(compress_chunk_size); -- fail +ERROR: change compress_chunk_size OPTION is not supported +ALTER TABLE tbl_pc RESET(compresslevel); -- ok +ALTER TABLE tbl_pc RESET(compress_prealloc_chunks); -- ok +\d+ tbl_pc + Table "public.tbl_pc" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+---------+----------+--------------+------------- + id | integer | | not null | | plain | | + c1 | text | | | | extended | | +Indexes: + "tbl_pc_pkey" PRIMARY KEY, btree (id) +Options: compresstype=zstd, compress_chunk_size=1024 + +CHECKPOINT; +SELECT count(*) FROM tbl_pc; + count +------- + 0 +(1 row) + +-- ALTER INDEX +-- ALTER compresstype and compress_chunk_size currently is not supported +CREATE INDEX tbl_pc_idx1 on tbl_pc USING btree(c1); +ALTER 
INDEX tbl_pc_idx1 SET(compresstype=zstd); -- fail +ERROR: change compresstype OPTION is not supported +DROP INDEX tbl_pc_idx1; +CREATE INDEX tbl_pc_idx1 on tbl_pc USING btree(c1) WITH(compresstype=zstd, compresslevel=1, compress_chunk_size=1024, compress_prealloc_chunks=2); +ALTER INDEX tbl_pc_idx1 SET(compresstype=none); -- fail +ERROR: change compresstype OPTION is not supported +ALTER INDEX tbl_pc_idx1 SET(compress_chunk_size=2048); -- fail +ERROR: change compress_chunk_size OPTION is not supported +ALTER INDEX tbl_pc_idx1 SET(compress_prealloc_chunks=8); -- fail +ERROR: value 8 out of bounds for option "compress_prealloc_chunks" +DETAIL: Valid values are between "0" and "7". +ALTER INDEX tbl_pc_idx1 SET(compresslevel=2, compress_prealloc_chunks=0); -- ok +\d+ tbl_pc + Table "public.tbl_pc" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+---------+----------+--------------+------------- + id | integer | | not null | | plain | | + c1 | text | | | | extended | | +Indexes: + "tbl_pc_pkey" PRIMARY KEY, btree (id) + "tbl_pc_idx1" btree (c1) WITH (compresstype=zstd, compress_chunk_size='1024', compresslevel='2', compress_prealloc_chunks='0') +Options: compresstype=zstd, compress_chunk_size=1024 + +ALTER INDEX tbl_pc_idx1 RESET(compresstype); -- fail +ERROR: change compresstype OPTION is not supported +ALTER INDEX tbl_pc_idx1 RESET(compress_chunk_size); -- fail +ERROR: change compress_chunk_size OPTION is not supported +ALTER INDEX tbl_pc_idx1 RESET(compresslevel); -- ok +ALTER INDEX tbl_pc_idx1 RESET(compress_prealloc_chunks); -- ok +\d+ tbl_pc + Table "public.tbl_pc" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+---------+----------+--------------+------------- + id | integer | | not null | | plain | | + c1 | text | | | | extended | | +Indexes: + "tbl_pc_pkey" PRIMARY KEY, btree (id) + "tbl_pc_idx1" 
btree (c1) WITH (compresstype=zstd, compress_chunk_size='1024') +Options: compresstype=zstd, compress_chunk_size=1024 + +CHECKPOINT; +SELECT * FROM tbl_pc WHERE c1='100'; + id | c1 +----+---- +(0 rows) + +EXPLAIN(COSTS off) SELECT * FROM tbl_pc WHERE c1='100'; + QUERY PLAN +---------------------------------------- + Bitmap Heap Scan on tbl_pc + Recheck Cond: (c1 = '100'::text) + -> Bitmap Index Scan on tbl_pc_idx1 + Index Cond: (c1 = '100'::text) +(4 rows) + +-- alter hash index +CREATE INDEX tbl_pc_idx_hash on tbl_pc USING hash(c1) WITH(compresstype=zstd, compresslevel=1, compress_chunk_size=1024, compress_prealloc_chunks=2); +ALTER INDEX tbl_pc_idx_hash SET(compresstype=none); -- fail +ERROR: change compresstype OPTION is not supported +ALTER INDEX tbl_pc_idx_hash SET(compress_chunk_size=2048); -- fail +ERROR: change compress_chunk_size OPTION is not supported +ALTER INDEX tbl_pc_idx_hash SET(compress_prealloc_chunks=8); -- fail +ERROR: value 8 out of bounds for option "compress_prealloc_chunks" +DETAIL: Valid values are between "0" and "7". +ALTER INDEX tbl_pc_idx_hash SET(compresslevel=2, compress_prealloc_chunks=0); -- ok +\d+ tbl_pc_idx_hash + Index "public.tbl_pc_idx_hash" + Column | Type | Key? | Definition | Storage | Stats target +--------+---------+------+------------+---------+-------------- + c1 | integer | yes | c1 | plain | +hash, for table "public.tbl_pc" +Options: compresstype=zstd, compress_chunk_size=1024, compresslevel=2, compress_prealloc_chunks=0 + +ALTER INDEX tbl_pc_idx_hash RESET(compresstype); -- fail +ERROR: change compresstype OPTION is not supported +ALTER INDEX tbl_pc_idx_hash RESET(compress_chunk_size); -- fail +ERROR: change compress_chunk_size OPTION is not supported +ALTER INDEX tbl_pc_idx_hash RESET(compresslevel); -- ok +ALTER INDEX tbl_pc_idx_hash RESET(compress_prealloc_chunks); -- ok +\d+ tbl_pc_idx_hash + Index "public.tbl_pc_idx_hash" + Column | Type | Key? 
| Definition | Storage | Stats target +--------+---------+------+------------+---------+-------------- + c1 | integer | yes | c1 | plain | +hash, for table "public.tbl_pc" +Options: compresstype=zstd, compress_chunk_size=1024 + +-- alter gin index +CREATE INDEX tbl_pc_idx_gin on tbl_pc USING gin((ARRAY[id])) WITH(compresstype=zstd, compresslevel=1, compress_chunk_size=1024, compress_prealloc_chunks=2); +ALTER INDEX tbl_pc_idx_gin SET(compresstype=none); -- fail +ERROR: change compresstype OPTION is not supported +ALTER INDEX tbl_pc_idx_gin SET(compress_chunk_size=2048); -- fail +ERROR: change compress_chunk_size OPTION is not supported +ALTER INDEX tbl_pc_idx_gin SET(compress_prealloc_chunks=8); -- fail +ERROR: value 8 out of bounds for option "compress_prealloc_chunks" +DETAIL: Valid values are between "0" and "7". +ALTER INDEX tbl_pc_idx_gin SET(compresslevel=2, compress_prealloc_chunks=0); -- ok +\d+ tbl_pc_idx_gin + Index "public.tbl_pc_idx_gin" + Column | Type | Key? | Definition | Storage | Stats target +--------+---------+------+-------------+---------+-------------- + array | integer | yes | (ARRAY[id]) | plain | +gin, for table "public.tbl_pc" +Options: compresstype=zstd, compress_chunk_size=1024, compresslevel=2, compress_prealloc_chunks=0 + +ALTER INDEX tbl_pc_idx_gin RESET(compresstype); -- fail +ERROR: change compresstype OPTION is not supported +ALTER INDEX tbl_pc_idx_gin RESET(compress_chunk_size); -- fail +ERROR: change compress_chunk_size OPTION is not supported +ALTER INDEX tbl_pc_idx_gin RESET(compresslevel); -- ok +ALTER INDEX tbl_pc_idx_gin RESET(compress_prealloc_chunks); -- ok +\d+ tbl_pc_idx_gin + Index "public.tbl_pc_idx_gin" + Column | Type | Key? 
| Definition | Storage | Stats target +--------+---------+------+-------------+---------+-------------- + array | integer | yes | (ARRAY[id]) | plain | +gin, for table "public.tbl_pc" +Options: compresstype=zstd, compress_chunk_size=1024 + +-- alter gist index +CREATE INDEX tbl_pc_idx_gist on tbl_pc USING gist((point(id,id))) WITH(compresstype=zstd, compresslevel=1, compress_chunk_size=1024, compress_prealloc_chunks=2); +ALTER INDEX tbl_pc_idx_gist SET(compresstype=none); -- fail +ERROR: change compresstype OPTION is not supported +ALTER INDEX tbl_pc_idx_gist SET(compress_chunk_size=2048); -- fail +ERROR: change compress_chunk_size OPTION is not supported +ALTER INDEX tbl_pc_idx_gist SET(compress_prealloc_chunks=8); -- fail +ERROR: value 8 out of bounds for option "compress_prealloc_chunks" +DETAIL: Valid values are between "0" and "7". +ALTER INDEX tbl_pc_idx_gist SET(compresslevel=2, compress_prealloc_chunks=0); -- ok +\d+ tbl_pc_idx_gist + Index "public.tbl_pc_idx_gist" + Column | Type | Key? | Definition | Storage | Stats target +--------+------+------+---------------------------------------------------+---------+-------------- + point | box | yes | point(id::double precision, id::double precision) | plain | +gist, for table "public.tbl_pc" +Options: compresstype=zstd, compress_chunk_size=1024, compresslevel=2, compress_prealloc_chunks=0 + +ALTER INDEX tbl_pc_idx_gist RESET(compresstype); -- fail +ERROR: change compresstype OPTION is not supported +ALTER INDEX tbl_pc_idx_gist RESET(compress_chunk_size); -- fail +ERROR: change compress_chunk_size OPTION is not supported +ALTER INDEX tbl_pc_idx_gist RESET(compresslevel); -- ok +ALTER INDEX tbl_pc_idx_gist RESET(compress_prealloc_chunks); -- ok +\d+ tbl_pc_idx_gist + Index "public.tbl_pc_idx_gist" + Column | Type | Key? 
| Definition | Storage | Stats target +--------+------+------+---------------------------------------------------+---------+-------------- + point | box | yes | point(id::double precision, id::double precision) | plain | +gist, for table "public.tbl_pc" +Options: compresstype=zstd, compress_chunk_size=1024 + +-- alter spgist index +CREATE INDEX tbl_pc_idx_spgist on tbl_pc USING spgist(c1) WITH(compresstype=zstd, compresslevel=1, compress_chunk_size=1024, compress_prealloc_chunks=2); +ALTER INDEX tbl_pc_idx_spgist SET(compresstype=none); -- fail +ERROR: change compresstype OPTION is not supported +ALTER INDEX tbl_pc_idx_spgist SET(compress_chunk_size=2048); -- fail +ERROR: change compress_chunk_size OPTION is not supported +ALTER INDEX tbl_pc_idx_spgist SET(compress_prealloc_chunks=8); -- fail +ERROR: value 8 out of bounds for option "compress_prealloc_chunks" +DETAIL: Valid values are between "0" and "7". +ALTER INDEX tbl_pc_idx_spgist SET(compresslevel=2, compress_prealloc_chunks=0); -- ok +\d+ tbl_pc_idx_spgist + Index "public.tbl_pc_idx_spgist" + Column | Type | Key? | Definition | Storage | Stats target +--------+------+------+------------+----------+-------------- + c1 | text | yes | c1 | extended | +spgist, for table "public.tbl_pc" +Options: compresstype=zstd, compress_chunk_size=1024, compresslevel=2, compress_prealloc_chunks=0 + +ALTER INDEX tbl_pc_idx_spgist RESET(compresstype); -- fail +ERROR: change compresstype OPTION is not supported +ALTER INDEX tbl_pc_idx_spgist RESET(compress_chunk_size); -- fail +ERROR: change compress_chunk_size OPTION is not supported +ALTER INDEX tbl_pc_idx_spgist RESET(compresslevel); -- ok +ALTER INDEX tbl_pc_idx_spgist RESET(compress_prealloc_chunks); -- ok +\d+ tbl_pc_idx_spgist + Index "public.tbl_pc_idx_spgist" + Column | Type | Key? 
| Definition | Storage | Stats target +--------+------+------+------------+----------+-------------- + c1 | text | yes | c1 | extended | +spgist, for table "public.tbl_pc" +Options: compresstype=zstd, compress_chunk_size=1024 + +-- alter brin index (do not support compression) +CREATE INDEX tbl_pc_idx_brin on tbl_pc USING brin(c1); +ALTER INDEX tbl_pc_idx_brin SET(compress_prealloc_chunks=3); -- fail +ERROR: unrecognized parameter "compress_prealloc_chunks" +DROP TABLE tbl_pc; +-- +-- partitioned table and index +-- +-- partition table does not support compression, but index of partition table and its child tables can use compression +CREATE TABLE tbl_pc_part (id int, c1 text) PARTITION BY RANGE (id) WITH(compresstype=zstd); -- fail +ERROR: unrecognized parameter "compresstype" +CREATE TABLE tbl_pc_part (id int, c1 text) PARTITION BY RANGE (id); +CREATE TABLE tbl_pc_part_1 PARTITION OF tbl_pc_part FOR VALUES FROM (1) TO (1001); +CREATE TABLE tbl_pc_part_2 PARTITION OF tbl_pc_part FOR VALUES FROM (1001) TO (2001) WITH(compresstype=zstd, compresslevel=1, compress_chunk_size=1024, compress_prealloc_chunks=2); +CREATE INDEX part_id_idx ON tbl_pc_part(id) WITH(compresstype=zstd, compresslevel=2, compress_chunk_size=1024, compress_prealloc_chunks=2); +CREATE TABLE tbl_pc_part_3 PARTITION OF tbl_pc_part FOR VALUES FROM (2001) TO (3001); +CREATE INDEX part3_id_idx1 ON tbl_pc_part_3(id) WITH(compresstype=zstd, compresslevel=2, compress_chunk_size=1024, compress_prealloc_chunks=2); +\d+ tbl_pc_part + Partitioned table "public.tbl_pc_part" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+---------+----------+--------------+------------- + id | integer | | | | plain | | + c1 | text | | | | extended | | +Partition key: RANGE (id) +Indexes: + "part_id_idx" btree (id) WITH (compresstype=zstd, compresslevel='2', compress_chunk_size='1024', compress_prealloc_chunks='2') +Partitions: tbl_pc_part_1 FOR 
VALUES FROM (1) TO (1001), + tbl_pc_part_2 FOR VALUES FROM (1001) TO (2001), + tbl_pc_part_3 FOR VALUES FROM (2001) TO (3001) + +\d+ part_id_idx + Partitioned index "public.part_id_idx" + Column | Type | Key? | Definition | Storage | Stats target +--------+---------+------+------------+---------+-------------- + id | integer | yes | id | plain | +btree, for table "public.tbl_pc_part" +Partitions: tbl_pc_part_1_id_idx, + tbl_pc_part_2_id_idx, + tbl_pc_part_3_id_idx +Options: compresstype=zstd, compresslevel=2, compress_chunk_size=1024, compress_prealloc_chunks=2 + +\d+ tbl_pc_part_1 + Table "public.tbl_pc_part_1" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+---------+----------+--------------+------------- + id | integer | | | | plain | | + c1 | text | | | | extended | | +Partition of: tbl_pc_part FOR VALUES FROM (1) TO (1001) +Partition constraint: ((id IS NOT NULL) AND (id >= 1) AND (id < 1001)) +Indexes: + "tbl_pc_part_1_id_idx" btree (id) WITH (compresstype=zstd, compresslevel='2', compress_chunk_size='1024', compress_prealloc_chunks='2') + +\d+ tbl_pc_part_2 + Table "public.tbl_pc_part_2" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+---------+----------+--------------+------------- + id | integer | | | | plain | | + c1 | text | | | | extended | | +Partition of: tbl_pc_part FOR VALUES FROM (1001) TO (2001) +Partition constraint: ((id IS NOT NULL) AND (id >= 1001) AND (id < 2001)) +Indexes: + "tbl_pc_part_2_id_idx" btree (id) WITH (compresstype=zstd, compresslevel='2', compress_chunk_size='1024', compress_prealloc_chunks='2') +Options: compresstype=zstd, compresslevel=1, compress_chunk_size=1024, compress_prealloc_chunks=2 + +\d+ tbl_pc_part_3 + Table "public.tbl_pc_part_3" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description 
+--------+---------+-----------+----------+---------+----------+--------------+------------- + id | integer | | | | plain | | + c1 | text | | | | extended | | +Partition of: tbl_pc_part FOR VALUES FROM (2001) TO (3001) +Partition constraint: ((id IS NOT NULL) AND (id >= 2001) AND (id < 3001)) +Indexes: + "part3_id_idx1" btree (id) WITH (compresstype=zstd, compresslevel='2', compress_chunk_size='1024', compress_prealloc_chunks='2') + "tbl_pc_part_3_id_idx" btree (id) WITH (compresstype=zstd, compresslevel='2', compress_chunk_size='1024', compress_prealloc_chunks='2') + +INSERT INTO tbl_pc_part SELECT id, id::text FROM generate_series(1,3000)id; +CHECKPOINT; +SELECT count(*) FROM tbl_pc_part; + count +------- + 3000 +(1 row) + +SELECT * FROM tbl_pc_part WHERE id=100; + id | c1 +-----+----- + 100 | 100 +(1 row) + +SELECT * FROM tbl_pc_part WHERE id=1100; + id | c1 +------+------ + 1100 | 1100 +(1 row) + +SELECT * FROM tbl_pc_part WHERE id=2100; + id | c1 +------+------ + 2100 | 2100 +(1 row) + +ALTER TABLE tbl_pc_part SET(compresstype=zstd); -- fail +ERROR: unrecognized parameter "compresstype" +ALTER TABLE tbl_pc_part_1 SET(compresstype=zstd); -- fail +ERROR: change compresstype OPTION is not supported +ALTER TABLE tbl_pc_part_2 SET(compresstype=none); -- fail +ERROR: change compresstype OPTION is not supported +ALTER TABLE tbl_pc_part_2 SET(compress_chunk_size=2048); -- fail +ERROR: change compress_chunk_size OPTION is not supported +ALTER TABLE tbl_pc_part_2 SET(compress_prealloc_chunks=8); -- fail +ERROR: value 8 out of bounds for option "compress_prealloc_chunks" +DETAIL: Valid values are between "0" and "7". 
+ALTER TABLE tbl_pc_part_2 SET(compresslevel=2, compress_prealloc_chunks=0); -- ok +\d+ tbl_pc_part_2 + Table "public.tbl_pc_part_2" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+---------+----------+--------------+------------- + id | integer | | | | plain | | + c1 | text | | | | extended | | +Partition of: tbl_pc_part FOR VALUES FROM (1001) TO (2001) +Partition constraint: ((id IS NOT NULL) AND (id >= 1001) AND (id < 2001)) +Indexes: + "tbl_pc_part_2_id_idx" btree (id) WITH (compresstype=zstd, compresslevel='2', compress_chunk_size='1024', compress_prealloc_chunks='2') +Options: compresstype=zstd, compress_chunk_size=1024, compresslevel=2, compress_prealloc_chunks=0 + +ALTER INDEX part3_id_idx1 SET(compresstype=none); -- fail +ERROR: change compresstype OPTION is not supported +ALTER INDEX part3_id_idx1 SET(compress_chunk_size=2048); -- fail +ERROR: change compress_chunk_size OPTION is not supported +ALTER INDEX part3_id_idx1 SET(compress_prealloc_chunks=8); -- fail +ERROR: value 8 out of bounds for option "compress_prealloc_chunks" +DETAIL: Valid values are between "0" and "7". +ALTER INDEX part3_id_idx1 SET(compresslevel=2, compress_prealloc_chunks=0); -- ok +\d+ part3_id_idx1 + Index "public.part3_id_idx1" + Column | Type | Key? 
| Definition | Storage | Stats target +--------+---------+------+------------+---------+-------------- + id | integer | yes | id | plain | +btree, for table "public.tbl_pc_part_3" +Options: compresstype=zstd, compress_chunk_size=1024, compresslevel=2, compress_prealloc_chunks=0 + +ALTER INDEX part_id_idx SET(compresstype=zstd); -- fail +ERROR: "part_id_idx" is not a table, view, materialized view, or index +ALTER INDEX part_id_idx SET(compress_chunk_size=2048); -- fail +ERROR: "part_id_idx" is not a table, view, materialized view, or index +ALTER INDEX part_id_idx SET(compress_prealloc_chunks=8); -- fail +ERROR: "part_id_idx" is not a table, view, materialized view, or index +ALTER INDEX part_id_idx SET(compresslevel=2, compress_prealloc_chunks=0); -- fail +ERROR: "part_id_idx" is not a table, view, materialized view, or index +\d+ tbl_pc_part + Partitioned table "public.tbl_pc_part" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+---------+----------+--------------+------------- + id | integer | | | | plain | | + c1 | text | | | | extended | | +Partition key: RANGE (id) +Indexes: + "part_id_idx" btree (id) WITH (compresstype=zstd, compresslevel='2', compress_chunk_size='1024', compress_prealloc_chunks='2') +Partitions: tbl_pc_part_1 FOR VALUES FROM (1) TO (1001), + tbl_pc_part_2 FOR VALUES FROM (1001) TO (2001), + tbl_pc_part_3 FOR VALUES FROM (2001) TO (3001) + +INSERT INTO tbl_pc_part SELECT id, id::text FROM generate_series(1,3000)id; +CHECKPOINT; +SELECT count(*) FROM tbl_pc_part; + count +------- + 6000 +(1 row) + +SELECT * FROM tbl_pc_part WHERE id=100; + id | c1 +-----+----- + 100 | 100 + 100 | 100 +(2 rows) + +SELECT * FROM tbl_pc_part WHERE id=1100; + id | c1 +------+------ + 1100 | 1100 + 1100 | 1100 +(2 rows) + +SELECT * FROM tbl_pc_part WHERE id=2100; + id | c1 +------+------ + 2100 | 2100 + 2100 | 2100 +(2 rows) + +DROP TABLE tbl_pc_part; +-- +-- default tablespace 
store parameter +-- +-- can not use compression on global tablespace +ALTER TABLESPACE pg_default SET(default_compresstype=xxx); -- fail +ERROR: invalid value for enum option "default_compresstype": xxx +DETAIL: Valid values are "none", "pglz" and "zstd". +ALTER TABLESPACE pg_default SET(default_compress_chunk_size=1023); -- fail +ERROR: value 1023 out of bounds for option "default_compress_chunk_size" +DETAIL: Valid values are between "1024" and "4096". +ALTER TABLESPACE pg_default SET(default_compress_chunk_size=4097); -- fail +ERROR: value 4097 out of bounds for option "default_compress_chunk_size" +DETAIL: Valid values are between "1024" and "4096". +ALTER TABLESPACE pg_default SET(default_compress_prealloc_chunks=-1); -- fail +ERROR: value -1 out of bounds for option "default_compress_prealloc_chunks" +DETAIL: Valid values are between "0" and "7". +ALTER TABLESPACE pg_default SET(default_compress_prealloc_chunks=8); -- fail +ERROR: value 8 out of bounds for option "default_compress_prealloc_chunks" +DETAIL: Valid values are between "0" and "7". 
+ALTER TABLESPACE pg_default SET(default_compresstype=zstd, default_compresslevel=2, default_compress_chunk_size=1024, default_compress_prealloc_chunks=2); + -- ok +-- table and index(btree,hash,gin,gist,spgist) inherit default compression options from it's tablespace +CREATE TABLE tbl_pc(id int PRIMARY KEY, c1 text); +CREATE INDEX tbl_pc_idx_btree on tbl_pc(c1); +CREATE INDEX tbl_pc_idx_hash on tbl_pc USING hash(c1); +CREATE INDEX tbl_pc_idx_gin on tbl_pc USING gin((ARRAY[id])); +CREATE INDEX tbl_pc_idx_gist on tbl_pc USING gist((point(id,id))); +CREATE INDEX tbl_pc_idx_spgist on tbl_pc USING spgist(c1); +CREATE INDEX tbl_pc_idx_brin on tbl_pc USING brin(c1); +\d+ tbl_pc + Table "public.tbl_pc" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+---------+----------+--------------+------------- + id | integer | | not null | | plain | | + c1 | text | | | | extended | | +Indexes: + "tbl_pc_pkey" PRIMARY KEY, btree (id) WITH (compresstype=zstd, compresslevel='2', compress_chunk_size='1024', compress_prealloc_chunks='2') + "tbl_pc_idx_brin" brin (c1) + "tbl_pc_idx_btree" btree (c1) WITH (compresstype=zstd, compresslevel='2', compress_chunk_size='1024', compress_prealloc_chunks='2') + "tbl_pc_idx_gin" gin ((ARRAY[id])) WITH (compresstype=zstd, compresslevel='2', compress_chunk_size='1024', compress_prealloc_chunks='2') + "tbl_pc_idx_gist" gist (point(id::double precision, id::double precision)) WITH (compresstype=zstd, compresslevel='2', compress_chunk_size='1024', compress_prealloc_chunks='2') + "tbl_pc_idx_hash" hash (c1) WITH (compresstype=zstd, compresslevel='2', compress_chunk_size='1024', compress_prealloc_chunks='2') + "tbl_pc_idx_spgist" spgist (c1) WITH (compresstype=zstd, compresslevel='2', compress_chunk_size='1024', compress_prealloc_chunks='2') +Options: compresstype=zstd, compresslevel=2, compress_chunk_size=1024, compress_prealloc_chunks=2 + +SELECT size FROM 
pg_stat_file(pg_relation_filepath('tbl_pc') || '_pca'); + size +--------- + 4794000 +(1 row) + +SELECT size FROM pg_stat_file(pg_relation_filepath('tbl_pc_idx_btree') || '_pca'); + size +--------- + 4794000 +(1 row) + +SELECT size FROM pg_stat_file(pg_relation_filepath('tbl_pc_idx_hash') || '_pca'); + size +--------- + 4794000 +(1 row) + +SELECT size FROM pg_stat_file(pg_relation_filepath('tbl_pc_idx_gin') || '_pca'); + size +--------- + 4794000 +(1 row) + +SELECT size FROM pg_stat_file(pg_relation_filepath('tbl_pc_idx_gist') || '_pca'); + size +--------- + 4794000 +(1 row) + +SELECT size FROM pg_stat_file(pg_relation_filepath('tbl_pc_idx_spgist') || '_pca'); + size +--------- + 4794000 +(1 row) + +SELECT size FROM pg_stat_file(pg_relation_filepath('tbl_pc_idx_brin') || '_pca', true); + size +------ + +(1 row) + +-- toast relation will not be compressed +SELECT reltoastrelid FROM pg_class WHERE oid='tbl_pc'::regclass \gset +SELECT reloptions FROM pg_class where oid=:reltoastrelid; + reloptions +------------ + +(1 row) + +SELECT size FROM pg_stat_file(pg_relation_filepath(:reltoastrelid) || '_pca', true); + size +------ + +(1 row) + +CREATE TABLE tbl_pc1 AS SELECT * FROM tbl_pc; +\d+ tbl_pc1 + Table "public.tbl_pc1" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+---------+----------+--------------+------------- + id | integer | | | | plain | | + c1 | text | | | | extended | | +Options: compresstype=zstd, compresslevel=2, compress_chunk_size=1024, compress_prealloc_chunks=2 + +ALTER TABLESPACE pg_default RESET(default_compresstype, default_compresslevel, default_compress_chunk_size, default_compress_prealloc_chunks); +CREATE INDEX tbl_pc_idx2 on tbl_pc(c1); +\d+ tbl_pc + Table "public.tbl_pc" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+---------+----------+--------------+------------- + id | integer 
| | not null | | plain | | + c1 | text | | | | extended | | +Indexes: + "tbl_pc_pkey" PRIMARY KEY, btree (id) WITH (compresstype=zstd, compresslevel='2', compress_chunk_size='1024', compress_prealloc_chunks='2') + "tbl_pc_idx2" btree (c1) + "tbl_pc_idx_brin" brin (c1) + "tbl_pc_idx_btree" btree (c1) WITH (compresstype=zstd, compresslevel='2', compress_chunk_size='1024', compress_prealloc_chunks='2') + "tbl_pc_idx_gin" gin ((ARRAY[id])) WITH (compresstype=zstd, compresslevel='2', compress_chunk_size='1024', compress_prealloc_chunks='2') + "tbl_pc_idx_gist" gist (point(id::double precision, id::double precision)) WITH (compresstype=zstd, compresslevel='2', compress_chunk_size='1024', compress_prealloc_chunks='2') + "tbl_pc_idx_hash" hash (c1) WITH (compresstype=zstd, compresslevel='2', compress_chunk_size='1024', compress_prealloc_chunks='2') + "tbl_pc_idx_spgist" spgist (c1) WITH (compresstype=zstd, compresslevel='2', compress_chunk_size='1024', compress_prealloc_chunks='2') +Options: compresstype=zstd, compresslevel=2, compress_chunk_size=1024, compress_prealloc_chunks=2 + +CREATE TABLE tbl_pc2(LIKE tbl_pc); +\d+ tbl_pc2 + Table "public.tbl_pc2" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+---------+----------+--------------+------------- + id | integer | | not null | | plain | | + c1 | text | | | | extended | | + +CREATE TABLE tbl_pc3(LIKE tbl_pc INCLUDING ALL); +\d+ tbl_pc3 + Table "public.tbl_pc3" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+---------+----------+--------------+------------- + id | integer | | not null | | plain | | + c1 | text | | | | extended | | +Indexes: + "tbl_pc3_pkey" PRIMARY KEY, btree (id) WITH (compresstype=zstd, compresslevel='2', compress_chunk_size='1024', compress_prealloc_chunks='2') + "tbl_pc3_array_idx" gin ((ARRAY[id])) WITH (compresstype=zstd, 
compresslevel='2', compress_chunk_size='1024', compress_prealloc_chunks='2') + "tbl_pc3_c1_idx" btree (c1) WITH (compresstype=zstd, compresslevel='2', compress_chunk_size='1024', compress_prealloc_chunks='2') + "tbl_pc3_c1_idx1" hash (c1) WITH (compresstype=zstd, compresslevel='2', compress_chunk_size='1024', compress_prealloc_chunks='2') + "tbl_pc3_c1_idx2" spgist (c1) WITH (compresstype=zstd, compresslevel='2', compress_chunk_size='1024', compress_prealloc_chunks='2') + "tbl_pc3_c1_idx3" brin (c1) + "tbl_pc3_c1_idx4" btree (c1) + "tbl_pc3_point_idx" gist (point(id::double precision, id::double precision)) WITH (compresstype=zstd, compresslevel='2', compress_chunk_size='1024', compress_prealloc_chunks='2') + +CREATE TABLE tbl_pc4 AS SELECT * FROM tbl_pc; +\d+ tbl_pc4 + Table "public.tbl_pc4" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+---------+----------+--------------+------------- + id | integer | | | | plain | | + c1 | text | | | | extended | | + +DROP TABLE tbl_pc; +DROP TABLE tbl_pc1; +DROP TABLE tbl_pc2; +DROP TABLE tbl_pc3; +DROP TABLE tbl_pc4; +ALTER TABLESPACE pg_default SET(default_compresstype=zstd, default_compresslevel=2, default_compress_chunk_size=1024, default_compress_prealloc_chunks=2); +CREATE TABLE tbl_pc(id int PRIMARY KEY, c1 text) WITH(compresstype=none); +CREATE INDEX tbl_pc_idx1 on tbl_pc(c1) WITH(compresstype=none); +\d+ tbl_pc + Table "public.tbl_pc" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+---------+----------+--------------+------------- + id | integer | | not null | | plain | | + c1 | text | | | | extended | | +Indexes: + "tbl_pc_pkey" PRIMARY KEY, btree (id) WITH (compresstype=zstd, compresslevel='2', compress_chunk_size='1024', compress_prealloc_chunks='2') + "tbl_pc_idx1" btree (c1) WITH (compresslevel='2', compress_chunk_size='1024', 
compress_prealloc_chunks='2', compresstype='none') +Options: compresslevel=2, compress_chunk_size=1024, compress_prealloc_chunks=2, compresstype=none + +INSERT INTO tbl_pc SELECT id, id::text FROM generate_series(1,1000)id; +CHECKPOINT; +SELECT count(*) FROM tbl_pc; + count +------- + 1000 +(1 row) + +SELECT * FROM tbl_pc WHERE c1='100'; + id | c1 +-----+----- + 100 | 100 +(1 row) + +DROP TABLE tbl_pc; +-- tablespace & partitioned table +CREATE TABLE tbl_pc_part (id int, c1 text) PARTITION BY RANGE (id); +CREATE TABLE tbl_pc_part_1 PARTITION OF tbl_pc_part FOR VALUES FROM (1) TO (1001); +CREATE TABLE tbl_pc_part_2 PARTITION OF tbl_pc_part FOR VALUES FROM (1001) TO (2001) WITH(compresstype=zstd); +CREATE INDEX part_id_idx ON tbl_pc_part(id) WITH(compresstype=zstd, compresslevel=1, compress_chunk_size=1024); +CREATE TABLE tbl_pc_part_3 PARTITION OF tbl_pc_part FOR VALUES FROM (2001) TO (3001) WITH(compresstype=none); +CREATE INDEX part3_id_idx1 ON tbl_pc_part_3(id) WITH(compresstype=zstd, compress_chunk_size=1024, compress_prealloc_chunks=3); +\d+ tbl_pc_part + Partitioned table "public.tbl_pc_part" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+---------+----------+--------------+------------- + id | integer | | | | plain | | + c1 | text | | | | extended | | +Partition key: RANGE (id) +Indexes: + "part_id_idx" btree (id) WITH (compress_prealloc_chunks='2', compresstype=zstd, compresslevel='1', compress_chunk_size='1024') +Partitions: tbl_pc_part_1 FOR VALUES FROM (1) TO (1001), + tbl_pc_part_2 FOR VALUES FROM (1001) TO (2001), + tbl_pc_part_3 FOR VALUES FROM (2001) TO (3001) + +\d+ tbl_pc_part_1 + Table "public.tbl_pc_part_1" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+---------+----------+--------------+------------- + id | integer | | | | plain | | + c1 | text | | | | extended | | +Partition of: 
tbl_pc_part FOR VALUES FROM (1) TO (1001) +Partition constraint: ((id IS NOT NULL) AND (id >= 1) AND (id < 1001)) +Indexes: + "tbl_pc_part_1_id_idx" btree (id) WITH (compress_prealloc_chunks='2', compresstype=zstd, compresslevel='1', compress_chunk_size='1024') +Options: compresstype=zstd, compresslevel=2, compress_chunk_size=1024, compress_prealloc_chunks=2 + +\d+ tbl_pc_part_2 + Table "public.tbl_pc_part_2" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+---------+----------+--------------+------------- + id | integer | | | | plain | | + c1 | text | | | | extended | | +Partition of: tbl_pc_part FOR VALUES FROM (1001) TO (2001) +Partition constraint: ((id IS NOT NULL) AND (id >= 1001) AND (id < 2001)) +Indexes: + "tbl_pc_part_2_id_idx" btree (id) WITH (compress_prealloc_chunks='2', compresstype=zstd, compresslevel='1', compress_chunk_size='1024') +Options: compresslevel=2, compress_chunk_size=1024, compress_prealloc_chunks=2, compresstype=zstd + +\d+ tbl_pc_part_3 + Table "public.tbl_pc_part_3" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+---------+----------+--------------+------------- + id | integer | | | | plain | | + c1 | text | | | | extended | | +Partition of: tbl_pc_part FOR VALUES FROM (2001) TO (3001) +Partition constraint: ((id IS NOT NULL) AND (id >= 2001) AND (id < 3001)) +Indexes: + "part3_id_idx1" btree (id) WITH (compresslevel='2', compresstype=zstd, compress_chunk_size='1024', compress_prealloc_chunks='3') + "tbl_pc_part_3_id_idx" btree (id) WITH (compress_prealloc_chunks='2', compresstype=zstd, compresslevel='1', compress_chunk_size='1024') +Options: compresslevel=2, compress_chunk_size=1024, compress_prealloc_chunks=2, compresstype=none + +DROP TABLE tbl_pc_part; +-- tablespace & unlogged relation +CREATE UNLOGGED TABLE tbl_pc_unlogged(id int PRIMARY KEY, c1 text); +CREATE INDEX 
tbl_pc_idx1_unlogged ON tbl_pc_unlogged(c1); +\d+ tbl_pc_unlogged + Unlogged table "public.tbl_pc_unlogged" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+---------+----------+--------------+------------- + id | integer | | not null | | plain | | + c1 | text | | | | extended | | +Indexes: + "tbl_pc_unlogged_pkey" PRIMARY KEY, btree (id) + "tbl_pc_idx1_unlogged" btree (c1) + +ALTER TABLE tbl_pc_unlogged SET LOGGED; +DROP TABLE tbl_pc_unlogged; +CREATE UNLOGGED TABLE tbl_pc_unlogged(id int PRIMARY KEY, c1 text) WITH(compresstype=zstd, compresslevel=1, compress_chunk_size=1024, compress_prealloc_chunks=0); +CREATE INDEX tbl_pc_idx1_unlogged ON tbl_pc_unlogged(c1) WITH(compresstype=zstd, compresslevel=1, compress_chunk_size=1024, compress_prealloc_chunks=1); +\d+ tbl_pc_unlogged + Unlogged table "public.tbl_pc_unlogged" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+---------+----------+--------------+------------- + id | integer | | not null | | plain | | + c1 | text | | | | extended | | +Indexes: + "tbl_pc_unlogged_pkey" PRIMARY KEY, btree (id) + "tbl_pc_idx1_unlogged" btree (c1) WITH (compresstype=zstd, compresslevel='1', compress_chunk_size='1024', compress_prealloc_chunks='1') +Options: compresstype=zstd, compresslevel=1, compress_chunk_size=1024, compress_prealloc_chunks=0 + +ALTER TABLE tbl_pc_unlogged SET(compresslevel=2, compress_prealloc_chunks=1); +ALTER INDEX tbl_pc_idx1_unlogged SET(compresslevel=2, compress_prealloc_chunks=1); +\d+ tbl_pc_unlogged + Unlogged table "public.tbl_pc_unlogged" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+---------+----------+--------------+------------- + id | integer | | not null | | plain | | + c1 | text | | | | extended | | +Indexes: + "tbl_pc_unlogged_pkey" PRIMARY KEY, 
btree (id) + "tbl_pc_idx1_unlogged" btree (c1) WITH (compresstype=zstd, compress_chunk_size='1024', compresslevel='2', compress_prealloc_chunks='1') +Options: compresstype=zstd, compress_chunk_size=1024, compresslevel=2, compress_prealloc_chunks=1 + +INSERT INTO tbl_pc_unlogged SELECT id, id::text FROM generate_series(1,1000)id; +CHECKPOINT; +SELECT count(*) FROM tbl_pc_unlogged; + count +------- + 1000 +(1 row) + +SELECT * FROM tbl_pc_unlogged WHERE c1='100'; + id | c1 +-----+----- + 100 | 100 +(1 row) + +ALTER TABLE tbl_pc_unlogged SET LOGGED; +INSERT INTO tbl_pc_unlogged SELECT id, id::text FROM generate_series(1001,2000)id; +CHECKPOINT; +SELECT count(*) FROM tbl_pc_unlogged; + count +------- + 2000 +(1 row) + +SELECT * FROM tbl_pc_unlogged WHERE c1='100'; + id | c1 +-----+----- + 100 | 100 +(1 row) + +DROP TABLE tbl_pc_unlogged; +-- tablespace & temp relation +CREATE TEMP TABLE tbl_pc_tmp(id int PRIMARY KEY, c1 text); +CREATE INDEX tbl_pc_idx1_tmp ON tbl_pc_tmp(c1); +SELECT reloptions FROM pg_class WHERE oid='tbl_pc_tmp'::regclass; + reloptions +------------ + +(1 row) + +SELECT reloptions FROM pg_class WHERE oid='tbl_pc_idx1_tmp'::regclass; + reloptions +------------ + +(1 row) + +DROP TABLE tbl_pc_tmp; +CREATE TEMP TABLE tbl_pc_tmp(id int PRIMARY KEY, c1 text) WITH(compresstype=zstd, compresslevel=1, compress_chunk_size=2048, compress_prealloc_chunks=2); +CREATE INDEX tbl_pc_idx1_tmp ON tbl_pc_tmp(c1) WITH(compresstype=zstd, compresslevel=1, compress_chunk_size=2048, compress_prealloc_chunks=1); +SELECT reloptions FROM pg_class WHERE oid='tbl_pc_tmp'::regclass; + reloptions +----------------------------------------------------------------------------------------- + {compresstype=zstd,compresslevel=1,compress_chunk_size=2048,compress_prealloc_chunks=2} +(1 row) + +SELECT reloptions FROM pg_class WHERE oid='tbl_pc_idx1_tmp'::regclass; + reloptions +----------------------------------------------------------------------------------------- + 
{compresstype=zstd,compresslevel=1,compress_chunk_size=2048,compress_prealloc_chunks=1} +(1 row) + +ALTER TABLE tbl_pc_tmp SET(compresslevel=2, compress_prealloc_chunks=1); +ALTER INDEX tbl_pc_idx1_tmp SET(compresslevel=2, compress_prealloc_chunks=1); +SELECT reloptions FROM pg_class WHERE oid='tbl_pc_tmp'::regclass; + reloptions +----------------------------------------------------------------------------------------- + {compresstype=zstd,compress_chunk_size=2048,compresslevel=2,compress_prealloc_chunks=1} +(1 row) + +SELECT reloptions FROM pg_class WHERE oid='tbl_pc_idx1_tmp'::regclass; + reloptions +----------------------------------------------------------------------------------------- + {compresstype=zstd,compress_chunk_size=2048,compresslevel=2,compress_prealloc_chunks=1} +(1 row) + +INSERT INTO tbl_pc_tmp SELECT id, id::text FROM generate_series(1,1000)id; +CHECKPOINT; +SELECT count(*) FROM tbl_pc_tmp; + count +------- + 1000 +(1 row) + +SELECT * FROM tbl_pc_tmp WHERE c1='100'; + id | c1 +-----+----- + 100 | 100 +(1 row) + +DROP TABLE tbl_pc_tmp; +-- tablespace & materialized view +CREATE MATERIALIZED VIEW mv_pc AS SELECT id, id::text c1 FROM generate_series(1,1000)id; +\d+ mv_pc + Materialized view "public.mv_pc" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+---------+----------+--------------+------------- + id | integer | | | | plain | | + c1 | text | | | | extended | | +View definition: + SELECT id.id, + id.id::text AS c1 + FROM generate_series(1, 1000) id(id); + +DROP MATERIALIZED VIEW mv_pc; +CREATE MATERIALIZED VIEW mv_pc WITH(compresstype=zstd, compresslevel=1, compress_chunk_size=2048, compress_prealloc_chunks=1) + AS SELECT id, id::text c1 FROM generate_series(1,1000)id; +CREATE INDEX mv_pc_idx ON mv_pc(c1) WITH(compresstype=zstd, compresslevel=1, compress_chunk_size=1024, compress_prealloc_chunks=2); +\d+ mv_pc + Materialized view "public.mv_pc" + Column | Type | 
Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+---------+----------+--------------+------------- + id | integer | | | | plain | | + c1 | text | | | | extended | | +Indexes: + "mv_pc_idx" btree (c1) WITH (compresstype=zstd, compresslevel='1', compress_chunk_size='1024', compress_prealloc_chunks='2') +View definition: + SELECT id.id, + id.id::text AS c1 + FROM generate_series(1, 1000) id(id); +Options: compresstype=zstd, compresslevel=1, compress_chunk_size=2048, compress_prealloc_chunks=1 + +ALTER MATERIALIZED VIEW mv_pc SET(compresstype=none); -- fail +ERROR: change compresstype OPTION is not supported +ALTER MATERIALIZED VIEW mv_pc SET(compress_chunk_size=1024); -- fail +ERROR: change compress_chunk_size OPTION is not supported +ALTER MATERIALIZED VIEW mv_pc SET(compress_prealloc_chunks=8); -- fail +ERROR: value 8 out of bounds for option "compress_prealloc_chunks" +DETAIL: Valid values are between "0" and "7". +ALTER MATERIALIZED VIEW mv_pc SET(compresslevel=2, compress_prealloc_chunks=0); -- ok +\d+ mv_pc + Materialized view "public.mv_pc" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+---------+----------+--------------+------------- + id | integer | | | | plain | | + c1 | text | | | | extended | | +Indexes: + "mv_pc_idx" btree (c1) WITH (compresstype=zstd, compresslevel='1', compress_chunk_size='1024', compress_prealloc_chunks='2') +View definition: + SELECT id.id, + id.id::text AS c1 + FROM generate_series(1, 1000) id(id); +Options: compresstype=zstd, compress_chunk_size=2048, compresslevel=2, compress_prealloc_chunks=0 + +ALTER MATERIALIZED VIEW mv_pc RESET(compresstype); -- fail +ERROR: change compresstype OPTION is not supported +ALTER MATERIALIZED VIEW mv_pc RESET(compress_chunk_size); -- fail +ERROR: change compress_chunk_size OPTION is not supported +ALTER MATERIALIZED VIEW mv_pc RESET(compresslevel); -- 
ok +ALTER MATERIALIZED VIEW mv_pc RESET(compress_prealloc_chunks); -- ok +\d+ mv_pc + Materialized view "public.mv_pc" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+---------+----------+--------------+------------- + id | integer | | | | plain | | + c1 | text | | | | extended | | +Indexes: + "mv_pc_idx" btree (c1) WITH (compresstype=zstd, compresslevel='1', compress_chunk_size='1024', compress_prealloc_chunks='2') +View definition: + SELECT id.id, + id.id::text AS c1 + FROM generate_series(1, 1000) id(id); +Options: compresstype=zstd, compress_chunk_size=2048 + +ALTER INDEX mv_pc_idx SET(compresstype=none); -- fail +ERROR: change compresstype OPTION is not supported +ALTER INDEX mv_pc_idx SET(compress_chunk_size=2048); -- fail +ERROR: change compress_chunk_size OPTION is not supported +ALTER INDEX mv_pc_idx SET(compress_prealloc_chunks=8); -- fail +ERROR: value 8 out of bounds for option "compress_prealloc_chunks" +DETAIL: Valid values are between "0" and "7". +ALTER INDEX mv_pc_idx SET(compresslevel=2, compress_prealloc_chunks=0); -- ok +\d+ mv_pc_idx + Index "public.mv_pc_idx" + Column | Type | Key? | Definition | Storage | Stats target +--------+------+------+------------+----------+-------------- + c1 | text | yes | c1 | extended | +btree, for table "public.mv_pc" +Options: compresstype=zstd, compress_chunk_size=1024, compresslevel=2, compress_prealloc_chunks=0 + +ALTER INDEX mv_pc_idx RESET(compresstype); -- fail +ERROR: change compresstype OPTION is not supported +ALTER INDEX mv_pc_idx RESET(compress_chunk_size); -- fail +ERROR: change compress_chunk_size OPTION is not supported +ALTER INDEX mv_pc_idx RESET(compresslevel); -- ok +ALTER INDEX mv_pc_idx RESET(compress_prealloc_chunks); -- ok +\d+ mv_pc_idx + Index "public.mv_pc_idx" + Column | Type | Key? 
| Definition | Storage | Stats target +--------+------+------+------------+----------+-------------- + c1 | text | yes | c1 | extended | +btree, for table "public.mv_pc" +Options: compresstype=zstd, compress_chunk_size=1024 + +CHECKPOINT; +SELECT count(*) FROM mv_pc; + count +------- + 1000 +(1 row) + +SELECT count(*) FROM mv_pc WHERE c1 = '100'; + count +------- + 1 +(1 row) + +REFRESH MATERIALIZED VIEW mv_pc; +CHECKPOINT; +SELECT count(*) FROM mv_pc; + count +------- + 1000 +(1 row) + +SELECT count(*) FROM mv_pc WHERE c1 = '100'; + count +------- + 1 +(1 row) + +DROP MATERIALIZED VIEW mv_pc; +ALTER TABLESPACE pg_default RESET(default_compresstype, default_compresslevel, default_compress_chunk_size, default_compress_prealloc_chunks); +-- +-- recycling space with vacuum +-- +CREATE TABLE tbl_pc(id int PRIMARY KEY, c1 text) WITH(compresstype=zstd, compress_chunk_size=2048, compress_prealloc_chunks=0); +CHECKPOINT; +SELECT pg_relation_size('tbl_pc') size_0 \gset +INSERT INTO tbl_pc SELECT id, id::text FROM generate_series(1,1000)id; +CHECKPOINT; +SELECT pg_relation_size('tbl_pc') size_1000 \gset +SELECT :size_1000 > :size_0; -- true + ?column? +---------- + t +(1 row) + +DELETE FROM tbl_pc WHERE id > 500; +VACUUM tbl_pc; +SELECT pg_relation_size('tbl_pc') size_500 \gset +SELECT count(*) FROM tbl_pc; + count +------- + 500 +(1 row) + +SELECT :size_500 < :size_1000; -- true + ?column? +---------- + t +(1 row) + +DELETE FROM tbl_pc WHERE id < 500; +VACUUM tbl_pc; +SELECT pg_relation_size('tbl_pc') size_1 \gset +SELECT count(*) FROM tbl_pc; + count +------- + 1 +(1 row) + +SELECT :size_1 = :size_500; -- true + ?column? +---------- + t +(1 row) + +DELETE FROM tbl_pc; +VACUUM tbl_pc; +SELECT pg_relation_size('tbl_pc') size_0_2 \gset +SELECT :size_0_2 = :size_0; + ?column? 
+---------- + t +(1 row) + +DROP TABLE tbl_pc; diff --git a/src/test/zstd/sql/page_compression_with_zstd.sql b/src/test/zstd/sql/page_compression_with_zstd.sql new file mode 100644 index 0000000000..7fc6c78f1d --- /dev/null +++ b/src/test/zstd/sql/page_compression_with_zstd.sql @@ -0,0 +1,525 @@ +-- +-- Page compression tests +-- + +-- +-- create compressed table +-- +CREATE TABLE tbl_pc(id int, c1 text) WITH(compresstype=zstd); +\d+ tbl_pc +DROP TABLE tbl_pc; + +CREATE TABLE tbl_pc(id int, c1 text) WITH(compresstype=zstd, compress_chunk_size=1024); +\d+ tbl_pc +DROP TABLE tbl_pc; + +CREATE TABLE tbl_pc(id int, c1 text) WITH(compresstype=zstd, compress_chunk_size=2048); +\d+ tbl_pc +DROP TABLE tbl_pc; + +CREATE TABLE tbl_pc(id int, c1 text) WITH(compresstype=zstd, compresslevel=0, compress_chunk_size=4096, compress_prealloc_chunks=0); +\d+ tbl_pc +DROP TABLE tbl_pc; + +CREATE TABLE tbl_pc(id int PRIMARY KEY, c1 text) WITH(compresstype=zstd, compresslevel=1, compress_chunk_size=1024, compress_prealloc_chunks=2); +\d+ tbl_pc +DROP TABLE tbl_pc; + +CREATE TABLE tbl_pc(id int PRIMARY KEY, c1 text) WITH(compresstype=zstd, compresslevel=-1, compress_chunk_size=1024, compress_prealloc_chunks=7); +\d+ tbl_pc +DROP TABLE tbl_pc; + +CREATE TABLE tbl_pc WITH(compresstype=zstd, compresslevel=1, compress_chunk_size=1024, compress_prealloc_chunks=7) + AS SELECT id, id::text c1 FROM generate_series(1,1000)id; +\d+ tbl_pc +DROP TABLE tbl_pc; + +CREATE TABLE tbl_pc(id int PRIMARY KEY WITH(compresstype=zstd, compresslevel=1, compress_chunk_size=1024, compress_prealloc_chunks=2), c1 text) WITH(compresstype=zstd, compresslevel=1, compress_chunk_size=1024, compress_prealloc_chunks=2); +CREATE TABLE tbl_pc2(LIKE tbl_pc INCLUDING ALL); +\d+ tbl_pc +DROP TABLE tbl_pc; +DROP TABLE tbl_pc2; + +CREATE TABLE tbl_pc(id int PRIMARY KEY, c1 text) WITH(compresstype=none); +\d+ tbl_pc +DROP TABLE tbl_pc; + +-- invalid storage parameter +CREATE TABLE tbl_pc_error(id int, c1 text) 
WITH(compresstype=xyz); -- fail +CREATE TABLE tbl_pc_error(id int, c1 text) WITH(compresstype=zstd, compresslevel=xyz); -- fail +CREATE TABLE tbl_pc_error(id int, c1 text) WITH(compresstype=zstd, compress_chunk_size=1025); -- fail +CREATE TABLE tbl_pc_error(id int, c1 text) WITH(compresstype=zstd, compress_prealloc_chunks=8); -- fail + + +-- +-- create compressed index +-- +SET enable_seqscan = OFF; + +CREATE TABLE tbl_pc(id int PRIMARY KEY WITH(compresstype=zstd, compresslevel=1, compress_chunk_size=1024, compress_prealloc_chunks=2), c1 text) WITH(compresstype=zstd, compresslevel=1, compress_chunk_size=1024, compress_prealloc_chunks=2); + +CREATE INDEX tbl_pc_idx1 on tbl_pc(c1) WITH(compresstype=zstd, compresslevel=1, compress_chunk_size=1024, compress_prealloc_chunks=2); + +\d+ tbl_pc + +INSERT INTO tbl_pc SELECT id, id::text FROM generate_series(1,1000)id; + +-- call CHECKPOINT to flush shared buffer to compressed relation file +CHECKPOINT; + +-- run ANALYZE REINDEX VACUUM and CLUSTER on compressed table and index +ANALYZE tbl_pc; + +SELECT count(*) FROM tbl_pc; +SELECT * FROM tbl_pc WHERE c1='100'; +EXPLAIN(COSTS off) SELECT * FROM tbl_pc WHERE c1='100'; + +REINDEX INDEX tbl_pc_idx1; +CHECKPOINT; +SELECT * FROM tbl_pc WHERE c1='100'; + +REINDEX TABLE tbl_pc; +CHECKPOINT; +SELECT * FROM tbl_pc WHERE c1='100'; + +VACUUM tbl_pc; +CHECKPOINT; +SELECT count(*) FROM tbl_pc; +SELECT * FROM tbl_pc WHERE c1='100'; + +VACUUM FULL tbl_pc; +CHECKPOINT; +SELECT count(*) FROM tbl_pc; +SELECT * FROM tbl_pc WHERE c1='100'; + +CLUSTER tbl_pc USING tbl_pc_idx1; +CHECKPOINT; +SELECT count(*) FROM tbl_pc; +SELECT * FROM tbl_pc WHERE c1='100'; + +DROP INDEX tbl_pc_idx1; + +-- check usage of compressed index with data +CREATE INDEX tbl_pc_idx1 on tbl_pc USING hash(c1) WITH(compresstype=zstd, compresslevel=1, compress_chunk_size=1024, compress_prealloc_chunks=2); +CHECKPOINT; +SELECT * FROM tbl_pc WHERE c1='100'; +EXPLAIN(COSTS off) SELECT * FROM tbl_pc WHERE c1='100'; +DROP INDEX 
tbl_pc_idx1; + +CREATE INDEX tbl_pc_idx1 on tbl_pc USING gin((ARRAY[id])) WITH(compresstype=zstd, compresslevel=1, compress_chunk_size=1024, compress_prealloc_chunks=2); +CHECKPOINT; +SELECT * FROM tbl_pc WHERE ARRAY[id] @> ARRAY[100]; +EXPLAIN(COSTS off) SELECT * FROM tbl_pc WHERE ARRAY[id] @> ARRAY[100]; +DROP INDEX tbl_pc_idx1; + +CREATE INDEX tbl_pc_idx1 on tbl_pc USING gist((point(id,id))) WITH(compresstype=zstd, compresslevel=1, compress_chunk_size=1024, compress_prealloc_chunks=2); +CHECKPOINT; +SELECT * FROM tbl_pc ORDER BY point(id,id) <-> point(100,100) limit 1; +EXPLAIN(COSTS off) SELECT * FROM tbl_pc ORDER BY point(id,id) <-> point(100,100) limit 1; +DROP INDEX tbl_pc_idx1; + +CREATE INDEX tbl_pc_idx1 on tbl_pc USING spgist(c1) WITH(compresstype=zstd, compresslevel=1, compress_chunk_size=1024, compress_prealloc_chunks=2); +CHECKPOINT; +SELECT * FROM tbl_pc WHERE c1='100'; +EXPLAIN(COSTS off) SELECT * FROM tbl_pc WHERE c1='100'; +DROP INDEX tbl_pc_idx1; + +-- brin index does not support compression +CREATE INDEX tbl_pc_idx1 on tbl_pc USING brin(c1) WITH(compresstype=zstd, compresslevel=1, compress_chunk_size=1024, compress_prealloc_chunks=2); -- fail + +DROP TABLE tbl_pc; +RESET enable_seqscan; + +-- +-- alter table and index +-- + +-- ALTER TABLE +-- ALTER compresstype and compress_chunk_size currently is not supported +CREATE TABLE tbl_pc(id int, c1 text); +ALTER TABLE tbl_pc SET(compresstype=zstd); -- fail +DROP TABLE tbl_pc; + +CREATE TABLE tbl_pc(id int PRIMARY KEY, c1 text) WITH(compresstype=zstd, compresslevel=1, compress_chunk_size=1024, compress_prealloc_chunks=2); +ALTER TABLE tbl_pc SET(compresstype=none); -- fail +ALTER TABLE tbl_pc SET(compress_chunk_size=2048); -- fail +ALTER TABLE tbl_pc SET(compress_prealloc_chunks=8); -- fail +ALTER TABLE tbl_pc SET(compresslevel=2, compress_prealloc_chunks=0); -- ok +\d+ tbl_pc +ALTER TABLE tbl_pc RESET(compresstype); -- fail +ALTER TABLE tbl_pc RESET(compress_chunk_size); -- fail +ALTER TABLE tbl_pc 
RESET(compresslevel); -- ok +ALTER TABLE tbl_pc RESET(compress_prealloc_chunks); -- ok +\d+ tbl_pc +CHECKPOINT; +SELECT count(*) FROM tbl_pc; + +-- ALTER INDEX +-- ALTER compresstype and compress_chunk_size currently is not supported +CREATE INDEX tbl_pc_idx1 on tbl_pc USING btree(c1); +ALTER INDEX tbl_pc_idx1 SET(compresstype=zstd); -- fail +DROP INDEX tbl_pc_idx1; + +CREATE INDEX tbl_pc_idx1 on tbl_pc USING btree(c1) WITH(compresstype=zstd, compresslevel=1, compress_chunk_size=1024, compress_prealloc_chunks=2); +ALTER INDEX tbl_pc_idx1 SET(compresstype=none); -- fail +ALTER INDEX tbl_pc_idx1 SET(compress_chunk_size=2048); -- fail +ALTER INDEX tbl_pc_idx1 SET(compress_prealloc_chunks=8); -- fail +ALTER INDEX tbl_pc_idx1 SET(compresslevel=2, compress_prealloc_chunks=0); -- ok +\d+ tbl_pc +ALTER INDEX tbl_pc_idx1 RESET(compresstype); -- fail +ALTER INDEX tbl_pc_idx1 RESET(compress_chunk_size); -- fail +ALTER INDEX tbl_pc_idx1 RESET(compresslevel); -- ok +ALTER INDEX tbl_pc_idx1 RESET(compress_prealloc_chunks); -- ok +\d+ tbl_pc +CHECKPOINT; +SELECT * FROM tbl_pc WHERE c1='100'; +EXPLAIN(COSTS off) SELECT * FROM tbl_pc WHERE c1='100'; + +-- alter hash index +CREATE INDEX tbl_pc_idx_hash on tbl_pc USING hash(c1) WITH(compresstype=zstd, compresslevel=1, compress_chunk_size=1024, compress_prealloc_chunks=2); + +ALTER INDEX tbl_pc_idx_hash SET(compresstype=none); -- fail +ALTER INDEX tbl_pc_idx_hash SET(compress_chunk_size=2048); -- fail +ALTER INDEX tbl_pc_idx_hash SET(compress_prealloc_chunks=8); -- fail +ALTER INDEX tbl_pc_idx_hash SET(compresslevel=2, compress_prealloc_chunks=0); -- ok +\d+ tbl_pc_idx_hash +ALTER INDEX tbl_pc_idx_hash RESET(compresstype); -- fail +ALTER INDEX tbl_pc_idx_hash RESET(compress_chunk_size); -- fail +ALTER INDEX tbl_pc_idx_hash RESET(compresslevel); -- ok +ALTER INDEX tbl_pc_idx_hash RESET(compress_prealloc_chunks); -- ok +\d+ tbl_pc_idx_hash + +-- alter gin index +CREATE INDEX tbl_pc_idx_gin on tbl_pc USING gin((ARRAY[id])) 
WITH(compresstype=zstd, compresslevel=1, compress_chunk_size=1024, compress_prealloc_chunks=2);
+
+ALTER INDEX tbl_pc_idx_gin SET(compresstype=none); -- fail
+ALTER INDEX tbl_pc_idx_gin SET(compress_chunk_size=2048); -- fail
+ALTER INDEX tbl_pc_idx_gin SET(compress_prealloc_chunks=8); -- fail
+ALTER INDEX tbl_pc_idx_gin SET(compresslevel=2, compress_prealloc_chunks=0); -- ok
+\d+ tbl_pc_idx_gin
+ALTER INDEX tbl_pc_idx_gin RESET(compresstype); -- fail
+ALTER INDEX tbl_pc_idx_gin RESET(compress_chunk_size); -- fail
+ALTER INDEX tbl_pc_idx_gin RESET(compresslevel); -- ok
+ALTER INDEX tbl_pc_idx_gin RESET(compress_prealloc_chunks); -- ok
+\d+ tbl_pc_idx_gin
+
+-- alter gist index
+CREATE INDEX tbl_pc_idx_gist on tbl_pc USING gist((point(id,id))) WITH(compresstype=zstd, compresslevel=1, compress_chunk_size=1024, compress_prealloc_chunks=2);
+
+ALTER INDEX tbl_pc_idx_gist SET(compresstype=none); -- fail
+ALTER INDEX tbl_pc_idx_gist SET(compress_chunk_size=2048); -- fail
+ALTER INDEX tbl_pc_idx_gist SET(compress_prealloc_chunks=8); -- fail
+ALTER INDEX tbl_pc_idx_gist SET(compresslevel=2, compress_prealloc_chunks=0); -- ok
+\d+ tbl_pc_idx_gist
+ALTER INDEX tbl_pc_idx_gist RESET(compresstype); -- fail
+ALTER INDEX tbl_pc_idx_gist RESET(compress_chunk_size); -- fail
+ALTER INDEX tbl_pc_idx_gist RESET(compresslevel); -- ok
+ALTER INDEX tbl_pc_idx_gist RESET(compress_prealloc_chunks); -- ok
+\d+ tbl_pc_idx_gist
+
+-- alter spgist index
+CREATE INDEX tbl_pc_idx_spgist on tbl_pc USING spgist(c1) WITH(compresstype=zstd, compresslevel=1, compress_chunk_size=1024, compress_prealloc_chunks=2);
+
+ALTER INDEX tbl_pc_idx_spgist SET(compresstype=none); -- fail
+ALTER INDEX tbl_pc_idx_spgist SET(compress_chunk_size=2048); -- fail
+ALTER INDEX tbl_pc_idx_spgist SET(compress_prealloc_chunks=8); -- fail
+ALTER INDEX tbl_pc_idx_spgist SET(compresslevel=2, compress_prealloc_chunks=0); -- ok
+\d+ tbl_pc_idx_spgist
+ALTER INDEX tbl_pc_idx_spgist RESET(compresstype); -- fail
+ALTER INDEX tbl_pc_idx_spgist RESET(compress_chunk_size); -- fail
+ALTER INDEX tbl_pc_idx_spgist RESET(compresslevel); -- ok
+ALTER INDEX tbl_pc_idx_spgist RESET(compress_prealloc_chunks); -- ok
+\d+ tbl_pc_idx_spgist
+
+-- alter brin index (do not support compression)
+CREATE INDEX tbl_pc_idx_brin on tbl_pc USING brin(c1);
+ALTER INDEX tbl_pc_idx_brin SET(compress_prealloc_chunks=3); -- fail
+
+DROP TABLE tbl_pc;
+
+--
+-- partitioned table and index
+--
+
+-- partition table does not support compression, but index of partition table and its child tables can use compression
+CREATE TABLE tbl_pc_part (id int, c1 text) PARTITION BY RANGE (id) WITH(compresstype=zstd); -- fail
+
+CREATE TABLE tbl_pc_part (id int, c1 text) PARTITION BY RANGE (id);
+CREATE TABLE tbl_pc_part_1 PARTITION OF tbl_pc_part FOR VALUES FROM (1) TO (1001);
+CREATE TABLE tbl_pc_part_2 PARTITION OF tbl_pc_part FOR VALUES FROM (1001) TO (2001) WITH(compresstype=zstd, compresslevel=1, compress_chunk_size=1024, compress_prealloc_chunks=2);
+
+CREATE INDEX part_id_idx ON tbl_pc_part(id) WITH(compresstype=zstd, compresslevel=2, compress_chunk_size=1024, compress_prealloc_chunks=2);
+
+CREATE TABLE tbl_pc_part_3 PARTITION OF tbl_pc_part FOR VALUES FROM (2001) TO (3001);
+
+CREATE INDEX part3_id_idx1 ON tbl_pc_part_3(id) WITH(compresstype=zstd, compresslevel=2, compress_chunk_size=1024, compress_prealloc_chunks=2);
+
+\d+ tbl_pc_part
+\d+ part_id_idx
+\d+ tbl_pc_part_1
+\d+ tbl_pc_part_2
+\d+ tbl_pc_part_3
+
+INSERT INTO tbl_pc_part SELECT id, id::text FROM generate_series(1,3000)id;
+CHECKPOINT;
+SELECT count(*) FROM tbl_pc_part;
+SELECT * FROM tbl_pc_part WHERE id=100;
+SELECT * FROM tbl_pc_part WHERE id=1100;
+SELECT * FROM tbl_pc_part WHERE id=2100;
+
+ALTER TABLE tbl_pc_part SET(compresstype=zstd); -- fail
+ALTER TABLE tbl_pc_part_1 SET(compresstype=zstd); -- fail
+
+ALTER TABLE tbl_pc_part_2 SET(compresstype=none); -- fail
+ALTER TABLE tbl_pc_part_2 SET(compress_chunk_size=2048); -- fail
+ALTER TABLE tbl_pc_part_2 SET(compress_prealloc_chunks=8); -- fail
+ALTER TABLE tbl_pc_part_2 SET(compresslevel=2, compress_prealloc_chunks=0); -- ok
+\d+ tbl_pc_part_2
+
+ALTER INDEX part3_id_idx1 SET(compresstype=none); -- fail
+ALTER INDEX part3_id_idx1 SET(compress_chunk_size=2048); -- fail
+ALTER INDEX part3_id_idx1 SET(compress_prealloc_chunks=8); -- fail
+ALTER INDEX part3_id_idx1 SET(compresslevel=2, compress_prealloc_chunks=0); -- ok
+\d+ part3_id_idx1
+
+ALTER INDEX part_id_idx SET(compresstype=zstd); -- fail
+ALTER INDEX part_id_idx SET(compress_chunk_size=2048); -- fail
+ALTER INDEX part_id_idx SET(compress_prealloc_chunks=8); -- fail
+ALTER INDEX part_id_idx SET(compresslevel=2, compress_prealloc_chunks=0); -- fail
+\d+ tbl_pc_part
+
+INSERT INTO tbl_pc_part SELECT id, id::text FROM generate_series(1,3000)id;
+CHECKPOINT;
+SELECT count(*) FROM tbl_pc_part;
+SELECT * FROM tbl_pc_part WHERE id=100;
+SELECT * FROM tbl_pc_part WHERE id=1100;
+SELECT * FROM tbl_pc_part WHERE id=2100;
+
+DROP TABLE tbl_pc_part;
+
+--
+-- default tablespace store parameter
+--
+
+-- can not use compression on global tablespace
+ALTER TABLESPACE pg_default SET(default_compresstype=xxx); -- fail
+ALTER TABLESPACE pg_default SET(default_compress_chunk_size=1023); -- fail
+ALTER TABLESPACE pg_default SET(default_compress_chunk_size=4097); -- fail
+ALTER TABLESPACE pg_default SET(default_compress_prealloc_chunks=-1); -- fail
+ALTER TABLESPACE pg_default SET(default_compress_prealloc_chunks=8); -- fail
+
+ALTER TABLESPACE pg_default SET(default_compresstype=zstd, default_compresslevel=2, default_compress_chunk_size=1024, default_compress_prealloc_chunks=2);
+ -- ok
+
+-- table and index(btree,hash,gin,gist,spgist) inherit default compression options from it's tablespace
+CREATE TABLE tbl_pc(id int PRIMARY KEY, c1 text);
+CREATE INDEX tbl_pc_idx_btree on tbl_pc(c1);
+CREATE INDEX tbl_pc_idx_hash on tbl_pc USING hash(c1);
+CREATE INDEX tbl_pc_idx_gin on tbl_pc USING gin((ARRAY[id]));
+CREATE INDEX tbl_pc_idx_gist on tbl_pc USING gist((point(id,id)));
+CREATE INDEX tbl_pc_idx_spgist on tbl_pc USING spgist(c1);
+CREATE INDEX tbl_pc_idx_brin on tbl_pc USING brin(c1);
+\d+ tbl_pc
+
+SELECT size FROM pg_stat_file(pg_relation_filepath('tbl_pc') || '_pca');
+SELECT size FROM pg_stat_file(pg_relation_filepath('tbl_pc_idx_btree') || '_pca');
+SELECT size FROM pg_stat_file(pg_relation_filepath('tbl_pc_idx_hash') || '_pca');
+SELECT size FROM pg_stat_file(pg_relation_filepath('tbl_pc_idx_gin') || '_pca');
+SELECT size FROM pg_stat_file(pg_relation_filepath('tbl_pc_idx_gist') || '_pca');
+SELECT size FROM pg_stat_file(pg_relation_filepath('tbl_pc_idx_spgist') || '_pca');
+SELECT size FROM pg_stat_file(pg_relation_filepath('tbl_pc_idx_brin') || '_pca', true);
+
+-- toast relation will not be compressed
+SELECT reltoastrelid FROM pg_class WHERE oid='tbl_pc'::regclass \gset
+SELECT reloptions FROM pg_class where oid=:reltoastrelid;
+SELECT size FROM pg_stat_file(pg_relation_filepath(:reltoastrelid) || '_pca', true);
+
+CREATE TABLE tbl_pc1 AS SELECT * FROM tbl_pc;
+\d+ tbl_pc1
+
+ALTER TABLESPACE pg_default RESET(default_compresstype, default_compresslevel, default_compress_chunk_size, default_compress_prealloc_chunks);
+CREATE INDEX tbl_pc_idx2 on tbl_pc(c1);
+\d+ tbl_pc
+
+CREATE TABLE tbl_pc2(LIKE tbl_pc);
+\d+ tbl_pc2
+
+CREATE TABLE tbl_pc3(LIKE tbl_pc INCLUDING ALL);
+\d+ tbl_pc3
+
+CREATE TABLE tbl_pc4 AS SELECT * FROM tbl_pc;
+\d+ tbl_pc4
+
+DROP TABLE tbl_pc;
+DROP TABLE tbl_pc1;
+DROP TABLE tbl_pc2;
+DROP TABLE tbl_pc3;
+DROP TABLE tbl_pc4;
+
+ALTER TABLESPACE pg_default SET(default_compresstype=zstd, default_compresslevel=2, default_compress_chunk_size=1024, default_compress_prealloc_chunks=2);
+
+CREATE TABLE tbl_pc(id int PRIMARY KEY, c1 text) WITH(compresstype=none);
+CREATE INDEX tbl_pc_idx1 on tbl_pc(c1) WITH(compresstype=none);
+\d+ tbl_pc
+
+INSERT INTO tbl_pc SELECT id, id::text FROM generate_series(1,1000)id;
+CHECKPOINT;
+SELECT count(*) FROM tbl_pc;
+SELECT * FROM tbl_pc WHERE c1='100';
+
+DROP TABLE tbl_pc;
+
+-- tablespace & partitioned table
+CREATE TABLE tbl_pc_part (id int, c1 text) PARTITION BY RANGE (id);
+CREATE TABLE tbl_pc_part_1 PARTITION OF tbl_pc_part FOR VALUES FROM (1) TO (1001);
+CREATE TABLE tbl_pc_part_2 PARTITION OF tbl_pc_part FOR VALUES FROM (1001) TO (2001) WITH(compresstype=zstd);
+
+CREATE INDEX part_id_idx ON tbl_pc_part(id) WITH(compresstype=zstd, compresslevel=1, compress_chunk_size=1024);
+
+CREATE TABLE tbl_pc_part_3 PARTITION OF tbl_pc_part FOR VALUES FROM (2001) TO (3001) WITH(compresstype=none);
+CREATE INDEX part3_id_idx1 ON tbl_pc_part_3(id) WITH(compresstype=zstd, compress_chunk_size=1024, compress_prealloc_chunks=3);
+
+\d+ tbl_pc_part
+\d+ tbl_pc_part_1
+\d+ tbl_pc_part_2
+\d+ tbl_pc_part_3
+
+DROP TABLE tbl_pc_part;
+
+-- tablespace & unlogged relation
+CREATE UNLOGGED TABLE tbl_pc_unlogged(id int PRIMARY KEY, c1 text);
+CREATE INDEX tbl_pc_idx1_unlogged ON tbl_pc_unlogged(c1);
+\d+ tbl_pc_unlogged
+
+ALTER TABLE tbl_pc_unlogged SET LOGGED;
+DROP TABLE tbl_pc_unlogged;
+
+CREATE UNLOGGED TABLE tbl_pc_unlogged(id int PRIMARY KEY, c1 text) WITH(compresstype=zstd, compresslevel=1, compress_chunk_size=1024, compress_prealloc_chunks=0);
+CREATE INDEX tbl_pc_idx1_unlogged ON tbl_pc_unlogged(c1) WITH(compresstype=zstd, compresslevel=1, compress_chunk_size=1024, compress_prealloc_chunks=1);
+\d+ tbl_pc_unlogged
+
+ALTER TABLE tbl_pc_unlogged SET(compresslevel=2, compress_prealloc_chunks=1);
+ALTER INDEX tbl_pc_idx1_unlogged SET(compresslevel=2, compress_prealloc_chunks=1);
+\d+ tbl_pc_unlogged
+
+INSERT INTO tbl_pc_unlogged SELECT id, id::text FROM generate_series(1,1000)id;
+CHECKPOINT;
+SELECT count(*) FROM tbl_pc_unlogged;
+SELECT * FROM tbl_pc_unlogged WHERE c1='100';
+
+ALTER TABLE tbl_pc_unlogged SET LOGGED;
+
+INSERT INTO tbl_pc_unlogged SELECT id, id::text FROM generate_series(1001,2000)id;
+CHECKPOINT;
+SELECT count(*) FROM tbl_pc_unlogged;
+SELECT * FROM tbl_pc_unlogged WHERE c1='100';
+
+DROP TABLE tbl_pc_unlogged;
+
+-- tablespace & temp relation
+CREATE TEMP TABLE tbl_pc_tmp(id int PRIMARY KEY, c1 text);
+CREATE INDEX tbl_pc_idx1_tmp ON tbl_pc_tmp(c1);
+SELECT reloptions FROM pg_class WHERE oid='tbl_pc_tmp'::regclass;
+SELECT reloptions FROM pg_class WHERE oid='tbl_pc_idx1_tmp'::regclass;
+
+DROP TABLE tbl_pc_tmp;
+
+CREATE TEMP TABLE tbl_pc_tmp(id int PRIMARY KEY, c1 text) WITH(compresstype=zstd, compresslevel=1, compress_chunk_size=2048, compress_prealloc_chunks=2);
+CREATE INDEX tbl_pc_idx1_tmp ON tbl_pc_tmp(c1) WITH(compresstype=zstd, compresslevel=1, compress_chunk_size=2048, compress_prealloc_chunks=1);
+SELECT reloptions FROM pg_class WHERE oid='tbl_pc_tmp'::regclass;
+SELECT reloptions FROM pg_class WHERE oid='tbl_pc_idx1_tmp'::regclass;
+
+ALTER TABLE tbl_pc_tmp SET(compresslevel=2, compress_prealloc_chunks=1);
+ALTER INDEX tbl_pc_idx1_tmp SET(compresslevel=2, compress_prealloc_chunks=1);
+SELECT reloptions FROM pg_class WHERE oid='tbl_pc_tmp'::regclass;
+SELECT reloptions FROM pg_class WHERE oid='tbl_pc_idx1_tmp'::regclass;
+
+INSERT INTO tbl_pc_tmp SELECT id, id::text FROM generate_series(1,1000)id;
+CHECKPOINT;
+SELECT count(*) FROM tbl_pc_tmp;
+SELECT * FROM tbl_pc_tmp WHERE c1='100';
+
+DROP TABLE tbl_pc_tmp;
+
+-- tablespace & materialized view
+
+CREATE MATERIALIZED VIEW mv_pc AS SELECT id, id::text c1 FROM generate_series(1,1000)id;
+\d+ mv_pc
+DROP MATERIALIZED VIEW mv_pc;
+
+CREATE MATERIALIZED VIEW mv_pc WITH(compresstype=zstd, compresslevel=1, compress_chunk_size=2048, compress_prealloc_chunks=1)
+ AS SELECT id, id::text c1 FROM generate_series(1,1000)id;
+
+CREATE INDEX mv_pc_idx ON mv_pc(c1) WITH(compresstype=zstd, compresslevel=1, compress_chunk_size=1024, compress_prealloc_chunks=2);
+\d+ mv_pc
+
+ALTER MATERIALIZED VIEW mv_pc SET(compresstype=none); -- fail
+ALTER MATERIALIZED VIEW mv_pc SET(compress_chunk_size=1024); -- fail
+ALTER MATERIALIZED VIEW mv_pc SET(compress_prealloc_chunks=8); -- fail
+ALTER MATERIALIZED VIEW mv_pc SET(compresslevel=2, compress_prealloc_chunks=0); -- ok
+\d+ mv_pc
+
+ALTER MATERIALIZED VIEW mv_pc RESET(compresstype); -- fail
+ALTER MATERIALIZED VIEW mv_pc RESET(compress_chunk_size); -- fail
+ALTER MATERIALIZED VIEW mv_pc RESET(compresslevel); -- ok
+ALTER MATERIALIZED VIEW mv_pc RESET(compress_prealloc_chunks); -- ok
+\d+ mv_pc
+
+ALTER INDEX mv_pc_idx SET(compresstype=none); -- fail
+ALTER INDEX mv_pc_idx SET(compress_chunk_size=2048); -- fail
+ALTER INDEX mv_pc_idx SET(compress_prealloc_chunks=8); -- fail
+ALTER INDEX mv_pc_idx SET(compresslevel=2, compress_prealloc_chunks=0); -- ok
+\d+ mv_pc_idx
+
+ALTER INDEX mv_pc_idx RESET(compresstype); -- fail
+ALTER INDEX mv_pc_idx RESET(compress_chunk_size); -- fail
+ALTER INDEX mv_pc_idx RESET(compresslevel); -- ok
+ALTER INDEX mv_pc_idx RESET(compress_prealloc_chunks); -- ok
+\d+ mv_pc_idx
+
+CHECKPOINT;
+SELECT count(*) FROM mv_pc;
+SELECT count(*) FROM mv_pc WHERE c1 = '100';
+
+REFRESH MATERIALIZED VIEW mv_pc;
+CHECKPOINT;
+SELECT count(*) FROM mv_pc;
+SELECT count(*) FROM mv_pc WHERE c1 = '100';
+
+DROP MATERIALIZED VIEW mv_pc;
+
+ALTER TABLESPACE pg_default RESET(default_compresstype, default_compresslevel, default_compress_chunk_size, default_compress_prealloc_chunks);
+
+--
+-- recycling space with vacuum
+--
+CREATE TABLE tbl_pc(id int PRIMARY KEY, c1 text) WITH(compresstype=zstd, compress_chunk_size=2048, compress_prealloc_chunks=0);
+CHECKPOINT;
+SELECT pg_relation_size('tbl_pc') size_0 \gset
+
+INSERT INTO tbl_pc SELECT id, id::text FROM generate_series(1,1000)id;
+CHECKPOINT;
+SELECT pg_relation_size('tbl_pc') size_1000 \gset
+
+SELECT :size_1000 > :size_0; -- true
+
+DELETE FROM tbl_pc WHERE id > 500;
+VACUUM tbl_pc;
+SELECT pg_relation_size('tbl_pc') size_500 \gset
+SELECT count(*) FROM tbl_pc;
+SELECT :size_500 < :size_1000; -- true
+
+
+DELETE FROM tbl_pc WHERE id < 500;
+VACUUM tbl_pc;
+SELECT pg_relation_size('tbl_pc') size_1 \gset
+SELECT count(*) FROM tbl_pc;
+SELECT :size_1 = :size_500; -- true
+
+DELETE FROM tbl_pc;
+VACUUM tbl_pc;
+SELECT pg_relation_size('tbl_pc') size_0_2 \gset
+
+SELECT :size_0_2 = :size_0;
+
+DROP TABLE tbl_pc;