From b6e2dd66c5e3006b2ea7b079cab8675e968df801 Mon Sep 17 00:00:00 2001
From: dilip kumar
Date: Thu, 13 Aug 2020 15:57:07 +0530
Subject: [PATCH v1 2/3] Add support for another built-in compression method (zlib)

---
Reviewer note (not part of the commit message): cm_zlib.c below has been
revised relative to the posted v1 (deflate/inflate loop termination, zeroed
z_stream, restored <zlib.h> include, added comments).  The blob id on its
"index" line is therefore stale and should be regenerated before re-sending.

 src/backend/access/compression/Makefile  |   2 +-
 src/backend/access/compression/cm_zlib.c | 212 +++++++++++++++++++++++
 src/include/catalog/pg_am.dat            |   3 +
 src/include/catalog/pg_proc.dat          |   4 +
 src/test/regress/expected/create_cm.out  |  17 ++-
 src/test/regress/expected/psql.out       |  12 +-
 src/test/regress/sql/create_cm.sql       |  10 +-
 7 files changed, 253 insertions(+), 7 deletions(-)
 create mode 100644 src/backend/access/compression/cm_zlib.c

diff --git a/src/backend/access/compression/Makefile b/src/backend/access/compression/Makefile
index 14286920d3..7ea5ee2e43 100644
--- a/src/backend/access/compression/Makefile
+++ b/src/backend/access/compression/Makefile
@@ -12,6 +12,6 @@ subdir = src/backend/access/compression
 top_builddir = ../../../..
 include $(top_builddir)/src/Makefile.global
 
-OBJS = cm_pglz.o cmapi.o
+OBJS = cm_pglz.o cm_zlib.o cmapi.o
 
 include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/access/compression/cm_zlib.c b/src/backend/access/compression/cm_zlib.c
new file mode 100644
index 0000000000..b1ba33e90b
--- /dev/null
+++ b/src/backend/access/compression/cm_zlib.c
@@ -0,0 +1,212 @@
+/*-------------------------------------------------------------------------
+ *
+ * cm_zlib.c
+ *	  zlib compression method
+ *
+ * Copyright (c) 2015-2018, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ *	  src/backend/access/compression/cm_zlib.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+#include "access/cmapi.h"
+#include "commands/defrem.h"
+#include "nodes/parsenodes.h"
+#include "utils/builtins.h"
+
+#ifdef HAVE_LIBZ
+#include <zlib.h>
+
+#define ZLIB_MAX_DICTIONARY_LENGTH		32768
+#define ZLIB_DICTIONARY_DELIM			(" ,")
+
+/* zlib settings built by zlib_cminitstate(), read via cmoptions->acstate */
+typedef struct
+{
+	int				level;		/* level handed to deflateInit() */
+	Bytef			dict[ZLIB_MAX_DICTIONARY_LENGTH];	/* preset dictionary */
+	unsigned int	dictlen;	/* dictionary bytes in use, 0 = none */
+} zlib_state;
+
+/*
+ * zlib_cminitstate
+ *		Build the zlib state: default compression level, empty dictionary.
+ *
+ * acoid is not used here.
+ */
+static void *
+zlib_cminitstate(Oid acoid)
+{
+	zlib_state		*state = NULL;
+
+	state = palloc0(sizeof(zlib_state));
+	state->level = Z_DEFAULT_COMPRESSION;
+
+	return state;
+}
+
+/*
+ * zlib_cmcompress
+ *		Compress the payload of 'value' with deflate.
+ *
+ * Returns a new varlena holding the compressed stream after a
+ * VARHDRSZ_CUSTOM_COMPRESSED-byte header, or NULL when the compressed
+ * form does not fit in a buffer the size of the input.
+ */
+static struct varlena *
+zlib_cmcompress(CompressionAmOptions *cmoptions, const struct varlena *value)
+{
+	int32			valsize,
+					len;
+	struct varlena *tmp = NULL;
+	z_streamp		zp;
+	int				res;
+	zlib_state	   *state = (zlib_state *) cmoptions->acstate;
+
+	/* zeroed, so zalloc/zfree/opaque and msg all start out as Z_NULL */
+	zp = (z_streamp) palloc0(sizeof(z_stream));
+
+	if (deflateInit(zp, state->level) != Z_OK)
+		elog(ERROR, "could not initialize compression library: %s", zp->msg);
+
+	if (state->dictlen > 0)
+	{
+		res = deflateSetDictionary(zp, state->dict, state->dictlen);
+		if (res != Z_OK)
+			elog(ERROR, "could not set dictionary for zlib: %s", zp->msg);
+	}
+
+	valsize = VARSIZE_ANY_EXHDR(DatumGetPointer(value));
+	tmp = (struct varlena *) palloc(valsize + VARHDRSZ_CUSTOM_COMPRESSED);
+	zp->next_in = (void *) VARDATA_ANY(value);
+	zp->avail_in = valsize;
+	zp->avail_out = valsize;
+	zp->next_out = (void *)((char *) tmp + VARHDRSZ_CUSTOM_COMPRESSED);
+
+	/*
+	 * All input and the whole output buffer are supplied up front, so one
+	 * Z_FINISH call either completes the stream (Z_STREAM_END) or runs out
+	 * of output space (Z_OK / Z_BUF_ERROR).  Calling deflate() again without
+	 * more output space could never make progress.
+	 */
+	res = deflate(zp, Z_FINISH);
+	if (res == Z_STREAM_ERROR)
+		elog(ERROR, "could not compress data: %s", zp->msg);
+
+	if (res != Z_STREAM_END)
+	{
+		/*
+		 * Output did not fit.  deflateEnd() reports Z_DATA_ERROR for an
+		 * unfinished stream, so its result is deliberately ignored here.
+		 */
+		(void) deflateEnd(zp);
+		pfree(zp);
+		pfree(tmp);
+		return NULL;
+	}
+
+	len = valsize - zp->avail_out;
+	if (deflateEnd(zp) != Z_OK)
+		elog(ERROR, "could not close compression stream: %s", zp->msg);
+	pfree(zp);
+
+	if (len > 0)
+	{
+		SET_VARSIZE_COMPRESSED(tmp, len + VARHDRSZ_CUSTOM_COMPRESSED);
+		return tmp;
+	}
+
+	pfree(tmp);
+	return NULL;
+}
+
+/*
+ * zlib_cmdecompress
+ *		Inflate a value produced by zlib_cmcompress().
+ *
+ * The result is a plain varlena of VARRAWSIZE_4B_C(value) payload bytes.
+ * Raises an error if the stream is corrupt, truncated, or needs a
+ * dictionary that the state does not have.
+ */
+static struct varlena *
+zlib_cmdecompress(CompressionAmOptions *cmoptions, const struct varlena *value)
+{
+	struct varlena *result;
+	z_streamp		zp;
+	int				res = Z_OK;
+	zlib_state	   *state = (zlib_state *) cmoptions->acstate;
+
+	/* inflateInit() requires next_in/avail_in to be initialized as well */
+	zp = (z_streamp) palloc0(sizeof(z_stream));
+
+	if (inflateInit(zp) != Z_OK)
+		elog(ERROR, "could not initialize compression library: %s", zp->msg);
+
+	Assert(VARATT_IS_CUSTOM_COMPRESSED(value));
+	zp->next_in = (void *) ((char *) value + VARHDRSZ_CUSTOM_COMPRESSED);
+	zp->avail_in = VARSIZE(value) - VARHDRSZ_CUSTOM_COMPRESSED;
+	zp->avail_out = VARRAWSIZE_4B_C(value);
+
+	result = (struct varlena *) palloc(zp->avail_out + VARHDRSZ);
+	SET_VARSIZE(result, zp->avail_out + VARHDRSZ);
+	zp->next_out = (void *) VARDATA(result);
+
+	/*
+	 * Stop at Z_STREAM_END even if input bytes remain: inflate() keeps
+	 * returning Z_STREAM_END without consuming them, so looping on avail_in
+	 * alone would never terminate.
+	 */
+	while (res != Z_STREAM_END && zp->avail_in > 0)
+	{
+		res = inflate(zp, Z_NO_FLUSH);
+		if (res == Z_NEED_DICT && state->dictlen > 0)
+		{
+			res = inflateSetDictionary(zp, state->dict, state->dictlen);
+			if (res != Z_OK)
+				elog(ERROR, "could not set dictionary for zlib");
+			continue;
+		}
+		if (!(res == Z_OK || res == Z_STREAM_END))
+			elog(ERROR, "could not uncompress data: %s", zp->msg);
+	}
+
+	/* running out of input before the end of the stream means truncation */
+	if (res != Z_STREAM_END)
+		elog(ERROR, "could not uncompress data: compressed stream is incomplete");
+
+	if (inflateEnd(zp) != Z_OK)
+		elog(ERROR, "could not close compression library: %s", zp->msg);
+
+	pfree(zp);
+	return result;
+}
+#endif
+
+/*
+ * zlibhandler
+ *		Handler function of the zlib compression access method.
+ *
+ * Returns a CompressionAmRoutine filled with the zlib callbacks, or raises
+ * ERRCODE_FEATURE_NOT_SUPPORTED when the server was built without zlib.
+ */
+Datum
+zlibhandler(PG_FUNCTION_ARGS)
+{
+#ifndef HAVE_LIBZ
+	ereport(ERROR,
+			(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+			 errmsg("not built with zlib support")));
+#else
+	CompressionAmRoutine *routine = makeNode(CompressionAmRoutine);
+
+	routine->cminitstate = zlib_cminitstate;
+	routine->cmcompress = zlib_cmcompress;
+	routine->cmdecompress = zlib_cmdecompress;
+	routine->cmdecompress_slice = NULL;
+
+	PG_RETURN_POINTER(routine);
+#endif
+}
diff --git a/src/include/catalog/pg_am.dat b/src/include/catalog/pg_am.dat
index 877aa3d7e5..53c2b498ab 100644
--- a/src/include/catalog/pg_am.dat
+++ b/src/include/catalog/pg_am.dat
@@ -36,5 +36,8 @@
 { oid => '4225', oid_symbol => 'PGLZ_COMPRESSION_AM_OID',
   descr => 'pglz compression access method',
   amname => 'pglz', amhandler => 'pglzhandler', amtype => 'c' },
+{ oid => '4226', oid_symbol => 'ZLIB_COMPRESSION_AM_OID',
+  descr => 'zlib compression access method',
+  amname => 'zlib', amhandler => 'zlibhandler', amtype => 'c' },
 
 ]
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index d8848f58c9..9404a24199 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -950,6 +950,10 @@
   proname => 'pglzhandler', provolatile => 'v',
   prorettype => 'compression_am_handler', proargtypes => 'internal',
   prosrc => 'pglzhandler' },
+{ oid => '4389', descr => 'zlib compression access method handler',
+  proname => 'zlibhandler', provolatile => 'v',
+  prorettype => 'compression_am_handler', proargtypes => 'internal',
+  prosrc => 'zlibhandler' },
 
 { oid => '338', descr => 'validate an operator class',
   proname => 'amvalidate', provolatile => 'v', prorettype => 'bool',
diff --git a/src/test/regress/expected/create_cm.out b/src/test/regress/expected/create_cm.out
index 84adf0c459..ee091bb01f 100644
--- a/src/test/regress/expected/create_cm.out
+++ b/src/test/regress/expected/create_cm.out
@@ -68,4 +68,19 @@ SELECT length(f1) FROM cmmove3;
   10010
 (2 rows)
 
-DROP TABLE cmmove1, cmmove2, cmmove3;
+-- zlib compression
+CREATE TABLE zlibtest(f1 TEXT COMPRESSION zlib);
+CREATE TABLE zlibtest(f1 TEXT COMPRESSION zlib);
+ERROR:  relation "zlibtest" already exists
+CREATE TABLE zlibtest(f1 TEXT COMPRESSION zlib);
+ERROR:  relation "zlibtest" already exists
+INSERT INTO zlibtest VALUES(repeat('1234567890',1004));
+INSERT INTO zlibtest VALUES(repeat('1234567890 one two three',1004));
+SELECT length(f1) FROM zlibtest;
+ length 
+--------
+  10040
+  24096
+(2 rows)
+
+DROP TABLE cmmove1, cmmove2, cmmove3, zlibtest;
diff --git a/src/test/regress/expected/psql.out b/src/test/regress/expected/psql.out
index 2502b23793..531b298fe3 100644
--- a/src/test/regress/expected/psql.out
+++ b/src/test/regress/expected/psql.out
@@ -4845,7 +4845,8 @@ List of access methods
  heap2  | Table
  pglz   | 
  spgist | Index
-(9 rows)
+ zlib   | 
+(10 rows)
 
 \dA *
 List of access methods
@@ -4860,7 +4861,8 @@ List of access methods
  heap2  | Table
  pglz   | 
  spgist | Index
-(9 rows)
+ zlib   | 
+(10 rows)
 
 \dA h*
 List of access methods
@@ -4897,7 +4899,8 @@ List of access methods
  heap2  | Table | heap_tableam_handler | 
  pglz   |       | pglzhandler          | pglz compression access method
  spgist | Index | spghandler           | SP-GiST index access method
-(9 rows)
+ zlib   |       | zlibhandler          | zlib compression access method
+(10 rows)
 
 \dA+ *
                              List of access methods
@@ -4912,7 +4915,8 @@ List of access methods
  heap2  | Table | heap_tableam_handler | 
  pglz   |       | pglzhandler          | pglz compression access method
  spgist | Index | spghandler           | SP-GiST index access method
-(9 rows)
+ zlib   |       | zlibhandler          | zlib compression access method
+(10 rows)
 
 \dA+ h*
                      List of access methods
diff --git a/src/test/regress/sql/create_cm.sql b/src/test/regress/sql/create_cm.sql
index a9ddccdc8f..56501b45b0 100644
--- a/src/test/regress/sql/create_cm.sql
+++ b/src/test/regress/sql/create_cm.sql
@@ -34,4 +34,12 @@ SELECT length(f1) FROM cmmove1;
 SELECT length(f1) FROM cmmove2;
 SELECT length(f1) FROM cmmove3;
 
-DROP TABLE cmmove1, cmmove2, cmmove3;
\ No newline at end of file
+-- zlib compression
+CREATE TABLE zlibtest(f1 TEXT COMPRESSION zlib);
+CREATE TABLE zlibtest(f1 TEXT COMPRESSION zlib);
+CREATE TABLE zlibtest(f1 TEXT COMPRESSION zlib);
+INSERT INTO zlibtest VALUES(repeat('1234567890',1004));
+INSERT INTO zlibtest VALUES(repeat('1234567890 one two three',1004));
+SELECT length(f1) FROM zlibtest;
+
+DROP TABLE cmmove1, cmmove2, cmmove3, zlibtest;
-- 
2.23.0