From 828229b5f6382326f4ac62ebb6ddcd2e6b4e2bc0 Mon Sep 17 00:00:00 2001 From: dilipkumar Date: Fri, 16 Oct 2020 19:56:14 +0530 Subject: [PATCH v9 5/5] new compression method extension for zlib --- contrib/Makefile | 1 + contrib/cmzlib/.gitignore | 4 + contrib/cmzlib/Makefile | 26 +++++ contrib/cmzlib/cmzlib--1.0.sql | 13 +++ contrib/cmzlib/cmzlib.control | 5 + contrib/cmzlib/expected/cmzlib.out | 45 ++++++++ contrib/cmzlib/sql/cmzlib.sql | 21 ++++ contrib/cmzlib/zlib.c | 158 +++++++++++++++++++++++++++++ 8 files changed, 273 insertions(+) create mode 100644 contrib/cmzlib/.gitignore create mode 100644 contrib/cmzlib/Makefile create mode 100644 contrib/cmzlib/cmzlib--1.0.sql create mode 100644 contrib/cmzlib/cmzlib.control create mode 100644 contrib/cmzlib/expected/cmzlib.out create mode 100644 contrib/cmzlib/sql/cmzlib.sql create mode 100644 contrib/cmzlib/zlib.c diff --git a/contrib/Makefile b/contrib/Makefile index 7a4866e338..f3a2f582bf 100644 --- a/contrib/Makefile +++ b/contrib/Makefile @@ -12,6 +12,7 @@ SUBDIRS = \ bloom \ btree_gin \ btree_gist \ + cmzlib \ citext \ cube \ dblink \ diff --git a/contrib/cmzlib/.gitignore b/contrib/cmzlib/.gitignore new file mode 100644 index 0000000000..5dcb3ff972 --- /dev/null +++ b/contrib/cmzlib/.gitignore @@ -0,0 +1,4 @@ +# Generated subdirectories +/log/ +/results/ +/tmp_check/ diff --git a/contrib/cmzlib/Makefile b/contrib/cmzlib/Makefile new file mode 100644 index 0000000000..d4281228a3 --- /dev/null +++ b/contrib/cmzlib/Makefile @@ -0,0 +1,26 @@ +# contrib/cmzlib/Makefile + +MODULE_big = cmzlib +OBJS = \ + $(WIN32RES) \ + zlib.o + +EXTENSION = cmzlib +DATA = cmzlib--1.0.sql +PGFILEDESC = "zlib compression method " + +SHLIB_LINK += $(filter -lz, $(LIBS)) + +REGRESS = cmzlib + + +ifdef USE_PGXS +PG_CONFIG = pg_config +PGXS := $(shell $(PG_CONFIG) --pgxs) +include $(PGXS) +else +subdir = contrib/cmzlib +top_builddir = ../.. 
+include $(top_builddir)/src/Makefile.global +include $(top_srcdir)/contrib/contrib-global.mk +endif diff --git a/contrib/cmzlib/cmzlib--1.0.sql b/contrib/cmzlib/cmzlib--1.0.sql new file mode 100644 index 0000000000..fcdc0e7980 --- /dev/null +++ b/contrib/cmzlib/cmzlib--1.0.sql @@ -0,0 +1,13 @@ +/* contrib/cmzlib/cmzlib--1.0.sql */ + +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "CREATE EXTENSION cmzlib" to load this file. \quit + +CREATE FUNCTION zlibhandler(internal) +RETURNS compression_handler +AS 'MODULE_PATHNAME' +LANGUAGE C; + +-- Compression method +CREATE COMPRESSION METHOD zlib HANDLER zlibhandler; +COMMENT ON COMPRESSION METHOD zlib IS 'zlib compression method'; diff --git a/contrib/cmzlib/cmzlib.control b/contrib/cmzlib/cmzlib.control new file mode 100644 index 0000000000..2eb10f3a83 --- /dev/null +++ b/contrib/cmzlib/cmzlib.control @@ -0,0 +1,5 @@ +# cmzlib extension +comment = 'cmzlib compression method' +default_version = '1.0' +module_pathname = '$libdir/cmzlib' +relocatable = true diff --git a/contrib/cmzlib/expected/cmzlib.out b/contrib/cmzlib/expected/cmzlib.out new file mode 100644 index 0000000000..576d99bcfa --- /dev/null +++ b/contrib/cmzlib/expected/cmzlib.out @@ -0,0 +1,45 @@ +CREATE EXTENSION cmzlib; +-- zlib compression +CREATE TABLE zlibtest(f1 TEXT COMPRESSION lz4); +INSERT INTO zlibtest VALUES(repeat('1234567890',1004)); +INSERT INTO zlibtest VALUES(repeat('1234567890 one two three',1004)); +SELECT length(f1) FROM zlibtest; + length +-------- + 10040 + 24096 +(2 rows) + +-- alter compression method with rewrite +ALTER TABLE zlibtest ALTER COLUMN f1 SET COMPRESSION lz4; +\d+ zlibtest + Table "public.zlibtest" + Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description +--------+------+-----------+----------+---------+----------+-------------+--------------+------------- + f1 | text | | | | extended | lz4 | | + +ALTER TABLE zlibtest ALTER COLUMN f1 SET 
COMPRESSION zlib; +\d+ zlibtest + Table "public.zlibtest" + Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description +--------+------+-----------+----------+---------+----------+-------------+--------------+------------- + f1 | text | | | | extended | zlib | | + +-- preserve old compression method +ALTER TABLE zlibtest ALTER COLUMN f1 SET COMPRESSION lz4 PRESERVE (zlib); +INSERT INTO zlibtest VALUES (repeat('1234567890',1004)); +\d+ zlibtest + Table "public.zlibtest" + Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description +--------+------+-----------+----------+---------+----------+-------------+--------------+------------- + f1 | text | | | | extended | lz4 | | + +SELECT length(f1) FROM zlibtest; + length +-------- + 10040 + 24096 + 10040 +(3 rows) + +DROP TABLE zlibtest; diff --git a/contrib/cmzlib/sql/cmzlib.sql b/contrib/cmzlib/sql/cmzlib.sql new file mode 100644 index 0000000000..bdb59af4fc --- /dev/null +++ b/contrib/cmzlib/sql/cmzlib.sql @@ -0,0 +1,21 @@ +CREATE EXTENSION cmzlib; + +-- zlib compression +CREATE TABLE zlibtest(f1 TEXT COMPRESSION lz4); +INSERT INTO zlibtest VALUES(repeat('1234567890',1004)); +INSERT INTO zlibtest VALUES(repeat('1234567890 one two three',1004)); +SELECT length(f1) FROM zlibtest; + +-- alter compression method with rewrite +ALTER TABLE zlibtest ALTER COLUMN f1 SET COMPRESSION lz4; +\d+ zlibtest +ALTER TABLE zlibtest ALTER COLUMN f1 SET COMPRESSION zlib; +\d+ zlibtest + +-- preserve old compression method +ALTER TABLE zlibtest ALTER COLUMN f1 SET COMPRESSION lz4 PRESERVE (zlib); +INSERT INTO zlibtest VALUES (repeat('1234567890',1004)); +\d+ zlibtest +SELECT length(f1) FROM zlibtest; + +DROP TABLE zlibtest; diff --git a/contrib/cmzlib/zlib.c b/contrib/cmzlib/zlib.c new file mode 100644 index 0000000000..f24fc1c936 --- /dev/null +++ b/contrib/cmzlib/zlib.c @@ -0,0 +1,158 @@ 
+/*-------------------------------------------------------------------------
+ *
+ * zlib.c
+ *	  zlib compression method
+ *
+ * Copyright (c) 2015-2018, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ *	  contrib/cmzlib/zlib.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+#include "access/compressionapi.h"
+#include "access/toast_internals.h"
+
+#include "fmgr.h"
+#include "utils/builtins.h"
+
+/* z_stream, Bytef, deflate*() and inflate*() used below come from zlib */
+#include <zlib.h>
+
+PG_MODULE_MAGIC;
+
+PG_FUNCTION_INFO_V1(zlibhandler);
+
+void		_PG_init(void);
+
+/*
+ * Module initialize function.
+ *
+ * The body is empty: no setup is performed when this function runs.
+ */
+void
+_PG_init(void)
+{
+
+}
+
+/* Size in bytes of the dictionary buffer embedded in zlib_state. */
+#define ZLIB_MAX_DICTIONARY_LENGTH		32768
+/* Separator characters for dictionary words; not referenced in this file. */
+#define ZLIB_DICTIONARY_DELIM			(" ,")
+
+/*
+ * Settings for a zlib stream: a compression level plus a preset dictionary
+ * and its length.  The dictionary members are not referenced by any routine
+ * in this file.
+ *
+ * NOTE(review): the struct is 32 kB because of the embedded dictionary
+ * buffer, so avoid placing it on the stack.
+ */
+typedef struct
+{
+	int			level;
+	Bytef		dict[ZLIB_MAX_DICTIONARY_LENGTH];
+	unsigned int dictlen;
+} zlib_state;
+
+/*
+ * zlib_cmcompress - compression routine for zlib compression method
+ *
+ * Compresses source into dest using the default compression level.
+ * Returns the compressed varlena, or NULL if compression fails.
+ *
+ * Only the payload of 'value' (VARDATA_ANY / VARSIZE_ANY_EXHDR) is fed to
+ * zlib.  The compressed bytes are written starting 'header_size' bytes into
+ * the result; this routine itself only stores the varlena length word in
+ * that leading area (presumably the caller fills in the rest of the header
+ * -- TODO confirm against the caller).
+ *
+ * The output buffer is exactly as large as the input payload, so data that
+ * zlib cannot fit into that space is reported as a failure (NULL).
+ */
+static struct varlena *
+zlib_cmcompress(const struct varlena *value, int32 header_size)
+{
+	int32		valsize = VARSIZE_ANY_EXHDR(value);
+	int32		len;
+	struct varlena *tmp;
+	z_stream	zs;
+	int			res;
+
+	/*
+	 * Zero the stream so that zalloc/zfree/opaque are Z_NULL (zlib's default
+	 * allocator) and so that zs.msg is a defined NULL even if deflateInit()
+	 * fails before it gets around to setting it.
+	 */
+	memset(&zs, 0, sizeof(zs));
+
+	if (deflateInit(&zs, Z_DEFAULT_COMPRESSION) != Z_OK)
+		elog(ERROR, "could not initialize compression library: %s", zs.msg);
+
+	tmp = (struct varlena *) palloc(valsize + header_size);
+	zs.next_in = (Bytef *) VARDATA_ANY(value);
+	zs.avail_in = valsize;
+	zs.next_out = (Bytef *) ((char *) tmp + header_size);
+	zs.avail_out = valsize;
+
+	/*
+	 * All of the input and all of the output space are supplied up front, so
+	 * a single deflate(Z_FINISH) call either completes the stream
+	 * (Z_STREAM_END) or runs out of output space (Z_OK or Z_BUF_ERROR).
+	 * Calling deflate() again in the latter case cannot make progress,
+	 * because avail_out never grows; it would just keep returning
+	 * Z_BUF_ERROR.  Treat it as "not compressible into valsize bytes".
+	 */
+	res = deflate(&zs, Z_FINISH);
+	if (res == Z_STREAM_ERROR)
+		elog(ERROR, "could not compress data: %s", zs.msg);
+
+	if (res != Z_STREAM_END)
+	{
+		/*
+		 * deflateEnd() returns Z_DATA_ERROR when a stream is released before
+		 * it was finished; that is expected here, so ignore the result.
+		 */
+		(void) deflateEnd(&zs);
+		pfree(tmp);
+		return NULL;
+	}
+
+	/* number of compressed bytes actually produced */
+	len = valsize - zs.avail_out;
+
+	if (deflateEnd(&zs) != Z_OK)
+		elog(ERROR, "could not close compression stream: %s", zs.msg);
+
+	if (len > 0)
+	{
+		SET_VARSIZE_COMPRESSED(tmp, len + header_size);
+		return tmp;
+	}
+
+	pfree(tmp);
+	return NULL;
+}
+
+/*
+ * zlib_cmdecompress - decompression routine for zlib compression method
+ *
+ * Returns the decompressed varlena.
+ *
+ * The zlib stream is read from 'value' starting 'header_size' bytes in and
+ * running to VARSIZE(value).  The result is a plain 4-byte-header varlena
+ * whose payload length is VARRAWSIZE_4B_C(value).
+ *
+ * Raises ERROR if the stream is corrupt, is truncated, or does not inflate
+ * to exactly the recorded raw size.
+ */
+static struct varlena *
+zlib_cmdecompress(const struct varlena *value, int32 header_size)
+{
+	struct varlena *result;
+	z_stream	zs;
+	uInt		rawsize = VARRAWSIZE_4B_C(value);
+	int			res;
+
+	/*
+	 * Allocate the result before creating the zlib stream: zlib's state is
+	 * malloc'ed, so an out-of-memory ERROR from palloc() after inflateInit()
+	 * would leak it.
+	 */
+	result = (struct varlena *) palloc(rawsize + VARHDRSZ);
+	SET_VARSIZE(result, rawsize + VARHDRSZ);
+
+	/*
+	 * Zero the stream (default allocator, NULL msg) and describe the input
+	 * before inflateInit(), which requires next_in/avail_in to be set.
+	 */
+	memset(&zs, 0, sizeof(zs));
+	zs.next_in = (Bytef *) ((char *) value + header_size);
+	zs.avail_in = VARSIZE(value) - header_size;
+
+	if (inflateInit(&zs) != Z_OK)
+		elog(ERROR, "could not initialize compression library: %s", zs.msg);
+
+	zs.next_out = (Bytef *) VARDATA(result);
+	zs.avail_out = rawsize;
+
+	/*
+	 * The whole input and the whole output buffer are available, so inflate
+	 * in one step with Z_FINISH.  Anything other than Z_STREAM_END means the
+	 * data is corrupt, truncated, or larger than rawsize.  Leftover output
+	 * space means it is shorter than rawsize, which would leave the tail of
+	 * 'result' uninitialized.
+	 */
+	res = inflate(&zs, Z_FINISH);
+	if (res != Z_STREAM_END || zs.avail_out != 0)
+	{
+		/* zs.msg may be NULL, e.g. for Z_BUF_ERROR or a size mismatch */
+		const char *msg = zs.msg ? pstrdup(zs.msg) : "compressed data is corrupt";
+
+		/* release zlib's malloc'ed state before elog() longjmps away */
+		(void) inflateEnd(&zs);
+		elog(ERROR, "could not uncompress data: %s", msg);
+	}
+
+	if (inflateEnd(&zs) != Z_OK)
+		elog(ERROR, "could not close compression library: %s", zs.msg);
+
+	return result;
+}
+
+
+/*
+ * zlibhandler - handler function for the zlib compression method
+ *
+ * Builds a CompressionRoutine node pointing at the compress and decompress
+ * routines above.  No slice-decompression routine is provided.
+ */
+Datum
+zlibhandler(PG_FUNCTION_ARGS)
+{
+	CompressionRoutine *routine = makeNode(CompressionRoutine);
+
+	routine->cmcompress = zlib_cmcompress;
+	routine->cmdecompress = zlib_cmdecompress;
+	routine->cmdecompress_slice = NULL;
+
+	PG_RETURN_POINTER(routine);
+}
\ No newline at end of file
-- 
2.23.0