diff --git a/src/backend/utils/adt/numeric.c b/src/backend/utils/adt/numeric.c index ff9bfcc..57532a9 100644 --- a/src/backend/utils/adt/numeric.c +++ b/src/backend/utils/adt/numeric.c @@ -29,13 +29,28 @@ #include "access/hash.h" #include "catalog/pg_type.h" #include "funcapi.h" +#include "lib/hyperloglog.h" #include "libpq/pqformat.h" #include "miscadmin.h" #include "nodes/nodeFuncs.h" #include "utils/array.h" #include "utils/builtins.h" #include "utils/int8.h" +#include "utils/memutils.h" #include "utils/numeric.h" +#include "utils/sortsupport.h" + +#ifndef INT64_MIN +#define INT64_MIN (-INT64CONST(0x7FFFFFFFFFFFFFFF) - 1) +#endif +#ifndef INT64_MAX +#define INT64_MAX INT64CONST(0x7FFFFFFFFFFFFFFF) +#endif + +/* Abbreviation sortsupport encoding scheme supported? */ +#ifndef USE_FLOAT8_BYVAL +#define DISABLE_NUMERIC_ABBREV +#endif /* ---------- * Uncomment the following to enable compilation of dump_numeric() @@ -275,6 +290,19 @@ typedef struct /* ---------- + * sortsupport data + * ---------- + */ +typedef struct +{ + bool estimating; /* Still estimating cardinality? */ + void *buf; /* Scratch, for handling unaligned packed values */ + Size buflen; /* current size of buf */ + hyperLogLogState abbr_card; /* Abbreviated key cardinality state */ +} NumericSortSupport; + + +/* ---------- * Some preinitialized constants * ---------- */ @@ -410,6 +438,14 @@ static double numeric_to_double_no_overflow(Numeric num); static double numericvar_to_double_no_overflow(NumericVar *var); static int cmp_numerics(Numeric num1, Numeric num2); +static int numericfastcmp(Datum x, Datum y, SortSupport ssup); +#ifndef DISABLE_NUMERIC_ABBREV +static Datum numeric_abbrev_convert(Datum original, SortSupport ssup); +static bool numeric_abbrev_abort(int memtupcount, SortSupport ssup); +static int numericcmp_abbrev(Datum x, Datum y, SortSupport ssup); +static Datum numeric_abbrev_convert_worker(NumericVar *var, + NumericSortSupport *nss); +#endif static int cmp_var(NumericVar *var1, NumericVar *var2); static int cmp_var_common(const NumericDigit *var1digits, int var1ndigits, int var1weight, int var1sign, @@ -1507,8 +1543,31 @@ compute_bucket(Numeric operand, Numeric bound1, Numeric bound2, * Note: btree indexes need these routines not to leak memory; therefore, * be careful to free working copies of toasted datums. Most places don't * need to be so careful. + * + * sortsupport: + * + * Numeric uses a sortsupport routine, which is mostly effective in speeding up + * sorts because of its abbreviation capability. + * + * The abbreviation code makes assumptions about word sizes, such as that the + * value of a 4-decimal-digit field can fit in 14 bits rather than needing 16. + * int64 is used as the underlying representation if USE_FLOAT8_BYVAL is set + * (which, despite its name, indicates if int64 is pass-by-value), and if + * abbreviation is not otherwise disabled. Otherwise, abbreviation isn't + * performed at all. * ---------------------------------------------------------------------- */ +#ifdef DEBUG_ABBREV_KEYS +#define DEBUG_elog_output DEBUG1 +#endif + +/* Abbreviation related constants */ +/* NaN is encoded as lowest possible negative integer value */ +#define NUMERIC_ABBREV_NAN ((int64) INT64_MIN) +/* "Negative infinity" representation for large negative scales, and zero */ +#define NUMERIC_ABBREV_NEG_INFINITY ((int64) 0) +/* "Positive infinity" representation for large positive scales */ +#define NUMERIC_ABBREV_POS_INFINITY ((int64) INT64_MAX) Datum @@ -1650,6 +1709,351 @@ cmp_numerics(Numeric num1, Numeric num2) } Datum +numericsortsupport(PG_FUNCTION_ARGS) +{ + SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0); + +#ifndef DISABLE_NUMERIC_ABBREV + + StaticAssertStmt(DEC_DIGITS == 4, + "numericsortsupport assumes 4 dec digits/NBASE digits"); + + if (ssup->abbreviate) + { + NumericSortSupport *nss; + MemoryContext oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt); + + ssup->abbrev_full_comparator = numericfastcmp; + ssup->comparator = numericcmp_abbrev; + ssup->abbrev_converter = numeric_abbrev_convert; + ssup->abbrev_abort = numeric_abbrev_abort; + + nss = palloc(sizeof(NumericSortSupport)); + /* Allocate a buffer for handling unaligned packed values */ + nss->buflen = NUMERIC_HDRSZ + 8 * sizeof(NumericDigit); + nss->buf = palloc(nss->buflen); + nss->estimating = true; + initHyperLogLog(&nss->abbr_card, 10); + ssup->ssup_extra = nss; + + MemoryContextSwitchTo(oldcontext); + } + else +#endif /* DISABLE_NUMERIC_ABBREV */ + { + /* + * Set ssup->comparator to a function which can be used to directly + * compare two datums, avoiding the overhead of a trip through the fmgr + * layer for every comparison, which can be substantial. + */ + ssup->comparator = numericfastcmp; + } + + PG_RETURN_VOID(); +} + +/* + * sortsupport comparison func + */ +static int +numericfastcmp(Datum x, Datum y, SortSupport ssup) +{ + Numeric num1 = DatumGetNumeric(x); + Numeric num2 = DatumGetNumeric(y); + int result; + + result = cmp_numerics(num1, num2); + + if ((Pointer) num1 != DatumGetPointer(x)) + pfree(num1); + if ((Pointer) num2 != DatumGetPointer(y)) + pfree(num2); + + return result; +} + +#ifndef DISABLE_NUMERIC_ABBREV +/* + * Conversion routine for sortsupport. Converts original numeric to + * abbreviated key representation. + */ +static Datum +numeric_abbrev_convert(Datum original, SortSupport ssup) +{ + NumericSortSupport *nss = (NumericSortSupport *) ssup->ssup_extra; + void *authoritative_data = PG_DETOAST_DATUM_PACKED(original); + + /* working state */ + Numeric value; + Datum res; + + if (VARATT_IS_SHORT(authoritative_data)) + { + Size data_size; + Size new_size; + + /* + * This is a short-header varlena --- convert to 4-byte header format. + * + * This is necessary because init_var_from_num() does not expect the + * packed format. We can avoid palloc()/pfree() cycles by reusing the + * same buffer across calls. + */ + data_size = VARSIZE_SHORT(authoritative_data) - VARHDRSZ_SHORT;; + new_size = data_size + VARHDRSZ; + + if (new_size > nss->buflen) + { + pfree(nss->buf); + nss->buflen = Max(new_size, Min(nss->buflen * 2, MaxAllocSize)); + nss->buf = palloc(nss->buflen); + } + + SET_VARSIZE(nss->buf, new_size); + memcpy(VARDATA(nss->buf), VARDATA_SHORT(authoritative_data), data_size); + + value = (Numeric) nss->buf; + } + else + { + value = (Numeric) authoritative_data; + } + + if (NUMERIC_IS_NAN(value)) + { + res = NUMERIC_ABBREV_NAN; + } + else + { + NumericVar var; + + init_var_from_num(value, &var); + + /* + * Worker routine operates on NumericVar representation, to generate + * abbreviated int64 representation + */ + res = numeric_abbrev_convert_worker(&var, nss); + + if (nss->estimating) + { + /* Hash abbreviated key */ + uint32 hash; + uint32 lohalf, + hihalf; + + StaticAssertStmt(SIZEOF_DATUM == 8, + "numeric abbreviation assumes 8 byte Datum size"); + + lohalf = (uint32) res; + hihalf = (uint32) (res >> 32); + hash = hash_uint32(lohalf ^ hihalf); + + addHyperLogLog(&nss->abbr_card, hash); + } + } + + /* Don't leak memory here */ + if ((Pointer) authoritative_data != DatumGetPointer(original)) + pfree(authoritative_data); + + return res; +} + +static bool +numeric_abbrev_abort(int memtupcount, SortSupport ssup) +{ + NumericSortSupport *nss = (NumericSortSupport *) ssup->ssup_extra; + double abbrev_distinct; + + Assert(ssup->abbreviate); + + /* + * Have some patience, or defer to previous decision to no longer estimate + */ + if (memtupcount < 10000 || !nss->estimating) + return false; + + abbrev_distinct = estimateHyperLogLog(&nss->abbr_card); + + /* + * Clamp cardinality estimate to at least one distinct value. While NULLs + * are generally disregarded, if only NULL values were seen so far, that + * might misrepresent costs if we failed to clamp. + */ + if (abbrev_distinct <= 1.0) + abbrev_distinct = 1.0; + +#ifdef DEBUG_ABBREV_KEYS + { + double norm_abbrev_card = abbrev_distinct / (double) memtupcount; + + elog(DEBUG_elog_output, "abbrev_distinct after %d: %f (norm_abbrev_card: %f)", + memtupcount, abbrev_distinct, norm_abbrev_card); + } +#endif + + /* + * If we have > 100k distinct values, then even if we were sorting many + * billions of rows we'd likely still break even. Besides, the additional + * penalty of actually undoing abbreviation would probably not be worth it. + * Stop even counting at 100k tuples. + */ + if (abbrev_distinct > 100000.0) + { + nss->estimating = false; +#ifdef DEBUG_ABBREV_KEYS + elog(DEBUG_elog_output, "gave up on considering aborting at %d. abbrev_distinct: %f", + memtupcount, abbrev_distinct); +#endif + return false; + } + + /* + * Target minimum cardinality is 1 per ~10k tuples. (The break even point + * is somewhere between one per 100k tuples, where abbreviation has a very + * slight penalty, and 1 per 10k, where it wins by a measurable + * percentage). We use the relatively pessimistic 10k threshold. + */ + if (abbrev_distinct > memtupcount / 10000.0) + return false; + +#ifdef DEBUG_ABBREV_KEYS + elog(DEBUG_elog_output, "would have aborted abbreviation due to worst-case at %d. abbrev_distinct: %f", + memtupcount, abbrev_distinct); + /* Actually abort only when debugging is disabled */ + return false; +#endif + + return true; +} + +/* + * Abbreviated key comparison func + */ +static int +numericcmp_abbrev(Datum x, Datum y, SortSupport ssup) +{ + int64 a = DatumGetInt64(x); + int64 b = DatumGetInt64(y); + + /* + * NB: This is intentionally backwards, because the abbreviated + * representation has its sign inverted (this is used to seamlessly handle + * NaN comparisons, and representational saturation) + */ + if (a < b) + return 1; + else if (a > b) + return -1; + else + return 0; +} + +/* + * Guts of encoding scheme + */ +static Datum +numeric_abbrev_convert_worker(NumericVar *var, NumericSortSupport *nss) +{ + int ndigits = var->ndigits; + int weight = var->weight; + int64 res; + + Assert(var->sign != NUMERIC_NAN); + + if (ndigits == 0 || weight < -44) + { + res = NUMERIC_ABBREV_NEG_INFINITY; + } + else if (weight > 83) + { + res = NUMERIC_ABBREV_POS_INFINITY; + } + else + { + int64 exponent, abs; + + /* + * First, store "weight" in first 7 binary digits (7 binary digits is + * the smallest number of digits that allows us to represent weights + * -44 to 83 inclusive). + * + * The weight is stored in excess-44 -- the original weight in digit + * words (i.e. powers of NBASE/10000). The "excess-K"/offset binary + * technique is also used with IEEE-754 floating point numbers. As + * with IEEE-754, we use an exponent without a sign (a 7-bit exponent + * without a sign). And as with IEEE-754, the lack of a sign does not + * imply that the exponent must be *logically* positive. + * + * Adding 44 to "weight" makes the smallest possible "exponent" 7 + * binary digit number (where "res" hasn't already saturated to + * NUMERIC_ABBREV_NEG_INFINITY) have the value zero in this + * representation (since the smallest possible "weight" is -44). If + * the "weight" is 83, the largest possible, then "exponent" will have + * 0b1111111 as its first 7 binary digits. The 7-digit exponent is + * always positive (forgetting for a moment that it's really + * excess-44), and thus the bits could be equivalently interpreted as + * either signed or unsigned. + * + * We left shift 56 bits in order to have this 7-bit representation + * stored at the beginning of "exponent", a two's complement/signed + * 64-bit integer. In doing so, an eighth bit is "wasted". + */ + exponent = ((int64) (weight + 44) << 56); + abs = 0; + + /* + * Append 4 x 14-bit packed digit words into remaining 56 bits. + * 14-bits of storage should be enough to represent the largest + * possible base-NBASE digit, despite the fact that those are stored as + * int16. + */ + switch (ndigits) + { + default: + abs |= ((int64) var->digits[3]); + /* FALLTHROUGH */ + case 3: + abs |= ((int64) var->digits[2]) << 14; + /* FALLTHROUGH */ + case 2: + abs |= ((int64) var->digits[1]) << 28; + /* FALLTHROUGH */ + case 1: + abs |= ((int64) var->digits[0]) << 42; + break; + } + + res = exponent | abs; + } + + Assert(res >= 0); + + /* + * Flip the abbreviated int64 representation's sign, for positive numerics + * only, since the 63-bit value is currently the absolute value. With + * this, the last detail of encoding things such that int64 comparisons can + * be interpreted backwards (within the abbreviated comparator, as a proxy + * for actual numeric comparisons) is complete. + */ + if (var->sign == NUMERIC_POS) + res = -res; + + Assert(res > NUMERIC_ABBREV_NAN); + + /* + * The representable range is 10^-176 to 10^332, which is considered to be + * good enough for most practical purposes. Comparison of + * 4 words means that at least 13 decimal digits are compared, which is + * considered to be a reasonable compromise between effectiveness in + * concentrating entropy, and efficiency in computing abbreviations. + */ + return Int64GetDatum(res); +} +#endif /* #ifndef DISABLE_NUMERIC_ABBREV */ + +Datum hash_numeric(PG_FUNCTION_ARGS) { Numeric key = PG_GETARG_NUMERIC(0); diff --git a/src/include/catalog/pg_amproc.h b/src/include/catalog/pg_amproc.h index 49d3d13..2461593 100644 --- a/src/include/catalog/pg_amproc.h +++ b/src/include/catalog/pg_amproc.h @@ -118,6 +118,7 @@ DATA(insert ( 1984 829 829 1 836 )); DATA(insert ( 1986 19 19 1 359 )); DATA(insert ( 1986 19 19 2 3135 )); DATA(insert ( 1988 1700 1700 1 1769 )); +DATA(insert ( 1988 1700 1700 2 3280 )); DATA(insert ( 1989 26 26 1 356 )); DATA(insert ( 1989 26 26 2 3134 )); DATA(insert ( 1991 30 30 1 404 )); diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h index 3c218a3..bc2c8e3 100644 --- a/src/include/catalog/pg_proc.h +++ b/src/include/catalog/pg_proc.h @@ -2366,6 +2366,8 @@ DATA(insert OID = 1767 ( numeric_larger PGNSP PGUID 12 1 0 0 0 f f f f t f i 2 DESCR("larger of two"); DATA(insert OID = 1769 ( numeric_cmp PGNSP PGUID 12 1 0 0 0 f f f f t f i 2 0 23 "1700 1700" _null_ _null_ _null_ _null_ numeric_cmp _null_ _null_ _null_ )); DESCR("less-equal-greater"); +DATA(insert OID = 3280 ( numericsortsupport PGNSP PGUID 12 1 0 0 0 f f f f t f i 1 0 2278 "2281" _null_ _null_ _null_ _null_ numericsortsupport _null_ _null_ _null_ )); +DESCR("sort support"); DATA(insert OID = 1771 ( numeric_uminus PGNSP PGUID 12 1 0 0 0 f f f f t f i 1 0 1700 "1700" _null_ _null_ _null_ _null_ numeric_uminus _null_ _null_ _null_ )); DATA(insert OID = 1779 ( int8 PGNSP PGUID 12 1 0 0 0 f f f f t f i 1 0 20 "1700" _null_ _null_ _null_ _null_ numeric_int8 _null_ _null_ _null_ )); DESCR("convert numeric to int8"); diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h index 6310641..f518111 100644 --- a/src/include/utils/builtins.h +++ b/src/include/utils/builtins.h @@ -985,6 +985,7 @@ extern Datum numeric_gt(PG_FUNCTION_ARGS); extern Datum numeric_ge(PG_FUNCTION_ARGS); extern Datum numeric_lt(PG_FUNCTION_ARGS); extern Datum numeric_le(PG_FUNCTION_ARGS); +extern Datum numericsortsupport(PG_FUNCTION_ARGS); extern Datum numeric_add(PG_FUNCTION_ARGS); extern Datum numeric_sub(PG_FUNCTION_ARGS); extern Datum numeric_mul(PG_FUNCTION_ARGS);