diff --git a/doc/src/sgml/ref/pgbench.sgml b/doc/src/sgml/ref/pgbench.sgml
index 03e1212d50..f272b7dca4 100644
--- a/doc/src/sgml/ref/pgbench.sgml
+++ b/doc/src/sgml/ref/pgbench.sgml
@@ -1013,6 +1013,14 @@ pgbench options> dbname>
random_gaussian(1, 10, 2.5)>>
an integer between 1> and 10>>
+
+ random_zipfian(lb>, ub>, parameter>)>>>
+ integer>
+ Zipfian-distributed random integer in [lb, ub]>,
+ see below>
+ random_zipfian(1, 10, 1.5)>>
+ an integer between 1> and 10>>
+
sqrt(x>)>>>
double>
@@ -1094,6 +1102,28 @@ f(x) = PHI(2.0 * parameter * (x - mu) / (max - min + 1)) /
of the Box-Muller transform.
+
+
+ random_zipfian> generates an approximated bounded zipfian
+ distribution. For parameter> in (0, 1), an
+ approximated algorithm is taken from
+ "Quickly Generating Billion-Record Synthetic Databases",
+ Jim Gray et al, SIGMOD 1994. For parameter>
+ in (1, 1000), a rejection method used, based on
+ "Non-Uniform Random Variate Generation", Luc Devroye, p. 550-551,
+ Springer 1986. The distribution is not defined when the parameter's
+ value is 1.0. The drawing performance is poor for parameter values
+ close and above 1.0 and on a small range.
+
+
+
+ parameter>
+ defines how skewed the distribution is. The larger the parameter>, the more
+ frequently values to the beginning of the interval are drawn.
+ The closer to 0 parameter> is,
+ the flatter (more uniform) the access distribution.
+
+
diff --git a/src/bin/pgbench/exprparse.y b/src/bin/pgbench/exprparse.y
index b3a2d9bfd3..25d5ad48e5 100644
--- a/src/bin/pgbench/exprparse.y
+++ b/src/bin/pgbench/exprparse.y
@@ -191,6 +191,9 @@ static const struct
{
"random_exponential", 3, PGBENCH_RANDOM_EXPONENTIAL
},
+ {
+ "random_zipfian", 3, PGBENCH_RANDOM_ZIPFIAN
+ },
/* keep as last array element */
{
NULL, 0, 0
diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c
index ae78c7b1d4..c0fc98975f 100644
--- a/src/bin/pgbench/pgbench.c
+++ b/src/bin/pgbench/pgbench.c
@@ -93,7 +93,10 @@ static int pthread_join(pthread_t th, void **thread_return);
#define LOG_STEP_SECONDS 5 /* seconds between log messages */
#define DEFAULT_NXACTS 10 /* default nxacts */
+#define ZIPF_CACHE_SIZE 15 /* cache cells number */
+
#define MIN_GAUSSIAN_PARAM 2.0 /* minimum parameter for gauss */
+#define MAX_ZIPFIAN_PARAM 1000 /* maximum parameter for zipfian */
int nxacts = 0; /* number of transactions per client */
int duration = 0; /* duration in seconds */
@@ -333,6 +336,36 @@ typedef struct
int ecnt; /* error count */
} CState;
+/*
+ * Cache cell for zipfian_random call
+ */
+typedef struct
+{
+ /* cell keys */
+ double s; /* s - parameter of zipfan_random function */
+ int64 n; /* number of elements in range (max - min + 1) */
+
+ double harmonicn; /* generalizedHarmonicNumber(n, s) */
+ double alpha;
+ double beta;
+ double eta;
+
+ uint64 last_used; /* last used logical time */
+} ZipfCell;
+
+/*
+ * Zipf cache for zeta values
+ */
+typedef struct
+{
+ uint64 current; /* counter for LRU cache replacement algorithm */
+
+ int nb_cells; /* number of filled cells */
+ ZipfCell cells[ZIPF_CACHE_SIZE];
+} ZipfCache;
+
+bool zipfCacheOverflowMessagePrinted = false;
+
/*
* Thread state
*/
@@ -345,6 +378,8 @@ typedef struct
unsigned short random_state[3]; /* separate randomness for each thread */
int64 throttle_trigger; /* previous/next throttling (us) */
FILE *logfile; /* where to log, or NULL */
+ ZipfCache zipf_cache; /* for thread-safe zipfian random number
+ * generation */
/* per thread collected stats */
instr_time start_time; /* thread start time */
@@ -737,6 +772,140 @@ getPoissonRand(TState *thread, int64 center)
return (int64) (-log(uniform) * ((double) center) + 0.5);
}
+/* helper function for getZipfianRand */
+static double
+generalizedHarmonicNumber(int64 n, double s)
+{
+ int i;
+ double ans = 0.0;
+
+ for (i = n; i > 1; i--)
+ ans += pow(i, -s);
+ return ans + 1.0;
+}
+
+/* set harmonicn and other parameters to cache cell */
+static void
+zipfSetCacheCell(ZipfCell * cell, int64 n, double s)
+{
+ double harmonic2;
+
+ cell->n = n;
+ cell->s = s;
+
+ harmonic2 = generalizedHarmonicNumber(2, s);
+ cell->harmonicn = generalizedHarmonicNumber(n, s);
+
+ cell->alpha = 1.0 / (1.0 - s);
+ cell->beta = pow(0.5, s);
+ cell->eta = (1.0 - pow(2.0 / n, 1.0 - s)) / (1.0 - harmonic2 / cell->harmonicn);
+}
+
+/*
+ * search for cache cell with keys (n, s)
+ * and create new cell if it does not exist
+ */
+static ZipfCell *
+zipfFindOrCreateCacheCell(ZipfCache * cache, int64 n, double s)
+{
+ int i,
+ least_recently_used_i = 0;
+ ZipfCell *cell;
+
+ /* search cached cell for given parameters */
+ for (i = 0; i < cache->nb_cells; i++)
+ {
+ cell = &cache->cells[i];
+ if (cell->n == n && cell->s == s)
+ return &cache->cells[i];
+
+ if (cell->last_used < cache->cells[least_recently_used_i].last_used)
+ least_recently_used_i = i;
+ }
+
+ /* create new one if it does not exist */
+ if (cache->nb_cells != ZIPF_CACHE_SIZE)
+ i = cache->nb_cells++;
+ else
+ {
+ /* replace LRU cell if cache is full */
+ i = least_recently_used_i;
+ if (!zipfCacheOverflowMessagePrinted)
+ {
+ /*
+ * flag can be accessed in several threads simultaneously and
+ * message can be printed more than one time
+ */
+ zipfCacheOverflowMessagePrinted = true;
+ fprintf(stderr, "zipfian cache array overflowed\n");
+ }
+ }
+
+ zipfSetCacheCell(&cache->cells[i], n, s);
+
+ cache->cells[i].last_used = cache->current++;
+ return &cache->cells[i];
+}
+/*
+ * Computing zipfian usinng rejection method, based on
+ * "Non-Uniform Random Variate Generation",
+ * Luc Devroye, p. 550-551, Springer 1986.
+ */
+static int64
+computeIterativeZipfian(TState *thread, int64 n, double s)
+{
+ double b = pow(2.0, s - 1.0);
+ double x,
+ t,
+ u,
+ v;
+
+ while (true)
+ {
+ /* random variates */
+ u = pg_erand48(thread->random_state);
+ v = pg_erand48(thread->random_state);
+
+ x = floor(pow(u, -1.0 / (s - 1.0)));
+
+ t = pow(1.0 + 1.0 / x, s - 1.0);
+ /* reject if too large or out of bound */
+ if (v * x * (t - 1.0) / (b - 1.0) <= t / b && x <= n)
+ break;
+ }
+ return (int64) x;
+}
+
+/*
+ * Computing zipfian using harmonic numbers, based on algorithm described in
+ * "Quickly Generating Billion-Record Synthetic Databases",
+ * Jim Gray et al, SIGMOD 1994
+ */
+static int64
+computeHarmonicZipfian(TState *thread, int64 n, double s)
+{
+ ZipfCell *cell = zipfFindOrCreateCacheCell(&thread->zipf_cache, n, s);
+ double uniform = pg_erand48(thread->random_state);
+ double uz = uniform * cell->harmonicn;
+
+ if (uz < 1.0)
+ return 1;
+ if (uz < 1.0 + cell->beta)
+ return 2;
+ return 1 + (int64) (cell->n * pow(cell->eta * uniform - cell->eta + 1.0, cell->alpha));
+}
+
+/* random number generator: zipfian distribution from min to max inclusive */
+static int64
+getZipfianRand(TState *thread, int64 min, int64 max, double s)
+{
+ int64 n = max - min + 1;
+
+ return min - 1 + ((s > 1)
+ ? computeIterativeZipfian(thread, n, s)
+ : computeHarmonicZipfian(thread, n, s));
+}
+
/*
* Initialize the given SimpleStats struct to all zeroes
*/
@@ -1567,6 +1736,7 @@ evalFunc(TState *thread, CState *st,
case PGBENCH_RANDOM:
case PGBENCH_RANDOM_EXPONENTIAL:
case PGBENCH_RANDOM_GAUSSIAN:
+ case PGBENCH_RANDOM_ZIPFIAN:
{
int64 imin,
imax;
@@ -1617,6 +1787,18 @@ evalFunc(TState *thread, CState *st,
setIntValue(retval,
getGaussianRand(thread, imin, imax, param));
}
+ else if (func == PGBENCH_RANDOM_ZIPFIAN)
+ {
+ if (param <= 0.0 || param == 1.0 || param > MAX_ZIPFIAN_PARAM)
+ {
+ fprintf(stderr,
+ "zipfian parameter must be in range (0, 1) U (1, %d]"
+ " (got %f)\n", MAX_ZIPFIAN_PARAM, param);
+ return false;
+ }
+ setIntValue(retval,
+ getZipfianRand(thread, imin, imax, param));
+ }
else /* exponential */
{
if (param <= 0.0)
@@ -4273,6 +4455,8 @@ main(int argc, char **argv)
thread->random_state[2] = random();
thread->logfile = NULL; /* filled in later */
thread->latency_late = 0;
+ thread->zipf_cache.nb_cells = 0;
+ thread->zipf_cache.current = 0;
initStats(&thread->stats, 0);
nclients_dealt += thread->nstate;
diff --git a/src/bin/pgbench/pgbench.h b/src/bin/pgbench/pgbench.h
index abc13e9463..6eff7016ea 100644
--- a/src/bin/pgbench/pgbench.h
+++ b/src/bin/pgbench/pgbench.h
@@ -75,7 +75,8 @@ typedef enum PgBenchFunction
PGBENCH_SQRT,
PGBENCH_RANDOM,
PGBENCH_RANDOM_GAUSSIAN,
- PGBENCH_RANDOM_EXPONENTIAL
+ PGBENCH_RANDOM_EXPONENTIAL,
+ PGBENCH_RANDOM_ZIPFIAN
} PgBenchFunction;
typedef struct PgBenchExpr PgBenchExpr;