commit f5e73a04684d0b70f9ce3a904db2b96a21e19f58 Author: Heikki Linnakangas Date: Thu Oct 22 14:34:51 2020 +0300 Sort benchmark stuff diff --git a/src/test/sort/Makefile b/src/test/sort/Makefile new file mode 100644 index 00000000000..4ec150a69c7 --- /dev/null +++ b/src/test/sort/Makefile @@ -0,0 +1,32 @@ +CFLAGS=-g -I`pg_config --includedir` + +LDFLAGS=-L`pg_config --libdir` -lpq -lm + +TESTDB=sorttest + +# For testing quicksort. +SCALE_SMALL=1024 # 1 MB + +# For testing external sort, while the dataset still fits in OS cache. +SCALE_MEDIUM=1048576 # 1 GB +#SCALE_MEDIUM=102400 # 100 MB + +# Does not fit in memory. +SCALE_LARGE=20971520 # 20 GB +#SCALE_LARGE=1500000 # 20 GB + +all: generate speed correctness + +generate: generate.c + +speed: speed.c + +correctness: correctness.c + +generate_testdata: + dropdb --if-exists $(TESTDB) + createdb $(TESTDB) + psql $(TESTDB) -c "CREATE SCHEMA small; CREATE SCHEMA medium; CREATE SCHEMA large;" + (echo "set search_path=medium;"; ./generate all $(SCALE_MEDIUM)) | psql $(TESTDB) + (echo "set search_path=small;"; ./generate all $(SCALE_SMALL)) | psql $(TESTDB) + (echo "set search_path=large;"; ./generate all $(SCALE_LARGE)) | psql $(TESTDB) diff --git a/src/test/sort/correctness.c b/src/test/sort/correctness.c new file mode 100644 index 00000000000..b41aa2e3a42 --- /dev/null +++ b/src/test/sort/correctness.c @@ -0,0 +1,153 @@ +#include +#include +#include + +#include + +#include + +static PGconn *conn; + +static void +execute(const char *sql) +{ + int i; + PGresult *res; + + fprintf(stderr, "%s\n", sql); + + res = PQexec(conn, sql); + if (PQresultStatus(res) != PGRES_COMMAND_OK && PQresultStatus(res) != PGRES_TUPLES_OK) + { + fprintf(stderr,"command failed: %s\n%s", sql, PQerrorMessage(conn)); + PQclear(res); + exit(1); + } + + PQclear(res); +} + +static void +check_sorted(const char *sql, int (*cmp)(const char *a, const char *b)) +{ + int i; + PGresult *res; + PGresult *prevres = NULL; + int rowno; + + fprintf(stderr, "running query: %s\n", sql); + if (!PQsendQuery(conn, sql)) + { + fprintf(stderr,"query failed: %s\n%s", sql, PQerrorMessage(conn)); + PQclear(res); + exit(1); + } + if (!PQsetSingleRowMode(conn)) + { + fprintf(stderr,"setting single-row mode failed: %s", PQerrorMessage(conn)); + PQclear(res); + exit(1); + } + + rowno = 1; + while (res = PQgetResult(conn)) + { + if (PQresultStatus(res) == PGRES_TUPLES_OK) + continue; + if (PQresultStatus(res) != PGRES_SINGLE_TUPLE) + { + fprintf(stderr,"error while fetching: %d, %s\n%s", PQresultStatus(res), sql, PQerrorMessage(conn)); + PQclear(res); + exit(1); + } + + if (prevres) + { + if (!cmp(PQgetvalue(prevres, 0, 0), PQgetvalue(res, 0, 0))) + { + fprintf(stderr,"FAIL: result not sorted, row %d: %s, prev %s\n", rowno, + PQgetvalue(prevres, 0, 0), PQgetvalue(res, 0, 0)); + PQclear(res); + exit(1); + } + PQclear(prevres); + } + prevres = res; + + rowno++; + } + + if (prevres) + PQclear(prevres); +} + + +static int +compare_strings(const char *a, const char *b) +{ + return strcmp(a, b) <= 0; +} + +static int +compare_ints(const char *a, const char *b) +{ + return atoi(a) <= atoi(b); +} + +int +main(int argc, char **argv) +{ + double duration; + char buf[1000]; + + /* Make a connection to the database */ + conn = PQconnectdb(""); + + /* Check to see that the backend connection was successfully made */ + if (PQstatus(conn) != CONNECTION_OK) + { + fprintf(stderr, "Connection to database failed: %s", + PQerrorMessage(conn)); + exit(1); + } + execute("set trace_sort=on"); + + execute("set work_mem = '4MB'"); + + check_sorted("SELECT * FROM small.ordered_ints ORDER BY i", compare_ints); + check_sorted("SELECT * FROM small.random_ints ORDER BY i", compare_ints); + check_sorted("SELECT * FROM small.ordered_text ORDER BY t", compare_strings); + check_sorted("SELECT * FROM small.random_text ORDER BY t", compare_strings); + + execute("set work_mem = '16MB'"); + + check_sorted("SELECT * FROM medium.ordered_ints ORDER BY i", compare_ints); + check_sorted("SELECT * FROM medium.random_ints ORDER BY i", compare_ints); + check_sorted("SELECT * FROM medium.ordered_text ORDER BY t", compare_strings); + check_sorted("SELECT * FROM medium.random_text ORDER BY t", compare_strings); + + execute("set work_mem = '256MB'"); + + check_sorted("SELECT * FROM medium.ordered_ints ORDER BY i", compare_ints); + check_sorted("SELECT * FROM medium.random_ints ORDER BY i", compare_ints); + check_sorted("SELECT * FROM medium.ordered_text ORDER BY t", compare_strings); + check_sorted("SELECT * FROM medium.random_text ORDER BY t", compare_strings); + + execute("set work_mem = '512MB'"); + + check_sorted("SELECT * FROM medium.ordered_ints ORDER BY i", compare_ints); + check_sorted("SELECT * FROM medium.random_ints ORDER BY i", compare_ints); + check_sorted("SELECT * FROM medium.ordered_text ORDER BY t", compare_strings); + check_sorted("SELECT * FROM medium.random_text ORDER BY t", compare_strings); + + execute("set work_mem = '2048MB'"); + + check_sorted("SELECT * FROM medium.ordered_ints ORDER BY i", compare_ints); + check_sorted("SELECT * FROM medium.random_ints ORDER BY i", compare_ints); + check_sorted("SELECT * FROM medium.ordered_text ORDER BY t", compare_strings); + check_sorted("SELECT * FROM medium.random_text ORDER BY t", compare_strings); + + PQfinish(conn); + + return 0; +} diff --git a/src/test/sort/generate.c b/src/test/sort/generate.c new file mode 100644 index 00000000000..9b005f97ed6 --- /dev/null +++ b/src/test/sort/generate.c @@ -0,0 +1,198 @@ +#include +#include +#include +#include + +static void +generate_ordered_integers(int scale) +{ + int rows = ((double) scale) * 28.75; + int i; + + printf("DROP TABLE IF EXISTS ordered_ints;\n"); + printf("BEGIN;"); + printf("CREATE TABLE ordered_ints (i int4);\n"); + printf("COPY ordered_ints FROM STDIN WITH (FREEZE);\n"); + + for (i = 0; i < rows; i++) + printf("%d\n", i); + + printf("\\.\n"); + printf("COMMIT;\n"); +} + +static void +generate_random_integers(int scale) +{ + int rows = ((double) scale) * 28.75; + int i; + + printf("DROP TABLE IF EXISTS random_ints;\n"); + printf("BEGIN;"); + printf("CREATE TABLE random_ints (i int4);\n"); + printf("COPY random_ints FROM STDIN WITH (FREEZE);\n"); + + for (i = 0; i < rows; i++) + printf("%d\n", random()); + + printf("\\.\n"); + printf("COMMIT;\n"); +} + +#define ALPHABET_SIZE 26 +static const char alphabet[ALPHABET_SIZE + 1] = "abcdefghijklmnopqrstuvwxyz"; + +#define TEXT_LEN 50 + +static void +random_string(char *buf, int len) +{ + int i; + long r; + long m; + + m = 0; + for (i = 0; i < len; i++) + { + if (m / ALPHABET_SIZE < ALPHABET_SIZE) + { + m = RAND_MAX; + r = random(); + } + + *buf = alphabet[r % ALPHABET_SIZE]; + m = m / ALPHABET_SIZE; + r = r / ALPHABET_SIZE; + buf++; + } + *buf = '\0'; + return; +} + +static void +generate_random_text(int scale) +{ + int rows = ((double) scale) * 12.7; + int i; + char buf[TEXT_LEN + 1] = { 0 }; + + printf("DROP TABLE IF EXISTS random_text;\n"); + printf("BEGIN;"); + printf("CREATE TABLE random_text (t text);\n"); + printf("COPY random_text FROM STDIN WITH (FREEZE);\n"); + + for (i = 0; i < rows; i++) + { + random_string(buf, TEXT_LEN); + printf("%s\n", buf); + } + + printf("\\.\n"); + printf("COMMIT;\n"); +} + +static void +generate_ordered_text(int scale) +{ + int rows = ((double) scale) * 12.7; + int i; + int j; + char indexes[TEXT_LEN] = {0}; + char buf[TEXT_LEN + 1]; + double digits; + + printf("DROP TABLE IF EXISTS ordered_text;\n"); + printf("BEGIN;"); + printf("CREATE TABLE ordered_text (t text);\n"); + printf("COPY ordered_text FROM STDIN WITH (FREEZE);\n"); + + /* + * We don't want all the strings to have the same prefix. + * That makes the comparisons very expensive. That might be an + * interesting test case too, but not what we want here. To avoid + * that, figure out how many characters will change, with the # + * of rows we chose. + */ + digits = ceil(log(rows) / log((double) ALPHABET_SIZE)); + + if (digits > TEXT_LEN) + digits = TEXT_LEN; + + for (i = 0; i < rows; i++) + { + for (j = 0; j < TEXT_LEN; j++) + { + buf[j] = alphabet[indexes[j]]; + } + buf[j] = '\0'; + printf("%s\n", buf); + + /* increment last character, carrying if needed */ + for (j = digits - 1; j >= 0; j--) + { + indexes[j]++; + if (indexes[j] == ALPHABET_SIZE) + indexes[j] = 0; + else + break; + } + } + + printf("\\.\n"); + printf("COMMIT;\n"); +} + + +struct +{ + char *name; + void (*generate_func)(int scale); +} datasets[] = +{ + { "ordered_integers", generate_ordered_integers }, + { "random_integers", generate_random_integers }, + { "ordered_text", generate_ordered_text }, + { "random_text", generate_random_text }, + { NULL, NULL } +}; + +void +usage() +{ + printf("Usage: generate [scale] [schema]\n"); + exit(1); +} + +int +main(int argc, char **argv) +{ + int scale; + int i; + int found = 0; + + if (argc < 2) + usage(); + + if (argc >= 3) + scale = atoi(argv[2]); + else + scale = 1024; /* 1 MB */ + + for (i = 0; datasets[i].name != NULL; i++) + { + if (strcmp(argv[1], datasets[i].name) == 0 || + strcmp(argv[1], "all") == 0) + { + fprintf (stderr, "Generating %s for %d kB...\n", datasets[i].name, scale); + datasets[i].generate_func(scale); + found = 1; + } + } + + if (!found) + { + fprintf(stderr, "unrecognized test name %s\n", argv[1]); + exit(1); + } + exit(0); +} diff --git a/src/test/sort/plot.sh b/src/test/sort/plot.sh new file mode 100644 index 00000000000..942a4af125d --- /dev/null +++ b/src/test/sort/plot.sh @@ -0,0 +1,36 @@ +#!/bin/bash + +set -e + +PATH=~/pgsql.fsmfork/bin:$PATH + +plot_test() { + local testname=$1; + + psql sorttest <> $PGDATA/postgresql.conf + echo "shared_buffers = '1 GB' " >> $PGDATA/postgresql.conf + + $PREFIX/bin/pg_ctl -D $PGDATA start -w -l serverlog-$1 + + LD_LIBRARY_PATH=$PREFIX/lib PATH=$PREFIX/bin:$PATH make -C src/test/sort generate_testdata + + git rev-parse --short HEAD > results-$1.txt + + LD_LIBRARY_PATH=$PREFIX/lib PGDATABASE=sorttest ./src/test/sort/speed >> results-$1.txt + mail -s "pgperftest-vm: test $1 finished!" $EMAIL < results-$1.txt +} + +compile master +runtest master + +compile sort-balanced-merge2 +runtest sort-balanced-merge2 diff --git a/src/test/sort/speed.c b/src/test/sort/speed.c new file mode 100644 index 00000000000..72a9e9b117e --- /dev/null +++ b/src/test/sort/speed.c @@ -0,0 +1,177 @@ +#define FRONTEND 1 + +#include +#include +#include + +#include + +#define pg_attribute_printf(f,a) + +#include +#include + +#define REPETITIONS 1 + +static PGconn *conn; + +/* returns duration in ms */ +static double +execute(const char *sql) +{ + struct timeval before, after; + PGresult *res; + + gettimeofday(&before, NULL); + res = PQexec(conn, sql); + gettimeofday(&after, NULL); + if (PQresultStatus(res) != PGRES_COMMAND_OK && PQresultStatus(res) != PGRES_TUPLES_OK) + { + fprintf(stderr,"command failed: %s\n%s", sql, PQerrorMessage(conn)); + PQclear(res); + exit(1); + } + PQclear(res); + + return (((double) (after.tv_sec - before.tv_sec)) * 1000.0 + ((double) (after.tv_usec - before.tv_usec) / 1000.0)); +} + +static void +execute_test(const char *testname, const char *query) +{ + double duration; + char buf[1000]; + int i; + + for (i = 0; i < REPETITIONS; i++) + { + printf ("%s: ", testname); + fflush(stdout); + duration = execute(query); + + //if (i > 0) + // printf(", "); + printf("%.0f ms", duration); + printf("\n"); + fflush(stdout); + + sprintf(buf, + "insert into public.results (testname, work_mem, time_ms) " + "values ('%s', (select setting from pg_settings where name='work_mem')::int, '%g')", + testname, duration); + execute(buf); + } +} + +static void +execute_test_series(char *tblname) +{ + //static const char *work_mems[] = { "1MB", "4MB", "8MB", "16MB", "32MB", "64MB", "128MB", "256MB", "512MB", NULL }; + static const char *work_mems[] = { + "64 kB", + "80 kB", + + "100 kB", + "150 kB", + "250 kB", + "400 kB", + "650 kB", + + "1000 kB", + "1500 kB", + "2500 kB", + "4000 kB", + "6500 kB", + + "10000 kB", + "15000 kB", + "25000 kB", + "40000 kB", + "65000 kB", + + "100 MB", + "150 MB", + "250 MB", + "400 MB", + "650 MB", + "800 MB", + + "1000 MB", + "1500 MB", + "2500 MB", + + NULL, NULL, NULL, NULL }; + int i; + int j; + PQExpBufferData sqlbuf; + + initPQExpBuffer(&sqlbuf); + + printf("# Tests with %s, different work_mems, no parallelism\n", tblname); + printf("-----\n"); + + printfPQExpBuffer(&sqlbuf, "set max_parallel_workers_per_gather=0"); + execute(sqlbuf.data); + + for (i = 0; work_mems[i] != NULL; i++) + { + const char *work_mem = work_mems[i]; + char testname[100]; + + printfPQExpBuffer(&sqlbuf, "set work_mem='%s'", work_mem); + execute(sqlbuf.data); + snprintf(testname, sizeof(testname), "%s", tblname, work_mem); + + resetPQExpBuffer(&sqlbuf); + + appendPQExpBuffer(&sqlbuf, "SELECT COUNT(*) FROM (("); + appendPQExpBuffer(&sqlbuf, "SELECT * FROM %s", tblname); + appendPQExpBuffer(&sqlbuf, ") ORDER BY 1) t"); + + execute_test(testname, sqlbuf.data); + } +} + +int +main(int argc, char **argv) +{ + double duration; + char buf[1000]; + + /* Make a connection to the database */ + conn = PQconnectdb(""); + + /* Check to see that the backend connection was successfully made */ + if (PQstatus(conn) != CONNECTION_OK) + { + fprintf(stderr, "Connection to database failed: %s", + PQerrorMessage(conn)); + exit(1); + } + + execute("create table if not exists public.results (testname text, work_mem int, time_ms numeric)"); + + //execute("set trace_sort=on"); +#if 0 + execute_test_series("small.ordered_ints"); + execute_test_series("small.random_ints"); + execute_test_series("small.ordered_text"); + execute_test_series("small.random_text"); +#endif + + execute_test_series("medium.ordered_ints"); + execute_test_series("medium.random_ints"); + execute_test_series("medium.ordered_text"); + execute_test_series("medium.random_text"); + +#if 0 + execute_test_series("large.ordered_ints"); + execute_test_series("large.random_ints"); + execute_test_series("large.ordered_text"); + execute_test_series("large.random_text"); +#endif + + PQfinish(conn); + + return 0; +}