From c63cd34aa51941d5851dfd6d3d273415ad02a7fb Mon Sep 17 00:00:00 2001
From: Heikki Linnakangas
Date: Thu, 8 Sep 2016 21:42:55 +0300
Subject: [PATCH 3/3] Add sorting test suite

---
Note: the commit id above and the blob ids on the "index" lines are those
of the patch as first posted; they were not recomputed for this revision.

 src/test/sorttestsuite/Makefile      |  48 +++++++
 src/test/sorttestsuite/correctness.c | 194 ++++++++++++++++++++++++++
 src/test/sorttestsuite/generate.c    | 258 +++++++++++++++++++++++++++++++++++
 src/test/sorttestsuite/speed.c       | 157 +++++++++++++++++++++
 4 files changed, 657 insertions(+)
 create mode 100644 src/test/sorttestsuite/Makefile
 create mode 100644 src/test/sorttestsuite/correctness.c
 create mode 100644 src/test/sorttestsuite/generate.c
 create mode 100644 src/test/sorttestsuite/speed.c

diff --git a/src/test/sorttestsuite/Makefile b/src/test/sorttestsuite/Makefile
new file mode 100644
index 0000000..91c8ccd
--- /dev/null
+++ b/src/test/sorttestsuite/Makefile
@@ -0,0 +1,48 @@
+# Build and data-loading rules for the sort test suite.
+#
+# The libpq header and library directories are taken from pg_config.  To
+# build against a particular installation, point PG_CONFIG at it:
+#   make PG_CONFIG=/path/to/pgsql/bin/pg_config
+PG_CONFIG ?= pg_config
+
+CFLAGS := -g -I$(shell $(PG_CONFIG) --includedir)
+LDFLAGS := -L$(shell $(PG_CONFIG) --libdir)
+# Libraries belong in LDLIBS: the built-in link rule puts LDLIBS after the
+# source file, where linkers that resolve symbols left to right need them.
+LDLIBS := -lpq -lm
+
+TESTDB = sorttest
+
+# Scales are passed to ./generate, which reports them in kB.  The sizes are
+# noted on the comment lines so that no trailing whitespace ends up in the
+# variable values.
+
+# For testing quicksort.  (1 MB)
+SCALE_SMALL = 1024
+
+# For testing external sort, while the dataset still fits in OS cache.  (1 GB)
+SCALE_MEDIUM = 1048576
+
+# Does not fit in memory.  (20 GB)
+SCALE_LARGE = 20971520
+# Smaller alternative (about 1.4 GB):
+#SCALE_LARGE = 1500000
+
+.PHONY: all generate_testdata
+
+all: generate speed correctness
+
+generate: generate.c
+
+speed: speed.c
+
+correctness: correctness.c
+
+# Drops and recreates $(TESTDB), then loads every dataset at each scale.
+generate_testdata: generate
+	dropdb --if-exists $(TESTDB)
+	createdb $(TESTDB)
+	psql $(TESTDB) -c "CREATE SCHEMA small; CREATE SCHEMA medium; CREATE SCHEMA large;"
+	(echo "set search_path=small;"; ./generate all $(SCALE_SMALL)) | psql $(TESTDB)
+	(echo "set search_path=medium;"; ./generate all $(SCALE_MEDIUM)) | psql $(TESTDB)
+	(echo "set search_path=large;"; ./generate all $(SCALE_LARGE)) | psql $(TESTDB)
diff --git a/src/test/sorttestsuite/correctness.c b/src/test/sorttestsuite/correctness.c
new file mode 100644
index 0000000..b41aa2e
--- /dev/null
+++ b/src/test/sorttestsuite/correctness.c
@@ -0,0 +1,194 @@
+/*
+ * correctness.c
+ *    Checks that ORDER BY queries on the sort test tables return their
+ *    rows in sorted order, under several work_mem settings.
+ *
+ * The connection string is empty, so libpq takes the connection parameters
+ * from its defaults and the environment.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <sys/time.h>
+
+#include <libpq-fe.h>
+
+static PGconn *conn;
+
+/*
+ * Runs a statement whose result is not needed, echoing it to stderr first.
+ * Exits the program if the statement fails.
+ */
+static void
+execute(const char *sql)
+{
+	PGresult *res;
+	ExecStatusType status;
+
+	fprintf(stderr, "%s\n", sql);
+
+	res = PQexec(conn, sql);
+	status = PQresultStatus(res);
+	if (status != PGRES_COMMAND_OK && status != PGRES_TUPLES_OK)
+	{
+		fprintf(stderr, "command failed: %s\n%s", sql, PQerrorMessage(conn));
+		PQclear(res);
+		exit(1);
+	}
+
+	PQclear(res);
+}
+
+/*
+ * Runs a query in single-row mode and checks that the first column of every
+ * row is in order relative to the row before it.
+ *
+ * cmp(prev, cur) must return nonzero if 'prev' is allowed to precede 'cur'.
+ * Exits the program on a query error or at the first out-of-order row.
+ */
+static void
+check_sorted(const char *sql, int (*cmp)(const char *a, const char *b))
+{
+	PGresult *res;
+	PGresult *prevres = NULL;
+	int rowno;
+
+	fprintf(stderr, "running query: %s\n", sql);
+
+	/* No result object exists yet on these two error paths. */
+	if (!PQsendQuery(conn, sql))
+	{
+		fprintf(stderr, "query failed: %s\n%s", sql, PQerrorMessage(conn));
+		exit(1);
+	}
+	if (!PQsetSingleRowMode(conn))
+	{
+		fprintf(stderr, "setting single-row mode failed: %s", PQerrorMessage(conn));
+		exit(1);
+	}
+
+	rowno = 1;
+	while ((res = PQgetResult(conn)) != NULL)
+	{
+		ExecStatusType status = PQresultStatus(res);
+
+		if (status == PGRES_TUPLES_OK)
+		{
+			/* Carries no row to compare; release it before moving on. */
+			PQclear(res);
+			continue;
+		}
+		if (status != PGRES_SINGLE_TUPLE)
+		{
+			fprintf(stderr, "error while fetching: %d, %s\n%s", (int) status, sql, PQerrorMessage(conn));
+			PQclear(res);
+			exit(1);
+		}
+
+		if (prevres)
+		{
+			if (!cmp(PQgetvalue(prevres, 0, 0), PQgetvalue(res, 0, 0)))
+			{
+				/* Current row's value first, then the previous row's. */
+				fprintf(stderr, "FAIL: result not sorted, row %d: %s, prev %s\n", rowno,
+						PQgetvalue(res, 0, 0), PQgetvalue(prevres, 0, 0));
+				PQclear(res);
+				exit(1);
+			}
+			PQclear(prevres);
+		}
+		prevres = res;
+
+		rowno++;
+	}
+
+	if (prevres)
+		PQclear(prevres);
+}
+
+/*
+ * Ordering test for text columns: nonzero if 'a' sorts at or before 'b'.
+ *
+ * NOTE(review): strcmp() compares byte values.  That agrees with the
+ * server's ORDER BY only if the column's collation orders these strings
+ * the same way -- confirm for the database the tests run against.
+ */
+static int
+compare_strings(const char *a, const char *b)
+{
+	return strcmp(a, b) <= 0;
+}
+
+/*
+ * Ordering test for integer columns: nonzero if 'a' <= 'b' numerically.
+ * Both arguments are parsed as decimal text.
+ */
+static int
+compare_ints(const char *a, const char *b)
+{
+	return strtol(a, NULL, 10) <= strtol(b, NULL, 10);
+}
+
+/*
+ * Sets work_mem to 'work_mem', then checks the four test tables in 'schema'.
+ */
+static void
+check_schema(const char *schema, const char *work_mem)
+{
+	static const struct
+	{
+		const char *table;
+		const char *column;
+		int (*cmp)(const char *a, const char *b);
+	} tables[] =
+	{
+		{ "ordered_ints", "i", compare_ints },
+		{ "random_ints", "i", compare_ints },
+		{ "ordered_text", "t", compare_strings },
+		{ "random_text", "t", compare_strings }
+	};
+	char sql[200];
+	size_t i;
+
+	snprintf(sql, sizeof(sql), "set work_mem = '%s'", work_mem);
+	execute(sql);
+
+	for (i = 0; i < sizeof(tables) / sizeof(tables[0]); i++)
+	{
+		snprintf(sql, sizeof(sql), "SELECT * FROM %s.%s ORDER BY %s",
+				 schema, tables[i].table, tables[i].column);
+		check_sorted(sql, tables[i].cmp);
+	}
+}
+
+int
+main(int argc, char **argv)
+{
+	/* work_mem settings for the medium-sized tables, in the order tested */
+	static const char *const medium_work_mem[] = { "16MB", "256MB", "512MB", "2048MB" };
+	size_t i;
+
+	(void) argc;
+	(void) argv;
+
+	/* Make a connection to the database */
+	conn = PQconnectdb("");
+
+	/* Check to see that the backend connection was successfully made */
+	if (PQstatus(conn) != CONNECTION_OK)
+	{
+		fprintf(stderr, "Connection to database failed: %s",
+				PQerrorMessage(conn));
+		exit(1);
+	}
+	execute("set trace_sort=on");
+
+	check_schema("small", "4MB");
+	for (i = 0; i < sizeof(medium_work_mem) / sizeof(medium_work_mem[0]); i++)
+		check_schema("medium", medium_work_mem[i]);
+
+	PQfinish(conn);
+
+	return 0;
+}
diff --git a/src/test/sorttestsuite/generate.c b/src/test/sorttestsuite/generate.c
new file mode 100644
index 0000000..f481189
--- /dev/null
+++ b/src/test/sorttestsuite/generate.c
@@ -0,0 +1,258 @@
+/*
+ * generate.c
+ *    Writes SQL to stdout that creates and loads the sort test tables.
+ *    The output is meant to be fed to psql.
+ */
+#include <errno.h>
+#include <limits.h>
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/*
+ * Rows generated per unit of scale (main() reports the scale in kB).
+ * NOTE(review): presumably chosen so that a table comes out at about
+ * 'scale' kB -- confirm against real table sizes.
+ */
+#define INT_ROWS_PER_KB 28.75
+#define TEXT_ROWS_PER_KB 12.7
+
+/* Largest scale whose row count still fits in an int. */
+#define MAX_SCALE ((int) (INT_MAX / INT_ROWS_PER_KB))
+
+/*
+ * Emits the statements that come before a table's data: drop any old copy
+ * of the table, then open a transaction, create the table and start a COPY.
+ */
+static void
+begin_table(const char *name, const char *columns)
+{
+	printf("DROP TABLE IF EXISTS %s;\n", name);
+	printf("BEGIN;");
+	printf("CREATE TABLE %s (%s);\n", name, columns);
+	printf("COPY %s FROM STDIN WITH (FREEZE);\n", name);
+}
+
+/* Emits the COPY end-of-data marker and commits the transaction. */
+static void
+end_table(void)
+{
+	printf("\\.\n");
+	printf("COMMIT;\n");
+}
+
+/* Emits table ordered_ints: the integers from 0 upward, in order. */
+static void
+generate_ordered_integers(int scale)
+{
+	int rows = ((double) scale) * INT_ROWS_PER_KB;
+	int i;
+
+	begin_table("ordered_ints", "i int4");
+
+	for (i = 0; i < rows; i++)
+		printf("%d\n", i);
+
+	end_table();
+}
+
+/* Emits table random_ints: one random() value per row. */
+static void
+generate_random_integers(int scale)
+{
+	int rows = ((double) scale) * INT_ROWS_PER_KB;
+	int i;
+
+	begin_table("random_ints", "i int4");
+
+	/* random() returns long, so it must be printed with %ld */
+	for (i = 0; i < rows; i++)
+		printf("%ld\n", random());
+
+	end_table();
+}
+
+#define ALPHABET_SIZE 26
+static const char alphabet[ALPHABET_SIZE + 1] = "abcdefghijklmnopqrstuvwxyz";
+
+#define TEXT_LEN 50
+
+/*
+ * Fills buf with 'len' random lowercase letters and a terminating NUL.
+ * buf must have room for len + 1 bytes.
+ *
+ * One random() value supplies several letters: 'r' is the part of the value
+ * not used yet, and 'm' is the range that part can still cover.  A new value
+ * is drawn once that range has become too small.
+ */
+static void
+random_string(char *buf, int len)
+{
+	int i;
+	long r = 0;
+	long m = 0;	/* zero forces a draw for the first letter */
+
+	for (i = 0; i < len; i++)
+	{
+		if (m / ALPHABET_SIZE < ALPHABET_SIZE)
+		{
+			m = RAND_MAX;
+			r = random();
+		}
+
+		buf[i] = alphabet[r % ALPHABET_SIZE];
+		m = m / ALPHABET_SIZE;
+		r = r / ALPHABET_SIZE;
+	}
+	buf[len] = '\0';
+}
+
+/* Emits table random_text: one random TEXT_LEN-letter string per row. */
+static void
+generate_random_text(int scale)
+{
+	int rows = ((double) scale) * TEXT_ROWS_PER_KB;
+	int i;
+	char buf[TEXT_LEN + 1] = { 0 };
+
+	begin_table("random_text", "t text");
+
+	for (i = 0; i < rows; i++)
+	{
+		random_string(buf, TEXT_LEN);
+		printf("%s\n", buf);
+	}
+
+	end_table();
+}
+
+/*
+ * Emits table ordered_text: TEXT_LEN-letter strings in ascending order.
+ *
+ * The leading 'digits' letters count upward like a base-ALPHABET_SIZE
+ * number; the remaining letters stay at 'a'.
+ */
+static void
+generate_ordered_text(int scale)
+{
+	int rows = ((double) scale) * TEXT_ROWS_PER_KB;
+	int i;
+	int j;
+	unsigned char indexes[TEXT_LEN] = { 0 };
+	char buf[TEXT_LEN + 1];
+	int digits = 0;
+
+	begin_table("ordered_text", "t text");
+
+	/*
+	 * We don't want all the strings to have the same prefix.
+	 * That makes the comparisons very expensive. That might be an
+	 * interesting test case too, but not what we want here. To avoid
+	 * that, figure out how many characters will change, with the #
+	 * of rows we chose.
+	 *
+	 * With at most one row nothing changes, and log() must not be
+	 * given zero.
+	 */
+	if (rows > 1)
+		digits = (int) ceil(log(rows) / log((double) ALPHABET_SIZE));
+
+	if (digits > TEXT_LEN)
+		digits = TEXT_LEN;
+
+	for (i = 0; i < rows; i++)
+	{
+		for (j = 0; j < TEXT_LEN; j++)
+			buf[j] = alphabet[indexes[j]];
+		buf[TEXT_LEN] = '\0';
+		printf("%s\n", buf);
+
+		/* increment last character, carrying if needed */
+		for (j = digits - 1; j >= 0; j--)
+		{
+			indexes[j]++;
+			if (indexes[j] == ALPHABET_SIZE)
+				indexes[j] = 0;
+			else
+				break;
+		}
+	}
+
+	end_table();
+}
+
+/* Dataset names accepted on the command line, and their generators. */
+static const struct
+{
+	const char *name;
+	void (*generate_func)(int scale);
+} datasets[] =
+{
+	{ "ordered_integers", generate_ordered_integers },
+	{ "random_integers", generate_random_integers },
+	{ "ordered_text", generate_ordered_text },
+	{ "random_text", generate_random_text },
+	{ NULL, NULL }
+};
+
+/* Prints the command-line synopsis to stderr and exits with status 1. */
+static void
+usage(void)
+{
+	fprintf(stderr, "Usage: generate <dataset>|all [scale]\n");
+	exit(1);
+}
+
+/*
+ * generate <dataset>|all [scale]
+ *
+ * Writes the SQL for the named dataset, or for all of them, to stdout.
+ * The scale defaults to 1024.
+ */
+int
+main(int argc, char **argv)
+{
+	int scale = 1024;	/* 1 MB */
+	int i;
+	int found = 0;
+
+	if (argc < 2)
+		usage();
+
+	if (argc >= 3)
+	{
+		char *end;
+		long val;
+
+		/* unlike atoi(), strtol() lets us reject malformed input */
+		errno = 0;
+		val = strtol(argv[2], &end, 10);
+		if (end == argv[2] || *end != '\0' || errno == ERANGE ||
+			val < 0 || val > MAX_SCALE)
+		{
+			fprintf(stderr, "invalid scale %s (must be between 0 and %d)\n",
+					argv[2], MAX_SCALE);
+			exit(1);
+		}
+		scale = (int) val;
+	}
+
+	for (i = 0; datasets[i].name != NULL; i++)
+	{
+		if (strcmp(argv[1], datasets[i].name) == 0 ||
+			strcmp(argv[1], "all") == 0)
+		{
+			fprintf (stderr, "Generating %s for %d kB...\n", datasets[i].name, scale);
+			datasets[i].generate_func(scale);
+			found = 1;
+		}
+	}
+
+	if (!found)
+	{
+		fprintf(stderr, "unrecognized test name %s\n", argv[1]);
+		exit(1);
+	}
+	exit(0);
+}
diff --git a/src/test/sorttestsuite/speed.c b/src/test/sorttestsuite/speed.c
new file mode 100644
index 0000000..3ebc57c
--- /dev/null
+++ b/src/test/sorttestsuite/speed.c
@@ -0,0 +1,157 @@
+/*
+ * speed.c
+ *    Times ORDER BY queries on the sort test tables under several work_mem
+ *    settings and prints the durations to stdout.
+ *
+ * The connection string is empty, so libpq takes the connection parameters
+ * from its defaults and the environment.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <sys/time.h>
+
+#include <libpq-fe.h>
+
+#define REPETITIONS 3
+
+static PGconn *conn;
+
+/*
+ * Runs a statement and returns the time PQexec() took, in milliseconds.
+ * Exits the program if the statement fails.
+ */
+static double
+execute(const char *sql)
+{
+	struct timeval before, after;
+	PGresult *res;
+	ExecStatusType status;
+
+	gettimeofday(&before, NULL);
+	res = PQexec(conn, sql);
+	gettimeofday(&after, NULL);
+
+	status = PQresultStatus(res);
+	if (status != PGRES_COMMAND_OK && status != PGRES_TUPLES_OK)
+	{
+		fprintf(stderr, "command failed: %s\n%s", sql, PQerrorMessage(conn));
+		PQclear(res);
+		exit(1);
+	}
+	PQclear(res);
+
+	return (((double) (after.tv_sec - before.tv_sec)) * 1000.0 + ((double) (after.tv_usec - before.tv_usec) / 1000.0));
+}
+
+/*
+ * Runs 'query' REPETITIONS times and prints one line to stdout: the test
+ * name, then the duration of each run.
+ */
+static void
+execute_test(const char *testname, const char *query)
+{
+	int i;
+
+	printf("%s: ", testname);
+	fflush(stdout);
+	for (i = 0; i < REPETITIONS; i++)
+	{
+		double duration = execute(query);
+
+		if (i > 0)
+			printf(", ");
+		printf("%.0f ms", duration);
+		fflush(stdout);
+	}
+	printf("\n");
+}
+
+/*
+ * Prints the separator that ends a section heading, sets work_mem to
+ * 'work_mem', and times a sort of each of the four test tables in 'schema'.
+ *
+ * Each sort runs in a subquery under COUNT(*), so the query returns a single
+ * row however large the table is.
+ */
+static void
+run_tests(const char *schema, const char *work_mem)
+{
+	static const struct
+	{
+		const char *table;
+		const char *column;
+	} tables[] =
+	{
+		{ "ordered_ints", "i" },
+		{ "random_ints", "i" },
+		{ "ordered_text", "t" },
+		{ "random_text", "t" }
+	};
+	char sql[200];
+	size_t i;
+
+	printf("-----\n");
+	snprintf(sql, sizeof(sql), "set work_mem='%s'", work_mem);
+	execute(sql);
+
+	for (i = 0; i < sizeof(tables) / sizeof(tables[0]); i++)
+	{
+		snprintf(sql, sizeof(sql),
+				 "SELECT COUNT(*) FROM (SELECT * FROM %s.%s ORDER BY %s) t",
+				 schema, tables[i].table, tables[i].column);
+		execute_test(tables[i].table, sql);
+	}
+	printf("\n");
+}
+
+int
+main(int argc, char **argv)
+{
+	(void) argc;
+	(void) argv;
+
+	/* Make a connection to the database */
+	conn = PQconnectdb("");
+
+	/* Check to see that the backend connection was successfully made */
+	if (PQstatus(conn) != CONNECTION_OK)
+	{
+		fprintf(stderr, "Connection to database failed: %s",
+				PQerrorMessage(conn));
+		exit(1);
+	}
+
+	execute("set trace_sort=on");
+
+	printf("# Tests on small tables (1 MB), 4MB work_mem\n");
+	printf("# Performs a quicksort\n");
+	run_tests("small", "4MB");
+
+	printf("# Tests on medium-sized tables (1 GB), 4MB work_mem\n");
+	printf("# Performs an external sort, but the table still fits in OS cache\n");
+	printf("# Needs a multi-stage merge\n");
+	run_tests("medium", "4MB");
+
+	printf("# Tests on medium-sized tables (1 GB), 16MB work_mem\n");
+	printf("# Same as previous test, but with larger work_mem\n");
+	run_tests("medium", "16MB");
+
+	printf("# Tests on medium-sized tables (1 GB), 256MB work_mem\n");
+	printf("# This works with a single merge pass\n");
+	run_tests("medium", "256MB");
+
+	printf("# Tests on medium-sized tables (1 GB), 512MB work_mem\n");
+	printf("# This works with a single merge pass\n");
+	run_tests("medium", "512MB");
+
+	printf("# Tests on medium-sized tables (1 GB), 2GB work_mem\n");
+	printf("# I thought 2GB would be enough to do a quicksort, but because of\n");
+	printf("# SortTuple overhead (?), it doesn't fit. Performs an external sort with two runs\n");
+	run_tests("medium", "2048MB");
+
+	PQfinish(conn);
+
+	return 0;
+}
-- 
2.9.3