SET max_parallel_workers_per_gather = 0; CREATE EXTENSION btree_gin; CREATE EXTENSION tsm_system_rows; -- IP generated by hashing the seed CREATE OR REPLACE FUNCTION random_ip_address(p_seed BIGINT) RETURNS INET AS $$ DECLARE v_ip text; v_hash bigint; BEGIN v_hash := abs(hashint8(p_seed)); v_ip := (1 + ((v_hash >> 24) % 253)) || '.' || (1 + ((v_hash >> 16) % 253)) || '.' || (1 + ((v_hash >> 8) % 253)) || '.' || (1 + (v_hash % 253)); RETURN v_ip::inet; END; $$ LANGUAGE plpgsql; CREATE TABLE t (a int, b text, c inet, d numeric); INSERT INTO t SELECT mod(abs(hashint8(i/100 + (100 * random())::int)),100000000), md5((i/100 + (100 * random())::int)::text), random_ip_address(i/100 + (100 * random())::int), mod(abs(hashint8(i/100 + (100 * random())::int)::numeric/10),100000000) FROM generate_series(1,100000000) s(i); VACUUM ANALYZE t; -- compute average distinct values per range SELECT COUNT(*), AVG(a), AVG(b), AVG(c), AVG(d) FROM ( SELECT ((ctid::text::point)[0]/128)::int AS range_number, COUNT(DISTINCT a) AS a, COUNT(DISTINCT b) AS b, COUNT(DISTINCT c) AS c, COUNT(DISTINCT d) AS d FROM t GROUP BY 1 ) foo; -- compute parameters for queries SELECT percentile_disc(0.5) within group (order by a) AS a FROM t TABLESAMPLE system_rows(10000) \gset SELECT ('''' || (percentile_disc(0.5) within group (order by b)) || '''') AS b FROM t TABLESAMPLE system_rows(10000) \gset SELECT ('''' || (percentile_disc(0.5) within group (order by c)) || '''') AS c FROM t TABLESAMPLE system_rows(10000) \gset SELECT percentile_disc(0.5) within group (order by d) AS d FROM t TABLESAMPLE system_rows(10000) \gset \d+ -- first let's try seqscan \echo ================= dataset #1 seqscan : plans ================= EXPLAIN ANALYZE SELECT * FROM t WHERE a = :a; EXPLAIN ANALYZE SELECT * FROM t WHERE b = :b; EXPLAIN ANALYZE SELECT * FROM t WHERE c = :c::inet; EXPLAIN ANALYZE SELECT * FROM t WHERE d = :d; \timing on \echo ================= dataset #1 seqscan : query timing ================= SELECT COUNT(*) FROM t WHERE a = :a; SELECT COUNT(*) FROM t WHERE b = :b; SELECT COUNT(*) FROM t WHERE c = :c::inet; SELECT COUNT(*) FROM t WHERE d = :d; \timing off -- disable seqscan to force plans using indexes SET enable_seqscan = off; -- now let's try btree indexes \echo ================= dataset #1 btree : create index ================= \timing on CREATE INDEX btree_a_idx ON t (a); CREATE INDEX btree_b_idx ON t (b); CREATE INDEX btree_c_idx ON t (c); CREATE INDEX btree_d_idx ON t (d); \timing off \echo ================= dataset #1 btree : index size ================= \di+ ANALYZE t; \echo ================= dataset #1 btree : plans ================= EXPLAIN ANALYZE SELECT * FROM t WHERE a = :a; EXPLAIN ANALYZE SELECT * FROM t WHERE b = :b; EXPLAIN ANALYZE SELECT * FROM t WHERE c = :c::inet; EXPLAIN ANALYZE SELECT * FROM t WHERE d = :d; \timing on \echo ================= dataset #1 btree : query timing ================= SELECT COUNT(*) FROM t WHERE a = :a; SELECT COUNT(*) FROM t WHERE b = :b; SELECT COUNT(*) FROM t WHERE c = :c::inet; SELECT COUNT(*) FROM t WHERE d = :d; \timing off DROP INDEX btree_a_idx; DROP INDEX btree_b_idx; DROP INDEX btree_c_idx; DROP INDEX btree_d_idx; -- then GIN indexes, created using btree_gin \echo ================= dataset #1 gin : create index ================= \timing on CREATE INDEX gin_a_idx ON t USING GIN (a); CREATE INDEX gin_b_idx ON t USING GIN (b); CREATE INDEX gin_c_idx ON t USING GIN (c); CREATE INDEX gin_d_idx ON t USING GIN (d); \timing off \echo ================= dataset #1 gin : index size ================= \di+ ANALYZE t; \echo ================= dataset #1 gin : plans ================= EXPLAIN ANALYZE SELECT * FROM t WHERE a = :a; EXPLAIN ANALYZE SELECT * FROM t WHERE b = :b; EXPLAIN ANALYZE SELECT * FROM t WHERE c = :c::inet; EXPLAIN ANALYZE SELECT * FROM t WHERE d = :d; \timing on \echo ================= dataset #1 gin : query timing ================= SELECT COUNT(*) FROM t WHERE a = :a; SELECT COUNT(*) FROM t WHERE b = :b; SELECT COUNT(*) FROM t WHERE c = :c::inet; SELECT COUNT(*) FROM t WHERE d = :d; \timing off DROP INDEX gin_a_idx; DROP INDEX gin_b_idx; DROP INDEX gin_c_idx; DROP INDEX gin_d_idx; -- and now finally the bloom indexes \echo ================= dataset #1 brin minmax : create index ================= \timing on CREATE INDEX brin_a_idx ON t USING BRIN (a); CREATE INDEX brin_b_idx ON t USING BRIN (b); CREATE INDEX brin_c_idx ON t USING BRIN (c); CREATE INDEX brin_d_idx ON t USING BRIN (d); \timing off \echo ================= dataset #1 brin minmax : index size ================= \di+ ANALYZE t; \echo ================= dataset #1 brin minmax : plans ================= EXPLAIN ANALYZE SELECT * FROM t WHERE a = :a; EXPLAIN ANALYZE SELECT * FROM t WHERE b = :b; EXPLAIN ANALYZE SELECT * FROM t WHERE c = :c::inet; EXPLAIN ANALYZE SELECT * FROM t WHERE d = :d; \timing on \echo ================= dataset #1 brin minmax : query timing ================= SELECT COUNT(*) FROM t WHERE a = :a; SELECT COUNT(*) FROM t WHERE b = :b; SELECT COUNT(*) FROM t WHERE c = :c::inet; SELECT COUNT(*) FROM t WHERE d = :d; \timing off DROP INDEX brin_a_idx; DROP INDEX brin_b_idx; DROP INDEX brin_c_idx; DROP INDEX brin_d_idx; \echo ================= dataset #1 brin bloom (256, 0.05) : create index ================= \timing on CREATE INDEX brin_a_idx ON t USING BRIN (a int4_bloom_ops) WITH (n_distinct_per_range=256, false_positive_rate=0.05); CREATE INDEX brin_b_idx ON t USING BRIN (b text_bloom_ops) WITH (n_distinct_per_range=256, false_positive_rate=0.05); CREATE INDEX brin_c_idx ON t USING BRIN (c inet_bloom_ops) WITH (n_distinct_per_range=256, false_positive_rate=0.05); CREATE INDEX brin_d_idx ON t USING BRIN (d numeric_bloom_ops) WITH (n_distinct_per_range=256, false_positive_rate=0.05); \timing off \echo ================= dataset #1 brin bloom (256, 0.05) : index size ================= \di+ ANALYZE t; \echo ================= dataset #1 brin bloom (256, 0.05) : plans ================= EXPLAIN ANALYZE SELECT * FROM t WHERE a = :a; EXPLAIN ANALYZE SELECT * FROM t WHERE b = :b; EXPLAIN ANALYZE SELECT * FROM t WHERE c = :c::inet; EXPLAIN ANALYZE SELECT * FROM t WHERE d = :d; \timing on \echo ================= dataset #1 brin bloom (256, 0.05) : query timing ================= SELECT COUNT(*) FROM t WHERE a = :a; SELECT COUNT(*) FROM t WHERE b = :b; SELECT COUNT(*) FROM t WHERE c = :c::inet; SELECT COUNT(*) FROM t WHERE d = :d; \timing off \timing off DROP INDEX brin_a_idx; DROP INDEX brin_b_idx; DROP INDEX brin_c_idx; DROP INDEX brin_d_idx; \timing on \echo ================= dataset #1 brin bloom (256, 0.01) : create index ================= CREATE INDEX brin_a_idx ON t USING BRIN (a int4_bloom_ops) WITH (n_distinct_per_range=256, false_positive_rate=0.01); CREATE INDEX brin_b_idx ON t USING BRIN (b text_bloom_ops) WITH (n_distinct_per_range=256, false_positive_rate=0.01); CREATE INDEX brin_c_idx ON t USING BRIN (c inet_bloom_ops) WITH (n_distinct_per_range=256, false_positive_rate=0.01); CREATE INDEX brin_d_idx ON t USING BRIN (d numeric_bloom_ops) WITH (n_distinct_per_range=256, false_positive_rate=0.01); \timing off \echo ================= dataset #1 brin bloom (256, 0.01) : index size ================= \di+ ANALYZE t; \echo ================= dataset #1 brin bloom (256, 0.01) : plans ================= EXPLAIN ANALYZE SELECT * FROM t WHERE a = :a; EXPLAIN ANALYZE SELECT * FROM t WHERE b = :b; EXPLAIN ANALYZE SELECT * FROM t WHERE c = :c::inet; EXPLAIN ANALYZE SELECT * FROM t WHERE d = :d; \timing on \echo ================= dataset #1 brin bloom (256, 0.01) : query timing ================= SELECT COUNT(*) FROM t WHERE a = :a; SELECT COUNT(*) FROM t WHERE b = :b; SELECT COUNT(*) FROM t WHERE c = :c::inet; SELECT COUNT(*) FROM t WHERE d = :d; \timing off -- now let's try with a table that has fewer distinct values per range RESET enable_seqscan; DROP TABLE t; CREATE TABLE t (a int, b text, c inet, d numeric, e inet); INSERT INTO t SELECT mod(abs(hashint8(i/10 + (1000 * random())::int)),100000000), md5((i/10 + (1000 * random())::int)::text), random_ip_address(i/10 + (1000 * random())::int), mod(abs(hashint8(i/10 + (1000 * random())::int)::numeric/10),100000000) FROM generate_series(1,100000000) s(i); VACUUM ANALYZE t; -- compute average distinct values per range SELECT COUNT(*), AVG(a), AVG(b), AVG(c), AVG(d) FROM ( SELECT ((ctid::text::point)[0]/128)::int AS range_number, COUNT(DISTINCT a) AS a, COUNT(DISTINCT b) AS b, COUNT(DISTINCT c) AS c, COUNT(DISTINCT d) AS d FROM t GROUP BY 1 ) foo; -- compute parameters for queries SELECT percentile_disc(0.5) within group (order by a) AS a FROM t TABLESAMPLE system_rows(10000) \gset SELECT ('''' || (percentile_disc(0.5) within group (order by b)) || '''') AS b FROM t TABLESAMPLE system_rows(10000) \gset SELECT ('''' || (percentile_disc(0.5) within group (order by c)) || '''') AS c FROM t TABLESAMPLE system_rows(10000) \gset SELECT percentile_disc(0.5) within group (order by d) AS d FROM t TABLESAMPLE system_rows(10000) \gset \d+ -- first let's try seqscan \echo ================= dataset #2 seqscan : plans ================= EXPLAIN ANALYZE SELECT * FROM t WHERE a = :a; EXPLAIN ANALYZE SELECT * FROM t WHERE b = :b; EXPLAIN ANALYZE SELECT * FROM t WHERE c = :c::inet; EXPLAIN ANALYZE SELECT * FROM t WHERE d = :d; \timing on \echo ================= dataset #2 seqscan : query timing ================= SELECT COUNT(*) FROM t WHERE a = :a; SELECT COUNT(*) FROM t WHERE b = :b; SELECT COUNT(*) FROM t WHERE c = :c::inet; SELECT COUNT(*) FROM t WHERE d = :d; \timing off -- disable seqscan, to force usage of indexes SET enable_seqscan = off; -- first let's try btree indexes \timing on \echo ================= dataset #2 btree : create index ================= CREATE INDEX btree_a_idx ON t (a); CREATE INDEX btree_b_idx ON t (b); CREATE INDEX btree_c_idx ON t (c); CREATE INDEX btree_d_idx ON t (d); \timing off \echo ================= dataset #2 btree : index size ================= \di+ ANALYZE t; \echo ================= dataset #2 btree : plans ================= EXPLAIN ANALYZE SELECT * FROM t WHERE a = :a; EXPLAIN ANALYZE SELECT * FROM t WHERE b = :b; EXPLAIN ANALYZE SELECT * FROM t WHERE c = :c::inet; EXPLAIN ANALYZE SELECT * FROM t WHERE d = :d; \timing on \echo ================= dataset #2 btree : query timing ================= SELECT COUNT(*) FROM t WHERE a = :a; SELECT COUNT(*) FROM t WHERE b = :b; SELECT COUNT(*) FROM t WHERE c = :c::inet; SELECT COUNT(*) FROM t WHERE d = :d; \timing off DROP INDEX btree_a_idx; DROP INDEX btree_b_idx; DROP INDEX btree_c_idx; DROP INDEX btree_d_idx; -- then GIN indexes, created using btree_gin \timing on \echo ================= dataset #2 gin : create index ================= CREATE INDEX gin_a_idx ON t USING GIN (a); CREATE INDEX gin_b_idx ON t USING GIN (b); CREATE INDEX gin_c_idx ON t USING GIN (c); CREATE INDEX gin_d_idx ON t USING GIN (d); \timing off \echo ================= dataset #2 gin : index size ================= \di+ ANALYZE t; \echo ================= dataset #2 gin : plans ================= EXPLAIN ANALYZE SELECT * FROM t WHERE a = :a; EXPLAIN ANALYZE SELECT * FROM t WHERE b = :b; EXPLAIN ANALYZE SELECT * FROM t WHERE c = :c::inet; EXPLAIN ANALYZE SELECT * FROM t WHERE d = :d; \timing on \echo ================= dataset #2 gin : query timing ================= SELECT COUNT(*) FROM t WHERE a = :a; SELECT COUNT(*) FROM t WHERE b = :b; SELECT COUNT(*) FROM t WHERE c = :c::inet; SELECT COUNT(*) FROM t WHERE d = :d; \timing off DROP INDEX gin_a_idx; DROP INDEX gin_b_idx; DROP INDEX gin_c_idx; DROP INDEX gin_d_idx; -- and now finally the bloom indexes \timing on \echo ================= dataset #2 brin minmax : create index ================= CREATE INDEX brin_a_idx ON t USING BRIN (a); CREATE INDEX brin_b_idx ON t USING BRIN (b); CREATE INDEX brin_c_idx ON t USING BRIN (c); CREATE INDEX brin_d_idx ON t USING BRIN (d); \timing off \echo ================= dataset #2 brin minmax : index size ================= \di+ ANALYZE t; \echo ================= dataset #2 brin minmax : plans ================= EXPLAIN ANALYZE SELECT * FROM t WHERE a = :a; EXPLAIN ANALYZE SELECT * FROM t WHERE b = :b; EXPLAIN ANALYZE SELECT * FROM t WHERE c = :c::inet; EXPLAIN ANALYZE SELECT * FROM t WHERE d = :d; \timing on \echo ================= dataset #2 brin minmax : query timing ================= SELECT COUNT(*) FROM t WHERE a = :a; SELECT COUNT(*) FROM t WHERE b = :b; SELECT COUNT(*) FROM t WHERE c = :c::inet; SELECT COUNT(*) FROM t WHERE d = :d; \timing off DROP INDEX brin_a_idx; DROP INDEX brin_b_idx; DROP INDEX brin_c_idx; DROP INDEX brin_d_idx; \timing on \echo ================= dataset #2 brin bloom (2048, 0.05) : create index ================= CREATE INDEX brin_a_idx ON t USING BRIN (a int4_bloom_ops) WITH (n_distinct_per_range=2048, false_positive_rate=0.05); CREATE INDEX brin_b_idx ON t USING BRIN (b text_bloom_ops) WITH (n_distinct_per_range=2048, false_positive_rate=0.05); CREATE INDEX brin_c_idx ON t USING BRIN (c inet_bloom_ops) WITH (n_distinct_per_range=2048, false_positive_rate=0.05); CREATE INDEX brin_d_idx ON t USING BRIN (d numeric_bloom_ops) WITH (n_distinct_per_range=2048, false_positive_rate=0.05); \timing off \echo ================= dataset #2 brin bloom (2048, 0.05) : index size ================= \di+ ANALYZE t; \echo ================= dataset #2 brin bloom (2048, 0.05) : plans ================= EXPLAIN ANALYZE SELECT * FROM t WHERE a = :a; EXPLAIN ANALYZE SELECT * FROM t WHERE b = :b; EXPLAIN ANALYZE SELECT * FROM t WHERE c = :c::inet; EXPLAIN ANALYZE SELECT * FROM t WHERE d = :d; \timing on \echo ================= dataset #2 brin bloom (2048, 0.05) : query timing ================= SELECT COUNT(*) FROM t WHERE a = :a; SELECT COUNT(*) FROM t WHERE b = :b; SELECT COUNT(*) FROM t WHERE c = :c::inet; SELECT COUNT(*) FROM t WHERE d = :d; \timing off \timing off DROP INDEX brin_a_idx; DROP INDEX brin_b_idx; DROP INDEX brin_c_idx; DROP INDEX brin_d_idx; \timing on \echo ================= dataset #2 brin bloom (2048, 0.01) : create index ================= CREATE INDEX brin_a_idx ON t USING BRIN (a int4_bloom_ops) WITH (n_distinct_per_range=2048, false_positive_rate=0.01); CREATE INDEX brin_b_idx ON t USING BRIN (b text_bloom_ops) WITH (n_distinct_per_range=2048, false_positive_rate=0.01); CREATE INDEX brin_c_idx ON t USING BRIN (c inet_bloom_ops) WITH (n_distinct_per_range=2048, false_positive_rate=0.01); CREATE INDEX brin_d_idx ON t USING BRIN (d numeric_bloom_ops) WITH (n_distinct_per_range=2048, false_positive_rate=0.01); \timing off \echo ================= dataset #2 brin bloom (2048, 0.01) : index size ================= \di+ ANALYZE t; \echo ================= dataset #2 brin bloom (2048, 0.01) : plans ================= EXPLAIN ANALYZE SELECT * FROM t WHERE a = :a; EXPLAIN ANALYZE SELECT * FROM t WHERE b = :b; EXPLAIN ANALYZE SELECT * FROM t WHERE c = :c::inet; EXPLAIN ANALYZE SELECT * FROM t WHERE d = :d; \timing on \echo ================= dataset #2 brin bloom (2048, 0.01) : query timing ================= SELECT COUNT(*) FROM t WHERE a = :a; SELECT COUNT(*) FROM t WHERE b = :b; SELECT COUNT(*) FROM t WHERE c = :c::inet; SELECT COUNT(*) FROM t WHERE d = :d; \timing off