From 83696f24eceb2c9d7a71ffb74171c30bc0c3727a Mon Sep 17 00:00:00 2001 From: Tomas Vondra Date: Sun, 23 Oct 2016 17:38:35 +0200 Subject: [PATCH 6/9] PATCH: multivariate histograms - extends the pg_mv_statistic catalog (add 'hist' fields) - building the histograms during ANALYZE - simple estimation while planning the queries - pg_histogram data type (varlena-based) Includes regression tests mostly equal to those for functional dependencies / MCV lists. A new varlena-based data type for storing serialized histograms. --- doc/src/sgml/catalogs.sgml | 30 + doc/src/sgml/planstats.sgml | 125 ++ doc/src/sgml/ref/create_statistics.sgml | 35 + src/backend/catalog/system_views.sql | 4 +- src/backend/commands/statscmds.c | 11 +- src/backend/nodes/outfuncs.c | 2 + src/backend/optimizer/path/clausesel.c | 606 +++++++- src/backend/optimizer/util/plancat.c | 4 +- src/backend/utils/mvstats/Makefile | 2 +- src/backend/utils/mvstats/README.histogram | 299 ++++ src/backend/utils/mvstats/README.stats | 2 + src/backend/utils/mvstats/common.c | 32 +- src/backend/utils/mvstats/common.h | 8 +- src/backend/utils/mvstats/histogram.c | 2123 ++++++++++++++++++++++++++++ src/bin/psql/describe.c | 15 +- src/include/catalog/pg_cast.h | 3 + src/include/catalog/pg_mv_statistic.h | 22 +- src/include/catalog/pg_proc.h | 13 + src/include/catalog/pg_type.h | 4 + src/include/nodes/relation.h | 2 + src/include/utils/builtins.h | 4 + src/include/utils/mvstats.h | 125 +- src/test/regress/expected/mv_histogram.out | 198 +++ src/test/regress/expected/opr_sanity.out | 3 +- src/test/regress/expected/rules.out | 4 +- src/test/regress/expected/type_sanity.out | 3 +- src/test/regress/parallel_schedule | 2 +- src/test/regress/serial_schedule | 1 + src/test/regress/sql/mv_histogram.sql | 167 +++ 29 files changed, 3801 insertions(+), 48 deletions(-) create mode 100644 src/backend/utils/mvstats/README.histogram create mode 100644 src/backend/utils/mvstats/histogram.c create mode 100644 
src/test/regress/expected/mv_histogram.out create mode 100644 src/test/regress/sql/mv_histogram.sql diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml index bca03e9..be34e24 100644 --- a/doc/src/sgml/catalogs.sgml +++ b/doc/src/sgml/catalogs.sgml @@ -4307,6 +4307,17 @@ + hist_enabled + bool + + + If true, histogram will be computed for the combination of columns, + covered by the statistics. This does not mean the histogram is already + computed, though. + + + + ndist_built bool @@ -4337,6 +4348,16 @@ + hist_built + bool + + + If true, histogram is already computed and available for use during query + estimation. + + + + stakeys int2vector pg_attribute.attnum @@ -4374,6 +4395,15 @@ + + stahist + pg_histogram + + + Histogram, serialized as pg_histogram type. + + + diff --git a/doc/src/sgml/planstats.sgml b/doc/src/sgml/planstats.sgml index 57f9441..2896b04 100644 --- a/doc/src/sgml/planstats.sgml +++ b/doc/src/sgml/planstats.sgml @@ -914,6 +914,131 @@ EXPLAIN ANALYZE SELECT * FROM t WHERE a <= 49 AND b > 49; + + Histograms + + + MCV lists, introduced in the previous section, work very well + for low-cardinality columns (i.e. columns with only very few distinct + values), and for columns with a few very frequent values (and possibly + many rare ones). Histograms, a generalization of per-column histograms + briefly described in , are meant + to address the other cases, i.e. high-cardinality columns, particularly + when there are no frequent values. + + + + Although the example data we've used so far is not a very good match, we + can try creating a histogram instead of the MCV list. 
With the + histogram in place, you may get a plan like this: + + +DROP STATISTICS s2; +CREATE STATISTICS s3 ON t (a,b) WITH (histogram); +ANALYZE t; +EXPLAIN ANALYZE SELECT * FROM t WHERE a = 1 AND b = 1; + QUERY PLAN +------------------------------------------------------------------------------------------------- + Seq Scan on t (cost=0.00..195.00 rows=100 width=8) (actual time=0.035..2.967 rows=100 loops=1) + Filter: ((a = 1) AND (b = 1)) + Rows Removed by Filter: 9900 + Planning time: 0.227 ms + Execution time: 3.189 ms +(5 rows) + + + Which seems quite accurate, however for other combinations of values the + results may be much worse, as illustrated by the following query + + + QUERY PLAN +----------------------------------------------------------------------------------------------- + Seq Scan on t (cost=0.00..195.00 rows=100 width=8) (actual time=2.771..2.771 rows=0 loops=1) + Filter: ((a = 1) AND (b = 10)) + Rows Removed by Filter: 10000 + Planning time: 0.179 ms + Execution time: 2.812 ms +(5 rows) + + + This is due to histograms tracking ranges of values, not individual values. + That means it's only possible say whether a bucket may contain items + matching the conditions, but it's unclear how many such tuples there + actually are in the bucket. Moreover, for larger tables only a small subset + of rows gets sampled by ANALYZE, causing small variations in + the shape of buckets. + + + + To inspect details of the histogram, we can look into the + pg_mv_stats view + + +SELECT tablename, staname, attnums, histbytes, histinfo + FROM pg_mv_stats WHERE staname = 's3'; + tablename | staname | attnums | histbytes | histinfo +-----------+---------+---------+-----------+------------- + t | s3 | 1 2 | 1928 | nbuckets=64 +(1 row) + + + This shows the histogram has 64 buckets, but as we know there are 100 + distinct combinations of values in the two columns. This means there are + buckets containing multiple combinations, causing the inaccuracy. 
+ + + + Similarly to MCV lists, we can inspect histogram contents + using a function called pg_mv_histogram_buckets. + + +test=# SELECT * FROM pg_mv_histogram_buckets((SELECT oid FROM pg_mv_statistic WHERE staname = 's3'), 0); + index | minvals | maxvals | nullsonly | mininclusive | maxinclusive | frequency | density | bucket_volume +-------+---------+---------+-----------+--------------+--------------+-----------+----------+--------------- + 0 | {0,0} | {3,1} | {f,f} | {t,t} | {f,f} | 0.01 | 1.68 | 0.005952 + 1 | {50,0} | {51,3} | {f,f} | {t,t} | {f,f} | 0.01 | 1.12 | 0.008929 + 2 | {0,25} | {26,31} | {f,f} | {t,t} | {f,f} | 0.01 | 0.28 | 0.035714 +... + 61 | {60,0} | {99,12} | {f,f} | {t,t} | {t,f} | 0.02 | 0.124444 | 0.160714 + 62 | {34,35} | {37,49} | {f,f} | {t,t} | {t,t} | 0.02 | 0.96 | 0.020833 + 63 | {84,35} | {87,49} | {f,f} | {t,t} | {t,t} | 0.02 | 0.96 | 0.020833 +(64 rows) + + + Which confirms there are 64 buckets, with frequencies ranging between 1% + and 2%. The minvals and maxvals show the + bucket boundaries, nullsonly shows which columns contain + only null values (in the given bucket). + + + + Similarly to MCV lists, the planner applies all conditions to + the buckets, and sums the frequencies of the matching ones. For details, + see clauselist_mv_selectivity_histogram function in + clausesel.c. + + + + It's also possible to build MCV lists and a histogram, in which + case ANALYZE will build a MCV lists with the most + frequent values, and a histogram on the remaining part of the sample. + + +DROP STATISTICS s3; +CREATE STATISTICS s4 ON t (a,b) WITH (mcv, histogram); + + + In this case the MCV list and histogram are treated as a single + composed statistics. + + + + For additional information about multivariate histograms, see + src/backend/utils/mvstats/README.histogram. 
+ + + + diff --git a/doc/src/sgml/ref/create_statistics.sgml b/doc/src/sgml/ref/create_statistics.sgml index e95d8d3..de419d2 100644 --- a/doc/src/sgml/ref/create_statistics.sgml +++ b/doc/src/sgml/ref/create_statistics.sgml @@ -124,6 +124,15 @@ CREATE STATISTICS [ IF NOT EXISTS ] statistics_na + histogram (boolean) + + + Enables histogram for the statistics. + + + + + mcv (boolean) @@ -201,6 +210,32 @@ EXPLAIN ANALYZE SELECT * FROM t2 WHERE (a = 1) AND (b = 2); + + Create table t3 with two strongly correlated columns, and + a histogram on those two columns: + + +CREATE TABLE t3 ( + a float, + b float +); + +INSERT INTO t3 SELECT mod(i,1000), mod(i,1000) + 50 * (r - 0.5) FROM ( + SELECT i, random() r FROM generate_series(1,1000000) s(i) + ) foo; + +CREATE STATISTICS s3 WITH (histogram) ON (a, b) FROM t3; + +ANALYZE t3; + +-- small overlap +EXPLAIN ANALYZE SELECT * FROM t3 WHERE (a < 500) AND (b > 500); + +-- no overlap +EXPLAIN ANALYZE SELECT * FROM t3 WHERE (a < 400) AND (b > 600); + + + diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql index d4d9c24..2501455 100644 --- a/src/backend/catalog/system_views.sql +++ b/src/backend/catalog/system_views.sql @@ -190,7 +190,9 @@ CREATE VIEW pg_mv_stats AS length(s.standist::bytea) AS ndistbytes, length(S.stadeps::bytea) AS depsbytes, length(S.stamcv::bytea) AS mcvbytes, - pg_mv_stats_mcvlist_info(S.stamcv) AS mcvinfo + pg_mv_stats_mcvlist_info(S.stamcv) AS mcvinfo, + length(S.stahist::bytea) AS histbytes, + pg_mv_stats_histogram_info(S.stahist) AS histinfo FROM (pg_mv_statistic S JOIN pg_class C ON (C.oid = S.starelid)) LEFT JOIN pg_namespace N ON (N.oid = C.relnamespace); diff --git a/src/backend/commands/statscmds.c b/src/backend/commands/statscmds.c index ef05745..2e91b0c 100644 --- a/src/backend/commands/statscmds.c +++ b/src/backend/commands/statscmds.c @@ -71,7 +71,8 @@ CreateStatistics(CreateStatsStmt *stmt) /* by default build nothing */ bool build_ndistinct = false, 
build_dependencies = false, - build_mcv = false; + build_mcv = false, + build_histogram = false; Assert(IsA(stmt, CreateStatsStmt)); @@ -172,6 +173,8 @@ CreateStatistics(CreateStatsStmt *stmt) build_dependencies = defGetBoolean(opt); else if (strcmp(opt->defname, "mcv") == 0) build_mcv = defGetBoolean(opt); + else if (strcmp(opt->defname, "histogram") == 0) + build_histogram = defGetBoolean(opt); else ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), @@ -180,10 +183,10 @@ CreateStatistics(CreateStatsStmt *stmt) } /* Make sure there's at least one statistics type specified. */ - if (!(build_ndistinct || build_dependencies || build_mcv)) + if (!(build_ndistinct || build_dependencies || build_mcv || build_histogram)) ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("no statistics type (ndistinct, dependencies, mcv) requested"))); + errmsg("no statistics type (ndistinct, dependencies, mcv, histogram) requested"))); stakeys = buildint2vector(attnums, numcols); @@ -207,10 +210,12 @@ CreateStatistics(CreateStatsStmt *stmt) values[Anum_pg_mv_statistic_ndist_enabled - 1] = BoolGetDatum(build_ndistinct); values[Anum_pg_mv_statistic_deps_enabled - 1] = BoolGetDatum(build_dependencies); values[Anum_pg_mv_statistic_mcv_enabled - 1] = BoolGetDatum(build_mcv); + values[Anum_pg_mv_statistic_hist_enabled - 1] = BoolGetDatum(build_histogram); nulls[Anum_pg_mv_statistic_standist - 1] = true; nulls[Anum_pg_mv_statistic_stadeps - 1] = true; nulls[Anum_pg_mv_statistic_stamcv - 1] = true; + nulls[Anum_pg_mv_statistic_stahist - 1] = true; /* insert the tuple into pg_mv_statistic */ mvstatrel = heap_open(MvStatisticRelationId, RowExclusiveLock); diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index a9cc9ad..27dbe76 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -2204,11 +2204,13 @@ _outMVStatisticInfo(StringInfo str, const MVStatisticInfo *node) WRITE_BOOL_FIELD(ndist_enabled); WRITE_BOOL_FIELD(deps_enabled); 
WRITE_BOOL_FIELD(mcv_enabled); + WRITE_BOOL_FIELD(hist_enabled); /* built/available statistics */ WRITE_BOOL_FIELD(ndist_built); WRITE_BOOL_FIELD(deps_built); WRITE_BOOL_FIELD(mcv_built); + WRITE_BOOL_FIELD(hist_built); } static void diff --git a/src/backend/optimizer/path/clausesel.c b/src/backend/optimizer/path/clausesel.c index abdbc5b..fddbcc4 100644 --- a/src/backend/optimizer/path/clausesel.c +++ b/src/backend/optimizer/path/clausesel.c @@ -49,6 +49,7 @@ static void addRangeClause(RangeQueryClause **rqlist, Node *clause, #define STATS_TYPE_FDEPS 0x01 #define STATS_TYPE_MCV 0x02 +#define STATS_TYPE_HIST 0x04 static bool clause_is_mv_compatible(Node *clause, Index relid, Bitmapset **attnums, int type); @@ -77,12 +78,21 @@ static Selectivity clauselist_mv_selectivity_mcvlist(PlannerInfo *root, List *clauses, MVStatisticInfo *mvstats, bool *fullmatch, Selectivity *lowsel); +static Selectivity clauselist_mv_selectivity_histogram(PlannerInfo *root, + List *clauses, MVStatisticInfo *mvstats); + static int update_match_bitmap_mcvlist(PlannerInfo *root, List *clauses, int2vector *stakeys, MCVList mcvlist, int nmatches, char *matches, Selectivity *lowsel, bool *fullmatch, bool is_or); +static int update_match_bitmap_histogram(PlannerInfo *root, List *clauses, + int2vector *stakeys, + MVSerializedHistogram mvhist, + int nmatches, char *matches, + bool is_or); + static bool has_stats(List *stats, int type); static List *find_stats(PlannerInfo *root, Index relid); @@ -93,6 +103,7 @@ static bool stats_type_matches(MVStatisticInfo *stat, int type); #define UPDATE_RESULT(m,r,isor) \ (m) = (isor) ? 
(Max(m,r)) : (Min(m,r)) + /**************************************************************************** * ROUTINES TO COMPUTE SELECTIVITIES ****************************************************************************/ @@ -121,7 +132,7 @@ static bool stats_type_matches(MVStatisticInfo *stat, int type); * * First we try to reduce the list of clauses by applying (soft) functional * dependencies, and then we try to estimate the selectivity of the reduced - * list of clauses using the multivariate MCV list. + * list of clauses using the multivariate MCV list and histograms. * * Finally we remove the portion of clauses estimated using multivariate stats, * and process the rest of the clauses using the regular per-column stats. @@ -208,16 +219,17 @@ clauselist_selectivity(PlannerInfo *root, * If there are no such stats or not enough attributes, don't waste time * simply skip to estimation using the plain per-column stats. */ - if (has_stats(stats, STATS_TYPE_MCV) && - (count_mv_attnums(clauses, relid, STATS_TYPE_MCV) >= 2)) + if (has_stats(stats, STATS_TYPE_MCV | STATS_TYPE_HIST) && + (count_mv_attnums(clauses, relid, + STATS_TYPE_MCV | STATS_TYPE_HIST) >= 2)) { /* collect attributes from the compatible conditions */ Bitmapset *mvattnums = collect_mv_attnums(clauses, relid, - STATS_TYPE_MCV); + STATS_TYPE_MCV | STATS_TYPE_HIST); /* and search for the statistic covering the most attributes */ MVStatisticInfo *mvstat = choose_mv_statistics(stats, mvattnums, - STATS_TYPE_MCV); + STATS_TYPE_MCV | STATS_TYPE_HIST); if (mvstat != NULL) /* we have a matching stats */ { @@ -226,7 +238,7 @@ clauselist_selectivity(PlannerInfo *root, /* split the clauselist into regular and mv-clauses */ clauses = clauselist_mv_split(root, relid, clauses, &mvclauses, - mvstat, STATS_TYPE_MCV); + mvstat, STATS_TYPE_MCV | STATS_TYPE_HIST); /* we've chosen the histogram to match the clauses */ Assert(mvclauses != NIL); @@ -1178,6 +1190,8 @@ static Selectivity clauselist_mv_selectivity(PlannerInfo 
*root, List *clauses, MVStatisticInfo *mvstats) { bool fullmatch = false; + Selectivity s1 = 0.0, + s2 = 0.0; /* * Lowest frequency in the MCV list (may be used as an upper bound for @@ -1191,9 +1205,26 @@ clauselist_mv_selectivity(PlannerInfo *root, List *clauses, MVStatisticInfo *mvs * order by selectivity (to optimize the MCV/histogram evaluation). */ - /* Evaluate the MCV selectivity */ - return clauselist_mv_selectivity_mcvlist(root, clauses, mvstats, - &fullmatch, &mcv_low); + /* Evaluate the MCV first. */ + s1 = clauselist_mv_selectivity_mcvlist(root, clauses, mvstats, + &fullmatch, &mcv_low); + + /* + * If we got a full equality match on the MCV list, we're done (and the + * estimate is pretty good). + */ + if (fullmatch && (s1 > 0.0)) + return s1; + + /* + * TODO if (fullmatch) without matching MCV item, use the mcv_low + * selectivity as upper bound + */ + + s2 = clauselist_mv_selectivity_histogram(root, clauses, mvstats); + + /* TODO clamp to <= 1.0 (or more strictly, when possible) */ + return s1 + s2; } /* @@ -1379,7 +1410,8 @@ choose_mv_statistics(List *stats, Bitmapset *attnums, int types) /* skip statistics not matching any of the requested types */ if (! 
((info->deps_built && (STATS_TYPE_FDEPS & types)) || - (info->mcv_built && (STATS_TYPE_MCV & types)))) + (info->mcv_built && (STATS_TYPE_MCV & types)) || + (info->hist_built && (STATS_TYPE_HIST & types)))) continue; /* count columns covered by the statistics */ @@ -1609,7 +1641,7 @@ mv_compatible_walker(Node *node, mv_compatible_context *context) case F_SCALARGTSEL: /* not compatible with functional dependencies */ - if (!(context->types & STATS_TYPE_MCV)) + if (!(context->types & (STATS_TYPE_MCV | STATS_TYPE_HIST))) return true; /* terminate */ break; @@ -1677,6 +1709,9 @@ stats_type_matches(MVStatisticInfo *stat, int type) if ((type & STATS_TYPE_MCV) && stat->mcv_built) return true; + if ((type & STATS_TYPE_HIST) && stat->hist_built) + return true; + return false; } @@ -1695,6 +1730,9 @@ has_stats(List *stats, int type) /* terminate if we've found at least one matching statistics */ if (stats_type_matches(stat, type)) return true; + + if ((type & STATS_TYPE_HIST) && stat->hist_built) + return true; } return false; @@ -1725,12 +1763,12 @@ find_stats(PlannerInfo *root, Index relid) * * The algorithm works like this: * - * 1) mark all items as 'match' - * 2) walk through all the clauses - * 3) for a particular clause, walk through all the items - * 4) skip items that are already 'no match' - * 5) check clause for items that still match - * 6) sum frequencies for items to get selectivity + * 1) mark all items as 'match' + * 2) walk through all the clauses + * 3) for a particular clause, walk through all the items + * 4) skip items that are already 'no match' + * 5) check clause for items that still match + * 6) sum frequencies for items to get selectivity * * The function also returns the frequency of the least frequent item * on the MCV list, which may be useful for clamping estimate from the @@ -2116,3 +2154,537 @@ update_match_bitmap_mcvlist(PlannerInfo *root, List *clauses, return nmatches; } + +/* + * Estimate selectivity of clauses using a histogram. 
+ * + * If there's no histogram for the stats, the function returns 0.0. + * + * The general idea of this method is similar to how MCV lists are + * processed, except that this introduces the concept of a partial + * match (MCV only works with full match / mismatch). + * + * The algorithm works like this: + * + * 1) mark all buckets as 'full match' + * 2) walk through all the clauses + * 3) for a particular clause, walk through all the buckets + * 4) skip buckets that are already 'no match' + * 5) check clause for buckets that still match (at least partially) + * 6) sum frequencies for buckets to get selectivity + * + * Unlike MCV lists, histograms have a concept of a partial match. In + * that case we use 1/2 the bucket, to minimize the average error. The + * MV histograms are usually less detailed than the per-column ones, + * meaning the sum is often quite high (thanks to combining a lot of + * "partially hit" buckets). + * + * Maybe we could use per-bucket information with number of distinct + * values it contains (for each dimension), and then use that to correct + * the estimate (so with 10 distinct values, we'd use 1/10 of the bucket + * frequency). We might also scale the value depending on the actual + * ndistinct estimate (not just the values observed in the sample). + * + * Another option would be to multiply the selectivities, i.e. if we get + * 'partial match' for a bucket for multiple conditions, we might use + * 0.5^k (where k is the number of conditions), instead of 0.5. This + * probably does not minimize the average error, though. + * + * TODO: This might use a similar shortcut to MCV lists - count buckets + * marked as partial/full match, and terminate once this drop to 0. + * Not sure if it's really worth it - for MCV lists a situation like + * this is not uncommon, but for histograms it's not that clear. 
+ */ +static Selectivity +clauselist_mv_selectivity_histogram(PlannerInfo *root, List *clauses, + MVStatisticInfo *mvstats) +{ + int i; + Selectivity s = 0.0; + Selectivity u = 0.0; + + int nmatches = 0; + char *matches = NULL; + + MVSerializedHistogram mvhist = NULL; + + /* there's no histogram */ + if (!mvstats->hist_built) + return 0.0; + + /* There may be no histogram in the stats (check hist_built flag) */ + mvhist = load_mv_histogram(mvstats->mvoid); + + Assert(mvhist != NULL); + Assert(clauses != NIL); + Assert(list_length(clauses) >= 2); + + /* + * Bitmap of bucket matches (mismatch, partial, full). by default all + * buckets fully match (and we'll eliminate them). + */ + matches = palloc0(sizeof(char) * mvhist->nbuckets); + memset(matches, MVSTATS_MATCH_FULL, sizeof(char) * mvhist->nbuckets); + + nmatches = mvhist->nbuckets; + + /* build the match bitmap */ + update_match_bitmap_histogram(root, clauses, + mvstats->stakeys, mvhist, + nmatches, matches, false); + + /* now, walk through the buckets and sum the selectivities */ + for (i = 0; i < mvhist->nbuckets; i++) + { + /* + * Find out what part of the data is covered by the histogram, so that + * we can 'scale' the selectivity properly (e.g. when only 50% of the + * sample got into the histogram, and the rest is in a MCV list). + * + * TODO This might be handled by keeping a global "frequency" for the + * whole histogram, which might save us some time spent accessing the + * not-matching part of the histogram. Although it's likely in a + * cache, so it's very fast. 
+ */ + u += mvhist->buckets[i]->ntuples; + + if (matches[i] == MVSTATS_MATCH_FULL) + s += mvhist->buckets[i]->ntuples; + else if (matches[i] == MVSTATS_MATCH_PARTIAL) + s += 0.5 * mvhist->buckets[i]->ntuples; + } + +#ifdef DEBUG_MVHIST + debug_histogram_matches(mvhist, matches); +#endif + + /* release the allocated bitmap and deserialized histogram */ + pfree(matches); + pfree(mvhist); + + return s * u; +} + +/* cached result of bucket boundary comparison for a single dimension */ + +#define HIST_CACHE_NOT_FOUND 0x00 +#define HIST_CACHE_FALSE 0x01 +#define HIST_CACHE_TRUE 0x03 +#define HIST_CACHE_MASK 0x02 + +static char +bucket_contains_value(FmgrInfo ltproc, Datum constvalue, + Datum min_value, Datum max_value, + int min_index, int max_index, + bool min_include, bool max_include, + char *callcache) +{ + bool a, + b; + + char min_cached = callcache[min_index]; + char max_cached = callcache[max_index]; + + /* + * First some quick checks on equality - if any of the boundaries equals, + * we have a partial match (so no need to call the comparator). + */ + if (((min_value == constvalue) && (min_include)) || + ((max_value == constvalue) && (max_include))) + return MVSTATS_MATCH_PARTIAL; + + /* Keep the values 0/1 because of the XOR at the end. */ + a = ((min_cached & HIST_CACHE_MASK) >> 1); + b = ((max_cached & HIST_CACHE_MASK) >> 1); + + /* + * If result for the bucket lower bound not in cache, evaluate the + * function and store the result in the cache. + */ + if (!min_cached) + { + a = DatumGetBool(FunctionCall2Coll(<proc, + DEFAULT_COLLATION_OID, + constvalue, min_value)); + /* remember the result */ + callcache[min_index] = (a) ? HIST_CACHE_TRUE : HIST_CACHE_FALSE; + } + + /* And do the same for the upper bound. */ + if (!max_cached) + { + b = DatumGetBool(FunctionCall2Coll(<proc, + DEFAULT_COLLATION_OID, + constvalue, max_value)); + /* remember the result */ + callcache[max_index] = (b) ? HIST_CACHE_TRUE : HIST_CACHE_FALSE; + } + + return (a ^ b) ? 
MVSTATS_MATCH_PARTIAL : MVSTATS_MATCH_NONE; +} + +static char +bucket_is_smaller_than_value(FmgrInfo opproc, Datum constvalue, + Datum min_value, Datum max_value, + int min_index, int max_index, + bool min_include, bool max_include, + char *callcache, bool isgt) +{ + char min_cached = callcache[min_index]; + char max_cached = callcache[max_index]; + + /* Keep the values 0/1 because of the XOR at the end. */ + bool a = ((min_cached & HIST_CACHE_MASK) >> 1); + bool b = ((max_cached & HIST_CACHE_MASK) >> 1); + + if (!min_cached) + { + a = DatumGetBool(FunctionCall2Coll(&opproc, + DEFAULT_COLLATION_OID, + min_value, + constvalue)); + /* remember the result */ + callcache[min_index] = (a) ? HIST_CACHE_TRUE : HIST_CACHE_FALSE; + } + + if (!max_cached) + { + b = DatumGetBool(FunctionCall2Coll(&opproc, + DEFAULT_COLLATION_OID, + max_value, + constvalue)); + /* remember the result */ + callcache[max_index] = (b) ? HIST_CACHE_TRUE : HIST_CACHE_FALSE; + } + + /* + * Now, we need to combine both results into the final answer, and we need + * to be careful about the 'isgt' variable which kinda inverts the + * meaning. + * + * First, we handle the case when each boundary returns different results. + * In that case the outcome can only be 'partial' match. + */ + if (a != b) + return MVSTATS_MATCH_PARTIAL; + + /* + * When the results are the same, then it depends on the 'isgt' value. + * There are four options: + * + * isgt=false a=b=true => full match isgt=false a=b=false => empty + * isgt=true a=b=true => empty isgt=true a=b=false => full match + * + * We'll cheat a bit, because we know that (a=b) so we'll use just one of + * them. + */ + if (isgt) + return (!a) ? MVSTATS_MATCH_FULL : MVSTATS_MATCH_NONE; + else + return (a) ? MVSTATS_MATCH_FULL : MVSTATS_MATCH_NONE; +} + +/* + * Evaluate clauses using the histogram, and update the match bitmap. 
+ * + * The bitmap may be already partially set, so this is really a way to + * combine results of several clause lists - either when computing + * conditional probability P(A|B) or a combination of AND/OR clauses. + * + * Note: This is not a simple bitmap in the sense that there are more + * than two possible values for each item - no match, partial + * match and full match. So we need 2 bits per item. + * + * TODO: This works with 'bitmap' where each item is represented as a + * char, which is slightly wasteful. Instead, we could use a bitmap + * with 2 bits per item, reducing the size to ~1/4. By using values + * 0, 1 and 3 (instead of 0, 1 and 2), the operations (merging etc.) + * might be performed just like for simple bitmap by using & and |, + * which might be faster than min/max. + */ +static int +update_match_bitmap_histogram(PlannerInfo *root, List *clauses, + int2vector *stakeys, + MVSerializedHistogram mvhist, + int nmatches, char *matches, + bool is_or) +{ + int i; + ListCell *l; + + /* + * Used for caching function calls, only once per deduplicated value. + * + * We know may have up to (2 * nbuckets) values per dimension. It's + * probably overkill, but let's allocate that once for all clauses, to + * minimize overhead. + * + * Also, we only need two bits per value, but this allocates byte per + * value. Might be worth optimizing. 
+ * + * 0x00 - not yet called 0x01 - called, result is 'false' 0x03 - called, + * result is 'true' + */ + char *callcache = palloc(mvhist->nbuckets); + + Assert(mvhist != NULL); + Assert(mvhist->nbuckets > 0); + Assert(nmatches >= 0); + Assert(nmatches <= mvhist->nbuckets); + + Assert(clauses != NIL); + Assert(list_length(clauses) >= 1); + + /* loop through the clauses and do the estimation */ + foreach(l, clauses) + { + Node *clause = (Node *) lfirst(l); + + /* if it's a RestrictInfo, then extract the clause */ + if (IsA(clause, RestrictInfo)) + clause = (Node *) ((RestrictInfo *) clause)->clause; + + /* it's either OpClause, or NullTest */ + if (is_opclause(clause)) + { + OpExpr *expr = (OpExpr *) clause; + bool varonleft = true; + bool ok; + + FmgrInfo opproc; /* operator */ + + fmgr_info(get_opcode(expr->opno), &opproc); + + /* reset the cache (per clause) */ + memset(callcache, 0, mvhist->nbuckets); + + ok = (NumRelids(clause) == 1) && + (is_pseudo_constant_clause(lsecond(expr->args)) || + (varonleft = false, + is_pseudo_constant_clause(linitial(expr->args)))); + + if (ok) + { + FmgrInfo ltproc; + RegProcedure oprrest = get_oprrest(expr->opno); + + Var *var = (varonleft) ? linitial(expr->args) : lsecond(expr->args); + Const *cst = (varonleft) ? lsecond(expr->args) : linitial(expr->args); + bool isgt = (!varonleft); + + TypeCacheEntry *typecache + = lookup_type_cache(var->vartype, TYPECACHE_LT_OPR); + + /* lookup dimension for the attribute */ + int idx = mv_get_index(var->varattno, stakeys); + + fmgr_info(get_opcode(typecache->lt_opr), <proc); + + /* + * Check this for all buckets that still have "true" in the + * bitmap + * + * We already know the clauses use suitable operators (because + * that's how we filtered them). 
+ */ + for (i = 0; i < mvhist->nbuckets; i++) + { + char res = MVSTATS_MATCH_NONE; + + MVSerializedBucket bucket = mvhist->buckets[i]; + + /* histogram boundaries */ + Datum minval, + maxval; + bool mininclude, + maxinclude; + int minidx, + maxidx; + + /* + * For AND-lists, we can also mark NULL buckets as 'no + * match' (and then skip them). For OR-lists this is not + * possible. + */ + if ((!is_or) && bucket->nullsonly[idx]) + matches[i] = MVSTATS_MATCH_NONE; + + /* + * Skip buckets that were already eliminated - this is + * impotant considering how we update the info (we only + * lower the match). We can't really do anything about the + * MATCH_PARTIAL buckets. + */ + if ((!is_or) && (matches[i] == MVSTATS_MATCH_NONE)) + continue; + else if (is_or && (matches[i] == MVSTATS_MATCH_FULL)) + continue; + + /* lookup the values and cache of function calls */ + minidx = bucket->min[idx]; + maxidx = bucket->max[idx]; + + minval = mvhist->values[idx][bucket->min[idx]]; + maxval = mvhist->values[idx][bucket->max[idx]]; + + mininclude = bucket->min_inclusive[idx]; + maxinclude = bucket->max_inclusive[idx]; + + /* + * TODO Maybe it's possible to add here a similar + * optimization as for the MCV lists: + * + * (nmatches == 0) && AND-list => all eliminated (FALSE) + * (nmatches == N) && OR-list => all eliminated (TRUE) + * + * But it's more complex because of the partial matches. + */ + + /* + * If it's not a "<" or ">" or "=" operator, just ignore + * the clause. Otherwise note the relid and attnum for the + * variable. + * + * TODO I'm really unsure the handling of 'isgt' flag + * (that is, clauses with reverse order of + * variable/constant) is correct. I wouldn't be surprised + * if there was some mixup. Using the lt/gt operators + * instead of messing with the opproc could make it + * simpler. It would however be using a different operator + * than the query, although it's not any shadier than + * using the selectivity function as is done currently. 
+ */ + switch (oprrest) + { + case F_SCALARLTSEL: /* Var < Const */ + case F_SCALARGTSEL: /* Var > Const */ + + res = bucket_is_smaller_than_value(opproc, cst->constvalue, + minval, maxval, + minidx, maxidx, + mininclude, maxinclude, + callcache, isgt); + break; + + case F_EQSEL: + + /* + * We only check whether the value is within the + * bucket, using the lt operator, and we also + * check for equality with the boundaries. + */ + + res = bucket_contains_value(ltproc, cst->constvalue, + minval, maxval, + minidx, maxidx, + mininclude, maxinclude, + callcache); + break; + } + + UPDATE_RESULT(matches[i], res, is_or); + + } + } + } + else if (IsA(clause, NullTest)) + { + NullTest *expr = (NullTest *) clause; + Var *var = (Var *) (expr->arg); + + /* FIXME proper matching attribute to dimension */ + int idx = mv_get_index(var->varattno, stakeys); + + /* + * Walk through the buckets and evaluate the current clause. We + * can skip items that were already ruled out, and terminate if + * there are no remaining buckets that might possibly match. 
+ */ + for (i = 0; i < mvhist->nbuckets; i++) + { + MVSerializedBucket bucket = mvhist->buckets[i]; + + /* + * Skip buckets that were already eliminated - this is + * impotant considering how we update the info (we only lower + * the match) + */ + if ((!is_or) && (matches[i] == MVSTATS_MATCH_NONE)) + continue; + else if (is_or && (matches[i] == MVSTATS_MATCH_FULL)) + continue; + + /* if the clause mismatches the bucket, set it as MATCH_NONE */ + if ((expr->nulltesttype == IS_NULL) + && (!bucket->nullsonly[idx])) + UPDATE_RESULT(matches[i], MVSTATS_MATCH_NONE, is_or); + + else if ((expr->nulltesttype == IS_NOT_NULL) && + (bucket->nullsonly[idx])) + UPDATE_RESULT(matches[i], MVSTATS_MATCH_NONE, is_or); + } + } + else if (or_clause(clause) || and_clause(clause)) + { + /* + * AND/OR clause, with all clauses compatible with the selected MV + * stat + */ + + int i; + BoolExpr *orclause = ((BoolExpr *) clause); + List *orclauses = orclause->args; + + /* match/mismatch bitmap for each bucket */ + int or_nmatches = 0; + char *or_matches = NULL; + + Assert(orclauses != NIL); + Assert(list_length(orclauses) >= 2); + + /* number of matching buckets */ + or_nmatches = mvhist->nbuckets; + + /* by default none of the buckets matches the clauses */ + or_matches = palloc0(sizeof(char) * or_nmatches); + + if (or_clause(clause)) + { + /* OR clauses assume nothing matches, initially */ + memset(or_matches, MVSTATS_MATCH_NONE, sizeof(char) * or_nmatches); + or_nmatches = 0; + } + else + { + /* AND clauses assume nothing matches, initially */ + memset(or_matches, MVSTATS_MATCH_FULL, sizeof(char) * or_nmatches); + } + + /* build the match bitmap for the OR-clauses */ + or_nmatches = update_match_bitmap_histogram(root, orclauses, + stakeys, mvhist, + or_nmatches, or_matches, or_clause(clause)); + + /* merge the bitmap into the existing one */ + for (i = 0; i < mvhist->nbuckets; i++) + { + /* + * Merge the result into the bitmap (Min for AND, Max for OR). 
+ * + * FIXME this does not decrease the number of matches + */ + UPDATE_RESULT(matches[i], or_matches[i], is_or); + } + + pfree(or_matches); + + } + else + elog(ERROR, "unknown clause type: %d", clause->type); + } + + /* free the call cache */ + pfree(callcache); + + return nmatches; +} diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c index 9dd4e83..c804e13 100644 --- a/src/backend/optimizer/util/plancat.c +++ b/src/backend/optimizer/util/plancat.c @@ -1287,7 +1287,7 @@ get_relation_statistics(RelOptInfo *rel, Relation relation) mvstat = (Form_pg_mv_statistic) GETSTRUCT(htup); /* unavailable stats are not interesting for the planner */ - if (mvstat->deps_built || mvstat->ndist_built || mvstat->mcv_built) + if (mvstat->deps_built || mvstat->ndist_built || mvstat->mcv_built || mvstat->hist_built) { info = makeNode(MVStatisticInfo); @@ -1298,11 +1298,13 @@ get_relation_statistics(RelOptInfo *rel, Relation relation) info->ndist_enabled = mvstat->ndist_enabled; info->deps_enabled = mvstat->deps_enabled; info->mcv_enabled = mvstat->mcv_enabled; + info->hist_enabled = mvstat->hist_enabled; /* built/available statistics */ info->ndist_built = mvstat->ndist_built; info->deps_built = mvstat->deps_built; info->mcv_built = mvstat->mcv_built; + info->hist_built = mvstat->hist_built; /* stakeys */ adatum = SysCacheGetAttr(MVSTATOID, htup, diff --git a/src/backend/utils/mvstats/Makefile b/src/backend/utils/mvstats/Makefile index d5d47ba..d4b88e9 100644 --- a/src/backend/utils/mvstats/Makefile +++ b/src/backend/utils/mvstats/Makefile @@ -12,6 +12,6 @@ subdir = src/backend/utils/mvstats top_builddir = ../../../.. 
include $(top_builddir)/src/Makefile.global -OBJS = common.o dependencies.o mcv.o mvdist.o +OBJS = common.o dependencies.o histogram.o mcv.o mvdist.o include $(top_srcdir)/src/backend/common.mk diff --git a/src/backend/utils/mvstats/README.histogram b/src/backend/utils/mvstats/README.histogram new file mode 100644 index 0000000..a182fa3 --- /dev/null +++ b/src/backend/utils/mvstats/README.histogram @@ -0,0 +1,299 @@ +Multivariate histograms +======================= + +Histograms on individual attributes consist of buckets represented by ranges, +covering the domain of the attribute. That is, each bucket is a [min,max] +interval, and contains all values in this range. The histogram is built in such +a way that all buckets have about the same frequency. + +Multivariate histograms are an extension into n-dimensional space - the buckets +are n-dimensional intervals (i.e. n-dimensional rectangles), covering the domain +of the combination of attributes. That is, each bucket has a vector of lower +and upper boundaries, denoted min[i] and max[i] (where i = 1..n). + +In addition to the boundaries, each bucket tracks additional info: + + * frequency (fraction of tuples in the bucket) + * whether the boundaries are inclusive or exclusive + * whether the dimension contains only NULL values + * number of distinct values in each dimension (for building only) + +It's possible that in the future we'll have multiple histogram types, with different +features. We do however expect all the types to share the same representation +(buckets as ranges) and only differ in how we build them. + +The current implementation builds non-overlapping buckets, that may not be true +for some histogram types and the code should not rely on this assumption. There +are interesting types of histograms (or algorithms) with overlapping buckets. + +When used on low-cardinality data, histograms usually perform considerably worse +than MCV lists (which are a good fit for this kind of data).
This is especially +true on label-like values, where ordering of the values is mostly unrelated to +meaning of the data, as proper ordering is crucial for histograms. + +On high-cardinality data the histograms are usually a better choice, because MCV +lists can't represent the distribution accurately enough. + + +Selectivity estimation +---------------------- + +The estimation is implemented in clauselist_mv_selectivity_histogram(), and +works very similarly to clauselist_mv_selectivity_mcvlist. + +The main difference is that while MCV lists support exact matches, histograms +often result in approximate matches - e.g. with equality we can only say if +the constant would be part of the bucket, but not whether it really is there +or what fraction of the bucket it corresponds to. In this case we rely on +some defaults just like in the per-column histograms. + +The current implementation uses histograms to estimate those types of clauses +(think of WHERE conditions): + + (a) equality clauses WHERE (a = 1) AND (b = 2) + (b) inequality clauses WHERE (a < 1) AND (b >= 2) + (c) NULL clauses WHERE (a IS NULL) AND (b IS NOT NULL) + (d) OR-clauses WHERE (a = 1) OR (b = 2) + +Similarly to MCV lists, it's possible to add support for additional types of +clauses, for example: + + (e) multi-var clauses WHERE (a > b) + +and so on. These are tasks for the future, not yet implemented. + + +When evaluating a clause on a bucket, we may get one of three results: + + (a) FULL_MATCH - The bucket definitely matches the clause. + + (b) PARTIAL_MATCH - The bucket matches the clause, but not necessarily all + the tuples it represents. + + (c) NO_MATCH - The bucket definitely does not match the clause. + +This may be illustrated using a range [1, 5], which is essentially a 1-D bucket.
+With clause + + WHERE (a < 10) => FULL_MATCH (all range values are below + 10, so the whole bucket matches) + + WHERE (a < 3) => PARTIAL_MATCH (there may be values matching + the clause, but we don't know how many) + + WHERE (a < 0) => NO_MATCH (the whole range is above 1, so + no values from the bucket can match) + +Some clauses may produce only some of those results - for example equality +clauses may never produce FULL_MATCH as we always hit only part of the bucket +(we can't match both boundaries at the same time). This results in less accurate +estimates compared to MCV lists, where we can hit a MCV items exactly (there's +no PARTIAL match in MCV). + +There are also clauses that may not produce any PARTIAL_MATCH results. A nice +example of that is 'IS [NOT] NULL' clause, which either matches the bucket +completely (FULL_MATCH) or not at all (NO_MATCH), thanks to how the NULL-buckets +are constructed. + +Computing the total selectivity estimate is trivial - simply sum selectivities +from all the FULL_MATCH and PARTIAL_MATCH buckets (but for buckets marked with +PARTIAL_MATCH, multiply the frequency by 0.5 to minimize the average error). + + +Building a histogram +--------------------- + +The algorithm of building a histogram in general is quite simple: + + (a) create an initial bucket (containing all sample rows) + + (b) create NULL buckets (by splitting the initial bucket) + + (c) repeat + + (1) choose bucket to split next + + (2) terminate if no bucket that might be split found, or if we've + reached the maximum number of buckets (16384) + + (3) choose dimension to partition the bucket by + + (4) partition the bucket by the selected dimension + +The main complexity is hidden in steps (c.1) and (c.3), i.e. how we choose the +bucket and dimension for the split, as discussed in the next section. + + +Partitioning criteria +--------------------- + +Similarly to one-dimensional histograms, we want to produce buckets with roughly +the same frequency. 
+ +We also need to produce "regular" buckets, because buckets with one dimension +much longer than the others are very likely to match a lot of conditions (which +increases error, even if the bucket frequency is very low). + +This is especially important when handling OR-clauses, because in that case each +clause may add buckets independently. With AND-clauses all the clauses have to +match each bucket, which makes this issue somewhat less concerning. + +To achieve this, we choose the largest bucket (containing the most sample rows), +but we only choose buckets that can actually be split (have at least 3 different +combinations of values). + +Then we choose the "longest" dimension of the bucket, which is computed by using +the distinct values in the sample as a measure. + +For details see functions select_bucket_to_partition() and partition_bucket(), +which also includes further discussion. + + +The current limit on number of buckets (16384) is mostly arbitrary, but chosen +so that it guarantees we don't exceed the number of distinct values indexable by +uint16 in any of the dimensions. In practice we could handle more buckets as we +index each dimension separately and the splits should use the dimensions evenly. + +Also, histograms this large (with 16k values in multiple dimensions) would be +quite expensive to build and process, so the 16k limit is rather reasonable. + +The actual number of buckets is also related to statistics target, because we +require MIN_BUCKET_ROWS (10) tuples per bucket before a split, so we can't have +more than (2 * 300 * target / 10) buckets. For the default target (100) this +evaluates to ~6k. + + +NULL handling (create_null_buckets) +----------------------------------- + +When building histograms on a single attribute, we first filter out NULL values. +In the multivariate case, we can't really do that because the rows may contain +a mix of NULL and non-NULL values in different columns (so we can't simply +filter all of them out).
+ +For this reason, the histograms are built in a way so that for each bucket, each +dimension contains only NULL or non-NULL values. Building the NULL-buckets +happens as the first step in the build, by the create_null_buckets() function. +The number of NULL buckets, as produced by this function, has a clear upper +boundary (2^N) where N is the number of dimensions (attributes the histogram is +built on). Or rather 2^K where K is the number of attributes that are not marked +as not-NULL. + +The buckets with NULL dimensions are then subject to the same build algorithm +(i.e. may be split into smaller buckets) just like any other bucket, but may +only be split by non-NULL dimension. + + +Serialization +------------- + +To store the histogram in pg_mv_statistic table, it is serialized into a more +efficient form. We also use the representation for estimation, i.e. we don't +fully deserialize the histogram. + +For example the boundary values are deduplicated to minimize the required space. +How much redundancy is there, actually? Let's assume there are no NULL values, +so we start with a single bucket - in that case we have 2*N boundaries. Each +time we split a bucket we introduce one new value (in the "middle" of one of +the dimensions), and keep boundaries for all the other dimensions. So after K +splits, we have up to + + 2*N + K + +unique boundary values (we may have fewer values, if the same value is used for +several splits). But after K splits we do have (K+1) buckets, so + + (K+1) * 2 * N + +boundary values. Using e.g. N=4 and K=999, we arrive to those numbers: + + 2*N + K = 1007 + (K+1) * 2 * N = 8000 + +which means a lot of redundancy. It's somewhat counter-intuitive that the number +of distinct values does not really depend on the number of dimensions (except +for the initial bucket, but that's negligible compared to the total).
+ +By deduplicating the values and replacing them with 16-bit indexes (uint16), we +reduce the required space to + + 1007 * 8 + 8000 * 2 ~= 24kB + +which is significantly less than 64kB required for the 'raw' histogram (assuming +the values are 8B). + +While the bytea compression (pglz) might achieve the same reduction of space, +the deduplicated representation is used to optimize the estimation by caching +results of function calls for already visited values. This significantly +reduces the number of calls to (often quite expensive) operators. + +Note: Of course, this reasoning only holds for histograms built by the algorithm +that simply splits the buckets in half. Other histograms types (e.g. containing +overlapping buckets) may behave differently and require different serialization. + +Serialized histograms are marked with 'magic' constant, to make it easier to +check the bytea value really is a serialized histogram. + + +varlena compression +------------------- + +This serialization may however disable automatic varlena compression, the array +of unique values is placed at the beginning of the serialized form. Which is +exactly the chunk used by pglz to check if the data is compressible, and it +will probably decide it's not very compressible. This is similar to the issue +we had with JSONB initially. + +Maybe storing buckets first would make it work, as the buckets may be better +compressible. + +On the other hand the serialization is actually a context-aware compression, +usually compressing to ~30% (or even less, with large data types). So the lack +of additional pglz compression may be acceptable. + + +Deserialization +--------------- + +The deserialization is not a perfect inverse of the serialization, as we keep +the deduplicated arrays. This reduces the amount of memory and also allows +optimizations during estimation (e.g. we can cache results for the distinct +values, saving expensive function calls). 
+ + +Inspecting the histogram +------------------------ + +Inspecting the regular (per-attribute) histograms is trivial, as it's enough +to select the columns from pg_stats - the data is encoded as anyarray, so we +simply get the text representation of the array. + +With multivariate histograms it's not that simple due to the possible mix of +data types in the histogram. It might be possible to produce similar array-like +text representation, but that'd unnecessarily complicate further processing +and analysis of the histogram. Instead, there's an SRF function that allows +access to lower/upper boundaries, frequencies etc. + + SELECT * FROM pg_mv_histogram_buckets(); + +It has two input parameters: + + oid - OID of the histogram (pg_mv_statistic.staoid) + otype - type of output + +and produces a table with these columns: + + - bucket ID (0...nbuckets-1) + - lower bucket boundaries (string array) + - upper bucket boundaries (string array) + - nulls only dimensions (boolean array) + - lower boundary inclusive (boolean array) + - upper boundary inclusive (boolean array) + - frequency (double precision) + +The 'otype' accepts three values, determining what will be returned in the +lower/upper boundary arrays: + + - 0 - values stored in the histogram, encoded as text + - 1 - indexes into the deduplicated arrays + - 2 - indexes into the deduplicated arrays, scaled to [0,1] diff --git a/src/backend/utils/mvstats/README.stats b/src/backend/utils/mvstats/README.stats index 8d3d268..9cc1c3e 100644 --- a/src/backend/utils/mvstats/README.stats +++ b/src/backend/utils/mvstats/README.stats @@ -18,6 +18,8 @@ Currently we only have two kinds of multivariate statistics (b) MCV lists (README.mcv) + (c) multivariate histograms (README.histogram) + Compatible clause types ----------------------- diff --git a/src/backend/utils/mvstats/common.c b/src/backend/utils/mvstats/common.c index fc8eae2..82f4e4a 100644 --- a/src/backend/utils/mvstats/common.c +++
b/src/backend/utils/mvstats/common.c @@ -13,6 +13,7 @@ * *------------------------------------------------------------------------- */ +#include "postgres.h" #include "common.h" #include "utils/array.h" @@ -24,7 +25,7 @@ static List *list_mv_stats(Oid relid); static void update_mv_stats(Oid relid, MVNDistinct ndistinct, MVDependencies dependencies, - MCVList mcvlist, + MCVList mcvlist, MVHistogram histogram, int2vector *attrs, VacAttrStats **stats); /* @@ -57,7 +58,8 @@ build_mv_stats(Relation onerel, double totalrows, MVNDistinct ndistinct = NULL; MVDependencies deps = NULL; MCVList mcvlist = NULL; - int numrows_filtered = 0; + MVHistogram histogram = NULL; + int numrows_filtered = numrows; VacAttrStats **stats = NULL; int numatts = 0; @@ -102,8 +104,12 @@ build_mv_stats(Relation onerel, double totalrows, if (stat->mcv_enabled) mcvlist = build_mv_mcvlist(numrows, rows, attrs, stats, &numrows_filtered); - /* store the statistics in the catalog */ - update_mv_stats(stat->mvoid, ndistinct, deps, mcvlist, attrs, stats); + /* build a multivariate histogram on the columns */ + if ((numrows_filtered > 0) && (stat->hist_enabled)) + histogram = build_mv_histogram(numrows_filtered, rows, attrs, stats, numrows); + + /* store the histogram / MCV list in the catalog */ + update_mv_stats(stat->mvoid, ndistinct, deps, mcvlist, histogram, attrs, stats); } } @@ -187,6 +193,8 @@ list_mv_stats(Oid relid) info->deps_built = stats->deps_built; info->mcv_enabled = stats->mcv_enabled; info->mcv_built = stats->mcv_built; + info->hist_enabled = stats->hist_enabled; + info->hist_built = stats->hist_built; result = lappend(result, info); } @@ -255,7 +263,8 @@ find_mv_attnums(Oid mvoid, Oid *relid) */ static void update_mv_stats(Oid mvoid, - MVNDistinct ndistinct, MVDependencies dependencies, MCVList mcvlist, + MVNDistinct ndistinct, MVDependencies dependencies, + MCVList mcvlist, MVHistogram histogram, int2vector *attrs, VacAttrStats **stats) { HeapTuple stup, @@ -297,15 +306,26 @@ 
update_mv_stats(Oid mvoid, values[Anum_pg_mv_statistic_stamcv - 1] = PointerGetDatum(data); } + if (histogram != NULL) + { + bytea *data = serialize_mv_histogram(histogram, attrs, stats); + + nulls[Anum_pg_mv_statistic_stahist - 1] = (data == NULL); + values[Anum_pg_mv_statistic_stahist - 1] + = PointerGetDatum(data); + } + /* always replace the value (either by bytea or NULL) */ replaces[Anum_pg_mv_statistic_standist - 1] = true; replaces[Anum_pg_mv_statistic_stadeps - 1] = true; replaces[Anum_pg_mv_statistic_stamcv - 1] = true; + replaces[Anum_pg_mv_statistic_stahist - 1] = true; /* always change the availability flags */ nulls[Anum_pg_mv_statistic_ndist_built - 1] = false; nulls[Anum_pg_mv_statistic_deps_built - 1] = false; nulls[Anum_pg_mv_statistic_mcv_built - 1] = false; + nulls[Anum_pg_mv_statistic_hist_built - 1] = false; nulls[Anum_pg_mv_statistic_stakeys - 1] = false; @@ -313,12 +333,14 @@ update_mv_stats(Oid mvoid, replaces[Anum_pg_mv_statistic_ndist_built - 1] = true; replaces[Anum_pg_mv_statistic_deps_built - 1] = true; replaces[Anum_pg_mv_statistic_mcv_built - 1] = true; + replaces[Anum_pg_mv_statistic_hist_built - 1] = true; replaces[Anum_pg_mv_statistic_stakeys - 1] = true; values[Anum_pg_mv_statistic_ndist_built - 1] = BoolGetDatum(ndistinct != NULL); values[Anum_pg_mv_statistic_deps_built - 1] = BoolGetDatum(dependencies != NULL); values[Anum_pg_mv_statistic_mcv_built - 1] = BoolGetDatum(mcvlist != NULL); + values[Anum_pg_mv_statistic_hist_built - 1] = BoolGetDatum(histogram != NULL); values[Anum_pg_mv_statistic_stakeys - 1] = PointerGetDatum(attrs); diff --git a/src/backend/utils/mvstats/common.h b/src/backend/utils/mvstats/common.h index fe56f51..96c0317 100644 --- a/src/backend/utils/mvstats/common.h +++ b/src/backend/utils/mvstats/common.h @@ -77,7 +77,7 @@ MultiSortSupport multi_sort_init(int ndims); void multi_sort_add_dimension(MultiSortSupport mss, int sortdim, int dim, VacAttrStats **vacattrstats); -int multi_sort_compare(const void *a, 
const void *b, void *arg); +int multi_sort_compare(const void *a, const void *b, void *arg); int multi_sort_compare_dim(int dim, const SortItem *a, const SortItem *b, MultiSortSupport mss); @@ -86,9 +86,9 @@ int multi_sort_compare_dims(int start, int end, const SortItem *a, const SortItem *b, MultiSortSupport mss); /* comparators, used when constructing multivariate stats */ -int compare_datums_simple(Datum a, Datum b, SortSupport ssup); -int compare_scalars_simple(const void *a, const void *b, void *arg); -int compare_scalars_partition(const void *a, const void *b, void *arg); +int compare_datums_simple(Datum a, Datum b, SortSupport ssup); +int compare_scalars_simple(const void *a, const void *b, void *arg); +int compare_scalars_partition(const void *a, const void *b, void *arg); void *bsearch_arg(const void *key, const void *base, size_t nmemb, size_t size, diff --git a/src/backend/utils/mvstats/histogram.c b/src/backend/utils/mvstats/histogram.c new file mode 100644 index 0000000..fc0c9c2 --- /dev/null +++ b/src/backend/utils/mvstats/histogram.c @@ -0,0 +1,2123 @@ +/*------------------------------------------------------------------------- + * + * histogram.c + * POSTGRES multivariate histograms + * + * + * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/utils/mvstats/histogram.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "fmgr.h" +#include "funcapi.h" + +#include "utils/bytea.h" +#include "utils/lsyscache.h" + +#include "common.h" +#include + + +static MVBucket create_initial_mv_bucket(int numrows, HeapTuple *rows, + int2vector *attrs, + VacAttrStats **stats); + +static MVBucket select_bucket_to_partition(int nbuckets, MVBucket *buckets); + +static MVBucket partition_bucket(MVBucket bucket, int2vector *attrs, + VacAttrStats **stats, + int 
*ndistvalues, Datum **distvalues); + +static MVBucket copy_mv_bucket(MVBucket bucket, uint32 ndimensions); + +static void update_bucket_ndistinct(MVBucket bucket, int2vector *attrs, + VacAttrStats **stats); + +static void update_dimension_ndistinct(MVBucket bucket, int dimension, + int2vector *attrs, + VacAttrStats **stats, + bool update_boundaries); + +static void create_null_buckets(MVHistogram histogram, int bucket_idx, + int2vector *attrs, VacAttrStats **stats); + +static Datum *build_ndistinct(int numrows, HeapTuple *rows, int2vector *attrs, + VacAttrStats **stats, int i, int *nvals); + +/* + * Each serialized bucket needs to store (in this order): + * + * - number of tuples (float) + * - number of distinct (float) + * - min inclusive flags (ndim * sizeof(bool)) + * - max inclusive flags (ndim * sizeof(bool)) + * - null dimension flags (ndim * sizeof(bool)) + * - min boundary indexes (2 * ndim * sizeof(uint16)) + * - max boundary indexes (2 * ndim * sizeof(uint16)) + * + * So in total: + * + * ndim * (4 * sizeof(uint16) + 3 * sizeof(bool)) + (2 * sizeof(float)) + */ +#define BUCKET_SIZE(ndims) \ + (ndims * (4 * sizeof(uint16) + 3 * sizeof(bool)) + sizeof(float)) + +/* pointers into a flat serialized bucket of BUCKET_SIZE(n) bytes */ +#define BUCKET_NTUPLES(b) (*(float*)b) +#define BUCKET_MIN_INCL(b,n) ((bool*)(b + sizeof(float))) +#define BUCKET_MAX_INCL(b,n) (BUCKET_MIN_INCL(b,n) + n) +#define BUCKET_NULLS_ONLY(b,n) (BUCKET_MAX_INCL(b,n) + n) +#define BUCKET_MIN_INDEXES(b,n) ((uint16*)(BUCKET_NULLS_ONLY(b,n) + n)) +#define BUCKET_MAX_INDEXES(b,n) ((BUCKET_MIN_INDEXES(b,n) + n)) + +/* can't split bucket with less than 10 rows */ +#define MIN_BUCKET_ROWS 10 + +/* + * Data used while building the histogram. 
+ */ +typedef struct HistogramBuildData +{ + + float ndistinct; /* frequency of distinct values */ + + HeapTuple *rows; /* array of sample rows */ + uint32 numrows; /* number of sample rows (array size) */ + + /* + * Number of distinct values in each dimension. This is used when building + * the histogram (and is not serialized/deserialized). + */ + uint32 *ndistincts; + +} HistogramBuildData; + +typedef HistogramBuildData *HistogramBuild; + +/* + * builds a multivariate histogram + * + * The build algorithm is iterative - initially a single bucket containing all + * the sample rows is formed, and then repeatedly split into smaller buckets. + * In each step the largest bucket (in some sense) is chosen to be split next. + * + * The criteria for selecting the largest bucket (and the dimension for the + * split) needs to be elaborate enough to produce buckets of roughly the same + * size, and also regular shape (not very long in one dimension). + * + * The current algorithm works like this: + * + * build NULL-buckets (create_null_buckets) + * + * while [maximum number of buckets not reached] + * + * choose bucket to partition (largest bucket) + * if no bucket to partition + * terminate the algorithm + * + * choose bucket dimension to partition (largest dimension) + * split the bucket into two buckets + * + * See the discussion at select_bucket_to_partition and partition_bucket for + * more details about the algorithm.
+ */ +MVHistogram +build_mv_histogram(int numrows, HeapTuple *rows, int2vector *attrs, + VacAttrStats **stats, int numrows_total) +{ + int i; + int numattrs = attrs->dim1; + + int *ndistvalues; + Datum **distvalues; + + MVHistogram histogram; + + HeapTuple *rows_copy = (HeapTuple *) palloc0(numrows * sizeof(HeapTuple)); + + memcpy(rows_copy, rows, sizeof(HeapTuple) * numrows); + + Assert((numattrs >= 2) && (numattrs <= MVSTATS_MAX_DIMENSIONS)); + + /* build histogram header */ + + histogram = (MVHistogram) palloc0(sizeof(MVHistogramData)); + + histogram->magic = MVSTAT_HIST_MAGIC; + histogram->type = MVSTAT_HIST_TYPE_BASIC; + + histogram->nbuckets = 1; + histogram->ndimensions = numattrs; + + /* create max buckets (better than repalloc for short-lived objects) */ + histogram->buckets + = (MVBucket *) palloc0(MVSTAT_HIST_MAX_BUCKETS * sizeof(MVBucket)); + + /* create the initial bucket, covering the whole sample set */ + histogram->buckets[0] + = create_initial_mv_bucket(numrows, rows_copy, attrs, stats); + + /* + * Collect info on distinct values in each dimension (used later to select + * dimension to partition). + */ + ndistvalues = (int *) palloc0(sizeof(int) * numattrs); + distvalues = (Datum **) palloc0(sizeof(Datum *) * numattrs); + + for (i = 0; i < numattrs; i++) + distvalues[i] = build_ndistinct(numrows, rows, attrs, stats, i, + &ndistvalues[i]); + + /* + * Split the initial bucket into buckets that don't mix NULL and non-NULL + * values in a single dimension. + */ + create_null_buckets(histogram, 0, attrs, stats); + + /* + * Do the actual histogram build - select a bucket and split it. 
+ */ + while (histogram->nbuckets < MVSTAT_HIST_MAX_BUCKETS) + { + MVBucket bucket = select_bucket_to_partition(histogram->nbuckets, + histogram->buckets); + + /* no buckets eligible for partitioning */ + if (bucket == NULL) + break; + + /* we modify the bucket in-place and add one new bucket */ + histogram->buckets[histogram->nbuckets++] + = partition_bucket(bucket, attrs, stats, ndistvalues, distvalues); + } + + /* finalize the histogram build - compute the frequencies etc. */ + for (i = 0; i < histogram->nbuckets; i++) + { + HistogramBuild build_data + = ((HistogramBuild) histogram->buckets[i]->build_data); + + /* + * The frequency has to be computed from the whole sample, in case + * some of the rows were used for MCV. + * + * XXX Perhaps this should simply compute frequency with respect to + * the local freuquency, and then factor-in the MCV later. + * + * FIXME The 'ntuples' sounds a bit inappropriate for frequency. + */ + histogram->buckets[i]->ntuples + = (build_data->numrows * 1.0) / numrows_total; + } + + return histogram; +} + +/* build array of distinct values for a single attribute */ +static Datum * +build_ndistinct(int numrows, HeapTuple *rows, int2vector *attrs, + VacAttrStats **stats, int i, int *nvals) +{ + int j; + int nvalues, + ndistinct; + Datum *values, + *distvalues; + + SortSupportData ssup; + StdAnalyzeData *mystats = (StdAnalyzeData *) stats[i]->extra_data; + + /* initialize sort support, etc. 
*/ + memset(&ssup, 0, sizeof(ssup)); + ssup.ssup_cxt = CurrentMemoryContext; + + /* We always use the default collation for statistics */ + ssup.ssup_collation = DEFAULT_COLLATION_OID; + ssup.ssup_nulls_first = false; + + PrepareSortSupportFromOrderingOp(mystats->ltopr, &ssup); + + nvalues = 0; + values = (Datum *) palloc0(sizeof(Datum) * numrows); + + /* collect values from the sample rows, ignore NULLs */ + for (j = 0; j < numrows; j++) + { + Datum value; + bool isnull; + + /* + * remember the index of the sample row, to make the partitioning + * simpler + */ + value = heap_getattr(rows[j], attrs->values[i], + stats[i]->tupDesc, &isnull); + + if (isnull) + continue; + + values[nvalues++] = value; + } + + /* if no non-NULL values were found, free the memory and terminate */ + if (nvalues == 0) + { + pfree(values); + return NULL; + } + + /* sort the array of values using the SortSupport */ + qsort_arg((void *) values, nvalues, sizeof(Datum), + compare_scalars_simple, (void *) &ssup); + + /* count the distinct values first, and allocate just enough memory */ + ndistinct = 1; + for (j = 1; j < nvalues; j++) + if (compare_scalars_simple(&values[j], &values[j - 1], &ssup) != 0) + ndistinct += 1; + + distvalues = (Datum *) palloc0(sizeof(Datum) * ndistinct); + + /* now collect distinct values into the array */ + distvalues[0] = values[0]; + ndistinct = 1; + + for (j = 1; j < nvalues; j++) + { + if (compare_scalars_simple(&values[j], &values[j - 1], &ssup) != 0) + { + distvalues[ndistinct] = values[j]; + ndistinct += 1; + } + } + + pfree(values); + + *nvals = ndistinct; + return distvalues; +} + +/* fetch the histogram (as a bytea) from the pg_mv_statistic catalog */ +MVSerializedHistogram +load_mv_histogram(Oid mvoid) +{ + bool isnull = false; + Datum histogram; + +#ifdef USE_ASSERT_CHECKING + Form_pg_mv_statistic mvstat; +#endif + + /* Prepare to scan pg_mv_statistic for entries having indrelid = this rel. 
*/ + HeapTuple htup = SearchSysCache1(MVSTATOID, ObjectIdGetDatum(mvoid)); + + if (!HeapTupleIsValid(htup)) + return NULL; + +#ifdef USE_ASSERT_CHECKING + mvstat = (Form_pg_mv_statistic) GETSTRUCT(htup); + Assert(mvstat->hist_enabled && mvstat->hist_built); +#endif + + histogram = SysCacheGetAttr(MVSTATOID, htup, + Anum_pg_mv_statistic_stahist, &isnull); + + Assert(!isnull); + + ReleaseSysCache(htup); + + return deserialize_mv_histogram(DatumGetByteaP(histogram)); +} + +/* print some basic info about the histogram */ +Datum +pg_mv_stats_histogram_info(PG_FUNCTION_ARGS) +{ + bytea *data = PG_GETARG_BYTEA_P(0); + char *result; + + MVSerializedHistogram hist = deserialize_mv_histogram(data); + + result = palloc0(128); + snprintf(result, 128, "nbuckets=%d", hist->nbuckets); + + PG_RETURN_TEXT_P(cstring_to_text(result)); +} + +/* + * Serialize the MV histogram into a bytea value. The basic algorithm is quite + * simple, and mostly mimincs the MCV serialization: + * + * (1) perform deduplication for each attribute (separately) + * + * (a) collect all (non-NULL) attribute values from all buckets + * (b) sort the data (using 'lt' from VacAttrStats) + * (c) remove duplicate values from the array + * + * (2) serialize the arrays into a bytea value + * + * (3) process all buckets + * + * (a) replace min/max values with indexes into the arrays + * + * Each attribute has to be processed separately, as we're mixing different + * datatypes, and we we need to use the right operators to compare/sort them. + * We're also mixing pass-by-value and pass-by-ref types, and so on. + * + * + * FIXME This probably leaks memory, or at least uses it inefficiently + * (many small palloc() calls instead of a large one). + * + * TODO Consider packing boolean flags (NULL) for each item into 'char' or + * a longer type (instead of using an array of bool items). 
 */
bytea *
serialize_mv_histogram(MVHistogram histogram, int2vector *attrs,
					   VacAttrStats **stats)
{
	int			i = 0,
				j = 0;
	Size		total_length = 0;

	bytea	   *output = NULL;
	char	   *data = NULL;

	DimensionInfo *info;
	SortSupport ssup;

	int			nbuckets = histogram->nbuckets;
	int			ndims = histogram->ndimensions;

	/* allocated for serialized bucket data */
	int			bucketsize = BUCKET_SIZE(ndims);
	char	   *bucket = palloc0(bucketsize);

	/* values per dimension (and number of non-NULL values) */
	Datum	  **values = (Datum **) palloc0(sizeof(Datum *) * ndims);
	int		   *counts = (int *) palloc0(sizeof(int) * ndims);

	/* info about dimensions (for deserialize) */
	info = (DimensionInfo *) palloc0(sizeof(DimensionInfo) * ndims);

	/* sort support data */
	ssup = (SortSupport) palloc0(sizeof(SortSupportData) * ndims);

	/* collect and deduplicate values for each dimension separately */
	for (i = 0; i < ndims; i++)
	{
		int			count;
		StdAnalyzeData *tmp = (StdAnalyzeData *) stats[i]->extra_data;

		/* keep important info about the data type */
		info[i].typlen = stats[i]->attrtype->typlen;
		info[i].typbyval = stats[i]->attrtype->typbyval;

		/*
		 * Allocate space for all min/max values, including NULLs (we won't
		 * use them, but we don't know how many are there), and then collect
		 * all non-NULL values.
		 */
		values[i] = (Datum *) palloc0(sizeof(Datum) * nbuckets * 2);

		for (j = 0; j < histogram->nbuckets; j++)
		{
			/* skip buckets where this dimension is NULL-only */
			if (!histogram->buckets[j]->nullsonly[i])
			{
				values[i][counts[i]] = histogram->buckets[j]->min[i];
				counts[i] += 1;

				values[i][counts[i]] = histogram->buckets[j]->max[i];
				counts[i] += 1;
			}
		}

		/* there are just NULL values in this dimension */
		if (counts[i] == 0)
			continue;

		/* sort and deduplicate */
		ssup[i].ssup_cxt = CurrentMemoryContext;
		ssup[i].ssup_collation = DEFAULT_COLLATION_OID;
		ssup[i].ssup_nulls_first = false;

		PrepareSortSupportFromOrderingOp(tmp->ltopr, &ssup[i]);

		qsort_arg(values[i], counts[i], sizeof(Datum),
				  compare_scalars_simple, &ssup[i]);

		/*
		 * Walk through the array and eliminate duplicate values, but keep
		 * the ordering (so that we can do bsearch later). We know there's at
		 * least 1 item, so we can skip the first element.
		 */
		count = 1;				/* number of deduplicated items */
		for (j = 1; j < counts[i]; j++)
		{
			/* if it's different from the previous value, we need to keep it */
			if (compare_datums_simple(values[i][j - 1], values[i][j], &ssup[i]) != 0)
			{
				/* XXX: not needed if (count == j) */
				values[i][count] = values[i][j];
				count += 1;
			}
		}

		/* make sure we fit into uint16 (bucket boundaries are uint16 indexes) */
		Assert(count <= UINT16_MAX);

		/* keep info about the deduplicated count */
		info[i].nvalues = count;

		/* compute size of the serialized data */
		if (info[i].typlen > 0)
			/* byval or byref, but with fixed length (name, tid, ...) */
			info[i].nbytes = info[i].nvalues * info[i].typlen;
		else if (info[i].typlen == -1)
			/* varlena, so just use VARSIZE_ANY */
			for (j = 0; j < info[i].nvalues; j++)
				info[i].nbytes += VARSIZE_ANY(values[i][j]);
		else if (info[i].typlen == -2)
			/* cstring, so simply strlen */
			for (j = 0; j < info[i].nvalues; j++)
				info[i].nbytes += strlen(DatumGetPointer(values[i][j]));
		else
			elog(ERROR, "unknown data type typbyval=%d typlen=%d",
				 info[i].typbyval, info[i].typlen);
	}

	/*
	 * Now we finally know how much space we'll need for the serialized
	 * histogram, as it contains these fields:
	 *
	 * - length (4B) for varlena
	 * - magic (4B)
	 * - type (4B)
	 * - ndimensions (4B)
	 * - nbuckets (4B)
	 * - info (ndim * sizeof(DimensionInfo))
	 * - arrays of values for each dimension
	 * - serialized buckets (nbuckets * bucketsize)
	 *
	 * So the 'header' size is 20B + ndim * sizeof(DimensionInfo) and then
	 * we'll place the data (and buckets).
	 */
	total_length = (sizeof(int32) + offsetof(MVHistogramData, buckets)
					+ ndims * sizeof(DimensionInfo)
					+ nbuckets * bucketsize);

	/* account for the deduplicated data */
	for (i = 0; i < ndims; i++)
		total_length += info[i].nbytes;

	/* enforce arbitrary limit of 1MB */
	if (total_length > (1024 * 1024))
		elog(ERROR, "serialized histogram exceeds 1MB (%ld > %d)",
			 total_length, (1024 * 1024));

	/* allocate space for the serialized histogram list, set header */
	output = (bytea *) palloc0(total_length);
	SET_VARSIZE(output, total_length);

	/* we'll use 'data' to keep track of the place to write data */
	data = VARDATA(output);

	memcpy(data, histogram, offsetof(MVHistogramData, buckets));
	data += offsetof(MVHistogramData, buckets);

	memcpy(data, info, sizeof(DimensionInfo) * ndims);
	data += sizeof(DimensionInfo) * ndims;

	/* serialize the deduplicated values for all attributes */
	for (i = 0; i < ndims; i++)
	{
#ifdef USE_ASSERT_CHECKING
		char	   *tmp = data;
#endif
		for (j = 0; j < info[i].nvalues; j++)
		{
			Datum		v = values[i][j];

			if (info[i].typbyval)		/* passed by value */
			{
				memcpy(data, &v, info[i].typlen);
				data += info[i].typlen;
			}
			else if (info[i].typlen > 0)	/* passed by reference */
			{
				memcpy(data, DatumGetPointer(v), info[i].typlen);
				data += info[i].typlen;
			}
			else if (info[i].typlen == -1)	/* varlena */
			{
				memcpy(data, DatumGetPointer(v), VARSIZE_ANY(v));
				data += VARSIZE_ANY(values[i][j]);
			}
			else if (info[i].typlen == -2)	/* cstring */
			{
				/* include the terminating \0 byte */
				memcpy(data, DatumGetPointer(v), strlen(DatumGetPointer(v)) + 1);
				data += strlen(DatumGetPointer(v)) + 1;
			}
		}

		/* make sure we got exactly the amount of data we expected */
		Assert((data - tmp) == info[i].nbytes);
	}

	/* finally serialize the items, with uint16 indexes instead of the values */
	for (i = 0; i < nbuckets; i++)
	{
		/* don't write beyond the allocated space */
		Assert(data <= (char *) output + total_length - bucketsize);

		/* reset the values for each item */
		memset(bucket, 0, bucketsize);

		BUCKET_NTUPLES(bucket) = histogram->buckets[i]->ntuples;

		for (j = 0; j < ndims; j++)
		{
			/* do the lookup only for non-NULL values */
			if (!histogram->buckets[i]->nullsonly[j])
			{
				uint16		idx;
				Datum	   *v = NULL;

				/* min boundary */
				v = (Datum *) bsearch_arg(&histogram->buckets[i]->min[j],
										  values[j], info[j].nvalues, sizeof(Datum),
										  compare_scalars_simple, &ssup[j]);

				Assert(v != NULL);		/* serialization or deduplication
										 * error */

				/* compute index within the array */
				idx = (v - values[j]);

				Assert((idx >= 0) && (idx < info[j].nvalues));

				BUCKET_MIN_INDEXES(bucket, ndims)[j] = idx;

				/* max boundary */
				v = (Datum *) bsearch_arg(&histogram->buckets[i]->max[j],
										  values[j], info[j].nvalues, sizeof(Datum),
										  compare_scalars_simple, &ssup[j]);

				Assert(v != NULL);		/* serialization or deduplication
										 * error */

				/* compute index within the array */
				idx = (v - values[j]);

				Assert((idx >= 0) && (idx < info[j].nvalues));

				BUCKET_MAX_INDEXES(bucket, ndims)[j] = idx;
			}
		}

		/* copy flags (nulls, min/max inclusive) */
		memcpy(BUCKET_NULLS_ONLY(bucket, ndims),
			   histogram->buckets[i]->nullsonly, sizeof(bool) * ndims);

		memcpy(BUCKET_MIN_INCL(bucket, ndims),
			   histogram->buckets[i]->min_inclusive, sizeof(bool) * ndims);

		memcpy(BUCKET_MAX_INCL(bucket, ndims),
			   histogram->buckets[i]->max_inclusive, sizeof(bool) * ndims);

		/* copy the item into the array */
		memcpy(data, bucket, bucketsize);

		data += bucketsize;
	}

	/* at this point we expect to match the total_length exactly */
	Assert((data - (char *) output) == total_length);

	/* free the values/counts arrays here */
	pfree(counts);
	pfree(info);
	pfree(ssup);

	for (i = 0; i < ndims; i++)
		pfree(values[i]);

	pfree(values);

	return output;
}

/*
 * Returns histogram in a partially-serialized form (keeps the boundary values
 * deduplicated, so that it's possible to optimize the estimation part by
 * caching function call results between buckets etc.).
+ */ +MVSerializedHistogram +deserialize_mv_histogram(bytea *data) +{ + int i = 0, + j = 0; + + Size expected_size; + char *tmp = NULL; + + MVSerializedHistogram histogram; + DimensionInfo *info; + + int nbuckets; + int ndims; + int bucketsize; + + /* temporary deserialization buffer */ + int bufflen; + char *buff; + char *ptr; + + if (data == NULL) + return NULL; + + if (VARSIZE_ANY_EXHDR(data) < offsetof(MVSerializedHistogramData, buckets)) + elog(ERROR, "invalid histogram size %ld (expected at least %ld)", + VARSIZE_ANY_EXHDR(data), offsetof(MVSerializedHistogramData, buckets)); + + /* read the histogram header */ + histogram + = (MVSerializedHistogram) palloc(sizeof(MVSerializedHistogramData)); + + /* initialize pointer to the data part (skip the varlena header) */ + tmp = VARDATA_ANY(data); + + /* get the header and perform basic sanity checks */ + memcpy(histogram, tmp, offsetof(MVSerializedHistogramData, buckets)); + tmp += offsetof(MVSerializedHistogramData, buckets); + + if (histogram->magic != MVSTAT_HIST_MAGIC) + elog(ERROR, "invalid histogram magic %d (expected %dd)", + histogram->magic, MVSTAT_HIST_MAGIC); + + if (histogram->type != MVSTAT_HIST_TYPE_BASIC) + elog(ERROR, "invalid histogram type %d (expected %dd)", + histogram->type, MVSTAT_HIST_TYPE_BASIC); + + nbuckets = histogram->nbuckets; + ndims = histogram->ndimensions; + bucketsize = BUCKET_SIZE(ndims); + + Assert((nbuckets > 0) && (nbuckets <= MVSTAT_HIST_MAX_BUCKETS)); + Assert((ndims >= 2) && (ndims <= MVSTATS_MAX_DIMENSIONS)); + + /* + * What size do we expect with those parameters (it's incomplete, as we + * yet have to count the array sizes (from DimensionInfo records). 
+ */ + expected_size = offsetof(MVSerializedHistogramData, buckets) + + ndims * sizeof(DimensionInfo) + + (nbuckets * bucketsize); + + /* check that we have at least the DimensionInfo records */ + if (VARSIZE_ANY_EXHDR(data) < expected_size) + elog(ERROR, "invalid histogram size %ld (expected %ld)", + VARSIZE_ANY_EXHDR(data), expected_size); + + info = (DimensionInfo *) (tmp); + tmp += ndims * sizeof(DimensionInfo); + + /* account for the value arrays */ + for (i = 0; i < ndims; i++) + expected_size += info[i].nbytes; + + if (VARSIZE_ANY_EXHDR(data) != expected_size) + elog(ERROR, "invalid histogram size %ld (expected %ld)", + VARSIZE_ANY_EXHDR(data), expected_size); + + /* looks OK - not corrupted or something */ + + /* a single buffer for all the values and counts */ + bufflen = (sizeof(int) + sizeof(Datum *)) * ndims; + + for (i = 0; i < ndims; i++) + /* don't allocate space for byval types, matching Datum */ + if (!(info[i].typbyval && (info[i].typlen == sizeof(Datum)))) + bufflen += (sizeof(Datum) * info[i].nvalues); + + /* also, include space for the result, tracking the buckets */ + bufflen += nbuckets * ( + sizeof(MVSerializedBucket) + /* bucket pointer */ + sizeof(MVSerializedBucketData)); /* bucket data */ + + buff = palloc0(bufflen); + ptr = buff; + + histogram->nvalues = (int *) ptr; + ptr += (sizeof(int) * ndims); + + histogram->values = (Datum **) ptr; + ptr += (sizeof(Datum *) * ndims); + + /* + * FIXME This uses pointers to the original data array (the types not + * passed by value), so when someone frees the memory, e.g. by doing + * something like this: + * + * bytea * data = ... fetch the data from catalog ... MCVList mcvlist = + * deserialize_mcv_list(data); pfree(data); + * + * then 'mcvlist' references the freed memory. This needs to copy the + * pieces. 
+ * + * TODO same as in MCV deserialization / consider moving to common.c + */ + for (i = 0; i < ndims; i++) + { + histogram->nvalues[i] = info[i].nvalues; + + if (info[i].typbyval) + { + /* passed by value / Datum - simply reuse the array */ + if (info[i].typlen == sizeof(Datum)) + { + histogram->values[i] = (Datum *) tmp; + tmp += info[i].nbytes; + } + else + { + histogram->values[i] = (Datum *) ptr; + ptr += (sizeof(Datum) * info[i].nvalues); + + for (j = 0; j < info[i].nvalues; j++) + { + /* just point into the array */ + memcpy(&histogram->values[i][j], tmp, info[i].typlen); + tmp += info[i].typlen; + } + } + } + else + { + /* all the other types need a chunk of the buffer */ + histogram->values[i] = (Datum *) ptr; + ptr += (sizeof(Datum) * info[i].nvalues); + + if (info[i].typlen > 0) + { + /* pased by reference, but fixed length (name, tid, ...) */ + for (j = 0; j < info[i].nvalues; j++) + { + /* just point into the array */ + histogram->values[i][j] = PointerGetDatum(tmp); + tmp += info[i].typlen; + } + } + else if (info[i].typlen == -1) + { + /* varlena */ + for (j = 0; j < info[i].nvalues; j++) + { + /* just point into the array */ + histogram->values[i][j] = PointerGetDatum(tmp); + tmp += VARSIZE_ANY(tmp); + } + } + else if (info[i].typlen == -2) + { + /* cstring */ + for (j = 0; j < info[i].nvalues; j++) + { + /* just point into the array */ + histogram->values[i][j] = PointerGetDatum(tmp); + tmp += (strlen(tmp) + 1); /* don't forget the \0 */ + } + } + } + } + + histogram->buckets = (MVSerializedBucket *) ptr; + ptr += (sizeof(MVSerializedBucket) * nbuckets); + + for (i = 0; i < nbuckets; i++) + { + MVSerializedBucket bucket = (MVSerializedBucket) ptr; + + ptr += sizeof(MVSerializedBucketData); + + bucket->ntuples = BUCKET_NTUPLES(tmp); + bucket->nullsonly = BUCKET_NULLS_ONLY(tmp, ndims); + bucket->min_inclusive = BUCKET_MIN_INCL(tmp, ndims); + bucket->max_inclusive = BUCKET_MAX_INCL(tmp, ndims); + + bucket->min = BUCKET_MIN_INDEXES(tmp, ndims); + 
bucket->max = BUCKET_MAX_INDEXES(tmp, ndims); + + histogram->buckets[i] = bucket; + + Assert(tmp <= (char *) data + VARSIZE_ANY(data)); + + tmp += bucketsize; + } + + /* at this point we expect to match the total_length exactly */ + Assert((tmp - VARDATA(data)) == expected_size); + + /* we should exhaust the output buffer exactly */ + Assert((ptr - buff) == bufflen); + + return histogram; +} + +/* + * Build the initial bucket, which will be then split into smaller ones. + */ +static MVBucket +create_initial_mv_bucket(int numrows, HeapTuple *rows, int2vector *attrs, + VacAttrStats **stats) +{ + int i; + int numattrs = attrs->dim1; + HistogramBuild data = NULL; + + /* TODO allocate bucket as a single piece, including all the fields. */ + MVBucket bucket = (MVBucket) palloc0(sizeof(MVBucketData)); + + Assert(numrows > 0); + Assert(rows != NULL); + Assert((numattrs >= 2) && (numattrs <= MVSTATS_MAX_DIMENSIONS)); + + /* allocate the per-dimension arrays */ + + /* flags for null-only dimensions */ + bucket->nullsonly = (bool *) palloc0(numattrs * sizeof(bool)); + + /* inclusiveness boundaries - lower/upper bounds */ + bucket->min_inclusive = (bool *) palloc0(numattrs * sizeof(bool)); + bucket->max_inclusive = (bool *) palloc0(numattrs * sizeof(bool)); + + /* lower/upper boundaries */ + bucket->min = (Datum *) palloc0(numattrs * sizeof(Datum)); + bucket->max = (Datum *) palloc0(numattrs * sizeof(Datum)); + + /* build-data */ + data = (HistogramBuild) palloc0(sizeof(HistogramBuildData)); + + /* number of distinct values (per dimension) */ + data->ndistincts = (uint32 *) palloc0(numattrs * sizeof(uint32)); + + /* all the sample rows fall into the initial bucket */ + data->numrows = numrows; + data->rows = rows; + + bucket->build_data = data; + + /* + * Update the number of ndistinct combinations in the bucket (which we use + * when selecting bucket to partition), and then number of distinct values + * for each partition (which we use when choosing which dimension to + * 
split). + */ + update_bucket_ndistinct(bucket, attrs, stats); + + /* Update ndistinct (and also set min/max) for all dimensions. */ + for (i = 0; i < numattrs; i++) + update_dimension_ndistinct(bucket, i, attrs, stats, true); + + return bucket; +} + +/* + * Choose the bucket to partition next. + * + * The current criteria is rather simple, chosen so that the algorithm produces + * buckets with about equal frequency and regular size. We select the bucket + * with the highest number of distinct values, and then split it by the longest + * dimension. + * + * The distinct values are uniformly mapped to [0,1] interval, and this is used + * to compute length of the value range. + * + * NOTE: This is not the same array used for deduplication, as this contains + * values for all the tuples from the sample, not just the boundary values. + * + * Returns either pointer to the bucket selected to be partitioned, or NULL if + * there are no buckets that may be split (e.g. if all buckets are too small + * or contain too few distinct values). + * + * + * Tricky example + * -------------- + * + * Consider this table: + * + * CREATE TABLE t AS SELECT i AS a, i AS b + * FROM generate_series(1,1000000) s(i); + * + * CREATE STATISTICS s1 ON t (a,b) WITH (histogram); + * + * ANALYZE t; + * + * It's a very specific (and perhaps artificial) example, because every bucket + * always has exactly the same number of distinct values in all dimensions, + * which makes the partitioning tricky. + * + * Then: + * + * SELECT * FROM t WHERE (a < 100) AND (b < 100); + * + * is estimated to return ~120 rows, while in reality it returns only 99. 
+ * + * QUERY PLAN + * ------------------------------------------------------------- + * Seq Scan on t (cost=0.00..19425.00 rows=117 width=8) + * (actual time=0.129..82.776 rows=99 loops=1) + * Filter: ((a < 100) AND (b < 100)) + * Rows Removed by Filter: 999901 + * Planning time: 1.286 ms + * Execution time: 82.984 ms + * (5 rows) + * + * So this estimate is reasonably close. Let's change the query to OR clause: + * + * SELECT * FROM t WHERE (a < 100) OR (b < 100); + * + * QUERY PLAN + * ------------------------------------------------------------- + * Seq Scan on t (cost=0.00..19425.00 rows=8100 width=8) + * (actual time=0.145..99.910 rows=99 loops=1) + * Filter: ((a < 100) OR (b < 100)) + * Rows Removed by Filter: 999901 + * Planning time: 1.578 ms + * Execution time: 100.132 ms + * (5 rows) + * + * That's clearly a much worse estimate. This happens because the histogram + * contains buckets like this: + * + * bucket 592 [3 30310] [30134 30593] => [0.000233] + * + * i.e. the length of "a" dimension is (30310-3)=30307, while the length of "b" + * is (30593-30134)=459. So the "b" dimension is much narrower than "a". + * Of course, there are also buckets where "b" is the wider dimension. + * + * This is partially mitigated by selecting the "longest" dimension but that + * only happens after we already selected the bucket. So if we never select the + * bucket, this optimization does not apply. + * + * The other reason why this particular example behaves so poorly is due to the + * way we actually split the selected bucket. We do attempt to divide the bucket + * into two parts containing about the same number of tuples, but that does not + * too well when most of the tuples is squashed on one side of the bucket. + * + * For example for columns with data on the diagonal (i.e. when a=b), we end up + * with a narrow bucket on the diagonal and a huge bucket overing the remaining + * part (with much lower density). 
+ * + * So perhaps we need two partitioning strategies - one aiming to split buckets + * with high frequency (number of sampled rows), the other aiming to split + * "large" buckets. And alternating between them, somehow. + * + * TODO Consider using similar lower boundary for row count as for simple + * histograms, i.e. 300 tuples per bucket. + */ +static MVBucket +select_bucket_to_partition(int nbuckets, MVBucket *buckets) +{ + int i; + int numrows = 0; + MVBucket bucket = NULL; + + for (i = 0; i < nbuckets; i++) + { + HistogramBuild data = (HistogramBuild) buckets[i]->build_data; + + /* if the number of rows is higher, use this bucket */ + if ((data->ndistinct > 2) && + (data->numrows > numrows) && + (data->numrows >= MIN_BUCKET_ROWS)) + { + bucket = buckets[i]; + numrows = data->numrows; + } + } + + /* may be NULL if there are not buckets with (ndistinct>1) */ + return bucket; +} + +/* + * A simple bucket partitioning implementation - we choose the longest bucket + * dimension, measured using the array of distinct values built at the very + * beginning of the build. + * + * We map all the distinct values to a [0,1] interval, uniformly distributed, + * and then use this to measure length. It's essentially a number of distinct + * values within the range, normalized to [0,1]. + * + * Then we choose a 'middle' value splitting the bucket into two parts with + * roughly the same frequency. + * + * This splits the bucket by tweaking the existing one, and returning the new + * bucket (essentially shrinking the existing one in-place and returning the + * other "half" as a new bucket). The caller is responsible for adding the new + * bucket into the list of buckets. + * + * There are multiple histogram options, centered around the partitioning + * criteria, specifying both how to choose a bucket and the dimension most in + * need of a split. 
 * For a nice summary and general overview, see "rK-Hist : an
 * R-Tree based histogram for multi-dimensional selectivity estimation" thesis
 * by J. A. Lopez, Concordia University, p.34-37 (and possibly p. 32-34 for
 * explanation of the terms).
 *
 * It requires care to prevent splitting only one dimension and not splitting
 * another one at all (which might happen easily in case of strongly dependent
 * columns - e.g. y=x). The current algorithm minimizes this, but may still
 * happen for perfectly dependent examples (when all the dimensions have equal
 * length, the first one will be selected).
 *
 * TODO Should probably consider statistics target for the columns (e.g.
 * to split dimensions with higher statistics target more frequently).
 */
static MVBucket
partition_bucket(MVBucket bucket, int2vector *attrs,
				 VacAttrStats **stats,
				 int *ndistvalues, Datum **distvalues)
{
	int			i;
	int			dimension;
	int			numattrs = attrs->dim1;

	Datum		split_value;
	MVBucket	new_bucket;
	HistogramBuild new_data;

	/* needed for sort, when looking for the split value */
	bool		isNull;
	int			nvalues = 0;
	HistogramBuild data = (HistogramBuild) bucket->build_data;
	StdAnalyzeData *mystats = NULL;
	ScalarItem *values = (ScalarItem *) palloc0(data->numrows * sizeof(ScalarItem));
	SortSupportData ssup;

	int			nrows = 1;		/* number of rows below current value */
	double		delta;

	/* needed when splitting the values */
	HeapTuple  *oldrows = data->rows;
	int			oldnrows = data->numrows;

	/*
	 * We can't split buckets with a single distinct value (this also
	 * disqualifies NULL-only dimensions). Also, there has to be multiple
	 * sample rows (otherwise, how could there be more distinct values).
	 */
	Assert(data->ndistinct > 1);
	Assert(data->numrows > 1);
	Assert((numattrs >= 2) && (numattrs <= MVSTATS_MAX_DIMENSIONS));

	/* Look for the next dimension to split. */
	delta = 0.0;
	dimension = -1;

	for (i = 0; i < numattrs; i++)
	{
		Datum	   *a,
				   *b;

		mystats = (StdAnalyzeData *) stats[i]->extra_data;

		/* initialize sort support, etc. */
		memset(&ssup, 0, sizeof(ssup));
		ssup.ssup_cxt = CurrentMemoryContext;

		/* We always use the default collation for statistics */
		ssup.ssup_collation = DEFAULT_COLLATION_OID;
		ssup.ssup_nulls_first = false;

		PrepareSortSupportFromOrderingOp(mystats->ltopr, &ssup);

		/* can't split NULL-only dimension */
		if (bucket->nullsonly[i])
			continue;

		/* can't split dimension with a single ndistinct value */
		if (data->ndistincts[i] <= 1)
			continue;

		/* search for min/max boundaries in the distinct list */
		a = (Datum *) bsearch_arg(&bucket->min[i],
								  distvalues[i], ndistvalues[i],
								  sizeof(Datum), compare_scalars_simple, &ssup);

		b = (Datum *) bsearch_arg(&bucket->max[i],
								  distvalues[i], ndistvalues[i],
								  sizeof(Datum), compare_scalars_simple, &ssup);

		/*
		 * if this dimension is 'larger' then partition by it
		 *
		 * (b - a) is the number of distinct values between the boundaries;
		 * dividing by the total ndistvalues[i] normalizes it to [0,1].
		 */
		if (((b - a) * 1.0 / ndistvalues[i]) > delta)
		{
			delta = ((b - a) * 1.0 / ndistvalues[i]);
			dimension = i;
		}
	}

	/*
	 * If we haven't found a dimension here, we've done something wrong in
	 * select_bucket_to_partition.
	 */
	Assert(dimension != -1);

	/*
	 * Walk through the selected dimension, collect and sort the values and
	 * then choose the value to use as the new boundary.
	 */
	mystats = (StdAnalyzeData *) stats[dimension]->extra_data;

	/* initialize sort support, etc. */
	memset(&ssup, 0, sizeof(ssup));
	ssup.ssup_cxt = CurrentMemoryContext;

	/* We always use the default collation for statistics */
	ssup.ssup_collation = DEFAULT_COLLATION_OID;
	ssup.ssup_nulls_first = false;

	PrepareSortSupportFromOrderingOp(mystats->ltopr, &ssup);

	for (i = 0; i < data->numrows; i++)
	{
		/*
		 * remember the index of the sample row, to make the partitioning
		 * simpler
		 */
		values[nvalues].value = heap_getattr(data->rows[i], attrs->values[dimension],
											 stats[dimension]->tupDesc, &isNull);
		values[nvalues].tupno = i;

		/* no NULL values allowed here (we never split null-only dimension) */
		Assert(!isNull);

		nvalues++;
	}

	/* sort the array of values */
	qsort_arg((void *) values, nvalues, sizeof(ScalarItem),
			  compare_scalars_partition, (void *) &ssup);

	/*
	 * We know there are data->ndistincts[dimension] distinct values in this
	 * dimension, and we want to split this into half, so walk through the
	 * array and stop once we see (ndistinct/2) values.
	 *
	 * We always choose the "next" value, i.e. (n/2+1)-th distinct value, and
	 * use it as an exclusive upper boundary (and inclusive lower boundary).
	 *
	 * TODO Maybe we should use "average" of the two middle distinct values
	 * (at least for even distinct counts), but that would require being able
	 * to do an average (which does not work for non-numeric types).
	 *
	 * TODO Another option is to look for a split that'd give about 50% tuples
	 * (not distinct values) in each partition. That might work better when
	 * there are a few very frequent values, and many rare ones.
	 *
	 * NOTE(review): the comparison (values[i].value != values[i - 1].value)
	 * compares raw Datum values, which only detects changes reliably for
	 * pass-by-value types — confirm against the per-type FIXMEs elsewhere in
	 * this file.
	 */
	delta = fabs(data->numrows);
	split_value = values[0].value;

	for (i = 1; i < data->numrows; i++)
	{
		if (values[i].value != values[i - 1].value)
		{
			/* are we closer to splitting the bucket in half? */
			if (fabs(i - data->numrows / 2.0) < delta)
			{
				/* let's assume we'll use this value for the split */
				split_value = values[i].value;
				delta = fabs(i - data->numrows / 2.0);
				nrows = i;
			}
		}
	}

	Assert(nrows > 0);
	Assert(nrows < data->numrows);

	/*
	 * create the new bucket as an (incomplete) copy of the one being
	 * partitioned.
	 */
	new_bucket = copy_mv_bucket(bucket, numattrs);
	new_data = (HistogramBuild) new_bucket->build_data;

	/*
	 * Do the actual split of the chosen dimension, using the split value as
	 * the upper bound for the existing bucket, and lower bound for the new
	 * one.
	 */
	bucket->max[dimension] = split_value;
	new_bucket->min[dimension] = split_value;

	/*
	 * We also treat only one side of the new boundary as inclusive, in the
	 * bucket where it happens to be the upper boundary. We never set the
	 * min_inclusive[] to false anywhere, but we set it to true anyway.
	 */
	bucket->max_inclusive[dimension] = false;
	new_bucket->min_inclusive[dimension] = true;

	/*
	 * Redistribute the sample tuples using the 'ScalarItem->tupno' index. We
	 * know 'nrows' rows should remain in the original bucket and the rest
	 * goes to the new one.
	 */

	data->rows = (HeapTuple *) palloc0(nrows * sizeof(HeapTuple));
	new_data->rows = (HeapTuple *) palloc0((oldnrows - nrows) * sizeof(HeapTuple));

	data->numrows = nrows;
	new_data->numrows = (oldnrows - nrows);

	/*
	 * The first nrows should go to the first bucket, the rest should go to
	 * the new one. Use the tupno field to get the actual HeapTuple row from
	 * the original array of sample rows.
	 */
	for (i = 0; i < nrows; i++)
		memcpy(&data->rows[i], &oldrows[values[i].tupno], sizeof(HeapTuple));

	for (i = nrows; i < oldnrows; i++)
		memcpy(&new_data->rows[i - nrows], &oldrows[values[i].tupno], sizeof(HeapTuple));

	/* update ndistinct values for the buckets (total and per dimension) */
	update_bucket_ndistinct(bucket, attrs, stats);
	update_bucket_ndistinct(new_bucket, attrs, stats);

	/*
	 * TODO We don't need to do this for the dimension we used for split,
	 * because we know how many distinct values went to each partition.
	 */
	for (i = 0; i < numattrs; i++)
	{
		update_dimension_ndistinct(bucket, i, attrs, stats, false);
		update_dimension_ndistinct(new_bucket, i, attrs, stats, false);
	}

	pfree(oldrows);
	pfree(values);

	return new_bucket;
}

/*
 * Copy a histogram bucket. The copy does not include the build-time data, i.e.
 * sampled rows etc.
 */
static MVBucket
copy_mv_bucket(MVBucket bucket, uint32 ndimensions)
{
	/* TODO allocate as a single piece (including all the fields) */
	MVBucket	new_bucket = (MVBucket) palloc0(sizeof(MVBucketData));
	HistogramBuild data = (HistogramBuild) palloc0(sizeof(HistogramBuildData));

	/*
	 * Copy only the attributes that will stay the same after the split, and
	 * we'll recompute the rest after the split.
	 */

	/* allocate the per-dimension arrays */
	new_bucket->nullsonly = (bool *) palloc0(ndimensions * sizeof(bool));

	/* inclusiveness boundaries - lower/upper bounds */
	new_bucket->min_inclusive = (bool *) palloc0(ndimensions * sizeof(bool));
	new_bucket->max_inclusive = (bool *) palloc0(ndimensions * sizeof(bool));

	/* lower/upper boundaries */
	new_bucket->min = (Datum *) palloc0(ndimensions * sizeof(Datum));
	new_bucket->max = (Datum *) palloc0(ndimensions * sizeof(Datum));

	/* copy data */
	memcpy(new_bucket->nullsonly, bucket->nullsonly, ndimensions * sizeof(bool));

	memcpy(new_bucket->min_inclusive, bucket->min_inclusive, ndimensions * sizeof(bool));
	memcpy(new_bucket->min, bucket->min, ndimensions * sizeof(Datum));

	memcpy(new_bucket->max_inclusive, bucket->max_inclusive, ndimensions * sizeof(bool));
	memcpy(new_bucket->max, bucket->max, ndimensions * sizeof(Datum));

	/* allocate and copy the interesting part of the build data */
	data->ndistincts = (uint32 *) palloc0(ndimensions * sizeof(uint32));

	new_bucket->build_data = data;

	return new_bucket;
}

/*
 * Counts the number of distinct (multi-column) value combinations in the
 * bucket, storing the result in build_data->ndistinct.
 *
 * NOTE(review): the original comment claimed a memcmp-based comparator
 * limited to pass-by-value types, but the code sorts with
 * multi_sort_compare() built via multi_sort_add_dimension() — confirm
 * whether the pass-by-value restriction still applies here.
 */
static void
update_bucket_ndistinct(MVBucket bucket, int2vector *attrs, VacAttrStats **stats)
{
	int			i,
				j;
	int			numattrs = attrs->dim1;

	HistogramBuild data = (HistogramBuild) bucket->build_data;
	int			numrows = data->numrows;

	MultiSortSupport mss = multi_sort_init(numattrs);

	/*
	 * We could collect this while walking through all the attributes above
	 * (this way we have to call heap_getattr twice).
	 */
	SortItem   *items = (SortItem *) palloc0(numrows * sizeof(SortItem));
	Datum	   *values = (Datum *) palloc0(numrows * sizeof(Datum) * numattrs);
	bool	   *isnull = (bool *) palloc0(numrows * sizeof(bool) * numattrs);

	/* point each item's values/isnull into the shared flat arrays */
	for (i = 0; i < numrows; i++)
	{
		items[i].values = &values[i * numattrs];
		items[i].isnull = &isnull[i * numattrs];
	}

	/* prepare the sort functions for all dimensions */
	for (i = 0; i < numattrs; i++)
		multi_sort_add_dimension(mss, i, i, stats);

	/* collect the values */
	for (i = 0; i < numrows; i++)
		for (j = 0; j < numattrs; j++)
			items[i].values[j]
				= heap_getattr(data->rows[i], attrs->values[j],
							   stats[j]->tupDesc, &items[i].isnull[j]);

	qsort_arg((void *) items, numrows, sizeof(SortItem),
			  multi_sort_compare, mss);

	/* count groups of equal consecutive items in the sorted array */
	data->ndistinct = 1;

	for (i = 1; i < numrows; i++)
		if (multi_sort_compare(&items[i], &items[i - 1], mss) != 0)
			data->ndistinct += 1;

	pfree(items);
	pfree(values);
	pfree(isnull);
}

/*
 * Count distinct values per bucket dimension.
 */
static void
update_dimension_ndistinct(MVBucket bucket, int dimension, int2vector *attrs,
						   VacAttrStats **stats, bool update_boundaries)
{
	int			j;
	int			nvalues = 0;
	bool		isNull;
	HistogramBuild data = (HistogramBuild) bucket->build_data;
	Datum	   *values = (Datum *) palloc0(data->numrows * sizeof(Datum));
	SortSupportData ssup;

	StdAnalyzeData *mystats = (StdAnalyzeData *) stats[dimension]->extra_data;

	/* we may already know this is a NULL-only dimension */
	if (bucket->nullsonly[dimension])
		data->ndistincts[dimension] = 1;

	memset(&ssup, 0, sizeof(ssup));
	ssup.ssup_cxt = CurrentMemoryContext;

	/* We always use the default collation for statistics */
	ssup.ssup_collation = DEFAULT_COLLATION_OID;
	ssup.ssup_nulls_first = false;

	PrepareSortSupportFromOrderingOp(mystats->ltopr, &ssup);

	/* collect the non-NULL values of this dimension from the sample rows */
	for (j = 0; j < data->numrows; j++)
	{
		values[nvalues] = heap_getattr(data->rows[j], attrs->values[dimension],
									   stats[dimension]->tupDesc, &isNull);

		/* ignore NULL values */
		if (!isNull)
			nvalues++;
	}

	/* there's always at least 1 distinct value (may be NULL) */
	data->ndistincts[dimension] = 1;

	/*
	 * if there are only NULL values in the column, mark it so and continue
	 * with the next one
	 */
	if (nvalues == 0)
	{
		pfree(values);
		bucket->nullsonly[dimension] = true;
		return;
	}

	/* sort the array (pass-by-value datums) */
	qsort_arg((void *) values, nvalues, sizeof(Datum),
			  compare_scalars_simple, (void *) &ssup);

	/*
	 * Update min/max boundaries to the smallest bounding box. Generally, this
	 * needs to be done only when constructing the initial bucket.
	 */
	if (update_boundaries)
	{
		/* store the min/max values */
		bucket->min[dimension] = values[0];
		bucket->min_inclusive[dimension] = true;

		bucket->max[dimension] = values[nvalues - 1];
		bucket->max_inclusive[dimension] = true;
	}

	/*
	 * Walk through the array and count distinct values by comparing
	 * succeeding values.
	 *
	 * FIXME This only works for pass-by-value types (i.e. not VARCHARs etc.).
	 * Although thanks to the deduplication it might work even for those types
	 * (equal values will get the same item in the deduplicated array).
	 */
	for (j = 1; j < nvalues; j++)
	{
		if (values[j] != values[j - 1])
			data->ndistincts[dimension] += 1;
	}

	pfree(values);
}

/*
 * A properly built histogram must not contain buckets mixing NULL and non-NULL
 * values in a single dimension. Each dimension may either be marked as 'nulls
 * only', and thus containing only NULL values, or it must not contain any NULL
 * values.
 *
 * Therefore, if the sample contains NULL values in any of the columns, it's
 * necessary to build those NULL-buckets. This is done in an iterative way
 * using this algorithm, operating on a single bucket:
 *
 *     (1) Check that all dimensions are well-formed (not mixing NULL and
 *         non-NULL values).
 *
 *     (2) If all dimensions are well-formed, terminate.
 *
 *     (3) If the dimension contains only NULL values, but is not marked as
 *         NULL-only, mark it as NULL-only and run the algorithm again (on
 *         this bucket).
 *
 *     (4) If the dimension mixes NULL and non-NULL values, split the bucket
 *         into two parts - one with NULL values, one with non-NULL values
 *         (replacing the current one). Then run the algorithm on both buckets.
 *
 * This is executed in a recursive manner, but the number of executions should
 * be quite low - limited by the number of NULL-buckets. Also, in each branch
 * the number of nested calls is limited by the number of dimensions
 * (attributes) of the histogram.
 *
 * At the end, there should be buckets with no mixed dimensions. The number of
 * buckets produced by this algorithm is rather limited - with N dimensions,
 * there may be only 2^N such buckets (each dimension may be either NULL or
 * non-NULL). So with 8 dimensions (current value of MVSTATS_MAX_DIMENSIONS)
 * there may be only 256 such buckets.
+ * + * After this, a 'regular' bucket-split algorithm shall run, further optimizing + * the histogram. + */ +static void +create_null_buckets(MVHistogram histogram, int bucket_idx, + int2vector *attrs, VacAttrStats **stats) +{ + int i, + j; + int null_dim = -1; + int null_count = 0; + bool null_found = false; + MVBucket bucket, + null_bucket; + int null_idx, + curr_idx; + HistogramBuild data, + null_data; + + /* remember original values from the bucket */ + int numrows; + HeapTuple *oldrows = NULL; + + Assert(bucket_idx < histogram->nbuckets); + Assert(histogram->ndimensions == attrs->dim1); + + bucket = histogram->buckets[bucket_idx]; + data = (HistogramBuild) bucket->build_data; + + numrows = data->numrows; + oldrows = data->rows; + + /* + * Walk through all rows / dimensions, and stop once we find NULL in a + * dimension not yet marked as NULL-only. + */ + for (i = 0; i < data->numrows; i++) + { + /* + * FIXME We don't need to start from the first attribute here - we can + * start from the last known dimension. + */ + for (j = 0; j < histogram->ndimensions; j++) + { + /* Is this a NULL-only dimension? If yes, skip. */ + if (bucket->nullsonly[j]) + continue; + + /* found a NULL in that dimension? */ + if (heap_attisnull(data->rows[i], attrs->values[j])) + { + null_found = true; + null_dim = j; + break; + } + } + + /* terminate if we found attribute with NULL values */ + if (null_found) + break; + } + + /* no regular dimension contains NULL values => we're done */ + if (!null_found) + return; + + /* walk through the rows again, count NULL values in 'null_dim' */ + for (i = 0; i < data->numrows; i++) + { + if (heap_attisnull(data->rows[i], attrs->values[null_dim])) + null_count += 1; + } + + Assert(null_count <= data->numrows); + + /* + * If (null_count == numrows) the dimension already is NULL-only, but is + * not yet marked like that. It's enough to mark it and repeat the process + * recursively (until we run out of dimensions). 
+ */ + if (null_count == data->numrows) + { + bucket->nullsonly[null_dim] = true; + create_null_buckets(histogram, bucket_idx, attrs, stats); + return; + } + + /* + * We have to split the bucket into two - one with NULL values in the + * dimension, one with non-NULL values. We don't need to sort the data or + * anything, but otherwise it's similar to what partition_bucket() does. + */ + + /* create bucket with NULL-only dimension 'dim' */ + null_bucket = copy_mv_bucket(bucket, histogram->ndimensions); + null_data = (HistogramBuild) null_bucket->build_data; + + /* remember the current array info */ + oldrows = data->rows; + numrows = data->numrows; + + /* we'll keep non-NULL values in the current bucket */ + data->numrows = (numrows - null_count); + data->rows + = (HeapTuple *) palloc0(data->numrows * sizeof(HeapTuple)); + + /* and the NULL values will go to the new one */ + null_data->numrows = null_count; + null_data->rows + = (HeapTuple *) palloc0(null_data->numrows * sizeof(HeapTuple)); + + /* mark the dimension as NULL-only (in the new bucket) */ + null_bucket->nullsonly[null_dim] = true; + + /* walk through the sample rows and distribute them accordingly */ + null_idx = 0; + curr_idx = 0; + for (i = 0; i < numrows; i++) + { + if (heap_attisnull(oldrows[i], attrs->values[null_dim])) + /* NULL => copy to the new bucket */ + memcpy(&null_data->rows[null_idx++], &oldrows[i], + sizeof(HeapTuple)); + else + memcpy(&data->rows[curr_idx++], &oldrows[i], + sizeof(HeapTuple)); + } + + /* update ndistinct values for the buckets (total and per dimension) */ + update_bucket_ndistinct(bucket, attrs, stats); + update_bucket_ndistinct(null_bucket, attrs, stats); + + /* + * TODO We don't need to do this for the dimension we used for split, + * because we know how many distinct values went to each bucket (NULL is + * not a value, so NULL buckets get 0, and the other bucket got all the + * distinct values). 
+ */ + for (i = 0; i < histogram->ndimensions; i++) + { + update_dimension_ndistinct(bucket, i, attrs, stats, false); + update_dimension_ndistinct(null_bucket, i, attrs, stats, false); + } + + pfree(oldrows); + + /* add the NULL bucket to the histogram */ + histogram->buckets[histogram->nbuckets++] = null_bucket; + + /* + * And now run the function recursively on both buckets (the new one + * first, because the call may change number of buckets, and it's used as + * an index). + */ + create_null_buckets(histogram, (histogram->nbuckets - 1), attrs, stats); + create_null_buckets(histogram, bucket_idx, attrs, stats); +} + +/* + * SRF with details about buckets of a histogram: + * + * - bucket ID (0...nbuckets) + * - min values (string array) + * - max values (string array) + * - nulls only (boolean array) + * - min inclusive flags (boolean array) + * - max inclusive flags (boolean array) + * - frequency (double precision) + * + * The input is the OID of the statistics, and there are no rows returned if the + * statistics contains no histogram (or if there's no statistics for the OID). + * + * The second parameter (type) determines what values will be returned + * in the (minvals,maxvals). There are three possible values: + * + * 0 (actual values) + * ----------------- + * - prints actual values + * - using the output function of the data type (as string) + * - handy for investigating the histogram + * + * 1 (distinct index) + * ------------------ + * - prints index of the distinct value (into the serialized array) + * - makes it easier to spot neighbor buckets, etc. 
+ * - handy for plotting the histogram + * + * 2 (normalized distinct index) + * ----------------------------- + * - prints index of the distinct value, but normalized into [0,1] + * - similar to 1, but shows how 'long' the bucket range is + * - handy for plotting the histogram + * + * When plotting the histogram, be careful as the (1) and (2) options skew the + * lengths by distributing the distinct values uniformly. For data types + * without a clear meaning of 'distance' (e.g. strings) that is not a big deal, + * but for numbers it may be confusing. + */ +PG_FUNCTION_INFO_V1(pg_mv_histogram_buckets); + +#define OUTPUT_FORMAT_RAW 0 +#define OUTPUT_FORMAT_INDEXES 1 +#define OUTPUT_FORMAT_DISTINCT 2 + +Datum +pg_mv_histogram_buckets(PG_FUNCTION_ARGS) +{ + FuncCallContext *funcctx; + int call_cntr; + int max_calls; + TupleDesc tupdesc; + AttInMetadata *attinmeta; + + Oid mvoid = PG_GETARG_OID(0); + int otype = PG_GETARG_INT32(1); + + if ((otype < 0) || (otype > 2)) + elog(ERROR, "invalid output type specified"); + + /* stuff done only on the first call of the function */ + if (SRF_IS_FIRSTCALL()) + { + MemoryContext oldcontext; + MVSerializedHistogram histogram; + + /* create a function context for cross-call persistence */ + funcctx = SRF_FIRSTCALL_INIT(); + + /* switch to memory context appropriate for multiple function calls */ + oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); + + histogram = load_mv_histogram(mvoid); + + funcctx->user_fctx = histogram; + + /* total number of tuples to be returned */ + funcctx->max_calls = 0; + if (funcctx->user_fctx != NULL) + funcctx->max_calls = histogram->nbuckets; + + /* Build a tuple descriptor for our result type */ + if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("function returning record called in context " + "that cannot accept type record"))); + + /* + * generate attribute metadata needed later to produce 
tuples from raw + * C strings + */ + attinmeta = TupleDescGetAttInMetadata(tupdesc); + funcctx->attinmeta = attinmeta; + + MemoryContextSwitchTo(oldcontext); + } + + /* stuff done on every call of the function */ + funcctx = SRF_PERCALL_SETUP(); + + call_cntr = funcctx->call_cntr; + max_calls = funcctx->max_calls; + attinmeta = funcctx->attinmeta; + + if (call_cntr < max_calls) /* do when there is more left to send */ + { + char **values; + HeapTuple tuple; + Datum result; + int2vector *stakeys; + Oid relid; + double bucket_volume = 1.0; + StringInfo bufs; + + char *format; + int i; + + Oid *outfuncs; + FmgrInfo *fmgrinfo; + + MVSerializedHistogram histogram; + MVSerializedBucket bucket; + + histogram = (MVSerializedHistogram) funcctx->user_fctx; + + Assert(call_cntr < histogram->nbuckets); + + bucket = histogram->buckets[call_cntr]; + + stakeys = find_mv_attnums(mvoid, &relid); + + /* + * The scalar values will be formatted directly, using snprintf. + * + * The 'array' values will be formatted through StringInfo. + */ + values = (char **) palloc0(9 * sizeof(char *)); + bufs = (StringInfo) palloc0(9 * sizeof(StringInfoData)); + + values[0] = (char *) palloc(64 * sizeof(char)); + + initStringInfo(&bufs[1]); /* lower boundaries */ + initStringInfo(&bufs[2]); /* upper boundaries */ + initStringInfo(&bufs[3]); /* nulls-only */ + initStringInfo(&bufs[4]); /* lower inclusive */ + initStringInfo(&bufs[5]); /* upper inclusive */ + + values[6] = (char *) palloc(64 * sizeof(char)); + values[7] = (char *) palloc(64 * sizeof(char)); + values[8] = (char *) palloc(64 * sizeof(char)); + + /* we need to do this only when printing the actual values */ + outfuncs = (Oid *) palloc0(sizeof(Oid) * histogram->ndimensions); + fmgrinfo = (FmgrInfo *) palloc0(sizeof(FmgrInfo) * histogram->ndimensions); + + /* + * lookup output functions for all histogram dimensions + * + * XXX This might be one in the first call and stored in user_fctx. 
+ */ + for (i = 0; i < histogram->ndimensions; i++) + { + bool isvarlena; + + getTypeOutputInfo(get_atttype(relid, stakeys->values[i]), + &outfuncs[i], &isvarlena); + + fmgr_info(outfuncs[i], &fmgrinfo[i]); + } + + snprintf(values[0], 64, "%d", call_cntr); /* bucket ID */ + + /* + * for the arrays of lower/upper boundaries, formated according to + * otype + */ + for (i = 0; i < histogram->ndimensions; i++) + { + Datum *vals = histogram->values[i]; + + uint16 minidx = bucket->min[i]; + uint16 maxidx = bucket->max[i]; + + /* + * compute bucket volume, using distinct values as a measure + * + * XXX Not really sure what to do for NULL dimensions here, so + * let's simply count them as '1'. + */ + bucket_volume + *= (double) (maxidx - minidx + 1) / (histogram->nvalues[i] - 1); + + if (i == 0) + format = "{%s"; /* fist dimension */ + else if (i < (histogram->ndimensions - 1)) + format = ", %s"; /* medium dimensions */ + else + format = ", %s}"; /* last dimension */ + + appendStringInfo(&bufs[3], format, bucket->nullsonly[i] ? "t" : "f"); + appendStringInfo(&bufs[4], format, bucket->min_inclusive[i] ? "t" : "f"); + appendStringInfo(&bufs[5], format, bucket->max_inclusive[i] ? 
"t" : "f"); + + /* + * for NULL-only dimension, simply put there the NULL and + * continue + */ + if (bucket->nullsonly[i]) + { + if (i == 0) + format = "{%s"; + else if (i < (histogram->ndimensions - 1)) + format = ", %s"; + else + format = ", %s}"; + + appendStringInfo(&bufs[1], format, "NULL"); + appendStringInfo(&bufs[2], format, "NULL"); + + continue; + } + + /* otherwise we really need to format the value */ + switch (otype) + { + case OUTPUT_FORMAT_RAW: /* actual boundary values */ + + if (i == 0) + format = "{%s"; + else if (i < (histogram->ndimensions - 1)) + format = ", %s"; + else + format = ", %s}"; + + appendStringInfo(&bufs[1], format, + FunctionCall1(&fmgrinfo[i], vals[minidx])); + + appendStringInfo(&bufs[2], format, + FunctionCall1(&fmgrinfo[i], vals[maxidx])); + + break; + + case OUTPUT_FORMAT_INDEXES: /* indexes into deduplicated + * arrays */ + + if (i == 0) + format = "{%d"; + else if (i < (histogram->ndimensions - 1)) + format = ", %d"; + else + format = ", %d}"; + + appendStringInfo(&bufs[1], format, minidx); + + appendStringInfo(&bufs[2], format, maxidx); + + break; + + case OUTPUT_FORMAT_DISTINCT: /* distinct arrays as measure */ + + if (i == 0) + format = "{%f"; + else if (i < (histogram->ndimensions - 1)) + format = ", %f"; + else + format = ", %f}"; + + appendStringInfo(&bufs[1], format, + (minidx * 1.0 / (histogram->nvalues[i] - 1))); + + appendStringInfo(&bufs[2], format, + (maxidx * 1.0 / (histogram->nvalues[i] - 1))); + + break; + + default: + elog(ERROR, "unknown output type: %d", otype); + } + } + + values[1] = bufs[1].data; + values[2] = bufs[2].data; + values[3] = bufs[3].data; + values[4] = bufs[4].data; + values[5] = bufs[5].data; + + snprintf(values[6], 64, "%f", bucket->ntuples); /* frequency */ + snprintf(values[7], 64, "%f", bucket->ntuples / bucket_volume); /* density */ + snprintf(values[8], 64, "%f", bucket_volume); /* volume (as a + * fraction) */ + + /* build a tuple */ + tuple = BuildTupleFromCStrings(attinmeta, 
values); + + /* make the tuple into a datum */ + result = HeapTupleGetDatum(tuple); + + /* clean up (this is not really necessary) */ + pfree(values[0]); + pfree(values[6]); + pfree(values[7]); + pfree(values[8]); + + resetStringInfo(&bufs[1]); + resetStringInfo(&bufs[2]); + resetStringInfo(&bufs[3]); + resetStringInfo(&bufs[4]); + resetStringInfo(&bufs[5]); + + pfree(bufs); + pfree(values); + + SRF_RETURN_NEXT(funcctx, result); + } + else /* do when there is no more left */ + { + SRF_RETURN_DONE(funcctx); + } +} + +/* + * pg_histogram_in - input routine for type pg_histogram. + * + * pg_histogram is real enough to be a table column, but it has no operations + * of its own, and disallows input too + * + * XXX This is inspired by what pg_node_tree does. + */ +Datum +pg_histogram_in(PG_FUNCTION_ARGS) +{ + /* + * pg_node_list stores the data in binary form and parsing text input is + * not needed, so disallow this. + */ + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot accept a value of type %s", "pg_histogram"))); + + PG_RETURN_VOID(); /* keep compiler quiet */ +} + +/* + * pg_histogram - output routine for type PG_HISTOGRAM. + * + * histograms are serialized into a bytea value, so we simply call byteaout() + * to serialize the value into text. But it'd be nice to serialize that into + * a meaningful representation (e.g. for inspection by people). + * + * FIXME not implemented yet, returning dummy value + */ +Datum +pg_histogram_out(PG_FUNCTION_ARGS) +{ + return byteaout(fcinfo); +} + +/* + * pg_histogram_recv - binary input routine for type pg_histogram. + */ +Datum +pg_histogram_recv(PG_FUNCTION_ARGS) +{ + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot accept a value of type %s", "pg_histogram"))); + + PG_RETURN_VOID(); /* keep compiler quiet */ +} + +/* + * pg_histogram_send - binary output routine for type pg_histogram. + * + * XXX Histograms are serialized into a bytea value, so let's just send that. 
+ */ +Datum +pg_histogram_send(PG_FUNCTION_ARGS) +{ + return byteasend(fcinfo); +} + +#ifdef DEBUG_MVHIST +/* + * prints debugging info about matched histogram buckets (full/partial) + * + * XXX Currently works only for INT data type. + */ +void +debug_histogram_matches(MVSerializedHistogram mvhist, char *matches) +{ + int i, + j; + + float ffull = 0, + fpartial = 0; + int nfull = 0, + npartial = 0; + + StringInfoData buf; + + initStringInfo(&buf); + + for (i = 0; i < mvhist->nbuckets; i++) + { + MVSerializedBucket bucket = mvhist->buckets[i]; + + if (!matches[i]) + continue; + + /* increment the counters */ + nfull += (matches[i] == MVSTATS_MATCH_FULL) ? 1 : 0; + npartial += (matches[i] == MVSTATS_MATCH_PARTIAL) ? 1 : 0; + + /* and also update the frequencies */ + ffull += (matches[i] == MVSTATS_MATCH_FULL) ? bucket->ntuples : 0; + fpartial += (matches[i] == MVSTATS_MATCH_PARTIAL) ? bucket->ntuples : 0; + + resetStringInfo(&buf); + + /* build ranges for all the dimentions */ + for (j = 0; j < mvhist->ndimensions; j++) + { + appendStringInfo(&buf, '[%d %d]', + DatumGetInt32(mvhist->values[j][bucket->min[j]]), + DatumGetInt32(mvhist->values[j][bucket->max[j]])); + } + + elog(WARNING, "bucket %d %s => %d [%f]", i, buf.data, matches[i], bucket->ntuples); + } + + elog(WARNING, "full=%f partial=%f (%f)", ffull, fpartial, (ffull + 0.5 * fpartial)); +} +#endif diff --git a/src/bin/psql/describe.c b/src/bin/psql/describe.c index db74d93..cf73aec 100644 --- a/src/bin/psql/describe.c +++ b/src/bin/psql/describe.c @@ -2298,8 +2298,8 @@ describeOneTableDetails(const char *schemaname, { printfPQExpBuffer(&buf, "SELECT oid, stanamespace::regnamespace AS nsp, staname, stakeys,\n" - " ndist_enabled, deps_enabled, mcv_enabled,\n" - " ndist_built, deps_built, mcv_built,\n" + " ndist_enabled, deps_enabled, mcv_enabled, hist_enabled,\n" + " ndist_built, deps_built, mcv_built, hist_built,\n" " (SELECT string_agg(attname::text,', ')\n" " FROM ((SELECT unnest(stakeys) AS attnum) s\n" " 
JOIN pg_attribute a ON (starelid = a.attrelid and a.attnum = s.attnum))) AS attnums\n" @@ -2342,8 +2342,17 @@ describeOneTableDetails(const char *schemaname, first = false; } + if (!strcmp(PQgetvalue(result, i, 6), "t")) + { + if (!first) + appendPQExpBuffer(&buf, ", histogram"); + else + appendPQExpBuffer(&buf, "(histogram"); + first = false; + } + appendPQExpBuffer(&buf, ") ON (%s)", - PQgetvalue(result, i, 9)); + PQgetvalue(result, i, 12)); printTableAddFooter(&cont, buf.data); } diff --git a/src/include/catalog/pg_cast.h b/src/include/catalog/pg_cast.h index 80d8ea2..f62ba50 100644 --- a/src/include/catalog/pg_cast.h +++ b/src/include/catalog/pg_cast.h @@ -266,6 +266,9 @@ DATA(insert ( 3358 25 0 i i )); DATA(insert ( 441 17 0 i b )); DATA(insert ( 441 25 0 i i )); +/* pg_histogram can be coerced to, but not from, bytea */ +DATA(insert ( 774 17 0 i b )); + /* * Datetime category diff --git a/src/include/catalog/pg_mv_statistic.h b/src/include/catalog/pg_mv_statistic.h index 34049d6..d30d3cd9 100644 --- a/src/include/catalog/pg_mv_statistic.h +++ b/src/include/catalog/pg_mv_statistic.h @@ -40,11 +40,13 @@ CATALOG(pg_mv_statistic,3381) bool ndist_enabled; /* build ndist coefficient? */ bool deps_enabled; /* analyze dependencies? */ bool mcv_enabled; /* build MCV list? */ + bool hist_enabled; /* build histogram? 
*/ /* statistics that are available (if requested) */ bool ndist_built; /* ndistinct coeff built */ bool deps_built; /* dependencies were built */ bool mcv_built; /* MCV list was built */ + bool hist_built; /* histogram was built */ /* * variable-length fields start here, but we allow direct access to @@ -56,6 +58,7 @@ CATALOG(pg_mv_statistic,3381) pg_ndistinct standist; /* ndistinct coeff (serialized) */ pg_dependencies stadeps; /* dependencies (serialized) */ pg_mcv_list stamcv; /* MCV list (serialized) */ + pg_histogram stahist; /* MV histogram (serialized) */ #endif } FormData_pg_mv_statistic; @@ -71,7 +74,7 @@ typedef FormData_pg_mv_statistic *Form_pg_mv_statistic; * compiler constants for pg_mv_statistic * ---------------- */ -#define Natts_pg_mv_statistic 14 +#define Natts_pg_mv_statistic 17 #define Anum_pg_mv_statistic_starelid 1 #define Anum_pg_mv_statistic_staname 2 #define Anum_pg_mv_statistic_stanamespace 3 @@ -79,12 +82,15 @@ typedef FormData_pg_mv_statistic *Form_pg_mv_statistic; #define Anum_pg_mv_statistic_ndist_enabled 5 #define Anum_pg_mv_statistic_deps_enabled 6 #define Anum_pg_mv_statistic_mcv_enabled 7 -#define Anum_pg_mv_statistic_ndist_built 8 -#define Anum_pg_mv_statistic_deps_built 9 -#define Anum_pg_mv_statistic_mcv_built 10 -#define Anum_pg_mv_statistic_stakeys 11 -#define Anum_pg_mv_statistic_standist 12 -#define Anum_pg_mv_statistic_stadeps 13 -#define Anum_pg_mv_statistic_stamcv 14 +#define Anum_pg_mv_statistic_hist_enabled 8 +#define Anum_pg_mv_statistic_ndist_built 9 +#define Anum_pg_mv_statistic_deps_built 10 +#define Anum_pg_mv_statistic_mcv_built 11 +#define Anum_pg_mv_statistic_hist_built 12 +#define Anum_pg_mv_statistic_stakeys 13 +#define Anum_pg_mv_statistic_standist 14 +#define Anum_pg_mv_statistic_stadeps 15 +#define Anum_pg_mv_statistic_stamcv 16 +#define Anum_pg_mv_statistic_stahist 17 #endif /* PG_MV_STATISTIC_H */ diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h index 7cf1e5a..653bf1a 100644 --- 
a/src/include/catalog/pg_proc.h +++ b/src/include/catalog/pg_proc.h @@ -2730,6 +2730,10 @@ DATA(insert OID = 3376 ( pg_mv_stats_mcvlist_info PGNSP PGUID 12 1 0 0 0 f f f DESCR("multi-variate statistics: MCV list info"); DATA(insert OID = 3373 ( pg_mv_mcv_items PGNSP PGUID 12 1 1000 0 0 f f f f t t i s 1 0 2249 "26" "{26,23,1009,1000,701}" "{i,o,o,o,o}" "{oid,index,values,nulls,frequency}" _null_ _null_ pg_mv_mcv_items _null_ _null_ _null_ )); DESCR("details about MCV list items"); +DATA(insert OID = 3375 ( pg_mv_stats_histogram_info PGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 25 "774" _null_ _null_ _null_ _null_ _null_ pg_mv_stats_histogram_info _null_ _null_ _null_ )); +DESCR("multi-variate statistics: histogram info"); +DATA(insert OID = 3374 ( pg_mv_histogram_buckets PGNSP PGUID 12 1 1000 0 0 f f f f t t i s 2 0 2249 "26 23" "{26,23,23,1009,1009,1000,1000,1000,701,701,701}" "{i,i,o,o,o,o,o,o,o,o,o}" "{oid,otype,index,minvals,maxvals,nullsonly,mininclusive,maxinclusive,frequency,density,bucket_volume}" _null_ _null_ pg_mv_histogram_buckets _null_ _null_ _null_ )); +DESCR("details about histogram buckets"); DATA(insert OID = 3354 ( pg_ndistinct_in PGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 3353 "2275" _null_ _null_ _null_ _null_ _null_ pg_ndistinct_in _null_ _null_ _null_ )); DESCR("I/O"); @@ -2758,6 +2762,15 @@ DESCR("I/O"); DATA(insert OID = 445 ( pg_mcv_list_send PGNSP PGUID 12 1 0 0 0 f f f f t f s s 1 0 17 "441" _null_ _null_ _null_ _null_ _null_ pg_mcv_list_send _null_ _null_ _null_ )); DESCR("I/O"); +DATA(insert OID = 775 ( pg_histogram_in PGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 774 "2275" _null_ _null_ _null_ _null_ _null_ pg_histogram_in _null_ _null_ _null_ )); +DESCR("I/O"); +DATA(insert OID = 776 ( pg_histogram_out PGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 2275 "774" _null_ _null_ _null_ _null_ _null_ pg_histogram_out _null_ _null_ _null_ )); +DESCR("I/O"); +DATA(insert OID = 777 ( pg_histogram_recv PGNSP PGUID 12 1 0 0 0 f f f f t f s s 1 0 774 
"2281" _null_ _null_ _null_ _null_ _null_ pg_histogram_recv _null_ _null_ _null_ )); +DESCR("I/O"); +DATA(insert OID = 778 ( pg_histogram_send PGNSP PGUID 12 1 0 0 0 f f f f t f s s 1 0 17 "774" _null_ _null_ _null_ _null_ _null_ pg_histogram_send _null_ _null_ _null_ )); +DESCR("I/O"); + DATA(insert OID = 1928 ( pg_stat_get_numscans PGNSP PGUID 12 1 0 0 0 f f f f t f s r 1 0 20 "26" _null_ _null_ _null_ _null_ _null_ pg_stat_get_numscans _null_ _null_ _null_ )); DESCR("statistics: number of scans done for table/index"); DATA(insert OID = 1929 ( pg_stat_get_tuples_returned PGNSP PGUID 12 1 0 0 0 f f f f t f s r 1 0 20 "26" _null_ _null_ _null_ _null_ _null_ pg_stat_get_tuples_returned _null_ _null_ _null_ )); diff --git a/src/include/catalog/pg_type.h b/src/include/catalog/pg_type.h index fbac135..7133862 100644 --- a/src/include/catalog/pg_type.h +++ b/src/include/catalog/pg_type.h @@ -376,6 +376,10 @@ DATA(insert OID = 441 ( pg_mcv_list PGNSP PGUID -1 f b S f t \054 0 0 0 pg_mcv_ DESCR("multivariate MCV list"); #define PGMCVLISTOID 441 +DATA(insert OID = 774 ( pg_histogram PGNSP PGUID -1 f b S f t \054 0 0 0 pg_histogram_in pg_histogram_out pg_histogram_recv pg_histogram_send - - - i x f 0 -1 0 100 _null_ _null_ _null_ )); +DESCR("multivariate histogram"); +#define PGHISTOGRAMOID 774 + DATA(insert OID = 32 ( pg_ddl_command PGNSP PGUID SIZEOF_POINTER t p P f t \054 0 0 0 pg_ddl_command_in pg_ddl_command_out pg_ddl_command_recv pg_ddl_command_send - - - ALIGNOF_POINTER p f 0 -1 0 0 _null_ _null_ _null_ )); DESCR("internal type for passing CollectedCommand"); #define PGDDLCOMMANDOID 32 diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index d912827..f99f547 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -684,11 +684,13 @@ typedef struct MVStatisticInfo bool ndist_enabled; /* ndistinct coefficient enabled */ bool deps_enabled; /* functional dependencies enabled */ bool mcv_enabled; /* MCV list enabled */ + bool 
hist_enabled; /* histogram enabled */ /* built/available statistics */ bool ndist_built; /* ndistinct coefficient built */ bool deps_built; /* functional dependencies built */ bool mcv_built; /* MCV list built */ + bool hist_built; /* histogram built */ /* columns in the statistics (attnums) */ int2vector *stakeys; /* attnums of the columns covered */ diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h index 9ed080a..1c7925b 100644 --- a/src/include/utils/builtins.h +++ b/src/include/utils/builtins.h @@ -81,6 +81,10 @@ extern Datum pg_mcv_list_in(PG_FUNCTION_ARGS); extern Datum pg_mcv_list_out(PG_FUNCTION_ARGS); extern Datum pg_mcv_list_recv(PG_FUNCTION_ARGS); extern Datum pg_mcv_list_send(PG_FUNCTION_ARGS); +extern Datum pg_histogram_in(PG_FUNCTION_ARGS); +extern Datum pg_histogram_out(PG_FUNCTION_ARGS); +extern Datum pg_histogram_recv(PG_FUNCTION_ARGS); +extern Datum pg_histogram_send(PG_FUNCTION_ARGS); /* regexp.c */ extern char *regexp_fixed_prefix(text *text_re, bool case_insensitive, diff --git a/src/include/utils/mvstats.h b/src/include/utils/mvstats.h index 0c4f621..5d8c024 100644 --- a/src/include/utils/mvstats.h +++ b/src/include/utils/mvstats.h @@ -18,7 +18,7 @@ #include "commands/vacuum.h" /* - * Degree of how much MCV item matches a clause. + * Degree of how much MCV item / histogram bucket matches a clause. * This is then considered when computing the selectivity. 
*/ #define MVSTATS_MATCH_NONE 0 /* no match at all */ @@ -114,19 +114,133 @@ bool dependency_implies_attribute(MVDependency dependency, AttrNumber attnum, bool dependency_is_fully_matched(MVDependency dependency, Bitmapset *attnums, int16 *attmap); +/* used to flag stats serialized to bytea */ +#define MVSTAT_HIST_MAGIC 0x7F8C5670 /* marks serialized bytea */ +#define MVSTAT_HIST_TYPE_BASIC 1 /* basic histogram type */ + +/* max buckets in a histogram (mostly arbitrary number */ +#define MVSTAT_HIST_MAX_BUCKETS 16384 + +/* + * Multivariate histograms + */ +typedef struct MVBucketData +{ + + /* Frequencies of this bucket. */ + float ntuples; /* frequency of tuples tuples */ + + /* + * Information about dimensions being NULL-only. Not yet used. + */ + bool *nullsonly; + + /* lower boundaries - values and information about the inequalities */ + Datum *min; + bool *min_inclusive; + + /* upper boundaries - values and information about the inequalities */ + Datum *max; + bool *max_inclusive; + + /* used when building the histogram (not serialized/deserialized) */ + void *build_data; + +} MVBucketData; + +typedef MVBucketData *MVBucket; + + +typedef struct MVHistogramData +{ + + uint32 magic; /* magic constant marker */ + uint32 type; /* type of histogram (BASIC) */ + uint32 nbuckets; /* number of buckets (buckets array) */ + uint32 ndimensions; /* number of dimensions */ + + MVBucket *buckets; /* array of buckets */ + +} MVHistogramData; + +typedef MVHistogramData *MVHistogram; + +/* + * Histogram in a partially serialized form, with deduplicated boundary + * values etc. + * + * TODO add more detailed description here + */ + +typedef struct MVSerializedBucketData +{ + + /* Frequencies of this bucket. */ + float ntuples; /* frequency of tuples tuples */ + + /* + * Information about dimensions being NULL-only. Not yet used. 
+ */ + bool *nullsonly; + + /* lower boundaries - values and information about the inequalities */ + uint16 *min; + bool *min_inclusive; + + /* + * indexes of upper boundaries - values and information about the + * inequalities (exclusive vs. inclusive) + */ + uint16 *max; + bool *max_inclusive; + +} MVSerializedBucketData; + +typedef MVSerializedBucketData *MVSerializedBucket; + +typedef struct MVSerializedHistogramData +{ + + uint32 magic; /* magic constant marker */ + uint32 type; /* type of histogram (BASIC) */ + uint32 nbuckets; /* number of buckets (buckets array) */ + uint32 ndimensions; /* number of dimensions */ + + /* + * keep this the same with MVHistogramData, because of deserialization + * (same offset) + */ + MVSerializedBucket *buckets; /* array of buckets */ + + /* + * serialized boundary values, one array per dimension, deduplicated (the + * min/max indexes point into these arrays) + */ + int *nvalues; + Datum **values; + +} MVSerializedHistogramData; + +typedef MVSerializedHistogramData *MVSerializedHistogram; + + MVNDistinct load_mv_ndistinct(Oid mvoid); MVDependencies load_mv_dependencies(Oid mvoid); MCVList load_mv_mcvlist(Oid mvoid); +MVSerializedHistogram load_mv_histogram(Oid mvoid); bytea *serialize_mv_ndistinct(MVNDistinct ndistinct); bytea *serialize_mv_dependencies(MVDependencies dependencies); bytea *serialize_mv_mcvlist(MCVList mcvlist, int2vector *attrs, VacAttrStats **stats); +bytea *serialize_mv_histogram(MVHistogram histogram, int2vector *attrs, + VacAttrStats **stats); /* deserialization of stats (serialization is private to analyze) */ MVNDistinct deserialize_mv_ndistinct(bytea *data); MVDependencies deserialize_mv_dependencies(bytea *data); MCVList deserialize_mv_mcvlist(bytea *data); +MVSerializedHistogram deserialize_mv_histogram(bytea * data); /* * Returns index of the attribute number within the vector (i.e. 
a @@ -139,6 +253,8 @@ int2vector *find_mv_attnums(Oid mvoid, Oid *relid); /* functions for inspecting the statistics */ extern Datum pg_mv_stats_mcvlist_info(PG_FUNCTION_ARGS); extern Datum pg_mv_mcvlist_items(PG_FUNCTION_ARGS); +extern Datum pg_mv_stats_histogram_info(PG_FUNCTION_ARGS); +extern Datum pg_mv_histogram_buckets(PG_FUNCTION_ARGS); MVNDistinct build_mv_ndistinct(double totalrows, int numrows, HeapTuple *rows, @@ -151,8 +267,15 @@ MVDependencies build_mv_dependencies(int numrows, HeapTuple *rows, MCVList build_mv_mcvlist(int numrows, HeapTuple *rows, int2vector *attrs, VacAttrStats **stats, int *numrows_filtered); +MVHistogram build_mv_histogram(int numrows, HeapTuple *rows, int2vector *attrs, + VacAttrStats **stats, int numrows_total); + void build_mv_stats(Relation onerel, double totalrows, int numrows, HeapTuple *rows, int natts, VacAttrStats **vacattrstats); +#ifdef DEBUG_MVHIST +extern void debug_histogram_matches(MVSerializedHistogram mvhist, char *matches); +#endif + #endif diff --git a/src/test/regress/expected/mv_histogram.out b/src/test/regress/expected/mv_histogram.out new file mode 100644 index 0000000..16410ce --- /dev/null +++ b/src/test/regress/expected/mv_histogram.out @@ -0,0 +1,198 @@ +-- data type passed by value +CREATE TABLE mv_histogram ( + a INT, + b INT, + c INT +); +-- unknown column +CREATE STATISTICS s7 WITH (histogram) ON (unknown_column) FROM mv_histogram; +ERROR: column "unknown_column" referenced in statistics does not exist +-- single column +CREATE STATISTICS s7 WITH (histogram) ON (a) FROM mv_histogram; +ERROR: statistics require at least 2 columns +-- single column, duplicated +CREATE STATISTICS s7 WITH (histogram) ON (a, a) FROM mv_histogram; +ERROR: duplicate column name in statistics definition +-- two columns, one duplicated +CREATE STATISTICS s7 WITH (histogram) ON (a, a, b) FROM mv_histogram; +ERROR: duplicate column name in statistics definition +-- unknown option +CREATE STATISTICS s7 WITH (unknown_option) ON 
(a, b, c) FROM mv_histogram; +ERROR: unrecognized STATISTICS option "unknown_option" +-- correct command +CREATE STATISTICS s7 WITH (histogram) ON (a, b, c) FROM mv_histogram; +-- random data (no functional dependencies) +INSERT INTO mv_histogram + SELECT mod(i, 111), mod(i, 123), mod(i, 23) FROM generate_series(1,10000) s(i); +ANALYZE mv_histogram; +SELECT hist_enabled, hist_built + FROM pg_mv_statistic WHERE starelid = 'mv_histogram'::regclass; + hist_enabled | hist_built +--------------+------------ + t | t +(1 row) + +TRUNCATE mv_histogram; +-- a => b, a => c, b => c +INSERT INTO mv_histogram + SELECT i/10, i/100, i/200 FROM generate_series(1,10000) s(i); +ANALYZE mv_histogram; +SELECT hist_enabled, hist_built + FROM pg_mv_statistic WHERE starelid = 'mv_histogram'::regclass; + hist_enabled | hist_built +--------------+------------ + t | t +(1 row) + +TRUNCATE mv_histogram; +-- a => b, a => c +INSERT INTO mv_histogram + SELECT i/10, i/150, i/200 FROM generate_series(1,10000) s(i); +ANALYZE mv_histogram; +SELECT hist_enabled, hist_built + FROM pg_mv_statistic WHERE starelid = 'mv_histogram'::regclass; + hist_enabled | hist_built +--------------+------------ + t | t +(1 row) + +TRUNCATE mv_histogram; +-- check explain (expect bitmap index scan, not plain index scan) +INSERT INTO mv_histogram + SELECT i/100, i/200, i/400 FROM generate_series(1,30000) s(i); +CREATE INDEX hist_idx ON mv_histogram (a, b); +ANALYZE mv_histogram; +SELECT hist_enabled, hist_built + FROM pg_mv_statistic WHERE starelid = 'mv_histogram'::regclass; + hist_enabled | hist_built +--------------+------------ + t | t +(1 row) + +EXPLAIN (COSTS off) + SELECT * FROM mv_histogram WHERE a = 10 AND b = 5; + QUERY PLAN +-------------------------------------------- + Bitmap Heap Scan on mv_histogram + Recheck Cond: ((a = 10) AND (b = 5)) + -> Bitmap Index Scan on hist_idx + Index Cond: ((a = 10) AND (b = 5)) +(4 rows) + +DROP TABLE mv_histogram; +-- varlena type (text) +CREATE TABLE mv_histogram ( + a 
TEXT, + b TEXT, + c TEXT +); +CREATE STATISTICS s8 WITH (histogram) ON (a, b, c) FROM mv_histogram; +-- random data (no functional dependencies) +INSERT INTO mv_histogram + SELECT mod(i, 111), mod(i, 123), mod(i, 23) FROM generate_series(1,10000) s(i); +ANALYZE mv_histogram; +SELECT hist_enabled, hist_built + FROM pg_mv_statistic WHERE starelid = 'mv_histogram'::regclass; + hist_enabled | hist_built +--------------+------------ + t | t +(1 row) + +TRUNCATE mv_histogram; +-- a => b, a => c, b => c +INSERT INTO mv_histogram + SELECT i/10, i/100, i/200 FROM generate_series(1,10000) s(i); +ANALYZE mv_histogram; +SELECT hist_enabled, hist_built + FROM pg_mv_statistic WHERE starelid = 'mv_histogram'::regclass; + hist_enabled | hist_built +--------------+------------ + t | t +(1 row) + +TRUNCATE mv_histogram; +-- a => b, a => c +INSERT INTO mv_histogram + SELECT i/10, i/150, i/200 FROM generate_series(1,10000) s(i); +ANALYZE mv_histogram; +SELECT hist_enabled, hist_built + FROM pg_mv_statistic WHERE starelid = 'mv_histogram'::regclass; + hist_enabled | hist_built +--------------+------------ + t | t +(1 row) + +TRUNCATE mv_histogram; +-- check explain (expect bitmap index scan, not plain index scan) +INSERT INTO mv_histogram + SELECT i/100, i/200, i/400 FROM generate_series(1,30000) s(i); +CREATE INDEX hist_idx ON mv_histogram (a, b); +ANALYZE mv_histogram; +SELECT hist_enabled, hist_built + FROM pg_mv_statistic WHERE starelid = 'mv_histogram'::regclass; + hist_enabled | hist_built +--------------+------------ + t | t +(1 row) + +EXPLAIN (COSTS off) + SELECT * FROM mv_histogram WHERE a = '10' AND b = '5'; + QUERY PLAN +------------------------------------------------------------ + Bitmap Heap Scan on mv_histogram + Recheck Cond: ((a = '10'::text) AND (b = '5'::text)) + -> Bitmap Index Scan on hist_idx + Index Cond: ((a = '10'::text) AND (b = '5'::text)) +(4 rows) + +TRUNCATE mv_histogram; +-- check explain (expect bitmap index scan, not plain index scan) with NULLs 
+INSERT INTO mv_histogram + SELECT + (CASE WHEN i/100 = 0 THEN NULL ELSE i/100 END), + (CASE WHEN i/200 = 0 THEN NULL ELSE i/200 END), + (CASE WHEN i/400 = 0 THEN NULL ELSE i/400 END) + FROM generate_series(1,30000) s(i); +ANALYZE mv_histogram; +SELECT hist_enabled, hist_built + FROM pg_mv_statistic WHERE starelid = 'mv_histogram'::regclass; + hist_enabled | hist_built +--------------+------------ + t | t +(1 row) + +EXPLAIN (COSTS off) + SELECT * FROM mv_histogram WHERE a IS NULL AND b IS NULL; + QUERY PLAN +--------------------------------------------------- + Bitmap Heap Scan on mv_histogram + Recheck Cond: ((a IS NULL) AND (b IS NULL)) + -> Bitmap Index Scan on hist_idx + Index Cond: ((a IS NULL) AND (b IS NULL)) +(4 rows) + +DROP TABLE mv_histogram; +-- NULL values (mix of int and text columns) +CREATE TABLE mv_histogram ( + a INT, + b TEXT, + c INT, + d TEXT +); +CREATE STATISTICS s9 WITH (histogram) ON (a, b, c, d) FROM mv_histogram; +INSERT INTO mv_histogram + SELECT + mod(i, 100), + (CASE WHEN mod(i, 200) = 0 THEN NULL ELSE mod(i,200) END), + mod(i, 400), + (CASE WHEN mod(i, 300) = 0 THEN NULL ELSE mod(i,600) END) + FROM generate_series(1,10000) s(i); +ANALYZE mv_histogram; +SELECT hist_enabled, hist_built + FROM pg_mv_statistic WHERE starelid = 'mv_histogram'::regclass; + hist_enabled | hist_built +--------------+------------ + t | t +(1 row) + +DROP TABLE mv_histogram; diff --git a/src/test/regress/expected/opr_sanity.out b/src/test/regress/expected/opr_sanity.out index 9969c10..a9d8163 100644 --- a/src/test/regress/expected/opr_sanity.out +++ b/src/test/regress/expected/opr_sanity.out @@ -820,11 +820,12 @@ WHERE c.castmethod = 'b' AND pg_ndistinct | bytea | 0 | i pg_dependencies | bytea | 0 | i pg_mcv_list | bytea | 0 | i + pg_histogram | bytea | 0 | i cidr | inet | 0 | i xml | text | 0 | a xml | character varying | 0 | a xml | character | 0 | a -(10 rows) +(11 rows) -- **************** pg_conversion **************** -- Look for illegal values in 
pg_conversion fields. diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out index 2e3c40e..27e903c 100644 --- a/src/test/regress/expected/rules.out +++ b/src/test/regress/expected/rules.out @@ -1383,7 +1383,9 @@ pg_mv_stats| SELECT n.nspname AS schemaname, length((s.standist)::bytea) AS ndistbytes, length((s.stadeps)::bytea) AS depsbytes, length((s.stamcv)::bytea) AS mcvbytes, - pg_mv_stats_mcvlist_info(s.stamcv) AS mcvinfo + pg_mv_stats_mcvlist_info(s.stamcv) AS mcvinfo, + length((s.stahist)::bytea) AS histbytes, + pg_mv_stats_histogram_info(s.stahist) AS histinfo FROM ((pg_mv_statistic s JOIN pg_class c ON ((c.oid = s.starelid))) LEFT JOIN pg_namespace n ON ((n.oid = c.relnamespace))); diff --git a/src/test/regress/expected/type_sanity.out b/src/test/regress/expected/type_sanity.out index dde15b9..4d3c4d7 100644 --- a/src/test/regress/expected/type_sanity.out +++ b/src/test/regress/expected/type_sanity.out @@ -73,8 +73,9 @@ WHERE p1.typtype not in ('c','d','p') AND p1.typname NOT LIKE E'\\_%' 3353 | pg_ndistinct 3358 | pg_dependencies 441 | pg_mcv_list + 774 | pg_histogram 210 | smgr -(5 rows) +(6 rows) -- Make sure typarray points to a varlena array type of our own base SELECT p1.oid, p1.typname as basetype, p2.typname as arraytype, diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule index d805840..36dd618 100644 --- a/src/test/regress/parallel_schedule +++ b/src/test/regress/parallel_schedule @@ -118,4 +118,4 @@ test: event_trigger test: stats # run tests of multivariate stats -test: mv_ndistinct mv_dependencies mv_mcv +test: mv_ndistinct mv_dependencies mv_mcv mv_histogram diff --git a/src/test/regress/serial_schedule b/src/test/regress/serial_schedule index 72c6acd..34f5467 100644 --- a/src/test/regress/serial_schedule +++ b/src/test/regress/serial_schedule @@ -174,3 +174,4 @@ test: stats test: mv_ndistinct test: mv_dependencies test: mv_mcv +test: mv_histogram diff --git 
a/src/test/regress/sql/mv_histogram.sql b/src/test/regress/sql/mv_histogram.sql new file mode 100644 index 0000000..55197cb --- /dev/null +++ b/src/test/regress/sql/mv_histogram.sql @@ -0,0 +1,167 @@ +-- data type passed by value +CREATE TABLE mv_histogram ( + a INT, + b INT, + c INT +); + +-- unknown column +CREATE STATISTICS s7 WITH (histogram) ON (unknown_column) FROM mv_histogram; + +-- single column +CREATE STATISTICS s7 WITH (histogram) ON (a) FROM mv_histogram; + +-- single column, duplicated +CREATE STATISTICS s7 WITH (histogram) ON (a, a) FROM mv_histogram; + +-- two columns, one duplicated +CREATE STATISTICS s7 WITH (histogram) ON (a, a, b) FROM mv_histogram; + +-- unknown option +CREATE STATISTICS s7 WITH (unknown_option) ON (a, b, c) FROM mv_histogram; + +-- correct command +CREATE STATISTICS s7 WITH (histogram) ON (a, b, c) FROM mv_histogram; + +-- random data (no functional dependencies) +INSERT INTO mv_histogram + SELECT mod(i, 111), mod(i, 123), mod(i, 23) FROM generate_series(1,10000) s(i); + +ANALYZE mv_histogram; + +SELECT hist_enabled, hist_built + FROM pg_mv_statistic WHERE starelid = 'mv_histogram'::regclass; + +TRUNCATE mv_histogram; + +-- a => b, a => c, b => c +INSERT INTO mv_histogram + SELECT i/10, i/100, i/200 FROM generate_series(1,10000) s(i); + +ANALYZE mv_histogram; + +SELECT hist_enabled, hist_built + FROM pg_mv_statistic WHERE starelid = 'mv_histogram'::regclass; + +TRUNCATE mv_histogram; + +-- a => b, a => c +INSERT INTO mv_histogram + SELECT i/10, i/150, i/200 FROM generate_series(1,10000) s(i); +ANALYZE mv_histogram; + +SELECT hist_enabled, hist_built + FROM pg_mv_statistic WHERE starelid = 'mv_histogram'::regclass; + +TRUNCATE mv_histogram; + +-- check explain (expect bitmap index scan, not plain index scan) +INSERT INTO mv_histogram + SELECT i/100, i/200, i/400 FROM generate_series(1,30000) s(i); +CREATE INDEX hist_idx ON mv_histogram (a, b); +ANALYZE mv_histogram; + +SELECT hist_enabled, hist_built + FROM pg_mv_statistic 
WHERE starelid = 'mv_histogram'::regclass; + +EXPLAIN (COSTS off) + SELECT * FROM mv_histogram WHERE a = 10 AND b = 5; + +DROP TABLE mv_histogram; + +-- varlena type (text) +CREATE TABLE mv_histogram ( + a TEXT, + b TEXT, + c TEXT +); + +CREATE STATISTICS s8 WITH (histogram) ON (a, b, c) FROM mv_histogram; + +-- random data (no functional dependencies) +INSERT INTO mv_histogram + SELECT mod(i, 111), mod(i, 123), mod(i, 23) FROM generate_series(1,10000) s(i); + +ANALYZE mv_histogram; + +SELECT hist_enabled, hist_built + FROM pg_mv_statistic WHERE starelid = 'mv_histogram'::regclass; + +TRUNCATE mv_histogram; + +-- a => b, a => c, b => c +INSERT INTO mv_histogram + SELECT i/10, i/100, i/200 FROM generate_series(1,10000) s(i); + +ANALYZE mv_histogram; + +SELECT hist_enabled, hist_built + FROM pg_mv_statistic WHERE starelid = 'mv_histogram'::regclass; + +TRUNCATE mv_histogram; + +-- a => b, a => c +INSERT INTO mv_histogram + SELECT i/10, i/150, i/200 FROM generate_series(1,10000) s(i); +ANALYZE mv_histogram; + +SELECT hist_enabled, hist_built + FROM pg_mv_statistic WHERE starelid = 'mv_histogram'::regclass; + +TRUNCATE mv_histogram; + +-- check explain (expect bitmap index scan, not plain index scan) +INSERT INTO mv_histogram + SELECT i/100, i/200, i/400 FROM generate_series(1,30000) s(i); +CREATE INDEX hist_idx ON mv_histogram (a, b); +ANALYZE mv_histogram; + +SELECT hist_enabled, hist_built + FROM pg_mv_statistic WHERE starelid = 'mv_histogram'::regclass; + +EXPLAIN (COSTS off) + SELECT * FROM mv_histogram WHERE a = '10' AND b = '5'; + +TRUNCATE mv_histogram; + +-- check explain (expect bitmap index scan, not plain index scan) with NULLs +INSERT INTO mv_histogram + SELECT + (CASE WHEN i/100 = 0 THEN NULL ELSE i/100 END), + (CASE WHEN i/200 = 0 THEN NULL ELSE i/200 END), + (CASE WHEN i/400 = 0 THEN NULL ELSE i/400 END) + FROM generate_series(1,30000) s(i); +ANALYZE mv_histogram; + +SELECT hist_enabled, hist_built + FROM pg_mv_statistic WHERE starelid = 
'mv_histogram'::regclass; + +EXPLAIN (COSTS off) + SELECT * FROM mv_histogram WHERE a IS NULL AND b IS NULL; + +DROP TABLE mv_histogram; + +-- NULL values (mix of int and text columns) +CREATE TABLE mv_histogram ( + a INT, + b TEXT, + c INT, + d TEXT +); + +CREATE STATISTICS s9 WITH (histogram) ON (a, b, c, d) FROM mv_histogram; + +INSERT INTO mv_histogram + SELECT + mod(i, 100), + (CASE WHEN mod(i, 200) = 0 THEN NULL ELSE mod(i,200) END), + mod(i, 400), + (CASE WHEN mod(i, 300) = 0 THEN NULL ELSE mod(i,600) END) + FROM generate_series(1,10000) s(i); + +ANALYZE mv_histogram; + +SELECT hist_enabled, hist_built + FROM pg_mv_statistic WHERE starelid = 'mv_histogram'::regclass; + +DROP TABLE mv_histogram; -- 2.5.5