From 699bc7bb78f1fc1a37225f2f69439ee39ee6adcf Mon Sep 17 00:00:00 2001
From: Tomas Vondra <tomas@pgaddict.com>
Date: Sun, 23 Oct 2016 17:38:02 +0200
Subject: [PATCH 5/9] PATCH: multivariate MCV lists

- extends the pg_mv_statistic catalog (add 'mcv' fields)
- building the MCV lists during ANALYZE
- simple estimation while planning the queries
- pg_mcv_list data type (varlena-based)

Includes regression tests, mostly equal to regression tests for
functional dependencies.

A varlena-based data type for storing serialized MCV lists.
---
 doc/src/sgml/catalogs.sgml                |   30 +
 doc/src/sgml/planstats.sgml               |  157 ++++
 doc/src/sgml/ref/create_statistics.sgml   |   34 +
 src/backend/catalog/system_views.sql      |    4 +-
 src/backend/commands/statscmds.c          |   11 +-
 src/backend/nodes/outfuncs.c              |    2 +
 src/backend/optimizer/path/clausesel.c    |  636 +++++++++++++++-
 src/backend/optimizer/util/plancat.c      |    4 +-
 src/backend/utils/mvstats/Makefile        |    2 +-
 src/backend/utils/mvstats/README.mcv      |  137 ++++
 src/backend/utils/mvstats/README.stats    |   87 ++-
 src/backend/utils/mvstats/common.c        |  136 +++-
 src/backend/utils/mvstats/common.h        |   22 +-
 src/backend/utils/mvstats/mcv.c           | 1184 +++++++++++++++++++++++++++++
 src/bin/psql/describe.c                   |   24 +-
 src/include/catalog/pg_cast.h             |    5 +
 src/include/catalog/pg_mv_statistic.h     |   18 +-
 src/include/catalog/pg_proc.h             |   14 +
 src/include/catalog/pg_type.h             |    4 +
 src/include/nodes/relation.h              |    6 +-
 src/include/utils/builtins.h              |    4 +
 src/include/utils/mvstats.h               |   64 ++
 src/test/regress/expected/mv_mcv.out      |  198 +++++
 src/test/regress/expected/opr_sanity.out  |    3 +-
 src/test/regress/expected/rules.out       |    4 +-
 src/test/regress/expected/type_sanity.out |    3 +-
 src/test/regress/parallel_schedule        |    2 +-
 src/test/regress/serial_schedule          |    1 +
 src/test/regress/sql/mv_mcv.sql           |  169 ++++
 29 files changed, 2896 insertions(+), 69 deletions(-)
 create mode 100644 src/backend/utils/mvstats/README.mcv
 create mode 100644 src/backend/utils/mvstats/mcv.c
 create mode 100644 src/test/regress/expected/mv_mcv.out
 create mode 100644 src/test/regress/sql/mv_mcv.sql
diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml
index 852f573..bca03e9 100644
--- a/doc/src/sgml/catalogs.sgml
+++ b/doc/src/sgml/catalogs.sgml
@@ -4296,6 +4296,17 @@
      </row>
 
      <row>
+      <entry><structfield>mcv_enabled</structfield></entry>
+      <entry><type>bool</type></entry>
+      <entry></entry>
+      <entry>
+       If true, an MCV list will be computed for the combination of columns
+       covered by the statistics. This does not mean the MCV list is already
+       computed, though.
+      </entry>
+     </row>
+
+     <row>
       <entry><structfield>ndist_built</structfield></entry>
       <entry><type>bool</type></entry>
       <entry></entry>
@@ -4316,6 +4327,16 @@
      </row>
 
      <row>
+      <entry><structfield>mcv_built</structfield></entry>
+      <entry><type>bool</type></entry>
+      <entry></entry>
+      <entry>
+       If true, MCV list is already computed and available for use during query
+       estimation.
+      </entry>
+     </row>
+
+     <row>
       <entry><structfield>stakeys</structfield></entry>
       <entry><type>int2vector</type></entry>
       <entry><literal><link linkend="catalog-pg-attribute"><structname>pg_attribute</structname></link>.attnum</literal></entry>
@@ -4344,6 +4365,15 @@
       </entry>
      </row>
 
+     <row>
+      <entry><structfield>stamcv</structfield></entry>
+      <entry><type>pg_mcv_list</type></entry>
+      <entry></entry>
+      <entry>
+       MCV list, serialized as <structname>pg_mcv_list</> type.
+      </entry>
+     </row>
+
     </tbody>
    </tgroup>
   </table>
diff --git a/doc/src/sgml/planstats.sgml b/doc/src/sgml/planstats.sgml
index 5436c8a..57f9441 100644
--- a/doc/src/sgml/planstats.sgml
+++ b/doc/src/sgml/planstats.sgml
@@ -757,6 +757,163 @@ EXPLAIN ANALYZE SELECT * FROM t WHERE a = 1 AND b = 10;
 
   </sect2>
 
+  <sect2 id="mcv-lists">
+   <title>MCV lists</title>
+
+   <para>
+    As explained in the previous section, functional dependencies are a very
+    cheap and efficient type of statistics, but they have limitations due to
+    their global nature (they only track column-level dependencies, not
+    dependencies between individual values stored in the columns).
+   </para>
+
+   <para>
+    This section introduces multivariate most-common values (<acronym>MCV</>)
+    lists, a direct generalization of the statistics introduced in
+    <xref linkend="row-estimation-examples">, that is not subject to this
+    limitation. It is however more expensive, both in terms of storage and
+    planning time.
+   </para>
+
+   <para>
+    Let's look at the example query from the previous section again, creating
+    a multivariate <acronym>MCV</> list on the columns (after dropping the
+    functional dependencies, to make sure the planner uses the newly created
+    <acronym>MCV</> list when computing the estimates).
+
+<programlisting>
+DROP STATISTICS s1;
+CREATE STATISTICS s2 ON t (a,b) WITH (mcv);
+ANALYZE t;
+EXPLAIN ANALYZE SELECT * FROM t WHERE a = 1 AND b = 1;
+                                           QUERY PLAN
+-------------------------------------------------------------------------------------------------
+ Seq Scan on t  (cost=0.00..195.00 rows=100 width=8) (actual time=0.036..3.011 rows=100 loops=1)
+   Filter: ((a = 1) AND (b = 1))
+   Rows Removed by Filter: 9900
+ Planning time: 0.188 ms
+ Execution time: 3.229 ms
+(5 rows)
+</programlisting>
+
+    The estimate is as accurate as with the functional dependencies, mostly
+    thanks to the table being fairly small and having a simple distribution
+    with a low number of distinct values. Before looking at the second query,
+    which was not handled by functional dependencies this well, let's inspect
+    the <acronym>MCV</> list a bit.
+   </para>
+
+   <para>
+    First, let's list statistics defined on a table using <command>\d</>
+    in <application>psql</>:
+
+<programlisting>
+\d t
+       Table "public.t"
+ Column |  Type   | Modifiers
+--------+---------+-----------
+ a      | integer |
+ b      | integer |
+Statistics:
+    "public.s2" (mcv) ON (a, b)
+</programlisting>
+
+   </para>
+
+   <para>
+    To inspect details of the <acronym>MCV</> statistics, we can look into the
+    <structname>pg_mv_stats</structname> view
+
+<programlisting>
+SELECT tablename, staname, attnums, mcvbytes, mcvinfo
+  FROM pg_mv_stats WHERE staname = 's2';
+ tablename | staname | attnums | mcvbytes |  mcvinfo
+-----------+---------+---------+----------+------------
+ t         | s2      | 1 2     |     2048 | nitems=100
+(1 row)
+</programlisting>
+
+    According to this, the statistics takes 2kB when serialized into
+    a <literal>bytea</> value, and <command>ANALYZE</> found 100 distinct
+    combinations of values in the two columns.
+   </para>
+
+   <para>
+    Inspecting the contents of the MCV list is possible using
+    <function>pg_mv_mcv_items</> function.
+
+<programlisting>
+SELECT * FROM pg_mv_mcv_items((SELECT oid FROM pg_mv_statistic WHERE staname = 's2'));
+ index | values  | nulls | frequency
+-------+---------+-------+-----------
+     0 | {0,0}   | {f,f} |      0.01
+     1 | {1,1}   | {f,f} |      0.01
+     2 | {2,2}   | {f,f} |      0.01
+...
+    49 | {49,49} | {f,f} |      0.01
+    50 | {50,0}  | {f,f} |      0.01
+...
+    97 | {97,47} | {f,f} |      0.01
+    98 | {98,48} | {f,f} |      0.01
+    99 | {99,49} | {f,f} |      0.01
+(100 rows)
+</programlisting>
+
+    This confirms there are 100 distinct combinations of values in the two
+    columns, and all of them are equally likely (1% frequency for each).
+    Had there been any null values in either of the columns, this would be
+    identified in the <structfield>nulls</> column.
+   </para>
+
+   <para>
+    When estimating the selectivity, the planner applies all the conditions
+    on items in the <acronym>MCV</> list, and then sums the frequencies
+    of the matching ones. See <function>clauselist_mv_selectivity_mcvlist</>
+    in <filename>clausesel.c</> for details.
+   </para>
+
+   <para>
+    Compared to functional dependencies, <acronym>MCV</> lists have two major
+    advantages. Firstly, the list stores actual values, making it possible to
+    detect "incompatible" combinations.
+
+<programlisting>
+EXPLAIN ANALYZE SELECT * FROM t WHERE a = 1 AND b = 10;
+                                         QUERY PLAN
+---------------------------------------------------------------------------------------------
+ Seq Scan on t  (cost=0.00..195.00 rows=1 width=8) (actual time=2.823..2.823 rows=0 loops=1)
+   Filter: ((a = 1) AND (b = 10))
+   Rows Removed by Filter: 10000
+ Planning time: 0.268 ms
+ Execution time: 2.866 ms
+(5 rows)
+</programlisting>
+
+    Secondly, <acronym>MCV</> lists also handle a wide range of clause types,
+    not just equality clauses like functional dependencies. See for example
+    the range query presented earlier:
+
+<programlisting>
+EXPLAIN ANALYZE SELECT * FROM t WHERE a <= 49 AND b > 49;
+                                         QUERY PLAN
+---------------------------------------------------------------------------------------------
+ Seq Scan on t  (cost=0.00..195.00 rows=1 width=8) (actual time=3.349..3.349 rows=0 loops=1)
+   Filter: ((a <= 49) AND (b > 49))
+   Rows Removed by Filter: 10000
+ Planning time: 0.163 ms
+ Execution time: 3.389 ms
+(5 rows)
+</programlisting>
+
+   </para>
+
+   <para>
+    For additional information about multivariate MCV lists, see
+    <filename>src/backend/utils/mvstats/README.mcv</>.
+   </para>
+
+  </sect2>
+
  </sect1>
 
 </chapter>
diff --git a/doc/src/sgml/ref/create_statistics.sgml b/doc/src/sgml/ref/create_statistics.sgml
index eaa39ee..e95d8d3 100644
--- a/doc/src/sgml/ref/create_statistics.sgml
+++ b/doc/src/sgml/ref/create_statistics.sgml
@@ -124,6 +124,15 @@ CREATE STATISTICS [ IF NOT EXISTS ] <replaceable class="PARAMETER">statistics_na
    </varlistentry>
 
    <varlistentry>
+    <term><literal>mcv</> (<type>boolean</>)</term>
+    <listitem>
+     <para>
+      Enables MCV list for the statistics.
+     </para>
+    </listitem>
+   </varlistentry>
+
+   <varlistentry>
     <term><literal>ndistinct</> (<type>boolean</>)</term>
     <listitem>
      <para>
@@ -167,6 +176,31 @@ EXPLAIN ANALYZE SELECT * FROM t1 WHERE (a = 1) AND (b = 1);
 </programlisting>
   </para>
 
+  <para>
+   Create table <structname>t2</> with two perfectly correlated columns
+   (containing identical data), and a MCV list on those columns:
+
+<programlisting>
+CREATE TABLE t2 (
+    a   int,
+    b   int
+);
+
+INSERT INTO t2 SELECT mod(i,100), mod(i,100)
+                 FROM generate_series(1,1000000) s(i);
+
+CREATE STATISTICS s2 WITH (mcv) ON (a, b) FROM t2;
+
+ANALYZE t2;
+
+-- valid combination (found in MCV)
+EXPLAIN ANALYZE SELECT * FROM t2 WHERE (a = 1) AND (b = 1);
+
+-- invalid combination (not found in MCV)
+EXPLAIN ANALYZE SELECT * FROM t2 WHERE (a = 1) AND (b = 2);
+</programlisting>
+  </para>
+
  </refsect1>
 
  <refsect1>
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql
index 216ece5..d4d9c24 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -188,7 +188,9 @@ CREATE VIEW pg_mv_stats AS
         S.staname AS staname,
         S.stakeys AS attnums,
         length(s.standist::bytea) AS ndistbytes,
-        length(S.stadeps::bytea) AS depsbytes
+        length(S.stadeps::bytea) AS depsbytes,
+        length(S.stamcv::bytea) AS mcvbytes,
+        pg_mv_stats_mcvlist_info(S.stamcv) AS mcvinfo
     FROM (pg_mv_statistic S JOIN pg_class C ON (C.oid = S.starelid))
         LEFT JOIN pg_namespace N ON (N.oid = C.relnamespace);
 
diff --git a/src/backend/commands/statscmds.c b/src/backend/commands/statscmds.c
index af4f4d3..ef05745 100644
--- a/src/backend/commands/statscmds.c
+++ b/src/backend/commands/statscmds.c
@@ -70,7 +70,8 @@ CreateStatistics(CreateStatsStmt *stmt)
 
 	/* by default build nothing */
 	bool		build_ndistinct = false,
-				build_dependencies = false;
+				build_dependencies = false,
+				build_mcv = false;
 
 	Assert(IsA(stmt, CreateStatsStmt));
 
@@ -169,6 +170,8 @@ CreateStatistics(CreateStatsStmt *stmt)
 			build_ndistinct = defGetBoolean(opt);
 		else if (strcmp(opt->defname, "dependencies") == 0)
 			build_dependencies = defGetBoolean(opt);
+		else if (strcmp(opt->defname, "mcv") == 0)
+			build_mcv = defGetBoolean(opt);
 		else
 			ereport(ERROR,
 					(errcode(ERRCODE_SYNTAX_ERROR),
@@ -177,10 +180,10 @@ CreateStatistics(CreateStatsStmt *stmt)
 	}
 
 	/* Make sure there's at least one statistics type specified. */
-	if (! (build_ndistinct || build_dependencies))
+	if (!(build_ndistinct || build_dependencies || build_mcv))
 		ereport(ERROR,
 				(errcode(ERRCODE_SYNTAX_ERROR),
-				 errmsg("no statistics type (ndistinct, dependencies) requested")));
+				 errmsg("no statistics type (ndistinct, dependencies, mcv) requested")));
 
 	stakeys = buildint2vector(attnums, numcols);
 
@@ -203,9 +206,11 @@ CreateStatistics(CreateStatsStmt *stmt)
 	/* enabled statistics */
 	values[Anum_pg_mv_statistic_ndist_enabled - 1] = BoolGetDatum(build_ndistinct);
 	values[Anum_pg_mv_statistic_deps_enabled - 1] = BoolGetDatum(build_dependencies);
+	values[Anum_pg_mv_statistic_mcv_enabled - 1] = BoolGetDatum(build_mcv);
 
 	nulls[Anum_pg_mv_statistic_standist - 1] = true;
 	nulls[Anum_pg_mv_statistic_stadeps - 1] = true;
+	nulls[Anum_pg_mv_statistic_stamcv - 1] = true;
 
 	/* insert the tuple into pg_mv_statistic */
 	mvstatrel = heap_open(MvStatisticRelationId, RowExclusiveLock);
diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c
index c72473b..a9cc9ad 100644
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -2203,10 +2203,12 @@ _outMVStatisticInfo(StringInfo str, const MVStatisticInfo *node)
 	/* enabled statistics */
 	WRITE_BOOL_FIELD(ndist_enabled);
 	WRITE_BOOL_FIELD(deps_enabled);
+	WRITE_BOOL_FIELD(mcv_enabled);
 
 	/* built/available statistics */
 	WRITE_BOOL_FIELD(ndist_built);
 	WRITE_BOOL_FIELD(deps_built);
+	WRITE_BOOL_FIELD(mcv_built);
 }
 
 static void
diff --git a/src/backend/optimizer/path/clausesel.c b/src/backend/optimizer/path/clausesel.c
index cc79282..abdbc5b 100644
--- a/src/backend/optimizer/path/clausesel.c
+++ b/src/backend/optimizer/path/clausesel.c
@@ -15,6 +15,7 @@
 #include "postgres.h"
 
 #include "access/sysattr.h"
+#include "catalog/pg_collation.h"
 #include "catalog/pg_operator.h"
 #include "nodes/makefuncs.h"
 #include "optimizer/clauses.h"
@@ -47,12 +48,14 @@ static void addRangeClause(RangeQueryClause **rqlist, Node *clause,
 			   bool varonleft, bool isLTsel, Selectivity s2);
 
 #define		STATS_TYPE_FDEPS	0x01
+#define		STATS_TYPE_MCV		0x02
 
-static bool clause_is_mv_compatible(Node *clause, Index relid, AttrNumber *attnum);
+static bool clause_is_mv_compatible(Node *clause, Index relid, Bitmapset **attnums,
+						int type);
 
-static Bitmapset *collect_mv_attnums(List *clauses, Index relid);
+static Bitmapset *collect_mv_attnums(List *clauses, Index relid, int type);
 
-static int	count_mv_attnums(List *clauses, Index relid);
+static int	count_mv_attnums(List *clauses, Index relid, int type);
 
 static int	count_varnos(List *clauses, Index *relid);
 
@@ -63,10 +66,23 @@ static List *clauselist_mv_split(PlannerInfo *root, Index relid,
 					List *clauses, List **mvclauses,
 					MVStatisticInfo *mvstats, int types);
 
+static Selectivity clauselist_mv_selectivity(PlannerInfo *root,
+						  List *clauses, MVStatisticInfo *mvstats);
+
 static Selectivity clauselist_mv_selectivity_deps(PlannerInfo *root,
 						Index relid, List *clauses, MVStatisticInfo *mvstats,
 						Index varRelid, JoinType jointype, SpecialJoinInfo *sjinfo);
 
+static Selectivity clauselist_mv_selectivity_mcvlist(PlannerInfo *root,
+								  List *clauses, MVStatisticInfo *mvstats,
+								  bool *fullmatch, Selectivity *lowsel);
+
+static int update_match_bitmap_mcvlist(PlannerInfo *root, List *clauses,
+							int2vector *stakeys, MCVList mcvlist,
+							int nmatches, char *matches,
+							Selectivity *lowsel, bool *fullmatch,
+							bool is_or);
+
 static bool has_stats(List *stats, int type);
 
 static List *find_stats(PlannerInfo *root, Index relid);
@@ -74,6 +90,9 @@ static List *find_stats(PlannerInfo *root, Index relid);
 static bool stats_type_matches(MVStatisticInfo *stat, int type);
 
 
+#define UPDATE_RESULT(m,r,isor) \
+	(m) = (isor) ? (Max(m,r)) : (Min(m,r))	/* OR keeps the larger value, AND the smaller */
+
 /****************************************************************************
  *		ROUTINES TO COMPUTE SELECTIVITIES
  ****************************************************************************/
@@ -99,11 +118,13 @@ static bool stats_type_matches(MVStatisticInfo *stat, int type);
  * to verify that suitable multivariate statistics exist.
  *
  * If we identify such multivariate statistics apply, we try to apply them.
- * Currently we only have (soft) functional dependencies, so we try to reduce
- * the list of clauses.
  *
- * Then we remove the clauses estimated using multivariate stats, and process
- * the rest of the clauses using the regular per-column stats.
+ * First we try to estimate the selectivity of the clauses using a
+ * multivariate MCV list, and then we try to reduce the remaining clauses
+ * by applying (soft) functional dependencies.
+ *
+ * Finally we remove the portion of clauses estimated using multivariate stats,
+ * and process the rest of the clauses using the regular per-column stats.
  *
  * Currently, the only extra smarts we have is to recognize "range queries",
  * such as "x > 34 AND x < 42".  Clauses are recognized as possible range
@@ -173,7 +194,10 @@ clauselist_selectivity(PlannerInfo *root,
 
 	/*
 	 * Check that there are multivariate statistics usable for selectivity
-	 * estimation, i.e. anything except ndistinct coefficients.
+	 * estimation. We try to apply MCV lists first, because statistics
+	 * tracking actual values tend to provide more reliable estimates than
+	 * functional dependencies (which assume that the clauses are consistent
+	 * with the statistics).
 	 *
 	 * Also check the number of attributes in clauses that might be estimated
 	 * using those statistics, and that there are at least two such attributes.
@@ -184,14 +208,43 @@ clauselist_selectivity(PlannerInfo *root,
 	 * If there are no such stats or not enough attributes, don't waste time
 	 * simply skip to estimation using the plain per-column stats.
 	 */
+	if (has_stats(stats, STATS_TYPE_MCV) &&
+		(count_mv_attnums(clauses, relid, STATS_TYPE_MCV) >= 2))
+	{
+		/* collect attributes from the compatible conditions */
+		Bitmapset  *mvattnums = collect_mv_attnums(clauses, relid,
+												   STATS_TYPE_MCV);
+
+		/* and search for the statistic covering the most attributes */
+		MVStatisticInfo *mvstat = choose_mv_statistics(stats, mvattnums,
+													   STATS_TYPE_MCV);
+
+		if (mvstat != NULL)		/* we have a matching stats */
+		{
+			/* clauses compatible with multi-variate stats */
+			List	   *mvclauses = NIL;
+
+			/* split the clauselist into regular and mv-clauses */
+			clauses = clauselist_mv_split(root, relid, clauses, &mvclauses,
+										  mvstat, STATS_TYPE_MCV);
+
+			/* we've chosen the statistics to match the clauses */
+			Assert(mvclauses != NIL);
+
+			/* compute the multivariate stats */
+			s1 *= clauselist_mv_selectivity(root, mvclauses, mvstat);
+		}
+	}
+
+	/* Now try to apply functional dependencies on the remaining clauses. */
 	if (has_stats(stats, STATS_TYPE_FDEPS) &&
-		(count_mv_attnums(clauses, relid) >= 2))
+		(count_mv_attnums(clauses, relid, STATS_TYPE_FDEPS) >= 2))
 	{
 		MVStatisticInfo *mvstat;
 		Bitmapset  *mvattnums;
 
 		/* collect attributes from the compatible conditions */
-		mvattnums = collect_mv_attnums(clauses, relid);
+		mvattnums = collect_mv_attnums(clauses, relid, STATS_TYPE_FDEPS);
 
 		/* and search for the statistic covering the most attributes */
 		mvstat = choose_mv_statistics(stats, mvattnums, STATS_TYPE_FDEPS);
@@ -994,7 +1047,7 @@ clauselist_mv_selectivity_deps(PlannerInfo *root, Index relid,
 		/* clauses remaining after removing those on the "implied" attribute */
 		List		   *clauses_filtered = NIL;
 
-		attnums = collect_mv_attnums(clauses, relid);
+		attnums = collect_mv_attnums(clauses, relid, STATS_TYPE_FDEPS);
 
 		/* no point in looking for dependencies with fewer than 2 attributes */
 		if (bms_num_members(attnums) < 2)
@@ -1017,7 +1070,7 @@ clauselist_mv_selectivity_deps(PlannerInfo *root, Index relid,
 		 */
 		foreach(lc, clauses)
 		{
-			AttrNumber	attnum_clause = InvalidAttrNumber;
+			Bitmapset  *attnums_clause = NULL;
 			Node	   *clause = (Node *) lfirst(lc);
 
 			/*
@@ -1026,17 +1079,20 @@ clauselist_mv_selectivity_deps(PlannerInfo *root, Index relid,
 			 * we should only see equality clauses compatible with functional
 			 * dependencies, so just error out if we stumble upon something else.
 			 */
-			if (! clause_is_mv_compatible(clause, relid, &attnum_clause))
+			if (! clause_is_mv_compatible(clause, relid, &attnums_clause,
+										  STATS_TYPE_FDEPS))
 				elog(ERROR, "clause not compatible with functional dependencies");
 
-			Assert(AttributeNumberIsValid(attnum_clause));
+			/* we also expect only simple equality clauses */
+			Assert(bms_num_members(attnums_clause) == 1);
 
 			/*
 			 * If the clause is not on the implied attribute, add it to the list
 			 * of filtered clauses (for the next round) and continue with the
 			 * next one.
 			 */
-			if (! dependency_implies_attribute(dependency, attnum_clause,
+			if (! dependency_implies_attribute(dependency,
+											   bms_singleton_member(attnums_clause),
 											   mvstats->stakeys->values))
 			{
 				clauses_filtered = lappend(clauses_filtered, clause);
@@ -1080,10 +1136,71 @@ clauselist_mv_selectivity_deps(PlannerInfo *root, Index relid,
 }
 
 /*
+ * estimate selectivity of clauses using multivariate statistic
+ *
+ * Perform estimation of the clauses using a MCV list.
+ *
+ * This assumes all the clauses are compatible with the selected statistics
+ * (e.g. only reference columns covered by the statistics, use supported
+ * operator, etc.).
+ *
+ * TODO: We may support some additional conditions, most importantly those
+ * matching multiple columns (e.g. "a = b" or "a < b").
+ *
+ * TODO: Clamp the selectivity by min of the per-clause selectivities (i.e. the
+ * selectivity of the most restrictive clause), because that's the maximum
+ * we can ever get from ANDed list of clauses. This may probably prevent
+ * issues with hitting too many buckets and low precision histograms.
+ *
+ * TODO: We may remember the lowest frequency in the MCV list, and then later
+ * use it as a upper boundary for the selectivity (had there been a more
+ * frequent item, it'd be in the MCV list). This might improve cases with
+ * low-detail histograms.
+ *
+ * TODO: We may also derive some additional boundaries for the selectivity from
+ * the MCV list, because
+ *
+ * (a) if we have a "full equality condition" (one equality condition on
+ * each column of the statistic) and we found a match in the MCV list,
+ * then this is the final selectivity (and pretty accurate),
+ *
+ * (b) if we have a "full equality condition" and we haven't found a match
+ * in the MCV list, then the selectivity is below the lowest frequency
+ * found in the MCV list,
+ *
+ * TODO: When applying the clauses to the histogram/MCV list, we can do that
+ * from the most selective clauses first, because that'll eliminate the
+ * buckets/items sooner (so we'll be able to skip them without inspection,
+ * which is more expensive). But this requires really knowing the per-clause
+ * selectivities in advance, and that's not what we do now.
+ */
+static Selectivity
+clauselist_mv_selectivity(PlannerInfo *root, List *clauses, MVStatisticInfo *mvstats)
+{
+	bool		fullmatch = false;
+
+	/*
+	 * Lowest frequency in the MCV list (may be used as an upper bound for
+	 * full equality conditions that did not match any MCV item).
+	 */
+	Selectivity mcv_low = 0.0;
+
+	/*
+	 * TODO: Evaluate simple 1D selectivities, use the smallest one as an
+	 * upper bound, product as lower bound, and sort the clauses in ascending
+	 * order by selectivity (to optimize the MCV/histogram evaluation).
+	 */
+
+	/* Evaluate the MCV selectivity (fullmatch and mcv_low are not used yet) */
+	return clauselist_mv_selectivity_mcvlist(root, clauses, mvstats,
+											 &fullmatch, &mcv_low);
+}
+
+/*
  * Collect attributes from mv-compatible clauses.
  */
 static Bitmapset *
-collect_mv_attnums(List *clauses, Index relid)
+collect_mv_attnums(List *clauses, Index relid, int types)
 {
 	Bitmapset  *attnums = NULL;
 	ListCell   *l;
@@ -1099,12 +1216,10 @@ collect_mv_attnums(List *clauses, Index relid)
 	 */
 	foreach(l, clauses)
 	{
-		AttrNumber	attnum;
 		Node	   *clause = (Node *) lfirst(l);
 
-		/* ignore the result for now - we only need the info */
-		if (clause_is_mv_compatible(clause, relid, &attnum))
-			attnums = bms_add_member(attnums, attnum);
+		/* ignore the result here - we only need the attnums */
+		clause_is_mv_compatible(clause, relid, &attnums, types);
 	}
 
 	/*
@@ -1125,10 +1240,10 @@ collect_mv_attnums(List *clauses, Index relid)
  * Count the number of attributes in clauses compatible with multivariate stats.
  */
 static int
-count_mv_attnums(List *clauses, Index relid)
+count_mv_attnums(List *clauses, Index relid, int type)
 {
 	int			c;
-	Bitmapset  *attnums = collect_mv_attnums(clauses, relid);
+	Bitmapset  *attnums = collect_mv_attnums(clauses, relid, type);
 
 	c = bms_num_members(attnums);
 
@@ -1263,7 +1378,8 @@ choose_mv_statistics(List *stats, Bitmapset *attnums, int types)
 		int			numattrs = info->stakeys->dim1;
 
 		/* skip statistics not matching any of the requested types */
-		if (! (info->deps_built && (STATS_TYPE_FDEPS & types)))
+		if (! ((info->deps_built && (STATS_TYPE_FDEPS & types)) ||
+			   (info->mcv_built && (STATS_TYPE_MCV & types))))
 			continue;
 
 		/* count columns covered by the statistics */
@@ -1317,13 +1433,13 @@ clauselist_mv_split(PlannerInfo *root, Index relid,
 	foreach(l, clauses)
 	{
 		bool		match = false;		/* by default not mv-compatible */
-		AttrNumber	attnum = InvalidAttrNumber;
+		Bitmapset  *attnums = NULL;
 		Node	   *clause = (Node *) lfirst(l);
 
-		if (clause_is_mv_compatible(clause, relid, &attnum))
+		if (clause_is_mv_compatible(clause, relid, &attnums, types))
 		{
 			/* are all the attributes part of the selected stats? */
-			if (bms_is_member(attnum, mvattnums))
+			if (bms_is_subset(attnums, mvattnums))
 				match = true;
 		}
 
@@ -1348,6 +1464,7 @@ clauselist_mv_split(PlannerInfo *root, Index relid,
 
 typedef struct
 {
+	int			types;			/* bitmask of STATS_TYPE_* flags to check against */
 	Index		varno;			/* relid we're interested in */
 	Bitmapset  *varattnos;		/* attnums referenced by the clauses */
 } mv_compatible_context;
@@ -1382,6 +1499,49 @@ mv_compatible_walker(Node *node, mv_compatible_context *context)
 		return mv_compatible_walker((Node *) rinfo->clause, (void *) context);
 	}
 
+	if (or_clause(node) || and_clause(node) || not_clause(node))
+	{
+		/*
+		 * AND/OR/NOT-clauses are supported if all sub-clauses are supported
+		 *
+		 * TODO: We might support mixed case, where some of the clauses are
+		 * supported and some are not, and treat all supported subclauses as a
+		 * single clause, compute its selectivity using mv stats, and compute
+		 * the total selectivity using the current algorithm.
+		 *
+		 * TODO: For RestrictInfo above an OR-clause, we might use the
+		 * orclause with nested RestrictInfo - we won't have to call
+		 * pull_varnos() for each clause, saving time.
+		 *
+		 * TODO: Perhaps this needs a bit more thought for functional
+		 * dependencies? Those don't quite work for NOT cases.
+		 */
+		BoolExpr   *expr = (BoolExpr *) node;
+		ListCell   *lc;
+
+		foreach(lc, expr->args)
+		{
+			if (mv_compatible_walker((Node *) lfirst(lc), context))
+				return true;
+		}
+
+		return false;
+	}
+
+	if (IsA(node, NullTest))
+	{
+		NullTest   *nt = (NullTest *) node;
+
+		/*
+		 * Only simple (Var IS NULL) expressions supported for now. Maybe we
+		 * could use examine_variable to fix this?
+		 */
+		if (!IsA(nt->arg, Var))
+			return true;
+
+		return mv_compatible_walker((Node *) (nt->arg), context);
+	}
+
 	if (IsA(node, Var))
 	{
 		Var		   *var = (Var *) node;
@@ -1442,10 +1602,18 @@ mv_compatible_walker(Node *node, mv_compatible_context *context)
 		switch (get_oprrest(expr->opno))
 		{
 			case F_EQSEL:
-
 				/* equality conditions are compatible with all statistics */
 				break;
 
+			case F_SCALARLTSEL:
+			case F_SCALARGTSEL:
+
+				/* not compatible with functional dependencies */
+				if (!(context->types & STATS_TYPE_MCV))
+					return true;	/* terminate */
+
+				break;
+
 			default:
 
 				/* unknown estimator */
@@ -1479,10 +1647,11 @@ mv_compatible_walker(Node *node, mv_compatible_context *context)
  * evaluate them using multivariate stats.
  */
 static bool
-clause_is_mv_compatible(Node *clause, Index relid, AttrNumber *attnum)
+clause_is_mv_compatible(Node *clause, Index relid, Bitmapset **attnums, int types)
 {
 	mv_compatible_context context;
 
+	context.types = types;
 	context.varno = relid;
 	context.varattnos = NULL;	/* no attnums */
 
@@ -1490,7 +1659,7 @@ clause_is_mv_compatible(Node *clause, Index relid, AttrNumber *attnum)
 		return false;
 
 	/* remember the newly collected attnums */
-	*attnum = bms_singleton_member(context.varattnos);
+	*attnums = bms_add_members(*attnums, context.varattnos);
 
 	return true;
 }
@@ -1505,6 +1674,9 @@ stats_type_matches(MVStatisticInfo *stat, int type)
 	if ((type & STATS_TYPE_FDEPS) && stat->deps_built)
 		return true;
 
+	if ((type & STATS_TYPE_MCV) && stat->mcv_built)
+		return true;
+
 	return false;
 }
 
@@ -1538,3 +1710,409 @@ find_stats(PlannerInfo *root, Index relid)
 
 	return root->simple_rel_array[relid]->mvstatlist;
 }
+
+/*
+ * Estimate selectivity of clauses using a MCV list.
+ *
+ * If there's no MCV list for the stats, the function returns 0.0.
+ *
+ * While computing the estimate, the function checks whether all the
+ * columns were matched with an equality condition. If that's the case,
+ * we can skip processing the histogram, as there can be no rows in
+ * it with the same values - all the rows matching the condition are
+ * represented by the MCV item. This can only happen with equality
+ * on all the attributes.
+ *
+ * The algorithm works like this:
+ *
+ *	 1) mark all items as 'match'
+ *	 2) walk through all the clauses
+ *	 3) for a particular clause, walk through all the items
+ *	 4) skip items that are already 'no match'
+ *	 5) check clause for items that still match
+ *	 6) sum frequencies for items to get selectivity
+ *
+ * The function also returns the frequency of the least frequent item
+ * on the MCV list, which may be useful for clamping estimate from the
+ * histogram (all items not present in the MCV list are less frequent).
+ * This however seems useful only for cases with conditions on all
+ * attributes.
+ *
+ * TODO: This only handles AND-ed clauses, but it might work for OR-ed
+ * lists too - it just needs to reverse the logic a bit. I.e. start
+ * with 'no match' for all items, and mark the items as a match
+ * as the clauses are processed (and skip items that are 'match').
+ */
+static Selectivity
+clauselist_mv_selectivity_mcvlist(PlannerInfo *root, List *clauses,
+								  MVStatisticInfo *mvstats, bool *fullmatch,
+								  Selectivity *lowsel)
+{
+	int			i;
+	Selectivity s = 0.0;		/* summed frequency of matching items */
+	Selectivity u = 0.0;		/* summed frequency of all items */
+	MCVList		mcvlist;
+	char	   *matches;		/* one match flag (char) per MCV item */
+
+	Assert(clauses != NIL);
+	Assert(list_length(clauses) >= 2);
+
+	/* there's no MCV list built yet (out-params are left untouched) */
+	if (!mvstats->mcv_built)
+		return 0.0;
+
+	mcvlist = load_mv_mcvlist(mvstats->mvoid);
+
+	Assert(mcvlist != NULL);
+	Assert(mcvlist->nitems > 0);
+
+	/*
+	 * By default all the MCV items match the clauses fully.  The memset
+	 * initializes every byte, so plain palloc (no zeroing) is sufficient.
+	 */
+	matches = palloc(sizeof(char) * mcvlist->nitems);
+	memset(matches, MVSTATS_MATCH_FULL, sizeof(char) * mcvlist->nitems);
+
+	/*
+	 * Evaluate the clauses as an AND-list (is_or = false), starting with all
+	 * items matching.  Only the bitmap is needed, not the returned count.
+	 */
+	(void) update_match_bitmap_mcvlist(root, clauses,
+									   mvstats->stakeys, mcvlist,
+									   mcvlist->nitems, matches,
+									   lowsel, fullmatch, false);
+
+	/* sum frequencies for all the matching MCV items */
+	for (i = 0; i < mcvlist->nitems; i++)
+	{
+		/* used to 'scale' for MCV lists not covering all tuples */
+		u += mcvlist->items[i]->frequency;
+
+		if (matches[i] != MVSTATS_MATCH_NONE)
+			s += mcvlist->items[i]->frequency;
+	}
+
+	pfree(matches);
+	pfree(mcvlist);
+
+	return s * u;
+}
+
+/*
+ * Evaluate clauses using the MCV list, and update the match bitmap.
+ *
+ * The bitmap may be already partially set, so this is really a way to
+ * combine results of several clause lists - either when computing
+ * conditional probability P(A|B) or a combination of AND/OR clauses.
+ *
+ * 'matches' holds one char per MCV item, and 'nmatches' is the number of
+ * items flagged MVSTATS_MATCH_FULL on entry; the updated count is returned.
+ * With is_or = false the clauses are AND-ed (items only get eliminated),
+ * with is_or = true they are OR-ed (items only get added).
+ *
+ * Nested AND/OR clauses are evaluated recursively on a separate bitmap,
+ * which is then merged into 'matches'.
+ *
+ * Unless the early exit below is taken, *lowsel is set to the lowest item
+ * frequency in the MCV list, and *fullmatch tells whether each dimension
+ * had an equality clause matching at least one evaluated item.
+ *
+ * TODO: This works with 'bitmap' where each bit is represented as a char,
+ * which is slightly wasteful. Instead, we could use a regular
+ * bitmap, reducing the size to ~1/8. Another thing is merging the
+ * bitmaps using & and |, which might be faster than min/max.
+ */
+static int
+update_match_bitmap_mcvlist(PlannerInfo *root, List *clauses,
+							int2vector *stakeys, MCVList mcvlist,
+							int nmatches, char *matches,
+							Selectivity *lowsel, bool *fullmatch,
+							bool is_or)
+{
+	int			i;
+	ListCell   *l;
+
+	Bitmapset  *eqmatches = NULL;		/* attributes with equality matches */
+
+	/* The bitmap may be partially built. */
+	Assert(mcvlist != NULL);
+	Assert(mcvlist->nitems > 0);
+	Assert(nmatches >= 0);
+	Assert(nmatches <= mcvlist->nitems);
+	Assert(clauses != NIL);
+	Assert(list_length(clauses) >= 1);
+
+	/* Nothing can change: no matches left (AND), or all items match (OR). */
+	if (((nmatches == 0) && (!is_or)) ||
+		((nmatches == mcvlist->nitems) && is_or))
+		return nmatches;
+
+	/*
+	 * find the lowest frequency in the MCV list
+	 *
+	 * We need to do that here, because we do various tricks in the following
+	 * code - skipping items already ruled out, etc.
+	 *
+	 * XXX A loop is necessary because the MCV list is not sorted by
+	 * frequency.
+	 */
+	*lowsel = 1.0;
+	for (i = 0; i < mcvlist->nitems; i++)
+	{
+		MCVItem		item = mcvlist->items[i];
+
+		if (item->frequency < *lowsel)
+			*lowsel = item->frequency;
+	}
+
+	/*
+	 * Loop through the list of clauses, and for each of them evaluate all the
+	 * MCV items not yet eliminated by the preceding clauses.
+	 */
+	foreach(l, clauses)
+	{
+		Node	   *clause = (Node *) lfirst(l);
+
+		/* if it's a RestrictInfo, then extract the clause */
+		if (IsA(clause, RestrictInfo))
+			clause = (Node *) ((RestrictInfo *) clause)->clause;
+
+		/* if there are no remaining matches possible, we can stop */
+		if (((nmatches == 0) && (!is_or)) ||
+			((nmatches == mcvlist->nitems) && is_or))
+			break;
+
+		/* it's an OpExpr, a NullTest, or a nested AND/OR clause */
+		if (is_opclause(clause))
+		{
+			OpExpr	   *expr = (OpExpr *) clause;
+			bool		varonleft = true;
+			bool		ok;
+			FmgrInfo	opproc;
+
+			/* get procedure computing operator selectivity */
+			RegProcedure oprrest = get_oprrest(expr->opno);
+
+			fmgr_info(get_opcode(expr->opno), &opproc);
+
+			/*
+			 * The clause must reference a single relation and have a pseudo-
+			 * constant on one side (varonleft records which one). Clauses
+			 * failing this check do not modify the bitmap.
+			 */
+			ok = (NumRelids(clause) == 1) &&
+				(is_pseudo_constant_clause(lsecond(expr->args)) ||
+				 (varonleft = false,
+				  is_pseudo_constant_clause(linitial(expr->args))));
+
+			if (ok)
+			{
+				Var		   *var = (varonleft) ? linitial(expr->args) : lsecond(expr->args);
+				Const	   *cst = (varonleft) ? lsecond(expr->args) : linitial(expr->args);
+
+				/* FIXME proper matching attribute to dimension */
+				int			idx = mv_get_index(var->varattno, stakeys);
+
+				/*
+				 * Walk through the MCV items and evaluate the current clause.
+				 * We can skip items that were already ruled out, and
+				 * terminate if there are no remaining MCV items that might
+				 * possibly match.
+				 */
+				for (i = 0; i < mcvlist->nitems; i++)
+				{
+					bool		mismatch = false;
+					MCVItem		item = mcvlist->items[i];
+
+					/*
+					 * If there are no more matches (AND) or no remaining
+					 * unmatched items (OR), we can stop processing this
+					 * clause.
+					 */
+					if (((nmatches == 0) && (!is_or)) ||
+						((nmatches == mcvlist->nitems) && is_or))
+						break;
+
+					/* skip MCV items whose state can't change anymore */
+					if ((!is_or) && (matches[i] == MVSTATS_MATCH_NONE))
+						continue;
+					else if (is_or && (matches[i] == MVSTATS_MATCH_FULL))
+						continue;
+
+					/*
+					 * A NULL in the item can't satisfy the operator (the
+					 * operators handled here are assumed to be strict), so
+					 * it's a mismatch for both AND and OR lists. The
+					 * operator must not be called on the NULL value at all.
+					 *
+					 * Operators with any other estimator are not evaluated,
+					 * so mismatch stays false for them.
+					 */
+					if (item->isnull[idx])
+						mismatch = true;
+					else if ((oprrest == F_EQSEL) ||
+							 (oprrest == F_SCALARLTSEL) ||
+							 (oprrest == F_SCALARGTSEL))
+					{
+						/*
+						 * Evaluate the operator with the arguments in the
+						 * same order as in the clause - that matters for
+						 * inequalities, and for cross-type operators.
+						 *
+						 * XXX The comparison always uses the default
+						 * collation, not the collation of the clause.
+						 */
+						Datum		larg = (varonleft) ? item->values[idx] : cst->constvalue;
+						Datum		rarg = (varonleft) ? cst->constvalue : item->values[idx];
+
+						mismatch = !DatumGetBool(FunctionCall2Coll(&opproc,
+														DEFAULT_COLLATION_OID,
+														larg, rarg));
+
+						/* remember the dimensions matched by an equality */
+						if ((oprrest == F_EQSEL) && (!mismatch))
+							eqmatches = bms_add_member(eqmatches, idx);
+					}
+
+					/*
+					 * XXX The conditions on matches[i] are not needed, as we
+					 * skip MCV items that can't become true/false, depending
+					 * on the current flag. See beginning of the loop over MCV
+					 * items.
+					 */
+
+					if ((is_or) && (matches[i] == MVSTATS_MATCH_NONE) && (!mismatch))
+					{
+						/* OR - was MATCH_NONE, but will be MATCH_FULL */
+						matches[i] = MVSTATS_MATCH_FULL;
+						++nmatches;
+					}
+					else if ((!is_or) && (matches[i] == MVSTATS_MATCH_FULL) && mismatch)
+					{
+						/* AND - was MATCH_FULL, but will be MATCH_NONE */
+						matches[i] = MVSTATS_MATCH_NONE;
+						--nmatches;
+					}
+				}
+			}
+		}
+		else if (IsA(clause, NullTest))
+		{
+			NullTest   *expr = (NullTest *) clause;
+			Var		   *var = (Var *) (expr->arg);
+
+			/* FIXME proper matching attribute to dimension */
+			int			idx = mv_get_index(var->varattno, stakeys);
+
+			/*
+			 * Walk through the MCV items and evaluate the current clause. We
+			 * can skip items whose state can't change, and terminate once no
+			 * further change is possible (same rules as for operator clauses,
+			 * so that IS [NOT] NULL works in OR-lists too).
+			 */
+			for (i = 0; i < mcvlist->nitems; i++)
+			{
+				bool		mismatch;
+				MCVItem		item = mcvlist->items[i];
+
+				/* no more matches (AND) or no unmatched items (OR) */
+				if (((nmatches == 0) && (!is_or)) ||
+					((nmatches == mcvlist->nitems) && is_or))
+					break;
+
+				/* skip MCV items whose state can't change anymore */
+				if ((!is_or) && (matches[i] == MVSTATS_MATCH_NONE))
+					continue;
+				else if (is_or && (matches[i] == MVSTATS_MATCH_FULL))
+					continue;
+
+				/* does the NULL flag of the item contradict the test? */
+				mismatch = ((expr->nulltesttype == IS_NULL) && (!item->isnull[idx])) ||
+					((expr->nulltesttype == IS_NOT_NULL) && (item->isnull[idx]));
+
+				if (is_or && (!mismatch))
+				{
+					/* OR - the item satisfies the test, so it matches now */
+					matches[i] = MVSTATS_MATCH_FULL;
+					++nmatches;
+				}
+				else if ((!is_or) && mismatch)
+				{
+					/* AND - the item fails the test, so it's ruled out */
+					matches[i] = MVSTATS_MATCH_NONE;
+					--nmatches;
+				}
+			}
+		}
+		else if (or_clause(clause) || and_clause(clause))
+		{
+			/*
+			 * AND/OR clause, with all clauses compatible with the selected MV
+			 * stat
+			 */
+			BoolExpr   *boolexpr = ((BoolExpr *) clause);
+			List	   *subclauses = boolexpr->args;
+			bool		sub_is_or = or_clause(clause);
+
+			/* match/mismatch bitmap for each MCV item */
+			int			sub_nmatches;
+			char	   *sub_matches;
+
+			Assert(subclauses != NIL);
+			Assert(list_length(subclauses) >= 2);
+
+			/* the nested clauses get a bitmap of their own, merged below */
+			sub_matches = palloc(sizeof(char) * mcvlist->nitems);
+
+			if (sub_is_or)
+			{
+				/* OR clauses assume nothing matches, initially */
+				memset(sub_matches, MVSTATS_MATCH_NONE, sizeof(char) * mcvlist->nitems);
+				sub_nmatches = 0;
+			}
+			else
+			{
+				/* AND clauses assume everything matches, initially */
+				memset(sub_matches, MVSTATS_MATCH_FULL, sizeof(char) * mcvlist->nitems);
+				sub_nmatches = mcvlist->nitems;
+			}
+
+			/* build the match bitmap for the nested clauses */
+			(void) update_match_bitmap_mcvlist(root, subclauses,
+											   stakeys, mcvlist,
+											   sub_nmatches, sub_matches,
+											   lowsel, fullmatch, sub_is_or);
+
+			/*
+			 * Merge the result into the bitmap (Min for AND, Max for OR), and
+			 * recount the matches so that nmatches stays in sync with it.
+			 */
+			nmatches = 0;
+			for (i = 0; i < mcvlist->nitems; i++)
+			{
+				UPDATE_RESULT(matches[i], sub_matches[i], is_or);
+
+				if (matches[i] == MVSTATS_MATCH_FULL)
+					nmatches++;
+			}
+
+			pfree(sub_matches);
+		}
+		else
+		{
+			elog(ERROR, "unknown clause type: %d", clause->type);
+		}
+	}
+
+	/*
+	 * If all the columns were matched by equality, it's a full match. In this
+	 * case there can be just a single MCV item, matching the clause (if there
+	 * were two, both would match the other one).
+	 */
+	*fullmatch = (bms_num_members(eqmatches) == mcvlist->ndimensions);
+
+	/* free the allocated pieces (bms_free copes with NULL) */
+	bms_free(eqmatches);
+
+	return nmatches;
+}
diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c
index 8129143..9dd4e83 100644
--- a/src/backend/optimizer/util/plancat.c
+++ b/src/backend/optimizer/util/plancat.c
@@ -1287,7 +1287,7 @@ get_relation_statistics(RelOptInfo *rel, Relation relation)
 		mvstat = (Form_pg_mv_statistic) GETSTRUCT(htup);
 
 		/* unavailable stats are not interesting for the planner */
-		if (mvstat->deps_built || mvstat->ndist_built)
+		if (mvstat->deps_built || mvstat->ndist_built || mvstat->mcv_built)
 		{
 			info = makeNode(MVStatisticInfo);
 
@@ -1297,10 +1297,12 @@ get_relation_statistics(RelOptInfo *rel, Relation relation)
 			/* enabled statistics */
 			info->ndist_enabled = mvstat->ndist_enabled;
 			info->deps_enabled = mvstat->deps_enabled;
+			info->mcv_enabled = mvstat->mcv_enabled;
 
 			/* built/available statistics */
 			info->ndist_built = mvstat->ndist_built;
 			info->deps_built = mvstat->deps_built;
+			info->mcv_built = mvstat->mcv_built;
 
 			/* stakeys */
 			adatum = SysCacheGetAttr(MVSTATOID, htup,
diff --git a/src/backend/utils/mvstats/Makefile b/src/backend/utils/mvstats/Makefile
index 21fe7e5..d5d47ba 100644
--- a/src/backend/utils/mvstats/Makefile
+++ b/src/backend/utils/mvstats/Makefile
@@ -12,6 +12,6 @@ subdir = src/backend/utils/mvstats
 top_builddir = ../../../..
 include $(top_builddir)/src/Makefile.global
 
-OBJS = common.o dependencies.o mvdist.o
+OBJS = common.o dependencies.o mcv.o mvdist.o
 
 include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/utils/mvstats/README.mcv b/src/backend/utils/mvstats/README.mcv
new file mode 100644
index 0000000..e93cfe4
--- /dev/null
+++ b/src/backend/utils/mvstats/README.mcv
@@ -0,0 +1,137 @@
+MCV lists
+=========
+
+Multivariate MCV (most-common values) lists are a straightforward extension of
+regular MCV list, tracking most frequent combinations of values for a group of
+attributes.
+
+This works particularly well for columns with a small number of distinct values,
+as the list may include all the combinations and approximate the distribution
+very accurately.
+
+For columns with large number of distinct values (e.g. those with continuous
+domains), the list will only track the most frequent combinations. If the
+distribution is mostly uniform (all combinations about equally frequent), the
+MCV list will be empty.
+
+Estimates of some clauses (e.g. equality) based on MCV lists are more accurate
+than when using histograms.
+
+Also, MCV lists don't necessarily require sorting of the values (the fact that
+we use sorting when building them is implementation detail), but even more
+importantly the ordering is not built into the approximation (while histograms
+are built on ordering). So MCV lists work well even for attributes where the
+ordering of the data type is disconnected from the meaning of the data. For
+example we know how to sort strings, but it's unlikely to make much sense for
+city names (or other label-like attributes).
+
+
+Selectivity estimation
+----------------------
+
+The estimation, implemented in clauselist_mv_selectivity_mcvlist(), is quite
+simple in principle - we need to identify MCV items matching all the clauses
+and sum frequencies of all those items.
+
+Currently MCV lists support estimation of the following clause types:
+
+    (a) equality clauses    WHERE (a = 1) AND (b = 2)
+    (b) inequality clauses  WHERE (a < 1) AND (b >= 2)
+    (c) NULL clauses        WHERE (a IS NULL) AND (b IS NOT NULL)
+    (d) OR clauses          WHERE (a < 1) OR (b >= 2)
+
+It's possible to add support for additional clauses, for example:
+
+    (e) multi-var clauses   WHERE (a > b)
+
+and possibly others. These are tasks for the future, not yet implemented.
+
+
+Estimating equality clauses
+---------------------------
+
+When computing selectivity estimate for equality clauses
+
+    (a = 1) AND (b = 2)
+
+we can do this estimate pretty exactly assuming that two conditions are met:
+
+    (1) there's an equality condition on all attributes of the statistic
+
+    (2) we find a matching item in the MCV list
+
+In this case we know the MCV item represents all tuples matching the clauses,
+and the selectivity estimate is complete (i.e. we don't need to perform
+estimation using the histogram). This is what we call 'full match'.
+
+When only (1) holds, but there's no matching MCV item, we don't know whether
+there are no such rows or they are just not very frequent. We can however use the
+frequency of the least frequent MCV item as an upper bound for the selectivity.
+
+For a combination of equality conditions (not full-match case) we can clamp the
+selectivity by the minimum of selectivities for each condition. For example if
+we know the number of distinct values for each column, we can use 1/ndistinct
+as a per-column estimate. Or rather 1/ndistinct + selectivity derived from the
+MCV list.
+
+We should also probably only use the 'residual ndistinct' by excluding the items
+included in the MCV list (and also residual frequency):
+
+     f = (1.0 - sum(MCV frequencies)) / (ndistinct - ndistinct(MCV list))
+
+but it's worth pointing out the ndistinct values are multi-variate for the
+columns referenced by the equality conditions.
+
+Note: Only the "full match" limit is currently implemented.
+
+
+Hashed MCV (not yet implemented)
+--------------------------------
+
+Regular MCV lists have to include actual values for each item, so if those items
+are large the list may be quite large. This is especially true for multi-variate
+MCV lists, although the current implementation partially mitigates this by
+de-duplicating the values before storing them on disk.
+
+It's possible to only store hashes (32-bit values) instead of the actual values,
+significantly reducing the space requirements. Obviously, this would only make
+the MCV lists useful for estimating equality conditions (assuming the 32-bit
+hashes make the collisions rare enough).
+
+This might also complicate matching the columns to available stats.
+
+
+TODO Consider implementing hashed MCV list, storing just 32-bit hashes instead
+     of the actual values. This type of MCV list will be useful only for
+     estimating equality clauses, and will reduce space requirements for large
+     varlena types (in such cases we usually only want equality anyway).
+
+TODO Currently there's no logic to consider building only a MCV list (and not
+     building the histogram at all), except for doing this decision manually in
+     ADD STATISTICS.
+
+
+Inspecting the MCV list
+-----------------------
+
+Inspecting the regular (per-attribute) MCV lists is trivial, as it's enough
+to select the columns from pg_stats - the data is encoded as anyarrays, so we
+simply get the text representation of the arrays.
+
+With multivariate MCV lists it's not that simple due to the possible mix of
+data types. It might be possible to produce similar array-like representation,
+but that'd unnecessarily complicate further processing and analysis of the MCV
+list. Instead, there's a SRF function providing values, frequencies etc.
+
+    SELECT * FROM pg_mv_mcv_items();
+
+It has a single input parameter:
+
+    oid   - OID of the MCV list (pg_mv_statistic.staoid)
+
+and produces a table with these columns:
+
+    - item ID (0...nitems-1)
+    - values (string array)
+    - nulls only (boolean array)
+    - frequency (double precision)
diff --git a/src/backend/utils/mvstats/README.stats b/src/backend/utils/mvstats/README.stats
index 814f39c..8d3d268 100644
--- a/src/backend/utils/mvstats/README.stats
+++ b/src/backend/utils/mvstats/README.stats
@@ -8,9 +8,50 @@ not true, resulting in estimation errors.
 Multivariate stats track different types of dependencies between the columns,
 hopefully improving the estimates.
 
-Currently we only have one kind of multivariate statistics - soft functional
-dependencies, and we use it to improve estimates of equality clauses. See
-README.dependencies for details.
+
+Types of statistics
+-------------------
+
+Currently we only have two kinds of multivariate statistics
+
+    (a) soft functional dependencies (README.dependencies)
+
+    (b) MCV lists (README.mcv)
+
+
+Compatible clause types
+-----------------------
+
+Each type of statistics may be used to estimate some subset of clause types.
+
+    (a) functional dependencies - equality clauses (AND), possibly IS NULL
+
+    (b) MCV list - equality and inequality clauses, IS [NOT] NULL, AND/OR
+
+Currently only simple operator clauses (Var op Const) are supported, but it's
+possible to support more complex clause types, e.g. (Var op Var).
+
+
+Complex clauses
+---------------
+
+We also support estimating more complex clauses - essentially AND/OR clauses
+with (Var op Const) as leaves, as long as all the referenced attributes are
+covered by a single statistics.
+
+For example this condition
+
+    (a=1) AND ((b=2) OR ((c=3) AND (d=4)))
+
+may be estimated using statistics on (a,b,c,d). If we only have statistics on
+(b,c,d) we may estimate the second part, and estimate (a=1) using simple stats.
+
+If we only have statistics on (a,b,c) we can't apply it at all at this point,
+but it's worth pointing out clauselist_selectivity() works recursively and when
+handling the second part (the OR-clause), we'll be able to apply the statistics.
+
+Note: The multi-statistics estimation patch also makes it possible to pass some
+clauses as 'conditions' into the deeper parts of the expression tree.
 
 
 Selectivity estimation
@@ -23,21 +64,53 @@ When estimating selectivity, we aim to achieve several things:
     (b) minimize the overhead, especially when no suitable multivariate stats
         exist (so if you are not using multivariate stats, there's no overhead)
 
-This clauselist_selectivity() performs several inexpensive checks first, before
+Thus clauselist_selectivity() performs several inexpensive checks first, before
 even attempting to do the more expensive estimation.
 
     (1) check if there are multivariate stats on the relation
 
-    (2) check there are at least two attributes referenced by clauses compatible
-        with multivariate statistics (equality clauses for func. dependencies)
+    (2) check that there are functional dependencies on the table, and that
+        there are at least two attributes referenced by compatible clauses
+        (equality clauses for func. dependencies)
 
     (3) perform reduction of equality clauses using func. dependencies
 
-    (4) estimate the reduced list of clauses using regular statistics
+    (4) check that there are multivariate MCV lists on the table, and that
+        there are at least two attributes referenced by compatible clauses
+        (equalities, inequalities, etc.)
+
+    (5) find the best multivariate statistics (matching the most conditions)
+        and use it to compute the estimate
+
+    (6) estimate the remaining clauses (not estimated using multivariate stats)
+        using the regular per-column statistics
 
 Whenever we find there are no suitable stats, we skip the expensive steps.
 
 
+Further (possibly crazy) ideas
+------------------------------
+
+Currently the clauses are only estimated using a single statistics, even if
+there are multiple candidate statistics - for example assume we have statistics
+on (a,b,c) and (b,c,d), and estimate conditions
+
+    (b = 1) AND (c = 2)
+
+Then both statistics may be used, but we only use one of them. Maybe we could
+compute estimates using all candidate stats, and somehow aggregate them
+into the final estimate by using average or median.
+
+Some stats may give better estimates than others, but it's very difficult to say
+in advance which stats are the best (it depends on the number of buckets, number
+of additional columns not referenced in the clauses, type of condition etc.).
+
+But of course, this may result in expensive estimation (CPU-wise).
+
+So we might add a GUC to choose between simple (single-statistics) and
+multi-statistic estimation, possibly also as a table-level parameter (ALTER TABLE ...).
+
+
 Size of sample in ANALYZE
 -------------------------
 When performing ANALYZE, the number of rows to sample is determined as
diff --git a/src/backend/utils/mvstats/common.c b/src/backend/utils/mvstats/common.c
index 39e3b92..fc8eae2 100644
--- a/src/backend/utils/mvstats/common.c
+++ b/src/backend/utils/mvstats/common.c
@@ -15,6 +15,7 @@
  */
 
 #include "common.h"
+#include "utils/array.h"
 
 static VacAttrStats **lookup_var_attr_stats(int2vector *attrs,
 					  int natts, VacAttrStats **vacattrstats);
@@ -23,9 +24,9 @@ static List *list_mv_stats(Oid relid);
 
 static void update_mv_stats(Oid relid,
 					  MVNDistinct ndistinct, MVDependencies dependencies,
+					  MCVList mcvlist,
 					  int2vector *attrs, VacAttrStats **stats);
 
-
 /*
  * Compute requested multivariate stats, using the rows sampled for the
  * plain (single-column) stats.
@@ -55,6 +56,8 @@ build_mv_stats(Relation onerel, double totalrows,
 		MVStatisticInfo *stat = (MVStatisticInfo *) lfirst(lc);
 		MVNDistinct	ndistinct = NULL;
 		MVDependencies deps = NULL;
+		MCVList		mcvlist = NULL;
+		int			numrows_filtered = 0;
 
 		VacAttrStats **stats = NULL;
 		int			numatts = 0;
@@ -95,8 +98,12 @@ build_mv_stats(Relation onerel, double totalrows,
 		if (stat->deps_enabled)
 			deps = build_mv_dependencies(numrows, rows, attrs, stats);
 
+		/* build the MCV list */
+		if (stat->mcv_enabled)
+			mcvlist = build_mv_mcvlist(numrows, rows, attrs, stats, &numrows_filtered);
+
 		/* store the statistics in the catalog */
-		update_mv_stats(stat->mvoid, ndistinct, deps, attrs, stats);
+		update_mv_stats(stat->mvoid, ndistinct, deps, mcvlist, attrs, stats);
 	}
 }
 
@@ -178,6 +185,8 @@ list_mv_stats(Oid relid)
 		info->ndist_built = stats->ndist_built;
 		info->deps_enabled = stats->deps_enabled;
 		info->deps_built = stats->deps_built;
+		info->mcv_enabled = stats->mcv_enabled;
+		info->mcv_built = stats->mcv_built;
 
 		result = lappend(result, info);
 	}
@@ -195,11 +204,58 @@ list_mv_stats(Oid relid)
 }
 
 /*
+ * Find attnums of MV stats using the mvoid.
+ */
+int2vector *
+find_mv_attnums(Oid mvoid, Oid *relid)
+{
+	HeapTuple	tup;
+	Datum		datum;
+	bool		attisnull;
+	ArrayType  *keyarr;
+	int2vector *result;
+
+	/* Look up the pg_mv_statistic entry identified by mvoid. */
+	tup = SearchSysCache1(MVSTATOID,
+						  ObjectIdGetDatum(mvoid));
+
+	/* XXX syscache contains OIDs of deleted stats (not invalidated) */
+	if (!HeapTupleIsValid(tup))
+		return NULL;
+
+	/* starelid - handed back to the caller through *relid */
+	datum = SysCacheGetAttr(MVSTATOID, tup,
+							Anum_pg_mv_statistic_starelid, &attisnull);
+	Assert(!attisnull);
+	*relid = DatumGetObjectId(datum);
+
+	/* stakeys - the attnums, returned as an int2vector */
+	datum = SysCacheGetAttr(MVSTATOID, tup,
+							Anum_pg_mv_statistic_stakeys, &attisnull);
+	Assert(!attisnull);
+
+	keyarr = DatumGetArrayTypeP(datum);
+	result = buildint2vector((int16 *) ARR_DATA_PTR(keyarr),
+							 ARR_DIMS(keyarr)[0]);
+
+	/* the tuple is not accessed past this point */
+	ReleaseSysCache(tup);
+
+	/*
+	 * TODO maybe save the list into relcache, as in RelationGetIndexList
+	 * (which was used as an inspiration of this one)?.
+	 */
+
+	return result;
+}
+
+/*
  * update_mv_stats
  *	Serializes the statistics and stores them into the pg_mv_statistic tuple.
  */
 static void
-update_mv_stats(Oid mvoid, MVNDistinct ndistinct, MVDependencies dependencies,
+update_mv_stats(Oid mvoid,
+				MVNDistinct ndistinct, MVDependencies dependencies, MCVList mcvlist,
 				int2vector *attrs, VacAttrStats **stats)
 {
 	HeapTuple	stup,
@@ -233,22 +289,36 @@ update_mv_stats(Oid mvoid, MVNDistinct ndistinct, MVDependencies dependencies,
 			= PointerGetDatum(serialize_mv_dependencies(dependencies));
 	}
 
+	if (mcvlist != NULL)
+	{
+		bytea	   *data = serialize_mv_mcvlist(mcvlist, attrs, stats);
+
+		nulls[Anum_pg_mv_statistic_stamcv - 1] = (data == NULL);
+		values[Anum_pg_mv_statistic_stamcv - 1] = PointerGetDatum(data);
+	}
+
 	/* always replace the value (either by bytea or NULL) */
 	replaces[Anum_pg_mv_statistic_standist - 1] = true;
 	replaces[Anum_pg_mv_statistic_stadeps - 1] = true;
+	replaces[Anum_pg_mv_statistic_stamcv - 1] = true;
 
 	/* always change the availability flags */
 	nulls[Anum_pg_mv_statistic_ndist_built - 1] = false;
 	nulls[Anum_pg_mv_statistic_deps_built - 1] = false;
+	nulls[Anum_pg_mv_statistic_mcv_built - 1] = false;
+
 	nulls[Anum_pg_mv_statistic_stakeys - 1] = false;
 
 	/* use the new attnums, in case we removed some dropped ones */
 	replaces[Anum_pg_mv_statistic_ndist_built - 1] = true;
 	replaces[Anum_pg_mv_statistic_deps_built - 1] = true;
+	replaces[Anum_pg_mv_statistic_mcv_built - 1] = true;
+
 	replaces[Anum_pg_mv_statistic_stakeys - 1] = true;
 
 	values[Anum_pg_mv_statistic_ndist_built - 1] = BoolGetDatum(ndistinct != NULL);
 	values[Anum_pg_mv_statistic_deps_built - 1] = BoolGetDatum(dependencies != NULL);
+	values[Anum_pg_mv_statistic_mcv_built - 1] = BoolGetDatum(mcvlist != NULL);
 
 	values[Anum_pg_mv_statistic_stakeys - 1] = PointerGetDatum(attrs);
 
@@ -278,6 +348,23 @@ update_mv_stats(Oid mvoid, MVNDistinct ndistinct, MVDependencies dependencies,
 	heap_close(sd, RowExclusiveLock);
 }
 
+
+int
+mv_get_index(AttrNumber varattno, int2vector *stakeys)
+{
+	int			pos = 0;
+
+	/*
+	 * Count the leading keys that are smaller than varattno.  This yields the
+	 * dimension of varattno provided stakeys is sorted in ascending order and
+	 * actually contains the attribute (neither is verified here).
+	 */
+	while ((pos < stakeys->dim1) && (stakeys->values[pos] < varattno))
+		pos++;
+
+	return pos;
+}
+
 /* multi-variate stats comparator */
 
 /*
@@ -288,11 +375,15 @@ update_mv_stats(Oid mvoid, MVNDistinct ndistinct, MVDependencies dependencies,
 int
 compare_scalars_simple(const void *a, const void *b, void *arg)
 {
-	Datum		da = *(Datum *) a;
-	Datum		db = *(Datum *) b;
-	SortSupport ssup = (SortSupport) arg;
+	return compare_datums_simple(*(Datum *) a,
+								 *(Datum *) b,
+								 (SortSupport) arg);
+}
 
-	return ApplySortComparator(da, false, db, false, ssup);
+int
+compare_datums_simple(Datum a, Datum b, SortSupport ssup)
+{
+	return ApplySortComparator(a, false, b, false, ssup);
 }
 
 /*
@@ -410,3 +501,34 @@ multi_sort_compare_dims(int start, int end,
 
 	return 0;
 }
+
+/*
+ * Binary search with a pass-through comparator argument (the lookup twin of
+ * qsort_arg).  Returns the matching element, or NULL if none compares equal.
+ */
+void *
+bsearch_arg(const void *key, const void *base, size_t nmemb, size_t size,
+			int (*compar) (const void *, const void *, void *),
+			void *arg)
+{
+	const char *elems = (const char *) base;
+	size_t		lo = 0;
+	size_t		hi = nmemb;		/* search window is [lo, hi) */
+
+	while (lo < hi)
+	{
+		size_t		mid = (lo + hi) / 2;
+		const void *cand = elems + (mid * size);
+		int			cmp = (*compar) (key, cand, arg);
+
+		if (cmp == 0)
+			return (void *) cand;
+
+		if (cmp < 0)
+			hi = mid;		/* key sorts before the candidate */
+		else
+			lo = mid + 1;	/* key sorts after the candidate */
+	}
+
+	return NULL;
+}
diff --git a/src/backend/utils/mvstats/common.h b/src/backend/utils/mvstats/common.h
index e471c88..fe56f51 100644
--- a/src/backend/utils/mvstats/common.h
+++ b/src/backend/utils/mvstats/common.h
@@ -47,6 +47,15 @@ typedef struct
 	int			tupno;			/* position index for tuple it came from */
 } ScalarItem;
 
+/* (de)serialization info - presumably one per statistic dimension (confirm) */
+typedef struct DimensionInfo
+{
+	int			nvalues;		/* number of deduplicated values */
+	int			nbytes;			/* number of bytes (serialized) */
+	int			typlen;			/* pg_type.typlen */
+	bool		typbyval;		/* pg_type.typbyval */
+} DimensionInfo;
+
 /* multi-sort */
 typedef struct MultiSortSupportData
 {
@@ -60,6 +69,7 @@ typedef struct SortItem
 {
 	Datum	   *values;
 	bool	   *isnull;
+	int			count;
 } SortItem;
 
 MultiSortSupport multi_sort_init(int ndims);
@@ -67,7 +77,7 @@ MultiSortSupport multi_sort_init(int ndims);
 void multi_sort_add_dimension(MultiSortSupport mss, int sortdim,
 						 int dim, VacAttrStats **vacattrstats);
 
-int multi_sort_compare(const void *a, const void *b, void *arg);
+int			multi_sort_compare(const void *a, const void *b, void *arg);
 
 int multi_sort_compare_dim(int dim, const SortItem *a,
 					   const SortItem *b, MultiSortSupport mss);
@@ -76,5 +86,11 @@ int multi_sort_compare_dims(int start, int end, const SortItem *a,
 						const SortItem *b, MultiSortSupport mss);
 
 /* comparators, used when constructing multivariate stats */
-int compare_scalars_simple(const void *a, const void *b, void *arg);
-int compare_scalars_partition(const void *a, const void *b, void *arg);
+int			compare_datums_simple(Datum a, Datum b, SortSupport ssup);
+int			compare_scalars_simple(const void *a, const void *b, void *arg);
+int			compare_scalars_partition(const void *a, const void *b, void *arg);
+
+void *bsearch_arg(const void *key, const void *base,
+			size_t nmemb, size_t size,
+			int (*compar) (const void *, const void *, void *),
+			void *arg);
diff --git a/src/backend/utils/mvstats/mcv.c b/src/backend/utils/mvstats/mcv.c
new file mode 100644
index 0000000..c1c2409
--- /dev/null
+++ b/src/backend/utils/mvstats/mcv.c
@@ -0,0 +1,1184 @@
+/*-------------------------------------------------------------------------
+ *
+ * mcv.c
+ *	  POSTGRES multivariate MCV lists
+ *
+ *
+ * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *	  src/backend/utils/mvstats/mcv.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "fmgr.h"
+#include "funcapi.h"
+
+#include "utils/bytea.h"
+#include "utils/lsyscache.h"
+
+#include "common.h"
+
+/*
+ * Each serialized item needs to store (in this order):
+ *
+ * - indexes			  (ndim * sizeof(uint16))
+ * - null flags			  (ndim * sizeof(bool))
+ * - frequency			  (sizeof(double))
+ *
+ * So in total:
+ *
+ *	 ndim * (sizeof(uint16) + sizeof(bool)) + sizeof(double)
+ */
+#define ITEM_SIZE(ndims)	\
+	((ndims) * (sizeof(uint16) + sizeof(bool)) + sizeof(double))
+
+/* Accessors into a serialized item; the frequency may be unaligned, use memcpy */
+#define ITEM_INDEXES(item)			((uint16 *) (item))
+#define ITEM_NULLS(item,ndims)		((bool *) (ITEM_INDEXES(item) + (ndims)))
+#define ITEM_FREQUENCY(item,ndims)	((double *) (ITEM_NULLS(item, ndims) + (ndims)))
+
+static MultiSortSupport build_mss(VacAttrStats **stats, int2vector *attrs);
+
+static SortItem *build_sorted_items(int numrows, HeapTuple *rows,
+				   TupleDesc tdesc, MultiSortSupport mss,
+				   int2vector *attrs);
+
+static SortItem *build_distinct_groups(int numrows, SortItem *items,
+					  MultiSortSupport mss, int *ndistinct);
+
+static int count_distinct_groups(int numrows, SortItem *items,
+					  MultiSortSupport mss);
+
+/*
+ * Builds MCV list from the set of sampled rows.
+ *
+ * The algorithm is quite simple:
+ *
+ *	   (1) sort the data (default collation, '<' for the data type)
+ *
+ *	   (2) count distinct groups, decide how many to keep
+ *
+ *	   (3) build the MCV list using the threshold determined in (2)
+ *
+ *	   (4) remove rows represented by the MCV from the sample
+ *
+ * Returns NULL (leaving numrows_filtered untouched) if no group qualifies.
+ * Otherwise rows matching the MCV items are removed from 'rows' in place and
+ * the number of remaining rows is passed back using numrows_filtered.
+ *
+ * FIXME: Single-dimensional MCV is sorted by frequency (descending). We should
+ * do that too, because when walking through the list we want to check
+ * the most frequent items first.
+ *
+ * TODO: We're using Datum (8B), even for data types (e.g. int4 or float4).
+ * Maybe we could save some space here, but the bytea compression should
+ * handle it just fine.
+ *
+ * TODO: This probably should not use the ndistinct directly (as computed from
+ * the table, but rather estimate the number of distinct values in the
+ * table), no?
+ */
+MCVList
+build_mv_mcvlist(int numrows, HeapTuple *rows, int2vector *attrs,
+				 VacAttrStats **stats, int *numrows_filtered)
+{
+	int			i;
+	int			numattrs = attrs->dim1;
+	int			ndistinct = 0;
+	int			mcv_threshold = 0;
+	int			nitems = 0;
+
+	MCVList		mcvlist = NULL;
+
+	/* comparator for all the columns */
+	MultiSortSupport mss = build_mss(stats, attrs);
+
+	/* sort the rows */
+	SortItem   *items = build_sorted_items(numrows, rows, stats[0]->tupDesc,
+										   mss, attrs);
+
+	/* transform the sorted rows into groups (sorted by frequency) */
+	SortItem   *groups = build_distinct_groups(numrows, items, mss, &ndistinct);
+
+	/*
+	 * Determine the minimum size of a group to be eligible for MCV list, and
+	 * check how many groups actually pass that threshold. We use 1.25x the
+	 * average group size, just like for regular statistics, but at least 4.
+	 *
+	 * But if we can fit all the distinct values in the MCV list (i.e. if
+	 * there are no more distinct groups than MVSTAT_MCVLIST_MAX_ITEMS), we'll
+	 * require only 2 rows per group.
+	 */
+	mcv_threshold = 1.25 * numrows / ndistinct;
+	mcv_threshold = (mcv_threshold < 4) ? 4 : mcv_threshold;
+
+	if (ndistinct <= MVSTAT_MCVLIST_MAX_ITEMS)
+		mcv_threshold = 2;
+
+	/* Walk through the groups and stop once we fall below the threshold. */
+	nitems = 0;
+	for (i = 0; i < ndistinct; i++)
+	{
+		if (groups[i].count < mcv_threshold)
+			break;
+
+		nitems++;
+	}
+
+	/* we know the number of MCV list items, so let's build the list */
+	if (nitems > 0)
+	{
+		/* allocate the MCV list structure, set parameters we know */
+		mcvlist = (MCVList) palloc0(sizeof(MCVListData));
+
+		mcvlist->magic = MVSTAT_MCV_MAGIC;
+		mcvlist->type = MVSTAT_MCV_TYPE_BASIC;
+		mcvlist->ndimensions = numattrs;
+		mcvlist->nitems = nitems;
+
+		/*
+		 * Preallocate Datum/isnull arrays (not as a single chunk, as we will
+		 * pass the result outside and thus it needs to be easy to pfree().
+		 *
+		 * XXX Although we're the only ones dealing with this.
+		 */
+		mcvlist->items = (MCVItem *) palloc0(sizeof(MCVItem) * nitems);
+
+		for (i = 0; i < nitems; i++)
+		{
+			mcvlist->items[i] = (MCVItem) palloc0(sizeof(MCVItemData));
+			mcvlist->items[i]->values = (Datum *) palloc0(sizeof(Datum) * numattrs);
+			mcvlist->items[i]->isnull = (bool *) palloc0(sizeof(bool) * numattrs);
+		}
+
+		/* Copy the first chunk of groups into the result. */
+		for (i = 0; i < nitems; i++)
+		{
+			/* just pointer to the proper place in the list */
+			MCVItem		item = mcvlist->items[i];
+
+			/* copy the values and NULL flags of the i-th group */
+			memcpy(item->values, groups[i].values, sizeof(Datum) * numattrs);
+			memcpy(item->isnull, groups[i].isnull, sizeof(bool) * numattrs);
+
+			/* and finally the group frequency */
+			item->frequency = (double) groups[i].count / numrows;
+		}
+
+		/* make sure the loops are consistent */
+		Assert(nitems == mcvlist->nitems);
+
+		/*
+		 * Unless every group made it into the MCV list, remove the rows it
+		 * represents from the sample. We first sort the listed groups by the
+		 * keys (not by count) and then use binary search.
+		 */
+		if (nitems < ndistinct)
+		{
+			int			i,
+						j;
+			int			nfiltered = 0;
+
+			/* used for the searches */
+			SortItem	key;
+
+			/* we'll fill this with data from the rows */
+			key.values = (Datum *) palloc0(numattrs * sizeof(Datum));
+			key.isnull = (bool *) palloc0(numattrs * sizeof(bool));
+
+			/*
+			 * Sort the groups for bsearch_arg (but only the items that
+			 * actually made it to the MCV list).
+			 */
+			qsort_arg((void *) groups, nitems, sizeof(SortItem),
+					  multi_sort_compare, mss);
+
+			/* walk through the tuples, compare the values to MCV items */
+			for (i = 0; i < numrows; i++)
+			{
+				/* collect the key values from the row */
+				for (j = 0; j < numattrs; j++)
+					key.values[j]
+						= heap_getattr(rows[i], attrs->values[j],
+									   stats[j]->tupDesc, &key.isnull[j]);
+
+				/* if not included in the MCV list, keep it in the array */
+				if (bsearch_arg(&key, groups, nitems, sizeof(SortItem),
+								multi_sort_compare, mss) == NULL)
+					rows[nfiltered++] = rows[i];
+			}
+
+			/* remember how many rows we actually kept */
+			*numrows_filtered = nfiltered;
+
+			/* free all the data used here */
+			pfree(key.values);
+			pfree(key.isnull);
+		}
+		else
+			/* the MCV list covers all the rows */
+			*numrows_filtered = 0;
+	}
+
+	pfree(items);
+	pfree(groups);
+
+	return mcvlist;
+}
+
+/* set up a multi-column comparator with one dimension per attribute in attrs */
+static MultiSortSupport
+build_mss(VacAttrStats **stats, int2vector *attrs)
+{
+	int			dim = 0;
+	int			ndims = attrs->dim1;
+	MultiSortSupport result = multi_sort_init(ndims);
+
+	/* the same index is used for the sort dimension and the stats entry */
+	while (dim < ndims)
+	{
+		multi_sort_add_dimension(result, dim, dim, stats);
+		dim++;
+	}
+	return result;
+}
+
+/* builds an array of SortItems from the sample rows, sorted using mss */
+static SortItem *
+build_sorted_items(int numrows, HeapTuple *rows, TupleDesc tdesc,
+				   MultiSortSupport mss, int2vector *attrs)
+{
+	int			i,
+				j,
+				len;
+	int			numattrs = attrs->dim1;
+	int			nvalues = numrows * numattrs;
+
+	/*
+	 * We won't allocate the arrays for each item independently, but in one
+	 * large chunk and then just set the pointers (so a single pfree is enough).
+	 */
+	SortItem   *items;
+	Datum	   *values;
+	bool	   *isnull;
+	char	   *ptr;
+
+	/* Compute the total amount of memory we need (both items and values). */
+	len = numrows * sizeof(SortItem) + nvalues * (sizeof(Datum) + sizeof(bool));
+
+	/* Allocate the memory and split it into the pieces. */
+	ptr = palloc0(len);
+
+	/* items to sort */
+	items = (SortItem *) ptr;
+	ptr += numrows * sizeof(SortItem);
+
+	/* values and null flags */
+	values = (Datum *) ptr;
+	ptr += nvalues * sizeof(Datum);
+
+	isnull = (bool *) ptr;
+	ptr += nvalues * sizeof(bool);
+
+	/* make sure we consumed the whole buffer exactly */
+	Assert((ptr - (char *) items) == len);
+
+	/* point each item at its slice of the Datum and bool arrays */
+	for (i = 0; i < numrows; i++)
+	{
+		items[i].values = &values[i * numattrs];
+		items[i].isnull = &isnull[i * numattrs];
+
+		/* load the values/null flags from sample rows */
+		for (j = 0; j < numattrs; j++)
+		{
+			items[i].values[j] = heap_getattr(rows[i],
+											  attrs->values[j], /* attnum */
+											  tdesc,
+											  &items[i].isnull[j]);		/* isnull */
+		}
+	}
+
+	/* do the sort, using the multi-sort */
+	qsort_arg((void *) items, numrows, sizeof(SortItem),
+			  multi_sort_compare, mss);
+
+	return items;
+}
+
+/* number of distinct value combinations in the (expected to be sorted) items */
+static int
+count_distinct_groups(int numrows, SortItem *items, MultiSortSupport mss)
+{
+	int			ngroups = 1;	/* counting starts at one group */
+	int			pos;
+
+	/* every change between adjacent items marks the start of a new group */
+	for (pos = 1; pos < numrows; pos++)
+		if (multi_sort_compare(&items[pos], &items[pos - 1], mss))
+			ngroups++;
+
+	return ngroups;
+}
+
+/* qsort comparator ordering SortItems by count, the largest counts first */
+static int
+compare_sort_item_count(const void *a, const void *b)
+{
+	int			lcount = ((SortItem *) a)->count;
+	int			rcount = ((SortItem *) b)->count;
+
+	/* inverted on purpose, so that higher counts sort earlier */
+	if (lcount > rcount)
+		return -1;
+	if (lcount < rcount)
+		return 1;
+	return 0;
+}
+
+/* builds one SortItem per distinct group, with counts, ordered by count (desc) */
+static SortItem *
+build_distinct_groups(int numrows, SortItem *items, MultiSortSupport mss,
+					  int *ndistinct)
+{
+	int			i,
+				j;
+	int			ngroups = count_distinct_groups(numrows, items, mss);
+
+	SortItem   *groups = (SortItem *) palloc0(ngroups * sizeof(SortItem));
+
+	j = 0;
+	groups[0] = items[0];		/* reads items[0], so numrows must be > 0 */
+	groups[0].count = 1;
+
+	for (i = 1; i < numrows; i++)
+	{
+		if (multi_sort_compare(&items[i], &items[i - 1], mss) != 0)
+			groups[++j] = items[i];		/* struct copy, shares values/isnull */
+
+		groups[j].count++;
+	}
+
+	pg_qsort((void *) groups, ngroups, sizeof(SortItem),
+			 compare_sort_item_count);
+
+	*ndistinct = ngroups;		/* pass the number of groups back */
+	return groups;
+}
+
+
+/* fetch the MCV list of the given statistics from the pg_mv_statistic catalog */
+MCVList
+load_mv_mcvlist(Oid mvoid)
+{
+	bool		isnull = false;
+	Datum		mcvlist;
+	MCVList		result;
+#ifdef USE_ASSERT_CHECKING
+	Form_pg_mv_statistic mvstat;
+#endif
+
+	/* look up the statistics by OID; NULL is returned when there is none */
+	HeapTuple	htup = SearchSysCache1(MVSTATOID, ObjectIdGetDatum(mvoid));
+
+	if (!HeapTupleIsValid(htup))
+		return NULL;
+
+#ifdef USE_ASSERT_CHECKING
+	mvstat = (Form_pg_mv_statistic) GETSTRUCT(htup);
+	Assert(mvstat->mcv_enabled && mvstat->mcv_built);
+#endif
+
+	mcvlist = SysCacheGetAttr(MVSTATOID, htup,
+							  Anum_pg_mv_statistic_stamcv, &isnull);
+
+	Assert(!isnull);
+	/* copy the value, the result points into it and must survive the release */
+	result = deserialize_mv_mcvlist(DatumGetByteaPCopy(mcvlist));
+	ReleaseSysCache(htup);
+	return result;
+}
+
+/*
+ * Returns a short text summary (currently just the number of items) of a
+ * serialized MCV list. TODO: Add info about what part of the table it covers.
+ */
+Datum
+pg_mv_stats_mcvlist_info(PG_FUNCTION_ARGS)
+{
+	MCVList		list = deserialize_mv_mcvlist(PG_GETARG_BYTEA_P(0));
+	char	   *summary = palloc0(128);
+	text	   *output;
+
+	snprintf(summary, 128, "nitems=%d", list->nitems);
+
+	/* release the deserialized list header */
+	pfree(list);
+
+	output = cstring_to_text(summary);
+	PG_RETURN_TEXT_P(output);
+}
+
+/*
+ * Serializes the MCV list into a palloc'd bytea (ERROR if larger than 1MB),
+ * taking type info and the '<' operator of dimension i from stats[i].
+ *
+ * The basic algorithm is simple:
+ *
+ * (1) perform deduplication (for each attribute separately)
+ *	   (a) collect all (non-NULL) attribute values from all MCV items
+ *	   (b) sort the data (using 'lt' from VacAttrStats)
+ *	   (c) remove duplicate values from the array
+ *
+ * (2) serialize the arrays into a bytea value
+ *
+ * (3) process all MCV list items
+ *	   (a) replace values with indexes into the arrays
+ *
+ * Each attribute has to be processed separately, because we may be mixing
+ * different datatypes, with different sort operators, etc.
+ *
+ * We'll use uint16 values for the indexes in step (3), as we don't allow more
+ * than 8k MCV items, although that's mostly arbitrary limit. We might increase
+ * this to 65k and still fit into uint16.
+ *
+ * We don't really expect the serialization to save as much space as for
+ * histograms, because we are not doing any bucket splits (which is the source
+ * of high redundancy in histograms).
+ *
+ * TODO: Consider packing boolean flags (NULL) for each item into a single char
+ * (or a longer type) instead of using an array of bool items.
+ */
+bytea *
+serialize_mv_mcvlist(MCVList mcvlist, int2vector *attrs,
+					 VacAttrStats **stats)
+{
+	int			i,
+				j;
+	int			ndims = mcvlist->ndimensions;
+	int			itemsize = ITEM_SIZE(ndims);
+
+	SortSupport ssup;
+	DimensionInfo *info;
+
+	Size		total_length;
+
+	/* allocate just once */
+	char	   *item = palloc0(itemsize);
+
+	/* serialized items (indexes into arrays, etc.) */
+	bytea	   *output;
+	char	   *data = NULL;
+
+	/* values per dimension (and number of non-NULL values) */
+	Datum	  **values = (Datum **) palloc0(sizeof(Datum *) * ndims);
+	int		   *counts = (int *) palloc0(sizeof(int) * ndims);
+
+	/*
+	 * We'll include some rudimentary information about the attributes (type
+	 * length, etc.), so that we don't have to look them up while
+	 * deserializing the MCV list.
+	 */
+	info = (DimensionInfo *) palloc0(sizeof(DimensionInfo) * ndims);
+
+	/* sort support data for all attributes included in the MCV list */
+	ssup = (SortSupport) palloc0(sizeof(SortSupportData) * ndims);
+
+	/* collect and deduplicate values for all attributes */
+	for (i = 0; i < ndims; i++)
+	{
+		int			ndistinct;
+		StdAnalyzeData *tmp = (StdAnalyzeData *) stats[i]->extra_data;
+
+		/* copy important info about the data type (length, by-value) */
+		info[i].typlen = stats[i]->attrtype->typlen;
+		info[i].typbyval = stats[i]->attrtype->typbyval;
+
+		/* allocate space for values in the attribute and collect them */
+		values[i] = (Datum *) palloc0(sizeof(Datum) * mcvlist->nitems);
+
+		for (j = 0; j < mcvlist->nitems; j++)
+		{
+			/* skip NULL values - we don't need to serialize them */
+			if (mcvlist->items[j]->isnull[i])
+				continue;
+
+			values[i][counts[i]] = mcvlist->items[j]->values[i];
+			counts[i] += 1;
+		}
+
+		/* there are just NULL values in this dimension, we're done */
+		if (counts[i] == 0)
+			continue;
+
+		/* sort and deduplicate the data */
+		ssup[i].ssup_cxt = CurrentMemoryContext;
+		ssup[i].ssup_collation = DEFAULT_COLLATION_OID;
+		ssup[i].ssup_nulls_first = false;
+
+		PrepareSortSupportFromOrderingOp(tmp->ltopr, &ssup[i]);
+
+		qsort_arg(values[i], counts[i], sizeof(Datum),
+				  compare_scalars_simple, &ssup[i]);
+
+		/*
+		 * Walk through the array and eliminate duplicate values, but keep the
+		 * ordering (so that we can do bsearch later). We know there's at
+		 * least one item as (counts[i] != 0), so we can skip the first
+		 * element.
+		 */
+		ndistinct = 1;			/* number of distinct values */
+		for (j = 1; j < counts[i]; j++)
+		{
+			/* if the value is the same as the previous one, we can skip it */
+			if (!compare_datums_simple(values[i][j - 1], values[i][j], &ssup[i]))
+				continue;
+
+			values[i][ndistinct] = values[i][j];
+			ndistinct += 1;
+		}
+
+		/* we must not exceed UINT16_MAX, as we use uint16 indexes */
+		Assert(ndistinct <= UINT16_MAX);
+
+		/*
+		 * Store additional info about the attribute - number of deduplicated
+		 * values, and also size of the serialized data. For fixed-length data
+		 * types this is trivial to compute, for varwidth types we need to
+		 * actually walk the array and sum the sizes.
+		 */
+		info[i].nvalues = ndistinct;
+
+		if (info[i].typlen > 0) /* fixed-length data types */
+			info[i].nbytes = info[i].nvalues * info[i].typlen;
+		else if (info[i].typlen == -1)	/* varlena */
+		{
+			info[i].nbytes = 0;
+			for (j = 0; j < info[i].nvalues; j++)
+				info[i].nbytes += VARSIZE_ANY(values[i][j]);
+		}
+		else if (info[i].typlen == -2)	/* cstring (with the \0 terminator) */
+		{
+			info[i].nbytes = 0;
+			for (j = 0; j < info[i].nvalues; j++)
+				info[i].nbytes += strlen(DatumGetPointer(values[i][j])) + 1;
+		}
+
+		/* we know (count>0) so there must be some data */
+		Assert(info[i].nbytes > 0);
+	}
+
+	/*
+	 * Now we can finally compute how much space we'll actually need for the
+	 * serialized MCV list, as it contains these fields:
+	 *
+	 * - length (4B) for varlena - magic (4B) - type (4B) - ndimensions (4B) -
+	 * nitems (4B) - info (ndim * sizeof(DimensionInfo)) - arrays of values for
+	 * each dimension - serialized items (nitems * itemsize)
+	 *
+	 * So the 'header' size is 20B + ndim * sizeof(DimensionInfo) and then we
+	 * will place all the data (values + indexes).
+	 */
+	total_length = (sizeof(int32) + offsetof(MCVListData, items)
+					+ndims * sizeof(DimensionInfo)
+					+ mcvlist->nitems * itemsize);
+
+	for (i = 0; i < ndims; i++)
+		total_length += info[i].nbytes;
+
+	/* enforce arbitrary limit of 1MB */
+	if (total_length > (1024 * 1024))
+		elog(ERROR, "serialized MCV list exceeds 1MB (%zu)", total_length);
+
+	/* allocate space for the serialized MCV list, set header fields */
+	output = (bytea *) palloc0(total_length);
+	SET_VARSIZE(output, total_length);
+
+	/* 'data' points to the current position in the output buffer */
+	data = VARDATA(output);
+
+	/* MCV list header (number of items, ...) */
+	memcpy(data, mcvlist, offsetof(MCVListData, items));
+	data += offsetof(MCVListData, items);
+
+	/* information about the attributes */
+	memcpy(data, info, sizeof(DimensionInfo) * ndims);
+	data += sizeof(DimensionInfo) * ndims;
+
+	/* now serialize the deduplicated values for all attributes */
+	for (i = 0; i < ndims; i++)
+	{
+#ifdef USE_ASSERT_CHECKING
+		char	   *tmp = data; /* remember the starting point */
+#endif
+		for (j = 0; j < info[i].nvalues; j++)
+		{
+			Datum		v = values[i][j];
+
+			if (info[i].typbyval)		/* passed by value */
+			{
+				memcpy(data, &v, info[i].typlen);
+				data += info[i].typlen;
+			}
+			else if (info[i].typlen > 0)		/* passed by reference */
+			{
+				memcpy(data, DatumGetPointer(v), info[i].typlen);
+				data += info[i].typlen;
+			}
+			else if (info[i].typlen == -1)		/* varlena */
+			{
+				memcpy(data, DatumGetPointer(v), VARSIZE_ANY(v));
+				data += VARSIZE_ANY(v);
+			}
+			else if (info[i].typlen == -2)		/* cstring */
+			{
+				memcpy(data, DatumGetPointer(v), strlen(DatumGetPointer(v)) + 1);
+				data += strlen(DatumGetPointer(v)) + 1; /* terminator */
+			}
+		}
+
+		/* make sure we got exactly the amount of data we expected */
+		Assert((data - tmp) == info[i].nbytes);
+	}
+
+	/* finally serialize the items, with uint16 indexes instead of the values */
+	for (i = 0; i < mcvlist->nitems; i++)
+	{
+		MCVItem		mcvitem = mcvlist->items[i];
+
+		/* don't write beyond the allocated space */
+		Assert(data <= (char *) output + total_length - itemsize);
+
+		/* reset the item (we only allocate it once and reuse it) */
+		memset(item, 0, itemsize);
+
+		for (j = 0; j < ndims; j++)
+		{
+			Datum	   *v = NULL;
+
+			/* do the lookup only for non-NULL values */
+			if (mcvlist->items[i]->isnull[j])
+				continue;
+
+			v = (Datum *) bsearch_arg(&mcvitem->values[j], values[j],
+									  info[j].nvalues, sizeof(Datum),
+									  compare_scalars_simple, &ssup[j]);
+
+			Assert(v != NULL);	/* serialization or deduplication error */
+
+			/* compute index within the array */
+			ITEM_INDEXES(item)[j] = (v - values[j]);
+
+			/* check the index is within expected bounds */
+			Assert(ITEM_INDEXES(item)[j] >= 0);
+			Assert(ITEM_INDEXES(item)[j] < info[j].nvalues);
+		}
+
+		/* copy NULL flags and frequency into the item */
+		memcpy(ITEM_NULLS(item, ndims), mcvitem->isnull, sizeof(bool) * ndims);
+		memcpy(ITEM_FREQUENCY(item, ndims), &mcvitem->frequency, sizeof(double));
+
+		/* copy the serialized item into the array */
+		memcpy(data, item, itemsize);
+
+		data += itemsize;
+	}
+
+	/* at this point we expect to match the total_length exactly */
+	Assert((data - (char *) output) == total_length);
+
+	return output;
+}
+
+/*
+ * Deserializes an MCV list from a varlena value (NULL in, NULL out). By-ref
+ * values keep pointing into 'data', which therefore must outlive the result.
+ *
+ * We deserialize the MCV list fully, because we don't expect there to be a lot
+ * of duplicate values. But perhaps we should keep the MCV in serialized form
+ * just like histograms.
+ */
+MCVList
+deserialize_mv_mcvlist(bytea *data)
+{
+	int			i,
+				j;
+	Size		expected_size;
+	MCVList		mcvlist;
+	char	   *tmp;
+
+	int			ndims,
+				nitems,
+				itemsize;
+	DimensionInfo *info = NULL;
+
+	uint16	   *indexes = NULL;
+	Datum	  **values = NULL;
+
+	/* local allocation buffer (used only for deserialization) */
+	int			bufflen;
+	char	   *buff;
+	char	   *ptr;
+
+	/* buffer used for the result */
+	int			rbufflen;
+	char	   *rbuff;
+	char	   *rptr;
+
+	if (data == NULL)
+		return NULL;
+
+	/* we can't deserialize the MCV if there's not even a complete header */
+	expected_size = offsetof(MCVListData, items);
+
+	if (VARSIZE_ANY_EXHDR(data) < expected_size)
+		elog(ERROR, "invalid MCV Size %ld (expected at least %ld)",
+			 VARSIZE_ANY_EXHDR(data), offsetof(MCVListData, items));
+
+	/* read the MCV list header */
+	mcvlist = (MCVList) palloc0(sizeof(MCVListData));
+
+	/* initialize pointer to the data part (skip the varlena header) */
+	tmp = VARDATA_ANY(data);
+
+	/* get the header and perform further sanity checks */
+	memcpy(mcvlist, tmp, offsetof(MCVListData, items));
+	tmp += offsetof(MCVListData, items);
+
+	if (mcvlist->magic != MVSTAT_MCV_MAGIC)
+		elog(ERROR, "invalid MCV magic %d (expected %d)",
+			 mcvlist->magic, MVSTAT_MCV_MAGIC);
+
+	if (mcvlist->type != MVSTAT_MCV_TYPE_BASIC)
+		elog(ERROR, "invalid MCV type %d (expected %d)",
+			 mcvlist->type, MVSTAT_MCV_TYPE_BASIC);
+
+	nitems = mcvlist->nitems;
+	ndims = mcvlist->ndimensions;
+	itemsize = ITEM_SIZE(ndims);
+
+	Assert((nitems > 0) && (nitems <= MVSTAT_MCVLIST_MAX_ITEMS));
+	Assert((ndims >= 2) && (ndims <= MVSTATS_MAX_DIMENSIONS));
+
+	/*
+	 * Check amount of data including DimensionInfo for all dimensions and
+	 * also the serialized items (including uint16 indexes). Also, walk
+	 * through the dimension information and add it to the sum.
+	 */
+	expected_size += ndims * sizeof(DimensionInfo) +
+		(nitems * itemsize);
+
+	/* check that we have at least the DimensionInfo records */
+	if (VARSIZE_ANY_EXHDR(data) < expected_size)
+		elog(ERROR, "invalid MCV size %ld (expected %ld)",
+			 VARSIZE_ANY_EXHDR(data), expected_size);
+
+	info = (DimensionInfo *) (tmp);
+	tmp += ndims * sizeof(DimensionInfo);
+
+	/* account for the value arrays */
+	for (i = 0; i < ndims; i++)
+	{
+		Assert(info[i].nvalues >= 0);
+		Assert(info[i].nbytes >= 0);
+
+		expected_size += info[i].nbytes;
+	}
+
+	if (VARSIZE_ANY_EXHDR(data) != expected_size)
+		elog(ERROR, "invalid MCV size %ld (expected %ld)",
+			 VARSIZE_ANY_EXHDR(data), expected_size);
+
+	/* looks OK - not corrupted or something */
+
+	/*
+	 * Allocate one large chunk of memory for the intermediate data, needed
+	 * only for deserializing the MCV list (and allocate densely to minimize
+	 * the palloc overhead).
+	 *
+	 * Let's see how much space we'll actually need, and also include space
+	 * for the array with pointers.
+	 */
+	bufflen = sizeof(Datum *) * ndims;	/* space for pointers */
+
+	for (i = 0; i < ndims; i++)
+		/* for full-size byval types, we reuse the serialized value */
+		if (!(info[i].typbyval && info[i].typlen == sizeof(Datum)))
+			bufflen += (sizeof(Datum) * info[i].nvalues);
+
+	buff = palloc0(bufflen);
+	ptr = buff;
+
+	values = (Datum **) buff;
+	ptr += (sizeof(Datum *) * ndims);
+
+	/*
+	 * XXX This uses pointers to the original data array (the types not passed
+	 * by value), so when someone frees the memory, e.g. by doing something
+	 * like this:
+	 *
+	 *	   bytea *data = ... fetch the data from catalog ...;
+	 *	   MCVList mcvlist = deserialize_mv_mcvlist(data); pfree(data);
+	 *
+	 * then 'mcvlist' references the freed memory. Should copy the pieces.
+	 */
+	for (i = 0; i < ndims; i++)
+	{
+		if (info[i].typbyval)
+		{
+			/* passed by value / Datum - simply reuse the array */
+			if (info[i].typlen == sizeof(Datum))
+			{
+				values[i] = (Datum *) tmp;
+				tmp += info[i].nbytes;
+			}
+			else
+			{
+				values[i] = (Datum *) ptr;
+				ptr += (sizeof(Datum) * info[i].nvalues);
+
+				for (j = 0; j < info[i].nvalues; j++)
+				{
+					/* copy the typlen bytes into the (zeroed) Datum */
+					memcpy(&values[i][j], tmp, info[i].typlen);
+					tmp += info[i].typlen;
+				}
+			}
+		}
+		else
+		{
+			/* all the other types need a chunk of the buffer */
+			values[i] = (Datum *) ptr;
+			ptr += (sizeof(Datum) * info[i].nvalues);
+
+			/* passed by reference, but fixed length (name, tid, ...) */
+			if (info[i].typlen > 0)
+			{
+				for (j = 0; j < info[i].nvalues; j++)
+				{
+					/* just point into the array */
+					values[i][j] = PointerGetDatum(tmp);
+					tmp += info[i].typlen;
+				}
+			}
+			else if (info[i].typlen == -1)
+			{
+				/* varlena */
+				for (j = 0; j < info[i].nvalues; j++)
+				{
+					/* just point into the array */
+					values[i][j] = PointerGetDatum(tmp);
+					tmp += VARSIZE_ANY(tmp);
+				}
+			}
+			else if (info[i].typlen == -2)
+			{
+				/* cstring */
+				for (j = 0; j < info[i].nvalues; j++)
+				{
+					/* just point into the array */
+					values[i][j] = PointerGetDatum(tmp);
+					tmp += (strlen(tmp) + 1);	/* don't forget the \0 */
+				}
+			}
+		}
+	}
+
+	/* we should have exhausted the buffer exactly */
+	Assert((ptr - buff) == bufflen);
+
+	/* allocate space for all the MCV items in a single piece */
+	rbufflen = (sizeof(MCVItem) + sizeof(MCVItemData) +
+				sizeof(Datum) * ndims + sizeof(bool) * ndims) * nitems;
+
+	rbuff = palloc0(rbufflen);
+	rptr = rbuff;
+
+	mcvlist->items = (MCVItem *) rbuff;
+	rptr += (sizeof(MCVItem) * nitems);
+
+	for (i = 0; i < nitems; i++)
+	{
+		MCVItem		item = (MCVItem) rptr;
+
+		rptr += (sizeof(MCVItemData));
+
+		item->values = (Datum *) rptr;
+		rptr += (sizeof(Datum) * ndims);
+
+		item->isnull = (bool *) rptr;
+		rptr += (sizeof(bool) * ndims);
+
+		/* just point to the right place */
+		indexes = ITEM_INDEXES(tmp);
+
+		memcpy(item->isnull, ITEM_NULLS(tmp, ndims), sizeof(bool) * ndims);
+		memcpy(&item->frequency, ITEM_FREQUENCY(tmp, ndims), sizeof(double));
+
+#ifdef USE_ASSERT_CHECKING
+		for (j = 0; j < ndims; j++)
+			Assert(item->isnull[j] || indexes[j] < info[j].nvalues);
+#endif
+
+		/* translate the values */
+		for (j = 0; j < ndims; j++)
+			if (!item->isnull[j])
+				item->values[j] = values[j][indexes[j]];
+
+		mcvlist->items[i] = item;
+
+		tmp += ITEM_SIZE(ndims);
+
+		Assert(tmp <= (char *) data + VARSIZE_ANY(data));
+	}
+
+	/* check that we processed all the data */
+	Assert(tmp == (char *) data + VARSIZE_ANY(data));
+
+	/* release the temporary buffer */
+	pfree(buff);
+
+	return mcvlist;
+}
+
+/*
+ * SRF with details about items of an MCV list:
+ *
+ * - item ID (0...nitems)
+ * - values (string array)
+ * - null flags (boolean array)
+ * - frequency (double precision)
+ *
+ * The input is the OID of the statistics, and there are no rows returned if
+ * no statistics with that OID is found.
+ */
+PG_FUNCTION_INFO_V1(pg_mv_mcv_items);
+
+Datum
+pg_mv_mcv_items(PG_FUNCTION_ARGS)
+{
+	FuncCallContext *funcctx;
+	int			call_cntr;
+	int			max_calls;
+	TupleDesc	tupdesc;
+	AttInMetadata *attinmeta;
+
+	/* stuff done only on the first call of the function */
+	if (SRF_IS_FIRSTCALL())
+	{
+		MemoryContext oldcontext;
+		MCVList		mcvlist;
+
+		/* create a function context for cross-call persistence */
+		funcctx = SRF_FIRSTCALL_INIT();
+
+		/* switch to memory context appropriate for multiple function calls */
+		oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+		mcvlist = load_mv_mcvlist(PG_GETARG_OID(0));
+
+		funcctx->user_fctx = mcvlist;
+
+		/* total number of tuples to be returned */
+		funcctx->max_calls = 0;
+		if (funcctx->user_fctx != NULL)
+			funcctx->max_calls = mcvlist->nitems;
+
+		/* Build a tuple descriptor for our result type */
+		if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
+			ereport(ERROR,
+					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					 errmsg("function returning record called in context "
+							"that cannot accept type record")));
+
+		/* build metadata needed later to produce tuples from raw C-strings */
+		attinmeta = TupleDescGetAttInMetadata(tupdesc);
+		funcctx->attinmeta = attinmeta;
+
+		MemoryContextSwitchTo(oldcontext);
+	}
+
+	/* stuff done on every call of the function */
+	funcctx = SRF_PERCALL_SETUP();
+
+	call_cntr = funcctx->call_cntr;
+	max_calls = funcctx->max_calls;
+	attinmeta = funcctx->attinmeta;
+
+	if (call_cntr < max_calls)	/* do when there is more left to send */
+	{
+		char	  **values;
+		HeapTuple	tuple;
+		Datum		result;
+		int2vector *stakeys;
+		Oid			relid;
+
+		char	   *buff = palloc0(1024);
+		char	   *format;
+
+		int			i;
+
+		Oid		   *outfuncs;
+		FmgrInfo   *fmgrinfo;
+
+		MCVList		mcvlist;
+		MCVItem		item;
+
+		mcvlist = (MCVList) funcctx->user_fctx;
+
+		Assert(call_cntr < mcvlist->nitems);
+
+		item = mcvlist->items[call_cntr];
+
+		stakeys = find_mv_attnums(PG_GETARG_OID(0), &relid);
+
+		/*
+		 * Prepare a values array for building the returned tuple. This should
+		 * be an array of C strings which will be processed later by the type
+		 * input functions.
+		 */
+		values = (char **) palloc(4 * sizeof(char *));
+
+		values[0] = (char *) palloc(64 * sizeof(char));
+
+		/* arrays (built up as text, limited to 1023 characters each) */
+		values[1] = (char *) palloc0(1024 * sizeof(char));
+		values[2] = (char *) palloc0(1024 * sizeof(char));
+
+		/* frequency */
+		values[3] = (char *) palloc(64 * sizeof(char));
+
+		outfuncs = (Oid *) palloc0(sizeof(Oid) * mcvlist->ndimensions);
+		fmgrinfo = (FmgrInfo *) palloc0(sizeof(FmgrInfo) * mcvlist->ndimensions);
+
+		for (i = 0; i < mcvlist->ndimensions; i++)
+		{
+			bool		isvarlena;
+
+			getTypeOutputInfo(get_atttype(relid, stakeys->values[i]),
+							  &outfuncs[i], &isvarlena);
+
+			fmgr_info(outfuncs[i], &fmgrinfo[i]);
+		}
+
+		snprintf(values[0], 64, "%d", call_cntr);		/* item ID */
+
+		for (i = 0; i < mcvlist->ndimensions; i++)
+		{
+			Datum		val,
+						valout;
+
+			format = "%s, %s";
+			if (i == 0)
+				format = "{%s%s";
+			else if (i == mcvlist->ndimensions - 1)
+				format = "%s, %s}";
+
+			if (item->isnull[i])
+				valout = CStringGetDatum("NULL");
+			else
+			{
+				val = item->values[i];
+				valout = FunctionCall1(&fmgrinfo[i], val);
+			}
+
+			snprintf(buff, 1024, format, values[1], DatumGetPointer(valout));
+			strncpy(values[1], buff, 1023);
+			buff[0] = '\0';
+
+			snprintf(buff, 1024, format, values[2], item->isnull[i] ? "t" : "f");
+			strncpy(values[2], buff, 1023);
+			buff[0] = '\0';
+		}
+
+		snprintf(values[3], 64, "%f", item->frequency); /* frequency */
+
+		/* build a tuple */
+		tuple = BuildTupleFromCStrings(attinmeta, values);
+
+		/* make the tuple into a datum */
+		result = HeapTupleGetDatum(tuple);
+
+		/* clean up (this is not really necessary) */
+		pfree(values[0]);
+		pfree(values[1]);
+		pfree(values[2]);
+		pfree(values[3]);
+
+		pfree(values);
+
+		SRF_RETURN_NEXT(funcctx, result);
+	}
+	else	/* do when there is no more left */
+	{
+		SRF_RETURN_DONE(funcctx);
+	}
+}
+
+/*
+ * pg_mcv_list_in		- input routine for type PG_MCV_LIST.
+ *
+ * pg_mcv_list is real enough to be a table column, but it has no operations
+ * of its own, and disallows input too (this always raises an error).
+ *
+ * XXX This is inspired by what pg_node_tree does.
+ */
+Datum
+pg_mcv_list_in(PG_FUNCTION_ARGS)
+{
+	/*
+	 * pg_mcv_list stores the data in binary form and parsing text input is
+	 * not needed, so disallow this.
+	 */
+	ereport(ERROR,
+			(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+			 errmsg("cannot accept a value of type %s", "pg_mcv_list")));
+
+	PG_RETURN_VOID();			/* keep compiler quiet */
+}
+
+
+/*
+ * pg_mcv_list_out		- output routine for type PG_MCV_LIST.
+ *
+ * MCV lists are serialized into a bytea value, so we simply call byteaout()
+ * to serialize the value into text. But it'd be nice to serialize that into
+ * a meaningful representation (e.g. for inspection by people).
+ *
+ * FIXME a human-readable representation is not implemented yet
+ */
+Datum
+pg_mcv_list_out(PG_FUNCTION_ARGS)
+{
+	return byteaout(fcinfo);
+}
+
+/*
+ * pg_mcv_list_recv		- binary input routine for type PG_MCV_LIST (rejected).
+ */
+Datum
+pg_mcv_list_recv(PG_FUNCTION_ARGS)
+{
+	ereport(ERROR,
+			(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+			 errmsg("cannot accept a value of type %s", "pg_mcv_list")));
+
+	PG_RETURN_VOID();			/* keep compiler quiet */
+}
+
+/*
+ * pg_mcv_list_send		- binary output routine for type PG_MCV_LIST.
+ *
+ * XXX MCV lists are serialized into a bytea value, so byteasend() does the job.
+ */
+Datum
+pg_mcv_list_send(PG_FUNCTION_ARGS)
+{
+	return byteasend(fcinfo);
+}
diff --git a/src/bin/psql/describe.c b/src/bin/psql/describe.c
index e7d5b51..db74d93 100644
--- a/src/bin/psql/describe.c
+++ b/src/bin/psql/describe.c
@@ -2298,8 +2298,8 @@ describeOneTableDetails(const char *schemaname,
 		{
 			printfPQExpBuffer(&buf,
 							  "SELECT oid, stanamespace::regnamespace AS nsp, staname, stakeys,\n"
-							  "  ndist_enabled,\n"
-							  "  ndist_built,\n"
+							  "  ndist_enabled, deps_enabled, mcv_enabled,\n"
+							  "  ndist_built, deps_built, mcv_built,\n"
 							  "  (SELECT string_agg(attname::text,', ')\n"
 						   "    FROM ((SELECT unnest(stakeys) AS attnum) s\n"
 							  "         JOIN pg_attribute a ON (starelid = a.attrelid and a.attnum = s.attnum))) AS attnums\n"
@@ -2317,6 +2317,8 @@ describeOneTableDetails(const char *schemaname,
 				printTableAddFooter(&cont, _("Statistics:"));
 				for (i = 0; i < tuples; i++)
 				{
+					bool		first = true;
+
 					printfPQExpBuffer(&buf, "    ");
 
 					/* statistics name (qualified with namespace) */
@@ -2326,10 +2328,27 @@ describeOneTableDetails(const char *schemaname,
 
 					/* options */
-					if (!strcmp(PQgetvalue(result, i, 4), "t"))
-						appendPQExpBuffer(&buf, "(dependencies)");
+					/* result columns: 5 = deps_enabled, 6 = mcv_enabled, 10 = attnums */
+					if (!strcmp(PQgetvalue(result, i, 5), "t"))
+					{
+						appendPQExpBuffer(&buf, "(dependencies");
+						first = false;
+					}
+
+					if (!strcmp(PQgetvalue(result, i, 6), "t"))
+					{
+						if (!first)
+							appendPQExpBuffer(&buf, ", mcv");
+						else
+							appendPQExpBuffer(&buf, "(mcv");
+						first = false;
+					}
+
+					/* close the option list only if one was actually opened */
+					if (!first)
+						appendPQExpBuffer(&buf, ")");
 
 					appendPQExpBuffer(&buf, " ON (%s)",
-									  PQgetvalue(result, i, 6));
+									  PQgetvalue(result, i, 10));
 
 					printTableAddFooter(&cont, buf.data);
 				}
diff --git a/src/include/catalog/pg_cast.h b/src/include/catalog/pg_cast.h
index 22fa4b8..80d8ea2 100644
--- a/src/include/catalog/pg_cast.h
+++ b/src/include/catalog/pg_cast.h
@@ -262,6 +262,11 @@ DATA(insert (  3353	 25    0 i i ));
 DATA(insert (  3358	 17    0 i b ));
 DATA(insert (  3358	 25    0 i i ));
 
+/* pg_mcv_list can be coerced to, but not from, bytea and text */
+DATA(insert (  441	 17    0 i b ));
+DATA(insert (  441	 25    0 i i ));
+
+
 /*
  * Datetime category
  */
diff --git a/src/include/catalog/pg_mv_statistic.h b/src/include/catalog/pg_mv_statistic.h
index e119cb7..34049d6 100644
--- a/src/include/catalog/pg_mv_statistic.h
+++ b/src/include/catalog/pg_mv_statistic.h
@@ -39,10 +39,12 @@ CATALOG(pg_mv_statistic,3381)
 	/* statistics requested to build */
 	bool		ndist_enabled;	/* build ndist coefficient? */
 	bool		deps_enabled;	/* analyze dependencies? */
+	bool		mcv_enabled;	/* build MCV list? */
 
 	/* statistics that are available (if requested) */
 	bool		ndist_built;	/* ndistinct coeff built */
 	bool		deps_built;		/* dependencies were built */
+	bool		mcv_built;		/* MCV list was built */
 
 	/*
 	 * variable-length fields start here, but we allow direct access to
@@ -53,6 +55,7 @@ CATALOG(pg_mv_statistic,3381)
 #ifdef CATALOG_VARLEN
 	pg_ndistinct		standist;		/* ndistinct coeff (serialized) */
 	pg_dependencies		stadeps;		/* dependencies (serialized) */
+	pg_mcv_list			stamcv;			/* MCV list (serialized) */
 #endif
 
 } FormData_pg_mv_statistic;
@@ -68,17 +71,20 @@ typedef FormData_pg_mv_statistic *Form_pg_mv_statistic;
  *		compiler constants for pg_mv_statistic
  * ----------------
  */
-#define Natts_pg_mv_statistic					11
+#define Natts_pg_mv_statistic					14
 #define Anum_pg_mv_statistic_starelid			1
 #define Anum_pg_mv_statistic_staname			2
 #define Anum_pg_mv_statistic_stanamespace		3
 #define Anum_pg_mv_statistic_staowner			4
 #define Anum_pg_mv_statistic_ndist_enabled		5
 #define Anum_pg_mv_statistic_deps_enabled		6
-#define Anum_pg_mv_statistic_ndist_built		7
-#define Anum_pg_mv_statistic_deps_built			8
-#define Anum_pg_mv_statistic_stakeys			9
-#define Anum_pg_mv_statistic_standist			10
-#define Anum_pg_mv_statistic_stadeps			11
+#define Anum_pg_mv_statistic_mcv_enabled		7
+#define Anum_pg_mv_statistic_ndist_built		8
+#define Anum_pg_mv_statistic_deps_built			9
+#define Anum_pg_mv_statistic_mcv_built			10
+#define Anum_pg_mv_statistic_stakeys			11
+#define Anum_pg_mv_statistic_standist			12
+#define Anum_pg_mv_statistic_stadeps			13
+#define Anum_pg_mv_statistic_stamcv				14
 
 #endif   /* PG_MV_STATISTIC_H */
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index b1f7b75..7cf1e5a 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -2726,6 +2726,11 @@ DESCR("current user privilege on any column by rel name");
 DATA(insert OID = 3029 (  has_any_column_privilege	   PGNSP PGUID 12 10 0 0 0 f f f f t f s s 2 0 16 "26 25" _null_ _null_ _null_ _null_ _null_ has_any_column_privilege_id _null_ _null_ _null_ ));
 DESCR("current user privilege on any column by rel oid");
 
+DATA(insert OID = 3376 (  pg_mv_stats_mcvlist_info	PGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 25 "441" _null_ _null_ _null_ _null_ _null_ pg_mv_stats_mcvlist_info _null_ _null_ _null_ ));
+DESCR("multi-variate statistics: MCV list info");
+DATA(insert OID = 3373 (  pg_mv_mcv_items PGNSP PGUID 12 1 1000 0 0 f f f f t t i s 1 0 2249 "26" "{26,23,1009,1000,701}" "{i,o,o,o,o}" "{oid,index,values,nulls,frequency}" _null_ _null_ pg_mv_mcv_items _null_ _null_ _null_ ));
+DESCR("details about MCV list items");
+
 DATA(insert OID = 3354 (  pg_ndistinct_in	PGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 3353 "2275" _null_ _null_ _null_ _null_ _null_ pg_ndistinct_in _null_ _null_ _null_ ));
 DESCR("I/O");
 DATA(insert OID = 3355 (  pg_ndistinct_out	PGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 2275 "3353" _null_ _null_ _null_ _null_ _null_ pg_ndistinct_out _null_ _null_ _null_ ));
@@ -2744,6 +2749,15 @@ DESCR("I/O");
 DATA(insert OID = 3362 (  pg_dependencies_send	PGNSP PGUID 12 1 0 0 0 f f f f t f s s 1 0 17 "3358" _null_ _null_ _null_ _null_ _null_	pg_dependencies_send _null_ _null_ _null_ ));
 DESCR("I/O");
 
+DATA(insert OID = 442 (  pg_mcv_list_in	PGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 441 "2275" _null_ _null_ _null_ _null_ _null_ pg_mcv_list_in _null_ _null_ _null_ ));
+DESCR("I/O");
+DATA(insert OID = 443 (  pg_mcv_list_out	PGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 2275 "441" _null_ _null_ _null_ _null_ _null_ pg_mcv_list_out _null_ _null_ _null_ ));
+DESCR("I/O");
+DATA(insert OID = 444 (  pg_mcv_list_recv	PGNSP PGUID 12 1 0 0 0 f f f f t f s s 1 0 441 "2281" _null_ _null_ _null_ _null_ _null_ pg_mcv_list_recv _null_ _null_ _null_ ));
+DESCR("I/O");
+DATA(insert OID = 445 (  pg_mcv_list_send	PGNSP PGUID 12 1 0 0 0 f f f f t f s s 1 0 17 "441" _null_ _null_ _null_ _null_ _null_	pg_mcv_list_send _null_ _null_ _null_ ));
+DESCR("I/O");
+
 DATA(insert OID = 1928 (  pg_stat_get_numscans			PGNSP PGUID 12 1 0 0 0 f f f f t f s r 1 0 20 "26" _null_ _null_ _null_ _null_ _null_ pg_stat_get_numscans _null_ _null_ _null_ ));
 DESCR("statistics: number of scans done for table/index");
 DATA(insert OID = 1929 (  pg_stat_get_tuples_returned	PGNSP PGUID 12 1 0 0 0 f f f f t f s r 1 0 20 "26" _null_ _null_ _null_ _null_ _null_ pg_stat_get_tuples_returned _null_ _null_ _null_ ));
diff --git a/src/include/catalog/pg_type.h b/src/include/catalog/pg_type.h
index da637d4..fbac135 100644
--- a/src/include/catalog/pg_type.h
+++ b/src/include/catalog/pg_type.h
@@ -372,6 +372,10 @@ DATA(insert OID = 3358 ( pg_dependencies		PGNSP PGUID -1 f b S f t \054 0 0 0 pg
 DESCR("multivariate histogram");
 #define PGDEPENDENCIESOID	3358
 
+DATA(insert OID = 441 ( pg_mcv_list		PGNSP PGUID -1 f b S f t \054 0 0 0 pg_mcv_list_in pg_mcv_list_out pg_mcv_list_recv pg_mcv_list_send - - - i x f 0 -1 0 100 _null_ _null_ _null_ ));
+DESCR("multivariate MCV list");
+#define PGMCVLISTOID	441
+
 DATA(insert OID = 32 ( pg_ddl_command	PGNSP PGUID SIZEOF_POINTER t p P f t \054 0 0 0 pg_ddl_command_in pg_ddl_command_out pg_ddl_command_recv pg_ddl_command_send - - - ALIGNOF_POINTER p f 0 -1 0 0 _null_ _null_ _null_ ));
 DESCR("internal type for passing CollectedCommand");
 #define PGDDLCOMMANDOID 32
diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h
index 56957e8..d912827 100644
--- a/src/include/nodes/relation.h
+++ b/src/include/nodes/relation.h
@@ -681,12 +681,14 @@ typedef struct MVStatisticInfo
 	RelOptInfo *rel;			/* back-link to index's table */
 
 	/* enabled statistics */
-	bool		deps_enabled;	/* functional dependencies enabled */
 	bool		ndist_enabled;	/* ndistinct coefficient enabled */
+	bool		deps_enabled;	/* functional dependencies enabled */
+	bool		mcv_enabled;	/* MCV list enabled */
 
 	/* built/available statistics */
-	bool		deps_built;		/* functional dependencies built */
 	bool		ndist_built;	/* ndistinct coefficient built */
+	bool		deps_built;		/* functional dependencies built */
+	bool		mcv_built;		/* MCV list built */
 
 	/* columns in the statistics (attnums) */
 	int2vector *stakeys;		/* attnums of the columns covered */
diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h
index 9ffd80c..9ed080a 100644
--- a/src/include/utils/builtins.h
+++ b/src/include/utils/builtins.h
@@ -77,6 +77,10 @@ extern Datum pg_dependencies_in(PG_FUNCTION_ARGS);
 extern Datum pg_dependencies_out(PG_FUNCTION_ARGS);
 extern Datum pg_dependencies_recv(PG_FUNCTION_ARGS);
 extern Datum pg_dependencies_send(PG_FUNCTION_ARGS);
+extern Datum pg_mcv_list_in(PG_FUNCTION_ARGS);
+extern Datum pg_mcv_list_out(PG_FUNCTION_ARGS);
+extern Datum pg_mcv_list_recv(PG_FUNCTION_ARGS);
+extern Datum pg_mcv_list_send(PG_FUNCTION_ARGS);
 
 /* regexp.c */
 extern char *regexp_fixed_prefix(text *text_re, bool case_insensitive,
diff --git a/src/include/utils/mvstats.h b/src/include/utils/mvstats.h
index b230747..0c4f621 100644
--- a/src/include/utils/mvstats.h
+++ b/src/include/utils/mvstats.h
@@ -17,6 +17,14 @@
 #include "fmgr.h"
 #include "commands/vacuum.h"
 
+/*
+ * Degree to which an MCV item matches a clause.
+ * This is then considered when computing the selectivity.
+ */
+#define MVSTATS_MATCH_NONE		0		/* no match at all */
+#define MVSTATS_MATCH_PARTIAL	1		/* partial match */
+#define MVSTATS_MATCH_FULL		2		/* full match */
+
 #define MVSTATS_MAX_DIMENSIONS	8		/* max number of attributes */
 
 #define MVSTAT_NDISTINCT_MAGIC		0xA352BFA4		/* marks serialized bytea */
@@ -65,6 +73,42 @@ typedef struct MVDependenciesData
 
 typedef MVDependenciesData *MVDependencies;
 
+
+/* used to flag stats serialized to bytea */
+#define MVSTAT_MCV_MAGIC		0xE1A651C2		/* marks serialized bytea */
+#define MVSTAT_MCV_TYPE_BASIC	1				/* basic MCV list type */
+
+/* max items in MCV list (mostly arbitrary number) */
+#define MVSTAT_MCVLIST_MAX_ITEMS	8192
+
+/*
+ * Multivariate MCV (most-common value) lists
+ *
+ * A straight-forward extension of MCV items - i.e. a list (array) of
+ * combinations of attribute values, together with a frequency and
+ * null flags.
+ */
+typedef struct MCVItemData
+{
+	double		frequency;		/* frequency of this combination */
+	bool	   *isnull;			/* NULL flags, presumably one per dimension */
+	Datum	   *values;			/* variable-length (ndimensions) */
+} MCVItemData;
+
+typedef MCVItemData *MCVItem;
+
+/* multivariate MCV list - essentially an array of MCV items */
+typedef struct MCVListData
+{
+	uint32		magic;			/* magic constant marker */
+	uint32		type;			/* type of MCV list (BASIC) */
+	uint32		ndimensions;	/* number of dimensions */
+	uint32		nitems;			/* number of MCV items in the array */
+	MCVItem    *items;			/* array of MCV items */
+} MCVListData;
+
+typedef MCVListData *MCVList;
+
 bool dependency_implies_attribute(MVDependency dependency, AttrNumber attnum,
 								  int16 *attmap);
 bool dependency_is_fully_matched(MVDependency dependency, Bitmapset *attnums,
@@ -72,13 +116,30 @@ bool dependency_is_fully_matched(MVDependency dependency, Bitmapset *attnums,
 
 MVNDistinct		load_mv_ndistinct(Oid mvoid);
 MVDependencies	load_mv_dependencies(Oid mvoid);
+MCVList			load_mv_mcvlist(Oid mvoid);
 
 bytea *serialize_mv_ndistinct(MVNDistinct ndistinct);
 bytea *serialize_mv_dependencies(MVDependencies dependencies);
+bytea *serialize_mv_mcvlist(MCVList mcvlist, int2vector *attrs,
+							VacAttrStats **stats);
 
 /* deserialization of stats (serialization is private to analyze) */
 MVNDistinct deserialize_mv_ndistinct(bytea *data);
 MVDependencies deserialize_mv_dependencies(bytea *data);
+MCVList deserialize_mv_mcvlist(bytea *data);
+
+/*
+ * Returns index of the attribute number within the vector (i.e. a
+ * dimension within the stats).
+ */
+int mv_get_index(AttrNumber varattno, int2vector *stakeys);
+
+int2vector *find_mv_attnums(Oid mvoid, Oid *relid);
+
+/* SQL-callable functions for inspecting the statistics (see pg_proc.h) */
+extern Datum pg_mv_stats_mcvlist_info(PG_FUNCTION_ARGS);
+extern Datum pg_mv_mcv_items(PG_FUNCTION_ARGS);
+
 
 MVNDistinct build_mv_ndistinct(double totalrows, int numrows, HeapTuple *rows,
 							   int2vector *attrs, VacAttrStats **stats);
@@ -87,6 +148,9 @@ MVDependencies build_mv_dependencies(int numrows, HeapTuple *rows,
 					  int2vector *attrs,
 					  VacAttrStats **stats);
 
+MCVList build_mv_mcvlist(int numrows, HeapTuple *rows, int2vector *attrs,
+				 VacAttrStats **stats, int *numrows_filtered);
+
 void build_mv_stats(Relation onerel, double totalrows,
 			   int numrows, HeapTuple *rows,
 			   int natts, VacAttrStats **vacattrstats);
diff --git a/src/test/regress/expected/mv_mcv.out b/src/test/regress/expected/mv_mcv.out
new file mode 100644
index 0000000..d8ba619
--- /dev/null
+++ b/src/test/regress/expected/mv_mcv.out
@@ -0,0 +1,198 @@
+-- data type passed by value
+CREATE TABLE mcv_list (
+    a INT,
+    b INT,
+    c INT
+);
+-- unknown column
+CREATE STATISTICS s4 WITH (mcv) ON (unknown_column) FROM mcv_list;
+ERROR:  column "unknown_column" referenced in statistics does not exist
+-- single column
+CREATE STATISTICS s4 WITH (mcv) ON (a) FROM mcv_list;
+ERROR:  statistics require at least 2 columns
+-- single column, duplicated
+CREATE STATISTICS s4 WITH (mcv) ON (a, a) FROM mcv_list;
+ERROR:  duplicate column name in statistics definition
+-- two columns, one duplicated
+CREATE STATISTICS s4 WITH (mcv) ON (a, a, b) FROM mcv_list;
+ERROR:  duplicate column name in statistics definition
+-- unknown option
+CREATE STATISTICS s4 WITH (unknown_option) ON (a, b, c) FROM mcv_list;
+ERROR:  unrecognized STATISTICS option "unknown_option"
+-- correct command
+CREATE STATISTICS s4 WITH (mcv) ON (a, b, c) FROM mcv_list;
+-- random data
+INSERT INTO mcv_list
+     SELECT mod(i, 111), mod(i, 123), mod(i, 23) FROM generate_series(1,10000) s(i);
+ANALYZE mcv_list;
+SELECT mcv_enabled, mcv_built, pg_mv_stats_mcvlist_info(stamcv)
+  FROM pg_mv_statistic WHERE starelid = 'mcv_list'::regclass;
+ mcv_enabled | mcv_built | pg_mv_stats_mcvlist_info 
+-------------+-----------+--------------------------
+ t           | f         | 
+(1 row)
+
+TRUNCATE mcv_list;
+-- a => b, a => c, b => c
+INSERT INTO mcv_list
+     SELECT i/10, i/100, i/200 FROM generate_series(1,10000) s(i);
+ANALYZE mcv_list;
+SELECT mcv_enabled, mcv_built, pg_mv_stats_mcvlist_info(stamcv)
+  FROM pg_mv_statistic WHERE starelid = 'mcv_list'::regclass;
+ mcv_enabled | mcv_built | pg_mv_stats_mcvlist_info 
+-------------+-----------+--------------------------
+ t           | t         | nitems=1000
+(1 row)
+
+TRUNCATE mcv_list;
+-- a => b, a => c
+INSERT INTO mcv_list
+     SELECT i/10, i/150, i/200 FROM generate_series(1,10000) s(i);
+ANALYZE mcv_list;
+SELECT mcv_enabled, mcv_built, pg_mv_stats_mcvlist_info(stamcv)
+  FROM pg_mv_statistic WHERE starelid = 'mcv_list'::regclass;
+ mcv_enabled | mcv_built | pg_mv_stats_mcvlist_info 
+-------------+-----------+--------------------------
+ t           | t         | nitems=1000
+(1 row)
+
+TRUNCATE mcv_list;
+-- check explain (expect bitmap index scan, not plain index scan)
+INSERT INTO mcv_list
+     SELECT i/100, i/200, i/400 FROM generate_series(1,10000) s(i);
+CREATE INDEX mcv_idx ON mcv_list (a, b);
+ANALYZE mcv_list;
+SELECT mcv_enabled, mcv_built, pg_mv_stats_mcvlist_info(stamcv)
+  FROM pg_mv_statistic WHERE starelid = 'mcv_list'::regclass;
+ mcv_enabled | mcv_built | pg_mv_stats_mcvlist_info 
+-------------+-----------+--------------------------
+ t           | t         | nitems=100
+(1 row)
+
+EXPLAIN (COSTS off)
+ SELECT * FROM mcv_list WHERE a = 10 AND b = 5;
+                 QUERY PLAN                 
+--------------------------------------------
+ Bitmap Heap Scan on mcv_list
+   Recheck Cond: ((a = 10) AND (b = 5))
+   ->  Bitmap Index Scan on mcv_idx
+         Index Cond: ((a = 10) AND (b = 5))
+(4 rows)
+
+DROP TABLE mcv_list;
+-- varlena type (text)
+CREATE TABLE mcv_list (
+    a TEXT,
+    b TEXT,
+    c TEXT
+);
+CREATE STATISTICS s5 WITH (mcv) ON (a, b, c) FROM mcv_list;
+-- random data
+INSERT INTO mcv_list
+     SELECT mod(i, 111), mod(i, 123), mod(i, 23) FROM generate_series(1,10000) s(i);
+ANALYZE mcv_list;
+SELECT mcv_enabled, mcv_built, pg_mv_stats_mcvlist_info(stamcv)
+  FROM pg_mv_statistic WHERE starelid = 'mcv_list'::regclass;
+ mcv_enabled | mcv_built | pg_mv_stats_mcvlist_info 
+-------------+-----------+--------------------------
+ t           | f         | 
+(1 row)
+
+TRUNCATE mcv_list;
+-- a => b, a => c, b => c
+INSERT INTO mcv_list
+     SELECT i/10, i/100, i/200 FROM generate_series(1,10000) s(i);
+ANALYZE mcv_list;
+SELECT mcv_enabled, mcv_built, pg_mv_stats_mcvlist_info(stamcv)
+  FROM pg_mv_statistic WHERE starelid = 'mcv_list'::regclass;
+ mcv_enabled | mcv_built | pg_mv_stats_mcvlist_info 
+-------------+-----------+--------------------------
+ t           | t         | nitems=1000
+(1 row)
+
+TRUNCATE mcv_list;
+-- a => b, a => c
+INSERT INTO mcv_list
+     SELECT i/10, i/150, i/200 FROM generate_series(1,10000) s(i);
+ANALYZE mcv_list;
+SELECT mcv_enabled, mcv_built, pg_mv_stats_mcvlist_info(stamcv)
+  FROM pg_mv_statistic WHERE starelid = 'mcv_list'::regclass;
+ mcv_enabled | mcv_built | pg_mv_stats_mcvlist_info 
+-------------+-----------+--------------------------
+ t           | t         | nitems=1000
+(1 row)
+
+TRUNCATE mcv_list;
+-- check explain (expect bitmap index scan, not plain index scan)
+INSERT INTO mcv_list
+     SELECT i/100, i/200, i/400 FROM generate_series(1,10000) s(i);
+CREATE INDEX mcv_idx ON mcv_list (a, b);
+ANALYZE mcv_list;
+SELECT mcv_enabled, mcv_built, pg_mv_stats_mcvlist_info(stamcv)
+  FROM pg_mv_statistic WHERE starelid = 'mcv_list'::regclass;
+ mcv_enabled | mcv_built | pg_mv_stats_mcvlist_info 
+-------------+-----------+--------------------------
+ t           | t         | nitems=100
+(1 row)
+
+EXPLAIN (COSTS off)
+ SELECT * FROM mcv_list WHERE a = '10' AND b = '5';
+                         QUERY PLAN                         
+------------------------------------------------------------
+ Bitmap Heap Scan on mcv_list
+   Recheck Cond: ((a = '10'::text) AND (b = '5'::text))
+   ->  Bitmap Index Scan on mcv_idx
+         Index Cond: ((a = '10'::text) AND (b = '5'::text))
+(4 rows)
+
+TRUNCATE mcv_list;
+-- check explain (expect bitmap index scan, not plain index scan) with NULLs
+INSERT INTO mcv_list
+     SELECT
+       (CASE WHEN i/100 = 0 THEN NULL ELSE i/100 END),
+       (CASE WHEN i/200 = 0 THEN NULL ELSE i/200 END),
+       (CASE WHEN i/400 = 0 THEN NULL ELSE i/400 END)
+     FROM generate_series(1,10000) s(i);
+ANALYZE mcv_list;
+SELECT mcv_enabled, mcv_built, pg_mv_stats_mcvlist_info(stamcv)
+  FROM pg_mv_statistic WHERE starelid = 'mcv_list'::regclass;
+ mcv_enabled | mcv_built | pg_mv_stats_mcvlist_info 
+-------------+-----------+--------------------------
+ t           | t         | nitems=100
+(1 row)
+
+EXPLAIN (COSTS off)
+ SELECT * FROM mcv_list WHERE a IS NULL AND b IS NULL;
+                    QUERY PLAN                     
+---------------------------------------------------
+ Bitmap Heap Scan on mcv_list
+   Recheck Cond: ((a IS NULL) AND (b IS NULL))
+   ->  Bitmap Index Scan on mcv_idx
+         Index Cond: ((a IS NULL) AND (b IS NULL))
+(4 rows)
+
+DROP TABLE mcv_list;
+-- NULL values (mix of int and text columns)
+CREATE TABLE mcv_list (
+    a INT,
+    b TEXT,
+    c INT,
+    d TEXT
+);
+CREATE STATISTICS s6 WITH (mcv) ON (a, b, c, d) FROM mcv_list;
+INSERT INTO mcv_list
+     SELECT
+         mod(i, 100),
+         (CASE WHEN mod(i, 200) = 0 THEN NULL ELSE mod(i,200) END),
+         mod(i, 400),
+         (CASE WHEN mod(i, 300) = 0 THEN NULL ELSE mod(i,600) END)
+     FROM generate_series(1,10000) s(i);
+ANALYZE mcv_list;
+SELECT mcv_enabled, mcv_built, pg_mv_stats_mcvlist_info(stamcv)
+  FROM pg_mv_statistic WHERE starelid = 'mcv_list'::regclass;
+ mcv_enabled | mcv_built | pg_mv_stats_mcvlist_info 
+-------------+-----------+--------------------------
+ t           | t         | nitems=1200
+(1 row)
+
+DROP TABLE mcv_list;
diff --git a/src/test/regress/expected/opr_sanity.out b/src/test/regress/expected/opr_sanity.out
index db1cf8a..9969c10 100644
--- a/src/test/regress/expected/opr_sanity.out
+++ b/src/test/regress/expected/opr_sanity.out
@@ -819,11 +819,12 @@ WHERE c.castmethod = 'b' AND
  pg_node_tree      | text              |        0 | i
  pg_ndistinct      | bytea             |        0 | i
  pg_dependencies   | bytea             |        0 | i
+ pg_mcv_list       | bytea             |        0 | i
  cidr              | inet              |        0 | i
  xml               | text              |        0 | a
  xml               | character varying |        0 | a
  xml               | character         |        0 | a
-(9 rows)
+(10 rows)
 
 -- **************** pg_conversion ****************
 -- Look for illegal values in pg_conversion fields.
diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out
index 39179a6..2e3c40e 100644
--- a/src/test/regress/expected/rules.out
+++ b/src/test/regress/expected/rules.out
@@ -1381,7 +1381,9 @@ pg_mv_stats| SELECT n.nspname AS schemaname,
     s.staname,
     s.stakeys AS attnums,
     length((s.standist)::bytea) AS ndistbytes,
-    length((s.stadeps)::bytea) AS depsbytes
+    length((s.stadeps)::bytea) AS depsbytes,
+    length((s.stamcv)::bytea) AS mcvbytes,
+    pg_mv_stats_mcvlist_info(s.stamcv) AS mcvinfo
    FROM ((pg_mv_statistic s
      JOIN pg_class c ON ((c.oid = s.starelid)))
      LEFT JOIN pg_namespace n ON ((n.oid = c.relnamespace)));
diff --git a/src/test/regress/expected/type_sanity.out b/src/test/regress/expected/type_sanity.out
index b0b40ca..dde15b9 100644
--- a/src/test/regress/expected/type_sanity.out
+++ b/src/test/regress/expected/type_sanity.out
@@ -72,8 +72,9 @@ WHERE p1.typtype not in ('c','d','p') AND p1.typname NOT LIKE E'\\_%'
   194 | pg_node_tree
  3353 | pg_ndistinct
  3358 | pg_dependencies
+  441 | pg_mcv_list
   210 | smgr
-(4 rows)
+(5 rows)
 
 -- Make sure typarray points to a varlena array type of our own base
 SELECT p1.oid, p1.typname as basetype, p2.typname as arraytype,
diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule
index fda9166..d805840 100644
--- a/src/test/regress/parallel_schedule
+++ b/src/test/regress/parallel_schedule
@@ -118,4 +118,4 @@ test: event_trigger
 test: stats
 
 # run tests of multivariate stats
-test: mv_ndistinct mv_dependencies
+test: mv_ndistinct mv_dependencies mv_mcv
diff --git a/src/test/regress/serial_schedule b/src/test/regress/serial_schedule
index 90d74d2..72c6acd 100644
--- a/src/test/regress/serial_schedule
+++ b/src/test/regress/serial_schedule
@@ -173,3 +173,4 @@ test: event_trigger
 test: stats
 test: mv_ndistinct
 test: mv_dependencies
+test: mv_mcv
diff --git a/src/test/regress/sql/mv_mcv.sql b/src/test/regress/sql/mv_mcv.sql
new file mode 100644
index 0000000..693288f
--- /dev/null
+++ b/src/test/regress/sql/mv_mcv.sql
@@ -0,0 +1,169 @@
+-- data type passed by value
+CREATE TABLE mcv_list (
+    a INT,
+    b INT,
+    c INT
+);
+
+-- unknown column
+CREATE STATISTICS s4 WITH (mcv) ON (unknown_column) FROM mcv_list;
+
+-- single column
+CREATE STATISTICS s4 WITH (mcv) ON (a) FROM mcv_list;
+
+-- single column, duplicated
+CREATE STATISTICS s4 WITH (mcv) ON (a, a) FROM mcv_list;
+
+-- two columns, one duplicated
+CREATE STATISTICS s4 WITH (mcv) ON (a, a, b) FROM mcv_list;
+
+-- unknown option
+CREATE STATISTICS s4 WITH (unknown_option) ON (a, b, c) FROM mcv_list;
+
+-- correct command
+CREATE STATISTICS s4 WITH (mcv) ON (a, b, c) FROM mcv_list;
+
+-- random data
+INSERT INTO mcv_list
+     SELECT mod(i, 111), mod(i, 123), mod(i, 23) FROM generate_series(1,10000) s(i);
+
+ANALYZE mcv_list;
+
+SELECT mcv_enabled, mcv_built, pg_mv_stats_mcvlist_info(stamcv)
+  FROM pg_mv_statistic WHERE starelid = 'mcv_list'::regclass;
+
+TRUNCATE mcv_list;
+
+-- a => b, a => c, b => c
+INSERT INTO mcv_list
+     SELECT i/10, i/100, i/200 FROM generate_series(1,10000) s(i);
+
+ANALYZE mcv_list;
+
+SELECT mcv_enabled, mcv_built, pg_mv_stats_mcvlist_info(stamcv)
+  FROM pg_mv_statistic WHERE starelid = 'mcv_list'::regclass;
+
+TRUNCATE mcv_list;
+
+-- a => b, a => c
+INSERT INTO mcv_list
+     SELECT i/10, i/150, i/200 FROM generate_series(1,10000) s(i);
+
+ANALYZE mcv_list;
+
+SELECT mcv_enabled, mcv_built, pg_mv_stats_mcvlist_info(stamcv)
+  FROM pg_mv_statistic WHERE starelid = 'mcv_list'::regclass;
+
+TRUNCATE mcv_list;
+
+-- check explain (expect bitmap index scan, not plain index scan)
+INSERT INTO mcv_list
+     SELECT i/100, i/200, i/400 FROM generate_series(1,10000) s(i);
+CREATE INDEX mcv_idx ON mcv_list (a, b);
+
+ANALYZE mcv_list;
+
+SELECT mcv_enabled, mcv_built, pg_mv_stats_mcvlist_info(stamcv)
+  FROM pg_mv_statistic WHERE starelid = 'mcv_list'::regclass;
+
+EXPLAIN (COSTS off)
+ SELECT * FROM mcv_list WHERE a = 10 AND b = 5;
+
+DROP TABLE mcv_list;
+
+-- varlena type (text)
+CREATE TABLE mcv_list (
+    a TEXT,
+    b TEXT,
+    c TEXT
+);
+
+CREATE STATISTICS s5 WITH (mcv) ON (a, b, c) FROM mcv_list;
+
+-- random data
+INSERT INTO mcv_list
+     SELECT mod(i, 111), mod(i, 123), mod(i, 23) FROM generate_series(1,10000) s(i);
+
+ANALYZE mcv_list;
+
+SELECT mcv_enabled, mcv_built, pg_mv_stats_mcvlist_info(stamcv)
+  FROM pg_mv_statistic WHERE starelid = 'mcv_list'::regclass;
+
+TRUNCATE mcv_list;
+
+-- a => b, a => c, b => c
+INSERT INTO mcv_list
+     SELECT i/10, i/100, i/200 FROM generate_series(1,10000) s(i);
+
+ANALYZE mcv_list;
+
+SELECT mcv_enabled, mcv_built, pg_mv_stats_mcvlist_info(stamcv)
+  FROM pg_mv_statistic WHERE starelid = 'mcv_list'::regclass;
+
+TRUNCATE mcv_list;
+
+-- a => b, a => c
+INSERT INTO mcv_list
+     SELECT i/10, i/150, i/200 FROM generate_series(1,10000) s(i);
+ANALYZE mcv_list;
+
+SELECT mcv_enabled, mcv_built, pg_mv_stats_mcvlist_info(stamcv)
+  FROM pg_mv_statistic WHERE starelid = 'mcv_list'::regclass;
+
+TRUNCATE mcv_list;
+
+-- check explain (expect bitmap index scan, not plain index scan)
+INSERT INTO mcv_list
+     SELECT i/100, i/200, i/400 FROM generate_series(1,10000) s(i);
+CREATE INDEX mcv_idx ON mcv_list (a, b);
+ANALYZE mcv_list;
+
+SELECT mcv_enabled, mcv_built, pg_mv_stats_mcvlist_info(stamcv)
+  FROM pg_mv_statistic WHERE starelid = 'mcv_list'::regclass;
+
+EXPLAIN (COSTS off)
+ SELECT * FROM mcv_list WHERE a = '10' AND b = '5';
+
+TRUNCATE mcv_list;
+
+-- check explain (expect bitmap index scan, not plain index scan) with NULLs
+INSERT INTO mcv_list
+     SELECT
+       (CASE WHEN i/100 = 0 THEN NULL ELSE i/100 END),
+       (CASE WHEN i/200 = 0 THEN NULL ELSE i/200 END),
+       (CASE WHEN i/400 = 0 THEN NULL ELSE i/400 END)
+     FROM generate_series(1,10000) s(i);
+ANALYZE mcv_list;
+
+SELECT mcv_enabled, mcv_built, pg_mv_stats_mcvlist_info(stamcv)
+  FROM pg_mv_statistic WHERE starelid = 'mcv_list'::regclass;
+
+EXPLAIN (COSTS off)
+ SELECT * FROM mcv_list WHERE a IS NULL AND b IS NULL;
+
+DROP TABLE mcv_list;
+
+-- NULL values (mix of int and text columns)
+CREATE TABLE mcv_list (
+    a INT,
+    b TEXT,
+    c INT,
+    d TEXT
+);
+
+CREATE STATISTICS s6 WITH (mcv) ON (a, b, c, d) FROM mcv_list;
+
+INSERT INTO mcv_list
+     SELECT
+         mod(i, 100),
+         (CASE WHEN mod(i, 200) = 0 THEN NULL ELSE mod(i,200) END),
+         mod(i, 400),
+         (CASE WHEN mod(i, 300) = 0 THEN NULL ELSE mod(i,600) END)
+     FROM generate_series(1,10000) s(i);
+
+ANALYZE mcv_list;
+
+SELECT mcv_enabled, mcv_built, pg_mv_stats_mcvlist_info(stamcv)
+  FROM pg_mv_statistic WHERE starelid = 'mcv_list'::regclass;
+
+DROP TABLE mcv_list;
-- 
2.5.5