diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c index 2897245..d160814 100644 --- a/src/backend/optimizer/path/joinpath.c +++ b/src/backend/optimizer/path/joinpath.c @@ -40,6 +40,13 @@ static void consider_parallel_nestloop(PlannerInfo *root, RelOptInfo *innerrel, JoinType jointype, JoinPathExtraData *extra); +static void consider_parallel_mergejoin(PlannerInfo *root, + RelOptInfo *joinrel, + RelOptInfo *outerrel, + RelOptInfo *innerrel, + JoinType jointype, + JoinPathExtraData *extra, + Path *inner_cheapest_total); static void hash_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, JoinType jointype, JoinPathExtraData *extra); @@ -59,6 +66,14 @@ static void generate_mergejoin_paths(PlannerInfo *root, bool useallclauses, Path *inner_cheapest_total, List *merge_pathkeys); +static void generate_partial_mergejoin_paths(PlannerInfo *root, + RelOptInfo *joinrel, + RelOptInfo *innerrel, + Path *outerpath, + JoinType jointype, + JoinPathExtraData *extra, + Path *inner_cheapest_total, + List *merge_pathkeys); /* @@ -481,6 +496,76 @@ try_mergejoin_path(PlannerInfo *root, } /* + * try_partial_mergejoin_path + * Consider a partial merge join path; if it appears useful, push it into + * the joinrel's partial_pathlist via add_partial_path(). + */ +static void +try_partial_mergejoin_path(PlannerInfo *root, + RelOptInfo *joinrel, + Path *outer_path, + Path *inner_path, + List *pathkeys, + List *mergeclauses, + List *outersortkeys, + List *innersortkeys, + JoinType jointype, + JoinPathExtraData *extra) +{ + JoinCostWorkspace workspace; + + /* + * Reject parameterized inner paths; see try_partial_hashjoin_path(). 
+ */ + Assert(bms_is_empty(joinrel->lateral_relids)); + if (inner_path->param_info != NULL) + { + Relids inner_paramrels = inner_path->param_info->ppi_req_outer; + + if (!bms_is_empty(inner_paramrels)) + return; + } + + /* + * If the given paths are already well enough ordered, we can skip doing + * an explicit sort. + */ + if (outersortkeys && + pathkeys_contained_in(outersortkeys, outer_path->pathkeys)) + outersortkeys = NIL; + if (innersortkeys && + pathkeys_contained_in(innersortkeys, inner_path->pathkeys)) + innersortkeys = NIL; + + /* + * Precheck on an initial cost estimate; see try_partial_nestloop_path(). + */ + initial_cost_mergejoin(root, &workspace, jointype, mergeclauses, + outer_path, inner_path, + outersortkeys, innersortkeys, + extra->sjinfo); + + if (!add_partial_path_precheck(joinrel, workspace.total_cost, pathkeys)) + return; + + /* Might be good enough to be worth trying, so let's try it. */ + add_partial_path(joinrel, (Path *) + create_mergejoin_path(root, + joinrel, + jointype, + &workspace, + extra->sjinfo, + outer_path, + inner_path, + extra->restrictlist, + pathkeys, + NULL, + mergeclauses, + outersortkeys, + innersortkeys)); +} + +/* * try_hashjoin_path * Consider a hash join path; if it appears useful, push it into * the joinrel's pathlist via add_path(). @@ -649,6 +734,7 @@ sort_inner_and_outer(PlannerInfo *root, JoinType jointype, JoinPathExtraData *extra) { + JoinType save_jointype = jointype; Path *outer_path; Path *inner_path; List *all_pathkeys; @@ -782,6 +868,37 @@ sort_inner_and_outer(PlannerInfo *root, innerkeys, jointype, extra); + + /* + * If the joinrel is parallel-safe, we may be able to consider a + * partial merge join. However, we can't handle JOIN_UNIQUE_OUTER, + * because the outer path will be partial, and therefore we won't be + * able to properly guarantee uniqueness. Similarly, we can't handle + * JOIN_FULL and JOIN_RIGHT, because they can produce spurious + * null-extended rows. Also, the resulting path must not be parameterized. 
+ */ + if (joinrel->consider_parallel && + save_jointype != JOIN_UNIQUE_OUTER && + save_jointype != JOIN_FULL && + save_jointype != JOIN_RIGHT && + outerrel->partial_pathlist != NIL && + bms_is_empty(joinrel->lateral_relids) && + inner_path->parallel_safe) + { + Path *cheapest_partial_outer = (Path *) linitial( + outerrel->partial_pathlist); + + try_partial_mergejoin_path(root, + joinrel, + cheapest_partial_outer, + inner_path, + merge_pathkeys, + cur_mergeclauses, + outerkeys, + innerkeys, + jointype, + extra); + } } } @@ -1021,6 +1138,198 @@ generate_mergejoin_paths(PlannerInfo *root, } /* + * generate_partial_mergejoin_paths + * + * Like generate_mergejoin_paths, but generates partial paths. Callers + * must not pass JOIN_UNIQUE_OUTER, JOIN_FULL or JOIN_RIGHT (asserted + * below), because those join types are not supported for partial paths. + */ +static void +generate_partial_mergejoin_paths(PlannerInfo *root, + RelOptInfo *joinrel, + RelOptInfo *innerrel, + Path *outerpath, + JoinType jointype, + JoinPathExtraData *extra, + Path *inner_cheapest_total, + List *merge_pathkeys) +{ + List *mergeclauses; + List *innersortkeys; + List *trialsortkeys; + Path *cheapest_startup_inner; + Path *cheapest_total_inner; + JoinType save_jointype = jointype; + int num_sortkeys; + int sortkeycnt; + + /* + * We should never come here for JOIN_UNIQUE_OUTER, JOIN_FULL or + * JOIN_RIGHT. + */ + Assert(jointype != JOIN_UNIQUE_OUTER); + Assert(jointype != JOIN_FULL); + Assert(jointype != JOIN_RIGHT); + + if (jointype == JOIN_UNIQUE_INNER) + jointype = JOIN_INNER; + + /* Look for useful mergeclauses (if any) */ + mergeclauses = find_mergeclauses_for_pathkeys(root, + outerpath->pathkeys, + true, + extra->mergeclause_list); + + /* + * Done with this outer path if no chance for a mergejoin. 
+ */ + if (mergeclauses == NIL) + return; + + /* Compute the required ordering of the inner path */ + innersortkeys = make_inner_pathkeys_for_merge(root, + mergeclauses, + outerpath->pathkeys); + + /* Try inner_cheapest_total (sorted if needed) if it is parallel safe */ + if (inner_cheapest_total->parallel_safe) + try_partial_mergejoin_path(root, + joinrel, + outerpath, + inner_cheapest_total, + merge_pathkeys, + mergeclauses, + NIL, + innersortkeys, + jointype, + extra); + + /* Can't do anything else if inner path needs to be unique'd */ + if (save_jointype == JOIN_UNIQUE_INNER) + return; + + /* + * See comments in generate_mergejoin_paths(). + */ + if (pathkeys_contained_in(innersortkeys, + inner_cheapest_total->pathkeys)) + { + /* inner_cheapest_total didn't require a sort */ + cheapest_startup_inner = inner_cheapest_total; + cheapest_total_inner = inner_cheapest_total; + } + else + { + /* it did require a sort, at least for the full set of keys */ + cheapest_startup_inner = NULL; + cheapest_total_inner = NULL; + } + num_sortkeys = list_length(innersortkeys); + if (num_sortkeys > 1) + trialsortkeys = list_copy(innersortkeys); /* need modifiable copy */ + else + trialsortkeys = innersortkeys; /* won't really truncate */ + + for (sortkeycnt = num_sortkeys; sortkeycnt > 0; sortkeycnt--) + { + Path *innerpath; + List *newclauses = NIL; + + /* + * Look for an inner path ordered well enough for the first + * 'sortkeycnt' innersortkeys. NB: trialsortkeys list is modified + * destructively, which is why we made a copy... 
+ */ + trialsortkeys = list_truncate(trialsortkeys, sortkeycnt); + innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist, + trialsortkeys, + NULL, + TOTAL_COST); + /* Consider only a parallel-safe inner path that beats the best so far */ + if (innerpath != NULL && + innerpath->parallel_safe && + (cheapest_total_inner == NULL || + cheapest_total_inner->parallel_safe == false || + compare_path_costs(innerpath, cheapest_total_inner, + TOTAL_COST) < 0)) + + { + /* Found a cheap (or even-cheaper) sorted parallel-safe path */ + /* Select the right mergeclauses, if we didn't already */ + if (sortkeycnt < num_sortkeys) + { + newclauses = find_mergeclauses_for_pathkeys(root, + trialsortkeys, + false, + mergeclauses); + Assert(newclauses != NIL); + } + else + newclauses = mergeclauses; + + try_partial_mergejoin_path(root, + joinrel, + outerpath, + innerpath, + merge_pathkeys, + newclauses, + NIL, + NIL, + jointype, + extra); + + cheapest_total_inner = innerpath; + } + /* Same on the basis of cheapest startup cost ... */ + innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist, + trialsortkeys, + NULL, + STARTUP_COST); + if (innerpath != NULL && + innerpath->parallel_safe && + (cheapest_startup_inner == NULL || + cheapest_startup_inner->parallel_safe == false || + compare_path_costs(innerpath, cheapest_startup_inner, + STARTUP_COST) < 0)) + { + /* Found a cheap (or even-cheaper) sorted parallel-safe path */ + if (innerpath != cheapest_total_inner) + { + /* + * Avoid rebuilding clause list if we already made one; saves + * memory in big join trees... 
+ */ + if (newclauses == NIL) + { + if (sortkeycnt < num_sortkeys) + { + newclauses = + find_mergeclauses_for_pathkeys(root, + trialsortkeys, + false, + mergeclauses); + Assert(newclauses != NIL); + } + else + newclauses = mergeclauses; + } + try_partial_mergejoin_path(root, + joinrel, + outerpath, + innerpath, + merge_pathkeys, + newclauses, + NIL, + NIL, + jointype, + extra); + } + cheapest_startup_inner = innerpath; + } + } +} + +/* * match_unsorted_outer * Creates possible join paths for processing a single join relation * 'joinrel' by employing either iterative substitution or @@ -1223,18 +1532,73 @@ match_unsorted_outer(PlannerInfo *root, } /* - * If the joinrel is parallel-safe and the join type supports nested - * loops, we may be able to consider a partial nestloop plan. However, we - * can't handle JOIN_UNIQUE_OUTER, because the outer path will be partial, - * and therefore we won't be able to properly guarantee uniqueness. Nor - * can we handle extra_lateral_rels, since partial paths must not be - * parameterized. + * Consider partial nestloop and mergejoin plans if outerrel has any + * partial path and the joinrel is parallel-safe. However, we can't + * handle JOIN_UNIQUE_OUTER, because the outer path will be partial, and + * therefore we won't be able to properly guarantee uniqueness. Nor can + * we handle a joinrel with lateral_relids, since partial paths must not + * be parameterized. Similarly, we can't handle JOIN_FULL and JOIN_RIGHT, + * because they can produce spurious null-extended rows. 
*/ - if (joinrel->consider_parallel && nestjoinOK && - save_jointype != JOIN_UNIQUE_OUTER && - bms_is_empty(joinrel->lateral_relids)) + if (!(joinrel->consider_parallel && + save_jointype != JOIN_UNIQUE_OUTER && + save_jointype != JOIN_FULL && + save_jointype != JOIN_RIGHT && + outerrel->partial_pathlist != NIL && + bms_is_empty(joinrel->lateral_relids))) + return; + + if (nestjoinOK) consider_parallel_nestloop(root, joinrel, outerrel, innerrel, save_jointype, extra); + + /* Can't generate mergejoin path if inner rel is parameterized by outer */ + if (inner_cheapest_total != NULL) + consider_parallel_mergejoin(root, joinrel, outerrel, innerrel, + save_jointype, extra, + inner_cheapest_total); +} + +/* + * consider_parallel_mergejoin + * Try to build partial merge join paths for a joinrel by joining a partial + * path for the outer relation to a complete path for the inner relation. + * + * 'joinrel' is the join relation + * 'outerrel' is the outer join relation + * 'innerrel' is the inner join relation + * 'jointype' is the type of join to do + * 'extra' contains additional input values + * 'inner_cheapest_total' is the cheapest total path for innerrel + */ +static void +consider_parallel_mergejoin(PlannerInfo *root, + RelOptInfo *joinrel, + RelOptInfo *outerrel, + RelOptInfo *innerrel, + JoinType jointype, + JoinPathExtraData *extra, + Path *inner_cheapest_total) +{ + ListCell *lc1; + + /* Generate merge join paths for each partial outer path */ + foreach(lc1, outerrel->partial_pathlist) + { + Path *outerpath = (Path *) lfirst(lc1); + List *merge_pathkeys; + + /* + * Figure out what useful ordering any paths we create will have. 
+ */ + merge_pathkeys = build_join_pathkeys(root, joinrel, jointype, + outerpath->pathkeys); + + generate_partial_mergejoin_paths(root, joinrel, innerrel, + outerpath, jointype, extra, + inner_cheapest_total, + merge_pathkeys); + } } /* diff --git a/src/test/regress/expected/select_parallel.out b/src/test/regress/expected/select_parallel.out index a5a2232..75558d0 100644 --- a/src/test/regress/expected/select_parallel.out +++ b/src/test/regress/expected/select_parallel.out @@ -169,6 +169,31 @@ select count(*) from tenk1 where thousand > 95; reset enable_seqscan; reset enable_bitmapscan; +-- test parallel merge join path. +set enable_hashjoin to off; +set enable_nestloop to off; +explain (costs off) + select count(*) from tenk1, tenk2 where tenk1.unique1 = tenk2.unique1; + QUERY PLAN +------------------------------------------------------------------------------- + Finalize Aggregate + -> Gather + Workers Planned: 4 + -> Partial Aggregate + -> Merge Join + Merge Cond: (tenk1.unique1 = tenk2.unique1) + -> Parallel Index Only Scan using tenk1_unique1 on tenk1 + -> Index Only Scan using tenk2_unique1 on tenk2 +(8 rows) + +select count(*) from tenk1, tenk2 where tenk1.unique1 = tenk2.unique1; + count +------- + 10000 +(1 row) + +reset enable_hashjoin; +reset enable_nestloop; set force_parallel_mode=1; explain (costs off) select stringu1::int2 from tenk1 where unique1 = 1; diff --git a/src/test/regress/sql/select_parallel.sql b/src/test/regress/sql/select_parallel.sql index d72addf..ebdae7e 100644 --- a/src/test/regress/sql/select_parallel.sql +++ b/src/test/regress/sql/select_parallel.sql @@ -64,6 +64,16 @@ select count(*) from tenk1 where thousand > 95; reset enable_seqscan; reset enable_bitmapscan; +-- test parallel merge join path. 
+set enable_hashjoin to off; +set enable_nestloop to off; + +explain (costs off) + select count(*) from tenk1, tenk2 where tenk1.unique1 = tenk2.unique1; +select count(*) from tenk1, tenk2 where tenk1.unique1 = tenk2.unique1; + +reset enable_hashjoin; +reset enable_nestloop; set force_parallel_mode=1; explain (costs off)