From 5c64d2d3315d7e38676f349c10f94f445da2da58 Mon Sep 17 00:00:00 2001 From: Kyotaro Horiguchi Date: Thu, 19 Oct 2017 17:24:07 +0900 Subject: [PATCH v4 3/3] async postgres_fdw --- contrib/postgres_fdw/connection.c | 28 + .../postgres_fdw/expected/postgres_fdw.out | 222 ++++--- contrib/postgres_fdw/postgres_fdw.c | 601 +++++++++++++++--- contrib/postgres_fdw/postgres_fdw.h | 2 + contrib/postgres_fdw/sql/postgres_fdw.sql | 20 +- 5 files changed, 691 insertions(+), 182 deletions(-) diff --git a/contrib/postgres_fdw/connection.c b/contrib/postgres_fdw/connection.c index 52d1fe3563..d9edc5e4de 100644 --- a/contrib/postgres_fdw/connection.c +++ b/contrib/postgres_fdw/connection.c @@ -58,6 +58,7 @@ typedef struct ConnCacheEntry bool invalidated; /* true if reconnect is pending */ uint32 server_hashvalue; /* hash value of foreign server OID */ uint32 mapping_hashvalue; /* hash value of user mapping OID */ + void *storage; /* connection specific storage */ } ConnCacheEntry; /* @@ -202,6 +203,7 @@ GetConnection(UserMapping *user, bool will_prep_stmt) elog(DEBUG3, "new postgres_fdw connection %p for server \"%s\" (user mapping oid %u, userid %u)", entry->conn, server->servername, user->umid, user->userid); + entry->storage = NULL; } /* @@ -215,6 +217,32 @@ GetConnection(UserMapping *user, bool will_prep_stmt) return entry->conn; } +/* + * Returns the connection specific storage for this user. Allocate with + * initsize if not exists. + */ +void * +GetConnectionSpecificStorage(UserMapping *user, size_t initsize) +{ + bool found; + ConnCacheEntry *entry; + ConnCacheKey key; + + /* Find storage using the same key with GetConnection */ + key = user->umid; + entry = hash_search(ConnectionHash, &key, HASH_ENTER, &found); + Assert(found); + + /* Create one if not yet. */ + if (entry->storage == NULL) + { + entry->storage = MemoryContextAlloc(CacheMemoryContext, initsize); + memset(entry->storage, 0, initsize); + } + + return entry->storage; +} + /* * Connect to remote server using specified server and user mapping properties. */ diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out index 82fc1290ef..29aa09db8e 100644 --- a/contrib/postgres_fdw/expected/postgres_fdw.out +++ b/contrib/postgres_fdw/expected/postgres_fdw.out @@ -6973,7 +6973,7 @@ INSERT INTO a(aa) VALUES('aaaaa'); INSERT INTO b(aa) VALUES('bbb'); INSERT INTO b(aa) VALUES('bbbb'); INSERT INTO b(aa) VALUES('bbbbb'); -SELECT tableoid::regclass, * FROM a; +SELECT tableoid::regclass, * FROM a ORDER BY 1, 2; tableoid | aa ----------+------- a | aaa @@ -7001,7 +7001,7 @@ SELECT tableoid::regclass, * FROM ONLY a; (3 rows) UPDATE a SET aa = 'zzzzzz' WHERE aa LIKE 'aaaa%'; -SELECT tableoid::regclass, * FROM a; +SELECT tableoid::regclass, * FROM a ORDER BY 1, 2; tableoid | aa ----------+-------- a | aaa @@ -7029,7 +7029,7 @@ SELECT tableoid::regclass, * FROM ONLY a; (3 rows) UPDATE b SET aa = 'new'; -SELECT tableoid::regclass, * FROM a; +SELECT tableoid::regclass, * FROM a ORDER BY 1, 2; tableoid | aa ----------+-------- a | aaa @@ -7057,7 +7057,7 @@ SELECT tableoid::regclass, * FROM ONLY a; (3 rows) UPDATE a SET aa = 'newtoo'; -SELECT tableoid::regclass, * FROM a; +SELECT tableoid::regclass, * FROM a ORDER BY 1, 2; tableoid | aa ----------+-------- a | newtoo @@ -7127,35 +7127,41 @@ insert into bar2 values(3,33,33); insert into bar2 values(4,44,44); insert into bar2 values(7,77,77); explain (verbose, costs off) -select * from bar where f1 in (select f1 from foo) for update; - QUERY PLAN ----------------------------------------------------------------------------------------------- +select * from bar where f1 in (select f1 from foo) order by 1 for update; + QUERY PLAN +----------------------------------------------------------------------------------------------------------------- LockRows Output: bar.f1, bar.f2, bar.ctid, foo.ctid, bar.*, bar.tableoid, foo.*, foo.tableoid - -> Hash Join + -> Merge Join Output: bar.f1, bar.f2, bar.ctid, foo.ctid, bar.*, bar.tableoid, foo.*, foo.tableoid Inner Unique: true - Hash Cond: (bar.f1 = foo.f1) - -> Append - -> Seq Scan on public.bar bar_1 + Merge Cond: (bar.f1 = foo.f1) + -> Merge Append + Sort Key: bar.f1 + -> Sort Output: bar_1.f1, bar_1.f2, bar_1.ctid, bar_1.*, bar_1.tableoid + Sort Key: bar_1.f1 + -> Seq Scan on public.bar bar_1 + Output: bar_1.f1, bar_1.f2, bar_1.ctid, bar_1.*, bar_1.tableoid -> Foreign Scan on public.bar2 bar_2 Output: bar_2.f1, bar_2.f2, bar_2.ctid, bar_2.*, bar_2.tableoid - Remote SQL: SELECT f1, f2, f3, ctid FROM public.loct2 FOR UPDATE - -> Hash + Remote SQL: SELECT f1, f2, f3, ctid FROM public.loct2 ORDER BY f1 ASC NULLS LAST FOR UPDATE + -> Sort Output: foo.ctid, foo.f1, foo.*, foo.tableoid + Sort Key: foo.f1 -> HashAggregate Output: foo.ctid, foo.f1, foo.*, foo.tableoid Group Key: foo.f1 -> Append - -> Seq Scan on public.foo foo_1 - Output: foo_1.ctid, foo_1.f1, foo_1.*, foo_1.tableoid - -> Foreign Scan on public.foo2 foo_2 + Async subplans: 1 + -> Async Foreign Scan on public.foo2 foo_2 Output: foo_2.ctid, foo_2.f1, foo_2.*, foo_2.tableoid Remote SQL: SELECT f1, f2, f3, ctid FROM public.loct1 -(23 rows) + -> Seq Scan on public.foo foo_1 + Output: foo_1.ctid, foo_1.f1, foo_1.*, foo_1.tableoid +(29 rows) -select * from bar where f1 in (select f1 from foo) for update; +select * from bar where f1 in (select f1 from foo) order by 1 for update; f1 | f2 ----+---- 1 | 11 @@ -7165,35 +7171,41 @@ select * from bar where f1 in (select f1 from foo) for update; (4 rows) explain (verbose, costs off) -select * from bar where f1 in (select f1 from foo) for share; - QUERY PLAN ----------------------------------------------------------------------------------------------- +select * from bar where f1 in (select f1 from foo) order by 1 for share; + QUERY PLAN +---------------------------------------------------------------------------------------------------------------- LockRows Output: bar.f1, bar.f2, bar.ctid, foo.ctid, bar.*, bar.tableoid, foo.*, foo.tableoid - -> Hash Join + -> Merge Join Output: bar.f1, bar.f2, bar.ctid, foo.ctid, bar.*, bar.tableoid, foo.*, foo.tableoid Inner Unique: true - Hash Cond: (bar.f1 = foo.f1) - -> Append - -> Seq Scan on public.bar bar_1 + Merge Cond: (bar.f1 = foo.f1) + -> Merge Append + Sort Key: bar.f1 + -> Sort Output: bar_1.f1, bar_1.f2, bar_1.ctid, bar_1.*, bar_1.tableoid + Sort Key: bar_1.f1 + -> Seq Scan on public.bar bar_1 + Output: bar_1.f1, bar_1.f2, bar_1.ctid, bar_1.*, bar_1.tableoid -> Foreign Scan on public.bar2 bar_2 Output: bar_2.f1, bar_2.f2, bar_2.ctid, bar_2.*, bar_2.tableoid - Remote SQL: SELECT f1, f2, f3, ctid FROM public.loct2 FOR SHARE - -> Hash + Remote SQL: SELECT f1, f2, f3, ctid FROM public.loct2 ORDER BY f1 ASC NULLS LAST FOR SHARE + -> Sort Output: foo.ctid, foo.f1, foo.*, foo.tableoid + Sort Key: foo.f1 -> HashAggregate Output: foo.ctid, foo.f1, foo.*, foo.tableoid Group Key: foo.f1 -> Append - -> Seq Scan on public.foo foo_1 - Output: foo_1.ctid, foo_1.f1, foo_1.*, foo_1.tableoid - -> Foreign Scan on public.foo2 foo_2 + Async subplans: 1 + -> Async Foreign Scan on public.foo2 foo_2 Output: foo_2.ctid, foo_2.f1, foo_2.*, foo_2.tableoid Remote SQL: SELECT f1, f2, f3, ctid FROM public.loct1 -(23 rows) + -> Seq Scan on public.foo foo_1 + Output: foo_1.ctid, foo_1.f1, foo_1.*, foo_1.tableoid +(29 rows) -select * from bar where f1 in (select f1 from foo) for share; +select * from bar where f1 in (select f1 from foo) order by 1 for share; f1 | f2 ----+---- 1 | 11 @@ -7223,11 +7235,12 @@ update bar set f2 = f2 + 100 where f1 in (select f1 from foo); Output: foo.ctid, foo.f1, foo.*, foo.tableoid Group Key: foo.f1 -> Append - -> Seq Scan on public.foo foo_1 - Output: foo_1.ctid, foo_1.f1, foo_1.*, foo_1.tableoid - -> Foreign Scan on public.foo2 foo_2 + Async subplans: 1 + -> Async Foreign Scan on public.foo2 foo_2 Output: foo_2.ctid, foo_2.f1, foo_2.*, foo_2.tableoid Remote SQL: SELECT f1, f2, f3, ctid FROM public.loct1 + -> Seq Scan on public.foo foo_1 + Output: foo_1.ctid, foo_1.f1, foo_1.*, foo_1.tableoid -> Hash Join Output: bar_1.f1, (bar_1.f2 + 100), bar_1.f3, bar_1.ctid, foo.ctid, foo.*, foo.tableoid Inner Unique: true @@ -7241,12 +7254,13 @@ update bar set f2 = f2 + 100 where f1 in (select f1 from foo); Output: foo.ctid, foo.f1, foo.*, foo.tableoid Group Key: foo.f1 -> Append - -> Seq Scan on public.foo foo_1 - Output: foo_1.ctid, foo_1.f1, foo_1.*, foo_1.tableoid - -> Foreign Scan on public.foo2 foo_2 + Async subplans: 1 + -> Async Foreign Scan on public.foo2 foo_2 Output: foo_2.ctid, foo_2.f1, foo_2.*, foo_2.tableoid Remote SQL: SELECT f1, f2, f3, ctid FROM public.loct1 -(39 rows) + -> Seq Scan on public.foo foo_1 + Output: foo_1.ctid, foo_1.f1, foo_1.*, foo_1.tableoid +(41 rows) update bar set f2 = f2 + 100 where f1 in (select f1 from foo); select tableoid::regclass, * from bar order by 1,2; @@ -7276,16 +7290,17 @@ where bar.f1 = ss.f1; Output: bar.f1, (bar.f2 + 100), bar.ctid, (ROW(foo.f1)) Hash Cond: (foo.f1 = bar.f1) -> Append + Async subplans: 2 + -> Async Foreign Scan on public.foo2 foo_1 + Output: ROW(foo_1.f1), foo_1.f1 + Remote SQL: SELECT f1 FROM public.loct1 + -> Async Foreign Scan on public.foo2 foo_3 + Output: ROW((foo_3.f1 + 3)), (foo_3.f1 + 3) + Remote SQL: SELECT f1 FROM public.loct1 -> Seq Scan on public.foo Output: ROW(foo.f1), foo.f1 - -> Foreign Scan on public.foo2 foo_1 - Output: ROW(foo_1.f1), foo_1.f1 - Remote SQL: SELECT f1 FROM public.loct1 -> Seq Scan on public.foo foo_2 Output: ROW((foo_2.f1 + 3)), (foo_2.f1 + 3) - -> Foreign Scan on public.foo2 foo_3 - Output: ROW((foo_3.f1 + 3)), (foo_3.f1 + 3) - Remote SQL: SELECT f1 FROM public.loct1 -> Hash Output: bar.f1, bar.f2, bar.ctid -> Seq Scan on public.bar @@ -7303,17 +7318,18 @@ where bar.f1 = ss.f1; Output: (ROW(foo.f1)), foo.f1 Sort Key: foo.f1 -> Append + Async subplans: 2 + -> Async Foreign Scan on public.foo2 foo_1 + Output: ROW(foo_1.f1), foo_1.f1 + Remote SQL: SELECT f1 FROM public.loct1 + -> Async Foreign Scan on public.foo2 foo_3 + Output: ROW((foo_3.f1 + 3)), (foo_3.f1 + 3) + Remote SQL: SELECT f1 FROM public.loct1 -> Seq Scan on public.foo Output: ROW(foo.f1), foo.f1 - -> Foreign Scan on public.foo2 foo_1 - Output: ROW(foo_1.f1), foo_1.f1 - Remote SQL: SELECT f1 FROM public.loct1 -> Seq Scan on public.foo foo_2 Output: ROW((foo_2.f1 + 3)), (foo_2.f1 + 3) - -> Foreign Scan on public.foo2 foo_3 - Output: ROW((foo_3.f1 + 3)), (foo_3.f1 + 3) - Remote SQL: SELECT f1 FROM public.loct1 -(45 rows) +(47 rows) update bar set f2 = f2 + 100 from @@ -7463,27 +7479,33 @@ delete from foo where f1 < 5 returning *; (5 rows) explain (verbose, costs off) -update bar set f2 = f2 + 100 returning *; - QUERY PLAN ------------------------------------------------------------------------------- - Update on public.bar - Output: bar.f1, bar.f2 - Update on public.bar - Foreign Update on public.bar2 bar_1 - -> Seq Scan on public.bar - Output: bar.f1, (bar.f2 + 100), bar.ctid - -> Foreign Update on public.bar2 bar_1 - Remote SQL: UPDATE public.loct2 SET f2 = (f2 + 100) RETURNING f1, f2 -(8 rows) +with u as (update bar set f2 = f2 + 100 returning *) select * from u order by 1; + QUERY PLAN +-------------------------------------------------------------------------------------- + Sort + Output: u.f1, u.f2 + Sort Key: u.f1 + CTE u + -> Update on public.bar + Output: bar.f1, bar.f2 + Update on public.bar + Foreign Update on public.bar2 bar_1 + -> Seq Scan on public.bar + Output: bar.f1, (bar.f2 + 100), bar.ctid + -> Foreign Update on public.bar2 bar_1 + Remote SQL: UPDATE public.loct2 SET f2 = (f2 + 100) RETURNING f1, f2 + -> CTE Scan on u + Output: u.f1, u.f2 +(14 rows) -update bar set f2 = f2 + 100 returning *; +with u as (update bar set f2 = f2 + 100 returning *) select * from u order by 1; f1 | f2 ----+----- 1 | 311 2 | 322 - 6 | 266 3 | 333 4 | 344 + 6 | 266 7 | 277 (6 rows) @@ -8558,11 +8580,12 @@ SELECT t1.a,t2.b,t3.c FROM fprt1 t1 INNER JOIN fprt2 t2 ON (t1.a = t2.b) INNER J Sort Sort Key: t1.a, t3.c -> Append - -> Foreign Scan + Async subplans: 2 + -> Async Foreign Scan Relations: ((ftprt1_p1 t1_1) INNER JOIN (ftprt2_p1 t2_1)) INNER JOIN (ftprt1_p1 t3_1) - -> Foreign Scan + -> Async Foreign Scan Relations: ((ftprt1_p2 t1_2) INNER JOIN (ftprt2_p2 t2_2)) INNER JOIN (ftprt1_p2 t3_2) -(7 rows) +(8 rows) SELECT t1.a,t2.b,t3.c FROM fprt1 t1 INNER JOIN fprt2 t2 ON (t1.a = t2.b) INNER JOIN fprt1 t3 ON (t2.b = t3.a) WHERE t1.a % 25 =0 ORDER BY 1,2,3; a | b | c @@ -8597,20 +8620,22 @@ SELECT t1.a,t2.b,t2.c FROM fprt1 t1 LEFT JOIN (SELECT * FROM fprt2 WHERE a < 10) -- with whole-row reference; partitionwise join does not apply EXPLAIN (COSTS OFF) SELECT t1.wr, t2.wr FROM (SELECT t1 wr, a FROM fprt1 t1 WHERE t1.a % 25 = 0) t1 FULL JOIN (SELECT t2 wr, b FROM fprt2 t2 WHERE t2.b % 25 = 0) t2 ON (t1.a = t2.b) ORDER BY 1,2; - QUERY PLAN --------------------------------------------------------- + QUERY PLAN +-------------------------------------------------------------- Sort Sort Key: ((t1.*)::fprt1), ((t2.*)::fprt2) -> Hash Full Join Hash Cond: (t1.a = t2.b) -> Append - -> Foreign Scan on ftprt1_p1 t1_1 - -> Foreign Scan on ftprt1_p2 t1_2 + Async subplans: 2 + -> Async Foreign Scan on ftprt1_p1 t1_1 + -> Async Foreign Scan on ftprt1_p2 t1_2 -> Hash -> Append - -> Foreign Scan on ftprt2_p1 t2_1 - -> Foreign Scan on ftprt2_p2 t2_2 -(11 rows) + Async subplans: 2 + -> Async Foreign Scan on ftprt2_p1 t2_1 + -> Async Foreign Scan on ftprt2_p2 t2_2 +(13 rows) SELECT t1.wr, t2.wr FROM (SELECT t1 wr, a FROM fprt1 t1 WHERE t1.a % 25 = 0) t1 FULL JOIN (SELECT t2 wr, b FROM fprt2 t2 WHERE t2.b % 25 = 0) t2 ON (t1.a = t2.b) ORDER BY 1,2; wr | wr @@ -8639,11 +8664,12 @@ SELECT t1.a,t1.b FROM fprt1 t1, LATERAL (SELECT t2.a, t2.b FROM fprt2 t2 WHERE t Sort Sort Key: t1.a, t1.b -> Append - -> Foreign Scan + Async subplans: 2 + -> Async Foreign Scan Relations: (ftprt1_p1 t1_1) INNER JOIN (ftprt2_p1 t2_1) - -> Foreign Scan + -> Async Foreign Scan Relations: (ftprt1_p2 t1_2) INNER JOIN (ftprt2_p2 t2_2) -(7 rows) +(8 rows) SELECT t1.a,t1.b FROM fprt1 t1, LATERAL (SELECT t2.a, t2.b FROM fprt2 t2 WHERE t1.a = t2.b AND t1.b = t2.a) q WHERE t1.a%25 = 0 ORDER BY 1,2; a | b @@ -8696,21 +8722,23 @@ SELECT t1.a, t1.phv, t2.b, t2.phv FROM (SELECT 't1_phv' phv, * FROM fprt1 WHERE -- test FOR UPDATE; partitionwise join does not apply EXPLAIN (COSTS OFF) SELECT t1.a, t2.b FROM fprt1 t1 INNER JOIN fprt2 t2 ON (t1.a = t2.b) WHERE t1.a % 25 = 0 ORDER BY 1,2 FOR UPDATE OF t1; - QUERY PLAN --------------------------------------------------------------- + QUERY PLAN +-------------------------------------------------------------------- LockRows -> Sort Sort Key: t1.a -> Hash Join Hash Cond: (t2.b = t1.a) -> Append - -> Foreign Scan on ftprt2_p1 t2_1 - -> Foreign Scan on ftprt2_p2 t2_2 + Async subplans: 2 + -> Async Foreign Scan on ftprt2_p1 t2_1 + -> Async Foreign Scan on ftprt2_p2 t2_2 -> Hash -> Append - -> Foreign Scan on ftprt1_p1 t1_1 - -> Foreign Scan on ftprt1_p2 t1_2 -(12 rows) + Async subplans: 2 + -> Async Foreign Scan on ftprt1_p1 t1_1 + -> Async Foreign Scan on ftprt1_p2 t1_2 +(14 rows) SELECT t1.a, t2.b FROM fprt1 t1 INNER JOIN fprt2 t2 ON (t1.a = t2.b) WHERE t1.a % 25 = 0 ORDER BY 1,2 FOR UPDATE OF t1; a | b @@ -8745,18 +8773,19 @@ ANALYZE fpagg_tab_p3; SET enable_partitionwise_aggregate TO false; EXPLAIN (COSTS OFF) SELECT a, sum(b), min(b), count(*) FROM pagg_tab GROUP BY a HAVING avg(b) < 22 ORDER BY 1; - QUERY PLAN ------------------------------------------------------------ + QUERY PLAN +----------------------------------------------------------------- Sort Sort Key: pagg_tab.a -> HashAggregate Group Key: pagg_tab.a Filter: (avg(pagg_tab.b) < '22'::numeric) -> Append - -> Foreign Scan on fpagg_tab_p1 pagg_tab_1 - -> Foreign Scan on fpagg_tab_p2 pagg_tab_2 - -> Foreign Scan on fpagg_tab_p3 pagg_tab_3 -(9 rows) + Async subplans: 3 + -> Async Foreign Scan on fpagg_tab_p1 pagg_tab_1 + -> Async Foreign Scan on fpagg_tab_p2 pagg_tab_2 + -> Async Foreign Scan on fpagg_tab_p3 pagg_tab_3 +(10 rows) -- Plan with partitionwise aggregates is enabled SET enable_partitionwise_aggregate TO true; @@ -8767,13 +8796,14 @@ SELECT a, sum(b), min(b), count(*) FROM pagg_tab GROUP BY a HAVING avg(b) < 22 O Sort Sort Key: pagg_tab.a -> Append - -> Foreign Scan + Async subplans: 3 + -> Async Foreign Scan Relations: Aggregate on (fpagg_tab_p1 pagg_tab) - -> Foreign Scan + -> Async Foreign Scan Relations: Aggregate on (fpagg_tab_p2 pagg_tab_1) - -> Foreign Scan + -> Async Foreign Scan Relations: Aggregate on (fpagg_tab_p3 pagg_tab_2) -(9 rows) +(10 rows) SELECT a, sum(b), min(b), count(*) FROM pagg_tab GROUP BY a HAVING avg(b) < 22 ORDER BY 1; a | sum | min | count diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c index 9fc53cad68..4bfc2d39ea 100644 --- a/contrib/postgres_fdw/postgres_fdw.c +++ b/contrib/postgres_fdw/postgres_fdw.c @@ -21,6 +21,8 @@ #include "commands/defrem.h" #include "commands/explain.h" #include "commands/vacuum.h" +#include "executor/execAsync.h" +#include "executor/nodeForeignscan.h" #include "foreign/fdwapi.h" #include "funcapi.h" #include "miscadmin.h" @@ -35,6 +37,7 @@ #include "optimizer/restrictinfo.h" #include "optimizer/tlist.h" #include "parser/parsetree.h" +#include "pgstat.h" #include "postgres_fdw.h" #include "utils/builtins.h" #include "utils/float.h" @@ -56,6 +59,9 @@ PG_MODULE_MAGIC; /* If no remote estimates, assume a sort costs 20% extra */ #define DEFAULT_FDW_SORT_MULTIPLIER 1.2 +/* Retrieve PgFdwScanState struct from ForeignScanState */ +#define GetPgFdwScanState(n) ((PgFdwScanState *)(n)->fdw_state) + /* * Indexes of FDW-private information stored in fdw_private lists. * @@ -122,11 +128,29 @@ enum FdwDirectModifyPrivateIndex FdwDirectModifyPrivateSetProcessed }; +/* + * Connection common state - shared among all PgFdwState instances using the + * same connection. + */ +typedef struct PgFdwConnCommonState +{ + ForeignScanState *leader; /* leader node of this connection */ + bool busy; /* true if this connection is busy */ +} PgFdwConnCommonState; + +/* Execution state base type */ +typedef struct PgFdwState +{ + PGconn *conn; /* connection for the scan */ + PgFdwConnCommonState *commonstate; /* connection common state */ +} PgFdwState; + /* * Execution state of a foreign scan using postgres_fdw. */ typedef struct PgFdwScanState { + PgFdwState s; /* common structure */ Relation rel; /* relcache entry for the foreign table. NULL * for a foreign join scan. */ TupleDesc tupdesc; /* tuple descriptor of scan */ @@ -137,7 +161,6 @@ typedef struct PgFdwScanState List *retrieved_attrs; /* list of retrieved attribute numbers */ /* for remote query execution */ - PGconn *conn; /* connection for the scan */ unsigned int cursor_number; /* quasi-unique ID for my cursor */ bool cursor_exists; /* have we created the cursor? */ int numParams; /* number of parameters passed to query */ @@ -153,6 +176,12 @@ typedef struct PgFdwScanState /* batch-level state, for optimizing rewinds and avoiding useless fetch */ int fetch_ct_2; /* Min(# of fetches done, 2) */ bool eof_reached; /* true if last fetch reached EOF */ + bool async; /* true if run asynchronously */ + bool queued; /* true if this node is in waiter queue */ + ForeignScanState *waiter; /* Next node to run a query among nodes + * sharing the same connection */ + ForeignScanState *last_waiter; /* last element in waiter queue. + * valid only on the leader node */ /* working memory contexts */ MemoryContext batch_cxt; /* context holding current batch of tuples */ @@ -166,11 +195,11 @@ typedef struct PgFdwScanState */ typedef struct PgFdwModifyState { + PgFdwState s; /* common structure */ Relation rel; /* relcache entry for the foreign table */ AttInMetadata *attinmeta; /* attribute datatype conversion metadata */ /* for remote query execution */ - PGconn *conn; /* connection for the scan */ char *p_name; /* name of prepared statement, if created */ /* extracted fdw_private data */ @@ -197,6 +226,7 @@ typedef struct PgFdwModifyState */ typedef struct PgFdwDirectModifyState { + PgFdwState s; /* common structure */ Relation rel; /* relcache entry for the foreign table */ AttInMetadata *attinmeta; /* attribute datatype conversion metadata */ @@ -326,6 +356,7 @@ static void postgresBeginForeignScan(ForeignScanState *node, int eflags); static TupleTableSlot *postgresIterateForeignScan(ForeignScanState *node); static void postgresReScanForeignScan(ForeignScanState *node); static void postgresEndForeignScan(ForeignScanState *node); +static void postgresShutdownForeignScan(ForeignScanState *node); static void postgresAddForeignUpdateTargets(Query *parsetree, RangeTblEntry *target_rte, Relation target_relation); @@ -391,6 +422,10 @@ static void postgresGetForeignUpperPaths(PlannerInfo *root, RelOptInfo *input_rel, RelOptInfo *output_rel, void *extra); +static bool postgresIsForeignPathAsyncCapable(ForeignPath *path); +static bool postgresForeignAsyncConfigureWait(ForeignScanState *node, + WaitEventSet *wes, + void *caller_data, bool reinit); /* * Helper functions @@ -419,7 +454,9 @@ static bool ec_member_matches_foreign(PlannerInfo *root, RelOptInfo *rel, EquivalenceClass *ec, EquivalenceMember *em, void *arg); static void create_cursor(ForeignScanState *node); -static void fetch_more_data(ForeignScanState *node); +static void request_more_data(ForeignScanState *node); +static void fetch_received_data(ForeignScanState *node); +static void vacate_connection(PgFdwState *fdwconn, bool clear_queue); static void close_cursor(PGconn *conn, unsigned int cursor_number); static PgFdwModifyState *create_foreign_modify(EState *estate, RangeTblEntry *rte, @@ -522,6 +559,7 @@ postgres_fdw_handler(PG_FUNCTION_ARGS) routine->IterateForeignScan = postgresIterateForeignScan; routine->ReScanForeignScan = postgresReScanForeignScan; routine->EndForeignScan = postgresEndForeignScan; + routine->ShutdownForeignScan = postgresShutdownForeignScan; /* Functions for updating foreign tables */ routine->AddForeignUpdateTargets = postgresAddForeignUpdateTargets; @@ -558,6 +596,10 @@ postgres_fdw_handler(PG_FUNCTION_ARGS) /* Support functions for upper relation push-down */ routine->GetForeignUpperPaths = postgresGetForeignUpperPaths; + /* Support functions for async execution */ + routine->IsForeignPathAsyncCapable = postgresIsForeignPathAsyncCapable; + routine->ForeignAsyncConfigureWait = postgresForeignAsyncConfigureWait; + PG_RETURN_POINTER(routine); } @@ -1434,12 +1476,22 @@ postgresBeginForeignScan(ForeignScanState *node, int eflags) * Get connection to the foreign server. Connection manager will * establish new connection if necessary. */ - fsstate->conn = GetConnection(user, false); + fsstate->s.conn = GetConnection(user, false); + fsstate->s.commonstate = (PgFdwConnCommonState *) + GetConnectionSpecificStorage(user, sizeof(PgFdwConnCommonState)); + fsstate->s.commonstate->leader = NULL; + fsstate->s.commonstate->busy = false; + fsstate->waiter = NULL; + fsstate->last_waiter = node; /* Assign a unique ID for my cursor */ - fsstate->cursor_number = GetCursorNumber(fsstate->conn); + fsstate->cursor_number = GetCursorNumber(fsstate->s.conn); fsstate->cursor_exists = false; + /* Initialize async execution status */ + fsstate->async = false; + fsstate->queued = false; + /* Get private info created by planner functions. */ fsstate->query = strVal(list_nth(fsplan->fdw_private, FdwScanPrivateSelectSql)); @@ -1487,40 +1539,241 @@ postgresBeginForeignScan(ForeignScanState *node, int eflags) &fsstate->param_values); } +/* + * Async queue manipulation functions + */ + +/* + * add_async_waiter: + * + * Enqueue node if it isn't in the queue. Immediately send request it if the + * underlying connection is not busy. + */ +static inline void +add_async_waiter(ForeignScanState *node) +{ + PgFdwScanState *fsstate = GetPgFdwScanState(node); + ForeignScanState *leader = fsstate->s.commonstate->leader; + + /* + * Do nothing if the node is already in the queue or already eof'ed. + * Note: leader node is not marked as queued. + */ + if (leader == node || fsstate->queued || fsstate->eof_reached) + return; + + if (leader == NULL) + { + /* no leader means not busy, send request immediately */ + request_more_data(node); + } + else + { + /* the connection is busy, queue the node */ + PgFdwScanState *leader_state = GetPgFdwScanState(leader); + PgFdwScanState *last_waiter_state + = GetPgFdwScanState(leader_state->last_waiter); + + last_waiter_state->waiter = node; + leader_state->last_waiter = node; + fsstate->queued = true; + } +} + +/* + * move_to_next_waiter: + * + * Make the first waiter be the next leader + * Returns the new leader or NULL if there's no waiter. + */ +static inline ForeignScanState * +move_to_next_waiter(ForeignScanState *node) +{ + PgFdwScanState *leader_state = GetPgFdwScanState(node); + ForeignScanState *next_leader = leader_state->waiter; + + Assert(leader_state->s.commonstate->leader = node); + + if (next_leader) + { + /* the first waiter becomes the next leader */ + PgFdwScanState *next_leader_state = GetPgFdwScanState(next_leader); + next_leader_state->last_waiter = leader_state->last_waiter; + next_leader_state->queued = false; + } + + leader_state->waiter = NULL; + leader_state->s.commonstate->leader = next_leader; + + return next_leader; +} + +/* + * Remove the node from waiter queue. + * + * Remaining results are cleared if the node is a busy leader. + * This intended to be used during node shutdown. + */ +static inline void +remove_async_node(ForeignScanState *node) +{ + PgFdwScanState *fsstate = GetPgFdwScanState(node); + ForeignScanState *leader = fsstate->s.commonstate->leader; + PgFdwScanState *leader_state; + ForeignScanState *prev; + PgFdwScanState *prev_state; + ForeignScanState *cur; + + /* no need to remove me */ + if (!leader || !fsstate->queued) + return; + + leader_state = GetPgFdwScanState(leader); + + if (leader == node) + { + if (leader_state->s.commonstate->busy) + { + /* + * this node is waiting for result, absorb the result first so + * that the following commands can be sent on the connection. + */ + PgFdwScanState *leader_state = GetPgFdwScanState(leader); + PGconn *conn = leader_state->s.conn; + + while(PQisBusy(conn)) + PQclear(PQgetResult(conn)); + + leader_state->s.commonstate->busy = false; + } + + move_to_next_waiter(node); + + return; + } + + /* + * Just remove the node from the queue + * + * Nodes don't have a link to the previous node but anyway this function is + * called on the shutdown path, so we don't bother seeking for faster way + * to do this. + */ + prev = leader; + prev_state = leader_state; + cur = GetPgFdwScanState(prev)->waiter; + while (cur) + { + PgFdwScanState *curstate = GetPgFdwScanState(cur); + + if (cur == node) + { + prev_state->waiter = curstate->waiter; + + /* relink to the previous node if the last node was removed */ + if (leader_state->last_waiter == cur) + leader_state->last_waiter = prev; + + fsstate->queued = false; + + return; + } + prev = cur; + prev_state = curstate; + cur = curstate->waiter; + } +} + /* * postgresIterateForeignScan - * Retrieve next row from the result set, or clear tuple slot to indicate - * EOF. + * Retrieve next row from the result set. + * + * For synchronous nodes, returns clear tuple slot means EOF. + * + * For asynchronous nodes, if clear tuple slot is returned, the caller + * needs to check async state to tell if all tuples received + * (AS_AVAILABLE) or waiting for the next data to come (AS_WAITING). */ static TupleTableSlot * postgresIterateForeignScan(ForeignScanState *node) { - PgFdwScanState *fsstate = (PgFdwScanState *) node->fdw_state; + PgFdwScanState *fsstate = GetPgFdwScanState(node); TupleTableSlot *slot = node->ss.ss_ScanTupleSlot; - /* - * If this is the first call after Begin or ReScan, we need to create the - * cursor on the remote side. - */ - if (!fsstate->cursor_exists) - create_cursor(node); - - /* - * Get some more tuples, if we've run out. - */ + if (fsstate->next_tuple >= fsstate->num_tuples && !fsstate->eof_reached) + { + /* we've run out, get some more tuples */ + if (!node->fs_async) + { + /* + * finish the running query before sending the next command for + * this node + */ + if (!fsstate->s.commonstate->busy) + vacate_connection((PgFdwState *)fsstate, false); + + request_more_data(node); + + /* Fetch the result immediately. */ + fetch_received_data(node); + } + else if (!fsstate->s.commonstate->busy) + { + /* If the connection is not busy, just send the request. */ + request_more_data(node); + } + else + { + /* The connection is busy, queue the request */ + bool available = true; + ForeignScanState *leader = fsstate->s.commonstate->leader; + PgFdwScanState *leader_state = GetPgFdwScanState(leader); + + /* queue the requested node */ + add_async_waiter(node); + + /* + * The request for the next node cannot be sent before the leader + * responds. Finish the current leader if possible. + */ + if (PQisBusy(leader_state->s.conn)) + { + int rc = WaitLatchOrSocket(NULL, + WL_SOCKET_READABLE | WL_TIMEOUT | + WL_EXIT_ON_PM_DEATH, + PQsocket(leader_state->s.conn), 0, + WAIT_EVENT_ASYNC_WAIT); + if (!(rc & WL_SOCKET_READABLE)) + available = false; + } + + /* fetch the leader's data and enqueue it for the next request */ + if (available) + { + fetch_received_data(leader); + add_async_waiter(leader); + } + } + } + if (fsstate->next_tuple >= fsstate->num_tuples) { - /* No point in another fetch if we already detected EOF, though. */ - if (!fsstate->eof_reached) - fetch_more_data(node); - /* If we didn't get any tuples, must be end of data. */ - if (fsstate->next_tuple >= fsstate->num_tuples) - return ExecClearTuple(slot); + /* + * We haven't received a result for the given node this time, return + * with no tuple to give way to another node. + */ + if (fsstate->eof_reached) + node->ss.ps.asyncstate = AS_AVAILABLE; + else + node->ss.ps.asyncstate = AS_WAITING; + + return ExecClearTuple(slot); } /* * Return the next tuple. */ + node->ss.ps.asyncstate = AS_AVAILABLE; ExecStoreHeapTuple(fsstate->tuples[fsstate->next_tuple++], slot, false); @@ -1535,7 +1788,7 @@ postgresIterateForeignScan(ForeignScanState *node) static void postgresReScanForeignScan(ForeignScanState *node) { - PgFdwScanState *fsstate = (PgFdwScanState *) node->fdw_state; + PgFdwScanState *fsstate = GetPgFdwScanState(node); char sql[64]; PGresult *res; @@ -1543,6 +1796,8 @@ postgresReScanForeignScan(ForeignScanState *node) if (!fsstate->cursor_exists) return; + vacate_connection((PgFdwState *)fsstate, true); + /* * If any internal parameters affecting this node have changed, we'd * better destroy and recreate the cursor. Otherwise, rewinding it should @@ -1571,9 +1826,9 @@ postgresReScanForeignScan(ForeignScanState *node) * We don't use a PG_TRY block here, so be careful not to throw error * without releasing the PGresult. */ - res = pgfdw_exec_query(fsstate->conn, sql); + res = pgfdw_exec_query(fsstate->s.conn, sql); if (PQresultStatus(res) != PGRES_COMMAND_OK) - pgfdw_report_error(ERROR, res, fsstate->conn, true, sql); + pgfdw_report_error(ERROR, res, fsstate->s.conn, true, sql); PQclear(res); /* Now force a fresh FETCH. */ @@ -1591,7 +1846,7 @@ postgresReScanForeignScan(ForeignScanState *node) static void postgresEndForeignScan(ForeignScanState *node) { - PgFdwScanState *fsstate = (PgFdwScanState *) node->fdw_state; + PgFdwScanState *fsstate = GetPgFdwScanState(node); /* if fsstate is NULL, we are in EXPLAIN; nothing to do */ if (fsstate == NULL) @@ -1599,15 +1854,31 @@ postgresEndForeignScan(ForeignScanState *node) /* Close the cursor if open, to prevent accumulation of cursors */ if (fsstate->cursor_exists) - close_cursor(fsstate->conn, fsstate->cursor_number); + close_cursor(fsstate->s.conn, fsstate->cursor_number); /* Release remote connection */ - ReleaseConnection(fsstate->conn); - fsstate->conn = NULL; + ReleaseConnection(fsstate->s.conn); + fsstate->s.conn = NULL; /* MemoryContexts will be deleted automatically. */ } +/* + * postgresShutdownForeignScan + * Remove asynchrony stuff and cleanup garbage on the connection. + */ +static void +postgresShutdownForeignScan(ForeignScanState *node) +{ + ForeignScan *plan = (ForeignScan *) node->ss.ps.plan; + + if (plan->operation != CMD_SELECT) + return; + + /* remove the node from waiting queue */ + remove_async_node(node); +} + /* * postgresAddForeignUpdateTargets * Add resjunk column(s) needed for update/delete on a foreign table @@ -2372,7 +2643,9 @@ postgresBeginDirectModify(ForeignScanState *node, int eflags) * Get connection to the foreign server. Connection manager will * establish new connection if necessary. */ - dmstate->conn = GetConnection(user, false); + dmstate->s.conn = GetConnection(user, false); + dmstate->s.commonstate = (PgFdwConnCommonState *) + GetConnectionSpecificStorage(user, sizeof(PgFdwConnCommonState)); /* Update the foreign-join-related fields. */ if (fsplan->scan.scanrelid == 0) @@ -2457,7 +2730,11 @@ postgresIterateDirectModify(ForeignScanState *node) * If this is the first call after Begin, execute the statement. */ if (dmstate->num_tuples == -1) + { + /* finish running query to send my command */ + vacate_connection((PgFdwState *)dmstate, true); execute_dml_stmt(node); + } /* * If the local query doesn't specify RETURNING, just clear tuple slot. @@ -2504,8 +2781,8 @@ postgresEndDirectModify(ForeignScanState *node) PQclear(dmstate->result); /* Release remote connection */ - ReleaseConnection(dmstate->conn); - dmstate->conn = NULL; + ReleaseConnection(dmstate->s.conn); + dmstate->s.conn = NULL; /* MemoryContext will be deleted automatically. */ } @@ -2703,6 +2980,7 @@ estimate_path_cost_size(PlannerInfo *root, List *local_param_join_conds; StringInfoData sql; PGconn *conn; + PgFdwConnCommonState *commonstate; Selectivity local_sel; QualCost local_cost; List *fdw_scan_tlist = NIL; @@ -2747,6 +3025,18 @@ estimate_path_cost_size(PlannerInfo *root, /* Get the remote estimate */ conn = GetConnection(fpinfo->user, false); + commonstate = GetConnectionSpecificStorage(fpinfo->user, + sizeof(PgFdwConnCommonState)); + if (commonstate) + { + PgFdwState tmpstate; + tmpstate.conn = conn; + tmpstate.commonstate = commonstate; + + /* finish running query to send my command */ + vacate_connection(&tmpstate, true); + } + get_remote_estimate(sql.data, conn, &rows, &width, &startup_cost, &total_cost); ReleaseConnection(conn); @@ -3317,11 +3607,11 @@ ec_member_matches_foreign(PlannerInfo *root, RelOptInfo *rel, static void create_cursor(ForeignScanState *node) { - PgFdwScanState *fsstate = (PgFdwScanState *) node->fdw_state; + PgFdwScanState *fsstate = GetPgFdwScanState(node); ExprContext *econtext = node->ss.ps.ps_ExprContext; int numParams = fsstate->numParams; const char **values = fsstate->param_values; - PGconn *conn = fsstate->conn; + PGconn *conn = fsstate->s.conn; StringInfoData buf; PGresult *res; @@ -3384,50 +3674,119 @@ create_cursor(ForeignScanState *node) } /* - * Fetch some more rows from the node's cursor. + * Sends the next request of the node. If the given node is different from the + * current connection leader, pushes it back to waiter queue and let the given + * node be the leader. */ static void -fetch_more_data(ForeignScanState *node) +request_more_data(ForeignScanState *node) { - PgFdwScanState *fsstate = (PgFdwScanState *) node->fdw_state; + PgFdwScanState *fsstate = GetPgFdwScanState(node); + ForeignScanState *leader = fsstate->s.commonstate->leader; + PGconn *conn = fsstate->s.conn; + char sql[64]; + + /* must be non-busy */ + Assert(!fsstate->s.commonstate->busy); + /* must be not-eof'ed */ + Assert(!fsstate->eof_reached); + + /* + * If this is the first call after Begin or ReScan, we need to create the + * cursor on the remote side. + */ + if (!fsstate->cursor_exists) + create_cursor(node); + + snprintf(sql, sizeof(sql), "FETCH %d FROM c%u", + fsstate->fetch_size, fsstate->cursor_number); + + if (!PQsendQuery(conn, sql)) + pgfdw_report_error(ERROR, NULL, conn, false, sql); + + fsstate->s.commonstate->busy = true; + + /* The node is the current leader, just return. */ + if (leader == node) + return; + + /* Let the node be the leader */ + if (leader != NULL) + { + remove_async_node(node); + fsstate->last_waiter = GetPgFdwScanState(leader)->last_waiter; + fsstate->waiter = leader; + } + else + { + fsstate->last_waiter = node; + fsstate->waiter = NULL; + } + + fsstate->s.commonstate->leader = node; +} + +/* + * Fetches received data and automatically send requests of the next waiter. + */ +static void +fetch_received_data(ForeignScanState *node) +{ + PgFdwScanState *fsstate = GetPgFdwScanState(node); PGresult *volatile res = NULL; MemoryContext oldcontext; + ForeignScanState *waiter; + + /* I should be the current connection leader */ + Assert(fsstate->s.commonstate->leader == node); /* * We'll store the tuples in the batch_cxt. First, flush the previous - * batch. + * batch if no tuple is remaining */ - fsstate->tuples = NULL; - MemoryContextReset(fsstate->batch_cxt); + if (fsstate->next_tuple >= fsstate->num_tuples) + { + fsstate->tuples = NULL; + fsstate->num_tuples = 0; + MemoryContextReset(fsstate->batch_cxt); + } + else if (fsstate->next_tuple > 0) + { + /* There's some remains. Move them to the beginning of the store */ + int n = 0; + + while(fsstate->next_tuple < fsstate->num_tuples) + fsstate->tuples[n++] = fsstate->tuples[fsstate->next_tuple++]; + fsstate->num_tuples = n; + } + oldcontext = MemoryContextSwitchTo(fsstate->batch_cxt); /* PGresult must be released before leaving this function. */ PG_TRY(); { - PGconn *conn = fsstate->conn; - char sql[64]; - int numrows; + PGconn *conn = fsstate->s.conn; + int addrows; + size_t newsize; int i; - snprintf(sql, sizeof(sql), "FETCH %d FROM c%u", - fsstate->fetch_size, fsstate->cursor_number); - - res = pgfdw_exec_query(conn, sql); - /* On error, report the original query, not the FETCH. */ + res = pgfdw_get_result(conn, fsstate->query); if (PQresultStatus(res) != PGRES_TUPLES_OK) pgfdw_report_error(ERROR, res, conn, false, fsstate->query); /* Convert the data into HeapTuples */ - numrows = PQntuples(res); - fsstate->tuples = (HeapTuple *) palloc0(numrows * sizeof(HeapTuple)); - fsstate->num_tuples = numrows; - fsstate->next_tuple = 0; + addrows = PQntuples(res); + newsize = (fsstate->num_tuples + addrows) * sizeof(HeapTuple); + if (fsstate->tuples) + fsstate->tuples = (HeapTuple *) repalloc(fsstate->tuples, newsize); + else + fsstate->tuples = (HeapTuple *) palloc(newsize); - for (i = 0; i < numrows; i++) + for (i = 0; i < addrows; i++) { Assert(IsA(node->ss.ps.plan, ForeignScan)); - fsstate->tuples[i] = + fsstate->tuples[fsstate->num_tuples + i] = make_tuple_from_result_row(res, i, fsstate->rel, fsstate->attinmeta, @@ -3437,22 +3796,73 @@ fetch_more_data(ForeignScanState *node) } /* Update fetch_ct_2 */ - if (fsstate->fetch_ct_2 < 2) + if (fsstate->fetch_ct_2 < 2 && fsstate->next_tuple == 0) fsstate->fetch_ct_2++; + fsstate->next_tuple = 0; + fsstate->num_tuples += addrows; + /* Must be EOF if we didn't get as many tuples as we asked for. */ - fsstate->eof_reached = (numrows < fsstate->fetch_size); + fsstate->eof_reached = (addrows < fsstate->fetch_size); } PG_FINALLY(); { + fsstate->s.commonstate->busy = false; + if (res) PQclear(res); } PG_END_TRY(); + /* let the first waiter be the next leader of this connection */ + waiter = move_to_next_waiter(node); + + /* send the next request if any */ + if (waiter) + request_more_data(waiter); + MemoryContextSwitchTo(oldcontext); } +/* + * Vacate the underlying connection so that this node can send the next query. + */ +static void +vacate_connection(PgFdwState *fdwstate, bool clear_queue) +{ + PgFdwConnCommonState *commonstate = fdwstate->commonstate; + ForeignScanState *leader; + + Assert(commonstate != NULL); + + /* just return if the connection is already available */ + if (commonstate->leader == NULL || !commonstate->busy) + return; + + /* + * let the current connection leader read all of the result for the running + * query + */ + leader = commonstate->leader; + fetch_received_data(leader); + + /* let the first waiter be the next leader of this connection */ + move_to_next_waiter(leader); + + if (!clear_queue) + return; + + /* Clear the waiting list */ + while (leader) + { + PgFdwScanState *fsstate = GetPgFdwScanState(leader); + + fsstate->last_waiter = NULL; + leader = fsstate->waiter; + fsstate->waiter = NULL; + } +} + /* * Force assorted GUC parameters to settings that ensure that we'll output * data values in a form that is unambiguous to the remote server. @@ -3566,7 +3976,9 @@ create_foreign_modify(EState *estate, user = GetUserMapping(userid, table->serverid); /* Open connection; report that we'll create a prepared statement. */ - fmstate->conn = GetConnection(user, true); + fmstate->s.conn = GetConnection(user, true); + fmstate->s.commonstate = (PgFdwConnCommonState *) + GetConnectionSpecificStorage(user, sizeof(PgFdwConnCommonState)); fmstate->p_name = NULL; /* prepared statement not made yet */ /* Set up remote query information. */ @@ -3653,6 +4065,9 @@ execute_foreign_modify(EState *estate, operation == CMD_UPDATE || operation == CMD_DELETE); + /* finish running query to send my command */ + vacate_connection((PgFdwState *)fmstate, true); + /* Set up the prepared statement on the remote server, if we didn't yet */ if (!fmstate->p_name) prepare_foreign_modify(fmstate); @@ -3680,14 +4095,14 @@ execute_foreign_modify(EState *estate, /* * Execute the prepared statement. */ - if (!PQsendQueryPrepared(fmstate->conn, + if (!PQsendQueryPrepared(fmstate->s.conn, fmstate->p_name, fmstate->p_nums, p_values, NULL, NULL, 0)) - pgfdw_report_error(ERROR, NULL, fmstate->conn, false, fmstate->query); + pgfdw_report_error(ERROR, NULL, fmstate->s.conn, false, fmstate->query); /* * Get the result, and check for success. @@ -3695,10 +4110,10 @@ execute_foreign_modify(EState *estate, * We don't use a PG_TRY block here, so be careful not to throw error * without releasing the PGresult. */ - res = pgfdw_get_result(fmstate->conn, fmstate->query); + res = pgfdw_get_result(fmstate->s.conn, fmstate->query); if (PQresultStatus(res) != (fmstate->has_returning ? PGRES_TUPLES_OK : PGRES_COMMAND_OK)) - pgfdw_report_error(ERROR, res, fmstate->conn, true, fmstate->query); + pgfdw_report_error(ERROR, res, fmstate->s.conn, true, fmstate->query); /* Check number of rows affected, and fetch RETURNING tuple if any */ if (fmstate->has_returning) @@ -3734,7 +4149,7 @@ prepare_foreign_modify(PgFdwModifyState *fmstate) /* Construct name we'll use for the prepared statement. */ snprintf(prep_name, sizeof(prep_name), "pgsql_fdw_prep_%u", - GetPrepStmtNumber(fmstate->conn)); + GetPrepStmtNumber(fmstate->s.conn)); p_name = pstrdup(prep_name); /* @@ -3744,12 +4159,12 @@ prepare_foreign_modify(PgFdwModifyState *fmstate) * the prepared statements we use in this module are simple enough that * the remote server will make the right choices. */ - if (!PQsendPrepare(fmstate->conn, + if (!PQsendPrepare(fmstate->s.conn, p_name, fmstate->query, 0, NULL)) - pgfdw_report_error(ERROR, NULL, fmstate->conn, false, fmstate->query); + pgfdw_report_error(ERROR, NULL, fmstate->s.conn, false, fmstate->query); /* * Get the result, and check for success. @@ -3757,9 +4172,9 @@ prepare_foreign_modify(PgFdwModifyState *fmstate) * We don't use a PG_TRY block here, so be careful not to throw error * without releasing the PGresult. */ - res = pgfdw_get_result(fmstate->conn, fmstate->query); + res = pgfdw_get_result(fmstate->s.conn, fmstate->query); if (PQresultStatus(res) != PGRES_COMMAND_OK) - pgfdw_report_error(ERROR, res, fmstate->conn, true, fmstate->query); + pgfdw_report_error(ERROR, res, fmstate->s.conn, true, fmstate->query); PQclear(res); /* This action shows that the prepare has been done. */ @@ -3888,16 +4303,16 @@ finish_foreign_modify(PgFdwModifyState *fmstate) * We don't use a PG_TRY block here, so be careful not to throw error * without releasing the PGresult. */ - res = pgfdw_exec_query(fmstate->conn, sql); + res = pgfdw_exec_query(fmstate->s.conn, sql); if (PQresultStatus(res) != PGRES_COMMAND_OK) - pgfdw_report_error(ERROR, res, fmstate->conn, true, sql); + pgfdw_report_error(ERROR, res, fmstate->s.conn, true, sql); PQclear(res); fmstate->p_name = NULL; } /* Release remote connection */ - ReleaseConnection(fmstate->conn); - fmstate->conn = NULL; + ReleaseConnection(fmstate->s.conn); + fmstate->s.conn = NULL; } /* @@ -4056,9 +4471,9 @@ execute_dml_stmt(ForeignScanState *node) * the desired result. This allows us to avoid assuming that the remote * server has the same OIDs we do for the parameters' types. */ - if (!PQsendQueryParams(dmstate->conn, dmstate->query, numParams, + if (!PQsendQueryParams(dmstate->s.conn, dmstate->query, numParams, NULL, values, NULL, NULL, 0)) - pgfdw_report_error(ERROR, NULL, dmstate->conn, false, dmstate->query); + pgfdw_report_error(ERROR, NULL, dmstate->s.conn, false, dmstate->query); /* * Get the result, and check for success. @@ -4066,10 +4481,10 @@ execute_dml_stmt(ForeignScanState *node) * We don't use a PG_TRY block here, so be careful not to throw error * without releasing the PGresult. */ - dmstate->result = pgfdw_get_result(dmstate->conn, dmstate->query); + dmstate->result = pgfdw_get_result(dmstate->s.conn, dmstate->query); if (PQresultStatus(dmstate->result) != (dmstate->has_returning ? PGRES_TUPLES_OK : PGRES_COMMAND_OK)) - pgfdw_report_error(ERROR, dmstate->result, dmstate->conn, true, + pgfdw_report_error(ERROR, dmstate->result, dmstate->s.conn, true, dmstate->query); /* Get the number of rows affected. */ @@ -5560,6 +5975,40 @@ postgresGetForeignJoinPaths(PlannerInfo *root, /* XXX Consider parameterized paths for the join relation */ } +static bool +postgresIsForeignPathAsyncCapable(ForeignPath *path) +{ + return true; +} + + +/* + * Configure waiting event. + * + * Add wait event so that the ForeignScan node is going to wait for. + */ +static bool +postgresForeignAsyncConfigureWait(ForeignScanState *node, WaitEventSet *wes, + void *caller_data, bool reinit) +{ + PgFdwScanState *fsstate = GetPgFdwScanState(node); + + + /* Reinit is not supported for now. */ + Assert(reinit); + + if (fsstate->s.commonstate->leader == node) + { + AddWaitEventToSet(wes, + WL_SOCKET_READABLE, PQsocket(fsstate->s.conn), + NULL, caller_data); + return true; + } + + return false; +} + + /* * Assess whether the aggregation, grouping and having operations can be pushed * down to the foreign server. As a side effect, save information we obtain in diff --git a/contrib/postgres_fdw/postgres_fdw.h b/contrib/postgres_fdw/postgres_fdw.h index eef410db39..96af75a33e 100644 --- a/contrib/postgres_fdw/postgres_fdw.h +++ b/contrib/postgres_fdw/postgres_fdw.h @@ -85,6 +85,7 @@ typedef struct PgFdwRelationInfo UserMapping *user; /* only set in use_remote_estimate mode */ int fetch_size; /* fetch size for this remote table */ + bool allow_prefetch; /* true to allow overlapped fetching */ /* * Name of the relation, for use while EXPLAINing ForeignScan. It is used @@ -130,6 +131,7 @@ extern void reset_transmission_modes(int nestlevel); /* in connection.c */ extern PGconn *GetConnection(UserMapping *user, bool will_prep_stmt); +void *GetConnectionSpecificStorage(UserMapping *user, size_t initsize); extern void ReleaseConnection(PGconn *conn); extern unsigned int GetCursorNumber(PGconn *conn); extern unsigned int GetPrepStmtNumber(PGconn *conn); diff --git a/contrib/postgres_fdw/sql/postgres_fdw.sql b/contrib/postgres_fdw/sql/postgres_fdw.sql index 83971665e3..359208a12a 100644 --- a/contrib/postgres_fdw/sql/postgres_fdw.sql +++ b/contrib/postgres_fdw/sql/postgres_fdw.sql @@ -1780,25 +1780,25 @@ INSERT INTO b(aa) VALUES('bbb'); INSERT INTO b(aa) VALUES('bbbb'); INSERT INTO b(aa) VALUES('bbbbb'); -SELECT tableoid::regclass, * FROM a; +SELECT tableoid::regclass, * FROM a ORDER BY 1, 2; SELECT tableoid::regclass, * FROM b; SELECT tableoid::regclass, * FROM ONLY a; UPDATE a SET aa = 'zzzzzz' WHERE aa LIKE 'aaaa%'; -SELECT tableoid::regclass, * FROM a; +SELECT tableoid::regclass, * FROM a ORDER BY 1, 2; SELECT tableoid::regclass, * FROM b; SELECT tableoid::regclass, * FROM ONLY a; UPDATE b SET aa = 'new'; -SELECT tableoid::regclass, * FROM a; +SELECT tableoid::regclass, * FROM a ORDER BY 1, 2; SELECT tableoid::regclass, * FROM b; SELECT tableoid::regclass, * FROM ONLY a; UPDATE a SET aa = 'newtoo'; -SELECT tableoid::regclass, * FROM a; +SELECT tableoid::regclass, * FROM a ORDER BY 1, 2; SELECT tableoid::regclass, * FROM b; SELECT tableoid::regclass, * FROM ONLY a; @@ -1840,12 +1840,12 @@ insert into bar2 values(4,44,44); insert into bar2 values(7,77,77); explain (verbose, costs off) -select * from bar where f1 in (select f1 from foo) for update; -select * from bar where f1 in (select f1 from foo) for update; +select * from bar where f1 in (select f1 from foo) order by 1 for update; +select * from bar where f1 in (select f1 from foo) order by 1 for update; explain (verbose, costs off) -select * from bar where f1 in (select f1 from foo) for share; -select * from bar where f1 in (select f1 from foo) for share; +select * from bar where f1 in (select f1 from foo) order by 1 for share; +select * from bar where f1 in (select f1 from foo) order by 1 for share; -- Check UPDATE with inherited target and an inherited source table explain (verbose, costs off) @@ -1904,8 +1904,8 @@ explain (verbose, costs off) delete from foo where f1 < 5 returning *; delete from foo where f1 < 5 returning *; explain (verbose, costs off) -update bar set f2 = f2 + 100 returning *; -update bar set f2 = f2 + 100 returning *; +with u as (update bar set f2 = f2 + 100 returning *) select * from u order by 1; +with u as (update bar set f2 = f2 + 100 returning *) select * from u order by 1; -- Test that UPDATE/DELETE with inherited target works with row-level triggers CREATE TRIGGER trig_row_before -- 2.18.2