From e3b411cdd5ca5c8c3f6ccd73ae4c7d8ef1470903 Mon Sep 17 00:00:00 2001 From: Thomas Munro Date: Sun, 25 Feb 2018 23:45:09 +1300 Subject: [PATCH 2/2] Enable the read-only SERIALIZABLE optimization for parallel query. A SERIALIZABLEXACT can be marked as SXACT_FLAG_RO_SAFE by a concurrent session, meaning that it is safe to throw away this SERIALIZABLEXACT and start behaving like a REPEATABLE READ transaction. The problem is that the leader and workers are sharing the same SERIALIZABLEXACT so this must be coordinated carefully. This commit solves that problem as follows: The first backend to observe the SXACT_FLAG_RO_SAFE flag will 'partially release' it, meaning that the conflicts and locks it holds can be released, but the SERIALIZABLEXACT itself will remain active because other backends might have a pointer to it. Whenever any backend notices the SXACT_FLAG_RO_SAFE flag, it clears its own MySerializableXact variable so that it can skip SSI checks for the rest of the transaction. In the special case of the leader process, it transfers the SERIALIZABLEXACT to a new variable SavedSerializableXact, so that it can be completely released at the end of the transaction after all workers have exited. 
Author: Thomas Munro Discussion: https://postgr.es/m/CAEepm=0gXGYhtrVDWOTHS8SQQy_=S9xo+8oCxGLWZAOoeJ=yzQ@mail.gmail.com --- src/backend/storage/lmgr/predicate.c | 136 ++++++++++++++++++++++++++---- src/backend/utils/resowner/resowner.c | 2 +- src/include/storage/predicate.h | 2 +- src/include/storage/predicate_internals.h | 6 ++ 4 files changed, 127 insertions(+), 19 deletions(-) diff --git a/src/backend/storage/lmgr/predicate.c b/src/backend/storage/lmgr/predicate.c index 617208c42cb..a3e36081db8 100644 --- a/src/backend/storage/lmgr/predicate.c +++ b/src/backend/storage/lmgr/predicate.c @@ -170,7 +170,7 @@ * PredicateLockPageCombine(Relation relation, BlockNumber oldblkno, * BlockNumber newblkno) * TransferPredicateLocksToHeapRelation(Relation relation) - * ReleasePredicateLocks(bool isCommit) + * ReleasePredicateLocks(bool isCommit, bool isReadOnlySafe) * * conflict detection (may also trigger rollback) * CheckForSerializableConflictOut(bool visible, Relation relation, @@ -288,6 +288,7 @@ #define SxactIsDeferrableWaiting(sxact) (((sxact)->flags & SXACT_FLAG_DEFERRABLE_WAITING) != 0) #define SxactIsROSafe(sxact) (((sxact)->flags & SXACT_FLAG_RO_SAFE) != 0) #define SxactIsROUnsafe(sxact) (((sxact)->flags & SXACT_FLAG_RO_UNSAFE) != 0) +#define SxactIsPartiallyReleased(sxact) (((sxact)->flags & SXACT_FLAG_PARTIALLY_RELEASED) != 0) /* * Compute the hash code associated with a PREDICATELOCKTARGETTAG. @@ -422,6 +423,15 @@ static HTAB *LocalPredicateLockHash = NULL; static SERIALIZABLEXACT *MySerializableXact = InvalidSerializableXact; static bool MyXactDidWrite = false; +/* + * The SXACT_FLAG_RO_SAFE optimization might lead us to release + * MySerializableXact early. If that happens in a parallel query, the leader + * needs to defer the destruction of the SERIALIZABLEXACT until end of + * transaction, because the workers still have a reference to it. In that + * case, the leader stores it here.
+ */ +static SERIALIZABLEXACT *SavedSerializableXact = InvalidSerializableXact; + /* local functions */ static SERIALIZABLEXACT *CreatePredXact(void); @@ -532,12 +542,10 @@ SerializationNeededForRead(Relation relation, Snapshot snapshot) * A transaction is flagged as RO_SAFE if all concurrent R/W transactions * commit without having conflicts out to an earlier snapshot, thus * ensuring that no conflicts are possible for this transaction. - * - * This optimization is not yet supported in parallel mode. */ - if (SxactIsROSafe(MySerializableXact) && !IsInParallelMode()) + if (SxactIsROSafe(MySerializableXact)) { - ReleasePredicateLocks(false); + ReleasePredicateLocks(false, true); return false; } @@ -1573,14 +1581,14 @@ GetSafeSnapshot(Snapshot origSnapshot) ereport(DEBUG2, (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), errmsg("deferrable snapshot was unsafe; trying a new one"))); - ReleasePredicateLocks(false); + ReleasePredicateLocks(false, false); } /* * Now we have a safe snapshot, so we don't need to do any further checks. */ Assert(SxactIsROSafe(MySerializableXact)); - ReleasePredicateLocks(false); + ReleasePredicateLocks(false, true); return snapshot; } @@ -3307,9 +3315,17 @@ SetNewSxactGlobalXmin(void) * If this transaction is committing and is holding any predicate locks, * it must be added to a list of completed serializable transactions still * holding locks. + * + * If isReadOnlySafe is true, then predicate locks are being released before + * the end of the transaction because MySerializableXact has been determined + * to be RO_SAFE. In non-parallel mode we can release it completely, but + * in parallel mode we partially release the SERIALIZABLEXACT and keep it + * around until the end of the transaction, allowing each backend to clear its + * MySerializableXact variable and benefit from the optimization in its own + * time.
+ */ void -ReleasePredicateLocks(bool isCommit) +ReleasePredicateLocks(bool isCommit, bool isReadOnlySafe) { bool needToClear; RWConflict conflict, @@ -3328,22 +3344,93 @@ ReleasePredicateLocks(bool isCommit) */ bool topLevelIsDeclaredReadOnly; + /* We can't be both committing and releasing early due to RO_SAFE. */ + Assert(!(isCommit && isReadOnlySafe)); + + /* Are we at the end of a transaction, that is, a commit or abort? */ + if (!isReadOnlySafe) + { + /* + * Parallel workers mustn't release predicate locks at the end of + * their transaction. The leader will do that at the end of its + * transaction. + */ + if (IsParallelWorker()) + goto backend_local_cleanup; + + /* + * By the time the leader in a parallel query reaches end of + * transaction, it has waited for all workers to exit. + */ + Assert(!ParallelContextActive()); + + /* + * If the leader in a parallel query earlier stashed a partially + * released SERIALIZABLEXACT for final clean-up at end of transaction + * (because workers might still have been accessing it), then it's + * time to restore it. + */ + if (SavedSerializableXact != InvalidSerializableXact) + { + Assert(MySerializableXact == InvalidSerializableXact); + MySerializableXact = SavedSerializableXact; + SavedSerializableXact = InvalidSerializableXact; + Assert(SxactIsPartiallyReleased(MySerializableXact)); + } + } + if (MySerializableXact == InvalidSerializableXact) { Assert(LocalPredicateLockHash == NULL); return; } - /* Parallel workers mustn't release predicate locks. */ - if (IsParallelWorker()) - goto backend_local_cleanup; - LWLockAcquire(SerializableXactHashLock, LW_EXCLUSIVE); + /* + * If the transaction is committing, but it has been partially released + * already, then treat this as a roll back. It was marked as rolled back.
+ */ + if (isCommit && SxactIsPartiallyReleased(MySerializableXact)) + isCommit = false; + + /* + * If we're called in the middle of a transaction because we discovered + * that the SXACT_FLAG_RO_SAFE flag was set, then we'll partially release + * it (that is, release the predicate locks and conflicts, but not the + * SERIALIZABLEXACT itself) if we're the first backend to have noticed. + */ + if (isReadOnlySafe && IsInParallelMode()) + { + /* + * The leader needs to stash a pointer to it, so that it can + * completely release it at end-of-transaction. + */ + if (!IsParallelWorker()) + SavedSerializableXact = MySerializableXact; + + /* + * The first backend to reach this condition will partially release + * the SERIALIZABLEXACT. All others will just clear their + * backend-local state so that they stop doing SSI checks for the rest + * of the transaction. + */ + if (SxactIsPartiallyReleased(MySerializableXact)) + { + LWLockRelease(SerializableXactHashLock); + goto backend_local_cleanup; + } + else + { + MySerializableXact->flags |= SXACT_FLAG_PARTIALLY_RELEASED; + /* ... and proceed to perform the partial release below. */ + } + } Assert(!isCommit || SxactIsPrepared(MySerializableXact)); Assert(!isCommit || !SxactIsDoomed(MySerializableXact)); Assert(!SxactIsCommitted(MySerializableXact)); - Assert(!SxactIsRolledBack(MySerializableXact)); + Assert(SxactIsPartiallyReleased(MySerializableXact) + || !SxactIsRolledBack(MySerializableXact)); /* may not be serializable during COMMIT/ROLLBACK PREPARED */ Assert(MySerializableXact->pid == 0 || IsolationIsSerializable()); @@ -3392,7 +3479,8 @@ ReleasePredicateLocks(bool isCommit) * cleanup. This means it should not be considered when calculating * SxactGlobalXmin. */ - MySerializableXact->flags |= SXACT_FLAG_DOOMED; + if (!isReadOnlySafe) + MySerializableXact->flags |= SXACT_FLAG_DOOMED; MySerializableXact->flags |= SXACT_FLAG_ROLLED_BACK; /* @@ -3588,7 +3676,8 @@ ReleasePredicateLocks(bool isCommit) * was launched. 
*/ needToClear = false; - if (TransactionIdEquals(MySerializableXact->xmin, PredXact->SxactGlobalXmin)) + if (!isReadOnlySafe && + TransactionIdEquals(MySerializableXact->xmin, PredXact->SxactGlobalXmin)) { Assert(PredXact->SxactGlobalXminCount > 0); if (--(PredXact->SxactGlobalXminCount) == 0) @@ -3607,8 +3696,16 @@ ReleasePredicateLocks(bool isCommit) SHMQueueInsertBefore(FinishedSerializableTransactions, &MySerializableXact->finishedLink); + /* + * If we're releasing a RO_SAFE transaction in parallel mode, we'll only + * partially release it. That's necessary because other backends may have + * a reference to it. The leader will release the SERIALIZABLEXACT itself + * at the end of the transaction after workers have stopped running. + */ if (!isCommit) - ReleaseOneSerializableXact(MySerializableXact, false, false); + ReleaseOneSerializableXact(MySerializableXact, + isReadOnlySafe && IsInParallelMode(), + false); LWLockRelease(SerializableFinishedListLock); @@ -3807,6 +3904,8 @@ ReleaseOneSerializableXact(SERIALIZABLEXACT *sxact, bool partial, * them to OldCommittedSxact if summarize is true) */ LWLockAcquire(SerializablePredicateLockListLock, LW_SHARED); + if (IsInParallelMode()) + LWLockAcquire(&sxact->predicateLockListLock, LW_EXCLUSIVE); predlock = (PREDICATELOCK *) SHMQueueNext(&(sxact->predicateLocks), &(sxact->predicateLocks), @@ -3886,6 +3985,8 @@ ReleaseOneSerializableXact(SERIALIZABLEXACT *sxact, bool partial, */ SHMQueueInit(&sxact->predicateLocks); + if (IsInParallelMode()) + LWLockRelease(&sxact->predicateLockListLock); LWLockRelease(SerializablePredicateLockListLock); sxidtag.xid = sxact->topXid; @@ -4776,6 +4877,7 @@ PreCommit_CheckForSerializationFailure(void) /* Check if someone else has already decided that we need to die */ if (SxactIsDoomed(MySerializableXact)) { + Assert(!SxactIsPartiallyReleased(MySerializableXact)); LWLockRelease(SerializableXactHashLock); ereport(ERROR, (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), @@ -4973,7 +5075,7 @@ 
PredicateLockTwoPhaseFinish(TransactionId xid, bool isCommit) MySerializableXact = sxid->myXact; MyXactDidWrite = true; /* conservatively assume that we wrote * something */ - ReleasePredicateLocks(isCommit); + ReleasePredicateLocks(isCommit, false); } /* diff --git a/src/backend/utils/resowner/resowner.c b/src/backend/utils/resowner/resowner.c index e09a4f1ddb4..ab0523e90a5 100644 --- a/src/backend/utils/resowner/resowner.c +++ b/src/backend/utils/resowner/resowner.c @@ -551,7 +551,7 @@ ResourceOwnerReleaseInternal(ResourceOwner owner, if (owner == TopTransactionResourceOwner) { ProcReleaseLocks(isCommit); - ReleasePredicateLocks(isCommit); + ReleasePredicateLocks(isCommit, false); } } else diff --git a/src/include/storage/predicate.h b/src/include/storage/predicate.h index 23f3acc3ce1..0925270b91e 100644 --- a/src/include/storage/predicate.h +++ b/src/include/storage/predicate.h @@ -61,7 +61,7 @@ extern void PredicateLockTuple(Relation relation, HeapTuple tuple, Snapshot snap extern void PredicateLockPageSplit(Relation relation, BlockNumber oldblkno, BlockNumber newblkno); extern void PredicateLockPageCombine(Relation relation, BlockNumber oldblkno, BlockNumber newblkno); extern void TransferPredicateLocksToHeapRelation(Relation relation); -extern void ReleasePredicateLocks(bool isCommit); +extern void ReleasePredicateLocks(bool isCommit, bool isReadOnlySafe); /* conflict detection (may also trigger rollback) */ extern void CheckForSerializableConflictOut(bool valid, Relation relation, HeapTuple tuple, diff --git a/src/include/storage/predicate_internals.h b/src/include/storage/predicate_internals.h index 59eb49e57ee..04de63877d5 100644 --- a/src/include/storage/predicate_internals.h +++ b/src/include/storage/predicate_internals.h @@ -127,6 +127,12 @@ typedef struct SERIALIZABLEXACT #define SXACT_FLAG_RO_UNSAFE 0x00000100 #define SXACT_FLAG_SUMMARY_CONFLICT_IN 0x00000200 #define SXACT_FLAG_SUMMARY_CONFLICT_OUT 0x00000400 +/* + * The following flag means the 
transaction has been partially released + * already, but is being preserved because parallel workers might have a + * reference to it. It'll be recycled by the leader at end-of-transaction. + */ +#define SXACT_FLAG_PARTIALLY_RELEASED 0x00000800 /* * The following types are used to provide an ad hoc list for holding -- 2.15.1