diff --git a/contrib/pgstattuple/pgstattuple.c b/contrib/pgstattuple/pgstattuple.c index c1122b4..d02539a 100644 --- a/contrib/pgstattuple/pgstattuple.c +++ b/contrib/pgstattuple/pgstattuple.c @@ -400,7 +400,6 @@ pgstat_hash_page(pgstattuple_type *stat, Relation rel, BlockNumber blkno, Buffer buf; Page page; - _hash_getlock(rel, blkno, HASH_SHARE); buf = _hash_getbuf_with_strategy(rel, blkno, HASH_READ, 0, bstrategy); page = BufferGetPage(buf); @@ -431,7 +430,6 @@ pgstat_hash_page(pgstattuple_type *stat, Relation rel, BlockNumber blkno, } _hash_relbuf(rel, buf); - _hash_droplock(rel, blkno, HASH_SHARE); } /* diff --git a/src/backend/access/hash/hash.c b/src/backend/access/hash/hash.c index 49a6c81..f95ac00 100644 --- a/src/backend/access/hash/hash.c +++ b/src/backend/access/hash/hash.c @@ -407,12 +407,15 @@ hashbeginscan(Relation rel, int nkeys, int norderbys) so = (HashScanOpaque) palloc(sizeof(HashScanOpaqueData)); so->hashso_bucket_valid = false; - so->hashso_bucket_blkno = 0; so->hashso_curbuf = InvalidBuffer; + so->hashso_bucket_buf = InvalidBuffer; + so->hashso_old_bucket_buf = InvalidBuffer; /* set position invalid (this will cause _hash_first call) */ ItemPointerSetInvalid(&(so->hashso_curpos)); ItemPointerSetInvalid(&(so->hashso_heappos)); + so->hashso_skip_moved_tuples = false; + scan->opaque = so; /* register scan in case we change pages it's using */ @@ -436,10 +439,15 @@ hashrescan(IndexScanDesc scan, ScanKey scankey, int nscankeys, _hash_dropbuf(rel, so->hashso_curbuf); so->hashso_curbuf = InvalidBuffer; - /* release lock on bucket, too */ - if (so->hashso_bucket_blkno) - _hash_droplock(rel, so->hashso_bucket_blkno, HASH_SHARE); - so->hashso_bucket_blkno = 0; + /* release pin we hold on old primary bucket */ + if (BufferIsValid(so->hashso_old_bucket_buf)) + _hash_dropbuf(rel, so->hashso_old_bucket_buf); + so->hashso_old_bucket_buf = InvalidBuffer; + + /* release pin we hold on primary bucket */ + if (BufferIsValid(so->hashso_bucket_buf)) + _hash_dropbuf(rel, so->hashso_bucket_buf); + so->hashso_bucket_buf = InvalidBuffer; /* set position invalid (this will cause _hash_first call) */ ItemPointerSetInvalid(&(so->hashso_curpos)); @@ -453,6 +461,8 @@ hashrescan(IndexScanDesc scan, ScanKey scankey, int nscankeys, scan->numberOfKeys * sizeof(ScanKeyData)); so->hashso_bucket_valid = false; } + + so->hashso_skip_moved_tuples = false; } /* @@ -472,10 +482,15 @@ hashendscan(IndexScanDesc scan) _hash_dropbuf(rel, so->hashso_curbuf); so->hashso_curbuf = InvalidBuffer; - /* release lock on bucket, too */ - if (so->hashso_bucket_blkno) - _hash_droplock(rel, so->hashso_bucket_blkno, HASH_SHARE); - so->hashso_bucket_blkno = 0; + /* release pin we hold on old primary bucket */ + if (BufferIsValid(so->hashso_old_bucket_buf)) + _hash_dropbuf(rel, so->hashso_old_bucket_buf); + so->hashso_old_bucket_buf = InvalidBuffer; + + /* release pin we hold on primary bucket */ + if (BufferIsValid(so->hashso_bucket_buf)) + _hash_dropbuf(rel, so->hashso_bucket_buf); + so->hashso_bucket_buf = InvalidBuffer; pfree(so); scan->opaque = NULL; @@ -486,6 +501,9 @@ hashendscan(IndexScanDesc scan) * The set of target tuples is specified via a callback routine that tells * whether any given heap tuple (identified by ItemPointer) is being deleted. * + * This function also deletes the tuples that are moved by split to + * another bucket. + * * Result: a palloc'd struct containing statistical info for VACUUM displays.
*/ IndexBulkDeleteResult * @@ -530,35 +548,61 @@ loop_top: { BlockNumber bucket_blkno; BlockNumber blkno; + Buffer bucket_buf; + Buffer buf; + HashPageOpaque bucket_opaque; + Page page; bool bucket_dirty = false; + bool bucket_has_garbage = false; /* Get address of bucket's start page */ bucket_blkno = BUCKET_TO_BLKNO(&local_metapage, cur_bucket); - /* Exclusive-lock the bucket so we can shrink it */ - _hash_getlock(rel, bucket_blkno, HASH_EXCLUSIVE); - /* Shouldn't have any active scans locally, either */ if (_hash_has_active_scan(rel, cur_bucket)) elog(ERROR, "hash index has active scan during VACUUM"); - /* Scan each page in bucket */ blkno = bucket_blkno; - while (BlockNumberIsValid(blkno)) + + /* + * Maintain a cleanup lock on primary bucket till we scan all the + * pages in bucket. This is required to ensure that we don't delete + * tuples which are needed for concurrent scans on buckets where split + * is in progress. Retaining it till end of bucket scan ensures that + * concurrent split can't be started on it. In future, we might want + * to relax the requirement for vacuum to take cleanup lock only for + * buckets where split is in progress, however for squeeze phase we + * need a cleanup lock, otherwise squeeze will move the tuples to a + * different location and that can lead to change in order of results. + */ + buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL, info->strategy); + LockBufferForCleanup(buf); + _hash_checkpage(rel, buf, LH_BUCKET_PAGE); + + page = BufferGetPage(buf); + bucket_opaque = (HashPageOpaque) PageGetSpecialPointer(page); + + /* + * If the bucket contains tuples that are moved by split, then we need + * to delete such tuples as well. + */ + if (bucket_opaque->hasho_flag & LH_BUCKET_PAGE_HAS_GARBAGE) + bucket_has_garbage = true; + + bucket_buf = buf; + + /* Scan each page in bucket */ + for (;;) { - Buffer buf; - Page page; HashPageOpaque opaque; OffsetNumber offno; OffsetNumber maxoffno; OffsetNumber deletable[MaxOffsetNumber]; int ndeletable = 0; + bool release_buf = false; vacuum_delay_point(); - buf = _hash_getbuf_with_strategy(rel, blkno, HASH_WRITE, - LH_BUCKET_PAGE | LH_OVERFLOW_PAGE, - info->strategy); page = BufferGetPage(buf); opaque = (HashPageOpaque) PageGetSpecialPointer(page); Assert(opaque->hasho_bucket == cur_bucket); @@ -571,6 +615,7 @@ loop_top: { IndexTuple itup; ItemPointer htup; + Bucket bucket; itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offno)); @@ -581,32 +626,72 @@ loop_top: deletable[ndeletable++] = offno; tuples_removed += 1; } + else if (bucket_has_garbage) + { + /* delete the tuples that are moved by split. */ + bucket = _hash_hashkey2bucket(_hash_get_indextuple_hashkey(itup), + local_metapage.hashm_maxbucket, + local_metapage.hashm_highmask, + local_metapage.hashm_lowmask); + if (bucket != cur_bucket) + { + /* mark the item for deletion */ + deletable[ndeletable++] = offno; + tuples_removed += 1; + } + } else num_index_tuples += 1; } /* - * Apply deletions and write page if needed, advance to next page. + * We don't release the lock on primary bucket till end of bucket + * scan. */ + if (blkno != bucket_blkno) + release_buf = true; + blkno = opaque->hasho_nextblkno; + /* + * Apply deletions and write page if needed, advance to next page. 
+ */ if (ndeletable > 0) { PageIndexMultiDelete(page, deletable, ndeletable); - _hash_wrtbuf(rel, buf); + if (release_buf) + _hash_wrtbuf(rel, buf); + else + MarkBufferDirty(buf); bucket_dirty = true; } - else + else if (release_buf) _hash_relbuf(rel, buf); + + /* bail out if there are no more pages to scan. */ + if (!BlockNumberIsValid(blkno)) + break; + + buf = _hash_getbuf_with_strategy(rel, blkno, HASH_WRITE, + LH_OVERFLOW_PAGE, + info->strategy); } + /* + * Clear the garbage flag from the bucket after deleting the tuples that + * are moved by split. We purposefully clear the flag before squeezing + * the bucket, so that after a restart, vacuum won't again try to delete + * the moved-by-split tuples. + */ + if (bucket_has_garbage) + bucket_opaque->hasho_flag &= ~LH_BUCKET_PAGE_HAS_GARBAGE; + /* If we deleted anything, try to compact free space */ if (bucket_dirty) - _hash_squeezebucket(rel, cur_bucket, bucket_blkno, + _hash_squeezebucket(rel, cur_bucket, bucket_blkno, bucket_buf, info->strategy); - /* Release bucket lock */ - _hash_droplock(rel, bucket_blkno, HASH_EXCLUSIVE); + _hash_relbuf(rel, bucket_buf); /* Advance to next bucket */ cur_bucket++; diff --git a/src/backend/access/hash/hashinsert.c b/src/backend/access/hash/hashinsert.c index acd2e64..eedf6ae 100644 --- a/src/backend/access/hash/hashinsert.c +++ b/src/backend/access/hash/hashinsert.c @@ -32,8 +32,6 @@ _hash_doinsert(Relation rel, IndexTuple itup) Buffer metabuf; HashMetaPage metap; BlockNumber blkno; - BlockNumber oldblkno = InvalidBlockNumber; - bool retry = false; Page page; HashPageOpaque pageopaque; Size itemsz; @@ -70,45 +68,22 @@ _hash_doinsert(Relation rel, IndexTuple itup) errhint("Values larger than a buffer page cannot be indexed."))); /* - * Loop until we get a lock on the correct target bucket. + * Compute the target bucket number, and convert to block number. */ - for (;;) - { - /* - * Compute the target bucket number, and convert to block number. - */ - bucket = _hash_hashkey2bucket(hashkey, + bucket = _hash_hashkey2bucket(hashkey, metap->hashm_maxbucket, metap->hashm_highmask, metap->hashm_lowmask); - blkno = BUCKET_TO_BLKNO(metap, bucket); - - /* Release metapage lock, but keep pin. */ - _hash_chgbufaccess(rel, metabuf, HASH_READ, HASH_NOLOCK); + blkno = BUCKET_TO_BLKNO(metap, bucket); - /* - * If the previous iteration of this loop locked what is still the - * correct target bucket, we are done. Otherwise, drop any old lock - * and lock what now appears to be the correct bucket. - */ - if (retry) - { - if (oldblkno == blkno) - break; - _hash_droplock(rel, oldblkno, HASH_SHARE); - } - _hash_getlock(rel, blkno, HASH_SHARE); - - /* - * Reacquire metapage lock and check that no bucket split has taken - * place while we were awaiting the bucket lock. - */ - _hash_chgbufaccess(rel, metabuf, HASH_NOLOCK, HASH_READ); - oldblkno = blkno; - retry = true; - } + _hash_chgbufaccess(rel, metabuf, HASH_READ, HASH_NOLOCK); + /* + * FixMe: If the split operation happens during insertion and it + * doesn't account for the tuple being inserted, then it can be lost + * to future searches.
+ */ /* Fetch the primary bucket page for the bucket */ buf = _hash_getbuf(rel, blkno, HASH_WRITE, LH_BUCKET_PAGE); page = BufferGetPage(buf); @@ -141,10 +116,10 @@ _hash_doinsert(Relation rel, IndexTuple itup) */ /* release our write lock without modifying buffer */ - _hash_chgbufaccess(rel, buf, HASH_READ, HASH_NOLOCK); + _hash_chgbufaccess(rel, buf, HASH_WRITE, HASH_NOLOCK); /* chain to a new overflow page */ - buf = _hash_addovflpage(rel, metabuf, buf); + buf = _hash_addovflpage(rel, metabuf, buf, false); page = BufferGetPage(buf); /* should fit now, given test above */ @@ -161,9 +136,6 @@ _hash_doinsert(Relation rel, IndexTuple itup) /* write and release the modified page */ _hash_wrtbuf(rel, buf); - /* We can drop the bucket lock now */ - _hash_droplock(rel, blkno, HASH_SHARE); - /* * Write-lock the metapage so we can increment the tuple count. After * incrementing it, check to see if it's time for a split. diff --git a/src/backend/access/hash/hashovfl.c b/src/backend/access/hash/hashovfl.c index db3e268..184236c 100644 --- a/src/backend/access/hash/hashovfl.c +++ b/src/backend/access/hash/hashovfl.c @@ -82,23 +82,20 @@ blkno_to_bitno(HashMetaPage metap, BlockNumber ovflblkno) * * On entry, the caller must hold a pin but no lock on 'buf'. The pin is * dropped before exiting (we assume the caller is not interested in 'buf' - * anymore). The returned overflow page will be pinned and write-locked; - * it is guaranteed to be empty. + * anymore) if not asked to retain. The pin will be retained only for the + * primary bucket. The returned overflow page will be pinned and + * write-locked; it is guaranteed to be empty. * * The caller must hold a pin, but no lock, on the metapage buffer. * That buffer is returned in the same state. * - * The caller must hold at least share lock on the bucket, to ensure that - * no one else tries to compact the bucket meanwhile. This guarantees that - * 'buf' won't stop being part of the bucket while it's unlocked. - * * NB: since this could be executed concurrently by multiple processes, * one should not assume that the returned overflow page will be the * immediate successor of the originally passed 'buf'. Additional overflow * pages might have been added to the bucket chain in between. */ Buffer -_hash_addovflpage(Relation rel, Buffer metabuf, Buffer buf) +_hash_addovflpage(Relation rel, Buffer metabuf, Buffer buf, bool retain_pin) { Buffer ovflbuf; Page page; @@ -131,7 +128,10 @@ _hash_addovflpage(Relation rel, Buffer metabuf, Buffer buf) break; /* we assume we do not need to write the unmodified page */ - _hash_relbuf(rel, buf); + if ((pageopaque->hasho_flag & LH_BUCKET_PAGE) && retain_pin) + _hash_chgbufaccess(rel, buf, HASH_WRITE, HASH_NOLOCK); + else + _hash_relbuf(rel, buf); buf = _hash_getbuf(rel, nextblkno, HASH_WRITE, LH_OVERFLOW_PAGE); } @@ -149,7 +149,13 @@ _hash_addovflpage(Relation rel, Buffer metabuf, Buffer buf) /* logically chain overflow page to previous page */ pageopaque->hasho_nextblkno = BufferGetBlockNumber(ovflbuf); - _hash_wrtbuf(rel, buf); + if ((pageopaque->hasho_flag & LH_BUCKET_PAGE) && retain_pin) + { + MarkBufferDirty(buf); + _hash_chgbufaccess(rel, buf, HASH_WRITE, HASH_NOLOCK); + } + else + _hash_wrtbuf(rel, buf); return ovflbuf; } @@ -570,7 +576,7 @@ _hash_initbitmap(Relation rel, HashMetaPage metap, BlockNumber blkno, * required that to be true on entry as well, but it's a lot easier for * callers to leave empty overflow pages and let this guy clean it up. * - * Caller must hold exclusive lock on the target bucket. 
This allows + * Caller must hold cleanup lock on the target bucket. This allows * us to safely lock multiple pages in the bucket. * * Since this function is invoked in VACUUM, we provide an access strategy @@ -580,6 +586,7 @@ void _hash_squeezebucket(Relation rel, Bucket bucket, BlockNumber bucket_blkno, + Buffer bucket_buf, BufferAccessStrategy bstrategy) { BlockNumber wblkno; @@ -591,16 +598,13 @@ _hash_squeezebucket(Relation rel, HashPageOpaque wopaque; HashPageOpaque ropaque; bool wbuf_dirty; + bool release_buf = false; /* * start squeezing into the base bucket page. */ wblkno = bucket_blkno; - wbuf = _hash_getbuf_with_strategy(rel, - wblkno, - HASH_WRITE, - LH_BUCKET_PAGE, - bstrategy); + wbuf = bucket_buf; wpage = BufferGetPage(wbuf); wopaque = (HashPageOpaque) PageGetSpecialPointer(wpage); @@ -669,12 +673,17 @@ _hash_squeezebucket(Relation rel, { Assert(!PageIsEmpty(wpage)); + if (wblkno != bucket_blkno) + release_buf = true; + wblkno = wopaque->hasho_nextblkno; Assert(BlockNumberIsValid(wblkno)); - if (wbuf_dirty) + if (wbuf_dirty && release_buf) _hash_wrtbuf(rel, wbuf); - else + else if (wbuf_dirty) + MarkBufferDirty(wbuf); + else if (release_buf) _hash_relbuf(rel, wbuf); /* nothing more to do if we reached the read page */ @@ -700,6 +709,7 @@ _hash_squeezebucket(Relation rel, wopaque = (HashPageOpaque) PageGetSpecialPointer(wpage); Assert(wopaque->hasho_bucket == bucket); wbuf_dirty = false; + release_buf = false; } /* @@ -733,11 +743,17 @@ _hash_squeezebucket(Relation rel, /* are we freeing the page adjacent to wbuf? */ if (rblkno == wblkno) { - /* yes, so release wbuf lock first */ - if (wbuf_dirty) + if (wblkno != bucket_blkno) + release_buf = true; + + /* yes, so release wbuf lock first if needed */ + if (wbuf_dirty && release_buf) _hash_wrtbuf(rel, wbuf); - else + else if (wbuf_dirty) + MarkBufferDirty(wbuf); + else if (release_buf) _hash_relbuf(rel, wbuf); + /* free this overflow page (releases rbuf) */ _hash_freeovflpage(rel, rbuf, bstrategy); /* done */ diff --git a/src/backend/access/hash/hashpage.c b/src/backend/access/hash/hashpage.c index 178463f..1ba4d52 100644 --- a/src/backend/access/hash/hashpage.c +++ b/src/backend/access/hash/hashpage.c @@ -38,7 +38,7 @@ static bool _hash_alloc_buckets(Relation rel, BlockNumber firstblock, uint32 nblocks); static void _hash_splitbucket(Relation rel, Buffer metabuf, Bucket obucket, Bucket nbucket, - BlockNumber start_oblkno, + Buffer obuf, Buffer nbuf, uint32 maxbucket, uint32 highmask, uint32 lowmask); @@ -55,46 +55,6 @@ static void _hash_splitbucket(Relation rel, Buffer metabuf, /* - * _hash_getlock() -- Acquire an lmgr lock. - * - * 'whichlock' should the block number of a bucket's primary bucket page to - * acquire the per-bucket lock. (See README for details of the use of these - * locks.) - * - * 'access' must be HASH_SHARE or HASH_EXCLUSIVE. - */ -void -_hash_getlock(Relation rel, BlockNumber whichlock, int access) -{ - if (USELOCKING(rel)) - LockPage(rel, whichlock, access); -} - -/* - * _hash_try_getlock() -- Acquire an lmgr lock, but only if it's free. - * - * Same as above except we return FALSE without blocking if lock isn't free. - */ -bool -_hash_try_getlock(Relation rel, BlockNumber whichlock, int access) -{ - if (USELOCKING(rel)) - return ConditionalLockPage(rel, whichlock, access); - else - return true; -} - -/* - * _hash_droplock() -- Release an lmgr lock. 
- */ -void -_hash_droplock(Relation rel, BlockNumber whichlock, int access) -{ - if (USELOCKING(rel)) - UnlockPage(rel, whichlock, access); -} - -/* * _hash_getbuf() -- Get a buffer by block number for read or write. * * 'access' must be HASH_READ, HASH_WRITE, or HASH_NOLOCK. @@ -489,9 +449,8 @@ _hash_pageinit(Page page, Size size) /* * Attempt to expand the hash table by creating one new bucket. * - * This will silently do nothing if it cannot get the needed locks. - * - * The caller should hold no locks on the hash index. + * This will silently do nothing if there are active scans of our own + * backend or the old bucket contains tuples from previous split. * * The caller must hold a pin, but no lock, on the metapage buffer. * The buffer is returned in the same state. @@ -506,6 +465,9 @@ _hash_expandtable(Relation rel, Buffer metabuf) BlockNumber start_oblkno; BlockNumber start_nblkno; Buffer buf_nblkno; + Buffer buf_oblkno; + Page opage; + HashPageOpaque oopaque; uint32 maxbucket; uint32 highmask; uint32 lowmask; @@ -548,11 +510,9 @@ _hash_expandtable(Relation rel, Buffer metabuf) goto fail; /* - * Determine which bucket is to be split, and attempt to lock the old - * bucket. If we can't get the lock, give up. - * - * The lock protects us against other backends, but not against our own - * backend. Must check for active scans separately. + * Determine which bucket is to be split, and if it still contains tuples + * from previous split or there is any active scan of our own backend, + * then give up. */ new_bucket = metap->hashm_maxbucket + 1; @@ -563,11 +523,18 @@ _hash_expandtable(Relation rel, Buffer metabuf) if (_hash_has_active_scan(rel, old_bucket)) goto fail; - if (!_hash_try_getlock(rel, start_oblkno, HASH_EXCLUSIVE)) + buf_oblkno = _hash_getbuf(rel, start_oblkno, HASH_READ, LH_BUCKET_PAGE); + + opage = BufferGetPage(buf_oblkno); + oopaque = (HashPageOpaque) PageGetSpecialPointer(opage); + if (oopaque->hasho_flag & LH_BUCKET_PAGE_HAS_GARBAGE) + { + _hash_relbuf(rel, buf_oblkno); goto fail; + } /* - * Likewise lock the new bucket (should never fail). + * There shouldn't be any active scan on new bucket. * * Note: it is safe to compute the new bucket's blkno here, even though we * may still need to update the BUCKET_TO_BLKNO mapping. This is because @@ -579,9 +546,6 @@ _hash_expandtable(Relation rel, Buffer metabuf) if (_hash_has_active_scan(rel, new_bucket)) elog(ERROR, "scan in progress on supposedly new bucket"); - if (!_hash_try_getlock(rel, start_nblkno, HASH_EXCLUSIVE)) - elog(ERROR, "could not get lock on supposedly new bucket"); - /* * If the split point is increasing (hashm_maxbucket's log base 2 * increases), we need to allocate a new batch of bucket pages. 
@@ -600,8 +564,7 @@ _hash_expandtable(Relation rel, Buffer metabuf) if (!_hash_alloc_buckets(rel, start_nblkno, new_bucket)) { /* can't split due to BlockNumber overflow */ - _hash_droplock(rel, start_oblkno, HASH_EXCLUSIVE); - _hash_droplock(rel, start_nblkno, HASH_EXCLUSIVE); + _hash_relbuf(rel, buf_oblkno); goto fail; } } @@ -665,13 +628,9 @@ _hash_expandtable(Relation rel, Buffer metabuf) /* Relocate records to the new bucket */ _hash_splitbucket(rel, metabuf, old_bucket, new_bucket, - start_oblkno, buf_nblkno, + buf_oblkno, buf_nblkno, maxbucket, highmask, lowmask); - /* Release bucket locks, allowing others to access them */ - _hash_droplock(rel, start_oblkno, HASH_EXCLUSIVE); - _hash_droplock(rel, start_nblkno, HASH_EXCLUSIVE); - return; /* Here if decide not to split or fail to acquire old bucket lock */ @@ -745,6 +704,10 @@ _hash_alloc_buckets(Relation rel, BlockNumber firstblock, uint32 nblocks) * The buffer is returned in the same state. (The metapage is only * touched if it becomes necessary to add or remove overflow pages.) * + * Split needs to hold pins on the primary bucket pages of both the old + * and new buckets till the end of the operation. This is to prevent + * vacuum from starting while the split is in progress. + * * In addition, the caller must have created the new bucket's base page, * which is passed in buffer nbuf, pinned and write-locked. That lock and * pin are released here. (The API is set up this way because we must do @@ -756,37 +719,46 @@ _hash_splitbucket(Relation rel, Buffer metabuf, Bucket obucket, Bucket nbucket, - BlockNumber start_oblkno, + Buffer obuf, Buffer nbuf, uint32 maxbucket, uint32 highmask, uint32 lowmask) { - Buffer obuf; + Buffer bucket_obuf; + Buffer bucket_nbuf; Page opage; Page npage; HashPageOpaque oopaque; HashPageOpaque nopaque; + HashPageOpaque bucket_nopaque; - /* - * It should be okay to simultaneously write-lock pages from each bucket, - * since no one else can be trying to acquire buffer lock on pages of - * either bucket. - */ - obuf = _hash_getbuf(rel, start_oblkno, HASH_WRITE, LH_BUCKET_PAGE); + bucket_nbuf = nbuf; + bucket_obuf = obuf; opage = BufferGetPage(obuf); oopaque = (HashPageOpaque) PageGetSpecialPointer(opage); + /* + * Mark the old bucket to indicate that it has deletable tuples. Vacuum + * will clear this flag after deleting such tuples. + */ + oopaque->hasho_flag |= LH_BUCKET_PAGE_HAS_GARBAGE; + npage = BufferGetPage(nbuf); - /* initialize the new bucket's primary page */ + /* + * initialize the new bucket's primary page and mark it to indicate that + * split is in progress.
+ */ nopaque = (HashPageOpaque) PageGetSpecialPointer(npage); nopaque->hasho_prevblkno = InvalidBlockNumber; nopaque->hasho_nextblkno = InvalidBlockNumber; nopaque->hasho_bucket = nbucket; - nopaque->hasho_flag = LH_BUCKET_PAGE; + nopaque->hasho_flag = LH_BUCKET_PAGE | LH_BUCKET_PAGE_SPLIT; nopaque->hasho_page_id = HASHO_PAGE_ID; + bucket_nopaque = nopaque; + /* * Partition the tuples in the old bucket between the old bucket and the * new bucket, advancing along the old bucket's overflow bucket chain and @@ -798,8 +770,6 @@ _hash_splitbucket(Relation rel, BlockNumber oblkno; OffsetNumber ooffnum; OffsetNumber omaxoffnum; - OffsetNumber deletable[MaxOffsetNumber]; - int ndeletable = 0; /* Scan each tuple in old page */ omaxoffnum = PageGetMaxOffsetNumber(opage); @@ -822,6 +792,18 @@ _hash_splitbucket(Relation rel, if (bucket == nbucket) { + Size itupsize = 0; + + /* + * mark the index tuple as moved by split, such tuples are + * skipped by scan if there is split in progress for a primary + * bucket. + */ + itupsize = itup->t_info & INDEX_SIZE_MASK; + itup->t_info &= ~INDEX_SIZE_MASK; + itup->t_info |= INDEX_MOVED_BY_SPLIT_MASK; + itup->t_info |= itupsize; + /* * insert the tuple into the new bucket. if it doesn't fit on * the current page in the new bucket, we must allocate a new @@ -840,9 +822,10 @@ _hash_splitbucket(Relation rel, /* write out nbuf and drop lock, but keep pin */ _hash_chgbufaccess(rel, nbuf, HASH_WRITE, HASH_NOLOCK); /* chain to a new overflow page */ - nbuf = _hash_addovflpage(rel, metabuf, nbuf); + nbuf = _hash_addovflpage(rel, metabuf, nbuf, + nopaque->hasho_flag & LH_BUCKET_PAGE ? true : false); npage = BufferGetPage(nbuf); - /* we don't need nopaque within the loop */ + nopaque = (HashPageOpaque) PageGetSpecialPointer(npage); } /* @@ -853,11 +836,6 @@ _hash_splitbucket(Relation rel, * the new page and qsort them before insertion. */ (void) _hash_pgaddtup(rel, nbuf, itemsz, itup); - - /* - * Mark tuple for deletion from old page. - */ - deletable[ndeletable++] = ooffnum; } else { @@ -870,15 +848,9 @@ _hash_splitbucket(Relation rel, oblkno = oopaque->hasho_nextblkno; - /* - * Done scanning this old page. If we moved any tuples, delete them - * from the old page. - */ - if (ndeletable > 0) - { - PageIndexMultiDelete(opage, deletable, ndeletable); - _hash_wrtbuf(rel, obuf); - } + /* retain the pin on the old primary bucket */ + if (obuf == bucket_obuf) + _hash_chgbufaccess(rel, obuf, HASH_READ, HASH_NOLOCK); else _hash_relbuf(rel, obuf); @@ -887,18 +859,24 @@ _hash_splitbucket(Relation rel, break; /* Else, advance to next old page */ - obuf = _hash_getbuf(rel, oblkno, HASH_WRITE, LH_OVERFLOW_PAGE); + obuf = _hash_getbuf(rel, oblkno, HASH_READ, LH_OVERFLOW_PAGE); opage = BufferGetPage(obuf); oopaque = (HashPageOpaque) PageGetSpecialPointer(opage); } + /* indicate that split is finished */ + bucket_nopaque->hasho_flag &= ~LH_BUCKET_PAGE_SPLIT; + + /* release the pin on the old primary bucket */ + _hash_dropbuf(rel, bucket_obuf); + /* * We're at the end of the old bucket chain, so we're done partitioning - * the tuples. Before quitting, call _hash_squeezebucket to ensure the - * tuples remaining in the old bucket (including the overflow pages) are - * packed as tightly as possible. The new bucket is already tight. + * the tuples. 
*/ _hash_wrtbuf(rel, nbuf); - _hash_squeezebucket(rel, obucket, start_oblkno, NULL); + /* release the pin on the new primary bucket */ + if (!(nopaque->hasho_flag & LH_BUCKET_PAGE)) + _hash_dropbuf(rel, bucket_nbuf); } diff --git a/src/backend/access/hash/hashsearch.c b/src/backend/access/hash/hashsearch.c index 4825558..b73559a 100644 --- a/src/backend/access/hash/hashsearch.c +++ b/src/backend/access/hash/hashsearch.c @@ -20,6 +20,7 @@ #include "pgstat.h" #include "utils/rel.h" +static BlockNumber _hash_get_oldblk(Relation rel, HashPageOpaque opaque); /* * _hash_next() -- Get the next item in a scan. @@ -72,7 +73,23 @@ _hash_readnext(Relation rel, BlockNumber blkno; blkno = (*opaquep)->hasho_nextblkno; - _hash_relbuf(rel, *bufp); + + /* + * Retain the pin on the primary bucket page till the end of scan to + * ensure that Vacuum can't delete the tuples (that are moved by split to + * the new bucket) which are required by the scans that started on split + * buckets before the new bucket's split-in-progress flag + * (LH_BUCKET_PAGE_SPLIT) is cleared. The requirement to retain a pin on + * the primary bucket could be relaxed for buckets that are not being + * split by maintaining a flag like has_garbage in the bucket, but we + * still need to retain the pin for the squeeze phase; otherwise the + * movement of tuples could change the ordering of scan results. + */ + if ((*opaquep)->hasho_flag & LH_BUCKET_PAGE) + _hash_chgbufaccess(rel, *bufp, HASH_READ, HASH_NOLOCK); + else + _hash_relbuf(rel, *bufp); + *bufp = InvalidBuffer; /* check for interrupts while we're not holding any buffer lock */ CHECK_FOR_INTERRUPTS(); @@ -94,7 +111,23 @@ _hash_readprev(Relation rel, BlockNumber blkno; blkno = (*opaquep)->hasho_prevblkno; - _hash_relbuf(rel, *bufp); + + /* + * Retain the pin on the primary bucket page till the end of scan to + * ensure that Vacuum can't delete the tuples (that are moved by split to + * the new bucket) which are required by the scans that started on split + * buckets before the new bucket's split-in-progress flag + * (LH_BUCKET_PAGE_SPLIT) is cleared. The requirement to retain a pin on + * the primary bucket could be relaxed for buckets that are not being + * split by maintaining a flag like has_garbage in the bucket, but we + * still need to retain the pin for the squeeze phase; otherwise the + * movement of tuples could change the ordering of scan results. + */ + if ((*opaquep)->hasho_flag & LH_BUCKET_PAGE) + _hash_chgbufaccess(rel, *bufp, HASH_READ, HASH_NOLOCK); + else + _hash_relbuf(rel, *bufp); + *bufp = InvalidBuffer; /* check for interrupts while we're not holding any buffer lock */ CHECK_FOR_INTERRUPTS(); @@ -125,8 +158,6 @@ _hash_first(IndexScanDesc scan, ScanDirection dir) uint32 hashkey; Bucket bucket; BlockNumber blkno; - BlockNumber oldblkno = InvalidBuffer; - bool retry = false; Buffer buf; Buffer metabuf; Page page; @@ -192,52 +223,21 @@ _hash_first(IndexScanDesc scan, ScanDirection dir) metap = HashPageGetMeta(page); /* - * Loop until we get a lock on the correct target bucket. + * Compute the target bucket number, and convert to block number. */ - for (;;) - { - /* - * Compute the target bucket number, and convert to block number. - */ - bucket = _hash_hashkey2bucket(hashkey, - metap->hashm_maxbucket, - metap->hashm_highmask, - metap->hashm_lowmask); - - blkno = BUCKET_TO_BLKNO(metap, bucket); - - /* Release metapage lock, but keep pin.
*/ - _hash_chgbufaccess(rel, metabuf, HASH_READ, HASH_NOLOCK); - - /* - * If the previous iteration of this loop locked what is still the - * correct target bucket, we are done. Otherwise, drop any old lock - * and lock what now appears to be the correct bucket. - */ - if (retry) - { - if (oldblkno == blkno) - break; - _hash_droplock(rel, oldblkno, HASH_SHARE); - } - _hash_getlock(rel, blkno, HASH_SHARE); + bucket = _hash_hashkey2bucket(hashkey, + metap->hashm_maxbucket, + metap->hashm_highmask, + metap->hashm_lowmask); - /* - * Reacquire metapage lock and check that no bucket split has taken - * place while we were awaiting the bucket lock. - */ - _hash_chgbufaccess(rel, metabuf, HASH_NOLOCK, HASH_READ); - oldblkno = blkno; - retry = true; - } + blkno = BUCKET_TO_BLKNO(metap, bucket); /* done with the metapage */ - _hash_dropbuf(rel, metabuf); + _hash_relbuf(rel, metabuf); /* Update scan opaque state to show we have lock on the bucket */ so->hashso_bucket = bucket; so->hashso_bucket_valid = true; - so->hashso_bucket_blkno = blkno; /* Fetch the primary bucket page for the bucket */ buf = _hash_getbuf(rel, blkno, HASH_READ, LH_BUCKET_PAGE); @@ -245,6 +245,54 @@ _hash_first(IndexScanDesc scan, ScanDirection dir) opaque = (HashPageOpaque) PageGetSpecialPointer(page); Assert(opaque->hasho_bucket == bucket); + so->hashso_bucket_buf = buf; + + /* + * If a bucket split is in progress, then we need to skip tuples that + * are moved from the old bucket. To ensure that vacuum doesn't clean any + * tuples from the old or new bucket while this scan is in progress, + * maintain a pin on both of the buckets. Here, we have to be cautious + * about lock ordering: first acquire the lock on the old bucket, release + * that lock (but not the pin), then acquire the lock on the new bucket + * and re-verify whether the bucket split is still in progress. Acquiring + * the lock on the old bucket first ensures that vacuum waits for this + * scan to finish. + */ + if (opaque->hasho_flag & LH_BUCKET_PAGE_SPLIT) + { + BlockNumber old_blkno; + Buffer old_buf; + + old_blkno = _hash_get_oldblk(rel, opaque); + + /* + * release the lock on new bucket and re-acquire it after acquiring + * the lock on old bucket. + */ + _hash_relbuf(rel, buf); + + old_buf = _hash_getbuf(rel, old_blkno, HASH_READ, LH_BUCKET_PAGE); + + /* + * remember the old bucket buffer so as to release it later. + */ + so->hashso_old_bucket_buf = old_buf; + _hash_chgbufaccess(rel, old_buf, HASH_READ, HASH_NOLOCK); + + buf = _hash_getbuf(rel, blkno, HASH_READ, LH_BUCKET_PAGE); + page = BufferGetPage(buf); + opaque = (HashPageOpaque) PageGetSpecialPointer(page); + Assert(opaque->hasho_bucket == bucket); + + if (opaque->hasho_flag & LH_BUCKET_PAGE_SPLIT) + so->hashso_skip_moved_tuples = true; + else + { + _hash_dropbuf(rel, so->hashso_old_bucket_buf); + so->hashso_old_bucket_buf = InvalidBuffer; + } + } + /* If a backwards scan is requested, move to the end of the chain */ if (ScanDirectionIsBackward(dir)) { @@ -273,6 +321,13 @@ _hash_first(IndexScanDesc scan, ScanDirection dir) * false. Else, return true and set the hashso_curpos for the * scan to the right thing. * + * Here we also scan the old bucket if a split of the current bucket + * was in progress at the start of the scan. The basic idea is to + * skip the tuples that are moved by split while scanning the current + * bucket and then scan the old bucket to cover all such tuples. This + * is done to ensure that we don't miss any tuples in the current scan + * when a split was in progress.
+ * * 'bufP' points to the current buffer, which is pinned and read-locked. * On success exit, we have pin and read-lock on whichever page * contains the right item; on failure, we have released all buffers. @@ -338,6 +393,16 @@ _hash_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir) { Assert(offnum >= FirstOffsetNumber); itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum)); + + /* + * skip the tuples that are moved by split operation + * for the scan that has started when split was in + * progress + */ + if (so->hashso_skip_moved_tuples && + (itup->t_info & INDEX_MOVED_BY_SPLIT_MASK)) + continue; + if (so->hashso_sk_hash == _hash_get_indextuple_hashkey(itup)) break; /* yes, so exit for-loop */ } @@ -353,9 +418,52 @@ _hash_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir) } else { - /* end of bucket */ - itup = NULL; - break; /* exit for-loop */ + /* + * end of bucket, scan old bucket if there was a split + * in progress at the start of scan. + */ + if (so->hashso_skip_moved_tuples) + { + page = BufferGetPage(so->hashso_bucket_buf); + opaque = (HashPageOpaque) PageGetSpecialPointer(page); + + blkno = _hash_get_oldblk(rel, opaque); + + Assert(BlockNumberIsValid(blkno)); + buf = _hash_getbuf(rel, blkno, HASH_READ, LH_BUCKET_PAGE); + + /* + * remember the old bucket buffer so as to release + * the pin at end of scan. If this scan already + * has a pin on old buffer, then release it as one + * pin is sufficient to hold-off vacuum to clean + * the bucket where scan is in progress. + */ + if (BufferIsValid(so->hashso_old_bucket_buf)) + { + _hash_dropbuf(rel, so->hashso_old_bucket_buf); + so->hashso_old_bucket_buf = InvalidBuffer; + } + so->hashso_old_bucket_buf = buf; + + page = BufferGetPage(buf); + maxoff = PageGetMaxOffsetNumber(page); + offnum = _hash_binsearch(page, so->hashso_sk_hash); + + /* + * setting hashso_skip_moved_tuples to false + * ensures that we don't check for tuples that are + * moved by split in old bucket and it also + * ensures that we won't retry to scan the old + * bucket once the scan for same is finished. + */ + so->hashso_skip_moved_tuples = false; + } + else + { + itup = NULL; + break; /* exit for-loop */ + } } } break; @@ -379,6 +487,16 @@ _hash_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir) { Assert(offnum <= maxoff); itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum)); + + /* + * skip the tuples that are moved by split operation + * for the scan that has started when split was in + * progress + */ + if (so->hashso_skip_moved_tuples && + (itup->t_info & INDEX_MOVED_BY_SPLIT_MASK)) + continue; + if (so->hashso_sk_hash == _hash_get_indextuple_hashkey(itup)) break; /* yes, so exit for-loop */ } @@ -394,9 +512,64 @@ _hash_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir) } else { - /* end of bucket */ - itup = NULL; - break; /* exit for-loop */ + /* + * end of bucket, scan old bucket if there was a split + * in progress at the start of scan. + */ + if (so->hashso_skip_moved_tuples) + { + page = BufferGetPage(so->hashso_bucket_buf); + opaque = (HashPageOpaque) PageGetSpecialPointer(page); + + blkno = _hash_get_oldblk(rel, opaque); + + /* read the old page */ + Assert(BlockNumberIsValid(blkno)); + buf = _hash_getbuf(rel, blkno, HASH_READ, LH_BUCKET_PAGE); + + /* + * remember the old bucket buffer so as to release + * the pin at end of scan. If this scan already + * has a pin on old buffer, then release it as one + * pin is sufficient to hold-off vacuum to clean + * the bucket where scan is in progress. 
*/ + if (BufferIsValid(so->hashso_old_bucket_buf)) + { + _hash_dropbuf(rel, so->hashso_old_bucket_buf); + so->hashso_old_bucket_buf = InvalidBuffer; + } + so->hashso_old_bucket_buf = buf; + + page = BufferGetPage(buf); + + opaque = (HashPageOpaque) PageGetSpecialPointer(page); + + /* + * For backward scan, we need to start scan from + * the last overflow page of old bucket till + * primary bucket page. + */ + while (BlockNumberIsValid(opaque->hasho_nextblkno)) + _hash_readnext(rel, &buf, &page, &opaque); + + maxoff = PageGetMaxOffsetNumber(page); + offnum = _hash_binsearch(page, so->hashso_sk_hash); + + /* + * setting hashso_skip_moved_tuples to false + * ensures that we don't check for tuples that are + * moved by split in old bucket and it also + * ensures that we won't retry to scan the old + * bucket once the scan for same is finished. + */ + so->hashso_skip_moved_tuples = false; + } + else + { + itup = NULL; + break; /* exit for-loop */ + } } } break; @@ -425,3 +598,39 @@ _hash_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir) ItemPointerSet(current, blkno, offnum); return true; } + +/* + * _hash_get_oldblk() -- get the block number from which the current + * bucket is being split. + */ +static BlockNumber +_hash_get_oldblk(Relation rel, HashPageOpaque opaque) +{ + Bucket curr_bucket; + Bucket old_bucket; + uint32 mask; + Buffer metabuf; + HashMetaPage metap; + BlockNumber blkno; + + /* + * To get the old bucket from the current bucket, we need a mask to modulo + * into the lower half of the table. This mask is stored in the meta page + * as hashm_lowmask, but here we can't rely on it, because we need the + * value of lowmask that was in effect at the time the bucket split was + * started. lowmask is always equal to the last bucket number in the lower + * half of the table, which can be calculated from the current bucket. + */ + curr_bucket = opaque->hasho_bucket; + mask = (((uint32) 1) << (_hash_log2((uint32) curr_bucket + 1) - 1)) - 1; + old_bucket = curr_bucket & mask; + + metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ, LH_META_PAGE); + metap = HashPageGetMeta(BufferGetPage(metabuf)); + + blkno = BUCKET_TO_BLKNO(metap, old_bucket); + + _hash_relbuf(rel, metabuf); + + return blkno; +} diff --git a/src/include/access/hash.h b/src/include/access/hash.h index fa3f9b6..cd40ed7 100644 --- a/src/include/access/hash.h +++ b/src/include/access/hash.h @@ -52,6 +52,8 @@ typedef uint32 Bucket; #define LH_BUCKET_PAGE (1 << 1) #define LH_BITMAP_PAGE (1 << 2) #define LH_META_PAGE (1 << 3) +#define LH_BUCKET_PAGE_SPLIT (1 << 4) +#define LH_BUCKET_PAGE_HAS_GARBAGE (1 << 5) typedef struct HashPageOpaqueData { @@ -88,12 +90,6 @@ typedef struct HashScanOpaqueData bool hashso_bucket_valid; /* - * If we have a share lock on the bucket, we record it here. When - * hashso_bucket_blkno is zero, we have no such lock. - */ - BlockNumber hashso_bucket_blkno; - - /* * We also want to remember which buffer we're currently examining in the * scan. We keep the buffer pinned (but not locked) across hashgettuple * calls, in order to avoid doing a ReadBuffer() for every tuple in the * index. */ Buffer hashso_curbuf; + /* remember the buffer associated with primary bucket */ + Buffer hashso_bucket_buf; + + /* + * remember the buffer associated with old primary bucket which is + * required during the scan of the bucket for which split is in progress.
+ */ + Buffer hashso_old_bucket_buf; + /* Current position of the scan, as an index TID */ ItemPointerData hashso_curpos; /* Current position of the scan, as a heap TID */ ItemPointerData hashso_heappos; + + /* Whether scan needs to skip tuples that are moved by split */ + bool hashso_skip_moved_tuples; } HashScanOpaqueData; typedef HashScanOpaqueData *HashScanOpaque; @@ -176,6 +184,8 @@ typedef HashMetaPageData *HashMetaPage; sizeof(ItemIdData) - \ MAXALIGN(sizeof(HashPageOpaqueData))) +#define INDEX_MOVED_BY_SPLIT_MASK 0x2000 + #define HASH_MIN_FILLFACTOR 10 #define HASH_DEFAULT_FILLFACTOR 75 @@ -299,19 +309,17 @@ extern OffsetNumber _hash_pgaddtup(Relation rel, Buffer buf, Size itemsize, IndexTuple itup); /* hashovfl.c */ -extern Buffer _hash_addovflpage(Relation rel, Buffer metabuf, Buffer buf); +extern Buffer _hash_addovflpage(Relation rel, Buffer metabuf, Buffer buf, bool retain_pin); extern BlockNumber _hash_freeovflpage(Relation rel, Buffer ovflbuf, BufferAccessStrategy bstrategy); extern void _hash_initbitmap(Relation rel, HashMetaPage metap, BlockNumber blkno, ForkNumber forkNum); extern void _hash_squeezebucket(Relation rel, Bucket bucket, BlockNumber bucket_blkno, + Buffer bucket_buf, BufferAccessStrategy bstrategy); /* hashpage.c */ -extern void _hash_getlock(Relation rel, BlockNumber whichlock, int access); -extern bool _hash_try_getlock(Relation rel, BlockNumber whichlock, int access); -extern void _hash_droplock(Relation rel, BlockNumber whichlock, int access); extern Buffer _hash_getbuf(Relation rel, BlockNumber blkno, int access, int flags); extern Buffer _hash_getinitbuf(Relation rel, BlockNumber blkno);
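Note (illustration, not part of the patch): the INDEX_MOVED_BY_SPLIT_MASK bit used above works because an index tuple's t_info reserves the low 13 bits (INDEX_SIZE_MASK, 0x1FFF) for the tuple size, so bit 0x2000 is free for the hash AM to claim. The standalone sketch below uses a simplified stand-in for IndexTupleData (not the real struct from access/itup.h) to show how split-time code can set the flag without disturbing the stored size, and how a scan can test it:

/* Standalone illustration of the moved-by-split flag; FakeIndexTuple is a
 * simplified stand-in, not the real IndexTupleData from access/itup.h. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define INDEX_SIZE_MASK            0x1FFF  /* low 13 bits hold the tuple size */
#define INDEX_MOVED_BY_SPLIT_MASK  0x2000  /* otherwise-unused bit claimed by hash */

typedef struct
{
    uint16_t    t_info;     /* stand-in for IndexTupleData.t_info */
} FakeIndexTuple;

/* Mark a tuple as moved by split, preserving its stored size. */
static void
mark_moved_by_split(FakeIndexTuple *itup)
{
    uint16_t    itupsize = itup->t_info & INDEX_SIZE_MASK;

    itup->t_info &= ~INDEX_SIZE_MASK;
    itup->t_info |= INDEX_MOVED_BY_SPLIT_MASK;
    itup->t_info |= itupsize;
}

int
main(void)
{
    FakeIndexTuple tup = {.t_info = 24};    /* a 24-byte tuple */

    mark_moved_by_split(&tup);

    /* The size bits are unchanged and the flag is visible to scans. */
    assert((tup.t_info & INDEX_SIZE_MASK) == 24);
    assert(tup.t_info & INDEX_MOVED_BY_SPLIT_MASK);
    printf("size=%d moved=%d\n",
           tup.t_info & INDEX_SIZE_MASK,
           (tup.t_info & INDEX_MOVED_BY_SPLIT_MASK) != 0);
    return 0;
}

Because the size bits are untouched, size lookups on tuples carrying the flag keep working; only scans that explicitly test the bit (as _hash_step does above) treat such tuples specially.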
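Note (illustration, not part of the patch): _hash_get_oldblk() has to recover the lowmask that was in effect when the split started; clearing the highest set bit of the current (new) bucket number yields the old bucket number. A self-contained check of that arithmetic, with hash_log2() as a local stand-in for _hash_log2() and assuming the mask is computed as (1 << (ceil_log2(bucket + 1) - 1)) - 1:

/* Standalone check of the old-bucket computation sketched for
 * _hash_get_oldblk(); hash_log2() is a local stand-in for _hash_log2(). */
#include <assert.h>
#include <stdint.h>

/* smallest i such that (1 << i) >= num, i.e. ceil(log2(num)) */
static uint32_t
hash_log2(uint32_t num)
{
    uint32_t    i,
                limit;

    for (i = 0, limit = 1; limit < num; i++, limit <<= 1)
        ;
    return i;
}

/* old bucket = new bucket with its highest set bit cleared */
static uint32_t
old_bucket_from_new(uint32_t new_bucket)
{
    uint32_t    mask = (((uint32_t) 1) << (hash_log2(new_bucket + 1) - 1)) - 1;

    return new_bucket & mask;
}

int
main(void)
{
    /* bucket 1 was split from 0, 6 from 2, 7 from 3, 12 from 4 */
    assert(old_bucket_from_new(1) == 0);
    assert(old_bucket_from_new(6) == 2);
    assert(old_bucket_from_new(7) == 3);
    assert(old_bucket_from_new(12) == 4);
    return 0;
}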