From 7fb80c97650a58e9cf15bb8c6a09e9b034471af4 Mon Sep 17 00:00:00 2001 From: Thomas Munro Date: Thu, 12 Jul 2018 13:14:02 +1200 Subject: [PATCH] Use pread()/pwrite() instead of lseek() + read()/write(). Cut down on system calls by doing random IO using POSIX.1-2008 offset-based IO routines, where available. Remove the code for tracking the 'virtual' seek position. The only reason left to call FileSeek() was to get the file's size, so provide a new function FileSize() instead. Author: Oskari Saarenmaa, Thomas Munro Reviewed-by: Thomas Munro, Jesper Pedersen Discussion: https://postgr.es/m/CAEepm=02rapCpPR3ZGF2vW=SBHSdFYO_bz_f-wwWJonmA3APgw@mail.gmail.com Discussion: https://postgr.es/m/b8748d39-0b19-0514-a1b9-4e5a28e6a208%40gmail.com Discussion: https://postgr.es/m/a86bd200-ebbe-d829-e3ca-0c4474b2fcb7%40ohmu.fi --- configure | 2 +- configure.in | 2 +- src/backend/access/heap/rewriteheap.c | 2 +- src/backend/access/transam/xlog.c | 13 ++ src/backend/storage/file/buffile.c | 46 +----- src/backend/storage/file/fd.c | 217 +++++--------------------- src/backend/storage/smgr/md.c | 35 +---- src/include/pg_config.h.in | 6 + src/include/storage/fd.h | 12 +- 9 files changed, 72 insertions(+), 263 deletions(-) diff --git a/configure b/configure index 6414ec1ea6d..d026ba75248 100755 --- a/configure +++ b/configure @@ -15100,7 +15100,7 @@ fi LIBS_including_readline="$LIBS" LIBS=`echo "$LIBS" | sed -e 's/-ledit//g' -e 's/-lreadline//g'` -for ac_func in cbrt clock_gettime fdatasync getifaddrs getpeerucred getrlimit mbstowcs_l memmove poll posix_fallocate ppoll pstat pthread_is_threaded_np readlink setproctitle setproctitle_fast setsid shm_open symlink sync_file_range utime utimes wcstombs_l +for ac_func in cbrt clock_gettime fdatasync getifaddrs getpeerucred getrlimit mbstowcs_l memmove poll posix_fallocate ppoll pread pstat pthread_is_threaded_np pwrite readlink setproctitle setproctitle_fast setsid shm_open symlink sync_file_range utime utimes wcstombs_l do : 
as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh` ac_fn_c_check_func "$LINENO" "$ac_func" "$as_ac_var" diff --git a/configure.in b/configure.in index 158d5a1ac82..a598e5be04c 100644 --- a/configure.in +++ b/configure.in @@ -1571,7 +1571,7 @@ PGAC_FUNC_WCSTOMBS_L LIBS_including_readline="$LIBS" LIBS=`echo "$LIBS" | sed -e 's/-ledit//g' -e 's/-lreadline//g'` -AC_CHECK_FUNCS([cbrt clock_gettime fdatasync getifaddrs getpeerucred getrlimit mbstowcs_l memmove poll posix_fallocate ppoll pstat pthread_is_threaded_np readlink setproctitle setproctitle_fast setsid shm_open symlink sync_file_range utime utimes wcstombs_l]) +AC_CHECK_FUNCS([cbrt clock_gettime fdatasync getifaddrs getpeerucred getrlimit mbstowcs_l memmove poll posix_fallocate ppoll pread pstat pthread_is_threaded_np pwrite readlink setproctitle setproctitle_fast setsid shm_open symlink sync_file_range utime utimes wcstombs_l]) AC_REPLACE_FUNCS(fseeko) case $host_os in diff --git a/src/backend/access/heap/rewriteheap.c b/src/backend/access/heap/rewriteheap.c index 85f92973c95..5f573bafda6 100644 --- a/src/backend/access/heap/rewriteheap.c +++ b/src/backend/access/heap/rewriteheap.c @@ -922,7 +922,7 @@ logical_heap_rewrite_flush_mappings(RewriteState state) * Note that we deviate from the usual WAL coding practices here, * check the above "Logical rewrite support" comment for reasoning. */ - written = FileWrite(src->vfd, waldata_start, len, + written = FileWrite(src->vfd, waldata_start, len, src->off, WAIT_EVENT_LOGICAL_REWRITE_WRITE); if (written != len) ereport(ERROR, diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 5abaeb005b3..157c8465bd0 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -2484,6 +2484,7 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible) Size nleft; int written; +#ifndef HAVE_PWRITE /* Need to seek in the file? 
*/ if (openLogOff != startoffset) { @@ -2495,6 +2496,7 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible) startoffset))); openLogOff = startoffset; } +#endif /* OK to write the page(s) */ from = XLogCtl->pages + startidx * (Size) XLOG_BLCKSZ; @@ -2504,7 +2506,11 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible) { errno = 0; pgstat_report_wait_start(WAIT_EVENT_WAL_WRITE); +#ifdef HAVE_PWRITE + written = pwrite(openLogFile, from, nleft, startoffset); +#else written = write(openLogFile, from, nleft); +#endif pgstat_report_wait_end(); if (written <= 0) { @@ -2519,6 +2525,7 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible) } nleft -= written; from += written; + startoffset += written; } while (nleft > 0); /* Update state for write */ @@ -11819,6 +11826,7 @@ retry: /* Read the requested page */ readOff = targetPageOff; +#ifndef HAVE_PREAD if (lseek(readFile, (off_t) readOff, SEEK_SET) < 0) { char fname[MAXFNAMELEN]; @@ -11832,9 +11840,14 @@ retry: fname, readOff))); goto next_record_is_invalid; } +#endif pgstat_report_wait_start(WAIT_EVENT_WAL_READ); +#ifdef HAVE_PREAD + r = pread(readFile, readBuf, XLOG_BLCKSZ, (off_t) readOff); +#else r = read(readFile, readBuf, XLOG_BLCKSZ); +#endif if (r != XLOG_BLCKSZ) { char fname[MAXFNAMELEN]; diff --git a/src/backend/storage/file/buffile.c b/src/backend/storage/file/buffile.c index e93813d9737..dd687dfe71f 100644 --- a/src/backend/storage/file/buffile.c +++ b/src/backend/storage/file/buffile.c @@ -67,12 +67,6 @@ struct BufFile int numFiles; /* number of physical files in set */ /* all files except the last have length exactly MAX_PHYSICAL_FILESIZE */ File *files; /* palloc'd array with numFiles entries */ - off_t *offsets; /* palloc'd array with numFiles entries */ - - /* - * offsets[i] is the current seek position of files[i]. We use this to - * avoid making redundant FileSeek calls. - */ bool isInterXact; /* keep open over transactions? */ bool dirty; /* does buffer need to be written? 
*/ @@ -116,7 +110,6 @@ makeBufFileCommon(int nfiles) BufFile *file = (BufFile *) palloc(sizeof(BufFile)); file->numFiles = nfiles; - file->offsets = (off_t *) palloc0(sizeof(off_t) * nfiles); file->isInterXact = false; file->dirty = false; file->resowner = CurrentResourceOwner; @@ -170,10 +163,7 @@ extendBufFile(BufFile *file) file->files = (File *) repalloc(file->files, (file->numFiles + 1) * sizeof(File)); - file->offsets = (off_t *) repalloc(file->offsets, - (file->numFiles + 1) * sizeof(off_t)); file->files[file->numFiles] = pfile; - file->offsets[file->numFiles] = 0L; file->numFiles++; } @@ -396,7 +386,6 @@ BufFileClose(BufFile *file) FileClose(file->files[i]); /* release the buffer space */ pfree(file->files); - pfree(file->offsets); pfree(file); } @@ -422,27 +411,17 @@ BufFileLoadBuffer(BufFile *file) file->curOffset = 0L; } - /* - * May need to reposition physical file. - */ - thisfile = file->files[file->curFile]; - if (file->curOffset != file->offsets[file->curFile]) - { - if (FileSeek(thisfile, file->curOffset, SEEK_SET) != file->curOffset) - return; /* seek failed, read nothing */ - file->offsets[file->curFile] = file->curOffset; - } - /* * Read whatever we can get, up to a full bufferload. */ + thisfile = file->files[file->curFile]; file->nbytes = FileRead(thisfile, file->buffer.data, sizeof(file->buffer), + file->curOffset, WAIT_EVENT_BUFFILE_READ); if (file->nbytes < 0) file->nbytes = 0; - file->offsets[file->curFile] += file->nbytes; /* we choose not to advance curOffset here */ if (file->nbytes > 0) @@ -491,23 +470,14 @@ BufFileDumpBuffer(BufFile *file) if ((off_t) bytestowrite > availbytes) bytestowrite = (int) availbytes; - /* - * May need to reposition physical file. 
- */ thisfile = file->files[file->curFile]; - if (file->curOffset != file->offsets[file->curFile]) - { - if (FileSeek(thisfile, file->curOffset, SEEK_SET) != file->curOffset) - return; /* seek failed, give up */ - file->offsets[file->curFile] = file->curOffset; - } bytestowrite = FileWrite(thisfile, file->buffer.data + wpos, bytestowrite, + file->curOffset, WAIT_EVENT_BUFFILE_WRITE); if (bytestowrite <= 0) return; /* failed to write */ - file->offsets[file->curFile] += bytestowrite; file->curOffset += bytestowrite; wpos += bytestowrite; @@ -803,11 +773,10 @@ BufFileSize(BufFile *file) { off_t lastFileSize; - /* Get the size of the last physical file by seeking to end. */ - lastFileSize = FileSeek(file->files[file->numFiles - 1], 0, SEEK_END); + /* Get the size of the last physical file. */ + lastFileSize = FileSize(file->files[file->numFiles - 1]); if (lastFileSize < 0) return -1; - file->offsets[file->numFiles - 1] = lastFileSize; return ((file->numFiles - 1) * (off_t) MAX_PHYSICAL_FILESIZE) + lastFileSize; @@ -849,13 +818,8 @@ BufFileAppend(BufFile *target, BufFile *source) target->files = (File *) repalloc(target->files, sizeof(File) * newNumFiles); - target->offsets = (off_t *) - repalloc(target->offsets, sizeof(off_t) * newNumFiles); for (i = target->numFiles; i < newNumFiles; i++) - { target->files[i] = source->files[i - target->numFiles]; - target->offsets[i] = source->offsets[i - target->numFiles]; - } target->numFiles = newNumFiles; return startBlock; diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c index 8dd51f17674..a380f794014 100644 --- a/src/backend/storage/file/fd.c +++ b/src/backend/storage/file/fd.c @@ -16,8 +16,8 @@ * including base tables, scratch files (e.g., sort and hash spool * files), and random calls to C library routines like system(3); it * is quite easy to exceed system limits on the number of open files a - * single process can have. 
(This is around 256 on many modern - * operating systems, but can be as low as 32 on others.) + * single process can have. (This is around 1024 on many modern + * operating systems, but may be lower on others.) * * VFDs are managed as an LRU pool, with actual OS file descriptors * being opened and closed as needed. Obviously, if a routine is @@ -167,15 +167,6 @@ int max_safe_fds = 32; /* default if not changed */ #define FileIsNotOpen(file) (VfdCache[file].fd == VFD_CLOSED) -/* - * Note: a VFD's seekPos is normally always valid, but if for some reason - * an lseek() fails, it might become set to FileUnknownPos. We can struggle - * along without knowing the seek position in many cases, but in some places - * we have to fail if we don't have it. - */ -#define FileUnknownPos ((off_t) -1) -#define FilePosIsUnknown(pos) ((pos) < 0) - /* these are the assigned bits in fdstate below: */ #define FD_DELETE_AT_CLOSE (1 << 0) /* T = delete when closed */ #define FD_CLOSE_AT_EOXACT (1 << 1) /* T = close at eoXact */ @@ -189,7 +180,6 @@ typedef struct vfd File nextFree; /* link to next free VFD, if in freelist */ File lruMoreRecently; /* doubly linked recency-of-use list */ File lruLessRecently; - off_t seekPos; /* current logical file position, or -1 */ off_t fileSize; /* current size of file (0 if not temporary) */ char *fileName; /* name of file, or NULL for unused VFD */ /* NB: fileName is malloc'd, and must be free'd when closing the VFD */ @@ -407,9 +397,7 @@ pg_fdatasync(int fd) /* * pg_flush_data --- advise OS that the described dirty data should be flushed * - * offset of 0 with nbytes 0 means that the entire file should be flushed; - * in this case, this function may have side-effects on the file's - * seek position! 
+ * offset of 0 with nbytes 0 means that the entire file should be flushed */ void pg_flush_data(int fd, off_t offset, off_t nbytes) @@ -1029,22 +1017,6 @@ LruDelete(File file) vfdP = &VfdCache[file]; - /* - * Normally we should know the seek position, but if for some reason we - * have lost track of it, try again to get it. If we still can't get it, - * we have a problem: we will be unable to restore the file seek position - * when and if the file is re-opened. But we can't really throw an error - * and refuse to close the file, or activities such as transaction cleanup - * will be broken. - */ - if (FilePosIsUnknown(vfdP->seekPos)) - { - vfdP->seekPos = lseek(vfdP->fd, (off_t) 0, SEEK_CUR); - if (FilePosIsUnknown(vfdP->seekPos)) - elog(LOG, "could not seek file \"%s\" before closing: %m", - vfdP->fileName); - } - /* * Close the file. We aren't expecting this to fail; if it does, better * to leak the FD than to mess up our internal state. @@ -1113,33 +1085,6 @@ LruInsert(File file) { ++nfile; } - - /* - * Seek to the right position. We need no special case for seekPos - * equal to FileUnknownPos, as lseek() will certainly reject that - * (thus completing the logic noted in LruDelete() that we will fail - * to re-open a file if we couldn't get its seek position before - * closing). - */ - if (vfdP->seekPos != (off_t) 0) - { - if (lseek(vfdP->fd, vfdP->seekPos, SEEK_SET) < 0) - { - /* - * If we fail to restore the seek position, treat it like an - * open() failure. 
- */ - int save_errno = errno; - - elog(LOG, "could not seek file \"%s\" after re-opening: %m", - vfdP->fileName); - (void) close(vfdP->fd); - vfdP->fd = VFD_CLOSED; - --nfile; - errno = save_errno; - return -1; - } - } } /* @@ -1406,7 +1351,6 @@ PathNameOpenFilePerm(const char *fileName, int fileFlags, mode_t fileMode) /* Saved flags are adjusted to be OK for re-opening file */ vfdP->fileFlags = fileFlags & ~(O_CREAT | O_TRUNC | O_EXCL); vfdP->fileMode = fileMode; - vfdP->seekPos = 0; vfdP->fileSize = 0; vfdP->fdstate = 0x0; vfdP->resowner = NULL; @@ -1820,7 +1764,6 @@ FileClose(File file) /* * FilePrefetch - initiate asynchronous read of a given range of the file. - * The logical seek position is unaffected. * * Currently the only implementation of this function is using posix_fadvise * which is the simplest standardized interface that accomplishes this. @@ -1867,10 +1810,6 @@ FileWriteback(File file, off_t offset, off_t nbytes, uint32 wait_event_info) file, VfdCache[file].fileName, (int64) offset, (int64) nbytes)); - /* - * Caution: do not call pg_flush_data with nbytes = 0, it could trash the - * file's seek position. We prefer to define that as a no-op here. 
- */ if (nbytes <= 0) return; @@ -1884,7 +1823,8 @@ FileWriteback(File file, off_t offset, off_t nbytes, uint32 wait_event_info) } int -FileRead(File file, char *buffer, int amount, uint32 wait_event_info) +FileRead(File file, char *buffer, int amount, off_t offset, + uint32 wait_event_info) { int returnCode; Vfd *vfdP; @@ -1893,7 +1833,7 @@ FileRead(File file, char *buffer, int amount, uint32 wait_event_info) DO_DB(elog(LOG, "FileRead: %d (%s) " INT64_FORMAT " %d %p", file, VfdCache[file].fileName, - (int64) VfdCache[file].seekPos, + (int64) offset, amount, buffer)); returnCode = FileAccess(file); @@ -1904,16 +1844,16 @@ FileRead(File file, char *buffer, int amount, uint32 wait_event_info) retry: pgstat_report_wait_start(wait_event_info); - returnCode = read(vfdP->fd, buffer, amount); +#ifdef HAVE_PREAD + returnCode = pread(vfdP->fd, buffer, amount, offset); +#else + returnCode = lseek(VfdCache[file].fd, offset, SEEK_SET); + if (returnCode >= 0) + returnCode = read(vfdP->fd, buffer, amount); +#endif pgstat_report_wait_end(); - if (returnCode >= 0) - { - /* if seekPos is unknown, leave it that way */ - if (!FilePosIsUnknown(vfdP->seekPos)) - vfdP->seekPos += returnCode; - } - else + if (returnCode < 0) { /* * Windows may run out of kernel buffers and return "Insufficient @@ -1939,16 +1879,14 @@ retry: /* OK to retry if interrupted */ if (errno == EINTR) goto retry; - - /* Trouble, so assume we don't know the file position anymore */ - vfdP->seekPos = FileUnknownPos; } return returnCode; } int -FileWrite(File file, char *buffer, int amount, uint32 wait_event_info) +FileWrite(File file, char *buffer, int amount, off_t offset, + uint32 wait_event_info) { int returnCode; Vfd *vfdP; @@ -1957,7 +1895,7 @@ FileWrite(File file, char *buffer, int amount, uint32 wait_event_info) DO_DB(elog(LOG, "FileWrite: %d (%s) " INT64_FORMAT " %d %p", file, VfdCache[file].fileName, - (int64) VfdCache[file].seekPos, + (int64) offset, amount, buffer)); returnCode = FileAccess(file); @@ 
-1976,26 +1914,13 @@ FileWrite(File file, char *buffer, int amount, uint32 wait_event_info) */ if (temp_file_limit >= 0 && (vfdP->fdstate & FD_TEMP_FILE_LIMIT)) { - off_t newPos; + off_t past_write = offset + amount; - /* - * Normally we should know the seek position, but if for some reason - * we have lost track of it, try again to get it. Here, it's fine to - * throw an error if we still can't get it. - */ - if (FilePosIsUnknown(vfdP->seekPos)) - { - vfdP->seekPos = lseek(vfdP->fd, (off_t) 0, SEEK_CUR); - if (FilePosIsUnknown(vfdP->seekPos)) - elog(ERROR, "could not seek file \"%s\": %m", vfdP->fileName); - } - - newPos = vfdP->seekPos + amount; - if (newPos > vfdP->fileSize) + if (past_write > vfdP->fileSize) { uint64 newTotal = temporary_files_size; - newTotal += newPos - vfdP->fileSize; + newTotal += past_write - vfdP->fileSize; if (newTotal > (uint64) temp_file_limit * (uint64) 1024) ereport(ERROR, (errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED), @@ -2007,7 +1932,13 @@ FileWrite(File file, char *buffer, int amount, uint32 wait_event_info) retry: errno = 0; pgstat_report_wait_start(wait_event_info); - returnCode = write(vfdP->fd, buffer, amount); +#ifdef HAVE_PWRITE + returnCode = pwrite(VfdCache[file].fd, buffer, amount, offset); +#else + returnCode = lseek(VfdCache[file].fd, offset, SEEK_SET); + if (returnCode >= 0) + returnCode = write(VfdCache[file].fd, buffer, amount); +#endif pgstat_report_wait_end(); /* if write didn't set errno, assume problem is no disk space */ @@ -2016,10 +1947,6 @@ retry: if (returnCode >= 0) { - /* if seekPos is unknown, leave it that way */ - if (!FilePosIsUnknown(vfdP->seekPos)) - vfdP->seekPos += returnCode; - /* * Maintain fileSize and temporary_files_size if it's a temp file. 
* @@ -2029,12 +1956,12 @@ retry: */ if (vfdP->fdstate & FD_TEMP_FILE_LIMIT) { - off_t newPos = vfdP->seekPos; + off_t past_write = offset + amount; - if (newPos > vfdP->fileSize) + if (past_write > vfdP->fileSize) { - temporary_files_size += newPos - vfdP->fileSize; - vfdP->fileSize = newPos; + temporary_files_size += past_write - vfdP->fileSize; + vfdP->fileSize = past_write; } } } @@ -2060,9 +1987,6 @@ retry: /* OK to retry if interrupted */ if (errno == EINTR) goto retry; - - /* Trouble, so assume we don't know the file position anymore */ - vfdP->seekPos = FileUnknownPos; } return returnCode; @@ -2090,92 +2014,25 @@ FileSync(File file, uint32 wait_event_info) } off_t -FileSeek(File file, off_t offset, int whence) +FileSize(File file) { Vfd *vfdP; Assert(FileIsValid(file)); - DO_DB(elog(LOG, "FileSeek: %d (%s) " INT64_FORMAT " " INT64_FORMAT " %d", - file, VfdCache[file].fileName, - (int64) VfdCache[file].seekPos, - (int64) offset, whence)); + DO_DB(elog(LOG, "FileSize %d (%s)", + file, VfdCache[file].fileName)); vfdP = &VfdCache[file]; if (FileIsNotOpen(file)) { - switch (whence) - { - case SEEK_SET: - if (offset < 0) - { - errno = EINVAL; - return (off_t) -1; - } - vfdP->seekPos = offset; - break; - case SEEK_CUR: - if (FilePosIsUnknown(vfdP->seekPos) || - vfdP->seekPos + offset < 0) - { - errno = EINVAL; - return (off_t) -1; - } - vfdP->seekPos += offset; - break; - case SEEK_END: - if (FileAccess(file) < 0) - return (off_t) -1; - vfdP->seekPos = lseek(vfdP->fd, offset, whence); - break; - default: - elog(ERROR, "invalid whence: %d", whence); - break; - } - } - else - { - switch (whence) - { - case SEEK_SET: - if (offset < 0) - { - errno = EINVAL; - return (off_t) -1; - } - if (vfdP->seekPos != offset) - vfdP->seekPos = lseek(vfdP->fd, offset, whence); - break; - case SEEK_CUR: - if (offset != 0 || FilePosIsUnknown(vfdP->seekPos)) - vfdP->seekPos = lseek(vfdP->fd, offset, whence); - break; - case SEEK_END: - vfdP->seekPos = lseek(vfdP->fd, offset, whence); - 
break; - default: - elog(ERROR, "invalid whence: %d", whence); - break; - } + if (FileAccess(file) < 0) + return (off_t) -1; } - return vfdP->seekPos; -} - -/* - * XXX not actually used but here for completeness - */ -#ifdef NOT_USED -off_t -FileTell(File file) -{ - Assert(FileIsValid(file)); - DO_DB(elog(LOG, "FileTell %d (%s)", - file, VfdCache[file].fileName)); - return VfdCache[file].seekPos; + return lseek(VfdCache[file].fd, 0, SEEK_END); } -#endif int FileTruncate(File file, off_t offset, uint32 wait_event_info) diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c index f4374d077be..86013a5c8b2 100644 --- a/src/backend/storage/smgr/md.c +++ b/src/backend/storage/smgr/md.c @@ -522,22 +522,7 @@ mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE); - /* - * Note: because caller usually obtained blocknum by calling mdnblocks, - * which did a seek(SEEK_END), this seek is often redundant and will be - * optimized away by fd.c. It's not redundant, however, if there is a - * partial page at the end of the file. In that case we want to try to - * overwrite the partial page with a full page. It's also not redundant - * if bufmgr.c had to dump another buffer of the same file to make room - * for the new page's buffer. 
- */ - if (FileSeek(v->mdfd_vfd, seekpos, SEEK_SET) != seekpos) - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not seek to block %u in file \"%s\": %m", - blocknum, FilePathName(v->mdfd_vfd)))); - - if ((nbytes = FileWrite(v->mdfd_vfd, buffer, BLCKSZ, WAIT_EVENT_DATA_FILE_EXTEND)) != BLCKSZ) + if ((nbytes = FileWrite(v->mdfd_vfd, buffer, BLCKSZ, seekpos, WAIT_EVENT_DATA_FILE_EXTEND)) != BLCKSZ) { if (nbytes < 0) ereport(ERROR, @@ -748,13 +733,7 @@ mdread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE); - if (FileSeek(v->mdfd_vfd, seekpos, SEEK_SET) != seekpos) - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not seek to block %u in file \"%s\": %m", - blocknum, FilePathName(v->mdfd_vfd)))); - - nbytes = FileRead(v->mdfd_vfd, buffer, BLCKSZ, WAIT_EVENT_DATA_FILE_READ); + nbytes = FileRead(v->mdfd_vfd, buffer, BLCKSZ, seekpos, WAIT_EVENT_DATA_FILE_READ); TRACE_POSTGRESQL_SMGR_MD_READ_DONE(forknum, blocknum, reln->smgr_rnode.node.spcNode, @@ -824,13 +803,7 @@ mdwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE); - if (FileSeek(v->mdfd_vfd, seekpos, SEEK_SET) != seekpos) - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not seek to block %u in file \"%s\": %m", - blocknum, FilePathName(v->mdfd_vfd)))); - - nbytes = FileWrite(v->mdfd_vfd, buffer, BLCKSZ, WAIT_EVENT_DATA_FILE_WRITE); + nbytes = FileWrite(v->mdfd_vfd, buffer, BLCKSZ, seekpos, WAIT_EVENT_DATA_FILE_WRITE); TRACE_POSTGRESQL_SMGR_MD_WRITE_DONE(forknum, blocknum, reln->smgr_rnode.node.spcNode, @@ -1979,7 +1952,7 @@ _mdnblocks(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg) { off_t len; - len = FileSeek(seg->mdfd_vfd, 0L, SEEK_END); + len = FileSize(seg->mdfd_vfd); if (len < 0) ereport(ERROR, (errcode_for_file_access(), diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in index 90dda8ea050..1b02ec0ad90 100644 --- 
a/src/include/pg_config.h.in +++ b/src/include/pg_config.h.in @@ -438,6 +438,9 @@ /* Define to 1 if you have the `ppoll' function. */ #undef HAVE_PPOLL +/* Define to 1 if you have the `pread' function. */ +#undef HAVE_PREAD + /* Define to 1 if you have the `pstat' function. */ #undef HAVE_PSTAT @@ -453,6 +456,9 @@ /* Have PTHREAD_PRIO_INHERIT. */ #undef HAVE_PTHREAD_PRIO_INHERIT +/* Define to 1 if you have the `pwrite' function. */ +#undef HAVE_PWRITE + /* Define to 1 if you have the `random' function. */ #undef HAVE_RANDOM diff --git a/src/include/storage/fd.h b/src/include/storage/fd.h index 8e7c9728f4b..f8b6fa8ece5 100644 --- a/src/include/storage/fd.h +++ b/src/include/storage/fd.h @@ -15,7 +15,7 @@ /* * calls: * - * File {Close, Read, Write, Seek, Tell, Sync} + * File {Close, Read, Write, Size, Sync} * {Path Name Open, Allocate, Free} File * * These are NOT JUST RENAMINGS OF THE UNIX ROUTINES. @@ -42,10 +42,6 @@ #include <dirent.h> -/* - * FileSeek uses the standard UNIX lseek(2) flags.
- */ - typedef int File; @@ -68,10 +64,10 @@ extern File PathNameOpenFilePerm(const char *fileName, int fileFlags, mode_t fil extern File OpenTemporaryFile(bool interXact); extern void FileClose(File file); extern int FilePrefetch(File file, off_t offset, int amount, uint32 wait_event_info); -extern int FileRead(File file, char *buffer, int amount, uint32 wait_event_info); -extern int FileWrite(File file, char *buffer, int amount, uint32 wait_event_info); +extern int FileRead(File file, char *buffer, int amount, off_t offset, uint32 wait_event_info); +extern int FileWrite(File file, char *buffer, int amount, off_t offset, uint32 wait_event_info); extern int FileSync(File file, uint32 wait_event_info); -extern off_t FileSeek(File file, off_t offset, int whence); +extern off_t FileSize(File file); extern int FileTruncate(File file, off_t offset, uint32 wait_event_info); extern void FileWriteback(File file, off_t offset, off_t nbytes, uint32 wait_event_info); extern char *FilePathName(File file); -- 2.17.1 (Apple Git-112)