>From 8e69433fae0b4f51879793f6594c76b99d764818 Mon Sep 17 00:00:00 2001 From: Abhijit Menon-Sen Date: Thu, 28 May 2015 22:02:29 +0530 Subject: Make initdb -S behave like xlog.c:fsync_pgdata() In particular, it should silently skip unreadable/unexpected files in the data directory and follow symlinks only for pg_xlog and under pg_tblspc. --- src/bin/initdb/initdb.c | 235 +++++++++++++++++++++++++----------------------- 1 file changed, 122 insertions(+), 113 deletions(-) diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c index f1c4920..8ebaa55 100644 --- a/src/bin/initdb/initdb.c +++ b/src/bin/initdb/initdb.c @@ -218,9 +218,9 @@ static char **filter_lines_with_token(char **lines, const char *token); static char **readfile(const char *path); static void writefile(char *path, char **lines); static void walkdir(char *path, void (*action) (char *fname, bool isdir)); -static void walktblspc_links(char *path, void (*action) (char *fname, bool isdir)); +static void walktblspc_entries(char *path, void (*action) (char *fname, bool isdir)); static void pre_sync_fname(char *fname, bool isdir); -static void fsync_fname(char *fname, bool isdir); +static void fsync_fname_ext(char *fname, bool isdir); static FILE *popen_check(const char *command, const char *mode); static void exit_nicely(void); static char *get_id(void); @@ -248,7 +248,7 @@ static void load_plpgsql(void); static void vacuum_db(void); static void make_template0(void); static void make_postgres(void); -static void perform_fsync(void); +static void fsync_pgdata(void); static void trapsig(int signum); static void check_ok(void); static char *escape_quotes(const char *src); @@ -518,38 +518,38 @@ writefile(char *path, char **lines) * walkdir: recursively walk a directory, applying the action to each * regular file and directory (including the named directory itself). * - * Adapted from copydir() in copydir.c. + * Adapted from walkdir() in backend/storage/file/fd.c. */ static void walkdir(char *path, void (*action) (char *fname, bool isdir)) { DIR *dir; - struct dirent *direntry; - char subpath[MAXPGPATH]; + struct dirent *de; dir = opendir(path); if (dir == NULL) { fprintf(stderr, _("%s: could not open directory \"%s\": %s\n"), progname, path, strerror(errno)); - exit_nicely(); + return; } - while (errno = 0, (direntry = readdir(dir)) != NULL) + while (errno = 0, (de = readdir(dir)) != NULL) { + char subpath[MAXPGPATH]; struct stat fst; - if (strcmp(direntry->d_name, ".") == 0 || - strcmp(direntry->d_name, "..") == 0) + if (strcmp(de->d_name, ".") == 0 || + strcmp(de->d_name, "..") == 0) continue; - snprintf(subpath, MAXPGPATH, "%s/%s", path, direntry->d_name); + snprintf(subpath, MAXPGPATH, "%s/%s", path, de->d_name); if (lstat(subpath, &fst) < 0) { fprintf(stderr, _("%s: could not stat file \"%s\": %s\n"), progname, subpath, strerror(errno)); - exit_nicely(); + continue; } if (S_ISDIR(fst.st_mode)) @@ -559,75 +559,72 @@ walkdir(char *path, void (*action) (char *fname, bool isdir)) } if (errno) - { fprintf(stderr, _("%s: could not read directory \"%s\": %s\n"), progname, path, strerror(errno)); - exit_nicely(); - } - if (closedir(dir)) - { - fprintf(stderr, _("%s: could not close directory \"%s\": %s\n"), - progname, path, strerror(errno)); - exit_nicely(); - } + (void) closedir(dir); - /* - * It's important to fsync the destination directory itself as individual - * file fsyncs don't guarantee that the directory entry for the file is - * synced. Recent versions of ext4 have made the window much wider but - * it's been an issue for ext3 and other filesystems in the past. - */ (*action) (path, true); } /* - * walktblspc_links: call walkdir on each entry under the given - * pg_tblspc directory, or do nothing if pg_tblspc doesn't exist. + * walktblspc_entries -- apply the action to the entries in pb_tblspc + * + * We expect to find only symlinks to tablespace directories here, but + * we'll apply the action to regular files, and call walkdir() on any + * directories as well. + * + * Adapted from walktblspc_entries in backend/storage/file/fd.c */ static void -walktblspc_links(char *path, void (*action) (char *fname, bool isdir)) +walktblspc_entries(char *path, void (*action) (char *fname, bool isdir)) { DIR *dir; - struct dirent *direntry; - char subpath[MAXPGPATH]; + struct dirent *de; dir = opendir(path); if (dir == NULL) { - if (errno == ENOENT) - return; fprintf(stderr, _("%s: could not open directory \"%s\": %s\n"), progname, path, strerror(errno)); - exit_nicely(); + return; } - while (errno = 0, (direntry = readdir(dir)) != NULL) + while (errno = 0, (de = readdir(dir)) != NULL) { - if (strcmp(direntry->d_name, ".") == 0 || - strcmp(direntry->d_name, "..") == 0) + char subpath[MAXPGPATH]; + struct stat fst; + + if (strcmp(de->d_name, ".") == 0 || + strcmp(de->d_name, "..") == 0) continue; - /* fsync the version specific tablespace subdirectory */ - snprintf(subpath, sizeof(subpath), "%s/%s/%s", - path, direntry->d_name, TABLESPACE_VERSION_DIRECTORY); + snprintf(subpath, MAXPGPATH, "%s/%s", path, de->d_name); - walkdir(subpath, action); + /* + * We use stat rather than lstat here because we want to know if + * a link points to a directory or not. + */ + if (stat(subpath, &fst) < 0) + { + fprintf(stderr, _("%s: could not stat file \"%s\": %s\n"), + progname, subpath, strerror(errno)); + continue; + } + + if (S_ISREG(fst.st_mode)) + (*action) (subpath, false); + else if (S_ISDIR(fst.st_mode)) + walkdir(subpath, action); } if (errno) - { fprintf(stderr, _("%s: could not read directory \"%s\": %s\n"), progname, path, strerror(errno)); - exit_nicely(); - } - if (closedir(dir)) - { - fprintf(stderr, _("%s: could not close directory \"%s\": %s\n"), - progname, path, strerror(errno)); - exit_nicely(); - } + (void) closedir(dir); + + (*action) (path, true); } /* @@ -641,19 +638,19 @@ pre_sync_fname(char *fname, bool isdir) int fd; fd = open(fname, O_RDONLY | PG_BINARY); - - /* - * Some OSs don't allow us to open directories at all (Windows returns - * EACCES) - */ - if (fd < 0 && isdir && (errno == EISDIR || errno == EACCES)) - return; - if (fd < 0) { - fprintf(stderr, _("%s: could not open file \"%s\": %s\n"), + if (errno == EACCES || (isdir && errno == EISDIR)) + return; + +#ifdef ETXTBSY + if (errno == ETXTBSY) + return; +#endif + + fprintf(stdout, _("%s: could not open file \"%s\": %s\n"), progname, fname, strerror(errno)); - exit_nicely(); + return; } /* @@ -676,12 +673,14 @@ pre_sync_fname(char *fname, bool isdir) * Try to fsync directories but ignore errors that indicate the OS * just doesn't allow/require fsyncing directories. * - * Adapted from fsync_fname() in copydir.c. + * Adapted from fsync_fname_ext() in backend/storage/file/fd.c (in + * particular, it silently ignores errors opening unreadable files). */ static void -fsync_fname(char *fname, bool isdir) +fsync_fname_ext(char *fname, bool isdir) { int fd; + int flags; int returncode; /* @@ -689,40 +688,33 @@ fsync_fname(char *fname, bool isdir) * systems don't allow us to fsync files opened read-only; so we need both * cases here */ + flags = PG_BINARY; if (!isdir) - fd = open(fname, O_RDWR | PG_BINARY); + flags |= O_RDWR; else - fd = open(fname, O_RDONLY | PG_BINARY); - - /* - * Some OSs don't allow us to open directories at all (Windows returns - * EACCES) - */ - if (fd < 0 && isdir && (errno == EISDIR || errno == EACCES)) - return; + flags |= O_RDONLY; - else if (fd < 0) + fd = open(fname, flags); + if (fd < 0) { + if (errno == EACCES || (isdir && errno == EISDIR)) + return; + +#ifdef ETXTBSY + if (errno == ETXTBSY) + return; +#endif + fprintf(stderr, _("%s: could not open file \"%s\": %s\n"), progname, fname, strerror(errno)); - exit_nicely(); + return; } returncode = fsync(fd); - /* Some OSs don't allow us to fsync directories at all */ - if (returncode != 0 && isdir && errno == EBADF) - { - close(fd); - return; - } - - if (returncode != 0) - { + if (returncode != 0 && !(isdir && errno == EBADF)) fprintf(stderr, _("%s: could not fsync file \"%s\": %s\n"), progname, fname, strerror(errno)); - exit_nicely(); - } close(fd); } @@ -2419,50 +2411,67 @@ make_postgres(void) } /* - * fsync everything down to disk + * fsync the data directory and its contents to disk + * + * Ignore unreadable files, and follow symlinks only for pg_xlog and + * under pg_tblspc. */ static void -perform_fsync(void) +fsync_pgdata(void) { - char pdir[MAXPGPATH]; + bool xlog_is_symlink; + char pg_xlog[MAXPGPATH]; char pg_tblspc[MAXPGPATH]; + struct stat st; fputs(_("syncing data to disk ... "), stdout); fflush(stdout); - /* - * We need to name the parent of PGDATA. get_parent_directory() isn't - * enough here, because it can result in an empty string. - */ - snprintf(pdir, MAXPGPATH, "%s/..", pg_data); - canonicalize_path(pdir); + snprintf(pg_xlog, MAXPGPATH, "%s/pg_xlog", pg_data); + snprintf(pg_tblspc, MAXPGPATH, "%s/pg_tblspc", pg_data); /* - * Hint to the OS so that we're going to fsync each of these files soon. + * If pg_xlog is a symlink, then we need to recurse into it separately, + * because the first walkdir below will ignore it. */ + xlog_is_symlink = false; - /* first the parent of the PGDATA directory */ - pre_sync_fname(pdir, true); - - /* then recursively through the data directory */ - walkdir(pg_data, pre_sync_fname); + if (lstat(pg_xlog, &st) < 0) + { + fprintf(stderr, _("%s: could not stat file \"%s\": %s\n"), + progname, pg_xlog, strerror(errno)); + exit_nicely(); + } - /* now do the same thing for everything under pg_tblspc */ - snprintf(pg_tblspc, MAXPGPATH, "%s/pg_tblspc", pg_data); - walktblspc_links(pg_tblspc, pre_sync_fname); +#ifndef WIN32 + if (S_ISLNK(st.st_mode)) + xlog_is_symlink = true; +#else + if (pgwin32_is_junction(subpath)) + xlog_is_symlink = true; +#endif /* - * Now, do the fsync()s in the same order. + * If possible, hint to the kernel that we're soon going to fsync the data + * directory and its contents. */ + walkdir(pg_data, pre_sync_fname); + if (xlog_is_symlink) + walkdir(pg_xlog, pre_sync_fname); + walktblspc_entries(pg_tblspc, pre_sync_fname); - /* first the parent of the PGDATA directory */ - fsync_fname(pdir, true); - - /* then recursively through the data directory */ - walkdir(pg_data, fsync_fname); + /* + * Now we do the fsync()s in the same order. + * + * It's important to fsync the destination directory itself as individual + * file fsyncs don't guarantee that the directory entry for the file is + * synced. + */ - /* and now the same for all tablespaces */ - walktblspc_links(pg_tblspc, fsync_fname); + walkdir(pg_data, fsync_fname_ext); + if (xlog_is_symlink) + walkdir(pg_xlog, fsync_fname_ext); + walktblspc_entries(pg_tblspc, fsync_fname_ext); check_ok(); } @@ -3576,7 +3585,7 @@ main(int argc, char *argv[]) if (sync_only) { setup_pgdata(); - perform_fsync(); + fsync_pgdata(); return 0; } @@ -3629,7 +3638,7 @@ main(int argc, char *argv[]) initialize_data_directory(); if (do_sync) - perform_fsync(); + fsync_pgdata(); else printf(_("\nSync to disk skipped.\nThe data directory might become corrupt if the operating system crashes.\n")); -- 1.9.1