From 10f0d4215493e0f44dfa7e399ccd9978f6cf7a94 Mon Sep 17 00:00:00 2001 From: Justin Pryzby Date: Sun, 8 Mar 2020 22:52:14 -0500 Subject: [PATCH v25 11/11] Add recursion option to pg_ls_dir_files. Does catversion need to be bumped? --- doc/src/sgml/func.sgml | 6 +- src/backend/catalog/system_views.sql | 2 +- src/backend/utils/adt/genfile.c | 78 ++++++++++++++++---- src/bin/pg_rewind/libpq_source.c | 22 ++---- src/bin/pg_rewind/t/RewindTest.pm | 5 +- src/include/catalog/pg_proc.dat | 17 +++-- src/test/regress/expected/misc_functions.out | 26 ++++++- src/test/regress/sql/misc_functions.sql | 8 +- 8 files changed, 120 insertions(+), 44 deletions(-) diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index cd4375f75b..7f7d674767 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -25762,7 +25762,8 @@ SELECT pg_size_pretty(sum(pg_relation_size(relid))) AS total_size pg_ls_dir_metadata ( dirname text , missing_ok boolean, - include_dot_dirs boolean ) + include_dot_dirs boolean, + recurse boolean ) setof record ( filename text, size bigint, @@ -25770,7 +25771,8 @@ SELECT pg_size_pretty(sum(pg_relation_size(relid))) AS total_size modification timestamp with time zone, change timestamp with time zone, creation timestamp with time zone, - type char ) + type char, + path text ) For each file in the specified directory, list the file and its diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql index 93d4f1e4a6..2ce4b27e0f 100644 --- a/src/backend/catalog/system_views.sql +++ b/src/backend/catalog/system_views.sql @@ -1506,7 +1506,7 @@ REVOKE EXECUTE ON FUNCTION pg_stat_file(text,boolean) FROM public; REVOKE EXECUTE ON FUNCTION pg_ls_dir(text) FROM public; REVOKE EXECUTE ON FUNCTION pg_ls_dir(text,boolean,boolean) FROM public; -REVOKE EXECUTE ON FUNCTION pg_ls_dir_metadata(text,boolean,boolean) FROM public; +REVOKE EXECUTE ON FUNCTION pg_ls_dir_metadata(text,boolean,boolean,boolean) FROM public; -- -- We also set up some things 
as accessible to standard roles. diff --git a/src/backend/utils/adt/genfile.c b/src/backend/utils/adt/genfile.c index 5c4a7f748d..14ce7da3e4 100644 --- a/src/backend/utils/adt/genfile.c +++ b/src/backend/utils/adt/genfile.c @@ -40,6 +40,8 @@ static char get_file_type(mode_t mode, const char *path); static void tuple_from_stat(struct stat *fst, const char *path, Datum *values, bool *nulls); static Datum pg_ls_dir_files(FunctionCallInfo fcinfo, const char *dir, int flags); +void pg_ls_dir_files_internal(const char *dirname, DIR *dirdesc, + Tuplestorestate *tupstore, TupleDesc tupdesc, int flags); #define LS_DIR_TYPE (1<<0) /* Show column: type */ #define LS_DIR_METADATA (1<<1) /* Show columns: mtime, size */ @@ -48,6 +50,7 @@ static Datum pg_ls_dir_files(FunctionCallInfo fcinfo, const char *dir, int flags #define LS_DIR_SKIP_HIDDEN (1<<4) /* Do not show anything begining with . */ #define LS_DIR_SKIP_DIRS (1<<5) /* Do not show directories */ #define LS_DIR_SKIP_SPECIAL (1<<6) /* Do not show special file types */ +#define LS_DIR_RECURSE (1<<7) /* Recurse into subdirs */ /* Shortcut for common behavior */ #define LS_DIR_COMMON (LS_DIR_SKIP_HIDDEN | LS_DIR_METADATA) @@ -580,7 +583,6 @@ pg_ls_dir_files(FunctionCallInfo fcinfo, const char *dir, int flags) TupleDesc tupdesc; Tuplestorestate *tupstore; DIR *dirdesc; - struct dirent *de; MemoryContext oldcontext; TypeFuncClass tuptype ; @@ -590,9 +592,8 @@ pg_ls_dir_files(FunctionCallInfo fcinfo, const char *dir, int flags) Assert(!(flags&LS_DIR_TYPE) || !(flags&LS_DIR_SKIP_DIRS)); /* check the optional arguments */ - if (PG_NARGS() == 3) - { - if (!PG_ARGISNULL(1)) + if (PG_NARGS() > 1 && + !PG_ARGISNULL(1)) { if (PG_GETARG_BOOL(1)) flags |= LS_DIR_MISSING_OK; @@ -600,14 +601,30 @@ pg_ls_dir_files(FunctionCallInfo fcinfo, const char *dir, int flags) flags &= ~LS_DIR_MISSING_OK; } - if (!PG_ARGISNULL(2)) + if (PG_NARGS() > 2 && + !PG_ARGISNULL(2)) { if (PG_GETARG_BOOL(2)) flags &= ~LS_DIR_SKIP_DOT_DIRS; else flags |= 
LS_DIR_SKIP_DOT_DIRS; } - } + + if (PG_NARGS() > 3 && + !PG_ARGISNULL(3)) + { + if (PG_GETARG_BOOL(3)) + flags |= LS_DIR_RECURSE; + else + flags &= ~LS_DIR_RECURSE; + } + + if ((flags & LS_DIR_RECURSE) != 0 && + (flags & LS_DIR_SKIP_DOT_DIRS) == 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_RECURSION), // ?? + errmsg("recursion requires skipping dot dirs"))); + /* check to see if caller supports us returning a tuplestore */ if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo)) @@ -659,10 +676,20 @@ pg_ls_dir_files(FunctionCallInfo fcinfo, const char *dir, int flags) /* Otherwise, we can let ReadDir() throw the error */ } - while ((de = ReadDir(dirdesc, dir)) != NULL) + pg_ls_dir_files_internal(dir, dirdesc, tupstore, tupdesc, flags); + FreeDir(dirdesc); + return (Datum) 0; +} + +void pg_ls_dir_files_internal(const char *dirname, DIR *dirdesc, + Tuplestorestate *tupstore, TupleDesc tupdesc, int flags) +{ + struct dirent *de; + + while ((de = ReadDir(dirdesc, dirname)) != NULL) { - Datum values[7]; - bool nulls[7]; + Datum values[8]; + bool nulls[8]; char path[MAXPGPATH * 2]; struct stat attrib; @@ -678,7 +705,11 @@ pg_ls_dir_files(FunctionCallInfo fcinfo, const char *dir, int flags) continue; /* Get the file info */ - snprintf(path, sizeof(path), "%s/%s", dir, de->d_name); + if (strcmp(dirname, ".") != 0) + snprintf(path, sizeof(path), "%s/%s", dirname, de->d_name); + else + snprintf(path, sizeof(path), "%s", de->d_name); + if (lstat(path, &attrib) < 0) { /* Ignore concurrently-deleted files, else complain */ @@ -703,14 +734,33 @@ pg_ls_dir_files(FunctionCallInfo fcinfo, const char *dir, int flags) memset(nulls, false, sizeof(nulls)); values[0] = CStringGetTextDatum(de->d_name); - if (flags & LS_DIR_METADATA) + if ((flags & (LS_DIR_RECURSE|LS_DIR_METADATA)) != 0) + { tuple_from_stat(&attrib, path, 1+values, 1+nulls); + /* + * path is only really useful for recursion, but this function + * can't return different fields when recursing + * XXX: return dirname (which is 
nice since it's the original, + * unprocessed input to this recursion) or path (which is nice + * since it's a "cooked" value without leading/duplicate slashes) + */ + values[7] = CStringGetTextDatum(path); + } + tuplestore_putvalues(tupstore, tupdesc, values, nulls); - } - FreeDir(dirdesc); - return (Datum) 0; + /* Recurse? */ + if ((flags & LS_DIR_RECURSE) != 0 && + S_ISDIR(attrib.st_mode)) + { + DIR *newdir = AllocateDir(path); + /* Failure handled by ReadDir */ + pg_ls_dir_files_internal(path, newdir, tupstore, tupdesc, flags); + Assert(newdir != NULL); + FreeDir(newdir); + } + } } /* Function to return the list of files in the log directory */ diff --git a/src/bin/pg_rewind/libpq_source.c b/src/bin/pg_rewind/libpq_source.c index 47beba277a..9ebb31615c 100644 --- a/src/bin/pg_rewind/libpq_source.c +++ b/src/bin/pg_rewind/libpq_source.c @@ -237,30 +237,18 @@ libpq_traverse_files(rewind_source *source, process_file_callback_t callback) /* * Create a recursive directory listing of the whole data directory. * - * The WITH RECURSIVE part does most of the work. The second part gets the - * targets of the symlinks in pg_tblspc directory. + * Join to pg_tablespace to get the targets of the symlinks in + * pg_tblspc directory. * * XXX: There is no backend function to get a symbolic link's target in * general, so if the admin has put any custom symbolic links in the data * directory, they won't be copied correctly. 
*/ sql = - "WITH RECURSIVE files (path, filename, size, isdir) AS (\n" - " SELECT '' AS path, filename, size, isdir FROM\n" - " (SELECT pg_ls_dir('.', true, false) AS filename) AS fn,\n" - " pg_stat_file(fn.filename, true) AS this\n" - " UNION ALL\n" - " SELECT parent.path || parent.filename || '/' AS path,\n" - " fn, this.size, this.isdir\n" - " FROM files AS parent,\n" - " pg_ls_dir(parent.path || parent.filename, true, false) AS fn,\n" - " pg_stat_file(parent.path || parent.filename || '/' || fn, true) AS this\n" - " WHERE parent.isdir = 't'\n" - ")\n" - "SELECT path || filename, size, isdir,\n" + "SELECT path, size, type='d' AS isdir,\n" " pg_tablespace_location(pg_tablespace.oid) AS link_target\n" - "FROM files\n" - "LEFT OUTER JOIN pg_tablespace ON files.path = 'pg_tblspc/'\n" + "FROM pg_ls_dir_metadata('.', true, false, true) files\n" + "LEFT OUTER JOIN pg_tablespace ON files.path = 'pg_tblspc'\n" " AND oid::text = files.filename\n"; res = PQexec(conn, sql); diff --git a/src/bin/pg_rewind/t/RewindTest.pm b/src/bin/pg_rewind/t/RewindTest.pm index 41ed7d4b3b..d911b7de52 100644 --- a/src/bin/pg_rewind/t/RewindTest.pm +++ b/src/bin/pg_rewind/t/RewindTest.pm @@ -160,7 +160,10 @@ sub start_primary GRANT EXECUTE ON function pg_catalog.pg_read_binary_file(text) TO rewind_user; GRANT EXECUTE ON function pg_catalog.pg_read_binary_file(text, bigint, bigint, boolean) - TO rewind_user;"); + TO rewind_user; + GRANT EXECUTE ON function pg_catalog.pg_ls_dir_metadata(text, bool, bool, bool) + TO rewind_user; + "); #### Now run the test-specific parts to initialize the primary before setting # up standby diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index a75929cb92..b54be0bc63 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -10978,17 +10978,24 @@ prosrc => 'pg_ls_tmpdir_1arg' }, { oid => '9979', descr => 'list directory with metadata', proname => 'pg_ls_dir_metadata', procost => '10', prorows => '20', 
proretset => 't', - provolatile => 'v', prorettype => 'record', proargtypes => 'text bool bool', - proallargtypes => '{text,bool,bool,text,int8,timestamptz,timestamptz,timestamptz,timestamptz,char}', proargmodes => '{i,i,i,o,o,o,o,o,o,o}', - proargnames => '{dirname,missing_ok,include_dot_dirs,filename,size,access,modification,change,creation,type}', + provolatile => 'v', prorettype => 'record', proargtypes => 'text bool bool bool', + proallargtypes => '{text,bool,bool,bool,text,int8,timestamptz,timestamptz,timestamptz,timestamptz,char,text}', proargmodes => '{i,i,i,i,o,o,o,o,o,o,o,o}', + proargnames => '{dirname,missing_ok,include_dot_dirs,recurse,filename,size,access,modification,change,creation,type,path}', prosrc => 'pg_ls_dir_metadata' }, { oid => '9980', descr => 'list directory with metadata', proname => 'pg_ls_dir_metadata', procost => '10', prorows => '20', proretset => 't', provolatile => 'v', prorettype => 'record', proargtypes => 'text', - proallargtypes => '{text,text,int8,timestamptz,timestamptz,timestamptz,timestamptz,char}', proargmodes => '{i,o,o,o,o,o,o,o}', - proargnames => '{dirname,filename,size,access,modification,change,creation,type}', + proallargtypes => '{text,text,int8,timestamptz,timestamptz,timestamptz,timestamptz,char,text}', proargmodes => '{i,o,o,o,o,o,o,o,o}', + proargnames => '{dirname,filename,size,access,modification,change,creation,type,path}', prosrc => 'pg_ls_dir_metadata_1arg' }, +{ oid => '9981', descr => 'list all files in a directory recursively', + proname => 'pg_ls_dir_recurse_sql', prorows => '10000', proretset => 't', + provolatile => 'v', prorettype => 'record', proargtypes => 'text', + proallargtypes => '{text,text,text,int8,timestamptz,timestamptz,timestamptz,timestamptz,char}', + proargnames => '{dirname,path,filename,size,access,modification,change,creation,type}', proargmodes => '{i,o,o,o,o,o,o,o,o}', + prolang => 'sql', prosrc => "with recursive ls as (select dirname as path, * from pg_ls_dir_metadata(dirname, 
false, false) union all select coalesce(nullif(parent.path,'.')||'/','')||parent.filename, a.filename, a.size, a.access, a.modification, a.change, a.creation, a.type from ls as parent, lateral pg_ls_dir_metadata(parent.path||'/'||parent.filename, false, false) as a where parent.type='d') select * from ls" }, + # hash partitioning constraint function { oid => '5028', descr => 'hash partition CHECK constraint', proname => 'satisfies_hash_partition', provariadic => 'any', diff --git a/src/test/regress/expected/misc_functions.out b/src/test/regress/expected/misc_functions.out index e1e9ced303..b14547c857 100644 --- a/src/test/regress/expected/misc_functions.out +++ b/src/test/regress/expected/misc_functions.out @@ -238,17 +238,37 @@ select filename, type from pg_ls_dir_metadata('.') where filename='.'; . | d (1 row) -select filename, type from pg_ls_dir_metadata('.', false, false) where filename='.'; -- include_dot_dirs=false +select filename, type from pg_ls_dir_metadata('.', false, false, false) where filename='.'; -- include_dot_dirs=false filename | type ----------+------ (0 rows) -- Check that expected columns are present select * from pg_ls_dir_metadata('.') limit 0; - filename | size | access | modification | change | creation | type -----------+------+--------+--------------+--------+----------+------ + filename | size | access | modification | change | creation | type | path +----------+------+--------+--------------+--------+----------+------+------ (0 rows) +-- Exercise recursion +select lower(path), filename, type from pg_ls_dir_metadata('.', true, false, true) where +path in ('base', 'base/pgsql_tmp', 'global', 'global/pg_control', 'global/pg_filenode.map', 'PG_VERSION', 'pg_multixact', 'pg_multixact/members', 'pg_multixact/offsets', 'pg_wal', 'pg_wal/archive_status') +-- (type='d' or path~'^(global/.*|PG_VERSION|postmaster\.opts|postmaster\.pid|pg_logical/replorigin_checkpoint)$') and filename!~'[0-9]' +order by 1; + lower | filename | type 
+------------------------+-----------------+------ + base | base | d + base/pgsql_tmp | pgsql_tmp | d + global | global | d + global/pg_control | pg_control | - + global/pg_filenode.map | pg_filenode.map | - + pg_multixact | pg_multixact | d + pg_multixact/members | members | d + pg_multixact/offsets | offsets | d + pg_version | PG_VERSION | - + pg_wal | pg_wal | d + pg_wal/archive_status | archive_status | d +(11 rows) + -- -- Test adding a support function to a subject function -- diff --git a/src/test/regress/sql/misc_functions.sql b/src/test/regress/sql/misc_functions.sql index 0961cdc058..328cf42e47 100644 --- a/src/test/regress/sql/misc_functions.sql +++ b/src/test/regress/sql/misc_functions.sql @@ -74,11 +74,17 @@ select * from pg_ls_tmpdir() where name='Does not exist'; select filename, type from pg_ls_dir_metadata('.') where filename='.'; -select filename, type from pg_ls_dir_metadata('.', false, false) where filename='.'; -- include_dot_dirs=false +select filename, type from pg_ls_dir_metadata('.', false, false, false) where filename='.'; -- include_dot_dirs=false -- Check that expected columns are present select * from pg_ls_dir_metadata('.') limit 0; +-- Exercise recursion +select lower(path), filename, type from pg_ls_dir_metadata('.', true, false, true) where +path in ('base', 'base/pgsql_tmp', 'global', 'global/pg_control', 'global/pg_filenode.map', 'PG_VERSION', 'pg_multixact', 'pg_multixact/members', 'pg_multixact/offsets', 'pg_wal', 'pg_wal/archive_status') +-- (type='d' or path~'^(global/.*|PG_VERSION|postmaster\.opts|postmaster\.pid|pg_logical/replorigin_checkpoint)$') and filename!~'[0-9]' +order by 1; + -- -- Test adding a support function to a subject function -- -- 2.17.0