From eb4122ed62a7775ee47baa9f7cc8c5531bcf1aa7 Mon Sep 17 00:00:00 2001 From: Mark Dilger Date: Wed, 21 Oct 2020 20:25:21 -0700 Subject: [PATCH v20 2/5] Adding contrib module pg_amcheck Adding new contrib module pg_amcheck, which is a command line interface for running amcheck's verifications against tables and indexes. --- contrib/Makefile | 1 + contrib/pg_amcheck/.gitignore | 2 + contrib/pg_amcheck/Makefile | 28 + contrib/pg_amcheck/pg_amcheck.c | 1281 ++++++++++++++++++++ contrib/pg_amcheck/pg_amcheck.control | 5 + contrib/pg_amcheck/t/001_basic.pl | 9 + contrib/pg_amcheck/t/002_nonesuch.pl | 60 + contrib/pg_amcheck/t/003_check.pl | 231 ++++ contrib/pg_amcheck/t/004_verify_heapam.pl | 489 ++++++++ contrib/pg_amcheck/t/005_opclass_damage.pl | 52 + doc/src/sgml/contrib.sgml | 1 + doc/src/sgml/filelist.sgml | 1 + doc/src/sgml/pgamcheck.sgml | 228 ++++ src/tools/msvc/Mkvcbuild.pm | 6 +- src/tools/pgindent/typedefs.list | 2 + 15 files changed, 2393 insertions(+), 3 deletions(-) create mode 100644 contrib/pg_amcheck/.gitignore create mode 100644 contrib/pg_amcheck/Makefile create mode 100644 contrib/pg_amcheck/pg_amcheck.c create mode 100644 contrib/pg_amcheck/pg_amcheck.control create mode 100644 contrib/pg_amcheck/t/001_basic.pl create mode 100644 contrib/pg_amcheck/t/002_nonesuch.pl create mode 100644 contrib/pg_amcheck/t/003_check.pl create mode 100644 contrib/pg_amcheck/t/004_verify_heapam.pl create mode 100644 contrib/pg_amcheck/t/005_opclass_damage.pl create mode 100644 doc/src/sgml/pgamcheck.sgml diff --git a/contrib/Makefile b/contrib/Makefile index 7a4866e338..0fd4125902 100644 --- a/contrib/Makefile +++ b/contrib/Makefile @@ -30,6 +30,7 @@ SUBDIRS = \ old_snapshot \ pageinspect \ passwordcheck \ + pg_amcheck \ pg_buffercache \ pg_freespacemap \ pg_prewarm \ diff --git a/contrib/pg_amcheck/.gitignore b/contrib/pg_amcheck/.gitignore new file mode 100644 index 0000000000..f8eecf70bf --- /dev/null +++ b/contrib/pg_amcheck/.gitignore @@ -0,0 +1,2 @@ +/pg_amcheck +/tmp_check/ diff --git a/contrib/pg_amcheck/Makefile b/contrib/pg_amcheck/Makefile new file mode 100644 index 0000000000..74554b9e8d --- /dev/null +++ b/contrib/pg_amcheck/Makefile @@ -0,0 +1,28 @@ +# contrib/pg_amcheck/Makefile + +PGFILEDESC = "pg_amcheck - detects corruption within database relations" +PGAPPICON = win32 + +PROGRAM = pg_amcheck +OBJS = \ + $(WIN32RES) \ + pg_amcheck.o + +REGRESS_OPTS += --load-extension=amcheck --load-extension=pageinspect +EXTRA_INSTALL += contrib/amcheck contrib/pageinspect + +TAP_TESTS = 1 + +PG_CPPFLAGS = -I$(libpq_srcdir) +PG_LIBS_INTERNAL = -L$(top_builddir)/src/fe_utils -lpgfeutils $(libpq_pgport) + +ifdef USE_PGXS +PG_CONFIG = pg_config +PGXS := $(shell $(PG_CONFIG) --pgxs) +include $(PGXS) +else +subdir = contrib/pg_amcheck +top_builddir = ../.. +include $(top_builddir)/src/Makefile.global +include $(top_srcdir)/contrib/contrib-global.mk +endif diff --git a/contrib/pg_amcheck/pg_amcheck.c b/contrib/pg_amcheck/pg_amcheck.c new file mode 100644 index 0000000000..6d20ff3d78 --- /dev/null +++ b/contrib/pg_amcheck/pg_amcheck.c @@ -0,0 +1,1281 @@ +/*------------------------------------------------------------------------- + * + * pg_amcheck.c + * Detects corruption within database relations. 
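+ *    pg_amcheck is a libpq command line client that connects to a target
+ *    database and runs amcheck's verify_heapam() and bt_index_parent_check()
+ *    functions against the tables and btree indexes selected by the user.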
+ * + * Copyright (c) 2017-2020, PostgreSQL Global Development Group + * + * IDENTIFICATION + * contrib/pg_amcheck/pg_amcheck.c + * + *------------------------------------------------------------------------- + */ +#include "postgres_fe.h" + +#include "catalog/pg_am.h" +#include "catalog/pg_class.h" +#include "common/logging.h" +#include "common/username.h" +#include "common/connect.h" +#include "common/string.h" +#include "fe_utils/print.h" +#include "fe_utils/simple_list.h" +#include "fe_utils/string_utils.h" +#include "pg_getopt.h" + +const char *usage_text[] = { + "pg_amcheck is the PostgreSQL command line frontend for the amcheck database corruption checker.", + "", + "Usage:", + " pg_amcheck [OPTION]... [DBNAME [USERNAME]]", + "", + "General options:", + " -V, --version output version information, then exit", + " -?, --help show this help, then exit", + " -s, --strict-names require include patterns to match at least one entity each", + " -o, --on-error-stop stop checking at end of first corrupt page", + "", + "Schema checking options:", + " -n, --schema=PATTERN check relations in the specified schema(s) only", + " -N, --exclude-schema=PATTERN do NOT check relations in the specified schema(s)", + "", + "Table checking options:", + " -t, --table=PATTERN check the specified table(s) only", + " -T, --exclude-table=PATTERN do NOT check the specified table(s)", + " -b, --startblock begin checking table(s) at the given starting block number", + " -e, --endblock check table(s) only up to the given ending block number", + " -f, --skip-all-frozen do NOT check blocks marked as all frozen", + " -v, --skip-all-visible do NOT check blocks marked as all visible", + "", + "TOAST table checking options:", + " -z, --check-toast check associated toast tables and toast indexes", + " -Z, --skip-toast do NOT check associated toast tables and toast indexes", + " -B, --toast-startblock begin checking toast table(s) at the given starting block", + " -E, --toast-endblock check toast table(s) only up to the given ending block", + "", + "Index checking options:", + " -x, --check-indexes check btree indexes associated with tables being checked", + " -X, --skip-indexes do NOT check any btree indexes", + " -i, --index=PATTERN check the specified index(es) only", + " -I, --exclude-index=PATTERN do NOT check the specified index(es)", + " -c, --check-corrupt check indexes even if their associated table is corrupt", + " -C, --skip-corrupt do NOT check indexes if their associated table is corrupt", + " -a, --heapallindexed check index tuples against the table tuples", + " -A, --no-heapallindexed do NOT check index tuples against the table tuples", + " -r, --rootdescend search from the root page for each index tuple", + " -R, --no-rootdescend do NOT search from the root page for each index tuple", + "", + "Connection options:", + " -d, --dbname=DBNAME database name to connect to", + " -h, --host=HOSTNAME database server host or socket directory", + " -p, --port=PORT database server port", + " -U, --username=USERNAME database user name", + " -w, --no-password never prompt for password", + " -W, --password force password prompt (should happen automatically)", + "", + NULL /* sentinel */ +}; + +typedef struct +AmCheckSettings +{ + char *dbname; + char *host; + char *port; + char *username; +} ConnectOptions; + +typedef enum trivalue +{ + TRI_DEFAULT, + TRI_NO, + TRI_YES +} trivalue; + +typedef struct +{ + PGconn *db; /* connection to backend */ + bool notty; /* stdin or stdout is not a tty (as determined + * on startup) */ 
+ trivalue getPassword; /* prompt for a username and password */ + const char *progname; /* in case you renamed pg_amcheck */ + bool strict_names; /* The specified names/patterns should to + * match at least one entity */ + bool on_error_stop; /* The checking of each table should stop + * after the first corrupt page is found. */ + bool skip_frozen; /* Do not check pages marked all frozen */ + bool skip_visible; /* Do not check pages marked all visible */ + bool check_indexes; /* Check btree indexes */ + bool check_toast; /* Check associated toast tables and indexes */ + bool check_corrupt; /* Check indexes even if table is corrupt */ + bool heapallindexed; /* Perform index to table reconciling checks */ + bool rootdescend; /* Perform index rootdescend checks */ + char *startblock; /* Block number where checking begins */ + char *endblock; /* Block number where checking ends, inclusive */ + char *toaststart; /* Block number where toast checking begins */ + char *toastend; /* Block number where toast checking ends, + * inclusive */ +} AmCheckSettings; + +static AmCheckSettings settings; + +/* + * Object inclusion/exclusion lists + * + * The string lists record the patterns given by command-line switches, + * which we then convert to lists of Oids of matching objects. + */ +static SimpleStringList schema_include_patterns = {NULL, NULL}; +static SimpleOidList schema_include_oids = {NULL, NULL}; +static SimpleStringList schema_exclude_patterns = {NULL, NULL}; +static SimpleOidList schema_exclude_oids = {NULL, NULL}; + +static SimpleStringList table_include_patterns = {NULL, NULL}; +static SimpleOidList table_include_oids = {NULL, NULL}; +static SimpleStringList table_exclude_patterns = {NULL, NULL}; +static SimpleOidList table_exclude_oids = {NULL, NULL}; + +static SimpleStringList index_include_patterns = {NULL, NULL}; +static SimpleOidList index_include_oids = {NULL, NULL}; +static SimpleStringList index_exclude_patterns = {NULL, NULL}; +static SimpleOidList index_exclude_oids = {NULL, NULL}; + +/* + * List of tables to be checked, compiled from above lists. + */ +static SimpleOidList checklist = {NULL, NULL}; + +/* + * Strings to be constructed once upon first use. These could be made + * string constants instead, but that would require embedding knowledge + * of the single character values for each relkind, such as 'm' for + * materialized views, which we'd rather not embed here. + */ +static char *table_relkind_quals = NULL; +static char *index_relkind_quals = NULL; + +/* + * Functions to get pointers to the two strings, above, after initializing + * them upon the first call to the function. + */ +static const char *get_table_relkind_quals(void); +static const char *get_index_relkind_quals(void); + +/* + * Functions for running the various corruption checks. + */ +static void check_tables(SimpleOidList *checklist); +static uint64 check_toast(Oid tbloid); +static uint64 check_table(Oid tbloid, const char *startblock, + const char *endblock, bool on_error_stop, + bool check_toast); +static uint64 check_indexes(Oid tbloid, const SimpleOidList *include_oids, + const SimpleOidList *exclude_oids); +static uint64 check_index(const char *idxoid, const char *idxname, + const char *tblname); + +/* + * Functions implementing standard command line behaviors. 
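+ * These handle option parsing, the --help/usage output, --version output,
+ * and a notice processor for relaying messages from the backend.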
+ */ +static void parse_cli_options(int argc, char *argv[], + ConnectOptions *connOpts); +static void usage(void); +static void showVersion(void); +static void NoticeProcessor(void *arg, const char *message); + +/* + * Functions for converting command line options that include or exclude + * schemas, tables, and indexes by pattern into internally useful lists of + * Oids for objects that match those patterns. + */ +static void expand_schema_name_patterns(const SimpleStringList *patterns, + const SimpleOidList *exclude_oids, + SimpleOidList *oids, + bool strict_names); +static void expand_relkind_name_patterns(const SimpleStringList *patterns, + const SimpleOidList *exclude_nsp_oids, + const SimpleOidList *exclude_oids, + SimpleOidList *oids, + bool strict_names, + const char *missing_errtext, + const char *relkind_quals); +static void expand_table_name_patterns(const SimpleStringList *patterns, + const SimpleOidList *exclude_nsp_oids, + const SimpleOidList *exclude_oids, + SimpleOidList *oids, + bool strict_names); +static void expand_index_name_patterns(const SimpleStringList *patterns, + const SimpleOidList *exclude_nsp_oids, + const SimpleOidList *exclude_oids, + SimpleOidList *oids, + bool strict_names); +static void get_table_check_list(const SimpleOidList *include_nsp, + const SimpleOidList *exclude_nsp, + const SimpleOidList *include_tbl, + const SimpleOidList *exclude_tbl, + SimpleOidList *checklist); +static PGresult *ExecuteSqlQuery(const char *query, char **error); +static PGresult *ExecuteSqlQueryOrDie(const char *query); + +static void append_csv_oids(PQExpBuffer querybuf, const SimpleOidList *oids); +static void apply_filter(PQExpBuffer querybuf, const char *lval, + const SimpleOidList *oids, bool include); + +#define fatal(...) do { pg_log_error(__VA_ARGS__); exit(1); } while(0) + +/* Like fatal(), but with a complaint about a particular query. */ +static void +die_on_query_failure(const char *query) +{ + pg_log_error("query failed: %s", + PQerrorMessage(settings.db)); + fatal("query was: %s", query); +} + +#define EXIT_BADCONN 2 + +int +main(int argc, char **argv) +{ + ConnectOptions connOpts; + bool have_password = false; + char *password = NULL; + bool new_pass; + + pg_logging_init(argv[0]); + set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("pg_amcheck")); + + if (argc > 1) + { + if ((strcmp(argv[1], "-?") == 0) || + (argc == 2 && (strcmp(argv[1], "--help") == 0))) + { + usage(); + exit(EXIT_SUCCESS); + } + if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0) + { + showVersion(); + exit(EXIT_SUCCESS); + } + } + + memset(&settings, 0, sizeof(settings)); + settings.progname = get_progname(argv[0]); + + settings.db = NULL; + setDecimalLocale(); + + settings.notty = (!isatty(fileno(stdin)) || !isatty(fileno(stdout))); + + settings.getPassword = TRI_DEFAULT; + + /* + * Default behaviors for user settable options. Note that these default + * to doing all the safe checks and none of the unsafe ones, on the theory + * that if a user says "pg_amcheck mydb" without specifying any additional + * options, we should check everything we know how to check without + * risking any backend aborts. 
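+ *
+ * For example (the database name here is illustrative):
+ *
+ *     pg_amcheck mydb           check tables only, using the defaults below
+ *     pg_amcheck -x -z mydb     also check btree indexes and toast relations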
+ */ + + settings.on_error_stop = false; + settings.skip_frozen = false; + settings.skip_visible = false; + + /* Index checking options */ + settings.check_indexes = false; + settings.check_corrupt = false; + settings.heapallindexed = false; + settings.rootdescend = false; + + /* + * Reconciling toasted attributes from the main table against the toast + * table can crash the backend if the toast table or index are corrupt. + * We can optionally check the toast table and then the toast index prior + * to checking the main table, but if the toast table or index are + * concurrently corrupted after we conclude they are valid, the check of + * the main table can crash the backend. The oneous is on any caller who + * enables this option to make certain the environment is sufficiently + * stable that concurrent corruptions of the toast is not possible. + */ + settings.check_toast = false; + + parse_cli_options(argc, argv, &connOpts); + + if (settings.getPassword == TRI_YES) + { + /* + * We can't be sure yet of the username that will be used, so don't + * offer a potentially wrong one. Typical uses of this option are + * noninteractive anyway. + */ + password = simple_prompt("Password: ", false); + have_password = true; + } + + /* loop until we have a password if requested by backend */ + do + { +#define ARRAY_SIZE 8 + const char **keywords = pg_malloc(ARRAY_SIZE * sizeof(*keywords)); + const char **values = pg_malloc(ARRAY_SIZE * sizeof(*values)); + + keywords[0] = "host"; + values[0] = connOpts.host; + keywords[1] = "port"; + values[1] = connOpts.port; + keywords[2] = "user"; + values[2] = connOpts.username; + keywords[3] = "password"; + values[3] = have_password ? password : NULL; + keywords[4] = "dbname"; /* see do_connect() */ + if (connOpts.dbname == NULL) + { + if (getenv("PGDATABASE")) + values[4] = getenv("PGDATABASE"); + else if (getenv("PGUSER")) + values[4] = getenv("PGUSER"); + else + values[4] = "postgres"; + } + else + values[4] = connOpts.dbname; + keywords[5] = "fallback_application_name"; + values[5] = settings.progname; + keywords[6] = "client_encoding"; + values[6] = (settings.notty || + getenv("PGCLIENTENCODING")) ? NULL : "auto"; + keywords[7] = NULL; + values[7] = NULL; + + new_pass = false; + settings.db = PQconnectdbParams(keywords, values, true); + if (settings.db == NULL) + { + pg_log_error("no connection to server after initial attempt"); + exit(EXIT_BADCONN); + } + + free(keywords); + free(values); + + if (PQstatus(settings.db) == CONNECTION_BAD && + PQconnectionNeedsPassword(settings.db) && + !have_password && + settings.getPassword != TRI_NO) + { + /* + * Before closing the old PGconn, extract the user name that was + * actually connected with. + */ + const char *realusername = PQuser(settings.db); + char *password_prompt; + + if (realusername && realusername[0]) + password_prompt = psprintf("Password for user %s: ", + realusername); + else + password_prompt = pg_strdup("Password: "); + PQfinish(settings.db); + + password = simple_prompt(password_prompt, false); + free(password_prompt); + have_password = true; + new_pass = true; + } + } while (new_pass); + + if (!settings.db) + { + pg_log_error("no connection to server"); + exit(EXIT_BADCONN); + } + + if (PQstatus(settings.db) == CONNECTION_BAD) + { + pg_log_error("could not connect to server: %s", + PQerrorMessage(settings.db)); + PQfinish(settings.db); + exit(EXIT_BADCONN); + } + + /* + * Expand schema, table, and index exclusion patterns, if any. 
Note that + * non-matching exclusion patterns are not an error, even when + * --strict-names was specified. + */ + expand_schema_name_patterns(&schema_exclude_patterns, NULL, + &schema_exclude_oids, false); + expand_table_name_patterns(&table_exclude_patterns, NULL, NULL, + &table_exclude_oids, false); + expand_index_name_patterns(&index_exclude_patterns, NULL, NULL, + &index_exclude_oids, false); + + /* Expand schema selection patterns into Oid lists */ + if (schema_include_patterns.head != NULL) + { + expand_schema_name_patterns(&schema_include_patterns, + &schema_exclude_oids, + &schema_include_oids, + settings.strict_names); + if (schema_include_oids.head == NULL) + fatal("no matching schemas were found"); + } + + /* Expand table selection patterns into Oid lists */ + if (table_include_patterns.head != NULL) + { + expand_table_name_patterns(&table_include_patterns, + &schema_exclude_oids, + &table_exclude_oids, + &table_include_oids, + settings.strict_names); + if (table_include_oids.head == NULL) + fatal("no matching tables were found"); + } + + /* Expand index selection patterns into Oid lists */ + if (index_include_patterns.head != NULL) + { + expand_index_name_patterns(&index_include_patterns, + &schema_exclude_oids, + &index_exclude_oids, + &index_include_oids, + settings.strict_names); + if (index_include_oids.head == NULL) + fatal("no matching indexes were found"); + } + + /* + * Compile list of all tables to be checked based on namespace and table + * includes and excludes. + */ + get_table_check_list(&schema_include_oids, &schema_exclude_oids, + &table_include_oids, &table_exclude_oids, &checklist); + + PQsetNoticeProcessor(settings.db, NoticeProcessor, NULL); + + /* + * All information about corrupt indexes are returned via ereport, not as + * tuples. We want all the details to report if corruption exists. + */ + PQsetErrorVerbosity(settings.db, PQERRORS_VERBOSE); + + check_tables(&checklist); + + return 0; +} + +/* + * Conditionally add a restriction to a query such that lval must be an Oid in + * the given list of Oids, except that for a null or empty oids list argument, + * no filtering is done and we return without having modified the query buffer. + * + * The query argument must already have begun the WHERE clause and must be in a + * state where we can append an AND clause. No checking of this requirement is + * done here. + * + * On return, the query buffer will be extended with an AND clause that filters + * only those rows where the lval is an Oid present in the given list of oids. + */ +static inline void +include_filter(PQExpBuffer querybuf, const char *lval, const SimpleOidList *oids) +{ + apply_filter(querybuf, lval, oids, true); +} + +/* + * Same as include_filter, above, except that for a non-null, non-empty oids + * list, the lval is restricted to not be any of the values in the list. + */ +static inline void +exclude_filter(PQExpBuffer querybuf, const char *lval, const SimpleOidList *oids) +{ + apply_filter(querybuf, lval, oids, false); +} + +/* + * Check each table from the given checklist per the user specified options. 
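+ *
+ * For each table, this optionally checks the associated toast table and
+ * toast index first (when --check-toast is given), then the table itself,
+ * and finally any associated btree indexes (when --check-indexes is given),
+ * subject to the --check-corrupt/--skip-corrupt and --heapallindexed settings.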
+ */ +static void +check_tables(SimpleOidList *checklist) +{ + const SimpleOidListCell *cell; + + for (cell = checklist->head; cell; cell = cell->next) + { + uint64 corruptions = 0; + bool reconcile_toast; + + /* + * If we skip checking the toast table, or if during the check we + * detect any toast table corruption, the main table checks below must + * not reconcile toasted attributes against the toast table, as such + * accesses to the toast table might crash the backend. Instead, skip + * such reconciliations for this table. + * + * This protection contains a race condition; the toast table or index + * could become corrupted concurrently with our checks, but prevention + * of such concurrent corruption is documented as the caller's + * reponsibility, so we don't worry about it here. + */ + reconcile_toast = false; + if (settings.check_toast) + { + if (check_toast(cell->val) == 0) + reconcile_toast = true; + } + + corruptions = check_table(cell->val, + settings.startblock, + settings.endblock, + settings.on_error_stop, + reconcile_toast); + + if (settings.check_indexes) + { + bool old_heapallindexed; + + /* Optionally skip the index checks for a corrupt table. */ + if (corruptions && !settings.check_corrupt) + continue; + + /* + * The btree checking logic which optionally checks the contents + * of an index against the corresponding table has not yet been + * sufficiently hardened against corrupt tables. In particular, + * when called with heapallindexed true, it segfaults if the file + * backing the table relation has been erroneously unlinked. In + * any event, it seems unwise to reconcile an index against its + * table when we already know the table is corrupt. + */ + old_heapallindexed = settings.heapallindexed; + if (corruptions) + settings.heapallindexed = false; + + corruptions += check_indexes(cell->val, + &index_include_oids, + &index_exclude_oids); + + settings.heapallindexed = old_heapallindexed; + } + } +} + +/* + * For a given main table relation, returns the associated toast table, + * or InvalidOid if none exists. + */ +static Oid +get_toast_oid(Oid tbloid) +{ + PQExpBuffer querybuf = createPQExpBuffer(); + PGresult *res; + char *error = NULL; + Oid result = InvalidOid; + + appendPQExpBuffer(querybuf, + "SELECT c.reltoastrelid" + "\nFROM pg_catalog.pg_class c" + "\nWHERE c.oid = %u", + tbloid); + res = ExecuteSqlQuery(querybuf->data, &error); + if (PQresultStatus(res) == PGRES_TUPLES_OK && PQntuples(res) > 0) + result = atooid(PQgetvalue(res, 0, 0)); + else if (error) + die_on_query_failure(querybuf->data); + + PQclear(res); + destroyPQExpBuffer(querybuf); + + return result; +} + +/* + * For the given main table relation, checks the associated toast table and + * index, in any. This should be performed *before* checking the main table + * relation, as the checks inside verify_heapam assume both the toast table and + * toast index are usable. + * + * Returns the number of corruptions detected. + */ +static uint64 +check_toast(Oid tbloid) +{ + Oid toastoid; + uint64 corruption_cnt = 0; + + if (settings.db == NULL) + fatal("no connection on entry to check_toast"); + + toastoid = get_toast_oid(tbloid); + if (OidIsValid(toastoid)) + { + corruption_cnt = check_table(toastoid, settings.toaststart, + settings.toastend, settings.on_error_stop, + false); + + /* + * If the toast table is corrupt, checking the index is not safe. 
+ * There is a race condition here, as the toast table could be + * concurrently corrupted, but preventing concurrent corruption is the + * caller's responsibility, not ours. + */ + if (corruption_cnt == 0) + corruption_cnt += check_indexes(toastoid, NULL, NULL); + } + + return corruption_cnt; +} + +/* + * Checks the given table for corruption, returning the number of corruptions + * detected and printed to the user. + */ +static uint64 +check_table(Oid tbloid, const char *startblock, const char *endblock, + bool on_error_stop, bool check_toast) +{ + PQExpBuffer querybuf; + PGresult *res; + int i; + char *skip; + char *toast; + const char *stop; + char *error = NULL; + uint64 corruption_cnt = 0; + + if (settings.db == NULL) + fatal("no connection on entry to check_table"); + + if (startblock == NULL) + startblock = "NULL"; + if (endblock == NULL) + endblock = "NULL"; + if (settings.skip_frozen) + skip = pg_strdup("'all frozen'"); + else if (settings.skip_visible) + skip = pg_strdup("'all visible'"); + else + skip = pg_strdup("'none'"); + stop = (on_error_stop) ? "true" : "false"; + toast = (check_toast) ? "true" : "false"; + + querybuf = createPQExpBuffer(); + + appendPQExpBuffer(querybuf, + "SELECT c.relname, v.blkno, v.offnum, v.attnum, v.msg " + "FROM verify_heapam(" + "relation := %u, " + "on_error_stop := %s, " + "skip := %s, " + "check_toast := %s, " + "startblock := %s, " + "endblock := %s) v, " + "pg_catalog.pg_class c " + "WHERE c.oid = %u", + tbloid, stop, skip, toast, startblock, endblock, tbloid); + + res = ExecuteSqlQuery(querybuf->data, &error); + if (PQresultStatus(res) == PGRES_TUPLES_OK && PQntuples(res) > 0) + { + corruption_cnt += PQntuples(res); + for (i = 0; i < PQntuples(res); i++) + { + printf("(relname=%s,blkno=%s,offnum=%s,attnum=%s)\n%s\n", + PQgetvalue(res, i, 0), /* relname */ + PQgetvalue(res, i, 1), /* blkno */ + PQgetvalue(res, i, 2), /* offnum */ + PQgetvalue(res, i, 3), /* attnum */ + PQgetvalue(res, i, 4)); /* msg */ + } + } + else if (error) + { + corruption_cnt++; + printf("%s\n", error); + pfree(error); + } + + PQclear(res); + destroyPQExpBuffer(querybuf); + return corruption_cnt; +} + +static uint64 +check_indexes(Oid tbloid, const SimpleOidList *include_oids, + const SimpleOidList *exclude_oids) +{ + PQExpBuffer querybuf; + PGresult *res; + int i; + char *error = NULL; + uint64 corruption_cnt = 0; + + if (settings.db == NULL) + fatal("no connection on entry to check_indexes"); + + querybuf = createPQExpBuffer(); + appendPQExpBuffer(querybuf, + "SELECT i.indexrelid, ci.relname, ct.relname" + "\nFROM pg_catalog.pg_index i, pg_catalog.pg_class ci, " + "pg_catalog.pg_class ct" + "\nWHERE i.indexrelid = ci.oid" + "\n AND i.indrelid = ct.oid" + "\n AND ci.relam = %u" + "\n AND i.indrelid = %u", + BTREE_AM_OID, tbloid); + include_filter(querybuf, "i.indexrelid", include_oids); + exclude_filter(querybuf, "i.indexrelid", exclude_oids); + + res = ExecuteSqlQuery(querybuf->data, &error); + if (PQresultStatus(res) == PGRES_TUPLES_OK) + { + for (i = 0; i < PQntuples(res); i++) + corruption_cnt += check_index(PQgetvalue(res, i, 0), + PQgetvalue(res, i, 1), + PQgetvalue(res, i, 2)); + } + else if (error) + { + corruption_cnt++; + printf("%s\n", error); + pfree(error); + } + + PQclear(res); + destroyPQExpBuffer(querybuf); + + return corruption_cnt; +} + +static uint64 +check_index(const char *idxoid, const char *idxname, const char *tblname) +{ + PQExpBuffer querybuf; + PGresult *res; + uint64 corruption_cnt = 0; + + if (settings.db == NULL) + fatal("no connection on 
entry to check_index"); + if (idxname == NULL) + fatal("no index name on entry to check_index"); + if (tblname == NULL) + fatal("no table name on entry to check_index"); + + querybuf = createPQExpBuffer(); + appendPQExpBuffer(querybuf, + "SELECT bt_index_parent_check('%s'::regclass, %s, %s)", + idxoid, + settings.heapallindexed ? "true" : "false", + settings.rootdescend ? "true" : "false"); + res = PQexec(settings.db, querybuf->data); + if (PQresultStatus(res) != PGRES_TUPLES_OK) + { + corruption_cnt++; + printf("index check failed for index %s of table %s:\n", + idxname, tblname); + printf("%s", PQerrorMessage(settings.db)); + } + + PQclear(res); + destroyPQExpBuffer(querybuf); + + return corruption_cnt; +} + +static void +parse_cli_options(int argc, char *argv[], ConnectOptions *connOpts) +{ + static struct option long_options[] = + { + {"check-corrupt", no_argument, NULL, 'c'}, + {"check-indexes", no_argument, NULL, 'x'}, + {"check-toast", no_argument, NULL, 'z'}, + {"dbname", required_argument, NULL, 'd'}, + {"endblock", required_argument, NULL, 'e'}, + {"exclude-index", required_argument, NULL, 'I'}, + {"exclude-schema", required_argument, NULL, 'N'}, + {"exclude-table", required_argument, NULL, 'T'}, + {"heapallindexed", no_argument, NULL, 'a'}, + {"help", optional_argument, NULL, '?'}, + {"host", required_argument, NULL, 'h'}, + {"index", required_argument, NULL, 'i'}, + {"no-heapallindexed", no_argument, NULL, 'A'}, + {"no-password", no_argument, NULL, 'w'}, + {"no-rootdescend", no_argument, NULL, 'R'}, + {"on-error-stop", no_argument, NULL, 'o'}, + {"password", no_argument, NULL, 'W'}, + {"port", required_argument, NULL, 'p'}, + {"rootdescend", no_argument, NULL, 'r'}, + {"schema", required_argument, NULL, 'n'}, + {"skip-all-frozen", no_argument, NULL, 'f'}, + {"skip-all-visible", no_argument, NULL, 'v'}, + {"skip-corrupt", no_argument, NULL, 'C'}, + {"skip-indexes", no_argument, NULL, 'X'}, + {"skip-toast", no_argument, NULL, 'Z'}, + {"startblock", required_argument, NULL, 'b'}, + {"strict-names", no_argument, NULL, 's'}, + {"table", required_argument, NULL, 't'}, + {"toast-endblock", required_argument, NULL, 'E'}, + {"toast-startblock", required_argument, NULL, 'B'}, + {"username", required_argument, NULL, 'U'}, + {"version", no_argument, NULL, 'V'}, + {NULL, 0, NULL, 0} + }; + + int optindex; + int c; + + memset(connOpts, 0, sizeof *connOpts); + + while ((c = getopt_long(argc, argv, "aAb:B:cCd:e:E:fh:i:I:n:N:op:rRst:T:U:vVwWxXzZ?1", + long_options, &optindex)) != -1) + { + switch (c) + { + case 'a': + settings.heapallindexed = true; + break; + case 'A': + settings.heapallindexed = false; + break; + case 'b': + settings.startblock = pg_strdup(optarg); + break; + case 'B': + settings.toaststart = pg_strdup(optarg); + break; + case 'c': + settings.check_corrupt = true; + break; + case 'C': + settings.check_corrupt = false; + break; + case 'd': + connOpts->dbname = pg_strdup(optarg); + break; + case 'e': + settings.endblock = pg_strdup(optarg); + break; + case 'E': + settings.toastend = pg_strdup(optarg); + break; + case 'f': + settings.skip_frozen = true; + break; + case 'h': + connOpts->host = pg_strdup(optarg); + break; + case 'i': + simple_string_list_append(&index_include_patterns, optarg); + break; + case 'I': + simple_string_list_append(&index_exclude_patterns, optarg); + break; + case 'n': /* include schema(s) */ + simple_string_list_append(&schema_include_patterns, optarg); + break; + case 'N': /* exclude schema(s) */ + simple_string_list_append(&schema_exclude_patterns, 
optarg); + break; + case 'o': + settings.on_error_stop = true; + break; + case 'p': + connOpts->port = pg_strdup(optarg); + break; + case 's': + settings.strict_names = true; + break; + case 'r': + settings.rootdescend = true; + break; + case 'R': + settings.rootdescend = false; + break; + case 't': /* include table(s) */ + simple_string_list_append(&table_include_patterns, optarg); + break; + case 'T': /* exclude table(s) */ + simple_string_list_append(&table_exclude_patterns, optarg); + break; + case 'U': + connOpts->username = pg_strdup(optarg); + break; + case 'v': + settings.skip_visible = true; + break; + case 'V': + showVersion(); + exit(EXIT_SUCCESS); + case 'w': + settings.getPassword = TRI_NO; + break; + case 'W': + settings.getPassword = TRI_YES; + break; + case 'x': + settings.check_indexes = true; + break; + case 'X': + settings.check_indexes = false; + break; + case 'z': + settings.check_toast = true; + break; + case 'Z': + settings.check_toast = false; + break; + case '?': + if (optind <= argc && + strcmp(argv[optind - 1], "-?") == 0) + { + /* actual help option given */ + usage(); + exit(EXIT_SUCCESS); + } + else + { + /* getopt error (unknown option or missing argument) */ + goto unknown_option; + } + break; + case 1: + { + if (!optarg || strcmp(optarg, "options") == 0) + usage(); + else + goto unknown_option; + + exit(EXIT_SUCCESS); + } + break; + default: + unknown_option: + fprintf(stderr, "Try \"%s --help\" for more information.\n", + settings.progname); + exit(EXIT_FAILURE); + break; + } + } + + /* + * if we still have arguments, use it as the database name and username + */ + while (argc - optind >= 1) + { + if (!connOpts->dbname) + connOpts->dbname = argv[optind]; + else if (!connOpts->username) + connOpts->username = argv[optind]; + else + pg_log_warning("extra command-line argument \"%s\" ignored", + argv[optind]); + + optind++; + } + +} + +/* + * usage + * + * print out command line arguments + */ +static void +usage(void) +{ + int lineno; + + for (lineno = 0; usage_text[lineno]; lineno++) + printf("%s\n", usage_text[lineno]); + printf("Report bugs to <%s>.\n", PACKAGE_BUGREPORT); + printf("%s home page: <%s>\n", PACKAGE_NAME, PACKAGE_URL); +} + +static void +showVersion(void) +{ + puts("pg_amcheck (PostgreSQL) " PG_VERSION); +} + +/* + * for backend Notice messages (INFO, WARNING, etc) + */ +static void +NoticeProcessor(void *arg, const char *message) +{ + (void) arg; /* not used */ + pg_log_info("%s", message); +} + +/* + * Helper function for apply_filter, below. + */ +static void +append_csv_oids(PQExpBuffer querybuf, const SimpleOidList *oids) +{ + const SimpleOidListCell *cell; + const char *comma; + + for (comma = "", cell = oids->head; cell; comma = ", ", cell = cell->next) + appendPQExpBuffer(querybuf, "%s%u", comma, cell->val); +} + +/* + * Internal implementation of include_filter and exclude_filter + */ +static void +apply_filter(PQExpBuffer querybuf, const char *lval, const SimpleOidList *oids, + bool include) +{ + if (!oids || !oids->head) + return; + if (include) + appendPQExpBuffer(querybuf, "\nAND %s OPERATOR(pg_catalog.=) ANY(array[", lval); + else + appendPQExpBuffer(querybuf, "\nAND %s OPERATOR(pg_catalog.!=) ALL(array[", lval); + append_csv_oids(querybuf, oids); + appendPQExpBuffer(querybuf, "]::OID[])"); +} + +/* + * Find and append to the given Oid list the Oids of all schemas matching the + * given list of patterns but not included in the given list of excluded Oids. 
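+ *
+ * Patterns are interpreted by processSQLNamePattern(), so they accept the
+ * same wildcard syntax as psql's \d commands and pg_dump's --schema option.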
+ */ +static void +expand_schema_name_patterns(const SimpleStringList *patterns, + const SimpleOidList *exclude_nsp, + SimpleOidList *oids, + bool strict_names) +{ + PQExpBuffer querybuf; + PGresult *res; + SimpleStringListCell *cell; + int i; + + if (settings.db == NULL) + fatal("no connection on entry to expand_schema_name_patterns"); + + if (patterns->head == NULL) + return; /* nothing to do */ + + querybuf = createPQExpBuffer(); + + /* + * The loop below runs multiple SELECTs might sometimes result in + * duplicate entries in the Oid list, but we don't care. + */ + + for (cell = patterns->head; cell; cell = cell->next) + { + appendPQExpBufferStr(querybuf, + "SELECT oid FROM pg_catalog.pg_namespace n\n"); + processSQLNamePattern(settings.db, querybuf, cell->val, false, + false, NULL, "n.nspname", NULL, NULL); + exclude_filter(querybuf, "n.oid", exclude_nsp); + + res = ExecuteSqlQueryOrDie(querybuf->data); + if (strict_names && PQntuples(res) == 0) + fatal("no matching schemas were found for pattern \"%s\"", + cell->val); + + for (i = 0; i < PQntuples(res); i++) + { + simple_oid_list_append(oids, atooid(PQgetvalue(res, i, 0))); + } + + PQclear(res); + resetPQExpBuffer(querybuf); + } + + destroyPQExpBuffer(querybuf); +} + +/* + * Find and append to the given Oid list the Oids of all relations matching the + * given list of patterns but not included in the given list of excluded Oids + * nor in one of the given excluded namespaces. The relations are filtered by + * the given schema_quals. They are further filtered by the given + * relkind_quals, allowing the caller to restrict the relations to just indexes + * or tables. The missing_errtext should be a message for use in error + * messages if no matching relations are found and strict_names was specified. + */ +static void +expand_relkind_name_patterns(const SimpleStringList *patterns, + const SimpleOidList *exclude_nsp_oids, + const SimpleOidList *exclude_oids, + SimpleOidList *oids, + bool strict_names, + const char *missing_errtext, + const char *relkind_quals) +{ + PQExpBuffer querybuf; + PGresult *res; + SimpleStringListCell *cell; + int i; + + if (settings.db == NULL) + fatal("no connection on entry to expand_relkind_name_patterns"); + + if (patterns->head == NULL) + return; /* nothing to do */ + + querybuf = createPQExpBuffer(); + + /* + * this might sometimes result in duplicate entries in the Oid list, but + * we don't care. + */ + + for (cell = patterns->head; cell; cell = cell->next) + { + /* + * Query must remain ABSOLUTELY devoid of unqualified names. This + * would be unnecessary given a pg_table_is_visible() variant taking a + * search_path argument. 
+ */ + appendPQExpBuffer(querybuf, + "SELECT c.oid" + "\nFROM pg_catalog.pg_class c" + "\n LEFT JOIN pg_catalog.pg_namespace n" + "\n ON n.oid OPERATOR(pg_catalog.=) c.relnamespace" + "\nWHERE c.relkind OPERATOR(pg_catalog.=) %s\n", + relkind_quals); + exclude_filter(querybuf, "c.oid", exclude_oids); + exclude_filter(querybuf, "n.oid", exclude_nsp_oids); + processSQLNamePattern(settings.db, querybuf, cell->val, true, + false, "n.nspname", "c.relname", NULL, NULL); + res = ExecuteSqlQueryOrDie(querybuf->data); + if (strict_names && PQntuples(res) == 0) + fatal("%s \"%s\"", missing_errtext, cell->val); + + for (i = 0; i < PQntuples(res); i++) + simple_oid_list_append(oids, atooid(PQgetvalue(res, i, 0))); + + PQclear(res); + resetPQExpBuffer(querybuf); + } + + destroyPQExpBuffer(querybuf); +} + +/* + * Find the Oids of all tables matching the given list of patterns, + * and append them to the given Oid list. + */ +static void +expand_table_name_patterns(const SimpleStringList *patterns, const SimpleOidList *exclude_nsp_oids, + const SimpleOidList *exclude_oids, SimpleOidList *oids, bool strict_names) +{ + expand_relkind_name_patterns(patterns, exclude_nsp_oids, exclude_oids, oids, strict_names, + "no matching tables were found for pattern", + get_table_relkind_quals()); +} + +/* + * Find the Oids of all indexes matching the given list of patterns, + * and append them to the given Oid list. + */ +static void +expand_index_name_patterns(const SimpleStringList *patterns, const SimpleOidList *exclude_nsp_oids, + const SimpleOidList *exclude_oids, SimpleOidList *oids, bool strict_names) +{ + expand_relkind_name_patterns(patterns, exclude_nsp_oids, exclude_oids, oids, strict_names, + "no matching indexes were found for pattern", + get_index_relkind_quals()); +} + +static void +get_table_check_list(const SimpleOidList *include_nsp, const SimpleOidList *exclude_nsp, + const SimpleOidList *include_tbl, const SimpleOidList *exclude_tbl, + SimpleOidList *checklist) +{ + PQExpBuffer querybuf; + PGresult *res; + int i; + + if (settings.db == NULL) + fatal("no connection on entry to get_table_check_list"); + + querybuf = createPQExpBuffer(); + + appendPQExpBuffer(querybuf, + "SELECT c.oid" + "\nFROM pg_catalog.pg_class c, pg_catalog.pg_namespace n" + "\nWHERE n.oid OPERATOR(pg_catalog.=) c.relnamespace" + "\n AND c.relkind OPERATOR(pg_catalog.=) %s\n", + get_table_relkind_quals()); + include_filter(querybuf, "n.oid", include_nsp); + exclude_filter(querybuf, "n.oid", exclude_nsp); + include_filter(querybuf, "c.oid", include_tbl); + exclude_filter(querybuf, "c.oid", exclude_tbl); + + res = ExecuteSqlQueryOrDie(querybuf->data); + for (i = 0; i < PQntuples(res); i++) + simple_oid_list_append(checklist, atooid(PQgetvalue(res, i, 0))); + + PQclear(res); + destroyPQExpBuffer(querybuf); +} + +static PGresult * +ExecuteSqlQueryOrDie(const char *query) +{ + PGresult *res; + + res = PQexec(settings.db, query); + if (PQresultStatus(res) != PGRES_TUPLES_OK) + die_on_query_failure(query); + return res; +} + +/* + * Execute the given SQL query. This function should only be used for queries + * which are not expected to fail under normal circumstances, as failures will + * result in the printing of error messages, which will look a bit messy when + * interleaved with corruption reports. + * + * On error, use the supplied error_context string and the error string + * returned from the database connection to print an error message for the + * user. + * + * The error_context argument is pfree'd by us at the end of the call. 
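+ *
+ * (The error text itself is handed back through the *error output parameter;
+ * callers such as check_table() and check_indexes() print it and pfree it.)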
+ */ +static PGresult * +ExecuteSqlQuery(const char *query, char **error) +{ + PGresult *res; + + res = PQexec(settings.db, query); + if (PQresultStatus(res) != PGRES_TUPLES_OK) + *error = pstrdup(PQerrorMessage(settings.db)); + return res; +} + +/* + * Return the cached relkind quals string for tables, computing it first if we + * don't have one cached. + */ +static const char * +get_table_relkind_quals(void) +{ + if (!table_relkind_quals) + table_relkind_quals = psprintf("ANY(array['%c', '%c', '%c'])", + RELKIND_RELATION, RELKIND_MATVIEW, + RELKIND_PARTITIONED_TABLE); + return table_relkind_quals; +} + +/* + * Return the cached relkind quals string for indexes, computing it first if we + * don't have one cached. + */ +static const char * +get_index_relkind_quals(void) +{ + if (!index_relkind_quals) + index_relkind_quals = psprintf("'%c'", RELKIND_INDEX); + return index_relkind_quals; +} diff --git a/contrib/pg_amcheck/pg_amcheck.control b/contrib/pg_amcheck/pg_amcheck.control new file mode 100644 index 0000000000..395f368101 --- /dev/null +++ b/contrib/pg_amcheck/pg_amcheck.control @@ -0,0 +1,5 @@ +# pg_amcheck extension +comment = 'command-line tool for verifying relation integrity' +default_version = '1.3' +module_pathname = '$libdir/pg_amcheck' +relocatable = true diff --git a/contrib/pg_amcheck/t/001_basic.pl b/contrib/pg_amcheck/t/001_basic.pl new file mode 100644 index 0000000000..dfa0ae9e06 --- /dev/null +++ b/contrib/pg_amcheck/t/001_basic.pl @@ -0,0 +1,9 @@ +use strict; +use warnings; + +use TestLib; +use Test::More tests => 8; + +program_help_ok('pg_amcheck'); +program_version_ok('pg_amcheck'); +program_options_handling_ok('pg_amcheck'); diff --git a/contrib/pg_amcheck/t/002_nonesuch.pl b/contrib/pg_amcheck/t/002_nonesuch.pl new file mode 100644 index 0000000000..68be9c6585 --- /dev/null +++ b/contrib/pg_amcheck/t/002_nonesuch.pl @@ -0,0 +1,60 @@ +use strict; +use warnings; + +use PostgresNode; +use TestLib; +use Test::More tests => 14; + +# Test set-up +my ($node, $port); +$node = get_new_node('test'); +$node->init; +$node->start; +$port = $node->port; + +# Load the amcheck extension, upon which pg_amcheck depends +$node->safe_psql('postgres', q(CREATE EXTENSION amcheck)); + +######################################### +# Test connecting to a non-existent database + +command_fails_like( + [ 'pg_amcheck', '-p', "$port", 'qqq' ], + qr/\Qpg_amcheck: error: could not connect to server: FATAL: database "qqq" does not exist\E/, + 'connecting to a non-existent database'); + +######################################### +# Test connecting with a non-existent user + +command_fails_like( + [ 'pg_amcheck', '-p', "$port", '-U=no_such_user' ], + qr/\Qpg_amcheck: error: could not connect to server: FATAL: role "=no_such_user" does not exist\E/, + 'connecting with a non-existent user'); + +######################################### +# Test checking a non-existent schema, table, and patterns with --strict-names + +command_fails_like( + [ 'pg_amcheck', '-p', "$port", '-n', 'nonexistent' ], + qr/\Qpg_amcheck: error: no matching schemas were found\E/, + 'checking a non-existent schema'); + +command_fails_like( + [ 'pg_amcheck', '-p', "$port", '-t', 'nonexistent' ], + qr/\Qpg_amcheck: error: no matching tables were found\E/, + 'checking a non-existent table'); + +command_fails_like( + [ 'pg_amcheck', '-p', "$port", '--strict-names', '-n', 'nonexistent*' ], + qr/\Qpg_amcheck: error: no matching schemas were found for pattern\E/, + 'no matching schemas'); + +command_fails_like( + [ 'pg_amcheck', '-p', 
"$port", '--strict-names', '-t', 'nonexistent*' ], + qr/\Qpg_amcheck: error: no matching tables were found for pattern\E/, + 'no matching tables'); + +command_fails_like( + [ 'pg_amcheck', '-p', "$port", '--strict-names', '-i', 'nonexistent*' ], + qr/\Qpg_amcheck: error: no matching indexes were found for pattern\E/, + 'no matching indexes'); diff --git a/contrib/pg_amcheck/t/003_check.pl b/contrib/pg_amcheck/t/003_check.pl new file mode 100644 index 0000000000..4d8e61d871 --- /dev/null +++ b/contrib/pg_amcheck/t/003_check.pl @@ -0,0 +1,231 @@ +use strict; +use warnings; + +use PostgresNode; +use TestLib; +use Test::More tests => 39; + +my ($node, $port); + +# Returns the filesystem path for the named relation. +# +# Assumes the test node is running +sub relation_filepath($) +{ + my ($relname) = @_; + + my $pgdata = $node->data_dir; + my $rel = $node->safe_psql('postgres', + qq(SELECT pg_relation_filepath('$relname'))); + die "path not found for relation $relname" unless defined $rel; + return "$pgdata/$rel"; +} + +# Stops the test node, corrupts the first page of the named relation, and +# restarts the node. +# +# Assumes the node is running. +sub corrupt_first_page($) +{ + my ($relname) = @_; + my $relpath = relation_filepath($relname); + + $node->stop; + my $fh; + open($fh, '+<', $relpath); + binmode $fh; + seek($fh, 32, 0); + syswrite($fh, '\x77\x77\x77\x77', 500); + close($fh); + $node->start; +} + +# Stops the test node, unlinks the file from the filesystem that backs the +# relation, and restarts the node. +# +# Assumes the test node is running +sub remove_relation_file($) +{ + my ($relname) = @_; + my $relpath = relation_filepath($relname); + + $node->stop(); + unlink($relpath); + $node->start; +} + +# Test set-up +$node = get_new_node('test'); +$node->init; +$node->start; +$port = $node->port; + +# Load the amcheck extension, upon which pg_amcheck depends +$node->safe_psql('postgres', q(CREATE EXTENSION amcheck)); + +# Create schemas and tables for checking pg_amcheck's include +# and exclude schema and table command line options +$node->safe_psql('postgres', q( +-- We'll corrupt all indexes in s1 +CREATE SCHEMA s1; +CREATE TABLE s1.t1 (a TEXT); +CREATE TABLE s1.t2 (a TEXT); +CREATE INDEX i1 ON s1.t1(a); +CREATE INDEX i2 ON s1.t2(a); +INSERT INTO s1.t1 (a) (SELECT gs::TEXT FROM generate_series(1,10000) AS gs); +INSERT INTO s1.t2 (a) (SELECT gs::TEXT FROM generate_series(1,10000) AS gs); + +-- We'll corrupt all tables in s2 +CREATE SCHEMA s2; +CREATE TABLE s2.t1 (a TEXT); +CREATE TABLE s2.t2 (a TEXT); +CREATE INDEX i1 ON s2.t1(a); +CREATE INDEX i2 ON s2.t2(a); +INSERT INTO s2.t1 (a) (SELECT gs::TEXT FROM generate_series(1,10000) AS gs); +INSERT INTO s2.t2 (a) (SELECT gs::TEXT FROM generate_series(1,10000) AS gs); + +-- We'll corrupt all tables and indexes in s3 +CREATE SCHEMA s3; +CREATE TABLE s3.t1 (a TEXT); +CREATE TABLE s3.t2 (a TEXT); +CREATE INDEX i1 ON s3.t1(a); +CREATE INDEX i2 ON s3.t2(a); +INSERT INTO s3.t1 (a) (SELECT gs::TEXT FROM generate_series(1,10000) AS gs); +INSERT INTO s3.t2 (a) (SELECT gs::TEXT FROM generate_series(1,10000) AS gs); + +-- We'll leave everything in s4 uncorrupted +CREATE SCHEMA s4; +CREATE TABLE s4.t1 (a TEXT); +CREATE TABLE s4.t2 (a TEXT); +CREATE INDEX i1 ON s4.t1(a); +CREATE INDEX i2 ON s4.t2(a); +INSERT INTO s4.t1 (a) (SELECT gs::TEXT FROM generate_series(1,10000) AS gs); +INSERT INTO s4.t2 (a) (SELECT gs::TEXT FROM generate_series(1,10000) AS gs); +)); + +# Corrupt indexes in schema "s1" +remove_relation_file('s1.i1'); 
+corrupt_first_page('s1.i2'); + +# Corrupt tables in schema "s2" +remove_relation_file('s2.t1'); +corrupt_first_page('s2.t2'); + +# Corrupt tables and indexes in schema "s3" +remove_relation_file('s3.i1'); +corrupt_first_page('s3.i2'); +remove_relation_file('s3.t1'); +corrupt_first_page('s3.t2'); + +# Leave schema "s4" alone + + +# The pg_amcheck command itself should return a success exit status, even +# though tables and indexes are corrupt. An error code returned would mean the +# pg_amcheck command itself failed, for example because a connection to the +# database could not be established. +# +# For these checks, we're ignoring any corruption reported and focusing +# exclusively on the exit code from pg_amcheck. +# +$node->command_ok( + [ 'pg_amcheck', '-x', '-p', $port, 'postgres' ], + 'pg_amcheck all schemas and tables'); + +$node->command_ok( + [ 'pg_amcheck', '-p', $port, 'postgres' ], + 'pg_amcheck all schemas, tables and indexes'); + +$node->command_ok( + [ 'pg_amcheck', '-p', $port, 'postgres', '-n', 's1' ], + 'pg_amcheck all objects in schema s1'); + +$node->command_ok( + [ 'pg_amcheck', '-p', $port, 'postgres', '-n', 's*', '-t', 't1' ], + 'pg_amcheck all tables named t1 and their indexes'); + +$node->command_ok( + [ 'pg_amcheck', '-x', '-p', $port, 'postgres', '-T', 't1' ], + 'pg_amcheck all tables not named t1'); + +$node->command_ok( + [ 'pg_amcheck', '-x', '-p', $port, 'postgres', '-N', 's1', '-T', 't1' ], + 'pg_amcheck all tables not named t1 nor in schema s1'); + +# Scans of indexes in s1 should detect the specific corruption that we created +# above. For missing relation forks, we know what the error message looks +# like. For corrupted index pages, the error might vary depending on how the +# page was formatted on disk, including variations due to alignment differences +# between platforms, so we accept any non-empty error message. +# +$node->command_like( + [ 'pg_amcheck', '-x', '-p', $port, 'postgres', '-n', 's1', '-i', 'i1' ], + qr/index "i1" lacks a main relation fork/, + 'pg_amcheck index s1.i1 reports missing main relation fork'); + +$node->command_like( + [ 'pg_amcheck', '-x', '-p', $port, 'postgres', '-n', 's1', '-i', 'i2' ], + qr/.+/, # Any non-empty error message is acceptable + 'pg_amcheck index s1.s2 reports index corruption'); + + +# In schema s3, the tables and indexes are both corrupt. Ordinarily, checking +# of indexes will not be performed for corrupt tables, but the --check-corrupt +# option (-c) forces the indexes to also be checked. +# +$node->command_like( + [ 'pg_amcheck', '-x', '-c', '-p', $port, 'postgres', '-n', 's3', '-i', 'i1' ], + qr/index "i1" lacks a main relation fork/, + 'pg_amcheck index s3.i1 reports missing main relation fork'); + +$node->command_like( + [ 'pg_amcheck', '-x', '-c', '-p', $port, 'postgres', '-n', 's3', '-i', 'i2' ], + qr/.+/, # Any non-empty error message is acceptable + 'pg_amcheck index s3.s2 reports index corruption'); + + +# Check that '-x' and '-X' work as expected. Since only index corruption +# (and not table corruption) exists in s1, '-X' should give no errors, and +# '-x' should give errors about index corruption. 
+# +$node->command_like( + [ 'pg_amcheck', '-x', '-p', $port, 'postgres', '-n', 's1' ], + qr/.+/, # Any non-empty error message is acceptable + 'pg_amcheck over tables and indexes in schema s1 reports corruption'); + +$node->command_like( + [ 'pg_amcheck', '-X', '-p', $port, 'postgres', '-n', 's1' ], + qr/^$/, # Empty + 'pg_amcheck over only tables in schema s1 reports no corruption'); + + +# Check that table corruption is reported as expected, with or without +# index checking +# +$node->command_like( + [ 'pg_amcheck', '-x', '-p', $port, 'postgres', '-n', 's2' ], + qr/could not open file/, + 'pg_amcheck over tables in schema s2 reports table corruption'); + +$node->command_like( + [ 'pg_amcheck', '-p', $port, 'postgres', '-n', 's2' ], + qr/could not open file/, + 'pg_amcheck over tables and indexes in schema s2 reports table corruption'); + +# Check that no corruption is reported in schema s4 +$node->command_like( + [ 'pg_amcheck', '-p', $port, 'postgres', '-n', 's4' ], + qr/^$/, # Empty + 'pg_amcheck over schema s4 reports no corruption'); + +# Check that no corruption is reported if we exclude corrupt schemas +$node->command_like( + [ 'pg_amcheck', '-p', $port, 'postgres', '-N', 's1', '-N', 's2', '-N', 's3' ], + qr/^$/, # Empty + 'pg_amcheck excluding corrupt schemas reports no corruption'); + +# Check that no corruption is reported if we exclude corrupt tables +$node->command_like( + [ 'pg_amcheck', '-p', $port, 'postgres', '-T', 't1', '-T', 't2' ], + qr/^$/, # Empty + 'pg_amcheck excluding corrupt tables reports no corruption'); diff --git a/contrib/pg_amcheck/t/004_verify_heapam.pl b/contrib/pg_amcheck/t/004_verify_heapam.pl new file mode 100644 index 0000000000..1cc36b25b7 --- /dev/null +++ b/contrib/pg_amcheck/t/004_verify_heapam.pl @@ -0,0 +1,489 @@ +use strict; +use warnings; + +use PostgresNode; +use TestLib; + +use Test::More tests => 22; + +# This regression test demonstrates that the pg_amcheck binary supplied with +# the pg_amcheck contrib module correctly identifies specific kinds of +# corruption within pages. To test this, we need a mechanism to create corrupt +# pages with predictable, repeatable corruption. The postgres backend cannot +# be expected to help us with this, as its design is not consistent with the +# goal of intentionally corrupting pages. +# +# Instead, we create a table to corrupt, and with careful consideration of how +# postgresql lays out heap pages, we seek to offsets within the page and +# overwrite deliberately chosen bytes with specific values calculated to +# corrupt the page in expected ways. We then verify that pg_amcheck reports +# the corruption, and that it runs without crashing. Note that the backend +# cannot simply be started to run queries against the corrupt table, as the +# backend will crash, at least for some of the corruption types we generate. +# +# Autovacuum potentially touching the table in the background makes the exact +# behavior of this test harder to reason about. We turn it off to keep things +# simpler. We use a "belt and suspenders" approach, turning it off for the +# system generally in postgresql.conf, and turning it off specifically for the +# test table. +# +# This test depends on the table being written to the heap file exactly as we +# expect it to be, so we take care to arrange the columns of the table, and +# insert rows of the table, that give predictable sizes and locations within +# the table page. +# +# The HeapTupleHeaderData has 23 bytes of fixed size fields before the variable +# length t_bits[] array. 
We have exactly 3 columns in the table, so natts = 3, +# t_bits is 1 byte long, and t_hoff = MAXALIGN(23 + 1) = 24. +# +# We're not too fussy about which datatypes we use for the test, but we do care +# about some specific properties. We'd like to test both fixed size and +# varlena types. We'd like some varlena data inline and some toasted. And +# we'd like the layout of the table such that the datums land at predictable +# offsets within the tuple. We choose a structure without padding on all +# supported architectures: +# +# a BIGINT +# b TEXT +# c TEXT +# +# We always insert a 7-ascii character string into field 'b', which with a +# 1-byte varlena header gives an 8 byte inline value. We always insert a long +# text string in field 'c', long enough to force toast storage. +# +# We choose to read and write binary copies of our table's tuples, using perl's +# pack() and unpack() functions. Perl uses a packing code system in which: +# +# L = "Unsigned 32-bit Long", +# S = "Unsigned 16-bit Short", +# C = "Unsigned 8-bit Octet", +# c = "signed 8-bit octet", +# q = "signed 64-bit quadword" +# +# Each tuple in our table has a layout as follows: +# +# xx xx xx xx t_xmin: xxxx offset = 0 L +# xx xx xx xx t_xmax: xxxx offset = 4 L +# xx xx xx xx t_field3: xxxx offset = 8 L +# xx xx bi_hi: xx offset = 12 S +# xx xx bi_lo: xx offset = 14 S +# xx xx ip_posid: xx offset = 16 S +# xx xx t_infomask2: xx offset = 18 S +# xx xx t_infomask: xx offset = 20 S +# xx t_hoff: x offset = 22 C +# xx t_bits: x offset = 23 C +# xx xx xx xx xx xx xx xx 'a': xxxxxxxx offset = 24 q +# xx xx xx xx xx xx xx xx 'b': xxxxxxxx offset = 32 Cccccccc +# xx xx xx xx xx xx xx xx 'c': xxxxxxxx offset = 40 SSSS +# xx xx xx xx xx xx xx xx : xxxxxxxx ...continued SSSS +# xx xx : xx ...continued S +# +# We could choose to read and write columns 'b' and 'c' in other ways, but +# it is convenient enough to do it this way. We define packing code +# constants here, where they can be compared easily against the layout. + +use constant HEAPTUPLE_PACK_CODE => 'LLLSSSSSCCqCcccccccSSSSSSSSS'; +use constant HEAPTUPLE_PACK_LENGTH => 58; # Total size + +# Read a tuple of our table from a heap page. +# +# Takes an open filehandle to the heap file, and the offset of the tuple. +# +# Rather than returning the binary data from the file, unpacks the data into a +# perl hash with named fields. These fields exactly match the ones understood +# by write_tuple(), below. Returns a reference to this hash. +# +sub read_tuple ($$) +{ + my ($fh, $offset) = @_; + my ($buffer, %tup); + seek($fh, $offset, 0); + sysread($fh, $buffer, HEAPTUPLE_PACK_LENGTH); + + @_ = unpack(HEAPTUPLE_PACK_CODE, $buffer); + %tup = (t_xmin => shift, + t_xmax => shift, + t_field3 => shift, + bi_hi => shift, + bi_lo => shift, + ip_posid => shift, + t_infomask2 => shift, + t_infomask => shift, + t_hoff => shift, + t_bits => shift, + a => shift, + b_header => shift, + b_body1 => shift, + b_body2 => shift, + b_body3 => shift, + b_body4 => shift, + b_body5 => shift, + b_body6 => shift, + b_body7 => shift, + c1 => shift, + c2 => shift, + c3 => shift, + c4 => shift, + c5 => shift, + c6 => shift, + c7 => shift, + c8 => shift, + c9 => shift); + # Stitch together the text for column 'b' + $tup{b} = join('', map { chr($tup{"b_body$_"}) } (1..7)); + return \%tup; +} + +# Write a tuple of our table to a heap page. +# +# Takes an open filehandle to the heap file, the offset of the tuple, and a +# reference to a hash with the tuple values, as returned by read_tuple(). 
+# Writes the tuple fields from the hash into the heap file. +# +# The purpose of this function is to write a tuple back to disk with some +# subset of fields modified. The function does no error checking. Use +# cautiously. +# +sub write_tuple($$$) +{ + my ($fh, $offset, $tup) = @_; + my $buffer = pack(HEAPTUPLE_PACK_CODE, + $tup->{t_xmin}, + $tup->{t_xmax}, + $tup->{t_field3}, + $tup->{bi_hi}, + $tup->{bi_lo}, + $tup->{ip_posid}, + $tup->{t_infomask2}, + $tup->{t_infomask}, + $tup->{t_hoff}, + $tup->{t_bits}, + $tup->{a}, + $tup->{b_header}, + $tup->{b_body1}, + $tup->{b_body2}, + $tup->{b_body3}, + $tup->{b_body4}, + $tup->{b_body5}, + $tup->{b_body6}, + $tup->{b_body7}, + $tup->{c1}, + $tup->{c2}, + $tup->{c3}, + $tup->{c4}, + $tup->{c5}, + $tup->{c6}, + $tup->{c7}, + $tup->{c8}, + $tup->{c9}); + seek($fh, $offset, 0); + syswrite($fh, $buffer, HEAPTUPLE_PACK_LENGTH); + return; +} + +# Set umask so test directories and files are created with default permissions +umask(0077); + +# Set up the node. Once we create and corrupt the table, +# autovacuum workers visiting the table could crash the backend. +# Disable autovacuum so that won't happen. +my $node = get_new_node('test'); +$node->init; +$node->append_conf('postgresql.conf', 'autovacuum=off'); + +# Start the node and load the extensions. We depend on both +# amcheck and pageinspect for this test. +$node->start; +my $port = $node->port; +my $pgdata = $node->data_dir; +$node->safe_psql('postgres', "CREATE EXTENSION amcheck"); +$node->safe_psql('postgres', "CREATE EXTENSION pageinspect"); + +# Get a non-zero datfrozenxid +$node->safe_psql('postgres', qq(VACUUM FREEZE)); + +# Create the test table with precisely the schema that our corruption function +# expects. +$node->safe_psql( + 'postgres', qq( + CREATE TABLE public.test (a BIGINT, b TEXT, c TEXT); + ALTER TABLE public.test SET (autovacuum_enabled=false); + ALTER TABLE public.test ALTER COLUMN c SET STORAGE EXTERNAL; + CREATE INDEX test_idx ON public.test(a, b); + )); + +# We want (0 < datfrozenxid < test.relfrozenxid). To achieve this, we freeze +# an otherwise unused table, public.junk, prior to inserting data and freezing +# public.test +$node->safe_psql( + 'postgres', qq( + CREATE TABLE public.junk AS SELECT 'junk'::TEXT AS junk_column; + ALTER TABLE public.junk SET (autovacuum_enabled=false); + VACUUM FREEZE public.junk + )); + +my $rel = $node->safe_psql('postgres', qq(SELECT pg_relation_filepath('public.test'))); +my $relpath = "$pgdata/$rel"; + +# Insert data and freeze public.test +use constant ROWCOUNT => 16; +$node->safe_psql('postgres', qq( + INSERT INTO public.test (a, b, c) + VALUES ( + 12345678, + 'abcdefg', + repeat('w', 10000) + ); + VACUUM FREEZE public.test + )) for (1..ROWCOUNT); + +my $relfrozenxid = $node->safe_psql('postgres', + q(select relfrozenxid from pg_class where relname = 'test')); +my $datfrozenxid = $node->safe_psql('postgres', + q(select datfrozenxid from pg_database where datname = 'postgres')); + +# Find where each of the tuples is located on the page. +my @lp_off; +for my $tup (0..ROWCOUNT-1) +{ + push (@lp_off, $node->safe_psql('postgres', qq( +select lp_off from heap_page_items(get_raw_page('test', 'main', 0)) + offset $tup limit 1))); +} + +# Check that pg_amcheck runs against the uncorrupted table without error. +$node->command_ok(['pg_amcheck', '-p', $port, 'postgres'], + 'pg_amcheck test table, prior to corruption'); + +# Check that pg_amcheck runs against the uncorrupted table and index without error. 
+$node->command_ok(['pg_amcheck', '-p', $port, 'postgres'], + 'pg_amcheck test table and index, prior to corruption'); + +$node->stop; + +# Sanity check that our 'test' table has a relfrozenxid newer than the +# datfrozenxid for the database, and that the datfrozenxid is greater than the +# first normal xid. We rely on these invariants in some of our tests. +if ($datfrozenxid <= 3 || $datfrozenxid >= $relfrozenxid) +{ + fail('Xid thresholds not as expected'); + $node->clean_node; + exit; +} + +# Some #define constants from access/htup_details.h for use while corrupting. +use constant HEAP_HASNULL => 0x0001; +use constant HEAP_XMAX_LOCK_ONLY => 0x0080; +use constant HEAP_XMIN_COMMITTED => 0x0100; +use constant HEAP_XMIN_INVALID => 0x0200; +use constant HEAP_XMAX_COMMITTED => 0x0400; +use constant HEAP_XMAX_INVALID => 0x0800; +use constant HEAP_NATTS_MASK => 0x07FF; +use constant HEAP_XMAX_IS_MULTI => 0x1000; +use constant HEAP_KEYS_UPDATED => 0x2000; + +# Helper functions +sub header +{ + my ($blkno, $offnum, $attnum) = @_; + qr/\(relname=test,blkno=$blkno,offnum=$offnum,attnum=$attnum\)\s+/ms; +} + +# Corrupt the tuples, one type of corruption per tuple. Some types of +# corruption cause verify_heapam to skip to the next tuple without +# performing any remaining checks, so we can't exercise the system properly if +# we focus all our corruption on a single tuple. +# +my @expected; +my $file; +open($file, '+<', $relpath); +binmode $file; + +for (my $tupidx = 0; $tupidx < ROWCOUNT; $tupidx++) +{ + my $offnum = $tupidx + 1; # offnum is 1-based, not zero-based + my $offset = $lp_off[$tupidx]; + my $tup = read_tuple($file, $offset); + + # Sanity-check that the data appears on the page where we expect. + if ($tup->{a} ne '12345678' || $tup->{b} ne 'abcdefg') + { + fail('Page layout differs from our expectations'); + $node->clean_node; + exit; + } + + my $header = header(0, $offnum, ''); + if ($offnum == 1) + { + # Corruptly set xmin < relfrozenxid + my $xmin = $relfrozenxid - 1; + $tup->{t_xmin} = $xmin; + $tup->{t_infomask} &= ~HEAP_XMIN_COMMITTED; + $tup->{t_infomask} &= ~HEAP_XMIN_INVALID; + + # Expected corruption report + push @expected, + qr/${header}xmin $xmin precedes relation freeze threshold 0:\d+/; + } + if ($offnum == 2) + { + # Corruptly set xmin < datfrozenxid + my $xmin = 3; + $tup->{t_xmin} = $xmin; + $tup->{t_infomask} &= ~HEAP_XMIN_COMMITTED; + $tup->{t_infomask} &= ~HEAP_XMIN_INVALID; + + push @expected, + qr/${$header}xmin $xmin precedes oldest valid transaction ID 0:\d+/; + } + elsif ($offnum == 3) + { + # Corruptly set xmin < datfrozenxid, further back, noting circularity + # of xid comparison. 
For a new cluster with epoch = 0, the corrupt + # xmin will be interpreted as in the future + $tup->{t_xmin} = 4026531839; + $tup->{t_infomask} &= ~HEAP_XMIN_COMMITTED; + $tup->{t_infomask} &= ~HEAP_XMIN_INVALID; + + push @expected, + qr/${$header}xmin 4026531839 equals or exceeds next valid transaction ID 0:\d+/; + } + elsif ($offnum == 4) + { + # Corruptly set xmax < relminmxid; + $tup->{t_xmax} = 4026531839; + $tup->{t_infomask} &= ~HEAP_XMAX_INVALID; + + push @expected, + qr/${$header}xmax 4026531839 equals or exceeds next valid transaction ID 0:\d+/; + } + elsif ($offnum == 5) + { + # Corrupt the tuple t_hoff, but keep it aligned properly + $tup->{t_hoff} += 128; + + push @expected, + qr/${$header}data begins at offset 152 beyond the tuple length 58/, + qr/${$header}tuple data should begin at byte 24, but actually begins at byte 152 \(3 attributes, no nulls\)/; + } + elsif ($offnum == 6) + { + # Corrupt the tuple t_hoff, wrong alignment + $tup->{t_hoff} += 3; + + push @expected, + qr/${$header}tuple data should begin at byte 24, but actually begins at byte 27 \(3 attributes, no nulls\)/; + } + elsif ($offnum == 7) + { + # Corrupt the tuple t_hoff, underflow but correct alignment + $tup->{t_hoff} -= 8; + + push @expected, + qr/${$header}tuple data should begin at byte 24, but actually begins at byte 16 \(3 attributes, no nulls\)/; + } + elsif ($offnum == 8) + { + # Corrupt the tuple t_hoff, underflow and wrong alignment + $tup->{t_hoff} -= 3; + + push @expected, + qr/${$header}tuple data should begin at byte 24, but actually begins at byte 21 \(3 attributes, no nulls\)/; + } + elsif ($offnum == 9) + { + # Corrupt the tuple to look like it has lots of attributes, not just 3 + $tup->{t_infomask2} |= HEAP_NATTS_MASK; + + push @expected, + qr/${$header}number of attributes 2047 exceeds maximum expected for table 3/; + } + elsif ($offnum == 10) + { + # Corrupt the tuple to look like it has lots of attributes, some of + # them null. This falsely creates the impression that the t_bits + # array is longer than just one byte, but t_hoff still says otherwise. 
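+  # (With 2047 attributes and nulls present, the null bitmap would need
+  # BITMAPLEN(2047) = 256 bytes, so the expected start of tuple data becomes
+  # MAXALIGN(23 + 256) = 280, which is where the 280 in the message below
+  # comes from.)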
+ $tup->{t_infomask} |= HEAP_HASNULL; + $tup->{t_infomask2} |= HEAP_NATTS_MASK; + $tup->{t_bits} = 0xAA; + + push @expected, + qr/${$header}tuple data should begin at byte 280, but actually begins at byte 24 \(2047 attributes, has nulls\)/; + } + elsif ($offnum == 11) + { + # Same as above, but this time t_hoff plays along + $tup->{t_infomask} |= HEAP_HASNULL; + $tup->{t_infomask2} |= (HEAP_NATTS_MASK & 0x40); + $tup->{t_bits} = 0xAA; + $tup->{t_hoff} = 32; + + push @expected, + qr/${$header}number of attributes 67 exceeds maximum expected for table 3/; + } + elsif ($offnum == 12) + { + # Corrupt the bits in column 'b' 1-byte varlena header + $tup->{b_header} = 0x80; + + $header = header(0, $offnum, 1); + push @expected, + qr/${header}attribute 1 with length 4294967295 ends at offset 416848000 beyond total tuple length 58/; + } + elsif ($offnum == 13) + { + # Corrupt the bits in column 'c' toast pointer + $tup->{c6} = 41; + $tup->{c7} = 41; + + $header = header(0, $offnum, 2); + push @expected, + qr/${header}final toast chunk number 0 differs from expected value 6/, + qr/${header}toasted value for attribute 2 missing from toast table/; + } + elsif ($offnum == 14) + { + # Set both HEAP_XMAX_LOCK_ONLY and HEAP_KEYS_UPDATED + $tup->{t_infomask} |= HEAP_XMAX_LOCK_ONLY; + $tup->{t_infomask2} |= HEAP_KEYS_UPDATED; + + push @expected, + qr/${header}tuple is marked as only locked, but also claims key columns were updated/; + } + elsif ($offnum == 15) + { + # Set both HEAP_XMAX_COMMITTED and HEAP_XMAX_IS_MULTI + $tup->{t_infomask} |= HEAP_XMAX_COMMITTED; + $tup->{t_infomask} |= HEAP_XMAX_IS_MULTI; + $tup->{t_xmax} = 4; + + push @expected, + qr/${header}multitransaction ID 4 equals or exceeds next valid multitransaction ID 1/; + } + elsif ($offnum == 16) # Last offnum must equal ROWCOUNT + { + # Set both HEAP_XMAX_COMMITTED and HEAP_XMAX_IS_MULTI + $tup->{t_infomask} |= HEAP_XMAX_COMMITTED; + $tup->{t_infomask} |= HEAP_XMAX_IS_MULTI; + $tup->{t_xmax} = 4000000000; + + push @expected, + qr/${header}multitransaction ID 4000000000 precedes relation minimum multitransaction ID threshold 1/; + } + write_tuple($file, $offset, $tup); +} +close($file); +$node->start; + +# Run pg_amcheck against the corrupt table with epoch=0, comparing actual +# corruption messages against the expected messages +$node->command_checks_all( + ['pg_amcheck', '--check-toast', '--skip-indexes', '-p', $port, 'postgres'], + 0, + [ @expected ], + [ qr/^$/ ], + 'Expected corruption message output'); + +$node->teardown_node; +$node->clean_node; diff --git a/contrib/pg_amcheck/t/005_opclass_damage.pl b/contrib/pg_amcheck/t/005_opclass_damage.pl new file mode 100644 index 0000000000..fdbb1ea402 --- /dev/null +++ b/contrib/pg_amcheck/t/005_opclass_damage.pl @@ -0,0 +1,52 @@ +# This regression test checks the behavior of the btree validation in the +# presence of breaking sort order changes. +# +use strict; +use warnings; +use PostgresNode; +use TestLib; +use Test::More tests => 6; + +my $node = get_new_node('test'); +$node->init; +$node->start; + +# Create a custom operator class and an index which uses it. 
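+# (int4_asc_cmp sorts in the same ascending order as the built-in int4
+# comparison, so the index built with int4_fickle_ops starts out consistent
+# and the first pg_amcheck run below should report nothing.)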
+$node->safe_psql('postgres', q( + CREATE EXTENSION amcheck; + + CREATE FUNCTION int4_asc_cmp (a int4, b int4) RETURNS int LANGUAGE sql AS $$ + SELECT CASE WHEN $1 = $2 THEN 0 WHEN $1 > $2 THEN 1 ELSE -1 END; $$; + + CREATE OPERATOR CLASS int4_fickle_ops FOR TYPE int4 USING btree AS + OPERATOR 1 < (int4, int4), OPERATOR 2 <= (int4, int4), + OPERATOR 3 = (int4, int4), OPERATOR 4 >= (int4, int4), + OPERATOR 5 > (int4, int4), FUNCTION 1 int4_asc_cmp(int4, int4); + + CREATE TABLE int4tbl (i int4); + INSERT INTO int4tbl (SELECT * FROM generate_series(1,1000) gs); + CREATE INDEX fickleidx ON int4tbl USING btree (i int4_fickle_ops); +)); + +# We have not yet broken the index, so we should get no corruption +$node->command_like( + [ 'pg_amcheck', '-p', $node->port, 'postgres' ], + qr/^$/, + 'pg_amcheck all schemas, tables and indexes reports no corruption'); + +# Change the operator class to use a function which sorts in a different +# order to corrupt the btree index +$node->safe_psql('postgres', q( + CREATE FUNCTION int4_desc_cmp (int4, int4) RETURNS int LANGUAGE sql AS $$ + SELECT CASE WHEN $1 = $2 THEN 0 WHEN $1 > $2 THEN -1 ELSE 1 END; $$; + UPDATE pg_catalog.pg_amproc + SET amproc = 'int4_desc_cmp'::regproc + WHERE amproc = 'int4_asc_cmp'::regproc +)); + +# Index corruption should now be reported +$node->command_like( + [ 'pg_amcheck', '-x', '-p', $node->port, 'postgres' ], + qr/item order invariant violated for index "fickleidx"/, + 'pg_amcheck all schemas, tables and indexes reports fickleidx corruption' +); diff --git a/doc/src/sgml/contrib.sgml b/doc/src/sgml/contrib.sgml index 4e833d79ef..1efca8adc4 100644 --- a/doc/src/sgml/contrib.sgml +++ b/doc/src/sgml/contrib.sgml @@ -119,6 +119,7 @@ CREATE EXTENSION module_name; &oldsnapshot; &pageinspect; &passwordcheck; + &pgamcheck; &pgbuffercache; &pgcrypto; &pgfreespacemap; diff --git a/doc/src/sgml/filelist.sgml b/doc/src/sgml/filelist.sgml index 38e8aa0bbf..a4e1b28b38 100644 --- a/doc/src/sgml/filelist.sgml +++ b/doc/src/sgml/filelist.sgml @@ -133,6 +133,7 @@ + diff --git a/doc/src/sgml/pgamcheck.sgml b/doc/src/sgml/pgamcheck.sgml new file mode 100644 index 0000000000..3e059e7753 --- /dev/null +++ b/doc/src/sgml/pgamcheck.sgml @@ -0,0 +1,228 @@ + + + + pg_amcheck + + + pg_amcheck + + + + The pg_amcheck module provides a command line interface + to the corruption checking functionality. + + + + pg_amcheck is a regular + PostgreSQL client application. You can perform + corruption checks from any remote host that has access to the database + connecting as a user with sufficient privileges to check tables and indexes. + Currently, this requires execute privileges on 's + bt_index_parent_check and verify_heapam + functions. + + + +pg_amcheck [OPTION]... 
[DBNAME [USERNAME]] + General options: + -V, --version output version information, then exit + -?, --help show this help, then exit + -s, --strict-names require include patterns to match at least one entity each + -o, --on-error-stop stop checking at end of first corrupt page + + Schema checking options: + -n, --schema=PATTERN check relations in the specified schema(s) only + -N, --exclude-schema=PATTERN do NOT check relations in the specified schema(s) + + Table checking options: + -t, --table=PATTERN check the specified table(s) only + -T, --exclude-table=PATTERN do NOT check the specified table(s) + -b, --startblock begin checking table(s) at the given starting block number + -e, --endblock check table(s) only up to the given ending block number + -f, --skip-all-frozen do NOT check blocks marked as all-frozen + -v, --skip-all-visible do NOT check blocks marked as all-visible + + TOAST table checking options: + -z, --check-toast check associated toast tables and toast indexes + -Z, --skip-toast do NOT check associated toast tables and toast indexes + -B, --toast-startblock begin checking toast table(s) at the given starting block + -E, --toast-endblock check toast table(s) only up to the given ending block + + Index checking options: + -x, --check-indexes check btree indexes associated with tables being checked + -X, --skip-indexes do NOT check any btree indexes + -i, --index=PATTERN check the specified index(es) only + -I, --exclude-index=PATTERN do NOT check the specified index(es) + -c, --check-corrupt check indexes even if their associated table is corrupt + -C, --skip-corrupt do NOT check indexes if their associated table is corrupt + -a, --heapallindexed check index tuples against the table tuples + -A, --no-heapallindexed do NOT check index tuples against the table tuples + -r, --rootdescend search from the root page for each index tuple + -R, --no-rootdescend do NOT search from the root page for each index tuple + + Connection options: + -d, --dbname=DBNAME database name to connect to + -h, --host=HOSTNAME database server host or socket directory + -p, --port=PORT database server port + -U, --username=USERNAME database user name + -w, --no-password never prompt for password + -W, --password force password prompt (should happen automatically) + + + + Options + + + To specify which database server pg_amcheck should + contact, use the command line options or + and or + . The default host is the local host + or whatever your PGHOST environment variable specifies. + Similarly, the default port is indicated by the PGPORT + environment variable or, failing that, by the compiled-in default. + + + + Like any other PostgreSQL client application, + pg_amcheck will by default connect with the + database user name that is equal to the current operating system user name. + To override this, either specify the option or set the + environment variable PGUSER. Remember that + pg_amcheck connections are subject to the normal + client authentication mechanisms (which are described in ). + + + + To restrict checking of tables and indexes to specific schemas, specify the + or option with a pattern. + To exclude checking of tables and indexes within specific schemas, specify + the or option with + a pattern. + + + + To specify which tables are checked, specify the + or option with a pattern. + To exclude checking of tables, specify the + or option with a + pattern. + + + + To check indexes associated with checked tables, specify the + or option. 
Only + indexes on tables which are being checked will themselves be checked. To + check all indexes in a database, all tables on which the indexes exist must + also be checked. This restriction may be relaxed in the future. + + + + To restrict the range of blocks within a table that are checked, specify the + or and/or + or options with numeric + values for the starting and ending block numbers. Although these options + make the most sense when applied to a single table, if specified along with + options that select multiple tables, each table check will be restricted to + the specified blocks. If is omitted, checking + begins with the first block. If is omitted, + checking continues to the end of the relation. + + + + Some users may wish to periodically check tables without incurring the cost + of rechecking older table blocks, presumably because those blocks have + already been checked in the past. There is at present no perfect way to do + this. Although the and + options can be used to restrict blocks, the user is not expected to have + perfect knowledge of which blocks have already been checked, and in any + event, some blocks that were previously checked may have been subject to + modification since the last check. As an approximation to the desired + functionality, one can specify the + or option, or + alternatively the + or option to skip + blocks marked in the visibility map as all-frozen or all-visible, + respectively. + + + + + Example Usage + + + For table corruption, each detected corruption is reported on two lines, the + first line shows the location and the second line shows a message describing + the problem. + + + + Checking an entire database which contains one corrupt table, "mytable", + along with the output: + + + +% pg_amcheck --check-toast --skip-indexes mydb +(relname=mytable,blkno=17,offnum=12,attnum=) +xmin 4294967295 precedes relation freeze threshold 17:1134217582 +(relname=mytable,blkno=960,offnum=4,attnum=) +data begins at offset 152 beyond the tuple length 58 +(relname=mytable,blkno=960,offnum=4,attnum=) +tuple data should begin at byte 24, but actually begins at byte 152 (3 attributes, no nulls) +(relname=mytable,blkno=960,offnum=5,attnum=) +tuple data should begin at byte 24, but actually begins at byte 27 (3 attributes, no nulls) +(relname=mytable,blkno=960,offnum=6,attnum=) +tuple data should begin at byte 24, but actually begins at byte 16 (3 attributes, no nulls) +(relname=mytable,blkno=960,offnum=7,attnum=) +tuple data should begin at byte 24, but actually begins at byte 21 (3 attributes, no nulls) +(relname=mytable,blkno=1147,offnum=2,attnum=) +number of attributes 2047 exceeds maximum expected for table 3 +(relname=mytable,blkno=1147,offnum=10,attnum=) +tuple data should begin at byte 280, but actually begins at byte 24 (2047 attributes, has nulls) +(relname=mytable,blkno=1147,offnum=15,attnum=) +number of attributes 67 exceeds maximum expected for table 3 +(relname=mytable,blkno=1147,offnum=16,attnum=1) +attribute 1 with length 4294967295 ends at offset 416848000 beyond total tuple length 58 +(relname=mytable,blkno=1147,offnum=18,attnum=2) +final toast chunk number 0 differs from expected value 6 +(relname=mytable,blkno=1147,offnum=19,attnum=2) +toasted value for attribute 2 missing from toast table +(relname=mytable,blkno=1147,offnum=21,attnum=) +tuple is marked as only locked, but also claims key columns were updated +(relname=mytable,blkno=1147,offnum=22,attnum=) +multitransaction ID 1775655 is from before relation cutoff 2355572 + + + + For 
index corruption, the output is more free-form, and may span differing + numbers of lines per corruption detected. + + + + Checking an entire database which contains one corrupt index, + "corrupt_index", with corruption in the page header, along with the output: + + + +% pg_amcheck --check-toast --check-indexes --schema=public --table=table_with_corrupt_index mydb +index check failed for index corrupt_index of table table_with_corrupt_index: +ERROR: XX002: index "corrupt_index" is not a btree +LOCATION: _bt_getmeta, nbtpage.c:152 + + + + Checking again after rebuilding the index but corrupting the contents, + along with the output: + + + +% pg_amcheck --check-toast --check-indexes --schema=public --table=table_with_corrupt_index mydb +index check failed for index corrupt_index of table table_with_corrupt_index: +ERROR: XX002: index tuple size does not equal lp_len in index "corrupt_index" +DETAIL: Index tid=(39,49) tuple size=3373 lp_len=24 page lsn=0/2B548C0. +HINT: This could be a torn page problem. +LOCATION: bt_target_page_check, verify_nbtree.c:1125 + + + + diff --git a/src/tools/msvc/Mkvcbuild.pm b/src/tools/msvc/Mkvcbuild.pm index 90594bd41b..ec87fb85b3 100644 --- a/src/tools/msvc/Mkvcbuild.pm +++ b/src/tools/msvc/Mkvcbuild.pm @@ -33,9 +33,9 @@ my @unlink_on_exit; # Set of variables for modules in contrib/ and src/test/modules/ my $contrib_defines = { 'refint' => 'REFINT_VERBOSE' }; -my @contrib_uselibpq = ('dblink', 'oid2name', 'postgres_fdw', 'vacuumlo'); -my @contrib_uselibpgport = ('oid2name', 'pg_standby', 'vacuumlo'); -my @contrib_uselibpgcommon = ('oid2name', 'pg_standby', 'vacuumlo'); +my @contrib_uselibpq = ('dblink', 'oid2name', 'pg_amcheck', 'postgres_fdw', 'vacuumlo'); +my @contrib_uselibpgport = ('oid2name', 'pg_amcheck', 'pg_standby', 'vacuumlo'); +my @contrib_uselibpgcommon = ('oid2name', 'pg_amcheck', 'pg_standby', 'vacuumlo'); my $contrib_extralibs = undef; my $contrib_extraincludes = { 'dblink' => ['src/backend'] }; my $contrib_extrasource = { diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index ff853634bc..2408bb2bf6 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -102,6 +102,7 @@ AlterUserMappingStmt AlteredTableInfo AlternativeSubPlan AlternativeSubPlanState +AmCheckSettings AnalyzeAttrComputeStatsFunc AnalyzeAttrFetchFunc AnalyzeForeignTable_function @@ -403,6 +404,7 @@ ConfigData ConfigVariable ConnCacheEntry ConnCacheKey +ConnectOptions ConnStatusType ConnType ConnectionStateEnum -- 2.21.1 (Apple Git-122.3)