From eaa7e5635d1880131a3daa75d7b7ac3bb50d54e8 Mon Sep 17 00:00:00 2001 From: John Naylor Date: Sun, 11 Mar 2018 17:50:20 +0700 Subject: [PATCH] Use default values on more catalogs When the default values mechanism was introduced in commit 49c784ece, it could only be used for pg_attribute. Add default values to some other catalogs. More could be done here, but this is enough for a first pass. --- src/backend/catalog/Catalog.pm | 11 ++++- src/backend/catalog/README | 14 ++++++ src/include/catalog/pg_aggregate.h | 38 ++++++++--------- src/include/catalog/pg_amop.h | 31 ++++++++++---- src/include/catalog/pg_opclass.h | 17 ++++++-- src/include/catalog/pg_operator.h | 43 +++++++++++++------ src/include/catalog/pg_opfamily.h | 8 +++- src/include/catalog/pg_proc.h | 87 +++++++++++++++++++++++++------------- src/include/catalog/pg_type.h | 47 +++++++++++--------- src/include/catalog/rewrite_dat.pl | 34 +++++++++++++++ 10 files changed, 233 insertions(+), 97 deletions(-) diff --git a/src/backend/catalog/Catalog.pm b/src/backend/catalog/Catalog.pm index 1ec79c9..0a88c08 100644 --- a/src/backend/catalog/Catalog.pm +++ b/src/backend/catalog/Catalog.pm @@ -203,8 +203,9 @@ sub ParseData my ($input_file, $schema, $preserve_formatting) = @_; open(my $ifd, '<', $input_file) || die "$input_file: $!"; - $input_file =~ /\w+\.dat$/ + $input_file =~ /(\w+)\.dat$/ or die "Input file needs to be a .dat file.\n"; + my $catname = $1; my $data = []; my $prev_blank = 0; @@ -247,6 +248,14 @@ sub ParseData { die "Error parsing $_\n$!"; } + + # Expand tuples to their full representation. + my $error = AddDefaultValues($datum, $schema); + if ($error) + { + print "Failed to form full tuple for $catname\n"; + die $error; + } } else { diff --git a/src/backend/catalog/README b/src/backend/catalog/README index 84e6e07..aa0785e 100644 --- a/src/backend/catalog/README +++ b/src/backend/catalog/README @@ -59,6 +59,20 @@ quotes, since we don't know what kind of characters will be substituted. 
line within the curly brackets. This is done automatically during rewriting so their placement is not crucial during development. +- If the .h file specifies a default value for a column, and a data entry +has that same value, rewrite_dat.pl will omit it from the data file. This +keeps the data representation compact. + +- If you want to add a new default value, you must change the relevant .h +file to use the new default, and then run +"perl -I ../../backend/catalog rewrite_dat.pl pg_foo.dat". +If you want to change an existing default value, you must first run +rewrite_dat.pl with the "--expand" argument before proceeding as above. + +- If you want to add a new method of making the data representation +smaller, you must implement it in rewrite_dat.pl and also teach +Catalog::ParseData() how to expand the data back into the full representation. + - Some catalogs require that OIDs be preallocated to tuples because of cross-references from other pre-loaded tuples. For example, pg_type contains pointers into pg_proc (e.g., pg_type.typinput), and pg_proc diff --git a/src/include/catalog/pg_aggregate.h b/src/include/catalog/pg_aggregate.h index 97d8f22..74f2b27 100644 --- a/src/include/catalog/pg_aggregate.h +++ b/src/include/catalog/pg_aggregate.h @@ -55,29 +55,29 @@ CATALOG(pg_aggregate,2600) BKI_WITHOUT_OIDS { regproc aggfnoid; - char aggkind; - int16 aggnumdirectargs; + char aggkind BKI_DEFAULT(n); + int16 aggnumdirectargs BKI_DEFAULT(0); regproc aggtransfn; - regproc aggfinalfn; - regproc aggcombinefn; - regproc aggserialfn; - regproc aggdeserialfn; - regproc aggmtransfn; - regproc aggminvtransfn; - regproc aggmfinalfn; - bool aggfinalextra; - bool aggmfinalextra; - char aggfinalmodify; - char aggmfinalmodify; - Oid aggsortop; + regproc aggfinalfn BKI_DEFAULT(-); + regproc aggcombinefn BKI_DEFAULT(-); + regproc aggserialfn BKI_DEFAULT(-); + regproc aggdeserialfn BKI_DEFAULT(-); + regproc aggmtransfn BKI_DEFAULT(-); + regproc aggminvtransfn BKI_DEFAULT(-); + regproc 
aggmfinalfn BKI_DEFAULT(-); + bool aggfinalextra BKI_DEFAULT(f); + bool aggmfinalextra BKI_DEFAULT(f); + char aggfinalmodify BKI_DEFAULT(r); + char aggmfinalmodify BKI_DEFAULT(r); + Oid aggsortop BKI_DEFAULT(0); Oid aggtranstype; - int32 aggtransspace; - Oid aggmtranstype; - int32 aggmtransspace; + int32 aggtransspace BKI_DEFAULT(0); + Oid aggmtranstype BKI_DEFAULT(0); + int32 aggmtransspace BKI_DEFAULT(0); #ifdef CATALOG_VARLEN /* variable-length fields start here */ - text agginitval; - text aggminitval; + text agginitval BKI_DEFAULT(_null_); + text aggminitval BKI_DEFAULT(_null_); #endif } FormData_pg_aggregate; diff --git a/src/include/catalog/pg_amop.h b/src/include/catalog/pg_amop.h index 68ee321..e638c30 100644 --- a/src/include/catalog/pg_amop.h +++ b/src/include/catalog/pg_amop.h @@ -55,14 +55,29 @@ CATALOG(pg_amop,2602) { - Oid amopfamily; /* the index opfamily this entry is for */ - Oid amoplefttype; /* operator's left input data type */ - Oid amoprighttype; /* operator's right input data type */ - int16 amopstrategy; /* operator strategy number */ - char amoppurpose; /* is operator for 's'earch or 'o'rdering? */ - Oid amopopr; /* the operator's pg_operator OID */ - Oid amopmethod; /* the index access method this entry is for */ - Oid amopsortfamily; /* ordering opfamily OID, or 0 if search op */ + /* the index opfamily this entry is for */ + Oid amopfamily; + + /* operator's left input data type */ + Oid amoplefttype; + + /* operator's right input data type */ + Oid amoprighttype; + + /* operator strategy number */ + int16 amopstrategy; + + /* is operator for 's'earch or 'o'rdering? 
*/ + char amoppurpose BKI_DEFAULT(s); + + /* the operator's pg_operator OID */ + Oid amopopr; + + /* the index access method this entry is for */ + Oid amopmethod; + + /* ordering opfamily OID, or 0 if search op */ + Oid amopsortfamily BKI_DEFAULT(0); } FormData_pg_amop; /* ---------------- diff --git a/src/include/catalog/pg_opclass.h b/src/include/catalog/pg_opclass.h index 1f64710..6d0fed8 100644 --- a/src/include/catalog/pg_opclass.h +++ b/src/include/catalog/pg_opclass.h @@ -52,12 +52,21 @@ CATALOG(pg_opclass,2616) { Oid opcmethod; /* index access method opclass is for */ NameData opcname; /* name of this opclass */ - Oid opcnamespace; /* namespace of this opclass */ - Oid opcowner; /* opclass owner */ + + /* namespace of this opclass */ + Oid opcnamespace BKI_DEFAULT(PGNSP); + + /* opclass owner */ + Oid opcowner BKI_DEFAULT(PGUID); + Oid opcfamily; /* containing operator family */ Oid opcintype; /* type of data indexed by opclass */ - bool opcdefault; /* T if opclass is default for opcintype */ - Oid opckeytype; /* type of data in index, or InvalidOid */ + + /* T if opclass is default for opcintype */ + bool opcdefault BKI_DEFAULT(t); + + /* type of data in index, or InvalidOid */ + Oid opckeytype BKI_DEFAULT(0); } FormData_pg_opclass; /* ---------------- diff --git a/src/include/catalog/pg_operator.h b/src/include/catalog/pg_operator.h index a0b20f2..e111914 100644 --- a/src/include/catalog/pg_operator.h +++ b/src/include/catalog/pg_operator.h @@ -24,26 +24,41 @@ /* ---------------- * pg_operator definition. cpp turns this into * typedef struct FormData_pg_operator + * + * oprname name of operator + * oprnamespace OID of namespace containing this oper + * oprowner operator owner + * oprkind 'l', 'r', or 'b' + * oprcanmerge can be used in merge join? + * oprcanhash can be used in hash join? 
+ * oprleft left arg type, or 0 if 'l' oprkind + * oprright right arg type, or 0 if 'r' oprkind + * oprresult result datatype + * oprcom OID of commutator oper, or 0 if none + * oprnegate OID of negator oper, or 0 if none + * oprcode OID of underlying function + * oprrest OID of restriction estimator, or 0 + * oprjoin OID of join estimator, or 0 * ---------------- */ #define OperatorRelationId 2617 CATALOG(pg_operator,2617) { - NameData oprname; /* name of operator */ - Oid oprnamespace; /* OID of namespace containing this oper */ - Oid oprowner; /* operator owner */ - char oprkind; /* 'l', 'r', or 'b' */ - bool oprcanmerge; /* can be used in merge join? */ - bool oprcanhash; /* can be used in hash join? */ - Oid oprleft; /* left arg type, or 0 if 'l' oprkind */ - Oid oprright; /* right arg type, or 0 if 'r' oprkind */ - Oid oprresult; /* result datatype */ - Oid oprcom; /* OID of commutator oper, or 0 if none */ - Oid oprnegate; /* OID of negator oper, or 0 if none */ - regproc oprcode; /* OID of underlying function */ - regproc oprrest; /* OID of restriction estimator, or 0 */ - regproc oprjoin; /* OID of join estimator, or 0 */ + NameData oprname; + Oid oprnamespace BKI_DEFAULT(PGNSP); + Oid oprowner BKI_DEFAULT(PGUID); + char oprkind BKI_DEFAULT(b); + bool oprcanmerge BKI_DEFAULT(f); + bool oprcanhash BKI_DEFAULT(f); + Oid oprleft; + Oid oprright; + Oid oprresult; + Oid oprcom BKI_DEFAULT(0); + Oid oprnegate BKI_DEFAULT(0); + regproc oprcode; + regproc oprrest BKI_DEFAULT(-); + regproc oprjoin BKI_DEFAULT(-); } FormData_pg_operator; /* ---------------- diff --git a/src/include/catalog/pg_opfamily.h b/src/include/catalog/pg_opfamily.h index 6ec28ae..571615d 100644 --- a/src/include/catalog/pg_opfamily.h +++ b/src/include/catalog/pg_opfamily.h @@ -32,8 +32,12 @@ CATALOG(pg_opfamily,2753) { Oid opfmethod; /* index access method opfamily is for */ NameData opfname; /* name of this opfamily */ - Oid opfnamespace; /* namespace of this opfamily */ - Oid opfowner; /* 
opfamily owner */ + + /* namespace of this opfamily */ + Oid opfnamespace BKI_DEFAULT(PGNSP); + + /* opfamily owner */ + Oid opfowner BKI_DEFAULT(PGUID); } FormData_pg_opfamily; /* ---------------- diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h index 29ca0ee..d181d67 100644 --- a/src/include/catalog/pg_proc.h +++ b/src/include/catalog/pg_proc.h @@ -23,6 +23,36 @@ /* ---------------- * pg_proc definition. cpp turns this into * typedef struct FormData_pg_proc + * + * proname procedure name + * pronamespace OID of namespace containing this proc + * proowner procedure owner + * prolang OID of pg_language entry + * procost estimated execution cost + * prorows estimated # of rows out (if proretset) + * provariadic element type of variadic array, or 0 + * protransform transforms calls to it during planning + * prokind see PROKIND_ categories below + * prosecdef security definer + * proleakproof is it a leak-proof function? + * proisstrict strict with respect to NULLs? + * proretset returns a set? 
+ * provolatile see PROVOLATILE_ categories below + * proparallel see PROPARALLEL_ categories below + * pronargs number of arguments + * pronargdefaults number of arguments with defaults + * prorettype OID of result type + * proargtypes parameter types (excludes OUT params) + * proallargtypes all param types (NULL if IN only) + * proargmodes parameter modes (NULL if IN only) + * proargnames parameter names (NULL if no names) + * proargdefaults list of expression trees for argument + * defaults (NULL if none) + * protrftypes types for which to apply transforms + * prosrc procedure source + * probin secondary procedure info (can be NULL) + * proconfig procedure-local GUC settings + * proacl access permissions * ---------------- */ #define ProcedureRelationId 1255 @@ -30,42 +60,41 @@ CATALOG(pg_proc,1255) BKI_BOOTSTRAP BKI_ROWTYPE_OID(81) BKI_SCHEMA_MACRO { - NameData proname; /* procedure name */ - Oid pronamespace; /* OID of namespace containing this proc */ - Oid proowner; /* procedure owner */ - Oid prolang; /* OID of pg_language entry */ - float4 procost; /* estimated execution cost */ - float4 prorows; /* estimated # of rows out (if proretset) */ - Oid provariadic; /* element type of variadic array, or 0 */ - regproc protransform; /* transforms calls to it during planning */ - char prokind; /* see PROKIND_ categories below */ - bool prosecdef; /* security definer */ - bool proleakproof; /* is it a leak-proof function? */ - bool proisstrict; /* strict with respect to NULLs? */ - bool proretset; /* returns a set? 
*/ - char provolatile; /* see PROVOLATILE_ categories below */ - char proparallel; /* see PROPARALLEL_ categories below */ - int16 pronargs; /* number of arguments */ - int16 pronargdefaults; /* number of arguments with defaults */ - Oid prorettype; /* OID of result type */ + NameData proname; + Oid pronamespace BKI_DEFAULT(PGNSP); + Oid proowner BKI_DEFAULT(PGUID); + Oid prolang BKI_DEFAULT(12); + float4 procost BKI_DEFAULT(1); + float4 prorows BKI_DEFAULT(0); + Oid provariadic BKI_DEFAULT(0); + regproc protransform BKI_DEFAULT(0); + char prokind BKI_DEFAULT(f); + bool prosecdef BKI_DEFAULT(f); + bool proleakproof BKI_DEFAULT(f); + bool proisstrict BKI_DEFAULT(f); + bool proretset BKI_DEFAULT(f); + char provolatile BKI_DEFAULT(v); + char proparallel BKI_DEFAULT(u); + int16 pronargs; + int16 pronargdefaults BKI_DEFAULT(0); + Oid prorettype; /* * variable-length fields start here, but we allow direct access to * proargtypes */ - oidvector proargtypes; /* parameter types (excludes OUT params) */ + oidvector proargtypes; #ifdef CATALOG_VARLEN - Oid proallargtypes[1]; /* all param types (NULL if IN only) */ - char proargmodes[1]; /* parameter modes (NULL if IN only) */ - text proargnames[1]; /* parameter names (NULL if no names) */ - pg_node_tree proargdefaults; /* list of expression trees for argument - * defaults (NULL if none) */ - Oid protrftypes[1]; /* types for which to apply transforms */ - text prosrc BKI_FORCE_NOT_NULL; /* procedure source text */ - text probin; /* secondary procedure info (can be NULL) */ - text proconfig[1]; /* procedure-local GUC settings */ - aclitem proacl[1]; /* access permissions */ + Oid proallargtypes[1] BKI_DEFAULT(_null_); + char proargmodes[1] BKI_DEFAULT(_null_); + text proargnames[1] BKI_DEFAULT(_null_); + pg_node_tree proargdefaults BKI_DEFAULT(_null_); + Oid protrftypes[1] BKI_DEFAULT(_null_); + text prosrc BKI_FORCE_NOT_NULL; + text probin BKI_DEFAULT(_null_); + text proconfig[1] BKI_DEFAULT(_null_); + aclitem proacl[1] 
BKI_DEFAULT(_null_); #endif } FormData_pg_proc; diff --git a/src/include/catalog/pg_type.h b/src/include/catalog/pg_type.h index f7ddb0c..b52e677 100644 --- a/src/include/catalog/pg_type.h +++ b/src/include/catalog/pg_type.h @@ -37,8 +37,12 @@ CATALOG(pg_type,1247) BKI_BOOTSTRAP BKI_ROWTYPE_OID(71) BKI_SCHEMA_MACRO { NameData typname; /* type name */ - Oid typnamespace; /* OID of namespace containing this type */ - Oid typowner; /* type owner */ + + /* OID of namespace containing this type */ + Oid typnamespace BKI_DEFAULT(PGNSP); + + /* type owner */ + Oid typowner BKI_DEFAULT(PGUID); /* * For a fixed-size type, typlen is the number of bytes we use to @@ -66,7 +70,7 @@ CATALOG(pg_type,1247) BKI_BOOTSTRAP BKI_ROWTYPE_OID(71) BKI_SCHEMA_MACRO * * If typtype is 'c', typrelid is the OID of the class' entry in pg_class. */ - char typtype; + char typtype BKI_DEFAULT(b); /* * typcategory and typispreferred help the parser distinguish preferred @@ -76,17 +80,20 @@ CATALOG(pg_type,1247) BKI_BOOTSTRAP BKI_ROWTYPE_OID(71) BKI_SCHEMA_MACRO */ char typcategory; /* arbitrary type classification */ - bool typispreferred; /* is type "preferred" within its category? */ + /* is type "preferred" within its category? */ + bool typispreferred BKI_DEFAULT(f); /* * If typisdefined is false, the entry is only a placeholder (forward * reference). We know the type name, but not yet anything else about it. */ - bool typisdefined; + bool typisdefined BKI_DEFAULT(t); - char typdelim; /* delimiter for arrays of this type */ + /* delimiter for arrays of this type */ + char typdelim BKI_DEFAULT(\054); - Oid typrelid; /* 0 if not a composite type */ + /* 0 if not a composite type */ + Oid typrelid BKI_DEFAULT(0); /* * If typelem is not 0 then it identifies another row in pg_type. The @@ -99,7 +106,7 @@ CATALOG(pg_type,1247) BKI_BOOTSTRAP BKI_ROWTYPE_OID(71) BKI_SCHEMA_MACRO * * typelem != 0 and typlen == -1. 
*/ - Oid typelem; + Oid typelem BKI_DEFAULT(0); /* * If there is a "true" array type having this type as element type, @@ -118,13 +125,13 @@ CATALOG(pg_type,1247) BKI_BOOTSTRAP BKI_ROWTYPE_OID(71) BKI_SCHEMA_MACRO /* * I/O functions for optional type modifiers. */ - regproc typmodin; - regproc typmodout; + regproc typmodin BKI_DEFAULT(-); + regproc typmodout BKI_DEFAULT(-); /* * Custom ANALYZE procedure for the datatype (0 selects the default). */ - regproc typanalyze; + regproc typanalyze BKI_DEFAULT(-); /* ---------------- * typalign is the alignment required when storing a value of this @@ -162,7 +169,7 @@ CATALOG(pg_type,1247) BKI_BOOTSTRAP BKI_ROWTYPE_OID(71) BKI_SCHEMA_MACRO * 'm' MAIN like 'x' but try to keep in main tuple * ---------------- */ - char typstorage; + char typstorage BKI_DEFAULT(p); /* * This flag represents a "NOT NULL" constraint against this datatype. @@ -172,32 +179,32 @@ CATALOG(pg_type,1247) BKI_BOOTSTRAP BKI_ROWTYPE_OID(71) BKI_SCHEMA_MACRO * * Used primarily for domain types. */ - bool typnotnull; + bool typnotnull BKI_DEFAULT(f); /* * Domains use typbasetype to show the base (or domain) type that the * domain is based on. Zero if the type is not a domain. */ - Oid typbasetype; + Oid typbasetype BKI_DEFAULT(0); /* * Domains use typtypmod to record the typmod to be applied to their base * type (-1 if base type does not use a typmod). -1 if this type is not a * domain. */ - int32 typtypmod; + int32 typtypmod BKI_DEFAULT(-1); /* * typndims is the declared number of dimensions for an array domain type * (i.e., typbasetype is an array type). Otherwise zero. 
*/ - int32 typndims; + int32 typndims BKI_DEFAULT(0); /* * Collation: 0 if type cannot use collations, DEFAULT_COLLATION_OID for * collatable base types, possibly other OID for domains */ - Oid typcollation; + Oid typcollation BKI_DEFAULT(0); #ifdef CATALOG_VARLEN /* variable-length fields start here */ @@ -206,7 +213,7 @@ CATALOG(pg_type,1247) BKI_BOOTSTRAP BKI_ROWTYPE_OID(71) BKI_SCHEMA_MACRO * a default expression for the type. Currently this is only used for * domains. */ - pg_node_tree typdefaultbin; + pg_node_tree typdefaultbin BKI_DEFAULT(_null_); /* * typdefault is NULL if the type has no associated default value. If @@ -216,12 +223,12 @@ CATALOG(pg_type,1247) BKI_BOOTSTRAP BKI_ROWTYPE_OID(71) BKI_SCHEMA_MACRO * external representation of the type's default value, which may be fed * to the type's input converter to produce a constant. */ - text typdefault; + text typdefault BKI_DEFAULT(_null_); /* * Access permissions */ - aclitem typacl[1]; + aclitem typacl[1] BKI_DEFAULT(_null_); #endif } FormData_pg_type; diff --git a/src/include/catalog/rewrite_dat.pl b/src/include/catalog/rewrite_dat.pl index 614ec01..54434ea 100644 --- a/src/include/catalog/rewrite_dat.pl +++ b/src/include/catalog/rewrite_dat.pl @@ -22,6 +22,7 @@ use warnings; my @input_files; my $output_path = ''; +my $expand_tuples = 0; # Process command line switches. while (@ARGV) @@ -35,6 +36,10 @@ while (@ARGV) { $output_path = length($arg) > 2 ? substr($arg, 2) : shift @ARGV; } + elsif ($arg eq '--expand') + { + $expand_tuples = 1; + } else { usage(); @@ -99,6 +104,14 @@ foreach my $datfile (@input_files) my %values = %$data; print $dat "{ "; + # Write out tuples in a compact representation. + # Note: This is also a convenient place to do one-off + # bulk-editing. + if (!$expand_tuples) + { + strip_default_values(\%values, $schema, $catname); + } + # Separate out metadata fields for readability. 
my $metadata_line = format_line(\%values, @METADATA); if ($metadata_line) @@ -123,6 +136,26 @@ foreach my $datfile (@input_files) } } +# Leave values out if there is a matching default. +sub strip_default_values +{ + my ($row, $schema, $catname) = @_; + + foreach my $column (@$schema) + { + my $attname = $column->{name}; + die "strip_default_values: $catname.$attname undefined\n" + if ! defined $row->{$attname}; + + # Delete values that match defaults. + if (defined $column->{default} + and ($row->{$attname} eq $column->{default})) + { + delete $row->{$attname}; + } + } +} + # Format the individual elements of a Perl hash into a valid string # representation. We do this ourselves, rather than use native Perl # facilities, so we can keep control over the exact formatting of the @@ -162,6 +195,7 @@ Usage: rewrite_dat.pl [options] datafile... Options: -o output path + --expand write out full tuples Expects a list of .dat files as arguments. -- 2.7.4