use strict;
use warnings;
use DBI;
use IO::Pipe;
use Storable;
use Data::Dumper;
use List::Util qw(shuffle);
use Digest::MD5 qw(md5_hex);

## This is a stress tester for PostgreSQL crash recovery.
## It spawns a number of processes which all connect to the database
## and madly update a table until either the server crashes or they
## complete their allotted number of updates (a million each, by default).

## Upon a crash, each Perl process reports up to the parent how many times
## each value was updated, plus which update was 'in flight' at the time of
## the crash. (Since we received neither an error nor a confirmation, the
## proper status of this in-flight update is unknowable.)
## The parent consolidates this info, waits for the database to recover, and
## verifies that the state of the database matches what we know it ought to be.

## The first argument is the number of processes (default 8), the 2nd is the
## number of updates per process (default 1_000_000), and giving a 3rd
## argument causes aborts when a certain discrepancy is seen.

## Arranging for the server to crash is the obligation of the outer driving
## script (do.sh) and the accompanying instrumentation patch.

## I invoke this in an outer driving script and let both the Perl messages
## and the postmaster logs spool together into one log file. That way it is
## easier to correlate server events with the client/Perl events
## chronologically.

## This generates a lot of logging info. The tension here is that if you
## generate too much info, it is hard to find anomalies in the log file. But
## if you generate too little info, then once you do find anomalies you can't
## figure out the cause. So I err on the side of logging too much, and use
## command lines (memorialized below) to pull out the most interesting
## things. But with really high logging, the lines in the log file start
## getting garbled up, so back off a bit. The commented-out warn and elog
## things in this file and the patch file show places where I previously
## needed logging for debugging specific things, but decided I don't need all
## of the time. Leave the commented code as a landmark for the future.
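## A hedged sketch of the sort of outer driving loop do.sh might be (the real
## do.sh is not shown here; the script file name and pg_ctl options below are
## assumptions, and the crashes themselves come from the instrumentation
## patch, not from this loop):
#while true; do
#  pg_ctl start -D "$PGDATA" -l do.out -w    # server recovers on start
#  perl count.pl 8 1000000 >> do.out 2>&1    # Perl output spools into the same log
#done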
## look for odd messages in the log file that originate from Perl:
#fgrep ' line ' do.out |sort|uniq -c|sort -n|fgrep -v 'in flight'

## look at the rate of incrementing over time, for Excel or SpotFire:
#grep -P '2014-05|^sum ' do.out |grep -P '^sum' -B1|perl -ne 'my @x=split/PDT/; print $x[0] and next if @x>1; print if /sum/' > ~/jj.txt

## check consistency between the child and parent tables (the 10 here matches
## the 10 in 'if ($count->[0][0] % 10 == 0)'):
## psql -c 'select abs(id), sum(count) from foo_parent where count>0 group by abs(id) except select abs(p_id), sum(floor(count::float/10)) from foo group by abs(p_id)'

my $SIZE=10_000;

## centralize connections in one place, in case we want to point to a remote
## server or use a password
sub dbconnect {
    my $dbh = DBI->connect("dbi:Pg:", "", "", {AutoCommit => 1, RaiseError => 1, PrintError => 0});
    return $dbh;
};

my %count;
while (1) {
    %count=();
    eval {
        my $dbh = dbconnect();
        eval {
            ## On later passes through, the table already exists and the
            ## create would just fail. That is fine, but don't pollute the
            ## log with the errors: check first and return early.
            ($dbh->selectrow_array("select count(*) from pg_tables where tablename='foo';"))[0] == 1 and return;
            $dbh->do(<<'END');
create table foo_parent(id bigserial primary key, count int);
create table foo(index int, p_id bigint references foo_parent(id), count int);
create unique index on foo(index);
create index on foo(p_id);
END
        };
        ## get rid of garbage tuples if any were present during the crash
        $dbh->do("delete from foo where index < 0");
        my $dat = $dbh->selectall_arrayref("select index, count from foo where index>=0");
        if (@$dat == $SIZE) {
            $count{$_->[0]}=$_->[1] foreach @$dat;
        } else {
            warn "table not correct size, ", scalar @$dat unless @$dat==0;
            $dbh->do("truncate foo, foo_parent");
            %count=();
            my $psth=$dbh->prepare("insert into foo_parent (id, count) values (?,0)");
            my $sth=$dbh->prepare("insert into foo (index, p_id, count) values (?,?,0)");
            $dbh->begin_work();
            $psth->execute($_) foreach 1..$SIZE;
            $psth->execute(-$_) foreach 1..$SIZE;
            $sth->execute($_,1+int($_/30)) foreach 1..$SIZE;
            $dbh->commit();
        };
        ## Even a pause every 100 rounds to let autovac do its thing is not
        ## enough, because the autovac itself generates enough IO to trigger
        ## crashes, so it never completes, leading to a wraparound shutdown.
        ## This should keep the vacuum load low enough to complete, at least
        ## some of the time.
        ## $dbh->do("vacuum foo") if rand()<0.1;
    };
    last unless $@;
    warn "Failed with $@, trying again";
    sleep 1;
};
warn "init done";

## Fork off the given number of child processes, opening pipes for them to
## communicate back to the parent. Communication is a one-time shot, at the
## end of their lifetimes.
my @child_pipe;
my $pipe_up;
foreach (1 .. ((@ARGV and $ARGV[0]>0) ? $ARGV[0] : 8)) {
    my $pipe = IO::Pipe->new;
    defined (my $fork = fork) or die "fork failed: $!";
    if ($fork) {
        push @child_pipe, {pipe => $pipe->reader(), pid => $fork};
    } else {
        $pipe_up=$pipe->writer();
        @child_pipe=();
        last;
    };
};
#warn "fork done";
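## A minimal, self-contained sketch of the one-shot pipe protocol used below
## (illustrative only, not part of the test; run it separately if curious).
## The child serializes one structure down the pipe with nstore_fd and the
## parent blocks in fd_retrieve until it arrives:
#use IO::Pipe;
#use Storable qw(nstore_fd fd_retrieve);
#my $pipe = IO::Pipe->new;
#if (my $pid = fork) {
#    my $reader = $pipe->reader();
#    my $data = fd_retrieve($reader);    # e.g. [$in_flight_item, \%counts]
#    waitpid $pid, 0;
#} else {
#    my $writer = $pipe->writer();
#    nstore_fd([42, {42 => 1}], $writer);
#    close $writer;
#    exit 0;
#}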
if (@child_pipe) {
    #warn "in harvest";
    my %in_flight;
    ## Harvest the children's data, which consists of the in-flight item plus
    ## a hash with the counts of all confirmed-committed items.
    local $/;
    foreach my $handle ( @child_pipe ) {
        my $data=Storable::fd_retrieve($handle->{pipe});
        $in_flight{$data->[0]}=() if defined $data->[0];
        while (my ($k,$v)=each %{$data->[1]}) {
            $count{$k}+=$v;
        };
        close $handle->{pipe} or die "$$ closing child failed with bang $!, and question $?";
        my $pid = waitpid $handle->{pid}, 0;
        die "$$: my child $pid exited with non-zero status $?" if $?;
    };
    #warn "harvest done";

    my ($dat,$dat2);
    foreach (1..300) {
        sleep 1;
        ## We used to do just the connect in the eval loop, but sometimes the
        ## database crashed again during the query, so do it all in the eval
        ## loop.
        eval {
            warn "summary attempt $_" if $_>1;
            my $dbh = dbconnect();
            ## Detect a wraparound shutdown (actually not a shutdown, but
            ## read-onlyness) and bail out. This needs to be detected before
            ## $dat is set, or else it won't trigger a Perl fatal error.
            $dbh->do("create temporary table aldjf (x serial)");
            $dat = $dbh->selectall_arrayref("select index, count from foo where index >=0");
            warn "sum is ", $dbh->selectrow_array("select sum(count) from foo"), "\n";
            warn "count is ", $dbh->selectrow_array("select count(*) from foo"), "\n";
            # Try to force it to walk the index to get to each row, so corrupt
            # indexes are detected. If they are not detected here, they will
            # be detected later when the wrong number of rows gets updated.
            # (Without the "where index is not null", it won't use an index
            # scan no matter what.)
            $dat2 = $dbh->selectall_arrayref("set enable_seqscan=off; select index, count from foo where index is not null and index >=0");
        };
        last unless $@;
        warn $@;
    };
    die "Database didn't recover even after 5 minutes, giving up" unless $dat2;
    ## don't do the sorts in SQL, because that might change the execution plan
    @$dat=sort {$a->[0]<=>$b->[0]} @$dat;
    @$dat2=sort {$a->[0]<=>$b->[0]} @$dat2;
    foreach (@$dat) {
        $_->[0] == $dat2->[0][0] and $_->[1] == $dat2->[0][1] or die "seq scan doesn't match index scan";
        shift @$dat2;
        no warnings 'uninitialized';
        warn "For $_->[0], $_->[1] != $count{$_->[0]}", exists $in_flight{$_->[0]} ? " in flight" : ""
            if $_->[1] != $count{$_->[0]};
        if ($_->[1] != $count{$_->[0]} and not exists $in_flight{$_->[0]} and defined $ARGV[2]) {
            # bring down the system now, before autovac destroys the evidence
            die;
        };
        delete $count{$_->[0]};
    };
    warn "Left over in %count: @{[%count]}" if %count;
    die if %count and defined $ARGV[2];
    warn "normal exit at ", time();
    exit;
};

my %h; # how many times each item has been incremented
my $i; # the in-flight item which was not reported to have been committed
eval {
    ## Do the dbconnect inside the eval, in case we crash while some children
    ## are not yet up. The children that fail to connect in the first place
    ## still need to send the empty data to nstore_fd, or else fd_retrieve
    ## fatals out.
    my $dbh = dbconnect();
    ## Alternating the p_id between positive and negative forces it to obtain
    ## locks on the parent row, to exercise the multixact code. If p_id
    ## doesn't change, there are no locks on the parent. However, this also
    ## defeats HOT updates. That is good, as the non-HOT update code paths
    ## need exercising, but now the HOT-update path doesn't get exercised.
    ## Could we flip the p_id only some of the time?
    ## Occasionally update to a non-existent parent, in case that exposes
    ## some interesting code paths.
    my $sth=$dbh->prepare("update foo set count=count+1, p_id=p_id*case when random()<0.001 then 100000000 when random()<0.5 then -1 else 1 end where index=? returning count,p_id");
    ## Abandoned effort to force locks on the parent, before p_id flipping was
    ## adopted. It was incredibly slow and led to wraparound. Revisit in the
    ## future?
    #my $dummy=$dbh->prepare("with T as (delete from foo where index=? returning *) insert into foo select * from T");
    my $par=$dbh->prepare('update foo_parent set count=count+1 where id=?');
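    ## To see the parent-row locking that the p_id flip relies on (a hedged
    ## illustration; it requires the pgrowlocks contrib module, and the exact
    ## output columns vary by version), run something like this in two psql
    ## sessions:
    #-- session 1: the FK check takes a row-level lock on the referenced parent
    #begin; update foo set p_id = -p_id where index = 1;
    #-- session 2: the lock on the foo_parent row should now be visible
    #select * from pgrowlocks('foo_parent');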
    ## Burn through multixacts. Needs the additional patch and pageinspect.
    my $burn=$dbh->prepare('select pg_burn_multixact((random()*40)::int+1,(random()*50)::int+1)');
    eval {
        foreach (1..($ARGV[1]//1e6)) {
            $i=1+int rand($SIZE);
            my $count;
            REDO: {
                $dbh->begin_work();
                $count = eval {$dbh->selectall_arrayref($sth,undef,$i);};
                if (length $@) {
                    ## The 0.001 chance of flipping to a non-existent parent
                    ## shows up here as a foreign-key violation (SQLSTATE
                    ## 23503); just retry the update.
                    if ($dbh->state() eq '23503') {
                        $dbh->rollback();
                        redo REDO;
                    };
                    die $@;
                };
            };
            @$count == 1 or die "update did not update 1 row: key $i updated @$count";
            ## Test cases where the user sometimes aborts the transaction.
            if (rand() < 0.1) { $dbh->rollback(); redo; };
            if ($count->[0][0] % 10 == 0) {
                #$dummy->execute($i);
                $par->execute($count->[0][1]);
            };
            if (rand() < 0.1) { $dbh->rollback(); redo; };
            $burn->execute();
            $dbh->commit();
            $h{$i}++;
            undef $i;
        };
    };
    $@ =~ s/\n/\\n /g if defined $@;
    warn "child exit ", $dbh->state(), " $@" if length $@;
};
$@ =~ s/\n/\\n /g if defined $@;
die "child abnormal exit $@" if length $@ and $@ =~ / 0E0/;
warn "child abnormal exit $@" if length $@;

## Report up to the parent: the possibly-in-flight item and the counts of all
## confirmed-committed updates.
Storable::nstore_fd([$i,\%h],$pipe_up);
close $pipe_up or die "$! $?";
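## Worked example of the invariant behind this report: if this child committed
## 5 updates of index 42 (so $h{42} == 5) and a 6th update of index 42 was in
## flight at the crash ($i == 42), then recovery must leave index 42's
## contribution at either 5 (the in-flight update was lost) or 6 (it actually
## committed); anything else indicates a recovery bug. The parent applies this
## check against the counts consolidated from all children.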