You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
186 lines
6.9 KiB
186 lines
6.9 KiB
#!/usr/bin/perl |
|
|
|
use strict; |
|
use warnings qw(all); |
|
use Data::Dumper(); |
|
|
|
my $VERBOSE = 0; |
|
|
|
my $MAX_OK_TIME = 600000; #ms |
|
|
|
my $FILENAME=$ARGV[0] or die "call syntax $0 logfile"; |
|
open (IN, $FILENAME) || die "cannot open $FILENAME because $!"; |
|
warn "starting work with file $FILENAME\n" if ($VERBOSE); |
|
|
|
my %queries_stat = (); |
|
my @long_query = (); |
|
my @huge_query_line = (); |
|
|
|
my $total_time = 1; |
|
my $total_queries = 1; |
|
my $total_unique_queries = 1; |
|
|
|
my $start_time = ''; |
|
my $end_time = ''; |
|
|
|
{ |
|
my ($date, $time, $user, $db, $pid, $duration, $query); |
|
my ($count, $lines) = (0, 0); |
|
|
|
while (<IN>) { |
|
chomp; |
|
$count++; |
|
|
|
if (/^(\d{4}-\d{2}-\d{2}) (\d{2}:\d{2}:\d{2}(?:\.\d+)?) \S+ (\d+) (\S*)@(\S*) from (?:\S*) \[vxid:.*? txid:\d+\] \[.*?\]\s*(.*)$/) { |
|
count_query($time, $user, $db, $pid, $duration, $query, $count) if ($count>1 and $lines>0); |
|
($date, $time, $pid, $user, $db, $lines, $duration, $query) = ($1, $2, $3, $4, $5, parse_log_line($6)); |
|
$start_time = $date . " " . $time if (!$start_time); |
|
} elsif (/^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2} \S+/) { |
|
die "cannot parse line '$_'\n"; |
|
next; |
|
} elsif ($lines>0) { |
|
$query.="\n$_"; |
|
$lines++; |
|
die "parser error lines>200000 before line $count" if ($lines>200000); |
|
} else { |
|
# print "skipped line $_\n"; |
|
} |
|
#print STDERR "\r".$count; |
|
} |
|
$end_time = $date . " " . $time if ($date); |
|
} |
|
|
|
sub parse_log_line { |
|
my $line = shift; |
|
#LOG: duration: 2.067 ms statement: ... |
|
if ($line =~ /LOG: duration: (\d+\.\d+) ms (.*)$/) { |
|
return(1, $1, $2); |
|
} else { |
|
# print "skipped header: '$_'\n"; |
|
return (0, 0, ''); |
|
} |
|
} |
|
|
|
|
|
print "===================================================================================================================\n\n\n"; |
|
|
|
my $pos = 0; |
|
my $time_left = $total_time; |
|
print "total time used: ".((int($total_time/36000))/100)." hours \n"; |
|
print "total unique queries found: ".scalar(keys(%queries_stat))."\n"; |
|
print "first query: ".$start_time."\n"; |
|
print "last query: ".$end_time."\n"; |
|
|
|
foreach (sort {$queries_stat{$b}{time} <=> $queries_stat{$a}{time}} keys %queries_stat) { |
|
$pos++; |
|
my $percent = 100*$queries_stat{$_}{time}/$total_time; |
|
last if ($percent<1); |
|
$time_left -= $queries_stat{$_}{time}; |
|
print "\n\n\n\n===================================================\npos:$pos\ttotal_time: ".sprintf('%.02f',($queries_stat{$_}{time}/(3600*1000)))." h (".sprintf('%.02f', $percent)."%)\ttotal_count: $queries_stat{$_}{count}\tavg_time:\t".sprintf('%.02f',($queries_stat{$_}{time}/$queries_stat{$_}{count}))."ms\n$_"; |
|
print "\n\nSample (user=$queries_stat{$_}{user} db=$queries_stat{$_}{db}):\n$queries_stat{$_}{sample}\n"; |
|
} |
|
print "\n\n other: ".int($time_left/100)." s (".sprintf('%.02f', ($time_left*100/$total_time))."%)\n"; |
|
print "====================================================================================================================\n\n\n"; |
|
|
|
$pos = 0; |
|
my $count_left = $total_queries; |
|
print "total queries found: $total_queries\n"; |
|
foreach (sort {$queries_stat{$b}{count} <=> $queries_stat{$a}{count}} keys %queries_stat) { |
|
$pos++; |
|
my $percent = 100*$queries_stat{$_}{count}/$total_queries; |
|
last if ($percent<2); |
|
$count_left -= $queries_stat{$_}{count}; |
|
print "\n\n\n\n===================================================\npos:$pos\ttotal_count: $queries_stat{$_}{count} (".sprintf('%.02f', $percent)."%)\tavg_time:\t".sprintf('%.02f',($queries_stat{$_}{time}/$queries_stat{$_}{count}))."ms\n$_"; |
|
print "\n\nSample (user=$queries_stat{$_}{user} db=$queries_stat{$_}{db}):\n$queries_stat{$_}{sample}\n"; |
|
} |
|
print "\n\n other: ".int($count_left/100)." s (".sprintf('%.02f', ($count_left*100/$total_queries))."%)\n"; |
|
print "===================================================================================================================\n\n\n"; |
|
|
|
print "total long queries found: ".scalar(@long_query)."\n"; |
|
foreach (@long_query) { |
|
chomp; |
|
print "$_\n"; |
|
} |
|
print "===================================================================================================================\n\n\n"; |
|
|
|
print "total huge queries found: ".scalar(@huge_query_line)."\n"; |
|
foreach (@huge_query_line) { |
|
chomp; |
|
print "'$_->[2]...' with length $_->[1] at line $_->[0]\n"; |
|
} |
|
print "===================================================================================================================\n\n\n"; |
|
|
|
sub filter_query { |
|
my $string = shift; |
|
|
|
$string =~ s/^\s*--.*$//gm; |
|
$string =~ s/[\s\n\r\t]+/ /g; |
|
$string = lc($string); |
|
$string =~ s/^ //; |
|
$string =~ s/ $//; |
|
|
|
if ($string =~ /copy .* to stdout/) { |
|
return 'COPY to stdout (backup) queries'; |
|
}; |
|
|
|
$string =~ s/dbdpg_p\d+_\d+/dbdpg/g; |
|
$string =~ s/pdo_stmt_\d+/pdo_stmt/g; |
|
$string =~ s/(?:bind|execute|parse) \S+:/prepared:/g; |
|
$string =~ s/savepoint\s+.*$/SAVEPOINT/; |
|
$string =~ s/\/\*.*?\*\///g; |
|
|
|
$string =~ s/(::[a-z0-9]+)//g; |
|
|
|
$string =~ s/e?'\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}(?:\.\d+)?'/TIMESTAMP/g; |
|
$string =~ s/e?'\d{4}-\d{2}-\d{2}'/DATE/g; |
|
$string =~ s/e?'[^']*?'/TEXT/g; |
|
|
|
$string =~ s/_\d{4}_\d{2}/YYYYMM/g; |
|
|
|
$string =~ s/\$\d+/PLACEHOLDER/g; |
|
|
|
$string =~ s/\[\s*\d+\s*\]/[SOMEARRAY]/g; |
|
|
|
$string =~ s/\((DIGIT|TEXT|PLACEHOLDER|null|false|true|DATE|TIMESTAMP)\)/$1/g; |
|
|
|
$string =~ s/(=|<>|\(|,|\+|-|<|>|\*|\/)\s*-?\d+(?:\.\d+)?/$1 DIGIT/g; |
|
$string =~ s/-?\d+(?:\.\d+)?\s*(=|<>|\)|,|\+|-|<|>|\*|\/)/DIGIT $1/g; |
|
|
|
$string =~ s/(?:\s*(?:DIGIT|TEXT|PLACEHOLDER|null|false|true|DATE|TIMESTAMP)\s*,){1,1000}/VALUES_LIST,/g; |
|
$string =~ s/\((?:\s*(?:VALUES_LIST|DIGIT|TEXT|PLACEHOLDER|null|false|true|DATE|TIMESTAMP)\s*,){0,1000}\s*(?:VALUES_LIST|DIGIT|TEXT|PLACEHOLDER|null|false|true|DATE|TIMESTAMP)\s*\)/(VALUES LIST)/g; |
|
$string =~ s/(insert into \S+) \(.*?\)/$1 (FIELD LIST)/g; |
|
$string =~ s/values\s+\(.*?\)/values (VALUES LIST)/g; |
|
$string =~ s/update (?:only )?(\S+) set .*? where/update $1 set SET_LIST where/g; |
|
$string =~ s/(limit|offset) \d+/$1 SOME/g; |
|
|
|
return $string; |
|
} |
|
|
|
sub count_query { |
|
my ($time, $user, $db, $pid, $duration, $orig_query, $count) = @_; |
|
die unless ($time && $user && $db && $pid && $duration && $orig_query); |
|
|
|
#skip too long queries |
|
if ($duration>$MAX_OK_TIME) { |
|
push @long_query, "too long query at $time worked: (".((int($duration/10))/100)." second):\n$orig_query"; |
|
} elsif (length($orig_query) > 500000) { |
|
push @huge_query_line, [$count, length($orig_query), sprintf('%.200s', $orig_query)]; |
|
} else { |
|
my $query = "user=$user db=$db QUERY: ".filter_query($orig_query); |
|
$queries_stat{$query} ||= {time=>0, count=>0, sample=>$orig_query, user=>$user, db=>$db}; |
|
|
|
if ($queries_stat{$query}{count}==0) { |
|
$total_unique_queries++; |
|
# print "new query found:\n$query\noriginal query was\n$orig_query\n=================================================================\n\n"; |
|
if ($VERBOSE and scalar(keys %queries_stat) > 500) { |
|
warn "new query found:\n$query\noriginal query was\n$orig_query\n=================================================================\n\n"; |
|
} |
|
} |
|
|
|
$queries_stat{$query}{time} += $duration; |
|
$queries_stat{$query}{count}++; |
|
$total_time += $duration; |
|
$total_queries++; |
|
} |
|
} |
|
|
|
|