1 changed files with 842 additions and 0 deletions
@ -0,0 +1,842 @@
@@ -0,0 +1,842 @@
|
||||
#!/usr/bin/env perl |
||||
use strict; |
||||
use warnings; |
||||
use File::Temp qw(tempfile); |
||||
use Time::HiRes qw(time); |
||||
use Getopt::Long qw(:config no_ignore_case bundling); |
||||
|
||||
# Version |
||||
my $VERSION = '0.11.0'; |
||||
|
||||
# Configuration |
||||
my $MAX_LINE_LENGTH = 10000; |
||||
my $DEBUG = $ENV{DEBUG} || 0; |
||||
|
||||
# Command line options |
||||
my $deep_search = 0; |
||||
my $partial_match = 0; |
||||
my $max_matches = 10000; |
||||
my $max_matches_size = 50 * 1024 * 1024; # 50MB in bytes |
||||
my $live_output = 0; |
||||
my $show_version = 0; |
||||
my $show_help = 0; |
||||
|
||||
# Global state for Ctrl+C handling |
||||
my $interrupted = 0; |
||||
my $temp_file; |
||||
my $matches_found = 0; |
||||
|
||||
# Signal handlers |
||||
$SIG{INT} = sub { |
||||
$interrupted = 1; |
||||
print STDERR "\n\n[Interrupted by user - showing partial results...]\n"; |
||||
}; |
||||
|
||||
$SIG{__DIE__} = sub { |
||||
system("stty sane 2>/dev/null") if -t STDIN; |
||||
die @_; |
||||
}; |
||||
|
||||
# Parse command line options |
||||
GetOptions( |
||||
'deep-search|d' => \$deep_search, |
||||
'partial-match|p' => \$partial_match, |
||||
'max-matches|m=i' => \$max_matches, |
||||
'max-matches-size|z=s' => sub { |
||||
my ($opt, $val) = @_; |
||||
if ($val =~ /^(\d+(?:\.\d+)?)\s*([KMG]?)B?$/i) { |
||||
my ($num, $unit) = ($1, uc($2)); |
||||
$max_matches_size = $num * ($unit eq 'G' ? 1024**3 : $unit eq 'M' ? 1024**2 : $unit eq 'K' ? 1024 : 1); |
||||
} else { |
||||
die "Invalid size format: $val (use format like '50M', '1G', or '1048576')\n"; |
||||
} |
||||
}, |
||||
'live|l' => \$live_output, |
||||
'version|v' => \$show_version, |
||||
'help|h' => \$show_help, |
||||
) or die "Use --help for usage information\n"; |
||||
|
||||
# Handle --version |
||||
if ($show_version) { |
||||
print "pg_log_search version $VERSION\n"; |
||||
exit 0; |
||||
} |
||||
|
||||
# Handle --help |
||||
if ($show_help) { |
||||
print_help(); |
||||
exit 0; |
||||
} |
||||
|
||||
# Timing variables |
||||
my %timers; |
||||
my $start_time = time(); |
||||
|
||||
# Check for log file - prioritize command line args over stdin |
||||
my $log_file; |
||||
my $use_stdin = 0; |
||||
|
||||
# First check if a log file was provided as argument |
||||
$log_file = $ARGV[0]; |
||||
|
||||
if ($log_file) { |
||||
# Log file provided as argument - use it |
||||
unless (-f $log_file && -r $log_file) { |
||||
die "Error: Cannot read log file: $log_file\n"; |
||||
} |
||||
my $file_size = -s $log_file; |
||||
print "Log file: $log_file\n"; |
||||
print "Log file size: " . sprintf("%.2f MB\n", $file_size / (1024*1024)); |
||||
} elsif (!-t STDIN) { |
||||
# No file argument, but stdin is available - use stdin for log |
||||
$use_stdin = 1; |
||||
print "Reading log from stdin...\n"; |
||||
} else { |
||||
# No file argument, no stdin - try to auto-detect |
||||
$log_file = find_postgresql_log(); |
||||
if (!$log_file) { |
||||
die "Error: No log file specified and couldn't find PostgreSQL log automatically.\n" . |
||||
"Usage: $0 [postgresql_log_file]\n" . |
||||
" or: cat logfile | $0\n"; |
||||
} |
||||
print "Found PostgreSQL log: $log_file\n"; |
||||
my $file_size = -s $log_file; |
||||
print "Log file size: " . sprintf("%.2f MB\n", $file_size / (1024*1024)); |
||||
} |
||||
|
||||
# Get query from user |
||||
my $search_query = read_query_input($use_stdin); |
||||
|
||||
# Normalize query for comparison and pre-compute patterns for performance |
||||
my $timer_start = time(); |
||||
my $normalized_search = normalize_query_fast($search_query); |
||||
|
||||
# Pre-compute search patterns to avoid repeated regex operations |
||||
my $search_pattern = query_to_pattern($normalized_search); |
||||
|
||||
# Pre-compute search structure for deep matching |
||||
my $search_structure = $normalized_search; |
||||
$search_structure =~ s/\bin\s*\([^)]+\)/in(__)/gi; |
||||
# Replace parameters FIRST, before other replacements |
||||
$search_structure =~ s/\$\d+/__/g; |
||||
$search_structure =~ s/-?\d+(?:\.\d+)?/__/g; |
||||
$search_structure =~ s/'[^']*'/__/g; |
||||
$search_structure =~ s/\bnull\b/__/gi; |
||||
|
||||
$timers{normalize} = time() - $timer_start; |
||||
|
||||
if ($use_stdin) { |
||||
print "\nSearching for query in stdin\n"; |
||||
} else { |
||||
print "\nSearching for query in: $log_file\n"; |
||||
} |
||||
print "=" x 80 . "\n\n"; |
||||
|
||||
# Create temp file for output |
||||
my ($temp_fh, $temp_file_path) = tempfile(UNLINK => 1); |
||||
$temp_file = $temp_file_path; # Store in global for Ctrl+C handler |
||||
|
||||
# Process log file |
||||
$timer_start = time(); |
||||
if ($use_stdin) { |
||||
# Process stdin directly without temp file |
||||
$matches_found = search_log(\*STDIN, $normalized_search, $search_pattern, $search_structure, $temp_fh, 0, $partial_match); |
||||
} else { |
||||
# Open log file and process |
||||
open(my $log_fh, '<', $log_file) or die "Cannot open $log_file: $!"; |
||||
$matches_found = search_log($log_fh, $normalized_search, $search_pattern, $search_structure, $temp_fh, -s $log_file, $partial_match); |
||||
close($log_fh); |
||||
} |
||||
$timers{search} = time() - $timer_start; |
||||
|
||||
# Add summary to temp file |
||||
print $temp_fh "\n" . "=" x 80 . "\n"; |
||||
print $temp_fh "Total matches found: $matches_found\n"; |
||||
print $temp_fh " (search interrupted by user)\n" if $interrupted; |
||||
|
||||
# Add timing information |
||||
my $total_time = time() - $start_time; |
||||
print $temp_fh "\nProcessing time: " . sprintf("%.2f seconds\n", $total_time); |
||||
if ($DEBUG) { |
||||
print $temp_fh "\nDetailed timing:\n"; |
||||
for my $key (sort keys %timers) { |
||||
if ($key eq 'lines_processed') { |
||||
print $temp_fh " $key: " . format_number(int($timers{$key})) . "\n"; |
||||
} else { |
||||
print $temp_fh " $key: " . sprintf("%.3f seconds\n", $timers{$key}); |
||||
} |
||||
} |
||||
} |
||||
|
||||
close($temp_fh); |
||||
|
||||
# Display results |
||||
if (!$live_output) { |
||||
if ($matches_found > 0) { |
||||
system("less", $temp_file); |
||||
} else { |
||||
print "No matches found.\n"; |
||||
} |
||||
} |
||||
|
||||
# Print timing to console |
||||
printf "\nTotal processing time: %.2f seconds\n", $total_time; |
||||
if ($DEBUG) { |
||||
print "Detailed timing:\n"; |
||||
for my $key (sort keys %timers) { |
||||
if ($key eq 'lines_processed') { |
||||
printf " %s: %s\n", $key, format_number(int($timers{$key})); |
||||
} else { |
||||
printf " %s: %.3f seconds\n", $key, $timers{$key}; |
||||
} |
||||
} |
||||
} |
||||
|
||||
# Format number with thousand separators |
||||
sub format_number { |
||||
my ($num) = @_; |
||||
$num =~ s/(\d)(?=(\d{3})+$)/$1,/g; |
||||
return $num; |
||||
} |
||||
|
||||
# Read query input from user |
||||
sub read_query_input { |
||||
my ($use_tty) = @_; |
||||
|
||||
# Determine input source |
||||
my $input_fh; |
||||
my $is_terminal; |
||||
|
||||
if ($use_tty) { |
||||
# Stdin is used for log data, read query from terminal |
||||
open($input_fh, '<', '/dev/tty') or die "Cannot open /dev/tty for query input: $!\n"; |
||||
$is_terminal = 1; |
||||
} else { |
||||
# Use stdin for query input |
||||
$input_fh = \*STDIN; |
||||
$is_terminal = -t STDIN; |
||||
} |
||||
|
||||
# Show prompts if reading from terminal |
||||
if ($is_terminal) { |
||||
print STDERR "\nEnter PostgreSQL query (paste FULL query, then press Ctrl-D):\n"; |
||||
print STDERR "Note: Query fragments won't match. Use --partial-match if the query is truncated at the end.\n"; |
||||
} |
||||
|
||||
my $entire_input = ''; |
||||
|
||||
if ($is_terminal) { |
||||
# Terminal input - use raw mode for large pastes |
||||
my $stty_cmd = $use_tty ? "stty -g </dev/tty 2>/dev/null" : "stty -g 2>/dev/null"; |
||||
my $old_stty = `$stty_cmd`; |
||||
chomp $old_stty; |
||||
|
||||
if ($old_stty) { |
||||
# Disable canonical mode to bypass 4KB line buffer limit |
||||
my $set_stty_cmd = $use_tty ? "stty -icanon -echo min 1 time 0 </dev/tty 2>/dev/null" : "stty -icanon -echo min 1 time 0 2>/dev/null"; |
||||
system($set_stty_cmd); |
||||
|
||||
my $buffer; |
||||
my $char_count = 0; |
||||
my $got_ctrl_d = 0; |
||||
|
||||
while (1) { |
||||
my $nread = sysread($input_fh, $buffer, 1); |
||||
last if !defined $nread || $nread == 0; |
||||
|
||||
if (ord($buffer) == 4) { # Ctrl-D |
||||
$got_ctrl_d = 1; |
||||
last; |
||||
} |
||||
|
||||
print STDERR $buffer; # Echo |
||||
$entire_input .= $buffer; |
||||
$char_count++; |
||||
|
||||
# Read in chunks if data available |
||||
if ($char_count % 100 == 0) { |
||||
my $extra; |
||||
while (1) { |
||||
my $extra_read = sysread($input_fh, $extra, 8192); |
||||
last if !defined $extra_read || $extra_read == 0; |
||||
|
||||
if (index($extra, chr(4)) >= 0) { |
||||
my $pos = index($extra, chr(4)); |
||||
$entire_input .= substr($extra, 0, $pos); |
||||
print STDERR substr($extra, 0, $pos); |
||||
$got_ctrl_d = 1; |
||||
last; |
||||
} |
||||
$entire_input .= $extra; |
||||
print STDERR $extra; |
||||
$char_count += length($extra); |
||||
} |
||||
last if $got_ctrl_d; |
||||
} |
||||
} |
||||
|
||||
my $restore_stty_cmd = $use_tty ? "stty $old_stty </dev/tty 2>/dev/null" : "stty $old_stty 2>/dev/null"; |
||||
system($restore_stty_cmd); |
||||
print STDERR "\n"; |
||||
} else { |
||||
# Fallback without stty |
||||
local $/ = undef; |
||||
$entire_input = <$input_fh>; |
||||
} |
||||
} else { |
||||
# Piped input |
||||
local $/ = undef; |
||||
$entire_input = <$input_fh>; |
||||
} |
||||
|
||||
# Close /dev/tty if we opened it |
||||
close($input_fh) if $use_tty; |
||||
|
||||
$entire_input =~ s/\s+$// if defined $entire_input; |
||||
|
||||
if (!$entire_input || $entire_input eq '') { |
||||
die "Error: No query provided\n"; |
||||
} |
||||
|
||||
if ($DEBUG) { |
||||
open(my $debug_fh, '>', '/tmp/pg_log_search_debug_query.txt'); |
||||
print $debug_fh $entire_input; |
||||
close($debug_fh); |
||||
print STDERR "DEBUG: Saved query to /tmp/pg_log_search_debug_query.txt (" . length($entire_input) . " bytes)\n"; |
||||
} |
||||
|
||||
return $entire_input; |
||||
} |
||||
|
||||
# Check if filename should be excluded from auto-detection |
||||
sub should_exclude_log_file { |
||||
my ($filepath) = @_; |
||||
my $filename = (split('/', $filepath))[-1]; # Get basename |
||||
|
||||
my @exclusion_patterns = ( |
||||
'pgbouncer', # pgbouncer logs |
||||
); |
||||
|
||||
# Check if filename contains any exclusion pattern |
||||
for my $pattern (@exclusion_patterns) { |
||||
return 1 if $filename =~ /\Q$pattern\E/i; |
||||
} |
||||
|
||||
return 0; |
||||
} |
||||
|
||||
# Try to find PostgreSQL log file automatically |
||||
sub find_postgresql_log { |
||||
my @possible_locations = ( |
||||
'/var/log/postgresql/*.log', |
||||
'/var/log/postgresql/postgresql-*.log', |
||||
'/var/lib/pgsql/*/data/log/*.log', |
||||
'/var/lib/postgresql/*/main/log/*.log', |
||||
'/usr/local/pgsql/data/log/*.log', |
||||
'/opt/postgresql/*/data/log/*.log', |
||||
'/var/lib/pgsql/data/pg_log/*.log', |
||||
'/var/lib/postgresql/*/main/pg_log/*.log', |
||||
); |
||||
|
||||
# Also check for PostgreSQL data directory from running process |
||||
my $ps_output = `ps aux 2>/dev/null | grep -E 'postgres.*-D|postmaster.*-D' | grep -v grep`; |
||||
if ($ps_output =~ /-D\s*([^\s]+)/) { |
||||
my $data_dir = $1; |
||||
push @possible_locations, "$data_dir/log/*.log", "$data_dir/pg_log/*.log"; |
||||
} |
||||
|
||||
# Find the most recent log file |
||||
my $most_recent_file; |
||||
my $most_recent_time = 0; |
||||
my @skipped_files = (); |
||||
|
||||
for my $pattern (@possible_locations) { |
||||
my @files = glob($pattern); |
||||
for my $file (@files) { |
||||
# Skip excluded log files |
||||
if (should_exclude_log_file($file)) { |
||||
push @skipped_files, $file; |
||||
next; |
||||
} |
||||
|
||||
if (-f $file && -r $file) { |
||||
my $mtime = (stat($file))[9]; |
||||
if ($mtime > $most_recent_time) { |
||||
$most_recent_time = $mtime; |
||||
$most_recent_file = $file; |
||||
} |
||||
} |
||||
} |
||||
} |
||||
|
||||
# Debug output for skipped files |
||||
if ($DEBUG && @skipped_files) { |
||||
print STDERR "DEBUG: Skipped excluded log files:\n"; |
||||
for my $file (@skipped_files) { |
||||
print STDERR " - $file\n"; |
||||
} |
||||
} |
||||
|
||||
return $most_recent_file; |
||||
} |
||||
|
||||
sub normalize_query_fast { |
||||
my ($query) = @_; |
||||
|
||||
# Remove comments (keep these as they're fast) |
||||
$query =~ s/--[^\n]*//g; |
||||
$query =~ s/\/\*.*?\*\///gs; |
||||
|
||||
# Convert to lowercase |
||||
$query = lc($query); |
||||
|
||||
# Fast whitespace normalization using tr/// which is much faster than s/// |
||||
# Replace tabs and newlines with spaces |
||||
$query =~ tr/\t\n\r/ /; |
||||
|
||||
# Collapse multiple spaces to single space |
||||
$query =~ tr/ / /s; |
||||
|
||||
# Trim leading/trailing whitespace (matching pg_stat_statements: space, tab, newline, cr, vertical tab, form feed) |
||||
my $len = length($query); |
||||
if ($len) { |
||||
my $start = 0; |
||||
my $end = $len; |
||||
|
||||
# Find first non-whitespace (quick check for common case: no leading whitespace) |
||||
my $ch = substr($query, 0, 1); |
||||
if ($ch eq ' ' || $ch eq "\t" || $ch eq "\n" || $ch eq "\r" || $ch eq "\x0B" || $ch eq "\f") { |
||||
$start++; |
||||
while ($start < $len) { |
||||
$ch = substr($query, $start, 1); |
||||
last unless $ch eq ' ' || $ch eq "\t" || $ch eq "\n" || $ch eq "\r" || $ch eq "\x0B" || $ch eq "\f"; |
||||
$start++; |
||||
} |
||||
} |
||||
|
||||
# Find last non-whitespace (quick check for common case: no trailing whitespace) |
||||
$ch = substr($query, -1, 1); |
||||
if ($ch eq ' ' || $ch eq "\t" || $ch eq "\n" || $ch eq "\r" || $ch eq "\x0B" || $ch eq "\f") { |
||||
$end--; |
||||
while ($end > $start) { |
||||
$ch = substr($query, $end - 1, 1); |
||||
last unless $ch eq ' ' || $ch eq "\t" || $ch eq "\n" || $ch eq "\r" || $ch eq "\x0B" || $ch eq "\f"; |
||||
$end--; |
||||
} |
||||
} |
||||
|
||||
# Single substr only if trimming needed |
||||
$query = substr($query, $start, $end - $start) if $start > 0 || $end < $len; |
||||
} |
||||
|
||||
# Only normalize operators if deep search mode is enabled |
||||
if ($deep_search) { |
||||
# This is the expensive operation - only do it if needed |
||||
$query =~ s/\s*([(),=<>!+\-*\/])\s*/$1/g; |
||||
} |
||||
|
||||
return $query; |
||||
} |
||||
|
||||
sub query_to_pattern { |
||||
my ($query) = @_; |
||||
|
||||
# Escape special regex characters |
||||
my $pattern = quotemeta($query); |
||||
|
||||
# Handle IN clauses - simplified regex |
||||
$pattern =~ s/\\bin\\\s*\\\([^)]+\\\)/in\\s*\\([^)]+\\)/gi; |
||||
|
||||
# Replace values with simplified patterns |
||||
# Use \S+ (non-whitespace) instead of character class |
||||
# Numbers |
||||
$pattern =~ s/(?<!\\)\\-?\\d+(?:\\\\.\\d+)?/\\S+/g; |
||||
|
||||
# Quoted strings - match anything that's not comma or close paren |
||||
$pattern =~ s/\\'[^']*\\'/[^,)]+/g; |
||||
|
||||
# Parameters - match non-whitespace |
||||
$pattern =~ s/\\\$\d+/\\S+/g; |
||||
|
||||
return $pattern; |
||||
} |
||||
|
||||
# matching function with pre-computed patterns |
||||
sub queries_match { |
||||
my ($log_query, $normalized_search, $search_pattern, $search_structure, $partial_match) = @_; |
||||
|
||||
# Direct match first (fastest) |
||||
return 1 if $log_query eq $normalized_search; |
||||
|
||||
# Partial match mode - check if search query is a prefix of log query |
||||
# (useful when queries are truncated due to track_activity_query_size) |
||||
if ($partial_match) { |
||||
# Check if normalized_search is a prefix of log_query |
||||
my $search_len = length($normalized_search); |
||||
if (length($log_query) >= $search_len) { |
||||
return 1 if substr($log_query, 0, $search_len) eq $normalized_search; |
||||
} |
||||
# Also check reverse - if log_query is a prefix of search |
||||
my $log_len = length($log_query); |
||||
if (length($normalized_search) >= $log_len) { |
||||
return 1 if substr($normalized_search, 0, $log_len) eq $log_query; |
||||
} |
||||
} |
||||
|
||||
# Quick structure check - compare lengths (skip if deep search enabled) |
||||
if (!$deep_search) { |
||||
my $len_diff = abs(length($log_query) - length($normalized_search)); |
||||
# In partial match mode, allow larger length differences |
||||
my $max_diff = $partial_match ? length($normalized_search) : length($normalized_search) * 0.5; |
||||
return 0 if $len_diff > $max_diff; # Too different |
||||
} |
||||
|
||||
# Pattern matching - use pre-computed search pattern |
||||
return 1 if $log_query =~ /^$search_pattern$/i; |
||||
|
||||
# Check if log query matches search as pattern |
||||
my $log_pattern = query_to_pattern($log_query); |
||||
return 1 if $normalized_search =~ /^$log_pattern$/i; |
||||
|
||||
# Structure comparison - only enabled with deep search flag |
||||
if ($deep_search) { |
||||
# Use pre-computed search_structure |
||||
my $log_structure = $log_query; |
||||
|
||||
# Only compute structure for log query |
||||
# Replace parameters FIRST, before other replacements |
||||
$log_structure =~ s/\bin\s*\([^)]+\)/in(__)/gi; |
||||
$log_structure =~ s/\$\d+/__/g; |
||||
$log_structure =~ s/-?\d+(?:\.\d+)?/__/g; |
||||
$log_structure =~ s/'[^']*'/__/g; |
||||
$log_structure =~ s/\bnull\b/__/gi; |
||||
|
||||
if ($partial_match) { |
||||
# In partial match mode, check if one is a prefix of the other |
||||
my $search_len = length($search_structure); |
||||
my $log_len = length($log_structure); |
||||
if ($log_len >= $search_len) { |
||||
return 1 if substr($log_structure, 0, $search_len) eq $search_structure; |
||||
} |
||||
if ($search_len >= $log_len) { |
||||
return 1 if substr($search_structure, 0, $log_len) eq $log_structure; |
||||
} |
||||
} else { |
||||
return $search_structure eq $log_structure; |
||||
} |
||||
} |
||||
|
||||
return 0; |
||||
} |
||||
|
||||
# search function - accepts filehandle |
||||
sub search_log { |
||||
my ($fh, $normalized_search, $search_pattern, $search_structure, $output_fh, $file_size, $partial_match) = @_; |
||||
|
||||
my $matches_found = 0; |
||||
my $total_match_size = 0; |
||||
my $limit_reached = ''; |
||||
my @current_statement_lines; |
||||
my $in_statement = 0; |
||||
my $lines_processed = 0; |
||||
my $bytes_read = 0; |
||||
my $last_progress = 0; |
||||
|
||||
# Pre-compile the regex for statement detection |
||||
my $statement_regex = qr/LOG:\s*(?:duration:\s*[\d.]+\s*ms\s*)?\s*(?:statement|execute\s+\S+):\s*/i; |
||||
my $continuation_regex = qr/^\s+\S/; |
||||
my $detail_regex = qr/DETAIL:/i; # Match DETAIL anywhere in line (has timestamp prefix) |
||||
|
||||
# Timing for matching |
||||
my $match_time = 0; |
||||
my $read_time = 0; |
||||
|
||||
while (1) { |
||||
my $read_start = time(); |
||||
my $line = <$fh>; |
||||
last unless defined $line; |
||||
$read_time += time() - $read_start; |
||||
|
||||
chomp $line; |
||||
|
||||
# Trim excessively long lines to prevent memory/performance issues |
||||
if (length($line) > $MAX_LINE_LENGTH) { |
||||
$line = substr($line, 0, $MAX_LINE_LENGTH); |
||||
} |
||||
|
||||
$lines_processed++; |
||||
$bytes_read += length($line) if $file_size; |
||||
|
||||
# Check for interrupt |
||||
last if $interrupted; |
||||
|
||||
# Progress reporting every 10,000 lines |
||||
if ($lines_processed % 10000 == 0) { |
||||
if ($file_size) { |
||||
# File with known size - show percentage |
||||
my $progress = ($bytes_read / $file_size) * 100; |
||||
if ($progress - $last_progress > 1) { # Update every 1% |
||||
printf STDERR "\rProgress: %.1f%% (%d lines, %d matches)", $progress, $lines_processed, $matches_found; |
||||
$last_progress = $progress; |
||||
} |
||||
} else { |
||||
# stdin or unknown size - show line count and matches |
||||
printf STDERR "\rProgress: %s lines, %d matches", format_number($lines_processed), $matches_found; |
||||
} |
||||
} |
||||
|
||||
# Check limits before processing more |
||||
if ($matches_found >= $max_matches) { |
||||
$limit_reached = 'max_matches'; |
||||
last; |
||||
} |
||||
if ($total_match_size >= $max_matches_size) { |
||||
$limit_reached = 'max_size'; |
||||
last; |
||||
} |
||||
|
||||
# Check if this is a log line with a statement |
||||
if ($line =~ $statement_regex) { |
||||
# Process previous statement if exists |
||||
if (@current_statement_lines) { |
||||
my $match_start = time(); |
||||
my $match_size = check_and_print_match(\@current_statement_lines, $normalized_search, $search_pattern, $search_structure, \$matches_found, $output_fh, $partial_match); |
||||
$total_match_size += $match_size; |
||||
$match_time += time() - $match_start; |
||||
} |
||||
|
||||
# Start new statement |
||||
@current_statement_lines = ($line); |
||||
$in_statement = 1; |
||||
|
||||
} elsif ($in_statement) { |
||||
# Check if this is a continuation of the statement |
||||
if ($line =~ $continuation_regex || $line =~ $detail_regex || $line =~ /^\s*$/) { |
||||
push @current_statement_lines, $line; |
||||
} else { |
||||
# End of statement - process it |
||||
my $match_start = time(); |
||||
my $match_size = check_and_print_match(\@current_statement_lines, $normalized_search, $search_pattern, $search_structure, \$matches_found, $output_fh, $partial_match); |
||||
$total_match_size += $match_size; |
||||
$match_time += time() - $match_start; |
||||
@current_statement_lines = (); |
||||
$in_statement = 0; |
||||
} |
||||
} |
||||
} |
||||
|
||||
# Process last statement if exists (unless we hit a limit) |
||||
if (@current_statement_lines && !$limit_reached) { |
||||
my $match_start = time(); |
||||
my $match_size = check_and_print_match(\@current_statement_lines, $normalized_search, $search_pattern, $search_structure, \$matches_found, $output_fh, $partial_match); |
||||
$total_match_size += $match_size; |
||||
$match_time += time() - $match_start; |
||||
} |
||||
|
||||
# Clear progress line (always clear if we showed progress) |
||||
print STDERR "\r" . " " x 80 . "\r" if $lines_processed > 0; |
||||
|
||||
# Report if limits were reached |
||||
if ($limit_reached eq 'max_matches') { |
||||
print STDERR "\nReached maximum match limit ($max_matches). Stopping search.\n"; |
||||
} elsif ($limit_reached eq 'max_size') { |
||||
my $size_mb = sprintf("%.2f", $max_matches_size / (1024*1024)); |
||||
print STDERR "\nReached maximum match size (${size_mb}MB). Stopping search.\n"; |
||||
} |
||||
|
||||
# Store detailed timings |
||||
$timers{read_time} = $read_time; |
||||
$timers{match_time} = $match_time; |
||||
$timers{lines_processed} = $lines_processed; |
||||
|
||||
return $matches_found; |
||||
} |
||||
|
||||
# Replace $N parameters with actual values |
||||
sub replace_parameters { |
||||
my ($lines_ref) = @_; |
||||
|
||||
# Find DETAIL line with parameters |
||||
my $detail_idx = -1; |
||||
for (my $i = 0; $i < @$lines_ref; $i++) { |
||||
if ($lines_ref->[$i] =~ /DETAIL:\s*parameters:/i) { |
||||
$detail_idx = $i; |
||||
last; |
||||
} |
||||
} |
||||
|
||||
return 0 unless $detail_idx >= 0; |
||||
|
||||
my $params_line = $lines_ref->[$detail_idx]; |
||||
|
||||
# Parse parameters from DETAIL line |
||||
if ($params_line =~ /parameters:\s*(.+)$/i) { |
||||
my $params_str = $1; |
||||
my %params; |
||||
|
||||
# Parse parameters - handle various formats |
||||
while ($params_str =~ /\$(\d+)\s*=\s*('(?:[^'\\]|\\.)*'|NULL|[^,]+?)(?:\s*,\s*|\s*$)/g) { |
||||
my $param_num = $1; |
||||
my $param_value = $2; |
||||
# Trim whitespace from value |
||||
$param_value =~ s/^\s+|\s+$//g; |
||||
$params{$param_num} = $param_value; |
||||
} |
||||
|
||||
# Replace parameters in all lines |
||||
for (my $i = 0; $i < @$lines_ref; $i++) { |
||||
next if $i == $detail_idx; # Skip the DETAIL line itself |
||||
|
||||
# Replace each parameter |
||||
foreach my $num (keys %params) { |
||||
$lines_ref->[$i] =~ s/\$$num\b/$params{$num}/g; |
||||
} |
||||
} |
||||
|
||||
# Remove the DETAIL line from output |
||||
splice(@$lines_ref, $detail_idx, 1); |
||||
return 1; |
||||
} |
||||
|
||||
return 0; |
||||
} |
||||
|
||||
# Check if statement matches and print if it does |
||||
# Returns the size of the match in bytes (0 if no match) |
||||
sub check_and_print_match { |
||||
my ($lines, $normalized_search, $search_pattern, $search_structure, $matches_ref, $output_fh, $partial_match) = @_; |
||||
|
||||
return 0 unless @$lines; |
||||
|
||||
# Extract just the query text from the lines |
||||
my $query_text = ''; |
||||
my $first_line = 1; |
||||
|
||||
for my $line (@$lines) { |
||||
if ($first_line && $line =~ /LOG:\s*(?:duration:\s*[\d.]+\s*ms\s*)?\s*(?:statement|execute\s+\S+):\s*(.*)$/i) { |
||||
$query_text = $1; |
||||
$first_line = 0; |
||||
} elsif (!$first_line && $line !~ /^\s*DETAIL:/i) { |
||||
# Continuation line |
||||
if ($line =~ /^\s+(.*)/) { |
||||
$query_text .= "\n" . $1; |
||||
} elsif ($line =~ /^\s*$/) { |
||||
$query_text .= "\n"; |
||||
} |
||||
} |
||||
} |
||||
|
||||
return 0 unless $query_text; |
||||
|
||||
# Normalize the log query |
||||
my $normalized_log = normalize_query_fast($query_text); |
||||
|
||||
# Check if queries match - using pre-computed patterns |
||||
if (queries_match($normalized_log, $normalized_search, $search_pattern, $search_structure, $partial_match)) { |
||||
$$matches_ref++; |
||||
|
||||
# Make a copy of lines for output |
||||
my @output_lines = @$lines; |
||||
|
||||
# Replace parameters if found and remove DETAIL line |
||||
replace_parameters(\@output_lines); |
||||
|
||||
# Print the matched lines |
||||
my $output = join("\n", @output_lines) . "\n\n"; |
||||
print $output_fh $output; |
||||
|
||||
# In live mode, also print to stdout immediately |
||||
if ($live_output) { |
||||
print STDOUT $output; |
||||
} |
||||
|
||||
return length($output); |
||||
} |
||||
|
||||
return 0; |
||||
} |
||||
|
||||
# Print help information |
||||
sub print_help { |
||||
print <<'HELP'; |
||||
pg_log_search - Fast PostgreSQL log query search tool |
||||
|
||||
USAGE: |
||||
pg_log_search [OPTIONS] [logfile] |
||||
cat logfile | pg_log_search [OPTIONS] |
||||
|
||||
DESCRIPTION: |
||||
Searches PostgreSQL log files for specific SQL queries. Handles multi-line |
||||
queries, parameter substitution, and various log formats. Optimized for |
||||
speed with large log files. |
||||
|
||||
OPTIONS: |
||||
-h, --help Show this help message |
||||
-v, --version Show version number |
||||
-d, --deep-search Enable thorough matching with operator normalization |
||||
and structure comparison (slower but more accurate). |
||||
Disables length-based quick rejection optimization. |
||||
-p, --partial-match Enable partial matching for queries truncated at the end |
||||
(useful when queries are cut off due to |
||||
track_activity_query_size limit). Allows matching |
||||
when search query or log query is incomplete. |
||||
-l, --live Show matches in real-time as they're found |
||||
-m, --max-matches N Stop after N matches (default: 10000) |
||||
-z, --max-matches-size SIZE |
||||
Stop when total match size reaches SIZE |
||||
(default: 50M). Accepts K, M, G suffixes. |
||||
|
||||
ENVIRONMENT VARIABLES: |
||||
DEBUG=1 Show detailed timing information |
||||
|
||||
EXAMPLES: |
||||
# Auto-detect most recent PostgreSQL log and search in it (prompts for query) |
||||
pg_log_search |
||||
|
||||
# Search specific log file |
||||
pg_log_search /var/log/postgresql/postgresql.log |
||||
|
||||
# Search with log from stdin (query entered interactively) |
||||
cat postgresql.log | pg_log_search |
||||
|
||||
# Search with query from stdin (log file specified) |
||||
pg_log_search /var/log/postgresql/postgresql.log < query.sql |
||||
|
||||
# Monitor live log with tail -f and show matches in real-time |
||||
tail -f /var/log/postgresql/postgresql.log | pg_log_search --live |
||||
|
||||
# Search log file and show matches as they're found |
||||
pg_log_search --live /var/log/postgresql/postgresql.log |
||||
|
||||
# Limit results and use deep search |
||||
pg_log_search -d -m 100 postgresql.log |
||||
|
||||
# Size limit with live output |
||||
pg_log_search -l --max-matches-size 10M postgresql.log |
||||
|
||||
# Search with partial matching for truncated queries |
||||
pg_log_search --partial-match postgresql.log |
||||
|
||||
QUERY INPUT: |
||||
After starting, paste your FULL SQL query and press Ctrl-D when done. |
||||
Multi-line queries are fully supported. |
||||
|
||||
IMPORTANT: Query fragments won't match - you must provide the complete query |
||||
from beginning to end. If your query might be truncated at the end (e.g., due |
||||
to track_activity_query_size limit), use the --partial-match option. |
||||
|
||||
Note: Queries can contain parameter placeholders ($1, $2, ..., $N) as from |
||||
pg_stat_statements. The tool will match them against actual parameter values |
||||
in the log entries. |
||||
|
||||
INTERRUPTING: |
||||
Press Ctrl+C during search to stop and view partial results found so far. |
||||
|
||||
OUTPUT: |
||||
Matched queries are displayed with: |
||||
- Original log line with timestamp and duration |
||||
- Parameter values substituted for $1, $2, etc. |
||||
- Clean formatting (DETAIL lines removed) |
||||
|
||||
HELP |
||||
} |
||||
Loading…
Reference in new issue