#! /usr/bin/env perl use strict; use File::Basename; use Getopt::Long; my $print_raw_proc_name = 0; my $quiet = 0; my $help = 0; sub print_usage($) { my $ec = $_[0]; my $usage = < [] options: --print-symbol-names Include symbol names and offsets in the output --quiet or -q Don't print status messages while running EOM print $usage; exit($ec); } GetOptions("help|?" => \$help, "print-symbol-names" => \$print_raw_proc_name, "quiet|q" => \$quiet) or print_usage(1); if ($help) { print_usage(0); } my $memlog_fn = $ARGV[0]; my $out_dir = $ARGV[1] || '.'; if (! -f $memlog_fn) { print_usage(1); } # The version of addr2line and friends that you use can make a big difference, # especially on BE ppc64, where older versions of addr2line did not account # correctly for the function descriptor setup. At ALCF, on the BG/Q, we have # newer versions not in the default search path (from bgclang). my $alcf_bu_dir = '/soft/compilers/bgclang/current/binutils/bin'; if (-d $alcf_bu_dir) { $ENV{'PATH'} = $alcf_bu_dir . ':' . $ENV{'PATH'}; } open(MEMLOG, $memlog_fn) || die "Can't open $memlog_fn: $!"; # The first step is to determine the high-water mark. my $max_rss = 0; foreach my $line () { chomp($line); my @parts = split(/\t/, $line); my $op = shift(@parts); my $state = shift(@parts); my ($time, $then_max_rss, $tid) = split(/\s+/, $state); if ($max_rss < $then_max_rss) { $max_rss = $then_max_rss; } } seek(MEMLOG, 0, 0); # Scan the log for malloc/free pairings. We're interested only in active # allocations at the time when the rss reaches the final maxrss. my $max_rss_time = 0; my %malloc_lines; foreach my $line () { chomp($line); my @parts = split(/\t/, $line); my $op = shift(@parts); my $state = shift(@parts); if ($op =~ /^M:/) { my ($size, $ptr) = ($op =~ /^M: (\d+) 0x(\w+)/); $malloc_lines{$ptr} = $line; } elsif ($op =~ /^F:/) { my ($ptr) = ($op =~ /^F: 0x(\w+)/); delete $malloc_lines{$ptr}; } else { next; } # If we've reached the max rss, we've seen all we need to see. my ($time, $then_max_rss, $tid) = split(/\s+/, $state); $max_rss_time = $time; if ($then_max_rss == $max_rss) { last; } } close(MEMLOG); # Convert maxrss, currently in KB, to bytes. $max_rss *= 1024; my $total_size = 0; my %roots; my %all_nodes; foreach my $line (values %malloc_lines) { my @parts = split(/\t/, $line); my $op = shift(@parts); my $state = shift(@parts); # Only dealing with allocations here... if ($op !~ /^M:/) { next; } my ($size, $ptr) = ($op =~ /^M: (\d+) 0x(\w+)/); my ($time, $then_max_rss, $tid) = split(/\s+/, $state); $total_size += $size; sub level_parts($) { my $level = @_[0]; my ($file_name, $proc_name, $off, $pc, $relpc) = ($level =~ /^(.*) \((.*)\+0x(\w+)\) \[0x(\w+) \(0x(\w+)\)\]/); return ($file_name, $proc_name, $off, $pc, $relpc); } # Put the top of the stack first. @parts = reverse(@parts); my $parent = \%roots; for (my $i = 0; $i < scalar(@parts); ++$i) { my $level = $parts[$i]; my ($file_name, $proc_name, $off, $pc, $relpc) = level_parts($level); # Skip this level if we don't even know from what file it came. if ($file_name eq '?') { next; } # print STDERR "parsed: $file_name, $proc_name, $off, $pc, $relpc\n"; if (!exists $all_nodes{$pc}) { $all_nodes{$pc}->{'file_name'} = $file_name; $all_nodes{$pc}->{'proc_name'} = $proc_name; $all_nodes{$pc}->{'off'} = $off; $all_nodes{$pc}->{'pc'} = $pc; $all_nodes{$pc}->{'relpc'} = $relpc; } if (!exists $parent->{$pc}) { $parent->{$pc} = $all_nodes{$pc}; } $parent->{$pc}->{'size'} += $size; my ($next_file_name, $next_proc_name, $next_off, $next_pc, $next_relpc); if ($i < scalar(@parts)-1) { my $next_level = $parts[$i+1]; ($next_file_name, $next_proc_name, $next_off, $next_pc, $next_relpc) = level_parts($next_level); $parent->{$pc}->{'child_sizes'}->{$next_pc} += $size; } if (!exists $parent->{'children'}) { $parent->{'children'} = {}; } $parent = $parent->{'children'}; } } my $txt_fn = "$out_dir/" . basename($memlog_fn) . ".txt"; my $dot_fn = "$out_dir/" . basename($memlog_fn) . ".dot"; my $ps_fn = "$out_dir/" . basename($memlog_fn) . ".ps"; my $pdf_fn = "$out_dir/" . basename($memlog_fn) . ".pdf"; if (!$quiet) { print "Creating $txt_fn\n"; } open(TXT, ">$txt_fn") || die "Can't open $txt_fn: $!"; if (!$quiet) { print "Creating $dot_fn\n"; } open(DOT, ">$dot_fn") || die "Can't open $dot_fn: $!"; sub format_bytes($) { my @sizes = qw( B KB MB GB TB PB ); my $size = $_[0]; my $i = 0; while ($size > 1024) { $size /= 1024; ++$i; } return sprintf("%.3f $sizes[$i]", $size); } print DOT ("digraph \"memlog\" {\n"); print DOT ("size=\"8,11\";\n"); print DOT ("node [width=0.375,height=0.25];\n"); printf DOT ("Legend [shape=box, fontsize=100, shape=oval," . "label=\"Total: %s active at maxrss = %s after %s s\"];\n", format_bytes($total_size), format_bytes($max_rss), $max_rss_time); printf TXT ("memlog: Total: %s active at maxrss = %s after %s s\n\n", format_bytes($total_size), format_bytes($max_rss), $max_rss_time); my %cached_names; sub get_name($) { my $node = $_[0]; my $pc = $node->{'pc'}; if (exists $cached_names{$pc}) { return $cached_names{$pc}; } my $ret = ''; # Prefer the relative offset (that is what we want for shared libraries), but # if is not available, use the full offset (which is what we want for the # base executable). my $exe_off = $node->{'relpc'}; if (!$exe_off) { $exe_off = $pc; } my $file_name = $node->{'file_name'}; # If we don't have an absolute path, this is probably the base dynamic # executable, so ask the shell which one it used (not foolproof because we # might not have the same PATH now, but hopefully is generally the right # thing). if ($file_name !~ /^\//) { $file_name = `which '$file_name'`; chomp($file_name); } my ($func, $loc) = `addr2line -e '$file_name' -f 0x$exe_off`; chomp($func); chomp($loc); if ($func !~ /^\?/) { # In general, this function name might look something like: # 00000329.plt_call.wcsnrtombs@@GLIBC_2.3+0 $func =~ s/@.*//; # Remove trailing symbol version strings $func =~ s/.*\.//; $func = `c++filt '$func'`; chomp($func); # It sometimes happens that addr2line is a bit too smart: when debugging # information is available, it might print a local alias for the # function instead of the full name (for example, printing 'List' # instead of 'Foo::List::List(int, int const&)'). if ($node->{'proc_name'} ne '?') { my $proc_name = $node->{'proc_name'}; $proc_name = `c++filt '$proc_name'`; chomp($proc_name); if (length($proc_name) > length($func)) { $func = $proc_name; } } $ret .= $func . '\n'; if ($loc !~ /^[:?]/) { $ret .= $loc . '\n'; } } elsif ($node->{'proc_name'} ne '?') { my $proc_name = $node->{'proc_name'}; $proc_name = `c++filt '$proc_name'`; chomp($proc_name); $ret .= $proc_name . '\n'; } $ret .= $node->{'file_name'}; if ($print_raw_proc_name and $node->{'proc_name'} ne '?') { $ret .= '\n' . $node->{'proc_name'} . '+0x' . $node->{'off'}; } $cached_names{$pc} = $ret; return $ret; } my $skip_frac = 0.01; my %skipped; foreach my $pc (keys %all_nodes) { my $node = $all_nodes{$pc}; my $local_size = $node->{'size'}; if ($local_size * 1.0 / $total_size < $skip_frac) { $skipped{$pc} = 1; next; } my $fs = 8.0; if ($local_size > 0) { $fs = 50.0 * (abs($local_size * 1.0 / $total_size))**0.125; } my $name = get_name($node); printf DOT ("N%s [label=\"%s\\n%s\", shape=box, fontsize=%.1f%s];\n", $pc, $name, format_bytes($local_size), $fs); } foreach my $pc (keys %all_nodes) { if ($skipped{$pc}) { next; } my $node = $all_nodes{$pc}; my $local_size = $node->{'size'}; foreach my $cpc (keys %{$node->{'child_sizes'}}) { if ($skipped{$cpc}) { next; } my $child_size = $node->{'child_sizes'}->{$cpc}; my $frac = $child_size * 1.0 / $local_size; my $weight = 100.0 * sqrt($frac); my $style = sprintf("setlinewidth(%f)", 8.0 * sqrt($frac)); my $fs = 40.0 * $frac**0.125; printf DOT ("N%s -> N%s [label=\"%s\", weight=%d, style=\"%s\", fontsize=%.1f];\n", $pc, $cpc, format_bytes($child_size), $weight, $style, $fs); } } print DOT ("}\n"); foreach my $pc (sort { $all_nodes{$::b}->{'size'} <=> $all_nodes{$::a}->{'size'} } keys %all_nodes) { if ($skipped{$pc}) { next; } print TXT ('*' x 80) . "\n\n"; my $node = $all_nodes{$pc}; my $local_size = $node->{'size'}; printf TXT ("%s - %.1f%%\n", format_bytes($local_size), $local_size * 100.0 / $total_size); my $name = get_name($node); $name =~ s/\\n/\n/g; print TXT "$name\n$pc\n"; print TXT "\nMEMORY ALLOCATED BY CALLEES:\n"; foreach my $cpc (sort { $node->{'child_sizes'}->{$::b} <=> $node->{'child_sizes'}->{$::a} } keys %{$node->{'child_sizes'}}) { if ($skipped{$cpc}) { next; } my $child_node = $all_nodes{$cpc}; my $child_size = $node->{'child_sizes'}->{$cpc}; printf TXT ("\t%s - %.1f%%\n", format_bytes($child_size), $child_size * 100.0 / $local_size); my $child_name = get_name($child_node); $child_name =~ s/\\n/\n\t/g; print TXT "\t$child_name\n\t$cpc\n"; print TXT "\n"; } print TXT "\n"; } close(TXT); close(DOT); if (!$quiet) { print "Creating $ps_fn\n"; } system("dot -Tps2 < '$dot_fn' > '$ps_fn'"); if (!$quiet) { print "Creating $pdf_fn\n"; } system("ps2pdf '$ps_fn' '$pdf_fn'"); exit 0;