Context Navigation

source: memlog_analyze @ 9fcaba3

Revision 9fcaba3, 10.1 KB checked in by Hal Finkel <hfinkel@…>, 10 years ago (diff)
add leak-finding mode
Property mode set to `100755`

Line
1	#! /usr/bin/env perl
2	use strict;
3	use File::Basename;
4	use Getopt::Long;
5
6	my $find_leaks = 0;
7	my $print_raw_proc_name = 0;
8	my $quiet = 0;
9	my $help = 0;
10
11	sub print_usage($) {
12	my $ec = $_[0];
13	my $usage = <<EOM;
14	Usage: $0 [options] <memlog file> [<output directory>]
15	options:
16	--leaks
17	Provide information on leaks instead of peak usage
18	--print-symbol-names
19	Include symbol names and offsets in the output
20	--quiet or -q
21	Don't print status messages while running
22	EOM
23
24	print $usage;
25	exit($ec);
26	}
27
28	GetOptions("help\|h\|?" => \$help,
29	"leaks" => \$find_leaks,
30	"print-symbol-names" => \$print_raw_proc_name,
31	"quiet\|q" => \$quiet)
32	or print_usage(1);
33
34	if ($help) {
35	print_usage(0);
36	}
37
38	my $memlog_fn = $ARGV[0];
39	my $out_dir = $ARGV[1] \|\| '.';
40
41	if (! -f $memlog_fn) {
42	print_usage(1);
43	}
44
45	# The version of addr2line and friends that you use can make a big difference,
46	# especially on BE ppc64, where older versions of addr2line did not account
47	# correctly for the function descriptor setup. At ALCF, on the BG/Q, we have
48	# newer versions not in the default search path (from bgclang).
49	my $alcf_bu_dir = '/soft/compilers/bgclang/current/binutils/bin';
50	if (-d $alcf_bu_dir) {
51	$ENV{'PATH'} = $alcf_bu_dir . ':' . $ENV{'PATH'};
52	}
53
54	open(MEMLOG, $memlog_fn) \|\| die "Can't open $memlog_fn: $!";
55
56	# The first step is to determine the high-water mark.
57	my $max_rss = 0;
58	foreach my $line (<MEMLOG>) {
59	chomp($line);
60	my @parts = split(/\t/, $line);
61
62	my $op = shift(@parts);
63	my $state = shift(@parts);
64
65	my ($time, $then_max_rss, $tid) = split(/\s+/, $state);
66	if ($max_rss < $then_max_rss) {
67	$max_rss = $then_max_rss;
68	}
69	}
70
71	seek(MEMLOG, 0, 0);
72
73	# Scan the log for malloc/free pairings. We're interested only in active
74	# allocations at the time when the rss reaches the final maxrss.
75	# If we're finding leaks, then go to the very end.
76	my $active_alloc_time = 0;
77	my %malloc_lines;
78	foreach my $line (<MEMLOG>) {
79	chomp($line);
80	my @parts = split(/\t/, $line);
81
82	my $op = shift(@parts);
83	my $state = shift(@parts);
84
85	if ($op =~ /^M:/) {
86	my ($size, $ptr) = ($op =~ /^M: (\d+) 0x(\w+)/);
87	$malloc_lines{$ptr} = $line;
88	} elsif ($op =~ /^F:/) {
89	my ($ptr) = ($op =~ /^F: 0x(\w+)/);
90	delete $malloc_lines{$ptr};
91	} else {
92	next;
93	}
94
95	my ($time, $then_max_rss, $tid) = split(/\s+/, $state);
96	$active_alloc_time = $time;
97
98	if (!$find_leaks) {
99	# If we've reached the max rss, we've seen all we need to see.
100	if ($then_max_rss == $max_rss) {
101	last;
102	}
103	}
104	}
105
106	close(MEMLOG);
107
108	# Convert maxrss, currently in KB, to bytes.
109	$max_rss *= 1024;
110
111	my $total_size = 0;
112	my %roots;
113	my %all_nodes;
114	foreach my $line (values %malloc_lines) {
115	my @parts = split(/\t/, $line);
116
117	my $op = shift(@parts);
118	my $state = shift(@parts);
119
120	# Only dealing with allocations here...
121	if ($op !~ /^M:/) {
122	next;
123	}
124
125	my ($size, $ptr) = ($op =~ /^M: (\d+) 0x(\w+)/);
126	my ($time, $then_max_rss, $tid) = split(/\s+/, $state);
127
128	$total_size += $size;
129
130	sub level_parts($) {
131	my $level = @_[0];
132	my ($file_name, $proc_name, $off, $pc, $relpc) =
133	($level =~ /^(.) \((.)\+0x(\w+)\) \[0x(\w+) \(0x(\w+)\)\]/);
134
135	return ($file_name, $proc_name, $off, $pc, $relpc);
136	}
137
138	# Put the top of the stack first.
139	@parts = reverse(@parts);
140
141	my $parent = \%roots;
142	for (my $i = 0; $i < scalar(@parts); ++$i) {
143	my $level = $parts[$i];
144	my ($file_name, $proc_name, $off, $pc, $relpc) = level_parts($level);
145
146	# Skip this level if we don't even know from what file it came.
147	if ($file_name eq '?') {
148	next;
149	}
150
151	# print STDERR "parsed: $file_name, $proc_name, $off, $pc, $relpc\n";
152
153	if (!exists $all_nodes{$pc}) {
154	$all_nodes{$pc}->{'file_name'} = $file_name;
155	$all_nodes{$pc}->{'proc_name'} = $proc_name;
156	$all_nodes{$pc}->{'off'} = $off;
157	$all_nodes{$pc}->{'pc'} = $pc;
158	$all_nodes{$pc}->{'relpc'} = $relpc;
159	}
160
161	if (!exists $parent->{$pc}) {
162	$parent->{$pc} = $all_nodes{$pc};
163	}
164
165	$parent->{$pc}->{'size'} += $size;
166
167	my ($next_file_name, $next_proc_name, $next_off, $next_pc, $next_relpc);
168	if ($i < scalar(@parts)-1) {
169	my $next_level = $parts[$i+1];
170	($next_file_name, $next_proc_name, $next_off, $next_pc, $next_relpc) =
171	level_parts($next_level);
172	$parent->{$pc}->{'child_sizes'}->{$next_pc} += $size;
173	}
174
175	if (!exists $parent->{'children'}) {
176	$parent->{'children'} = {};
177	}
178
179	$parent = $parent->{'children'};
180	}
181	}
182
183	my $txt_fn = "$out_dir/" . basename($memlog_fn) . ".txt";
184	my $dot_fn = "$out_dir/" . basename($memlog_fn) . ".dot";
185	my $ps_fn = "$out_dir/" . basename($memlog_fn) . ".ps";
186	my $pdf_fn = "$out_dir/" . basename($memlog_fn) . ".pdf";
187
188	if (!$quiet) {
189	print "Creating $txt_fn\n";
190	}
191
192	open(TXT, ">$txt_fn") \|\| die "Can't open $txt_fn: $!";
193
194	if (!$quiet) {
195	print "Creating $dot_fn\n";
196	}
197
198	open(DOT, ">$dot_fn") \|\| die "Can't open $dot_fn: $!";
199
200	sub format_bytes($) {
201	my @sizes = qw( B KB MB GB TB PB );
202	my $size = $_[0];
203
204	my $i = 0;
205	while ($size > 1024) {
206	$size /= 1024;
207	++$i;
208	}
209
210	return sprintf("%.3f $sizes[$i]", $size);
211	}
212
213	print DOT ("digraph \"memlog\" {\n");
214	print DOT ("size=\"8,11\";\n");
215	print DOT ("node [width=0.375,height=0.25];\n");
216
217	my $find_type = $find_leaks ? " (leaks)" : "";
218	printf DOT ("Legend [shape=box, fontsize=100, shape=oval," .
219	"label=\"Total: %s active$find_type at maxrss = %s after %s s\"];\n",
220	format_bytes($total_size), format_bytes($max_rss),
221	$active_alloc_time);
222
223	printf TXT ("memlog: Total: %s active$find_type at maxrss = %s after %s s\n\n",
224	format_bytes($total_size), format_bytes($max_rss),
225	$active_alloc_time);
226
227	my %cached_names;
228	sub get_name($) {
229	my $node = $_[0];
230	my $pc = $node->{'pc'};
231
232	if (exists $cached_names{$pc}) {
233	return $cached_names{$pc};
234	}
235
236	my $ret = '';
237
238	# Prefer the relative offset (that is what we want for shared libraries), but
239	# if is not available, use the full offset (which is what we want for the
240	# base executable).
241	my $exe_off = $node->{'relpc'};
242	if (!$exe_off) {
243	$exe_off = $pc;
244	}
245
246	my $file_name = $node->{'file_name'};
247
248	# If we don't have an absolute path, this is probably the base dynamic
249	# executable, so ask the shell which one it used (not foolproof because we
250	# might not have the same PATH now, but hopefully is generally the right
251	# thing).
252	if ($file_name !~ /^\//) {
253	$file_name = `which '$file_name'`;
254	chomp($file_name);
255	}
256
257	my ($func, $loc) = `addr2line -e '$file_name' -f 0x$exe_off`;
258	chomp($func);
259	chomp($loc);
260
261	if ($func !~ /^\?/) {
262	# In general, this function name might look something like:
263	# 00000329.plt_call.wcsnrtombs@@GLIBC_2.3+0
264	$func =~ s/@.*//; # Remove trailing symbol version strings
265	$func =~ s/.*\.//;
266	$func = `c++filt '$func'`;
267	chomp($func);
268
269	# It sometimes happens that addr2line is a bit too smart: when debugging
270	# information is available, it might print a local alias for the
271	# function instead of the full name (for example, printing 'List'
272	# instead of 'Foo::List<int>::List(int, int const&)').
273	if ($node->{'proc_name'} ne '?') {
274	my $proc_name = $node->{'proc_name'};
275	$proc_name = `c++filt '$proc_name'`;
276	chomp($proc_name);
277
278	if (length($proc_name) > length($func)) {
279	$func = $proc_name;
280	}
281	}
282
283	$ret .= $func . '\n';
284
285	if ($loc !~ /^[:?]/) {
286	$ret .= $loc . '\n';
287	}
288	} elsif ($node->{'proc_name'} ne '?') {
289	my $proc_name = $node->{'proc_name'};
290	$proc_name = `c++filt '$proc_name'`;
291	chomp($proc_name);
292
293	$ret .= $proc_name . '\n';
294	}
295
296	$ret .= $node->{'file_name'};
297	if ($print_raw_proc_name and $node->{'proc_name'} ne '?') {
298	$ret .= '\n' . $node->{'proc_name'} . '+0x' . $node->{'off'};
299	}
300
301	$cached_names{$pc} = $ret;
302	return $ret;
303	}
304
305	my $skip_frac = 0.01;
306	my %skipped;
307
308	foreach my $pc (keys %all_nodes) {
309	my $node = $all_nodes{$pc};
310
311	my $local_size = $node->{'size'};
312	if ($local_size * 1.0 / $total_size < $skip_frac) {
313	$skipped{$pc} = 1;
314	next;
315	}
316
317	my $fs = 8.0;
318	if ($local_size > 0) {
319	$fs = 50.0 * (abs($local_size * 1.0 / $total_size))**0.125;
320	}
321
322	my $name = get_name($node);
323
324	printf DOT ("N%s [label=\"%s\\n%s\", shape=box, fontsize=%.1f%s];\n",
325	$pc, $name, format_bytes($local_size), $fs);
326	}
327
328	foreach my $pc (keys %all_nodes) {
329	if ($skipped{$pc}) {
330	next;
331	}
332
333	my $node = $all_nodes{$pc};
334	my $local_size = $node->{'size'};
335
336	foreach my $cpc (keys %{$node->{'child_sizes'}}) {
337	if ($skipped{$cpc}) {
338	next;
339	}
340
341	my $child_size = $node->{'child_sizes'}->{$cpc};
342	my $frac = $child_size * 1.0 / $local_size;
343
344	my $weight = 100.0 * sqrt($frac);
345	my $style = sprintf("setlinewidth(%f)", 8.0 * sqrt($frac));
346
347	my $fs = 40.0 * $frac**0.125;
348
349	printf DOT ("N%s -> N%s [label=\"%s\", weight=%d, style=\"%s\", fontsize=%.1f];\n",
350	$pc, $cpc, format_bytes($child_size), $weight, $style, $fs);
351	}
352	}
353
354	print DOT ("}\n");
355
356	foreach my $pc (sort { $all_nodes{$::b}->{'size'} <=>
357	$all_nodes{$::a}->{'size'} } keys %all_nodes) {
358	if ($skipped{$pc}) {
359	next;
360	}
361
362	print TXT ('*' x 80) . "\n\n";
363
364	my $node = $all_nodes{$pc};
365
366	my $local_size = $node->{'size'};
367	printf TXT ("%s - %.1f%%\n", format_bytes($local_size),
368	$local_size * 100.0 / $total_size);
369
370	my $name = get_name($node);
371	$name =~ s/\\n/\n/g;
372
373	print TXT "$name\n$pc\n";
374
375	print TXT "\nMEMORY ALLOCATED BY CALLEES:\n";
376
377	foreach my $cpc (sort { $node->{'child_sizes'}->{$::b} <=>
378	$node->{'child_sizes'}->{$::a} }
379	keys %{$node->{'child_sizes'}}) {
380	if ($skipped{$cpc}) {
381	next;
382	}
383
384	my $child_node = $all_nodes{$cpc};
385	my $child_size = $node->{'child_sizes'}->{$cpc};
386
387	printf TXT ("\t%s - %.1f%%\n", format_bytes($child_size),
388	$child_size * 100.0 / $local_size);
389
390	my $child_name = get_name($child_node);
391	$child_name =~ s/\\n/\n\t/g;
392	print TXT "\t$child_name\n\t$cpc\n";
393
394	print TXT "\n";
395	}
396
397	print TXT "\n";
398	}
399
400	close(TXT);
401	close(DOT);
402
403	if (!$quiet) {
404	print "Creating $ps_fn\n";
405	}
406
407	system("dot -Tps2 < '$dot_fn' > '$ps_fn'");
408
409	if (!$quiet) {
410	print "Creating $pdf_fn\n";
411	}
412
413	system("ps2pdf '$ps_fn' '$pdf_fn'");
414
415	exit 0;
416

Note: See TracBrowser for help on using the repository browser.

Download in other formats: