Changeset 2d9e75d
- Timestamp:
- 07/22/15 14:18:06 (9 years ago)
- Branches:
- master
- Children:
- 96927c5
- Parents:
- 09f3093 (diff), 3c9fc94 (diff)
Note: this is a merge changeset, the changes displayed below correspond to the merge itself.
Use the (diff) links above to see all the changes relative to each parent. - git-author:
- Hal Finkel <hfinkel@…> (07/22/15 14:18:06)
- git-committer:
- Hal Finkel <hfinkel@…> (07/22/15 14:18:06)
- Files:
-
- 1 added
- 3 edited
- 1 moved
Legend:
- Unmodified
- Added
- Removed
-
Makefile
r510af01 r3c9fc94 2 2 CXXFLAGS = -std=gnu++0x -O3 -g 3 3 4 CPPFLAGS = 4 # When compiling with CXX=powerpc64-bgq-linux-g++, we need these: 5 CPPFLAGS = -I/bgsys/drivers/ppcfloor -I/bgsys/drivers/ppcfloor/spi/include/kernel/cnk 6 5 7 LDFLAGS = -lpthread -ldl 6 8 … … 16 18 $(CXX) $(CPPFLAGS) $(CXXFLAGS) $(LDFLAGS) -fPIC -shared -o libmemlog.so memlog.cpp 17 19 18 install: all memlog2dot README 19 cp -a libmemlog.so memlog_s.o memlog2dot README $(DESTDIR)/ 20 install: all memlog_analyze README 21 cp -a libmemlog.so memlog_s.o memlog_analyze README $(DESTDIR)/ 22 echo '-Wl,--wrap,malloc,--wrap,valloc,--wrap,realloc,--wrap,calloc,--wrap,memalign,--wrap,free,--wrap,posix_memalign,--wrap,mmap,--wrap,mmap64,--wrap,munmap $(DESTDIR)/memlog_s.o -lpthread -ldl' > $(DESTDIR)/memlog_s_ld_cmds 20 23 21 24 clean: -
README
r24aa734 r3c9fc94 16 16 -L/path/to/memlog -Wl,-rpath,/path/to/memlog -lmemlog 17 17 18 For statically-linked applications, add the following to your linker flags: 18 For statically-linked applications ld's automatic wrapping functionality is 19 employed, and the exact set of necessary flags is large, so a file named 20 memlog_s_ld_cmds has been provided containing the necessary flags. 19 21 20 -Wl,--wrap,malloc,--wrap,free,--wrap,realloc,--wrap,calloc,--wrap,memalign \ 21 /path/to/memlog/memlog_s.o -lpthread -ldl 22 To your linker flags add: 23 24 `cat /path/to/memlog/memlog_s_ld_cmds` 25 26 or, if your compiler and wrappers support response files (gcc and clang do, for 27 example), simply: 28 29 @/path/to/memlog/memlog_s_ld_cmds 30 31 so your overall linking command might look something like this: 32 33 mpic++ -O3 -g -o my_program my_obj1.o my_obj2.o @/path/to/memlog/memlog_s_ld_cmds 22 34 23 35 ** RUNNING ** … … 29 41 use by running: 30 42 31 /path/to/memlog/memlog2dot /path/to/HOST.PID.memlog 43 /path/to/memlog/memlog_analyze /path/to/HOST.PID.memlog 32 44 33 45 this will generate files named HOST.PID.memlog.dot, HOST.PID.memlog.ps and … … 36 48 in textual form. 37 49 50 If you pass the --leaks option to memlog_analyze, it will provide data on 51 allocations active at the end of the program (leaks) instead of those active 52 when the peak memory usage is first reached. 53 54 You might have many runs of the same application (or output from many ranks of 55 an MPI job), and you'd like to pick the one for analysis with the highest 56 memory usage. If you provide a glob pattern to memlog_analyze it will do this 57 for you. Make sure you quote the glob pattern so that your shell does not 58 expand it. 59 60 /path/to/memlog/memlog_analyze "/path/to/*.memlog" 61 62 When running under common batch systems, the files are named 63 JOB_ID.HOST.PID.memlog, and when running under the BG/Q CNK, the process's rank 64 is used instead of the node-local PID. 
65 38 66 Note that the peak memory usage is determined by monitoring the process's 39 67 maximum resident set size, not just the total allocated heap memory. 40 68 41 memlog2dot depends on dot (from the graphviz package) and ps2pdf (from the 69 memlog_analyze takes, as a second optional parameter, the name of the output 70 directory (the current directory is the default). If the directory does not 71 exist, it will be created. 72 73 memlog_analyze depends on dot (from the graphviz package) and ps2pdf (from the 42 74 ghostscript package), plus various tools from the binutils package. 43 75 -
memlog.cpp
r09f3093 r2d9e75d 45 45 #include <cstdio> 46 46 #include <cstring> 47 #include <cstdint> 47 48 48 49 // NOTE: This source makes very minimal use of C++11 features. It can still be … … 52 53 53 54 #include <limits.h> 55 #include <errno.h> 54 56 #include <malloc.h> 55 57 #include <execinfo.h> 58 #include <sys/mman.h> 56 59 #include <sys/syscall.h> 57 60 #include <sys/time.h> … … 66 69 #include <dlfcn.h> 67 70 71 #ifdef __bgq__ 72 #include <spi/include/kernel/location.h> 73 #include <spi/include/kernel/memory.h> 74 #endif 75 68 76 using namespace std; 69 77 … … 72 80 // -Wl,--wrap,malloc,--wrap,free,--wrap,realloc,--wrap,calloc,--wrap,memalign /path/to/memlog_s.o -lpthread -ldl 73 81 74 FILE *log_file = NULL;82 static FILE *log_file = 0; 75 83 static pthread_mutex_t log_mutex = PTHREAD_MUTEX_INITIALIZER; 76 84 … … 80 88 static char self_path[PATH_MAX+1] = { '\0' }; 81 89 90 #ifdef __bgq__ 91 static int on_bgq = 0; 92 #endif 93 94 static void *initial_brk = 0; 95 96 static unordered_map<void *, Dl_info> *dladdr_cache = 0; 97 82 98 __attribute__((__constructor__)) 83 99 static void record_init() { … … 85 101 uname(&u); 86 102 103 int id = (int) getpid(); 104 #ifdef __bgq__ 105 // If we're really running on a BG/Q compute node, use the job rank instead 106 // of the pid because the node name might not really be globally unique. 107 if (!strcmp(u.sysname, "CNK") && !strcmp(u.machine, "BGQ")) { 108 id = (int) Kernel_GetRank(); 109 on_bgq = 1; 110 } 111 #endif 112 113 // If we're running under a common batch system, add the job id to the output 114 // file names (add it as a prefix so that sorting the files will sort by job 115 // first). 
116 char *job_id = 0; 117 const char *job_id_vars[] = 118 { "COBALT_JOBID", "PBS_JOBID", "SLURM_JOB_ID", "JOB_ID" }; 119 for (int i = 0; i < sizeof(job_id_vars)/sizeof(job_id_vars[0]); ++i) { 120 job_id = getenv(job_id_vars[i]); 121 if (job_id) 122 break; 123 } 124 87 125 char log_name[PATH_MAX+1]; 88 snprintf(log_name, PATH_MAX+1, "%s.%d.memlog", u.nodename, getpid()); 126 if (job_id) 127 snprintf(log_name, PATH_MAX+1, "%s.%s.%d.memlog", job_id, u.nodename, id); 128 else 129 snprintf(log_name, PATH_MAX+1, "%s.%d.memlog", u.nodename, id); 89 130 log_file = fopen(log_name, "w"); 90 131 if (!log_file) … … 93 134 const char *link_name = "/proc/self/exe"; 94 135 readlink(link_name, self_path, PATH_MAX); 136 137 initial_brk = sbrk(0); 95 138 } 96 139 … … 110 153 (void) fflush(log_file); 111 154 (void) fclose(log_file); 155 156 if (dladdr_cache) 157 delete dladdr_cache; 112 158 } 113 159 … … 115 161 // we need to cache the lookup results. 116 162 static int dladdr_cached(void * addr, Dl_info *info) { 117 static unordered_map<void *, Dl_info> dladdr_cache; 118 119 auto I = dladdr_cache.find(addr); 120 if (I == dladdr_cache.end()) { 163 if (!dladdr_cache) 164 dladdr_cache = new unordered_map<void *, Dl_info>; 165 166 auto I = dladdr_cache->find(addr); 167 if (I == dladdr_cache->end()) { 121 168 int r; 122 169 if (!(r = dladdr(addr, info))) 123 170 memset(info, 0, sizeof(Dl_info)); 124 171 125 dladdr_cache .insert(make_pair(addr, *info));172 dladdr_cache->insert(make_pair(addr, *info)); 126 173 return r; 127 174 } … … 140 187 fprintf(log_file, "\t%ld.%06ld %ld %ld", usage.ru_utime.tv_sec, 141 188 usage.ru_utime.tv_usec, usage.ru_maxrss, syscall(SYS_gettid)); 189 190 // Some other memory stats (like with maxrss, report these in KB). 
191 size_t arena_size = ((size_t) sbrk(0)) - (size_t) initial_brk; 192 193 uint64_t mmap_size = 0; 194 #ifdef __bgq__ 195 if (on_bgq) 196 (void) Kernel_GetMemorySize(KERNEL_MEMSIZE_MMAP, &mmap_size); 197 #endif 198 199 fprintf(log_file, " %ld %ld", arena_size >> 10, mmap_size >> 10); 142 200 143 201 if (!show_backtrace) … … 233 291 } 234 292 293 #ifdef __PIC__ 294 static int (*__real_posix_memalign)(void **memptr, size_t alignment, 295 size_t size) = 0; 296 297 static void *(*__real_mmap)(void *addr, size_t length, int prot, int flags, 298 int fd, off_t offset) = 0; 299 static void *(*__real_mmap64)(void *addr, size_t length, int prot, int flags, 300 int fd, off64_t offset) = 0; 301 static int (*__real_munmap)(void *addr, size_t length) = 0; 302 #else 303 extern "C" { 304 extern int __real_posix_memalign(void **memptr, size_t alignment, size_t size); 305 306 extern void *__real_mmap(void *addr, size_t length, int prot, int flags, 307 int fd, off_t offset); 308 extern void *__real_mmap64(void *addr, size_t length, int prot, int flags, 309 int fd, off64_t offset); 310 extern int __real_munmap(void *addr, size_t length); 311 } 312 #endif 313 235 314 // glibc exports its underlying malloc implementation under the name 236 315 // __libc_malloc so that hooks like this can use it. 
237 316 extern "C" { 238 317 extern void *__libc_malloc(size_t size); 318 extern void *__libc_valloc(size_t size); 239 319 extern void *__libc_realloc(void *ptr, size_t size); 240 320 extern void *__libc_calloc(size_t nmemb, size_t size); … … 258 338 259 339 void *ptr = __libc_malloc(size); 260 261 record_malloc(size, ptr, caller); 340 if (ptr) 341 record_malloc(size, ptr, caller); 342 343 in_malloc = 0; 344 return ptr; 345 } 346 347 void *FUNC(valloc)(size_t size) { 348 const void *caller = 349 __builtin_extract_return_addr(__builtin_return_address(0)); 350 351 if (in_malloc) 352 return __libc_valloc(size); 353 354 in_malloc = 1; 355 356 void *ptr = __libc_valloc(size); 357 if (ptr) 358 record_malloc(size, ptr, caller); 262 359 263 360 in_malloc = 0; … … 278 375 if (ptr) 279 376 record_free(ptr, caller); 280 record_malloc(size, nptr, caller); 377 if (nptr) 378 record_malloc(size, nptr, caller); 281 379 282 380 in_malloc = 0; … … 296 394 void *ptr = __libc_calloc(nmemb, size); 297 395 298 record_malloc(nmemb*size, ptr, caller); 396 if (ptr) 397 record_malloc(nmemb*size, ptr, caller); 299 398 300 399 in_malloc = 0; … … 314 413 void *ptr = __libc_memalign(boundary, size); 315 414 316 record_malloc(size, ptr, caller); 415 if (ptr) 416 record_malloc(size, ptr, caller); 317 417 318 418 in_malloc = 0; … … 337 437 } 338 438 439 int FUNC(posix_memalign)(void **memptr, size_t alignment, size_t size) { 440 const void *caller = 441 __builtin_extract_return_addr(__builtin_return_address(0)); 442 443 #ifdef __PIC__ 444 if (!__real_posix_memalign) 445 if (!(*(void **) (&__real_posix_memalign) = 446 dlsym(RTLD_NEXT, "posix_memalign"))) { 447 return ELIBACC; 448 } 449 #endif 450 451 if (in_malloc) 452 return __real_posix_memalign(memptr, alignment, size); 453 454 in_malloc = 1; 455 456 int r = __real_posix_memalign(memptr, alignment, size); 457 458 if (!r) 459 record_malloc(size, *memptr, caller); 460 461 in_malloc = 0; 462 463 return r; 464 } 465 466 void *FUNC(mmap)(void *addr, 
size_t length, int prot, int flags, 467 int fd, off_t offset) { 468 const void *caller = 469 __builtin_extract_return_addr(__builtin_return_address(0)); 470 471 #ifdef __PIC__ 472 if (!__real_mmap) 473 if (!(*(void **) (&__real_mmap) = dlsym(RTLD_NEXT, "mmap"))) { 474 errno = ELIBACC; 475 return MAP_FAILED; 476 } 477 #endif 478 479 if (in_malloc) 480 return __real_mmap(addr, length, prot, flags, fd, offset); 481 482 in_malloc = 1; 483 484 void *ptr = __real_mmap(addr, length, prot, flags, fd, offset); 485 486 if (ptr != MAP_FAILED) 487 record_malloc(length, ptr, caller); 488 489 in_malloc = 0; 490 491 return ptr; 492 } 493 494 void *FUNC(mmap64)(void *addr, size_t length, int prot, int flags, 495 int fd, off64_t offset) { 496 const void *caller = 497 __builtin_extract_return_addr(__builtin_return_address(0)); 498 499 #ifdef __PIC__ 500 if (!__real_mmap64) 501 if (!(*(void **) (&__real_mmap64) = dlsym(RTLD_NEXT, "mmap64"))) { 502 errno = ELIBACC; 503 return MAP_FAILED; 504 } 505 #endif 506 507 if (in_malloc) 508 return __real_mmap64(addr, length, prot, flags, fd, offset); 509 510 in_malloc = 1; 511 512 void *ptr = __real_mmap64(addr, length, prot, flags, fd, offset); 513 514 if (ptr != MAP_FAILED) 515 record_malloc(length, ptr, caller); 516 517 in_malloc = 0; 518 519 return ptr; 520 } 521 522 int FUNC(munmap)(void *addr, size_t length) { 523 const void *caller = 524 __builtin_extract_return_addr(__builtin_return_address(0)); 525 526 #ifdef __PIC__ 527 if (!__real_munmap) 528 if (!(*(void **) (&__real_munmap) = dlsym(RTLD_NEXT, "munmap"))) { 529 errno = ELIBACC; 530 return -1; 531 } 532 #endif 533 534 if (in_malloc) 535 return __real_munmap(addr, length); 536 537 in_malloc = 1; 538 539 record_free(addr, caller); 540 541 int r = __real_munmap(addr, length); 542 543 in_malloc = 0; 544 545 return r; 546 } 547 339 548 } // extern "C" 340 549 -
memlog_analyze
r09f3093 r2d9e75d 42 42 use strict; 43 43 use File::Basename; 44 use File::Path qw(make_path); 45 use Getopt::Long; 46 47 my $find_leaks = 0; 48 my $print_raw_proc_name = 0; 49 my $quiet = 0; 50 my $help = 0; 51 52 sub print_usage($) { 53 my $ec = $_[0]; 54 my $usage = <<EOM; 55 Usage: $0 [options] <memlog file or glob> [<output directory>] 56 options: 57 --leaks 58 Provide information on leaks instead of peak usage 59 --print-symbol-names 60 Include symbol names and offsets in the output 61 --quiet or -q 62 Don't print status messages while running 63 EOM 64 65 print $usage; 66 exit($ec); 67 } 68 69 GetOptions("help|h|?" => \$help, 70 "leaks" => \$find_leaks, 71 "print-symbol-names" => \$print_raw_proc_name, 72 "quiet|q" => \$quiet) 73 or print_usage(1); 74 75 if ($help) { 76 print_usage(0); 77 } 78 79 if (scalar(@ARGV) > 2) { 80 print_usage(2); 81 } 44 82 45 83 my $memlog_fn = $ARGV[0]; 46 84 my $out_dir = $ARGV[1] || '.'; 47 85 48 m y $print_raw_proc_name = 0;86 make_path($out_dir); 49 87 50 88 if (! 
-f $memlog_fn) { 51 print "Usage: $0 <memlog file> [<output directory>]\n"; 52 exit 1; 53 } 89 my @pot_fns = glob($memlog_fn); 90 if (scalar(@pot_fns)) { 91 if (!$quiet) { 92 print "Searching all files matching '$memlog_fn'\n"; 93 } 94 95 my $pot_max_rss = 0; 96 my $pos_max_rss_fn; 97 foreach my $pot_fn (@pot_fns) { 98 my $last_line = `tail -n 1 '$pot_fn'`; 99 chomp($last_line); 100 101 my @parts = split(/\t/, $last_line); 102 103 my $op = shift(@parts); 104 my $state = shift(@parts); 105 106 my ($time, $then_max_rss, $tid, $then_arena, $then_mmap) = 107 split(/\s+/, $state); 108 if ($pot_max_rss < $then_max_rss) { 109 $pot_max_rss = $then_max_rss; 110 $pos_max_rss_fn = $pot_fn; 111 } 112 } 113 114 if (defined $pos_max_rss_fn) { 115 $memlog_fn = $pos_max_rss_fn; 116 goto have_memlog_fn; 117 } 118 } 119 120 print_usage(1); 121 } 122 have_memlog_fn: 54 123 55 124 # The version of addr2line and friends that you use can make a big difference, … … 66 135 # The first step is to determine the high-water mark. 67 136 my $max_rss = 0; 137 my $arena = 0; 138 my $mmap = 0; 68 139 foreach my $line (<MEMLOG>) { 69 140 chomp($line); … … 73 144 my $state = shift(@parts); 74 145 75 my ($time, $then_max_rss, $tid) = split(/\s+/, $state); 146 my ($time, $then_max_rss, $tid, $then_arena, $then_mmap) = 147 split(/\s+/, $state); 76 148 if ($max_rss < $then_max_rss) { 77 149 $max_rss = $then_max_rss; 150 $arena = $then_arena; 151 $mmap = $then_mmap; 78 152 } 79 153 } … … 83 157 # Scan the log for malloc/free pairings. We're interested only in active 84 158 # allocations at the time when the rss reaches the final maxrss. 85 my $max_rss_time = 0; 159 # If we're finding leaks, then go to the very end. 160 my $active_alloc_time = 0; 86 161 my %malloc_lines; 87 162 foreach my $line (<MEMLOG>) { … … 102 177 } 103 178 104 # If we've reached the max rss, we've seen all we need to see. 
105 my ($time, $then_max_rss, $tid) = split(/\s+/, $state); 106 $max_rss_time = $time; 107 if ($then_max_rss == $max_rss) { 108 last; 179 my ($time, $then_max_rss, $tid, $then_arena, $then_mmap) = 180 split(/\s+/, $state); 181 $active_alloc_time = $time; 182 183 if (!$find_leaks) { 184 # If we've reached the max rss, we've seen all we need to see. 185 if ($then_max_rss == $max_rss) { 186 last; 187 } 109 188 } 110 189 } … … 114 193 # Convert maxrss, currently in KB, to bytes. 115 194 $max_rss *= 1024; 195 if (defined $arena) { 196 $arena *= 1024; 197 } 198 if (defined $mmap) { 199 $mmap *= 1024; 200 } 116 201 117 202 my $total_size = 0; … … 130 215 131 216 my ($size, $ptr) = ($op =~ /^M: (\d+) 0x(\w+)/); 132 my ($time, $then_max_rss, $tid) = split(/\s+/, $state); 217 my ($time, $then_max_rss, $tid, $then_arena, $then_mmap) = 218 split(/\s+/, $state); 133 219 134 220 $total_size += $size; … … 192 278 my $pdf_fn = "$out_dir/" . basename($memlog_fn) . ".pdf"; 193 279 280 if (!$quiet) { 281 print "Creating $txt_fn\n"; 282 } 283 194 284 open(TXT, ">$txt_fn") || die "Can't open $txt_fn: $!"; 285 286 if (!$quiet) { 287 print "Creating $dot_fn\n"; 288 } 289 195 290 open(DOT, ">$dot_fn") || die "Can't open $dot_fn: $!"; 196 291 … … 208 303 } 209 304 305 sub format_bytes_or_unk($) { 306 my $b = $_[0]; 307 return defined($b) ? format_bytes($b) : "(unknown)"; 308 } 309 210 310 print DOT ("digraph \"memlog\" {\n"); 211 311 print DOT ("size=\"8,11\";\n"); 212 312 print DOT ("node [width=0.375,height=0.25];\n"); 213 313 214 printf DOT ("Legend [shape=box, fontsize=100, shape=oval," . 215 "label=\"Total: %s active at maxrss = %s after %s s\"];\n", 216 format_bytes($total_size), format_bytes($max_rss), $max_rss_time); 217 218 printf TXT ("memlog: Total: %s active at maxrss = %s after %s s\n\n", 219 format_bytes($total_size), format_bytes($max_rss), $max_rss_time); 314 my $find_type = $find_leaks ? 
" (leaks)" : ""; 315 print DOT "subgraph cluster_key {\n"; 316 print DOT "\trank=min;\n"; 317 print DOT "\tlabel=\"memlog\";\n"; 318 print DOT "\tfontsize=100;\n"; 319 print DOT "\trankdir=UR;\n"; 320 printf DOT ("Legend [shape=box, fontsize=100, shape=plaintext," . 321 "label=\"Total: %s active$find_type at maxrss = %s after %s s\\narena: %s\\nmmap: %s\"];\n", 322 format_bytes($total_size), format_bytes($max_rss), 323 $active_alloc_time, format_bytes_or_unk($arena), 324 format_bytes_or_unk($mmap)); 325 print DOT "}\n"; 326 327 printf TXT ("memlog: Total: %s active$find_type at maxrss = %s after %s s\n\tarena: %s\tmmap: %s\n\n", 328 format_bytes($total_size), format_bytes($max_rss), 329 $active_alloc_time, format_bytes_or_unk($arena), 330 format_bytes_or_unk($mmap)); 220 331 221 332 my %cached_names; … … 277 388 $ret .= $func . '\n'; 278 389 279 if ($loc !~ /^ \?/) {390 if ($loc !~ /^[:?]/) { 280 391 $ret .= $loc . '\n'; 281 392 } … … 395 506 close(DOT); 396 507 508 if (!$quiet) { 509 print "Creating $ps_fn\n"; 510 } 511 397 512 system("dot -Tps2 < '$dot_fn' > '$ps_fn'"); 513 514 if (!$quiet) { 515 print "Creating $pdf_fn\n"; 516 } 517 398 518 system("ps2pdf '$ps_fn' '$pdf_fn'"); 399 519
Note: See TracChangeset
for help on using the changeset viewer.