Changes in / [2d9e75d:09f3093]
- Files:
-
- 1 added
- 1 deleted
- 3 edited
Legend:
- Unmodified
- Added
- Removed
-
Makefile
r3c9fc94 r510af01 2 2 CXXFLAGS = -std=gnu++0x -O3 -g 3 3 4 # When compiling with CXX=powerpc64-bgq-linux-g++, we need these: 5 CPPFLAGS = -I/bgsys/drivers/ppcfloor -I/bgsys/drivers/ppcfloor/spi/include/kernel/cnk 6 4 CPPFLAGS = 7 5 LDFLAGS = -lpthread -ldl 8 6 … … 18 16 $(CXX) $(CPPFLAGS) $(CXXFLAGS) $(LDFLAGS) -fPIC -shared -o libmemlog.so memlog.cpp 19 17 20 install: all memlog_analyze README 21 cp -a libmemlog.so memlog_s.o memlog_analyze README $(DESTDIR)/ 22 echo '-Wl,--wrap,malloc,--wrap,valloc,--wrap,realloc,--wrap,calloc,--wrap,memalign,--wrap,free,--wrap,posix_memalign,--wrap,mmap,--wrap,mmap64,--wrap,munmap $(DESTDIR)/memlog_s.o -lpthread -ldl' > $(DESTDIR)/memlog_s_ld_cmds 18 install: all memlog2dot README 19 cp -a libmemlog.so memlog_s.o memlog2dot README $(DESTDIR)/ 23 20 24 21 clean: -
README
r3c9fc94 r24aa734 16 16 -L/path/to/memlog -Wl,-rpath,/path/to/memlog -lmemlog 17 17 18 For statically-linked applications ld's automatic wrapping functionality is 19 employed, and the exact set of necessary flags is large, so a file named 20 memlog_s_ld_cmds has been provided containing the necessary flags. 18 For statically-linked applications, add the following to your linker flags: 21 19 22 To your linker flags add: 23 24 `cat /path/to/memlog/memlog_s_ld_cmds` 25 26 or, if your compiler and wrappers support response files (gcc and clang do, for 27 example), simply: 28 29 @/path/to/memlog/memlog_s_ld_cmds 30 31 so your overall linking command might look something like this: 32 33 mpic++ -O3 -g -o my_program my_obj1.o my_obj2.o @/path/to/memlog/memlog_s_ld_cmds 20 -Wl,--wrap,malloc,--wrap,free,--wrap,realloc,--wrap,calloc,--wrap,memalign \ 21 /path/to/memlog/memlog_s.o -lpthread -ldl 34 22 35 23 ** RUNNING ** … … 41 29 use by running: 42 30 43 /path/to/memlog/memlog _analyze/path/to/HOST.PID.memlog31 /path/to/memlog/memlog2dot /path/to/HOST.PID.memlog 44 32 45 33 this will generate files named HOST.PID.memlog.dot, HOST.PID.memlog.ps and … … 48 36 in textual form. 49 37 50 If you pass the --leaks option to memlog_analyze, it will provide data on51 allocations active at the end of the program (leaks) instead of those active52 when the peak memory usage is first reached.53 54 You might have many runs of the same application (or output from many ranks of55 an MPI job), and you'd like to pick the one for analysis with the highest56 memory usage. If you provide a glob pattern to memlog_analyze it will do this57 for you. 
Make sure you quote the glob pattern so that your shell does not58 expand it.59 60 /path/to/memlog/memlog_analyze "/path/to/*.memlog"61 62 When running under common batch systems, the files are named63 JOB_ID.HOST.PID.memlog, and when running under the BG/Q CNK, the process's rank64 is used instead of the node-local PID.65 66 38 Note that the peak memory usage is determined by monitoring the process's 67 39 maximum resident set size, not just the total allocated heap memory. 68 40 69 memlog_analyze takes, as a second optional parameter, the name of the output 70 directory (the current directory is the default). If the directory does not 71 exist, it will be created. 72 73 memlog_analyze depends on dot (from the graphviz package) and ps2pdf (from the 41 memlog2dot depends on dot (from the graphviz package) and ps2pdf (from the 74 42 ghostscript package), plus various tools from the binutils package. 75 43 -
memlog.cpp
r2d9e75d r09f3093 45 45 #include <cstdio> 46 46 #include <cstring> 47 #include <cstdint>48 47 49 48 // NOTE: This source makes very minimal use of C++11 features. It can still be … … 53 52 54 53 #include <limits.h> 55 #include <errno.h>56 54 #include <malloc.h> 57 55 #include <execinfo.h> 58 #include <sys/mman.h>59 56 #include <sys/syscall.h> 60 57 #include <sys/time.h> … … 69 66 #include <dlfcn.h> 70 67 71 #ifdef __bgq__72 #include <spi/include/kernel/location.h>73 #include <spi/include/kernel/memory.h>74 #endif75 76 68 using namespace std; 77 69 … … 80 72 // -Wl,--wrap,malloc,--wrap,free,--wrap,realloc,--wrap,calloc,--wrap,memalign /path/to/memlog_s.o -lpthread -ldl 81 73 82 static FILE *log_file = 0;74 FILE *log_file = NULL; 83 75 static pthread_mutex_t log_mutex = PTHREAD_MUTEX_INITIALIZER; 84 76 … … 88 80 static char self_path[PATH_MAX+1] = { '\0' }; 89 81 90 #ifdef __bgq__91 static int on_bgq = 0;92 #endif93 94 static void *initial_brk = 0;95 96 static unordered_map<void *, Dl_info> *dladdr_cache = 0;97 98 82 __attribute__((__constructor__)) 99 83 static void record_init() { … … 101 85 uname(&u); 102 86 103 int id = (int) getpid();104 #ifdef __bgq__105 // If we're really running on a BG/Q compute node, use the job rank instead106 // of the pid because the node name might not really be globally unique.107 if (!strcmp(u.sysname, "CNK") && !strcmp(u.machine, "BGQ")) {108 id = (int) Kernel_GetRank();109 on_bgq = 1;110 }111 #endif112 113 // If we're running under a common batch system, add the job id to the output114 // file names (add it as a prefix so that sorting the files will sort by job115 // first).116 char *job_id = 0;117 const char *job_id_vars[] =118 { "COBALT_JOBID", "PBS_JOBID", "SLURM_JOB_ID", "JOB_ID" };119 for (int i = 0; i < sizeof(job_id_vars)/sizeof(job_id_vars[0]); ++i) {120 job_id = getenv(job_id_vars[i]);121 if (job_id)122 break;123 }124 125 87 char log_name[PATH_MAX+1]; 126 if (job_id) 127 snprintf(log_name, PATH_MAX+1, "%s.%s.%d.memlog", 
job_id, u.nodename, id); 128 else 129 snprintf(log_name, PATH_MAX+1, "%s.%d.memlog", u.nodename, id); 88 snprintf(log_name, PATH_MAX+1, "%s.%d.memlog", u.nodename, getpid()); 130 89 log_file = fopen(log_name, "w"); 131 90 if (!log_file) … … 134 93 const char *link_name = "/proc/self/exe"; 135 94 readlink(link_name, self_path, PATH_MAX); 136 137 initial_brk = sbrk(0);138 95 } 139 96 … … 153 110 (void) fflush(log_file); 154 111 (void) fclose(log_file); 155 156 if (dladdr_cache)157 delete dladdr_cache;158 112 } 159 113 … … 161 115 // we need to cache the lookup results. 162 116 static int dladdr_cached(void * addr, Dl_info *info) { 163 if (!dladdr_cache) 164 dladdr_cache = new unordered_map<void *, Dl_info>; 165 166 auto I = dladdr_cache->find(addr); 167 if (I == dladdr_cache->end()) { 117 static unordered_map<void *, Dl_info> dladdr_cache; 118 119 auto I = dladdr_cache.find(addr); 120 if (I == dladdr_cache.end()) { 168 121 int r; 169 122 if (!(r = dladdr(addr, info))) 170 123 memset(info, 0, sizeof(Dl_info)); 171 124 172 dladdr_cache ->insert(make_pair(addr, *info));125 dladdr_cache.insert(make_pair(addr, *info)); 173 126 return r; 174 127 } … … 187 140 fprintf(log_file, "\t%ld.%06ld %ld %ld", usage.ru_utime.tv_sec, 188 141 usage.ru_utime.tv_usec, usage.ru_maxrss, syscall(SYS_gettid)); 189 190 // Some other memory stats (like with maxrss, report these in KB).191 size_t arena_size = ((size_t) sbrk(0)) - (size_t) initial_brk;192 193 uint64_t mmap_size = 0;194 #ifdef __bgq__195 if (on_bgq)196 (void) Kernel_GetMemorySize(KERNEL_MEMSIZE_MMAP, &mmap_size);197 #endif198 199 fprintf(log_file, " %ld %ld", arena_size >> 10, mmap_size >> 10);200 142 201 143 if (!show_backtrace) … … 291 233 } 292 234 293 #ifdef __PIC__294 static int (*__real_posix_memalign)(void **memptr, size_t alignment,295 size_t size) = 0;296 297 static void *(*__real_mmap)(void *addr, size_t length, int prot, int flags,298 int fd, off_t offset) = 0;299 static void *(*__real_mmap64)(void *addr, size_t 
length, int prot, int flags,300 int fd, off64_t offset) = 0;301 static int (*__real_munmap)(void *addr, size_t length) = 0;302 #else303 extern "C" {304 extern int __real_posix_memalign(void **memptr, size_t alignment, size_t size);305 306 extern void *__real_mmap(void *addr, size_t length, int prot, int flags,307 int fd, off_t offset);308 extern void *__real_mmap64(void *addr, size_t length, int prot, int flags,309 int fd, off64_t offset);310 extern int __real_munmap(void *addr, size_t length);311 }312 #endif313 314 235 // glibc exports its underlying malloc implementation under the name 315 236 // __libc_malloc so that hooks like this can use it. 316 237 extern "C" { 317 238 extern void *__libc_malloc(size_t size); 318 extern void *__libc_valloc(size_t size);319 239 extern void *__libc_realloc(void *ptr, size_t size); 320 240 extern void *__libc_calloc(size_t nmemb, size_t size); … … 338 258 339 259 void *ptr = __libc_malloc(size); 340 if (ptr) 341 record_malloc(size, ptr, caller); 342 343 in_malloc = 0; 344 return ptr; 345 } 346 347 void *FUNC(valloc)(size_t size) { 348 const void *caller = 349 __builtin_extract_return_addr(__builtin_return_address(0)); 350 351 if (in_malloc) 352 return __libc_valloc(size); 353 354 in_malloc = 1; 355 356 void *ptr = __libc_valloc(size); 357 if (ptr) 358 record_malloc(size, ptr, caller); 260 261 record_malloc(size, ptr, caller); 359 262 360 263 in_malloc = 0; … … 375 278 if (ptr) 376 279 record_free(ptr, caller); 377 if (nptr) 378 record_malloc(size, nptr, caller); 280 record_malloc(size, nptr, caller); 379 281 380 282 in_malloc = 0; … … 394 296 void *ptr = __libc_calloc(nmemb, size); 395 297 396 if (ptr) 397 record_malloc(nmemb*size, ptr, caller); 298 record_malloc(nmemb*size, ptr, caller); 398 299 399 300 in_malloc = 0; … … 413 314 void *ptr = __libc_memalign(boundary, size); 414 315 415 if (ptr) 416 record_malloc(size, ptr, caller); 316 record_malloc(size, ptr, caller); 417 317 418 318 in_malloc = 0; … … 437 337 } 438 338 439 
int FUNC(posix_memalign)(void **memptr, size_t alignment, size_t size) {440 const void *caller =441 __builtin_extract_return_addr(__builtin_return_address(0));442 443 #ifdef __PIC__444 if (!__real_posix_memalign)445 if (!(*(void **) (&__real_posix_memalign) =446 dlsym(RTLD_NEXT, "posix_memalign"))) {447 return ELIBACC;448 }449 #endif450 451 if (in_malloc)452 return __real_posix_memalign(memptr, alignment, size);453 454 in_malloc = 1;455 456 int r = __real_posix_memalign(memptr, alignment, size);457 458 if (!r)459 record_malloc(size, *memptr, caller);460 461 in_malloc = 0;462 463 return r;464 }465 466 void *FUNC(mmap)(void *addr, size_t length, int prot, int flags,467 int fd, off_t offset) {468 const void *caller =469 __builtin_extract_return_addr(__builtin_return_address(0));470 471 #ifdef __PIC__472 if (!__real_mmap)473 if (!(*(void **) (&__real_mmap) = dlsym(RTLD_NEXT, "mmap"))) {474 errno = ELIBACC;475 return MAP_FAILED;476 }477 #endif478 479 if (in_malloc)480 return __real_mmap(addr, length, prot, flags, fd, offset);481 482 in_malloc = 1;483 484 void *ptr = __real_mmap(addr, length, prot, flags, fd, offset);485 486 if (ptr != MAP_FAILED)487 record_malloc(length, ptr, caller);488 489 in_malloc = 0;490 491 return ptr;492 }493 494 void *FUNC(mmap64)(void *addr, size_t length, int prot, int flags,495 int fd, off64_t offset) {496 const void *caller =497 __builtin_extract_return_addr(__builtin_return_address(0));498 499 #ifdef __PIC__500 if (!__real_mmap64)501 if (!(*(void **) (&__real_mmap64) = dlsym(RTLD_NEXT, "mmap64"))) {502 errno = ELIBACC;503 return MAP_FAILED;504 }505 #endif506 507 if (in_malloc)508 return __real_mmap64(addr, length, prot, flags, fd, offset);509 510 in_malloc = 1;511 512 void *ptr = __real_mmap64(addr, length, prot, flags, fd, offset);513 514 if (ptr != MAP_FAILED)515 record_malloc(length, ptr, caller);516 517 in_malloc = 0;518 519 return ptr;520 }521 522 int FUNC(munmap)(void *addr, size_t length) {523 const void *caller =524 
__builtin_extract_return_addr(__builtin_return_address(0));525 526 #ifdef __PIC__527 if (!__real_munmap)528 if (!(*(void **) (&__real_munmap) = dlsym(RTLD_NEXT, "munmap"))) {529 errno = ELIBACC;530 return -1;531 }532 #endif533 534 if (in_malloc)535 return __real_munmap(addr, length);536 537 in_malloc = 1;538 539 record_free(addr, caller);540 541 int r = __real_munmap(addr, length);542 543 in_malloc = 0;544 545 return r;546 }547 548 339 } // extern "C" 549 340
Note: See TracChangeset
for help on using the changeset viewer.