Changes in / [09f3093:2d9e75d]


Ignore:
Files:
1 added
1 deleted
3 edited

Legend:

Unmodified
Added
Removed
  • Makefile

    r510af01 r3c9fc94  
    22CXXFLAGS = -std=gnu++0x -O3 -g 
    33 
    4 CPPFLAGS = 
     4# When compiling with CXX=powerpc64-bgq-linux-g++, we need these: 
     5CPPFLAGS = -I/bgsys/drivers/ppcfloor -I/bgsys/drivers/ppcfloor/spi/include/kernel/cnk 
     6 
    57LDFLAGS = -lpthread -ldl 
    68 
     
    1618        $(CXX) $(CPPFLAGS) $(CXXFLAGS) $(LDFLAGS) -fPIC -shared -o libmemlog.so memlog.cpp 
    1719 
    18 install: all memlog2dot README 
    19         cp -a libmemlog.so memlog_s.o memlog2dot README $(DESTDIR)/ 
     20install: all memlog_analyze README 
     21        cp -a libmemlog.so memlog_s.o memlog_analyze README $(DESTDIR)/ 
     22        echo '-Wl,--wrap,malloc,--wrap,valloc,--wrap,realloc,--wrap,calloc,--wrap,memalign,--wrap,free,--wrap,posix_memalign,--wrap,mmap,--wrap,mmap64,--wrap,munmap $(DESTDIR)/memlog_s.o -lpthread -ldl' > $(DESTDIR)/memlog_s_ld_cmds 
    2023 
    2124clean: 
  • README

    r24aa734 r3c9fc94  
    1616      -L/path/to/memlog -Wl,-rpath,/path/to/memlog -lmemlog 
    1717 
    18 For statically-linked applications, add the following to your linker flags: 
     18For statically-linked applications, ld's automatic wrapping functionality is 
     19employed, and the exact set of necessary flags is large, so a file named 
     20memlog_s_ld_cmds has been provided containing the necessary flags. 
    1921 
    20   -Wl,--wrap,malloc,--wrap,free,--wrap,realloc,--wrap,calloc,--wrap,memalign \ 
    21     /path/to/memlog/memlog_s.o -lpthread -ldl 
     22To your linker flags add: 
     23 
     24  `cat /path/to/memlog/memlog_s_ld_cmds` 
     25 
     26or, if your compiler and wrappers support response files (gcc and clang do, for 
     27example), simply: 
     28 
     29  @/path/to/memlog/memlog_s_ld_cmds 
     30 
     31so your overall linking command might look something like this: 
     32 
     33  mpic++ -O3 -g -o my_program my_obj1.o my_obj2.o @/path/to/memlog/memlog_s_ld_cmds 
    2234 
    2335** RUNNING ** 
     
    2941use by running: 
    3042 
    31   /path/to/memlog/memlog2dot /path/to/HOST.PID.memlog 
     43  /path/to/memlog/memlog_analyze /path/to/HOST.PID.memlog 
    3244 
    3345this will generate files named HOST.PID.memlog.dot, HOST.PID.memlog.ps and 
     
    3648in textual form. 
    3749 
     50If you pass the --leaks option to memlog_analyze, it will provide data on 
     51allocations active at the end of the program (leaks) instead of those active 
     52when the peak memory usage is first reached. 
     53 
     54You might have many runs of the same application (or output from many ranks of 
     55an MPI job), and you'd like to pick the one for analysis with the highest 
     56memory usage. If you provide a glob pattern to memlog_analyze it will do this 
     57for you. Make sure you quote the glob pattern so that your shell does not 
     58expand it. 
     59 
     60  /path/to/memlog/memlog_analyze "/path/to/*.memlog" 
     61 
     62When running under common batch systems, the files are named 
     63JOB_ID.HOST.PID.memlog, and when running under the BG/Q CNK, the process's rank 
     64is used instead of the node-local PID. 
     65 
    3866Note that the peak memory usage is determined by monitoring the process's 
    3967maximum resident set size, not just the total allocated heap memory. 
    4068 
    41 memlog2dot depends on dot (from the graphviz package) and ps2pdf (from the 
     69memlog_analyze takes, as a second optional parameter, the name of the output 
     70directory (the current directory is the default). If the directory does not 
     71exist, it will be created. 
     72 
     73memlog_analyze depends on dot (from the graphviz package) and ps2pdf (from the 
    4274ghostscript package), plus various tools from the binutils package. 
    4375 
  • memlog.cpp

    r09f3093 r2d9e75d  
    4545#include <cstdio> 
    4646#include <cstring> 
     47#include <cstdint> 
    4748 
    4849// NOTE: This source makes very minimal use of C++11 features. It can still be 
     
    5253 
    5354#include <limits.h> 
     55#include <errno.h> 
    5456#include <malloc.h> 
    5557#include <execinfo.h> 
     58#include <sys/mman.h> 
    5659#include <sys/syscall.h> 
    5760#include <sys/time.h> 
     
    6669#include <dlfcn.h> 
    6770 
     71#ifdef __bgq__ 
     72#include <spi/include/kernel/location.h> 
     73#include <spi/include/kernel/memory.h> 
     74#endif 
     75 
    6876using namespace std; 
    6977 
     
    7280//   -Wl,--wrap,malloc,--wrap,free,--wrap,realloc,--wrap,calloc,--wrap,memalign /path/to/memlog_s.o -lpthread -ldl 
    7381 
    74 FILE *log_file = NULL; 
     82static FILE *log_file = 0; 
    7583static pthread_mutex_t log_mutex = PTHREAD_MUTEX_INITIALIZER; 
    7684 
     
    8088static char self_path[PATH_MAX+1] = { '\0' }; 
    8189 
     90#ifdef __bgq__ 
     91static int on_bgq = 0; 
     92#endif 
     93 
     94static void *initial_brk = 0; 
     95 
     96static unordered_map<void *, Dl_info> *dladdr_cache = 0; 
     97 
    8298__attribute__((__constructor__)) 
    8399static void record_init() { 
     
    85101  uname(&u); 
    86102 
     103  int id = (int) getpid(); 
     104#ifdef __bgq__ 
     105  // If we're really running on a BG/Q compute node, use the job rank instead 
     106  // of the pid because the node name might not really be globally unique. 
     107  if (!strcmp(u.sysname, "CNK") && !strcmp(u.machine, "BGQ")) { 
     108    id = (int) Kernel_GetRank(); 
     109    on_bgq = 1; 
     110  } 
     111#endif 
     112 
     113  // If we're running under a common batch system, add the job id to the output 
     114  // file names (add it as a prefix so that sorting the files will sort by job 
     115  // first). 
     116  char *job_id = 0; 
     117  const char *job_id_vars[] = 
     118    { "COBALT_JOBID", "PBS_JOBID", "SLURM_JOB_ID", "JOB_ID" }; 
     119  for (int i = 0; i < sizeof(job_id_vars)/sizeof(job_id_vars[0]); ++i) { 
     120    job_id = getenv(job_id_vars[i]); 
     121    if (job_id) 
     122      break; 
     123  } 
     124 
    87125  char log_name[PATH_MAX+1]; 
    88   snprintf(log_name, PATH_MAX+1, "%s.%d.memlog", u.nodename, getpid()); 
     126  if (job_id) 
     127    snprintf(log_name, PATH_MAX+1, "%s.%s.%d.memlog", job_id, u.nodename, id); 
     128  else 
     129    snprintf(log_name, PATH_MAX+1, "%s.%d.memlog", u.nodename, id); 
    89130  log_file = fopen(log_name, "w"); 
    90131  if (!log_file) 
     
    93134  const char *link_name = "/proc/self/exe"; 
    94135  readlink(link_name, self_path, PATH_MAX); 
     136 
     137  initial_brk = sbrk(0); 
    95138} 
    96139 
     
    110153  (void) fflush(log_file); 
    111154  (void) fclose(log_file); 
     155 
     156  if (dladdr_cache) 
     157    delete dladdr_cache; 
    112158} 
    113159 
     
    115161// we need to cache the lookup results. 
    116162static int dladdr_cached(void * addr, Dl_info *info) { 
    117   static unordered_map<void *, Dl_info> dladdr_cache; 
    118  
    119   auto I = dladdr_cache.find(addr); 
    120   if (I == dladdr_cache.end()) { 
     163  if (!dladdr_cache) 
     164    dladdr_cache = new unordered_map<void *, Dl_info>; 
     165 
     166  auto I = dladdr_cache->find(addr); 
     167  if (I == dladdr_cache->end()) { 
    121168    int r; 
    122169    if (!(r = dladdr(addr, info))) 
    123170      memset(info, 0, sizeof(Dl_info)); 
    124171 
    125     dladdr_cache.insert(make_pair(addr, *info)); 
     172    dladdr_cache->insert(make_pair(addr, *info)); 
    126173    return r; 
    127174  } 
     
    140187  fprintf(log_file, "\t%ld.%06ld %ld %ld", usage.ru_utime.tv_sec, 
    141188          usage.ru_utime.tv_usec, usage.ru_maxrss, syscall(SYS_gettid)); 
     189 
     190  // Some other memory stats (like with maxrss, report these in KB). 
     191  size_t arena_size = ((size_t) sbrk(0)) - (size_t) initial_brk; 
     192 
     193  uint64_t mmap_size = 0; 
     194#ifdef __bgq__ 
     195  if (on_bgq) 
     196    (void) Kernel_GetMemorySize(KERNEL_MEMSIZE_MMAP, &mmap_size); 
     197#endif 
     198 
     199  fprintf(log_file, " %ld %ld", arena_size >> 10, mmap_size >> 10); 
    142200 
    143201  if (!show_backtrace) 
     
    233291} 
    234292 
     293#ifdef __PIC__ 
     294static int (*__real_posix_memalign)(void **memptr, size_t alignment, 
     295                                    size_t size) = 0; 
     296 
     297static void *(*__real_mmap)(void *addr, size_t length, int prot, int flags, 
     298                            int fd, off_t offset) = 0; 
     299static void *(*__real_mmap64)(void *addr, size_t length, int prot, int flags, 
     300                              int fd, off64_t offset) = 0; 
     301static int (*__real_munmap)(void *addr, size_t length) = 0; 
     302#else 
     303extern "C" { 
     304extern int __real_posix_memalign(void **memptr, size_t alignment, size_t size); 
     305 
     306extern void *__real_mmap(void *addr, size_t length, int prot, int flags, 
     307                         int fd, off_t offset); 
     308extern void *__real_mmap64(void *addr, size_t length, int prot, int flags, 
     309                           int fd, off64_t offset); 
     310extern int __real_munmap(void *addr, size_t length); 
     311} 
     312#endif 
     313 
    235314// glibc exports its underlying malloc implementation under the name 
    236315// __libc_malloc so that hooks like this can use it. 
    237316extern "C" { 
    238317extern void *__libc_malloc(size_t size); 
     318extern void *__libc_valloc(size_t size); 
    239319extern void *__libc_realloc(void *ptr, size_t size); 
    240320extern void *__libc_calloc(size_t nmemb, size_t size); 
     
    258338 
    259339  void *ptr = __libc_malloc(size); 
    260  
    261   record_malloc(size, ptr, caller); 
     340  if (ptr) 
     341    record_malloc(size, ptr, caller); 
     342 
     343  in_malloc = 0; 
     344  return ptr; 
     345} 
     346 
     347void *FUNC(valloc)(size_t size) { 
     348  const void *caller = 
     349    __builtin_extract_return_addr(__builtin_return_address(0)); 
     350 
     351  if (in_malloc) 
     352    return __libc_valloc(size); 
     353 
     354  in_malloc = 1; 
     355 
     356  void *ptr = __libc_valloc(size); 
     357  if (ptr) 
     358    record_malloc(size, ptr, caller); 
    262359 
    263360  in_malloc = 0; 
     
    278375  if (ptr) 
    279376    record_free(ptr, caller); 
    280   record_malloc(size, nptr, caller); 
     377  if (nptr) 
     378    record_malloc(size, nptr, caller); 
    281379 
    282380  in_malloc = 0; 
     
    296394  void *ptr = __libc_calloc(nmemb, size); 
    297395 
    298   record_malloc(nmemb*size, ptr, caller); 
     396  if (ptr) 
     397    record_malloc(nmemb*size, ptr, caller); 
    299398 
    300399  in_malloc = 0; 
     
    314413  void *ptr = __libc_memalign(boundary, size); 
    315414 
    316   record_malloc(size, ptr, caller); 
     415  if (ptr) 
     416    record_malloc(size, ptr, caller); 
    317417 
    318418  in_malloc = 0; 
     
    337437} 
    338438 
     439int FUNC(posix_memalign)(void **memptr, size_t alignment, size_t size) { 
     440  const void *caller = 
     441    __builtin_extract_return_addr(__builtin_return_address(0)); 
     442 
     443#ifdef __PIC__ 
     444  if (!__real_posix_memalign) 
     445    if (!(*(void **) (&__real_posix_memalign) = 
     446        dlsym(RTLD_NEXT, "posix_memalign"))) { 
     447      return ELIBACC; 
     448    } 
     449#endif 
     450 
     451  if (in_malloc) 
     452    return __real_posix_memalign(memptr, alignment, size); 
     453 
     454  in_malloc = 1; 
     455 
     456  int r = __real_posix_memalign(memptr, alignment, size); 
     457 
     458  if (!r) 
     459    record_malloc(size, *memptr, caller); 
     460 
     461  in_malloc = 0; 
     462 
     463  return r; 
     464} 
     465 
     466void *FUNC(mmap)(void *addr, size_t length, int prot, int flags, 
     467                 int fd, off_t offset) { 
     468  const void *caller = 
     469    __builtin_extract_return_addr(__builtin_return_address(0)); 
     470 
     471#ifdef __PIC__ 
     472  if (!__real_mmap) 
     473    if (!(*(void **) (&__real_mmap) = dlsym(RTLD_NEXT, "mmap"))) { 
     474      errno = ELIBACC; 
     475      return MAP_FAILED; 
     476    } 
     477#endif 
     478 
     479  if (in_malloc) 
     480    return __real_mmap(addr, length, prot, flags, fd, offset); 
     481 
     482  in_malloc = 1; 
     483 
     484  void *ptr = __real_mmap(addr, length, prot, flags, fd, offset); 
     485 
     486  if (ptr != MAP_FAILED) 
     487    record_malloc(length, ptr, caller); 
     488 
     489  in_malloc = 0; 
     490 
     491  return ptr; 
     492} 
     493 
     494void *FUNC(mmap64)(void *addr, size_t length, int prot, int flags, 
     495                   int fd, off64_t offset) { 
     496  const void *caller = 
     497    __builtin_extract_return_addr(__builtin_return_address(0)); 
     498 
     499#ifdef __PIC__ 
     500  if (!__real_mmap64) 
     501    if (!(*(void **) (&__real_mmap64) = dlsym(RTLD_NEXT, "mmap64"))) { 
     502      errno = ELIBACC; 
     503      return MAP_FAILED; 
     504    } 
     505#endif 
     506 
     507  if (in_malloc) 
     508    return __real_mmap64(addr, length, prot, flags, fd, offset); 
     509 
     510  in_malloc = 1; 
     511 
     512  void *ptr = __real_mmap64(addr, length, prot, flags, fd, offset); 
     513 
     514  if (ptr != MAP_FAILED) 
     515    record_malloc(length, ptr, caller); 
     516 
     517  in_malloc = 0; 
     518 
     519  return ptr; 
     520} 
     521 
     522int FUNC(munmap)(void *addr, size_t length) { 
     523  const void *caller = 
     524    __builtin_extract_return_addr(__builtin_return_address(0)); 
     525 
     526#ifdef __PIC__ 
     527  if (!__real_munmap) 
     528    if (!(*(void **) (&__real_munmap) = dlsym(RTLD_NEXT, "munmap"))) { 
     529      errno = ELIBACC; 
     530      return -1; 
     531    } 
     532#endif 
     533 
     534  if (in_malloc) 
     535    return __real_munmap(addr, length); 
     536 
     537  in_malloc = 1; 
     538 
     539  record_free(addr, caller); 
     540 
     541  int r = __real_munmap(addr, length); 
     542 
     543  in_malloc = 0; 
     544 
     545  return r; 
     546} 
     547 
    339548} // extern "C" 
    340549 
Note: See TracChangeset for help on using the changeset viewer.