Changes in / [2d9e75d:09f3093]


Ignore:
Files:
1 added
1 deleted
3 edited

Legend:

Unmodified
Added
Removed
  • Makefile

    r3c9fc94 r510af01  
    22CXXFLAGS = -std=gnu++0x -O3 -g 
    33 
    4 # When compiling with CXX=powerpc64-bgq-linux-g++, we need these: 
    5 CPPFLAGS = -I/bgsys/drivers/ppcfloor -I/bgsys/drivers/ppcfloor/spi/include/kernel/cnk 
    6  
     4CPPFLAGS = 
    75LDFLAGS = -lpthread -ldl 
    86 
     
    1816        $(CXX) $(CPPFLAGS) $(CXXFLAGS) $(LDFLAGS) -fPIC -shared -o libmemlog.so memlog.cpp 
    1917 
    20 install: all memlog_analyze README 
    21         cp -a libmemlog.so memlog_s.o memlog_analyze README $(DESTDIR)/ 
    22         echo '-Wl,--wrap,malloc,--wrap,valloc,--wrap,realloc,--wrap,calloc,--wrap,memalign,--wrap,free,--wrap,posix_memalign,--wrap,mmap,--wrap,mmap64,--wrap,munmap $(DESTDIR)/memlog_s.o -lpthread -ldl' > $(DESTDIR)/memlog_s_ld_cmds 
     18install: all memlog2dot README 
     19        cp -a libmemlog.so memlog_s.o memlog2dot README $(DESTDIR)/ 
    2320 
    2421clean: 
  • README

    r3c9fc94 r24aa734  
    1616      -L/path/to/memlog -Wl,-rpath,/path/to/memlog -lmemlog 
    1717 
    18 For statically-linked applications, ld's automatic wrapping functionality is 
    19 employed, and the exact set of necessary flags is large, so a file named 
    20 memlog_s_ld_cmds has been provided containing the necessary flags. 
     18For statically-linked applications, add the following to your linker flags: 
    2119 
    22 To your linker flags add: 
    23  
    24   `cat /path/to/memlog/memlog_s_ld_cmds` 
    25  
    26 or, if your compiler and wrappers support response files (gcc and clang do, for 
    27 example), simply: 
    28  
    29   @/path/to/memlog/memlog_s_ld_cmds 
    30  
    31 so your overall linking command might look something like this: 
    32  
    33   mpic++ -O3 -g -o my_program my_obj1.o my_obj2.o @/path/to/memlog/memlog_s_ld_cmds 
     20  -Wl,--wrap,malloc,--wrap,free,--wrap,realloc,--wrap,calloc,--wrap,memalign \ 
     21    /path/to/memlog/memlog_s.o -lpthread -ldl 
    3422 
    3523** RUNNING ** 
     
    4129use by running: 
    4230 
    43   /path/to/memlog/memlog_analyze /path/to/HOST.PID.memlog 
     31  /path/to/memlog/memlog2dot /path/to/HOST.PID.memlog 
    4432 
    4533this will generate files named HOST.PID.memlog.dot, HOST.PID.memlog.ps and 
     
    4836in textual form. 
    4937 
    50 If you pass the --leaks option to memlog_analyze, it will provide data on 
    51 allocations active at the end of the program (leaks) instead of those active 
    52 when the peak memory usage is first reached. 
    53  
    54 You might have many runs of the same application (or output from many ranks of 
    55 an MPI job), and you'd like to pick the one for analysis with the highest 
    56 memory usage. If you provide a glob pattern to memlog_analyze it will do this 
    57 for you. Make sure you quote the glob pattern so that your shell does not 
    58 expand it. 
    59  
    60   /path/to/memlog/memlog_analyze "/path/to/*.memlog" 
    61  
    62 When running under common batch systems, the files are named 
    63 JOB_ID.HOST.PID.memlog, and when running under the BG/Q CNK, the process's rank 
    64 is used instead of the node-local PID. 
    65  
    6638Note that the peak memory usage is determined by monitoring the process's 
    6739maximum resident set size, not just the total allocated heap memory. 
    6840 
    69 memlog_analyze takes, as a second optional parameter, the name of the output 
    70 directory (the current directory is the default). If the directory does not 
    71 exist, it will be created. 
    72  
    73 memlog_analyze depends on dot (from the graphviz package) and ps2pdf (from the 
     41memlog2dot depends on dot (from the graphviz package) and ps2pdf (from the 
    7442ghostscript package), plus various tools from the binutils package. 
    7543 
  • memlog.cpp

    r2d9e75d r09f3093  
    4545#include <cstdio> 
    4646#include <cstring> 
    47 #include <cstdint> 
    4847 
    4948// NOTE: This source makes very minimal use of C++11 features. It can still be 
     
    5352 
    5453#include <limits.h> 
    55 #include <errno.h> 
    5654#include <malloc.h> 
    5755#include <execinfo.h> 
    58 #include <sys/mman.h> 
    5956#include <sys/syscall.h> 
    6057#include <sys/time.h> 
     
    6966#include <dlfcn.h> 
    7067 
    71 #ifdef __bgq__ 
    72 #include <spi/include/kernel/location.h> 
    73 #include <spi/include/kernel/memory.h> 
    74 #endif 
    75  
    7668using namespace std; 
    7769 
     
    8072//   -Wl,--wrap,malloc,--wrap,free,--wrap,realloc,--wrap,calloc,--wrap,memalign /path/to/memlog_s.o -lpthread -ldl 
    8173 
    82 static FILE *log_file = 0; 
     74FILE *log_file = NULL; 
    8375static pthread_mutex_t log_mutex = PTHREAD_MUTEX_INITIALIZER; 
    8476 
     
    8880static char self_path[PATH_MAX+1] = { '\0' }; 
    8981 
    90 #ifdef __bgq__ 
    91 static int on_bgq = 0; 
    92 #endif 
    93  
    94 static void *initial_brk = 0; 
    95  
    96 static unordered_map<void *, Dl_info> *dladdr_cache = 0; 
    97  
    9882__attribute__((__constructor__)) 
    9983static void record_init() { 
     
    10185  uname(&u); 
    10286 
    103   int id = (int) getpid(); 
    104 #ifdef __bgq__ 
    105   // If we're really running on a BG/Q compute node, use the job rank instead 
    106   // of the pid because the node name might not really be globally unique. 
    107   if (!strcmp(u.sysname, "CNK") && !strcmp(u.machine, "BGQ")) { 
    108     id = (int) Kernel_GetRank(); 
    109     on_bgq = 1; 
    110   } 
    111 #endif 
    112  
    113   // If we're running under a common batch system, add the job id to the output 
    114   // file names (add it as a prefix so that sorting the files will sort by job 
    115   // first). 
    116   char *job_id = 0; 
    117   const char *job_id_vars[] = 
    118     { "COBALT_JOBID", "PBS_JOBID", "SLURM_JOB_ID", "JOB_ID" }; 
    119   for (int i = 0; i < sizeof(job_id_vars)/sizeof(job_id_vars[0]); ++i) { 
    120     job_id = getenv(job_id_vars[i]); 
    121     if (job_id) 
    122       break; 
    123   } 
    124  
    12587  char log_name[PATH_MAX+1]; 
    126   if (job_id) 
    127     snprintf(log_name, PATH_MAX+1, "%s.%s.%d.memlog", job_id, u.nodename, id); 
    128   else 
    129     snprintf(log_name, PATH_MAX+1, "%s.%d.memlog", u.nodename, id); 
     88  snprintf(log_name, PATH_MAX+1, "%s.%d.memlog", u.nodename, getpid()); 
    13089  log_file = fopen(log_name, "w"); 
    13190  if (!log_file) 
     
    13493  const char *link_name = "/proc/self/exe"; 
    13594  readlink(link_name, self_path, PATH_MAX); 
    136  
    137   initial_brk = sbrk(0); 
    13895} 
    13996 
     
    153110  (void) fflush(log_file); 
    154111  (void) fclose(log_file); 
    155  
    156   if (dladdr_cache) 
    157     delete dladdr_cache; 
    158112} 
    159113 
     
    161115// we need to cache the lookup results. 
    162116static int dladdr_cached(void * addr, Dl_info *info) { 
    163   if (!dladdr_cache) 
    164     dladdr_cache = new unordered_map<void *, Dl_info>; 
    165  
    166   auto I = dladdr_cache->find(addr); 
    167   if (I == dladdr_cache->end()) { 
     117  static unordered_map<void *, Dl_info> dladdr_cache; 
     118 
     119  auto I = dladdr_cache.find(addr); 
     120  if (I == dladdr_cache.end()) { 
    168121    int r; 
    169122    if (!(r = dladdr(addr, info))) 
    170123      memset(info, 0, sizeof(Dl_info)); 
    171124 
    172     dladdr_cache->insert(make_pair(addr, *info)); 
     125    dladdr_cache.insert(make_pair(addr, *info)); 
    173126    return r; 
    174127  } 
     
    187140  fprintf(log_file, "\t%ld.%06ld %ld %ld", usage.ru_utime.tv_sec, 
    188141          usage.ru_utime.tv_usec, usage.ru_maxrss, syscall(SYS_gettid)); 
    189  
    190   // Some other memory stats (like with maxrss, report these in KB). 
    191   size_t arena_size = ((size_t) sbrk(0)) - (size_t) initial_brk; 
    192  
    193   uint64_t mmap_size = 0; 
    194 #ifdef __bgq__ 
    195   if (on_bgq) 
    196     (void) Kernel_GetMemorySize(KERNEL_MEMSIZE_MMAP, &mmap_size); 
    197 #endif 
    198  
    199   fprintf(log_file, " %ld %ld", arena_size >> 10, mmap_size >> 10); 
    200142 
    201143  if (!show_backtrace) 
     
    291233} 
    292234 
    293 #ifdef __PIC__ 
    294 static int (*__real_posix_memalign)(void **memptr, size_t alignment, 
    295                                     size_t size) = 0; 
    296  
    297 static void *(*__real_mmap)(void *addr, size_t length, int prot, int flags, 
    298                             int fd, off_t offset) = 0; 
    299 static void *(*__real_mmap64)(void *addr, size_t length, int prot, int flags, 
    300                               int fd, off64_t offset) = 0; 
    301 static int (*__real_munmap)(void *addr, size_t length) = 0; 
    302 #else 
    303 extern "C" { 
    304 extern int __real_posix_memalign(void **memptr, size_t alignment, size_t size); 
    305  
    306 extern void *__real_mmap(void *addr, size_t length, int prot, int flags, 
    307                          int fd, off_t offset); 
    308 extern void *__real_mmap64(void *addr, size_t length, int prot, int flags, 
    309                            int fd, off64_t offset); 
    310 extern int __real_munmap(void *addr, size_t length); 
    311 } 
    312 #endif 
    313  
    314235// glibc exports its underlying malloc implementation under the name 
    315236// __libc_malloc so that hooks like this can use it. 
    316237extern "C" { 
    317238extern void *__libc_malloc(size_t size); 
    318 extern void *__libc_valloc(size_t size); 
    319239extern void *__libc_realloc(void *ptr, size_t size); 
    320240extern void *__libc_calloc(size_t nmemb, size_t size); 
     
    338258 
    339259  void *ptr = __libc_malloc(size); 
    340   if (ptr) 
    341     record_malloc(size, ptr, caller); 
    342  
    343   in_malloc = 0; 
    344   return ptr; 
    345 } 
    346  
    347 void *FUNC(valloc)(size_t size) { 
    348   const void *caller = 
    349     __builtin_extract_return_addr(__builtin_return_address(0)); 
    350  
    351   if (in_malloc) 
    352     return __libc_valloc(size); 
    353  
    354   in_malloc = 1; 
    355  
    356   void *ptr = __libc_valloc(size); 
    357   if (ptr) 
    358     record_malloc(size, ptr, caller); 
     260 
     261  record_malloc(size, ptr, caller); 
    359262 
    360263  in_malloc = 0; 
     
    375278  if (ptr) 
    376279    record_free(ptr, caller); 
    377   if (nptr) 
    378     record_malloc(size, nptr, caller); 
     280  record_malloc(size, nptr, caller); 
    379281 
    380282  in_malloc = 0; 
     
    394296  void *ptr = __libc_calloc(nmemb, size); 
    395297 
    396   if (ptr) 
    397     record_malloc(nmemb*size, ptr, caller); 
     298  record_malloc(nmemb*size, ptr, caller); 
    398299 
    399300  in_malloc = 0; 
     
    413314  void *ptr = __libc_memalign(boundary, size); 
    414315 
    415   if (ptr) 
    416     record_malloc(size, ptr, caller); 
     316  record_malloc(size, ptr, caller); 
    417317 
    418318  in_malloc = 0; 
     
    437337} 
    438338 
    439 int FUNC(posix_memalign)(void **memptr, size_t alignment, size_t size) { 
    440   const void *caller = 
    441     __builtin_extract_return_addr(__builtin_return_address(0)); 
    442  
    443 #ifdef __PIC__ 
    444   if (!__real_posix_memalign) 
    445     if (!(*(void **) (&__real_posix_memalign) = 
    446         dlsym(RTLD_NEXT, "posix_memalign"))) { 
    447       return ELIBACC; 
    448     } 
    449 #endif 
    450  
    451   if (in_malloc) 
    452     return __real_posix_memalign(memptr, alignment, size); 
    453  
    454   in_malloc = 1; 
    455  
    456   int r = __real_posix_memalign(memptr, alignment, size); 
    457  
    458   if (!r) 
    459     record_malloc(size, *memptr, caller); 
    460  
    461   in_malloc = 0; 
    462  
    463   return r; 
    464 } 
    465  
    466 void *FUNC(mmap)(void *addr, size_t length, int prot, int flags, 
    467                  int fd, off_t offset) { 
    468   const void *caller = 
    469     __builtin_extract_return_addr(__builtin_return_address(0)); 
    470  
    471 #ifdef __PIC__ 
    472   if (!__real_mmap) 
    473     if (!(*(void **) (&__real_mmap) = dlsym(RTLD_NEXT, "mmap"))) { 
    474       errno = ELIBACC; 
    475       return MAP_FAILED; 
    476     } 
    477 #endif 
    478  
    479   if (in_malloc) 
    480     return __real_mmap(addr, length, prot, flags, fd, offset); 
    481  
    482   in_malloc = 1; 
    483  
    484   void *ptr = __real_mmap(addr, length, prot, flags, fd, offset); 
    485  
    486   if (ptr != MAP_FAILED) 
    487     record_malloc(length, ptr, caller); 
    488  
    489   in_malloc = 0; 
    490  
    491   return ptr; 
    492 } 
    493  
    494 void *FUNC(mmap64)(void *addr, size_t length, int prot, int flags, 
    495                    int fd, off64_t offset) { 
    496   const void *caller = 
    497     __builtin_extract_return_addr(__builtin_return_address(0)); 
    498  
    499 #ifdef __PIC__ 
    500   if (!__real_mmap64) 
    501     if (!(*(void **) (&__real_mmap64) = dlsym(RTLD_NEXT, "mmap64"))) { 
    502       errno = ELIBACC; 
    503       return MAP_FAILED; 
    504     } 
    505 #endif 
    506  
    507   if (in_malloc) 
    508     return __real_mmap64(addr, length, prot, flags, fd, offset); 
    509  
    510   in_malloc = 1; 
    511  
    512   void *ptr = __real_mmap64(addr, length, prot, flags, fd, offset); 
    513  
    514   if (ptr != MAP_FAILED) 
    515     record_malloc(length, ptr, caller); 
    516  
    517   in_malloc = 0; 
    518  
    519   return ptr; 
    520 } 
    521  
    522 int FUNC(munmap)(void *addr, size_t length) { 
    523   const void *caller = 
    524     __builtin_extract_return_addr(__builtin_return_address(0)); 
    525  
    526 #ifdef __PIC__ 
    527   if (!__real_munmap) 
    528     if (!(*(void **) (&__real_munmap) = dlsym(RTLD_NEXT, "munmap"))) { 
    529       errno = ELIBACC; 
    530       return -1; 
    531     } 
    532 #endif 
    533  
    534   if (in_malloc) 
    535     return __real_munmap(addr, length); 
    536  
    537   in_malloc = 1; 
    538  
    539   record_free(addr, caller); 
    540  
    541   int r = __real_munmap(addr, length); 
    542  
    543   in_malloc = 0; 
    544  
    545   return r; 
    546 } 
    547  
    548339} // extern "C" 
    549340 
Note: See TracChangeset for help on using the changeset viewer.