source: memlog.cpp @ 21d3542

Revision 21d3542, 8.0 KB checked in by Hal Finkel <hfinkel@…>, 9 years ago (diff)

collect and report on mmap totals

  • Property mode set to 100644
RevLine 
[0ec59c5]1#ifndef _GNU_SOURCE
2#define _GNU_SOURCE
3#endif
4
[1bd82e0]5#include <cstdlib>
6#include <cstdio>
7#include <cstring>
[21d3542]8#include <cstdint>
[1bd82e0]9
[f715c76]10// NOTE: This source makes very minimal use of C++11 features. It can still be
11// compiled by g++ 4.4.7 with -std=gnu++0x.
[1bd82e0]12#include <unordered_map>
13#include <utility>
[0ec59c5]14
[1bd82e0]15#include <limits.h>
[430548b]16#include <malloc.h>
[a736d81]17#include <execinfo.h>
[134408c]18#include <sys/syscall.h>
[0a0ef57]19#include <sys/time.h>
20#include <sys/resource.h>
[0ec59c5]21#include <sys/types.h>
22#include <sys/stat.h>
23#include <sys/utsname.h>
24#include <fcntl.h>
25#include <unistd.h>
26
27#include <pthread.h>
28#include <dlfcn.h>
29
[a7b97b9]30#ifdef __bgq__
31#include <spi/include/kernel/location.h>
[21d3542]32#include <spi/include/kernel/memory.h>
[a7b97b9]33#endif
34
[1bd82e0]35using namespace std;
36
[493cb97]37// NOTE: When static linking, this depends on linker wrapping.
38// Add to your LDFLAGS:
39//   -Wl,--wrap,malloc,--wrap,free,--wrap,realloc,--wrap,calloc,--wrap,memalign /path/to/memlog_s.o -lpthread -ldl
40
[22f928f]41FILE *log_file = 0;
[0ec59c5]42static pthread_mutex_t log_mutex = PTHREAD_MUTEX_INITIALIZER;
43
[69850c8]44// The malloc hook might use functions that call malloc, and we need to make
45// sure this does not cause an infinite loop.
46static __thread int in_malloc = 0;
[3105f50]47static char self_path[PATH_MAX+1] = { '\0' };
[69850c8]48
[21d3542]49#ifdef __bgq__
50int on_bgq = 0;
51#endif
52
53void *initial_brk = 0;
54
[0ec59c5]55__attribute__((__constructor__))
56static void record_init() {
57  struct utsname u;
58  uname(&u);
59
[a7b97b9]60  int id = (int) getpid();
61#ifdef __bgq__
62  // If we're really running on a BG/Q compute node, use the job rank instead
63  // of the pid because the node name might not really be globally unique.
[21d3542]64  if (!strcmp(u.sysname, "CNK") && !strcmp(u.machine, "BGQ")) {
[a7b97b9]65    id = (int) Kernel_GetRank();
[21d3542]66    on_bgq = 1;
67  }
[a7b97b9]68#endif
69
[22f928f]70  // If we're running under a common batch system, add the job id to the output
71  // file names (add it as a prefix so that sorting the files will sort by job
72  // first).
73  char *job_id = 0;
74  const char *job_id_vars[] =
75    { "COBALT_JOBID", "PBS_JOBID", "SLURM_JOB_ID", "JOB_ID" };
76  for (int i = 0; i < sizeof(job_id_vars)/sizeof(job_id_vars[0]); ++i) {
77    job_id = getenv(job_id_vars[i]);
78    if (job_id)
79      break;
80  }
81
[3105f50]82  char log_name[PATH_MAX+1];
[22f928f]83  if (job_id)
84    snprintf(log_name, PATH_MAX+1, "%s.%s.%d.memlog", job_id, u.nodename, id);
85  else
86    snprintf(log_name, PATH_MAX+1, "%s.%d.memlog", u.nodename, id);
[0ec59c5]87  log_file = fopen(log_name, "w");
[0a0ef57]88  if (!log_file)
89    fprintf(stderr, "fopen failed for '%s': %m\n", log_name);
[3105f50]90
91  const char *link_name = "/proc/self/exe";
92  readlink(link_name, self_path, PATH_MAX);
[21d3542]93
94  initial_brk = sbrk(0);
[0ec59c5]95}
96
97__attribute__((__destructor__))
98static void record_cleanup() {
99  if (!log_file)
100    return;
101
[69850c8]102  // These functions might call free, but we're shutting down, so don't try to
103  // unwind the stack from here...
104  in_malloc = 1;
105
[5a9481e]106  // Avoid any racing by obtaining the lock.
107  if (pthread_mutex_lock(&log_mutex))
108    return;
109
[0ec59c5]110  (void) fflush(log_file);
111  (void) fclose(log_file);
112}
113
// dladdr is, relatively, quite slow. For this to work on a large application,
// we need to cache the lookup results.
//
// Looks up addr with dladdr and memoizes both the resulting Dl_info and
// dladdr's return value, so that repeated queries for the same address give
// the same answer as the first one. When the lookup fails, *info is
// zero-filled (and that failure is what gets cached).
//
// Returns whatever dladdr returned for addr: nonzero on success, 0 on failure.
//
// NOTE: The cache itself is not synchronized; in this file the function is
// only reached from print_context, which is called with log_mutex held.
static int dladdr_cached(void * addr, Dl_info *info) {
  typedef std::pair<Dl_info, int> cached_result;
  static std::unordered_map<void *, cached_result> dladdr_cache;

  auto I = dladdr_cache.find(addr);
  if (I != dladdr_cache.end()) {
    *info = I->second.first;
    return I->second.second;
  }

  int r = dladdr(addr, info);
  if (!r)
    memset(info, 0, sizeof(Dl_info));

  dladdr_cache.insert(std::make_pair(addr, cached_result(*info, r)));
  return r;
}
132
[1e5cce6]133static void print_context(const void *caller, int show_backtrace) {
[0a0ef57]134  struct rusage usage;
135  if (getrusage(RUSAGE_SELF, &usage)) {
136    fprintf(stderr, "getrusage failed: %m\n");
[0ec59c5]137    return;
[0a0ef57]138  }
[0ec59c5]139
[134408c]140  fprintf(log_file, "\t%ld.%06ld %ld %ld", usage.ru_utime.tv_sec,
141          usage.ru_utime.tv_usec, usage.ru_maxrss, syscall(SYS_gettid));
[0a0ef57]142
[21d3542]143  // Some other memory stats (like with maxrss, report these in KB).
144  size_t arena_size = ((size_t) sbrk(0)) - (size_t) initial_brk;
145
146  uint64_t mmap_size = 0;
147#ifdef __bgq__
148  if (on_bgq)
149    (void) Kernel_GetMemorySize(KERNEL_MEMSIZE_MMAP, &mmap_size);
150#endif
151
152  fprintf(log_file, " %ld %ld", arena_size >> 10, mmap_size >> 10);
153
[1e5cce6]154  if (!show_backtrace)
155    return;
156
[a736d81]157  void *pcs[1024];
158  int num_pcs = backtrace(pcs, 1024);
[14e2ab9]159
160  int found_caller = 0;
[a736d81]161  for (int pci = 0; pci < num_pcs; ++pci) {
[14e2ab9]162    intptr_t pc = (intptr_t) pcs[pci];
[69850c8]163
164    if (!pc)
165      break;
166
[14e2ab9]167    if (!found_caller) {
168      if (pc != (intptr_t) caller)
169        continue;
170
171      found_caller = 1;
172    }
173
174    intptr_t off, relpc;
[69850c8]175    const char *proc_name;
176    const char *file_name;
177    Dl_info dlinfo;
[5a9481e]178    if (dladdr_cached((void *) pc, &dlinfo) && dlinfo.dli_fname &&
[69850c8]179        *dlinfo.dli_fname) {
[14e2ab9]180      intptr_t saddr = (intptr_t) dlinfo.dli_saddr;
[69850c8]181      if (saddr) {
182#if defined(__powerpc64__) && !defined(__powerpc64le__)
183        // On PPC64 ELFv1, the symbol address points to the function descriptor, not
184        // the actual starting address.
[14e2ab9]185        saddr = *(intptr_t*) saddr;
[69850c8]186#endif
187
188        off = pc - saddr;
[14e2ab9]189        relpc = pc - ((intptr_t) dlinfo.dli_fbase);
[69850c8]190      } else {
191        off = 0;
192        relpc = 0;
193      }
194
195      proc_name = dlinfo.dli_sname;
196      if (!proc_name)
197        proc_name = "?";
198
199      file_name = dlinfo.dli_fname;
200    } else {
[3105f50]201      // We don't know these...
202      off = 0;
203      relpc = 0;
[69850c8]204      proc_name = "?";
[3105f50]205
206      // If we can't determine the file, assume it is the base executable
207      // (which does the right thing for statically-linked binaries).
208      file_name = self_path;
[69850c8]209    }
210
211    fprintf(log_file, "\t%s (%s+0x%x) [0x%lx (0x%lx)]", file_name, proc_name, (int) off,
212            (long) pc, (long) relpc);
[0ec59c5]213  }
214}
215
[430548b]216static void record_malloc(size_t size, void *ptr, const void *caller) {
[0ec59c5]217  if (!log_file)
218    return;
219
220  if (pthread_mutex_lock(&log_mutex))
221    return;
222
223  fprintf(log_file, "M: %zd %p", size, ptr);
[1e5cce6]224  print_context(caller, 1);
[0ec59c5]225  fprintf(log_file, "\n");
226
227done:
228  pthread_mutex_unlock(&log_mutex);
229}
230
[430548b]231static void record_free(void *ptr, const void *caller) {
[0ec59c5]232  if (!log_file)
233    return;
234
235  if (pthread_mutex_lock(&log_mutex))
236    return;
237
238  fprintf(log_file, "F: %p", ptr);
[1e5cce6]239  print_context(caller, 0);
[0ec59c5]240  fprintf(log_file, "\n");
241
242done:
243  pthread_mutex_unlock(&log_mutex);
244}
245
246// glibc exports its underlying malloc implementation under the name
247// __libc_malloc so that hooks like this can use it.
[1bd82e0]248extern "C" {
[0ec59c5]249extern void *__libc_malloc(size_t size);
250extern void *__libc_realloc(void *ptr, size_t size);
251extern void *__libc_calloc(size_t nmemb, size_t size);
252extern void *__libc_memalign(size_t boundary, size_t size);
253extern void __libc_free(void *ptr);
254
[430548b]255#ifdef __PIC__
256#define FUNC(x) x
257#else
258#define FUNC(x) __wrap_ ## x
259#endif
260
261void *FUNC(malloc)(size_t size) {
[5a9481e]262  const void *caller =
263    __builtin_extract_return_addr(__builtin_return_address(0));
[430548b]264
[0ec59c5]265  if (in_malloc)
266    return __libc_malloc(size);
267
268  in_malloc = 1;
269
270  void *ptr = __libc_malloc(size);
271
[14e2ab9]272  record_malloc(size, ptr, caller);
[0ec59c5]273
274  in_malloc = 0;
275  return ptr;
276}
277
[430548b]278void *FUNC(realloc)(void *ptr, size_t size) {
[5a9481e]279  const void *caller =
280    __builtin_extract_return_addr(__builtin_return_address(0));
[430548b]281
[0ec59c5]282  if (in_malloc)
283    return __libc_realloc(ptr, size);
284
285  in_malloc = 1;
286
287  void *nptr = __libc_realloc(ptr, size);
288
[14e2ab9]289  if (ptr)
290    record_free(ptr, caller);
291  record_malloc(size, nptr, caller);
[0ec59c5]292
293  in_malloc = 0;
294
295  return nptr;
296}
297
[430548b]298void *FUNC(calloc)(size_t nmemb, size_t size) {
[5a9481e]299  const void *caller =
300    __builtin_extract_return_addr(__builtin_return_address(0));
[430548b]301
[0ec59c5]302  if (in_malloc)
303    return __libc_calloc(nmemb, size);
304
305  in_malloc = 1;
306
307  void *ptr = __libc_calloc(nmemb, size);
308
[14e2ab9]309  record_malloc(nmemb*size, ptr, caller);
[0ec59c5]310
311  in_malloc = 0;
312
313  return ptr;
314}
315
[430548b]316void *FUNC(memalign)(size_t boundary, size_t size) {
[5a9481e]317  const void *caller =
318    __builtin_extract_return_addr(__builtin_return_address(0));
[430548b]319
[0ec59c5]320  if (in_malloc)
321    return __libc_memalign(boundary, size);
322
323  in_malloc = 1;
324
325  void *ptr = __libc_memalign(boundary, size);
326
[14e2ab9]327  record_malloc(size, ptr, caller);
[0ec59c5]328
329  in_malloc = 0;
330
331  return ptr;
332}
333
[430548b]334void FUNC(free)(void *ptr) {
[5a9481e]335  const void *caller =
336    __builtin_extract_return_addr(__builtin_return_address(0));
[430548b]337
[14e2ab9]338  if (in_malloc || !ptr)
[0ec59c5]339    return __libc_free(ptr);
340
341  in_malloc = 1;
342
[14e2ab9]343  record_free(ptr, caller);
[0ec59c5]344
345  __libc_free(ptr);
346
347  in_malloc = 0;
348}
349
[1bd82e0]350} // extern "C"
351
Note: See TracBrowser for help on using the repository browser.