source: memlog.cpp @ 2d9e75d

Revision 2d9e75d, 13.7 KB checked in by Hal Finkel <hfinkel@…>, 9 years ago (diff)

Merge branch 'master' of git.mcs.anl.gov:memlog

  • Property mode set to 100644
Line 
1// *****************************************************************************
2//                   Copyright (C) 2015, UChicago Argonne, LLC
3//                              All Rights Reserved
4//                            memlog (ANL-SF-15-081)
5//                    Hal Finkel, Argonne National Laboratory
6//
7//                              OPEN SOURCE LICENSE
8//
9// Under the terms of Contract No. DE-AC02-06CH11357 with UChicago Argonne, LLC,
10// the U.S. Government retains certain rights in this software.
11//
12// Redistribution and use in source and binary forms, with or without
13// modification, are permitted provided that the following conditions are met:
14//
15// 1. Redistributions of source code must retain the above copyright notice, this
16//    list of conditions and the following disclaimer.
17//
18// 2. Redistributions in binary form must reproduce the above copyright notice,
19//    this list of conditions and the following disclaimer in the documentation
20//    and/or other materials provided with the distribution.
21//
22// 3. Neither the names of UChicago Argonne, LLC or the Department of Energy nor
23//    the names of its contributors may be used to endorse or promote products
24//    derived from this software without specific prior written permission.
25// 
26// *****************************************************************************
27//                                  DISCLAIMER
28//
29// THE SOFTWARE IS SUPPLIED “AS IS” WITHOUT WARRANTY OF ANY KIND.
30//
31// NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT OF
32// ENERGY, NOR UCHICAGO ARGONNE, LLC, NOR ANY OF THEIR EMPLOYEES, MAKES ANY
33// WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL LIABILITY OR RESPONSIBILITY
34// FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY INFORMATION, DATA,
35// APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS THAT ITS USE WOULD NOT
36// INFRINGE PRIVATELY OWNED RIGHTS.
37//
38// *****************************************************************************
39
40#ifndef _GNU_SOURCE
41#define _GNU_SOURCE
42#endif
43
44#include <cstdlib>
45#include <cstdio>
46#include <cstring>
47#include <cstdint>
48
49// NOTE: This source makes very minimal use of C++11 features. It can still be
50// compiled by g++ 4.4.7 with -std=gnu++0x.
51#include <unordered_map>
52#include <utility>
53
54#include <limits.h>
55#include <errno.h>
56#include <malloc.h>
57#include <execinfo.h>
58#include <sys/mman.h>
59#include <sys/syscall.h>
60#include <sys/time.h>
61#include <sys/resource.h>
62#include <sys/types.h>
63#include <sys/stat.h>
64#include <sys/utsname.h>
65#include <fcntl.h>
66#include <unistd.h>
67
68#include <pthread.h>
69#include <dlfcn.h>
70
71#ifdef __bgq__
72#include <spi/include/kernel/location.h>
73#include <spi/include/kernel/memory.h>
74#endif
75
76using namespace std;
77
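78// NOTE: When static linking, this depends on linker wrapping of every
79// intercepted function. Add to your LDFLAGS:
80//   -Wl,--wrap,malloc,--wrap,valloc,--wrap,realloc,--wrap,calloc,--wrap,memalign,--wrap,free,--wrap,posix_memalign,--wrap,mmap,--wrap,mmap64,--wrap,munmap /path/to/memlog_s.o -lpthread -ldl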
81
// Stream that receives the allocation log. Stays null when the log could not
// be opened, in which case the record_* helpers do nothing.
static FILE *log_file = NULL;
// Held by the record_* helpers while they write a line to log_file.
static pthread_mutex_t log_mutex = PTHREAD_MUTEX_INITIALIZER;

// The malloc hook might use functions that call malloc, and we need to make
// sure this does not cause an infinite loop. Each thread sets this flag while
// one of its hooks is active; nested calls then skip the logging.
static __thread int in_malloc = 0;
// Target of /proc/self/exe, filled in by record_init; used as the file name
// for backtrace frames that dladdr cannot attribute to an object.
static char self_path[PATH_MAX+1] = "";

#ifdef __bgq__
// Set to 1 by record_init when uname reports a BG/Q compute node.
static int on_bgq = 0;
#endif

// Program break sampled in record_init; the arena size reported in the log
// is measured relative to it.
static void *initial_brk = NULL;

// Lazily allocated cache of dladdr results, keyed by the looked-up address.
static unordered_map<void *, Dl_info> *dladdr_cache = NULL;
97
98__attribute__((__constructor__))
99static void record_init() {
100  struct utsname u;
101  uname(&u);
102
103  int id = (int) getpid();
104#ifdef __bgq__
105  // If we're really running on a BG/Q compute node, use the job rank instead
106  // of the pid because the node name might not really be globally unique.
107  if (!strcmp(u.sysname, "CNK") && !strcmp(u.machine, "BGQ")) {
108    id = (int) Kernel_GetRank();
109    on_bgq = 1;
110  }
111#endif
112
113  // If we're running under a common batch system, add the job id to the output
114  // file names (add it as a prefix so that sorting the files will sort by job
115  // first).
116  char *job_id = 0;
117  const char *job_id_vars[] =
118    { "COBALT_JOBID", "PBS_JOBID", "SLURM_JOB_ID", "JOB_ID" };
119  for (int i = 0; i < sizeof(job_id_vars)/sizeof(job_id_vars[0]); ++i) {
120    job_id = getenv(job_id_vars[i]);
121    if (job_id)
122      break;
123  }
124
125  char log_name[PATH_MAX+1];
126  if (job_id)
127    snprintf(log_name, PATH_MAX+1, "%s.%s.%d.memlog", job_id, u.nodename, id);
128  else
129    snprintf(log_name, PATH_MAX+1, "%s.%d.memlog", u.nodename, id);
130  log_file = fopen(log_name, "w");
131  if (!log_file)
132    fprintf(stderr, "fopen failed for '%s': %m\n", log_name);
133
134  const char *link_name = "/proc/self/exe";
135  readlink(link_name, self_path, PATH_MAX);
136
137  initial_brk = sbrk(0);
138}
139
140__attribute__((__destructor__))
141static void record_cleanup() {
142  if (!log_file)
143    return;
144
145  // These functions might call free, but we're shutting down, so don't try to
146  // unwind the stack from here...
147  in_malloc = 1;
148
149  // Avoid any racing by obtaining the lock.
150  if (pthread_mutex_lock(&log_mutex))
151    return;
152
153  (void) fflush(log_file);
154  (void) fclose(log_file);
155
156  if (dladdr_cache)
157    delete dladdr_cache;
158}
159
160// dladdr is, relatively, quit slow. For this to work on a large application,
161// we need to cache the lookup results.
162static int dladdr_cached(void * addr, Dl_info *info) {
163  if (!dladdr_cache)
164    dladdr_cache = new unordered_map<void *, Dl_info>;
165
166  auto I = dladdr_cache->find(addr);
167  if (I == dladdr_cache->end()) {
168    int r;
169    if (!(r = dladdr(addr, info)))
170      memset(info, 0, sizeof(Dl_info));
171
172    dladdr_cache->insert(make_pair(addr, *info));
173    return r;
174  }
175
176  memcpy(info, &I->second, sizeof(Dl_info));
177  return 1;
178}
179
180static void print_context(const void *caller, int show_backtrace) {
181  struct rusage usage;
182  if (getrusage(RUSAGE_SELF, &usage)) {
183    fprintf(stderr, "getrusage failed: %m\n");
184    return;
185  }
186
187  fprintf(log_file, "\t%ld.%06ld %ld %ld", usage.ru_utime.tv_sec,
188          usage.ru_utime.tv_usec, usage.ru_maxrss, syscall(SYS_gettid));
189
190  // Some other memory stats (like with maxrss, report these in KB).
191  size_t arena_size = ((size_t) sbrk(0)) - (size_t) initial_brk;
192
193  uint64_t mmap_size = 0;
194#ifdef __bgq__
195  if (on_bgq)
196    (void) Kernel_GetMemorySize(KERNEL_MEMSIZE_MMAP, &mmap_size);
197#endif
198
199  fprintf(log_file, " %ld %ld", arena_size >> 10, mmap_size >> 10);
200
201  if (!show_backtrace)
202    return;
203
204  void *pcs[1024];
205  int num_pcs = backtrace(pcs, 1024);
206
207  int found_caller = 0;
208  for (int pci = 0; pci < num_pcs; ++pci) {
209    intptr_t pc = (intptr_t) pcs[pci];
210
211    if (!pc)
212      break;
213
214    if (!found_caller) {
215      if (pc != (intptr_t) caller)
216        continue;
217
218      found_caller = 1;
219    }
220
221    intptr_t off, relpc;
222    const char *proc_name;
223    const char *file_name;
224    Dl_info dlinfo;
225    if (dladdr_cached((void *) pc, &dlinfo) && dlinfo.dli_fname &&
226        *dlinfo.dli_fname) {
227      intptr_t saddr = (intptr_t) dlinfo.dli_saddr;
228      if (saddr) {
229#if defined(__powerpc64__) && !defined(__powerpc64le__)
230        // On PPC64 ELFv1, the symbol address points to the function descriptor, not
231        // the actual starting address.
232        saddr = *(intptr_t*) saddr;
233#endif
234
235        off = pc - saddr;
236        relpc = pc - ((intptr_t) dlinfo.dli_fbase);
237      } else {
238        off = 0;
239        relpc = 0;
240      }
241
242      proc_name = dlinfo.dli_sname;
243      if (!proc_name)
244        proc_name = "?";
245
246      file_name = dlinfo.dli_fname;
247    } else {
248      // We don't know these...
249      off = 0;
250      relpc = 0;
251      proc_name = "?";
252
253      // If we can't determine the file, assume it is the base executable
254      // (which does the right thing for statically-linked binaries).
255      file_name = self_path;
256    }
257
258    fprintf(log_file, "\t%s (%s+0x%x) [0x%lx (0x%lx)]", file_name, proc_name, (int) off,
259            (long) pc, (long) relpc);
260  }
261}
262
263static void record_malloc(size_t size, void *ptr, const void *caller) {
264  if (!log_file)
265    return;
266
267  if (pthread_mutex_lock(&log_mutex))
268    return;
269
270  fprintf(log_file, "M: %zd %p", size, ptr);
271  print_context(caller, 1);
272  fprintf(log_file, "\n");
273
274done:
275  pthread_mutex_unlock(&log_mutex);
276}
277
278static void record_free(void *ptr, const void *caller) {
279  if (!log_file)
280    return;
281
282  if (pthread_mutex_lock(&log_mutex))
283    return;
284
285  fprintf(log_file, "F: %p", ptr);
286  print_context(caller, 0);
287  fprintf(log_file, "\n");
288
289done:
290  pthread_mutex_unlock(&log_mutex);
291}
292
// Handles on the underlying posix_memalign/mmap/mmap64/munmap
// implementations that the wrappers forward to.
#ifndef __PIC__
// Non-PIC build: plain external C symbols (presumably supplied by the
// linker's --wrap mechanism -- confirm against the build flags).
extern "C" {
int __real_posix_memalign(void **memptr, size_t alignment, size_t size);

void *__real_mmap(void *addr, size_t length, int prot, int flags,
                  int fd, off_t offset);
void *__real_mmap64(void *addr, size_t length, int prot, int flags,
                    int fd, off64_t offset);
int __real_munmap(void *addr, size_t length);
}
#else
// PIC build: function pointers that start out null; each wrapper resolves its
// own pointer with dlsym(RTLD_NEXT, ...) on first use.
static int (*__real_posix_memalign)(void **memptr, size_t alignment,
                                    size_t size) = NULL;

static void *(*__real_mmap)(void *addr, size_t length, int prot, int flags,
                            int fd, off_t offset) = NULL;
static void *(*__real_mmap64)(void *addr, size_t length, int prot, int flags,
                              int fd, off64_t offset) = NULL;
static int (*__real_munmap)(void *addr, size_t length) = NULL;
#endif
313
314// glibc exports its underlying malloc implementation under the name
315// __libc_malloc so that hooks like this can use it.
316extern "C" {
// Entry points of the underlying allocator that the wrappers forward to.
void *__libc_malloc(size_t size);
void *__libc_valloc(size_t size);
void *__libc_realloc(void *ptr, size_t size);
void *__libc_calloc(size_t nmemb, size_t size);
void *__libc_memalign(size_t boundary, size_t size);
void __libc_free(void *ptr);
323
// FUNC(x) names the definition of an intercepting function: __wrap_x in a
// non-PIC build, plain x in a PIC build.
#ifndef __PIC__
#define FUNC(x) __wrap_ ## x
#else
#define FUNC(x) x
#endif
329
330void *FUNC(malloc)(size_t size) {
331  const void *caller =
332    __builtin_extract_return_addr(__builtin_return_address(0));
333
334  if (in_malloc)
335    return __libc_malloc(size);
336
337  in_malloc = 1;
338
339  void *ptr = __libc_malloc(size);
340  if (ptr)
341    record_malloc(size, ptr, caller);
342
343  in_malloc = 0;
344  return ptr;
345}
346
347void *FUNC(valloc)(size_t size) {
348  const void *caller =
349    __builtin_extract_return_addr(__builtin_return_address(0));
350
351  if (in_malloc)
352    return __libc_valloc(size);
353
354  in_malloc = 1;
355
356  void *ptr = __libc_valloc(size);
357  if (ptr)
358    record_malloc(size, ptr, caller);
359
360  in_malloc = 0;
361  return ptr;
362}
363
364void *FUNC(realloc)(void *ptr, size_t size) {
365  const void *caller =
366    __builtin_extract_return_addr(__builtin_return_address(0));
367
368  if (in_malloc)
369    return __libc_realloc(ptr, size);
370
371  in_malloc = 1;
372
373  void *nptr = __libc_realloc(ptr, size);
374
375  if (ptr)
376    record_free(ptr, caller);
377  if (nptr)
378    record_malloc(size, nptr, caller);
379
380  in_malloc = 0;
381
382  return nptr;
383}
384
385void *FUNC(calloc)(size_t nmemb, size_t size) {
386  const void *caller =
387    __builtin_extract_return_addr(__builtin_return_address(0));
388
389  if (in_malloc)
390    return __libc_calloc(nmemb, size);
391
392  in_malloc = 1;
393
394  void *ptr = __libc_calloc(nmemb, size);
395
396  if (ptr)
397    record_malloc(nmemb*size, ptr, caller);
398
399  in_malloc = 0;
400
401  return ptr;
402}
403
404void *FUNC(memalign)(size_t boundary, size_t size) {
405  const void *caller =
406    __builtin_extract_return_addr(__builtin_return_address(0));
407
408  if (in_malloc)
409    return __libc_memalign(boundary, size);
410
411  in_malloc = 1;
412
413  void *ptr = __libc_memalign(boundary, size);
414
415  if (ptr)
416    record_malloc(size, ptr, caller);
417
418  in_malloc = 0;
419
420  return ptr;
421}
422
423void FUNC(free)(void *ptr) {
424  const void *caller =
425    __builtin_extract_return_addr(__builtin_return_address(0));
426
427  if (in_malloc || !ptr)
428    return __libc_free(ptr);
429
430  in_malloc = 1;
431
432  record_free(ptr, caller);
433
434  __libc_free(ptr);
435
436  in_malloc = 0;
437}
438
439int FUNC(posix_memalign)(void **memptr, size_t alignment, size_t size) {
440  const void *caller =
441    __builtin_extract_return_addr(__builtin_return_address(0));
442
443#ifdef __PIC__
444  if (!__real_posix_memalign)
445    if (!(*(void **) (&__real_posix_memalign) =
446        dlsym(RTLD_NEXT, "posix_memalign"))) {
447      return ELIBACC;
448    }
449#endif
450
451  if (in_malloc)
452    return __real_posix_memalign(memptr, alignment, size);
453
454  in_malloc = 1;
455
456  int r = __real_posix_memalign(memptr, alignment, size);
457
458  if (!r)
459    record_malloc(size, *memptr, caller);
460
461  in_malloc = 0;
462
463  return r;
464}
465
466void *FUNC(mmap)(void *addr, size_t length, int prot, int flags,
467                 int fd, off_t offset) {
468  const void *caller =
469    __builtin_extract_return_addr(__builtin_return_address(0));
470
471#ifdef __PIC__
472  if (!__real_mmap)
473    if (!(*(void **) (&__real_mmap) = dlsym(RTLD_NEXT, "mmap"))) {
474      errno = ELIBACC;
475      return MAP_FAILED;
476    }
477#endif
478
479  if (in_malloc)
480    return __real_mmap(addr, length, prot, flags, fd, offset);
481
482  in_malloc = 1;
483
484  void *ptr = __real_mmap(addr, length, prot, flags, fd, offset);
485
486  if (ptr != MAP_FAILED)
487    record_malloc(length, ptr, caller);
488
489  in_malloc = 0;
490
491  return ptr;
492}
493
494void *FUNC(mmap64)(void *addr, size_t length, int prot, int flags,
495                   int fd, off64_t offset) {
496  const void *caller =
497    __builtin_extract_return_addr(__builtin_return_address(0));
498
499#ifdef __PIC__
500  if (!__real_mmap64)
501    if (!(*(void **) (&__real_mmap64) = dlsym(RTLD_NEXT, "mmap64"))) {
502      errno = ELIBACC;
503      return MAP_FAILED;
504    }
505#endif
506
507  if (in_malloc)
508    return __real_mmap64(addr, length, prot, flags, fd, offset);
509
510  in_malloc = 1;
511
512  void *ptr = __real_mmap64(addr, length, prot, flags, fd, offset);
513
514  if (ptr != MAP_FAILED)
515    record_malloc(length, ptr, caller);
516
517  in_malloc = 0;
518
519  return ptr;
520}
521
522int FUNC(munmap)(void *addr, size_t length) {
523  const void *caller =
524    __builtin_extract_return_addr(__builtin_return_address(0));
525
526#ifdef __PIC__
527  if (!__real_munmap)
528    if (!(*(void **) (&__real_munmap) = dlsym(RTLD_NEXT, "munmap"))) {
529      errno = ELIBACC;
530      return -1;
531    }
532#endif
533
534  if (in_malloc)
535    return __real_munmap(addr, length);
536
537  in_malloc = 1;
538
539  record_free(addr, caller);
540
541  int r = __real_munmap(addr, length);
542
543  in_malloc = 0;
544
545  return r;
546}
547
548} // extern "C"
549
Note: See TracBrowser for help on using the repository browser.