source: memlog.cpp @ a7b97b9

Revision a7b97b9, 7.0 KB checked in by Hal Finkel <hfinkel@…>, 9 years ago (diff)

use kernel rank on the bgq instead of pid

  • Property mode set to 100644
RevLine 
[0ec59c5]1#ifndef _GNU_SOURCE
2#define _GNU_SOURCE
3#endif
4
[1bd82e0]5#include <cstdlib>
6#include <cstdio>
7#include <cstring>
8
[f715c76]9// NOTE: This source makes very minimal use of C++11 features. It can still be
10// compiled by g++ 4.4.7 with -std=gnu++0x.
[1bd82e0]11#include <unordered_map>
12#include <utility>
[0ec59c5]13
[1bd82e0]14#include <limits.h>
[430548b]15#include <malloc.h>
[a736d81]16#include <execinfo.h>
[134408c]17#include <sys/syscall.h>
[0a0ef57]18#include <sys/time.h>
19#include <sys/resource.h>
[0ec59c5]20#include <sys/types.h>
21#include <sys/stat.h>
22#include <sys/utsname.h>
23#include <fcntl.h>
24#include <unistd.h>
25
26#include <pthread.h>
27#include <dlfcn.h>
28
[a7b97b9]29#ifdef __bgq__
30#include <spi/include/kernel/location.h>
31#endif
32
[1bd82e0]33using namespace std;
34
[493cb97]35// NOTE: When static linking, this depends on linker wrapping.
36// Add to your LDFLAGS:
37//   -Wl,--wrap,malloc,--wrap,free,--wrap,realloc,--wrap,calloc,--wrap,memalign /path/to/memlog_s.o -lpthread -ldl
38
// Stream that receives the allocation log. record_init() assigns it the
// result of fopen(), so it stays NULL when the log could not be opened; the
// record_* functions return without logging in that case.
FILE *log_file = NULL;
// Locked by record_malloc()/record_free() around each record they write to
// log_file, and by record_cleanup() before the stream is closed.
static pthread_mutex_t log_mutex = PTHREAD_MUTEX_INITIALIZER;

// The malloc hook might use functions that call malloc, and we need to make
// sure this does not cause an infinite loop.
static __thread int in_malloc = 0;
// Target of the /proc/self/exe link, filled in by record_init().
// print_context() reports it as the file name for frames whose containing
// object cannot be determined.
static char self_path[PATH_MAX+1] = { '\0' };
[69850c8]46
[0ec59c5]47__attribute__((__constructor__))
48static void record_init() {
49  struct utsname u;
50  uname(&u);
51
[a7b97b9]52  int id = (int) getpid();
53#ifdef __bgq__
54  // If we're really running on a BG/Q compute node, use the job rank instead
55  // of the pid because the node name might not really be globally unique.
56  if (!strcmp(u.sysname, "CNK") && !strcmp(u.machine, "BGQ"))
57    id = (int) Kernel_GetRank();
58#endif
59
[3105f50]60  char log_name[PATH_MAX+1];
[a7b97b9]61  snprintf(log_name, PATH_MAX+1, "%s.%d.memlog", u.nodename, id);
[0ec59c5]62  log_file = fopen(log_name, "w");
[0a0ef57]63  if (!log_file)
64    fprintf(stderr, "fopen failed for '%s': %m\n", log_name);
[3105f50]65
66  const char *link_name = "/proc/self/exe";
67  readlink(link_name, self_path, PATH_MAX);
[0ec59c5]68}
69
70__attribute__((__destructor__))
71static void record_cleanup() {
72  if (!log_file)
73    return;
74
[69850c8]75  // These functions might call free, but we're shutting down, so don't try to
76  // unwind the stack from here...
77  in_malloc = 1;
78
[5a9481e]79  // Avoid any racing by obtaining the lock.
80  if (pthread_mutex_lock(&log_mutex))
81    return;
82
[0ec59c5]83  (void) fflush(log_file);
84  (void) fclose(log_file);
85}
86
// dladdr is, relatively, quite slow. For this to work on a large application,
// we need to cache the lookup results.
//
// Resolves addr like dladdr(addr, info), remembering both the resulting
// Dl_info and dladdr's return value per address. Repeated queries for the
// same address therefore return the same value as the first one (0 for an
// address dladdr could not resolve, with *info zeroed; non-zero otherwise).
//
// The cache is a function-local static with no locking of its own; in this
// file it is only reached through print_context(), which the record_*
// functions call while holding log_mutex.
static int dladdr_cached(void * addr, Dl_info *info) {
  static std::unordered_map<void *, std::pair<int, Dl_info> > dladdr_cache;

  auto I = dladdr_cache.find(addr);
  if (I == dladdr_cache.end()) {
    int r = dladdr(addr, info);
    if (!r)
      memset(info, 0, sizeof(Dl_info));

    dladdr_cache.insert(std::make_pair(addr, std::make_pair(r, *info)));
    return r;
  }

  // Cache hit: hand back the stored result together with the stored status,
  // so a failed lookup keeps reporting failure.
  *info = I->second.second;
  return I->second.first;
}
105
[1e5cce6]106static void print_context(const void *caller, int show_backtrace) {
[0a0ef57]107  struct rusage usage;
108  if (getrusage(RUSAGE_SELF, &usage)) {
109    fprintf(stderr, "getrusage failed: %m\n");
[0ec59c5]110    return;
[0a0ef57]111  }
[0ec59c5]112
[134408c]113  fprintf(log_file, "\t%ld.%06ld %ld %ld", usage.ru_utime.tv_sec,
114          usage.ru_utime.tv_usec, usage.ru_maxrss, syscall(SYS_gettid));
[0a0ef57]115
[1e5cce6]116  if (!show_backtrace)
117    return;
118
[a736d81]119  void *pcs[1024];
120  int num_pcs = backtrace(pcs, 1024);
[14e2ab9]121
122  int found_caller = 0;
[a736d81]123  for (int pci = 0; pci < num_pcs; ++pci) {
[14e2ab9]124    intptr_t pc = (intptr_t) pcs[pci];
[69850c8]125
126    if (!pc)
127      break;
128
[14e2ab9]129    if (!found_caller) {
130      if (pc != (intptr_t) caller)
131        continue;
132
133      found_caller = 1;
134    }
135
136    intptr_t off, relpc;
[69850c8]137    const char *proc_name;
138    const char *file_name;
139    Dl_info dlinfo;
[5a9481e]140    if (dladdr_cached((void *) pc, &dlinfo) && dlinfo.dli_fname &&
[69850c8]141        *dlinfo.dli_fname) {
[14e2ab9]142      intptr_t saddr = (intptr_t) dlinfo.dli_saddr;
[69850c8]143      if (saddr) {
144#if defined(__powerpc64__) && !defined(__powerpc64le__)
145        // On PPC64 ELFv1, the symbol address points to the function descriptor, not
146        // the actual starting address.
[14e2ab9]147        saddr = *(intptr_t*) saddr;
[69850c8]148#endif
149
150        off = pc - saddr;
[14e2ab9]151        relpc = pc - ((intptr_t) dlinfo.dli_fbase);
[69850c8]152      } else {
153        off = 0;
154        relpc = 0;
155      }
156
157      proc_name = dlinfo.dli_sname;
158      if (!proc_name)
159        proc_name = "?";
160
161      file_name = dlinfo.dli_fname;
162    } else {
[3105f50]163      // We don't know these...
164      off = 0;
165      relpc = 0;
[69850c8]166      proc_name = "?";
[3105f50]167
168      // If we can't determine the file, assume it is the base executable
169      // (which does the right thing for statically-linked binaries).
170      file_name = self_path;
[69850c8]171    }
172
173    fprintf(log_file, "\t%s (%s+0x%x) [0x%lx (0x%lx)]", file_name, proc_name, (int) off,
174            (long) pc, (long) relpc);
[0ec59c5]175  }
176}
177
[430548b]178static void record_malloc(size_t size, void *ptr, const void *caller) {
[0ec59c5]179  if (!log_file)
180    return;
181
182  if (pthread_mutex_lock(&log_mutex))
183    return;
184
185  fprintf(log_file, "M: %zd %p", size, ptr);
[1e5cce6]186  print_context(caller, 1);
[0ec59c5]187  fprintf(log_file, "\n");
188
189done:
190  pthread_mutex_unlock(&log_mutex);
191}
192
[430548b]193static void record_free(void *ptr, const void *caller) {
[0ec59c5]194  if (!log_file)
195    return;
196
197  if (pthread_mutex_lock(&log_mutex))
198    return;
199
200  fprintf(log_file, "F: %p", ptr);
[1e5cce6]201  print_context(caller, 0);
[0ec59c5]202  fprintf(log_file, "\n");
203
204done:
205  pthread_mutex_unlock(&log_mutex);
206}
207
// glibc exports its underlying malloc implementation under the name
// __libc_malloc so that hooks like this can use it.
extern "C" {
extern void *__libc_malloc(size_t size);
extern void *__libc_realloc(void *ptr, size_t size);
extern void *__libc_calloc(size_t nmemb, size_t size);
extern void *__libc_memalign(size_t boundary, size_t size);
extern void __libc_free(void *ptr);

// FUNC(x) is the name under which the wrapper for x is defined below. When
// compiled as position-independent code (__PIC__ defined) the wrapper takes
// the plain name x itself; otherwise it is named __wrap_x, matching the
// -Wl,--wrap linker options described in the note near the top of this file.
#ifdef __PIC__
#define FUNC(x) x
#else
#define FUNC(x) __wrap_ ## x
#endif
222
223void *FUNC(malloc)(size_t size) {
[5a9481e]224  const void *caller =
225    __builtin_extract_return_addr(__builtin_return_address(0));
[430548b]226
[0ec59c5]227  if (in_malloc)
228    return __libc_malloc(size);
229
230  in_malloc = 1;
231
232  void *ptr = __libc_malloc(size);
233
[14e2ab9]234  record_malloc(size, ptr, caller);
[0ec59c5]235
236  in_malloc = 0;
237  return ptr;
238}
239
[430548b]240void *FUNC(realloc)(void *ptr, size_t size) {
[5a9481e]241  const void *caller =
242    __builtin_extract_return_addr(__builtin_return_address(0));
[430548b]243
[0ec59c5]244  if (in_malloc)
245    return __libc_realloc(ptr, size);
246
247  in_malloc = 1;
248
249  void *nptr = __libc_realloc(ptr, size);
250
[14e2ab9]251  if (ptr)
252    record_free(ptr, caller);
253  record_malloc(size, nptr, caller);
[0ec59c5]254
255  in_malloc = 0;
256
257  return nptr;
258}
259
[430548b]260void *FUNC(calloc)(size_t nmemb, size_t size) {
[5a9481e]261  const void *caller =
262    __builtin_extract_return_addr(__builtin_return_address(0));
[430548b]263
[0ec59c5]264  if (in_malloc)
265    return __libc_calloc(nmemb, size);
266
267  in_malloc = 1;
268
269  void *ptr = __libc_calloc(nmemb, size);
270
[14e2ab9]271  record_malloc(nmemb*size, ptr, caller);
[0ec59c5]272
273  in_malloc = 0;
274
275  return ptr;
276}
277
[430548b]278void *FUNC(memalign)(size_t boundary, size_t size) {
[5a9481e]279  const void *caller =
280    __builtin_extract_return_addr(__builtin_return_address(0));
[430548b]281
[0ec59c5]282  if (in_malloc)
283    return __libc_memalign(boundary, size);
284
285  in_malloc = 1;
286
287  void *ptr = __libc_memalign(boundary, size);
288
[14e2ab9]289  record_malloc(size, ptr, caller);
[0ec59c5]290
291  in_malloc = 0;
292
293  return ptr;
294}
295
[430548b]296void FUNC(free)(void *ptr) {
[5a9481e]297  const void *caller =
298    __builtin_extract_return_addr(__builtin_return_address(0));
[430548b]299
[14e2ab9]300  if (in_malloc || !ptr)
[0ec59c5]301    return __libc_free(ptr);
302
303  in_malloc = 1;
304
[14e2ab9]305  record_free(ptr, caller);
[0ec59c5]306
307  __libc_free(ptr);
308
309  in_malloc = 0;
310}
311
[1bd82e0]312} // extern "C"
313
Note: See TracBrowser for help on using the repository browser.