source: memlog.cpp @ a7b97b9

Revision a7b97b9, 7.0 KB checked in by Hal Finkel <hfinkel@…>, 9 years ago (diff)

use kernel rank on the bgq instead of pid

  • Property mode set to 100644
Line 
1#ifndef _GNU_SOURCE
2#define _GNU_SOURCE
3#endif
4
5#include <cstdlib>
6#include <cstdio>
7#include <cstring>
8
9// NOTE: This source makes very minimal use of C++11 features. It can still be
10// compiled by g++ 4.4.7 with -std=gnu++0x.
11#include <unordered_map>
12#include <utility>
13
14#include <limits.h>
15#include <malloc.h>
16#include <execinfo.h>
17#include <sys/syscall.h>
18#include <sys/time.h>
19#include <sys/resource.h>
20#include <sys/types.h>
21#include <sys/stat.h>
22#include <sys/utsname.h>
23#include <fcntl.h>
24#include <unistd.h>
25
26#include <pthread.h>
27#include <dlfcn.h>
28
29#ifdef __bgq__
30#include <spi/include/kernel/location.h>
31#endif
32
33using namespace std;
34
35// NOTE: When static linking, this depends on linker wrapping.
36// Add to your LDFLAGS:
37//   -Wl,--wrap,malloc,--wrap,free,--wrap,realloc,--wrap,calloc,--wrap,memalign /path/to/memlog_s.o -lpthread -ldl
38
39FILE *log_file = NULL;
40static pthread_mutex_t log_mutex = PTHREAD_MUTEX_INITIALIZER;
41
42// The malloc hook might use functions that call malloc, and we need to make
43// sure this does not cause an infinite loop.
44static __thread int in_malloc = 0;
45static char self_path[PATH_MAX+1] = { '\0' };
46
47__attribute__((__constructor__))
48static void record_init() {
49  struct utsname u;
50  uname(&u);
51
52  int id = (int) getpid();
53#ifdef __bgq__
54  // If we're really running on a BG/Q compute node, use the job rank instead
55  // of the pid because the node name might not really be globally unique.
56  if (!strcmp(u.sysname, "CNK") && !strcmp(u.machine, "BGQ"))
57    id = (int) Kernel_GetRank();
58#endif
59
60  char log_name[PATH_MAX+1];
61  snprintf(log_name, PATH_MAX+1, "%s.%d.memlog", u.nodename, id);
62  log_file = fopen(log_name, "w");
63  if (!log_file)
64    fprintf(stderr, "fopen failed for '%s': %m\n", log_name);
65
66  const char *link_name = "/proc/self/exe";
67  readlink(link_name, self_path, PATH_MAX);
68}
69
70__attribute__((__destructor__))
71static void record_cleanup() {
72  if (!log_file)
73    return;
74
75  // These functions might call free, but we're shutting down, so don't try to
76  // unwind the stack from here...
77  in_malloc = 1;
78
79  // Avoid any racing by obtaining the lock.
80  if (pthread_mutex_lock(&log_mutex))
81    return;
82
83  (void) fflush(log_file);
84  (void) fclose(log_file);
85}
86
// dladdr is, relatively, quite slow. For this to work on a large application,
// we need to cache the lookup results.
//
// Looks up addr with dladdr, remembering the outcome per address. On return
// *info holds the lookup result, or all zeros if dladdr failed. The return
// value is the one dladdr produced for this address (zero on failure), and it
// is the same for the first call and for every later cache hit.
static int dladdr_cached(void * addr, Dl_info *info) {
  // Each entry stores dladdr's return value next to the Dl_info so that a
  // cached failure is still reported as a failure.
  static std::unordered_map<void *, std::pair<int, Dl_info> > dladdr_cache;

  auto I = dladdr_cache.find(addr);
  if (I != dladdr_cache.end()) {
    memcpy(info, &I->second.second, sizeof(Dl_info));
    return I->second.first;
  }

  int r = dladdr(addr, info);
  if (!r)
    memset(info, 0, sizeof(Dl_info));

  dladdr_cache.insert(std::make_pair(addr, std::make_pair(r, *info)));
  return r;
}
105
106static void print_context(const void *caller, int show_backtrace) {
107  struct rusage usage;
108  if (getrusage(RUSAGE_SELF, &usage)) {
109    fprintf(stderr, "getrusage failed: %m\n");
110    return;
111  }
112
113  fprintf(log_file, "\t%ld.%06ld %ld %ld", usage.ru_utime.tv_sec,
114          usage.ru_utime.tv_usec, usage.ru_maxrss, syscall(SYS_gettid));
115
116  if (!show_backtrace)
117    return;
118
119  void *pcs[1024];
120  int num_pcs = backtrace(pcs, 1024);
121
122  int found_caller = 0;
123  for (int pci = 0; pci < num_pcs; ++pci) {
124    intptr_t pc = (intptr_t) pcs[pci];
125
126    if (!pc)
127      break;
128
129    if (!found_caller) {
130      if (pc != (intptr_t) caller)
131        continue;
132
133      found_caller = 1;
134    }
135
136    intptr_t off, relpc;
137    const char *proc_name;
138    const char *file_name;
139    Dl_info dlinfo;
140    if (dladdr_cached((void *) pc, &dlinfo) && dlinfo.dli_fname &&
141        *dlinfo.dli_fname) {
142      intptr_t saddr = (intptr_t) dlinfo.dli_saddr;
143      if (saddr) {
144#if defined(__powerpc64__) && !defined(__powerpc64le__)
145        // On PPC64 ELFv1, the symbol address points to the function descriptor, not
146        // the actual starting address.
147        saddr = *(intptr_t*) saddr;
148#endif
149
150        off = pc - saddr;
151        relpc = pc - ((intptr_t) dlinfo.dli_fbase);
152      } else {
153        off = 0;
154        relpc = 0;
155      }
156
157      proc_name = dlinfo.dli_sname;
158      if (!proc_name)
159        proc_name = "?";
160
161      file_name = dlinfo.dli_fname;
162    } else {
163      // We don't know these...
164      off = 0;
165      relpc = 0;
166      proc_name = "?";
167
168      // If we can't determine the file, assume it is the base executable
169      // (which does the right thing for statically-linked binaries).
170      file_name = self_path;
171    }
172
173    fprintf(log_file, "\t%s (%s+0x%x) [0x%lx (0x%lx)]", file_name, proc_name, (int) off,
174            (long) pc, (long) relpc);
175  }
176}
177
178static void record_malloc(size_t size, void *ptr, const void *caller) {
179  if (!log_file)
180    return;
181
182  if (pthread_mutex_lock(&log_mutex))
183    return;
184
185  fprintf(log_file, "M: %zd %p", size, ptr);
186  print_context(caller, 1);
187  fprintf(log_file, "\n");
188
189done:
190  pthread_mutex_unlock(&log_mutex);
191}
192
193static void record_free(void *ptr, const void *caller) {
194  if (!log_file)
195    return;
196
197  if (pthread_mutex_lock(&log_mutex))
198    return;
199
200  fprintf(log_file, "F: %p", ptr);
201  print_context(caller, 0);
202  fprintf(log_file, "\n");
203
204done:
205  pthread_mutex_unlock(&log_mutex);
206}
207
208// glibc exports its underlying malloc implementation under the name
209// __libc_malloc so that hooks like this can use it.
210extern "C" {
211extern void *__libc_malloc(size_t size);
212extern void *__libc_realloc(void *ptr, size_t size);
213extern void *__libc_calloc(size_t nmemb, size_t size);
214extern void *__libc_memalign(size_t boundary, size_t size);
215extern void __libc_free(void *ptr);
216
// FUNC(x) is the symbol name under which the wrapper for x is defined. Without
// __PIC__ the wrappers are emitted as __wrap_x, the name the linker's --wrap
// option redirects calls to; with __PIC__ they are defined under the plain
// name x.
#ifndef __PIC__
#define FUNC(x) __wrap_ ## x
#else
#define FUNC(x) x
#endif
222
223void *FUNC(malloc)(size_t size) {
224  const void *caller =
225    __builtin_extract_return_addr(__builtin_return_address(0));
226
227  if (in_malloc)
228    return __libc_malloc(size);
229
230  in_malloc = 1;
231
232  void *ptr = __libc_malloc(size);
233
234  record_malloc(size, ptr, caller);
235
236  in_malloc = 0;
237  return ptr;
238}
239
240void *FUNC(realloc)(void *ptr, size_t size) {
241  const void *caller =
242    __builtin_extract_return_addr(__builtin_return_address(0));
243
244  if (in_malloc)
245    return __libc_realloc(ptr, size);
246
247  in_malloc = 1;
248
249  void *nptr = __libc_realloc(ptr, size);
250
251  if (ptr)
252    record_free(ptr, caller);
253  record_malloc(size, nptr, caller);
254
255  in_malloc = 0;
256
257  return nptr;
258}
259
260void *FUNC(calloc)(size_t nmemb, size_t size) {
261  const void *caller =
262    __builtin_extract_return_addr(__builtin_return_address(0));
263
264  if (in_malloc)
265    return __libc_calloc(nmemb, size);
266
267  in_malloc = 1;
268
269  void *ptr = __libc_calloc(nmemb, size);
270
271  record_malloc(nmemb*size, ptr, caller);
272
273  in_malloc = 0;
274
275  return ptr;
276}
277
278void *FUNC(memalign)(size_t boundary, size_t size) {
279  const void *caller =
280    __builtin_extract_return_addr(__builtin_return_address(0));
281
282  if (in_malloc)
283    return __libc_memalign(boundary, size);
284
285  in_malloc = 1;
286
287  void *ptr = __libc_memalign(boundary, size);
288
289  record_malloc(size, ptr, caller);
290
291  in_malloc = 0;
292
293  return ptr;
294}
295
296void FUNC(free)(void *ptr) {
297  const void *caller =
298    __builtin_extract_return_addr(__builtin_return_address(0));
299
300  if (in_malloc || !ptr)
301    return __libc_free(ptr);
302
303  in_malloc = 1;
304
305  record_free(ptr, caller);
306
307  __libc_free(ptr);
308
309  in_malloc = 0;
310}
311
312} // extern "C"
313
Note: See TracBrowser for help on using the repository browser.