source: memlog.cpp @ 21d3542

Revision 21d3542, 8.0 KB checked in by Hal Finkel <hfinkel@…>, 9 years ago (diff)

collect and report on mmap totals

  • Property mode set to 100644
Line 
1#ifndef _GNU_SOURCE
2#define _GNU_SOURCE
3#endif
4
5#include <cstdlib>
6#include <cstdio>
7#include <cstring>
8#include <cstdint>
9
10// NOTE: This source makes very minimal use of C++11 features. It can still be
11// compiled by g++ 4.4.7 with -std=gnu++0x.
12#include <unordered_map>
13#include <utility>
14
15#include <limits.h>
16#include <malloc.h>
17#include <execinfo.h>
18#include <sys/syscall.h>
19#include <sys/time.h>
20#include <sys/resource.h>
21#include <sys/types.h>
22#include <sys/stat.h>
23#include <sys/utsname.h>
24#include <fcntl.h>
25#include <unistd.h>
26
27#include <pthread.h>
28#include <dlfcn.h>
29
30#ifdef __bgq__
31#include <spi/include/kernel/location.h>
32#include <spi/include/kernel/memory.h>
33#endif
34
35using namespace std;
36
37// NOTE: When static linking, this depends on linker wrapping.
38// Add to your LDFLAGS:
39//   -Wl,--wrap,malloc,--wrap,free,--wrap,realloc,--wrap,calloc,--wrap,memalign /path/to/memlog_s.o -lpthread -ldl
40
// Stream that every log record is written to. It is null until the startup
// hook opens it, and the recording functions do nothing while it is null.
FILE *log_file = NULL;
// Serializes all writers of log_file.
static pthread_mutex_t log_mutex = PTHREAD_MUTEX_INITIALIZER;

// Per-thread re-entrancy guard. The hooks call functions that may themselves
// allocate; while this flag is set those nested calls bypass the logging path
// so that we do not recurse forever.
static __thread int in_malloc = 0;
// Path of the running executable (filled in by the startup hook; empty until
// then).
static char self_path[PATH_MAX+1] = "";

#ifdef __bgq__
// Set to 1 by the startup hook when running on a real BG/Q compute node.
int on_bgq = 0;
#endif

// Program break sampled by the startup hook; the baseline against which the
// arena size is reported.
void *initial_brk = NULL;
54
55__attribute__((__constructor__))
56static void record_init() {
57  struct utsname u;
58  uname(&u);
59
60  int id = (int) getpid();
61#ifdef __bgq__
62  // If we're really running on a BG/Q compute node, use the job rank instead
63  // of the pid because the node name might not really be globally unique.
64  if (!strcmp(u.sysname, "CNK") && !strcmp(u.machine, "BGQ")) {
65    id = (int) Kernel_GetRank();
66    on_bgq = 1;
67  }
68#endif
69
70  // If we're running under a common batch system, add the job id to the output
71  // file names (add it as a prefix so that sorting the files will sort by job
72  // first).
73  char *job_id = 0;
74  const char *job_id_vars[] =
75    { "COBALT_JOBID", "PBS_JOBID", "SLURM_JOB_ID", "JOB_ID" };
76  for (int i = 0; i < sizeof(job_id_vars)/sizeof(job_id_vars[0]); ++i) {
77    job_id = getenv(job_id_vars[i]);
78    if (job_id)
79      break;
80  }
81
82  char log_name[PATH_MAX+1];
83  if (job_id)
84    snprintf(log_name, PATH_MAX+1, "%s.%s.%d.memlog", job_id, u.nodename, id);
85  else
86    snprintf(log_name, PATH_MAX+1, "%s.%d.memlog", u.nodename, id);
87  log_file = fopen(log_name, "w");
88  if (!log_file)
89    fprintf(stderr, "fopen failed for '%s': %m\n", log_name);
90
91  const char *link_name = "/proc/self/exe";
92  readlink(link_name, self_path, PATH_MAX);
93
94  initial_brk = sbrk(0);
95}
96
97__attribute__((__destructor__))
98static void record_cleanup() {
99  if (!log_file)
100    return;
101
102  // These functions might call free, but we're shutting down, so don't try to
103  // unwind the stack from here...
104  in_malloc = 1;
105
106  // Avoid any racing by obtaining the lock.
107  if (pthread_mutex_lock(&log_mutex))
108    return;
109
110  (void) fflush(log_file);
111  (void) fclose(log_file);
112}
113
// dladdr is, relatively, quite slow. For this to work on a large application,
// we need to cache the lookup results.
//
// Resolves addr with dladdr and memoizes both the resulting Dl_info and
// dladdr's return value, so repeated queries for one address always report
// the same outcome. When the lookup fails, *info is zero-filled.
//
// Returns dladdr's result for addr: nonzero on success, 0 on failure.
//
// The cache is a function-local static with no locking of its own.
static int dladdr_cached(void * addr, Dl_info *info) {
  typedef std::pair<int, Dl_info> cached_result;
  static std::unordered_map<void *, cached_result> dladdr_cache;

  auto I = dladdr_cache.find(addr);
  if (I != dladdr_cache.end()) {
    // Replay the stored status too: a cached miss must still read as a miss.
    memcpy(info, &I->second.second, sizeof(Dl_info));
    return I->second.first;
  }

  int r = dladdr(addr, info);
  if (!r)
    memset(info, 0, sizeof(Dl_info));

  dladdr_cache.insert(std::make_pair(addr, cached_result(r, *info)));
  return r;
}
132
133static void print_context(const void *caller, int show_backtrace) {
134  struct rusage usage;
135  if (getrusage(RUSAGE_SELF, &usage)) {
136    fprintf(stderr, "getrusage failed: %m\n");
137    return;
138  }
139
140  fprintf(log_file, "\t%ld.%06ld %ld %ld", usage.ru_utime.tv_sec,
141          usage.ru_utime.tv_usec, usage.ru_maxrss, syscall(SYS_gettid));
142
143  // Some other memory stats (like with maxrss, report these in KB).
144  size_t arena_size = ((size_t) sbrk(0)) - (size_t) initial_brk;
145
146  uint64_t mmap_size = 0;
147#ifdef __bgq__
148  if (on_bgq)
149    (void) Kernel_GetMemorySize(KERNEL_MEMSIZE_MMAP, &mmap_size);
150#endif
151
152  fprintf(log_file, " %ld %ld", arena_size >> 10, mmap_size >> 10);
153
154  if (!show_backtrace)
155    return;
156
157  void *pcs[1024];
158  int num_pcs = backtrace(pcs, 1024);
159
160  int found_caller = 0;
161  for (int pci = 0; pci < num_pcs; ++pci) {
162    intptr_t pc = (intptr_t) pcs[pci];
163
164    if (!pc)
165      break;
166
167    if (!found_caller) {
168      if (pc != (intptr_t) caller)
169        continue;
170
171      found_caller = 1;
172    }
173
174    intptr_t off, relpc;
175    const char *proc_name;
176    const char *file_name;
177    Dl_info dlinfo;
178    if (dladdr_cached((void *) pc, &dlinfo) && dlinfo.dli_fname &&
179        *dlinfo.dli_fname) {
180      intptr_t saddr = (intptr_t) dlinfo.dli_saddr;
181      if (saddr) {
182#if defined(__powerpc64__) && !defined(__powerpc64le__)
183        // On PPC64 ELFv1, the symbol address points to the function descriptor, not
184        // the actual starting address.
185        saddr = *(intptr_t*) saddr;
186#endif
187
188        off = pc - saddr;
189        relpc = pc - ((intptr_t) dlinfo.dli_fbase);
190      } else {
191        off = 0;
192        relpc = 0;
193      }
194
195      proc_name = dlinfo.dli_sname;
196      if (!proc_name)
197        proc_name = "?";
198
199      file_name = dlinfo.dli_fname;
200    } else {
201      // We don't know these...
202      off = 0;
203      relpc = 0;
204      proc_name = "?";
205
206      // If we can't determine the file, assume it is the base executable
207      // (which does the right thing for statically-linked binaries).
208      file_name = self_path;
209    }
210
211    fprintf(log_file, "\t%s (%s+0x%x) [0x%lx (0x%lx)]", file_name, proc_name, (int) off,
212            (long) pc, (long) relpc);
213  }
214}
215
216static void record_malloc(size_t size, void *ptr, const void *caller) {
217  if (!log_file)
218    return;
219
220  if (pthread_mutex_lock(&log_mutex))
221    return;
222
223  fprintf(log_file, "M: %zd %p", size, ptr);
224  print_context(caller, 1);
225  fprintf(log_file, "\n");
226
227done:
228  pthread_mutex_unlock(&log_mutex);
229}
230
231static void record_free(void *ptr, const void *caller) {
232  if (!log_file)
233    return;
234
235  if (pthread_mutex_lock(&log_mutex))
236    return;
237
238  fprintf(log_file, "F: %p", ptr);
239  print_context(caller, 0);
240  fprintf(log_file, "\n");
241
242done:
243  pthread_mutex_unlock(&log_mutex);
244}
245
246// glibc exports its underlying malloc implementation under the name
247// __libc_malloc so that hooks like this can use it.
248extern "C" {
249extern void *__libc_malloc(size_t size);
250extern void *__libc_realloc(void *ptr, size_t size);
251extern void *__libc_calloc(size_t nmemb, size_t size);
252extern void *__libc_memalign(size_t boundary, size_t size);
253extern void __libc_free(void *ptr);
254
// Selects the symbol name each hook is defined under: without __PIC__ the
// name gets a __wrap_ prefix, and with __PIC__ it is used unchanged.
#ifndef __PIC__
#define FUNC(name) __wrap_ ## name
#else
#define FUNC(name) name
#endif
260
261void *FUNC(malloc)(size_t size) {
262  const void *caller =
263    __builtin_extract_return_addr(__builtin_return_address(0));
264
265  if (in_malloc)
266    return __libc_malloc(size);
267
268  in_malloc = 1;
269
270  void *ptr = __libc_malloc(size);
271
272  record_malloc(size, ptr, caller);
273
274  in_malloc = 0;
275  return ptr;
276}
277
278void *FUNC(realloc)(void *ptr, size_t size) {
279  const void *caller =
280    __builtin_extract_return_addr(__builtin_return_address(0));
281
282  if (in_malloc)
283    return __libc_realloc(ptr, size);
284
285  in_malloc = 1;
286
287  void *nptr = __libc_realloc(ptr, size);
288
289  if (ptr)
290    record_free(ptr, caller);
291  record_malloc(size, nptr, caller);
292
293  in_malloc = 0;
294
295  return nptr;
296}
297
298void *FUNC(calloc)(size_t nmemb, size_t size) {
299  const void *caller =
300    __builtin_extract_return_addr(__builtin_return_address(0));
301
302  if (in_malloc)
303    return __libc_calloc(nmemb, size);
304
305  in_malloc = 1;
306
307  void *ptr = __libc_calloc(nmemb, size);
308
309  record_malloc(nmemb*size, ptr, caller);
310
311  in_malloc = 0;
312
313  return ptr;
314}
315
316void *FUNC(memalign)(size_t boundary, size_t size) {
317  const void *caller =
318    __builtin_extract_return_addr(__builtin_return_address(0));
319
320  if (in_malloc)
321    return __libc_memalign(boundary, size);
322
323  in_malloc = 1;
324
325  void *ptr = __libc_memalign(boundary, size);
326
327  record_malloc(size, ptr, caller);
328
329  in_malloc = 0;
330
331  return ptr;
332}
333
334void FUNC(free)(void *ptr) {
335  const void *caller =
336    __builtin_extract_return_addr(__builtin_return_address(0));
337
338  if (in_malloc || !ptr)
339    return __libc_free(ptr);
340
341  in_malloc = 1;
342
343  record_free(ptr, caller);
344
345  __libc_free(ptr);
346
347  in_malloc = 0;
348}
349
350} // extern "C"
351
Note: See TracBrowser for help on using the repository browser.