source: memlog.cpp @ 22f928f

Revision 22f928f, 7.6 KB checked in by Hal Finkel <hfinkel@…>, 9 years ago (diff)

use the job id under batch systems; misc cleanups

  • Property mode set to 100644
Line 
1#ifndef _GNU_SOURCE
2#define _GNU_SOURCE
3#endif
4
5#include <cstdlib>
6#include <cstdio>
7#include <cstring>
8
9// NOTE: This source makes very minimal use of C++11 features. It can still be
10// compiled by g++ 4.4.7 with -std=gnu++0x.
11#include <unordered_map>
12#include <utility>
13
14#include <limits.h>
15#include <malloc.h>
16#include <execinfo.h>
17#include <sys/syscall.h>
18#include <sys/time.h>
19#include <sys/resource.h>
20#include <sys/types.h>
21#include <sys/stat.h>
22#include <sys/utsname.h>
23#include <fcntl.h>
24#include <unistd.h>
25
26#include <pthread.h>
27#include <dlfcn.h>
28
29#ifdef __bgq__
30#include <spi/include/kernel/location.h>
31#endif
32
33using namespace std;
34
35// NOTE: When static linking, this depends on linker wrapping.
36// Add to your LDFLAGS:
37//   -Wl,--wrap,malloc,--wrap,free,--wrap,realloc,--wrap,calloc,--wrap,memalign /path/to/memlog_s.o -lpthread -ldl
38
// Stream that receives every allocation record. It stays null until
// record_init has opened the per-process log, and the record_* functions do
// nothing while it is null.
FILE *log_file = NULL;
// Held around every write to log_file.
static pthread_mutex_t log_mutex = PTHREAD_MUTEX_INITIALIZER;

// Per-thread re-entrancy flag. The logging code may itself call malloc or
// free; while this is non-zero the wrappers forward straight to glibc without
// logging, which keeps them from recursing forever.
static __thread int in_malloc = 0;
// Target of /proc/self/exe as read by record_init; used as the file name for
// backtrace frames whose object file cannot be determined.
static char self_path[PATH_MAX+1] = "";
46
47__attribute__((__constructor__))
48static void record_init() {
49  struct utsname u;
50  uname(&u);
51
52  int id = (int) getpid();
53#ifdef __bgq__
54  // If we're really running on a BG/Q compute node, use the job rank instead
55  // of the pid because the node name might not really be globally unique.
56  if (!strcmp(u.sysname, "CNK") && !strcmp(u.machine, "BGQ"))
57    id = (int) Kernel_GetRank();
58#endif
59
60  // If we're running under a common batch system, add the job id to the output
61  // file names (add it as a prefix so that sorting the files will sort by job
62  // first).
63  char *job_id = 0;
64  const char *job_id_vars[] =
65    { "COBALT_JOBID", "PBS_JOBID", "SLURM_JOB_ID", "JOB_ID" };
66  for (int i = 0; i < sizeof(job_id_vars)/sizeof(job_id_vars[0]); ++i) {
67    job_id = getenv(job_id_vars[i]);
68    if (job_id)
69      break;
70  }
71
72  char log_name[PATH_MAX+1];
73  if (job_id)
74    snprintf(log_name, PATH_MAX+1, "%s.%s.%d.memlog", job_id, u.nodename, id);
75  else
76    snprintf(log_name, PATH_MAX+1, "%s.%d.memlog", u.nodename, id);
77  log_file = fopen(log_name, "w");
78  if (!log_file)
79    fprintf(stderr, "fopen failed for '%s': %m\n", log_name);
80
81  const char *link_name = "/proc/self/exe";
82  readlink(link_name, self_path, PATH_MAX);
83}
84
85__attribute__((__destructor__))
86static void record_cleanup() {
87  if (!log_file)
88    return;
89
90  // These functions might call free, but we're shutting down, so don't try to
91  // unwind the stack from here...
92  in_malloc = 1;
93
94  // Avoid any racing by obtaining the lock.
95  if (pthread_mutex_lock(&log_mutex))
96    return;
97
98  (void) fflush(log_file);
99  (void) fclose(log_file);
100}
101
// dladdr is, relatively, quite slow. For this to work on a large application,
// we need to cache the lookup results.
//
// Looks up addr with dladdr, remembering both the resulting Dl_info and the
// dladdr return code for each address. When the lookup fails, *info is
// zero-filled.
//
// Returns whatever dladdr returned for addr (non-zero on success, 0 on
// failure), regardless of whether the answer came from the cache. Caching the
// return code matters: a failed lookup must keep reporting failure on later
// calls instead of turning into a "successful" hit with an all-zero Dl_info.
//
// The cache is a function-local static with no locking of its own, so calls
// must be serialized by the caller.
static int dladdr_cached(void * addr, Dl_info *info) {
  typedef std::pair<int, Dl_info> cache_entry;
  static std::unordered_map<void *, cache_entry> dladdr_cache;

  auto I = dladdr_cache.find(addr);
  if (I == dladdr_cache.end()) {
    int r = dladdr(addr, info);
    if (!r)
      memset(info, 0, sizeof(Dl_info));

    dladdr_cache.insert(std::make_pair(addr, cache_entry(r, *info)));
    return r;
  }

  memcpy(info, &I->second.second, sizeof(Dl_info));
  return I->second.first;
}
120
121static void print_context(const void *caller, int show_backtrace) {
122  struct rusage usage;
123  if (getrusage(RUSAGE_SELF, &usage)) {
124    fprintf(stderr, "getrusage failed: %m\n");
125    return;
126  }
127
128  fprintf(log_file, "\t%ld.%06ld %ld %ld", usage.ru_utime.tv_sec,
129          usage.ru_utime.tv_usec, usage.ru_maxrss, syscall(SYS_gettid));
130
131  if (!show_backtrace)
132    return;
133
134  void *pcs[1024];
135  int num_pcs = backtrace(pcs, 1024);
136
137  int found_caller = 0;
138  for (int pci = 0; pci < num_pcs; ++pci) {
139    intptr_t pc = (intptr_t) pcs[pci];
140
141    if (!pc)
142      break;
143
144    if (!found_caller) {
145      if (pc != (intptr_t) caller)
146        continue;
147
148      found_caller = 1;
149    }
150
151    intptr_t off, relpc;
152    const char *proc_name;
153    const char *file_name;
154    Dl_info dlinfo;
155    if (dladdr_cached((void *) pc, &dlinfo) && dlinfo.dli_fname &&
156        *dlinfo.dli_fname) {
157      intptr_t saddr = (intptr_t) dlinfo.dli_saddr;
158      if (saddr) {
159#if defined(__powerpc64__) && !defined(__powerpc64le__)
160        // On PPC64 ELFv1, the symbol address points to the function descriptor, not
161        // the actual starting address.
162        saddr = *(intptr_t*) saddr;
163#endif
164
165        off = pc - saddr;
166        relpc = pc - ((intptr_t) dlinfo.dli_fbase);
167      } else {
168        off = 0;
169        relpc = 0;
170      }
171
172      proc_name = dlinfo.dli_sname;
173      if (!proc_name)
174        proc_name = "?";
175
176      file_name = dlinfo.dli_fname;
177    } else {
178      // We don't know these...
179      off = 0;
180      relpc = 0;
181      proc_name = "?";
182
183      // If we can't determine the file, assume it is the base executable
184      // (which does the right thing for statically-linked binaries).
185      file_name = self_path;
186    }
187
188    fprintf(log_file, "\t%s (%s+0x%x) [0x%lx (0x%lx)]", file_name, proc_name, (int) off,
189            (long) pc, (long) relpc);
190  }
191}
192
193static void record_malloc(size_t size, void *ptr, const void *caller) {
194  if (!log_file)
195    return;
196
197  if (pthread_mutex_lock(&log_mutex))
198    return;
199
200  fprintf(log_file, "M: %zd %p", size, ptr);
201  print_context(caller, 1);
202  fprintf(log_file, "\n");
203
204done:
205  pthread_mutex_unlock(&log_mutex);
206}
207
208static void record_free(void *ptr, const void *caller) {
209  if (!log_file)
210    return;
211
212  if (pthread_mutex_lock(&log_mutex))
213    return;
214
215  fprintf(log_file, "F: %p", ptr);
216  print_context(caller, 0);
217  fprintf(log_file, "\n");
218
219done:
220  pthread_mutex_unlock(&log_mutex);
221}
222
223// glibc exports its underlying malloc implementation under the name
224// __libc_malloc so that hooks like this can use it.
225extern "C" {
// The underlying glibc allocator entry points that the wrappers forward to.
void *__libc_malloc(size_t size);
void *__libc_realloc(void *ptr, size_t size);
void *__libc_calloc(size_t nmemb, size_t size);
void *__libc_memalign(size_t boundary, size_t size);
void __libc_free(void *ptr);

// FUNC(x) is the name under which the wrapper for allocator function x is
// defined: x itself in position-independent builds, __wrap_x otherwise (the
// name the linker's --wrap option redirects calls to).
#ifndef __PIC__
#define FUNC(x) __wrap_ ## x
#else
#define FUNC(x) x
#endif
237
238void *FUNC(malloc)(size_t size) {
239  const void *caller =
240    __builtin_extract_return_addr(__builtin_return_address(0));
241
242  if (in_malloc)
243    return __libc_malloc(size);
244
245  in_malloc = 1;
246
247  void *ptr = __libc_malloc(size);
248
249  record_malloc(size, ptr, caller);
250
251  in_malloc = 0;
252  return ptr;
253}
254
255void *FUNC(realloc)(void *ptr, size_t size) {
256  const void *caller =
257    __builtin_extract_return_addr(__builtin_return_address(0));
258
259  if (in_malloc)
260    return __libc_realloc(ptr, size);
261
262  in_malloc = 1;
263
264  void *nptr = __libc_realloc(ptr, size);
265
266  if (ptr)
267    record_free(ptr, caller);
268  record_malloc(size, nptr, caller);
269
270  in_malloc = 0;
271
272  return nptr;
273}
274
275void *FUNC(calloc)(size_t nmemb, size_t size) {
276  const void *caller =
277    __builtin_extract_return_addr(__builtin_return_address(0));
278
279  if (in_malloc)
280    return __libc_calloc(nmemb, size);
281
282  in_malloc = 1;
283
284  void *ptr = __libc_calloc(nmemb, size);
285
286  record_malloc(nmemb*size, ptr, caller);
287
288  in_malloc = 0;
289
290  return ptr;
291}
292
293void *FUNC(memalign)(size_t boundary, size_t size) {
294  const void *caller =
295    __builtin_extract_return_addr(__builtin_return_address(0));
296
297  if (in_malloc)
298    return __libc_memalign(boundary, size);
299
300  in_malloc = 1;
301
302  void *ptr = __libc_memalign(boundary, size);
303
304  record_malloc(size, ptr, caller);
305
306  in_malloc = 0;
307
308  return ptr;
309}
310
311void FUNC(free)(void *ptr) {
312  const void *caller =
313    __builtin_extract_return_addr(__builtin_return_address(0));
314
315  if (in_malloc || !ptr)
316    return __libc_free(ptr);
317
318  in_malloc = 1;
319
320  record_free(ptr, caller);
321
322  __libc_free(ptr);
323
324  in_malloc = 0;
325}
326
327} // extern "C"
328
Note: See TracBrowser for help on using the repository browser.