source: memlog.cpp @ 22f928f

Revision 22f928f, 7.6 KB checked in by Hal Finkel <hfinkel@…>, 9 years ago (diff)

use the job id under batch systems; misc cleanups

  • Property mode set to 100644
RevLine 
[0ec59c5]1#ifndef _GNU_SOURCE
2#define _GNU_SOURCE
3#endif
4
[1bd82e0]5#include <cstdlib>
6#include <cstdio>
7#include <cstring>
8
[f715c76]9// NOTE: This source makes very minimal use of C++11 features. It can still be
10// compiled by g++ 4.4.7 with -std=gnu++0x.
[1bd82e0]11#include <unordered_map>
12#include <utility>
[0ec59c5]13
[1bd82e0]14#include <limits.h>
[430548b]15#include <malloc.h>
[a736d81]16#include <execinfo.h>
[134408c]17#include <sys/syscall.h>
[0a0ef57]18#include <sys/time.h>
19#include <sys/resource.h>
[0ec59c5]20#include <sys/types.h>
21#include <sys/stat.h>
22#include <sys/utsname.h>
23#include <fcntl.h>
24#include <unistd.h>
25
26#include <pthread.h>
27#include <dlfcn.h>
28
[a7b97b9]29#ifdef __bgq__
30#include <spi/include/kernel/location.h>
31#endif
32
[1bd82e0]33using namespace std;
34
[493cb97]35// NOTE: When static linking, this depends on linker wrapping.
36// Add to your LDFLAGS:
37//   -Wl,--wrap,malloc,--wrap,free,--wrap,realloc,--wrap,calloc,--wrap,memalign /path/to/memlog_s.o -lpthread -ldl
38
// Stream that every allocation record is written to. It is opened by
// record_init and closed by record_cleanup; while it is null the record_*
// functions return without logging anything.
FILE *log_file = 0;
// Held while a record is being written to log_file, and taken (and kept) by
// record_cleanup before the stream is closed.
static pthread_mutex_t log_mutex = PTHREAD_MUTEX_INITIALIZER;

// The malloc hook might use functions that call malloc, and we need to make
// sure this does not cause an infinite loop.
static __thread int in_malloc = 0;
// Target of the /proc/self/exe link, read in record_init. print_context uses
// it as the file name for frames that dladdr cannot attribute to any object.
static char self_path[PATH_MAX+1] = { '\0' };
[69850c8]46
[0ec59c5]47__attribute__((__constructor__))
48static void record_init() {
49  struct utsname u;
50  uname(&u);
51
[a7b97b9]52  int id = (int) getpid();
53#ifdef __bgq__
54  // If we're really running on a BG/Q compute node, use the job rank instead
55  // of the pid because the node name might not really be globally unique.
56  if (!strcmp(u.sysname, "CNK") && !strcmp(u.machine, "BGQ"))
57    id = (int) Kernel_GetRank();
58#endif
59
[22f928f]60  // If we're running under a common batch system, add the job id to the output
61  // file names (add it as a prefix so that sorting the files will sort by job
62  // first).
63  char *job_id = 0;
64  const char *job_id_vars[] =
65    { "COBALT_JOBID", "PBS_JOBID", "SLURM_JOB_ID", "JOB_ID" };
66  for (int i = 0; i < sizeof(job_id_vars)/sizeof(job_id_vars[0]); ++i) {
67    job_id = getenv(job_id_vars[i]);
68    if (job_id)
69      break;
70  }
71
[3105f50]72  char log_name[PATH_MAX+1];
[22f928f]73  if (job_id)
74    snprintf(log_name, PATH_MAX+1, "%s.%s.%d.memlog", job_id, u.nodename, id);
75  else
76    snprintf(log_name, PATH_MAX+1, "%s.%d.memlog", u.nodename, id);
[0ec59c5]77  log_file = fopen(log_name, "w");
[0a0ef57]78  if (!log_file)
79    fprintf(stderr, "fopen failed for '%s': %m\n", log_name);
[3105f50]80
81  const char *link_name = "/proc/self/exe";
82  readlink(link_name, self_path, PATH_MAX);
[0ec59c5]83}
84
85__attribute__((__destructor__))
86static void record_cleanup() {
87  if (!log_file)
88    return;
89
[69850c8]90  // These functions might call free, but we're shutting down, so don't try to
91  // unwind the stack from here...
92  in_malloc = 1;
93
[5a9481e]94  // Avoid any racing by obtaining the lock.
95  if (pthread_mutex_lock(&log_mutex))
96    return;
97
[0ec59c5]98  (void) fflush(log_file);
99  (void) fclose(log_file);
100}
101
// dladdr is, relatively, quite slow. For this to work on a large application,
// we need to cache the lookup results.
//
// Behaves like dladdr(addr, info), except that the outcome of the first lookup
// of each address is remembered and replayed on later calls. When the lookup
// fails, *info is zero-filled and 0 is returned -- on the first call and on
// every cached call alike.
//
// The cache is a function-local static with no locking of its own; the only
// caller in this file (print_context) runs with log_mutex held.
static int dladdr_cached(void * addr, Dl_info *info) {
  // Maps an address to the (dladdr return value, Dl_info) pair obtained for it.
  static std::unordered_map<void *, std::pair<int, Dl_info> > dladdr_cache;

  auto I = dladdr_cache.find(addr);
  if (I != dladdr_cache.end()) {
    memcpy(info, &I->second.second, sizeof(Dl_info));
    return I->second.first;
  }

  int r = dladdr(addr, info);
  if (!r)
    memset(info, 0, sizeof(Dl_info));

  dladdr_cache.insert(std::make_pair(addr, std::make_pair(r, *info)));
  return r;
}
120
[1e5cce6]121static void print_context(const void *caller, int show_backtrace) {
[0a0ef57]122  struct rusage usage;
123  if (getrusage(RUSAGE_SELF, &usage)) {
124    fprintf(stderr, "getrusage failed: %m\n");
[0ec59c5]125    return;
[0a0ef57]126  }
[0ec59c5]127
[134408c]128  fprintf(log_file, "\t%ld.%06ld %ld %ld", usage.ru_utime.tv_sec,
129          usage.ru_utime.tv_usec, usage.ru_maxrss, syscall(SYS_gettid));
[0a0ef57]130
[1e5cce6]131  if (!show_backtrace)
132    return;
133
[a736d81]134  void *pcs[1024];
135  int num_pcs = backtrace(pcs, 1024);
[14e2ab9]136
137  int found_caller = 0;
[a736d81]138  for (int pci = 0; pci < num_pcs; ++pci) {
[14e2ab9]139    intptr_t pc = (intptr_t) pcs[pci];
[69850c8]140
141    if (!pc)
142      break;
143
[14e2ab9]144    if (!found_caller) {
145      if (pc != (intptr_t) caller)
146        continue;
147
148      found_caller = 1;
149    }
150
151    intptr_t off, relpc;
[69850c8]152    const char *proc_name;
153    const char *file_name;
154    Dl_info dlinfo;
[5a9481e]155    if (dladdr_cached((void *) pc, &dlinfo) && dlinfo.dli_fname &&
[69850c8]156        *dlinfo.dli_fname) {
[14e2ab9]157      intptr_t saddr = (intptr_t) dlinfo.dli_saddr;
[69850c8]158      if (saddr) {
159#if defined(__powerpc64__) && !defined(__powerpc64le__)
160        // On PPC64 ELFv1, the symbol address points to the function descriptor, not
161        // the actual starting address.
[14e2ab9]162        saddr = *(intptr_t*) saddr;
[69850c8]163#endif
164
165        off = pc - saddr;
[14e2ab9]166        relpc = pc - ((intptr_t) dlinfo.dli_fbase);
[69850c8]167      } else {
168        off = 0;
169        relpc = 0;
170      }
171
172      proc_name = dlinfo.dli_sname;
173      if (!proc_name)
174        proc_name = "?";
175
176      file_name = dlinfo.dli_fname;
177    } else {
[3105f50]178      // We don't know these...
179      off = 0;
180      relpc = 0;
[69850c8]181      proc_name = "?";
[3105f50]182
183      // If we can't determine the file, assume it is the base executable
184      // (which does the right thing for statically-linked binaries).
185      file_name = self_path;
[69850c8]186    }
187
188    fprintf(log_file, "\t%s (%s+0x%x) [0x%lx (0x%lx)]", file_name, proc_name, (int) off,
189            (long) pc, (long) relpc);
[0ec59c5]190  }
191}
192
[430548b]193static void record_malloc(size_t size, void *ptr, const void *caller) {
[0ec59c5]194  if (!log_file)
195    return;
196
197  if (pthread_mutex_lock(&log_mutex))
198    return;
199
200  fprintf(log_file, "M: %zd %p", size, ptr);
[1e5cce6]201  print_context(caller, 1);
[0ec59c5]202  fprintf(log_file, "\n");
203
204done:
205  pthread_mutex_unlock(&log_mutex);
206}
207
[430548b]208static void record_free(void *ptr, const void *caller) {
[0ec59c5]209  if (!log_file)
210    return;
211
212  if (pthread_mutex_lock(&log_mutex))
213    return;
214
215  fprintf(log_file, "F: %p", ptr);
[1e5cce6]216  print_context(caller, 0);
[0ec59c5]217  fprintf(log_file, "\n");
218
219done:
220  pthread_mutex_unlock(&log_mutex);
221}
222
223// glibc exports its underlying malloc implementation under the name
224// __libc_malloc so that hooks like this can use it.
[1bd82e0]225extern "C" {
// The real allocator entry points; the wrappers below forward every request
// to these.
extern void *__libc_malloc(size_t size);
extern void *__libc_realloc(void *ptr, size_t size);
extern void *__libc_calloc(size_t nmemb, size_t size);
extern void *__libc_memalign(size_t boundary, size_t size);
extern void __libc_free(void *ptr);

// FUNC(x) is the name under which the wrapper for allocator function x is
// defined. When compiled as position-independent code the wrapper takes the
// plain name (malloc, free, ...); otherwise it is named __wrap_x, which is
// the symbol that the linker's --wrap option (see the LDFLAGS note near the
// top of this file) redirects calls of x to.
// NOTE(review): presumably the PIC build is the shared-library variant that
// interposes on the libc symbols directly -- confirm against the build rules.
#ifdef __PIC__
#define FUNC(x) x
#else
#define FUNC(x) __wrap_ ## x
#endif
237
238void *FUNC(malloc)(size_t size) {
[5a9481e]239  const void *caller =
240    __builtin_extract_return_addr(__builtin_return_address(0));
[430548b]241
[0ec59c5]242  if (in_malloc)
243    return __libc_malloc(size);
244
245  in_malloc = 1;
246
247  void *ptr = __libc_malloc(size);
248
[14e2ab9]249  record_malloc(size, ptr, caller);
[0ec59c5]250
251  in_malloc = 0;
252  return ptr;
253}
254
[430548b]255void *FUNC(realloc)(void *ptr, size_t size) {
[5a9481e]256  const void *caller =
257    __builtin_extract_return_addr(__builtin_return_address(0));
[430548b]258
[0ec59c5]259  if (in_malloc)
260    return __libc_realloc(ptr, size);
261
262  in_malloc = 1;
263
264  void *nptr = __libc_realloc(ptr, size);
265
[14e2ab9]266  if (ptr)
267    record_free(ptr, caller);
268  record_malloc(size, nptr, caller);
[0ec59c5]269
270  in_malloc = 0;
271
272  return nptr;
273}
274
[430548b]275void *FUNC(calloc)(size_t nmemb, size_t size) {
[5a9481e]276  const void *caller =
277    __builtin_extract_return_addr(__builtin_return_address(0));
[430548b]278
[0ec59c5]279  if (in_malloc)
280    return __libc_calloc(nmemb, size);
281
282  in_malloc = 1;
283
284  void *ptr = __libc_calloc(nmemb, size);
285
[14e2ab9]286  record_malloc(nmemb*size, ptr, caller);
[0ec59c5]287
288  in_malloc = 0;
289
290  return ptr;
291}
292
[430548b]293void *FUNC(memalign)(size_t boundary, size_t size) {
[5a9481e]294  const void *caller =
295    __builtin_extract_return_addr(__builtin_return_address(0));
[430548b]296
[0ec59c5]297  if (in_malloc)
298    return __libc_memalign(boundary, size);
299
300  in_malloc = 1;
301
302  void *ptr = __libc_memalign(boundary, size);
303
[14e2ab9]304  record_malloc(size, ptr, caller);
[0ec59c5]305
306  in_malloc = 0;
307
308  return ptr;
309}
310
[430548b]311void FUNC(free)(void *ptr) {
[5a9481e]312  const void *caller =
313    __builtin_extract_return_addr(__builtin_return_address(0));
[430548b]314
[14e2ab9]315  if (in_malloc || !ptr)
[0ec59c5]316    return __libc_free(ptr);
317
318  in_malloc = 1;
319
[14e2ab9]320  record_free(ptr, caller);
[0ec59c5]321
322  __libc_free(ptr);
323
324  in_malloc = 0;
325}
326
[1bd82e0]327} // extern "C"
328
Note: See TracBrowser for help on using the repository browser.