This file is indexed.

/usr/share/systemtap/runtime/linux/perf.c is in systemtap-common 2.6-0.2.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
/* -*- linux-c -*- 
 * Perf Functions
 * Copyright (C) 2006-2014 Red Hat Inc.
 *
 * This file is part of systemtap, and is free software.  You can
 * redistribute it and/or modify it under the terms of the GNU General
 * Public License (GPL); either version 2, or (at your option) any
 * later version.
 */

#ifndef _PERF_C_
#define _PERF_C_

#include <linux/perf_event.h>
#include <linux/workqueue.h>

#include "perf.h"

#ifndef INIT_WORK_ONSTACK
#define INIT_WORK_ONSTACK(_work, _func) INIT_WORK((_work), (_func))
#define destroy_work_on_stack(_work) do { (void)(_work); } while (0)
#endif

/** @file perf.c
 * @brief Implements performance monitoring hardware support
 */

/** Initialize performance sampling
 * Call this during probe initialization to set up performance event sampling
 * for all online cpus.  Returns non-zero on error.
 *
 * @param stp Handle for the event to be registered.
 */
static long _stp_perf_init (struct stap_perf_probe *stp, struct task_struct* task)
{
	int cpu;

	if (!stp->system_wide) {
	  /* Per-thread counter: created lazily, only once the target task
	     is known, and at most once per probe. */
	  if (task == 0) /* need to setup later when we know the task */
	    return 0;
	  else  {
	    if (stp->e.t.per_thread_event != 0) /* already setup */
	      return 0;
	    /* cpu = -1: follow the task on whichever cpu it runs.  The
	       third argument is a struct task_struct* or a pid_t
	       depending on the kernel API (selected by autoconf). */
	    stp->e.t.per_thread_event = perf_event_create_kernel_counter(&stp->attr,
								     -1, 
#if defined(STAPCONF_PERF_STRUCTPID) || defined (STAPCONF_PERF_COUNTER_CONTEXT)
								     task,
#else
								     task->pid,
#endif
								     stp->callback
#ifdef STAPCONF_PERF_COUNTER_CONTEXT
								     , NULL
#endif
								     );
	    if (IS_ERR(stp->e.t.per_thread_event)) {
	      long rc = PTR_ERR(stp->e.t.per_thread_event);
	      /* Clear the error cookie so later calls don't treat it as
		 a live event pointer. */
	      stp->e.t.per_thread_event = NULL;

	      /*
	       * PPC returns ENXIO for HW counters until 2.6.37
	       * (behavior changed with commit b0a873e).
	       */
	      if (rc == -EINVAL || rc == -ENOSYS || rc == -ENOENT
		  || rc == -EOPNOTSUPP || rc == -ENXIO) {
	        _stp_warn("perf probe '%s' is not supported by this kernel (%ld).",
#ifdef STP_NEED_PROBE_NAME
			  stp->probe->pn,
#else
			  stp->probe->pp,
#endif
			  rc);
		/* Lie and return 0. This way the more generic
		 * task_finder warning won't be printed. */
		rc = 0;
	      }
	      return rc;
	    }
	  }
	}
	else {
	  /* allocate space for the event descriptor for each cpu */
	  stp->e.events = _stp_alloc_percpu (sizeof(struct perf_event*));
	  if (stp->e.events == NULL) {
	    return -ENOMEM;
	  }

	  /* initialize event on each processor */
	  for_each_possible_cpu(cpu) {
	    struct perf_event **event = per_cpu_ptr (stp->e.events, cpu);
	    /* Offline cpus get a NULL slot; _stp_perf_del skips them. */
	    if (cpu_is_offline(cpu)) {
	      *event = NULL;
	      continue;
	    }
	    *event = perf_event_create_kernel_counter(&stp->attr,
						      cpu,
#if defined(STAPCONF_PERF_STRUCTPID) || defined (STAPCONF_PERF_COUNTER_CONTEXT)
						      NULL,
#else
						      -1,
#endif
						      stp->callback
#ifdef STAPCONF_PERF_COUNTER_CONTEXT
						      , NULL
#endif
						      );

	    if (IS_ERR(*event)) {
	      long rc = PTR_ERR(*event);
	      *event = NULL;
	      /* Roll back any counters already created on other cpus. */
	      _stp_perf_del(stp);
	      return rc;
	    }
	  }
	} /* (stp->system_wide) */
	return 0;
}

/** Delete performance event.
 * Call this to shutdown one performance event sampling
 *
 * @param stp Handle for the event to be unregistered.
 */
static void _stp_perf_del (struct stap_perf_probe *stp)
{
  int c;

  /* Nothing to do for a missing probe or one that never got events. */
  if (stp == NULL || stp->e.events == NULL)
    return;

  if (stp->system_wide) {
    /* Release every per-cpu counter (offline cpus hold NULL slots),
       then free the per-cpu descriptor array itself. */
    for_each_possible_cpu(c) {
      struct perf_event **ev = per_cpu_ptr (stp->e.events, c);
      if (*ev != NULL)
	perf_event_release_kernel(*ev);
    }
    _stp_free_percpu (stp->e.events);
    stp->e.events = NULL;
  } else {
    /* Per-thread probe: release the single counter, if any. */
    if (stp->e.t.per_thread_event != NULL)
      perf_event_release_kernel(stp->e.t.per_thread_event);
    stp->e.t.per_thread_event = NULL;
  }
}

/** Delete many performance events in reverse order.
 * Call this to shutdown all performance event sampling
 *
 * @param probes A pointer array for the events to be unregistered.
 * @param n The number of events in the array.
 */
static void _stp_perf_del_n (struct stap_perf_probe *probes, size_t n)
{
  size_t i;
  /* Tear events down in reverse of their registration order. */
  for (i = n; i > 0; i--)
    _stp_perf_del(&probes[i - 1]);
}

/* Parameter/result bundle passed to _stp_perf_init_work via its
 * embedded work_struct; lives on the caller's stack in _stp_perf_init_n. */
struct _stp_perf_work {
  struct work_struct work;	/* embedded workqueue item */
  struct stap_perf_probe *probes; /* array of probes to initialize */
  size_t nprobes;		/* number of entries in probes[] */
  const char* probe_point;	/* on failure: pp of the failing probe */
  int rc;			/* 0 on success, else the failing error code */
};

/** Initialize many performance events from a workqueue
 * Even though we're using the kernel interface, perf checks CAP_SYS_ADMIN,
 * which our mere @stapdev user may not have.  By running via a workqueue,
 * we'll be in an events/X kernel thread with sufficient privileges.
 *
 * @param work The _stp_perf_work encapsulating _stp_perf_init_n parameters.
 */
static void _stp_perf_init_work (struct work_struct *work)
{
  size_t i;
  struct _stp_perf_work *pwork =
    container_of(work, struct _stp_perf_work, work);

  for (i = 0; i < pwork->nprobes; ++i) {
    struct stap_perf_probe* stp = &pwork->probes[i];

    /* Per-thread probes without a task-finder target are skipped here:
       they are initialized later, once their task is known (see
       _stp_perf_read_init), and pwork->rc keeps its previous value. */
    if (stp->system_wide)
      pwork->rc = _stp_perf_init(stp, NULL);
    else if (stp->task_finder)
#ifdef STP_PERF_USE_TASK_FINDER
      pwork->rc = stap_register_task_finder_target(&stp->e.t.tgt);
#else
      /* NOTE(review): positive EINVAL, unlike the negative errno codes
	 used elsewhere in this file; callers only test for non-zero,
	 but confirm no caller interprets the sign. */
      pwork->rc = EINVAL;
#endif

    if (pwork->rc) {
      /* Record which probe failed and undo the i probes already set up. */
      pwork->probe_point = stp->probe->pp;
      _stp_perf_del_n(pwork->probes, i);
      break;
    }
  }
}

/** Initialize many performance events
 * Call this to start all performance event sampling
 *
 * @param probes A pointer array for the events to be registered.
 * @param n The number of events in the array.
 * @param ppfail A pointer to return the probe_point on failure.
 */
static int _stp_perf_init_n (struct stap_perf_probe *probes, size_t n,
			     const char **ppfail)
{
  /* Run the actual initialization from a workqueue thread, which has
     the privileges perf demands (see _stp_perf_init_work's doc).
     Designated initializers zero probe_point and rc. */
  struct _stp_perf_work pwork = { .nprobes = n, .probes = probes };

  INIT_WORK_ONSTACK(&pwork.work, _stp_perf_init_work);
  schedule_work(&pwork.work);
  flush_work(&pwork.work);	/* block until the worker has finished */

  if (pwork.rc != 0)
    *ppfail = pwork.probe_point;

  destroy_work_on_stack(&pwork.work);
  return pwork.rc;
}


/*
The first call to _stp_perf_init, made via systemtap_module_init at runtime,
sets up the aggregate (system-wide) counters.  Per-thread counters must be set
up later, by calling _stp_perf_init again once the thread is known.  A
per-thread perf counter is declared by a counter("var") suffix on the perf
probe and is created by perf_builder.  The counter is read on demand via the
"@perf("var")" builtin, which is treated as an expression right-hand side that
reads the perf counter associated with the previously declared counter; it is
expanded by dwarf_var_expanding_visitor.
*/

static int _stp_perf_read_init (unsigned i, struct task_struct* task)
{
  /* Choose the stap_perf_probes entry */
  struct stap_perf_probe* stp = & stap_perf_probes[i];

  return _stp_perf_init (stp, task);
}


/** Read the current value of a per-thread perf counter.
 * Returns the counter value, or 0 (after _stp_error) on a bad index or
 * an uninitialized counter.  May sleep, so must not be called from
 * atomic context.
 *
 * @param ncpu Unused here; kept for interface compatibility.
 * @param i Index into the stap_perf_probes[] array.
 */
long _stp_perf_read (int ncpu, unsigned i)
{
  /* Choose the stap_perf_probes entry */
  struct stap_perf_probe* stp;
  u64 enabled, running;

  /* Valid indices are 0 .. nelems-1, so reject i >= nelems.  The
     previous '>' comparison was off by one and allowed a read one
     element past the end of the array. */
  if (i >= sizeof(stap_perf_probes)/sizeof(struct stap_perf_probe))
    {
      _stp_error ("_stp_perf_read\n");
      return 0;
    }
  stp = & stap_perf_probes[i];

  /* The address of an array element is never NULL; the real guard is
     the per_thread_event check (counter not yet set up for this task). */
  if (stp == NULL || stp->e.t.per_thread_event == NULL)
    {
      _stp_error ("_stp_perf_read\n");
      return 0;
    }

  might_sleep();
  return perf_event_read_value (stp->e.t.per_thread_event, &enabled, &running);
}


#endif /* _PERF_C_ */