#include <scx/common.bpf.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
/* License string placed in the "license" ELF section of the BPF object. */
char _license[] SEC("license") = "GPL";
/*
 * Number of event slots in scx_event_idx[] and in each task's
 * start[]/agg[] arrays.
 */
#define SCX_MAX_PMU_COUNTERS (1)
/*
 * Perf event array read by scx_pmu_event_start()/scx_pmu_event_stop()
 * through bpf_perf_event_read_value() with BPF_F_CURRENT_CPU, i.e. the
 * entry is selected by the CPU the program is running on.
 *
 * NOTE(review): nothing in this file populates the map; presumably the
 * loader stores perf event file descriptors here - confirm against the
 * userspace side.
 */
struct {
__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
__uint(key_size, sizeof(__u32));
__uint(value_size, sizeof(int));
__uint(max_entries, 4096);
} scx_pmu_map SEC(".maps");
/*
 * Per-task PMU bookkeeping kept in the scx_pmu_tasks task storage.
 * Array entries are indexed by event slot (see scx_event_idx[]).
 */
struct scx_pmu_counters {
/* Raw counter value recorded by the last scx_pmu_event_start(). */
u64 start[SCX_MAX_PMU_COUNTERS];
/* Sum of counter deltas accumulated so far; returned by scx_pmu_read(). */
u64 agg[SCX_MAX_PMU_COUNTERS];
/*
 * Set once scx_pmu_event_stop() has seen enabled != running for this
 * task, so that the "SWITCHED" message is printed at most once per task.
 */
bool switched;
/*
 * Value of scx_pmu_gen when the counters were last updated. A mismatch
 * makes start/stop reset agg[].
 * NOTE(review): this is u32 while scx_pmu_gen is u64; the stored value is
 * truncated, so the comparison would never match again once scx_pmu_gen
 * exceeds 32 bits.
 */
u32 gen;
};
/*
 * Slot table mapping a slot index to the installed event identifier.
 * A value of 0 marks the slot as free.
 */
u64 scx_event_idx[SCX_MAX_PMU_COUNTERS];
/*
 * Generation counter, incremented by scx_pmu_install() and
 * scx_pmu_uninstall(). It starts at 1, so the generation 0 written by
 * scx_pmu_task_init() is always treated as stale.
 */
u64 scx_pmu_gen = 1;
/*
 * Task-local storage holding one struct scx_pmu_counters per task.
 * Entries are created on demand by the start/stop/init paths and removed
 * by scx_pmu_task_fini().
 */
struct {
__uint(type, BPF_MAP_TYPE_TASK_STORAGE);
__uint(map_flags, BPF_F_NO_PREALLOC);
__type(key, u32);
__type(value, struct scx_pmu_counters);
} scx_pmu_tasks SEC(".maps");
/*
 * Close the current measurement window of task @p.
 *
 * For every installed event slot the current counter value is read and the
 * delta since the value stored in start[] is added to agg[]. If the task's
 * generation does not match scx_pmu_gen, the slot's aggregate is reset
 * instead and no counter is read.
 *
 * Returns 0 on success, -ENOENT if the task storage cannot be obtained, or
 * the error reported by bpf_perf_event_read_value().
 */
int scx_pmu_event_stop(struct task_struct __arg_trusted *p)
{
	struct bpf_perf_event_value sample;
	struct scx_pmu_counters *tc;
	int slot;
	int err;

	tc = bpf_task_storage_get(&scx_pmu_tasks, p, 0, BPF_LOCAL_STORAGE_GET_F_CREATE);
	if (!tc)
		return -ENOENT;

	bpf_for(slot, 0, SCX_MAX_PMU_COUNTERS) {
		/* Nothing installed in this slot. */
		if (!scx_event_idx[slot])
			continue;

		/* Counters belong to an older event configuration: drop them. */
		if (unlikely(tc->gen != scx_pmu_gen)) {
			tc->agg[slot] = 0;
			continue;
		}

		err = bpf_perf_event_read_value(&scx_pmu_map, BPF_F_CURRENT_CPU,
						&sample, sizeof(sample));
		if (err)
			return err;

		/* Report an enabled/running time mismatch once per task. */
		if (unlikely(sample.enabled != sample.running && !tc->switched)) {
			bpf_printk("SWITCHED: %ld vs %ld", sample.enabled, sample.running);
			tc->switched = true;
		}

		tc->agg[slot] += sample.counter - tc->start[slot];
	}

	/* The task is now in sync with the current event configuration. */
	tc->gen = scx_pmu_gen;

	return 0;
}
/*
 * Open a new measurement window for task @p.
 *
 * For every installed event slot the current counter value is read and
 * stored in start[]. When @update is true, the delta since the previous
 * start[] value is first added to agg[], so a long-running task can be
 * sampled without waiting for scx_pmu_event_stop().
 *
 * If the task's generation does not match scx_pmu_gen, agg[] is reset and
 * the @update accumulation is skipped for this call: start[] was recorded
 * under a previous event configuration (or is still zero in freshly created
 * storage), so a delta against it would be meaningless. This matches what
 * scx_pmu_event_stop() does in the same situation.
 *
 * Returns 0 on success, -ENOENT if the task storage cannot be obtained, or
 * the error reported by bpf_perf_event_read_value().
 */
int scx_pmu_event_start(struct task_struct __arg_trusted *p, bool update)
{
	struct bpf_perf_event_value value;
	struct scx_pmu_counters *cntrs;
	bool stale;
	int idx;
	int ret;

	cntrs = bpf_task_storage_get(&scx_pmu_tasks, p, 0, BPF_LOCAL_STORAGE_GET_F_CREATE);
	if (!cntrs)
		return -ENOENT;

	bpf_for(idx, 0, SCX_MAX_PMU_COUNTERS) {
		/* Slot 0 value means no event is installed here. */
		if (scx_event_idx[idx] == 0ULL)
			continue;

		stale = cntrs->gen != scx_pmu_gen;
		if (unlikely(stale))
			cntrs->agg[idx] = 0;

		ret = bpf_perf_event_read_value(&scx_pmu_map, BPF_F_CURRENT_CPU, &value, sizeof(value));
		if (ret)
			return ret;

		/* Only trust start[] if it was recorded in this generation. */
		if (update && !stale)
			cntrs->agg[idx] += value.counter - cntrs->start[idx];

		cntrs->start[idx] = value.counter;
	}

	cntrs->gen = scx_pmu_gen;

	return 0;
}
/*
 * Look up the slot that holds @event.
 *
 * Returns the slot index in [0, SCX_MAX_PMU_COUNTERS) when found, or
 * SCX_MAX_PMU_COUNTERS when no slot holds @event.
 *
 * The result is tracked in a separate variable because bpf_for() only
 * assigns the loop variable for values it actually yields: when the loop
 * runs to completion the variable keeps the last yielded index and never
 * becomes equal to the end bound, so it cannot be used as a "not found"
 * marker.
 */
static
int scx_pmu_event_to_idx(u64 event)
{
	int found = SCX_MAX_PMU_COUNTERS;
	int i;

	bpf_for(i, 0, SCX_MAX_PMU_COUNTERS) {
		if (scx_event_idx[i] == event) {
			found = i;
			break;
		}
	}

	return found;
}
/*
 * Find the first unused event slot (one whose scx_event_idx[] entry is 0).
 *
 * Returns the slot index in [0, SCX_MAX_PMU_COUNTERS) when a free slot
 * exists, or SCX_MAX_PMU_COUNTERS when every slot is occupied.
 *
 * The result is tracked in a separate variable because bpf_for() only
 * assigns the loop variable for values it actually yields: when the loop
 * runs to completion the variable keeps the last yielded index, which would
 * otherwise make a full table look like "last slot is free".
 */
static
int scx_pmu_find_free_idx(void)
{
	int found = SCX_MAX_PMU_COUNTERS;
	int i;

	bpf_for(i, 0, SCX_MAX_PMU_COUNTERS) {
		if (scx_event_idx[i] == 0ULL) {
			found = i;
			break;
		}
	}

	return found;
}
/*
 * Create (or fetch) the PMU task storage for @p and mark it with
 * generation 0.
 *
 * Returns 0 on success or -ENOMEM if the storage cannot be obtained.
 */
__weak
int scx_pmu_task_init(struct task_struct __arg_trusted *p)
{
	struct scx_pmu_counters *tc =
		bpf_task_storage_get(&scx_pmu_tasks, p, 0, BPF_LOCAL_STORAGE_GET_F_CREATE);

	if (tc == NULL)
		return -ENOMEM;

	tc->gen = 0;

	return 0;
}
/*
 * Drop the PMU task storage of @p.
 *
 * The result of the delete is deliberately not propagated; this function
 * always returns 0.
 */
__weak
int scx_pmu_task_fini(struct task_struct __arg_trusted *p)
{
	(void)bpf_task_storage_delete(&scx_pmu_tasks, p);

	return 0;
}
/*
 * Register @event in the slot returned by scx_pmu_find_free_idx() and bump
 * scx_pmu_gen, which makes every task's counters stale.
 *
 * Returns 0 on success, or -ENOSPC when the returned slot index lies
 * outside [0, SCX_MAX_PMU_COUNTERS).
 */
__weak
int scx_pmu_install(u64 event)
{
	int slot = scx_pmu_find_free_idx();

	if (unlikely(slot < 0 || slot >= SCX_MAX_PMU_COUNTERS))
		return -ENOSPC;

	scx_event_idx[slot] = event;
	scx_pmu_gen++;

	return 0;
}
/*
 * Release the slot that scx_pmu_event_to_idx() reports for @event by
 * writing 0 into it, and bump scx_pmu_gen, which makes every task's
 * counters stale.
 *
 * Returns 0 on success, or -ENOENT when the reported slot index lies
 * outside [0, SCX_MAX_PMU_COUNTERS).
 */
__weak
int scx_pmu_uninstall(u64 event)
{
	int slot = scx_pmu_event_to_idx(event);

	if (unlikely(slot < 0 || slot >= SCX_MAX_PMU_COUNTERS))
		return -ENOENT;

	scx_event_idx[slot] = 0;
	scx_pmu_gen++;

	return 0;
}
/*
 * Fetch the aggregated counter of @event for task @p.
 *
 * @p:     task whose counters are read; its storage is looked up without
 *         being created.
 * @event: event identifier, resolved through scx_pmu_event_to_idx().
 * @value: output location for the aggregate.
 * @clear: when true, the aggregate is zeroed after it has been copied out.
 *
 * Returns 0 on success, -EINVAL if the slot index is out of range or
 * @value is NULL, and -ENOENT if @p has no PMU task storage.
 */
__weak
int scx_pmu_read(struct task_struct __arg_trusted *p, u64 event, u64 *value, bool clear)
{
	struct scx_pmu_counters *tc;
	int slot = scx_pmu_event_to_idx(event);

	if (slot == SCX_MAX_PMU_COUNTERS)
		return -EINVAL;

	tc = bpf_task_storage_get(&scx_pmu_tasks, p, 0, 0);
	if (!tc)
		return -ENOENT;

	/* Validate the output pointer and re-check the slot before indexing. */
	if (unlikely(!value || slot < 0 || slot >= SCX_MAX_PMU_COUNTERS))
		return -EINVAL;

	*value = tc->agg[slot];
	if (clear)
		tc->agg[slot] = 0;

	return 0;
}
/*
 * sched_switch tracepoint handler: stop counting for the outgoing task
 * (ctx[1]) and start counting for the incoming task (ctx[2]).
 *
 * Tasks with pid 0 are skipped on either side. If stopping the outgoing
 * task fails, its error is returned and the incoming task is not started.
 */
SEC("?tp_btf/sched_switch")
int scx_pmu_switch_tc(u64 *ctx)
{
	struct task_struct *prev = (struct task_struct *)ctx[1];
	struct task_struct *next = (struct task_struct *)ctx[2];
	int ret;

	if (prev->pid) {
		ret = scx_pmu_event_stop(prev);
		if (ret)
			return ret;
	}

	if (!next->pid)
		return 0;

	return scx_pmu_event_start(next, false);
}
/*
 * fentry handler on scx_tick: fold the counter delta of the current task
 * into its aggregate and restart its measurement window.
 *
 * Nothing is done when there is no current task or its pid is 0. The result
 * of scx_pmu_event_start() is ignored; the program always returns 0.
 */
SEC("?fentry/scx_tick")
int scx_pmu_tick_tc(u64 *ctx)
{
	struct task_struct *cur = bpf_get_current_task_btf();

	if (cur && cur->pid)
		scx_pmu_event_start(cur, true);

	return 0;
}