#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include <linux/ptrace.h>
#include <linux/types.h>
/* Short kernel-style aliases for the fixed-width UAPI integer types. */
typedef __u32 u32;
typedef __u64 u64;
/* Number of u64 frame slots reserved per entry in the stack-trace maps below. */
#define PERF_MAX_STACK_DEPTH 127
/*
 * thread_last_offcpu: thread id -> bpf_ktime_get_ns() timestamp.
 *
 * The sched_switch handler stores an entry for the outgoing thread and
 * removes the entry of the incoming thread once it has been read.
 */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__type(key, u32);
	__type(value, u64);
	__uint(max_entries, 10240);
} thread_last_offcpu SEC(".maps");
/*
 * tid_to_tgid: thread id -> thread group id.
 *
 * Filled in by the sched_switch handler for the outgoing thread (from the
 * upper 32 bits of bpf_get_current_pid_tgid()) and consulted when an event
 * for that thread is emitted later. Nothing in this file removes entries.
 */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__type(key, u32);
	__type(value, u32);
	__uint(max_entries, 10240);
} tid_to_tgid SEC(".maps");
/*
 * Record pushed to user space through the `events` perf buffer for every
 * off-CPU interval that is reported.
 *
 * Field order is part of the contract with the consumer and must not change.
 * NOTE(review): on targets where u64 is 8-byte aligned there are 4 bytes of
 * implicit padding between prev_state and offcpu_time_ns.
 */
struct offcpu_event {
	u32 pid;             /* thread group id, or the tid when no tgid is known */
	u32 tid;             /* thread that was switched back in */
	u32 prev_state;      /* prev_state from the tracepoint, truncated to 32 bits */
	u64 offcpu_time_ns;  /* end_time_ns - start_time_ns */
	u64 start_time_ns;   /* timestamp stored when the thread was switched out */
	u64 end_time_ns;     /* timestamp taken when the thread was switched in */
	u32 user_stack_id;   /* bpf_get_stackid() result for user_stackmap */
	u32 kernel_stack_id; /* bpf_get_stackid() result for kernel_stackmap */
};
/*
 * events: perf event array used as the output channel; the sched_switch
 * handler writes struct offcpu_event records into it with
 * bpf_perf_event_output().
 */
struct {
	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
	__uint(key_size, sizeof(u32));
	__uint(value_size, sizeof(u32));
} events SEC(".maps");
/*
 * user_stackmap: stack id -> user-space call chain.
 *
 * Each value has room for PERF_MAX_STACK_DEPTH u64 slots. Populated by
 * bpf_get_stackid() with BPF_F_USER_STACK in the sched_switch handler.
 */
struct {
	__uint(type, BPF_MAP_TYPE_STACK_TRACE);
	__uint(key_size, sizeof(u32));
	__uint(value_size, PERF_MAX_STACK_DEPTH * sizeof(u64));
	__uint(max_entries, 1024);
} user_stackmap SEC(".maps");
/*
 * kernel_stackmap: stack id -> kernel call chain.
 *
 * Each value has room for PERF_MAX_STACK_DEPTH u64 slots. Populated by
 * bpf_get_stackid() (without BPF_F_USER_STACK) in the sched_switch handler.
 */
struct {
	__uint(type, BPF_MAP_TYPE_STACK_TRACE);
	__uint(key_size, sizeof(u32));
	__uint(value_size, PERF_MAX_STACK_DEPTH * sizeof(u64));
	__uint(max_entries, 1024);
} kernel_stackmap SEC(".maps");
/*
 * Reporting threshold in nanoseconds (1 ms). Off-CPU intervals that are not
 * strictly longer than this are dropped by the sched_switch handler.
 */
#define MIN_OFFCPU_TIME_NS 1000000ULL
/*
 * Argument layout handed to the sched/sched_switch tracepoint program.
 *
 * NOTE(review): `pad` presumably stands in for the 8 bytes of common
 * tracepoint header fields; verify the offsets against
 * /sys/kernel/tracing/events/sched/sched_switch/format on the target kernel.
 */
struct sched_switch_args {
	u64 pad;

	/* Task being switched out. */
	char prev_comm[16];
	int prev_pid;
	int prev_prio;
	long prev_state;

	/* Task being switched in. */
	char next_comm[16];
	int next_pid;
	int next_prio;
};
/*
 * sched_switch handler: measures how long each thread stays off-CPU.
 *
 * On every context switch it
 *   1. remembers the tgid of the outgoing thread and stamps the time at which
 *      it left the CPU, and
 *   2. looks up the stamp of the incoming thread; if that thread was off-CPU
 *      for more than MIN_OFFCPU_TIME_NS, an offcpu_event is sent to user
 *      space through the `events` perf buffer.
 *
 * @ctx: tracepoint arguments (prev and next task description).
 * Returns 0 in all cases.
 *
 * NOTE(review): at sched_switch the current task is still the outgoing one
 * (this is why bpf_get_current_pid_tgid() is used for prev's tgid), so the
 * stack ids captured below describe the outgoing task, while the event is
 * attributed to next_tid. Capturing the stacks at switch-out time and storing
 * them next to the timestamp would fix the attribution, but that requires a
 * different map value layout.
 */
SEC("tracepoint/sched/sched_switch")
int trace_sched_switch(struct sched_switch_args *ctx) {
	u64 now = bpf_ktime_get_ns();

	/* Outgoing thread: record tgid (when known) and the switch-out time. */
	u32 prev_tid = (u32)ctx->prev_pid;
	u32 prev_tgid = (u32)(bpf_get_current_pid_tgid() >> 32);
	if (prev_tgid != 0) {
		bpf_map_update_elem(&tid_to_tgid, &prev_tid, &prev_tgid, BPF_ANY);
	}
	bpf_map_update_elem(&thread_last_offcpu, &prev_tid, &now, BPF_ANY);

	/* Incoming thread: nothing to report unless we saw it leave the CPU. */
	u32 next_tid = (u32)ctx->next_pid;
	u64 *last_ts = bpf_map_lookup_elem(&thread_last_offcpu, &next_tid);
	if (!last_ts) {
		return 0;
	}

	/*
	 * Copy the timestamp out BEFORE deleting the element: once deleted, the
	 * slot may be reused for another key and the pointer must not be read.
	 */
	u64 start_ns = *last_ts;
	bpf_map_delete_elem(&thread_last_offcpu, &next_tid);

	/* A start time in the future would wrap to a huge unsigned interval. */
	if (start_ns > now) {
		return 0;
	}
	u64 off_cpu_time = now - start_ns;
	if (off_cpu_time <= MIN_OFFCPU_TIME_NS) {
		return 0;
	}

	/* Fall back to the tid when the tgid of this thread was never recorded. */
	u32 *tgid_ptr = bpf_map_lookup_elem(&tid_to_tgid, &next_tid);
	u32 event_pid = tgid_ptr ? *tgid_ptr : next_tid;

	/*
	 * bpf_get_stackid() returns a negative errno on failure; stored in a u32
	 * it shows up as a very large id, which the consumer has to treat as
	 * "no stack".
	 */
	u32 user_stack_id = bpf_get_stackid(ctx, &user_stackmap,
					    BPF_F_USER_STACK | BPF_F_FAST_STACK_CMP);
	u32 kernel_stack_id = bpf_get_stackid(ctx, &kernel_stackmap,
					      BPF_F_FAST_STACK_CMP);

	/*
	 * Zero the whole record first so that the padding bytes inside
	 * struct offcpu_event are initialized; otherwise the verifier can
	 * reject the stack read below, or stale stack bytes reach user space.
	 */
	struct offcpu_event event;
	__builtin_memset(&event, 0, sizeof(event));
	event.pid = event_pid;
	event.tid = next_tid;
	event.prev_state = (u32)ctx->prev_state;
	event.offcpu_time_ns = off_cpu_time;
	event.start_time_ns = start_ns;
	event.end_time_ns = now;
	event.user_stack_id = user_stack_id;
	event.kernel_stack_id = kernel_stack_id;

	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU,
			      &event, sizeof(event));
	return 0;
}
/* License string placed in the "license" ELF section of the BPF object. */
char LICENSE[] SEC("license") = "GPL";