#include <scx/common.bpf.h>
#include "intf.h"
#include "lavd.bpf.h"
#include <errno.h>
#include <stdbool.h>
#include <bpf/bpf_core_read.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
/* License string placed in the "license" section of the object. */
char _license[] SEC("license") = "GPL";
/*
 * Global logical clock. advance_cur_logical_clk() moves it forward with a
 * compare-and-swap, and lavd_init() resets it to 0.
 */
static u64 cur_logical_clk = LAVD_DL_COMPETE_WINDOW;
/*
 * Global service-time watermark: update_stat_for_stopping() raises it when a
 * stopping task has a larger svc_time, and lavd_enable() copies it into a
 * task's context.
 */
static u64 cur_svc_time;
/* Lower bound used when clamping a boosted slice in calc_time_slice(). */
const volatile u64 slice_min_ns = LAVD_SLICE_MIN_NS_DFL;
/*
 * NOTE(review): not referenced in the part of the file visible here;
 * presumably the upper slice bound used by the included sources -- confirm.
 */
const volatile u64 slice_max_ns = LAVD_SLICE_MAX_NS_DFL;
#include "util.bpf.c"
#include "power.bpf.c"
#include "introspec.bpf.c"
#include "preempt.bpf.c"
#include "lock.bpf.c"
#include "idle.bpf.c"
#include "balance.bpf.c"
#include "sys_stat.bpf.c"
#include "lat_cri.bpf.c"
/*
 * Move the global logical clock towards the virtual time of @p.
 *
 * The clock only ever moves forward: if it has already reached the task's
 * dsq_vtime nothing happens. Otherwise it is advanced by the gap divided by
 * the number of queued tasks (at least 1). The update is a compare-and-swap
 * retried up to LAVD_MAX_RETRY times when another CPU changed the clock.
 */
static void advance_cur_logical_clk(struct task_struct *p)
{
	u64 task_vtime = READ_ONCE(p->scx.dsq_vtime);
	u64 seen_clk = READ_ONCE(cur_logical_clk);
	u64 waiters, step, target, observed;
	int attempt;

	bpf_for(attempt, 0, LAVD_MAX_RETRY) {
		/* The clock is already at or past the task's virtual time. */
		if (seen_clk >= task_vtime)
			return;

		waiters = max(sys_stat.nr_queued_task, 1);
		step = (task_vtime - seen_clk) / waiters;
		target = seen_clk + step;

		observed = __sync_val_compare_and_swap(&cur_logical_clk,
						       seen_clk, target);
		/* The swap took effect when the old value is what we expected. */
		if (observed == seen_clk)
			return;

		/* Somebody else moved the clock; retry from the fresh value. */
		seen_clk = observed;
	}
}
static u64 calc_time_slice(struct task_ctx *taskc, struct cpu_ctx *cpuc)
{
if (!taskc || !cpuc)
return LAVD_SLICE_MAX_NS_DFL;
if (!no_slice_boost && !cpuc->nr_pinned_tasks &&
(taskc->avg_runtime >= sys_stat.slice)) {
if (can_boost_slice()) {
u64 s = taskc->avg_runtime + LAVD_SLICE_BOOST_BONUS;
taskc->slice = clamp(s, slice_min_ns,
LAVD_SLICE_BOOST_MAX);
set_task_flag(taskc, LAVD_FLAG_SLICE_BOOST);
return taskc->slice;
}
if (taskc->lat_cri > sys_stat.avg_lat_cri) {
u64 b = (sys_stat.slice * taskc->lat_cri) /
(sys_stat.avg_lat_cri + 1);
u64 s = sys_stat.slice + b;
taskc->slice = clamp(s, slice_min_ns,
min(taskc->avg_runtime,
sys_stat.slice * 2));
set_task_flag(taskc, LAVD_FLAG_SLICE_BOOST);
return taskc->slice;
}
}
taskc->slice = sys_stat.slice;
reset_task_flag(taskc, LAVD_FLAG_SLICE_BOOST);
return taskc->slice;
}
/*
 * Refresh task and CPU statistics at the moment a task starts running.
 *
 * @p:     task that starts running (not dereferenced here).
 * @taskc: its task context.
 * @cpuc:  context of the CPU it runs on.
 * @now:   current timestamp.
 */
static void update_stat_for_running(struct task_struct *p,
				    struct task_ctx *taskc,
				    struct cpu_ctx *cpuc, u64 now)
{
	struct cpu_ctx *from_cpuc;
	u64 waited, period;

	/* Update the run frequency once the task has been scheduled before. */
	if (have_scheduled(taskc)) {
		waited = time_delta(now, taskc->last_quiescent_clk);
		period = taskc->avg_runtime + waited;
		if (period)
			taskc->run_freq = calc_avg_freq(taskc->run_freq, period);
	}

	/* Only tracked while monitoring; must precede the clock update below. */
	if (is_monitored)
		taskc->resched_interval =
			time_delta(now, taskc->last_running_clk);

	/* Shift the CPU history: the old CPU becomes the previous one. */
	taskc->prev_cpu_id = taskc->cpu_id;
	taskc->cpu_id = cpuc->cpu_id;

	/* The wake-up markers are consumed once the task runs. */
	reset_task_flag(taskc, LAVD_FLAG_IS_WAKEUP);
	reset_task_flag(taskc, LAVD_FLAG_IS_SYNC_WAKEUP);

	taskc->last_running_clk = now;
	taskc->last_measured_clk = now;

	reset_lock_futex_boost(taskc, cpuc);

	/* Per-CPU latency-criticality statistics. */
	if (taskc->lat_cri > cpuc->max_lat_cri)
		cpuc->max_lat_cri = taskc->lat_cri;
	cpuc->sum_lat_cri += taskc->lat_cri;
	cpuc->nr_sched++;

	/* Performance criticality is tracked only when little cores exist. */
	if (have_little_core) {
		if (taskc->perf_cri > cpuc->max_perf_cri)
			cpuc->max_perf_cri = taskc->perf_cri;
		if (taskc->perf_cri < cpuc->min_perf_cri)
			cpuc->min_perf_cri = taskc->perf_cri;
		cpuc->sum_perf_cri += taskc->perf_cri;
	}

	/* Mirror the running task's state into the CPU context. */
	cpuc->flags = taskc->flags;
	cpuc->lat_cri = taskc->lat_cri;
	cpuc->running_clk = now;
	cpuc->est_stopping_clk = get_est_stopping_clk(taskc, now);

	if (is_lat_cri(taskc))
		cpuc->nr_lat_cri++;
	if (is_perf_cri(taskc))
		cpuc->nr_perf_cri++;

	/* Count a migration when the previous CPU is in another domain. */
	from_cpuc = get_cpu_ctx_id(taskc->prev_cpu_id);
	if (from_cpuc && from_cpuc->cpdom_id != cpuc->cpdom_id)
		cpuc->nr_x_migration++;

	reset_suspended_duration(cpuc);
}
/*
 * Charge the time a task ran since its last measurement.
 *
 * @p:     running task; its scx.weight divides the runtime to get service time.
 * @taskc: task context whose accumulated runtime and service time grow.
 * @cpuc:  CPU context whose totals grow; its suspended duration is fetched
 *         and reset.
 * @now:   current timestamp; becomes the new last_measured_clk.
 */
static void account_task_runtime(struct task_struct *p,
				 struct task_ctx *taskc,
				 struct cpu_ctx *cpuc,
				 u64 now)
{
	u64 suspended = get_suspended_duration_and_reset(cpuc);
	/* The measurement window starts after any suspended period. */
	u64 ran = time_delta(now, taskc->last_measured_clk + suspended);
	u64 weighted = ran / p->scx.weight;
	u64 scaled = scale_cap_freq(ran, cpuc->cpu_id);

	/* Per-CPU totals. */
	WRITE_ONCE(cpuc->tot_svc_time, cpuc->tot_svc_time + weighted);
	WRITE_ONCE(cpuc->tot_sc_time, cpuc->tot_sc_time + scaled);

	/* Per-task totals. */
	taskc->acc_runtime += ran;
	taskc->svc_time += weighted;
	taskc->last_measured_clk = now;
}
/*
 * Refresh task statistics at the moment a task stops running.
 *
 * @p:     task that stops.
 * @taskc: its task context.
 * @cpuc:  context of the CPU it was running on.
 */
static void update_stat_for_stopping(struct task_struct *p,
				     struct task_ctx *taskc,
				     struct cpu_ctx *cpuc)
{
	u64 stop_clk = scx_bpf_now();

	/* Charge the final stretch, then fold it into the running average. */
	account_task_runtime(p, taskc, cpuc, stop_clk);
	taskc->avg_runtime = calc_avg(taskc->avg_runtime, taskc->acc_runtime);
	taskc->last_stopping_clk = stop_clk;

	if (is_monitored)
		taskc->last_slice_used =
			time_delta(stop_clk, taskc->last_running_clk);

	/* The waker's latency criticality is cleared once the task stops. */
	taskc->lat_cri_waker = 0;

	/* Raise the global service-time watermark when this task is ahead. */
	if (taskc->svc_time > READ_ONCE(cur_svc_time))
		WRITE_ONCE(cur_svc_time, taskc->svc_time);

	reset_lock_futex_boost(taskc, cpuc);
}
/*
 * Refresh runtime statistics when a task's slice is refilled, i.e. the task
 * keeps running without going through a stop/start cycle.
 *
 * @p:     task whose slice is refilled.
 * @taskc: its task context.
 * @cpuc:  context of the CPU it runs on.
 */
static void update_stat_for_refill(struct task_struct *p,
				   struct task_ctx *taskc,
				   struct cpu_ctx *cpuc)
{
	/* Charge the runtime up to this point. */
	account_task_runtime(p, taskc, cpuc, scx_bpf_now());

	/* Fold the accumulated runtime into the running average. */
	taskc->avg_runtime = calc_avg(taskc->avg_runtime, taskc->acc_runtime);
}
/*
 * ops.select_cpu callback: pick a CPU for a waking task.
 *
 * @p:          task being woken up.
 * @prev_cpu:   CPU the task ran on previously; also the fallback result.
 * @wake_flags: SCX_WAKE_* flags; SCX_WAKE_SYNC is recorded on the task.
 *
 * When an idle CPU was found and the DSQ of its compute domain is empty, the
 * task is inserted into the local DSQ right here.
 *
 * Returns the chosen CPU id, or @prev_cpu when the contexts cannot be looked
 * up or no CPU was picked.
 */
s32 BPF_STRUCT_OPS(lavd_select_cpu, struct task_struct *p, s32 prev_cpu,
		   u64 wake_flags)
{
	struct task_ctx *taskc = get_task_ctx(p);
	struct cpu_ctx *cpuc_cur = get_cpu_ctx();
	struct pick_ctx ictx = {
		.p = p,
		.taskc = taskc,
		.prev_cpu = prev_cpu,
		.cpuc_cur = cpuc_cur,
		.wake_flags = wake_flags,
	};
	struct cpu_ctx *target_cpuc;
	bool found_idle = false;
	s32 chosen;

	if (!taskc || !cpuc_cur)
		return prev_cpu;

	/* Remember whether this is a synchronous wake-up. */
	if (wake_flags & SCX_WAKE_SYNC)
		set_task_flag(taskc, LAVD_FLAG_IS_SYNC_WAKEUP);
	else
		reset_task_flag(taskc, LAVD_FLAG_IS_SYNC_WAKEUP);

	chosen = pick_idle_cpu(&ictx, &found_idle);
	if (chosen < 0)
		chosen = prev_cpu;
	taskc->suggested_cpu_id = chosen;

	if (!found_idle) {
		reset_task_flag(taskc, LAVD_FLAG_IDLE_CPU_PICKED);
		return chosen;
	}

	/* lavd_enqueue() reads this flag when the CPU was selected here. */
	set_task_flag(taskc, LAVD_FLAG_IDLE_CPU_PICKED);

	target_cpuc = get_cpu_ctx_id(chosen);
	if (!target_cpuc) {
		scx_bpf_error("Failed to lookup cpu_ctx: %d", chosen);
		return chosen;
	}

	/* Tasks are already queued on the domain's DSQ: no direct dispatch. */
	if (scx_bpf_dsq_nr_queued(cpdom_to_dsq(target_cpuc->cpdom_id)))
		return chosen;

	p->scx.dsq_vtime = calc_when_to_run(p, taskc);
	p->scx.slice = LAVD_SLICE_MAX_NS_DFL;
	scx_bpf_dsq_insert(p, SCX_DSQ_LOCAL, p->scx.slice, 0);
	return chosen;
}
/*
 * Tell whether a task may be inserted straight into a CPU's local DSQ.
 *
 * @dsq_id:  DSQ that must be empty for a direct dispatch.
 * @cpu:     target CPU; negative means no CPU was chosen.
 * @is_idle: whether the target CPU was found idle.
 *
 * Returns true only for an idle, valid CPU whose DSQ has nothing queued.
 */
static bool can_direct_dispatch(u64 dsq_id, s32 cpu, bool is_idle)
{
	if (!is_idle || cpu < 0)
		return false;

	return !scx_bpf_dsq_nr_queued(dsq_id);
}
/*
 * ops.enqueue callback: compute the task's deadline, choose a target CPU and
 * DSQ, queue the task, and kick a CPU to run it.
 *
 * @p:         task being enqueued.
 * @enq_flags: SCX_ENQ_* flags describing this enqueue.
 */
void BPF_STRUCT_OPS(lavd_enqueue, struct task_struct *p, u64 enq_flags)
{
	struct task_ctx *taskc;
	struct cpu_ctx *cpuc, *cpuc_cur;
	s32 task_cpu, cpu = -ENOENT;
	u64 cpdom_id;
	bool is_idle = false;

	taskc = get_task_ctx(p);
	cpuc_cur = get_cpu_ctx();
	if (!taskc || !cpuc_cur) {
		/*
		 * No CPU has been chosen yet (cpu is still -ENOENT), so name
		 * the task whose context lookup failed instead.
		 */
		scx_bpf_error("Failed to lookup context for task %d", p->pid);
		return;
	}

	/*
	 * A re-enqueued task keeps its wake-up flag and deadline; everything
	 * else gets both refreshed.
	 */
	if (!(enq_flags & SCX_ENQ_REENQ)) {
		if (enq_flags & SCX_ENQ_WAKEUP)
			set_task_flag(taskc, LAVD_FLAG_IS_WAKEUP);
		else
			reset_task_flag(taskc, LAVD_FLAG_IS_WAKEUP);
		p->scx.dsq_vtime = calc_when_to_run(p, taskc);
	}

	/* Placeholder slice; lavd_running() computes the real one. */
	p->scx.slice = LAVD_SLICE_MAX_NS_DFL;

	task_cpu = scx_bpf_task_cpu(p);
	if (!__COMPAT_is_enq_cpu_selected(enq_flags)) {
		/* No CPU was selected beforehand: pick a DSQ and CPU now. */
		cpdom_id = pick_proper_dsq(p, taskc, task_cpu, &cpu,
					   &is_idle, cpuc_cur);
		taskc->suggested_cpu_id = cpu;
		cpuc = get_cpu_ctx_id(cpu);
		if (!cpuc) {
			scx_bpf_error("Failed to lookup cpu_ctx %d", cpu);
			return;
		}
	} else {
		/*
		 * A CPU was already selected: use the task's CPU and the
		 * idle hint left on the task by lavd_select_cpu().
		 */
		cpu = scx_bpf_task_cpu(p);
		cpuc = get_cpu_ctx_id(cpu);
		if (!cpuc) {
			scx_bpf_error("Failed to lookup cpu_ctx %d", cpu);
			return;
		}
		cpdom_id = cpuc->cpdom_id;
		is_idle = test_task_flag(taskc, LAVD_FLAG_IDLE_CPU_PICKED);
		reset_task_flag(taskc, LAVD_FLAG_IDLE_CPU_PICKED);
	}

	/* Count pinned tasks waiting on this CPU; lavd_running() decrements. */
	if (is_pinned(p)) {
		__sync_fetch_and_add(&cpuc->nr_pinned_tasks, 1);
	}

	/*
	 * Queue the task: directly on the CPU's local DSQ when possible,
	 * otherwise by vtime on the per-CPU or per-domain DSQ.
	 */
	if (can_direct_dispatch(cpu_to_dsq(cpu), cpu, is_idle)) {
		scx_bpf_dsq_insert(p, SCX_DSQ_LOCAL_ON | cpu, p->scx.slice,
				   enq_flags);
	} else if (per_cpu_dsq) {
		scx_bpf_dsq_insert_vtime(p, cpu_to_dsq(cpu), p->scx.slice,
					 p->scx.dsq_vtime, enq_flags);
	} else {
		scx_bpf_dsq_insert_vtime(p, cpdom_to_dsq(cpdom_id), p->scx.slice,
					 p->scx.dsq_vtime, enq_flags);
	}

	/* An idle target only needs a kick; no victim search is required. */
	if (is_idle) {
		scx_bpf_kick_cpu(cpu, SCX_KICK_IDLE);
		return;
	}

	/* Otherwise look for a CPU to preempt, unless preemption is off. */
	if (!no_preemption)
		try_find_and_kick_victim_cpu(p, taskc, cpu, cpdom_to_dsq(cpdom_id));
}
/*
 * ops.dispatch callback: decide whether @cpu should consume tasks from its
 * DSQs and, when nothing was consumed, possibly let @prev continue with a
 * refreshed time slice.
 *
 * @cpu:  CPU that is asking for work.
 * @prev: task that was running on @cpu; may be NULL.
 *
 * A CPU that is in neither the active nor the overflow cpumask consumes only
 * when a queued task needs this particular CPU (pinned, migration-disabled,
 * or affinitized to CPUs outside both masks).
 */
void BPF_STRUCT_OPS(lavd_dispatch, s32 cpu, struct task_struct *prev)
{
struct cpu_ctx *cpuc;
struct task_ctx *taskc_prev = NULL;
struct bpf_cpumask *active, *ovrflw;
struct task_struct *p;
u32 dsq_type;
s32 new_cpu;
bool try_consume;
u64 dsq_ids[LAVD_DSQ_NR_TYPES];
int dsq_start = LAVD_DSQ_TYPE_CPDOM;
cpuc = get_cpu_ctx_id(cpu);
if (!cpuc) {
scx_bpf_error("Failed to lookup cpu_ctx %d", cpu);
return;
}
/* The two DSQs this CPU may pull from: its own and its domain's. */
dsq_ids[LAVD_DSQ_TYPE_CPU] = cpu_to_dsq(cpu);
dsq_ids[LAVD_DSQ_TYPE_CPDOM] = cpdom_to_dsq(cpuc->cpdom_id);
/*
 * prev is still queued and is_lock_holder_running() holds for this CPU:
 * skip consumption and go straight to refilling prev's slice.
 */
if (prev && (prev->scx.flags & SCX_TASK_QUEUED) &&
is_lock_holder_running(cpuc))
goto consume_prev;
/* With use_full_cpus() the active/overflow filtering below is skipped. */
if (use_full_cpus())
goto consume_out;
/* Tasks already sit on this CPU's own DSQ: consume them. */
if (per_cpu_dsq && scx_bpf_dsq_nr_queued(dsq_ids[LAVD_DSQ_TYPE_CPU]))
goto consume_out;
/* The cpumask pointers are read and used inside an RCU read section. */
bpf_rcu_read_lock();
active = active_cpumask;
ovrflw = ovrflw_cpumask;
if (!active || !ovrflw) {
scx_bpf_error("Failed to prepare cpumasks.");
try_consume = false;
goto unlock_out;
}
/* A CPU in the active or overflow set always consumes. */
if (bpf_cpumask_test_cpu(cpu, cast_mask(active)) ||
bpf_cpumask_test_cpu(cpu, cast_mask(ovrflw))) {
bpf_rcu_read_unlock();
goto consume_out;
}
/*
 * From here on this CPU is in neither set. Check whether prev itself
 * requires this CPU.
 */
if (prev) {
if (is_pinned(prev)) {
/* A pinned prev adds this CPU to the overflow set. */
bpf_cpumask_set_cpu(cpu, ovrflw);
bpf_rcu_read_unlock();
goto consume_out;
} else if (is_migration_disabled(prev)) {
/* Migration-disabled prev: consume without touching the overflow set. */
bpf_rcu_read_unlock();
goto consume_out;
}
/*
 * An affinitized prev that may run here and whose allowed CPUs
 * intersect neither set: add this CPU to the overflow set.
 */
taskc_prev = get_task_ctx(prev);
if (taskc_prev &&
test_task_flag(taskc_prev, LAVD_FLAG_IS_AFFINITIZED) &&
bpf_cpumask_test_cpu(cpu, prev->cpus_ptr) &&
!bpf_cpumask_intersects(cast_mask(active), prev->cpus_ptr) &&
!bpf_cpumask_intersects(cast_mask(ovrflw), prev->cpus_ptr)) {
bpf_cpumask_set_cpu(cpu, ovrflw);
bpf_rcu_read_unlock();
goto consume_out;
}
}
/*
 * Scan the queued tasks for one that needs this CPU. The per-CPU DSQ is
 * included only when per_cpu_dsq is set.
 */
if (per_cpu_dsq)
dsq_start = LAVD_DSQ_TYPE_CPU;
try_consume = false;
bpf_for(dsq_type, dsq_start, LAVD_DSQ_NR_TYPES) {
u64 dsq_id = dsq_ids[dsq_type];
bpf_for_each(scx_dsq, p, dsq_id, 0) {
struct task_ctx *taskc;
/*
 * Re-acquire the task through its pid; the reference taken here is
 * dropped with bpf_task_release() on every path below.
 * NOTE(review): presumably needed so the verifier treats p as a
 * trusted pointer -- confirm.
 */
p = bpf_task_from_pid(p->pid);
if (!p)
break;
if (is_pinned(p)) {
new_cpu = scx_bpf_task_cpu(p);
if (new_cpu == cpu) {
/* Pinned to this CPU: join the overflow set and consume. */
bpf_cpumask_set_cpu(new_cpu, ovrflw);
bpf_task_release(p);
try_consume = true;
break;
}
/*
 * Pinned elsewhere: put that CPU into the overflow set and kick it
 * if the bit was not set before.
 */
if (!bpf_cpumask_test_and_set_cpu(new_cpu, ovrflw))
scx_bpf_kick_cpu(new_cpu, SCX_KICK_IDLE);
bpf_task_release(p);
continue;
} else if (is_migration_disabled(p)) {
new_cpu = scx_bpf_task_cpu(p);
if (new_cpu == cpu) {
/* Migration-disabled on this CPU: consume. */
bpf_task_release(p);
try_consume = true;
break;
}
bpf_task_release(p);
continue;
}
/*
 * Skip tasks that are not affinitized or that can run on a CPU in
 * the active or overflow set.
 */
taskc = get_task_ctx(p);
if(taskc &&
(!test_task_flag(taskc, LAVD_FLAG_IS_AFFINITIZED) ||
bpf_cpumask_intersects(cast_mask(active), p->cpus_ptr) ||
bpf_cpumask_intersects(cast_mask(ovrflw), p->cpus_ptr))) {
bpf_task_release(p);
continue;
}
/*
 * The task has no usable CPU in either set: choose one of its allowed
 * CPUs and add it to the overflow set.
 */
new_cpu = find_cpu_in(p->cpus_ptr, cpuc);
if (new_cpu >= 0) {
if (new_cpu == cpu) {
bpf_cpumask_set_cpu(new_cpu, ovrflw);
bpf_task_release(p);
try_consume = true;
break;
}
else if (!bpf_cpumask_test_and_set_cpu(new_cpu, ovrflw))
scx_bpf_kick_cpu(new_cpu, SCX_KICK_IDLE);
}
bpf_task_release(p);
}
/* A task for this CPU was found: stop scanning further DSQs. */
if (try_consume)
break;
}
unlock_out:
bpf_rcu_read_unlock();
/* Nothing queued needs this CPU. */
if (!try_consume)
return;
consume_out:
/* Done if a task was consumed from one of the DSQs. */
if (consume_task(dsq_ids[LAVD_DSQ_TYPE_CPU], dsq_ids[LAVD_DSQ_TYPE_CPDOM]))
return;
/*
 * Nothing was consumed. If prev is still queued, account its runtime and
 * give it a fresh slice. The consume_prev label sits inside the if body
 * so the early jump above bypasses the condition.
 */
if (prev && prev->scx.flags & SCX_TASK_QUEUED) {
consume_prev:
/* Reuse the context looked up earlier, if any. */
taskc_prev = taskc_prev ?: get_task_ctx(prev);
if (taskc_prev) {
update_stat_for_refill(prev, taskc_prev, cpuc);
prev->scx.slice = calc_time_slice(taskc_prev, cpuc);
if (is_lock_holder_running(cpuc))
reset_lock_futex_boost(taskc_prev, cpuc);
cpuc->flags = taskc_prev->flags;
}
}
}
/*
 * ops.runnable callback: reset the wakee's accumulated runtime and, for a
 * plain wake-up, update the waker's wake frequency and hand its latency
 * criticality to the wakee.
 *
 * @p:         task that became runnable (the wakee).
 * @enq_flags: SCX_ENQ_* flags of the enqueue that made it runnable.
 */
void BPF_STRUCT_OPS(lavd_runnable, struct task_struct *p, u64 enq_flags)
{
	struct task_ctx *wakee_ctx, *waker_ctx;
	struct task_struct *cur;
	u64 now, gap;

	wakee_ctx = get_task_ctx(p);
	if (!wakee_ctx) {
		scx_bpf_error("Failed to lookup task_ctx for task %d", p->pid);
		return;
	}

	/* Start a new accounting period for the wakee. */
	wakee_ctx->acc_runtime = 0;

	/* Waker bookkeeping applies to real wake-ups only. */
	if (!(enq_flags & SCX_ENQ_WAKEUP))
		return;
	if (enq_flags & (SCX_ENQ_PREEMPT | SCX_ENQ_REENQ | SCX_ENQ_LAST))
		return;

	/* The current task is treated as the waker. */
	cur = bpf_get_current_task_btf();

	/* Only waker/wakee pairs sharing the same parent are linked. */
	if ((p->real_parent != cur->real_parent))
		return;
	/* Kernel and user tasks are not linked with each other. */
	if (is_kernel_task(p) != is_kernel_task(cur))
		return;

	waker_ctx = get_task_ctx(cur);
	if (!waker_ctx)
		return;

	/* Wake-ups closer together than the minimum interval are not counted. */
	now = scx_bpf_now();
	gap = time_delta(now, READ_ONCE(waker_ctx->last_runnable_clk));
	if (gap >= LAVD_LC_WAKE_INTERVAL_MIN) {
		WRITE_ONCE(waker_ctx->wake_freq,
			   calc_avg_freq(waker_ctx->wake_freq, gap));
		WRITE_ONCE(waker_ctx->last_runnable_clk, now);
	}

	/* Hand the waker's latency criticality to the wakee. */
	wakee_ctx->lat_cri_waker = waker_ctx->lat_cri;

	/* Waker identity is recorded only while monitoring. */
	if (is_monitored) {
		wakee_ctx->waker_pid = cur->pid;
		__builtin_memcpy_inline(wakee_ctx->waker_comm, cur->comm,
					TASK_COMM_LEN);
	}
}
/*
 * ops.running callback: a task is about to run on its CPU. Assign its time
 * slice, advance the logical clock, and update the running statistics.
 *
 * @p: task that starts running.
 */
void BPF_STRUCT_OPS(lavd_running, struct task_struct *p)
{
	u64 now = scx_bpf_now();
	struct cpu_ctx *cpuc = get_cpu_ctx_task(p);
	struct task_ctx *taskc = get_task_ctx(p);

	if (!cpuc || !taskc) {
		scx_bpf_error("Failed to lookup context for task %d", p->pid);
		return;
	}

	/* The task no longer waits; undo the count taken in lavd_enqueue(). */
	if (is_pinned(p))
		__sync_fetch_and_sub(&cpuc->nr_pinned_tasks, 1);

	/*
	 * A slice still at SCX_SLICE_DFL gets the current logical clock as
	 * its virtual time.
	 */
	if (p->scx.slice == SCX_SLICE_DFL)
		p->scx.dsq_vtime = READ_ONCE(cur_logical_clk);

	/* Replace the placeholder slice with the calculated one. */
	p->scx.slice = calc_time_slice(taskc, cpuc);

	advance_cur_logical_clk(p);
	update_stat_for_running(p, taskc, cpuc, now);
	update_cpuperf_target(cpuc);
	try_proc_introspec_cmd(p, taskc);
}
/*
 * ops.tick callback: charge the running task's runtime and, when pinned
 * tasks are waiting on a CPU running with a boosted slice, shrink that slice.
 *
 * @p: task currently running on the ticking CPU.
 */
void BPF_STRUCT_OPS(lavd_tick, struct task_struct *p)
{
	struct cpu_ctx *cpuc = get_cpu_ctx_task(p);
	struct task_ctx *taskc = get_task_ctx(p);
	u64 now;

	if (!cpuc || !taskc) {
		scx_bpf_error("Failed to lookup context for task %d", p->pid);
		return;
	}

	now = scx_bpf_now();
	account_task_runtime(p, taskc, cpuc, now);

	/* Nothing more to do unless pinned tasks are waiting on this CPU. */
	if (!cpuc->nr_pinned_tasks)
		return;

	if (test_cpu_flag(cpuc, LAVD_FLAG_SLICE_BOOST))
		shrink_boosted_slice_at_tick(p, cpuc, now);
}
/*
 * ops.stopping callback: a task stops running; update its statistics.
 *
 * @p:        task that stops.
 * @runnable: unused here.
 */
void BPF_STRUCT_OPS(lavd_stopping, struct task_struct *p, bool runnable)
{
	struct cpu_ctx *cpuc = get_cpu_ctx_task(p);
	struct task_ctx *taskc = get_task_ctx(p);

	if (cpuc && taskc) {
		update_stat_for_stopping(p, taskc, cpuc);
		return;
	}

	scx_bpf_error("Failed to lookup context for task %d", p->pid);
}
/*
 * ops.quiescent callback: a task leaves the runnable state. The CPU's flags
 * are cleared, and when the task goes to sleep its wait frequency is updated.
 *
 * @p:         task that became quiescent.
 * @deq_flags: SCX_DEQ_* flags; only SCX_DEQ_SLEEP triggers the update.
 */
void BPF_STRUCT_OPS(lavd_quiescent, struct task_struct *p, u64 deq_flags)
{
	struct cpu_ctx *cpuc = get_cpu_ctx_task(p);
	struct task_ctx *taskc = get_task_ctx(p);
	u64 now, since_last;

	if (!cpuc || !taskc) {
		scx_bpf_error("Failed to lookup context for task %d", p->pid);
		return;
	}

	cpuc->flags = 0;

	if (deq_flags & SCX_DEQ_SLEEP) {
		now = scx_bpf_now();
		since_last = time_delta(now, taskc->last_quiescent_clk);

		/* A zero interval leaves the frequency and timestamp untouched. */
		if (!since_last)
			return;

		taskc->wait_freq = calc_avg_freq(taskc->wait_freq, since_last);
		taskc->last_quiescent_clk = now;
	}
}
/*
 * Put the context of a CPU into its online state.
 *
 * @cpuc:   context of the CPU coming online.
 * @cpu_id: id of that CPU; set in the cpumask of its compute domain.
 * @now:    timestamp stored as online_clk.
 */
static void cpu_ctx_init_online(struct cpu_ctx *cpuc, u32 cpu_id, u64 now)
{
struct bpf_cpumask *cd_cpumask;
/* Mark the CPU in its compute domain's cpumask; skipped if the lookup fails. */
bpf_rcu_read_lock();
cd_cpumask = MEMBER_VPTR(cpdom_cpumask, [cpuc->cpdom_id]);
if (!cd_cpumask)
goto unlock_out;
bpf_cpumask_set_cpu(cpu_id, cd_cpumask);
unlock_out:
bpf_rcu_read_unlock();
/* Reset the runtime fields before the CPU is flagged online. */
cpuc->flags = 0;
cpuc->idle_start_clk = 0;
cpuc->lat_cri = 0;
cpuc->running_clk = 0;
cpuc->est_stopping_clk = SCX_SLICE_INF;
WRITE_ONCE(cpuc->online_clk, now);
/* Keep the compiler from moving the is_online store above the resets. */
barrier();
cpuc->is_online = true;
}
/*
 * Put the context of a CPU into its offline state.
 *
 * @cpuc:   context of the CPU going offline.
 * @cpu_id: id of that CPU; cleared from the cpumask of its compute domain.
 * @now:    timestamp stored as offline_clk.
 */
static void cpu_ctx_init_offline(struct cpu_ctx *cpuc, u32 cpu_id, u64 now)
{
struct bpf_cpumask *cd_cpumask;
/* Remove the CPU from its compute domain's cpumask; skipped if the lookup fails. */
bpf_rcu_read_lock();
cd_cpumask = MEMBER_VPTR(cpdom_cpumask, [cpuc->cpdom_id]);
if (!cd_cpumask)
goto unlock_out;
bpf_cpumask_clear_cpu(cpu_id, cd_cpumask);
unlock_out:
bpf_rcu_read_unlock();
cpuc->flags = 0;
cpuc->idle_start_clk = 0;
WRITE_ONCE(cpuc->offline_clk, now);
/* Unlike the online path, is_online is cleared before the remaining resets. */
cpuc->is_online = false;
/* Keep the compiler from moving the resets below above the is_online store. */
barrier();
cpuc->lat_cri = 0;
cpuc->running_clk = 0;
cpuc->est_stopping_clk = SCX_SLICE_INF;
}
/*
 * ops.cpu_online callback: a CPU came online. Reinitialize its context, add
 * it to the online count and total capacity, and refresh system statistics.
 *
 * @cpu: id of the CPU that came online.
 */
void BPF_STRUCT_OPS(lavd_cpu_online, s32 cpu)
{
	u64 now = scx_bpf_now();
	struct cpu_ctx *cpuc = get_cpu_ctx_id(cpu);

	if (!cpuc) {
		scx_bpf_error("Failed to lookup cpu_ctx %d", cpu);
		return;
	}

	cpu_ctx_init_online(cpuc, cpu, now);

	/* Account for the additional CPU and its capacity. */
	__sync_fetch_and_add(&nr_cpus_onln, 1);
	__sync_fetch_and_add(&total_capacity, cpuc->capacity);

	update_autopilot_high_cap();
	update_sys_stat();
}
/*
 * ops.cpu_offline callback: a CPU went offline. Reset its context, remove it
 * from the online count and total capacity, and refresh system statistics.
 *
 * @cpu: id of the CPU that went offline.
 */
void BPF_STRUCT_OPS(lavd_cpu_offline, s32 cpu)
{
	u64 now = scx_bpf_now();
	struct cpu_ctx *cpuc = get_cpu_ctx_id(cpu);

	if (!cpuc) {
		scx_bpf_error("Failed to lookup cpu_ctx %d", cpu);
		return;
	}

	cpu_ctx_init_offline(cpuc, cpu, now);

	/* Drop the CPU and its capacity from the global counters. */
	__sync_fetch_and_sub(&nr_cpus_onln, 1);
	__sync_fetch_and_sub(&total_capacity, cpuc->capacity);

	update_autopilot_high_cap();
	update_sys_stat();
}
/*
 * ops.update_idle callback: track how long a CPU stays idle.
 *
 * @cpu:  CPU whose idle state changes.
 * @idle: true when the CPU enters idle, false when it leaves idle.
 */
void BPF_STRUCT_OPS(lavd_update_idle, s32 cpu, bool idle)
{
	struct cpu_ctx *cpuc = get_cpu_ctx_id(cpu);
	u64 now, idle_since, idle_len;

	if (!cpuc) {
		scx_bpf_error("Failed to lookup cpu_ctx %d", cpu);
		return;
	}

	now = scx_bpf_now();

	if (idle) {
		/* Entering idle: remember when it started. */
		cpuc->idle_start_clk = now;
		reset_cpu_preemption_info(cpuc, false);
		return;
	}

	/* Leaving idle: nothing to account if no start time is recorded. */
	idle_since = cpuc->idle_start_clk;
	if (idle_since == 0)
		return;

	idle_len = time_delta(now, idle_since);

	/*
	 * Add the duration only when this context clears the start time, so
	 * the same idle period is not added twice.
	 */
	if (__sync_bool_compare_and_swap(&cpuc->idle_start_clk, idle_since, 0))
		cpuc->idle_total += idle_len;
}
/*
 * ops.set_cpumask callback: a task's allowed CPUs changed. Refresh the
 * affinitized flag and the task's core-type information.
 *
 * @p:       task whose affinity changed.
 * @cpumask: the new cpumask, passed on to set_on_core_type().
 */
void BPF_STRUCT_OPS(lavd_set_cpumask, struct task_struct *p,
		    const struct cpumask *cpumask)
{
	struct task_ctx *taskc = get_task_ctx(p);

	if (!taskc) {
		scx_bpf_error("task_ctx_stor first lookup failed");
		return;
	}

	/* A task allowed on every CPU id is not considered affinitized. */
	if (bpf_cpumask_weight(p->cpus_ptr) == nr_cpu_ids)
		reset_task_flag(taskc, LAVD_FLAG_IS_AFFINITIZED);
	else
		set_task_flag(taskc, LAVD_FLAG_IS_AFFINITIZED);

	set_on_core_type(taskc, cpumask);
}
/*
 * ops.cpu_acquire callback: charge the time since the matching
 * lavd_cpu_release() to the CPU's scaled-time total and re-read the CPU's
 * current performance level.
 *
 * @cpu:  id of the acquired CPU.
 * @args: unused here.
 */
void BPF_STRUCT_OPS(lavd_cpu_acquire, s32 cpu,
		    struct scx_cpu_acquire_args *args)
{
	struct cpu_ctx *cpuc = get_cpu_ctx_id(cpu);
	u64 away_ns, away_scaled;

	if (!cpuc) {
		scx_bpf_error("Failed to lookup cpu_ctx %d", cpu);
		return;
	}

	/* Time elapsed since the CPU was released, scaled by scale_cap_freq(). */
	away_ns = time_delta(scx_bpf_now(), cpuc->cpu_release_clk);
	away_scaled = scale_cap_freq(away_ns, cpu);
	cpuc->tot_sc_time += away_scaled;

	cpuc->cpuperf_cur = scx_bpf_cpuperf_cur(cpu);
}
/*
 * ops.cpu_release callback: clear the CPU's scheduling state, re-enqueue the
 * tasks on its local DSQ, and record the release time.
 *
 * @cpu:  id of the released CPU.
 * @args: unused here.
 */
void BPF_STRUCT_OPS(lavd_cpu_release, s32 cpu,
		    struct scx_cpu_release_args *args)
{
	struct cpu_ctx *released = get_cpu_ctx_id(cpu);

	if (!released) {
		scx_bpf_error("Failed to lookup cpu_ctx %d", cpu);
		return;
	}

	/* Drop per-CPU scheduling state. */
	released->flags = 0;
	reset_cpu_preemption_info(released, true);

	/* Re-enqueue the tasks sitting on the local DSQ. */
	scx_bpf_reenqueue_local();

	reset_cpuperf_target(released);

	/* lavd_cpu_acquire() measures the time away from this timestamp. */
	released->cpu_release_clk = scx_bpf_now();
}
/*
 * ops.enable callback: start the task's service time at the current global
 * service-time watermark.
 *
 * @p: task being enabled.
 */
void BPF_STRUCT_OPS(lavd_enable, struct task_struct *p)
{
	struct task_ctx *taskc = get_task_ctx(p);

	if (taskc) {
		taskc->svc_time = READ_ONCE(cur_svc_time);
		return;
	}

	scx_bpf_error("task_ctx_stor first lookup failed");
}
/*
 * Initialize a freshly created task context.
 *
 * @p:     task the context belongs to.
 * @taskc: context to initialize; zeroed first.
 */
static void init_task_ctx(struct task_struct *p, struct task_ctx *taskc)
{
	u64 now = scx_bpf_now();

	__builtin_memset(taskc, 0, sizeof(*taskc));

	/* A task allowed on every CPU id is not considered affinitized. */
	if (bpf_cpumask_weight(p->cpus_ptr) == nr_cpu_ids)
		reset_task_flag(taskc, LAVD_FLAG_IS_AFFINITIZED);
	else
		set_task_flag(taskc, LAVD_FLAG_IS_AFFINITIZED);

	/* All life-cycle timestamps start at the same instant. */
	taskc->last_quiescent_clk = now;
	taskc->last_stopping_clk = now;
	taskc->last_running_clk = now;
	taskc->last_runnable_clk = now;

	/* Seed the averages with the current system-wide values. */
	taskc->avg_runtime = sys_stat.slice;
	taskc->svc_time = sys_stat.avg_svc_time;

	set_on_core_type(taskc, p->cpus_ptr);
}
/*
 * ops.init_task callback: create and initialize the task-local context.
 *
 * @p:    task being initialized.
 * @args: unused here.
 *
 * Returns 0 on success, -ESRCH when @p is NULL, or -ENOMEM when the task
 * storage cannot be obtained.
 */
s32 BPF_STRUCT_OPS(lavd_init_task, struct task_struct *p,
		   struct scx_init_task_args *args)
{
	struct task_ctx *fresh;

	if (!p) {
		scx_bpf_error("NULL task_struct pointer received");
		return -ESRCH;
	}

	/* Create the storage entry if it does not exist yet. */
	fresh = bpf_task_storage_get(&task_ctx_stor, p, 0,
				     BPF_LOCAL_STORAGE_GET_F_CREATE);
	if (fresh) {
		init_task_ctx(p, fresh);
		return 0;
	}

	scx_bpf_error("task_ctx_stor first lookup failed");
	return -ENOMEM;
}
/*
 * Create one DSQ per valid compute domain and record how many domain slots
 * are in use.
 *
 * @now: unused here.
 *
 * nr_cpdoms ends up as the index of the last valid domain plus one.
 *
 * Returns 0 on success, -ESRCH when a domain context cannot be looked up, or
 * the error from scx_bpf_create_dsq().
 */
static s32 init_cpdoms(u64 now)
{
	struct cpdom_ctx *dom;
	int idx, err;

	for (idx = 0; idx < LAVD_CPDOM_MAX_NR; idx++) {
		dom = MEMBER_VPTR(cpdom_ctxs, [idx]);
		if (!dom) {
			scx_bpf_error("Failed to lookup cpdom_ctx for %d", idx);
			return -ESRCH;
		}

		/* Slots without a valid domain get no DSQ. */
		if (!dom->is_valid)
			continue;

		err = scx_bpf_create_dsq(cpdom_to_dsq(dom->id), dom->node_id);
		if (err) {
			scx_bpf_error("Failed to create a DSQ for cpdom %llu on NUMA node %d",
				      dom->id, dom->node_id);
			return err;
		}

		nr_cpdoms = idx + 1;
	}

	return 0;
}
/*
 * Allocate a new BPF cpumask and store it in *@p_cpumask.
 *
 * @p_cpumask: kptr slot that receives the new mask; a mask previously held
 *             there is released.
 *
 * Returns 0 on success or -ENOMEM when the mask cannot be created.
 */
static int calloc_cpumask(struct bpf_cpumask **p_cpumask)
{
	struct bpf_cpumask *fresh, *stale;

	fresh = bpf_cpumask_create();
	if (!fresh)
		return -ENOMEM;

	/* Swap the new mask in and take over whatever was there before. */
	stale = bpf_kptr_xchg(p_cpumask, fresh);
	if (stale)
		bpf_cpumask_release(stale);

	return 0;
}
static int init_cpumasks(void)
{
const struct cpumask *online_cpumask;
struct bpf_cpumask *active;
int err = 0;
bpf_rcu_read_lock();
err = calloc_cpumask(&active_cpumask);
active = active_cpumask;
if (err || !active)
goto out;
online_cpumask = scx_bpf_get_online_cpumask();
nr_cpus_onln = bpf_cpumask_weight(online_cpumask);
bpf_cpumask_copy(active, online_cpumask);
scx_bpf_put_cpumask(online_cpumask);
err = calloc_cpumask(&ovrflw_cpumask);
if (err)
goto out;
err = calloc_cpumask(&turbo_cpumask);
if (err)
goto out;
err = calloc_cpumask(&big_cpumask);
if (err)
goto out;
err = calloc_cpumask(&little_cpumask);
if (err)
goto out;
out:
bpf_rcu_read_unlock();
return err;
}
/*
 * Initialize every per-CPU context and attach the CPUs to their compute
 * domains.
 *
 * @now: timestamp used for the initial online/offline/release clocks.
 *
 * Three passes: (1) per-CPU fields, scratch cpumasks and the big/little/turbo
 * masks; (2) per-domain CPU membership and counters; (3) a debug dump.
 *
 * Returns 0 on success, -ENOMEM when a cpumask is missing or cannot be
 * allocated, or -ESRCH when a context lookup fails.
 */
static s32 init_per_cpu_ctx(u64 now)
{
struct cpu_ctx *cpuc;
struct bpf_cpumask *turbo, *big, *little, *active, *ovrflw, *cd_cpumask;
const struct cpumask *online_cpumask;
struct cpdom_ctx *cpdomc;
int cpu, i, j, err = 0;
u64 cpdom_id;
u32 sum_capacity = 0, big_capacity = 0;
bpf_rcu_read_lock();
/* Released with scx_bpf_put_cpumask() at unlock_out. */
online_cpumask = scx_bpf_get_online_cpumask();
/* The global masks are expected to exist already (see init_cpumasks()). */
turbo = turbo_cpumask;
big = big_cpumask;
little = little_cpumask;
active = active_cpumask;
ovrflw = ovrflw_cpumask;
if (!turbo || !big || !little || !active || !ovrflw) {
scx_bpf_error("Failed to prepare cpumasks.");
err = -ENOMEM;
goto unlock_out;
}
/* Start from LAVD_SCALE and lower it to the smallest capacity seen below. */
one_little_capacity = LAVD_SCALE;
/* Pass 1: initialize each CPU's context. */
bpf_for(cpu, 0, nr_cpu_ids) {
/* Explicit upper bound on the CPU id used to index the per-CPU arrays. */
if (cpu >= LAVD_CPU_ID_MAX)
break;
cpuc = get_cpu_ctx_id(cpu);
if (!cpuc) {
scx_bpf_error("Failed to lookup cpu_ctx: %d", cpu);
err = -ESRCH;
goto unlock_out;
}
/* Allocate the per-CPU temporary cpumasks. */
err = calloc_cpumask(&cpuc->tmp_a_mask);
if (err)
goto unlock_out;
err = calloc_cpumask(&cpuc->tmp_o_mask);
if (err)
goto unlock_out;
err = calloc_cpumask(&cpuc->tmp_l_mask);
if (err)
goto unlock_out;
err = calloc_cpumask(&cpuc->tmp_i_mask);
if (err)
goto unlock_out;
err = calloc_cpumask(&cpuc->tmp_t_mask);
if (err)
goto unlock_out;
err = calloc_cpumask(&cpuc->tmp_t2_mask);
if (err)
goto unlock_out;
err = calloc_cpumask(&cpuc->tmp_t3_mask);
if (err)
goto unlock_out;
/* Basic fields and timestamps. */
cpuc->cpu_id = cpu;
cpuc->idle_start_clk = 0;
cpuc->lat_cri = 0;
cpuc->running_clk = 0;
cpuc->est_stopping_clk = SCX_SLICE_INF;
cpuc->online_clk = now;
cpuc->offline_clk = now;
cpuc->cpu_release_clk = now;
cpuc->is_online = bpf_cpumask_test_cpu(cpu, online_cpumask);
/* Topology data taken from the cpu_capacity/cpu_big/cpu_turbo arrays. */
cpuc->capacity = cpu_capacity[cpu];
cpuc->big_core = cpu_big[cpu];
cpuc->turbo_core = cpu_turbo[cpu];
cpuc->cpdom_poll_pos = cpu % LAVD_CPDOM_MAX_NR;
/* Start at LAVD_SCALE so the first observed value can lower it. */
cpuc->min_perf_cri = LAVD_SCALE;
cpuc->futex_op = LAVD_FUTEX_OP_INVALID;
sum_capacity += cpuc->capacity;
/* Classify the CPU as big or little, and optionally turbo. */
if (cpuc->big_core) {
nr_cpus_big++;
big_capacity += cpuc->capacity;
bpf_cpumask_set_cpu(cpu, big);
}
else {
bpf_cpumask_set_cpu(cpu, little);
have_little_core = true;
}
if (cpuc->turbo_core) {
bpf_cpumask_set_cpu(cpu, turbo);
have_turbo_core = true;
}
/* Track the smallest capacity of any CPU. */
if (cpuc->capacity < one_little_capacity)
one_little_capacity = cpuc->capacity;
}
/* Share of the total capacity provided by big cores, scaled by LAVD_SHIFT. */
default_big_core_scale = (big_capacity << LAVD_SHIFT) / sum_capacity;
total_capacity = sum_capacity;
/* Pass 2: attach CPUs to their compute domains. */
bpf_for(cpdom_id, 0, nr_cpdoms) {
if (cpdom_id >= LAVD_CPDOM_MAX_NR)
break;
cpdomc = MEMBER_VPTR(cpdom_ctxs, [cpdom_id]);
cd_cpumask = MEMBER_VPTR(cpdom_cpumask, [cpdom_id]);
if (!cpdomc || !cd_cpumask) {
scx_bpf_error("Failed to lookup cpdom_ctx for %llu", cpdom_id);
err = -ESRCH;
goto unlock_out;
}
if (!cpdomc->is_valid)
continue;
/* Walk the domain's CPU bitmap, 64 CPUs per word. */
bpf_for(i, 0, LAVD_CPU_ID_MAX/64) {
u64 cpumask = cpdomc->__cpumask[i];
bpf_for(j, 0, 64) {
if (cpumask & 0x1LLU << j) {
cpu = (i * 64) + j;
cpuc = get_cpu_ctx_id(cpu);
if (!cpuc) {
scx_bpf_error("Failed to lookup cpu_ctx: %d", cpu);
err = -ESRCH;
goto unlock_out;
}
cpuc->cpdom_id = cpdomc->id;
cpuc->cpdom_alt_id = cpdomc->alt_id;
/* Only online CPUs count as active members of the domain. */
if (bpf_cpumask_test_cpu(cpu, online_cpumask)) {
bpf_cpumask_set_cpu(cpu, cd_cpumask);
cpdomc->nr_active_cpus++;
cpdomc->cap_sum_active_cpus += cpuc->capacity;
}
cpdomc->nr_cpus++;
}
}
}
}
/* Pass 3: print the resulting per-CPU configuration for debugging. */
bpf_for(cpu, 0, nr_cpu_ids) {
cpuc = get_cpu_ctx_id(cpu);
if (!cpuc) {
scx_bpf_error("Failed to lookup cpu_ctx: %d", cpu);
err = -ESRCH;
goto unlock_out;
}
debugln("cpu[%d] capacity: %d, big_core: %d, turbo_core: %d, "
"cpdom_id: %llu, alt_id: %llu",
cpu, cpuc->capacity, cpuc->big_core, cpuc->turbo_core,
cpuc->cpdom_id, cpuc->cpdom_alt_id);
}
unlock_out:
scx_bpf_put_cpumask(online_cpumask);
bpf_rcu_read_unlock();
return err;
}
/*
 * Create one DSQ per CPU id, placed on the NUMA node of the CPU's compute
 * domain.
 *
 * Returns 0 on success, -ESRCH when a CPU or domain context cannot be looked
 * up, or the error from scx_bpf_create_dsq().
 */
static int init_per_cpu_dsqs(void)
{
	struct cpdom_ctx *dom;
	struct cpu_ctx *cpuc;
	int cpu, ret;

	bpf_for(cpu, 0, nr_cpu_ids) {
		cpuc = get_cpu_ctx_id(cpu);
		if (!cpuc) {
			scx_bpf_error("Failed to lookup cpu_ctx: %d", cpu);
			return -ESRCH;
		}

		/* The NUMA node comes from the CPU's compute domain. */
		dom = MEMBER_VPTR(cpdom_ctxs, [cpuc->cpdom_id]);
		if (!dom) {
			scx_bpf_error("Failed to lookup cpdom_ctx for %hhu", cpuc->cpdom_id);
			return -ESRCH;
		}

		ret = scx_bpf_create_dsq(cpu_to_dsq(cpu), dom->node_id);
		if (ret) {
			scx_bpf_error("Failed to create a DSQ for cpu %d on NUMA node %d",
				      cpu, dom->node_id);
			return ret;
		}
	}

	return 0;
}
/*
 * ops.init callback: set up compute domains, cpumasks, per-CPU contexts,
 * optional per-CPU DSQs and system statistics, then reset the global clocks.
 *
 * Returns 0 on success or the error of the first initialization step that
 * failed; later steps are not run after a failure.
 */
s32 BPF_STRUCT_OPS_SLEEPABLE(lavd_init)
{
	u64 now = scx_bpf_now();
	int err;

	/* Each step runs only if all earlier ones succeeded. */
	err = init_cpdoms(now);
	if (!err)
		err = init_cpumasks();
	if (!err)
		err = init_per_cpu_ctx(now);
	if (!err && per_cpu_dsq)
		err = init_per_cpu_dsqs();
	if (!err)
		err = init_sys_stat(now);
	if (err)
		return err;

	init_autopilot_caps();

	/* Both global clocks start from zero. */
	WRITE_ONCE(cur_logical_clk, 0);
	WRITE_ONCE(cur_svc_time, 0);

	return 0;
}
/*
 * ops.exit callback: record the exit information.
 *
 * @ei: exit info, stored into uei via UEI_RECORD().
 */
void BPF_STRUCT_OPS(lavd_exit, struct scx_exit_info *ei)
{
UEI_RECORD(uei, ei);
}
/*
 * Scheduler operations table: wires the lavd_* callbacks defined in this file
 * to the corresponding ops slots, under the name "lavd" and with timeout_ms
 * set to 30000.
 */
SCX_OPS_DEFINE(lavd_ops,
.select_cpu = (void *)lavd_select_cpu,
.enqueue = (void *)lavd_enqueue,
.dispatch = (void *)lavd_dispatch,
.runnable = (void *)lavd_runnable,
.running = (void *)lavd_running,
.tick = (void *)lavd_tick,
.stopping = (void *)lavd_stopping,
.quiescent = (void *)lavd_quiescent,
.cpu_online = (void *)lavd_cpu_online,
.cpu_offline = (void *)lavd_cpu_offline,
.update_idle = (void *)lavd_update_idle,
.set_cpumask = (void *)lavd_set_cpumask,
.cpu_acquire = (void *)lavd_cpu_acquire,
.cpu_release = (void *)lavd_cpu_release,
.enable = (void *)lavd_enable,
.init_task = (void *)lavd_init_task,
.init = (void *)lavd_init,
.exit = (void *)lavd_exit,
.timeout_ms = 30000U,
.name = "lavd");