#include <scx/common.bpf.h>
#include "intf.h"
#include "lavd.bpf.h"
#include <errno.h>
#include <stdbool.h>
#include <bpf/bpf_core_read.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
/* License string placed in the "license" section of the BPF object. */
char _license[] SEC("license") = "GPL";
/*
 * Global logical clock. It is the base for virtual deadlines in
 * calc_when_to_run() and is moved forward by advance_cur_logical_clk().
 */
static u64 cur_logical_clk;
/*
 * Largest task service time seen so far: raised in
 * update_stat_for_stopping() and used as the starting service time of a
 * task in lavd_enable().
 */
static u64 cur_svc_time;
/*
 * Time-slice bounds in nanoseconds, initialised to the compile-time
 * defaults. Presumably overridable by the user-space loader before the
 * program is loaded -- confirm against the loader.
 */
const volatile u64 slice_min_ns = LAVD_SLICE_MIN_NS_DFL;
const volatile u64 slice_max_ns = LAVD_SLICE_MAX_NS_DFL;
#include "util.bpf.c"
#include "power.bpf.c"
#include "introspec.bpf.c"
#include "preempt.bpf.c"
#include "lock.bpf.c"
#include "idle.bpf.c"
#include "balance.bpf.c"
#include "sys_stat.bpf.c"
/*
 * Compute the ratio of @taskc's service time to the system-wide average
 * service time (sys_stat.avg_svc_time), as a fixed-point number scaled by
 * LAVD_SHIFT.
 *
 * Side effect: taskc->is_greedy is refreshed; it is set when the ratio is
 * above LAVD_SCALE.
 *
 * Returns the fixed-point ratio.
 */
static u32 calc_greedy_ratio(struct task_ctx *taskc)
{
	u32 greedy = (taskc->svc_time << LAVD_SHIFT) / sys_stat.avg_svc_time;

	taskc->is_greedy = greedy > LAVD_SCALE;
	return greedy;
}
/*
 * Turn a greedy ratio into a deadline multiplier.
 *
 * A ratio at or below LAVD_SCALE yields exactly LAVD_SCALE (no penalty).
 * Above that, the excess over LAVD_SCALE is divided by
 * LAVD_LC_GREEDY_PENALTY and added on top of LAVD_SCALE.
 */
static u32 calc_greedy_factor(u32 greedy_ratio)
{
	u32 penalty = 0;

	if (greedy_ratio > LAVD_SCALE)
		penalty = (greedy_ratio - LAVD_SCALE) / LAVD_LC_GREEDY_PENALTY;

	return LAVD_SCALE + penalty;
}
/*
 * Map @runtime to a factor through rsigmoid_u64() with
 * LAVD_LC_RUNTIME_MAX as its second argument.
 * NOTE(review): presumably a reverse sigmoid, so shorter runtimes give
 * larger factors -- confirm against the helper's definition.
 */
static inline u64 calc_runtime_factor(u64 runtime)
{
	u64 factor = rsigmoid_u64(runtime, LAVD_LC_RUNTIME_MAX);

	return factor;
}
/*
 * Map @freq to a factor through sigmoid_u64() with LAVD_LC_FREQ_MAX as
 * its second argument.
 * NOTE(review): presumably the result saturates at LAVD_LC_FREQ_MAX --
 * confirm against the helper's definition.
 */
static inline u64 calc_freq_factor(u64 freq)
{
	u64 factor = sigmoid_u64(freq, LAVD_LC_FREQ_MAX);

	return factor;
}
/*
 * Compute the weight factor of task @p.
 *
 * The factor is p->scx.weight multiplied by a boost. The boost starts at
 * 1 and grows by LAVD_LC_WEIGHT_BOOST for each of the following:
 *   - every pending wakeup count in taskc->wakeup_ft,
 *   - @p is a kernel task,
 *   - @p is a kernel worker,
 *   - the task is affinitized,
 *   - @p is pinned or has migration disabled,
 *   - a lock boost was requested (the request flag is consumed here).
 *
 * Side effect: taskc->need_lock_boost is cleared when it was set.
 */
static u64 calc_weight_factor(struct task_struct *p, struct task_ctx *taskc)
{
	u64 boost = 1;

	boost += taskc->wakeup_ft * LAVD_LC_WEIGHT_BOOST;
	boost += is_kernel_task(p) ? LAVD_LC_WEIGHT_BOOST : 0;
	boost += is_kernel_worker(p) ? LAVD_LC_WEIGHT_BOOST : 0;
	boost += taskc->is_affinitized ? LAVD_LC_WEIGHT_BOOST : 0;
	boost += (is_pinned(p) || is_migration_disabled(p)) ?
		 LAVD_LC_WEIGHT_BOOST : 0;

	/* The lock boost is one-shot: consume the request as we apply it. */
	if (taskc->need_lock_boost) {
		taskc->need_lock_boost = false;
		boost += LAVD_LC_WEIGHT_BOOST;
	}

	return p->scx.weight * boost;
}
/*
 * Update taskc->perf_cri, the performance criticality of task @p.
 *
 * Without little cores (have_little_core is false) the value is simply
 * LAVD_SCALE. Otherwise it is the sum of two log2 terms:
 *   log2(wait-frequency factor * wake-frequency factor) and
 *   log2(run_freq * avg_runtime * weight), where run_freq and
 *   avg_runtime are clamped to at least 1.
 */
static void calc_perf_cri(struct task_struct *p, struct task_ctx *taskc)
{
	u64 cri = LAVD_SCALE;

	if (have_little_core) {
		u64 wait_ft = calc_freq_factor(taskc->wait_freq);
		u64 wake_ft = calc_freq_factor(taskc->wake_freq);

		cri = log2_u64(wait_ft * wake_ft);
		cri += log2_u64(max(taskc->run_freq, 1) *
				max(taskc->avg_runtime, 1) * p->scx.weight);
	}

	taskc->perf_cri = cri;
}
/*
 * Update taskc->lat_cri, the latency criticality of task @p.
 *
 * The task's own criticality is
 *   log2(wait_ft * wake_ft) + log2(runtime_ft * weight_ft)
 * where the frequency and runtime factors are each incremented by one
 * before use. The stored value is the larger of that and the criticality
 * inherited from the waker (taskc->lat_cri_waker).
 *
 * Note: calc_weight_factor() may clear taskc->need_lock_boost.
 */
static void calc_lat_cri(struct task_struct *p, struct task_ctx *taskc)
{
	u64 weight_ft = calc_weight_factor(p, taskc);
	u64 wait_ft = calc_freq_factor(taskc->wait_freq) + 1;
	u64 wake_ft = calc_freq_factor(taskc->wake_freq) + 1;
	u64 run_ft = calc_runtime_factor(taskc->avg_runtime) + 1;
	u64 own_cri;

	own_cri = log2_u64(wait_ft * wake_ft) + log2_u64(run_ft * weight_ft);

	taskc->lat_cri = max(own_cri, taskc->lat_cri_waker);
}
/*
 * Return the runtime used for deadline calculation: the task's
 * accumulated runtime capped at LAVD_ACC_RUNTIME_MAX, offset by
 * LAVD_ACC_RUNTIME_MAX. The result therefore lies in
 * [LAVD_ACC_RUNTIME_MAX, 2 * LAVD_ACC_RUNTIME_MAX].
 */
static u64 calc_adjusted_runtime(struct task_ctx *taskc)
{
	return LAVD_ACC_RUNTIME_MAX +
	       min(taskc->acc_runtime, LAVD_ACC_RUNTIME_MAX);
}
/*
 * Compute how far in the future (in logical-clock units) task @p's
 * virtual deadline should be placed.
 *
 * The result is (adjusted runtime * greedy factor) / latency criticality,
 * so a greedier task gets a later deadline and a more latency-critical
 * task gets an earlier one.
 *
 * Side effects: refreshes taskc->perf_cri, taskc->lat_cri and
 * taskc->is_greedy through the helpers it calls.
 */
static u64 calc_virtual_deadline_delta(struct task_struct *p,
				       struct task_ctx *taskc)
{
	u32 greedy_ft;
	u64 runtime;

	/* Refresh criticalities first; lat_cri is the divisor below. */
	calc_perf_cri(p, taskc);
	calc_lat_cri(p, taskc);

	greedy_ft = calc_greedy_factor(calc_greedy_ratio(taskc));
	runtime = calc_adjusted_runtime(taskc);

	return (runtime * greedy_ft) / taskc->lat_cri;
}
/*
 * Assign the current system-wide slice (sys_stat.slice) to @taskc and
 * return it. With a NULL @taskc nothing is stored and the compile-time
 * default LAVD_SLICE_MAX_NS_DFL is returned instead.
 */
static u64 calc_time_slice(struct task_ctx *taskc)
{
	if (taskc) {
		taskc->slice_ns = sys_stat.slice;
		return taskc->slice_ns;
	}

	return LAVD_SLICE_MAX_NS_DFL;
}
/*
 * Discard any pending offline->online gap recorded for @cpuc by moving
 * offline_clk up to online_clk. Does nothing when online_clk is not
 * ahead of offline_clk.
 */
static void reset_suspended_duration(struct cpu_ctx *cpuc)
{
	if (cpuc->online_clk <= cpuc->offline_clk)
		return;

	cpuc->offline_clk = cpuc->online_clk;
}
/*
 * Return the time between @cpuc going offline and coming back online,
 * then clear it so it is reported only once. Returns 0 when online_clk
 * is not ahead of offline_clk.
 */
static u64 get_suspended_duration_and_reset(struct cpu_ctx *cpuc)
{
	u64 suspended;

	if (cpuc->online_clk <= cpuc->offline_clk)
		return 0;

	suspended = time_delta(cpuc->online_clk, cpuc->offline_clk);
	cpuc->offline_clk = cpuc->online_clk;
	return suspended;
}
/*
 * Move the global logical clock towards task @p's virtual time.
 *
 * If @p's dsq_vtime is ahead of cur_logical_clk, the clock is advanced
 * by the gap divided by the number of queued tasks (at least 1). The
 * update uses compare-and-swap; when another CPU changed the clock in
 * the meantime, the step is recomputed from the value that CPU wrote,
 * for at most LAVD_MAX_RETRY attempts.
 */
static void advance_cur_logical_clk(struct task_struct *p)
{
	u64 task_vtime = READ_ONCE(p->scx.dsq_vtime);
	u64 seen_clk = READ_ONCE(cur_logical_clk);
	u64 prev_clk, queued, step, target;
	int attempt;

	bpf_for(attempt, 0, LAVD_MAX_RETRY) {
		/* The clock already caught up with (or passed) the task. */
		if (task_vtime <= seen_clk)
			break;

		queued = max(sys_stat.nr_queued_task, 1);
		step = (task_vtime - seen_clk) / queued;
		target = seen_clk + step;

		prev_clk = __sync_val_compare_and_swap(&cur_logical_clk,
						       seen_clk, target);
		if (prev_clk == seen_clk)
			break;

		/* Lost the race: retry from the value that won. */
		seen_clk = prev_clk;
	}
}
/*
 * Bookkeeping performed when task @p starts running on the CPU described
 * by @cpuc at time @now.
 *
 * Updates the logical clock, the task's run frequency and timestamps,
 * and the per-CPU criticality and migration statistics.
 */
static void update_stat_for_running(struct task_struct *p,
				    struct task_ctx *taskc,
				    struct cpu_ctx *cpuc, u64 now)
{
	u64 waited, period;

	/*
	 * The task still carries the default slice: give it the current
	 * logical clock as its vtime and a freshly calculated slice.
	 * NOTE(review): presumably this happens when the task did not go
	 * through this scheduler's enqueue path -- confirm.
	 */
	if (p->scx.slice == SCX_SLICE_DFL) {
		p->scx.dsq_vtime = READ_ONCE(cur_logical_clk);
		p->scx.slice = calc_time_slice(taskc);
	}

	advance_cur_logical_clk(p);

	/* Fold the time since the last quiescent point into run_freq. */
	if (have_scheduled(taskc)) {
		waited = time_delta(now, taskc->last_quiescent_clk);
		period = taskc->avg_runtime + waited;
		taskc->run_freq = calc_avg_freq(taskc->run_freq, period);
	}

	/* Per-task state for this run. */
	taskc->wakeup_ft = 0;
	taskc->last_running_clk = now;
	reset_lock_futex_boost(taskc, cpuc);

	/* Per-CPU latency-criticality statistics. */
	if (taskc->lat_cri > cpuc->max_lat_cri)
		cpuc->max_lat_cri = taskc->lat_cri;
	cpuc->sum_lat_cri += taskc->lat_cri;
	cpuc->nr_sched++;

	/* Performance criticality is tracked only with little cores. */
	if (have_little_core) {
		if (taskc->perf_cri > cpuc->max_perf_cri)
			cpuc->max_perf_cri = taskc->perf_cri;
		if (taskc->perf_cri < cpuc->min_perf_cri)
			cpuc->min_perf_cri = taskc->perf_cri;
		cpuc->sum_perf_cri += taskc->perf_cri;
	}

	if (is_lat_cri(taskc))
		cpuc->nr_lat_cri++;
	if (is_perf_cri(taskc))
		cpuc->nr_perf_cri++;

	/* Running in a different compute domain counts as a migration. */
	if (taskc->dsq_id != cpuc->cpdom_id) {
		taskc->dsq_id = cpuc->cpdom_id;
		cpuc->nr_x_migration++;
	}

	reset_suspended_duration(cpuc);
}
/*
 * Bookkeeping performed when task @p stops running on the CPU described
 * by @cpuc.
 *
 * The runtime of this run is measured from last_running_clk, excluding
 * any time the CPU spent offline in between, and is folded into the
 * task's runtime and service-time statistics and into the CPU totals.
 */
static void update_stat_for_stopping(struct task_struct *p,
				     struct task_ctx *taskc,
				     struct cpu_ctx *cpuc)
{
	u64 now = scx_bpf_now();
	u64 offline_gap = get_suspended_duration_and_reset(cpuc);
	u64 ran = time_delta(now, taskc->last_running_clk + offline_gap);

	/* Task runtime statistics. */
	taskc->acc_runtime += ran;
	taskc->avg_runtime = calc_avg(taskc->avg_runtime, taskc->acc_runtime);
	taskc->last_stopping_clk = now;

	/* Service time is runtime scaled down by the task's weight. */
	taskc->svc_time += ran / p->scx.weight;

	/* The criticality inherited from a waker lasts for one run only. */
	taskc->lat_cri_waker = 0;

	cpuc->tot_svc_time += taskc->svc_time;

	/* Keep the global service-time high-water mark up to date. */
	if (READ_ONCE(cur_svc_time) < taskc->svc_time)
		WRITE_ONCE(cur_svc_time, taskc->svc_time);

	cpuc->tot_sc_time += scale_cap_freq(ran, cpuc->cpu_id);

	reset_lock_futex_boost(taskc, cpuc);
	taskc->lock_holder_xted = false;
}
/*
 * Return the virtual deadline of task @p: the current logical clock plus
 * the task's deadline delta. The delta is computed first, so the clock is
 * sampled as late as possible.
 */
static u64 calc_when_to_run(struct task_struct *p, struct task_ctx *taskc)
{
	u64 delta = calc_virtual_deadline_delta(p, taskc);

	return READ_ONCE(cur_logical_clk) + delta;
}
/*
 * select_cpu callback: choose a CPU for the waking task @p.
 *
 * Returns @prev_cpu when the task has no context. Otherwise an idle CPU
 * is searched for with pick_idle_cpu(). If one is found and the DSQ of
 * its compute domain is empty, the task is inserted directly into the
 * local DSQ with a fresh deadline and slice. In every other case the
 * picked CPU is returned, or @prev_cpu when the pick is negative.
 */
s32 BPF_STRUCT_OPS(lavd_select_cpu, struct task_struct *p, s32 prev_cpu,
		   u64 wake_flags)
{
	struct task_ctx *taskc = get_task_ctx(p);
	struct pick_ctx ictx = {
		.p = p,
		.taskc = taskc,
		.prev_cpu = prev_cpu,
		.wake_flags = wake_flags,
	};
	struct cpu_ctx *cpuc;
	bool found_idle = false;
	s32 cpu_id;

	if (!taskc)
		return prev_cpu;

	/* A synchronous wakeup counts towards the wakeup boost. */
	if (wake_flags & SCX_WAKE_SYNC)
		taskc->wakeup_ft += 1;

	cpu_id = pick_idle_cpu(&ictx, &found_idle);
	if (!found_idle)
		goto fallback;

	cpuc = get_cpu_ctx_id(cpu_id);
	if (!cpuc) {
		scx_bpf_error("Failed to lookup cpu_ctx: %d", cpu_id);
		return cpu_id;
	}

	/* Direct dispatch only when nothing waits in the domain's DSQ. */
	if (scx_bpf_dsq_nr_queued(cpuc->cpdom_id))
		goto fallback;

	p->scx.dsq_vtime = calc_when_to_run(p, taskc);
	p->scx.slice = calc_time_slice(taskc);
	scx_bpf_dsq_insert(p, SCX_DSQ_LOCAL, p->scx.slice, 0);
	return cpu_id;

fallback:
	return cpu_id >= 0 ? cpu_id : prev_cpu;
}
/*
 * A task may be dispatched straight to @cpu only when that CPU is idle,
 * is a valid (non-negative) CPU id, and the DSQ @dsq_id has no queued
 * tasks.
 */
static bool can_direct_dispatch(u64 dsq_id, s32 cpu, bool is_idle)
{
	if (!is_idle || cpu < 0)
		return false;

	return scx_bpf_dsq_nr_queued(dsq_id) == 0;
}
/*
 * enqueue callback: place task @p on a dispatch queue.
 *
 * The task receives a new virtual deadline and slice, then either goes
 * directly to the local DSQ of an idle CPU (when can_direct_dispatch()
 * allows it) or is inserted by vtime into the DSQ chosen by
 * pick_proper_dsq(). Afterwards an idle target CPU is kicked; if there is
 * none and preemption is enabled, a victim CPU is looked for instead.
 */
void BPF_STRUCT_OPS(lavd_enqueue, struct task_struct *p, u64 enq_flags)
{
	struct task_ctx *taskc = get_task_ctx(p);
	s32 task_cpu, cpu = -ENOENT;
	bool is_idle = false;
	u64 dsq_id;

	if (!taskc)
		return;

	/* A wakeup-triggered enqueue counts towards the wakeup boost. */
	if (enq_flags & SCX_ENQ_WAKEUP)
		taskc->wakeup_ft += 1;

	p->scx.dsq_vtime = calc_when_to_run(p, taskc);
	p->scx.slice = calc_time_slice(taskc);

	task_cpu = scx_bpf_task_cpu(p);
	dsq_id = pick_proper_dsq(p, taskc, task_cpu, &cpu, &is_idle);

	if (can_direct_dispatch(dsq_id, cpu, is_idle))
		scx_bpf_dsq_insert(p, SCX_DSQ_LOCAL_ON | cpu, p->scx.slice,
				   enq_flags);
	else
		scx_bpf_dsq_insert_vtime(p, dsq_id, p->scx.slice,
					 p->scx.dsq_vtime, enq_flags);

	/* Wake the idle CPU, or else try to preempt a running task. */
	if (is_idle && cpu >= 0)
		scx_bpf_kick_cpu(cpu, SCX_KICK_IDLE);
	else if (!no_preemption)
		try_find_and_kick_victim_cpu(p, taskc, dsq_id);
}
/*
 * dispatch callback: decide whether @cpu should pull a task from the DSQ
 * of its compute domain (cpuc->cpdom_id) and, if so, do it through
 * consume_task().
 *
 * @cpu:  CPU that is asking for work.
 * @prev: task that was running on @cpu; may be NULL.
 *
 * A CPU that is in neither the active nor the overflow cpumask consumes
 * only when a task (either @prev or one queued on the DSQ) can run
 * nowhere else; in some of those cases the CPU is also added to the
 * overflow set.
 */
void BPF_STRUCT_OPS(lavd_dispatch, s32 cpu, struct task_struct *prev)
{
struct cpu_ctx *cpuc;
struct task_ctx *taskc_prev = NULL;
struct bpf_cpumask *active, *ovrflw;
struct task_struct *p;
u64 dsq_id;
s32 new_cpu;
bool try_consume;
cpuc = get_cpu_ctx_id(cpu);
if (!cpuc) {
scx_bpf_error("Failed to lookup cpu_ctx %d", cpu);
return;
}
dsq_id = cpuc->cpdom_id;
/*
 * @prev is still queued and this CPU is flagged as running a lock
 * holder: renew prev's slice, clear the lock/futex boost, mark the
 * extension in lock_holder_xted, and consume nothing.
 */
if (prev && (prev->scx.flags & SCX_TASK_QUEUED) && cpuc->lock_holder) {
taskc_prev = get_task_ctx(prev);
if (!taskc_prev) {
scx_bpf_error("Failed to look up task context");
return;
}
prev->scx.slice = calc_time_slice(taskc_prev);
reset_lock_futex_boost(taskc_prev, cpuc);
taskc_prev->lock_holder_xted = true;
return;
}
/* When use_full_cpus() holds, skip the active/overflow filtering. */
if (use_full_cpus())
goto consume_out;
/* Every path below that leaves this section must drop the RCU lock. */
bpf_rcu_read_lock();
active = active_cpumask;
ovrflw = ovrflw_cpumask;
if (!active || !ovrflw) {
scx_bpf_error("Failed to prepare cpumasks.");
try_consume = false;
goto unlock_out;
}
/* A CPU already in the active or overflow set may consume. */
if (bpf_cpumask_test_cpu(cpu, cast_mask(active)) ||
bpf_cpumask_test_cpu(cpu, cast_mask(ovrflw))) {
bpf_rcu_read_unlock();
goto consume_out;
}
/*
 * This CPU is in neither set. Check whether @prev ties it to this CPU:
 *  - pinned: add this CPU to the overflow set and consume;
 *  - migration disabled: consume without touching the overflow set;
 *  - affinitized, allowed here, but allowed on no active or overflow
 *    CPU: add this CPU to the overflow set and consume.
 */
if (prev) {
if (is_pinned(prev)) {
bpf_cpumask_set_cpu(cpu, ovrflw);
bpf_rcu_read_unlock();
goto consume_out;
} else if (is_migration_disabled(prev)) {
bpf_rcu_read_unlock();
goto consume_out;
}
taskc_prev = get_task_ctx(prev);
if (taskc_prev && taskc_prev->is_affinitized &&
bpf_cpumask_test_cpu(cpu, prev->cpus_ptr) &&
!bpf_cpumask_intersects(cast_mask(active), prev->cpus_ptr) &&
!bpf_cpumask_intersects(cast_mask(ovrflw), prev->cpus_ptr)) {
bpf_cpumask_set_cpu(cpu, ovrflw);
bpf_rcu_read_unlock();
goto consume_out;
}
}
/*
 * Walk the tasks queued on the domain's DSQ, looking for one that can
 * run only on a CPU outside the active/overflow sets.
 */
try_consume = false;
bpf_for_each(scx_dsq, p, dsq_id, 0) {
struct task_ctx *taskc;
/*
 * Re-acquire the task by pid; the returned reference is released with
 * bpf_task_release() on every path below.
 * NOTE(review): presumably needed because the verifier does not trust
 * the iterator's pointer -- confirm.
 */
p = bpf_task_from_pid(p->pid);
if (!p)
break;
if (is_pinned(p)) {
new_cpu = scx_bpf_task_cpu(p);
/* Pinned to this CPU: join the overflow set and consume. */
if (new_cpu == cpu) {
bpf_cpumask_set_cpu(new_cpu, ovrflw);
bpf_task_release(p);
try_consume = true;
break;
}
/* Pinned elsewhere: add that CPU to overflow; kick it if newly added. */
if (!bpf_cpumask_test_and_set_cpu(new_cpu, ovrflw))
scx_bpf_kick_cpu(new_cpu, SCX_KICK_IDLE);
bpf_task_release(p);
continue;
} else if (is_migration_disabled(p)) {
new_cpu = scx_bpf_task_cpu(p);
/* Migration disabled on this CPU: consume, overflow set unchanged. */
if (new_cpu == cpu) {
bpf_task_release(p);
try_consume = true;
break;
}
bpf_task_release(p);
continue;
}
taskc = get_task_ctx(p);
/*
 * Skip tasks that are not affinitized, or that are allowed on at least
 * one active or overflow CPU.
 */
if(taskc && (!taskc->is_affinitized ||
bpf_cpumask_intersects(cast_mask(active), p->cpus_ptr) ||
bpf_cpumask_intersects(cast_mask(ovrflw), p->cpus_ptr))) {
bpf_task_release(p);
continue;
}
/*
 * Pick a CPU from the task's allowed mask via find_cpu_in(). If it is
 * this CPU, join the overflow set and consume; otherwise add the picked
 * CPU to the overflow set and kick it if it was newly added.
 */
new_cpu = find_cpu_in(p->cpus_ptr, cpuc);
if (new_cpu >= 0) {
if (new_cpu == cpu) {
bpf_cpumask_set_cpu(new_cpu, ovrflw);
bpf_task_release(p);
try_consume = true;
break;
}
else if (!bpf_cpumask_test_and_set_cpu(new_cpu, ovrflw))
scx_bpf_kick_cpu(new_cpu, SCX_KICK_IDLE);
}
bpf_task_release(p);
}
unlock_out:
bpf_rcu_read_unlock();
/* No task requires this CPU: leave without consuming. */
if (!try_consume)
return;
consume_out:
if (consume_task(dsq_id))
return;
/*
 * Nothing was consumed. If @prev is still queued, refresh its slice,
 * reusing its task context when it was already looked up above.
 */
if (prev && prev->scx.flags & SCX_TASK_QUEUED) {
taskc_prev = taskc_prev ?: get_task_ctx(prev);
if (taskc_prev)
prev->scx.slice = calc_time_slice(taskc_prev);
}
}
/*
 * runnable callback: task @p became runnable.
 *
 * Always resets the accumulated runtime of @p. When the transition is a
 * wakeup (SCX_ENQ_WAKEUP), the current task is treated as the waker: its
 * wake frequency is updated from the time since it last woke a task, and
 * @p inherits the waker's latency criticality in lat_cri_waker.
 */
void BPF_STRUCT_OPS(lavd_runnable, struct task_struct *p, u64 enq_flags)
{
	struct task_ctx *wakee_ctx, *waker_ctx;
	struct task_struct *waker;
	u64 ts, since_last_wake;

	wakee_ctx = get_task_ctx(p);
	if (!wakee_ctx) {
		scx_bpf_error("Failed to lookup task_ctx for task %d", p->pid);
		return;
	}

	/* A new runnable period starts from zero accumulated runtime. */
	wakee_ctx->acc_runtime = 0;

	/* Waker/wakee accounting applies only to real wakeups. */
	if (!(enq_flags & SCX_ENQ_WAKEUP))
		return;

	waker = bpf_get_current_task_btf();
	waker_ctx = get_task_ctx(waker);
	if (!waker_ctx)
		return;

	ts = scx_bpf_now();
	since_last_wake = time_delta(ts, waker_ctx->last_runnable_clk);
	waker_ctx->wake_freq = calc_avg_freq(waker_ctx->wake_freq,
					     since_last_wake);
	waker_ctx->last_runnable_clk = ts;

	/* Let the wakee inherit the waker's latency criticality. */
	wakee_ctx->lat_cri_waker = waker_ctx->lat_cri;
}
/*
 * running callback: task @p is about to run on its CPU.
 *
 * Updates task and CPU statistics, assigns a freshly calculated time
 * slice, refreshes the CPU performance target, publishes the task's
 * latency criticality and estimated stopping time in the CPU context,
 * and finally gives the introspection facility a chance to run.
 */
void BPF_STRUCT_OPS(lavd_running, struct task_struct *p)
{
	u64 now = scx_bpf_now();
	struct cpu_ctx *cpuc = get_cpu_ctx_task(p);
	struct task_ctx *taskc = get_task_ctx(p);

	if (!cpuc || !taskc) {
		scx_bpf_error("Failed to lookup context for task %d", p->pid);
		return;
	}

	update_stat_for_running(p, taskc, cpuc, now);

	/* The slice is recalculated here, after the statistics update. */
	p->scx.slice = calc_time_slice(taskc);

	update_cpuperf_target(cpuc);

	/* Publish what is running on this CPU in its context. */
	cpuc->lat_cri = taskc->lat_cri;
	cpuc->stopping_tm_est_ns = get_est_stopping_time(taskc, now);

	try_proc_introspec_cmd(p, taskc);
}
/*
 * stopping callback: task @p is being taken off its CPU. Looks up the
 * CPU and task contexts and updates the stopping statistics. @runnable
 * is not used.
 */
void BPF_STRUCT_OPS(lavd_stopping, struct task_struct *p, bool runnable)
{
	struct cpu_ctx *cpuc = get_cpu_ctx_task(p);
	struct task_ctx *taskc = get_task_ctx(p);

	if (!cpuc || !taskc) {
		scx_bpf_error("Failed to lookup context for task %d", p->pid);
		return;
	}

	update_stat_for_stopping(p, taskc, cpuc);
}
/*
 * quiescent callback: task @p is no longer runnable.
 *
 * Only a transition caused by sleeping (SCX_DEQ_SLEEP) is accounted: the
 * time since the previous quiescent point is folded into the task's wait
 * frequency and the quiescent timestamp is updated.
 */
void BPF_STRUCT_OPS(lavd_quiescent, struct task_struct *p, u64 deq_flags)
{
	struct cpu_ctx *cpuc = get_cpu_ctx_task(p);
	struct task_ctx *taskc = get_task_ctx(p);
	u64 ts, since_last_wait;

	if (!cpuc || !taskc) {
		scx_bpf_error("Failed to lookup context for task %d", p->pid);
		return;
	}

	/* Dequeues for reasons other than sleeping are not waits. */
	if (!(deq_flags & SCX_DEQ_SLEEP))
		return;

	ts = scx_bpf_now();
	since_last_wait = time_delta(ts, taskc->last_quiescent_clk);
	taskc->wait_freq = calc_avg_freq(taskc->wait_freq, since_last_wait);
	taskc->last_quiescent_clk = ts;
}
/*
 * Prepare @cpuc for CPU @cpu_id coming online at time @now.
 *
 * The CPU is added to its compute domain's cpumask (when that mask can
 * be looked up) and the per-CPU state is reset. is_online is set last,
 * after a compiler barrier, so the compiler cannot move the reset stores
 * past it.
 */
static void cpu_ctx_init_online(struct cpu_ctx *cpuc, u32 cpu_id, u64 now)
{
	struct bpf_cpumask *dom_mask;

	bpf_rcu_read_lock();
	dom_mask = MEMBER_VPTR(cpdom_cpumask, [cpuc->cpdom_id]);
	if (dom_mask)
		bpf_cpumask_set_cpu(cpu_id, dom_mask);
	bpf_rcu_read_unlock();

	cpuc->idle_start_clk = 0;
	cpuc->lat_cri = 0;
	cpuc->stopping_tm_est_ns = SCX_SLICE_INF;
	WRITE_ONCE(cpuc->online_clk, now);

	/* Keep the state reset ahead of the online flag. */
	barrier();
	cpuc->is_online = true;
}
/*
 * Prepare @cpuc for CPU @cpu_id going offline at time @now.
 *
 * The CPU is removed from its compute domain's cpumask (when that mask
 * can be looked up). is_online is cleared first; the remaining state is
 * reset only after a compiler barrier.
 */
static void cpu_ctx_init_offline(struct cpu_ctx *cpuc, u32 cpu_id, u64 now)
{
	struct bpf_cpumask *dom_mask;

	bpf_rcu_read_lock();
	dom_mask = MEMBER_VPTR(cpdom_cpumask, [cpuc->cpdom_id]);
	if (dom_mask)
		bpf_cpumask_clear_cpu(cpu_id, dom_mask);
	bpf_rcu_read_unlock();

	cpuc->idle_start_clk = 0;
	WRITE_ONCE(cpuc->offline_clk, now);
	cpuc->is_online = false;

	/* Keep the offline flag ahead of the state reset. */
	barrier();
	cpuc->lat_cri = 0;
	cpuc->stopping_tm_est_ns = SCX_SLICE_INF;
}
/*
 * cpu_online callback: CPU @cpu came online. Re-initialises its context,
 * atomically bumps the online CPU count and refreshes system statistics.
 */
void BPF_STRUCT_OPS(lavd_cpu_online, s32 cpu)
{
	u64 now = scx_bpf_now();
	struct cpu_ctx *cpuc = get_cpu_ctx_id(cpu);

	if (!cpuc) {
		scx_bpf_error("Failed to lookup cpu_ctx %d", cpu);
		return;
	}

	cpu_ctx_init_online(cpuc, cpu, now);
	__sync_fetch_and_add(&nr_cpus_onln, 1);

	update_sys_stat();
}
/*
 * cpu_offline callback: CPU @cpu went offline. Resets its context,
 * atomically lowers the online CPU count and refreshes system statistics.
 */
void BPF_STRUCT_OPS(lavd_cpu_offline, s32 cpu)
{
	u64 now = scx_bpf_now();
	struct cpu_ctx *cpuc = get_cpu_ctx_id(cpu);

	if (!cpuc) {
		scx_bpf_error("Failed to lookup cpu_ctx %d", cpu);
		return;
	}

	cpu_ctx_init_offline(cpuc, cpu, now);
	__sync_fetch_and_sub(&nr_cpus_onln, 1);

	update_sys_stat();
}
/*
 * update_idle callback: CPU @cpu enters (@idle true) or leaves (@idle
 * false) the idle state.
 *
 * On entry the idle start time is recorded and the CPU's preemption
 * information is reset. On exit the elapsed idle time is added to
 * idle_total, but only by whoever wins the compare-and-swap that clears
 * idle_start_clk, so one idle period is not counted twice.
 */
void BPF_STRUCT_OPS(lavd_update_idle, s32 cpu, bool idle)
{
	struct cpu_ctx *cpuc = get_cpu_ctx_id(cpu);
	u64 now, idle_since, idle_ns;

	if (!cpuc) {
		scx_bpf_error("Failed to lookup cpu_ctx %d", cpu);
		return;
	}

	now = scx_bpf_now();

	if (idle) {
		cpuc->idle_start_clk = now;
		reset_cpu_preemption_info(cpuc, false);
		return;
	}

	/* Leaving idle: nothing to account if no idle period is open. */
	idle_since = cpuc->idle_start_clk;
	if (idle_since == 0)
		return;

	idle_ns = time_delta(now, idle_since);
	if (__sync_bool_compare_and_swap(&cpuc->idle_start_clk, idle_since, 0))
		cpuc->idle_total += idle_ns;
}
/*
 * set_cpumask callback: the CPU affinity of task @p changed.
 *
 * The task is flagged as affinitized when the weight of p->cpus_ptr
 * differs from nr_cpu_ids, and its core-type information is recomputed
 * from @cpumask.
 */
void BPF_STRUCT_OPS(lavd_set_cpumask, struct task_struct *p,
		    const struct cpumask *cpumask)
{
	struct task_ctx *taskc = get_task_ctx(p);

	if (!taskc) {
		scx_bpf_error("task_ctx_stor first lookup failed");
		return;
	}

	taskc->is_affinitized = bpf_cpumask_weight(p->cpus_ptr) != nr_cpu_ids;
	set_on_core_type(taskc, cpumask);
}
/*
 * cpu_acquire callback: CPU @cpu is available to this scheduler again.
 *
 * The time elapsed since the matching release (cpu_release_clk) is
 * scaled by scale_cap_freq() and added to the CPU's tot_sc_time, and the
 * cached current performance level is refreshed. @args is not used.
 */
void BPF_STRUCT_OPS(lavd_cpu_acquire, s32 cpu,
		    struct scx_cpu_acquire_args *args)
{
	struct cpu_ctx *cpuc = get_cpu_ctx_id(cpu);
	u64 away_ns;

	if (!cpuc) {
		scx_bpf_error("Failed to lookup cpu_ctx %d", cpu);
		return;
	}

	away_ns = time_delta(scx_bpf_now(), cpuc->cpu_release_clk);
	cpuc->tot_sc_time += scale_cap_freq(away_ns, cpu);

	cpuc->cpuperf_cur = scx_bpf_cpuperf_cur(cpu);
}
/*
 * cpu_release callback: CPU @cpu is being taken away from this scheduler.
 *
 * Resets the CPU's preemption information, re-enqueues the tasks on its
 * local DSQ, resets its performance target and records the release time
 * for lavd_cpu_acquire(). @args is not used.
 */
void BPF_STRUCT_OPS(lavd_cpu_release, s32 cpu,
		    struct scx_cpu_release_args *args)
{
	struct cpu_ctx *cpuc = get_cpu_ctx_id(cpu);

	if (!cpuc) {
		scx_bpf_error("Failed to lookup cpu_ctx %d", cpu);
		return;
	}

	reset_cpu_preemption_info(cpuc, true);
	scx_bpf_reenqueue_local();
	reset_cpuperf_target(cpuc);

	/* Timestamp used to measure how long the CPU was away. */
	cpuc->cpu_release_clk = scx_bpf_now();
}
/*
 * enable callback: task @p starts being scheduled by this scheduler.
 * Its service time is set to the current global service time
 * (cur_svc_time).
 */
void BPF_STRUCT_OPS(lavd_enable, struct task_struct *p)
{
	struct task_ctx *taskc = get_task_ctx(p);

	if (!taskc) {
		scx_bpf_error("task_ctx_stor first lookup failed");
		return;
	}

	taskc->svc_time = READ_ONCE(cur_svc_time);
}
/*
 * Initialise the scheduler context @taskc of task @p.
 *
 * The context is zeroed, then seeded with: the affinitized flag (weight
 * of p->cpus_ptr differs from nr_cpu_ids), the current time as both the
 * last running and last stopping timestamps, slice_max_ns as the average
 * runtime, the system average as the service time, and the core-type
 * information derived from p->cpus_ptr.
 */
static void init_task_ctx(struct task_struct *p, struct task_ctx *taskc)
{
	u64 ts = scx_bpf_now();

	__builtin_memset(taskc, 0, sizeof(*taskc));

	taskc->is_affinitized = bpf_cpumask_weight(p->cpus_ptr) != nr_cpu_ids;
	taskc->last_running_clk = ts;
	taskc->last_stopping_clk = ts;
	taskc->avg_runtime = slice_max_ns;
	taskc->svc_time = sys_stat.avg_svc_time;

	set_on_core_type(taskc, p->cpus_ptr);
}
/*
 * init_task callback: create and initialise the task-local storage of
 * task @p. @args is not used.
 *
 * Returns 0 on success, -ESRCH when @p is NULL, or -ENOMEM when the
 * storage cannot be obtained.
 */
s32 BPF_STRUCT_OPS(lavd_init_task, struct task_struct *p,
		   struct scx_init_task_args *args)
{
	struct task_ctx *taskc;

	if (!p) {
		scx_bpf_error("NULL task_struct pointer received");
		return -ESRCH;
	}

	/* Create the storage if the task does not have one yet. */
	taskc = bpf_task_storage_get(&task_ctx_stor, p, 0,
				     BPF_LOCAL_STORAGE_GET_F_CREATE);
	if (!taskc) {
		scx_bpf_error("task_ctx_stor first lookup failed");
		return -ENOMEM;
	}

	init_task_ctx(p, taskc);
	return 0;
}
/*
 * Create one DSQ per valid compute domain.
 *
 * Invalid domains are skipped. nr_cpdoms ends up as the index of the
 * last valid domain plus one. @now is not used.
 *
 * Returns 0 on success, -ESRCH when a domain context cannot be looked
 * up, or the error returned by scx_bpf_create_dsq().
 */
static s32 init_cpdoms(u64 now)
{
	struct cpdom_ctx *dom;
	int idx, ret;

	for (idx = 0; idx < LAVD_CPDOM_MAX_NR; idx++) {
		dom = MEMBER_VPTR(cpdom_ctxs, [idx]);
		if (!dom) {
			scx_bpf_error("Failed to lookup cpdom_ctx for %d", idx);
			return -ESRCH;
		}
		if (!dom->is_valid)
			continue;

		ret = scx_bpf_create_dsq(dom->id, dom->node_id);
		if (ret) {
			scx_bpf_error("Failed to create a DSQ for cpdom %llu on NUMA node %d",
				      dom->id, dom->node_id);
			return ret;
		}

		nr_cpdoms = idx + 1;
	}

	return 0;
}
/*
 * Allocate a new BPF cpumask and store it in *@p_cpumask. A cpumask that
 * was previously stored there is released.
 *
 * Returns 0 on success or -ENOMEM when the allocation fails, in which
 * case *@p_cpumask is left untouched.
 */
static int calloc_cpumask(struct bpf_cpumask **p_cpumask)
{
	struct bpf_cpumask *fresh, *stale;

	fresh = bpf_cpumask_create();
	if (!fresh)
		return -ENOMEM;

	/* Swap the new mask in; we now own whatever was stored before. */
	stale = bpf_kptr_xchg(p_cpumask, fresh);
	if (stale)
		bpf_cpumask_release(stale);

	return 0;
}
/*
 * Allocate the global cpumasks (active, overflow, turbo, big, little).
 *
 * The active mask starts as a copy of the online cpumask, and
 * nr_cpus_onln is set to the number of online CPUs.
 *
 * Returns 0 on success or -ENOMEM when a mask cannot be allocated or the
 * active mask is unexpectedly missing after allocation.
 */
static int init_cpumasks(void)
{
	const struct cpumask *online_cpumask;
	struct bpf_cpumask *active;
	int err;

	bpf_rcu_read_lock();

	err = calloc_cpumask(&active_cpumask);
	if (err)
		goto out;

	/*
	 * A missing active mask used to fall through with err == 0, which
	 * reported success although no mask was set up. Fail with the same
	 * error init_per_cpu_ctx() uses for missing cpumasks.
	 */
	active = active_cpumask;
	if (!active) {
		err = -ENOMEM;
		goto out;
	}

	/* Seed the active set with every CPU that is currently online. */
	online_cpumask = scx_bpf_get_online_cpumask();
	nr_cpus_onln = bpf_cpumask_weight(online_cpumask);
	bpf_cpumask_copy(active, online_cpumask);
	scx_bpf_put_cpumask(online_cpumask);

	err = calloc_cpumask(&ovrflw_cpumask);
	if (err)
		goto out;

	err = calloc_cpumask(&turbo_cpumask);
	if (err)
		goto out;

	err = calloc_cpumask(&big_cpumask);
	if (err)
		goto out;

	err = calloc_cpumask(&little_cpumask);
out:
	bpf_rcu_read_unlock();
	return err;
}
/*
 * Initialise every per-CPU context and the per-compute-domain CPU
 * bookkeeping.
 *
 * @now: timestamp used to seed the per-CPU clocks.
 *
 * Three passes are made:
 *   1. per CPU: allocate the temporary cpumasks, reset the state, copy
 *      the capacity / big / turbo attributes and fill the big, little and
 *      turbo cpumasks;
 *   2. per valid compute domain: assign member CPUs to the domain and
 *      count its (active) CPUs;
 *   3. per CPU: emit a debug line.
 *
 * Returns 0 on success, -ENOMEM when a cpumask is missing or cannot be
 * allocated, or -ESRCH when a context lookup fails.
 */
static s32 init_per_cpu_ctx(u64 now)
{
struct cpu_ctx *cpuc;
struct bpf_cpumask *turbo, *big, *little, *active, *ovrflw, *cd_cpumask;
const struct cpumask *online_cpumask;
struct cpdom_ctx *cpdomc;
int cpu, i, j, err = 0;
u64 cpdom_id;
u32 sum_capacity = 0, big_capacity = 0;
bpf_rcu_read_lock();
/* Released with scx_bpf_put_cpumask() at unlock_out on every path. */
online_cpumask = scx_bpf_get_online_cpumask();
turbo = turbo_cpumask;
big = big_cpumask;
little = little_cpumask;
active = active_cpumask;
ovrflw = ovrflw_cpumask;
if (!turbo || !big || !little || !active || !ovrflw) {
scx_bpf_error("Failed to prepare cpumasks.");
err = -ENOMEM;
goto unlock_out;
}
/* Pass 1: initialise each CPU context. */
bpf_for(cpu, 0, nr_cpu_ids) {
/* CPUs at or beyond LAVD_CPU_ID_MAX are not initialised. */
if (cpu >= LAVD_CPU_ID_MAX)
break;
cpuc = get_cpu_ctx_id(cpu);
if (!cpuc) {
scx_bpf_error("Failed to lookup cpu_ctx: %d", cpu);
err = -ESRCH;
goto unlock_out;
}
/* Allocate the per-CPU temporary cpumasks. */
err = calloc_cpumask(&cpuc->tmp_a_mask);
if (err)
goto unlock_out;
err = calloc_cpumask(&cpuc->tmp_o_mask);
if (err)
goto unlock_out;
err = calloc_cpumask(&cpuc->tmp_l_mask);
if (err)
goto unlock_out;
err = calloc_cpumask(&cpuc->tmp_i_mask);
if (err)
goto unlock_out;
err = calloc_cpumask(&cpuc->tmp_t_mask);
if (err)
goto unlock_out;
err = calloc_cpumask(&cpuc->tmp_t2_mask);
if (err)
goto unlock_out;
err = calloc_cpumask(&cpuc->tmp_t3_mask);
if (err)
goto unlock_out;
/* Reset the state and copy the static attributes of this CPU. */
cpuc->cpu_id = cpu;
cpuc->idle_start_clk = 0;
cpuc->lat_cri = 0;
cpuc->stopping_tm_est_ns = SCX_SLICE_INF;
cpuc->online_clk = now;
cpuc->offline_clk = now;
cpuc->cpu_release_clk = now;
cpuc->is_online = bpf_cpumask_test_cpu(cpu, online_cpumask);
cpuc->capacity = cpu_capacity[cpu];
cpuc->big_core = cpu_big[cpu];
cpuc->turbo_core = cpu_turbo[cpu];
cpuc->cpdom_poll_pos = cpu % LAVD_CPDOM_MAX_NR;
cpuc->min_perf_cri = LAVD_SCALE;
cpuc->futex_op = LAVD_FUTEX_OP_INVALID;
sum_capacity += cpuc->capacity;
/* Classify the CPU as big or little, and optionally as turbo. */
if (cpuc->big_core) {
nr_cpus_big++;
big_capacity += cpuc->capacity;
bpf_cpumask_set_cpu(cpu, big);
}
else {
bpf_cpumask_set_cpu(cpu, little);
have_little_core = true;
}
if (cpuc->turbo_core) {
bpf_cpumask_set_cpu(cpu, turbo);
have_turbo_core = true;
}
}
/*
 * Share of the total capacity provided by big cores, as a fixed-point
 * value scaled by LAVD_SHIFT.
 * NOTE(review): sum_capacity is zero when no CPU reported a capacity --
 * confirm that this cannot happen here.
 */
default_big_core_scale = (big_capacity << LAVD_SHIFT) / sum_capacity;
/* Pass 2: assign CPUs to their compute domains. */
bpf_for(cpdom_id, 0, nr_cpdoms) {
if (cpdom_id >= LAVD_CPDOM_MAX_NR)
break;
cpdomc = MEMBER_VPTR(cpdom_ctxs, [cpdom_id]);
cd_cpumask = MEMBER_VPTR(cpdom_cpumask, [cpdom_id]);
if (!cpdomc || !cd_cpumask) {
scx_bpf_error("Failed to lookup cpdom_ctx for %llu", cpdom_id);
err = -ESRCH;
goto unlock_out;
}
if (!cpdomc->is_valid)
continue;
/* __cpumask is scanned as 64-bit words; bit j of word i is CPU i * 64 + j. */
bpf_for(i, 0, LAVD_CPU_ID_MAX/64) {
u64 cpumask = cpdomc->__cpumask[i];
bpf_for(j, 0, 64) {
if (cpumask & 0x1LLU << j) {
cpu = (i * 64) + j;
cpuc = get_cpu_ctx_id(cpu);
if (!cpuc) {
scx_bpf_error("Failed to lookup cpu_ctx: %d", cpu);
err = -ESRCH;
goto unlock_out;
}
cpuc->cpdom_id = cpdomc->id;
cpuc->cpdom_alt_id = cpdomc->alt_id;
/* Only online CPUs count as active members of the domain. */
if (bpf_cpumask_test_cpu(cpu, online_cpumask)) {
bpf_cpumask_set_cpu(cpu, cd_cpumask);
cpdomc->nr_active_cpus++;
cpdomc->cap_sum_active_cpus += cpuc->capacity;
}
cpdomc->nr_cpus++;
}
}
}
}
/* Pass 3: print the resulting per-CPU configuration for debugging. */
bpf_for(cpu, 0, nr_cpu_ids) {
cpuc = get_cpu_ctx_id(cpu);
if (!cpuc) {
scx_bpf_error("Failed to lookup cpu_ctx: %d", cpu);
err = -ESRCH;
goto unlock_out;
}
debugln("cpu[%d] capacity: %d, big_core: %d, turbo_core: %d, "
"cpdom_id: %llu, alt_id: %llu",
cpu, cpuc->capacity, cpuc->big_core, cpuc->turbo_core,
cpuc->cpdom_id, cpuc->cpdom_alt_id);
}
unlock_out:
scx_bpf_put_cpumask(online_cpumask);
bpf_rcu_read_unlock();
return err;
}
/*
 * init callback: set up the scheduler.
 *
 * Runs, in order, the compute-domain DSQ setup, the global cpumask
 * allocation, the per-CPU context initialisation and the system
 * statistics initialisation, stopping at the first failure. On success
 * the autopilot low-utilisation state is initialised and the global
 * logical clock and service time are reset to zero.
 *
 * Returns 0 on success or the error of the first step that failed.
 */
s32 BPF_STRUCT_OPS_SLEEPABLE(lavd_init)
{
	u64 now = scx_bpf_now();
	int err;

	/* Each step runs only if all earlier ones succeeded. */
	err = init_cpdoms(now);
	if (!err)
		err = init_cpumasks();
	if (!err)
		err = init_per_cpu_ctx(now);
	if (!err)
		err = init_sys_stat(now);
	if (err)
		return err;

	init_autopilot_low_util();

	WRITE_ONCE(cur_logical_clk, 0);
	WRITE_ONCE(cur_svc_time, 0);

	return 0;
}
/*
 * exit callback: record the exit information @ei into uei via UEI_RECORD.
 * NOTE(review): presumably uei is what user space reads to learn why the
 * scheduler exited -- confirm against the loader.
 */
void BPF_STRUCT_OPS(lavd_exit, struct scx_exit_info *ei)
{
UEI_RECORD(uei, ei);
}
/*
 * Operations table of the "lavd" scheduler: binds each callback slot to
 * the lavd_* implementation defined in this file and sets timeout_ms to
 * 30000.
 */
SCX_OPS_DEFINE(lavd_ops,
.select_cpu = (void *)lavd_select_cpu,
.enqueue = (void *)lavd_enqueue,
.dispatch = (void *)lavd_dispatch,
.runnable = (void *)lavd_runnable,
.running = (void *)lavd_running,
.stopping = (void *)lavd_stopping,
.quiescent = (void *)lavd_quiescent,
.cpu_online = (void *)lavd_cpu_online,
.cpu_offline = (void *)lavd_cpu_offline,
.update_idle = (void *)lavd_update_idle,
.set_cpumask = (void *)lavd_set_cpumask,
.cpu_acquire = (void *)lavd_cpu_acquire,
.cpu_release = (void *)lavd_cpu_release,
.enable = (void *)lavd_enable,
.init_task = (void *)lavd_init_task,
.init = (void *)lavd_init,
.exit = (void *)lavd_exit,
.timeout_ms = 30000U,
.name = "lavd");