/* Utilization threshold below which the autopilot selects power-save mode
 * (computed at init by init_autopilot_low_util()). */
static u64 LAVD_AP_LOW_UTIL;
/* Topology flags: whether any turbo-capable / little (efficiency) cores exist. */
static bool have_turbo_core;
static bool have_little_core;
/* Per-CPU tables; 'const volatile' marks them as BPF rodata set before load. */
const volatile u16 cpu_order_performance[LAVD_CPU_ID_MAX]; /* CPU preference order in performance/balanced mode */
const volatile u16 cpu_order_powersave[LAVD_CPU_ID_MAX];   /* CPU preference order in power-save mode */
const volatile u16 cpu_capacity[LAVD_CPU_ID_MAX];          /* per-CPU relative compute capacity */
const volatile u8 cpu_big[LAVD_CPU_ID_MAX];                /* presumably non-zero for big cores — see cpu_turbo */
const volatile u8 cpu_turbo[LAVD_CPU_ID_MAX];              /* presumably non-zero for turbo-capable cores */
/* Compute-domain bookkeeping: count, per-domain contexts, per-domain cpumasks. */
static int nr_cpdoms;
struct cpdom_ctx cpdom_ctxs[LAVD_CPDOM_MAX_NR];
private(LAVD) struct bpf_cpumask cpdom_cpumask[LAVD_CPDOM_MAX_NR];
/* Fraction (LAVD_SCALE fixed point) of currently-active capacity that comes
 * from big cores; 'default' is the whole-system value used as a fallback. */
static u32 cur_big_core_scale;
static u32 default_big_core_scale;
/* Current power profile plus per-mode residency accounting in nanoseconds. */
volatile int power_mode;
volatile u64 last_power_mode_clk; /* timestamp of the last residency accounting */
volatile u64 performance_mode_ns;
volatile u64 balanced_mode_ns;
volatile u64 powersave_mode_ns;
/*
 * Decide whether @taskc should be treated as performance-critical.
 *
 * On a symmetric system (no little cores) every task qualifies. When the
 * task is eligible for both core types, its perf criticality is compared
 * against the system-wide threshold; otherwise big-core eligibility alone
 * decides.
 */
static bool is_perf_cri(struct task_ctx *taskc)
{
	bool ret;

	if (!have_little_core)
		return true;

	if (READ_ONCE(taskc->on_big) && READ_ONCE(taskc->on_little))
		ret = taskc->perf_cri >= sys_stat.thr_perf_cri;
	else
		ret = READ_ONCE(taskc->on_big);

	return ret;
}
/*
 * With probability 1/LAVD_CC_CPU_PIN_INTERVAL_DIV, drop @cpu from @cpumask.
 * Returns true when the CPU was cleared, so callers can tell whether the
 * CPU remains available.
 */
static bool clear_cpu_periodically(u32 cpu, struct bpf_cpumask *cpumask)
{
	bool do_clear = (bpf_get_prandom_u32() % LAVD_CC_CPU_PIN_INTERVAL_DIV) == 0;

	if (do_clear)
		bpf_cpumask_clear_cpu(cpu, cpumask);

	return do_clear;
}
/* Return the CPU preference order matching the current power profile. */
static const volatile u16 *get_cpu_order(void)
{
	return is_powersave_mode ? cpu_order_powersave : cpu_order_performance;
}
/*
 * Compute how many CPUs, taken in the current preference order, are needed
 * to cover the system's utilization demand. Returns nr_cpu_ids (all CPUs)
 * when demand exceeds what the ordered CPUs can provide or on any
 * out-of-range index.
 */
static int calc_nr_active_cpus(void)
{
	const volatile u16 *cpu_order;
	u64 req_cap, cap_cpu, cap_sum = 0;
	u16 cpu_id, i;

	/* Required capacity: average scaled utilization across all online CPUs. */
	req_cap = nr_cpus_onln * sys_stat.avg_sc_util;

	cpu_order = get_cpu_order();

	/* Walk CPUs in preference order, accumulating capacity until demand is met. */
	bpf_for(i, 0, nr_cpu_ids) {
		/* Bounds checks keep array accesses provably in range for the BPF verifier. */
		if (i >= LAVD_CPU_ID_MAX)
			return nr_cpu_ids;
		cpu_id = cpu_order[i];
		if (cpu_id >= LAVD_CPU_ID_MAX)
			return nr_cpu_ids;
		cap_cpu = cpu_capacity[cpu_id];
		/* Each CPU contributes its capacity scaled down by LAVD_CC_PER_CORE_SHIFT. */
		cap_sum += cap_cpu >> LAVD_CC_PER_CORE_SHIFT;
		if (cap_sum >= req_cap)
			return i+1;
	}

	/* Demand exceeds total derated capacity: activate every CPU. */
	return nr_cpu_ids;
}
/*
 * Shrink or grow the active CPU set to match current utilization.
 *
 * CPUs earlier in the preference order than the computed active count are
 * added to the active mask; CPUs that just fell out of the active window are
 * cleared; trailing CPUs are probabilistically evicted from the overflow
 * mask so none stays pinned there forever. Per-compute-domain active CPU
 * counts and capacity sums are recomputed from the temp accumulators.
 */
static void do_core_compaction(void)
{
	const volatile u16 *cpu_order = get_cpu_order();
	struct cpu_ctx *cpuc;
	struct bpf_cpumask *active, *ovrflw;
	struct cpdom_ctx *cpdomc;
	int nr_active, nr_active_old, cpu, i;
	u32 sum_capacity = 0, big_capacity = 0, nr_active_cpdoms = 0;
	bool clear;
	u64 cpdom_id;

	bpf_rcu_read_lock();

	active = active_cpumask;
	ovrflw = ovrflw_cpumask;
	if (!active || !ovrflw) {
		scx_bpf_error("Failed to prepare cpumasks.");
		goto unlock_out;
	}

	nr_active_old = sys_stat.nr_active;
	nr_active = calc_nr_active_cpus();

	bpf_for(i, 0, nr_cpu_ids) {
		if (i >= LAVD_CPU_ID_MAX)
			break;
		cpu = cpu_order[i];
		cpuc = get_cpu_ctx_id(cpu);
		if (!cpuc || !cpuc->is_online) {
			/* Offline CPUs belong to neither mask. */
			bpf_cpumask_clear_cpu(cpu, active);
			bpf_cpumask_clear_cpu(cpu, ovrflw);
			continue;
		}
		if (i < nr_active) {
			/* Within the active window: activate and wake if idle. */
			bpf_cpumask_set_cpu(cpu, active);
			bpf_cpumask_clear_cpu(cpu, ovrflw);
			scx_bpf_kick_cpu(cpu, SCX_KICK_IDLE);

			/* Accumulate per-domain stats into the temp counters. */
			cpdomc = MEMBER_VPTR(cpdom_ctxs, [cpuc->cpdom_id]);
			if (cpdomc) {
				cpdomc->cap_sum_temp += cpuc->capacity;
				cpdomc->nr_acpus_temp++;
			}
			sum_capacity += cpuc->capacity;
			if (cpuc->big_core)
				big_capacity += cpuc->capacity;
		} else if (i < nr_active_old) {
			/* Just dropped out of the active window. */
			bpf_cpumask_clear_cpu(cpu, active);
			bpf_cpumask_clear_cpu(cpu, ovrflw);
		} else {
			/*
			 * Tail CPUs: periodically evict from the overflow
			 * mask; kick the ones kept so they can go idle
			 * properly.
			 */
			bpf_cpumask_clear_cpu(cpu, active);
			clear = clear_cpu_periodically(cpu, ovrflw);
			if (!clear)
				scx_bpf_kick_cpu(cpu, SCX_KICK_IDLE);
		}
	}

	/*
	 * Guard against division by zero: if no online CPU contributed any
	 * capacity (e.g., every ordered CPU was offline), keep the previous
	 * big-core scale instead of computing a bogus one.
	 */
	if (sum_capacity)
		cur_big_core_scale = (big_capacity << LAVD_SHIFT) / sum_capacity;
	sys_stat.nr_active = nr_active;

	/* Publish per-domain stats and reset the temp accumulators. */
	bpf_for(cpdom_id, 0, nr_cpdoms) {
		if (cpdom_id >= LAVD_CPDOM_MAX_NR)
			break;
		cpdomc = MEMBER_VPTR(cpdom_ctxs, [cpdom_id]);
		if (!cpdomc)
			continue;
		WRITE_ONCE(cpdomc->nr_active_cpus, cpdomc->nr_acpus_temp);
		WRITE_ONCE(cpdomc->nr_acpus_temp, 0);
		WRITE_ONCE(cpdomc->cap_sum_active_cpus, cpdomc->cap_sum_temp);
		WRITE_ONCE(cpdomc->cap_sum_temp, 0);
		if (cpdomc->nr_active_cpus)
			nr_active_cpdoms++;
	}
	sys_stat.nr_active_cpdoms = nr_active_cpdoms;

unlock_out:
	bpf_rcu_read_unlock();
}
/*
 * Account the time elapsed since the last call to the residency counter of
 * the current power mode. On first use, only arm the clock (delta is 0).
 */
static void update_power_mode_time(void)
{
	u64 now = scx_bpf_now();
	u64 delta;
	int pm;

	if (last_power_mode_clk == 0)
		last_power_mode_clk = now;

	delta = time_delta(now, last_power_mode_clk);
	last_power_mode_clk = now;

	/* Snapshot the volatile mode once so the elapsed time is attributed consistently. */
	pm = power_mode;
	if (pm == LAVD_PM_PERFORMANCE)
		__sync_fetch_and_add(&performance_mode_ns, delta);
	else if (pm == LAVD_PM_BALANCED)
		__sync_fetch_and_add(&balanced_mode_ns, delta);
	else if (pm == LAVD_PM_POWERSAVE)
		__sync_fetch_and_add(&powersave_mode_ns, delta);
}
/*
 * Switch the scheduler to power profile @pm (@util is logged for debugging
 * only). Returns 0 on success or when already in @pm, -EINVAL for an
 * unknown mode.
 *
 * Fix: the original updated the residency clock and wrote power_mode
 * *before* validating @pm, so an invalid mode clobbered power_mode with a
 * garbage value (and accounted residency time) while still returning
 * -EINVAL. Validate and apply the per-mode knobs first; commit the mode
 * change only on success. update_power_mode_time() still runs while
 * power_mode holds the old mode, preserving residency attribution.
 */
static int do_set_power_profile(s32 pm, int util)
{
	/* Nothing to do when already in the requested mode. */
	if (power_mode == pm)
		return 0;

	switch (pm) {
	case LAVD_PM_PERFORMANCE:
		no_core_compaction = true;
		is_powersave_mode = false;
		reinit_cpumask_for_performance = true;
		debugln("Set the scheduler's power profile to performance mode: %d", util);
		break;
	case LAVD_PM_BALANCED:
		no_core_compaction = false;
		is_powersave_mode = false;
		reinit_cpumask_for_performance = false;
		debugln("Set the scheduler's power profile to balanced mode: %d", util);
		break;
	case LAVD_PM_POWERSAVE:
		no_core_compaction = false;
		is_powersave_mode = true;
		reinit_cpumask_for_performance = false;
		debugln("Set the scheduler's power profile to power-save mode: %d", util);
		break;
	default:
		/* Unknown mode: leave power_mode and the clock untouched. */
		return -EINVAL;
	}

	/* Account time spent in the old mode, then commit the new one. */
	update_power_mode_time();
	power_mode = pm;

	return 0;
}
static int do_autopilot(void)
{
if (sys_stat.avg_util <= LAVD_AP_LOW_UTIL)
return do_set_power_profile(LAVD_PM_POWERSAVE, sys_stat.avg_util);
if (sys_stat.avg_util <= LAVD_AP_HIGH_UTIL)
return do_set_power_profile(LAVD_PM_BALANCED, sys_stat.avg_util);
return do_set_power_profile(LAVD_PM_PERFORMANCE, sys_stat.avg_util);
}
/*
 * Recompute sys_stat.thr_perf_cri, the perf-criticality threshold above
 * which a task is steered to big cores, by interpolating between the
 * observed min/avg/max criticality according to the big/little capacity
 * split.
 */
static void update_thr_perf_cri(void)
{
	u32 little_core_scale, delta, diff, thr;

	/* Without compaction (or little cores) fall back to the system-wide split. */
	if (no_core_compaction || !have_little_core)
		cur_big_core_scale = default_big_core_scale;

	/* All capacity is big-core: no threshold needed, everything qualifies. */
	if (cur_big_core_scale == LAVD_SCALE) {
		sys_stat.thr_perf_cri = 0;
		return;
	}

	/*
	 * Few little cores: interpolate upward from the minimum criticality
	 * by the little-core fraction, so only a small top slice goes big.
	 */
	little_core_scale = LAVD_SCALE - cur_big_core_scale;
	if (little_core_scale < p2s(50)) {
		delta = sys_stat.avg_perf_cri - sys_stat.min_perf_cri;
		diff = (delta * little_core_scale) >> LAVD_SHIFT;
		thr = diff + sys_stat.min_perf_cri;
	}
	/*
	 * Many little cores: interpolate downward from the maximum
	 * criticality by the big-core fraction.
	 */
	else {
		delta = sys_stat.max_perf_cri - sys_stat.avg_perf_cri;
		diff = (delta * cur_big_core_scale) >> LAVD_SHIFT;
		thr = sys_stat.max_perf_cri - diff;
	}

	sys_stat.thr_perf_cri = thr;
}
/*
 * Rebuild the active/overflow cpumasks for performance mode.
 *
 * On heterogeneous systems big cores become active and little cores spill
 * to the overflow mask; on homogeneous systems every online CPU becomes
 * active. Per-compute-domain stats are recomputed from the temp counters.
 * Returns 0 on success, -ENOMEM if the cpumasks are unavailable.
 */
static int reinit_active_cpumask_for_performance(void)
{
	struct cpu_ctx *cpuc;
	struct bpf_cpumask *active, *ovrflw;
	const struct cpumask *online_cpumask;
	struct cpdom_ctx *cpdomc;
	u64 dsq_id;
	u32 nr_active_cpdoms = 0;
	int cpu, err = 0;

	barrier();
	bpf_rcu_read_lock();

	active = active_cpumask;
	ovrflw = ovrflw_cpumask;
	if (!active || !ovrflw) {
		scx_bpf_error("Failed to prepare cpumasks.");
		err = -ENOMEM;
		goto unlock_out;
	}

	if (have_little_core) {
		/* Heterogeneous: big cores -> active, little cores -> overflow. */
		bpf_for(cpu, 0, nr_cpu_ids) {
			cpuc = get_cpu_ctx_id(cpu);
			if (!cpuc)
				continue;
			if (!cpuc->is_online) {
				bpf_cpumask_clear_cpu(cpu, active);
				bpf_cpumask_clear_cpu(cpu, ovrflw);
				continue;
			}
			if (cpuc->big_core) {
				bpf_cpumask_set_cpu(cpu, active);
				bpf_cpumask_clear_cpu(cpu, ovrflw);
			} else {
				bpf_cpumask_set_cpu(cpu, ovrflw);
				bpf_cpumask_clear_cpu(cpu, active);
			}
			scx_bpf_kick_cpu(cpu, SCX_KICK_IDLE);

			cpdomc = MEMBER_VPTR(cpdom_ctxs, [cpuc->cpdom_id]);
			if (cpdomc) {
				cpdomc->nr_acpus_temp++;
				cpdomc->cap_sum_temp += cpuc->capacity;
			}
		}
	} else {
		/* Homogeneous: every online CPU is active; overflow is empty. */
		online_cpumask = scx_bpf_get_online_cpumask();
		nr_cpus_onln = bpf_cpumask_weight(online_cpumask);
		bpf_cpumask_copy(active, online_cpumask);
		scx_bpf_put_cpumask(online_cpumask);
		bpf_cpumask_clear(ovrflw);

		bpf_for(cpu, 0, nr_cpu_ids) {
			cpuc = get_cpu_ctx_id(cpu);
			if (!cpuc || !cpuc->is_online)
				continue;
			scx_bpf_kick_cpu(cpu, SCX_KICK_IDLE);

			cpdomc = MEMBER_VPTR(cpdom_ctxs, [cpuc->cpdom_id]);
			if (cpdomc) {
				cpdomc->nr_acpus_temp++;
				cpdomc->cap_sum_temp += cpuc->capacity;
			}
		}
	}

	/* Publish per-domain stats and reset the temp accumulators. */
	bpf_for(dsq_id, 0, nr_cpdoms) {
		if (dsq_id >= LAVD_CPDOM_MAX_NR)
			break;
		cpdomc = MEMBER_VPTR(cpdom_ctxs, [dsq_id]);
		/*
		 * Fix: check MEMBER_VPTR's result before dereferencing, as
		 * the equivalent loop in do_core_compaction() already does.
		 */
		if (!cpdomc)
			continue;
		WRITE_ONCE(cpdomc->nr_active_cpus, cpdomc->nr_acpus_temp);
		WRITE_ONCE(cpdomc->nr_acpus_temp, 0);
		WRITE_ONCE(cpdomc->cap_sum_active_cpus, cpdomc->cap_sum_temp);
		WRITE_ONCE(cpdomc->cap_sum_temp, 0);
		if (cpdomc->nr_active_cpus)
			nr_active_cpdoms++;
	}

	sys_stat.nr_active = nr_cpus_onln;
	sys_stat.nr_active_cpdoms = nr_active_cpdoms;

unlock_out:
	bpf_rcu_read_unlock();
	return err;
}
/*
 * Program the CPU's performance target from its recent utilization.
 * With frequency scaling disabled, always request full performance.
 * Skips the (relatively expensive) kfunc call when the target is unchanged.
 */
static void update_cpuperf_target(struct cpu_ctx *cpuc)
{
	u32 target;

	if (no_freq_scaling) {
		target = SCX_CPUPERF_ONE;
	} else {
		u32 u = max(cpuc->avg_util, cpuc->cur_util);

		/* Saturate: beyond the cap, ask for full scale. */
		if (u >= LAVD_CPU_UTIL_MAX_FOR_CPUPERF)
			u = LAVD_SCALE;
		target = (u * SCX_CPUPERF_ONE) >> LAVD_SHIFT;
	}

	if (cpuc->cpuperf_cur != target) {
		scx_bpf_cpuperf_set(cpuc->cpu_id, target);
		cpuc->cpuperf_cur = target;
	}
}
/*
 * Invalidate the cached cpuperf target so the next update reprograms
 * the CPU. A no-op when frequency scaling is disabled.
 */
static void reset_cpuperf_target(struct cpu_ctx *cpuc)
{
	if (no_freq_scaling)
		return;
	cpuc->cpuperf_cur = 0;
}
/*
 * Look up @cpu's capacity; returns 0 (and logs) for an out-of-range id.
 */
static u16 get_cpuperf_cap(s32 cpu)
{
	const volatile u16 *cap = MEMBER_VPTR(cpu_capacity, [cpu]);

	if (!cap) {
		debugln("Infeasible CPU id: %d", cpu);
		return 0;
	}
	return *cap;
}
/*
 * Scale a duration by @cpu's capacity and its current frequency, each a
 * LAVD_SCALE fixed-point factor (hence the double shift).
 */
static u64 scale_cap_freq(u64 dur, s32 cpu)
{
	u64 cap = get_cpuperf_cap(cpu);
	u64 freq = scx_bpf_cpuperf_cur(cpu);

	return (dur * cap * freq) >> (LAVD_SHIFT * 2);
}
static void init_autopilot_low_util(void)
{
if (nr_cpus_big < nr_cpus_onln) {
LAVD_AP_LOW_UTIL = LAVD_SCALE / nr_cpus_onln;
}
else {
LAVD_AP_LOW_UTIL = (2 * LAVD_SCALE) / nr_cpus_onln;
}
}
SEC("syscall")
int set_power_profile(struct power_arg *input)
{
return do_set_power_profile(input->power_mode, 0);
}