#include <scx/common.bpf.h>
#include <bpf_arena_common.bpf.h>
#include "intf.h"
#include "lavd.bpf.h"
#include "util.bpf.h"
#include "power.bpf.h"
#include <errno.h>
#include <stdbool.h>
#include <bpf/bpf_core_read.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include <lib/cgroup.h>
/*
 * Scratch state used while testing candidate CPUs for stickiness
 * (the task's previous CPU and, on a synchronous wakeup, the waker's CPU).
 *
 * Candidates whose core type (big/little) matches the task's preference
 * are collected in cpuc_match[]; the rest go to cpuc_not_match[].  At most
 * two CPUs are ever tested, hence the fixed-size arrays.
 */
struct sticky_ctx {
unsigned int i_m; /* number of valid entries in cpuc_match[] */
unsigned int i_nm; /* number of valid entries in cpuc_not_match[] */
struct cpu_ctx *cpuc_match[2]; /* candidates on a matching core type */
struct cpu_ctx *cpuc_not_match[2]; /* candidates on a mismatched core type */
};
/*
 * Set up ctx->i_mask, the set of idle CPUs the task may use, and record
 * whether it is empty in ctx->i_empty.
 *
 * An unaffinitized task can use the global idle mask directly; an
 * affinitized task gets the intersection of its allowed CPUs and the
 * idle mask, built in the per-CPU scratch mask.
 *
 * Returns false when the scratch mask is unavailable.
 */
static __always_inline
bool init_idle_i_mask(struct pick_ctx *ctx, const struct cpumask *idle_cpumask)
{
	struct bpf_cpumask *scratch;

	if (test_task_flag(ctx->taskc, LAVD_FLAG_IS_AFFINITIZED)) {
		scratch = ctx->cpuc_cur->tmp_i_mask;
		if (!scratch)
			return false;
		bpf_cpumask_and(scratch, ctx->p->cpus_ptr, idle_cpumask);
		ctx->i_mask = cast_mask(scratch);
	} else {
		ctx->i_mask = idle_cpumask;
	}

	ctx->i_empty = bpf_cpumask_empty(ctx->i_mask);
	return true;
}
/*
 * Snapshot the global active and overflow cpumask pointers into @ctx.
 * Returns false if either global mask has not been set up yet.
 */
static __always_inline
bool init_active_ovrflw_masks(struct pick_ctx *ctx)
{
	ctx->active = active_cpumask;
	ctx->ovrflw = ovrflw_cpumask;

	return ctx->active && ctx->ovrflw;
}
/*
 * Initialize the task's active (a_mask) and overflow (o_mask) candidate
 * masks and the matching a_empty/o_empty flags.
 *
 * Unaffinitized tasks use the global active/overflow masks as-is (never
 * considered empty here).  Affinitized tasks get per-CPU scratch masks
 * filled with the intersection of the task's allowed CPUs and the global
 * masks; a mask that ends up empty is cleared to NULL so later code can
 * skip it with a plain pointer test.
 *
 * Requires init_active_ovrflw_masks() to have populated ctx->active and
 * ctx->ovrflw.  Returns false when the per-CPU context or a scratch mask
 * is unavailable.
 */
static __always_inline
bool init_ao_masks(struct pick_ctx *ctx)
{
ctx->cpuc_cur = get_cpu_ctx();
if (!ctx->cpuc_cur)
return false;
/* Unaffinitized: global masks apply directly. */
if (!test_task_flag(ctx->taskc, LAVD_FLAG_IS_AFFINITIZED)) {
ctx->a_mask = ctx->active;
ctx->o_mask = ctx->ovrflw;
ctx->a_empty = ctx->o_empty = false;
return true;
}
ctx->a_mask = ctx->cpuc_cur->tmp_a_mask;
ctx->o_mask = ctx->cpuc_cur->tmp_o_mask;
if (!ctx->a_mask || !ctx->o_mask)
return false;
bpf_cpumask_and(ctx->a_mask, ctx->p->cpus_ptr, cast_mask(ctx->active));
bpf_cpumask_and(ctx->o_mask, ctx->p->cpus_ptr, cast_mask(ctx->ovrflw));
ctx->a_empty = bpf_cpumask_empty(cast_mask(ctx->a_mask));
ctx->o_empty = bpf_cpumask_empty(cast_mask(ctx->o_mask));
/* NULL-out empty masks so downstream checks are a single pointer test. */
if (ctx->a_empty)
ctx->a_mask = NULL;
if (ctx->o_empty)
ctx->o_mask = NULL;
return true;
}
/*
 * Derive the idle-and-active (ia), idle-and-overflow (io), and
 * idle-active-turbo (iat) masks from @idle_mask using per-CPU scratch
 * masks, and set the matching *_empty flags.  Also stashes temp_mask,
 * a general-purpose scratch mask used by pick_idle_cpu_at_cpdom().
 *
 * Returns false when any required scratch mask is unavailable.
 */
static __always_inline
bool init_idle_ato_masks(struct pick_ctx *ctx, const struct cpumask *idle_mask)
{
ctx->ia_mask = ctx->cpuc_cur->tmp_t_mask;
ctx->io_mask = ctx->cpuc_cur->tmp_t2_mask;
ctx->iat_mask = ctx->cpuc_cur->tmp_t3_mask;
ctx->temp_mask = ctx->cpuc_cur->tmp_l_mask;
if (!ctx->ia_mask || !ctx->io_mask || !ctx->iat_mask || !ctx->temp_mask)
return false;
/* Idle CPUs that are also in the task's active mask. */
if (ctx->a_mask) {
bpf_cpumask_and(ctx->ia_mask, idle_mask, cast_mask(ctx->a_mask));
ctx->ia_empty = bpf_cpumask_empty(cast_mask(ctx->ia_mask));
}
else
ctx->ia_empty = true;
/* Idle CPUs that are also in the task's overflow mask. */
if (ctx->o_mask) {
bpf_cpumask_and(ctx->io_mask, idle_mask, cast_mask(ctx->o_mask));
ctx->io_empty = bpf_cpumask_empty(cast_mask(ctx->io_mask));
}
else
ctx->io_empty = true;
/*
 * Idle active CPUs that are also turbo cores.  The turbo_cpumask
 * re-check in the else-if looks redundant (the first branch already
 * rules out NULL) -- presumably kept so the BPF verifier can prove
 * non-NULLness on this path; confirm before simplifying.
 */
if (ctx->ia_empty || !have_turbo_core || !turbo_cpumask)
ctx->iat_empty = true;
else if (turbo_cpumask) {
bpf_cpumask_and(ctx->iat_mask, cast_mask(ctx->ia_mask),
cast_mask(turbo_cpumask));
ctx->iat_empty = bpf_cpumask_empty(cast_mask(ctx->iat_mask));
}
return true;
}
/*
 * Find an online CPU in @src_mask by walking the preferred CPU order,
 * starting past the active set (index sys_stat.nr_active) -- presumably
 * to pick an overflow CPU; the caller uses this when no active/overflow
 * CPU is usable.
 *
 * @src_mask: candidate CPUs to pick from.
 * @cpuc_cur: current CPU's context; its tmp_l_mask serves as scratch.
 *
 * Returns a CPU id, or -ENOENT when the scratch mask is unavailable or
 * no online candidate exists.
 *
 * Fix: dropped a stray semicolon that followed the bpf_for block.
 */
__hidden
s32 find_cpu_in(const struct cpumask *src_mask, struct cpu_ctx *cpuc_cur)
{
	const volatile u16 *cpu_order = get_cpu_order();
	const struct cpumask *online_mask;
	struct bpf_cpumask *online_src_mask;
	s32 cpu;
	unsigned int i;

	online_src_mask = cpuc_cur->tmp_l_mask;
	if (!online_src_mask)
		return -ENOENT;

	/* Restrict candidates to CPUs that are currently online. */
	online_mask = scx_bpf_get_online_cpumask();
	bpf_cpumask_and(online_src_mask, src_mask, online_mask);
	scx_bpf_put_cpumask(online_mask);

	bpf_for(i, sys_stat.nr_active, nr_cpu_ids) {
		if (i >= LAVD_CPU_ID_MAX)
			break;
		cpu = cpu_order[i];
		if (bpf_cpumask_test_cpu(cpu, cast_mask(online_src_mask)))
			return cpu;
	}

	return -ENOENT;
}
/*
 * Try to claim an idle CPU inside compute domain @cpdom for the task in
 * @ctx, in preference order:
 *   1. an idle turbo core (big domains with active CPUs only),
 *   2. an idle active CPU,
 *   3. an idle overflow CPU.
 *
 * @scope is forwarded to scx_bpf_pick_idle_cpu() (e.g. SCX_PICK_IDLE_CORE
 * to demand a fully idle physical core).  On success *is_idle is set and
 * the CPU id is returned; otherwise -ENOENT.
 *
 * Requires the ia/io/iat/temp masks in @ctx to have been initialized via
 * init_idle_ato_masks().
 */
static s32 pick_idle_cpu_at_cpdom(struct pick_ctx *ctx, s64 cpdom, u64 scope,
bool *is_idle)
{
struct bpf_cpumask *cpd_mask;
struct cpdom_ctx *cpdc;
s32 cpu;
cpd_mask = MEMBER_VPTR(cpdom_cpumask, [cpdom]);
cpdc = MEMBER_VPTR(cpdom_ctxs, [cpdom]);
if (!ctx || !cpdc || !cpd_mask || !cpdc->is_valid)
return -ENOENT;
/* 1st choice: an idle turbo core in this (big) domain. */
if (!ctx->iat_empty && cpdc->nr_active_cpus && cpdc->is_big) {
bpf_cpumask_and(ctx->temp_mask,
cast_mask(cpd_mask), cast_mask(ctx->iat_mask));
cpu = scx_bpf_pick_idle_cpu(cast_mask(ctx->temp_mask), scope);
if (cpu >= 0) {
*is_idle = true;
return cpu;
}
}
/* 2nd choice: an idle active CPU in this domain. */
if (!ctx->ia_empty && cpdc->nr_active_cpus) {
bpf_cpumask_and(ctx->temp_mask,
cast_mask(cpd_mask), cast_mask(ctx->ia_mask));
cpu = scx_bpf_pick_idle_cpu(cast_mask(ctx->temp_mask), scope);
if (cpu >= 0) {
*is_idle = true;
return cpu;
}
}
/* 3rd choice: an idle overflow CPU in this domain. */
if (!ctx->io_empty) {
bpf_cpumask_and(ctx->temp_mask,
cast_mask(cpd_mask), cast_mask(ctx->io_mask));
cpu = scx_bpf_pick_idle_cpu(cast_mask(ctx->temp_mask), scope);
if (cpu >= 0) {
*is_idle = true;
return cpu;
}
}
return -ENOENT;
}
/*
 * Pick an arbitrary (load-distributed) CPU from the task's active mask,
 * falling back to the overflow mask.  Returns -ENOENT if neither mask
 * yields a CPU.
 *
 * NOTE: the name keeps its historical misspelling ("dsitribute") since
 * callers reference it as-is.
 */
static __always_inline
s32 cpumask_any_dsitribute(struct pick_ctx *ctx)
{
	const struct cpumask *candidates;
	s32 cpu;

	candidates = cast_mask(ctx->a_mask);
	if (candidates) {
		cpu = bpf_cpumask_any_distribute(candidates);
		if (cpu < nr_cpu_ids)
			return cpu;
	}

	candidates = cast_mask(ctx->o_mask);
	if (candidates) {
		cpu = bpf_cpumask_any_distribute(candidates);
		if (cpu < nr_cpu_ids)
			return cpu;
	}

	return -ENOENT;
}
/*
 * Pick a CPU via power-of-two-choices: draw two distributed candidates
 * and keep the one whose cpu_ctx has the smaller cur_util_invr.  Falls
 * back to the task's previous CPU when no candidate's context can be
 * resolved.
 *
 * Fix: the original fell through to get_cpu_ctx_id(-ENOENT) when exactly
 * one of the two draws failed, returning prev_cpu and discarding the one
 * valid candidate.  Now a single valid draw is used directly.
 */
static
s32 pick_random_cpu(struct pick_ctx *ctx)
{
	s32 cpu0 = cpumask_any_dsitribute(ctx);
	s32 cpu1 = cpumask_any_dsitribute(ctx);
	struct cpu_ctx *cpuc0, *cpuc1;

	/* If one (or both) draws failed, use whatever remains. */
	if (cpu0 == -ENOENT)
		return (cpu1 == -ENOENT) ? ctx->prev_cpu : cpu1;
	if (cpu1 == -ENOENT)
		return cpu0;
	if (cpu0 == cpu1)
		return cpu0;

	cpuc0 = get_cpu_ctx_id(cpu0);
	cpuc1 = get_cpu_ctx_id(cpu1);
	if (!cpuc0 || !cpuc1)
		return ctx->prev_cpu;

	/* Two-choices: prefer the candidate with smaller cur_util_invr. */
	return (cpuc0->cur_util_invr < cpuc1->cur_util_invr) ? cpu0 : cpu1;
}
/*
 * Resolve a concrete sticky CPU: if @sticky_cpu is already valid, use it;
 * otherwise pick a distributed CPU from @sticky_cpdom, preferring the
 * task's active mask over its overflow mask.  Returns -ENOENT when no
 * domain was given or no CPU intersects it.
 */
static
s32 find_sticky_cpu_at_cpdom(struct pick_ctx *ctx, s32 sticky_cpu, s64 sticky_cpdom)
{
	struct bpf_cpumask *cpd_mask;
	s32 cpu;

	if (sticky_cpu >= 0)
		return sticky_cpu;
	if (sticky_cpdom < 0)
		return -ENOENT;

	cpd_mask = MEMBER_VPTR(cpdom_cpumask, [sticky_cpdom]);
	if (!cpd_mask)
		return -ENOENT;

	if (ctx->a_mask) {
		cpu = bpf_cpumask_any_and_distribute(cast_mask(cpd_mask),
						     cast_mask(ctx->a_mask));
		if (cpu < nr_cpu_ids)
			return cpu;
	}
	if (ctx->o_mask) {
		cpu = bpf_cpumask_any_and_distribute(cast_mask(cpd_mask),
						     cast_mask(ctx->o_mask));
		if (cpu < nr_cpu_ids)
			return cpu;
	}

	return -ENOENT;
}
/*
 * Test whether the task may run on @cpu.  Unaffinitized tasks may run
 * anywhere; affinitized tasks must have @cpu in their allowed cpumask
 * and in either their active or overflow candidate mask.
 */
static __always_inline
bool can_run_on_cpu(struct pick_ctx *ctx, s32 cpu)
{
	struct bpf_cpumask *am;
	struct bpf_cpumask *om;

	if (!test_task_flag(ctx->taskc, LAVD_FLAG_IS_AFFINITIZED))
		return true;
	if (!bpf_cpumask_test_cpu(cpu, ctx->p->cpus_ptr))
		return false;

	/* Load once so the NULL check covers the subsequent use. */
	am = ctx->a_mask;
	if (am && bpf_cpumask_test_cpu(cpu, cast_mask(am)))
		return true;

	om = ctx->o_mask;
	if (om && bpf_cpumask_test_cpu(cpu, cast_mask(om)))
		return true;

	return false;
}
/*
 * Test whether the task may run somewhere in compute domain @cpdom.
 * Unaffinitized tasks qualify unconditionally; affinitized tasks need
 * their active mask to intersect the domain (and the domain to have
 * active CPUs), or their overflow mask to intersect it.
 */
static __always_inline
bool can_run_on_domain(struct pick_ctx *ctx, s64 cpdom)
{
	struct cpdom_ctx *cpdc;
	struct bpf_cpumask *cpd_mask, *am, *om;

	if (!test_task_flag(ctx->taskc, LAVD_FLAG_IS_AFFINITIZED))
		return true;

	cpd_mask = MEMBER_VPTR(cpdom_cpumask, [cpdom]);
	cpdc = MEMBER_VPTR(cpdom_ctxs, [cpdom]);
	if (!cpd_mask || !cpdc)
		return false;

	/* Load once so the NULL check covers the subsequent use. */
	am = ctx->a_mask;
	if (am && cpdc->nr_active_cpus &&
	    bpf_cpumask_intersects(cast_mask(am), cast_mask(cpd_mask)))
		return true;

	om = ctx->o_mask;
	if (om &&
	    bpf_cpumask_intersects(cast_mask(om), cast_mask(cpd_mask)))
		return true;

	return false;
}
/*
 * If the task may run on @cpu, record that CPU's context in @sctx,
 * bucketed by whether its core type (big/little) matches @is_task_big.
 * Returns true when the candidate was recorded, false when the CPU is
 * unusable, its context is missing, or a bucket is already full.
 */
static __always_inline
bool test_cpu_stickable(struct pick_ctx *ctx, struct sticky_ctx *sctx,
			s32 cpu, bool is_task_big)
{
	struct cpu_ctx *cpuc;

	if (!can_run_on_cpu(ctx, cpu))
		return false;

	cpuc = get_cpu_ctx_id(cpu);
	if (!cpuc || sctx->i_m >= 2 || sctx->i_nm >= 2)
		return false;

	if (is_task_big == cpuc->big_core)
		sctx->cpuc_match[sctx->i_m++] = cpuc;
	else
		sctx->cpuc_not_match[sctx->i_nm++] = cpuc;
	return true;
}
/*
 * A wakeup is treated as synchronous when SCX_WAKE_SYNC is set and the
 * waker (the current task) is not exiting -- an exiting waker won't
 * actually yield its CPU to the wakee.
 */
static
bool is_sync_wakeup(struct pick_ctx *ctx)
{
	struct task_struct *waker;

	if (!(ctx->wake_flags & SCX_WAKE_SYNC))
		return false;

	waker = bpf_get_current_task_btf();
	return !(waker->flags & PF_EXITING);
}
/*
 * Choose a "sticky" CPU and/or compute domain for the task: a place it
 * has a locality reason to stay (its previous CPU, or the waker's CPU on
 * a synchronous wakeup).
 *
 * Candidates whose core type matches the task's big/little preference
 * win outright; with two matching candidates, the domains' load_invr
 * values decide between sticking to prev and merely suggesting the other
 * domain.  When only mismatched-core candidates exist, only a domain is
 * suggested (return -ENOENT) so the caller can search it for a better
 * CPU.
 *
 * On return *sticky_cpdom holds the chosen domain id or -ENOENT.
 * Returns the sticky CPU id, or -ENOENT when only a domain (or nothing)
 * was chosen.  Side effect: sets ctx->sync_waker_cpu on sync wakeups.
 */
static
s32 find_sticky_cpu_and_cpdom(struct pick_ctx *ctx, s64 *sticky_cpdom)
{
struct cpu_ctx *p0, *p1, *cpuc;
struct cpdom_ctx *d0, *d1;
struct sticky_ctx sctx;
__builtin_memset(&sctx, 0, sizeof(sctx));
/* Always consider the task's previous CPU. */
test_cpu_stickable(ctx, &sctx, ctx->prev_cpu, ctx->is_task_big);
/* On a sync wakeup, also consider the waker's CPU (if different). */
if (is_sync_wakeup(ctx)) {
s32 waker_cpu = bpf_get_smp_processor_id();
if (waker_cpu != ctx->prev_cpu) {
ctx->sync_waker_cpu = waker_cpu;
test_cpu_stickable(ctx, &sctx, ctx->sync_waker_cpu, ctx->is_task_big);
}
}
/* Prefer candidates whose core type matches the task. */
if (sctx.i_m == 1) {
*sticky_cpdom = sctx.cpuc_match[0]->cpdom_id;
return sctx.cpuc_match[0]->cpu_id;
} else if (sctx.i_m == 2) {
p0 = sctx.cpuc_match[0];
p1 = sctx.cpuc_match[1];
d0 = MEMBER_VPTR(cpdom_ctxs, [p0->cpdom_id]);
d1 = MEMBER_VPTR(cpdom_ctxs, [p1->cpdom_id]);
/*
 * When d0's load_invr exceeds d1's, suggest p1's domain
 * without a specific CPU; otherwise stick to p0 (prev CPU).
 */
if ((p0 != p1) && (d0 && d1) && (d0->load_invr > d1->load_invr)) {
*sticky_cpdom = p1->cpdom_id;
return -ENOENT;
} else {
*sticky_cpdom = p0->cpdom_id;
return p0->cpu_id;
}
}
/*
 * Only mismatched-core candidates remain: suggest just a domain so
 * the caller can search it for a better-matching CPU.
 */
if (sctx.i_nm == 1) {
p0 = sctx.cpuc_not_match[0];
if (can_run_on_domain(ctx, p0->cpdom_id)) {
*sticky_cpdom = p0->cpdom_id;
return -ENOENT;
}
} else if (sctx.i_nm == 2) {
p0 = sctx.cpuc_not_match[0];
p1 = sctx.cpuc_not_match[1];
if ((p0 != p1) && can_run_on_domain(ctx, p0->cpdom_id) &&
can_run_on_domain(ctx, p1->cpdom_id)) {
d0 = MEMBER_VPTR(cpdom_ctxs, [p0->cpdom_id]);
d1 = MEMBER_VPTR(cpdom_ctxs, [p1->cpdom_id]);
if (d0 && d1) {
/* Decide between the two domains by load_invr. */
if (d0->load_invr > d1->load_invr) {
*sticky_cpdom = p1->cpdom_id;
return -ENOENT;
}
else {
*sticky_cpdom = p0->cpdom_id;
return -ENOENT;
}
}
} else if (can_run_on_domain(ctx, p0->cpdom_id)) {
*sticky_cpdom = p0->cpdom_id;
return -ENOENT;
} else if (can_run_on_domain(ctx, p1->cpdom_id)) {
*sticky_cpdom = p1->cpdom_id;
return -ENOENT;
}
}
/* Fall back to the domains of the previous, then waker, CPUs. */
cpuc = get_cpu_ctx_id(ctx->prev_cpu);
if (cpuc && can_run_on_domain(ctx, cpuc->cpdom_id)) {
*sticky_cpdom = cpuc->cpdom_id;
return -ENOENT;
}
if (ctx->sync_waker_cpu < 0)
goto err_out;
cpuc = get_cpu_ctx_id(ctx->sync_waker_cpu);
if (cpuc && can_run_on_domain(ctx, cpuc->cpdom_id)) {
*sticky_cpdom = cpuc->cpdom_id;
return -ENOENT;
}
err_out:
*sticky_cpdom = -ENOENT;
return -ENOENT;
}
/*
 * On a synchronous wakeup, decide whether the wakee should run on the
 * waker's CPU: the task must be allowed there, the waker's CPU must have
 * nothing queued, and (with multiple compute domains) the waker must
 * share the previous CPU's domain.  On success *cpdom_id is set to the
 * waker's domain.
 */
static
bool is_sync_waker_idle(struct pick_ctx *ctx, s64 *cpdom_id)
{
	struct cpu_ctx *cpuc_waker, *cpuc_prev;
	s32 waker = ctx->sync_waker_cpu;

	if (waker < 0 || !can_run_on_cpu(ctx, waker))
		return false;

	cpuc_waker = get_cpu_ctx_id(waker);
	if (!cpuc_waker || queued_on_cpu(cpuc_waker))
		return false;

	/* Across domains, only stick if prev shares the waker's domain. */
	if (nr_cpdoms > 1) {
		cpuc_prev = get_cpu_ctx_id(ctx->prev_cpu);
		if (!cpuc_prev || cpuc_prev->cpdom_id != cpuc_waker->cpdom_id)
			return false;
	}

	*cpdom_id = cpuc_waker->cpdom_id;
	return true;
}
/*
 * Try to place the task on an idle CPU in a neighboring compute domain
 * that is flagged as a stealer, scanning neighbors from nearest to
 * farthest distance.
 *
 * @cpdc:         the (stealee) domain the task is associated with.
 * @scope:        idle-pick scope forwarded to pick_idle_cpu_at_cpdom().
 * @sticky_cpdom: set to the chosen neighbor domain on success.
 * @is_idle:      set to true when an idle CPU is claimed.
 *
 * Returns the picked CPU or -ENOENT.
 *
 * Fix: after a successful steal, the original only broke out of the
 * inner loop; the outer distance loop kept scanning and could overwrite
 * the pick and clear stealer flags on additional domains.  A plain
 * break (not goto, which would skip bpf_for iterator cleanup) now stops
 * the outer loop as well.
 */
static
s32 migrate_to_neighbor(struct pick_ctx *ctx, struct cpdom_ctx *cpdc,
			u64 scope, s64 *sticky_cpdom, bool *is_idle)
{
	struct cpdom_ctx *mig_cpdc;
	s64 mig_cpdom, nr_nbr;
	s32 cpu = -ENOENT;
	int i, j;

	bpf_for(i, 0, LAVD_CPDOM_MAX_DIST) {
		nr_nbr = min(cpdc->nr_neighbors[i], LAVD_CPDOM_MAX_NR);
		if (nr_nbr == 0)
			break;

		bpf_for(j, 0, LAVD_CPDOM_MAX_NR) {
			if (j >= nr_nbr)
				break;

			mig_cpdom = get_neighbor_id(cpdc, i, j);
			if (mig_cpdom < 0)
				continue;

			/* Only steal into domains flagged as stealers. */
			mig_cpdc = MEMBER_VPTR(cpdom_ctxs, [mig_cpdom]);
			if (!mig_cpdc || !READ_ONCE(mig_cpdc->is_stealer))
				continue;

			cpu = pick_idle_cpu_at_cpdom(ctx, mig_cpdom, scope, is_idle);
			if (cpu >= 0) {
				/*
				 * One steal per round: clear both sides'
				 * flags so other CPUs don't pile on.
				 */
				WRITE_ONCE(mig_cpdc->is_stealer, false);
				WRITE_ONCE(cpdc->is_stealee, false);
				*sticky_cpdom = mig_cpdom;
				break;
			}
		}

		/* Stop scanning farther distances once a CPU is claimed. */
		if (cpu >= 0)
			break;
	}

	return cpu;
}
/*
 * Pick a target CPU for the task described by @ctx.
 *
 * Preference order:
 *   1. pinned/migration-disabled tasks: the previous CPU;
 *   2. the sticky CPU (prev or sync waker) on a fully idle SMT core;
 *   3. an idle full core in the sticky compute domain;
 *   4. the sticky CPU if merely idle;
 *   5. the sync waker's CPU (about to go idle);
 *   6. an idle full core stolen from a neighboring stealer domain;
 *   7. any idle CPU in the sticky domain, then (for aggressive
 *      migrators) any idle CPU stolen from a neighbor;
 *   8. the previous CPU if usable, else a random distributed CPU.
 *
 * *is_idle is set to true only when an idle CPU was actually claimed.
 */
__hidden __noinline
s32 pick_idle_cpu(struct pick_ctx *ctx, bool *is_idle)
{
const struct cpumask *idle_cpumask = NULL, *idle_smtmask = NULL;
s32 cpu = -ENOENT, sticky_cpu;
s64 sticky_cpdom = -ENOENT;
struct cpdom_ctx *cpdc;
bool i_smt_empty;
bpf_rcu_read_lock();
if (!init_active_ovrflw_masks(ctx))
goto err_out;
/*
 * Pinned/migration-disabled tasks must stay on the previous CPU.
 * A pinned task landing outside the active set is recorded in the
 * overflow mask; migration-disabled tasks are not (their state is
 * presumably transient -- confirm).
 */
if (is_pinned(ctx->p) || is_migration_disabled(ctx->p)) {
cpu = ctx->prev_cpu;
if (!bpf_cpumask_test_cpu(cpu, cast_mask(ctx->active))) {
if (is_pinned(ctx->p))
bpf_cpumask_test_and_set_cpu(cpu, ctx->ovrflw);
}
*is_idle = scx_bpf_test_and_clear_cpu_idle(cpu);
goto unlock_out;
}
if (!init_ao_masks(ctx))
goto err_out;
/*
 * No usable active or overflow CPU: grow the overflow set with an
 * online CPU from the task's allowed mask.
 */
if (ctx->a_empty && ctx->o_empty) {
cpu = find_cpu_in(ctx->p->cpus_ptr, ctx->cpuc_cur);
if (cpu >= 0) {
bpf_cpumask_set_cpu(cpu, ctx->ovrflw);
*is_idle = scx_bpf_test_and_clear_cpu_idle(cpu);
}
goto unlock_out;
}
ctx->sync_waker_cpu = -ENOENT;
ctx->is_task_big = is_perf_cri(ctx->taskc);
/* Choose a sticky CPU and/or domain from the prev/waker CPUs. */
sticky_cpu = find_sticky_cpu_and_cpdom(ctx, &sticky_cpdom);
if (sticky_cpdom < 0) {
cpu = pick_random_cpu(ctx);
goto unlock_out;
}
idle_cpumask = scx_bpf_get_idle_cpumask();
if (!init_idle_i_mask(ctx, idle_cpumask))
goto err_out;
/* Nothing idle at all: settle for the sticky CPU/domain. */
if (ctx->i_empty) {
cpu = sticky_cpu;
if (cpu == -ENOENT) {
cpu = find_sticky_cpu_at_cpdom(ctx, sticky_cpu,
sticky_cpdom);
}
goto unlock_out;
}
if (is_smt_active) {
idle_smtmask = scx_bpf_get_idle_smtmask();
i_smt_empty = bpf_cpumask_empty(idle_smtmask);
} else
i_smt_empty = true;
/* Best case: the sticky CPU sits on a fully idle core. */
if (!i_smt_empty && sticky_cpu >= 0 &&
bpf_cpumask_test_cpu(sticky_cpu, idle_smtmask) &&
scx_bpf_test_and_clear_cpu_idle(sticky_cpu)) {
cpu = sticky_cpu;
*is_idle = true;
goto unlock_out;
}
/* Next: any fully idle core in the sticky domain. */
if (!i_smt_empty) {
if (!init_idle_ato_masks(ctx, idle_smtmask))
goto err_out;
if (!ctx->ia_empty || !ctx->io_empty) {
cpu = pick_idle_cpu_at_cpdom(ctx, sticky_cpdom,
SCX_PICK_IDLE_CORE, is_idle);
if (cpu >= 0)
goto unlock_out;
}
}
/* Next: the sticky CPU if idle, even with a busy SMT sibling. */
if (sticky_cpu >= 0 && scx_bpf_test_and_clear_cpu_idle(sticky_cpu)) {
cpu = sticky_cpu;
*is_idle = true;
goto unlock_out;
}
/*
 * Sync wakeup: run on the waker's CPU, which is about to release
 * itself (not claimed as idle here, so *is_idle stays false).
 */
if (!no_wake_sync && is_sync_waker_idle(ctx, &sticky_cpdom)) {
cpu = ctx->sync_waker_cpu;
goto unlock_out;
}
/* Rebuild the masks against all idle CPUs, not just whole cores. */
if (!init_idle_ato_masks(ctx, ctx->i_mask))
goto err_out;
if (ctx->ia_empty && ctx->io_empty) {
cpu = sticky_cpu;
if (cpu == -ENOENT) {
cpu = find_sticky_cpu_at_cpdom(ctx, sticky_cpu,
sticky_cpdom);
}
goto unlock_out;
}
/* If this domain is a stealee, try stealing a full idle core nearby. */
if (!i_smt_empty && (nr_cpdoms > 1) &&
(cpdc = MEMBER_VPTR(cpdom_ctxs, [sticky_cpdom])) &&
READ_ONCE(cpdc->is_stealee)) {
cpu = migrate_to_neighbor(ctx, cpdc, SCX_PICK_IDLE_CORE,
&sticky_cpdom, is_idle);
if (cpu >= 0)
goto unlock_out;
}
/* Any idle CPU in the sticky domain. */
cpu = pick_idle_cpu_at_cpdom(ctx, sticky_cpdom, 0, is_idle);
if (cpu >= 0)
goto unlock_out;
/* Aggressive migrators may steal any idle CPU from a neighbor. */
if ((nr_cpdoms > 1) &&
test_task_flag(ctx->taskc, LAVD_FLAG_MIGRATION_AGGRESSIVE) &&
(cpdc = MEMBER_VPTR(cpdom_ctxs, [sticky_cpdom])) &&
READ_ONCE(cpdc->is_stealee)) {
cpu = migrate_to_neighbor(ctx, cpdc, 0, &sticky_cpdom, is_idle);
if (cpu >= 0)
goto unlock_out;
}
/* Nothing idle anywhere: fall back to the previous CPU if usable. */
if (can_run_on_cpu(ctx, ctx->prev_cpu)) {
cpu = ctx->prev_cpu;
sticky_cpdom = -ENOENT;
goto unlock_out;
}
err_out:
cpu = -ENOENT;
unlock_out:
/*
 * NOTE(review): on an early err_out, pick_random_cpu() reads
 * ctx->a_mask/o_mask before init_ao_masks() ran -- this presumably
 * relies on the caller zero-initializing ctx; confirm.
 */
if (cpu < 0)
cpu = pick_random_cpu(ctx);
if (idle_smtmask)
scx_bpf_put_idle_cpumask(idle_smtmask);
if (idle_cpumask)
scx_bpf_put_idle_cpumask(idle_cpumask);
bpf_rcu_read_unlock();
return cpu;
}