/*
 * Preemption-relevant snapshot of either a candidate task or a candidate
 * victim CPU, so the two sides can be compared uniformly by can_x_kick_y().
 */
struct preemption_info {
/* Estimated clock at which the described task/CPU stops running. */
u64 est_stopping_clk;
/* Latency criticality; a higher value wins a preemption contest. */
u64 lat_cri;
/* Backing CPU context; NULL when this describes a task (see init_prm_by_task). */
struct cpu_ctx *cpuc;
};
/*
 * Estimate when @taskc would stop running if it began at @now: the sooner
 * of its historical average runtime and its currently assigned slice.
 */
static u64 get_est_stopping_clk(struct task_ctx *taskc, u64 now)
{
	u64 expected = taskc->avg_runtime;

	if (expected > taskc->slice)
		expected = taskc->slice;

	return now + expected;
}
/*
 * Decide whether @prm_x may preempt (kick) @prm_y: x must be strictly more
 * latency-critical AND expected to stop running before y does, so the
 * preemption is a net latency win rather than a lateral move.
 */
static bool can_x_kick_y(struct preemption_info *prm_x,
			 struct preemption_info *prm_y)
{
	/* Return the predicate directly instead of if-return-true/false. */
	return (prm_x->lat_cri > prm_y->lat_cri) &&
	       (prm_x->est_stopping_clk < prm_y->est_stopping_clk);
}
/*
 * Test whether @prm_x can preempt the task currently on @cpuc2.
 * On the way, fill @prm_cpu2 with the CPU's preemption snapshot so the
 * caller can later compare collected candidates against each other.
 */
static bool can_x_kick_cpu2(struct preemption_info *prm_x,
			    struct preemption_info *prm_cpu2,
			    struct cpu_ctx *cpuc2)
{
	/* Never preempt a CPU whose current task is holding a lock. */
	if (is_lock_holder_running(cpuc2))
		return false;

	/* Snapshot the CPU's state into the caller-provided slot. */
	prm_cpu2->cpuc = cpuc2;
	prm_cpu2->lat_cri = cpuc2->lat_cri;
	prm_cpu2->est_stopping_clk = cpuc2->est_stopping_clk;

	return can_x_kick_y(prm_x, prm_cpu2);
}
/*
 * Build a preemption snapshot for @taskc as of @now. The cpuc field is
 * cleared: this snapshot describes a task, not a CPU.
 */
static void init_prm_by_task(struct preemption_info *prm_task,
			     struct task_ctx *taskc, u64 now)
{
	prm_task->cpuc = NULL;
	prm_task->lat_cri = taskc->lat_cri;
	prm_task->est_stopping_clk = get_est_stopping_clk(taskc, now);
}
/*
 * Only tasks at or above the system-wide latency-criticality threshold
 * justify the cost of preempting another CPU.
 */
static bool is_worth_kick_other_task(struct task_ctx *taskc)
{
	return !(taskc->lat_cri < sys_stat.thr_lat_cri);
}
/*
 * Find a CPU in @cpumask (with @preferred_cpu considered first) whose
 * running task @taskc may preempt. Up to two candidate victims are
 * collected; with two, the one that could itself kick the other loses,
 * i.e. the "easier" victim is returned. Returns NULL when no suitable
 * victim exists or a cpu_ctx lookup fails.
 */
static struct cpu_ctx *find_victim_cpu(const struct cpumask *cpumask,
				       s32 preferred_cpu,
				       struct task_ctx *taskc, u64 now)
{
	struct cpu_ctx *cpuc;
	struct preemption_info prm_task, prm_cpus[2], *victim_cpu;
	int cpu, nr_cpus;
	int i, v = 0;
	bool ret;	/* was int: it only ever holds a bool */

	init_prm_by_task(&prm_task, taskc, now);

	/* Check the preferred CPU first so it fills slot 0 and wins ties. */
	if (preferred_cpu >= 0 && (cpuc = get_cpu_ctx_id(preferred_cpu))) {
		if (can_x_kick_cpu2(&prm_task, &prm_cpus[v], cpuc))
			v++;
	}

	/* Randomly probe the remaining CPUs until two victims are found. */
	nr_cpus = bpf_cpumask_weight(cpumask);
	bpf_for(i, 0, nr_cpus) {
		cpu = bpf_cpumask_any_distribute(cpumask);
		if (cpu >= nr_cpu_ids || cpu == preferred_cpu)
			continue;

		cpuc = get_cpu_ctx_id(cpu);
		if (!cpuc) {
			scx_bpf_error("Failed to lookup cpu_ctx: %d", cpu);
			goto null_out;
		}
		if (!cpuc->is_online)
			continue;

		/* Drop the '== true' comparison; test the bool directly. */
		ret = can_x_kick_cpu2(&prm_task, &prm_cpus[v], cpuc);
		if (ret && ++v >= 2)
			break;
	}

	switch (v) {
	case 2:
		/* Prefer the candidate that dominates the other. */
		victim_cpu = can_x_kick_y(&prm_cpus[0], &prm_cpus[1]) ?
				&prm_cpus[0] : &prm_cpus[1];
		goto bingo_out;
	case 1:
		victim_cpu = &prm_cpus[0];
		goto bingo_out;
	default:
		/* Merged with the former 'case 0': both fell to NULL. */
		goto null_out;
	}

bingo_out:
	return victim_cpu->cpuc;

null_out:
	return NULL;
}
/*
 * Ask the task currently running on @victim_cpuc to yield after
 * @new_slice ns by shrinking its remaining slice.
 */
static void ask_cpu_yield_after(struct cpu_ctx *victim_cpuc, u64 new_slice)
{
struct rq *victim_rq;
struct task_struct *victim_p;
victim_rq = scx_bpf_cpu_rq(victim_cpuc->cpu_id);
if (victim_rq && (victim_p = victim_rq->curr)) {
u64 old = victim_cpuc->est_stopping_clk;
/*
 * est_stopping_clk == 0 appears to mean a concurrent caller already
 * claimed this CPU for an immediate yield (see the CAS below) — don't
 * ask again.
 */
if (!old)
return;
/* A zero slice is not meaningful; clamp to at least 1 ns. */
new_slice = max(new_slice, 1);
if (new_slice == 1) {
/*
 * Immediate-yield path: atomically swing est_stopping_clk to 0
 * so exactly one concurrent caller gets to shrink the slice.
 */
bool ret = __sync_bool_compare_and_swap(
&victim_cpuc->est_stopping_clk, old, 0);
if (ret)
WRITE_ONCE(victim_p->scx.slice, new_slice);
} else {
/* Partial shrink: only ever reduce the remaining slice. */
if (victim_p->scx.slice > new_slice)
WRITE_ONCE(victim_p->scx.slice, new_slice);
}
}
}
/*
 * Revoke a remote CPU's boosted slice: trim it back to what remains of
 * the default slice, or kick the CPU outright when the default slice has
 * already been fully consumed.
 */
static void shrink_boosted_slice_remote(struct cpu_ctx *cpuc, u64 now)
{
	u64 ran, remaining = 0;

	reset_cpu_flag(cpuc, LAVD_FLAG_SLICE_BOOST);

	/* How much of the default slice is left after time already run? */
	ran = time_delta(now, cpuc->running_clk);
	if (sys_stat.slice > ran)
		remaining = time_delta(sys_stat.slice, ran);

	if (remaining)
		ask_cpu_yield_after(cpuc, remaining);
	else
		scx_bpf_kick_cpu(cpuc->cpu_id, SCX_KICK_PREEMPT);

	cpuc->nr_preempt++;
}
/*
 * Revoke a boosted slice from tick context, where @p (the CPU's current
 * task) can be written directly: set its slice to whatever remains of the
 * default slice (zero if already overrun, ending it at the next switch).
 */
static void shrink_boosted_slice_at_tick(struct task_struct *p,
					 struct cpu_ctx *cpuc, u64 now)
{
	u64 ran = time_delta(now, cpuc->running_clk);
	u64 remaining = (sys_stat.slice > ran) ?
			time_delta(sys_stat.slice, ran) : 0;

	reset_cpu_flag(cpuc, LAVD_FLAG_SLICE_BOOST);
	p->scx.slice = remaining;
	cpuc->nr_preempt++;
}
/*
 * Try to find a CPU whose current task should be preempted in favor of
 * @taskc (just enqueued to @dsq_id), and ask that CPU to yield.
 */
static void try_find_and_kick_victim_cpu(struct task_struct *p,
struct task_ctx *taskc,
s32 preferred_cpu,
u64 dsq_id)
{
struct preemption_info prm_t, prm_c;
struct bpf_cpumask *cd_cpumask, *cpumask;
struct cpdom_ctx *cpdomc;
struct cpu_ctx *cpuc_victim;
struct cpu_ctx *cpuc_cur = NULL;
u64 now, dur, cpdom_id, new_slice = 0;
/* Greedy or insufficiently latency-critical tasks never preempt. */
if (test_task_flag(taskc, LAVD_FLAG_IS_GREEDY) ||
!is_worth_kick_other_task(taskc))
return;
now = scx_bpf_now();
/*
 * Fast path: the preferred CPU is running with a boosted slice.
 * If its default slice is already used up and @taskc can out-rank
 * it, preempt immediately (new_slice stays 0 -> immediate yield in
 * ask_cpu_yield_after()). For an affinitized task, trim the boost
 * back to the remaining default slice even without out-ranking.
 */
if (!no_slice_boost &&
(preferred_cpu >= 0) &&
(cpuc_victim = get_cpu_ctx_id(preferred_cpu)) &&
test_cpu_flag(cpuc_victim, LAVD_FLAG_SLICE_BOOST)) {
dur = time_delta(now, cpuc_victim->running_clk);
if (dur >= sys_stat.slice) {
init_prm_by_task(&prm_t, taskc, now);
if (can_x_kick_cpu2(&prm_t, &prm_c, cpuc_victim)) {
reset_cpu_flag(cpuc_victim, LAVD_FLAG_SLICE_BOOST);
goto kick_out;
}
}
if (test_task_flag(taskc, LAVD_FLAG_IS_AFFINITIZED)) {
if (sys_stat.slice > dur)
new_slice = time_delta(sys_stat.slice, dur);
reset_cpu_flag(cpuc_victim, LAVD_FLAG_SLICE_BOOST);
goto kick_out;
}
}
/*
 * Slow path: scan the task's compute domain (intersected with its
 * allowed CPUs) for a victim.
 */
cpuc_cur = get_cpu_ctx();
if (!cpuc_cur)
return;
cpumask = cpuc_cur->tmp_t_mask;
cpdom_id = dsq_to_cpdom(dsq_id);
cpdomc = MEMBER_VPTR(cpdom_ctxs, [cpdom_id]);
cd_cpumask = MEMBER_VPTR(cpdom_cpumask, [cpdom_id]);
if (!cpdomc || !cd_cpumask || !cpumask)
return;
bpf_cpumask_and(cpumask, cast_mask(cd_cpumask), p->cpus_ptr);
cpuc_victim = find_victim_cpu(cast_mask(cpumask), preferred_cpu, taskc, now);
if (cpuc_victim) {
/*
 * NOTE: also reached by 'goto kick_out' from the fast path above,
 * which jumps into this if-body with cpuc_cur still NULL (hence
 * the lookup fallback below) and new_slice possibly 0.
 */
kick_out:
ask_cpu_yield_after(cpuc_victim, new_slice);
if (cpuc_cur || (cpuc_cur = get_cpu_ctx()))
cpuc_cur->nr_preempt++;
}
}
/*
 * Reset a CPU's preemption bookkeeping. When @released (presumably the
 * CPU was handed to another scheduling class — confirm against callers),
 * mark it as an impossible victim: infinite criticality, zero stopping
 * clock, so can_x_kick_y() can never select it. Otherwise mark it as the
 * easiest possible victim: zero criticality, infinite stopping clock.
 */
static void reset_cpu_preemption_info(struct cpu_ctx *cpuc, bool released)
{
	cpuc->flags = 0;

	if (released) {
		cpuc->lat_cri = SCX_SLICE_INF;
		cpuc->est_stopping_clk = 0;
	} else {
		cpuc->lat_cri = 0;
		cpuc->est_stopping_clk = SCX_SLICE_INF;
	}
}