#include <scx/common.bpf.h>
#include <scx/compat.bpf.h>
#include "intf.h"
#include "bpf_compat.h"
char _license[] SEC("license") = "GPL";
const u64 quantum_ns = CAKE_DEFAULT_QUANTUM_NS;
const u64 new_flow_bonus_ns = CAKE_DEFAULT_NEW_FLOW_BONUS_NS;
const bool enable_stats = false;
const bool has_hybrid = false;
const u32 nr_llcs = 1;
const u32 nr_cpus = 8;
const u32 cpu_llc_id[CAKE_MAX_CPUS] = {};
struct mega_mailbox_entry mega_mailbox[CAKE_MAX_CPUS] SEC(".bss");
#define GET_TIER_RAW(packed) EXTRACT_BITS_U32(packed, SHIFT_TIER, 2)
#define GET_TIER(ctx) GET_TIER_RAW(cake_relaxed_load_u32(&(ctx)->packed_info))
struct cake_scratch {
bool dummy_idle;
u32 init_tier;
u32 cached_llc;
u64 cached_now;
struct bpf_iter_scx_dsq it;
u8 _pad[36];
} global_scratch[CAKE_MAX_CPUS] SEC(".bss") __attribute__((aligned(128)));
_Static_assert(sizeof(struct cake_scratch) <= 128,
"cake_scratch exceeds 128B -- adjacent CPUs will false-share");
struct cake_stats global_stats[CAKE_MAX_CPUS] SEC(".bss") __attribute__((aligned(256)));
u8 __bss_tail_guard[64] SEC(".bss") __attribute__((aligned(64)));
static __always_inline struct cake_stats *get_local_stats(void)
{
u32 cpu = bpf_get_smp_processor_id();
return &global_stats[cpu & (CAKE_MAX_CPUS - 1)];
}
UEI_DEFINE(uei);
#define CAKE_DSQ_LC_BASE 1000
const fused_config_t tier_configs[8] = {
PACK_CONFIG(CAKE_DEFAULT_QUANTUM_NS >> 10, CAKE_DEFAULT_MULTIPLIER_T0,
CAKE_DEFAULT_WAIT_BUDGET_T0 >> 10, CAKE_DEFAULT_STARVATION_T0 >> 10),
PACK_CONFIG(CAKE_DEFAULT_QUANTUM_NS >> 10, CAKE_DEFAULT_MULTIPLIER_T1,
CAKE_DEFAULT_WAIT_BUDGET_T1 >> 10, CAKE_DEFAULT_STARVATION_T1 >> 10),
PACK_CONFIG(CAKE_DEFAULT_QUANTUM_NS >> 10, CAKE_DEFAULT_MULTIPLIER_T2,
CAKE_DEFAULT_WAIT_BUDGET_T2 >> 10, CAKE_DEFAULT_STARVATION_T2 >> 10),
PACK_CONFIG(CAKE_DEFAULT_QUANTUM_NS >> 10, CAKE_DEFAULT_MULTIPLIER_T3,
CAKE_DEFAULT_WAIT_BUDGET_T3 >> 10, CAKE_DEFAULT_STARVATION_T3 >> 10),
PACK_CONFIG(CAKE_DEFAULT_QUANTUM_NS >> 10, CAKE_DEFAULT_MULTIPLIER_T3,
CAKE_DEFAULT_WAIT_BUDGET_T3 >> 10, CAKE_DEFAULT_STARVATION_T3 >> 10),
PACK_CONFIG(CAKE_DEFAULT_QUANTUM_NS >> 10, CAKE_DEFAULT_MULTIPLIER_T3,
CAKE_DEFAULT_WAIT_BUDGET_T3 >> 10, CAKE_DEFAULT_STARVATION_T3 >> 10),
PACK_CONFIG(CAKE_DEFAULT_QUANTUM_NS >> 10, CAKE_DEFAULT_MULTIPLIER_T3,
CAKE_DEFAULT_WAIT_BUDGET_T3 >> 10, CAKE_DEFAULT_STARVATION_T3 >> 10),
PACK_CONFIG(CAKE_DEFAULT_QUANTUM_NS >> 10, CAKE_DEFAULT_MULTIPLIER_T3,
CAKE_DEFAULT_WAIT_BUDGET_T3 >> 10, CAKE_DEFAULT_STARVATION_T3 >> 10),
};
static const u16 tier_recheck_mask[] = {
1023,
127,
31,
15,
15, 15, 15, 15,
};
struct {
__uint(type, BPF_MAP_TYPE_TASK_STORAGE);
__uint(map_flags, BPF_F_NO_PREALLOC);
__type(key, int);
__type(value, struct cake_task_ctx);
} task_ctx SEC(".maps");
__attribute__((noinline))
struct task_struct *cake_bpf_dsq_peek_legacy(u64 dsq_id)
{
register u64 dsq_reg asm("r9") = dsq_id;
asm volatile("" : "+r"(dsq_reg));
u32 cpu = bpf_get_smp_processor_id() & (CAKE_MAX_CPUS - 1);
asm volatile("" : "+r"(dsq_reg));
struct cake_scratch *scr = &global_scratch[cpu];
struct task_struct *p = NULL;
if (bpf_iter_scx_dsq_new(&scr->it, dsq_reg, 0) == 0) {
p = bpf_iter_scx_dsq_next(&scr->it);
bpf_iter_scx_dsq_destroy(&scr->it);
}
return p;
}
static __attribute__((noinline))
struct cake_task_ctx *alloc_task_ctx_cold(struct task_struct *p)
{
struct cake_task_ctx *ctx;
ctx = bpf_task_storage_get(&task_ctx, p, 0,
BPF_LOCAL_STORAGE_GET_F_CREATE);
if (!ctx) return NULL;
ctx->next_slice = quantum_ns;
u16 init_deficit = (u16)((quantum_ns + new_flow_bonus_ns) >> 10);
ctx->deficit_avg_fused = PACK_DEFICIT_AVG(init_deficit, 0);
ctx->last_run_at = 0;
ctx->reclass_counter = 0;
u32 prio = p->static_prio;
u8 init_tier;
if (prio < 120) {
init_tier = CAKE_TIER_CRITICAL;
} else if (prio > 130) {
init_tier = CAKE_TIER_BULK;
} else {
init_tier = CAKE_TIER_INTERACT;
}
u32 packed = 0;
packed |= (255 & MASK_KALMAN_ERROR) << SHIFT_KALMAN_ERROR;
packed |= (((u32)(init_tier & MASK_TIER) << 4) | (CAKE_FLOW_NEW & MASK_FLAGS)) << SHIFT_FLAGS;
ctx->packed_info = packed;
return ctx;
}
static __always_inline struct cake_task_ctx *get_task_ctx(struct task_struct *p, bool create)
{
struct cake_task_ctx *ctx;
ctx = bpf_task_storage_get(&task_ctx, p, 0, 0);
if (ctx)
return ctx;
if (!create)
return NULL;
return alloc_task_ctx_cold(p);
}
static __attribute__((noinline))
s32 dispatch_sync_cold(struct task_struct *p, u64 wake_flags)
{
u32 cpu = bpf_get_smp_processor_id() & (CAKE_MAX_CPUS - 1);
if (!bpf_cpumask_test_cpu(cpu, p->cpus_ptr))
return -1;
struct cake_task_ctx *tctx = bpf_task_storage_get(&task_ctx, p, 0, 0);
u64 slice = tctx ? tctx->next_slice : quantum_ns;
scx_bpf_dsq_insert(p, SCX_DSQ_LOCAL_ON | cpu, slice, wake_flags);
return (s32)cpu;
}
s32 BPF_STRUCT_OPS(cake_select_cpu, struct task_struct *p, s32 prev_cpu,
u64 wake_flags)
{
if (wake_flags & SCX_WAKE_SYNC) {
s32 sync_cpu = dispatch_sync_cold(p, wake_flags);
if (sync_cpu >= 0)
return sync_cpu;
}
u32 tc_id = bpf_get_smp_processor_id() & (CAKE_MAX_CPUS - 1);
struct cake_scratch *scr = &global_scratch[tc_id];
s32 cpu = scx_bpf_select_cpu_dfl(p, prev_cpu, wake_flags, &scr->dummy_idle);
if (scr->dummy_idle) {
struct cake_task_ctx *tctx = bpf_task_storage_get(&task_ctx, p, 0, 0);
u64 slice = tctx ? tctx->next_slice : quantum_ns;
scx_bpf_dsq_insert(p, SCX_DSQ_LOCAL_ON | cpu, slice, wake_flags);
return cpu;
}
scr->cached_llc = cpu_llc_id[tc_id];
scr->cached_now = scx_bpf_now();
return prev_cpu;
}
void BPF_STRUCT_OPS(cake_enqueue, struct task_struct *p, u64 enq_flags)
{
register struct task_struct *p_reg asm("r6") = p;
u32 task_flags = p_reg->flags;
u32 enq_cpu = bpf_get_smp_processor_id() & (CAKE_MAX_CPUS - 1);
struct cake_scratch *scr = &global_scratch[enq_cpu];
u64 now_cached = scr->cached_now;
u32 enq_llc = scr->cached_llc;
struct cake_task_ctx *tctx = get_task_ctx(p_reg, false);
if (unlikely((task_flags & PF_KTHREAD) && !tctx)) {
u64 vtime = ((u64)CAKE_TIER_CRITICAL << 56) | (now_cached & 0x00FFFFFFFFFFFFFFULL);
scx_bpf_dsq_insert_vtime(p_reg, LLC_DSQ_BASE + enq_llc, quantum_ns, vtime, enq_flags);
return;
}
register struct cake_task_ctx *tctx_reg asm("r7") = tctx;
if (!(enq_flags & (SCX_ENQ_WAKEUP | SCX_ENQ_PREEMPT))) {
u64 vtime = ((u64)CAKE_TIER_BULK << 56) | (now_cached & 0x00FFFFFFFFFFFFFFULL);
scx_bpf_dsq_insert_vtime(p_reg, LLC_DSQ_BASE + enq_llc, quantum_ns, vtime, enq_flags);
return;
}
if (unlikely(!tctx_reg)) {
u64 vtime = ((u64)CAKE_TIER_FRAME << 56) | (now_cached & 0x00FFFFFFFFFFFFFFULL);
scx_bpf_dsq_insert_vtime(p_reg, LLC_DSQ_BASE + enq_llc, quantum_ns, vtime, enq_flags);
return;
}
u8 tier = GET_TIER(tctx_reg) & 3;
u64 slice = tctx_reg->next_slice;
if (enable_stats) {
struct cake_stats *s = get_local_stats();
if (enq_flags & SCX_ENQ_WAKEUP)
s->nr_new_flow_dispatches++;
else
s->nr_old_flow_dispatches++;
if (tier < CAKE_TIER_MAX)
s->nr_tier_dispatches[tier]++;
}
u64 vtime = ((u64)tier << 56) | (now_cached & 0x00FFFFFFFFFFFFFFULL);
u32 task_packed = cake_relaxed_load_u32(&tctx_reg->packed_info);
if (task_packed & ((u32)CAKE_FLOW_NEW << SHIFT_FLAGS))
vtime -= new_flow_bonus_ns;
scx_bpf_dsq_insert_vtime(p_reg, LLC_DSQ_BASE + enq_llc, slice, vtime, enq_flags);
}
void BPF_STRUCT_OPS(cake_dispatch, s32 raw_cpu, struct task_struct *prev)
{
u32 my_llc = cpu_llc_id[raw_cpu & (CAKE_MAX_CPUS - 1)];
if (scx_bpf_dsq_move_to_local(LLC_DSQ_BASE + my_llc))
return;
if (nr_llcs <= 1)
return;
for (u32 i = 1; i < CAKE_MAX_LLCS; i++) {
if (i >= nr_llcs)
break;
u32 victim = my_llc + i;
if (victim >= nr_llcs)
victim -= nr_llcs;
if (scx_bpf_dsq_move_to_local(LLC_DSQ_BASE + victim))
return;
}
}
const u32 tier_perf_target[8] = {
1024,
1024,
1024,
768,
768, 768, 768, 768,
};
void BPF_STRUCT_OPS(cake_tick, struct task_struct *p)
{
register struct task_struct *p_reg asm("r6") = p;
register struct cake_task_ctx *tctx_reg asm("r7") = get_task_ctx(p_reg, false);
register u32 cpu_id_reg asm("r8") = bpf_get_smp_processor_id() & (CAKE_MAX_CPUS - 1);
u32 now = (u32)scx_bpf_now();
if (unlikely(!tctx_reg || tctx_reg->last_run_at == 0)) {
if (tctx_reg) tctx_reg->last_run_at = now;
return;
}
register u8 tier_reg asm("r9") = GET_TIER(tctx_reg);
u32 last_run = tctx_reg->last_run_at;
u64 runtime = (u64)(now - last_run);
if (unlikely(runtime > tctx_reg->next_slice)) {
scx_bpf_kick_cpu(cpu_id_reg, SCX_KICK_PREEMPT);
return;
}
struct mega_mailbox_entry *mbox = &mega_mailbox[cpu_id_reg];
u8 tc = mbox->tick_counter;
u8 skip_mask = tc < 8 ? 0 : tc < 16 ? 1 : tc < 32 ? 3 : 7;
if (!(tc & skip_mask)) {
struct rq *rq = cake_get_rq(cpu_id_reg);
if (rq && rq->scx.nr_running > 1) {
mbox->tick_counter = 0;
u64 threshold = UNPACK_STARVATION_NS(tier_configs[tier_reg & 7]);
if (unlikely(runtime > threshold)) {
scx_bpf_kick_cpu(cpu_id_reg, SCX_KICK_PREEMPT);
if (enable_stats && tier_reg < CAKE_TIER_MAX) {
struct cake_stats *s = get_local_stats();
if (s) s->nr_starvation_preempts_tier[tier_reg]++;
}
return;
}
} else {
if (tc < 255) mbox->tick_counter = tc + 1;
}
} else {
if (tc < 255) mbox->tick_counter = tc + 1;
}
u8 new_flags = (tier_reg & MBOX_TIER_MASK);
if (mbox->flags != new_flags)
mbox->flags = new_flags;
u32 target = tier_perf_target[tier_reg & 7];
if (has_hybrid) {
u32 cap = scx_bpf_cpuperf_cap(cpu_id_reg);
target = (target * cap) >> 10;
}
u8 cached_perf = mbox->dsq_hint;
u8 target_cached = (u8)(target >> 2);
if (cached_perf != target_cached) {
scx_bpf_cpuperf_set(cpu_id_reg, target);
mbox->dsq_hint = target_cached;
}
}
void BPF_STRUCT_OPS(cake_running, struct task_struct *p)
{
struct cake_task_ctx *tctx = get_task_ctx(p, false);
if (!tctx)
return;
tctx->last_run_at = (u32)scx_bpf_now();
}
static __attribute__((noinline))
void reclassify_task_cold(struct cake_task_ctx *tctx)
{
u32 packed = cake_relaxed_load_u32(&tctx->packed_info);
u32 now = (u32)scx_bpf_now();
u32 last_run = tctx->last_run_at;
if (!last_run)
return;
u32 runtime_raw = now - last_run;
u32 runtime_us = runtime_raw >> 10;
u16 rt_clamped = runtime_us > 0xFFFF ? 0xFFFF : (u16)runtime_us;
u8 stable = (packed >> SHIFT_STABLE) & 3;
if (stable == 3) {
u32 old_fused = tctx->deficit_avg_fused;
u16 avg_rt = EXTRACT_AVG_RT(old_fused);
u16 new_avg = avg_rt - (avg_rt >> 3) + (rt_clamped >> 3);
u16 deficit = EXTRACT_DEFICIT(old_fused);
deficit = (rt_clamped >= deficit) ? 0 : deficit - rt_clamped;
u32 new_fused = PACK_DEFICIT_AVG(deficit, new_avg);
if (new_fused != old_fused)
tctx->deficit_avg_fused = new_fused;
u8 tier = (packed >> SHIFT_TIER) & MASK_TIER;
u16 mask = tier_recheck_mask[tier & 3];
u16 counter = tctx->reclass_counter + 1;
tctx->reclass_counter = counter;
if (counter & mask) {
u16 g0 = tier <= 0 ? TIER_GATE_T0 : TIER_GATE_T0 - TIER_GATE_T0 / 10;
u16 g1 = tier <= 1 ? TIER_GATE_T1 : TIER_GATE_T1 - TIER_GATE_T1 / 10;
u16 g2 = tier <= 2 ? TIER_GATE_T2 : TIER_GATE_T2 - TIER_GATE_T2 / 10;
u8 spot_tier;
if (new_avg < g0) spot_tier = 0;
else if (new_avg < g1) spot_tier = 1;
else if (new_avg < g2) spot_tier = 2;
else spot_tier = 3;
if (spot_tier != tier) {
u32 reset = packed & ~((u32)3 << SHIFT_STABLE);
cake_relaxed_store_u32(&tctx->packed_info, reset);
tctx->reclass_counter = 0;
}
return;
}
}
u32 old_fused = tctx->deficit_avg_fused;
u16 avg_rt = EXTRACT_AVG_RT(old_fused);
u16 new_avg = avg_rt - (avg_rt >> 3) + (rt_clamped >> 3);
u16 deficit = EXTRACT_DEFICIT(old_fused);
deficit = (rt_clamped >= deficit) ? 0 : deficit - rt_clamped;
bool deficit_exhausted = (deficit == 0 && (packed & ((u32)CAKE_FLOW_NEW << SHIFT_FLAGS)));
u32 new_fused = PACK_DEFICIT_AVG(deficit, new_avg);
if (new_fused != old_fused)
tctx->deficit_avg_fused = new_fused;
u8 old_tier = (packed >> SHIFT_TIER) & MASK_TIER;
u8 new_tier;
u16 g0 = old_tier <= 0 ? TIER_GATE_T0 : TIER_GATE_T0 - TIER_GATE_T0 / 10;
u16 g1 = old_tier <= 1 ? TIER_GATE_T1 : TIER_GATE_T1 - TIER_GATE_T1 / 10;
u16 g2 = old_tier <= 2 ? TIER_GATE_T2 : TIER_GATE_T2 - TIER_GATE_T2 / 10;
if (new_avg < g0) new_tier = 0;
else if (new_avg < g1) new_tier = 1;
else if (new_avg < g2) new_tier = 2;
else new_tier = 3;
bool tier_changed = (new_tier != old_tier);
u8 new_stable = tier_changed ? 0 : ((stable < 3) ? stable + 1 : 3);
if (tier_changed || deficit_exhausted || new_stable != stable) {
u32 new_packed = packed;
new_packed &= ~((u32)0xF << 28);
new_packed |= (((u32)new_stable << 2) | (u32)new_tier) << 28;
if (deficit_exhausted)
new_packed &= ~((u32)CAKE_FLOW_NEW << SHIFT_FLAGS);
cake_relaxed_store_u32(&tctx->packed_info, new_packed);
}
if (tier_changed) {
u64 cfg = tier_configs[new_tier & 7];
u64 mult = UNPACK_MULTIPLIER(cfg);
tctx->next_slice = (quantum_ns * mult) >> 10;
tctx->reclass_counter = 0;
}
}
void BPF_STRUCT_OPS(cake_stopping, struct task_struct *p, bool runnable)
{
struct cake_task_ctx *tctx = get_task_ctx(p, false);
if (tctx)
reclassify_task_cold(tctx);
}
s32 BPF_STRUCT_OPS_SLEEPABLE(cake_init)
{
for (u32 i = 0; i < CAKE_MAX_LLCS; i++) {
if (i >= nr_llcs)
break;
s32 ret = scx_bpf_create_dsq(LLC_DSQ_BASE + i, -1);
if (ret < 0)
return ret;
}
return 0;
}
void BPF_STRUCT_OPS(cake_exit, struct scx_exit_info *ei)
{
UEI_RECORD(uei, ei);
}
SCX_OPS_DEFINE(cake_ops,
.select_cpu = (void *)cake_select_cpu,
.enqueue = (void *)cake_enqueue,
.dispatch = (void *)cake_dispatch,
.tick = (void *)cake_tick,
.running = (void *)cake_running,
.stopping = (void *)cake_stopping,
.init = (void *)cake_init,
.exit = (void *)cake_exit,
.flags = SCX_OPS_KEEP_BUILTIN_IDLE,
.name = "cake");