#ifndef __LAVD_H
#define __LAVD_H
#include <scx/common.bpf.h>
#include <bpf_arena_common.bpf.h>
#include <lib/sdt_task.h>
#include <lib/atq.h>
#define U64_MAX ((u64)~0ULL)
#define S64_MAX ((s64)(U64_MAX >> 1))
#define U32_MAX ((u32)~0U)
#define S32_MAX ((s32)(U32_MAX >> 1))
#define MAX_RT_PRIO 100
#define LAVD_SHIFT 10
#define LAVD_SCALE (1L << LAVD_SHIFT)
#define p2s(percent) (((percent) << LAVD_SHIFT) / 100)
#define s2p(scale) (((scale) * 100) >> LAVD_SHIFT)
#define cpdom_to_dsq(cpdom_id) ((cpdom_id) | LAVD_DSQ_TYPE_CPDOM << LAVD_DSQ_TYPE_SHFT)
#define dsq_to_cpdom(dsq_id) ((dsq_id) & LAVD_DSQ_ID_MASK)
#define dsq_to_cpu(dsq_id) ((dsq_id) & LAVD_DSQ_ID_MASK)
#define dsq_type(dsq_id) (((dsq_id) & LAVD_DSQ_TYPE_MASK) >> LAVD_DSQ_TYPE_SHFT)
/*
 * Layout of a DSQ id: bits [11:0] carry the CPU or compute-domain
 * index, bits [13:12] carry the DSQ type. Decoded/encoded by the
 * cpdom_to_dsq()/dsq_to_*() macros above.
 */
enum {
	LAVD_DSQ_TYPE_SHFT	= 12,				/* bit position of the type field */
	LAVD_DSQ_TYPE_MASK	= 0x3 << LAVD_DSQ_TYPE_SHFT,	/* 2-bit type field */
	LAVD_DSQ_ID_SHFT	= 0,				/* bit position of the index field */
	LAVD_DSQ_ID_MASK	= 0xfff << LAVD_DSQ_ID_SHFT,	/* 12-bit index field */
	LAVD_DSQ_NR_TYPES	= 2,
	LAVD_DSQ_TYPE_CPDOM	= 1,	/* DSQ owned by a compute domain */
	LAVD_DSQ_TYPE_CPU	= 0,	/* DSQ owned by a single CPU */
};
/*
 * Internal tuning constants. Names suffixed _NS are nanoseconds;
 * p2s(x) entries are percentages in the LAVD_SCALE fixed-point domain.
 */
enum consts_internal {
	/* Linux clock id for the boot-time clock (uapi headers unavailable here). */
	CLOCK_BOOTTIME			= 7,
	CACHELINE_SIZE			= 64,

	/* Time units. */
	NSEC_PER_USEC			= 1000ULL,
	NSEC_PER_MSEC			= (1000ULL * NSEC_PER_USEC),
	LAVD_TIME_ONE_SEC		= (1000ULL * NSEC_PER_MSEC),

	LAVD_MAX_RETRY			= 3,

	/* Scheduling latency target and default time-slice bounds. */
	LAVD_TARGETED_LATENCY_NS	= (10ULL * NSEC_PER_MSEC),
	LAVD_SLICE_MIN_NS_DFL		= (500ULL * NSEC_PER_USEC),	/* 0.5 ms */
	LAVD_SLICE_MAX_NS_DFL		= (5ULL * NSEC_PER_MSEC),	/* 5 ms */

	/* Slice-boost sizing (see LAVD_FLAG_SLICE_BOOST / can_boost_slice()). */
	LAVD_SLICE_BOOST_BONUS		= LAVD_SLICE_MIN_NS_DFL,
	LAVD_SLICE_BOOST_MAX		= (500ULL * NSEC_PER_MSEC),
	LAVD_SLICE_BOOST_UTIL_WALL	= p2s(95),

	LAVD_ACC_RUNTIME_MAX		= LAVD_SLICE_MAX_NS_DFL,
	LAVD_TASK_LAG_MAX		= (500ULL * NSEC_PER_MSEC),
	/*
	 * NOTE(review): the >> 16 presumably matches a compressed clock
	 * representation in the deadline logic — confirm against
	 * calc_when_to_run().
	 */
	LAVD_DL_COMPETE_WINDOW		= ((300ULL * NSEC_PER_MSEC) >> 16),

	/* Latency-criticality (LC) inputs: caps, weight boosts, and shifts. */
	LAVD_LC_FREQ_MAX		= 100000,
	LAVD_LC_RUNTIME_MAX		= LAVD_TIME_ONE_SEC,
	LAVD_LC_WEIGHT_BOOST_REGULAR	= 128,
	LAVD_LC_WEIGHT_BOOST_MEDIUM	= (2 * LAVD_LC_WEIGHT_BOOST_REGULAR),	/* 256 */
	LAVD_LC_WEIGHT_BOOST_HIGH	= (2 * LAVD_LC_WEIGHT_BOOST_MEDIUM),	/* 512 */
	LAVD_LC_WEIGHT_BOOST_HIGHEST	= (2 * LAVD_LC_WEIGHT_BOOST_HIGH),	/* 1024 */
	LAVD_LC_GREEDY_SHIFT		= 1,
	LAVD_LC_WAKE_INTERVAL_MIN	= LAVD_SLICE_MIN_NS_DFL,
	LAVD_LC_INH_RECEIVER_SHIFT	= 2,
	LAVD_LC_INH_GIVER_SHIFT		= 3,

	/* System-statistics interval and decay horizon (~2 s / interval). */
	LAVD_SYS_STAT_INTERVAL_NS	= (10ULL * NSEC_PER_MSEC),
	LAVD_SYS_STAT_DECAY_TIMES	= ((2ULL * LAVD_TIME_ONE_SEC) / LAVD_SYS_STAT_INTERVAL_NS),

	/* CPU-frequency (cpuperf) utilization thresholds. */
	LAVD_CPU_UTIL_MAX_FOR_CPUPERF	= p2s(85),
	LAVD_CPU_UTIL_THR_FOR_MAX_FREQ	= p2s(80),

	/* Core-compaction (CC) thresholds and CPU-pinning interval. */
	LAVD_CC_REQ_CAPACITY_HEADROOM	= p2s(25),
	LAVD_CC_PER_CPU_UTIL		= p2s(50),
	LAVD_CC_UTIL_SPIKE		= p2s(90),
	LAVD_CC_CPU_PIN_INTERVAL	= (250ULL * NSEC_PER_MSEC),
	LAVD_CC_CPU_PIN_INTERVAL_DIV	= (LAVD_CC_CPU_PIN_INTERVAL / LAVD_SYS_STAT_INTERVAL_NS),

	/* Autopilot (AP) high-utilization defaults with/without SMT. */
	LAVD_AP_HIGH_UTIL_DFL_SMT_RT	= p2s(25),
	LAVD_AP_HIGH_UTIL_DFL_NO_SMT_RT	= p2s(50),

	/*
	 * Cross-compute-domain migration shifts (UL/OL presumably
	 * under-/over-loaded — confirm in plan_x_cpdom_migration()).
	 */
	LAVD_CPDOM_MIG_SHIFT_UL		= 2,
	LAVD_CPDOM_MIG_SHIFT		= 3,
	LAVD_CPDOM_MIG_SHIFT_OL		= 4,
	LAVD_CPDOM_MIG_PROB_FT		= (LAVD_SYS_STAT_INTERVAL_NS / LAVD_SLICE_MAX_NS_DFL),

	/* Sentinel for cpu_ctx.futex_op when no futex operation is pending. */
	LAVD_FUTEX_OP_INVALID		= -1,
};
/*
 * Flag bits for the volatile 'flags' word in task_ctx and cpu_ctx;
 * manipulated via {set,reset,test}_task_flag() and
 * {set,reset,test}_cpu_flag() declared below.
 */
enum consts_flags {
	LAVD_FLAG_FUTEX_BOOST		= (0x1 << 0),
	LAVD_FLAG_NEED_LOCK_BOOST	= (0x1 << 1),
	LAVD_FLAG_IS_GREEDY		= (0x1 << 2),
	LAVD_FLAG_IS_AFFINITIZED	= (0x1 << 3),
	LAVD_FLAG_IS_WAKEUP		= (0x1 << 4),
	LAVD_FLAG_IS_SYNC_WAKEUP	= (0x1 << 5),
	LAVD_FLAG_ON_BIG		= (0x1 << 6),
	LAVD_FLAG_ON_LITTLE		= (0x1 << 7),
	LAVD_FLAG_SLICE_BOOST		= (0x1 << 8),
	LAVD_FLAG_IDLE_CPU_PICKED	= (0x1 << 9),
	LAVD_FLAG_KSOFTIRQD		= (0x1 << 10),
	LAVD_FLAG_WOKEN_BY_RT_DL	= (0x1 << 11),
	LAVD_FLAG_WOKEN_BY_HARDIRQ	= (0x1 << 12),
	LAVD_FLAG_WOKEN_BY_SOFTIRQ	= (0x1 << 13),
	LAVD_FLAG_MIGRATION_AGGRESSIVE	= (0x1 << 14),
};

/* Mask of all migration-policy flags (tested via test_task_flag_mask()). */
#define LAVD_MASK_MIGRATION (LAVD_FLAG_MIGRATION_AGGRESSIVE)
/*
 * Per-task scheduling context, stored in the BPF arena (see the
 * task_ctx typedef and get_task_ctx() below).
 *
 * Naming conventions, inferred from this header — confirm in the
 * .bpf.c sources: *_clk fields are timestamps, *_freq fields are event
 * frequencies, *_wall fields are wall-clock durations, and *_wwgt
 * presumably denotes weight-scaled time.
 */
struct task_ctx {
	/* Embedded ATQ node (lib/atq.h); cacheline-aligned like the struct. */
	struct scx_task_common atq __attribute__((aligned(CACHELINE_SIZE)));

	volatile u64 flags;	/* LAVD_FLAG_* bits */

	u64 slice_wall;
	u64 wait_freq;
	u64 wake_freq;
	u64 last_measured_wall_clk;
	u64 acc_runtime_wall;
	u64 avg_runtime_wall;
	u64 svc_time_wwgt;
	u64 last_runnable_clk;
	u64 last_running_clk;
	u64 run_freq;

	/* Latency/performance criticality scores and waker/wakee inputs. */
	u16 lat_cri;
	u16 lat_cri_waker;
	u16 lat_cri_wakee;
	u16 perf_cri;

	/* CPU placement state. */
	u32 cpdom_id;
	s32 pinned_cpu_id;	/* signed — presumably negative when unpinned; confirm */
	u32 suggested_cpu_id;
	u32 prev_cpu_id;
	u32 cpu_id;

	u64 last_quiescent_clk;
	u64 last_measured_task_clk;
	u64 cgrp_id;
	u64 resched_interval_wall;
	u64 last_slice_used_wall;

	/* Identity of this task and of its last waker. */
	pid_t pid;
	pid_t waker_pid;
	char waker_comm[TASK_COMM_LEN + 1];
} __attribute__((aligned(CACHELINE_SIZE)));
/*
 * Per-compute-domain (cpdom) context. A domain groups CPUs (numa_id /
 * llc_id describe its topology) and owns one DSQ (cpdom_to_dsq()).
 * The mostly read-only topology fields sit on the first cache line;
 * frequently-updated load/steal state is pushed to its own line via
 * the aligned attribute on is_stealer.
 */
struct cpdom_ctx {
	u64 id;
	u64 alt_id;
	u8 numa_id;
	u8 llc_id;
	u8 is_big;
	u8 is_valid;
	u8 nr_neighbors[LAVD_CPDOM_MAX_DIST];	/* neighbor count per hop distance */
	u64 __cpumask[LAVD_CPU_ID_MAX/64];	/* raw bitmap of member CPUs */
	/* Flat [distance][index] table; indexed through get_neighbor_id(). */
	u8 neighbor_ids[LAVD_CPDOM_MAX_DIST * LAVD_CPDOM_MAX_NR];

	/* Hot, frequently-updated state starts on a fresh cache line. */
	u8 is_stealer __attribute__((aligned(CACHELINE_SIZE)));
	u8 is_stealee;
	u16 nr_active_cpus;
	u16 nr_acpus_temp;	/* presumably scratch for recomputing nr_active_cpus — confirm */
	u32 load_invr;
	u32 nr_queued_task;
	u32 cur_util_wall_sum;
	u32 avg_util_wall_sum;
	u32 cap_sum_active_cpus;
	u32 cap_sum_temp;
	u32 dsq_consume_lat;
} __attribute__((aligned(CACHELINE_SIZE)));
/* Look up neighbor index i at hop distance d in cpdomc's flat neighbor table. */
#define get_neighbor_id(cpdomc, d, i) ((cpdomc)->neighbor_ids[((d) * LAVD_CPDOM_MAX_NR) + (i)])

/* Compute-domain contexts and their cpumasks; nr_cpdoms presumably counts valid entries. */
extern struct cpdom_ctx cpdom_ctxs[LAVD_CPDOM_MAX_NR];
extern struct bpf_cpumask cpdom_cpumask[LAVD_CPDOM_MAX_NR];
extern int nr_cpdoms;

/*
 * task_ctx lives in the BPF arena. get_task_ctx() casts the arena
 * address returned by get_task_ctx_internal() into a usable pointer.
 */
typedef struct task_ctx __arena task_ctx;
u64 get_task_ctx_internal(struct task_struct *p);
#define get_task_ctx(p) ((task_ctx *)get_task_ctx_internal((p)))

/* Per-CPU context lookup: current CPU, by CPU id, or by a task's CPU. */
struct cpu_ctx *get_cpu_ctx(void);
struct cpu_ctx *get_cpu_ctx_id(s32 cpu_id);
struct cpu_ctx *get_cpu_ctx_task(const struct task_struct *p);
/*
 * Per-CPU context. Cacheline-aligned; fields marked volatile are
 * read/written concurrently from other CPUs.
 */
struct cpu_ctx {
	/* Flags (LAVD_FLAG_* bits) and current-task timing. */
	volatile u64 flags;
	volatile u64 est_stopping_clk;
	volatile u64 running_clk;

	/* Criticality and task-time aggregates. */
	volatile u16 lat_cri;
	volatile u16 effective_capacity;
	volatile u32 max_lat_cri;
	volatile u64 sum_lat_cri;
	volatile u64 tot_task_time_wall;
	volatile u64 tot_task_time_wwgt;
	volatile u64 tot_task_time_invr;
	volatile u64 sum_perf_cri;
	volatile u32 min_perf_cri;
	volatile u32 max_perf_cri;

	/* CPU frequency: advertised maximum vs. highest actually observed. */
	volatile u32 max_freq;
	volatile u32 max_freq_observed;

	/* Event counters. */
	volatile u32 nr_sched;
	volatile u32 nr_preempt;
	volatile u32 nr_x_migration;
	volatile u32 nr_perf_cri;
	volatile u32 nr_lat_cri;
	volatile u32 nr_pinned_tasks;

	volatile s32 futex_op;	/* LAVD_FUTEX_OP_INVALID when none pending */

	/* Utilization: current vs. averaged, and cpuperf target. */
	volatile u32 avg_util_wall;
	volatile u32 cur_util_wall;
	u32 cpuperf_cur;
	volatile u32 avg_util_invr;
	volatile u32 cur_util_invr;

	/* Idle and online/offline accounting. */
	volatile u64 cpu_release_clk;
	volatile u64 idle_total_wall;
	volatile u64 idle_start_clk;
	u64 online_clk;
	u64 offline_clk;

	/* Stolen-time accounting (presumably hypervisor steal — confirm). */
	volatile u32 avg_stolen_time_wall;
	volatile u32 cur_stolen_time_wall;
	volatile u64 stolen_time_wall;

	/* Static topology of this CPU. */
	u16 cpu_id;
	u16 max_capacity;
	u8 big_core;
	u8 turbo_core;
	u8 llc_id;
	u8 cpdom_id;
	u8 cpdom_alt_id;
	u8 is_online;

	/* Scratch cpumasks for CPU selection (see struct pick_ctx). */
	struct bpf_cpumask __kptr *tmp_a_mask;
	struct bpf_cpumask __kptr *tmp_o_mask;
	struct bpf_cpumask __kptr *tmp_l_mask;
	struct bpf_cpumask __kptr *tmp_i_mask;
	struct bpf_cpumask __kptr *tmp_t_mask;
	struct bpf_cpumask __kptr *tmp_t2_mask;
	struct bpf_cpumask __kptr *tmp_t3_mask;
} __attribute__((aligned(CACHELINE_SIZE)));
/*
 * Globals shared with userspace. The const volatile ones are set once
 * at load time; nr_cpus_onln is updated at runtime.
 * (nr_cpu_ids previously read "const extern volatile" — the storage
 * class belongs first; its non-first placement is deprecated and was
 * inconsistent with the sibling declarations.)
 */
extern const volatile u64 nr_llcs;
extern const volatile u32 nr_cpu_ids;
extern volatile u64 nr_cpus_onln;
extern const volatile u16 cpu_capacity[LAVD_CPU_ID_MAX];
extern const volatile u8 cpu_big[LAVD_CPU_ID_MAX];
extern const volatile u8 cpu_turbo[LAVD_CPU_ID_MAX];
extern const volatile bool no_wake_sync;
extern const volatile bool no_slice_boost;
extern const volatile u8 verbose;	/* gates debugln()/traceln() below */
/*
 * Verbosity-gated logging via bpf_printk(), prefixed with the calling
 * function and line: debugln() prints when verbose > 0, traceln() when
 * verbose > 1.
 */
#define debugln(fmt, ...)						\
({									\
	if (verbose > 0)						\
		bpf_printk("[%s:%d] " fmt, __func__, __LINE__,		\
			   ##__VA_ARGS__);				\
})

#define traceln(fmt, ...)						\
({									\
	if (verbose > 1)						\
		bpf_printk("[%s:%d] " fmt, __func__, __LINE__,		\
			   ##__VA_ARGS__);				\
})
/*
 * Classic min/max/clamp macros. NOTE: arguments are evaluated more
 * than once — do not pass expressions with side effects.
 */
#ifndef min
#define min(X, Y) (((X) < (Y)) ? (X) : (Y))
#endif
#ifndef max
#define max(X, Y) (((X) < (Y)) ? (Y) : (X))
#endif
#ifndef clamp
#define clamp(val, lo, hi) min(max(val, lo), hi)
#endif

/* Averaging helpers (implemented in the .bpf.c side). */
u64 calc_avg(u64 old_val, u64 new_val);
u64 calc_asym_avg(u64 old_val, u64 new_val);
/*
 * Pop the lowest set bit out of *cpumask and return its index, or
 * -ENOENT once the word is exhausted. Intended for iterating a 64-bit
 * cpumask word bit by bit.
 */
static __always_inline int cpumask_next_set_bit(u64 *cpumask)
{
	u64 word = *cpumask;

	if (word == 0)
		return -ENOENT;

	/* Clear the lowest set bit, then report its position. */
	*cpumask = word & (word - 1);
	return ctzll(word);
}
/* System-wide statistics (presumably refreshed every LAVD_SYS_STAT_INTERVAL_NS — confirm). */
extern struct sys_stat sys_stat;
s32 init_sys_stat(u64 now);
int update_sys_stat(void);

/* Power-mode time parameters and feature toggles controlled from userspace. */
extern volatile u64 performance_mode_ns;
extern volatile u64 balanced_mode_ns;
extern volatile u64 powersave_mode_ns;
extern const volatile bool per_cpu_dsq;
extern const volatile u64 pinned_slice_ns;
extern volatile bool reinit_cpumask_for_performance;
extern volatile bool no_preemption;
extern volatile bool no_core_compaction;
extern volatile bool no_freq_scaling;

/* Per-CPU flag helpers operating on cpu_ctx.flags (LAVD_FLAG_* bits). */
bool test_cpu_flag(struct cpu_ctx *cpuc, u64 flag);
void set_cpu_flag(struct cpu_ctx *cpuc, u64 flag);
void reset_cpu_flag(struct cpu_ctx *cpuc, u64 flag);
/* Task/CPU state predicates (implemented in the .bpf.c side). */
bool is_lock_holder(task_ctx *taskc);
bool is_lock_holder_running(struct cpu_ctx *cpuc);
bool have_scheduled(task_ctx *taskc);
bool have_pending_tasks(struct cpu_ctx *cpuc);
bool can_boost_slice(void);
bool is_lat_cri(task_ctx *taskc);
u16 get_nice_prio(struct task_struct *p);
u32 cpu_to_dsq(u32 cpu);

/* Per-task flag helpers operating on task_ctx.flags (LAVD_FLAG_* bits). */
void set_task_flag(task_ctx *taskc, u64 flag);
void reset_task_flag(task_ctx *taskc, u64 flag);
bool test_task_flag(task_ctx *taskc, u64 flag);
bool test_task_flag_mask(task_ctx __arg_arena *taskc, u64 flag);
/*
 * Whether tasks are queued on per-CPU DSQs: true in per_cpu_dsq mode
 * or whenever a pinned slice duration is configured.
 */
static __always_inline bool use_per_cpu_dsq(void)
{
	if (per_cpu_dsq)
		return true;
	return pinned_slice_ns != 0;
}
/*
 * Whether tasks sitting on per-CPU DSQs may still migrate: only in
 * plain per_cpu_dsq mode.
 */
static __always_inline bool is_per_cpu_dsq_migratable(void)
{
	if (!per_cpu_dsq)
		return false;
	return true;
}
/*
 * Whether tasks are queued on per-compute-domain DSQs — the complement
 * of per_cpu_dsq mode.
 */
static __always_inline bool use_cpdom_dsq(void)
{
	if (per_cpu_dsq)
		return false;
	return true;
}
bool queued_on_cpu(struct cpu_ctx *cpuc);
u64 get_target_dsq_id(struct task_struct *p, struct cpu_ctx *cpuc);

/* Global cpumasks for the turbo/big/active/overflow CPU sets. */
extern struct bpf_cpumask __kptr *turbo_cpumask;
extern struct bpf_cpumask __kptr *big_cpumask;
extern struct bpf_cpumask __kptr *active_cpumask;
extern struct bpf_cpumask __kptr *ovrflw_cpumask;

/* Cross-compute-domain migration planning. */
int plan_x_cpdom_migration(void);

/* Slice-shrinking and preemption helpers (tick-path and remote variants). */
void shrink_slice_at_tick(struct task_struct *p, struct cpu_ctx *cpuc, u64 now);
void reset_lock_futex_boost(task_ctx *taskc, struct cpu_ctx *cpuc);
u64 get_est_stopping_clk(task_ctx *taskc, u64 now);
void try_proc_introspec_cmd(struct task_struct *p, task_ctx *taskc);
void reset_cpu_preemption_info(struct cpu_ctx *cpuc, bool released);
int shrink_boosted_slice_remote(struct cpu_ctx *cpuc, u64 now);
void shrink_boosted_slice_at_tick(struct task_struct *p,
				  struct cpu_ctx *cpuc, u64 now);
void preempt_at_tick(struct task_struct *p, struct cpu_ctx *cpuc);
void try_find_and_kick_victim_cpu(struct task_struct *p,
				  task_ctx *taskc,
				  s32 preferred_cpu,
				  u64 dsq_id);

/* Presumably set when an introspection/monitoring client is attached — confirm. */
extern volatile bool is_monitored;
/*
 * Inputs and scratch state for pick_idle_cpu(). The short mask names
 * appear to compose a(ctive), o(verflow), i(dle), and t(urbo) sets
 * (e.g., ia = idle & active) — confirm against pick_idle_cpu().
 */
struct pick_ctx {
	/* Task being placed and its wakeup context. */
	const struct task_struct *p;
	task_ctx *taskc;
	u64 wake_flags;
	s32 prev_cpu;
	s32 sync_waker_cpu;

	struct bpf_cpumask *active;
	struct bpf_cpumask *ovrflw;
	struct cpu_ctx *cpuc_cur;	/* context of the CPU running the pick */

	/* Candidate masks (see cpu_ctx.tmp_*_mask scratch pointers). */
	struct bpf_cpumask *a_mask;
	struct bpf_cpumask *o_mask;
	const struct cpumask *i_mask;
	struct bpf_cpumask *ia_mask;
	struct bpf_cpumask *iat_mask;
	struct bpf_cpumask *io_mask;
	struct bpf_cpumask *temp_mask;

	/* Cached emptiness of the corresponding masks, plus task size class. */
	bool a_empty:1;
	bool o_empty:1;
	bool is_task_big:1;
	bool i_empty:1;
	bool ia_empty:1;
	bool iat_empty:1;
	bool io_empty:1;
};
/* CPU selection entry points. */
s32 find_cpu_in(const struct cpumask *src_mask, struct cpu_ctx *cpuc_cur);
s32 pick_idle_cpu(struct pick_ctx *ctx, bool *is_idle);

/* Consume a task from the per-CPU or compute-domain DSQ. */
bool consume_task(u64 cpu_dsq_id, u64 cpdom_dsq_id);

/* Global logical clock (presumably drives deadline calculation — confirm). */
extern u64 cur_logical_clk;
u64 calc_when_to_run(struct task_struct *p, task_ctx *taskc);

#endif /* __LAVD_H */