/// Populates cached per-function annotations: git churn scores, clone counts,
/// file pattern diversity, and fault-pattern labels, logging a summary of how
/// many functions received each annotation.
#[cfg_attr(coverage_nightly, coverage(off))]
#[allow(clippy::cast_possible_truncation)]
pub(super) fn populate_cached_annotations(
functions: &mut [FunctionEntry],
file_index: &HashMap<String, Vec<usize>>,
project_root: &std::path::Path,
) {
eprintln!("Computing annotations for {} functions...", functions.len());
let file_commits = get_file_commit_counts(project_root, file_index.keys());
let max_commits = file_commits.values().copied().max().unwrap_or(1) as f32;
eprintln!(
" Git churn: {} files with commits (max={})",
file_commits.len(),
max_commits as u32
);
let clone_groups = detect_code_clones(functions);
eprintln!(" Clones: {} functions with duplicates", clone_groups.len());
let file_diversity = compute_file_pattern_diversity(functions, file_index);
eprintln!(" Diversity: {} files analyzed", file_diversity.len());
let fault_patterns = detect_fault_patterns(functions);
eprintln!(" Faults: {} functions with patterns", fault_patterns.len());
let mut churn_applied = 0;
let mut clone_applied = 0;
let mut diversity_applied = 0;
let mut fault_applied = 0;
for (i, func) in functions.iter_mut().enumerate() {
if let Some(&commits) = file_commits.get(&func.file_path) {
func.commit_count = commits;
func.churn_score = commits as f32 / max_commits;
churn_applied += 1;
}
if let Some(&count) = clone_groups.get(&i) {
func.clone_count = count;
clone_applied += 1;
}
if let Some(&diversity) = file_diversity.get(&func.file_path) {
func.pattern_diversity = diversity;
diversity_applied += 1;
}
if let Some(faults) = fault_patterns.get(&i) {
func.fault_annotations = faults.clone();
fault_applied += 1;
}
}
eprintln!(
" Applied: churn={}, clones={}, diversity={}, faults={}",
churn_applied, clone_applied, diversity_applied, fault_applied
);
}
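/// Matches one path line from `git log --name-only` output against the known
/// file set, retrying with a leading `server/` prefix stripped.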
fn match_git_path<'a>(line: &'a str, files: &std::collections::HashSet<&str>) -> Option<&'a str> {
let trimmed = line.trim();
if trimmed.is_empty() {
return None;
}
if files.contains(trimmed) {
return Some(trimmed);
}
let normalized = trimmed.strip_prefix("server/").unwrap_or(trimmed);
if files.contains(normalized) {
return Some(normalized);
}
None
}
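/// Counts commits touching each of the given files over the past year by
/// parsing `git log --format= --name-only` output. Returns an empty map when
/// the file set is empty, git cannot be run, or the command fails.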
#[cfg_attr(coverage_nightly, coverage(off))]
pub(super) fn get_file_commit_counts<'a>(
project_root: &std::path::Path,
files: impl Iterator<Item = &'a String>,
) -> HashMap<String, u32> {
let files: std::collections::HashSet<&str> = files.map(String::as_str).collect();
if files.is_empty() {
return HashMap::new();
}
let output = std::process::Command::new("git")
.args(["log", "--format=", "--name-only", "--since=1 year ago"])
.current_dir(project_root)
.output();
let Ok(output) = output else {
return HashMap::new();
};
if !output.status.success() {
return HashMap::new();
}
let mut result: HashMap<String, u32> = HashMap::with_capacity(files.len());
let stdout = String::from_utf8_lossy(&output.stdout);
for line in stdout.lines() {
if let Some(path) = match_git_path(line, &files) {
*result.entry(path.to_string()).or_insert(0) += 1;
}
}
result
}
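/// Detects exact clones by grouping functions on a whitespace- and
/// case-insensitive source hash. Returns the clone-group size for every
/// function index that has at least one duplicate.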
#[allow(clippy::cast_possible_truncation)]
pub(super) fn detect_code_clones(functions: &[FunctionEntry]) -> HashMap<usize, u32> {
let mut result = HashMap::new();
let mut hash_to_indices: HashMap<u64, Vec<usize>> = HashMap::with_capacity(functions.len());
for (i, func) in functions.iter().enumerate() {
let hash = normalize_source_hash(&func.source);
hash_to_indices.entry(hash).or_default().push(i);
}
for indices in hash_to_indices.values() {
if indices.len() > 1 {
let count = indices.len() as u32;
for &idx in indices {
result.insert(idx, count);
}
}
}
result
}
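/// Hashes source text with whitespace removed and letters lowercased, so
/// reformatted or re-cased copies of the same code hash identically (e.g.
/// `"fn  A()"` and `"fna()"` produce the same hash).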
pub(super) fn normalize_source_hash(source: &str) -> u64 {
use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};
let mut hasher = DefaultHasher::new();
for c in source.chars() {
if !c.is_whitespace() {
for lc in c.to_lowercase() {
lc.hash(&mut hasher);
}
}
}
hasher.finish()
}
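/// Computes, per file, the ratio of distinct function "shape" patterns
/// (return type : parameter count : complexity bucketed by 5) to the number
/// of functions in the file. Low values indicate repetitive, template-like code.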
#[allow(clippy::cast_possible_truncation)]
pub(super) fn compute_file_pattern_diversity(
functions: &[FunctionEntry],
file_index: &HashMap<String, Vec<usize>>,
) -> HashMap<String, f32> {
let mut result = HashMap::new();
for (file_path, indices) in file_index {
if indices.is_empty() {
continue;
}
let mut patterns = std::collections::HashSet::new();
for &idx in indices {
if let Some(func) = functions.get(idx) {
let pattern = format!(
"{}:{}:{}",
extract_return_type(&func.signature),
count_params(&func.signature),
func.quality.complexity / 5
);
patterns.insert(pattern);
}
}
let diversity = patterns.len() as f32 / indices.len() as f32;
result.insert(file_path.clone(), diversity);
}
result
}
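/// Extracts a signature's return type as the text after the last `->`,
/// defaulting to `"void"` (e.g. `"fn f() -> i32"` yields `"i32"`).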
pub(super) fn extract_return_type(sig: &str) -> &str {
if sig.contains("->") {
sig.split("->").last().unwrap_or("void").trim()
} else {
"void"
}
}
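/// Counts comma-separated parameters between the first `(` and the next `)`.
/// This is a heuristic: commas inside generics or function-pointer types
/// inflate the count (`fn f(m: HashMap<K, V>)` counts as 2).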
pub(super) fn count_params(sig: &str) -> usize {
if let Some(start) = sig.find('(') {
if let Some(end) = sig[start..].find(')') {
let params = &sig[start + 1..start + end];
if params.trim().is_empty() {
return 0;
}
return params.split(',').count();
}
}
0
}
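/// Scans each function's source for risky substrings (unwrap/panic/unsafe,
/// TODO-style markers, CUDA qualifiers) and runs the PTX and macro detectors
/// below, returning sorted, deduplicated fault labels keyed by function index.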
pub(super) fn detect_fault_patterns(functions: &[FunctionEntry]) -> HashMap<usize, Vec<String>> {
let mut result = HashMap::new();
let patterns = [
("unwrap()", "UNWRAP"),
("expect(", "EXPECT"),
("panic!", "PANIC"),
("unsafe {", "UNSAFE"),
("unsafe{", "UNSAFE"),
(".clone()", "CLONE"),
("// TODO", "TODO"),
("// FIXME", "FIXME"),
("// HACK", "HACK"),
("// XXX", "XXX"),
("unimplemented!", "UNIMPL"),
("todo!", "TODO_MACRO"),
("unreachable!", "UNREACHABLE"),
("asm volatile", "INLINE_PTX"),
("asm(\"", "INLINE_PTX"),
("__syncthreads()", "CUDA_SYNC"),
("__shared__", "CUDA_SHMEM"),
("extern \"C\"", "EXTERN_C"),
("__global__", "CUDA_KERNEL"),
("__device__", "CUDA_DEVICE"),
];
for (i, func) in functions.iter().enumerate() {
let mut faults = Vec::new();
let src = &func.source;
for (pattern, label) in &patterns {
if src.contains(pattern) {
faults.push(label.to_string());
}
}
extract_ptx_instruction_tags(src, &mut faults);
classify_cpp_macros(src, &mut faults);
detect_inline_ptx_defects(src, &mut faults);
if !faults.is_empty() {
faults.sort();
faults.dedup();
result.insert(i, faults);
}
}
result
}
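/// Tags notable PTX opcodes (tensor-core MMA, async copies, barriers,
/// shared/global memory ops, warp-sync primitives) as `PTX:<opcode>` labels.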
fn extract_ptx_instruction_tags(source: &str, faults: &mut Vec<String>) {
const PTX_OPCODES: &[&str] = &[
"mma.sync",
"ldmatrix",
"movmatrix",
"cp.async",
"bar.sync",
"bar.arrive",
"membar",
"ld.shared",
"st.shared",
"ld.global",
"st.global",
"atom.shared",
"red.shared",
"shfl.sync",
"vote.sync",
"match.sync",
];
for opcode in PTX_OPCODES {
if source.contains(opcode) {
faults.push(format!("PTX:{opcode}"));
}
}
}
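/// Classifies well-known C++/CUDA macros (GGML, PyTorch, CUDA error checking)
/// into assertion, type-dispatch, and logging families.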
pub(super) fn classify_cpp_macros(source: &str, faults: &mut Vec<String>) {
const ASSERT_MACROS: &[&str] = &[
"GGML_ASSERT", "GGML_ABORT", "TORCH_CHECK", "TORCH_INTERNAL_ASSERT",
"AT_ASSERT", "CUDA_CHECK", "CHECK_CUDA", "CUBLAS_CHECK",
];
const DISPATCH_MACROS: &[&str] = &[
"AT_DISPATCH_ALL_TYPES", "AT_DISPATCH_FLOATING_TYPES",
"AT_DISPATCH_INTEGRAL_TYPES", "AT_DISPATCH_COMPLEX_TYPES",
"GGML_DISPATCH_BOOL", "CUDA_DISPATCH",
];
const LOG_MACROS: &[&str] = &[
"GGML_LOG_INFO", "GGML_LOG_WARN", "GGML_LOG_ERROR",
"TORCH_WARN", "TORCH_LOG",
];
let has_assert = ASSERT_MACROS.iter().any(|m| source.contains(m));
let has_dispatch = DISPATCH_MACROS.iter().any(|m| source.contains(m));
let has_log = LOG_MACROS.iter().any(|m| source.contains(m));
if has_assert {
faults.push("MACRO:ASSERT".to_string());
}
if has_dispatch {
faults.push("MACRO:DISPATCH".to_string());
}
if has_log {
faults.push("MACRO:LOG".to_string());
}
}
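/// Runs the inline-PTX heuristics below on source containing `asm(` or
/// `asm volatile`: shared-memory stores and loads with no barrier anywhere,
/// divergent or post-return barriers, register pressure, and other common
/// defect patterns.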
pub(super) fn detect_inline_ptx_defects(source: &str, faults: &mut Vec<String>) {
if !source.contains("asm(") && !source.contains("asm volatile") {
return;
}
let has_shared_store = source.contains("st.shared") || source.contains("__shared__");
let has_shared_load = source.contains("ld.shared");
let has_barrier = source.contains("bar.sync") || source.contains("__syncthreads");
if has_shared_store && has_shared_load && !has_barrier {
faults.push("PTX_MISSING_BARRIER".to_string());
}
if has_barrier {
detect_ptx_barrier_divergence(source, faults);
detect_ptx_early_exit(source, faults);
}
detect_ptx_register_issues(source, faults);
detect_ptx_shared_u64(source, faults);
detect_ptx_local_spills(source, faults);
detect_ptx_pred_overflow(source, faults);
detect_ptx_empty_loop(source, faults);
detect_ptx_redundant_mov(source, faults);
}
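/// Flags a barrier (`bar.sync` / `__syncthreads`) that appears inside a
/// conditional branch; if only some threads reach the barrier, behavior is
/// undefined and the kernel typically hangs.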
fn detect_ptx_barrier_divergence(source: &str, faults: &mut Vec<String>) {
let mut in_branch = false;
for line in source.lines() {
let t = line.trim();
if t.starts_with("if ") || t.starts_with("if(") || t.contains("@!%p") || t.contains("@%p") {
in_branch = true;
}
if in_branch && (t.contains("bar.sync") || t.contains("__syncthreads")) {
faults.push("PTX_BARRIER_DIV".to_string());
return;
}
if t == "}" || t.starts_with("else") {
in_branch = false;
}
}
}
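/// Flags a barrier that appears after a `return` statement: threads that
/// exited early can never arrive, so the remaining threads stall.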
fn detect_ptx_early_exit(source: &str, faults: &mut Vec<String>) {
let mut seen_return = false;
for line in source.lines() {
let t = line.trim();
if t.starts_with("return") && t.contains(';') {
seen_return = true;
}
if seen_return && (t.contains("bar.sync") || t.contains("__syncthreads")) {
faults.push("PTX_EARLY_EXIT".to_string());
return;
}
}
}
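/// Flags inline asm with more than eight output (`"=r"`) or read-write
/// (`"+r"`) register operands, a rough proxy for register pressure.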
fn detect_ptx_register_issues(source: &str, faults: &mut Vec<String>) {
let reg_count = source.matches("\"=r\"").count() + source.matches("\"+r\"").count();
if reg_count > 8 {
faults.push("PTX_HIGH_REGS".to_string());
}
}
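/// Flags shared-memory loads/stores combined with `cvta` shared-address
/// conversions, which suggests generic (64-bit) addressing of shared memory.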
fn detect_ptx_shared_u64(source: &str, faults: &mut Vec<String>) {
let has_shared = source.contains("st.shared") || source.contains("ld.shared");
if has_shared && (source.contains("cvta.shared") || source.contains("cvta.to.shared")) {
faults.push("PTX_SHARED_U64".to_string());
}
}
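/// Flags `.local` memory traffic (`ld.local` / `st.local`), which usually
/// means the compiler spilled registers to local memory.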
fn detect_ptx_local_spills(source: &str, faults: &mut Vec<String>) {
if source.contains(".local") && (source.contains("st.local") || source.contains("ld.local")) {
faults.push("PTX_REG_SPILL".to_string());
}
}
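/// Counts how many distinct predicate registers `%p0`..`%p15` appear and
/// flags more than eight. Approximate: `"%p1"` also matches inside
/// `"%p10"`..`"%p15"`, so the count can be inflated.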
fn detect_ptx_pred_overflow(source: &str, faults: &mut Vec<String>) {
let pred_count = (0..16).filter(|i| source.contains(&format!("%p{i}"))).count();
if pred_count > 8 {
faults.push("PTX_PRED_OVERFLOW".to_string());
}
}
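/// Flags `for`/`while` loops in CUDA kernel or device code whose body on the
/// following line is empty (`{}`, `{ }`, or `;`).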
fn detect_ptx_empty_loop(source: &str, faults: &mut Vec<String>) {
if !source.contains("__global__") && !source.contains("__device__") {
return;
}
let mut lines = source.lines().peekable();
while let Some(line) = lines.next() {
let t = line.trim();
// Require a delimiter after the keyword so identifiers like "format" are not mistaken for loops.
if t.starts_with("for ") || t.starts_with("for(") || t.starts_with("while ") || t.starts_with("while(") {
if let Some(&next) = lines.peek() {
let next = next.trim();
if next == "{}" || next == "{ }" || next == ";" {
faults.push("PTX_EMPTY_LOOP".to_string());
return;
}
}
}
}
}
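/// Flags `mov.*` instructions whose destination and source registers are the
/// same (e.g. `mov.u32 %r1, %r1;`).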
fn detect_ptx_redundant_mov(source: &str, faults: &mut Vec<String>) {
for line in source.lines() {
let t = line.trim();
if !t.contains("mov.") { continue; }
// Take everything after the opcode so "mov.u32 %r1, %r1;" is parsed the same
// whether or not a space follows the comma.
let Some((_, args)) = t.split_once(char::is_whitespace) else { continue };
let parts: Vec<&str> = args.split(',').map(str::trim).collect();
if parts.len() == 2 {
let dest = parts[0].trim_end_matches(';');
let src = parts[1].trim_end_matches(';');
if dest == src && dest.starts_with('%') {
faults.push("PTX_REDUNDANT_MOV".to_string());
return;
}
}
}
}
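/// Links declarations (names suffixed with `" [decl]"`) to a matching
/// definition, preferring one in a different file, and records the target as
/// `"path:line"` in `linked_definition`.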
pub(super) fn link_declarations_to_definitions(functions: &mut [FunctionEntry]) {
let mut def_index: HashMap<String, Vec<(usize, String, usize)>> = HashMap::new();
for (i, func) in functions.iter().enumerate() {
if !func.function_name.ends_with(" [decl]") {
def_index
.entry(func.function_name.clone())
.or_default()
.push((i, func.file_path.clone(), func.start_line));
}
}
let mut linked = 0;
for func in functions.iter_mut() {
if func.function_name.ends_with(" [decl]") {
let bare_name = func.function_name.trim_end_matches(" [decl]");
func.definition_type = DefinitionType::Declaration;
if let Some(defs) = def_index.get(bare_name) {
let best = defs
.iter()
.find(|(_, path, _)| path != &func.file_path)
.or_else(|| defs.first());
if let Some((_, path, line)) = best {
func.linked_definition = Some(format!("{path}:{line}"));
linked += 1;
}
}
}
}
if linked > 0 {
eprintln!(" Decl-def links: {linked} declarations linked to definitions");
}
}
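/// Computes, for each function name, the fraction of all functions sharing
/// that name. Returns an empty map when `total` is zero.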
#[allow(clippy::cast_possible_truncation)]
pub(crate) fn compute_name_frequency(
name_index: &HashMap<String, Vec<usize>>,
total: usize,
) -> HashMap<String, f32> {
if total == 0 {
return HashMap::new();
}
let mut result = HashMap::with_capacity(name_index.len());
for (name, indices) in name_index {
result.insert(name.clone(), indices.len() as f32 / total as f32);
}
result
}