use super::{
MIN_DISTINCTIVE_SHARED_TERMS, MIN_INTENT_DIRECTIVE_OVERLAP, MIN_INTENT_DIRECTIVE_OVERLAP_RATIO,
rule_title,
};
/// Condenses a rule into a short "directive": its title followed by the head of its body.
///
/// The body is whatever follows the first blank line (`"\n\n"`) in `content`; when there
/// is no blank line the whole of `content` is treated as the body. The body is trimmed
/// and cut to at most 160 characters (counted as `char`s, never splitting a code point).
///
/// The title comes from `rule_title(content, "")`.
/// NOTE(review): the second argument is presumably a fallback title — confirm against
/// `rule_title`'s definition.
pub(super) fn rule_directive_text(content: &str) -> String {
    const BODY_DIRECTIVE_CHARS: usize = 160;

    let title = rule_title(content, "");

    let raw_body = match content.split_once("\n\n") {
        Some((_header, rest)) => rest,
        None => content,
    };
    let body = raw_body.trim();

    // Byte offset at which the (BODY_DIRECTIVE_CHARS + 1)-th character starts, or the
    // end of the body when it is shorter than the limit.
    let head_end = body
        .char_indices()
        .nth(BODY_DIRECTIVE_CHARS)
        .map_or(body.len(), |(offset, _)| offset);
    let body_head = &body[..head_end];

    format!("{title} {body_head}")
}
/// Returns `true` when `stem` is one of a fixed list of generic topic words.
///
/// The comparison is exact and case-sensitive: `stem` must equal one of the listed
/// lowercase words in full.
pub(super) fn is_generic_anchor(stem: &str) -> bool {
    matches!(
        stem,
        "panic"
            | "error"
            | "test"
            | "value"
            | "code"
            | "data"
            | "type"
            | "result"
            | "check"
            | "handle"
            | "handler"
            | "return"
            | "input"
            | "output"
            | "field"
            | "case"
            | "call"
            | "message"
            | "method"
            | "function"
            | "file"
            | "line"
            | "block"
            | "thread"
            | "task"
            | "async"
            | "await"
            | "default"
            | "option"
            | "config"
            | "request"
            | "response"
            | "buffer"
            | "queue"
            | "size"
            | "count"
            | "index"
            | "state"
            | "event"
            | "lock"
            | "guard"
            | "time"
            | "timer"
            | "runtime"
            | "feature"
    )
}
/// Reports whether `term` occurs in `directive`, either verbatim or in its
/// `light_stem` form.
///
/// Both checks are plain substring searches, so a match inside a longer word counts.
/// The stem is only computed when the verbatim search fails.
pub(super) fn term_present_in_directive(term: &str, directive: &str) -> bool {
    if directive.contains(term) {
        return true;
    }
    let stem = light_stem(term);
    directive.contains(stem.as_str())
}
/// Decides whether a rule's directive text shares enough vocabulary with the query.
///
/// The directive is `rule_directive_text(content)`, ASCII-lowercased. Each query term
/// found in it (see `term_present_in_directive`) contributes its `light_stem`; repeated
/// stems are counted once. The rule is considered aligned when:
///
/// 1. at least `MIN_DISTINCTIVE_SHARED_TERMS` of the shared stems are not generic
///    anchors, and
/// 2. either the number of shared stems reaches `MIN_INTENT_DIRECTIVE_OVERLAP`, or the
///    shared stems divided by the number of query terms reaches
///    `MIN_INTENT_DIRECTIVE_OVERLAP_RATIO`.
///
/// An empty `query_terms` slice is never aligned.
///
/// NOTE(review): only the directive is lowercased here; query terms are presumably
/// lowercased by the caller — confirm.
/// NOTE(review): the generic-anchor check runs on the stem, so a term such as "values"
/// (stem "valu") is not recognised as the generic word "value" — confirm this is intended.
pub(super) fn directive_intent_aligned(content: &str, query_terms: &[String]) -> bool {
    if query_terms.is_empty() {
        return false;
    }

    let directive = rule_directive_text(content).to_ascii_lowercase();

    // Distinct stems of the query terms that appear in the directive, in first-seen order.
    let mut matched_stems: Vec<String> = Vec::new();
    let present_stems = query_terms
        .iter()
        .filter(|term| term_present_in_directive(term, &directive))
        .map(|term| light_stem(term));
    for stem in present_stems {
        if !matched_stems.contains(&stem) {
            matched_stems.push(stem);
        }
    }

    let distinctive_count = matched_stems
        .iter()
        .filter(|stem| !is_generic_anchor(stem))
        .count();
    if distinctive_count < MIN_DISTINCTIVE_SHARED_TERMS {
        return false;
    }

    let shared_count = matched_stems.len();
    shared_count >= MIN_INTENT_DIRECTIVE_OVERLAP
        || shared_count as f64 / query_terms.len() as f64 >= MIN_INTENT_DIRECTIVE_OVERLAP_RATIO
}
pub(super) fn light_stem(term: &str) -> String {
const MIN_STEM_LEN: usize = 4;
if let Some(base) = term.strip_suffix("ies")
&& base.len() >= MIN_STEM_LEN - 1
{
return format!("{base}y");
}
for suffix in ["ing", "es", "s"] {
if let Some(base) = term.strip_suffix(suffix)
&& base.len() >= MIN_STEM_LEN
{
return base.to_owned();
}
}
term.to_owned()
}
/// Coarse category of a rule, as guessed from its text by `infer_rule_kind`.
///
/// `half_life_days` maps each kind to the half-life that `effective_confidence` uses
/// when decaying a rule's confidence with age.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RuleKind {
/// Text containing a prohibition or defect cue such as "never ", "do not ", "fix:" or
/// "wrong". Half-life: 365 days.
Correction,
/// Text containing a team-practice cue such as "we use ", "prefer " or "convention".
/// Half-life: 120 days.
Convention,
/// Text containing a formatting or linting cue such as "format", "indent", "lint" or
/// "rustfmt". Half-life: 30 days.
Style,
/// Text containing a general-advice cue such as "trust ", "be careful" or
/// "best practice". Half-life: 30 days.
Slogan,
/// Text that matches none of the cue lists. Half-life: 90 days.
Other,
}
/// Guesses a rule's [`RuleKind`] from cue phrases in its text.
///
/// `content` is ASCII-lowercased and searched for each cue as a plain substring. Cue
/// groups are tried in priority order — correction, style, convention, slogan — and the
/// first group with any hit decides the kind. Text with no hit is [`RuleKind::Other`].
///
/// Several cues carry a trailing space (for example `"never "`), so they only match
/// when a space follows the word.
pub fn infer_rule_kind(content: &str) -> RuleKind {
    // Ordered by priority: earlier groups win when several would match.
    const HINTS_BY_PRIORITY: &[(RuleKind, &[&str])] = &[
        (
            RuleKind::Correction,
            &[
                "don't ",
                "do not ",
                "never ",
                "must not ",
                "regression",
                "fix:",
                "bug:",
                "broke ",
                "incorrect",
                "wrong",
            ],
        ),
        (
            RuleKind::Style,
            &[
                "format",
                "indent",
                "spacing",
                "naming convention",
                "lint",
                "prettier",
                "rustfmt",
                "biome",
                "eslint",
            ],
        ),
        (
            RuleKind::Convention,
            &[
                "we use ",
                "prefer ",
                "always use ",
                "convention",
                "project uses",
            ],
        ),
        (
            RuleKind::Slogan,
            &[
                "trust ",
                "review carefully",
                "be careful",
                "best practice",
                "good practice",
            ],
        ),
    ];

    let lowered = content.to_ascii_lowercase();

    HINTS_BY_PRIORITY
        .iter()
        .find(|(_, hints)| hints.iter().any(|hint| lowered.contains(hint)))
        .map_or(RuleKind::Other, |&(kind, _)| kind)
}
/// Half-life, in days, applied to the confidence of a rule of the given kind.
///
/// Corrections have the longest half-life, style rules and slogans the shortest.
const fn half_life_days(kind: RuleKind) -> f32 {
    const SHORT_LIVED: f32 = 30.0;
    const UNCLASSIFIED: f32 = 90.0;
    const CONVENTION: f32 = 120.0;
    const CORRECTION: f32 = 365.0;

    match kind {
        RuleKind::Slogan | RuleKind::Style => SHORT_LIVED,
        RuleKind::Other => UNCLASSIFIED,
        RuleKind::Convention => CONVENTION,
        RuleKind::Correction => CORRECTION,
    }
}
/// Applies exponential age decay to a rule's confidence.
///
/// The result is `raw_confidence`, clamped to `[0.0, 1.0]`, multiplied by
/// `0.5 ^ (age_days / half_life)`, where the half-life comes from `half_life_days(*kind)`.
/// Negative ages are treated as zero, so a rule never gains confidence from its age.
pub fn effective_confidence(raw_confidence: f32, kind: &RuleKind, age_days: f32) -> f32 {
    let elapsed_half_lives = age_days.max(0.0) / half_life_days(*kind);
    let decay_factor = 0.5_f32.powf(elapsed_half_lives);
    raw_confidence.clamp(0.0, 1.0) * decay_factor
}
// Unit tests for the decay model, the rule-kind classifier and the directive-matching
// helpers (stemming, term presence, generic-anchor filtering).
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashMap;

    // A one-day-old correction (365-day half-life) has barely decayed.
    #[test]
    fn effective_confidence_fresh_correction_keeps_raw_value() {
        let eff = effective_confidence(0.6, &RuleKind::Correction, 1.0);
        assert!(
            (eff - 0.6).abs() < 0.005,
            "fresh correction should keep raw conf, got {eff}"
        );
    }

    // 730 days is more than 24 half-lives for a style rule (30-day half-life).
    #[test]
    fn effective_confidence_two_year_old_style_decays_to_near_zero() {
        let eff = effective_confidence(0.9, &RuleKind::Style, 730.0);
        assert!(
            eff < 1e-6,
            "two-year-old style rule should decay to ~0, got {eff}"
        );
    }

    #[test]
    fn strengthened_correction_outranks_fresh_slogan() {
        let correction = effective_confidence(0.95, &RuleKind::Correction, 30.0);
        let slogan = effective_confidence(0.5, &RuleKind::Slogan, 1.0);
        assert!(
            correction > slogan,
            "strengthened correction ({correction}) should outrank fresh slogan ({slogan})"
        );
    }

    #[test]
    fn effective_confidence_at_age_zero_is_identity() {
        for k in [
            RuleKind::Correction,
            RuleKind::Convention,
            RuleKind::Style,
            RuleKind::Slogan,
            RuleKind::Other,
        ] {
            let eff = effective_confidence(0.73, &k, 0.0);
            assert!(
                (eff - 0.73).abs() < 1e-6,
                "age=0 must be identity for {k:?}, got {eff}"
            );
        }
    }

    // NOTE(review): the `0.1 * confidence + 0.9` weighting below presumably mirrors a
    // formula used by a caller outside this file — confirm it stays in sync.
    #[test]
    fn age_days_map_decays_old_style_below_fresh_correction_via_options() {
        let style_old = effective_confidence(0.95, &RuleKind::Style, 730.0);
        let correction_fresh = effective_confidence(0.6, &RuleKind::Correction, 1.0);
        let style_weight = 0.1f64.mul_add(style_old.clamp(0.0, 1.0) as f64, 0.9);
        let correction_weight = 0.1f64.mul_add(correction_fresh.clamp(0.0, 1.0) as f64, 0.9);
        assert!(
            correction_weight > style_weight,
            "fresh correction weight ({correction_weight}) must beat old-style weight ({style_weight}) once age is plumbed"
        );
    }

    // NOTE(review): this exercises only `HashMap` lookup with a default; it calls no
    // function from this module. It presumably documents the caller's fallback — confirm.
    #[test]
    fn age_days_map_lookup_falls_back_to_zero_for_unknown_skill() {
        let map: HashMap<String, f32> =
            std::iter::once(("known-skill".to_owned(), 30.0_f32)).collect();
        let lookup_known = map.get("known-skill").copied().unwrap_or(0.0);
        let lookup_unknown = map.get("ghost-skill").copied().unwrap_or(0.0);
        assert!((lookup_known - 30.0).abs() < 1e-6);
        assert!(
            lookup_unknown.abs() < 1e-6,
            "unknown skill must fall back to 0"
        );
    }

    #[test]
    fn is_generic_anchor_flags_common_topic_words_only() {
        for generic in ["panic", "error", "test", "input", "handler", "runtime"] {
            assert!(
                is_generic_anchor(generic),
                "`{generic}` should be a generic anchor"
            );
        }
        for distinctive in [
            "hijack", "memchr", "invalid", "false", "session", "validate",
        ] {
            assert!(
                !is_generic_anchor(distinctive),
                "`{distinctive}` is a distinctive subject token, not a generic anchor"
            );
        }
    }

    #[test]
    fn rule_directive_text_distils_title_and_body_head() {
        let content = "Rule ID: r1\nRule Name: Return false on invalid input\nType: x\nTags: \n\nReturn false rather than panicking when the caller passes bad input.";
        let directive = rule_directive_text(content);
        assert!(directive.contains("Return false on invalid input"));
        assert!(directive.contains("panicking"));
        assert!(!directive.contains("Rule ID"));
        assert!(!directive.contains("Tags"));
    }

    #[test]
    fn infer_rule_kind_buckets_common_phrasings() {
        assert_eq!(
            infer_rule_kind("Never use unwrap() in production code"),
            RuleKind::Correction
        );
        assert_eq!(
            infer_rule_kind("Run prettier before committing"),
            RuleKind::Style
        );
        assert_eq!(
            infer_rule_kind("We use Drizzle ORM for all queries"),
            RuleKind::Convention
        );
        assert_eq!(
            infer_rule_kind("Trust CI for workflow correctness"),
            RuleKind::Slogan
        );
        assert_eq!(infer_rule_kind("Some random observation"), RuleKind::Other);
    }

    // Pins the stemmer's suffix rules, including the deliberately crude "handl" result.
    #[test]
    fn light_stem_strips_common_inflections() {
        assert_eq!(light_stem("queries"), "query");
        assert_eq!(light_stem("panics"), "panic");
        assert_eq!(light_stem("matches"), "match");
        assert_eq!(light_stem("handling"), "handl");
    }

    // Terms whose remainder would fall below the minimum stem length are left untouched.
    #[test]
    fn light_stem_leaves_short_or_uninflected_terms_alone() {
        for term in ["ring", "bus", "ties", "validate", ""] {
            assert_eq!(light_stem(term), term, "`{term}` should be left unchanged");
        }
    }

    #[test]
    fn term_present_in_directive_matches_exact_or_stemmed_form() {
        let directive = "never panic when a query fails";
        assert!(term_present_in_directive("panic", directive));
        assert!(term_present_in_directive("panics", directive));
        assert!(term_present_in_directive("queries", directive));
        assert!(!term_present_in_directive("memchr", directive));
    }

    // An empty query returns false before the rule content is examined.
    #[test]
    fn directive_intent_aligned_rejects_empty_query() {
        assert!(!directive_intent_aligned("Rule Name: x\n\nbody", &[]));
    }
}