use std::collections::BTreeMap;
use serde_json::json;
use super::thresholds::StarsThresholds;
use crate::features::stars::{ecosystem_multipliers, StarsFeatures};
use crate::models::{Confidence, EvidenceItem, ModuleResult, Verdict};
const MODULE_NAME: &str = "stars";
/// Scores the `stars` module for one repository and collects the evidence behind the score.
///
/// Three heuristics contribute:
/// * **H1** — `features.low_activity_share` bucketed through `thresholds.low_activity_bands`
///   (after an optional young-repository leniency). Skipped when the share is `None`.
/// * **H2** — `features.lockstep_z_score` bucketed through `thresholds.lockstep_score_bands`.
///   Skipped when the z-score is `None`.
/// * **H3** — the truncating integer mean of the fork/star and watcher/star ratio scores.
///   Always computed.
///
/// Final-score weights, depending on which optional heuristics are available:
///
/// | H1 | H2 | formula                              |
/// |----|----|--------------------------------------|
/// | ✓  | ✓  | `0.55·H1 + 0.30·H2 + 0.15·H3`        |
/// | ✓  | –  | `0.55·H1 + 0.45·H3`                  |
/// | –  | ✓  | `0.50·H2 + 0.50·H3`                  |
/// | –  | –  | `H3`                                 |
///
/// # Returns
///
/// A `(ModuleResult, Vec<EvidenceItem>)` pair. When `features.total_stars` is below
/// `thresholds.min_stars_to_sample` the function short-circuits: score `0`, `Confidence::Low`,
/// `sample_size` of `Some(0)`, empty `sub_scores`, and a single `below_sampling_floor`
/// evidence item. Otherwise evidence items are appended in the order the heuristics run
/// (H1, fork ratio, watcher ratio, H2, optional combined signal, recency caveat, optional
/// small-sample caveat, optional archived caveat).
#[must_use]
pub fn score(
features: &StarsFeatures,
thresholds: &StarsThresholds,
) -> (ModuleResult, Vec<EvidenceItem>) {
let mut sub_scores: BTreeMap<String, u8> = BTreeMap::new();
let mut evidence: Vec<EvidenceItem> = Vec::new();
let mut missing: Vec<String> = Vec::new();
// Sampling floor: with too few stars no heuristic is evaluated at all; return a
// zero score with Low confidence and exactly one explanatory evidence item.
if features.total_stars < thresholds.min_stars_to_sample {
missing.push("below_sampling_floor".into());
evidence.push(EvidenceItem {
module: MODULE_NAME.into(),
code: "below_sampling_floor".into(),
label: "Repository has too few stars to sample meaningfully".into(),
value: json!(features.total_stars),
threshold: Some(json!(thresholds.min_stars_to_sample)),
verdict: Verdict::Neutral,
rationale: format!(
"Repository has {} star(s); the Star Authenticity heuristic requires at least {} for a meaningful sample.",
features.total_stars, thresholds.min_stars_to_sample,
),
});
return (
ModuleResult {
module: MODULE_NAME.into(),
score: 0,
confidence: Confidence::Low,
sub_scores,
// Reported as 0 regardless of `features.sample_size` on this path.
sample_size: Some(0),
missing_data: missing,
},
evidence,
);
}
// Heuristic 1: low-activity stargazer share. `None` means no stargazer sample was
// available; that is recorded in `missing` and H1 drops out of the final formula.
let h1_score: Option<u8> = if let Some(share) = features.low_activity_share {
// Repositories younger than `young_repo_age_days` get `young_repo_leniency_pp`
// subtracted from their share before bucketing (floored at 0.0).
let leniency_applied = features.repo_age_days < thresholds.young_repo_age_days;
let leniency = if leniency_applied {
thresholds.young_repo_leniency_pp
} else {
0.0
};
let adjusted_share = (share - leniency).max(0.0);
let s = bucket_low_activity(adjusted_share, &thresholds.low_activity_bands);
sub_scores.insert("low_activity_share".into(), s);
// The evidence reports the raw `share`, not `adjusted_share`; the leniency is
// only mentioned in the rationale text.
evidence.push(EvidenceItem {
module: MODULE_NAME.into(),
code: "low_activity_stargazer_share".into(),
label: "Share of sampled stargazers matching the 9-signal low-activity profile".into(),
value: json!(crate::utils::time::round6(share)),
threshold: Some(json!({
"bands": thresholds.low_activity_bands,
"young_repo_leniency_pp": thresholds.young_repo_leniency_pp,
})),
verdict: stars_verdict(s),
rationale: format!(
"{:.1}% of {} sampled stargazers match the 9-signal low-activity profile{}.",
share * 100.0,
features.sample_size,
if leniency_applied {
format!(
" ({:.0}pp leniency applied for repo younger than {} days)",
leniency * 100.0,
thresholds.young_repo_age_days
)
} else {
String::new()
},
),
});
Some(s)
} else {
missing.push("no_stargazer_sample".into());
None
};
// Heuristic 3: fork/star and watcher/star ratios, each scored against a "healthy"
// threshold scaled by a per-language multiplier from `ecosystem_multipliers`.
// NOTE(review): how `ecosystem_multipliers` treats `None`/unknown languages is not
// visible here — confirm in `features::stars`.
let (fork_mult, watcher_mult) = ecosystem_multipliers(features.primary_language.as_deref());
let fork_healthy = thresholds.fork_to_star_healthy * fork_mult;
let watcher_healthy = thresholds.watcher_to_star_healthy * watcher_mult;
let fork_score = ratio_score(features.fork_to_star_ratio, fork_healthy);
let watcher_score = ratio_score(features.watcher_to_star_ratio, watcher_healthy);
// Mean of two u8 values computed in u32 to avoid overflow; integer division truncates.
let h3_score: u8 = ((u32::from(fork_score) + u32::from(watcher_score)) / 2) as u8;
sub_scores.insert("fork_to_star_ratio".into(), fork_score);
sub_scores.insert("watcher_to_star_ratio".into(), watcher_score);
evidence.push(EvidenceItem {
module: MODULE_NAME.into(),
code: "fork_to_star_ratio".into(),
label: "Forks-to-stars ratio".into(),
value: json!(crate::utils::time::round6(features.fork_to_star_ratio)),
threshold: Some(json!({
"healthy": crate::utils::time::round6(fork_healthy),
"ecosystem_multiplier": fork_mult,
})),
verdict: stars_verdict(fork_score),
rationale: format!(
"fork/star ratio = {:.4}; ecosystem-adjusted healthy threshold ≥ {:.4} (multiplier {:.2}).",
features.fork_to_star_ratio, fork_healthy, fork_mult,
),
});
evidence.push(EvidenceItem {
module: MODULE_NAME.into(),
code: "watcher_to_star_ratio".into(),
label: "Watchers-to-stars ratio".into(),
value: json!(crate::utils::time::round6(features.watcher_to_star_ratio)),
threshold: Some(json!({
"healthy": crate::utils::time::round6(watcher_healthy),
"ecosystem_multiplier": watcher_mult,
})),
verdict: stars_verdict(watcher_score),
rationale: format!(
"watcher/star ratio = {:.4}; ecosystem-adjusted healthy threshold ≥ {:.4} (multiplier {:.2}).",
features.watcher_to_star_ratio, watcher_healthy, watcher_mult,
),
});
// Heuristic 2: lockstep timing z-score. When it is `None`, a neutral evidence item
// explains the gap (nothing is added to `missing`) and H2 drops out of the formula.
let h2_score: Option<u8> = if let Some(z) = features.lockstep_z_score {
let s = bucket_lockstep(z, &thresholds.lockstep_score_bands);
sub_scores.insert("lockstep_z_score".into(), s);
// Non-finite band ceilings are emitted as the string "infinity" in the threshold
// JSON — presumably because JSON numbers cannot represent them.
evidence.push(EvidenceItem {
module: MODULE_NAME.into(),
code: "lockstep_z_score".into(),
label: "Lockstep timing — max daily z-score over 28-day baseline".into(),
value: json!(crate::utils::time::round6(z)),
threshold: Some(json!({
"bands": thresholds.lockstep_score_bands.iter()
.map(|(c, s)| serde_json::json!([if c.is_finite() { json!(c) } else { json!("infinity") }, s]))
.collect::<Vec<_>>(),
})),
verdict: stars_verdict(s),
rationale: format!(
"Max daily z-score = {z:.2} over a rolling 28-day baseline lagged 7 days. ≥5 indicates a starring burst; ≥3 a notable spike."
),
});
Some(s)
} else {
evidence.push(EvidenceItem {
module: MODULE_NAME.into(),
code: "lockstep_window_too_short".into(),
label: "Lockstep timing window unavailable".into(),
value: json!(null),
threshold: None,
verdict: Verdict::Neutral,
rationale: "Sample spans fewer than 35 days (28 baseline + 7 lag) or carries no starred_at timestamps. Heuristic 2 contribution is dropped from the final formula.".into(),
});
None
};
// Weighted blend. When an optional heuristic is missing its weight is redistributed
// so that each arm's weights still sum to 1.0; the result is rounded and clamped to 0..=100.
let final_score: u8 = match (h1_score, h2_score) {
(Some(h1), Some(h2)) => {
let raw = 0.55 * f64::from(h1) + 0.30 * f64::from(h2) + 0.15 * f64::from(h3_score);
raw.round().clamp(0.0, 100.0) as u8
},
(Some(h1), None) => {
let raw = 0.55 * f64::from(h1) + 0.45 * f64::from(h3_score);
raw.round().clamp(0.0, 100.0) as u8
},
(None, Some(h2)) => {
let raw = 0.50 * f64::from(h2) + 0.50 * f64::from(h3_score);
raw.round().clamp(0.0, 100.0) as u8
},
(None, None) => h3_score, };
// Combined H1 + H2 signal: informational only — it appends a Concerning evidence item
// but does not modify `final_score`. The comparison uses the raw share (no leniency).
if let (Some(share), Some(z)) = (features.low_activity_share, features.lockstep_z_score) {
if share >= thresholds.combined_low_activity_threshold
&& z >= thresholds.combined_z_threshold
{
evidence.push(EvidenceItem {
module: MODULE_NAME.into(),
code: "combined_low_activity_and_lockstep".into(),
label: "Combined Heuristic 1 + Heuristic 2 signal".into(),
value: json!({
"low_activity_share": crate::utils::time::round6(share),
"lockstep_z_score": crate::utils::time::round6(z),
}),
threshold: Some(json!({
"low_activity_share": thresholds.combined_low_activity_threshold,
"lockstep_z_score": thresholds.combined_z_threshold,
})),
verdict: Verdict::Concerning,
rationale: format!(
"Both signals present: {:.1}% of sampled stargazers match the low-activity profile AND the daily star series shows a max z-score of {:.2}. Methodology recommends treating this combination as Concerning.",
share * 100.0, z,
),
});
}
}
// Methodology caveat emitted unconditionally on every run that gets past the sampling floor.
evidence.push(EvidenceItem {
module: MODULE_NAME.into(),
code: "recency_biased_sample".into(),
label: "Stargazer sample is recency-biased".into(),
value: json!(features.sample_size),
threshold: None,
verdict: Verdict::Neutral,
rationale: "Day 3-4 sampling is recency-biased: the most-recent N stargazers are sampled directly. True uniform random sampling over the full stargazer history is deferred to Phase 2 deep mode.".into(),
});
// Small-sample caveat: only relevant when H1 actually ran on a sample.
if features.low_activity_share.is_some()
&& features.sample_size < thresholds.min_sample_for_high_confidence
{
evidence.push(EvidenceItem {
module: MODULE_NAME.into(),
code: "small_sample".into(),
label: "Stargazer sample below confidence threshold".into(),
value: json!(features.sample_size),
threshold: Some(json!({
"high": thresholds.min_sample_for_high_confidence,
"medium": thresholds.min_sample_for_medium_confidence,
})),
verdict: Verdict::Neutral,
rationale: format!(
"Sample size {} is below {} required for High confidence on Heuristic 1.",
features.sample_size, thresholds.min_sample_for_high_confidence,
),
});
}
// Archived repositories keep their computed score; the flag is surfaced in both
// `missing` and the evidence list (and `compute_confidence` returns Low for them).
if features.archived {
missing.push("archived".into());
evidence.push(EvidenceItem {
module: MODULE_NAME.into(),
code: "archived".into(),
label: "Repository is archived".into(),
value: json!(true),
threshold: None,
verdict: Verdict::Neutral,
rationale: "Owner has archived this repository; star-authenticity signals are frozen."
.into(),
});
}
let confidence = compute_confidence(features, thresholds);
(
ModuleResult {
module: MODULE_NAME.into(),
score: final_score,
confidence,
sub_scores,
sample_size: Some(features.sample_size),
missing_data: missing,
},
evidence,
)
}
/// Translates a sub-score into a [`Verdict`].
///
/// * `80..=100` → `Verdict::Positive`
/// * `50..=79`  → `Verdict::Neutral`
/// * anything else (below 50, or an out-of-range value above 100) → `Verdict::Concerning`
fn stars_verdict(score: u8) -> Verdict {
    let positive_band = 80..=100;
    let neutral_band = 50..=79;
    if positive_band.contains(&score) {
        Verdict::Positive
    } else if neutral_band.contains(&score) {
        Verdict::Neutral
    } else {
        Verdict::Concerning
    }
}
/// Looks up the sub-score for a low-activity share.
///
/// `bands` is scanned in order; the first entry whose ceiling is `>= share` supplies the
/// sub-score. A share above every ceiling (or a NaN share, which compares false against
/// everything) yields `0`.
fn bucket_low_activity(share: f64, bands: &[(f64, u8); 6]) -> u8 {
    bands
        .iter()
        .find(|(upper, _)| share <= *upper)
        .map_or(0, |&(_, points)| points)
}
/// Looks up the sub-score for a lockstep z-score.
///
/// `bands` is scanned in order; the first entry whose ceiling is `>= z` supplies the
/// sub-score. When no ceiling matches (z above every ceiling, or NaN) the last band's
/// sub-score is used instead.
fn bucket_lockstep(z: f64, bands: &[(f64, u8); 5]) -> u8 {
    bands
        .iter()
        .find(|(upper, _)| z <= *upper)
        .or_else(|| bands.last())
        .map_or(0, |&(_, points)| points)
}
/// Scores an observed ratio against its "healthy" threshold on a 0–100 scale.
///
/// The guards are evaluated in this order:
/// 1. a non-positive `healthy` threshold scores `0`;
/// 2. `actual` at or above `healthy` scores `100`;
/// 3. a non-positive `actual` scores `0`;
/// 4. otherwise the score is `actual / healthy` as a rounded percentage.
///
/// A NaN in either argument fails every comparison and ends up as `0`, because the
/// float-to-integer `as` cast maps NaN to zero.
fn ratio_score(actual: f64, healthy: f64) -> u8 {
    match (actual, healthy) {
        (_, target) if target <= 0.0 => 0,
        (observed, target) if observed >= target => 100,
        (observed, _) if observed <= 0.0 => 0,
        (observed, target) => {
            let percent = (observed / target).clamp(0.0, 1.0) * 100.0;
            percent.round().clamp(0.0, 100.0) as u8
        }
    }
}
/// Derives the confidence level attached to the module result.
///
/// * `Low` — the repository is archived, has fewer stars than `min_stars_to_sample`, or the
///   sample is smaller than `min_sample_for_medium_confidence`.
/// * `Medium` — none of the above, but the sample is smaller than
///   `min_sample_for_high_confidence` or the repository is younger than `young_repo_age_days`.
/// * `High` — everything else.
fn compute_confidence(features: &StarsFeatures, thresholds: &StarsThresholds) -> Confidence {
    let forces_low = features.archived
        || features.total_stars < thresholds.min_stars_to_sample
        || features.sample_size < thresholds.min_sample_for_medium_confidence;
    let caps_at_medium = features.sample_size < thresholds.min_sample_for_high_confidence
        || features.repo_age_days < thresholds.young_repo_age_days;

    if forces_low {
        Confidence::Low
    } else if caps_at_medium {
        Confidence::Medium
    } else {
        Confidence::High
    }
}
// Unit tests for `score`. Every test starts from `baseline()` and overrides only the
// fields relevant to the behaviour being pinned. Expected values depend on
// `StarsThresholds::v1()`, which is defined outside this file.
#[cfg(test)]
mod tests {
use super::*;
use crate::features::stars::StarsFeatures;
// Fixture: a three-year-old, non-archived Rust repository with 1,000 stars, a
// 200-stargazer sample, a 4% low-activity share and a lockstep z-score of 2.0.
fn baseline() -> StarsFeatures {
StarsFeatures {
total_stars: 1_000,
forks_count: 100,
watchers_count: 10,
fork_to_star_ratio: 0.10,
watcher_to_star_ratio: 0.01,
low_activity_share: Some(0.04),
lockstep_z_score: Some(2.0), sample_size: 200,
primary_language: Some("Rust".into()),
repo_age_days: 365 * 3,
archived: false,
}
}
// The baseline must score at least 80 with High confidence and emit all five
// standard evidence codes; the legacy "lockstep_deferred_to_day_4" code must be absent.
#[test]
fn organic_profile_scores_high() {
let f = baseline();
let (r, ev) = score(&f, &StarsThresholds::v1());
assert!(r.score >= 80, "expected ≥80, got {}", r.score);
assert_eq!(r.confidence, Confidence::High);
assert!(ev.iter().any(|e| e.code == "low_activity_stargazer_share"));
assert!(ev.iter().any(|e| e.code == "fork_to_star_ratio"));
assert!(ev.iter().any(|e| e.code == "watcher_to_star_ratio"));
assert!(ev.iter().any(|e| e.code == "lockstep_z_score"));
assert!(ev.iter().any(|e| e.code == "recency_biased_sample"));
assert!(
!ev.iter().any(|e| e.code == "lockstep_deferred_to_day_4"),
"Day-3 deferred caveat should be gone now that H2 ships"
);
}
// High low-activity share, bursty z-score and very low ratios together: the score
// must be at most 35, the H1 evidence Concerning, and the combined-signal evidence present.
#[test]
fn suspicious_profile_lowers_score_to_concerning_not_highrisk() {
let mut f = baseline();
f.low_activity_share = Some(0.38);
f.lockstep_z_score = Some(8.0); f.fork_to_star_ratio = 0.005;
f.watcher_to_star_ratio = 0.0005;
let (r, ev) = score(&f, &StarsThresholds::v1());
assert!(r.score <= 35, "expected ≤35, got {}", r.score);
let h1 = ev
.iter()
.find(|e| e.code == "low_activity_stargazer_share")
.unwrap();
assert!(matches!(h1.verdict, Verdict::Concerning));
let combined = ev
.iter()
.find(|e| e.code == "combined_low_activity_and_lockstep")
.expect("combined evidence missing");
assert!(matches!(combined.verdict, Verdict::Concerning));
}
// Without a z-score the "lockstep_window_too_short" evidence is emitted and no
// lockstep sub-score is recorded. The redistributed final score itself is not asserted.
#[test]
fn lockstep_window_too_short_falls_back_to_h3_redistribution() {
let mut f = baseline();
f.lockstep_z_score = None;
let (r, ev) = score(&f, &StarsThresholds::v1());
assert!(ev.iter().any(|e| e.code == "lockstep_window_too_short"));
assert!(!r.sub_scores.contains_key("lockstep_z_score"));
}
// A z-score of 2.5 must land in the band worth 100 under the v1 thresholds.
#[test]
fn lockstep_smooth_z_score_full_credit() {
let mut f = baseline();
f.lockstep_z_score = Some(2.5);
let (r, _) = score(&f, &StarsThresholds::v1());
assert_eq!(r.sub_scores.get("lockstep_z_score").copied(), Some(100));
}
// A z-score of 10.0 must land in the band worth 30 under the v1 thresholds.
#[test]
fn lockstep_bursty_z_score_drops() {
let mut f = baseline();
f.lockstep_z_score = Some(10.0);
let (r, _) = score(&f, &StarsThresholds::v1());
assert_eq!(r.sub_scores.get("lockstep_z_score").copied(), Some(30));
}
// With all three heuristics present the baseline's final score is pinned at exactly 100.
#[test]
fn day_4_formula_uses_methodology_weights() {
let f = baseline();
let (r, _) = score(&f, &StarsThresholds::v1());
assert_eq!(r.score, 100);
}
// A 30% low-activity share with a z-score of 2.0 must NOT produce the combined evidence item.
#[test]
fn combined_evidence_only_fires_when_both_thresholds_met() {
let mut f = baseline();
f.low_activity_share = Some(0.30);
f.lockstep_z_score = Some(2.0); let (_, ev) = score(&f, &StarsThresholds::v1());
assert!(
!ev.iter()
.any(|e| e.code == "combined_low_activity_and_lockstep"),
"combined evidence should NOT fire when only H1 condition met"
);
}
// The recency-bias caveat is present on a normal run and carries a Neutral verdict.
#[test]
fn recency_biased_evidence_emitted_on_every_non_below_floor_run() {
let f = baseline();
let (_, ev) = score(&f, &StarsThresholds::v1());
let item = ev
.iter()
.find(|e| e.code == "recency_biased_sample")
.expect("recency_biased_sample evidence required");
assert!(matches!(item.verdict, Verdict::Neutral));
}
// With 25 stars the function short-circuits: score 0, Low confidence, and exactly
// one evidence item ("below_sampling_floor").
#[test]
fn below_floor_short_circuits_to_zero_with_low_confidence() {
let mut f = baseline();
f.total_stars = 25;
f.sample_size = 0;
f.low_activity_share = None;
let (r, ev) = score(&f, &StarsThresholds::v1());
assert_eq!(r.score, 0);
assert_eq!(r.confidence, Confidence::Low);
assert!(r.missing_data.iter().any(|m| m == "below_sampling_floor"));
assert_eq!(ev.len(), 1);
assert_eq!(ev[0].code, "below_sampling_floor");
}
// A 60-day-old repository with a 22% share must get an H1 sub-score of 65 and a
// rationale mentioning "leniency". The "5pp" in the name presumably reflects the v1
// `young_repo_leniency_pp` value — confirm against the thresholds definition.
#[test]
fn young_repo_gets_5pp_leniency() {
let mut f = baseline();
f.low_activity_share = Some(0.22);
f.repo_age_days = 60;
let (r, ev) = score(&f, &StarsThresholds::v1());
let h1 = r.sub_scores.get("low_activity_share").copied().unwrap();
assert_eq!(h1, 65);
assert!(ev
.iter()
.find(|e| e.code == "low_activity_stargazer_share")
.unwrap()
.rationale
.contains("leniency"));
}
// A sample of 60 yields Medium confidence and the "small_sample" evidence item.
#[test]
fn small_sample_demotes_confidence_to_medium() {
let mut f = baseline();
f.sample_size = 60;
let (r, ev) = score(&f, &StarsThresholds::v1());
assert_eq!(r.confidence, Confidence::Medium);
assert!(ev.iter().any(|e| e.code == "small_sample"));
}
// A sample of 20 yields Low confidence.
#[test]
fn very_small_sample_demotes_to_low_confidence() {
let mut f = baseline();
f.sample_size = 20;
let (r, _) = score(&f, &StarsThresholds::v1());
assert_eq!(r.confidence, Confidence::Low);
}
// For a TypeScript repository a fork/star ratio of 0.03 must earn the full fork sub-score of 100.
#[test]
fn ecosystem_multiplier_shifts_ratio_threshold_typescript() {
let mut f = baseline();
f.primary_language = Some("TypeScript".into());
f.fork_to_star_ratio = 0.03; let (r, _) = score(&f, &StarsThresholds::v1());
let s = r.sub_scores.get("fork_to_star_ratio").copied().unwrap();
assert_eq!(s, 100);
}
// With no stargazer sample the H1 sub-score is absent, the ratio sub-score is still
// present, and "no_stargazer_sample" is recorded as missing data.
#[test]
fn quick_mode_no_sample_falls_back_to_h3_only() {
let mut f = baseline();
f.sample_size = 0;
f.low_activity_share = None;
let (r, ev) = score(&f, &StarsThresholds::v1());
assert!(!r.sub_scores.contains_key("low_activity_share"));
assert!(r.sub_scores.contains_key("fork_to_star_ratio"));
assert!(r.missing_data.iter().any(|m| m == "no_stargazer_sample"));
assert!(!ev.is_empty());
}
// An archived repository yields Low confidence and "archived" in the missing data.
#[test]
fn archived_demotes_to_low_confidence() {
let mut f = baseline();
f.archived = true;
let (r, _) = score(&f, &StarsThresholds::v1());
assert_eq!(r.confidence, Confidence::Low);
assert!(r.missing_data.iter().any(|m| m == "archived"));
}
// No evidence code may appear twice in a single run: after sort + dedup the count is unchanged.
#[test]
fn evidence_codes_are_unique() {
let f = baseline();
let (_, ev) = score(&f, &StarsThresholds::v1());
let mut codes: Vec<&str> = ev.iter().map(|e| e.code.as_str()).collect();
codes.sort_unstable();
codes.dedup();
assert_eq!(codes.len(), ev.len());
}
// Wording guard: no rationale may contain "fake" or "fraud" as a substring, nor "bot"
// as a standalone word (rationales are split on non-alphanumeric characters, so
// words that merely contain "bot" are allowed).
#[test]
fn rationale_uses_only_probabilistic_phrasing_no_fake_fraud_bot() {
let mut f = baseline();
f.low_activity_share = Some(0.45);
let (_, ev) = score(&f, &StarsThresholds::v1());
for item in &ev {
let lower = item.rationale.to_lowercase();
assert!(
!lower.contains("fake"),
"rationale must not contain 'fake': {}",
item.rationale
);
assert!(
!lower.contains("fraud"),
"rationale must not contain 'fraud': {}",
item.rationale
);
for word in lower.split(|c: char| !c.is_ascii_alphanumeric()) {
assert_ne!(
word, "bot",
"rationale must not contain word 'bot': {}",
item.rationale
);
}
}
}
// The result's `module` field carries the module name "stars".
#[test]
fn module_result_carries_module_name() {
let f = baseline();
let (r, _) = score(&f, &StarsThresholds::v1());
assert_eq!(r.module, "stars");
}
}