use std::collections::HashMap;
use crate::diagnostics::SelectionReport;
use crate::error::CupelError;
use crate::model::{ContextBudget, ContextItem, ContextKind};
use crate::pipeline::{Pipeline, Policy, run_policy};
use crate::slicer::{QuotaConstraintMode, QuotaPolicy};
/// Fraction of the budget's maximum token allowance consumed by the included items.
///
/// Computed as the sum of `tokens()` over `report.included` divided by
/// `budget.max_tokens()`. The ratio is not clamped, so it exceeds `1.0` when the
/// included items total more tokens than the maximum.
///
/// Returns `0.0` when `budget.max_tokens()` is zero, so callers never receive
/// `NaN` or an infinity produced by a division by zero.
pub fn budget_utilization(report: &SelectionReport, budget: &ContextBudget) -> f64 {
    let max_tokens = budget.max_tokens() as f64;
    // Guard the division: 0/0 would be NaN and n/0 would be infinite.
    if max_tokens == 0.0 {
        return 0.0;
    }
    let used_tokens: f64 = report
        .included
        .iter()
        .map(|included| included.item.tokens() as f64)
        .sum();
    used_tokens / max_tokens
}
/// Number of distinct kinds found among the included items of `report`.
///
/// An empty `included` list yields `0`.
pub fn kind_diversity(report: &SelectionReport) -> usize {
    let mut distinct_kinds = std::collections::HashSet::new();
    for entry in &report.included {
        // Re-inserting an already-seen kind is a no-op, so only unique kinds remain.
        distinct_kinds.insert(entry.item.kind());
    }
    distinct_kinds.len()
}
/// Share of included items that carry a timestamp, as a value in `[0.0, 1.0]`.
///
/// Returns `0.0` when the report has no included items.
pub fn timestamp_coverage(report: &SelectionReport) -> f64 {
    let total = report.included.len();
    if total == 0 {
        return 0.0;
    }
    let mut stamped: usize = 0;
    for entry in &report.included {
        if entry.item.timestamp().is_some() {
            stamped += 1;
        }
    }
    stamped as f64 / total as f64
}
/// Utilization of a single quota constraint, as produced by `quota_utilization`.
#[derive(Debug, Clone, PartialEq)]
pub struct KindQuotaUtilization {
/// Context kind the constraint applies to (cloned from the constraint).
pub kind: ContextKind,
/// Constraint mode (`Percentage` or `Count`); decides how `actual` is measured.
pub mode: QuotaConstraintMode,
/// The constraint's `require` value, copied through unchanged.
/// NOTE(review): presumably the minimum demanded for this kind — confirm against `QuotaPolicy`.
pub require: f64,
/// The constraint's `cap` value, copied through unchanged; the denominator of `utilization`.
pub cap: f64,
/// Measured usage for this kind among included items: in `Percentage` mode the
/// kind's token sum as a percentage (0–100 scale) of the budget's target tokens
/// (`0.0` when the target is zero); in `Count` mode the number of included items.
pub actual: f64,
/// `actual / cap` clamped to `[0.0, 1.0]`, or `0.0` when `cap` is zero.
pub utilization: f64,
}
/// Measures how much of each quota constraint's cap the included items consume.
///
/// One [`KindQuotaUtilization`] is produced per constraint returned by
/// `policy.quota_constraints()`:
///
/// * `Percentage` mode: `actual` is the kind's included token sum as a percentage
///   of `budget.target_tokens()` (`0.0` when the target is zero).
/// * `Count` mode: `actual` is the number of included items of that kind.
///
/// `utilization` is `actual / cap` clamped to `[0.0, 1.0]`, or `0.0` when the cap
/// is zero. The result is sorted by the kind's string form; the sort is stable, so
/// constraints sharing a kind keep their original relative order.
pub fn quota_utilization(
    report: &SelectionReport,
    policy: &dyn QuotaPolicy,
    budget: &ContextBudget,
) -> Vec<KindQuotaUtilization> {
    let constraints = policy.quota_constraints();

    // Per-kind totals over the included items: (token sum, item count).
    let mut totals_by_kind: HashMap<&ContextKind, (i64, usize)> = HashMap::new();
    for included in &report.included {
        let (tokens, items) = totals_by_kind.entry(included.item.kind()).or_default();
        *tokens += included.item.tokens();
        *items += 1;
    }

    let target = budget.target_tokens() as f64;
    let mut rows: Vec<KindQuotaUtilization> = Vec::new();
    for constraint in constraints.iter() {
        // Kinds with no included items count as zero tokens / zero items.
        let (token_sum, item_count) = match totals_by_kind.get(&constraint.kind) {
            Some(&(tokens, items)) => (tokens, items),
            None => (0, 0),
        };

        let actual = match constraint.mode {
            QuotaConstraintMode::Count => item_count as f64,
            // A zero target would make the percentage undefined; report zero instead.
            QuotaConstraintMode::Percentage if target == 0.0 => 0.0,
            QuotaConstraintMode::Percentage => token_sum as f64 / target * 100.0,
        };

        let utilization = if constraint.cap != 0.0 {
            (actual / constraint.cap).clamp(0.0, 1.0)
        } else {
            0.0
        };

        rows.push(KindQuotaUtilization {
            kind: constraint.kind.clone(),
            mode: constraint.mode,
            require: constraint.require,
            cap: constraint.cap,
            actual,
            utilization,
        });
    }

    rows.sort_by(|lhs, rhs| lhs.kind.as_str().cmp(rhs.kind.as_str()));
    rows
}
/// Outcome of an item under one policy/pipeline variant, as recorded by the
/// policy-sensitivity functions in this file.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ItemStatus {
/// The item appeared in the variant report's `included` list.
Included,
/// The item appeared in the variant report's `excluded` list.
Excluded,
}
/// One item whose inclusion status is not the same across all variants.
///
/// Entries are keyed by item content, so items with identical content share a
/// single entry.
#[derive(Debug, Clone, PartialEq)]
pub struct PolicySensitivityDiffEntry {
/// The item's content string, used as its identity when comparing variants.
pub content: String,
/// `(variant label, status)` pairs recorded for this content.
pub statuses: Vec<(String, ItemStatus)>,
}
/// Result of running the same items and budget through several labelled variants.
#[derive(Debug, Clone, PartialEq)]
pub struct PolicySensitivityReport {
/// `(label, report)` for every variant, in the order the variants were supplied.
pub variants: Vec<(String, SelectionReport)>,
/// Entries for content whose recorded statuses differ between variants.
pub diffs: Vec<PolicySensitivityDiffEntry>,
}
pub fn policy_sensitivity_from_pipelines(
items: &[ContextItem],
budget: &ContextBudget,
variants: &[(impl AsRef<str>, &Pipeline)],
) -> Result<PolicySensitivityReport, CupelError> {
let mut results: Vec<(String, SelectionReport)> = Vec::with_capacity(variants.len());
for (label, pipeline) in variants {
let report = pipeline.dry_run(items, budget)?;
results.push((label.as_ref().to_string(), report));
}
let mut status_map: HashMap<String, Vec<(String, ItemStatus)>> = HashMap::new();
for (label, report) in &results {
for inc in &report.included {
status_map
.entry(inc.item.content().to_string())
.or_default()
.push((label.clone(), ItemStatus::Included));
}
for exc in &report.excluded {
status_map
.entry(exc.item.content().to_string())
.or_default()
.push((label.clone(), ItemStatus::Excluded));
}
}
let diffs: Vec<PolicySensitivityDiffEntry> = status_map
.into_iter()
.filter(|(_, statuses)| {
let first = statuses.first().map(|(_, s)| *s);
statuses.iter().any(|(_, s)| Some(*s) != first)
})
.map(|(content, statuses)| PolicySensitivityDiffEntry { content, statuses })
.collect();
Ok(PolicySensitivityReport {
variants: results,
diffs,
})
}
pub fn policy_sensitivity(
items: &[ContextItem],
budget: &ContextBudget,
variants: &[(impl AsRef<str>, &Policy)],
) -> Result<PolicySensitivityReport, CupelError> {
if variants.len() < 2 {
return Err(CupelError::PipelineConfig(
"policy_sensitivity requires at least 2 variants".to_string(),
));
}
let mut results: Vec<(String, SelectionReport)> = Vec::with_capacity(variants.len());
for (label, policy) in variants {
let report = run_policy(items, budget, policy)?;
results.push((label.as_ref().to_string(), report));
}
let mut status_map: HashMap<String, Vec<(String, ItemStatus)>> = HashMap::new();
for (label, report) in &results {
for inc in &report.included {
status_map
.entry(inc.item.content().to_string())
.or_default()
.push((label.clone(), ItemStatus::Included));
}
for exc in &report.excluded {
status_map
.entry(exc.item.content().to_string())
.or_default()
.push((label.clone(), ItemStatus::Excluded));
}
}
let diffs: Vec<PolicySensitivityDiffEntry> = status_map
.into_iter()
.filter(|(_, statuses)| {
let first = statuses.first().map(|(_, s)| *s);
statuses.iter().any(|(_, s)| Some(*s) != first)
})
.map(|(content, statuses)| PolicySensitivityDiffEntry { content, statuses })
.collect();
Ok(PolicySensitivityReport {
variants: results,
diffs,
})
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::diagnostics::{IncludedItem, InclusionReason, SelectionReport};
    use crate::model::{ContextBudget, ContextItemBuilder, ContextKind};
    use chrono::Utc;

    /// A report with no items, no events, and zeroed counters.
    fn empty_report() -> SelectionReport {
        SelectionReport {
            events: Vec::new(),
            included: Vec::new(),
            excluded: Vec::new(),
            total_candidates: 0,
            total_tokens_considered: 0,
            count_requirement_shortfalls: Vec::new(),
        }
    }

    /// Builds an included item of the given size and kind, optionally timestamped.
    fn make_included(tokens: i64, kind: &str, with_timestamp: bool) -> IncludedItem {
        let kind = ContextKind::new(kind).unwrap();
        let builder = ContextItemBuilder::new("content", tokens).kind(kind);
        let builder = if with_timestamp {
            builder.timestamp(Utc::now())
        } else {
            builder
        };
        IncludedItem {
            item: builder.build().unwrap(),
            score: 1.0,
            reason: InclusionReason::Scored,
        }
    }

    /// A budget whose maximum and target are both `max`.
    fn budget(max: i64) -> ContextBudget {
        ContextBudget::new(max, max, 0, Default::default(), 0.0).unwrap()
    }

    #[test]
    fn budget_utilization_empty_is_zero() {
        let b = budget(1000);
        let report = empty_report();
        assert_eq!(budget_utilization(&report, &b), 0.0);
    }

    #[test]
    fn budget_utilization_full_budget() {
        let report = SelectionReport {
            included: vec![make_included(500, "Message", false)],
            total_candidates: 1,
            total_tokens_considered: 500,
            ..empty_report()
        };
        let b = budget(1000);
        let util = budget_utilization(&report, &b);
        assert!(
            (util - 0.5).abs() < f64::EPSILON,
            "expected 0.5, got {util}"
        );
    }

    #[test]
    fn kind_diversity_empty_is_zero() {
        assert_eq!(kind_diversity(&empty_report()), 0);
    }

    #[test]
    fn kind_diversity_counts_distinct_kinds() {
        let report = SelectionReport {
            included: vec![
                make_included(10, "SystemPrompt", false),
                make_included(20, "Message", false),
                make_included(30, "Message", false),
            ],
            total_candidates: 3,
            total_tokens_considered: 60,
            ..empty_report()
        };
        assert_eq!(kind_diversity(&report), 2);
    }

    #[test]
    fn timestamp_coverage_empty_is_zero() {
        assert_eq!(timestamp_coverage(&empty_report()), 0.0);
    }

    #[test]
    fn timestamp_coverage_all_have_timestamps() {
        let report = SelectionReport {
            included: vec![
                make_included(10, "Message", true),
                make_included(20, "Message", true),
            ],
            total_candidates: 2,
            total_tokens_considered: 30,
            ..empty_report()
        };
        assert_eq!(timestamp_coverage(&report), 1.0);
    }

    #[test]
    fn timestamp_coverage_partial() {
        let report = SelectionReport {
            included: vec![
                make_included(10, "Message", true),
                make_included(20, "Message", false),
            ],
            total_candidates: 2,
            total_tokens_considered: 30,
            ..empty_report()
        };
        let cov = timestamp_coverage(&report);
        assert!((cov - 0.5).abs() < f64::EPSILON, "expected 0.5, got {cov}");
    }
}