Skip to main content

weave_content/
staleness.rs

1//! Structural staleness analysis for OSINT case files (ADR-021 Layer 1).
2//!
3//! Analyzes parsed case data offline (no network) to detect cases whose
4//! `status` field may no longer reflect reality based on heuristics like
5//! event age and status-event mismatches.
6
7use crate::entity::{Entity, FieldValue, Label};
8use crate::parser::ParsedCase;
9
/// How serious a staleness finding is.
///
/// Variants are declared from most to least severe, so the derived ordering
/// places `Error` before `Warning` before `Info`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum Severity {
    /// Most severe level; in this module it is reported when a case has a
    /// status but no events at all.
    Error,
    /// Middle level; in this module it is reported for age-threshold
    /// violations and for final statuses lacking a supporting event.
    Warning,
    /// Least severe level; in this module it is reported for a `trial`
    /// status that has no verdict-like event yet.
    Info,
}
17
18impl std::fmt::Display for Severity {
19    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
20        match self {
21            Self::Error => write!(f, "error"),
22            Self::Warning => write!(f, "warning"),
23            Self::Info => write!(f, "info"),
24        }
25    }
26}
27
28/// A single staleness finding for a case file.
29#[derive(Debug)]
30pub struct Finding {
31    pub severity: Severity,
32    pub message: String,
33}
34
/// Configurable thresholds for staleness detection.
///
/// Each value is a number of months. A case is flagged when the number of
/// months since its latest dated event is strictly greater than the threshold
/// that applies to its status.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Thresholds {
    /// Limit applied to cases whose status is `under_investigation`.
    pub investigation_months: u32,
    /// Limit applied to cases whose status is `trial` or `open`.
    pub trial_months: u32,
    /// Limit applied to cases whose status is `appeal`.
    pub appeal_months: u32,
}
42
43impl Default for Thresholds {
44    fn default() -> Self {
45        Self {
46            investigation_months: 6,
47            trial_months: 12,
48            appeal_months: 12,
49        }
50    }
51}
52
53/// Check a single case for staleness issues.
54///
55/// `today` is passed explicitly for testability.
56pub fn check_case(
57    case: &ParsedCase,
58    entities: &[Entity],
59    thresholds: &Thresholds,
60    today: (i32, u32, u32),
61) -> Vec<Finding> {
62    let mut findings = Vec::new();
63
64    let Some(status) = case.status.as_deref() else {
65        return findings;
66    };
67
68    let events: Vec<&Entity> = entities
69        .iter()
70        .filter(|e| e.label == Label::Event)
71        .collect();
72
73    // Rule: No events
74    if events.is_empty() {
75        findings.push(Finding {
76            severity: Severity::Error,
77            message: format!("status is '{status}' but case has no events"),
78        });
79        return findings;
80    }
81
82    let event_types = collect_event_types(&events);
83    let latest_date = find_latest_date(&events);
84
85    check_age_rules(status, latest_date, thresholds, today, &mut findings);
86    check_mismatch_rules(status, &event_types, &mut findings);
87
88    findings
89}
90
91/// Check age-based staleness rules.
92fn check_age_rules(
93    status: &str,
94    latest_date: Option<(i32, u32, u32)>,
95    thresholds: &Thresholds,
96    today: (i32, u32, u32),
97    findings: &mut Vec<Finding>,
98) {
99    let Some((y, m, d)) = latest_date else {
100        return;
101    };
102    let months_ago = months_between((y, m, d), today);
103
104    let (threshold, applies) = match status {
105        "under_investigation" => (thresholds.investigation_months, true),
106        "trial" | "open" => (thresholds.trial_months, true),
107        "appeal" => (thresholds.appeal_months, true),
108        _ => (0, false),
109    };
110
111    if applies && months_ago > threshold {
112        findings.push(Finding {
113            severity: Severity::Warning,
114            message: format!(
115                "status is '{status}' and latest event is {months_ago} months ago \
116                 (threshold: {threshold} months)"
117            ),
118        });
119    }
120}
121
122/// Check status-event mismatch rules.
123fn check_mismatch_rules(
124    status: &str,
125    event_types: &std::collections::HashSet<String>,
126    findings: &mut Vec<Finding>,
127) {
128    // Missing verdict (trial without verdict/sentencing)
129    if status == "trial" && !has_any(event_types, VERDICT_TYPES) {
130        findings.push(Finding {
131            severity: Severity::Info,
132            message: "status is 'trial' but no verdict/sentencing/conviction/acquittal event found"
133                .to_string(),
134        });
135    }
136
137    if status == "convicted"
138        && !has_any(event_types, &["verdict", "sentencing", "conviction"])
139    {
140        findings.push(Finding {
141            severity: Severity::Warning,
142            message: "status is 'convicted' but no verdict/sentencing/conviction event found"
143                .to_string(),
144        });
145    }
146
147    if status == "acquitted" && !has_any(event_types, &["verdict", "acquittal"]) {
148        findings.push(Finding {
149            severity: Severity::Warning,
150            message: "status is 'acquitted' but no verdict/acquittal event found".to_string(),
151        });
152    }
153
154    if status == "pardoned"
155        && !has_any(event_types, &["verdict", "sentencing", "conviction", "pardon"])
156    {
157        findings.push(Finding {
158            severity: Severity::Warning,
159            message:
160                "status is 'pardoned' but no verdict/sentencing/conviction/pardon event found"
161                    .to_string(),
162        });
163    }
164}
165
/// Normalized event types that count as a verdict or resolution of a trial.
const VERDICT_TYPES: &[&str] = &[
    "verdict",
    "sentencing",
    "conviction",
    "acquittal",
];
168
/// Returns `true` when at least one entry of `values` is a member of `set`.
///
/// An empty `values` slice always yields `false`.
fn has_any(set: &std::collections::HashSet<String>, values: &[&str]) -> bool {
    values.iter().copied().any(|wanted| set.contains(wanted))
}
173
174/// Collect all `event_type` values from events.
175fn collect_event_types(events: &[&Entity]) -> std::collections::HashSet<String> {
176    let mut types = std::collections::HashSet::new();
177    for e in events {
178        if let Some((_, FieldValue::Single(val))) =
179            e.fields.iter().find(|(k, _)| k == "event_type")
180        {
181            // Normalize: strip "custom:" prefix if present
182            let normalized = val
183                .strip_prefix("custom:")
184                .unwrap_or(val)
185                .to_lowercase()
186                .replace(' ', "_");
187            types.insert(normalized);
188        }
189    }
190    types
191}
192
193/// Find the latest `occurred_at` date among events.
194/// Returns `(year, month, day)`. Partial dates use end-of-period defaults.
195fn find_latest_date(events: &[&Entity]) -> Option<(i32, u32, u32)> {
196    let mut latest: Option<(i32, u32, u32)> = None;
197
198    for e in events {
199        if let Some((_, FieldValue::Single(val))) =
200            e.fields.iter().find(|(k, _)| k == "occurred_at")
201            && let Some(parsed) = parse_date(val)
202        {
203            latest = Some(match latest {
204                None => parsed,
205                Some(prev) => {
206                    if date_cmp(parsed, prev) == std::cmp::Ordering::Greater {
207                        parsed
208                    } else {
209                        prev
210                    }
211                }
212            });
213        }
214    }
215
216    latest
217}
218
/// Parse a `YYYY`, `YYYY-MM` or `YYYY-MM-DD` string into `(year, month, day)`.
///
/// Missing parts are filled in towards the end of the period: a bare year
/// becomes December 31st, and a year-month becomes the 28th of that month
/// (a day every month has). Returns `None` when a part is not a number, the
/// month is outside 1-12, the day is outside 1-31, or there are more than
/// three dash-separated parts. The day is not checked against the length of
/// the specific month.
fn parse_date(s: &str) -> Option<(i32, u32, u32)> {
    let pieces: Vec<&str> = s.split('-').collect();

    let (year, month, day) = match pieces.as_slice() {
        // year-only: assume end of year for staleness
        [y] => (y.parse::<i32>().ok()?, 12, 31),
        // month-only: assume end of month (conservative)
        [y, m] => (y.parse::<i32>().ok()?, m.parse::<u32>().ok()?, 28),
        [y, m, d] => (
            y.parse::<i32>().ok()?,
            m.parse::<u32>().ok()?,
            d.parse::<u32>().ok()?,
        ),
        _ => return None,
    };

    // The filled-in defaults always pass; only parsed parts can fail here.
    if (1..=12).contains(&month) && (1..=31).contains(&day) {
        Some((year, month, day))
    } else {
        None
    }
}
247
/// Order two `(year, month, day)` tuples chronologically.
///
/// Years are compared first, then months, then days, which is exactly the
/// lexicographic ordering tuples already provide.
fn date_cmp(a: (i32, u32, u32), b: (i32, u32, u32)) -> std::cmp::Ordering {
    a.cmp(&b)
}
252
/// Calculate the approximate number of months from `from` to `to`.
///
/// Only the year and month parts are used; the day parts are ignored, so the
/// result counts calendar-month boundaries crossed rather than elapsed time.
/// Returns `0` when `to` is in an earlier month than `from`, and saturates at
/// `u32::MAX` instead of overflowing for extreme year values.
fn months_between(from: (i32, u32, u32), to: (i32, u32, u32)) -> u32 {
    // Widen before subtracting: the difference of two `i32` years (or two
    // `u32` months) does not always fit in the narrower type.
    let years = i64::from(to.0) - i64::from(from.0);
    let months = i64::from(to.1) - i64::from(from.1);
    let total = years * 12 + months;

    // Negative spans clamp to zero; oversized spans saturate.
    u32::try_from(total.max(0)).unwrap_or(u32::MAX)
}
261
#[cfg(test)]
mod tests {
    use super::*;

    /// Fixed reference date so that every age calculation is reproducible.
    const TODAY: (i32, u32, u32) = (2026, 4, 21);

    /// Identifier shared by all fixtures.
    const TEST_ID: &str = "01TEST00000000000000000000";

    /// Build an event entity with an `event_type` and an `occurred_at` field.
    fn make_event(name: &str, event_type: &str, occurred_at: &str) -> Entity {
        let single =
            |key: &str, value: &str| (key.to_string(), FieldValue::Single(value.to_string()));

        Entity {
            name: name.to_string(),
            label: Label::Event,
            fields: vec![
                single("event_type", event_type),
                single("occurred_at", occurred_at),
            ],
            id: Some(TEST_ID.to_string()),
            line: 1,
            tags: Vec::new(),
            slug: None,
        }
    }

    /// Build a minimal case carrying the given status.
    fn make_case(status: &str) -> ParsedCase {
        ParsedCase {
            id: Some(TEST_ID.to_string()),
            sources: Vec::new(),
            title: "Test Case".to_string(),
            summary: "Test summary.".to_string(),
            sections: Vec::new(),
            case_type: Some("corruption".to_string()),
            status: Some(status.to_string()),
            amounts: None,
            tags: Vec::new(),
            tagline: None,
            related_cases: Vec::new(),
            involved: Vec::new(),
        }
    }

    /// Check a case with the given status and events using default thresholds.
    fn run(status: &str, events: &[Entity]) -> Vec<Finding> {
        check_case(&make_case(status), events, &Thresholds::default(), TODAY)
    }

    /// Whether any finding has the given severity.
    fn has_severity(findings: &[Finding], wanted: Severity) -> bool {
        findings.iter().any(|finding| finding.severity == wanted)
    }

    #[test]
    fn no_events_is_error() {
        let findings = run("convicted", &[]);
        assert_eq!(findings.len(), 1);
        assert_eq!(findings[0].severity, Severity::Error);
        assert!(findings[0].message.contains("no events"));
    }

    #[test]
    fn investigation_stale() {
        let findings = run(
            "under_investigation",
            &[make_event("Raid", "raid", "2025-06-01")],
        );
        assert_eq!(findings.len(), 1);
        assert_eq!(findings[0].severity, Severity::Warning);
        assert!(findings[0].message.contains("under_investigation"));
        assert!(findings[0].message.contains("10 months ago"));
    }

    #[test]
    fn investigation_fresh() {
        let findings = run(
            "under_investigation",
            &[make_event("Raid", "raid", "2026-02-01")],
        );
        assert!(findings.is_empty());
    }

    #[test]
    fn trial_stale() {
        let findings = run(
            "trial",
            &[make_event("Indictment", "indictment", "2024-12-01")],
        );
        // Should have warning (stale) + info (missing verdict)
        assert!(has_severity(&findings, Severity::Warning));
        assert!(has_severity(&findings, Severity::Info));
    }

    #[test]
    fn trial_with_verdict_not_missing() {
        let findings = run(
            "trial",
            &[
                make_event("Indictment", "indictment", "2026-03-01"),
                make_event("Verdict", "conviction", "2026-04-01"),
            ],
        );
        assert!(findings.is_empty());
    }

    #[test]
    fn convicted_without_verdict() {
        let findings = run("convicted", &[make_event("Arrest", "arrest", "2026-01-01")]);
        assert_eq!(findings.len(), 1);
        assert_eq!(findings[0].severity, Severity::Warning);
        assert!(findings[0].message.contains("convicted"));
    }

    #[test]
    fn convicted_with_sentencing() {
        let findings = run(
            "convicted",
            &[make_event("Sentencing", "sentencing", "2026-01-01")],
        );
        assert!(findings.is_empty());
    }

    #[test]
    fn acquitted_without_verdict() {
        let findings = run("acquitted", &[make_event("Trial", "trial", "2026-01-01")]);
        assert_eq!(findings.len(), 1);
        assert!(findings[0].message.contains("acquitted"));
    }

    #[test]
    fn pardoned_without_conviction() {
        let findings = run("pardoned", &[make_event("Arrest", "arrest", "2026-01-01")]);
        assert_eq!(findings.len(), 1);
        assert!(findings[0].message.contains("pardoned"));
    }

    #[test]
    fn pardoned_with_pardon_event() {
        let findings = run("pardoned", &[make_event("Pardon", "pardon", "2026-01-01")]);
        assert!(findings.is_empty());
    }

    #[test]
    fn custom_thresholds() {
        let strict = Thresholds {
            investigation_months: 2,
            ..Thresholds::default()
        };
        let findings = check_case(
            &make_case("under_investigation"),
            &[make_event("Raid", "raid", "2026-01-01")],
            &strict,
            TODAY,
        );
        assert_eq!(findings.len(), 1);
        assert!(findings[0].message.contains("3 months ago"));
    }

    #[test]
    fn no_status_no_findings() {
        let case = ParsedCase {
            status: None,
            ..make_case("convicted")
        };
        let findings = check_case(
            &case,
            &[make_event("Arrest", "arrest", "2020-01-01")],
            &Thresholds::default(),
            TODAY,
        );
        assert!(findings.is_empty());
    }

    #[test]
    fn partial_date_year_only() {
        // 2025 -> (2025, 12, 31), which is ~4 months ago from 2026-04-21
        let findings = run("under_investigation", &[make_event("Raid", "raid", "2025")]);
        assert!(findings.is_empty()); // 4 months < 6 month threshold
    }

    #[test]
    fn open_case_stale() {
        let findings = run(
            "open",
            &[make_event("Filing", "investigation_opened", "2024-01-01")],
        );
        assert_eq!(findings.len(), 1);
        assert!(findings[0].message.contains("open"));
    }

    #[test]
    fn months_between_same_date() {
        assert_eq!(months_between(TODAY, TODAY), 0);
    }

    #[test]
    fn months_between_year_diff() {
        assert_eq!(months_between((2024, 4, 1), TODAY), 24);
    }

    #[test]
    fn parse_date_formats() {
        let cases = [
            ("2025", Some((2025, 12, 31))),
            ("2025-06", Some((2025, 6, 28))),
            ("2025-06-15", Some((2025, 6, 15))),
            ("invalid", None),
            ("2025-13", None),
        ];
        for (input, expected) in cases {
            assert_eq!(parse_date(input), expected);
        }
    }
}