Skip to main content

magi_core/
validate.rs

1// Author: Julian Bolivar
2// Version: 1.0.0
3// Date: 2026-04-05
4
5use std::sync::LazyLock;
6
7use regex::Regex;
8
9use crate::error::MagiError;
10use crate::schema::{AgentOutput, Finding};
11
12/// Matches control whitespace characters that should be replaced with a space:
13/// horizontal tab, newline, vertical tab, form feed, carriage return, and NEL (U+0085).
14static CONTROL_WHITESPACE_RE: LazyLock<Regex> = LazyLock::new(|| {
15    Regex::new(r"[\t\n\x0B\x0C\r\x{85}]").expect("valid CONTROL_WHITESPACE_RE regex")
16});
17
18/// Matches invisible characters and Unicode separators that should be removed:
19/// zero-width spaces, bidi marks, line/paragraph separators (U+2028..U+202F range),
20/// extended formatting controls (U+2060..U+206F), BOM (U+FEFF), and soft hyphen (U+00AD).
21pub(crate) static INVISIBLE_AND_SEPARATOR_RE: LazyLock<Regex> = LazyLock::new(|| {
22    Regex::new(r"[\u{200b}-\u{200f}\u{2028}-\u{202f}\u{2060}-\u{206f}\u{feff}\u{00ad}]")
23        .expect("valid INVISIBLE_AND_SEPARATOR_RE regex")
24});
25
26/// Cleans a title by normalizing control whitespace, stripping invisible
27/// characters and separators, and trimming edges.
28///
29/// # Pipeline
30///
31/// 1. Replace control whitespace (`\t`, `\n`, `\x0B`, `\x0C`, `\r`, U+0085) with ASCII space.
32/// 2. Remove invisible characters and selected Unicode separators
33///    (zero-width, bidi marks, line/paragraph separators in the U+2028..U+202F range,
34///    word joiner and related U+2060..U+206F controls, BOM, soft hyphen).
35/// 3. Trim leading/trailing whitespace.
36///
37/// Note: interior whitespace is NOT collapsed — an input `"foo\t\tbar"` becomes
38/// `"foo  bar"` (two spaces). This matches the Python reference implementation.
39///
40/// # Examples
41///
42/// ```
43/// use magi_core::validate::clean_title;
44///
45/// assert_eq!(clean_title("  hello\nworld  "), "hello world");
46/// assert_eq!(clean_title("text\u{200b}with\u{feff}invisibles"), "textwithinvisibles");
47/// ```
48pub fn clean_title(input: &str) -> String {
49    let step1 = CONTROL_WHITESPACE_RE.replace_all(input, " ");
50    let step2 = INVISIBLE_AND_SEPARATOR_RE.replace_all(&step1, "");
51    step2.trim().to_string()
52}
53
/// Thresholds applied when validating an agent's output.
///
/// All length limits count Unicode scalar values (`char`s), not bytes.
#[derive(Clone, Debug)]
#[non_exhaustive]
pub struct ValidationLimits {
    /// Upper bound on how many findings one agent output may carry.
    pub max_findings: usize,
    /// Upper bound on a finding title's length, in `char`s.
    pub max_title_len: usize,
    /// Upper bound on a finding detail's length, in `char`s.
    pub max_detail_len: usize,
    /// Upper bound on each free-text field (summary, reasoning,
    /// recommendation), in `char`s.
    pub max_text_len: usize,
    /// Smallest accepted confidence; the bound itself is accepted.
    pub confidence_min: f64,
    /// Largest accepted confidence; the bound itself is accepted.
    pub confidence_max: f64,
}
72
73impl Default for ValidationLimits {
74    fn default() -> Self {
75        Self {
76            max_findings: 100,
77            max_title_len: 500,
78            max_detail_len: 10_000,
79            max_text_len: 50_000,
80            confidence_min: 0.0,
81            confidence_max: 1.0,
82        }
83    }
84}
85
86/// Validates `AgentOutput` fields against configurable limits.
87///
88/// Uses [`clean_title`] for normalizing finding titles before length validation,
89/// and configurable limits for field lengths and counts.
90pub struct Validator {
91    /// Active validation limits.
92    pub limits: ValidationLimits,
93}
94
95impl Default for Validator {
96    fn default() -> Self {
97        Self::new()
98    }
99}
100
101impl Validator {
102    /// Creates a validator with default limits.
103    pub fn new() -> Self {
104        Self::with_limits(ValidationLimits::default())
105    }
106
107    /// Creates a validator with custom limits.
108    pub fn with_limits(limits: ValidationLimits) -> Self {
109        Self { limits }
110    }
111
112    /// Validates an `AgentOutput`, returning on first failure.
113    ///
114    /// Checks in order: confidence, summary, reasoning, recommendation, findings.
115    /// Returns `MagiError::Validation` with a descriptive message on failure.
116    pub fn validate(&self, output: &AgentOutput) -> Result<(), MagiError> {
117        self.validate_confidence(output.confidence)?;
118        self.validate_text_field("summary", &output.summary)?;
119        self.validate_text_field("reasoning", &output.reasoning)?;
120        self.validate_text_field("recommendation", &output.recommendation)?;
121        self.validate_findings(&output.findings)?;
122        Ok(())
123    }
124
125    /// Validates `output` in place, replacing each finding's title with its
126    /// cleaned form (see [`clean_title`]) before length validation.
127    ///
128    /// This is the preferred entry point for pipelines that parse LLM responses,
129    /// because it ensures downstream code sees titles in the canonical cleaned
130    /// form used by the consensus engine.
131    ///
132    /// # Atomicity
133    ///
134    /// This method is atomic on error: it cleans and validates all finding titles
135    /// into a temporary buffer before committing any mutation to `output`. If any
136    /// validation check fails, `output` is left completely unchanged — no partial
137    /// title cleaning occurs.
138    ///
139    /// # Errors
140    ///
141    /// Returns [`MagiError::Validation`] on the first field that fails validation.
142    /// Validation order: confidence → summary → reasoning → recommendation →
143    /// findings (count, then each cleaned title/detail).
144    /// On error, `output` is not modified.
145    pub fn validate_mut(&self, output: &mut AgentOutput) -> Result<(), MagiError> {
146        self.validate_confidence(output.confidence)?;
147        self.validate_text_field("summary", &output.summary)?;
148        self.validate_text_field("reasoning", &output.reasoning)?;
149        self.validate_text_field("recommendation", &output.recommendation)?;
150        if output.findings.len() > self.limits.max_findings {
151            return Err(MagiError::Validation(format!(
152                "findings count {} exceeds maximum of {}",
153                output.findings.len(),
154                self.limits.max_findings
155            )));
156        }
157        // Collect cleaned titles and validate each before any mutation.
158        let cleaned_titles: Vec<String> = output
159            .findings
160            .iter()
161            .map(|f| clean_title(&f.title))
162            .collect();
163        for (cleaned, finding) in cleaned_titles.iter().zip(output.findings.iter()) {
164            self.validate_finding_fields(cleaned, &finding.detail)?;
165        }
166        // All valid — now commit mutations.
167        for (cleaned, finding) in cleaned_titles.into_iter().zip(output.findings.iter_mut()) {
168            finding.title = cleaned;
169        }
170        Ok(())
171    }
172
173    fn validate_confidence(&self, confidence: f64) -> Result<(), MagiError> {
174        if !(confidence >= self.limits.confidence_min && confidence <= self.limits.confidence_max) {
175            return Err(MagiError::Validation(format!(
176                "confidence {} is out of range [{}, {}]",
177                confidence, self.limits.confidence_min, self.limits.confidence_max
178            )));
179        }
180        Ok(())
181    }
182
183    fn validate_text_field(&self, field_name: &str, value: &str) -> Result<(), MagiError> {
184        if value.chars().count() > self.limits.max_text_len {
185            return Err(MagiError::Validation(format!(
186                "{field_name} exceeds maximum length of {} characters",
187                self.limits.max_text_len
188            )));
189        }
190        Ok(())
191    }
192
193    fn validate_findings(&self, findings: &[Finding]) -> Result<(), MagiError> {
194        if findings.len() > self.limits.max_findings {
195            return Err(MagiError::Validation(format!(
196                "findings count {} exceeds maximum of {}",
197                findings.len(),
198                self.limits.max_findings
199            )));
200        }
201        for finding in findings {
202            self.validate_finding(finding)?;
203        }
204        Ok(())
205    }
206
207    /// Validates title and detail length invariants.
208    ///
209    /// `title` must already be in its final form (raw or pre-cleaned — the
210    /// caller decides). No stripping is performed here.
211    fn validate_finding_fields(&self, title: &str, detail: &str) -> Result<(), MagiError> {
212        if title.is_empty() {
213            return Err(MagiError::Validation(
214                "finding title is empty after normalization".to_string(),
215            ));
216        }
217        if title.chars().count() > self.limits.max_title_len {
218            return Err(MagiError::Validation(format!(
219                "finding title exceeds maximum length of {} characters",
220                self.limits.max_title_len
221            )));
222        }
223        if detail.chars().count() > self.limits.max_detail_len {
224            return Err(MagiError::Validation(format!(
225                "finding detail exceeds maximum length of {} characters",
226                self.limits.max_detail_len
227            )));
228        }
229        Ok(())
230    }
231
232    fn validate_finding(&self, finding: &Finding) -> Result<(), MagiError> {
233        let cleaned = clean_title(&finding.title);
234        self.validate_finding_fields(&cleaned, &finding.detail)
235    }
236}
237
#[cfg(test)]
mod tests {
    use super::*;
    use crate::schema::*;

    // -- Fixtures and helpers --

    /// An output that satisfies every default limit; tests override single fields.
    fn valid_agent_output() -> AgentOutput {
        AgentOutput {
            agent: AgentName::Melchior,
            verdict: Verdict::Approve,
            confidence: 0.9,
            summary: String::from("Good code"),
            reasoning: String::from("Well structured"),
            findings: Vec::new(),
            recommendation: String::from("Approve as-is"),
        }
    }

    /// Valid output with only the confidence replaced.
    fn output_with_confidence(confidence: f64) -> AgentOutput {
        let mut output = valid_agent_output();
        output.confidence = confidence;
        output
    }

    /// Valid output with only the findings replaced.
    fn output_with_findings(findings: Vec<Finding>) -> AgentOutput {
        let mut output = valid_agent_output();
        output.findings = findings;
        output
    }

    fn make_finding(severity: Severity, title: &str, detail: &str) -> Finding {
        Finding {
            severity,
            title: title.to_owned(),
            detail: detail.to_owned(),
        }
    }

    /// Info-level finding whose only interesting part is its title.
    fn finding_with_title(title: &str) -> Finding {
        make_finding(Severity::Info, title, "some detail")
    }

    /// `count` valid findings titled "Finding 0", "Finding 1", …
    fn numbered_findings(count: usize) -> Vec<Finding> {
        (0..count)
            .map(|i| make_finding(Severity::Info, &format!("Finding {i}"), "detail"))
            .collect()
    }

    /// Default limits with only `max_title_len` overridden.
    fn validator_with_max_title_len(max_title_len: usize) -> Validator {
        Validator::with_limits(ValidationLimits {
            max_title_len,
            ..ValidationLimits::default()
        })
    }

    /// Runs `validate`, requires it to fail, and returns the rendered error.
    #[track_caller]
    fn rejection_message(validator: &Validator, output: &AgentOutput) -> String {
        validator.validate(output).unwrap_err().to_string()
    }

    #[track_caller]
    fn assert_mentions(message: &str, needle: &str) {
        assert!(
            message.contains(needle),
            "error should mention {needle}: {message}"
        );
    }

    #[track_caller]
    fn assert_cleans_to(input: &str, expected: &str) {
        assert_eq!(clean_title(input), expected);
    }

    /// Runs `validate_mut` with default limits (must succeed) and returns the
    /// resulting titles in finding order.
    #[track_caller]
    fn titles_after_validate_mut(raw_titles: &[&str]) -> Vec<String> {
        let mut output =
            output_with_findings(raw_titles.iter().map(|t| finding_with_title(t)).collect());
        Validator::new().validate_mut(&mut output).unwrap();
        output.findings.iter().map(|f| f.title.clone()).collect()
    }

    // -- Constructor tests --

    #[test]
    fn test_validator_new_creates_with_default_limits() {
        let limits = Validator::new().limits;
        assert_eq!(limits.max_findings, 100);
        assert_eq!(limits.max_title_len, 500);
        assert_eq!(limits.max_detail_len, 10_000);
        assert_eq!(limits.max_text_len, 50_000);
        assert!((limits.confidence_min - 0.0).abs() < f64::EPSILON);
        assert!((limits.confidence_max - 1.0).abs() < f64::EPSILON);
    }

    #[test]
    fn test_validator_with_limits_uses_custom_limits() {
        let v = Validator::with_limits(ValidationLimits {
            max_findings: 5,
            ..ValidationLimits::default()
        });
        assert_eq!(v.limits.max_findings, 5);
    }

    // -- BDD-10: confidence out of range --

    #[test]
    fn test_validate_rejects_confidence_above_one() {
        let msg = rejection_message(&Validator::new(), &output_with_confidence(1.5));
        assert_mentions(&msg, "confidence");
    }

    #[test]
    fn test_validate_rejects_confidence_below_zero() {
        let msg = rejection_message(&Validator::new(), &output_with_confidence(-0.1));
        assert_mentions(&msg, "confidence");
    }

    #[test]
    fn test_validate_accepts_confidence_at_boundaries() {
        let v = Validator::new();
        for boundary in [0.0, 1.0] {
            assert!(v.validate(&output_with_confidence(boundary)).is_ok());
        }
    }

    #[test]
    fn test_validate_rejects_nan_confidence() {
        let outcome = Validator::new().validate(&output_with_confidence(f64::NAN));
        assert!(outcome.is_err());
    }

    #[test]
    fn test_validate_rejects_infinity_confidence() {
        let v = Validator::new();
        for infinite in [f64::INFINITY, f64::NEG_INFINITY] {
            assert!(v.validate(&output_with_confidence(infinite)).is_err());
        }
    }

    // -- BDD-11: empty title after strip zero-width --

    #[test]
    fn test_validate_rejects_finding_with_only_zero_width_title() {
        let output = output_with_findings(vec![make_finding(
            Severity::Warning,
            "\u{200B}\u{FEFF}\u{200C}",
            "detail",
        )]);
        let msg = rejection_message(&Validator::new(), &output);
        assert_mentions(&msg, "title");
    }

    #[test]
    fn test_validate_accepts_finding_with_normal_title() {
        let output = output_with_findings(vec![make_finding(
            Severity::Info,
            "Security vulnerability",
            "detail",
        )]);
        assert!(Validator::new().validate(&output).is_ok());
    }

    // -- BDD-12: text field exceeds max_text_len --

    #[test]
    fn test_validate_rejects_reasoning_exceeding_max_text_len() {
        let output = AgentOutput {
            reasoning: "x".repeat(50_001),
            ..valid_agent_output()
        };
        let msg = rejection_message(&Validator::new(), &output);
        assert_mentions(&msg, "reasoning");
    }

    #[test]
    fn test_validate_rejects_summary_exceeding_max_text_len() {
        let output = AgentOutput {
            summary: "x".repeat(50_001),
            ..valid_agent_output()
        };
        let msg = rejection_message(&Validator::new(), &output);
        assert_mentions(&msg, "summary");
    }

    #[test]
    fn test_validate_rejects_recommendation_exceeding_max_text_len() {
        let output = AgentOutput {
            recommendation: "x".repeat(50_001),
            ..valid_agent_output()
        };
        let msg = rejection_message(&Validator::new(), &output);
        assert_mentions(&msg, "recommendation");
    }

    // -- Findings count and field limits --

    #[test]
    fn test_validate_rejects_findings_count_exceeding_max_findings() {
        let output = output_with_findings(numbered_findings(101));
        let msg = rejection_message(&Validator::new(), &output);
        assert_mentions(&msg, "findings");
    }

    #[test]
    fn test_validate_rejects_finding_title_exceeding_max_title_len() {
        let output = output_with_findings(vec![make_finding(
            Severity::Warning,
            &"x".repeat(501),
            "detail",
        )]);
        let msg = rejection_message(&Validator::new(), &output);
        assert_mentions(&msg, "title");
    }

    #[test]
    fn test_validate_rejects_finding_detail_exceeding_max_detail_len() {
        let output = output_with_findings(vec![make_finding(
            Severity::Info,
            "Valid title",
            &"x".repeat(10_001),
        )]);
        let msg = rejection_message(&Validator::new(), &output);
        assert_mentions(&msg, "detail");
    }

    // -- Happy path --

    #[test]
    fn test_validate_accepts_valid_agent_output() {
        assert!(Validator::new().validate(&valid_agent_output()).is_ok());
    }

    // -- validate removes zero-width via clean_title pipeline --

    #[test]
    fn test_validate_strips_zero_width_characters_via_clean_title() {
        // `validate` runs titles through clean_title, so the zero-width
        // characters below are gone before the title is checked.
        let output = output_with_findings(vec![make_finding(
            Severity::Info,
            "Hello\u{200B}World\u{FEFF}Test\u{200C}End",
            "detail",
        )]);
        assert!(
            Validator::new().validate(&output).is_ok(),
            "valid title after clean should pass"
        );
    }

    // -- Validation order --

    #[test]
    fn test_validation_order_confidence_checked_before_text_fields() {
        let output = AgentOutput {
            confidence: 2.0,
            summary: "x".repeat(50_001),
            ..valid_agent_output()
        };
        let msg = rejection_message(&Validator::new(), &output);
        assert!(
            msg.contains("confidence"),
            "confidence should be checked first, got: {msg}"
        );
    }

    #[test]
    fn test_validation_order_summary_checked_before_reasoning() {
        let output = AgentOutput {
            summary: "x".repeat(50_001),
            reasoning: "x".repeat(50_001),
            ..valid_agent_output()
        };
        let msg = rejection_message(&Validator::new(), &output);
        assert!(
            msg.contains("summary"),
            "summary should be checked before reasoning, got: {msg}"
        );
    }

    #[test]
    fn test_validation_order_recommendation_checked_before_findings() {
        let output = AgentOutput {
            recommendation: "x".repeat(50_001),
            findings: numbered_findings(101),
            ..valid_agent_output()
        };
        let msg = rejection_message(&Validator::new(), &output);
        assert!(
            msg.contains("recommendation"),
            "recommendation should be checked before findings, got: {msg}"
        );
    }

    // -- Title length checked after strip --

    #[test]
    fn test_title_length_checked_after_strip_zero_width() {
        let v = validator_with_max_title_len(5);
        // 8 chars as written, 5 once the three zero-width chars are stripped,
        // so the title fits the limit of 5.
        let output = output_with_findings(vec![make_finding(
            Severity::Info,
            "He\u{200B}l\u{FEFF}lo\u{200C}",
            "detail",
        )]);
        assert!(v.validate(&output).is_ok());
    }

    // -- validate_mut tests --

    #[test]
    fn test_validate_mut_replaces_title_with_cleaned_form() {
        assert_eq!(
            titles_after_validate_mut(&["  Issue\t\u{200b}Title  "]),
            ["Issue Title"]
        );
    }

    #[test]
    fn test_validate_mut_strips_zero_width_from_titles() {
        assert_eq!(
            titles_after_validate_mut(&["Good\u{200b}Title"]),
            ["GoodTitle"]
        );
    }

    #[test]
    fn test_validate_mut_collapses_control_whitespace_in_titles() {
        assert_eq!(titles_after_validate_mut(&["Bad\tTitle"]), ["Bad Title"]);
    }

    #[test]
    fn test_validate_mut_preserves_order_of_findings() {
        assert_eq!(
            titles_after_validate_mut(&["Alpha\u{200b}One", "Beta\tTwo", "  Gamma Three  "]),
            ["AlphaOne", "Beta Two", "Gamma Three"]
        );
    }

    #[test]
    fn test_validate_retains_original_behavior_on_immutable_slice() {
        let output = output_with_findings(vec![finding_with_title("Normal Title")]);
        let title_before = output.findings[0].title.clone();
        // The read-only entry point must leave the title as it was.
        Validator::new().validate(&output).unwrap();
        assert_eq!(output.findings[0].title, title_before);
    }

    // -- clean_title tests --

    #[test]
    fn test_clean_title_replaces_tab_with_space() {
        assert_cleans_to("foo\tbar", "foo bar");
    }

    #[test]
    fn test_clean_title_replaces_newline_with_space() {
        assert_cleans_to("foo\nbar", "foo bar");
    }

    #[test]
    fn test_clean_title_replaces_vertical_tab_with_space() {
        assert_cleans_to("foo\x0Bbar", "foo bar");
    }

    #[test]
    fn test_clean_title_replaces_carriage_return_with_space() {
        assert_cleans_to("foo\rbar", "foo bar");
    }

    #[test]
    fn test_clean_title_replaces_nel_u0085_with_space() {
        assert_cleans_to("foo\u{85}bar", "foo bar");
    }

    #[test]
    fn test_clean_title_strips_zero_width_space_u200b() {
        assert_cleans_to("a\u{200b}b", "ab");
    }

    #[test]
    fn test_clean_title_strips_zwnj_u200c() {
        assert_cleans_to("a\u{200c}b", "ab");
    }

    #[test]
    fn test_clean_title_strips_zwj_u200d() {
        assert_cleans_to("a\u{200d}b", "ab");
    }

    #[test]
    fn test_clean_title_strips_lrm_rlm_u200e_u200f() {
        assert_cleans_to("a\u{200e}b\u{200f}c", "abc");
    }

    #[test]
    fn test_clean_title_strips_line_separator_u2028() {
        assert_cleans_to("a\u{2028}b", "ab");
    }

    #[test]
    fn test_clean_title_strips_paragraph_separator_u2029() {
        assert_cleans_to("a\u{2029}b", "ab");
    }

    #[test]
    fn test_clean_title_strips_narrow_nbsp_u202f() {
        assert_cleans_to("a\u{202f}b", "ab");
    }

    #[test]
    fn test_clean_title_strips_bidi_override_u202a_through_u202e() {
        for cp in ['\u{202a}', '\u{202b}', '\u{202c}', '\u{202d}', '\u{202e}'] {
            let cleaned = clean_title(&format!("a{cp}b"));
            assert_eq!(cleaned, "ab", "failed for U+{:04X}", cp as u32);
        }
    }

    #[test]
    fn test_clean_title_strips_word_joiner_u2060() {
        assert_cleans_to("a\u{2060}b", "ab");
    }

    #[test]
    fn test_clean_title_strips_bom_ufeff() {
        assert_cleans_to("\u{feff}hello", "hello");
    }

    #[test]
    fn test_clean_title_strips_soft_hyphen_u00ad() {
        assert_cleans_to("soft\u{00ad}hyphen", "softhyphen");
    }

    #[test]
    fn test_clean_title_trims_leading_trailing_spaces() {
        assert_cleans_to("  hello  ", "hello");
    }

    #[test]
    fn test_clean_title_trims_leading_trailing_tabs_after_replacement() {
        // Edge tabs become spaces in step 1 and are then removed by the final trim.
        assert_cleans_to("\thello\t", "hello");
    }

    #[test]
    fn test_clean_title_preserves_interior_single_spaces() {
        assert_cleans_to("hello world", "hello world");
    }

    #[test]
    fn test_clean_title_does_not_collapse_double_spaces_interior() {
        // Interior runs of spaces are kept on purpose (parity with the Python version).
        assert_cleans_to("foo  bar", "foo  bar");
    }

    #[test]
    fn test_clean_title_preserves_unicode_letters() {
        assert_cleans_to("café", "café");
    }

    #[test]
    fn test_clean_title_empty_string_returns_empty() {
        assert_cleans_to("", "");
    }

    #[test]
    fn test_clean_title_all_whitespace_returns_empty() {
        assert_cleans_to("   \t\n  ", "");
    }

    #[test]
    fn test_clean_title_is_idempotent() {
        let inputs = [
            "hello\nworld",
            "  \u{200b}spaces\u{feff}  ",
            "café\u{2060}",
            "normal text",
            "",
        ];
        for input in inputs {
            let once = clean_title(input);
            let twice = clean_title(&once);
            assert_eq!(once, twice, "not idempotent for input: {input:?}");
        }
    }

    /// validate_mut is atomic: if any finding fails validation, no titles are mutated.
    #[test]
    fn test_validate_mut_atomic_no_partial_mutation_on_error() {
        let v = validator_with_max_title_len(5);

        // Findings 0 and 1 would clean to "AB" / "CD" and pass on their own.
        // Finding 2 is still 7 chars after cleaning, which exceeds the limit
        // of 5 and makes the whole call fail.
        let raw_titles = ["A\u{200b}B", "C\u{200b}D", "toolong"];
        let mut output = output_with_findings(
            raw_titles
                .iter()
                .map(|title| make_finding(Severity::Info, title, "detail"))
                .collect(),
        );

        let result = v.validate_mut(&mut output);
        assert!(result.is_err(), "should fail on over-length title");

        // Atomicity: every title must still be exactly what was passed in.
        for (index, (finding, raw)) in output.findings.iter().zip(raw_titles).enumerate() {
            assert_eq!(
                finding.title, raw,
                "finding {index} title must not be mutated on error"
            );
        }
    }
}