magi-core 0.3.1

LLM-agnostic multi-perspective analysis system inspired by MAGI
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
// Author: Julian Bolivar
// Version: 1.0.0
// Date: 2026-04-05

use std::sync::LazyLock;

use regex::Regex;

use crate::error::MagiError;
use crate::schema::{AgentOutput, Finding};

/// Lazily compiled character class for the control whitespace that
/// [`clean_title`] turns into a plain ASCII space: horizontal tab, line feed,
/// vertical tab, form feed, carriage return, and NEL (U+0085).
static CONTROL_WHITESPACE_RE: LazyLock<Regex> = LazyLock::new(|| {
    let pattern = r"[\t\n\x0B\x0C\r\x{85}]";
    // The pattern is a fixed literal, so failing to compile it is a programming error.
    Regex::new(pattern).expect("valid CONTROL_WHITESPACE_RE regex")
});

/// Lazily compiled character class for code points that are deleted outright
/// rather than replaced:
///
/// - U+200B..U+200F — zero-width space/joiners and the LRM/RLM bidi marks,
/// - U+2028..U+202F — line and paragraph separators and the rest of that range,
/// - U+2060..U+206F — word joiner and the other formatting controls in that range,
/// - U+FEFF — byte order mark,
/// - U+00AD — soft hyphen.
pub(crate) static INVISIBLE_AND_SEPARATOR_RE: LazyLock<Regex> = LazyLock::new(|| {
    let pattern = r"[\u{200b}-\u{200f}\u{2028}-\u{202f}\u{2060}-\u{206f}\u{feff}\u{00ad}]";
    // The pattern is a fixed literal, so failing to compile it is a programming error.
    Regex::new(pattern).expect("valid INVISIBLE_AND_SEPARATOR_RE regex")
});

/// Normalizes a title into its cleaned form and returns it as a new `String`.
///
/// # Pipeline
///
/// 1. Each control whitespace character (`\t`, `\n`, `\x0B`, `\x0C`, `\r`, U+0085)
///    is replaced with one ASCII space.
/// 2. Invisible characters and selected Unicode separators are removed entirely
///    (U+200B..U+200F zero-width and bidi marks, the U+2028..U+202F range,
///    the U+2060..U+206F controls, the BOM U+FEFF, and the soft hyphen U+00AD).
/// 3. Leading and trailing whitespace is trimmed.
///
/// Note: runs of interior whitespace are NOT collapsed — `"foo\t\tbar"` comes
/// out as `"foo  bar"` (two spaces). This matches the Python reference implementation.
///
/// # Examples
///
/// ```
/// use magi_core::validate::clean_title;
///
/// assert_eq!(clean_title("  hello\nworld  "), "hello world");
/// assert_eq!(clean_title("text\u{200b}with\u{feff}invisibles"), "textwithinvisibles");
/// ```
pub fn clean_title(input: &str) -> String {
    // Step 1: control whitespace becomes a plain space.
    let title = CONTROL_WHITESPACE_RE.replace_all(input, " ");
    // Step 2: invisible characters and separators vanish.
    let title = INVISIBLE_AND_SEPARATOR_RE.replace_all(&title, "");
    // Step 3: trim the edges and hand back an owned copy.
    title.trim().to_owned()
}

/// Configuration thresholds for agent output validation.
///
/// Every length limit counts Unicode scalar values (`char`s), not bytes, and
/// every limit is inclusive: a value exactly at the limit is accepted.
///
/// The struct is `#[non_exhaustive]`, so code outside this crate cannot build
/// it with struct-literal syntax; start from [`Default`] and overwrite the
/// public fields that need to differ.
///
/// `PartialEq` is derived so that two configurations can be compared directly
/// (for example in tests) instead of field by field.
#[non_exhaustive]
#[derive(Debug, Clone, PartialEq)]
pub struct ValidationLimits {
    /// Maximum number of findings per agent output.
    pub max_findings: usize,
    /// Maximum character count for finding titles (Unicode scalar values, not bytes).
    pub max_title_len: usize,
    /// Maximum character count for finding details (Unicode scalar values, not bytes).
    pub max_detail_len: usize,
    /// Maximum character count for text fields — summary, reasoning, recommendation
    /// (Unicode scalar values, not bytes).
    pub max_text_len: usize,
    /// Minimum valid confidence value, inclusive.
    pub confidence_min: f64,
    /// Maximum valid confidence value, inclusive.
    pub confidence_max: f64,
}

impl Default for ValidationLimits {
    /// Returns the stock limits: 100 findings, 500-character titles,
    /// 10 000-character details, 50 000-character text fields, and a
    /// confidence range of `[0.0, 1.0]`.
    fn default() -> Self {
        Self {
            max_findings: 100,
            max_title_len: 500,
            max_detail_len: 10_000,
            max_text_len: 50_000,
            confidence_min: 0.0,
            confidence_max: 1.0,
        }
    }
}

/// Validates `AgentOutput` fields against configurable limits.
///
/// Uses [`clean_title`] for normalizing finding titles before length validation,
/// and configurable limits for field lengths and counts.
///
/// The validator holds nothing but its [`ValidationLimits`], so it derives
/// `Debug` and `Clone` to stay printable and cheaply copyable alongside them.
#[derive(Debug, Clone)]
pub struct Validator {
    /// Active validation limits.
    pub limits: ValidationLimits,
}

impl Default for Validator {
    fn default() -> Self {
        Self::new()
    }
}

impl Validator {
    /// Creates a validator with default limits.
    pub fn new() -> Self {
        Self::with_limits(ValidationLimits::default())
    }

    /// Creates a validator with custom limits.
    pub fn with_limits(limits: ValidationLimits) -> Self {
        Self { limits }
    }

    /// Validates an `AgentOutput` without modifying it, returning on first failure.
    ///
    /// Checks in order: confidence, summary, reasoning, recommendation, findings
    /// (count, then each finding's cleaned title and its detail). Titles are
    /// cleaned with [`clean_title`] only for the purpose of the check; the
    /// stored titles are left as they are.
    ///
    /// # Errors
    ///
    /// Returns `MagiError::Validation` with a descriptive message on failure.
    pub fn validate(&self, output: &AgentOutput) -> Result<(), MagiError> {
        self.validate_scalar_fields(output)?;
        self.validate_findings(&output.findings)
    }

    /// Validates `output` in place, replacing each finding's title with its
    /// cleaned form (see [`clean_title`]) before length validation.
    ///
    /// This is the preferred entry point for pipelines that parse LLM responses,
    /// because it ensures downstream code sees titles in the canonical cleaned
    /// form used by the consensus engine.
    ///
    /// # Atomicity
    ///
    /// This method is atomic on error: it cleans and validates all finding titles
    /// into a temporary buffer before committing any mutation to `output`. If any
    /// validation check fails, `output` is left completely unchanged — no partial
    /// title cleaning occurs.
    ///
    /// # Errors
    ///
    /// Returns [`MagiError::Validation`] on the first field that fails validation.
    /// Validation order: confidence → summary → reasoning → recommendation →
    /// findings (count, then each cleaned title/detail).
    /// On error, `output` is not modified.
    pub fn validate_mut(&self, output: &mut AgentOutput) -> Result<(), MagiError> {
        self.validate_scalar_fields(output)?;
        self.validate_findings_count(output.findings.len())?;
        // Clean and validate every finding into a temporary buffer. Collecting
        // into `Result` stops at the first failing finding, and because nothing
        // has been written to `output` yet, an early return leaves it untouched.
        let cleaned_titles = output
            .findings
            .iter()
            .map(|finding| -> Result<String, MagiError> {
                let cleaned = clean_title(&finding.title);
                self.validate_finding_fields(&cleaned, &finding.detail)?;
                Ok(cleaned)
            })
            .collect::<Result<Vec<String>, MagiError>>()?;
        // All valid — now commit mutations.
        for (finding, cleaned) in output.findings.iter_mut().zip(cleaned_titles) {
            finding.title = cleaned;
        }
        Ok(())
    }

    /// Runs the checks shared by [`Validator::validate`] and
    /// [`Validator::validate_mut`] that do not involve findings, in the fixed
    /// order confidence → summary → reasoning → recommendation.
    fn validate_scalar_fields(&self, output: &AgentOutput) -> Result<(), MagiError> {
        self.validate_confidence(output.confidence)?;
        self.validate_text_field("summary", &output.summary)?;
        self.validate_text_field("reasoning", &output.reasoning)?;
        self.validate_text_field("recommendation", &output.recommendation)
    }

    /// Rejects a confidence outside `[confidence_min, confidence_max]` (inclusive).
    fn validate_confidence(&self, confidence: f64) -> Result<(), MagiError> {
        let (min, max) = (self.limits.confidence_min, self.limits.confidence_max);
        // Written as "accept if inside the range" so that NaN, which compares
        // false against everything, falls through to the error.
        if (min..=max).contains(&confidence) {
            return Ok(());
        }
        Err(MagiError::Validation(format!(
            "confidence {} is out of range [{}, {}]",
            confidence, min, max
        )))
    }

    /// Rejects a text field holding more than `max_text_len` characters.
    ///
    /// `field_name` is used only to label the error message.
    fn validate_text_field(&self, field_name: &str, value: &str) -> Result<(), MagiError> {
        if Self::exceeds_char_limit(value, self.limits.max_text_len) {
            return Err(MagiError::Validation(format!(
                "{field_name} exceeds maximum length of {} characters",
                self.limits.max_text_len
            )));
        }
        Ok(())
    }

    /// Rejects a findings list with more than `max_findings` entries.
    fn validate_findings_count(&self, count: usize) -> Result<(), MagiError> {
        if count > self.limits.max_findings {
            return Err(MagiError::Validation(format!(
                "findings count {} exceeds maximum of {}",
                count, self.limits.max_findings
            )));
        }
        Ok(())
    }

    /// Checks the findings count, then each finding in order, stopping at the
    /// first failure.
    fn validate_findings(&self, findings: &[Finding]) -> Result<(), MagiError> {
        self.validate_findings_count(findings.len())?;
        findings
            .iter()
            .try_for_each(|finding| self.validate_finding(finding))
    }

    /// Validates title and detail length invariants.
    ///
    /// `title` must already be in its final form (raw or pre-cleaned — the
    /// caller decides). No stripping is performed here.
    ///
    /// Checks in order: title non-empty, title length, detail length.
    fn validate_finding_fields(&self, title: &str, detail: &str) -> Result<(), MagiError> {
        if title.is_empty() {
            return Err(MagiError::Validation(
                "finding title is empty after normalization".to_string(),
            ));
        }
        if Self::exceeds_char_limit(title, self.limits.max_title_len) {
            return Err(MagiError::Validation(format!(
                "finding title exceeds maximum length of {} characters",
                self.limits.max_title_len
            )));
        }
        if Self::exceeds_char_limit(detail, self.limits.max_detail_len) {
            return Err(MagiError::Validation(format!(
                "finding detail exceeds maximum length of {} characters",
                self.limits.max_detail_len
            )));
        }
        Ok(())
    }

    /// Validates a single finding against its cleaned title, leaving the
    /// finding itself unmodified.
    fn validate_finding(&self, finding: &Finding) -> Result<(), MagiError> {
        let cleaned = clean_title(&finding.title);
        self.validate_finding_fields(&cleaned, &finding.detail)
    }

    /// Returns `true` when `text` contains more than `max_chars` Unicode scalar values.
    ///
    /// Equivalent to `text.chars().count() > max_chars`, but bounded in cost:
    /// a string can never have more characters than bytes, so a byte length
    /// within the limit is accepted without decoding, and otherwise decoding
    /// stops as soon as character number `max_chars + 1` is found.
    fn exceeds_char_limit(text: &str, max_chars: usize) -> bool {
        text.len() > max_chars && text.chars().nth(max_chars).is_some()
    }
}

// Unit tests for `clean_title` and `Validator`: constructors, each validation
// rule and its ordering, the in-place `validate_mut` path, and the individual
// steps of the title-cleaning pipeline.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::schema::*;

    /// Baseline output that passes validation under the default limits;
    /// the other helpers override a single field of it.
    fn valid_agent_output() -> AgentOutput {
        AgentOutput {
            agent: AgentName::Melchior,
            verdict: Verdict::Approve,
            confidence: 0.9,
            summary: "Good code".to_string(),
            reasoning: "Well structured".to_string(),
            findings: vec![],
            recommendation: "Approve as-is".to_string(),
        }
    }

    /// Baseline output with only `confidence` replaced.
    fn output_with_confidence(confidence: f64) -> AgentOutput {
        AgentOutput {
            confidence,
            ..valid_agent_output()
        }
    }

    /// Baseline output with only `findings` replaced.
    fn output_with_findings(findings: Vec<Finding>) -> AgentOutput {
        AgentOutput {
            findings,
            ..valid_agent_output()
        }
    }

    // -- Constructor tests --

    #[test]
    fn test_validator_new_creates_with_default_limits() {
        let v = Validator::new();
        assert_eq!(v.limits.max_findings, 100);
        assert_eq!(v.limits.max_title_len, 500);
        assert_eq!(v.limits.max_detail_len, 10_000);
        assert_eq!(v.limits.max_text_len, 50_000);
        assert!((v.limits.confidence_min - 0.0).abs() < f64::EPSILON);
        assert!((v.limits.confidence_max - 1.0).abs() < f64::EPSILON);
    }

    #[test]
    fn test_validator_with_limits_uses_custom_limits() {
        let custom = ValidationLimits {
            max_findings: 5,
            ..ValidationLimits::default()
        };
        let v = Validator::with_limits(custom);
        assert_eq!(v.limits.max_findings, 5);
    }

    // -- BDD-10: confidence out of range --

    #[test]
    fn test_validate_rejects_confidence_above_one() {
        let v = Validator::new();
        let output = output_with_confidence(1.5);
        let err = v.validate(&output).unwrap_err();
        let msg = format!("{err}");
        assert!(
            msg.contains("confidence"),
            "error should mention confidence: {msg}"
        );
    }

    #[test]
    fn test_validate_rejects_confidence_below_zero() {
        let v = Validator::new();
        let output = output_with_confidence(-0.1);
        let err = v.validate(&output).unwrap_err();
        let msg = format!("{err}");
        assert!(
            msg.contains("confidence"),
            "error should mention confidence: {msg}"
        );
    }

    // Both ends of the confidence range are inclusive.
    #[test]
    fn test_validate_accepts_confidence_at_boundaries() {
        let v = Validator::new();
        assert!(v.validate(&output_with_confidence(0.0)).is_ok());
        assert!(v.validate(&output_with_confidence(1.0)).is_ok());
    }

    // NaN compares false against both bounds, so it must be rejected.
    #[test]
    fn test_validate_rejects_nan_confidence() {
        let v = Validator::new();
        let output = output_with_confidence(f64::NAN);
        assert!(v.validate(&output).is_err());
    }

    #[test]
    fn test_validate_rejects_infinity_confidence() {
        let v = Validator::new();
        assert!(v.validate(&output_with_confidence(f64::INFINITY)).is_err());
        assert!(
            v.validate(&output_with_confidence(f64::NEG_INFINITY))
                .is_err()
        );
    }

    // -- BDD-11: empty title after strip zero-width --

    #[test]
    fn test_validate_rejects_finding_with_only_zero_width_title() {
        let v = Validator::new();
        let output = output_with_findings(vec![Finding {
            severity: Severity::Warning,
            title: "\u{200B}\u{FEFF}\u{200C}".to_string(),
            detail: "detail".to_string(),
        }]);
        let err = v.validate(&output).unwrap_err();
        let msg = format!("{err}");
        assert!(msg.contains("title"), "error should mention title: {msg}");
    }

    #[test]
    fn test_validate_accepts_finding_with_normal_title() {
        let v = Validator::new();
        let output = output_with_findings(vec![Finding {
            severity: Severity::Info,
            title: "Security vulnerability".to_string(),
            detail: "detail".to_string(),
        }]);
        assert!(v.validate(&output).is_ok());
    }

    // -- BDD-12: text field exceeds max_text_len --
    // Each test uses 50_001 characters: one more than the default max_text_len.

    #[test]
    fn test_validate_rejects_reasoning_exceeding_max_text_len() {
        let v = Validator::new();
        let mut output = valid_agent_output();
        output.reasoning = "x".repeat(50_001);
        let err = v.validate(&output).unwrap_err();
        let msg = format!("{err}");
        assert!(
            msg.contains("reasoning"),
            "error should mention reasoning: {msg}"
        );
    }

    #[test]
    fn test_validate_rejects_summary_exceeding_max_text_len() {
        let v = Validator::new();
        let mut output = valid_agent_output();
        output.summary = "x".repeat(50_001);
        let err = v.validate(&output).unwrap_err();
        let msg = format!("{err}");
        assert!(
            msg.contains("summary"),
            "error should mention summary: {msg}"
        );
    }

    #[test]
    fn test_validate_rejects_recommendation_exceeding_max_text_len() {
        let v = Validator::new();
        let mut output = valid_agent_output();
        output.recommendation = "x".repeat(50_001);
        let err = v.validate(&output).unwrap_err();
        let msg = format!("{err}");
        assert!(
            msg.contains("recommendation"),
            "error should mention recommendation: {msg}"
        );
    }

    // -- Findings count and field limits --
    // Each test exceeds the corresponding default limit by exactly one.

    #[test]
    fn test_validate_rejects_findings_count_exceeding_max_findings() {
        let v = Validator::new();
        let findings: Vec<Finding> = (0..101)
            .map(|i| Finding {
                severity: Severity::Info,
                title: format!("Finding {i}"),
                detail: "detail".to_string(),
            })
            .collect();
        let output = output_with_findings(findings);
        let err = v.validate(&output).unwrap_err();
        let msg = format!("{err}");
        assert!(
            msg.contains("findings"),
            "error should mention findings: {msg}"
        );
    }

    #[test]
    fn test_validate_rejects_finding_title_exceeding_max_title_len() {
        let v = Validator::new();
        let output = output_with_findings(vec![Finding {
            severity: Severity::Warning,
            title: "x".repeat(501),
            detail: "detail".to_string(),
        }]);
        let err = v.validate(&output).unwrap_err();
        let msg = format!("{err}");
        assert!(msg.contains("title"), "error should mention title: {msg}");
    }

    #[test]
    fn test_validate_rejects_finding_detail_exceeding_max_detail_len() {
        let v = Validator::new();
        let output = output_with_findings(vec![Finding {
            severity: Severity::Info,
            title: "Valid title".to_string(),
            detail: "x".repeat(10_001),
        }]);
        let err = v.validate(&output).unwrap_err();
        let msg = format!("{err}");
        assert!(msg.contains("detail"), "error should mention detail: {msg}");
    }

    // -- Happy path --

    #[test]
    fn test_validate_accepts_valid_agent_output() {
        let v = Validator::new();
        assert!(v.validate(&valid_agent_output()).is_ok());
    }

    // -- validate removes zero-width via clean_title pipeline --

    #[test]
    fn test_validate_strips_zero_width_characters_via_clean_title() {
        let v = Validator::new();
        // validate uses clean_title pipeline internally; zero-width chars are removed
        let output = output_with_findings(vec![Finding {
            severity: Severity::Info,
            title: "Hello\u{200B}World\u{FEFF}Test\u{200C}End".to_string(),
            detail: "detail".to_string(),
        }]);
        assert!(
            v.validate(&output).is_ok(),
            "valid title after clean should pass"
        );
    }

    // -- Validation order --
    // Each test breaks two rules at once and checks which error is reported first.

    #[test]
    fn test_validation_order_confidence_checked_before_text_fields() {
        let v = Validator::new();
        let mut output = valid_agent_output();
        output.confidence = 2.0;
        output.summary = "x".repeat(50_001);
        let err = v.validate(&output).unwrap_err();
        let msg = format!("{err}");
        assert!(
            msg.contains("confidence"),
            "confidence should be checked first, got: {msg}"
        );
    }

    #[test]
    fn test_validation_order_summary_checked_before_reasoning() {
        let v = Validator::new();
        let mut output = valid_agent_output();
        output.summary = "x".repeat(50_001);
        output.reasoning = "x".repeat(50_001);
        let err = v.validate(&output).unwrap_err();
        let msg = format!("{err}");
        assert!(
            msg.contains("summary"),
            "summary should be checked before reasoning, got: {msg}"
        );
    }

    #[test]
    fn test_validation_order_recommendation_checked_before_findings() {
        let v = Validator::new();
        let mut output = valid_agent_output();
        output.recommendation = "x".repeat(50_001);
        output.findings = (0..101)
            .map(|i| Finding {
                severity: Severity::Info,
                title: format!("Finding {i}"),
                detail: "detail".to_string(),
            })
            .collect();
        let err = v.validate(&output).unwrap_err();
        let msg = format!("{err}");
        assert!(
            msg.contains("recommendation"),
            "recommendation should be checked before findings, got: {msg}"
        );
    }

    // -- Title length checked after strip --

    #[test]
    fn test_title_length_checked_after_strip_zero_width() {
        let limits = ValidationLimits {
            max_title_len: 5,
            ..ValidationLimits::default()
        };
        let v = Validator::with_limits(limits);
        // Title is 8 chars raw but 5 after stripping 3 zero-width chars => should pass
        let output = output_with_findings(vec![Finding {
            severity: Severity::Info,
            title: "He\u{200B}l\u{FEFF}lo\u{200C}".to_string(),
            detail: "detail".to_string(),
        }]);
        assert!(v.validate(&output).is_ok());
    }

    // -- validate_mut tests --

    /// Builds an `Info` finding with the given title and a fixed, valid detail.
    fn finding_with_title(title: &str) -> Finding {
        Finding {
            severity: Severity::Info,
            title: title.to_string(),
            detail: "some detail".to_string(),
        }
    }

    // Exercises all three pipeline steps at once: tab -> space, zero-width
    // space removed, surrounding spaces trimmed.
    #[test]
    fn test_validate_mut_replaces_title_with_cleaned_form() {
        let v = Validator::new();
        let mut output = output_with_findings(vec![finding_with_title("  Issue\t\u{200b}Title  ")]);
        v.validate_mut(&mut output).unwrap();
        assert_eq!(output.findings[0].title, "Issue Title");
    }

    #[test]
    fn test_validate_mut_strips_zero_width_from_titles() {
        let v = Validator::new();
        let mut output = output_with_findings(vec![finding_with_title("Good\u{200b}Title")]);
        v.validate_mut(&mut output).unwrap();
        assert_eq!(output.findings[0].title, "GoodTitle");
    }

    // NOTE: despite the name, nothing is collapsed here — a single tab is
    // replaced by a single space. Runs of whitespace are deliberately kept
    // (see test_clean_title_does_not_collapse_double_spaces_interior).
    #[test]
    fn test_validate_mut_collapses_control_whitespace_in_titles() {
        let v = Validator::new();
        let mut output = output_with_findings(vec![finding_with_title("Bad\tTitle")]);
        v.validate_mut(&mut output).unwrap();
        assert_eq!(output.findings[0].title, "Bad Title");
    }

    #[test]
    fn test_validate_mut_preserves_order_of_findings() {
        let v = Validator::new();
        let titles = ["Alpha\u{200b}One", "Beta\tTwo", "  Gamma Three  "];
        let mut output =
            output_with_findings(titles.iter().map(|t| finding_with_title(t)).collect());
        v.validate_mut(&mut output).unwrap();
        assert_eq!(output.findings[0].title, "AlphaOne");
        assert_eq!(output.findings[1].title, "Beta Two");
        assert_eq!(output.findings[2].title, "Gamma Three");
    }

    #[test]
    fn test_validate_retains_original_behavior_on_immutable_slice() {
        let v = Validator::new();
        let output = output_with_findings(vec![finding_with_title("Normal Title")]);
        // Immutable validate does not mutate.
        let original_title = output.findings[0].title.clone();
        v.validate(&output).unwrap();
        assert_eq!(output.findings[0].title, original_title);
    }

    // -- clean_title tests --
    // Step 1 of the pipeline: control whitespace is replaced with a space.

    #[test]
    fn test_clean_title_replaces_tab_with_space() {
        assert_eq!(clean_title("foo\tbar"), "foo bar");
    }

    #[test]
    fn test_clean_title_replaces_newline_with_space() {
        assert_eq!(clean_title("foo\nbar"), "foo bar");
    }

    #[test]
    fn test_clean_title_replaces_vertical_tab_with_space() {
        assert_eq!(clean_title("foo\x0Bbar"), "foo bar");
    }

    #[test]
    fn test_clean_title_replaces_carriage_return_with_space() {
        assert_eq!(clean_title("foo\rbar"), "foo bar");
    }

    #[test]
    fn test_clean_title_replaces_nel_u0085_with_space() {
        assert_eq!(clean_title("foo\u{85}bar"), "foo bar");
    }

    // Step 2 of the pipeline: invisible characters and separators are removed
    // without leaving a space behind.

    #[test]
    fn test_clean_title_strips_zero_width_space_u200b() {
        assert_eq!(clean_title("a\u{200b}b"), "ab");
    }

    #[test]
    fn test_clean_title_strips_zwnj_u200c() {
        assert_eq!(clean_title("a\u{200c}b"), "ab");
    }

    #[test]
    fn test_clean_title_strips_zwj_u200d() {
        assert_eq!(clean_title("a\u{200d}b"), "ab");
    }

    #[test]
    fn test_clean_title_strips_lrm_rlm_u200e_u200f() {
        assert_eq!(clean_title("a\u{200e}b\u{200f}c"), "abc");
    }

    #[test]
    fn test_clean_title_strips_line_separator_u2028() {
        assert_eq!(clean_title("a\u{2028}b"), "ab");
    }

    #[test]
    fn test_clean_title_strips_paragraph_separator_u2029() {
        assert_eq!(clean_title("a\u{2029}b"), "ab");
    }

    #[test]
    fn test_clean_title_strips_narrow_nbsp_u202f() {
        assert_eq!(clean_title("a\u{202f}b"), "ab");
    }

    #[test]
    fn test_clean_title_strips_bidi_override_u202a_through_u202e() {
        for cp in ['\u{202a}', '\u{202b}', '\u{202c}', '\u{202d}', '\u{202e}'] {
            let input = format!("a{cp}b");
            assert_eq!(clean_title(&input), "ab", "failed for U+{:04X}", cp as u32);
        }
    }

    #[test]
    fn test_clean_title_strips_word_joiner_u2060() {
        assert_eq!(clean_title("a\u{2060}b"), "ab");
    }

    #[test]
    fn test_clean_title_strips_bom_ufeff() {
        assert_eq!(clean_title("\u{feff}hello"), "hello");
    }

    #[test]
    fn test_clean_title_strips_soft_hyphen_u00ad() {
        assert_eq!(clean_title("soft\u{00ad}hyphen"), "softhyphen");
    }

    // Step 3 of the pipeline: edges are trimmed, interior spacing is kept.

    #[test]
    fn test_clean_title_trims_leading_trailing_spaces() {
        assert_eq!(clean_title("  hello  "), "hello");
    }

    #[test]
    fn test_clean_title_trims_leading_trailing_tabs_after_replacement() {
        // Leading/trailing \t are replaced to spaces in step 1, then trimmed in step 3
        assert_eq!(clean_title("\thello\t"), "hello");
    }

    #[test]
    fn test_clean_title_preserves_interior_single_spaces() {
        assert_eq!(clean_title("hello world"), "hello world");
    }

    #[test]
    fn test_clean_title_does_not_collapse_double_spaces_interior() {
        // Interior whitespace is NOT collapsed — this is intentional Python parity
        assert_eq!(clean_title("foo  bar"), "foo  bar");
    }

    #[test]
    fn test_clean_title_preserves_unicode_letters() {
        assert_eq!(clean_title("café"), "café");
    }

    #[test]
    fn test_clean_title_empty_string_returns_empty() {
        assert_eq!(clean_title(""), "");
    }

    #[test]
    fn test_clean_title_all_whitespace_returns_empty() {
        assert_eq!(clean_title("   \t\n  "), "");
    }

    // Cleaning an already-cleaned title must be a no-op.
    #[test]
    fn test_clean_title_is_idempotent() {
        let inputs = [
            "hello\nworld",
            "  \u{200b}spaces\u{feff}  ",
            "café\u{2060}",
            "normal text",
            "",
        ];
        for input in inputs {
            let once = clean_title(input);
            let twice = clean_title(&once);
            assert_eq!(once, twice, "not idempotent for input: {input:?}");
        }
    }

    /// validate_mut is atomic: if any finding fails validation, no titles are mutated.
    #[test]
    fn test_validate_mut_atomic_no_partial_mutation_on_error() {
        let limits = ValidationLimits {
            max_title_len: 5,
            ..ValidationLimits::default()
        };
        let v = Validator::with_limits(limits);

        // Finding 0: valid clean title with zero-width chars (will be cleaned to "AB")
        // Finding 1: valid clean title with zero-width chars (will be cleaned to "CD")
        // Finding 2: title that exceeds max_title_len=5 after cleaning → triggers error
        let long_title_after_clean = "toolong"; // 7 chars after clean, exceeds max 5
        let mut output = output_with_findings(vec![
            Finding {
                severity: Severity::Info,
                title: "A\u{200b}B".to_string(), // contains zero-width char
                detail: "detail".to_string(),
            },
            Finding {
                severity: Severity::Info,
                title: "C\u{200b}D".to_string(), // contains zero-width char
                detail: "detail".to_string(),
            },
            Finding {
                severity: Severity::Info,
                title: long_title_after_clean.to_string(),
                detail: "detail".to_string(),
            },
        ]);

        // Capture original titles before calling validate_mut
        let orig0 = output.findings[0].title.clone();
        let orig1 = output.findings[1].title.clone();
        let orig2 = output.findings[2].title.clone();

        // validate_mut must fail due to finding 2 exceeding max_title_len
        let result = v.validate_mut(&mut output);
        assert!(result.is_err(), "should fail on over-length title");

        // Atomicity: no titles must have been modified
        assert_eq!(
            output.findings[0].title, orig0,
            "finding 0 title must not be mutated on error"
        );
        assert_eq!(
            output.findings[1].title, orig1,
            "finding 1 title must not be mutated on error"
        );
        assert_eq!(
            output.findings[2].title, orig2,
            "finding 2 title must not be mutated on error"
        );
    }
}