1use std::sync::LazyLock;
6
7use regex::Regex;
8
9use crate::error::MagiError;
10use crate::schema::{AgentOutput, Finding};
11
12static CONTROL_WHITESPACE_RE: LazyLock<Regex> = LazyLock::new(|| {
15 Regex::new(r"[\t\n\x0B\x0C\r\x{85}]").expect("valid CONTROL_WHITESPACE_RE regex")
16});
17
18pub(crate) static INVISIBLE_AND_SEPARATOR_RE: LazyLock<Regex> = LazyLock::new(|| {
22 Regex::new(r"[\u{200b}-\u{200f}\u{2028}-\u{202f}\u{2060}-\u{206f}\u{feff}\u{00ad}]")
23 .expect("valid INVISIBLE_AND_SEPARATOR_RE regex")
24});
25
26pub fn clean_title(input: &str) -> String {
49 let step1 = CONTROL_WHITESPACE_RE.replace_all(input, " ");
50 let step2 = INVISIBLE_AND_SEPARATOR_RE.replace_all(&step1, "");
51 step2.trim().to_string()
52}
53
/// Tunable bounds enforced by [`Validator`].
///
/// All length limits are measured in `char`s (Unicode scalar values), not
/// bytes. The type is `#[non_exhaustive]`, so code outside this crate should
/// start from [`ValidationLimits::default`] and adjust individual fields.
#[non_exhaustive]
#[derive(Debug, Clone, PartialEq)]
pub struct ValidationLimits {
    /// Maximum number of findings a single agent output may carry.
    pub max_findings: usize,
    /// Maximum length of a finding title, measured after title cleaning.
    pub max_title_len: usize,
    /// Maximum length of a finding's detail text.
    pub max_detail_len: usize,
    /// Maximum length of the summary, reasoning, and recommendation fields.
    pub max_text_len: usize,
    /// Smallest accepted confidence value (inclusive).
    pub confidence_min: f64,
    /// Largest accepted confidence value (inclusive).
    pub confidence_max: f64,
}
72
73impl Default for ValidationLimits {
74 fn default() -> Self {
75 Self {
76 max_findings: 100,
77 max_title_len: 500,
78 max_detail_len: 10_000,
79 max_text_len: 50_000,
80 confidence_min: 0.0,
81 confidence_max: 1.0,
82 }
83 }
84}
85
86pub struct Validator {
91 pub limits: ValidationLimits,
93}
94
95impl Default for Validator {
96 fn default() -> Self {
97 Self::new()
98 }
99}
100
101impl Validator {
102 pub fn new() -> Self {
104 Self::with_limits(ValidationLimits::default())
105 }
106
107 pub fn with_limits(limits: ValidationLimits) -> Self {
109 Self { limits }
110 }
111
112 pub fn validate(&self, output: &AgentOutput) -> Result<(), MagiError> {
117 self.validate_confidence(output.confidence)?;
118 self.validate_text_field("summary", &output.summary)?;
119 self.validate_text_field("reasoning", &output.reasoning)?;
120 self.validate_text_field("recommendation", &output.recommendation)?;
121 self.validate_findings(&output.findings)?;
122 Ok(())
123 }
124
125 pub fn validate_mut(&self, output: &mut AgentOutput) -> Result<(), MagiError> {
146 self.validate_confidence(output.confidence)?;
147 self.validate_text_field("summary", &output.summary)?;
148 self.validate_text_field("reasoning", &output.reasoning)?;
149 self.validate_text_field("recommendation", &output.recommendation)?;
150 if output.findings.len() > self.limits.max_findings {
151 return Err(MagiError::Validation(format!(
152 "findings count {} exceeds maximum of {}",
153 output.findings.len(),
154 self.limits.max_findings
155 )));
156 }
157 let cleaned_titles: Vec<String> = output
159 .findings
160 .iter()
161 .map(|f| clean_title(&f.title))
162 .collect();
163 for (cleaned, finding) in cleaned_titles.iter().zip(output.findings.iter()) {
164 self.validate_finding_fields(cleaned, &finding.detail)?;
165 }
166 for (cleaned, finding) in cleaned_titles.into_iter().zip(output.findings.iter_mut()) {
168 finding.title = cleaned;
169 }
170 Ok(())
171 }
172
173 fn validate_confidence(&self, confidence: f64) -> Result<(), MagiError> {
174 if !(confidence >= self.limits.confidence_min && confidence <= self.limits.confidence_max) {
175 return Err(MagiError::Validation(format!(
176 "confidence {} is out of range [{}, {}]",
177 confidence, self.limits.confidence_min, self.limits.confidence_max
178 )));
179 }
180 Ok(())
181 }
182
183 fn validate_text_field(&self, field_name: &str, value: &str) -> Result<(), MagiError> {
184 if value.chars().count() > self.limits.max_text_len {
185 return Err(MagiError::Validation(format!(
186 "{field_name} exceeds maximum length of {} characters",
187 self.limits.max_text_len
188 )));
189 }
190 Ok(())
191 }
192
193 fn validate_findings(&self, findings: &[Finding]) -> Result<(), MagiError> {
194 if findings.len() > self.limits.max_findings {
195 return Err(MagiError::Validation(format!(
196 "findings count {} exceeds maximum of {}",
197 findings.len(),
198 self.limits.max_findings
199 )));
200 }
201 for finding in findings {
202 self.validate_finding(finding)?;
203 }
204 Ok(())
205 }
206
207 fn validate_finding_fields(&self, title: &str, detail: &str) -> Result<(), MagiError> {
212 if title.is_empty() {
213 return Err(MagiError::Validation(
214 "finding title is empty after normalization".to_string(),
215 ));
216 }
217 if title.chars().count() > self.limits.max_title_len {
218 return Err(MagiError::Validation(format!(
219 "finding title exceeds maximum length of {} characters",
220 self.limits.max_title_len
221 )));
222 }
223 if detail.chars().count() > self.limits.max_detail_len {
224 return Err(MagiError::Validation(format!(
225 "finding detail exceeds maximum length of {} characters",
226 self.limits.max_detail_len
227 )));
228 }
229 Ok(())
230 }
231
232 fn validate_finding(&self, finding: &Finding) -> Result<(), MagiError> {
233 let cleaned = clean_title(&finding.title);
234 self.validate_finding_fields(&cleaned, &finding.detail)
235 }
236}
237
#[cfg(test)]
mod tests {
    use super::*;
    use crate::schema::*;

    // ---------------------------------------------------------------------
    // Fixtures and helpers
    // ---------------------------------------------------------------------

    /// An output that passes every check under the default limits.
    fn valid_agent_output() -> AgentOutput {
        AgentOutput {
            agent: AgentName::Melchior,
            verdict: Verdict::Approve,
            confidence: 0.9,
            summary: "Good code".to_string(),
            reasoning: "Well structured".to_string(),
            findings: vec![],
            recommendation: "Approve as-is".to_string(),
        }
    }

    /// A valid output whose confidence is replaced by `confidence`.
    fn output_with_confidence(confidence: f64) -> AgentOutput {
        AgentOutput {
            confidence,
            ..valid_agent_output()
        }
    }

    /// A valid output whose findings are replaced by `findings`.
    fn output_with_findings(findings: Vec<Finding>) -> AgentOutput {
        AgentOutput {
            findings,
            ..valid_agent_output()
        }
    }

    /// Builds a finding from its three fields.
    fn finding(severity: Severity, title: &str, detail: &str) -> Finding {
        Finding {
            severity,
            title: title.to_string(),
            detail: detail.to_string(),
        }
    }

    /// Builds an informational finding with the given title and a short detail.
    fn finding_with_title(title: &str) -> Finding {
        finding(Severity::Info, title, "some detail")
    }

    /// Builds `count` valid findings titled "Finding 0", "Finding 1", ...
    fn numbered_findings(count: usize) -> Vec<Finding> {
        (0..count)
            .map(|i| finding(Severity::Info, &format!("Finding {i}"), "detail"))
            .collect()
    }

    /// Default limits with only the title length overridden.
    fn limits_with_max_title_len(max_title_len: usize) -> ValidationLimits {
        ValidationLimits {
            max_title_len,
            ..ValidationLimits::default()
        }
    }

    /// Asserts that `validate` rejects `output` with a message containing `needle`.
    #[track_caller]
    fn assert_rejected_mentioning(v: &Validator, output: &AgentOutput, needle: &str) {
        let err = v.validate(output).unwrap_err();
        let msg = err.to_string();
        assert!(
            msg.contains(needle),
            "error should mention {needle}, got: {msg}"
        );
    }

    // ---------------------------------------------------------------------
    // Construction
    // ---------------------------------------------------------------------

    #[test]
    fn test_validator_new_creates_with_default_limits() {
        let v = Validator::new();
        assert_eq!(v.limits.max_findings, 100);
        assert_eq!(v.limits.max_title_len, 500);
        assert_eq!(v.limits.max_detail_len, 10_000);
        assert_eq!(v.limits.max_text_len, 50_000);
        assert!((v.limits.confidence_min - 0.0).abs() < f64::EPSILON);
        assert!((v.limits.confidence_max - 1.0).abs() < f64::EPSILON);
    }

    #[test]
    fn test_validator_with_limits_uses_custom_limits() {
        let custom = ValidationLimits {
            max_findings: 5,
            ..ValidationLimits::default()
        };
        let v = Validator::with_limits(custom);
        assert_eq!(v.limits.max_findings, 5);
    }

    // ---------------------------------------------------------------------
    // Confidence
    // ---------------------------------------------------------------------

    #[test]
    fn test_validate_rejects_confidence_above_one() {
        let v = Validator::new();
        assert_rejected_mentioning(&v, &output_with_confidence(1.5), "confidence");
    }

    #[test]
    fn test_validate_rejects_confidence_below_zero() {
        let v = Validator::new();
        assert_rejected_mentioning(&v, &output_with_confidence(-0.1), "confidence");
    }

    #[test]
    fn test_validate_accepts_confidence_at_boundaries() {
        let v = Validator::new();
        assert!(v.validate(&output_with_confidence(0.0)).is_ok());
        assert!(v.validate(&output_with_confidence(1.0)).is_ok());
    }

    #[test]
    fn test_validate_rejects_nan_confidence() {
        let v = Validator::new();
        assert!(v.validate(&output_with_confidence(f64::NAN)).is_err());
    }

    #[test]
    fn test_validate_rejects_infinity_confidence() {
        let v = Validator::new();
        assert!(v.validate(&output_with_confidence(f64::INFINITY)).is_err());
        assert!(
            v.validate(&output_with_confidence(f64::NEG_INFINITY))
                .is_err()
        );
    }

    // ---------------------------------------------------------------------
    // Finding titles
    // ---------------------------------------------------------------------

    #[test]
    fn test_validate_rejects_finding_with_only_zero_width_title() {
        let v = Validator::new();
        let output = output_with_findings(vec![finding(
            Severity::Warning,
            "\u{200B}\u{FEFF}\u{200C}",
            "detail",
        )]);
        assert_rejected_mentioning(&v, &output, "title");
    }

    #[test]
    fn test_validate_accepts_finding_with_normal_title() {
        let v = Validator::new();
        let output = output_with_findings(vec![finding(
            Severity::Info,
            "Security vulnerability",
            "detail",
        )]);
        assert!(v.validate(&output).is_ok());
    }

    // ---------------------------------------------------------------------
    // Text field lengths
    // ---------------------------------------------------------------------

    #[test]
    fn test_validate_rejects_reasoning_exceeding_max_text_len() {
        let v = Validator::new();
        let mut output = valid_agent_output();
        output.reasoning = "x".repeat(50_001);
        assert_rejected_mentioning(&v, &output, "reasoning");
    }

    #[test]
    fn test_validate_rejects_summary_exceeding_max_text_len() {
        let v = Validator::new();
        let mut output = valid_agent_output();
        output.summary = "x".repeat(50_001);
        assert_rejected_mentioning(&v, &output, "summary");
    }

    #[test]
    fn test_validate_rejects_recommendation_exceeding_max_text_len() {
        let v = Validator::new();
        let mut output = valid_agent_output();
        output.recommendation = "x".repeat(50_001);
        assert_rejected_mentioning(&v, &output, "recommendation");
    }

    /// Limits are expressed in characters, so multi-byte text must be judged
    /// by its character count rather than its byte length.
    #[test]
    fn test_text_length_counts_chars_not_bytes() {
        let v = Validator::with_limits(ValidationLimits {
            max_text_len: 3,
            ..ValidationLimits::default()
        });
        let mut output = valid_agent_output();
        output.reasoning = "ok".to_string();
        output.recommendation = "ok".to_string();

        // Three characters, six bytes: exactly at the limit.
        output.summary = "ééé".to_string();
        assert!(v.validate(&output).is_ok());

        // Four characters: one past the limit.
        output.summary = "éééé".to_string();
        assert_rejected_mentioning(&v, &output, "summary");
    }

    // ---------------------------------------------------------------------
    // Findings count and per-finding lengths
    // ---------------------------------------------------------------------

    #[test]
    fn test_validate_rejects_findings_count_exceeding_max_findings() {
        let v = Validator::new();
        let output = output_with_findings(numbered_findings(101));
        assert_rejected_mentioning(&v, &output, "findings");
    }

    #[test]
    fn test_validate_rejects_finding_title_exceeding_max_title_len() {
        let v = Validator::new();
        let output = output_with_findings(vec![finding(
            Severity::Warning,
            &"x".repeat(501),
            "detail",
        )]);
        assert_rejected_mentioning(&v, &output, "title");
    }

    #[test]
    fn test_validate_rejects_finding_detail_exceeding_max_detail_len() {
        let v = Validator::new();
        let output = output_with_findings(vec![finding(
            Severity::Info,
            "Valid title",
            &"x".repeat(10_001),
        )]);
        assert_rejected_mentioning(&v, &output, "detail");
    }

    // ---------------------------------------------------------------------
    // Happy path
    // ---------------------------------------------------------------------

    #[test]
    fn test_validate_accepts_valid_agent_output() {
        let v = Validator::new();
        assert!(v.validate(&valid_agent_output()).is_ok());
    }

    #[test]
    fn test_validate_strips_zero_width_characters_via_clean_title() {
        let v = Validator::new();
        let output = output_with_findings(vec![finding(
            Severity::Info,
            "Hello\u{200B}World\u{FEFF}Test\u{200C}End",
            "detail",
        )]);
        assert!(
            v.validate(&output).is_ok(),
            "valid title after clean should pass"
        );
    }

    // ---------------------------------------------------------------------
    // Check ordering
    // ---------------------------------------------------------------------

    #[test]
    fn test_validation_order_confidence_checked_before_text_fields() {
        let v = Validator::new();
        let mut output = valid_agent_output();
        output.confidence = 2.0;
        output.summary = "x".repeat(50_001);
        assert_rejected_mentioning(&v, &output, "confidence");
    }

    #[test]
    fn test_validation_order_summary_checked_before_reasoning() {
        let v = Validator::new();
        let mut output = valid_agent_output();
        output.summary = "x".repeat(50_001);
        output.reasoning = "x".repeat(50_001);
        assert_rejected_mentioning(&v, &output, "summary");
    }

    #[test]
    fn test_validation_order_recommendation_checked_before_findings() {
        let v = Validator::new();
        let mut output = valid_agent_output();
        output.recommendation = "x".repeat(50_001);
        output.findings = numbered_findings(101);
        assert_rejected_mentioning(&v, &output, "recommendation");
    }

    #[test]
    fn test_title_length_checked_after_strip_zero_width() {
        let v = Validator::with_limits(limits_with_max_title_len(5));
        // Eight characters raw, but exactly five ("Hello") once cleaned.
        let output = output_with_findings(vec![finding(
            Severity::Info,
            "He\u{200B}l\u{FEFF}lo\u{200C}",
            "detail",
        )]);
        assert!(v.validate(&output).is_ok());
    }

    // ---------------------------------------------------------------------
    // validate_mut
    // ---------------------------------------------------------------------

    #[test]
    fn test_validate_mut_replaces_title_with_cleaned_form() {
        let v = Validator::new();
        let mut output =
            output_with_findings(vec![finding_with_title("  Issue\t\u{200b}Title  ")]);
        v.validate_mut(&mut output).unwrap();
        assert_eq!(output.findings[0].title, "Issue Title");
    }

    #[test]
    fn test_validate_mut_strips_zero_width_from_titles() {
        let v = Validator::new();
        let mut output = output_with_findings(vec![finding_with_title("Good\u{200b}Title")]);
        v.validate_mut(&mut output).unwrap();
        assert_eq!(output.findings[0].title, "GoodTitle");
    }

    #[test]
    fn test_validate_mut_collapses_control_whitespace_in_titles() {
        let v = Validator::new();
        let mut output = output_with_findings(vec![finding_with_title("Bad\tTitle")]);
        v.validate_mut(&mut output).unwrap();
        assert_eq!(output.findings[0].title, "Bad Title");
    }

    #[test]
    fn test_validate_mut_preserves_order_of_findings() {
        let v = Validator::new();
        let titles = ["Alpha\u{200b}One", "Beta\tTwo", " Gamma Three "];
        let mut output =
            output_with_findings(titles.iter().map(|t| finding_with_title(t)).collect());
        v.validate_mut(&mut output).unwrap();
        assert_eq!(output.findings[0].title, "AlphaOne");
        assert_eq!(output.findings[1].title, "Beta Two");
        assert_eq!(output.findings[2].title, "Gamma Three");
    }

    #[test]
    fn test_validate_retains_original_behavior_on_immutable_slice() {
        let v = Validator::new();
        let output = output_with_findings(vec![finding_with_title("Normal Title")]);
        let original_title = output.findings[0].title.clone();
        v.validate(&output).unwrap();
        assert_eq!(output.findings[0].title, original_title);
    }

    #[test]
    fn test_validate_mut_atomic_no_partial_mutation_on_error() {
        let v = Validator::with_limits(limits_with_max_title_len(5));

        // The first two titles would change if cleaned; the third is too long
        // even after cleaning, so the whole call must fail without writing.
        let mut output = output_with_findings(vec![
            finding(Severity::Info, "A\u{200b}B", "detail"),
            finding(Severity::Info, "C\u{200b}D", "detail"),
            finding(Severity::Info, "toolong", "detail"),
        ]);
        let originals: Vec<String> = output
            .findings
            .iter()
            .map(|f| f.title.clone())
            .collect();

        let result = v.validate_mut(&mut output);
        assert!(result.is_err(), "should fail on over-length title");

        for (index, (finding, original)) in output.findings.iter().zip(&originals).enumerate() {
            assert_eq!(
                &finding.title, original,
                "finding {index} title must not be mutated on error"
            );
        }
    }

    // ---------------------------------------------------------------------
    // clean_title: control whitespace becomes a space
    // ---------------------------------------------------------------------

    #[test]
    fn test_clean_title_replaces_tab_with_space() {
        assert_eq!(clean_title("foo\tbar"), "foo bar");
    }

    #[test]
    fn test_clean_title_replaces_newline_with_space() {
        assert_eq!(clean_title("foo\nbar"), "foo bar");
    }

    #[test]
    fn test_clean_title_replaces_vertical_tab_with_space() {
        assert_eq!(clean_title("foo\x0Bbar"), "foo bar");
    }

    #[test]
    fn test_clean_title_replaces_form_feed_with_space() {
        assert_eq!(clean_title("foo\x0Cbar"), "foo bar");
    }

    #[test]
    fn test_clean_title_replaces_carriage_return_with_space() {
        assert_eq!(clean_title("foo\rbar"), "foo bar");
    }

    #[test]
    fn test_clean_title_replaces_nel_u0085_with_space() {
        assert_eq!(clean_title("foo\u{85}bar"), "foo bar");
    }

    // ---------------------------------------------------------------------
    // clean_title: invisible and separator characters are removed
    // ---------------------------------------------------------------------

    #[test]
    fn test_clean_title_strips_zero_width_space_u200b() {
        assert_eq!(clean_title("a\u{200b}b"), "ab");
    }

    #[test]
    fn test_clean_title_strips_zwnj_u200c() {
        assert_eq!(clean_title("a\u{200c}b"), "ab");
    }

    #[test]
    fn test_clean_title_strips_zwj_u200d() {
        assert_eq!(clean_title("a\u{200d}b"), "ab");
    }

    #[test]
    fn test_clean_title_strips_lrm_rlm_u200e_u200f() {
        assert_eq!(clean_title("a\u{200e}b\u{200f}c"), "abc");
    }

    #[test]
    fn test_clean_title_strips_line_separator_u2028() {
        assert_eq!(clean_title("a\u{2028}b"), "ab");
    }

    #[test]
    fn test_clean_title_strips_paragraph_separator_u2029() {
        assert_eq!(clean_title("a\u{2029}b"), "ab");
    }

    #[test]
    fn test_clean_title_strips_narrow_nbsp_u202f() {
        assert_eq!(clean_title("a\u{202f}b"), "ab");
    }

    #[test]
    fn test_clean_title_strips_bidi_override_u202a_through_u202e() {
        for cp in ['\u{202a}', '\u{202b}', '\u{202c}', '\u{202d}', '\u{202e}'] {
            let input = format!("a{cp}b");
            assert_eq!(clean_title(&input), "ab", "failed for U+{:04X}", cp as u32);
        }
    }

    #[test]
    fn test_clean_title_strips_word_joiner_u2060() {
        assert_eq!(clean_title("a\u{2060}b"), "ab");
    }

    #[test]
    fn test_clean_title_strips_bom_ufeff() {
        assert_eq!(clean_title("\u{feff}hello"), "hello");
    }

    #[test]
    fn test_clean_title_strips_soft_hyphen_u00ad() {
        assert_eq!(clean_title("soft\u{00ad}hyphen"), "softhyphen");
    }

    // ---------------------------------------------------------------------
    // clean_title: trimming and preservation
    // ---------------------------------------------------------------------

    #[test]
    fn test_clean_title_trims_leading_trailing_spaces() {
        assert_eq!(clean_title("  hello  "), "hello");
    }

    #[test]
    fn test_clean_title_trims_leading_trailing_tabs_after_replacement() {
        // Tabs become spaces first, and those spaces are then trimmed.
        assert_eq!(clean_title("\thello\t"), "hello");
    }

    #[test]
    fn test_clean_title_preserves_interior_single_spaces() {
        assert_eq!(clean_title("hello world"), "hello world");
    }

    #[test]
    fn test_clean_title_does_not_collapse_double_spaces_interior() {
        // Two interior spaces must survive unchanged.
        assert_eq!(clean_title("foo  bar"), "foo  bar");
    }

    #[test]
    fn test_clean_title_preserves_unicode_letters() {
        assert_eq!(clean_title("café"), "café");
    }

    #[test]
    fn test_clean_title_empty_string_returns_empty() {
        assert_eq!(clean_title(""), "");
    }

    #[test]
    fn test_clean_title_all_whitespace_returns_empty() {
        assert_eq!(clean_title("  \t\n  "), "");
    }

    #[test]
    fn test_clean_title_is_idempotent() {
        let inputs = [
            "hello\nworld",
            "  \u{200b}spaces\u{feff}  ",
            "café\u{2060}",
            "normal text",
            "",
        ];
        for input in inputs {
            let once = clean_title(input);
            let twice = clean_title(&once);
            assert_eq!(once, twice, "not idempotent for input: {input:?}");
        }
    }
}