1#![forbid(unsafe_code)]
33#![warn(missing_docs)]
34
35use std::collections::BTreeMap;
36
37use ethos_core::codes::WarningCode;
38use ethos_core::grounding::{
39 CoordinateOrigin, GroundingCell, GroundingElement, GroundingSource, GroundingSpan,
40 GroundingTable, PageGeometry,
41};
42use ethos_core::verify_types::{
43 compute_all_evidence_grounded, CapabilityLimit, Check, CheckReason, CheckStatus, Claim,
44 ClaimKind, Evidence, GroundingMeta, MatchMethod, TextNormalization, VerificationConfig,
45 VerificationReport,
46};
47use serde::{Deserialize, Serialize};
48
49#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
55#[serde(untagged)]
56pub enum CitationInput {
57 Claims(Vec<Claim>),
59 Envelope(CitationEnvelope),
61}
62
63#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
65#[serde(deny_unknown_fields)]
66pub struct CitationEnvelope {
67 #[serde(default)]
69 pub document_fingerprint: Option<String>,
70 pub claims: Vec<Claim>,
72}
73
74impl CitationInput {
75 pub fn claims(&self) -> &[Claim] {
77 match self {
78 CitationInput::Claims(claims) => claims,
79 CitationInput::Envelope(envelope) => &envelope.claims,
80 }
81 }
82
83 pub fn document_fingerprint(&self) -> Option<&str> {
85 match self {
86 CitationInput::Claims(_) => None,
87 CitationInput::Envelope(envelope) => envelope.document_fingerprint.as_deref(),
88 }
89 }
90
91 fn into_parts(self) -> (Option<String>, Vec<Claim>) {
92 match self {
93 CitationInput::Claims(claims) => (None, claims),
94 CitationInput::Envelope(envelope) => (envelope.document_fingerprint, envelope.claims),
95 }
96 }
97}
98
99pub fn capability_warnings(
103 source: &dyn GroundingSource,
104 config: &VerificationConfig,
105) -> Vec<WarningCode> {
106 if capability_limits(source, config).is_empty() {
107 Vec::new()
108 } else {
109 vec![WarningCode::CapabilityLimited]
110 }
111}
112
113pub fn capability_limits(
117 source: &dyn GroundingSource,
118 config: &VerificationConfig,
119) -> Vec<CapabilityLimit> {
120 capability_limits_for(source.capabilities(), config)
121}
122
123fn capability_limits_for(
124 caps: ethos_core::grounding::Capabilities,
125 config: &VerificationConfig,
126) -> Vec<CapabilityLimit> {
127 let mut limits = Vec::new();
128 if !caps.fingerprint && config.staleness.require_fingerprint_match {
129 limits.push(CapabilityLimit::MissingFingerprint);
130 }
131 if !caps.spans {
132 limits.push(CapabilityLimit::MissingSpans);
133 }
134 if !caps.char_offsets {
135 limits.push(CapabilityLimit::MissingCharOffsets);
136 }
137 if !caps.tables && config.claim_kinds.contains(&ClaimKind::TableCell) {
138 limits.push(CapabilityLimit::MissingTables);
139 }
140 if caps.coordinate_origin == CoordinateOrigin::Unknown {
141 limits.push(CapabilityLimit::UnknownCoordinateOrigin);
142 }
143 if config.evidence.is_some_and(|e| e.include_crops) && !caps.crop_support {
144 limits.push(CapabilityLimit::MissingCropSupport);
145 }
146 limits
147}
148
149fn push_warning(warnings: &mut Vec<WarningCode>, warning: WarningCode) {
150 if !warnings.contains(&warning) {
151 warnings.push(warning);
152 }
153}
154
155pub fn verify_claims(
157 source: &dyn GroundingSource,
158 citations: CitationInput,
159 config: &VerificationConfig,
160 config_sha256: String,
161) -> VerificationReport {
162 let (citation_fingerprint, claims) = citations.into_parts();
163 let index = SourceIndex::new(source);
164 let source_fingerprint = source.fingerprint();
165 let capability_limits = capability_limits_for(index.capabilities, config);
166 let warnings = if capability_limits.is_empty() {
167 Vec::new()
168 } else {
169 vec![WarningCode::CapabilityLimited]
170 };
171 let fingerprint_stale = config.staleness.require_fingerprint_match
172 && matches!(
173 (citation_fingerprint.as_deref(), source_fingerprint.as_deref()),
174 (Some(expected), Some(actual)) if expected != actual
175 );
176 let fingerprint_unverifiable = config.staleness.require_fingerprint_match
177 && citation_fingerprint.is_some()
178 && source_fingerprint.is_none();
179 let citation_fingerprint_missing = config.staleness.require_fingerprint_match
180 && citation_fingerprint.is_none()
181 && source_fingerprint.is_some();
182 let include_text = config.evidence.is_some_and(|e| e.include_text);
183 let include_crops = config.evidence.is_some_and(|e| e.include_crops);
184 let mut unsupported = Vec::new();
185 let checks: Vec<Check> = claims
186 .into_iter()
187 .enumerate()
188 .map(|(idx, claim)| {
189 check_claim(
190 idx + 1,
191 source,
192 &index,
193 claim,
194 config,
195 CheckContext {
196 fingerprint_stale,
197 fingerprint_unverifiable,
198 citation_fingerprint_missing,
199 include_text,
200 include_crops,
201 },
202 &mut unsupported,
203 )
204 })
205 .collect();
206
207 VerificationReport {
208 schema_version: ethos_core::SCHEMA_VERSION.to_string(),
209 document_fingerprint: source_fingerprint,
210 verification_config_sha256: config_sha256,
211 grounding: GroundingMeta {
212 parser: source.parser(),
213 capabilities: index.capabilities,
214 },
215 capability_limits,
216 fingerprint_stale,
217 all_evidence_grounded: compute_all_evidence_grounded(
218 &checks,
219 &unsupported,
220 fingerprint_stale,
221 ),
222 checks,
223 unsupported_claim_kinds: unsupported,
224 warnings,
225 }
226}
227
/// Per-run settings shared by every claim check in a single
/// `verify_claims` call.
#[derive(Debug, Clone, Copy)]
struct CheckContext {
    /// Citation and source fingerprints are both present and differ.
    fingerprint_stale: bool,
    /// The citation carries a fingerprint but the source reports none.
    fingerprint_unverifiable: bool,
    /// The source reports a fingerprint but the citation carries none.
    citation_fingerprint_missing: bool,
    /// Copy the resolved text into the evidence.
    include_text: bool,
    /// Ask the source for a crop reference when building evidence.
    include_crops: bool,
}
236
237fn check_claim(
238 id: usize,
239 source: &dyn GroundingSource,
240 index: &SourceIndex,
241 claim: Claim,
242 config: &VerificationConfig,
243 context: CheckContext,
244 unsupported: &mut Vec<String>,
245) -> Check {
246 let mut warnings = Vec::new();
247 let check_id = format!("v{id:04}");
248
249 if !claim.citation.has_locator() {
250 return Check {
251 id: check_id,
252 claim,
253 status: CheckStatus::Error,
254 reason: Some(CheckReason::MissingLocator),
255 match_method: MatchMethod::None,
256 semantic_unverified: false,
257 evidence: None,
258 warnings,
259 };
260 }
261
262 if !is_supported_kind(claim.kind) || !config.claim_kinds.contains(&claim.kind) {
263 push_unsupported(unsupported, claim.kind);
264 return Check {
265 id: check_id,
266 claim,
267 status: CheckStatus::UnsupportedClaimKind,
268 reason: Some(CheckReason::UnsupportedClaimKind),
269 match_method: MatchMethod::None,
270 semantic_unverified: false,
271 evidence: None,
272 warnings,
273 };
274 }
275
276 if requires_text(claim.kind)
277 && claim
278 .text
279 .as_deref()
280 .is_none_or(|text| text.trim().is_empty())
281 {
282 return Check {
283 id: check_id,
284 claim,
285 status: CheckStatus::Error,
286 reason: Some(CheckReason::MissingRequiredText),
287 match_method: MatchMethod::None,
288 semantic_unverified: false,
289 evidence: None,
290 warnings,
291 };
292 }
293
294 if context.fingerprint_stale {
295 return Check {
296 id: check_id,
297 claim,
298 status: CheckStatus::Stale,
299 reason: Some(CheckReason::StaleFingerprint),
300 match_method: MatchMethod::None,
301 semantic_unverified: false,
302 evidence: None,
303 warnings,
304 };
305 }
306
307 if context.fingerprint_unverifiable {
308 push_warning(&mut warnings, WarningCode::CapabilityLimited);
309 return Check {
310 id: check_id,
311 claim,
312 status: CheckStatus::CapabilityBlocked,
313 reason: Some(CheckReason::MissingSourceFingerprint),
314 match_method: MatchMethod::None,
315 semantic_unverified: false,
316 evidence: None,
317 warnings,
318 };
319 }
320
321 if context.citation_fingerprint_missing {
322 return Check {
323 id: check_id,
324 claim,
325 status: CheckStatus::Stale,
326 reason: Some(CheckReason::MissingCitationFingerprint),
327 match_method: MatchMethod::None,
328 semantic_unverified: false,
329 evidence: None,
330 warnings,
331 };
332 }
333
334 let mut target = match resolve_target(index, &claim, config) {
335 TargetResolution::Found(target) => target,
336 TargetResolution::NotFound(reason) => {
337 return Check {
338 id: check_id,
339 claim,
340 status: CheckStatus::NotFound,
341 reason: Some(reason),
342 match_method: MatchMethod::None,
343 semantic_unverified: false,
344 evidence: None,
345 warnings,
346 };
347 }
348 TargetResolution::Invalid(reason) => {
349 return Check {
350 id: check_id,
351 claim,
352 status: CheckStatus::Error,
353 reason: Some(reason),
354 match_method: MatchMethod::None,
355 semantic_unverified: false,
356 evidence: None,
357 warnings,
358 };
359 }
360 TargetResolution::CapabilityBlocked(reason) => {
361 push_warning(&mut warnings, WarningCode::CapabilityLimited);
362 return Check {
363 id: check_id,
364 claim,
365 status: CheckStatus::CapabilityBlocked,
366 reason: Some(reason),
367 match_method: MatchMethod::None,
368 semantic_unverified: false,
369 evidence: None,
370 warnings,
371 };
372 }
373 };
374
375 if let Some(adjacent_target) = adjacent_quote_target(index, &claim, &target, config) {
376 target = adjacent_target;
377 }
378
379 let evidence = make_evidence(source, &target, context.include_text, context.include_crops);
380 let (status, match_method, reason) =
381 check_resolved_claim(claim.kind, claim.text.as_deref(), &target, config);
382 Check {
383 id: check_id,
384 claim,
385 status,
386 reason,
387 match_method,
388 semantic_unverified: false,
389 evidence,
390 warnings,
391 }
392}
393
394fn check_resolved_claim(
395 kind: ClaimKind,
396 expected_text: Option<&str>,
397 target: &FoundTarget,
398 config: &VerificationConfig,
399) -> (CheckStatus, MatchMethod, Option<CheckReason>) {
400 match kind {
401 ClaimKind::Presence => check_presence_claim(),
402 ClaimKind::Quote | ClaimKind::Value | ClaimKind::TableCell => {
403 check_text_claim(kind, expected_text, target, config)
404 }
405 _ => unreachable!("unsupported kinds returned before matching"),
406 }
407}
408
409fn check_presence_claim() -> (CheckStatus, MatchMethod, Option<CheckReason>) {
410 (CheckStatus::Grounded, MatchMethod::PresenceOnly, None)
411}
412
413fn check_text_claim(
414 kind: ClaimKind,
415 expected_text: Option<&str>,
416 target: &FoundTarget,
417 config: &VerificationConfig,
418) -> (CheckStatus, MatchMethod, Option<CheckReason>) {
419 let match_method = if target.from_table_cell {
420 MatchMethod::TableCellLookup
421 } else {
422 text_match_method(kind, config)
423 };
424 let (status, reason) = match (expected_text, target.text.as_deref()) {
425 (Some(expected), Some(actual)) if text_matches(kind, expected, actual, config) => {
426 (CheckStatus::Grounded, None)
427 }
428 _ => (CheckStatus::Mismatch, Some(CheckReason::TextMismatch)),
429 };
430 (status, match_method, reason)
431}
432
433fn is_supported_kind(kind: ClaimKind) -> bool {
434 matches!(
435 kind,
436 ClaimKind::Quote | ClaimKind::Value | ClaimKind::Presence | ClaimKind::TableCell
437 )
438}
439
440fn requires_text(kind: ClaimKind) -> bool {
441 matches!(
442 kind,
443 ClaimKind::Quote | ClaimKind::Value | ClaimKind::TableCell
444 )
445}
446
447fn push_unsupported(unsupported: &mut Vec<String>, kind: ClaimKind) {
448 let name = claim_kind_name(kind).to_string();
449 if !unsupported.contains(&name) {
450 unsupported.push(name);
451 }
452}
453
454fn claim_kind_name(kind: ClaimKind) -> &'static str {
455 match kind {
456 ClaimKind::Quote => "quote",
457 ClaimKind::Value => "value",
458 ClaimKind::Presence => "presence",
459 ClaimKind::TableCell => "table_cell",
460 ClaimKind::Region => "region",
461 ClaimKind::Other => "other",
462 }
463}
464
/// The piece of the source document a citation resolved to.
#[derive(Debug, Clone)]
struct FoundTarget {
    /// Id of the page holding the target.
    page: Option<String>,
    /// Bounding box of the target on that page.
    bbox: Option<[i64; 4]>,
    /// Text of the target; `None` for page-only targets and elements without
    /// text.
    text: Option<String>,
    /// Set when the target was resolved through a table-cell lookup.
    from_table_cell: bool,
    /// Position in `SourceIndex::elements` when the target is a single
    /// element; `None` otherwise.
    element_index: Option<usize>,
}
473
474struct SourceIndex {
480 capabilities: ethos_core::grounding::Capabilities,
481 pages: Vec<PageGeometry>,
482 elements: Vec<GroundingElement>,
483 spans: Vec<GroundingSpan>,
484 tables: Vec<GroundingTable>,
485 element_by_id: BTreeMap<String, usize>,
486 span_by_id: BTreeMap<String, usize>,
487 table_by_id: BTreeMap<String, usize>,
488}
489
490impl SourceIndex {
491 fn new(source: &dyn GroundingSource) -> Self {
492 let capabilities = source.capabilities();
493 let pages = source.pages();
494 let elements = source.elements();
495 let spans = if capabilities.spans {
496 source.spans()
497 } else {
498 Vec::new()
499 };
500 let tables = if capabilities.tables {
501 source.tables()
502 } else {
503 Vec::new()
504 };
505 let element_by_id = index_elements(&elements);
506 let span_by_id = index_spans(&spans);
507 let table_by_id = index_tables(&tables);
508
509 SourceIndex {
510 capabilities,
511 pages,
512 elements,
513 spans,
514 tables,
515 element_by_id,
516 span_by_id,
517 table_by_id,
518 }
519 }
520
521 fn span(&self, id: &str) -> Option<&GroundingSpan> {
522 self.span_by_id
523 .get(id)
524 .and_then(|index| self.spans.get(*index))
525 }
526
527 fn table(&self, id: &str) -> Option<&GroundingTable> {
528 self.table_by_id
529 .get(id)
530 .and_then(|index| self.tables.get(*index))
531 }
532}
533
534fn index_elements(elements: &[GroundingElement]) -> BTreeMap<String, usize> {
535 let mut index = BTreeMap::new();
536 for (position, element) in elements.iter().enumerate() {
537 index.entry(element.id.clone()).or_insert(position);
538 }
539 index
540}
541
542fn index_spans(spans: &[GroundingSpan]) -> BTreeMap<String, usize> {
543 let mut index = BTreeMap::new();
544 for (position, span) in spans.iter().enumerate() {
545 index.entry(span.id.clone()).or_insert(position);
546 }
547 index
548}
549
550fn index_tables(tables: &[GroundingTable]) -> BTreeMap<String, usize> {
551 let mut index = BTreeMap::new();
552 for (position, table) in tables.iter().enumerate() {
553 index.entry(table.id.clone()).or_insert(position);
554 }
555 index
556}
557
558enum TargetResolution {
559 Found(FoundTarget),
560 NotFound(CheckReason),
561 Invalid(CheckReason),
562 CapabilityBlocked(CheckReason),
563}
564
565fn resolve_target(
566 index: &SourceIndex,
567 claim: &Claim,
568 config: &VerificationConfig,
569) -> TargetResolution {
570 if claim.kind == ClaimKind::TableCell
571 || claim.citation.table_id.is_some()
572 || claim.citation.cell.is_some()
573 {
574 return resolve_table_cell(index, claim);
575 }
576
577 if let Some(span_id) = claim.citation.span_id.as_deref() {
578 if !index.capabilities.spans {
579 return TargetResolution::CapabilityBlocked(CheckReason::MissingSpanCapability);
580 }
581 return index
582 .span(span_id)
583 .map(target_from_span)
584 .map(TargetResolution::Found)
585 .unwrap_or(TargetResolution::NotFound(CheckReason::SpanNotFound));
586 }
587
588 if let Some(element_id) = claim.citation.element_id.as_deref() {
589 return index
590 .element_by_id
591 .get(element_id)
592 .and_then(|position| {
593 index
594 .elements
595 .get(*position)
596 .map(|element| (*position, element))
597 })
598 .map(|(position, element)| target_from_element(element, Some(position)))
599 .map(TargetResolution::Found)
600 .unwrap_or(TargetResolution::NotFound(CheckReason::ElementNotFound));
601 }
602
603 if let (Some(page), Some(bbox)) = (claim.citation.page.as_deref(), claim.citation.bbox) {
604 if index.capabilities.coordinate_origin == CoordinateOrigin::Unknown {
605 return TargetResolution::CapabilityBlocked(CheckReason::UnknownCoordinateOrigin);
606 }
607 let tolerance = config.matching.bbox_containment_tolerance_q.unwrap_or(0);
608 return index
609 .elements
610 .iter()
611 .enumerate()
612 .filter(|(_, element)| {
613 element.page == page && contains_bbox(element.bbox, bbox, tolerance)
614 })
615 .min_by_key(|(position, element)| (bbox_area(element.bbox), *position))
616 .map(|(position, element)| target_from_element(element, Some(position)))
617 .map(TargetResolution::Found)
618 .unwrap_or(TargetResolution::NotFound(CheckReason::BboxNotFound));
619 }
620
621 if claim.citation.bbox.is_some() {
622 return TargetResolution::Invalid(CheckReason::MissingPageForBbox);
623 }
624
625 if let Some(page) = claim.citation.page.as_deref() {
626 return index
627 .pages
628 .iter()
629 .find(|candidate| candidate.id == page)
630 .map(|found| {
631 TargetResolution::Found(FoundTarget {
632 page: Some(found.id.clone()),
633 bbox: Some([0, 0, found.width, found.height]),
634 text: None,
635 from_table_cell: false,
636 element_index: None,
637 })
638 })
639 .unwrap_or(TargetResolution::NotFound(CheckReason::PageNotFound));
640 }
641
642 TargetResolution::NotFound(CheckReason::MissingLocator)
643}
644
645fn target_from_element(element: &GroundingElement, element_index: Option<usize>) -> FoundTarget {
646 FoundTarget {
647 page: Some(element.page.clone()),
648 bbox: Some(element.bbox),
649 text: element.text.clone(),
650 from_table_cell: false,
651 element_index,
652 }
653}
654
655fn target_from_span(span: &GroundingSpan) -> FoundTarget {
656 FoundTarget {
657 page: Some(span.page.clone()),
658 bbox: Some(span.bbox),
659 text: Some(span.text.clone()),
660 from_table_cell: false,
661 element_index: None,
662 }
663}
664
665fn resolve_table_cell(index: &SourceIndex, claim: &Claim) -> TargetResolution {
666 let Some(table_id) = claim.citation.table_id.as_deref() else {
667 return TargetResolution::Invalid(CheckReason::MissingTableCellLocator);
668 };
669 let Some(cell_ref) = claim.citation.cell else {
670 return TargetResolution::Invalid(CheckReason::MissingTableCellLocator);
671 };
672 if !index.capabilities.tables {
673 return TargetResolution::CapabilityBlocked(CheckReason::MissingTableCapability);
674 }
675 let Some(table) = index.table(table_id) else {
676 return TargetResolution::NotFound(CheckReason::TableNotFound);
677 };
678 target_from_table_cell(table, cell_ref.row, cell_ref.col)
679 .map(TargetResolution::Found)
680 .unwrap_or(TargetResolution::NotFound(CheckReason::TableCellNotFound))
681}
682
683fn target_from_table_cell(table: &GroundingTable, row: u32, col: u32) -> Option<FoundTarget> {
684 table
685 .cells
686 .iter()
687 .find(|cell| table_cell_covers(cell, row, col))
688 .map(|cell| target_from_cell(&table.page, cell))
689}
690
691fn table_cell_covers(cell: &GroundingCell, row: u32, col: u32) -> bool {
692 let row_end = cell.row.saturating_add(cell.row_span.max(1));
693 let col_end = cell.col.saturating_add(cell.col_span.max(1));
694 row >= cell.row && row < row_end && col >= cell.col && col < col_end
695}
696
697fn target_from_cell(page: &str, cell: &GroundingCell) -> FoundTarget {
698 FoundTarget {
699 page: Some(page.to_string()),
700 bbox: Some(cell.bbox),
701 text: Some(cell.text.clone()),
702 from_table_cell: true,
703 element_index: None,
704 }
705}
706
707fn adjacent_quote_target(
708 index: &SourceIndex,
709 claim: &Claim,
710 target: &FoundTarget,
711 config: &VerificationConfig,
712) -> Option<FoundTarget> {
713 if claim.kind != ClaimKind::Quote {
714 return None;
715 }
716 let expected = claim.text.as_deref()?;
717 if target
718 .text
719 .as_deref()
720 .is_some_and(|actual| text_matches(ClaimKind::Quote, expected, actual, config))
721 {
722 return None;
723 }
724
725 if claim.citation.bbox.is_some() {
726 return None;
727 }
728
729 if claim.citation.element_id.is_some() {
730 if let Some(position) = target.element_index {
731 return adjacent_text_pair_for_element(index, position, expected, config);
732 }
733 }
734
735 None
736}
737
738fn adjacent_text_pair_for_element(
739 index: &SourceIndex,
740 position: usize,
741 expected: &str,
742 config: &VerificationConfig,
743) -> Option<FoundTarget> {
744 let current = index.elements.get(position)?;
745 if let Some(second) = position
746 .checked_add(1)
747 .and_then(|next| index.elements.get(next))
748 {
749 if let Some(target) = adjacent_text_pair_target(current, second, expected, config) {
750 return Some(target);
751 }
752 }
753 position
754 .checked_sub(1)
755 .and_then(|previous| index.elements.get(previous))
756 .and_then(|first| adjacent_text_pair_target(first, current, expected, config))
757}
758
759fn adjacent_text_pair_target(
760 first: &GroundingElement,
761 second: &GroundingElement,
762 expected: &str,
763 config: &VerificationConfig,
764) -> Option<FoundTarget> {
765 if first.page != second.page {
766 return None;
767 }
768 if !element_bboxes_are_adjacent(first.bbox, second.bbox) {
769 return None;
770 }
771 let first_text = first.text.as_deref()?;
772 let second_text = second.text.as_deref()?;
773 let joined = join_adjacent_text(first_text, second_text, config);
774 if text_matches(ClaimKind::Quote, expected, first_text, config)
775 || text_matches(ClaimKind::Quote, expected, second_text, config)
776 || !text_matches(ClaimKind::Quote, expected, &joined, config)
777 {
778 return None;
779 }
780
781 Some(FoundTarget {
782 page: Some(first.page.clone()),
783 bbox: Some(union_bbox(first.bbox, second.bbox)),
784 text: Some(joined),
785 from_table_cell: false,
786 element_index: None,
787 })
788}
789
790fn join_adjacent_text(first: &str, second: &str, config: &VerificationConfig) -> String {
791 let joined = format!("{first} {second}");
792 match config.matching.text_normalization {
793 TextNormalization::None => joined,
794 TextNormalization::CollapseWhitespace => normalize_quote(&joined),
795 }
796}
797
/// Area of a `[x0, y0, x1, y1]` box.
///
/// Inverted extents count as 0, and the subtraction and multiplication
/// saturate instead of overflowing.
fn bbox_area(bbox: [i64; 4]) -> u128 {
    let [x0, y0, x1, y1] = bbox;
    // Non-negative extent along one axis, widened to u128.
    let extent = |low: i64, high: i64| u128::from(high.saturating_sub(low).max(0).unsigned_abs());
    extent(x0, x1).saturating_mul(extent(y0, y1))
}
803
804fn element_bboxes_are_adjacent(first: [i64; 4], second: [i64; 4]) -> bool {
805 let same_line =
806 ranges_overlap_i64(first[1], first[3], second[1], second[3]) && first[2] == second[0];
807 let stacked =
808 ranges_overlap_i64(first[0], first[2], second[0], second[2]) && first[3] == second[1];
809 same_line || stacked
810}
811
/// Whether each range starts strictly before the other one ends; ranges that
/// merely touch do not overlap.
fn ranges_overlap_i64(a_start: i64, a_end: i64, b_start: i64, b_end: i64) -> bool {
    let disjoint = a_start >= b_end || b_start >= a_end;
    !disjoint
}
815
/// Smallest `[x0, y0, x1, y1]` box enclosing both inputs: the minimum of the
/// first two coordinates and the maximum of the last two.
fn union_bbox(left: [i64; 4], right: [i64; 4]) -> [i64; 4] {
    let [left_x0, left_y0, left_x1, left_y1] = left;
    let [right_x0, right_y0, right_x1, right_y1] = right;
    [
        left_x0.min(right_x0),
        left_y0.min(right_y0),
        left_x1.max(right_x1),
        left_y1.max(right_y1),
    ]
}
824
825fn make_evidence(
826 source: &dyn GroundingSource,
827 target: &FoundTarget,
828 include_text: bool,
829 include_crops: bool,
830) -> Option<Evidence> {
831 let crop_ref = if include_crops && source.capabilities().crop_support {
832 target
833 .page
834 .as_deref()
835 .zip(target.bbox)
836 .and_then(|(page, bbox)| source.crop_ref(page, bbox))
837 } else {
838 None
839 };
840 Some(Evidence {
841 text: include_text.then(|| target.text.clone()).flatten(),
842 page: target.page.clone(),
843 bbox: target.bbox,
844 crop_ref,
845 })
846}
847
/// Whether `inner` lies within `container` once every container edge is
/// pushed outward by `tolerance`.
///
/// Boxes are `[x0, y0, x1, y1]`. A negative `tolerance` shrinks the container
/// instead. The comparison is carried out in `i128`, so coordinates or
/// tolerances near the `i64` limits cannot overflow (the plain `i64`
/// arithmetic used previously panicked in debug builds and wrapped in release
/// builds).
fn contains_bbox(container: [i64; 4], inner: [i64; 4], tolerance: i64) -> bool {
    let slack = i128::from(tolerance);
    let [min_x, min_y, max_x, max_y] = container.map(i128::from);
    let [x0, y0, x1, y1] = inner.map(i128::from);

    x0 >= min_x - slack && y0 >= min_y - slack && x1 <= max_x + slack && y1 <= max_y + slack
}
854
855fn text_match_method(kind: ClaimKind, config: &VerificationConfig) -> MatchMethod {
856 match (kind, config.matching.text_normalization) {
857 (ClaimKind::Quote, TextNormalization::None) => MatchMethod::ExactTextContains,
858 (ClaimKind::Quote, TextNormalization::CollapseWhitespace) => {
859 MatchMethod::NormalizedTextContains
860 }
861 (_, TextNormalization::None) => MatchMethod::ExactText,
862 (_, TextNormalization::CollapseWhitespace) => MatchMethod::NormalizedText,
863 }
864}
865
866fn text_matches(
867 kind: ClaimKind,
868 expected: &str,
869 actual: &str,
870 config: &VerificationConfig,
871) -> bool {
872 let (mut expected, mut actual) = match config.matching.text_normalization {
873 TextNormalization::None => (expected.to_string(), actual.to_string()),
874 TextNormalization::CollapseWhitespace => {
875 (normalize_quote(expected), normalize_quote(actual))
876 }
877 };
878 if !config.matching.case_sensitive {
879 expected = expected.to_lowercase();
880 actual = actual.to_lowercase();
881 }
882 if kind == ClaimKind::Quote {
883 actual.contains(&expected)
884 } else {
885 actual == expected
886 }
887}
888
/// Collapses every run of ASCII whitespace (space, tab, LF, FF, CR) into a
/// single space and trims leading and trailing whitespace.
///
/// Non-ASCII whitespace inside the text is left untouched; at either end it
/// is removed by the final trim, which uses `str::trim`.
pub fn normalize_quote(input: &str) -> String {
    // CR and LF are ASCII whitespace, so line endings need no separate pass.
    let collapsed = input
        .split_ascii_whitespace()
        .collect::<Vec<&str>>()
        .join(" ");
    collapsed.trim().to_string()
}
908
909#[cfg(test)]
910mod tests {
911 use super::*;
912 use ethos_core::grounding::{
913 Capabilities, GroundingCell, GroundingElement, GroundingSpan, GroundingTable, PageGeometry,
914 ParserIdentity,
915 };
916 use ethos_core::verify_types::{CapabilityLimit, CellRef, Citation, Claim};
917
918 #[derive(Clone)]
919 struct TestSource {
920 caps: Capabilities,
921 fingerprint: Option<String>,
922 crop_ref: Option<String>,
923 }
924
925 impl Default for TestSource {
926 fn default() -> Self {
927 Self {
928 caps: Capabilities {
929 spans: true,
930 char_offsets: true,
931 tables: true,
932 fingerprint: true,
933 coordinate_origin: CoordinateOrigin::TopLeft,
934 crop_support: false,
935 },
936 fingerprint: Some(
937 "sha256:b5d30710d0c25cc38d8dec924ecaf57ae4f81276dd5dc14d75cb3b5b6bde62d3"
938 .into(),
939 ),
940 crop_ref: None,
941 }
942 }
943 }
944
945 impl GroundingSource for TestSource {
946 fn parser(&self) -> ParserIdentity {
947 ParserIdentity {
948 name: "test-parser".into(),
949 version: "0.1.0".into(),
950 adapter: None,
951 adapter_version: None,
952 }
953 }
954 fn capabilities(&self) -> Capabilities {
955 self.caps
956 }
957 fn fingerprint(&self) -> Option<String> {
958 self.fingerprint.clone()
959 }
960 fn pages(&self) -> Vec<PageGeometry> {
961 vec![PageGeometry {
962 id: "p0001".into(),
963 index: 1,
964 width: 61200,
965 height: 79200,
966 rotation: 0,
967 }]
968 }
969 fn elements(&self) -> Vec<GroundingElement> {
970 vec![
971 GroundingElement {
972 id: "e000002".into(),
973 page: "p0001".into(),
974 bbox: [7200, 10100, 54000, 11500],
975 kind: "text_block".into(),
976 text: Some(
977 "Revenue grew to $12.4M in Q3 2025, driven by enterprise expansion.".into(),
978 ),
979 },
980 GroundingElement {
981 id: "e000003".into(),
982 page: "p0001".into(),
983 bbox: [7200, 13000, 54000, 20000],
984 kind: "table".into(),
985 text: None,
986 },
987 ]
988 }
989 fn spans(&self) -> Vec<GroundingSpan> {
990 vec![GroundingSpan {
991 id: "s000002".into(),
992 page: "p0001".into(),
993 bbox: [7200, 10100, 54000, 11500],
994 text: "Revenue grew to $12.4M in Q3 2025".into(),
995 element: Some("e000002".into()),
996 char_start: Some(0),
997 char_end: Some(34),
998 }]
999 }
1000 fn tables(&self) -> Vec<GroundingTable> {
1001 vec![GroundingTable {
1002 id: "t0001".into(),
1003 page: "p0001".into(),
1004 bbox: [7200, 13000, 54000, 20000],
1005 cells: vec![
1006 GroundingCell {
1007 row: 0,
1008 col: 0,
1009 row_span: 1,
1010 col_span: 1,
1011 bbox: [7200, 13000, 30600, 16500],
1012 text: "Metric".into(),
1013 },
1014 GroundingCell {
1015 row: 1,
1016 col: 1,
1017 row_span: 1,
1018 col_span: 1,
1019 bbox: [30600, 16500, 54000, 20000],
1020 text: "$12.4M".into(),
1021 },
1022 ],
1023 }]
1024 }
1025 fn crop_ref(&self, page: &str, bbox: [i64; 4]) -> Option<String> {
1026 if page == "p0001" && bbox == [7200, 10100, 54000, 11500] {
1027 self.crop_ref.clone()
1028 } else {
1029 None
1030 }
1031 }
1032 }
1033
1034 struct ElementSource {
1035 elements: Vec<GroundingElement>,
1036 }
1037
1038 impl GroundingSource for ElementSource {
1039 fn parser(&self) -> ParserIdentity {
1040 ParserIdentity {
1041 name: "element-test-parser".into(),
1042 version: "0.1.0".into(),
1043 adapter: None,
1044 adapter_version: None,
1045 }
1046 }
1047 fn capabilities(&self) -> Capabilities {
1048 Capabilities {
1049 spans: true,
1050 char_offsets: true,
1051 tables: true,
1052 fingerprint: true,
1053 coordinate_origin: CoordinateOrigin::TopLeft,
1054 crop_support: false,
1055 }
1056 }
1057 fn fingerprint(&self) -> Option<String> {
1058 Some("sha256:b5d30710d0c25cc38d8dec924ecaf57ae4f81276dd5dc14d75cb3b5b6bde62d3".into())
1059 }
1060 fn pages(&self) -> Vec<PageGeometry> {
1061 vec![
1062 PageGeometry {
1063 id: "p0001".into(),
1064 index: 1,
1065 width: 61200,
1066 height: 79200,
1067 rotation: 0,
1068 },
1069 PageGeometry {
1070 id: "p0002".into(),
1071 index: 2,
1072 width: 61200,
1073 height: 79200,
1074 rotation: 0,
1075 },
1076 ]
1077 }
1078 fn elements(&self) -> Vec<GroundingElement> {
1079 self.elements.clone()
1080 }
1081 fn spans(&self) -> Vec<GroundingSpan> {
1082 Vec::new()
1083 }
1084 fn tables(&self) -> Vec<GroundingTable> {
1085 Vec::new()
1086 }
1087 }
1088
1089 fn claim(kind: ClaimKind, text: Option<&str>, citation: Citation) -> Claim {
1090 Claim {
1091 kind,
1092 text: text.map(str::to_string),
1093 citation,
1094 }
1095 }
1096
1097 fn input(source: &TestSource, claims: Vec<Claim>) -> CitationInput {
1098 CitationInput::Envelope(CitationEnvelope {
1099 document_fingerprint: source.fingerprint(),
1100 claims,
1101 })
1102 }
1103
1104 fn verify(source: &TestSource, claims: Vec<Claim>) -> VerificationReport {
1105 let cfg = VerificationConfig::default_v1();
1106 verify_claims(source, input(source, claims), &cfg, "0".repeat(64))
1107 }
1108
1109 fn verify_with_config(
1110 source: &TestSource,
1111 claims: Vec<Claim>,
1112 cfg: &VerificationConfig,
1113 ) -> VerificationReport {
1114 verify_claims(source, input(source, claims), cfg, "0".repeat(64))
1115 }
1116
1117 fn element(id: &str, page: &str, bbox: [i64; 4], text: Option<&str>) -> GroundingElement {
1118 GroundingElement {
1119 id: id.into(),
1120 page: page.into(),
1121 bbox,
1122 kind: "text_block".into(),
1123 text: text.map(str::to_string),
1124 }
1125 }
1126
1127 fn verify_elements(elements: Vec<GroundingElement>, claims: Vec<Claim>) -> VerificationReport {
1128 let source = ElementSource { elements };
1129 let cfg = VerificationConfig::default_v1();
1130 let citations = CitationInput::Envelope(CitationEnvelope {
1131 document_fingerprint: source.fingerprint(),
1132 claims,
1133 });
1134 verify_claims(&source, citations, &cfg, "0".repeat(64))
1135 }
1136
/// A quote cited by element id and a presence claim cited by span id both
/// ground against the default fixture, with no limits or warnings reported.
#[test]
fn quote_and_presence_claims_ground_with_literal_matching() {
    let source = TestSource::default();
    let quote = claim(
        ClaimKind::Quote,
        Some("Revenue grew to $12.4M in Q3 2025"),
        Citation {
            element_id: Some("e000002".into()),
            ..Default::default()
        },
    );
    let presence = claim(
        ClaimKind::Presence,
        None,
        Citation {
            span_id: Some("s000002".into()),
            ..Default::default()
        },
    );

    let report = verify(&source, vec![quote, presence]);

    assert!(report.all_evidence_grounded);
    assert_eq!(report.checks.len(), 2);
    assert_eq!(report.capability_limits, Vec::<CapabilityLimit>::new());

    let quote_check = &report.checks[0];
    let presence_check = &report.checks[1];
    assert_eq!(quote_check.status, CheckStatus::Grounded);
    assert_eq!(
        quote_check.match_method,
        MatchMethod::NormalizedTextContains
    );
    assert_eq!(presence_check.status, CheckStatus::Grounded);
    assert_eq!(presence_check.match_method, MatchMethod::PresenceOnly);

    // Evidence carries the full element text, not just the quoted part.
    let evidence_text = quote_check
        .evidence
        .as_ref()
        .and_then(|e| e.text.as_deref());
    assert_eq!(
        evidence_text,
        Some("Revenue grew to $12.4M in Q3 2025, driven by enterprise expansion.")
    );
    assert_eq!(report.warnings, Vec::<WarningCode>::new());
}
1181
/// A quote split across two side-by-side elements grounds when the citation
/// names the first fragment; the evidence covers both fragments.
#[test]
fn quote_claim_grounds_across_adjacent_element_text_fragments() {
    let fragments = vec![
        element(
            "split-a",
            "p0001",
            [100, 100, 400, 200],
            Some("The alpha trust loop verifies "),
        ),
        element(
            "split-b",
            "p0001",
            [400, 100, 700, 200],
            Some("grounded evidence"),
        ),
    ];
    let quote = claim(
        ClaimKind::Quote,
        Some("The alpha trust loop verifies grounded evidence"),
        Citation {
            element_id: Some("split-a".into()),
            ..Default::default()
        },
    );

    let report = verify_elements(fragments, vec![quote]);

    assert!(report.all_evidence_grounded);
    let check = &report.checks[0];
    assert_eq!(check.status, CheckStatus::Grounded);
    assert_eq!(check.match_method, MatchMethod::NormalizedTextContains);

    let evidence_text = check.evidence.as_ref().and_then(|e| e.text.as_deref());
    assert_eq!(
        evidence_text,
        Some("The alpha trust loop verifies grounded evidence")
    );
    let evidence_bbox = check.evidence.as_ref().and_then(|e| e.bbox);
    assert_eq!(evidence_bbox, Some([100, 100, 700, 200]));
}
1227
/// With only a page locator, the same split quote is reported as a text
/// mismatch rather than being stitched across the two fragments.
#[test]
fn quote_claim_page_only_locator_does_not_search_adjacent_fragments() {
    let fragments = vec![
        element(
            "split-a",
            "p0001",
            [100, 100, 400, 200],
            Some("The alpha trust loop verifies "),
        ),
        element(
            "split-b",
            "p0001",
            [400, 100, 700, 200],
            Some("grounded evidence"),
        ),
    ];
    let page_only = Citation {
        page: Some("p0001".into()),
        ..Default::default()
    };
    let claims = vec![claim(
        ClaimKind::Quote,
        Some("The alpha trust loop verifies grounded evidence"),
        page_only,
    )];

    let report = verify_elements(fragments, claims);

    assert!(!report.all_evidence_grounded);
    let check = &report.checks[0];
    assert_eq!(check.status, CheckStatus::Mismatch);
    assert_eq!(check.reason, Some(CheckReason::TextMismatch));
}
1259
/// Citing the second of two edge-sharing fragments still grounds the full
/// quote, and the evidence again reports the joined text and combined bbox.
#[test]
fn quote_claim_grounds_when_element_id_points_to_second_adjacent_fragment() {
    let fragments = vec![
        element(
            "split-a",
            "p0001",
            [100, 100, 400, 200],
            Some("The alpha trust loop verifies "),
        ),
        element(
            "split-b",
            "p0001",
            [400, 100, 700, 200],
            Some("grounded evidence"),
        ),
    ];
    let claims = vec![claim(
        ClaimKind::Quote,
        Some("The alpha trust loop verifies grounded evidence"),
        Citation {
            element_id: Some("split-b".into()),
            ..Default::default()
        },
    )];

    let report = verify_elements(fragments, claims);

    assert!(report.all_evidence_grounded);
    let check = &report.checks[0];
    assert_eq!(check.status, CheckStatus::Grounded);

    let evidence = check.evidence.as_ref();
    assert_eq!(
        evidence.and_then(|e| e.text.as_deref()),
        Some("The alpha trust loop verifies grounded evidence")
    );
    assert_eq!(
        evidence.and_then(|e| e.bbox),
        Some([100, 100, 700, 200])
    );
}
1301
/// When the first fragment's bbox ends at x=390 and the second starts at
/// x=400, the split quote is reported as a text mismatch.
#[test]
fn quote_claim_does_not_stitch_non_touching_element_bboxes() {
    let fragments = vec![
        element(
            "split-a",
            "p0001",
            [100, 100, 390, 200],
            Some("The alpha trust loop verifies "),
        ),
        element(
            "split-b",
            "p0001",
            [400, 100, 700, 200],
            Some("grounded evidence"),
        ),
    ];
    let claims = vec![claim(
        ClaimKind::Quote,
        Some("The alpha trust loop verifies grounded evidence"),
        Citation {
            element_id: Some("split-a".into()),
            ..Default::default()
        },
    )];

    let report = verify_elements(fragments, claims);

    assert!(!report.all_evidence_grounded);
    let check = &report.checks[0];
    assert_eq!(check.status, CheckStatus::Mismatch);
    assert_eq!(check.reason, Some(CheckReason::TextMismatch));
}
1333
/// A page+bbox citation that lies inside the first fragment yields a mismatch
/// whose evidence is that fragment alone (its text and its own bbox).
#[test]
fn quote_claim_bbox_locator_does_not_expand_outside_cited_region() {
    let fragments = vec![
        element(
            "split-a",
            "p0001",
            [100, 100, 400, 200],
            Some("The alpha trust loop verifies "),
        ),
        element(
            "split-b",
            "p0001",
            [400, 100, 700, 200],
            Some("grounded evidence"),
        ),
    ];
    let region = Citation {
        page: Some("p0001".into()),
        bbox: Some([120, 120, 380, 180]),
        ..Default::default()
    };
    let claims = vec![claim(
        ClaimKind::Quote,
        Some("The alpha trust loop verifies grounded evidence"),
        region,
    )];

    let report = verify_elements(fragments, claims);

    assert!(!report.all_evidence_grounded);
    let check = &report.checks[0];
    assert_eq!(check.status, CheckStatus::Mismatch);
    assert_eq!(check.reason, Some(CheckReason::TextMismatch));

    let evidence = check.evidence.as_ref();
    assert_eq!(
        evidence.and_then(|e| e.text.as_deref()),
        Some("The alpha trust loop verifies ")
    );
    assert_eq!(
        evidence.and_then(|e| e.bbox),
        Some([100, 100, 400, 200])
    );
}
1377
/// With a page-sized wrapper element and a smaller inner element both covering
/// the cited bbox, the evidence must come from the inner element.
#[test]
fn bbox_locator_prefers_smallest_containing_element() {
    let outer = element(
        "container",
        "p0001",
        [0, 0, 1000, 1000],
        Some("outer wrapper text"),
    );
    let inner = element(
        "inner",
        "p0001",
        [100, 100, 400, 200],
        Some("The exact cited quote"),
    );
    let claims = vec![claim(
        ClaimKind::Quote,
        Some("The exact cited quote"),
        Citation {
            page: Some("p0001".into()),
            bbox: Some([120, 120, 380, 180]),
            ..Default::default()
        },
    )];

    let report = verify_elements(vec![outer, inner], claims);

    assert!(report.all_evidence_grounded);
    let check = &report.checks[0];
    assert_eq!(check.status, CheckStatus::Grounded);

    let evidence = check.evidence.as_ref();
    assert_eq!(
        evidence.and_then(|e| e.text.as_deref()),
        Some("The exact cited quote")
    );
    assert_eq!(
        evidence.and_then(|e| e.bbox),
        Some([100, 100, 400, 200])
    );
}
1420
/// Two negative scenarios for the split quote: an unrelated element listed
/// between the fragments, and the second fragment living on another page.
/// Both must end in a text mismatch.
#[test]
fn quote_claim_does_not_ground_across_non_adjacent_or_wrong_page_fragments() {
    // Scenario 1: a "between" element sits between the two fragments in the list.
    let interleaved = vec![
        element(
            "split-a",
            "p0001",
            [100, 100, 400, 200],
            Some("The alpha trust loop verifies "),
        ),
        element(
            "between",
            "p0001",
            [100, 220, 700, 320],
            Some("separate evidence"),
        ),
        element(
            "split-b",
            "p0001",
            [400, 100, 700, 200],
            Some("grounded evidence"),
        ),
    ];
    let by_element = vec![claim(
        ClaimKind::Quote,
        Some("The alpha trust loop verifies grounded evidence"),
        Citation {
            element_id: Some("split-a".into()),
            ..Default::default()
        },
    )];
    let non_adjacent = verify_elements(interleaved, by_element);

    assert!(!non_adjacent.all_evidence_grounded);
    let first = &non_adjacent.checks[0];
    assert_eq!(first.status, CheckStatus::Mismatch);
    assert_eq!(first.reason, Some(CheckReason::TextMismatch));

    // Scenario 2: the second fragment is on p0002 while the claim cites p0001.
    let cross_page = vec![
        element(
            "split-a",
            "p0001",
            [100, 100, 400, 200],
            Some("The alpha trust loop verifies "),
        ),
        element(
            "split-b",
            "p0002",
            [400, 100, 700, 200],
            Some("grounded evidence"),
        ),
    ];
    let by_page = vec![claim(
        ClaimKind::Quote,
        Some("The alpha trust loop verifies grounded evidence"),
        Citation {
            page: Some("p0001".into()),
            ..Default::default()
        },
    )];
    let wrong_page = verify_elements(cross_page, by_page);

    assert!(!wrong_page.all_evidence_grounded);
    let second = &wrong_page.checks[0];
    assert_eq!(second.status, CheckStatus::Mismatch);
    assert_eq!(second.reason, Some(CheckReason::TextMismatch));
}
1488
/// A quote whose text differs from the cited element and a presence claim on an
/// unknown element id must report `Mismatch` and `NotFound` respectively, and
/// the overall gate must stay false.
#[test]
fn mismatch_and_not_found_keep_gate_false() {
    let source = TestSource::default();
    let wrong_text = claim(
        ClaimKind::Quote,
        Some("Revenue fell to $1"),
        Citation {
            element_id: Some("e000002".into()),
            ..Default::default()
        },
    );
    let unknown_element = claim(
        ClaimKind::Presence,
        None,
        Citation {
            element_id: Some("missing".into()),
            ..Default::default()
        },
    );

    let report = verify(&source, vec![wrong_text, unknown_element]);

    assert!(!report.all_evidence_grounded);

    let mismatch = &report.checks[0];
    assert_eq!(mismatch.status, CheckStatus::Mismatch);
    assert_eq!(mismatch.reason, Some(CheckReason::TextMismatch));

    let not_found = &report.checks[1];
    assert_eq!(not_found.status, CheckStatus::NotFound);
    assert_eq!(not_found.reason, Some(CheckReason::ElementNotFound));
}
1520
/// A value claim whose text is the full element text grounds with the
/// `NormalizedText` match method and is not listed as unsupported.
#[test]
fn value_claims_use_literal_text_matching() {
    let source = TestSource::default();
    let full_text_value = claim(
        ClaimKind::Value,
        Some("Revenue grew to $12.4M in Q3 2025, driven by enterprise expansion."),
        Citation {
            element_id: Some("e000002".into()),
            ..Default::default()
        },
    );

    let report = verify(&source, vec![full_text_value]);

    assert!(report.all_evidence_grounded);
    assert_eq!(report.unsupported_claim_kinds, Vec::<String>::new());

    let check = &report.checks[0];
    assert_eq!(check.status, CheckStatus::Grounded);
    assert_eq!(check.match_method, MatchMethod::NormalizedText);
}
1541
/// A value claim of just "1" against the revenue element is a text mismatch,
/// still reported under the `NormalizedText` match method.
#[test]
fn value_substrings_do_not_ground() {
    let source = TestSource::default();
    let fragment_value = claim(
        ClaimKind::Value,
        Some("1"),
        Citation {
            element_id: Some("e000002".into()),
            ..Default::default()
        },
    );

    let report = verify(&source, vec![fragment_value]);

    assert!(!report.all_evidence_grounded);

    let check = &report.checks[0];
    assert_eq!(check.status, CheckStatus::Mismatch);
    assert_eq!(check.reason, Some(CheckReason::TextMismatch));
    assert_eq!(check.match_method, MatchMethod::NormalizedText);
}
1562
/// A table-cell claim addressing table `t0001`, row 1 / col 1 grounds via
/// `TableCellLookup`, and the evidence text equals the claimed cell text.
#[test]
fn table_cell_claims_lookup_cell_and_match_text() {
    let source = TestSource::default();
    let cell_locator = Citation {
        table_id: Some("t0001".into()),
        cell: Some(CellRef { row: 1, col: 1 }),
        ..Default::default()
    };
    let claims = vec![claim(ClaimKind::TableCell, Some("$12.4M"), cell_locator)];

    let report = verify(&source, claims);

    assert!(report.all_evidence_grounded);
    assert_eq!(report.unsupported_claim_kinds, Vec::<String>::new());

    let check = &report.checks[0];
    assert_eq!(check.status, CheckStatus::Grounded);
    assert_eq!(check.match_method, MatchMethod::TableCellLookup);

    let evidence = check.evidence.as_ref();
    assert_eq!(evidence.and_then(|e| e.text.as_deref()), Some("$12.4M"));
}
1591
/// Addressing row 9 / col 9 of table `t0001` must report `NotFound` with the
/// `TableCellNotFound` reason and no match method.
#[test]
fn table_cell_missing_cell_is_not_found() {
    let source = TestSource::default();
    let absent_cell = Citation {
        table_id: Some("t0001".into()),
        cell: Some(CellRef { row: 9, col: 9 }),
        ..Default::default()
    };
    let claims = vec![claim(ClaimKind::TableCell, Some("$12.4M"), absent_cell)];

    let report = verify(&source, claims);

    assert!(!report.all_evidence_grounded);

    let check = &report.checks[0];
    assert_eq!(check.status, CheckStatus::NotFound);
    assert_eq!(check.reason, Some(CheckReason::TableCellNotFound));
    assert_eq!(check.match_method, MatchMethod::None);
}
1616
/// With the table capability switched on, a claim against an unknown table id
/// is `NotFound` both for the plain test source and for a wrapper whose
/// `tables()` returns an empty list.
#[test]
fn empty_table_collection_is_not_found_when_tables_are_supported() {
    // Delegates every accessor to the wrapped source except `tables`, which is empty.
    struct NoTables(TestSource);
    impl GroundingSource for NoTables {
        fn parser(&self) -> ParserIdentity {
            self.0.parser()
        }
        fn capabilities(&self) -> Capabilities {
            self.0.capabilities()
        }
        fn fingerprint(&self) -> Option<String> {
            self.0.fingerprint()
        }
        fn pages(&self) -> Vec<PageGeometry> {
            self.0.pages()
        }
        fn elements(&self) -> Vec<GroundingElement> {
            self.0.elements()
        }
        fn spans(&self) -> Vec<GroundingSpan> {
            self.0.spans()
        }
        fn tables(&self) -> Vec<GroundingTable> {
            Vec::new()
        }
    }

    // Both halves of the test submit the same claim, so build it on demand.
    let missing_table_claim = || {
        claim(
            ClaimKind::TableCell,
            Some("$12.4M"),
            Citation {
                table_id: Some("missing".into()),
                cell: Some(CellRef { row: 1, col: 1 }),
                ..Default::default()
            },
        )
    };

    let source = TestSource {
        caps: Capabilities {
            tables: true,
            ..TestSource::default().caps
        },
        ..TestSource::default()
    };

    // First pass: the unwrapped source.
    let report = verify(&source, vec![missing_table_claim()]);
    assert_eq!(report.checks[0].status, CheckStatus::NotFound);

    // Second pass: same source behind the wrapper, submitted as an envelope
    // carrying the wrapper's own fingerprint.
    let no_tables = NoTables(source);
    let cfg = VerificationConfig::default_v1();
    let envelope = CitationEnvelope {
        document_fingerprint: no_tables.fingerprint(),
        claims: vec![missing_table_claim()],
    };
    let report = verify_claims(
        &no_tables,
        CitationInput::Envelope(envelope),
        &cfg,
        "0".repeat(64),
    );
    assert_eq!(report.checks[0].status, CheckStatus::NotFound);
}
1685
/// With `tables: false` in the source capabilities, a table-cell claim is
/// `CapabilityBlocked`, the report lists `MissingTables`, and the check carries
/// the `CapabilityLimited` warning.
#[test]
fn missing_table_capability_blocks_table_cell_claims() {
    let caps = Capabilities {
        tables: false,
        ..TestSource::default().caps
    };
    let source = TestSource {
        caps,
        ..TestSource::default()
    };
    let claims = vec![claim(
        ClaimKind::TableCell,
        Some("$12.4M"),
        Citation {
            table_id: Some("t0001".into()),
            cell: Some(CellRef { row: 1, col: 1 }),
            ..Default::default()
        },
    )];

    let report = verify(&source, claims);

    let check = &report.checks[0];
    assert_eq!(check.status, CheckStatus::CapabilityBlocked);
    assert_eq!(check.reason, Some(CheckReason::MissingTableCapability));
    assert_eq!(
        report.capability_limits,
        vec![CapabilityLimit::MissingTables]
    );
    assert!(check.warnings.contains(&WarningCode::CapabilityLimited));
}
1721
/// On a source with `crop_support: true` and a crop ref set, the evidence
/// carries that ref when `include_crops` is true and no ref when it is false.
#[test]
fn crop_refs_are_echoed_only_when_requested_and_supported() {
    let source = TestSource {
        caps: Capabilities {
            crop_support: true,
            ..TestSource::default().caps
        },
        crop_ref: Some("crop://p0001/e000002.png".into()),
        ..TestSource::default()
    };
    let quote = claim(
        ClaimKind::Quote,
        Some("Revenue grew to $12.4M in Q3 2025"),
        Citation {
            element_id: Some("e000002".into()),
            ..Default::default()
        },
    );
    let mut cfg = VerificationConfig::default_v1();

    // Crops requested: the source's ref is echoed on the evidence.
    cfg.evidence.as_mut().unwrap().include_crops = true;
    let with_crops = verify_with_config(&source, vec![quote.clone()], &cfg);
    let echoed = with_crops.checks[0]
        .evidence
        .as_ref()
        .and_then(|e| e.crop_ref.as_deref());
    assert_eq!(echoed, Some("crop://p0001/e000002.png"));

    // Crops not requested: no ref on the evidence.
    cfg.evidence.as_mut().unwrap().include_crops = false;
    let without_crops = verify_with_config(&source, vec![quote], &cfg);
    let suppressed = without_crops.checks[0]
        .evidence
        .as_ref()
        .and_then(|e| e.crop_ref.as_deref());
    assert_eq!(suppressed, None);
}
1762
/// Requesting crops from a source that has a crop ref but default capabilities
/// still grounds the claim, yet the report lists `MissingCropSupport`, warns
/// `CapabilityLimited`, and the evidence has no crop ref.
#[test]
fn requested_crop_refs_without_source_support_remain_capability_limited() {
    let source = TestSource {
        crop_ref: Some("crop://p0001/e000002.png".into()),
        ..TestSource::default()
    };
    let mut cfg = VerificationConfig::default_v1();
    cfg.evidence.as_mut().unwrap().include_crops = true;

    let claims = vec![claim(
        ClaimKind::Quote,
        Some("Revenue grew to $12.4M in Q3 2025"),
        Citation {
            element_id: Some("e000002".into()),
            ..Default::default()
        },
    )];
    let report = verify_with_config(&source, claims, &cfg);

    let check = &report.checks[0];
    assert_eq!(check.status, CheckStatus::Grounded);
    assert_eq!(
        report.capability_limits,
        vec![CapabilityLimit::MissingCropSupport]
    );
    assert!(report.warnings.contains(&WarningCode::CapabilityLimited));

    let crop_ref = check.evidence.as_ref().and_then(|e| e.crop_ref.as_deref());
    assert_eq!(crop_ref, None);
}
1799
/// An envelope carrying an all-zero sha256 fingerprint must flag the report as
/// fingerprint-stale and mark the check `Stale` with `StaleFingerprint`.
#[test]
fn stale_fingerprint_marks_checks_stale_and_gate_false() {
    let source = TestSource::default();
    let cfg = VerificationConfig::default_v1();
    let envelope = CitationEnvelope {
        document_fingerprint: Some(
            "sha256:0000000000000000000000000000000000000000000000000000000000000000".into(),
        ),
        claims: vec![claim(
            ClaimKind::Presence,
            None,
            Citation {
                element_id: Some("e000002".into()),
                ..Default::default()
            },
        )],
    };

    let report = verify_claims(
        &source,
        CitationInput::Envelope(envelope),
        &cfg,
        "0".repeat(64),
    );

    assert!(report.fingerprint_stale);
    assert!(!report.all_evidence_grounded);

    let check = &report.checks[0];
    assert_eq!(check.status, CheckStatus::Stale);
    assert_eq!(check.reason, Some(CheckReason::StaleFingerprint));
}
1829
/// An envelope with no fingerprint, verified under the default v1 config, does
/// not set `fingerprint_stale` but marks the check `Stale` with
/// `MissingCitationFingerprint`.
#[test]
fn missing_citation_fingerprint_blocks_when_required() {
    let source = TestSource::default();
    let cfg = VerificationConfig::default_v1();
    let envelope = CitationEnvelope {
        document_fingerprint: None,
        claims: vec![claim(
            ClaimKind::Presence,
            None,
            Citation {
                element_id: Some("e000002".into()),
                ..Default::default()
            },
        )],
    };

    let report = verify_claims(
        &source,
        CitationInput::Envelope(envelope),
        &cfg,
        "0".repeat(64),
    );

    assert!(!report.fingerprint_stale);
    assert!(!report.all_evidence_grounded);

    let check = &report.checks[0];
    assert_eq!(check.status, CheckStatus::Stale);
    assert_eq!(check.reason, Some(CheckReason::MissingCitationFingerprint));
}
1859
/// A `Region` claim is reported as `UnsupportedClaimKind` and the report lists
/// the kind as "region".
#[test]
fn unsupported_claim_kinds_are_explicit() {
    let source = TestSource::default();
    let region = claim(
        ClaimKind::Region,
        None,
        Citation {
            element_id: Some("e000002".into()),
            ..Default::default()
        },
    );

    let report = verify(&source, vec![region]);

    assert!(!report.all_evidence_grounded);

    let check = &report.checks[0];
    assert_eq!(check.status, CheckStatus::UnsupportedClaimKind);
    assert_eq!(check.reason, Some(CheckReason::UnsupportedClaimKind));
    assert_eq!(report.unsupported_claim_kinds, vec!["region"]);
}
1883
/// Mixes one grounded presence claim with two `Region` claims and one `Other`
/// claim. The three non-v1 checks must be rejected without evidence, warnings
/// or the semantic-unverified flag, and the report lists each unsupported kind
/// once, as `["region", "other"]`.
#[test]
fn non_v1_claim_kinds_are_deduped_and_keep_gate_false() {
    let source = TestSource::default();
    let on_page = || Citation {
        page: Some("p0001".into()),
        ..Default::default()
    };
    let on_element = || Citation {
        element_id: Some("e000002".into()),
        ..Default::default()
    };
    let claims = vec![
        claim(ClaimKind::Presence, None, on_page()),
        claim(ClaimKind::Region, None, on_element()),
        claim(
            ClaimKind::Other,
            Some("$12.4M equals 12400000"),
            on_element(),
        ),
        claim(ClaimKind::Region, None, on_page()),
    ];

    let report = verify(&source, claims);

    assert!(!report.all_evidence_grounded);
    assert_eq!(report.checks[0].status, CheckStatus::Grounded);

    // Checks 1 through 3 are the unsupported kinds; each gets the same verdict.
    for rejected in &report.checks[1..=3] {
        assert_eq!(rejected.status, CheckStatus::UnsupportedClaimKind);
        assert_eq!(rejected.match_method, MatchMethod::None);
        assert_eq!(rejected.reason, Some(CheckReason::UnsupportedClaimKind));
        assert!(rejected.evidence.is_none());
        assert!(rejected.warnings.is_empty());
        assert!(!rejected.semantic_unverified);
    }

    assert_eq!(report.unsupported_claim_kinds, vec!["region", "other"]);
}
1956
/// With every capability off and an unknown coordinate origin, a span-located
/// presence claim is `CapabilityBlocked` for `MissingSpanCapability`, and the
/// report enumerates the five expected capability limits in order.
#[test]
fn missing_span_capability_blocks_span_locator() {
    let bare_caps = Capabilities {
        spans: false,
        char_offsets: false,
        tables: false,
        fingerprint: false,
        coordinate_origin: CoordinateOrigin::Unknown,
        crop_support: false,
    };
    let source = TestSource {
        caps: bare_caps,
        fingerprint: None,
        crop_ref: None,
    };
    let by_span = claim(
        ClaimKind::Presence,
        None,
        Citation {
            span_id: Some("s000002".into()),
            ..Default::default()
        },
    );

    let report = verify(&source, vec![by_span]);

    assert!(!report.all_evidence_grounded);

    let check = &report.checks[0];
    assert_eq!(check.status, CheckStatus::CapabilityBlocked);
    assert_eq!(check.reason, Some(CheckReason::MissingSpanCapability));

    let expected_limits = vec![
        CapabilityLimit::MissingFingerprint,
        CapabilityLimit::MissingSpans,
        CapabilityLimit::MissingCharOffsets,
        CapabilityLimit::MissingTables,
        CapabilityLimit::UnknownCoordinateOrigin,
    ];
    assert_eq!(report.capability_limits, expected_limits);

    // The warning must surface on the report and on the check itself.
    assert!(report.warnings.contains(&WarningCode::CapabilityLimited));
    assert!(check.warnings.contains(&WarningCode::CapabilityLimited));
}
2004
/// When the envelope carries a fingerprint but the source has none (and lacks
/// the fingerprint capability), the check is `CapabilityBlocked` for
/// `MissingSourceFingerprint` and the report is not flagged as stale.
#[test]
fn citation_fingerprint_without_source_fingerprint_blocks_checks() {
    let caps = Capabilities {
        fingerprint: false,
        ..TestSource::default().caps
    };
    let source = TestSource {
        caps,
        fingerprint: None,
        ..TestSource::default()
    };
    let cfg = VerificationConfig::default_v1();
    let envelope = CitationEnvelope {
        document_fingerprint: Some(
            "sha256:b5d30710d0c25cc38d8dec924ecaf57ae4f81276dd5dc14d75cb3b5b6bde62d3".into(),
        ),
        claims: vec![claim(
            ClaimKind::Presence,
            None,
            Citation {
                element_id: Some("e000002".into()),
                ..Default::default()
            },
        )],
    };

    let report = verify_claims(
        &source,
        CitationInput::Envelope(envelope),
        &cfg,
        "0".repeat(64),
    );

    assert!(!report.fingerprint_stale);
    assert!(!report.all_evidence_grounded);

    let check = &report.checks[0];
    assert_eq!(check.status, CheckStatus::CapabilityBlocked);
    assert_eq!(check.reason, Some(CheckReason::MissingSourceFingerprint));
    assert_eq!(
        report.capability_limits,
        vec![CapabilityLimit::MissingFingerprint]
    );

    // The warning must surface on the report and on the check itself.
    assert!(report.warnings.contains(&WarningCode::CapabilityLimited));
    assert!(check.warnings.contains(&WarningCode::CapabilityLimited));
}
2052
/// A quote claim submitted without text yields an `Error` check with the
/// `MissingRequiredText` reason and no match method.
#[test]
fn missing_text_is_error_for_library_callers() {
    let source = TestSource::default();
    let textless_quote = claim(
        ClaimKind::Quote,
        None,
        Citation {
            element_id: Some("e000002".into()),
            ..Default::default()
        },
    );

    let report = verify(&source, vec![textless_quote]);

    assert!(!report.all_evidence_grounded);

    let check = &report.checks[0];
    assert_eq!(check.status, CheckStatus::Error);
    assert_eq!(check.reason, Some(CheckReason::MissingRequiredText));
    assert_eq!(check.match_method, MatchMethod::None);
}
2076
/// `normalize_quote` trims and collapses a run of space/CR/LF/tab to a single
/// space, but leaves a U+00A0 no-break space untouched.
#[test]
fn quote_normalization_is_ascii_whitespace_only() {
    let cases = [
        ("  a\r\n\t b  ", "a b"),
        ("a\u{00a0}b", "a\u{00a0}b"),
    ];
    for (raw, normalized) in cases {
        assert_eq!(normalize_quote(raw), normalized);
    }
}
2082
/// Serializes a one-claim report to JSON and spot-checks three fields: the
/// parser name under `grounding.parser`, `fingerprint_stale`, and the length of
/// the `checks` array.
#[test]
fn report_serializes_to_schema_shape() {
    let source = TestSource::default();
    let presence = claim(
        ClaimKind::Presence,
        None,
        Citation {
            element_id: Some("e000002".into()),
            ..Default::default()
        },
    );
    let report = verify(&source, vec![presence]);

    let json = serde_json::to_value(&report).unwrap();

    assert_eq!(json["grounding"]["parser"]["name"], "test-parser");
    assert_eq!(json["fingerprint_stale"], false);
    let checks = json["checks"].as_array().unwrap();
    assert_eq!(checks.len(), 1);
}
2102}