1#![forbid(unsafe_code)]
33#![warn(missing_docs)]
34
35use std::collections::BTreeMap;
36
37use ethos_core::codes::WarningCode;
38use ethos_core::evidence_anchor::{
39 AnchorChecks, AnchorLevel, AnchorStatus, BboxCheck, CoordinateProfile, EvidenceAnchor,
40 EvidenceAnchorGrounding, EvidenceAnchorReport, EvidenceAnchorRequest, EvidenceKind,
41 EvidenceRef, FingerprintCheck, PageCheck, TableCellCheck, TextCheck, TextNormalizationProfile,
42 EVIDENCE_ANCHOR_REPORT_ARTIFACT_TYPE,
43};
44use ethos_core::grounding::{
45 CoordinateOrigin, GroundingCell, GroundingElement, GroundingSource, GroundingSpan,
46 GroundingTable, PageGeometry,
47};
48use ethos_core::verify_types::{
49 compute_all_evidence_grounded, CapabilityLimit, Check, CheckReason, CheckStatus, Claim,
50 ClaimKind, Evidence, GroundingMeta, MatchMethod, TextNormalization, VerificationConfig,
51 VerificationReport,
52};
53use serde::{Deserialize, Serialize};
54use sha2::{Digest, Sha256};
55
56#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
62#[serde(untagged)]
63pub enum CitationInput {
64 Claims(Vec<Claim>),
66 Envelope(CitationEnvelope),
68}
69
70#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
72#[serde(deny_unknown_fields)]
73pub struct CitationEnvelope {
74 #[serde(default)]
76 pub document_fingerprint: Option<String>,
77 pub claims: Vec<Claim>,
79}
80
/// Error returned when an evidence anchor request cannot be processed.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct EvidenceAnchorError {
    /// Human-readable description; rendered verbatim by `Display`.
    message: String,
}

impl EvidenceAnchorError {
    /// Builds an error from anything convertible into a `String`.
    fn new(message: impl Into<String>) -> Self {
        let message = message.into();
        Self { message }
    }
}

impl std::fmt::Display for EvidenceAnchorError {
    /// Writes the stored message with no additional decoration.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{}", self.message)
    }
}

impl std::error::Error for EvidenceAnchorError {}
102
103pub fn anchor_evidence(
105 source: &dyn GroundingSource,
106 request: EvidenceAnchorRequest,
107) -> Result<EvidenceAnchorReport, EvidenceAnchorError> {
108 validate_anchor_request(&request)?;
109 let index = SourceIndex::new(source);
110 let fingerprint_check = fingerprint_check(request.source_fingerprint.as_deref(), source);
111 let source_fingerprint = source.fingerprint();
112 let grounding = EvidenceAnchorGrounding {
113 parser: source.parser(),
114 capabilities: source.capabilities(),
115 };
116 let anchors = request
117 .evidence_refs
118 .iter()
119 .map(|evidence_ref| anchor_one(&index, fingerprint_check, evidence_ref))
120 .collect();
121 Ok(EvidenceAnchorReport {
122 artifact_type: EVIDENCE_ANCHOR_REPORT_ARTIFACT_TYPE.to_string(),
123 schema_version: ethos_core::SCHEMA_VERSION.to_string(),
124 source_fingerprint,
125 grounding,
126 anchors,
127 })
128}
129
130fn validate_anchor_request(request: &EvidenceAnchorRequest) -> Result<(), EvidenceAnchorError> {
131 if request.artifact_type != ethos_core::evidence_anchor::EVIDENCE_ANCHOR_REQUEST_ARTIFACT_TYPE {
132 return Err(EvidenceAnchorError::new(
133 "evidence anchor request artifact_type is not supported",
134 ));
135 }
136 if request.schema_version != ethos_core::SCHEMA_VERSION {
137 return Err(EvidenceAnchorError::new(
138 "evidence anchor request schema_version is not supported",
139 ));
140 }
141 let mut ids = std::collections::BTreeSet::new();
142 for evidence_ref in &request.evidence_refs {
143 if !ids.insert(evidence_ref.evidence_id.as_str()) {
144 return Err(EvidenceAnchorError::new(format!(
145 "duplicate evidence_id '{}'",
146 evidence_ref.evidence_id
147 )));
148 }
149 validate_evidence_ref(evidence_ref)?;
150 }
151 Ok(())
152}
153
154fn validate_evidence_ref(evidence_ref: &EvidenceRef) -> Result<(), EvidenceAnchorError> {
155 validate_locator(evidence_ref)?;
156 validate_expected_text(evidence_ref)?;
157 validate_kind_level_compat(evidence_ref)?;
158 validate_required_anchor_inputs(evidence_ref)?;
159 validate_required_page_locator(evidence_ref)?;
160 Ok(())
161}
162
163fn validate_locator(evidence_ref: &EvidenceRef) -> Result<(), EvidenceAnchorError> {
164 let locator = &evidence_ref.locator;
165 if locator.page_index == Some(0) {
166 return Err(EvidenceAnchorError::new("page_index must be 1-based"));
167 }
168 if locator.page_index.is_some() && locator.page_id.is_some() {
169 return Err(EvidenceAnchorError::new(
170 "use exactly one of page_index or page_id",
171 ));
172 }
173 if locator.bbox.is_some()
174 && locator.coordinate_profile != Some(CoordinateProfile::EthosQuantizedTopLeftV1)
175 {
176 return Err(EvidenceAnchorError::new(
177 "bbox requires coordinate_profile ethos_quantized_top_left_v1",
178 ));
179 }
180 Ok(())
181}
182
183fn validate_expected_text(evidence_ref: &EvidenceRef) -> Result<(), EvidenceAnchorError> {
184 if let Some(expected_text) = evidence_ref.expected_text.as_deref() {
185 if normalize_expected_text(expected_text).is_empty() {
186 return Err(EvidenceAnchorError::new(
187 "expected_text must not be empty after normalization",
188 ));
189 }
190 }
191 if evidence_ref.expected_text_sha256.is_some() {
192 let Some(expected_text) = evidence_ref.expected_text.as_deref() else {
193 return Err(EvidenceAnchorError::new(
194 "expected_text_sha256 requires expected_text",
195 ));
196 };
197 if evidence_ref.text_normalization_profile
198 != Some(TextNormalizationProfile::EthosCollapseWhitespaceV1)
199 {
200 return Err(EvidenceAnchorError::new(
201 "expected_text_sha256 requires text_normalization_profile ethos_collapse_whitespace_v1",
202 ));
203 }
204 let expected_hash = format!(
205 "sha256:{}",
206 sha256_hex(normalize_expected_text(expected_text).as_bytes())
207 );
208 if evidence_ref.expected_text_sha256.as_deref() != Some(expected_hash.as_str()) {
209 return Err(EvidenceAnchorError::new(
210 "expected_text_sha256 does not match normalized expected_text",
211 ));
212 }
213 }
214 Ok(())
215}
216
217fn validate_kind_level_compat(evidence_ref: &EvidenceRef) -> Result<(), EvidenceAnchorError> {
218 match evidence_ref.evidence_kind {
219 EvidenceKind::Page if evidence_ref.required_anchor_level != AnchorLevel::Page => {
220 return Err(EvidenceAnchorError::new(
221 "page evidence supports only required_anchor_level page",
222 ));
223 }
224 EvidenceKind::Text if evidence_ref.required_anchor_level != AnchorLevel::Text => {
225 return Err(EvidenceAnchorError::new(
226 "text evidence supports only required_anchor_level text",
227 ));
228 }
229 EvidenceKind::TextRegion
230 if !matches!(
231 evidence_ref.required_anchor_level,
232 AnchorLevel::Text | AnchorLevel::Bbox | AnchorLevel::TextBbox
233 ) =>
234 {
235 return Err(EvidenceAnchorError::new(
236 "text_region evidence supports only text, bbox, or text_bbox anchor levels",
237 ));
238 }
239 EvidenceKind::TableCell if evidence_ref.required_anchor_level != AnchorLevel::TableCell => {
240 return Err(EvidenceAnchorError::new(
241 "table_cell evidence supports only required_anchor_level table_cell",
242 ));
243 }
244 EvidenceKind::TableCell
245 if evidence_ref.locator.table_id.is_none() || evidence_ref.locator.cell.is_none() =>
246 {
247 return Err(EvidenceAnchorError::new(
248 "table_cell evidence requires table_id and cell",
249 ));
250 }
251 EvidenceKind::Region | EvidenceKind::Other => {}
252 _ => {}
253 }
254 Ok(())
255}
256
257fn validate_required_anchor_inputs(evidence_ref: &EvidenceRef) -> Result<(), EvidenceAnchorError> {
258 if anchor_requires_text(evidence_ref) && evidence_ref.expected_text.is_none() {
259 return Err(EvidenceAnchorError::new(
260 "required_anchor_level text or text_bbox requires expected_text",
261 ));
262 }
263 if requires_bbox(evidence_ref) && evidence_ref.locator.bbox.is_none() {
264 return Err(EvidenceAnchorError::new(
265 "required_anchor_level bbox or text_bbox requires locator.bbox",
266 ));
267 }
268 Ok(())
269}
270
271fn validate_required_page_locator(evidence_ref: &EvidenceRef) -> Result<(), EvidenceAnchorError> {
272 if page_locator_required(evidence_ref)
273 && evidence_ref.locator.page_index.is_none()
274 && evidence_ref.locator.page_id.is_none()
275 {
276 return Err(EvidenceAnchorError::new(
277 "page_index or page_id is required for this evidence ref",
278 ));
279 }
280 Ok(())
281}
282
283fn page_locator_required(evidence_ref: &EvidenceRef) -> bool {
284 matches!(evidence_ref.evidence_kind, EvidenceKind::Page)
285 || evidence_ref.locator.bbox.is_some()
286 || (evidence_ref.locator.element_id.is_none()
287 && evidence_ref.locator.span_id.is_none()
288 && evidence_ref.locator.table_id.is_none())
289}
290
291fn fingerprint_check(
292 request_fingerprint: Option<&str>,
293 source: &dyn GroundingSource,
294) -> FingerprintCheck {
295 match (request_fingerprint, source.fingerprint()) {
296 (None, _) => FingerprintCheck::NotChecked,
297 (Some(_), None) => FingerprintCheck::CapabilityLimited,
298 (Some(expected), Some(actual)) if expected == actual => FingerprintCheck::Matched,
299 (Some(_), Some(_)) => FingerprintCheck::Stale,
300 }
301}
302
303fn anchor_one(
304 index: &SourceIndex,
305 fingerprint: FingerprintCheck,
306 evidence_ref: &EvidenceRef,
307) -> EvidenceAnchor {
308 let mut checks = AnchorChecks {
309 fingerprint,
310 ..AnchorChecks::default()
311 };
312 let mut capability_limits = Vec::new();
313
314 if matches!(
315 evidence_ref.evidence_kind,
316 EvidenceKind::Region | EvidenceKind::Other
317 ) {
318 return anchor_result(
319 evidence_ref,
320 AnchorStatus::UnsupportedEvidenceKind,
321 AnchorLevel::None,
322 checks,
323 capability_limits,
324 );
325 }
326 if fingerprint == FingerprintCheck::Stale {
327 return anchor_result(
328 evidence_ref,
329 AnchorStatus::StaleFingerprint,
330 AnchorLevel::None,
331 checks,
332 capability_limits,
333 );
334 }
335 if fingerprint == FingerprintCheck::CapabilityLimited {
336 capability_limits.push(CapabilityLimit::MissingFingerprint);
337 }
338
339 let page = resolve_page(index, evidence_ref);
340 checks.page = page.check;
341 let mut achieved_page = page.check == PageCheck::Found;
342 let mut text_ok = false;
343 let mut bbox_ok = false;
344 let mut table_ok = false;
345
346 match evidence_ref.evidence_kind {
347 EvidenceKind::Page => {}
348 EvidenceKind::Text | EvidenceKind::TextRegion => {
349 if anchor_requires_text(evidence_ref) {
350 let text = resolve_text(index, evidence_ref, page.page_id.as_deref());
351 checks.text = text.check;
352 text_ok = text.check == TextCheck::Matched;
353 if text.check == TextCheck::CapabilityLimited {
354 capability_limits.push(CapabilityLimit::MissingSpans);
355 }
356 }
357 if requires_bbox(evidence_ref) {
358 let bbox = resolve_bbox(index, evidence_ref, page.page_id.as_deref());
359 checks.bbox = bbox;
360 bbox_ok = bbox == BboxCheck::Valid;
361 if bbox == BboxCheck::CapabilityLimited {
362 capability_limits.push(CapabilityLimit::UnknownCoordinateOrigin);
363 }
364 }
365 }
366 EvidenceKind::TableCell => {
367 let table = resolve_anchor_table_cell(index, evidence_ref);
368 checks.table_cell = table.check;
369 table_ok = table.check == TableCellCheck::Matched;
370 achieved_page = table.page_found;
371 if table.check == TableCellCheck::CapabilityLimited {
372 capability_limits.push(CapabilityLimit::MissingTables);
373 }
374 }
375 EvidenceKind::Region | EvidenceKind::Other => {}
376 }
377
378 capability_limits.sort_by_key(|limit| capability_limit_order(*limit));
379 capability_limits.dedup();
380 let achieved_anchor_level =
381 achieved_anchor_level(evidence_ref, achieved_page, text_ok, bbox_ok, table_ok);
382 let anchor_status = anchor_status(evidence_ref, &checks, &capability_limits);
383 anchor_result(
384 evidence_ref,
385 anchor_status,
386 achieved_anchor_level,
387 checks,
388 capability_limits,
389 )
390}
391
392fn anchor_result(
393 evidence_ref: &EvidenceRef,
394 anchor_status: AnchorStatus,
395 achieved_anchor_level: AnchorLevel,
396 checks: AnchorChecks,
397 capability_limits: Vec<CapabilityLimit>,
398) -> EvidenceAnchor {
399 EvidenceAnchor {
400 evidence_id: evidence_ref.evidence_id.clone(),
401 evidence_kind: evidence_ref.evidence_kind,
402 anchor_status,
403 required_anchor_level: evidence_ref.required_anchor_level,
404 achieved_anchor_level,
405 checks,
406 capability_limits,
407 }
408}
409
410struct PageResolution {
411 check: PageCheck,
412 page_id: Option<String>,
413}
414
415fn resolve_page(index: &SourceIndex, evidence_ref: &EvidenceRef) -> PageResolution {
416 if let Some(page_id) = evidence_ref.locator.page_id.as_deref() {
417 return if index.pages.iter().any(|page| page.id == page_id) {
418 PageResolution {
419 check: PageCheck::Found,
420 page_id: Some(page_id.to_string()),
421 }
422 } else {
423 PageResolution {
424 check: PageCheck::NotFound,
425 page_id: None,
426 }
427 };
428 }
429 if let Some(page_index) = evidence_ref.locator.page_index {
430 return index
431 .pages
432 .iter()
433 .find(|page| page.index == page_index)
434 .map(|page| PageResolution {
435 check: PageCheck::Found,
436 page_id: Some(page.id.clone()),
437 })
438 .unwrap_or(PageResolution {
439 check: PageCheck::NotFound,
440 page_id: None,
441 });
442 }
443 PageResolution {
444 check: PageCheck::NotChecked,
445 page_id: None,
446 }
447}
448
449struct TextResolution {
450 check: TextCheck,
451}
452
453fn resolve_text(
454 index: &SourceIndex,
455 evidence_ref: &EvidenceRef,
456 page_id: Option<&str>,
457) -> TextResolution {
458 let Some(expected_text) = evidence_ref.expected_text.as_deref() else {
459 return TextResolution {
460 check: TextCheck::NotFound,
461 };
462 };
463 if let Some(span_id) = evidence_ref.locator.span_id.as_deref() {
464 if !index.capabilities.spans {
465 return TextResolution {
466 check: TextCheck::CapabilityLimited,
467 };
468 }
469 return match index.span(span_id) {
470 Some(span) => TextResolution {
471 check: text_check(expected_text, &span.text),
472 },
473 None => TextResolution {
474 check: TextCheck::NotFound,
475 },
476 };
477 }
478 if let Some(element_id) = evidence_ref.locator.element_id.as_deref() {
479 return index
480 .element_by_id
481 .get(element_id)
482 .and_then(|position| index.elements.get(*position))
483 .and_then(|element| element.text.as_deref())
484 .map(|actual| TextResolution {
485 check: text_check(expected_text, actual),
486 })
487 .unwrap_or(TextResolution {
488 check: TextCheck::NotFound,
489 });
490 }
491 let Some(page_id) = page_id else {
492 return TextResolution {
493 check: TextCheck::NotFound,
494 };
495 };
496 if index
497 .elements
498 .iter()
499 .filter(|element| element.page == page_id)
500 .filter_map(|element| element.text.as_deref())
501 .any(|actual| text_check(expected_text, actual) == TextCheck::Matched)
502 {
503 return TextResolution {
504 check: TextCheck::Matched,
505 };
506 }
507 if index
508 .spans
509 .iter()
510 .filter(|span| span.page == page_id)
511 .any(|span| text_check(expected_text, &span.text) == TextCheck::Matched)
512 {
513 return TextResolution {
514 check: TextCheck::Matched,
515 };
516 }
517 TextResolution {
518 check: if index.elements.iter().any(|element| element.page == page_id)
519 || index.spans.iter().any(|span| span.page == page_id)
520 {
521 TextCheck::Mismatch
522 } else {
523 TextCheck::NotFound
524 },
525 }
526}
527
528fn resolve_bbox(
529 index: &SourceIndex,
530 evidence_ref: &EvidenceRef,
531 page_id: Option<&str>,
532) -> BboxCheck {
533 let Some(bbox) = evidence_ref.locator.bbox else {
534 return BboxCheck::NotChecked;
535 };
536 if index.capabilities.coordinate_origin != CoordinateOrigin::TopLeft {
537 return BboxCheck::CapabilityLimited;
538 }
539 let Some(page_id) = page_id else {
540 return BboxCheck::NotFound;
541 };
542 let tolerance = VerificationConfig::default_v1()
543 .matching
544 .bbox_containment_tolerance_q
545 .unwrap_or(0);
546 if index
547 .elements
548 .iter()
549 .any(|element| element.page == page_id && contains_bbox(element.bbox, bbox, tolerance))
550 || index
551 .spans
552 .iter()
553 .any(|span| span.page == page_id && contains_bbox(span.bbox, bbox, tolerance))
554 || index
555 .tables
556 .iter()
557 .any(|table| table.page == page_id && contains_bbox(table.bbox, bbox, tolerance))
558 {
559 BboxCheck::Valid
560 } else {
561 BboxCheck::NotFound
562 }
563}
564
565struct TableResolution {
566 check: TableCellCheck,
567 page_found: bool,
568}
569
570fn resolve_anchor_table_cell(index: &SourceIndex, evidence_ref: &EvidenceRef) -> TableResolution {
571 if !index.capabilities.tables {
572 return TableResolution {
573 check: TableCellCheck::CapabilityLimited,
574 page_found: false,
575 };
576 }
577 let Some(table_id) = evidence_ref.locator.table_id.as_deref() else {
578 return TableResolution {
579 check: TableCellCheck::NotFound,
580 page_found: false,
581 };
582 };
583 let Some(cell_ref) = evidence_ref.locator.cell else {
584 return TableResolution {
585 check: TableCellCheck::NotFound,
586 page_found: false,
587 };
588 };
589 let Some(table) = index.table(table_id) else {
590 return TableResolution {
591 check: TableCellCheck::NotFound,
592 page_found: false,
593 };
594 };
595 let page_found = index.pages.iter().any(|page| page.id == table.page);
596 let Some(cell) = table
597 .cells
598 .iter()
599 .find(|cell| table_cell_covers(cell, cell_ref.row, cell_ref.col))
600 else {
601 return TableResolution {
602 check: TableCellCheck::NotFound,
603 page_found,
604 };
605 };
606 let check = match evidence_ref.expected_text.as_deref() {
607 Some(expected) => {
608 if table_cell_text_matches(expected, &cell.text) {
609 TableCellCheck::Matched
610 } else {
611 TableCellCheck::Mismatch
612 }
613 }
614 None => TableCellCheck::Matched,
615 };
616 TableResolution { check, page_found }
617}
618
619fn anchor_requires_text(evidence_ref: &EvidenceRef) -> bool {
620 matches!(
621 evidence_ref.required_anchor_level,
622 AnchorLevel::Text | AnchorLevel::TextBbox
623 )
624}
625
626fn requires_bbox(evidence_ref: &EvidenceRef) -> bool {
627 matches!(
628 evidence_ref.required_anchor_level,
629 AnchorLevel::Bbox | AnchorLevel::TextBbox
630 )
631}
632
633fn text_check(expected: &str, actual: &str) -> TextCheck {
634 if normalize_expected_text(actual).contains(&normalize_expected_text(expected)) {
635 TextCheck::Matched
636 } else {
637 TextCheck::Mismatch
638 }
639}
640
641fn table_cell_text_matches(expected: &str, actual: &str) -> bool {
642 normalize_expected_text(actual) == normalize_expected_text(expected)
643}
644
/// Normalizes text for anchor comparison and hashing by delegating to
/// `normalize_quote`.
///
/// NOTE(review): the exact normalization rules live in `normalize_quote`,
/// which is defined outside this section. It is presumably the whitespace
/// collapsing implied by the `ethos_collapse_whitespace_v1` profile; confirm.
fn normalize_expected_text(input: &str) -> String {
    normalize_quote(input)
}
648
649fn capability_limit_order(limit: CapabilityLimit) -> u8 {
650 match limit {
651 CapabilityLimit::MissingSpans => 0,
652 CapabilityLimit::MissingCharOffsets => 1,
653 CapabilityLimit::MissingTables => 2,
654 CapabilityLimit::MissingFingerprint => 3,
655 CapabilityLimit::UnknownCoordinateOrigin => 4,
656 CapabilityLimit::MissingCropSupport => 5,
657 }
658}
659
660fn sha256_hex(bytes: &[u8]) -> String {
661 let mut hasher = Sha256::new();
662 hasher.update(bytes);
663 format!("{:x}", hasher.finalize())
664}
665
666fn achieved_anchor_level(
667 evidence_ref: &EvidenceRef,
668 page_ok: bool,
669 text_ok: bool,
670 bbox_ok: bool,
671 table_ok: bool,
672) -> AnchorLevel {
673 match evidence_ref.evidence_kind {
674 EvidenceKind::Page if page_ok => AnchorLevel::Page,
675 EvidenceKind::Text if text_ok => AnchorLevel::Text,
676 EvidenceKind::TextRegion if text_ok && bbox_ok => AnchorLevel::TextBbox,
677 EvidenceKind::TextRegion if text_ok => AnchorLevel::Text,
678 EvidenceKind::TextRegion if bbox_ok => AnchorLevel::Bbox,
679 EvidenceKind::TableCell if table_ok => AnchorLevel::TableCell,
680 _ => AnchorLevel::None,
681 }
682}
683
684fn anchor_status(
685 evidence_ref: &EvidenceRef,
686 checks: &AnchorChecks,
687 capability_limits: &[CapabilityLimit],
688) -> AnchorStatus {
689 if checks.page == PageCheck::NotFound
690 || checks.text == TextCheck::NotFound
691 || checks.bbox == BboxCheck::NotFound
692 || checks.table_cell == TableCellCheck::NotFound
693 {
694 return AnchorStatus::NotFound;
695 }
696 if checks.text == TextCheck::Mismatch
697 || checks.bbox == BboxCheck::Invalid
698 || checks.table_cell == TableCellCheck::Mismatch
699 {
700 return AnchorStatus::Mismatch;
701 }
702 if checks.fingerprint == FingerprintCheck::CapabilityLimited
703 || checks.text == TextCheck::CapabilityLimited
704 || checks.bbox == BboxCheck::CapabilityLimited
705 || checks.table_cell == TableCellCheck::CapabilityLimited
706 || !capability_limits.is_empty()
707 {
708 return AnchorStatus::CapabilityLimited;
709 }
710 let bound = match evidence_ref.required_anchor_level {
711 AnchorLevel::Page => checks.page == PageCheck::Found,
712 AnchorLevel::Text => checks.text == TextCheck::Matched,
713 AnchorLevel::Bbox => checks.bbox == BboxCheck::Valid,
714 AnchorLevel::TextBbox => {
715 checks.text == TextCheck::Matched && checks.bbox == BboxCheck::Valid
716 }
717 AnchorLevel::TableCell => checks.table_cell == TableCellCheck::Matched,
718 AnchorLevel::None => false,
719 };
720 if bound {
721 AnchorStatus::Bound
722 } else {
723 AnchorStatus::NotFound
724 }
725}
726
727impl CitationInput {
728 pub fn claims(&self) -> &[Claim] {
730 match self {
731 CitationInput::Claims(claims) => claims,
732 CitationInput::Envelope(envelope) => &envelope.claims,
733 }
734 }
735
736 pub fn document_fingerprint(&self) -> Option<&str> {
738 match self {
739 CitationInput::Claims(_) => None,
740 CitationInput::Envelope(envelope) => envelope.document_fingerprint.as_deref(),
741 }
742 }
743
744 fn into_parts(self) -> (Option<String>, Vec<Claim>) {
745 match self {
746 CitationInput::Claims(claims) => (None, claims),
747 CitationInput::Envelope(envelope) => (envelope.document_fingerprint, envelope.claims),
748 }
749 }
750}
751
752pub fn capability_warnings(
756 source: &dyn GroundingSource,
757 config: &VerificationConfig,
758) -> Vec<WarningCode> {
759 if capability_limits(source, config).is_empty() {
760 Vec::new()
761 } else {
762 vec![WarningCode::CapabilityLimited]
763 }
764}
765
766pub fn capability_limits(
770 source: &dyn GroundingSource,
771 config: &VerificationConfig,
772) -> Vec<CapabilityLimit> {
773 capability_limits_for(source.capabilities(), config)
774}
775
776fn capability_limits_for(
777 caps: ethos_core::grounding::Capabilities,
778 config: &VerificationConfig,
779) -> Vec<CapabilityLimit> {
780 let mut limits = Vec::new();
781 if !caps.fingerprint && config.staleness.require_fingerprint_match {
782 limits.push(CapabilityLimit::MissingFingerprint);
783 }
784 if !caps.spans {
785 limits.push(CapabilityLimit::MissingSpans);
786 }
787 if !caps.char_offsets {
788 limits.push(CapabilityLimit::MissingCharOffsets);
789 }
790 if !caps.tables && config.claim_kinds.contains(&ClaimKind::TableCell) {
791 limits.push(CapabilityLimit::MissingTables);
792 }
793 if caps.coordinate_origin == CoordinateOrigin::Unknown {
794 limits.push(CapabilityLimit::UnknownCoordinateOrigin);
795 }
796 if config.evidence.is_some_and(|e| e.include_crops) && !caps.crop_support {
797 limits.push(CapabilityLimit::MissingCropSupport);
798 }
799 limits
800}
801
802fn push_warning(warnings: &mut Vec<WarningCode>, warning: WarningCode) {
803 if !warnings.contains(&warning) {
804 warnings.push(warning);
805 }
806}
807
808pub fn verify_claims(
810 source: &dyn GroundingSource,
811 citations: CitationInput,
812 config: &VerificationConfig,
813 config_sha256: String,
814) -> VerificationReport {
815 let (citation_fingerprint, claims) = citations.into_parts();
816 let index = SourceIndex::new(source);
817 let source_fingerprint = source.fingerprint();
818 let capability_limits = capability_limits_for(index.capabilities, config);
819 let warnings = if capability_limits.is_empty() {
820 Vec::new()
821 } else {
822 vec![WarningCode::CapabilityLimited]
823 };
824 let fingerprint_stale = config.staleness.require_fingerprint_match
825 && matches!(
826 (citation_fingerprint.as_deref(), source_fingerprint.as_deref()),
827 (Some(expected), Some(actual)) if expected != actual
828 );
829 let fingerprint_unverifiable = config.staleness.require_fingerprint_match
830 && citation_fingerprint.is_some()
831 && source_fingerprint.is_none();
832 let citation_fingerprint_missing = config.staleness.require_fingerprint_match
833 && citation_fingerprint.is_none()
834 && source_fingerprint.is_some();
835 let include_text = config.evidence.is_some_and(|e| e.include_text);
836 let include_crops = config.evidence.is_some_and(|e| e.include_crops);
837 let mut unsupported = Vec::new();
838 let checks: Vec<Check> = claims
839 .into_iter()
840 .enumerate()
841 .map(|(idx, claim)| {
842 check_claim(
843 idx + 1,
844 source,
845 &index,
846 claim,
847 config,
848 CheckContext {
849 fingerprint_stale,
850 fingerprint_unverifiable,
851 citation_fingerprint_missing,
852 include_text,
853 include_crops,
854 },
855 &mut unsupported,
856 )
857 })
858 .collect();
859
860 VerificationReport {
861 schema_version: ethos_core::SCHEMA_VERSION.to_string(),
862 document_fingerprint: source_fingerprint,
863 verification_config_sha256: config_sha256,
864 grounding: GroundingMeta {
865 parser: source.parser(),
866 capabilities: index.capabilities,
867 },
868 capability_limits,
869 fingerprint_stale,
870 all_evidence_grounded: compute_all_evidence_grounded(
871 &checks,
872 &unsupported,
873 fingerprint_stale,
874 ),
875 checks,
876 unsupported_claim_kinds: unsupported,
877 warnings,
878 }
879}
880
/// Per-run flags shared by every claim check in one `verify_claims` call.
#[derive(Clone, Copy, Debug)]
struct CheckContext {
    /// Citation and source fingerprints are both present and differ.
    fingerprint_stale: bool,
    /// The citation carries a fingerprint but the source exposes none.
    fingerprint_unverifiable: bool,
    /// The source exposes a fingerprint but the citation carries none.
    citation_fingerprint_missing: bool,
    /// Forwarded to `make_evidence`: whether to include text in the evidence.
    include_text: bool,
    /// Forwarded to `make_evidence`: whether to include crops in the evidence.
    include_crops: bool,
}
889
890fn check_claim(
891 id: usize,
892 source: &dyn GroundingSource,
893 index: &SourceIndex,
894 claim: Claim,
895 config: &VerificationConfig,
896 context: CheckContext,
897 unsupported: &mut Vec<String>,
898) -> Check {
899 let mut warnings = Vec::new();
900 let check_id = format!("v{id:04}");
901
902 if !claim.citation.has_locator() {
903 return Check {
904 id: check_id,
905 claim,
906 status: CheckStatus::Error,
907 reason: Some(CheckReason::MissingLocator),
908 match_method: MatchMethod::None,
909 semantic_unverified: false,
910 evidence: None,
911 warnings,
912 };
913 }
914
915 if !is_supported_kind(claim.kind) || !config.claim_kinds.contains(&claim.kind) {
916 push_unsupported(unsupported, claim.kind);
917 return Check {
918 id: check_id,
919 claim,
920 status: CheckStatus::UnsupportedClaimKind,
921 reason: Some(CheckReason::UnsupportedClaimKind),
922 match_method: MatchMethod::None,
923 semantic_unverified: false,
924 evidence: None,
925 warnings,
926 };
927 }
928
929 if requires_text(claim.kind)
930 && claim
931 .text
932 .as_deref()
933 .is_none_or(|text| text.trim().is_empty())
934 {
935 return Check {
936 id: check_id,
937 claim,
938 status: CheckStatus::Error,
939 reason: Some(CheckReason::MissingRequiredText),
940 match_method: MatchMethod::None,
941 semantic_unverified: false,
942 evidence: None,
943 warnings,
944 };
945 }
946
947 if context.fingerprint_stale {
948 return Check {
949 id: check_id,
950 claim,
951 status: CheckStatus::Stale,
952 reason: Some(CheckReason::StaleFingerprint),
953 match_method: MatchMethod::None,
954 semantic_unverified: false,
955 evidence: None,
956 warnings,
957 };
958 }
959
960 if context.fingerprint_unverifiable {
961 push_warning(&mut warnings, WarningCode::CapabilityLimited);
962 return Check {
963 id: check_id,
964 claim,
965 status: CheckStatus::CapabilityBlocked,
966 reason: Some(CheckReason::MissingSourceFingerprint),
967 match_method: MatchMethod::None,
968 semantic_unverified: false,
969 evidence: None,
970 warnings,
971 };
972 }
973
974 if context.citation_fingerprint_missing {
975 return Check {
976 id: check_id,
977 claim,
978 status: CheckStatus::Stale,
979 reason: Some(CheckReason::MissingCitationFingerprint),
980 match_method: MatchMethod::None,
981 semantic_unverified: false,
982 evidence: None,
983 warnings,
984 };
985 }
986
987 let mut target = match resolve_target(index, &claim, config) {
988 TargetResolution::Found(target) => target,
989 TargetResolution::NotFound(reason) => {
990 return Check {
991 id: check_id,
992 claim,
993 status: CheckStatus::NotFound,
994 reason: Some(reason),
995 match_method: MatchMethod::None,
996 semantic_unverified: false,
997 evidence: None,
998 warnings,
999 };
1000 }
1001 TargetResolution::Invalid(reason) => {
1002 return Check {
1003 id: check_id,
1004 claim,
1005 status: CheckStatus::Error,
1006 reason: Some(reason),
1007 match_method: MatchMethod::None,
1008 semantic_unverified: false,
1009 evidence: None,
1010 warnings,
1011 };
1012 }
1013 TargetResolution::CapabilityBlocked(reason) => {
1014 push_warning(&mut warnings, WarningCode::CapabilityLimited);
1015 return Check {
1016 id: check_id,
1017 claim,
1018 status: CheckStatus::CapabilityBlocked,
1019 reason: Some(reason),
1020 match_method: MatchMethod::None,
1021 semantic_unverified: false,
1022 evidence: None,
1023 warnings,
1024 };
1025 }
1026 };
1027
1028 if let Some(adjacent_target) = adjacent_quote_target(index, &claim, &target, config) {
1029 target = adjacent_target;
1030 }
1031
1032 let evidence = make_evidence(source, &target, context.include_text, context.include_crops);
1033 let (status, match_method, reason) =
1034 check_resolved_claim(claim.kind, claim.text.as_deref(), &target, config);
1035 Check {
1036 id: check_id,
1037 claim,
1038 status,
1039 reason,
1040 match_method,
1041 semantic_unverified: false,
1042 evidence,
1043 warnings,
1044 }
1045}
1046
1047fn check_resolved_claim(
1048 kind: ClaimKind,
1049 expected_text: Option<&str>,
1050 target: &FoundTarget,
1051 config: &VerificationConfig,
1052) -> (CheckStatus, MatchMethod, Option<CheckReason>) {
1053 match kind {
1054 ClaimKind::Presence => check_presence_claim(),
1055 ClaimKind::Quote | ClaimKind::Value | ClaimKind::TableCell => {
1056 check_text_claim(kind, expected_text, target, config)
1057 }
1058 _ => unreachable!("unsupported kinds returned before matching"),
1059 }
1060}
1061
1062fn check_presence_claim() -> (CheckStatus, MatchMethod, Option<CheckReason>) {
1063 (CheckStatus::Grounded, MatchMethod::PresenceOnly, None)
1064}
1065
1066fn check_text_claim(
1067 kind: ClaimKind,
1068 expected_text: Option<&str>,
1069 target: &FoundTarget,
1070 config: &VerificationConfig,
1071) -> (CheckStatus, MatchMethod, Option<CheckReason>) {
1072 let match_method = if target.from_table_cell {
1073 MatchMethod::TableCellLookup
1074 } else {
1075 text_match_method(kind, config)
1076 };
1077 let (status, reason) = match (expected_text, target.text.as_deref()) {
1078 (Some(expected), Some(actual)) if text_matches(kind, expected, actual, config) => {
1079 (CheckStatus::Grounded, None)
1080 }
1081 _ => (CheckStatus::Mismatch, Some(CheckReason::TextMismatch)),
1082 };
1083 (status, match_method, reason)
1084}
1085
1086fn is_supported_kind(kind: ClaimKind) -> bool {
1087 matches!(
1088 kind,
1089 ClaimKind::Quote | ClaimKind::Value | ClaimKind::Presence | ClaimKind::TableCell
1090 )
1091}
1092
1093fn requires_text(kind: ClaimKind) -> bool {
1094 matches!(
1095 kind,
1096 ClaimKind::Quote | ClaimKind::Value | ClaimKind::TableCell
1097 )
1098}
1099
1100fn push_unsupported(unsupported: &mut Vec<String>, kind: ClaimKind) {
1101 let name = claim_kind_name(kind).to_string();
1102 if !unsupported.contains(&name) {
1103 unsupported.push(name);
1104 }
1105}
1106
1107fn claim_kind_name(kind: ClaimKind) -> &'static str {
1108 match kind {
1109 ClaimKind::Quote => "quote",
1110 ClaimKind::Value => "value",
1111 ClaimKind::Presence => "presence",
1112 ClaimKind::TableCell => "table_cell",
1113 ClaimKind::Region => "region",
1114 ClaimKind::Other => "other",
1115 }
1116}
1117
/// A source location that a claim's citation resolved to.
#[derive(Clone, Debug)]
struct FoundTarget {
    /// Page of the target, when known. NOTE(review): presumably a page id;
    /// confirm against `resolve_target`.
    page: Option<String>,
    /// Bounding box of the target, when known.
    bbox: Option<[i64; 4]>,
    /// Text of the target; claim text is compared against this.
    text: Option<String>,
    /// `true` when the target came from a table cell; selects
    /// `MatchMethod::TableCellLookup` in the text check.
    from_table_cell: bool,
    /// NOTE(review): presumably the position of the backing element in
    /// `SourceIndex::elements`; confirm against `resolve_target`.
    element_index: Option<usize>,
}
1126
/// Collections read once from a `GroundingSource`, together with
/// id-to-position lookup maps over them.
struct SourceIndex {
    /// Capabilities reported by the source.
    capabilities: ethos_core::grounding::Capabilities,
    /// Page geometry reported by the source.
    pages: Vec<PageGeometry>,
    /// Elements in the order the source returned them.
    elements: Vec<GroundingElement>,
    /// Spans from the source; left empty when the spans capability is off.
    spans: Vec<GroundingSpan>,
    /// Tables from the source; left empty when the tables capability is off.
    tables: Vec<GroundingTable>,
    /// Element id -> position in `elements`.
    element_by_id: BTreeMap<String, usize>,
    /// Span id -> position in `spans`.
    span_by_id: BTreeMap<String, usize>,
    /// Table id -> position in `tables`.
    table_by_id: BTreeMap<String, usize>,
}
1142
1143impl SourceIndex {
1144 fn new(source: &dyn GroundingSource) -> Self {
1145 let capabilities = source.capabilities();
1146 let pages = source.pages();
1147 let elements = source.elements();
1148 let spans = if capabilities.spans {
1149 source.spans()
1150 } else {
1151 Vec::new()
1152 };
1153 let tables = if capabilities.tables {
1154 source.tables()
1155 } else {
1156 Vec::new()
1157 };
1158 let element_by_id = index_elements(&elements);
1159 let span_by_id = index_spans(&spans);
1160 let table_by_id = index_tables(&tables);
1161
1162 SourceIndex {
1163 capabilities,
1164 pages,
1165 elements,
1166 spans,
1167 tables,
1168 element_by_id,
1169 span_by_id,
1170 table_by_id,
1171 }
1172 }
1173
1174 fn span(&self, id: &str) -> Option<&GroundingSpan> {
1175 self.span_by_id
1176 .get(id)
1177 .and_then(|index| self.spans.get(*index))
1178 }
1179
1180 fn table(&self, id: &str) -> Option<&GroundingTable> {
1181 self.table_by_id
1182 .get(id)
1183 .and_then(|index| self.tables.get(*index))
1184 }
1185}
1186
1187fn index_elements(elements: &[GroundingElement]) -> BTreeMap<String, usize> {
1188 let mut index = BTreeMap::new();
1189 for (position, element) in elements.iter().enumerate() {
1190 index.entry(element.id.clone()).or_insert(position);
1191 }
1192 index
1193}
1194
1195fn index_spans(spans: &[GroundingSpan]) -> BTreeMap<String, usize> {
1196 let mut index = BTreeMap::new();
1197 for (position, span) in spans.iter().enumerate() {
1198 index.entry(span.id.clone()).or_insert(position);
1199 }
1200 index
1201}
1202
1203fn index_tables(tables: &[GroundingTable]) -> BTreeMap<String, usize> {
1204 let mut index = BTreeMap::new();
1205 for (position, table) in tables.iter().enumerate() {
1206 index.entry(table.id.clone()).or_insert(position);
1207 }
1208 index
1209}
1210
/// Outcome of resolving a claim's citation against the indexed source.
enum TargetResolution {
    /// The locator resolved to a concrete target.
    Found(FoundTarget),
    /// The locator did not resolve to anything in the source (or no locator
    /// was given); the reason says which lookup failed.
    NotFound(CheckReason),
    /// The locator itself is incomplete, e.g. a bbox without a page or a
    /// table-cell citation missing its table id or cell.
    Invalid(CheckReason),
    /// The source lacks a capability needed to resolve this locator.
    CapabilityBlocked(CheckReason),
}
1217
1218fn resolve_target(
1219 index: &SourceIndex,
1220 claim: &Claim,
1221 config: &VerificationConfig,
1222) -> TargetResolution {
1223 if claim.kind == ClaimKind::TableCell
1224 || claim.citation.table_id.is_some()
1225 || claim.citation.cell.is_some()
1226 {
1227 return resolve_table_cell(index, claim);
1228 }
1229
1230 if let Some(span_id) = claim.citation.span_id.as_deref() {
1231 if !index.capabilities.spans {
1232 return TargetResolution::CapabilityBlocked(CheckReason::MissingSpanCapability);
1233 }
1234 return index
1235 .span(span_id)
1236 .map(target_from_span)
1237 .map(TargetResolution::Found)
1238 .unwrap_or(TargetResolution::NotFound(CheckReason::SpanNotFound));
1239 }
1240
1241 if let Some(element_id) = claim.citation.element_id.as_deref() {
1242 return index
1243 .element_by_id
1244 .get(element_id)
1245 .and_then(|position| {
1246 index
1247 .elements
1248 .get(*position)
1249 .map(|element| (*position, element))
1250 })
1251 .map(|(position, element)| target_from_element(element, Some(position)))
1252 .map(TargetResolution::Found)
1253 .unwrap_or(TargetResolution::NotFound(CheckReason::ElementNotFound));
1254 }
1255
1256 if let (Some(page), Some(bbox)) = (claim.citation.page.as_deref(), claim.citation.bbox) {
1257 if index.capabilities.coordinate_origin == CoordinateOrigin::Unknown {
1258 return TargetResolution::CapabilityBlocked(CheckReason::UnknownCoordinateOrigin);
1259 }
1260 let tolerance = config.matching.bbox_containment_tolerance_q.unwrap_or(0);
1261 return index
1262 .elements
1263 .iter()
1264 .enumerate()
1265 .filter(|(_, element)| {
1266 element.page == page && contains_bbox(element.bbox, bbox, tolerance)
1267 })
1268 .min_by_key(|(position, element)| (bbox_area(element.bbox), *position))
1269 .map(|(position, element)| target_from_element(element, Some(position)))
1270 .map(TargetResolution::Found)
1271 .unwrap_or(TargetResolution::NotFound(CheckReason::BboxNotFound));
1272 }
1273
1274 if claim.citation.bbox.is_some() {
1275 return TargetResolution::Invalid(CheckReason::MissingPageForBbox);
1276 }
1277
1278 if let Some(page) = claim.citation.page.as_deref() {
1279 return index
1280 .pages
1281 .iter()
1282 .find(|candidate| candidate.id == page)
1283 .map(|found| {
1284 TargetResolution::Found(FoundTarget {
1285 page: Some(found.id.clone()),
1286 bbox: Some([0, 0, found.width, found.height]),
1287 text: None,
1288 from_table_cell: false,
1289 element_index: None,
1290 })
1291 })
1292 .unwrap_or(TargetResolution::NotFound(CheckReason::PageNotFound));
1293 }
1294
1295 TargetResolution::NotFound(CheckReason::MissingLocator)
1296}
1297
1298fn target_from_element(element: &GroundingElement, element_index: Option<usize>) -> FoundTarget {
1299 FoundTarget {
1300 page: Some(element.page.clone()),
1301 bbox: Some(element.bbox),
1302 text: element.text.clone(),
1303 from_table_cell: false,
1304 element_index,
1305 }
1306}
1307
1308fn target_from_span(span: &GroundingSpan) -> FoundTarget {
1309 FoundTarget {
1310 page: Some(span.page.clone()),
1311 bbox: Some(span.bbox),
1312 text: Some(span.text.clone()),
1313 from_table_cell: false,
1314 element_index: None,
1315 }
1316}
1317
1318fn resolve_table_cell(index: &SourceIndex, claim: &Claim) -> TargetResolution {
1319 let Some(table_id) = claim.citation.table_id.as_deref() else {
1320 return TargetResolution::Invalid(CheckReason::MissingTableCellLocator);
1321 };
1322 let Some(cell_ref) = claim.citation.cell else {
1323 return TargetResolution::Invalid(CheckReason::MissingTableCellLocator);
1324 };
1325 if !index.capabilities.tables {
1326 return TargetResolution::CapabilityBlocked(CheckReason::MissingTableCapability);
1327 }
1328 let Some(table) = index.table(table_id) else {
1329 return TargetResolution::NotFound(CheckReason::TableNotFound);
1330 };
1331 target_from_table_cell(table, cell_ref.row, cell_ref.col)
1332 .map(TargetResolution::Found)
1333 .unwrap_or(TargetResolution::NotFound(CheckReason::TableCellNotFound))
1334}
1335
1336fn target_from_table_cell(table: &GroundingTable, row: u32, col: u32) -> Option<FoundTarget> {
1337 table
1338 .cells
1339 .iter()
1340 .find(|cell| table_cell_covers(cell, row, col))
1341 .map(|cell| target_from_cell(&table.page, cell))
1342}
1343
1344fn table_cell_covers(cell: &GroundingCell, row: u32, col: u32) -> bool {
1345 let row_end = cell.row.saturating_add(cell.row_span.max(1));
1346 let col_end = cell.col.saturating_add(cell.col_span.max(1));
1347 row >= cell.row && row < row_end && col >= cell.col && col < col_end
1348}
1349
1350fn target_from_cell(page: &str, cell: &GroundingCell) -> FoundTarget {
1351 FoundTarget {
1352 page: Some(page.to_string()),
1353 bbox: Some(cell.bbox),
1354 text: Some(cell.text.clone()),
1355 from_table_cell: true,
1356 element_index: None,
1357 }
1358}
1359
1360fn adjacent_quote_target(
1361 index: &SourceIndex,
1362 claim: &Claim,
1363 target: &FoundTarget,
1364 config: &VerificationConfig,
1365) -> Option<FoundTarget> {
1366 if claim.kind != ClaimKind::Quote {
1367 return None;
1368 }
1369 let expected = claim.text.as_deref()?;
1370 if target
1371 .text
1372 .as_deref()
1373 .is_some_and(|actual| text_matches(ClaimKind::Quote, expected, actual, config))
1374 {
1375 return None;
1376 }
1377
1378 if claim.citation.bbox.is_some() {
1379 return None;
1380 }
1381
1382 if claim.citation.element_id.is_some() {
1383 if let Some(position) = target.element_index {
1384 return adjacent_text_pair_for_element(index, position, expected, config);
1385 }
1386 }
1387
1388 None
1389}
1390
1391fn adjacent_text_pair_for_element(
1392 index: &SourceIndex,
1393 position: usize,
1394 expected: &str,
1395 config: &VerificationConfig,
1396) -> Option<FoundTarget> {
1397 let current = index.elements.get(position)?;
1398 if let Some(second) = position
1399 .checked_add(1)
1400 .and_then(|next| index.elements.get(next))
1401 {
1402 if let Some(target) = adjacent_text_pair_target(current, second, expected, config) {
1403 return Some(target);
1404 }
1405 }
1406 position
1407 .checked_sub(1)
1408 .and_then(|previous| index.elements.get(previous))
1409 .and_then(|first| adjacent_text_pair_target(first, current, expected, config))
1410}
1411
1412fn adjacent_text_pair_target(
1413 first: &GroundingElement,
1414 second: &GroundingElement,
1415 expected: &str,
1416 config: &VerificationConfig,
1417) -> Option<FoundTarget> {
1418 if first.page != second.page {
1419 return None;
1420 }
1421 if !element_bboxes_are_adjacent(first.bbox, second.bbox) {
1422 return None;
1423 }
1424 let first_text = first.text.as_deref()?;
1425 let second_text = second.text.as_deref()?;
1426 let joined = join_adjacent_text(first_text, second_text, config);
1427 if text_matches(ClaimKind::Quote, expected, first_text, config)
1428 || text_matches(ClaimKind::Quote, expected, second_text, config)
1429 || !text_matches(ClaimKind::Quote, expected, &joined, config)
1430 {
1431 return None;
1432 }
1433
1434 Some(FoundTarget {
1435 page: Some(first.page.clone()),
1436 bbox: Some(union_bbox(first.bbox, second.bbox)),
1437 text: Some(joined),
1438 from_table_cell: false,
1439 element_index: None,
1440 })
1441}
1442
1443fn join_adjacent_text(first: &str, second: &str, config: &VerificationConfig) -> String {
1444 let joined = format!("{first} {second}");
1445 match config.matching.text_normalization {
1446 TextNormalization::None => joined,
1447 TextNormalization::CollapseWhitespace => normalize_quote(&joined),
1448 }
1449}
1450
/// Area of a bbox, computed as (index 2 - index 0) * (index 3 - index 1).
///
/// Each side is computed with saturating `i64` subtraction and a negative
/// side counts as zero, so inverted boxes have area zero.
fn bbox_area(bbox: [i64; 4]) -> u128 {
    let [x0, y0, x1, y1] = bbox;
    // A negative side fails the conversion and falls back to 0.
    let side = |low: i64, high: i64| u128::try_from(high.saturating_sub(low)).unwrap_or(0);
    side(x0, x1).saturating_mul(side(y0, y1))
}
1456
1457fn element_bboxes_are_adjacent(first: [i64; 4], second: [i64; 4]) -> bool {
1458 let same_line =
1459 ranges_overlap_i64(first[1], first[3], second[1], second[3]) && first[2] == second[0];
1460 let stacked =
1461 ranges_overlap_i64(first[0], first[2], second[0], second[2]) && first[3] == second[1];
1462 same_line || stacked
1463}
1464
/// Tests whether ranges `a` and `b` overlap: each must start strictly before
/// the other ends, so ranges that only touch at an endpoint do not overlap.
fn ranges_overlap_i64(a_start: i64, a_end: i64, b_start: i64, b_end: i64) -> bool {
    let a_starts_after_b = a_start >= b_end;
    let b_starts_after_a = b_start >= a_end;
    !(a_starts_after_b || b_starts_after_a)
}
1468
/// Smallest bbox enclosing both inputs: component-wise minimum of indices 0
/// and 1, component-wise maximum of indices 2 and 3.
fn union_bbox(left: [i64; 4], right: [i64; 4]) -> [i64; 4] {
    let [lx0, ly0, lx1, ly1] = left;
    let [rx0, ry0, rx1, ry1] = right;
    [lx0.min(rx0), ly0.min(ry0), lx1.max(rx1), ly1.max(ry1)]
}
1477
1478fn make_evidence(
1479 source: &dyn GroundingSource,
1480 target: &FoundTarget,
1481 include_text: bool,
1482 include_crops: bool,
1483) -> Option<Evidence> {
1484 let crop_ref = if include_crops && source.capabilities().crop_support {
1485 target
1486 .page
1487 .as_deref()
1488 .zip(target.bbox)
1489 .and_then(|(page, bbox)| source.crop_ref(page, bbox))
1490 } else {
1491 None
1492 };
1493 Some(Evidence {
1494 text: include_text.then(|| target.text.clone()).flatten(),
1495 page: target.page.clone(),
1496 bbox: target.bbox,
1497 crop_ref,
1498 })
1499}
1500
/// Tests whether `inner` lies inside `container` once every edge of
/// `container` is pushed outwards by `tolerance`.
///
/// The widened edges are computed with saturating arithmetic so that extreme
/// coordinates or tolerances clamp at the `i64` bounds instead of overflowing
/// (which would panic in debug builds and wrap in release builds).
fn contains_bbox(container: [i64; 4], inner: [i64; 4], tolerance: i64) -> bool {
    let [cx0, cy0, cx1, cy1] = container;
    let [ix0, iy0, ix1, iy1] = inner;
    ix0 >= cx0.saturating_sub(tolerance)
        && iy0 >= cy0.saturating_sub(tolerance)
        && ix1 <= cx1.saturating_add(tolerance)
        && iy1 <= cy1.saturating_add(tolerance)
}
1507
1508fn text_match_method(kind: ClaimKind, config: &VerificationConfig) -> MatchMethod {
1509 match (kind, config.matching.text_normalization) {
1510 (ClaimKind::Quote, TextNormalization::None) => MatchMethod::ExactTextContains,
1511 (ClaimKind::Quote, TextNormalization::CollapseWhitespace) => {
1512 MatchMethod::NormalizedTextContains
1513 }
1514 (_, TextNormalization::None) => MatchMethod::ExactText,
1515 (_, TextNormalization::CollapseWhitespace) => MatchMethod::NormalizedText,
1516 }
1517}
1518
1519fn text_matches(
1520 kind: ClaimKind,
1521 expected: &str,
1522 actual: &str,
1523 config: &VerificationConfig,
1524) -> bool {
1525 let (mut expected, mut actual) = match config.matching.text_normalization {
1526 TextNormalization::None => (expected.to_string(), actual.to_string()),
1527 TextNormalization::CollapseWhitespace => {
1528 (normalize_quote(expected), normalize_quote(actual))
1529 }
1530 };
1531 if !config.matching.case_sensitive {
1532 expected = expected.to_lowercase();
1533 actual = actual.to_lowercase();
1534 }
1535 if kind == ClaimKind::Quote {
1536 actual.contains(&expected)
1537 } else {
1538 actual == expected
1539 }
1540}
1541
/// Collapses whitespace in `input` for tolerant text comparison.
///
/// Every run of ASCII whitespace (space, tab, `\n`, `\r`, form feed) becomes
/// a single space, and leading/trailing whitespace is removed. Non-ASCII
/// whitespace inside the text is preserved; at the ends it is trimmed, since
/// the final trim is Unicode-aware.
pub fn normalize_quote(input: &str) -> String {
    // `\r\n` and `\r` are ASCII whitespace, so splitting on ASCII whitespace
    // already covers line-ending normalization without extra passes.
    let mut out = String::with_capacity(input.len());
    for word in input.split_ascii_whitespace() {
        if !out.is_empty() {
            out.push(' ');
        }
        out.push_str(word);
    }

    // Only non-ASCII whitespace can remain at the ends; skip the copy when
    // there is nothing left to trim.
    let trimmed_len = out.trim().len();
    if trimmed_len == out.len() {
        return out;
    }
    out.trim().to_owned()
}
1561
1562#[cfg(test)]
1563mod tests {
1564 use super::*;
1565 use ethos_core::grounding::{
1566 Capabilities, GroundingCell, GroundingElement, GroundingSpan, GroundingTable, PageGeometry,
1567 ParserIdentity,
1568 };
1569 use ethos_core::verify_types::{CapabilityLimit, CellRef, Citation, Claim};
1570
    /// Fixed single-page grounding source used by most tests; only the
    /// values below vary between tests.
    #[derive(Clone)]
    struct TestSource {
        /// Capabilities this source reports.
        caps: Capabilities,
        /// Fingerprint this source reports.
        fingerprint: Option<String>,
        /// Value handed back by `crop_ref` for the one page/bbox pair the
        /// fixture recognises.
        crop_ref: Option<String>,
    }
1577
1578 impl Default for TestSource {
1579 fn default() -> Self {
1580 Self {
1581 caps: Capabilities {
1582 spans: true,
1583 char_offsets: true,
1584 tables: true,
1585 fingerprint: true,
1586 coordinate_origin: CoordinateOrigin::TopLeft,
1587 crop_support: false,
1588 },
1589 fingerprint: Some(
1590 "sha256:b5d30710d0c25cc38d8dec924ecaf57ae4f81276dd5dc14d75cb3b5b6bde62d3"
1591 .into(),
1592 ),
1593 crop_ref: None,
1594 }
1595 }
1596 }
1597
1598 impl GroundingSource for TestSource {
1599 fn parser(&self) -> ParserIdentity {
1600 ParserIdentity {
1601 name: "test-parser".into(),
1602 version: "0.1.0".into(),
1603 adapter: None,
1604 adapter_version: None,
1605 }
1606 }
1607 fn capabilities(&self) -> Capabilities {
1608 self.caps
1609 }
1610 fn fingerprint(&self) -> Option<String> {
1611 self.fingerprint.clone()
1612 }
1613 fn pages(&self) -> Vec<PageGeometry> {
1614 vec![PageGeometry {
1615 id: "p0001".into(),
1616 index: 1,
1617 width: 61200,
1618 height: 79200,
1619 rotation: 0,
1620 }]
1621 }
1622 fn elements(&self) -> Vec<GroundingElement> {
1623 vec![
1624 GroundingElement {
1625 id: "e000002".into(),
1626 page: "p0001".into(),
1627 bbox: [7200, 10100, 54000, 11500],
1628 kind: "text_block".into(),
1629 text: Some(
1630 "Revenue grew to $12.4M in Q3 2025, driven by enterprise expansion.".into(),
1631 ),
1632 },
1633 GroundingElement {
1634 id: "e000003".into(),
1635 page: "p0001".into(),
1636 bbox: [7200, 13000, 54000, 20000],
1637 kind: "table".into(),
1638 text: None,
1639 },
1640 ]
1641 }
1642 fn spans(&self) -> Vec<GroundingSpan> {
1643 vec![GroundingSpan {
1644 id: "s000002".into(),
1645 page: "p0001".into(),
1646 bbox: [7200, 10100, 54000, 11500],
1647 text: "Revenue grew to $12.4M in Q3 2025".into(),
1648 element: Some("e000002".into()),
1649 char_start: Some(0),
1650 char_end: Some(34),
1651 }]
1652 }
1653 fn tables(&self) -> Vec<GroundingTable> {
1654 vec![GroundingTable {
1655 id: "t0001".into(),
1656 page: "p0001".into(),
1657 bbox: [7200, 13000, 54000, 20000],
1658 cells: vec![
1659 GroundingCell {
1660 row: 0,
1661 col: 0,
1662 row_span: 1,
1663 col_span: 1,
1664 bbox: [7200, 13000, 30600, 16500],
1665 text: "Metric".into(),
1666 },
1667 GroundingCell {
1668 row: 1,
1669 col: 1,
1670 row_span: 1,
1671 col_span: 1,
1672 bbox: [30600, 16500, 54000, 20000],
1673 text: "$12.4M".into(),
1674 },
1675 ],
1676 }]
1677 }
1678 fn crop_ref(&self, page: &str, bbox: [i64; 4]) -> Option<String> {
1679 if page == "p0001" && bbox == [7200, 10100, 54000, 11500] {
1680 self.crop_ref.clone()
1681 } else {
1682 None
1683 }
1684 }
1685 }
1686
    /// Grounding source whose element list is supplied by the test; it
    /// reports two pages and no spans or tables.
    struct ElementSource {
        /// Elements returned verbatim (cloned) by `elements()`.
        elements: Vec<GroundingElement>,
    }
1690
1691 impl GroundingSource for ElementSource {
1692 fn parser(&self) -> ParserIdentity {
1693 ParserIdentity {
1694 name: "element-test-parser".into(),
1695 version: "0.1.0".into(),
1696 adapter: None,
1697 adapter_version: None,
1698 }
1699 }
1700 fn capabilities(&self) -> Capabilities {
1701 Capabilities {
1702 spans: true,
1703 char_offsets: true,
1704 tables: true,
1705 fingerprint: true,
1706 coordinate_origin: CoordinateOrigin::TopLeft,
1707 crop_support: false,
1708 }
1709 }
1710 fn fingerprint(&self) -> Option<String> {
1711 Some("sha256:b5d30710d0c25cc38d8dec924ecaf57ae4f81276dd5dc14d75cb3b5b6bde62d3".into())
1712 }
1713 fn pages(&self) -> Vec<PageGeometry> {
1714 vec![
1715 PageGeometry {
1716 id: "p0001".into(),
1717 index: 1,
1718 width: 61200,
1719 height: 79200,
1720 rotation: 0,
1721 },
1722 PageGeometry {
1723 id: "p0002".into(),
1724 index: 2,
1725 width: 61200,
1726 height: 79200,
1727 rotation: 0,
1728 },
1729 ]
1730 }
1731 fn elements(&self) -> Vec<GroundingElement> {
1732 self.elements.clone()
1733 }
1734 fn spans(&self) -> Vec<GroundingSpan> {
1735 Vec::new()
1736 }
1737 fn tables(&self) -> Vec<GroundingTable> {
1738 Vec::new()
1739 }
1740 }
1741
1742 fn claim(kind: ClaimKind, text: Option<&str>, citation: Citation) -> Claim {
1743 Claim {
1744 kind,
1745 text: text.map(str::to_string),
1746 citation,
1747 }
1748 }
1749
1750 fn input(source: &TestSource, claims: Vec<Claim>) -> CitationInput {
1751 CitationInput::Envelope(CitationEnvelope {
1752 document_fingerprint: source.fingerprint(),
1753 claims,
1754 })
1755 }
1756
1757 fn verify(source: &TestSource, claims: Vec<Claim>) -> VerificationReport {
1758 let cfg = VerificationConfig::default_v1();
1759 verify_claims(source, input(source, claims), &cfg, "0".repeat(64))
1760 }
1761
1762 fn verify_with_config(
1763 source: &TestSource,
1764 claims: Vec<Claim>,
1765 cfg: &VerificationConfig,
1766 ) -> VerificationReport {
1767 verify_claims(source, input(source, claims), cfg, "0".repeat(64))
1768 }
1769
1770 fn element(id: &str, page: &str, bbox: [i64; 4], text: Option<&str>) -> GroundingElement {
1771 GroundingElement {
1772 id: id.into(),
1773 page: page.into(),
1774 bbox,
1775 kind: "text_block".into(),
1776 text: text.map(str::to_string),
1777 }
1778 }
1779
1780 fn verify_elements(elements: Vec<GroundingElement>, claims: Vec<Claim>) -> VerificationReport {
1781 let source = ElementSource { elements };
1782 let cfg = VerificationConfig::default_v1();
1783 let citations = CitationInput::Envelope(CitationEnvelope {
1784 document_fingerprint: source.fingerprint(),
1785 claims,
1786 });
1787 verify_claims(&source, citations, &cfg, "0".repeat(64))
1788 }
1789
1790 #[test]
1791 fn quote_and_presence_claims_ground_with_literal_matching() {
1792 let source = TestSource::default();
1793 let report = verify(
1794 &source,
1795 vec![
1796 claim(
1797 ClaimKind::Quote,
1798 Some("Revenue grew to $12.4M in Q3 2025"),
1799 Citation {
1800 element_id: Some("e000002".into()),
1801 ..Default::default()
1802 },
1803 ),
1804 claim(
1805 ClaimKind::Presence,
1806 None,
1807 Citation {
1808 span_id: Some("s000002".into()),
1809 ..Default::default()
1810 },
1811 ),
1812 ],
1813 );
1814
1815 assert!(report.all_evidence_grounded);
1816 assert_eq!(report.checks.len(), 2);
1817 assert_eq!(report.capability_limits, Vec::<CapabilityLimit>::new());
1818 assert_eq!(report.checks[0].status, CheckStatus::Grounded);
1819 assert_eq!(
1820 report.checks[0].match_method,
1821 MatchMethod::NormalizedTextContains
1822 );
1823 assert_eq!(report.checks[1].status, CheckStatus::Grounded);
1824 assert_eq!(report.checks[1].match_method, MatchMethod::PresenceOnly);
1825 assert_eq!(
1826 report.checks[0]
1827 .evidence
1828 .as_ref()
1829 .and_then(|e| e.text.as_deref()),
1830 Some("Revenue grew to $12.4M in Q3 2025, driven by enterprise expansion.")
1831 );
1832 assert_eq!(report.warnings, Vec::<WarningCode>::new());
1833 }
1834
1835 #[test]
1836 fn quote_claim_grounds_across_adjacent_element_text_fragments() {
1837 let report = verify_elements(
1838 vec![
1839 element(
1840 "split-a",
1841 "p0001",
1842 [100, 100, 400, 200],
1843 Some("The alpha trust loop verifies "),
1844 ),
1845 element(
1846 "split-b",
1847 "p0001",
1848 [400, 100, 700, 200],
1849 Some("grounded evidence"),
1850 ),
1851 ],
1852 vec![claim(
1853 ClaimKind::Quote,
1854 Some("The alpha trust loop verifies grounded evidence"),
1855 Citation {
1856 element_id: Some("split-a".into()),
1857 ..Default::default()
1858 },
1859 )],
1860 );
1861
1862 assert!(report.all_evidence_grounded);
1863 assert_eq!(report.checks[0].status, CheckStatus::Grounded);
1864 assert_eq!(
1865 report.checks[0].match_method,
1866 MatchMethod::NormalizedTextContains
1867 );
1868 assert_eq!(
1869 report.checks[0]
1870 .evidence
1871 .as_ref()
1872 .and_then(|e| e.text.as_deref()),
1873 Some("The alpha trust loop verifies grounded evidence")
1874 );
1875 assert_eq!(
1876 report.checks[0].evidence.as_ref().and_then(|e| e.bbox),
1877 Some([100, 100, 700, 200])
1878 );
1879 }
1880
1881 #[test]
1882 fn quote_claim_page_only_locator_does_not_search_adjacent_fragments() {
1883 let report = verify_elements(
1884 vec![
1885 element(
1886 "split-a",
1887 "p0001",
1888 [100, 100, 400, 200],
1889 Some("The alpha trust loop verifies "),
1890 ),
1891 element(
1892 "split-b",
1893 "p0001",
1894 [400, 100, 700, 200],
1895 Some("grounded evidence"),
1896 ),
1897 ],
1898 vec![claim(
1899 ClaimKind::Quote,
1900 Some("The alpha trust loop verifies grounded evidence"),
1901 Citation {
1902 page: Some("p0001".into()),
1903 ..Default::default()
1904 },
1905 )],
1906 );
1907
1908 assert!(!report.all_evidence_grounded);
1909 assert_eq!(report.checks[0].status, CheckStatus::Mismatch);
1910 assert_eq!(report.checks[0].reason, Some(CheckReason::TextMismatch));
1911 }
1912
1913 #[test]
1914 fn quote_claim_grounds_when_element_id_points_to_second_adjacent_fragment() {
1915 let report = verify_elements(
1916 vec![
1917 element(
1918 "split-a",
1919 "p0001",
1920 [100, 100, 400, 200],
1921 Some("The alpha trust loop verifies "),
1922 ),
1923 element(
1924 "split-b",
1925 "p0001",
1926 [400, 100, 700, 200],
1927 Some("grounded evidence"),
1928 ),
1929 ],
1930 vec![claim(
1931 ClaimKind::Quote,
1932 Some("The alpha trust loop verifies grounded evidence"),
1933 Citation {
1934 element_id: Some("split-b".into()),
1935 ..Default::default()
1936 },
1937 )],
1938 );
1939
1940 assert!(report.all_evidence_grounded);
1941 assert_eq!(report.checks[0].status, CheckStatus::Grounded);
1942 assert_eq!(
1943 report.checks[0]
1944 .evidence
1945 .as_ref()
1946 .and_then(|e| e.text.as_deref()),
1947 Some("The alpha trust loop verifies grounded evidence")
1948 );
1949 assert_eq!(
1950 report.checks[0].evidence.as_ref().and_then(|e| e.bbox),
1951 Some([100, 100, 700, 200])
1952 );
1953 }
1954
1955 #[test]
1956 fn quote_claim_does_not_stitch_non_touching_element_bboxes() {
1957 let report = verify_elements(
1958 vec![
1959 element(
1960 "split-a",
1961 "p0001",
1962 [100, 100, 390, 200],
1963 Some("The alpha trust loop verifies "),
1964 ),
1965 element(
1966 "split-b",
1967 "p0001",
1968 [400, 100, 700, 200],
1969 Some("grounded evidence"),
1970 ),
1971 ],
1972 vec![claim(
1973 ClaimKind::Quote,
1974 Some("The alpha trust loop verifies grounded evidence"),
1975 Citation {
1976 element_id: Some("split-a".into()),
1977 ..Default::default()
1978 },
1979 )],
1980 );
1981
1982 assert!(!report.all_evidence_grounded);
1983 assert_eq!(report.checks[0].status, CheckStatus::Mismatch);
1984 assert_eq!(report.checks[0].reason, Some(CheckReason::TextMismatch));
1985 }
1986
1987 #[test]
1988 fn quote_claim_bbox_locator_does_not_expand_outside_cited_region() {
1989 let report = verify_elements(
1990 vec![
1991 element(
1992 "split-a",
1993 "p0001",
1994 [100, 100, 400, 200],
1995 Some("The alpha trust loop verifies "),
1996 ),
1997 element(
1998 "split-b",
1999 "p0001",
2000 [400, 100, 700, 200],
2001 Some("grounded evidence"),
2002 ),
2003 ],
2004 vec![claim(
2005 ClaimKind::Quote,
2006 Some("The alpha trust loop verifies grounded evidence"),
2007 Citation {
2008 page: Some("p0001".into()),
2009 bbox: Some([120, 120, 380, 180]),
2010 ..Default::default()
2011 },
2012 )],
2013 );
2014
2015 assert!(!report.all_evidence_grounded);
2016 assert_eq!(report.checks[0].status, CheckStatus::Mismatch);
2017 assert_eq!(report.checks[0].reason, Some(CheckReason::TextMismatch));
2018 assert_eq!(
2019 report.checks[0]
2020 .evidence
2021 .as_ref()
2022 .and_then(|e| e.text.as_deref()),
2023 Some("The alpha trust loop verifies ")
2024 );
2025 assert_eq!(
2026 report.checks[0].evidence.as_ref().and_then(|e| e.bbox),
2027 Some([100, 100, 400, 200])
2028 );
2029 }
2030
2031 #[test]
2032 fn bbox_locator_prefers_smallest_containing_element() {
2033 let report = verify_elements(
2034 vec![
2035 element(
2036 "container",
2037 "p0001",
2038 [0, 0, 1000, 1000],
2039 Some("outer wrapper text"),
2040 ),
2041 element(
2042 "inner",
2043 "p0001",
2044 [100, 100, 400, 200],
2045 Some("The exact cited quote"),
2046 ),
2047 ],
2048 vec![claim(
2049 ClaimKind::Quote,
2050 Some("The exact cited quote"),
2051 Citation {
2052 page: Some("p0001".into()),
2053 bbox: Some([120, 120, 380, 180]),
2054 ..Default::default()
2055 },
2056 )],
2057 );
2058
2059 assert!(report.all_evidence_grounded);
2060 assert_eq!(report.checks[0].status, CheckStatus::Grounded);
2061 assert_eq!(
2062 report.checks[0]
2063 .evidence
2064 .as_ref()
2065 .and_then(|e| e.text.as_deref()),
2066 Some("The exact cited quote")
2067 );
2068 assert_eq!(
2069 report.checks[0].evidence.as_ref().and_then(|e| e.bbox),
2070 Some([100, 100, 400, 200])
2071 );
2072 }
2073
2074 #[test]
2075 fn quote_claim_does_not_ground_across_non_adjacent_or_wrong_page_fragments() {
2076 let non_adjacent = verify_elements(
2077 vec![
2078 element(
2079 "split-a",
2080 "p0001",
2081 [100, 100, 400, 200],
2082 Some("The alpha trust loop verifies "),
2083 ),
2084 element(
2085 "between",
2086 "p0001",
2087 [100, 220, 700, 320],
2088 Some("separate evidence"),
2089 ),
2090 element(
2091 "split-b",
2092 "p0001",
2093 [400, 100, 700, 200],
2094 Some("grounded evidence"),
2095 ),
2096 ],
2097 vec![claim(
2098 ClaimKind::Quote,
2099 Some("The alpha trust loop verifies grounded evidence"),
2100 Citation {
2101 element_id: Some("split-a".into()),
2102 ..Default::default()
2103 },
2104 )],
2105 );
2106 assert!(!non_adjacent.all_evidence_grounded);
2107 assert_eq!(non_adjacent.checks[0].status, CheckStatus::Mismatch);
2108 assert_eq!(
2109 non_adjacent.checks[0].reason,
2110 Some(CheckReason::TextMismatch)
2111 );
2112
2113 let wrong_page = verify_elements(
2114 vec![
2115 element(
2116 "split-a",
2117 "p0001",
2118 [100, 100, 400, 200],
2119 Some("The alpha trust loop verifies "),
2120 ),
2121 element(
2122 "split-b",
2123 "p0002",
2124 [400, 100, 700, 200],
2125 Some("grounded evidence"),
2126 ),
2127 ],
2128 vec![claim(
2129 ClaimKind::Quote,
2130 Some("The alpha trust loop verifies grounded evidence"),
2131 Citation {
2132 page: Some("p0001".into()),
2133 ..Default::default()
2134 },
2135 )],
2136 );
2137 assert!(!wrong_page.all_evidence_grounded);
2138 assert_eq!(wrong_page.checks[0].status, CheckStatus::Mismatch);
2139 assert_eq!(wrong_page.checks[0].reason, Some(CheckReason::TextMismatch));
2140 }
2141
2142 #[test]
2143 fn mismatch_and_not_found_keep_gate_false() {
2144 let source = TestSource::default();
2145 let report = verify(
2146 &source,
2147 vec![
2148 claim(
2149 ClaimKind::Quote,
2150 Some("Revenue fell to $1"),
2151 Citation {
2152 element_id: Some("e000002".into()),
2153 ..Default::default()
2154 },
2155 ),
2156 claim(
2157 ClaimKind::Presence,
2158 None,
2159 Citation {
2160 element_id: Some("missing".into()),
2161 ..Default::default()
2162 },
2163 ),
2164 ],
2165 );
2166
2167 assert!(!report.all_evidence_grounded);
2168 assert_eq!(report.checks[0].status, CheckStatus::Mismatch);
2169 assert_eq!(report.checks[0].reason, Some(CheckReason::TextMismatch));
2170 assert_eq!(report.checks[1].status, CheckStatus::NotFound);
2171 assert_eq!(report.checks[1].reason, Some(CheckReason::ElementNotFound));
2172 }
2173
2174 #[test]
2175 fn value_claims_use_literal_text_matching() {
2176 let source = TestSource::default();
2177 let report = verify(
2178 &source,
2179 vec![claim(
2180 ClaimKind::Value,
2181 Some("Revenue grew to $12.4M in Q3 2025, driven by enterprise expansion."),
2182 Citation {
2183 element_id: Some("e000002".into()),
2184 ..Default::default()
2185 },
2186 )],
2187 );
2188
2189 assert!(report.all_evidence_grounded);
2190 assert_eq!(report.unsupported_claim_kinds, Vec::<String>::new());
2191 assert_eq!(report.checks[0].status, CheckStatus::Grounded);
2192 assert_eq!(report.checks[0].match_method, MatchMethod::NormalizedText);
2193 }
2194
2195 #[test]
2196 fn value_substrings_do_not_ground() {
2197 let source = TestSource::default();
2198 let report = verify(
2199 &source,
2200 vec![claim(
2201 ClaimKind::Value,
2202 Some("1"),
2203 Citation {
2204 element_id: Some("e000002".into()),
2205 ..Default::default()
2206 },
2207 )],
2208 );
2209
2210 assert!(!report.all_evidence_grounded);
2211 assert_eq!(report.checks[0].status, CheckStatus::Mismatch);
2212 assert_eq!(report.checks[0].reason, Some(CheckReason::TextMismatch));
2213 assert_eq!(report.checks[0].match_method, MatchMethod::NormalizedText);
2214 }
2215
2216 #[test]
2217 fn table_cell_claims_lookup_cell_and_match_text() {
2218 let source = TestSource::default();
2219 let report = verify(
2220 &source,
2221 vec![claim(
2222 ClaimKind::TableCell,
2223 Some("$12.4M"),
2224 Citation {
2225 table_id: Some("t0001".into()),
2226 cell: Some(CellRef { row: 1, col: 1 }),
2227 ..Default::default()
2228 },
2229 )],
2230 );
2231
2232 assert!(report.all_evidence_grounded);
2233 assert_eq!(report.unsupported_claim_kinds, Vec::<String>::new());
2234 assert_eq!(report.checks[0].status, CheckStatus::Grounded);
2235 assert_eq!(report.checks[0].match_method, MatchMethod::TableCellLookup);
2236 assert_eq!(
2237 report.checks[0]
2238 .evidence
2239 .as_ref()
2240 .and_then(|e| e.text.as_deref()),
2241 Some("$12.4M")
2242 );
2243 }
2244
2245 #[test]
2246 fn table_cell_missing_cell_is_not_found() {
2247 let source = TestSource::default();
2248 let report = verify(
2249 &source,
2250 vec![claim(
2251 ClaimKind::TableCell,
2252 Some("$12.4M"),
2253 Citation {
2254 table_id: Some("t0001".into()),
2255 cell: Some(CellRef { row: 9, col: 9 }),
2256 ..Default::default()
2257 },
2258 )],
2259 );
2260
2261 assert!(!report.all_evidence_grounded);
2262 assert_eq!(report.checks[0].status, CheckStatus::NotFound);
2263 assert_eq!(
2264 report.checks[0].reason,
2265 Some(CheckReason::TableCellNotFound)
2266 );
2267 assert_eq!(report.checks[0].match_method, MatchMethod::None);
2268 }
2269
2270 #[test]
2271 fn empty_table_collection_is_not_found_when_tables_are_supported() {
2272 let source = TestSource {
2273 caps: Capabilities {
2274 tables: true,
2275 ..TestSource::default().caps
2276 },
2277 ..TestSource::default()
2278 };
2279 struct NoTables(TestSource);
2280 impl GroundingSource for NoTables {
2281 fn parser(&self) -> ParserIdentity {
2282 self.0.parser()
2283 }
2284 fn capabilities(&self) -> Capabilities {
2285 self.0.capabilities()
2286 }
2287 fn fingerprint(&self) -> Option<String> {
2288 self.0.fingerprint()
2289 }
2290 fn pages(&self) -> Vec<PageGeometry> {
2291 self.0.pages()
2292 }
2293 fn elements(&self) -> Vec<GroundingElement> {
2294 self.0.elements()
2295 }
2296 fn spans(&self) -> Vec<GroundingSpan> {
2297 self.0.spans()
2298 }
2299 fn tables(&self) -> Vec<GroundingTable> {
2300 Vec::new()
2301 }
2302 }
2303 let report = verify(
2304 &source,
2305 vec![claim(
2306 ClaimKind::TableCell,
2307 Some("$12.4M"),
2308 Citation {
2309 table_id: Some("missing".into()),
2310 cell: Some(CellRef { row: 1, col: 1 }),
2311 ..Default::default()
2312 },
2313 )],
2314 );
2315 assert_eq!(report.checks[0].status, CheckStatus::NotFound);
2316
2317 let no_tables = NoTables(source);
2318 let cfg = VerificationConfig::default_v1();
2319 let report = verify_claims(
2320 &no_tables,
2321 CitationInput::Envelope(CitationEnvelope {
2322 document_fingerprint: no_tables.fingerprint(),
2323 claims: vec![claim(
2324 ClaimKind::TableCell,
2325 Some("$12.4M"),
2326 Citation {
2327 table_id: Some("missing".into()),
2328 cell: Some(CellRef { row: 1, col: 1 }),
2329 ..Default::default()
2330 },
2331 )],
2332 }),
2333 &cfg,
2334 "0".repeat(64),
2335 );
2336 assert_eq!(report.checks[0].status, CheckStatus::NotFound);
2337 }
2338
2339 #[test]
2340 fn missing_table_capability_blocks_table_cell_claims() {
2341 let source = TestSource {
2342 caps: Capabilities {
2343 tables: false,
2344 ..TestSource::default().caps
2345 },
2346 ..TestSource::default()
2347 };
2348 let report = verify(
2349 &source,
2350 vec![claim(
2351 ClaimKind::TableCell,
2352 Some("$12.4M"),
2353 Citation {
2354 table_id: Some("t0001".into()),
2355 cell: Some(CellRef { row: 1, col: 1 }),
2356 ..Default::default()
2357 },
2358 )],
2359 );
2360
2361 assert_eq!(report.checks[0].status, CheckStatus::CapabilityBlocked);
2362 assert_eq!(
2363 report.checks[0].reason,
2364 Some(CheckReason::MissingTableCapability)
2365 );
2366 assert_eq!(
2367 report.capability_limits,
2368 vec![CapabilityLimit::MissingTables]
2369 );
2370 assert!(report.checks[0]
2371 .warnings
2372 .contains(&WarningCode::CapabilityLimited));
2373 }
2374
2375 #[test]
2376 fn crop_refs_are_echoed_only_when_requested_and_supported() {
2377 let source = TestSource {
2378 caps: Capabilities {
2379 crop_support: true,
2380 ..TestSource::default().caps
2381 },
2382 crop_ref: Some("crop://p0001/e000002.png".into()),
2383 ..TestSource::default()
2384 };
2385 let claim = claim(
2386 ClaimKind::Quote,
2387 Some("Revenue grew to $12.4M in Q3 2025"),
2388 Citation {
2389 element_id: Some("e000002".into()),
2390 ..Default::default()
2391 },
2392 );
2393
2394 let mut cfg = VerificationConfig::default_v1();
2395 cfg.evidence.as_mut().unwrap().include_crops = true;
2396 let with_crops = verify_with_config(&source, vec![claim.clone()], &cfg);
2397 assert_eq!(
2398 with_crops.checks[0]
2399 .evidence
2400 .as_ref()
2401 .and_then(|e| e.crop_ref.as_deref()),
2402 Some("crop://p0001/e000002.png")
2403 );
2404
2405 cfg.evidence.as_mut().unwrap().include_crops = false;
2406 let without_crops = verify_with_config(&source, vec![claim], &cfg);
2407 assert_eq!(
2408 without_crops.checks[0]
2409 .evidence
2410 .as_ref()
2411 .and_then(|e| e.crop_ref.as_deref()),
2412 None
2413 );
2414 }
2415
2416 #[test]
2417 fn requested_crop_refs_without_source_support_remain_capability_limited() {
2418 let source = TestSource {
2419 crop_ref: Some("crop://p0001/e000002.png".into()),
2420 ..TestSource::default()
2421 };
2422 let mut cfg = VerificationConfig::default_v1();
2423 cfg.evidence.as_mut().unwrap().include_crops = true;
2424
2425 let report = verify_with_config(
2426 &source,
2427 vec![claim(
2428 ClaimKind::Quote,
2429 Some("Revenue grew to $12.4M in Q3 2025"),
2430 Citation {
2431 element_id: Some("e000002".into()),
2432 ..Default::default()
2433 },
2434 )],
2435 &cfg,
2436 );
2437
2438 assert_eq!(report.checks[0].status, CheckStatus::Grounded);
2439 assert_eq!(
2440 report.capability_limits,
2441 vec![CapabilityLimit::MissingCropSupport]
2442 );
2443 assert!(report.warnings.contains(&WarningCode::CapabilityLimited));
2444 assert_eq!(
2445 report.checks[0]
2446 .evidence
2447 .as_ref()
2448 .and_then(|e| e.crop_ref.as_deref()),
2449 None
2450 );
2451 }
2452
2453 #[test]
2454 fn stale_fingerprint_marks_checks_stale_and_gate_false() {
2455 let source = TestSource::default();
2456 let cfg = VerificationConfig::default_v1();
2457 let report = verify_claims(
2458 &source,
2459 CitationInput::Envelope(CitationEnvelope {
2460 document_fingerprint: Some(
2461 "sha256:0000000000000000000000000000000000000000000000000000000000000000"
2462 .into(),
2463 ),
2464 claims: vec![claim(
2465 ClaimKind::Presence,
2466 None,
2467 Citation {
2468 element_id: Some("e000002".into()),
2469 ..Default::default()
2470 },
2471 )],
2472 }),
2473 &cfg,
2474 "0".repeat(64),
2475 );
2476
2477 assert!(report.fingerprint_stale);
2478 assert!(!report.all_evidence_grounded);
2479 assert_eq!(report.checks[0].status, CheckStatus::Stale);
2480 assert_eq!(report.checks[0].reason, Some(CheckReason::StaleFingerprint));
2481 }
2482
2483 #[test]
2484 fn missing_citation_fingerprint_blocks_when_required() {
2485 let source = TestSource::default();
2486 let cfg = VerificationConfig::default_v1();
2487 let report = verify_claims(
2488 &source,
2489 CitationInput::Envelope(CitationEnvelope {
2490 document_fingerprint: None,
2491 claims: vec![claim(
2492 ClaimKind::Presence,
2493 None,
2494 Citation {
2495 element_id: Some("e000002".into()),
2496 ..Default::default()
2497 },
2498 )],
2499 }),
2500 &cfg,
2501 "0".repeat(64),
2502 );
2503
2504 assert!(!report.fingerprint_stale);
2505 assert!(!report.all_evidence_grounded);
2506 assert_eq!(report.checks[0].status, CheckStatus::Stale);
2507 assert_eq!(
2508 report.checks[0].reason,
2509 Some(CheckReason::MissingCitationFingerprint)
2510 );
2511 }
2512
2513 #[test]
2514 fn unsupported_claim_kinds_are_explicit() {
2515 let source = TestSource::default();
2516 let report = verify(
2517 &source,
2518 vec![claim(
2519 ClaimKind::Region,
2520 None,
2521 Citation {
2522 element_id: Some("e000002".into()),
2523 ..Default::default()
2524 },
2525 )],
2526 );
2527
2528 assert!(!report.all_evidence_grounded);
2529 assert_eq!(report.checks[0].status, CheckStatus::UnsupportedClaimKind);
2530 assert_eq!(
2531 report.checks[0].reason,
2532 Some(CheckReason::UnsupportedClaimKind)
2533 );
2534 assert_eq!(report.unsupported_claim_kinds, vec!["region"]);
2535 }
2536
2537 #[test]
2538 fn non_v1_claim_kinds_are_deduped_and_keep_gate_false() {
2539 let source = TestSource::default();
2540 let report = verify(
2541 &source,
2542 vec![
2543 claim(
2544 ClaimKind::Presence,
2545 None,
2546 Citation {
2547 page: Some("p0001".into()),
2548 ..Default::default()
2549 },
2550 ),
2551 claim(
2552 ClaimKind::Region,
2553 None,
2554 Citation {
2555 element_id: Some("e000002".into()),
2556 ..Default::default()
2557 },
2558 ),
2559 claim(
2560 ClaimKind::Other,
2561 Some("$12.4M equals 12400000"),
2562 Citation {
2563 element_id: Some("e000002".into()),
2564 ..Default::default()
2565 },
2566 ),
2567 claim(
2568 ClaimKind::Region,
2569 None,
2570 Citation {
2571 page: Some("p0001".into()),
2572 ..Default::default()
2573 },
2574 ),
2575 ],
2576 );
2577
2578 assert!(!report.all_evidence_grounded);
2579 assert_eq!(report.checks[0].status, CheckStatus::Grounded);
2580 assert_eq!(report.checks[1].status, CheckStatus::UnsupportedClaimKind);
2581 assert_eq!(report.checks[2].status, CheckStatus::UnsupportedClaimKind);
2582 assert_eq!(report.checks[3].status, CheckStatus::UnsupportedClaimKind);
2583 assert_eq!(report.checks[1].match_method, MatchMethod::None);
2584 assert_eq!(report.checks[2].match_method, MatchMethod::None);
2585 assert_eq!(report.checks[3].match_method, MatchMethod::None);
2586 assert_eq!(
2587 report.checks[1].reason,
2588 Some(CheckReason::UnsupportedClaimKind)
2589 );
2590 assert_eq!(
2591 report.checks[2].reason,
2592 Some(CheckReason::UnsupportedClaimKind)
2593 );
2594 assert_eq!(
2595 report.checks[3].reason,
2596 Some(CheckReason::UnsupportedClaimKind)
2597 );
2598 assert!(report.checks[1].evidence.is_none());
2599 assert!(report.checks[2].evidence.is_none());
2600 assert!(report.checks[3].evidence.is_none());
2601 assert!(report.checks[1].warnings.is_empty());
2602 assert!(report.checks[2].warnings.is_empty());
2603 assert!(report.checks[3].warnings.is_empty());
2604 assert!(!report.checks[1].semantic_unverified);
2605 assert!(!report.checks[2].semantic_unverified);
2606 assert!(!report.checks[3].semantic_unverified);
2607 assert_eq!(report.unsupported_claim_kinds, vec!["region", "other"]);
2608 }
2609
2610 #[test]
2611 fn missing_span_capability_blocks_span_locator() {
2612 let source = TestSource {
2613 caps: Capabilities {
2614 spans: false,
2615 char_offsets: false,
2616 tables: false,
2617 fingerprint: false,
2618 coordinate_origin: CoordinateOrigin::Unknown,
2619 crop_support: false,
2620 },
2621 fingerprint: None,
2622 crop_ref: None,
2623 };
2624 let report = verify(
2625 &source,
2626 vec![claim(
2627 ClaimKind::Presence,
2628 None,
2629 Citation {
2630 span_id: Some("s000002".into()),
2631 ..Default::default()
2632 },
2633 )],
2634 );
2635
2636 assert!(!report.all_evidence_grounded);
2637 assert_eq!(report.checks[0].status, CheckStatus::CapabilityBlocked);
2638 assert_eq!(
2639 report.checks[0].reason,
2640 Some(CheckReason::MissingSpanCapability)
2641 );
2642 assert_eq!(
2643 report.capability_limits,
2644 vec![
2645 CapabilityLimit::MissingFingerprint,
2646 CapabilityLimit::MissingSpans,
2647 CapabilityLimit::MissingCharOffsets,
2648 CapabilityLimit::MissingTables,
2649 CapabilityLimit::UnknownCoordinateOrigin
2650 ]
2651 );
2652 assert!(report.warnings.contains(&WarningCode::CapabilityLimited));
2653 assert!(report.checks[0]
2654 .warnings
2655 .contains(&WarningCode::CapabilityLimited));
2656 }
2657
2658 #[test]
2659 fn citation_fingerprint_without_source_fingerprint_blocks_checks() {
2660 let source = TestSource {
2661 caps: Capabilities {
2662 fingerprint: false,
2663 ..TestSource::default().caps
2664 },
2665 fingerprint: None,
2666 ..TestSource::default()
2667 };
2668 let cfg = VerificationConfig::default_v1();
2669 let report = verify_claims(
2670 &source,
2671 CitationInput::Envelope(CitationEnvelope {
2672 document_fingerprint: Some(
2673 "sha256:b5d30710d0c25cc38d8dec924ecaf57ae4f81276dd5dc14d75cb3b5b6bde62d3"
2674 .into(),
2675 ),
2676 claims: vec![claim(
2677 ClaimKind::Presence,
2678 None,
2679 Citation {
2680 element_id: Some("e000002".into()),
2681 ..Default::default()
2682 },
2683 )],
2684 }),
2685 &cfg,
2686 "0".repeat(64),
2687 );
2688
2689 assert!(!report.fingerprint_stale);
2690 assert!(!report.all_evidence_grounded);
2691 assert_eq!(report.checks[0].status, CheckStatus::CapabilityBlocked);
2692 assert_eq!(
2693 report.checks[0].reason,
2694 Some(CheckReason::MissingSourceFingerprint)
2695 );
2696 assert_eq!(
2697 report.capability_limits,
2698 vec![CapabilityLimit::MissingFingerprint]
2699 );
2700 assert!(report.warnings.contains(&WarningCode::CapabilityLimited));
2701 assert!(report.checks[0]
2702 .warnings
2703 .contains(&WarningCode::CapabilityLimited));
2704 }
2705
2706 #[test]
2707 fn missing_text_is_error_for_library_callers() {
2708 let source = TestSource::default();
2709 let report = verify(
2710 &source,
2711 vec![claim(
2712 ClaimKind::Quote,
2713 None,
2714 Citation {
2715 element_id: Some("e000002".into()),
2716 ..Default::default()
2717 },
2718 )],
2719 );
2720
2721 assert!(!report.all_evidence_grounded);
2722 assert_eq!(report.checks[0].status, CheckStatus::Error);
2723 assert_eq!(
2724 report.checks[0].reason,
2725 Some(CheckReason::MissingRequiredText)
2726 );
2727 assert_eq!(report.checks[0].match_method, MatchMethod::None);
2728 }
2729
2730 #[test]
2731 fn quote_normalization_is_ascii_whitespace_only() {
2732 assert_eq!(normalize_quote(" a\r\n\t b "), "a b");
2733 assert_eq!(normalize_quote("a\u{00a0}b"), "a\u{00a0}b");
2734 }
2735
2736 #[test]
2737 fn report_serializes_to_schema_shape() {
2738 let source = TestSource::default();
2739 let report = verify(
2740 &source,
2741 vec![claim(
2742 ClaimKind::Presence,
2743 None,
2744 Citation {
2745 element_id: Some("e000002".into()),
2746 ..Default::default()
2747 },
2748 )],
2749 );
2750 let v = serde_json::to_value(&report).unwrap();
2751 assert_eq!(v["grounding"]["parser"]["name"], "test-parser");
2752 assert_eq!(v["fingerprint_stale"], false);
2753 assert_eq!(v["checks"].as_array().unwrap().len(), 1);
2754 }
2755}