Skip to main content

ethos_verify/
lib.rs

1/*
2 * Copyright 2026 The Ethos maintainers
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *     http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17//! # ethos-verify (Milestone A skeleton → B alpha → D v1)
18//!
19//! Parser-agnostic citation evidence verification. Consumes any parser's output through
20//! [`ethos_core::grounding::GroundingSource`] — Ethos itself is just another grounding
21//! source behind an adapter (PRD §1.5, §5.4).
22//!
23//! **Scope discipline:** verification is evidence grounding — the cited region exists,
24//! its text matches by a declared literal method, the fingerprint is fresh. It is never
25//! pixel-level, semantic, or arithmetic proof (PRD §14).
26//!
27//! The WS-VERIFY check engine intentionally supports only literal quote/value,
28//! presence, and table-cell lookup claims. Unsupported claim kinds remain
29//! explicit; no fuzzy, semantic, arithmetic, crop, OCR, layout, or
30//! parser-internal behavior belongs here.
31
32#![forbid(unsafe_code)]
33#![warn(missing_docs)]
34
35use std::collections::BTreeMap;
36
37use ethos_core::codes::WarningCode;
38use ethos_core::evidence_anchor::{
39    AnchorChecks, AnchorLevel, AnchorStatus, BboxCheck, CoordinateProfile, EvidenceAnchor,
40    EvidenceAnchorGrounding, EvidenceAnchorReport, EvidenceAnchorRequest, EvidenceKind,
41    EvidenceRef, FingerprintCheck, PageCheck, TableCellCheck, TextCheck, TextNormalizationProfile,
42    EVIDENCE_ANCHOR_REPORT_ARTIFACT_TYPE,
43};
44use ethos_core::grounding::{
45    CoordinateOrigin, GroundingCell, GroundingElement, GroundingSource, GroundingSpan,
46    GroundingTable, PageGeometry,
47};
48use ethos_core::verify_types::{
49    compute_all_evidence_grounded, CapabilityLimit, Check, CheckReason, CheckStatus, Claim,
50    ClaimKind, Evidence, GroundingMeta, MatchMethod, TextNormalization, VerificationConfig,
51    VerificationReport,
52};
53use serde::{Deserialize, Serialize};
54use sha2::{Digest, Sha256};
55
56/// Citation input accepted by the alpha verifier.
57///
58/// The public CLI accepts either a bare array of [`Claim`] objects or this envelope
59/// form. `document_fingerprint`, when present, is compared with the grounding
60/// source fingerprint under the active staleness policy.
61#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
62#[serde(untagged)]
63pub enum CitationInput {
64    /// Bare claim list.
65    Claims(Vec<Claim>),
66    /// Claim list with optional fingerprint anchor.
67    Envelope(CitationEnvelope),
68}
69
70/// Envelope form of citation input.
71#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
72#[serde(deny_unknown_fields)]
73pub struct CitationEnvelope {
74    /// Fingerprint the citations were produced against.
75    #[serde(default)]
76    pub document_fingerprint: Option<String>,
77    /// Claims to verify, in deterministic input order.
78    pub claims: Vec<Claim>,
79}
80
/// Error raised when an evidence-anchor request is invalid or the source shape
/// cannot satisfy it.
///
/// The error carries a single human-readable message, which is what its
/// `Display` implementation prints.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct EvidenceAnchorError {
    // Human-readable description of what was rejected.
    message: String,
}
86
87impl EvidenceAnchorError {
88    fn new(message: impl Into<String>) -> Self {
89        EvidenceAnchorError {
90            message: message.into(),
91        }
92    }
93}
94
95impl std::fmt::Display for EvidenceAnchorError {
96    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
97        f.write_str(&self.message)
98    }
99}
100
101impl std::error::Error for EvidenceAnchorError {}
102
103/// Validate and resolve evidence refs over one grounding source.
104pub fn anchor_evidence(
105    source: &dyn GroundingSource,
106    request: EvidenceAnchorRequest,
107) -> Result<EvidenceAnchorReport, EvidenceAnchorError> {
108    validate_anchor_request(&request)?;
109    let index = SourceIndex::new(source);
110    let fingerprint_check = fingerprint_check(request.source_fingerprint.as_deref(), source);
111    let source_fingerprint = source.fingerprint();
112    let grounding = EvidenceAnchorGrounding {
113        parser: source.parser(),
114        capabilities: source.capabilities(),
115    };
116    let anchors = request
117        .evidence_refs
118        .iter()
119        .map(|evidence_ref| anchor_one(&index, fingerprint_check, evidence_ref))
120        .collect();
121    Ok(EvidenceAnchorReport {
122        artifact_type: EVIDENCE_ANCHOR_REPORT_ARTIFACT_TYPE.to_string(),
123        schema_version: ethos_core::SCHEMA_VERSION.to_string(),
124        source_fingerprint,
125        grounding,
126        anchors,
127    })
128}
129
130fn validate_anchor_request(request: &EvidenceAnchorRequest) -> Result<(), EvidenceAnchorError> {
131    if request.artifact_type != ethos_core::evidence_anchor::EVIDENCE_ANCHOR_REQUEST_ARTIFACT_TYPE {
132        return Err(EvidenceAnchorError::new(
133            "evidence anchor request artifact_type is not supported",
134        ));
135    }
136    if request.schema_version != ethos_core::SCHEMA_VERSION {
137        return Err(EvidenceAnchorError::new(
138            "evidence anchor request schema_version is not supported",
139        ));
140    }
141    let mut ids = std::collections::BTreeSet::new();
142    for evidence_ref in &request.evidence_refs {
143        if !ids.insert(evidence_ref.evidence_id.as_str()) {
144            return Err(EvidenceAnchorError::new(format!(
145                "duplicate evidence_id '{}'",
146                evidence_ref.evidence_id
147            )));
148        }
149        validate_evidence_ref(evidence_ref)?;
150    }
151    Ok(())
152}
153
154fn validate_evidence_ref(evidence_ref: &EvidenceRef) -> Result<(), EvidenceAnchorError> {
155    validate_locator(evidence_ref)?;
156    validate_expected_text(evidence_ref)?;
157    validate_kind_level_compat(evidence_ref)?;
158    validate_required_anchor_inputs(evidence_ref)?;
159    validate_required_page_locator(evidence_ref)?;
160    Ok(())
161}
162
163fn validate_locator(evidence_ref: &EvidenceRef) -> Result<(), EvidenceAnchorError> {
164    let locator = &evidence_ref.locator;
165    if locator.page_index == Some(0) {
166        return Err(EvidenceAnchorError::new("page_index must be 1-based"));
167    }
168    if locator.page_index.is_some() && locator.page_id.is_some() {
169        return Err(EvidenceAnchorError::new(
170            "use exactly one of page_index or page_id",
171        ));
172    }
173    if locator.bbox.is_some()
174        && locator.coordinate_profile != Some(CoordinateProfile::EthosQuantizedTopLeftV1)
175    {
176        return Err(EvidenceAnchorError::new(
177            "bbox requires coordinate_profile ethos_quantized_top_left_v1",
178        ));
179    }
180    Ok(())
181}
182
183fn validate_expected_text(evidence_ref: &EvidenceRef) -> Result<(), EvidenceAnchorError> {
184    if let Some(expected_text) = evidence_ref.expected_text.as_deref() {
185        if normalize_expected_text(expected_text).is_empty() {
186            return Err(EvidenceAnchorError::new(
187                "expected_text must not be empty after normalization",
188            ));
189        }
190    }
191    if evidence_ref.expected_text_sha256.is_some() {
192        let Some(expected_text) = evidence_ref.expected_text.as_deref() else {
193            return Err(EvidenceAnchorError::new(
194                "expected_text_sha256 requires expected_text",
195            ));
196        };
197        if evidence_ref.text_normalization_profile
198            != Some(TextNormalizationProfile::EthosCollapseWhitespaceV1)
199        {
200            return Err(EvidenceAnchorError::new(
201                "expected_text_sha256 requires text_normalization_profile ethos_collapse_whitespace_v1",
202            ));
203        }
204        let expected_hash = format!(
205            "sha256:{}",
206            sha256_hex(normalize_expected_text(expected_text).as_bytes())
207        );
208        if evidence_ref.expected_text_sha256.as_deref() != Some(expected_hash.as_str()) {
209            return Err(EvidenceAnchorError::new(
210                "expected_text_sha256 does not match normalized expected_text",
211            ));
212        }
213    }
214    Ok(())
215}
216
217fn validate_kind_level_compat(evidence_ref: &EvidenceRef) -> Result<(), EvidenceAnchorError> {
218    match evidence_ref.evidence_kind {
219        EvidenceKind::Page if evidence_ref.required_anchor_level != AnchorLevel::Page => {
220            return Err(EvidenceAnchorError::new(
221                "page evidence supports only required_anchor_level page",
222            ));
223        }
224        EvidenceKind::Text if evidence_ref.required_anchor_level != AnchorLevel::Text => {
225            return Err(EvidenceAnchorError::new(
226                "text evidence supports only required_anchor_level text",
227            ));
228        }
229        EvidenceKind::TextRegion
230            if !matches!(
231                evidence_ref.required_anchor_level,
232                AnchorLevel::Text | AnchorLevel::Bbox | AnchorLevel::TextBbox
233            ) =>
234        {
235            return Err(EvidenceAnchorError::new(
236                "text_region evidence supports only text, bbox, or text_bbox anchor levels",
237            ));
238        }
239        EvidenceKind::TableCell if evidence_ref.required_anchor_level != AnchorLevel::TableCell => {
240            return Err(EvidenceAnchorError::new(
241                "table_cell evidence supports only required_anchor_level table_cell",
242            ));
243        }
244        EvidenceKind::TableCell
245            if evidence_ref.locator.table_id.is_none() || evidence_ref.locator.cell.is_none() =>
246        {
247            return Err(EvidenceAnchorError::new(
248                "table_cell evidence requires table_id and cell",
249            ));
250        }
251        EvidenceKind::Region | EvidenceKind::Other => {}
252        _ => {}
253    }
254    Ok(())
255}
256
257fn validate_required_anchor_inputs(evidence_ref: &EvidenceRef) -> Result<(), EvidenceAnchorError> {
258    if anchor_requires_text(evidence_ref) && evidence_ref.expected_text.is_none() {
259        return Err(EvidenceAnchorError::new(
260            "required_anchor_level text or text_bbox requires expected_text",
261        ));
262    }
263    if requires_bbox(evidence_ref) && evidence_ref.locator.bbox.is_none() {
264        return Err(EvidenceAnchorError::new(
265            "required_anchor_level bbox or text_bbox requires locator.bbox",
266        ));
267    }
268    Ok(())
269}
270
271fn validate_required_page_locator(evidence_ref: &EvidenceRef) -> Result<(), EvidenceAnchorError> {
272    if page_locator_required(evidence_ref)
273        && evidence_ref.locator.page_index.is_none()
274        && evidence_ref.locator.page_id.is_none()
275    {
276        return Err(EvidenceAnchorError::new(
277            "page_index or page_id is required for this evidence ref",
278        ));
279    }
280    Ok(())
281}
282
283fn page_locator_required(evidence_ref: &EvidenceRef) -> bool {
284    matches!(evidence_ref.evidence_kind, EvidenceKind::Page)
285        || evidence_ref.locator.bbox.is_some()
286        || (evidence_ref.locator.element_id.is_none()
287            && evidence_ref.locator.span_id.is_none()
288            && evidence_ref.locator.table_id.is_none())
289}
290
291fn fingerprint_check(
292    request_fingerprint: Option<&str>,
293    source: &dyn GroundingSource,
294) -> FingerprintCheck {
295    match (request_fingerprint, source.fingerprint()) {
296        (None, _) => FingerprintCheck::NotChecked,
297        (Some(_), None) => FingerprintCheck::CapabilityLimited,
298        (Some(expected), Some(actual)) if expected == actual => FingerprintCheck::Matched,
299        (Some(_), Some(_)) => FingerprintCheck::Stale,
300    }
301}
302
303fn anchor_one(
304    index: &SourceIndex,
305    fingerprint: FingerprintCheck,
306    evidence_ref: &EvidenceRef,
307) -> EvidenceAnchor {
308    let mut checks = AnchorChecks {
309        fingerprint,
310        ..AnchorChecks::default()
311    };
312    let mut capability_limits = Vec::new();
313
314    if matches!(
315        evidence_ref.evidence_kind,
316        EvidenceKind::Region | EvidenceKind::Other
317    ) {
318        return anchor_result(
319            evidence_ref,
320            AnchorStatus::UnsupportedEvidenceKind,
321            AnchorLevel::None,
322            checks,
323            capability_limits,
324        );
325    }
326    if fingerprint == FingerprintCheck::Stale {
327        return anchor_result(
328            evidence_ref,
329            AnchorStatus::StaleFingerprint,
330            AnchorLevel::None,
331            checks,
332            capability_limits,
333        );
334    }
335    if fingerprint == FingerprintCheck::CapabilityLimited {
336        capability_limits.push(CapabilityLimit::MissingFingerprint);
337    }
338
339    let page = resolve_page(index, evidence_ref);
340    checks.page = page.check;
341    let mut achieved_page = page.check == PageCheck::Found;
342    let mut text_ok = false;
343    let mut bbox_ok = false;
344    let mut table_ok = false;
345
346    match evidence_ref.evidence_kind {
347        EvidenceKind::Page => {}
348        EvidenceKind::Text | EvidenceKind::TextRegion => {
349            if anchor_requires_text(evidence_ref) {
350                let text = resolve_text(index, evidence_ref, page.page_id.as_deref());
351                checks.text = text.check;
352                text_ok = text.check == TextCheck::Matched;
353                if text.check == TextCheck::CapabilityLimited {
354                    capability_limits.push(CapabilityLimit::MissingSpans);
355                }
356            }
357            if requires_bbox(evidence_ref) {
358                let bbox = resolve_bbox(index, evidence_ref, page.page_id.as_deref());
359                checks.bbox = bbox;
360                bbox_ok = bbox == BboxCheck::Valid;
361                if bbox == BboxCheck::CapabilityLimited {
362                    capability_limits.push(CapabilityLimit::UnknownCoordinateOrigin);
363                }
364            }
365        }
366        EvidenceKind::TableCell => {
367            let table = resolve_anchor_table_cell(index, evidence_ref);
368            checks.table_cell = table.check;
369            table_ok = table.check == TableCellCheck::Matched;
370            achieved_page = table.page_found;
371            if table.check == TableCellCheck::CapabilityLimited {
372                capability_limits.push(CapabilityLimit::MissingTables);
373            }
374        }
375        EvidenceKind::Region | EvidenceKind::Other => {}
376    }
377
378    capability_limits.sort_by_key(|limit| capability_limit_order(*limit));
379    capability_limits.dedup();
380    let achieved_anchor_level =
381        achieved_anchor_level(evidence_ref, achieved_page, text_ok, bbox_ok, table_ok);
382    let anchor_status = anchor_status(evidence_ref, &checks, &capability_limits);
383    anchor_result(
384        evidence_ref,
385        anchor_status,
386        achieved_anchor_level,
387        checks,
388        capability_limits,
389    )
390}
391
392fn anchor_result(
393    evidence_ref: &EvidenceRef,
394    anchor_status: AnchorStatus,
395    achieved_anchor_level: AnchorLevel,
396    checks: AnchorChecks,
397    capability_limits: Vec<CapabilityLimit>,
398) -> EvidenceAnchor {
399    EvidenceAnchor {
400        evidence_id: evidence_ref.evidence_id.clone(),
401        evidence_kind: evidence_ref.evidence_kind,
402        anchor_status,
403        required_anchor_level: evidence_ref.required_anchor_level,
404        achieved_anchor_level,
405        checks,
406        capability_limits,
407    }
408}
409
410struct PageResolution {
411    check: PageCheck,
412    page_id: Option<String>,
413}
414
415fn resolve_page(index: &SourceIndex, evidence_ref: &EvidenceRef) -> PageResolution {
416    if let Some(page_id) = evidence_ref.locator.page_id.as_deref() {
417        return if index.pages.iter().any(|page| page.id == page_id) {
418            PageResolution {
419                check: PageCheck::Found,
420                page_id: Some(page_id.to_string()),
421            }
422        } else {
423            PageResolution {
424                check: PageCheck::NotFound,
425                page_id: None,
426            }
427        };
428    }
429    if let Some(page_index) = evidence_ref.locator.page_index {
430        return index
431            .pages
432            .iter()
433            .find(|page| page.index == page_index)
434            .map(|page| PageResolution {
435                check: PageCheck::Found,
436                page_id: Some(page.id.clone()),
437            })
438            .unwrap_or(PageResolution {
439                check: PageCheck::NotFound,
440                page_id: None,
441            });
442    }
443    PageResolution {
444        check: PageCheck::NotChecked,
445        page_id: None,
446    }
447}
448
449struct TextResolution {
450    check: TextCheck,
451}
452
453fn resolve_text(
454    index: &SourceIndex,
455    evidence_ref: &EvidenceRef,
456    page_id: Option<&str>,
457) -> TextResolution {
458    let Some(expected_text) = evidence_ref.expected_text.as_deref() else {
459        return TextResolution {
460            check: TextCheck::NotFound,
461        };
462    };
463    if let Some(span_id) = evidence_ref.locator.span_id.as_deref() {
464        if !index.capabilities.spans {
465            return TextResolution {
466                check: TextCheck::CapabilityLimited,
467            };
468        }
469        return match index.span(span_id) {
470            Some(span) => TextResolution {
471                check: text_check(expected_text, &span.text),
472            },
473            None => TextResolution {
474                check: TextCheck::NotFound,
475            },
476        };
477    }
478    if let Some(element_id) = evidence_ref.locator.element_id.as_deref() {
479        return index
480            .element_by_id
481            .get(element_id)
482            .and_then(|position| index.elements.get(*position))
483            .and_then(|element| element.text.as_deref())
484            .map(|actual| TextResolution {
485                check: text_check(expected_text, actual),
486            })
487            .unwrap_or(TextResolution {
488                check: TextCheck::NotFound,
489            });
490    }
491    let Some(page_id) = page_id else {
492        return TextResolution {
493            check: TextCheck::NotFound,
494        };
495    };
496    if index
497        .elements
498        .iter()
499        .filter(|element| element.page == page_id)
500        .filter_map(|element| element.text.as_deref())
501        .any(|actual| text_check(expected_text, actual) == TextCheck::Matched)
502    {
503        return TextResolution {
504            check: TextCheck::Matched,
505        };
506    }
507    if index
508        .spans
509        .iter()
510        .filter(|span| span.page == page_id)
511        .any(|span| text_check(expected_text, &span.text) == TextCheck::Matched)
512    {
513        return TextResolution {
514            check: TextCheck::Matched,
515        };
516    }
517    TextResolution {
518        check: if index.elements.iter().any(|element| element.page == page_id)
519            || index.spans.iter().any(|span| span.page == page_id)
520        {
521            TextCheck::Mismatch
522        } else {
523            TextCheck::NotFound
524        },
525    }
526}
527
528fn resolve_bbox(
529    index: &SourceIndex,
530    evidence_ref: &EvidenceRef,
531    page_id: Option<&str>,
532) -> BboxCheck {
533    let Some(bbox) = evidence_ref.locator.bbox else {
534        return BboxCheck::NotChecked;
535    };
536    if index.capabilities.coordinate_origin != CoordinateOrigin::TopLeft {
537        return BboxCheck::CapabilityLimited;
538    }
539    let Some(page_id) = page_id else {
540        return BboxCheck::NotFound;
541    };
542    let tolerance = VerificationConfig::default_v1()
543        .matching
544        .bbox_containment_tolerance_q
545        .unwrap_or(0);
546    if index
547        .elements
548        .iter()
549        .any(|element| element.page == page_id && contains_bbox(element.bbox, bbox, tolerance))
550        || index
551            .spans
552            .iter()
553            .any(|span| span.page == page_id && contains_bbox(span.bbox, bbox, tolerance))
554        || index
555            .tables
556            .iter()
557            .any(|table| table.page == page_id && contains_bbox(table.bbox, bbox, tolerance))
558    {
559        BboxCheck::Valid
560    } else {
561        BboxCheck::NotFound
562    }
563}
564
565struct TableResolution {
566    check: TableCellCheck,
567    page_found: bool,
568}
569
570fn resolve_anchor_table_cell(index: &SourceIndex, evidence_ref: &EvidenceRef) -> TableResolution {
571    if !index.capabilities.tables {
572        return TableResolution {
573            check: TableCellCheck::CapabilityLimited,
574            page_found: false,
575        };
576    }
577    let Some(table_id) = evidence_ref.locator.table_id.as_deref() else {
578        return TableResolution {
579            check: TableCellCheck::NotFound,
580            page_found: false,
581        };
582    };
583    let Some(cell_ref) = evidence_ref.locator.cell else {
584        return TableResolution {
585            check: TableCellCheck::NotFound,
586            page_found: false,
587        };
588    };
589    let Some(table) = index.table(table_id) else {
590        return TableResolution {
591            check: TableCellCheck::NotFound,
592            page_found: false,
593        };
594    };
595    let page_found = index.pages.iter().any(|page| page.id == table.page);
596    let Some(cell) = table
597        .cells
598        .iter()
599        .find(|cell| table_cell_covers(cell, cell_ref.row, cell_ref.col))
600    else {
601        return TableResolution {
602            check: TableCellCheck::NotFound,
603            page_found,
604        };
605    };
606    let check = match evidence_ref.expected_text.as_deref() {
607        Some(expected) => {
608            if table_cell_text_matches(expected, &cell.text) {
609                TableCellCheck::Matched
610            } else {
611                TableCellCheck::Mismatch
612            }
613        }
614        None => TableCellCheck::Matched,
615    };
616    TableResolution { check, page_found }
617}
618
619fn anchor_requires_text(evidence_ref: &EvidenceRef) -> bool {
620    matches!(
621        evidence_ref.required_anchor_level,
622        AnchorLevel::Text | AnchorLevel::TextBbox
623    )
624}
625
626fn requires_bbox(evidence_ref: &EvidenceRef) -> bool {
627    matches!(
628        evidence_ref.required_anchor_level,
629        AnchorLevel::Bbox | AnchorLevel::TextBbox
630    )
631}
632
633fn text_check(expected: &str, actual: &str) -> TextCheck {
634    if normalize_expected_text(actual).contains(&normalize_expected_text(expected)) {
635        TextCheck::Matched
636    } else {
637        TextCheck::Mismatch
638    }
639}
640
641fn table_cell_text_matches(expected: &str, actual: &str) -> bool {
642    normalize_expected_text(actual) == normalize_expected_text(expected)
643}
644
645fn normalize_expected_text(input: &str) -> String {
646    normalize_quote(input)
647}
648
649fn capability_limit_order(limit: CapabilityLimit) -> u8 {
650    match limit {
651        CapabilityLimit::MissingSpans => 0,
652        CapabilityLimit::MissingCharOffsets => 1,
653        CapabilityLimit::MissingTables => 2,
654        CapabilityLimit::MissingFingerprint => 3,
655        CapabilityLimit::UnknownCoordinateOrigin => 4,
656        CapabilityLimit::MissingCropSupport => 5,
657    }
658}
659
660fn sha256_hex(bytes: &[u8]) -> String {
661    let mut hasher = Sha256::new();
662    hasher.update(bytes);
663    format!("{:x}", hasher.finalize())
664}
665
666fn achieved_anchor_level(
667    evidence_ref: &EvidenceRef,
668    page_ok: bool,
669    text_ok: bool,
670    bbox_ok: bool,
671    table_ok: bool,
672) -> AnchorLevel {
673    match evidence_ref.evidence_kind {
674        EvidenceKind::Page if page_ok => AnchorLevel::Page,
675        EvidenceKind::Text if text_ok => AnchorLevel::Text,
676        EvidenceKind::TextRegion if text_ok && bbox_ok => AnchorLevel::TextBbox,
677        EvidenceKind::TextRegion if text_ok => AnchorLevel::Text,
678        EvidenceKind::TextRegion if bbox_ok => AnchorLevel::Bbox,
679        EvidenceKind::TableCell if table_ok => AnchorLevel::TableCell,
680        _ => AnchorLevel::None,
681    }
682}
683
684fn anchor_status(
685    evidence_ref: &EvidenceRef,
686    checks: &AnchorChecks,
687    capability_limits: &[CapabilityLimit],
688) -> AnchorStatus {
689    if checks.page == PageCheck::NotFound
690        || checks.text == TextCheck::NotFound
691        || checks.bbox == BboxCheck::NotFound
692        || checks.table_cell == TableCellCheck::NotFound
693    {
694        return AnchorStatus::NotFound;
695    }
696    if checks.text == TextCheck::Mismatch
697        || checks.bbox == BboxCheck::Invalid
698        || checks.table_cell == TableCellCheck::Mismatch
699    {
700        return AnchorStatus::Mismatch;
701    }
702    if checks.fingerprint == FingerprintCheck::CapabilityLimited
703        || checks.text == TextCheck::CapabilityLimited
704        || checks.bbox == BboxCheck::CapabilityLimited
705        || checks.table_cell == TableCellCheck::CapabilityLimited
706        || !capability_limits.is_empty()
707    {
708        return AnchorStatus::CapabilityLimited;
709    }
710    let bound = match evidence_ref.required_anchor_level {
711        AnchorLevel::Page => checks.page == PageCheck::Found,
712        AnchorLevel::Text => checks.text == TextCheck::Matched,
713        AnchorLevel::Bbox => checks.bbox == BboxCheck::Valid,
714        AnchorLevel::TextBbox => {
715            checks.text == TextCheck::Matched && checks.bbox == BboxCheck::Valid
716        }
717        AnchorLevel::TableCell => checks.table_cell == TableCellCheck::Matched,
718        AnchorLevel::None => false,
719    };
720    if bound {
721        AnchorStatus::Bound
722    } else {
723        AnchorStatus::NotFound
724    }
725}
726
727impl CitationInput {
728    /// Claims in deterministic input order.
729    pub fn claims(&self) -> &[Claim] {
730        match self {
731            CitationInput::Claims(claims) => claims,
732            CitationInput::Envelope(envelope) => &envelope.claims,
733        }
734    }
735
736    /// Fingerprint anchor declared by the citation envelope, when present.
737    pub fn document_fingerprint(&self) -> Option<&str> {
738        match self {
739            CitationInput::Claims(_) => None,
740            CitationInput::Envelope(envelope) => envelope.document_fingerprint.as_deref(),
741        }
742    }
743
744    fn into_parts(self) -> (Option<String>, Vec<Claim>) {
745        match self {
746            CitationInput::Claims(claims) => (None, claims),
747            CitationInput::Envelope(envelope) => (envelope.document_fingerprint, envelope.claims),
748        }
749    }
750}
751
752/// Compute the capability-downgrade warnings for a source under a config (PRD §5.5):
753/// every missing capability the run would rely on surfaces as `capability_limited` —
754/// explicitly, never as silent approximation.
755pub fn capability_warnings(
756    source: &dyn GroundingSource,
757    config: &VerificationConfig,
758) -> Vec<WarningCode> {
759    if capability_limits(source, config).is_empty() {
760        Vec::new()
761    } else {
762        vec![WarningCode::CapabilityLimited]
763    }
764}
765
766/// Compute structured capability gaps for the run. These explain the stable
767/// `capability_limited` warning without minting parser-warning codes for every
768/// verification capability.
769pub fn capability_limits(
770    source: &dyn GroundingSource,
771    config: &VerificationConfig,
772) -> Vec<CapabilityLimit> {
773    capability_limits_for(source.capabilities(), config)
774}
775
776fn capability_limits_for(
777    caps: ethos_core::grounding::Capabilities,
778    config: &VerificationConfig,
779) -> Vec<CapabilityLimit> {
780    let mut limits = Vec::new();
781    if !caps.fingerprint && config.staleness.require_fingerprint_match {
782        limits.push(CapabilityLimit::MissingFingerprint);
783    }
784    if !caps.spans {
785        limits.push(CapabilityLimit::MissingSpans);
786    }
787    if !caps.char_offsets {
788        limits.push(CapabilityLimit::MissingCharOffsets);
789    }
790    if !caps.tables && config.claim_kinds.contains(&ClaimKind::TableCell) {
791        limits.push(CapabilityLimit::MissingTables);
792    }
793    if caps.coordinate_origin == CoordinateOrigin::Unknown {
794        limits.push(CapabilityLimit::UnknownCoordinateOrigin);
795    }
796    if config.evidence.is_some_and(|e| e.include_crops) && !caps.crop_support {
797        limits.push(CapabilityLimit::MissingCropSupport);
798    }
799    limits
800}
801
802fn push_warning(warnings: &mut Vec<WarningCode>, warning: WarningCode) {
803    if !warnings.contains(&warning) {
804        warnings.push(warning);
805    }
806}
807
808/// Verify citation claims over a parser-agnostic [`GroundingSource`].
809pub fn verify_claims(
810    source: &dyn GroundingSource,
811    citations: CitationInput,
812    config: &VerificationConfig,
813    config_sha256: String,
814) -> VerificationReport {
815    let (citation_fingerprint, claims) = citations.into_parts();
816    let index = SourceIndex::new(source);
817    let source_fingerprint = source.fingerprint();
818    let capability_limits = capability_limits_for(index.capabilities, config);
819    let warnings = if capability_limits.is_empty() {
820        Vec::new()
821    } else {
822        vec![WarningCode::CapabilityLimited]
823    };
824    let fingerprint_stale = config.staleness.require_fingerprint_match
825        && matches!(
826            (citation_fingerprint.as_deref(), source_fingerprint.as_deref()),
827            (Some(expected), Some(actual)) if expected != actual
828        );
829    let fingerprint_unverifiable = config.staleness.require_fingerprint_match
830        && citation_fingerprint.is_some()
831        && source_fingerprint.is_none();
832    let citation_fingerprint_missing = config.staleness.require_fingerprint_match
833        && citation_fingerprint.is_none()
834        && source_fingerprint.is_some();
835    let include_text = config.evidence.is_some_and(|e| e.include_text);
836    let include_crops = config.evidence.is_some_and(|e| e.include_crops);
837    let mut unsupported = Vec::new();
838    let checks: Vec<Check> = claims
839        .into_iter()
840        .enumerate()
841        .map(|(idx, claim)| {
842            check_claim(
843                idx + 1,
844                source,
845                &index,
846                claim,
847                config,
848                CheckContext {
849                    fingerprint_stale,
850                    fingerprint_unverifiable,
851                    citation_fingerprint_missing,
852                    include_text,
853                    include_crops,
854                },
855                &mut unsupported,
856            )
857        })
858        .collect();
859
860    VerificationReport {
861        schema_version: ethos_core::SCHEMA_VERSION.to_string(),
862        document_fingerprint: source_fingerprint,
863        verification_config_sha256: config_sha256,
864        grounding: GroundingMeta {
865            parser: source.parser(),
866            capabilities: index.capabilities,
867        },
868        capability_limits,
869        fingerprint_stale,
870        all_evidence_grounded: compute_all_evidence_grounded(
871            &checks,
872            &unsupported,
873            fingerprint_stale,
874        ),
875        checks,
876        unsupported_claim_kinds: unsupported,
877        warnings,
878    }
879}
880
/// Run-wide flags computed once per verification run and passed by value to every
/// per-claim check.
#[derive(Clone, Copy, Debug)]
struct CheckContext {
    /// Fingerprint matching is required, and the citation and source fingerprints are
    /// both present but differ.
    fingerprint_stale: bool,
    /// Fingerprint matching is required and the citation carries a fingerprint, but the
    /// grounding source does not expose one to compare against.
    fingerprint_unverifiable: bool,
    /// Fingerprint matching is required and the source exposes a fingerprint, but the
    /// citation input did not carry one.
    citation_fingerprint_missing: bool,
    /// Copy the resolved target text into the emitted evidence.
    include_text: bool,
    /// Ask the grounding source for a crop reference when building evidence.
    include_crops: bool,
}
889
890fn check_claim(
891    id: usize,
892    source: &dyn GroundingSource,
893    index: &SourceIndex,
894    claim: Claim,
895    config: &VerificationConfig,
896    context: CheckContext,
897    unsupported: &mut Vec<String>,
898) -> Check {
899    let mut warnings = Vec::new();
900    let check_id = format!("v{id:04}");
901
902    if !claim.citation.has_locator() {
903        return Check {
904            id: check_id,
905            claim,
906            status: CheckStatus::Error,
907            reason: Some(CheckReason::MissingLocator),
908            match_method: MatchMethod::None,
909            semantic_unverified: false,
910            evidence: None,
911            warnings,
912        };
913    }
914
915    if !is_supported_kind(claim.kind) || !config.claim_kinds.contains(&claim.kind) {
916        push_unsupported(unsupported, claim.kind);
917        return Check {
918            id: check_id,
919            claim,
920            status: CheckStatus::UnsupportedClaimKind,
921            reason: Some(CheckReason::UnsupportedClaimKind),
922            match_method: MatchMethod::None,
923            semantic_unverified: false,
924            evidence: None,
925            warnings,
926        };
927    }
928
929    if requires_text(claim.kind)
930        && claim
931            .text
932            .as_deref()
933            .is_none_or(|text| text.trim().is_empty())
934    {
935        return Check {
936            id: check_id,
937            claim,
938            status: CheckStatus::Error,
939            reason: Some(CheckReason::MissingRequiredText),
940            match_method: MatchMethod::None,
941            semantic_unverified: false,
942            evidence: None,
943            warnings,
944        };
945    }
946
947    if context.fingerprint_stale {
948        return Check {
949            id: check_id,
950            claim,
951            status: CheckStatus::Stale,
952            reason: Some(CheckReason::StaleFingerprint),
953            match_method: MatchMethod::None,
954            semantic_unverified: false,
955            evidence: None,
956            warnings,
957        };
958    }
959
960    if context.fingerprint_unverifiable {
961        push_warning(&mut warnings, WarningCode::CapabilityLimited);
962        return Check {
963            id: check_id,
964            claim,
965            status: CheckStatus::CapabilityBlocked,
966            reason: Some(CheckReason::MissingSourceFingerprint),
967            match_method: MatchMethod::None,
968            semantic_unverified: false,
969            evidence: None,
970            warnings,
971        };
972    }
973
974    if context.citation_fingerprint_missing {
975        return Check {
976            id: check_id,
977            claim,
978            status: CheckStatus::Stale,
979            reason: Some(CheckReason::MissingCitationFingerprint),
980            match_method: MatchMethod::None,
981            semantic_unverified: false,
982            evidence: None,
983            warnings,
984        };
985    }
986
987    let mut target = match resolve_target(index, &claim, config) {
988        TargetResolution::Found(target) => target,
989        TargetResolution::NotFound(reason) => {
990            return Check {
991                id: check_id,
992                claim,
993                status: CheckStatus::NotFound,
994                reason: Some(reason),
995                match_method: MatchMethod::None,
996                semantic_unverified: false,
997                evidence: None,
998                warnings,
999            };
1000        }
1001        TargetResolution::Invalid(reason) => {
1002            return Check {
1003                id: check_id,
1004                claim,
1005                status: CheckStatus::Error,
1006                reason: Some(reason),
1007                match_method: MatchMethod::None,
1008                semantic_unverified: false,
1009                evidence: None,
1010                warnings,
1011            };
1012        }
1013        TargetResolution::CapabilityBlocked(reason) => {
1014            push_warning(&mut warnings, WarningCode::CapabilityLimited);
1015            return Check {
1016                id: check_id,
1017                claim,
1018                status: CheckStatus::CapabilityBlocked,
1019                reason: Some(reason),
1020                match_method: MatchMethod::None,
1021                semantic_unverified: false,
1022                evidence: None,
1023                warnings,
1024            };
1025        }
1026    };
1027
1028    if let Some(adjacent_target) = adjacent_quote_target(index, &claim, &target, config) {
1029        target = adjacent_target;
1030    }
1031
1032    let evidence = make_evidence(source, &target, context.include_text, context.include_crops);
1033    let (status, match_method, reason) =
1034        check_resolved_claim(claim.kind, claim.text.as_deref(), &target, config);
1035    Check {
1036        id: check_id,
1037        claim,
1038        status,
1039        reason,
1040        match_method,
1041        semantic_unverified: false,
1042        evidence,
1043        warnings,
1044    }
1045}
1046
1047fn check_resolved_claim(
1048    kind: ClaimKind,
1049    expected_text: Option<&str>,
1050    target: &FoundTarget,
1051    config: &VerificationConfig,
1052) -> (CheckStatus, MatchMethod, Option<CheckReason>) {
1053    match kind {
1054        ClaimKind::Presence => check_presence_claim(),
1055        ClaimKind::Quote | ClaimKind::Value | ClaimKind::TableCell => {
1056            check_text_claim(kind, expected_text, target, config)
1057        }
1058        _ => unreachable!("unsupported kinds returned before matching"),
1059    }
1060}
1061
1062fn check_presence_claim() -> (CheckStatus, MatchMethod, Option<CheckReason>) {
1063    (CheckStatus::Grounded, MatchMethod::PresenceOnly, None)
1064}
1065
1066fn check_text_claim(
1067    kind: ClaimKind,
1068    expected_text: Option<&str>,
1069    target: &FoundTarget,
1070    config: &VerificationConfig,
1071) -> (CheckStatus, MatchMethod, Option<CheckReason>) {
1072    let match_method = if target.from_table_cell {
1073        MatchMethod::TableCellLookup
1074    } else {
1075        text_match_method(kind, config)
1076    };
1077    let (status, reason) = match (expected_text, target.text.as_deref()) {
1078        (Some(expected), Some(actual)) if text_matches(kind, expected, actual, config) => {
1079            (CheckStatus::Grounded, None)
1080        }
1081        _ => (CheckStatus::Mismatch, Some(CheckReason::TextMismatch)),
1082    };
1083    (status, match_method, reason)
1084}
1085
1086fn is_supported_kind(kind: ClaimKind) -> bool {
1087    matches!(
1088        kind,
1089        ClaimKind::Quote | ClaimKind::Value | ClaimKind::Presence | ClaimKind::TableCell
1090    )
1091}
1092
1093fn requires_text(kind: ClaimKind) -> bool {
1094    matches!(
1095        kind,
1096        ClaimKind::Quote | ClaimKind::Value | ClaimKind::TableCell
1097    )
1098}
1099
1100fn push_unsupported(unsupported: &mut Vec<String>, kind: ClaimKind) {
1101    let name = claim_kind_name(kind).to_string();
1102    if !unsupported.contains(&name) {
1103        unsupported.push(name);
1104    }
1105}
1106
1107fn claim_kind_name(kind: ClaimKind) -> &'static str {
1108    match kind {
1109        ClaimKind::Quote => "quote",
1110        ClaimKind::Value => "value",
1111        ClaimKind::Presence => "presence",
1112        ClaimKind::TableCell => "table_cell",
1113        ClaimKind::Region => "region",
1114        ClaimKind::Other => "other",
1115    }
1116}
1117
/// The region a citation resolved to, together with the text available for matching.
#[derive(Clone, Debug)]
struct FoundTarget {
    /// Id of the page the target lies on.
    page: Option<String>,
    /// Bounding box as `[min, min, max, max]` pairs on the two page axes; a page-level
    /// target spans `[0, 0, width, height]`.
    bbox: Option<[i64; 4]>,
    /// Text to match against; `None` for page-level targets and for elements that carry
    /// no text.
    text: Option<String>,
    /// Set when the target is a table cell, so the check reports a table-cell lookup.
    from_table_cell: bool,
    /// Position of the element in the source index when the target is a single element;
    /// used to look at neighbouring elements for split quotes.
    element_index: Option<usize>,
}
1126
1127/// Per-run grounding snapshot used to avoid cloning full entity collections per claim.
1128///
1129/// The lookup maps intentionally preserve first-match-by-id behavior, matching the trait default
1130/// and current native/ODL adapters. If an adapter gives `element_by_id` different semantics, update
1131/// this index at the same time so verifier resolution does not silently diverge.
1132struct SourceIndex {
1133    capabilities: ethos_core::grounding::Capabilities,
1134    pages: Vec<PageGeometry>,
1135    elements: Vec<GroundingElement>,
1136    spans: Vec<GroundingSpan>,
1137    tables: Vec<GroundingTable>,
1138    element_by_id: BTreeMap<String, usize>,
1139    span_by_id: BTreeMap<String, usize>,
1140    table_by_id: BTreeMap<String, usize>,
1141}
1142
1143impl SourceIndex {
1144    fn new(source: &dyn GroundingSource) -> Self {
1145        let capabilities = source.capabilities();
1146        let pages = source.pages();
1147        let elements = source.elements();
1148        let spans = if capabilities.spans {
1149            source.spans()
1150        } else {
1151            Vec::new()
1152        };
1153        let tables = if capabilities.tables {
1154            source.tables()
1155        } else {
1156            Vec::new()
1157        };
1158        let element_by_id = index_elements(&elements);
1159        let span_by_id = index_spans(&spans);
1160        let table_by_id = index_tables(&tables);
1161
1162        SourceIndex {
1163            capabilities,
1164            pages,
1165            elements,
1166            spans,
1167            tables,
1168            element_by_id,
1169            span_by_id,
1170            table_by_id,
1171        }
1172    }
1173
1174    fn span(&self, id: &str) -> Option<&GroundingSpan> {
1175        self.span_by_id
1176            .get(id)
1177            .and_then(|index| self.spans.get(*index))
1178    }
1179
1180    fn table(&self, id: &str) -> Option<&GroundingTable> {
1181        self.table_by_id
1182            .get(id)
1183            .and_then(|index| self.tables.get(*index))
1184    }
1185}
1186
1187fn index_elements(elements: &[GroundingElement]) -> BTreeMap<String, usize> {
1188    let mut index = BTreeMap::new();
1189    for (position, element) in elements.iter().enumerate() {
1190        index.entry(element.id.clone()).or_insert(position);
1191    }
1192    index
1193}
1194
1195fn index_spans(spans: &[GroundingSpan]) -> BTreeMap<String, usize> {
1196    let mut index = BTreeMap::new();
1197    for (position, span) in spans.iter().enumerate() {
1198        index.entry(span.id.clone()).or_insert(position);
1199    }
1200    index
1201}
1202
1203fn index_tables(tables: &[GroundingTable]) -> BTreeMap<String, usize> {
1204    let mut index = BTreeMap::new();
1205    for (position, table) in tables.iter().enumerate() {
1206        index.entry(table.id.clone()).or_insert(position);
1207    }
1208    index
1209}
1210
1211enum TargetResolution {
1212    Found(FoundTarget),
1213    NotFound(CheckReason),
1214    Invalid(CheckReason),
1215    CapabilityBlocked(CheckReason),
1216}
1217
1218fn resolve_target(
1219    index: &SourceIndex,
1220    claim: &Claim,
1221    config: &VerificationConfig,
1222) -> TargetResolution {
1223    if claim.kind == ClaimKind::TableCell
1224        || claim.citation.table_id.is_some()
1225        || claim.citation.cell.is_some()
1226    {
1227        return resolve_table_cell(index, claim);
1228    }
1229
1230    if let Some(span_id) = claim.citation.span_id.as_deref() {
1231        if !index.capabilities.spans {
1232            return TargetResolution::CapabilityBlocked(CheckReason::MissingSpanCapability);
1233        }
1234        return index
1235            .span(span_id)
1236            .map(target_from_span)
1237            .map(TargetResolution::Found)
1238            .unwrap_or(TargetResolution::NotFound(CheckReason::SpanNotFound));
1239    }
1240
1241    if let Some(element_id) = claim.citation.element_id.as_deref() {
1242        return index
1243            .element_by_id
1244            .get(element_id)
1245            .and_then(|position| {
1246                index
1247                    .elements
1248                    .get(*position)
1249                    .map(|element| (*position, element))
1250            })
1251            .map(|(position, element)| target_from_element(element, Some(position)))
1252            .map(TargetResolution::Found)
1253            .unwrap_or(TargetResolution::NotFound(CheckReason::ElementNotFound));
1254    }
1255
1256    if let (Some(page), Some(bbox)) = (claim.citation.page.as_deref(), claim.citation.bbox) {
1257        if index.capabilities.coordinate_origin == CoordinateOrigin::Unknown {
1258            return TargetResolution::CapabilityBlocked(CheckReason::UnknownCoordinateOrigin);
1259        }
1260        let tolerance = config.matching.bbox_containment_tolerance_q.unwrap_or(0);
1261        return index
1262            .elements
1263            .iter()
1264            .enumerate()
1265            .filter(|(_, element)| {
1266                element.page == page && contains_bbox(element.bbox, bbox, tolerance)
1267            })
1268            .min_by_key(|(position, element)| (bbox_area(element.bbox), *position))
1269            .map(|(position, element)| target_from_element(element, Some(position)))
1270            .map(TargetResolution::Found)
1271            .unwrap_or(TargetResolution::NotFound(CheckReason::BboxNotFound));
1272    }
1273
1274    if claim.citation.bbox.is_some() {
1275        return TargetResolution::Invalid(CheckReason::MissingPageForBbox);
1276    }
1277
1278    if let Some(page) = claim.citation.page.as_deref() {
1279        return index
1280            .pages
1281            .iter()
1282            .find(|candidate| candidate.id == page)
1283            .map(|found| {
1284                TargetResolution::Found(FoundTarget {
1285                    page: Some(found.id.clone()),
1286                    bbox: Some([0, 0, found.width, found.height]),
1287                    text: None,
1288                    from_table_cell: false,
1289                    element_index: None,
1290                })
1291            })
1292            .unwrap_or(TargetResolution::NotFound(CheckReason::PageNotFound));
1293    }
1294
1295    TargetResolution::NotFound(CheckReason::MissingLocator)
1296}
1297
1298fn target_from_element(element: &GroundingElement, element_index: Option<usize>) -> FoundTarget {
1299    FoundTarget {
1300        page: Some(element.page.clone()),
1301        bbox: Some(element.bbox),
1302        text: element.text.clone(),
1303        from_table_cell: false,
1304        element_index,
1305    }
1306}
1307
1308fn target_from_span(span: &GroundingSpan) -> FoundTarget {
1309    FoundTarget {
1310        page: Some(span.page.clone()),
1311        bbox: Some(span.bbox),
1312        text: Some(span.text.clone()),
1313        from_table_cell: false,
1314        element_index: None,
1315    }
1316}
1317
1318fn resolve_table_cell(index: &SourceIndex, claim: &Claim) -> TargetResolution {
1319    let Some(table_id) = claim.citation.table_id.as_deref() else {
1320        return TargetResolution::Invalid(CheckReason::MissingTableCellLocator);
1321    };
1322    let Some(cell_ref) = claim.citation.cell else {
1323        return TargetResolution::Invalid(CheckReason::MissingTableCellLocator);
1324    };
1325    if !index.capabilities.tables {
1326        return TargetResolution::CapabilityBlocked(CheckReason::MissingTableCapability);
1327    }
1328    let Some(table) = index.table(table_id) else {
1329        return TargetResolution::NotFound(CheckReason::TableNotFound);
1330    };
1331    target_from_table_cell(table, cell_ref.row, cell_ref.col)
1332        .map(TargetResolution::Found)
1333        .unwrap_or(TargetResolution::NotFound(CheckReason::TableCellNotFound))
1334}
1335
1336fn target_from_table_cell(table: &GroundingTable, row: u32, col: u32) -> Option<FoundTarget> {
1337    table
1338        .cells
1339        .iter()
1340        .find(|cell| table_cell_covers(cell, row, col))
1341        .map(|cell| target_from_cell(&table.page, cell))
1342}
1343
1344fn table_cell_covers(cell: &GroundingCell, row: u32, col: u32) -> bool {
1345    let row_end = cell.row.saturating_add(cell.row_span.max(1));
1346    let col_end = cell.col.saturating_add(cell.col_span.max(1));
1347    row >= cell.row && row < row_end && col >= cell.col && col < col_end
1348}
1349
1350fn target_from_cell(page: &str, cell: &GroundingCell) -> FoundTarget {
1351    FoundTarget {
1352        page: Some(page.to_string()),
1353        bbox: Some(cell.bbox),
1354        text: Some(cell.text.clone()),
1355        from_table_cell: true,
1356        element_index: None,
1357    }
1358}
1359
1360fn adjacent_quote_target(
1361    index: &SourceIndex,
1362    claim: &Claim,
1363    target: &FoundTarget,
1364    config: &VerificationConfig,
1365) -> Option<FoundTarget> {
1366    if claim.kind != ClaimKind::Quote {
1367        return None;
1368    }
1369    let expected = claim.text.as_deref()?;
1370    if target
1371        .text
1372        .as_deref()
1373        .is_some_and(|actual| text_matches(ClaimKind::Quote, expected, actual, config))
1374    {
1375        return None;
1376    }
1377
1378    if claim.citation.bbox.is_some() {
1379        return None;
1380    }
1381
1382    if claim.citation.element_id.is_some() {
1383        if let Some(position) = target.element_index {
1384            return adjacent_text_pair_for_element(index, position, expected, config);
1385        }
1386    }
1387
1388    None
1389}
1390
1391fn adjacent_text_pair_for_element(
1392    index: &SourceIndex,
1393    position: usize,
1394    expected: &str,
1395    config: &VerificationConfig,
1396) -> Option<FoundTarget> {
1397    let current = index.elements.get(position)?;
1398    if let Some(second) = position
1399        .checked_add(1)
1400        .and_then(|next| index.elements.get(next))
1401    {
1402        if let Some(target) = adjacent_text_pair_target(current, second, expected, config) {
1403            return Some(target);
1404        }
1405    }
1406    position
1407        .checked_sub(1)
1408        .and_then(|previous| index.elements.get(previous))
1409        .and_then(|first| adjacent_text_pair_target(first, current, expected, config))
1410}
1411
1412fn adjacent_text_pair_target(
1413    first: &GroundingElement,
1414    second: &GroundingElement,
1415    expected: &str,
1416    config: &VerificationConfig,
1417) -> Option<FoundTarget> {
1418    if first.page != second.page {
1419        return None;
1420    }
1421    if !element_bboxes_are_adjacent(first.bbox, second.bbox) {
1422        return None;
1423    }
1424    let first_text = first.text.as_deref()?;
1425    let second_text = second.text.as_deref()?;
1426    let joined = join_adjacent_text(first_text, second_text, config);
1427    if text_matches(ClaimKind::Quote, expected, first_text, config)
1428        || text_matches(ClaimKind::Quote, expected, second_text, config)
1429        || !text_matches(ClaimKind::Quote, expected, &joined, config)
1430    {
1431        return None;
1432    }
1433
1434    Some(FoundTarget {
1435        page: Some(first.page.clone()),
1436        bbox: Some(union_bbox(first.bbox, second.bbox)),
1437        text: Some(joined),
1438        from_table_cell: false,
1439        element_index: None,
1440    })
1441}
1442
1443fn join_adjacent_text(first: &str, second: &str, config: &VerificationConfig) -> String {
1444    let joined = format!("{first} {second}");
1445    match config.matching.text_normalization {
1446        TextNormalization::None => joined,
1447        TextNormalization::CollapseWhitespace => normalize_quote(&joined),
1448    }
1449}
1450
/// Area of a `[min, min, max, max]` bbox. Inverted or empty extents count as zero, and
/// all arithmetic saturates instead of overflowing.
fn bbox_area(bbox: [i64; 4]) -> u128 {
    let [x0, y0, x1, y1] = bbox;
    // A negative extent fails the conversion to `u128` and is clamped to zero.
    let extent = |low: i64, high: i64| u128::try_from(high.saturating_sub(low)).unwrap_or(0);
    extent(x0, x1).saturating_mul(extent(y0, y1))
}
1456
1457fn element_bboxes_are_adjacent(first: [i64; 4], second: [i64; 4]) -> bool {
1458    let same_line =
1459        ranges_overlap_i64(first[1], first[3], second[1], second[3]) && first[2] == second[0];
1460    let stacked =
1461        ranges_overlap_i64(first[0], first[2], second[0], second[2]) && first[3] == second[1];
1462    same_line || stacked
1463}
1464
/// Whether the ranges `a` and `b` overlap, compared with strict inequalities: ranges
/// that merely touch at an endpoint do not overlap.
fn ranges_overlap_i64(a_start: i64, a_end: i64, b_start: i64, b_end: i64) -> bool {
    let a_entirely_after_b = a_start >= b_end;
    let b_entirely_after_a = b_start >= a_end;
    !(a_entirely_after_b || b_entirely_after_a)
}
1468
/// Smallest `[min, min, max, max]` bbox that encloses both `left` and `right`.
fn union_bbox(left: [i64; 4], right: [i64; 4]) -> [i64; 4] {
    let [lx0, ly0, lx1, ly1] = left;
    let [rx0, ry0, rx1, ry1] = right;
    [lx0.min(rx0), ly0.min(ry0), lx1.max(rx1), ly1.max(ry1)]
}
1477
1478fn make_evidence(
1479    source: &dyn GroundingSource,
1480    target: &FoundTarget,
1481    include_text: bool,
1482    include_crops: bool,
1483) -> Option<Evidence> {
1484    let crop_ref = if include_crops && source.capabilities().crop_support {
1485        target
1486            .page
1487            .as_deref()
1488            .zip(target.bbox)
1489            .and_then(|(page, bbox)| source.crop_ref(page, bbox))
1490    } else {
1491        None
1492    };
1493    Some(Evidence {
1494        text: include_text.then(|| target.text.clone()).flatten(),
1495        page: target.page.clone(),
1496        bbox: target.bbox,
1497        crop_ref,
1498    })
1499}
1500
/// Whether `inner` lies within `container` once `container` is grown by `tolerance` on
/// every side. Both boxes are `[min, min, max, max]`.
///
/// The grown edges are computed with saturating arithmetic, so extreme coordinates or
/// tolerances clamp to the `i64` range instead of overflowing.
fn contains_bbox(container: [i64; 4], inner: [i64; 4], tolerance: i64) -> bool {
    let [c_min0, c_min1, c_max0, c_max1] = container;
    let [i_min0, i_min1, i_max0, i_max1] = inner;

    i_min0 >= c_min0.saturating_sub(tolerance)
        && i_min1 >= c_min1.saturating_sub(tolerance)
        && i_max0 <= c_max0.saturating_add(tolerance)
        && i_max1 <= c_max1.saturating_add(tolerance)
}
1507
1508fn text_match_method(kind: ClaimKind, config: &VerificationConfig) -> MatchMethod {
1509    match (kind, config.matching.text_normalization) {
1510        (ClaimKind::Quote, TextNormalization::None) => MatchMethod::ExactTextContains,
1511        (ClaimKind::Quote, TextNormalization::CollapseWhitespace) => {
1512            MatchMethod::NormalizedTextContains
1513        }
1514        (_, TextNormalization::None) => MatchMethod::ExactText,
1515        (_, TextNormalization::CollapseWhitespace) => MatchMethod::NormalizedText,
1516    }
1517}
1518
1519fn text_matches(
1520    kind: ClaimKind,
1521    expected: &str,
1522    actual: &str,
1523    config: &VerificationConfig,
1524) -> bool {
1525    let (mut expected, mut actual) = match config.matching.text_normalization {
1526        TextNormalization::None => (expected.to_string(), actual.to_string()),
1527        TextNormalization::CollapseWhitespace => {
1528            (normalize_quote(expected), normalize_quote(actual))
1529        }
1530    };
1531    if !config.matching.case_sensitive {
1532        expected = expected.to_lowercase();
1533        actual = actual.to_lowercase();
1534    }
1535    if kind == ClaimKind::Quote {
1536        actual.contains(&expected)
1537    } else {
1538        actual == expected
1539    }
1540}
1541
/// Normalize a quote for literal matching: collapse every run of ASCII whitespace
/// (space, tab, line feed, form feed, carriage return) to one ASCII space, then trim.
///
/// Line endings need no separate pass: `\r\n` and a lone `\r` are themselves ASCII
/// whitespace, so they fold into the same single space as any other run. Non-ASCII
/// whitespace inside the text is preserved as-is. The final trim uses [`str::trim`],
/// which strips leading and trailing whitespace as defined by Unicode.
pub fn normalize_quote(input: &str) -> String {
    let mut out = String::with_capacity(input.len());
    let mut in_ascii_ws = false;
    for ch in input.chars() {
        if ch.is_ascii_whitespace() {
            // Emit one space per run, on the first whitespace character only.
            if !in_ascii_ws {
                out.push(' ');
                in_ascii_ws = true;
            }
        } else {
            out.push(ch);
            in_ascii_ws = false;
        }
    }
    out.trim().to_string()
}
1561
1562#[cfg(test)]
1563mod tests {
1564    use super::*;
1565    use ethos_core::grounding::{
1566        Capabilities, GroundingCell, GroundingElement, GroundingSpan, GroundingTable, PageGeometry,
1567        ParserIdentity,
1568    };
1569    use ethos_core::verify_types::{CapabilityLimit, CellRef, Citation, Claim};
1570
1571    #[derive(Clone)]
1572    struct TestSource {
1573        caps: Capabilities,
1574        fingerprint: Option<String>,
1575        crop_ref: Option<String>,
1576    }
1577
1578    impl Default for TestSource {
1579        fn default() -> Self {
1580            Self {
1581                caps: Capabilities {
1582                    spans: true,
1583                    char_offsets: true,
1584                    tables: true,
1585                    fingerprint: true,
1586                    coordinate_origin: CoordinateOrigin::TopLeft,
1587                    crop_support: false,
1588                },
1589                fingerprint: Some(
1590                    "sha256:b5d30710d0c25cc38d8dec924ecaf57ae4f81276dd5dc14d75cb3b5b6bde62d3"
1591                        .into(),
1592                ),
1593                crop_ref: None,
1594            }
1595        }
1596    }
1597
1598    impl GroundingSource for TestSource {
1599        fn parser(&self) -> ParserIdentity {
1600            ParserIdentity {
1601                name: "test-parser".into(),
1602                version: "0.1.0".into(),
1603                adapter: None,
1604                adapter_version: None,
1605            }
1606        }
1607        fn capabilities(&self) -> Capabilities {
1608            self.caps
1609        }
1610        fn fingerprint(&self) -> Option<String> {
1611            self.fingerprint.clone()
1612        }
1613        fn pages(&self) -> Vec<PageGeometry> {
1614            vec![PageGeometry {
1615                id: "p0001".into(),
1616                index: 1,
1617                width: 61200,
1618                height: 79200,
1619                rotation: 0,
1620            }]
1621        }
1622        fn elements(&self) -> Vec<GroundingElement> {
1623            vec![
1624                GroundingElement {
1625                    id: "e000002".into(),
1626                    page: "p0001".into(),
1627                    bbox: [7200, 10100, 54000, 11500],
1628                    kind: "text_block".into(),
1629                    text: Some(
1630                        "Revenue grew to $12.4M in Q3 2025, driven by enterprise expansion.".into(),
1631                    ),
1632                },
1633                GroundingElement {
1634                    id: "e000003".into(),
1635                    page: "p0001".into(),
1636                    bbox: [7200, 13000, 54000, 20000],
1637                    kind: "table".into(),
1638                    text: None,
1639                },
1640            ]
1641        }
1642        fn spans(&self) -> Vec<GroundingSpan> {
1643            vec![GroundingSpan {
1644                id: "s000002".into(),
1645                page: "p0001".into(),
1646                bbox: [7200, 10100, 54000, 11500],
1647                text: "Revenue grew to $12.4M in Q3 2025".into(),
1648                element: Some("e000002".into()),
1649                char_start: Some(0),
1650                char_end: Some(34),
1651            }]
1652        }
1653        fn tables(&self) -> Vec<GroundingTable> {
1654            vec![GroundingTable {
1655                id: "t0001".into(),
1656                page: "p0001".into(),
1657                bbox: [7200, 13000, 54000, 20000],
1658                cells: vec![
1659                    GroundingCell {
1660                        row: 0,
1661                        col: 0,
1662                        row_span: 1,
1663                        col_span: 1,
1664                        bbox: [7200, 13000, 30600, 16500],
1665                        text: "Metric".into(),
1666                    },
1667                    GroundingCell {
1668                        row: 1,
1669                        col: 1,
1670                        row_span: 1,
1671                        col_span: 1,
1672                        bbox: [30600, 16500, 54000, 20000],
1673                        text: "$12.4M".into(),
1674                    },
1675                ],
1676            }]
1677        }
1678        fn crop_ref(&self, page: &str, bbox: [i64; 4]) -> Option<String> {
1679            if page == "p0001" && bbox == [7200, 10100, 54000, 11500] {
1680                self.crop_ref.clone()
1681            } else {
1682                None
1683            }
1684        }
1685    }
1686
1687    struct ElementSource {
1688        elements: Vec<GroundingElement>,
1689    }
1690
1691    impl GroundingSource for ElementSource {
1692        fn parser(&self) -> ParserIdentity {
1693            ParserIdentity {
1694                name: "element-test-parser".into(),
1695                version: "0.1.0".into(),
1696                adapter: None,
1697                adapter_version: None,
1698            }
1699        }
1700        fn capabilities(&self) -> Capabilities {
1701            Capabilities {
1702                spans: true,
1703                char_offsets: true,
1704                tables: true,
1705                fingerprint: true,
1706                coordinate_origin: CoordinateOrigin::TopLeft,
1707                crop_support: false,
1708            }
1709        }
1710        fn fingerprint(&self) -> Option<String> {
1711            Some("sha256:b5d30710d0c25cc38d8dec924ecaf57ae4f81276dd5dc14d75cb3b5b6bde62d3".into())
1712        }
1713        fn pages(&self) -> Vec<PageGeometry> {
1714            vec![
1715                PageGeometry {
1716                    id: "p0001".into(),
1717                    index: 1,
1718                    width: 61200,
1719                    height: 79200,
1720                    rotation: 0,
1721                },
1722                PageGeometry {
1723                    id: "p0002".into(),
1724                    index: 2,
1725                    width: 61200,
1726                    height: 79200,
1727                    rotation: 0,
1728                },
1729            ]
1730        }
1731        fn elements(&self) -> Vec<GroundingElement> {
1732            self.elements.clone()
1733        }
1734        fn spans(&self) -> Vec<GroundingSpan> {
1735            Vec::new()
1736        }
1737        fn tables(&self) -> Vec<GroundingTable> {
1738            Vec::new()
1739        }
1740    }
1741
1742    fn claim(kind: ClaimKind, text: Option<&str>, citation: Citation) -> Claim {
1743        Claim {
1744            kind,
1745            text: text.map(str::to_string),
1746            citation,
1747        }
1748    }
1749
1750    fn input(source: &TestSource, claims: Vec<Claim>) -> CitationInput {
1751        CitationInput::Envelope(CitationEnvelope {
1752            document_fingerprint: source.fingerprint(),
1753            claims,
1754        })
1755    }
1756
1757    fn verify(source: &TestSource, claims: Vec<Claim>) -> VerificationReport {
1758        let cfg = VerificationConfig::default_v1();
1759        verify_claims(source, input(source, claims), &cfg, "0".repeat(64))
1760    }
1761
1762    fn verify_with_config(
1763        source: &TestSource,
1764        claims: Vec<Claim>,
1765        cfg: &VerificationConfig,
1766    ) -> VerificationReport {
1767        verify_claims(source, input(source, claims), cfg, "0".repeat(64))
1768    }
1769
1770    fn element(id: &str, page: &str, bbox: [i64; 4], text: Option<&str>) -> GroundingElement {
1771        GroundingElement {
1772            id: id.into(),
1773            page: page.into(),
1774            bbox,
1775            kind: "text_block".into(),
1776            text: text.map(str::to_string),
1777        }
1778    }
1779
1780    fn verify_elements(elements: Vec<GroundingElement>, claims: Vec<Claim>) -> VerificationReport {
1781        let source = ElementSource { elements };
1782        let cfg = VerificationConfig::default_v1();
1783        let citations = CitationInput::Envelope(CitationEnvelope {
1784            document_fingerprint: source.fingerprint(),
1785            claims,
1786        });
1787        verify_claims(&source, citations, &cfg, "0".repeat(64))
1788    }
1789
1790    #[test]
1791    fn quote_and_presence_claims_ground_with_literal_matching() {
1792        let source = TestSource::default();
1793        let report = verify(
1794            &source,
1795            vec![
1796                claim(
1797                    ClaimKind::Quote,
1798                    Some("Revenue grew to $12.4M in Q3 2025"),
1799                    Citation {
1800                        element_id: Some("e000002".into()),
1801                        ..Default::default()
1802                    },
1803                ),
1804                claim(
1805                    ClaimKind::Presence,
1806                    None,
1807                    Citation {
1808                        span_id: Some("s000002".into()),
1809                        ..Default::default()
1810                    },
1811                ),
1812            ],
1813        );
1814
1815        assert!(report.all_evidence_grounded);
1816        assert_eq!(report.checks.len(), 2);
1817        assert_eq!(report.capability_limits, Vec::<CapabilityLimit>::new());
1818        assert_eq!(report.checks[0].status, CheckStatus::Grounded);
1819        assert_eq!(
1820            report.checks[0].match_method,
1821            MatchMethod::NormalizedTextContains
1822        );
1823        assert_eq!(report.checks[1].status, CheckStatus::Grounded);
1824        assert_eq!(report.checks[1].match_method, MatchMethod::PresenceOnly);
1825        assert_eq!(
1826            report.checks[0]
1827                .evidence
1828                .as_ref()
1829                .and_then(|e| e.text.as_deref()),
1830            Some("Revenue grew to $12.4M in Q3 2025, driven by enterprise expansion.")
1831        );
1832        assert_eq!(report.warnings, Vec::<WarningCode>::new());
1833    }
1834
1835    #[test]
1836    fn quote_claim_grounds_across_adjacent_element_text_fragments() {
1837        let report = verify_elements(
1838            vec![
1839                element(
1840                    "split-a",
1841                    "p0001",
1842                    [100, 100, 400, 200],
1843                    Some("The alpha trust loop verifies "),
1844                ),
1845                element(
1846                    "split-b",
1847                    "p0001",
1848                    [400, 100, 700, 200],
1849                    Some("grounded evidence"),
1850                ),
1851            ],
1852            vec![claim(
1853                ClaimKind::Quote,
1854                Some("The alpha trust loop verifies grounded evidence"),
1855                Citation {
1856                    element_id: Some("split-a".into()),
1857                    ..Default::default()
1858                },
1859            )],
1860        );
1861
1862        assert!(report.all_evidence_grounded);
1863        assert_eq!(report.checks[0].status, CheckStatus::Grounded);
1864        assert_eq!(
1865            report.checks[0].match_method,
1866            MatchMethod::NormalizedTextContains
1867        );
1868        assert_eq!(
1869            report.checks[0]
1870                .evidence
1871                .as_ref()
1872                .and_then(|e| e.text.as_deref()),
1873            Some("The alpha trust loop verifies grounded evidence")
1874        );
1875        assert_eq!(
1876            report.checks[0].evidence.as_ref().and_then(|e| e.bbox),
1877            Some([100, 100, 700, 200])
1878        );
1879    }
1880
1881    #[test]
1882    fn quote_claim_page_only_locator_does_not_search_adjacent_fragments() {
1883        let report = verify_elements(
1884            vec![
1885                element(
1886                    "split-a",
1887                    "p0001",
1888                    [100, 100, 400, 200],
1889                    Some("The alpha trust loop verifies "),
1890                ),
1891                element(
1892                    "split-b",
1893                    "p0001",
1894                    [400, 100, 700, 200],
1895                    Some("grounded evidence"),
1896                ),
1897            ],
1898            vec![claim(
1899                ClaimKind::Quote,
1900                Some("The alpha trust loop verifies grounded evidence"),
1901                Citation {
1902                    page: Some("p0001".into()),
1903                    ..Default::default()
1904                },
1905            )],
1906        );
1907
1908        assert!(!report.all_evidence_grounded);
1909        assert_eq!(report.checks[0].status, CheckStatus::Mismatch);
1910        assert_eq!(report.checks[0].reason, Some(CheckReason::TextMismatch));
1911    }
1912
1913    #[test]
1914    fn quote_claim_grounds_when_element_id_points_to_second_adjacent_fragment() {
1915        let report = verify_elements(
1916            vec![
1917                element(
1918                    "split-a",
1919                    "p0001",
1920                    [100, 100, 400, 200],
1921                    Some("The alpha trust loop verifies "),
1922                ),
1923                element(
1924                    "split-b",
1925                    "p0001",
1926                    [400, 100, 700, 200],
1927                    Some("grounded evidence"),
1928                ),
1929            ],
1930            vec![claim(
1931                ClaimKind::Quote,
1932                Some("The alpha trust loop verifies grounded evidence"),
1933                Citation {
1934                    element_id: Some("split-b".into()),
1935                    ..Default::default()
1936                },
1937            )],
1938        );
1939
1940        assert!(report.all_evidence_grounded);
1941        assert_eq!(report.checks[0].status, CheckStatus::Grounded);
1942        assert_eq!(
1943            report.checks[0]
1944                .evidence
1945                .as_ref()
1946                .and_then(|e| e.text.as_deref()),
1947            Some("The alpha trust loop verifies grounded evidence")
1948        );
1949        assert_eq!(
1950            report.checks[0].evidence.as_ref().and_then(|e| e.bbox),
1951            Some([100, 100, 700, 200])
1952        );
1953    }
1954
1955    #[test]
1956    fn quote_claim_does_not_stitch_non_touching_element_bboxes() {
1957        let report = verify_elements(
1958            vec![
1959                element(
1960                    "split-a",
1961                    "p0001",
1962                    [100, 100, 390, 200],
1963                    Some("The alpha trust loop verifies "),
1964                ),
1965                element(
1966                    "split-b",
1967                    "p0001",
1968                    [400, 100, 700, 200],
1969                    Some("grounded evidence"),
1970                ),
1971            ],
1972            vec![claim(
1973                ClaimKind::Quote,
1974                Some("The alpha trust loop verifies grounded evidence"),
1975                Citation {
1976                    element_id: Some("split-a".into()),
1977                    ..Default::default()
1978                },
1979            )],
1980        );
1981
1982        assert!(!report.all_evidence_grounded);
1983        assert_eq!(report.checks[0].status, CheckStatus::Mismatch);
1984        assert_eq!(report.checks[0].reason, Some(CheckReason::TextMismatch));
1985    }
1986
1987    #[test]
1988    fn quote_claim_bbox_locator_does_not_expand_outside_cited_region() {
1989        let report = verify_elements(
1990            vec![
1991                element(
1992                    "split-a",
1993                    "p0001",
1994                    [100, 100, 400, 200],
1995                    Some("The alpha trust loop verifies "),
1996                ),
1997                element(
1998                    "split-b",
1999                    "p0001",
2000                    [400, 100, 700, 200],
2001                    Some("grounded evidence"),
2002                ),
2003            ],
2004            vec![claim(
2005                ClaimKind::Quote,
2006                Some("The alpha trust loop verifies grounded evidence"),
2007                Citation {
2008                    page: Some("p0001".into()),
2009                    bbox: Some([120, 120, 380, 180]),
2010                    ..Default::default()
2011                },
2012            )],
2013        );
2014
2015        assert!(!report.all_evidence_grounded);
2016        assert_eq!(report.checks[0].status, CheckStatus::Mismatch);
2017        assert_eq!(report.checks[0].reason, Some(CheckReason::TextMismatch));
2018        assert_eq!(
2019            report.checks[0]
2020                .evidence
2021                .as_ref()
2022                .and_then(|e| e.text.as_deref()),
2023            Some("The alpha trust loop verifies ")
2024        );
2025        assert_eq!(
2026            report.checks[0].evidence.as_ref().and_then(|e| e.bbox),
2027            Some([100, 100, 400, 200])
2028        );
2029    }
2030
2031    #[test]
2032    fn bbox_locator_prefers_smallest_containing_element() {
2033        let report = verify_elements(
2034            vec![
2035                element(
2036                    "container",
2037                    "p0001",
2038                    [0, 0, 1000, 1000],
2039                    Some("outer wrapper text"),
2040                ),
2041                element(
2042                    "inner",
2043                    "p0001",
2044                    [100, 100, 400, 200],
2045                    Some("The exact cited quote"),
2046                ),
2047            ],
2048            vec![claim(
2049                ClaimKind::Quote,
2050                Some("The exact cited quote"),
2051                Citation {
2052                    page: Some("p0001".into()),
2053                    bbox: Some([120, 120, 380, 180]),
2054                    ..Default::default()
2055                },
2056            )],
2057        );
2058
2059        assert!(report.all_evidence_grounded);
2060        assert_eq!(report.checks[0].status, CheckStatus::Grounded);
2061        assert_eq!(
2062            report.checks[0]
2063                .evidence
2064                .as_ref()
2065                .and_then(|e| e.text.as_deref()),
2066            Some("The exact cited quote")
2067        );
2068        assert_eq!(
2069            report.checks[0].evidence.as_ref().and_then(|e| e.bbox),
2070            Some([100, 100, 400, 200])
2071        );
2072    }
2073
2074    #[test]
2075    fn quote_claim_does_not_ground_across_non_adjacent_or_wrong_page_fragments() {
2076        let non_adjacent = verify_elements(
2077            vec![
2078                element(
2079                    "split-a",
2080                    "p0001",
2081                    [100, 100, 400, 200],
2082                    Some("The alpha trust loop verifies "),
2083                ),
2084                element(
2085                    "between",
2086                    "p0001",
2087                    [100, 220, 700, 320],
2088                    Some("separate evidence"),
2089                ),
2090                element(
2091                    "split-b",
2092                    "p0001",
2093                    [400, 100, 700, 200],
2094                    Some("grounded evidence"),
2095                ),
2096            ],
2097            vec![claim(
2098                ClaimKind::Quote,
2099                Some("The alpha trust loop verifies grounded evidence"),
2100                Citation {
2101                    element_id: Some("split-a".into()),
2102                    ..Default::default()
2103                },
2104            )],
2105        );
2106        assert!(!non_adjacent.all_evidence_grounded);
2107        assert_eq!(non_adjacent.checks[0].status, CheckStatus::Mismatch);
2108        assert_eq!(
2109            non_adjacent.checks[0].reason,
2110            Some(CheckReason::TextMismatch)
2111        );
2112
2113        let wrong_page = verify_elements(
2114            vec![
2115                element(
2116                    "split-a",
2117                    "p0001",
2118                    [100, 100, 400, 200],
2119                    Some("The alpha trust loop verifies "),
2120                ),
2121                element(
2122                    "split-b",
2123                    "p0002",
2124                    [400, 100, 700, 200],
2125                    Some("grounded evidence"),
2126                ),
2127            ],
2128            vec![claim(
2129                ClaimKind::Quote,
2130                Some("The alpha trust loop verifies grounded evidence"),
2131                Citation {
2132                    page: Some("p0001".into()),
2133                    ..Default::default()
2134                },
2135            )],
2136        );
2137        assert!(!wrong_page.all_evidence_grounded);
2138        assert_eq!(wrong_page.checks[0].status, CheckStatus::Mismatch);
2139        assert_eq!(wrong_page.checks[0].reason, Some(CheckReason::TextMismatch));
2140    }
2141
2142    #[test]
2143    fn mismatch_and_not_found_keep_gate_false() {
2144        let source = TestSource::default();
2145        let report = verify(
2146            &source,
2147            vec![
2148                claim(
2149                    ClaimKind::Quote,
2150                    Some("Revenue fell to $1"),
2151                    Citation {
2152                        element_id: Some("e000002".into()),
2153                        ..Default::default()
2154                    },
2155                ),
2156                claim(
2157                    ClaimKind::Presence,
2158                    None,
2159                    Citation {
2160                        element_id: Some("missing".into()),
2161                        ..Default::default()
2162                    },
2163                ),
2164            ],
2165        );
2166
2167        assert!(!report.all_evidence_grounded);
2168        assert_eq!(report.checks[0].status, CheckStatus::Mismatch);
2169        assert_eq!(report.checks[0].reason, Some(CheckReason::TextMismatch));
2170        assert_eq!(report.checks[1].status, CheckStatus::NotFound);
2171        assert_eq!(report.checks[1].reason, Some(CheckReason::ElementNotFound));
2172    }
2173
2174    #[test]
2175    fn value_claims_use_literal_text_matching() {
2176        let source = TestSource::default();
2177        let report = verify(
2178            &source,
2179            vec![claim(
2180                ClaimKind::Value,
2181                Some("Revenue grew to $12.4M in Q3 2025, driven by enterprise expansion."),
2182                Citation {
2183                    element_id: Some("e000002".into()),
2184                    ..Default::default()
2185                },
2186            )],
2187        );
2188
2189        assert!(report.all_evidence_grounded);
2190        assert_eq!(report.unsupported_claim_kinds, Vec::<String>::new());
2191        assert_eq!(report.checks[0].status, CheckStatus::Grounded);
2192        assert_eq!(report.checks[0].match_method, MatchMethod::NormalizedText);
2193    }
2194
2195    #[test]
2196    fn value_substrings_do_not_ground() {
2197        let source = TestSource::default();
2198        let report = verify(
2199            &source,
2200            vec![claim(
2201                ClaimKind::Value,
2202                Some("1"),
2203                Citation {
2204                    element_id: Some("e000002".into()),
2205                    ..Default::default()
2206                },
2207            )],
2208        );
2209
2210        assert!(!report.all_evidence_grounded);
2211        assert_eq!(report.checks[0].status, CheckStatus::Mismatch);
2212        assert_eq!(report.checks[0].reason, Some(CheckReason::TextMismatch));
2213        assert_eq!(report.checks[0].match_method, MatchMethod::NormalizedText);
2214    }
2215
2216    #[test]
2217    fn table_cell_claims_lookup_cell_and_match_text() {
2218        let source = TestSource::default();
2219        let report = verify(
2220            &source,
2221            vec![claim(
2222                ClaimKind::TableCell,
2223                Some("$12.4M"),
2224                Citation {
2225                    table_id: Some("t0001".into()),
2226                    cell: Some(CellRef { row: 1, col: 1 }),
2227                    ..Default::default()
2228                },
2229            )],
2230        );
2231
2232        assert!(report.all_evidence_grounded);
2233        assert_eq!(report.unsupported_claim_kinds, Vec::<String>::new());
2234        assert_eq!(report.checks[0].status, CheckStatus::Grounded);
2235        assert_eq!(report.checks[0].match_method, MatchMethod::TableCellLookup);
2236        assert_eq!(
2237            report.checks[0]
2238                .evidence
2239                .as_ref()
2240                .and_then(|e| e.text.as_deref()),
2241            Some("$12.4M")
2242        );
2243    }
2244
2245    #[test]
2246    fn table_cell_missing_cell_is_not_found() {
2247        let source = TestSource::default();
2248        let report = verify(
2249            &source,
2250            vec![claim(
2251                ClaimKind::TableCell,
2252                Some("$12.4M"),
2253                Citation {
2254                    table_id: Some("t0001".into()),
2255                    cell: Some(CellRef { row: 9, col: 9 }),
2256                    ..Default::default()
2257                },
2258            )],
2259        );
2260
2261        assert!(!report.all_evidence_grounded);
2262        assert_eq!(report.checks[0].status, CheckStatus::NotFound);
2263        assert_eq!(
2264            report.checks[0].reason,
2265            Some(CheckReason::TableCellNotFound)
2266        );
2267        assert_eq!(report.checks[0].match_method, MatchMethod::None);
2268    }
2269
2270    #[test]
2271    fn empty_table_collection_is_not_found_when_tables_are_supported() {
2272        let source = TestSource {
2273            caps: Capabilities {
2274                tables: true,
2275                ..TestSource::default().caps
2276            },
2277            ..TestSource::default()
2278        };
2279        struct NoTables(TestSource);
2280        impl GroundingSource for NoTables {
2281            fn parser(&self) -> ParserIdentity {
2282                self.0.parser()
2283            }
2284            fn capabilities(&self) -> Capabilities {
2285                self.0.capabilities()
2286            }
2287            fn fingerprint(&self) -> Option<String> {
2288                self.0.fingerprint()
2289            }
2290            fn pages(&self) -> Vec<PageGeometry> {
2291                self.0.pages()
2292            }
2293            fn elements(&self) -> Vec<GroundingElement> {
2294                self.0.elements()
2295            }
2296            fn spans(&self) -> Vec<GroundingSpan> {
2297                self.0.spans()
2298            }
2299            fn tables(&self) -> Vec<GroundingTable> {
2300                Vec::new()
2301            }
2302        }
2303        let report = verify(
2304            &source,
2305            vec![claim(
2306                ClaimKind::TableCell,
2307                Some("$12.4M"),
2308                Citation {
2309                    table_id: Some("missing".into()),
2310                    cell: Some(CellRef { row: 1, col: 1 }),
2311                    ..Default::default()
2312                },
2313            )],
2314        );
2315        assert_eq!(report.checks[0].status, CheckStatus::NotFound);
2316
2317        let no_tables = NoTables(source);
2318        let cfg = VerificationConfig::default_v1();
2319        let report = verify_claims(
2320            &no_tables,
2321            CitationInput::Envelope(CitationEnvelope {
2322                document_fingerprint: no_tables.fingerprint(),
2323                claims: vec![claim(
2324                    ClaimKind::TableCell,
2325                    Some("$12.4M"),
2326                    Citation {
2327                        table_id: Some("missing".into()),
2328                        cell: Some(CellRef { row: 1, col: 1 }),
2329                        ..Default::default()
2330                    },
2331                )],
2332            }),
2333            &cfg,
2334            "0".repeat(64),
2335        );
2336        assert_eq!(report.checks[0].status, CheckStatus::NotFound);
2337    }
2338
2339    #[test]
2340    fn missing_table_capability_blocks_table_cell_claims() {
2341        let source = TestSource {
2342            caps: Capabilities {
2343                tables: false,
2344                ..TestSource::default().caps
2345            },
2346            ..TestSource::default()
2347        };
2348        let report = verify(
2349            &source,
2350            vec![claim(
2351                ClaimKind::TableCell,
2352                Some("$12.4M"),
2353                Citation {
2354                    table_id: Some("t0001".into()),
2355                    cell: Some(CellRef { row: 1, col: 1 }),
2356                    ..Default::default()
2357                },
2358            )],
2359        );
2360
2361        assert_eq!(report.checks[0].status, CheckStatus::CapabilityBlocked);
2362        assert_eq!(
2363            report.checks[0].reason,
2364            Some(CheckReason::MissingTableCapability)
2365        );
2366        assert_eq!(
2367            report.capability_limits,
2368            vec![CapabilityLimit::MissingTables]
2369        );
2370        assert!(report.checks[0]
2371            .warnings
2372            .contains(&WarningCode::CapabilityLimited));
2373    }
2374
2375    #[test]
2376    fn crop_refs_are_echoed_only_when_requested_and_supported() {
2377        let source = TestSource {
2378            caps: Capabilities {
2379                crop_support: true,
2380                ..TestSource::default().caps
2381            },
2382            crop_ref: Some("crop://p0001/e000002.png".into()),
2383            ..TestSource::default()
2384        };
2385        let claim = claim(
2386            ClaimKind::Quote,
2387            Some("Revenue grew to $12.4M in Q3 2025"),
2388            Citation {
2389                element_id: Some("e000002".into()),
2390                ..Default::default()
2391            },
2392        );
2393
2394        let mut cfg = VerificationConfig::default_v1();
2395        cfg.evidence.as_mut().unwrap().include_crops = true;
2396        let with_crops = verify_with_config(&source, vec![claim.clone()], &cfg);
2397        assert_eq!(
2398            with_crops.checks[0]
2399                .evidence
2400                .as_ref()
2401                .and_then(|e| e.crop_ref.as_deref()),
2402            Some("crop://p0001/e000002.png")
2403        );
2404
2405        cfg.evidence.as_mut().unwrap().include_crops = false;
2406        let without_crops = verify_with_config(&source, vec![claim], &cfg);
2407        assert_eq!(
2408            without_crops.checks[0]
2409                .evidence
2410                .as_ref()
2411                .and_then(|e| e.crop_ref.as_deref()),
2412            None
2413        );
2414    }
2415
2416    #[test]
2417    fn requested_crop_refs_without_source_support_remain_capability_limited() {
2418        let source = TestSource {
2419            crop_ref: Some("crop://p0001/e000002.png".into()),
2420            ..TestSource::default()
2421        };
2422        let mut cfg = VerificationConfig::default_v1();
2423        cfg.evidence.as_mut().unwrap().include_crops = true;
2424
2425        let report = verify_with_config(
2426            &source,
2427            vec![claim(
2428                ClaimKind::Quote,
2429                Some("Revenue grew to $12.4M in Q3 2025"),
2430                Citation {
2431                    element_id: Some("e000002".into()),
2432                    ..Default::default()
2433                },
2434            )],
2435            &cfg,
2436        );
2437
2438        assert_eq!(report.checks[0].status, CheckStatus::Grounded);
2439        assert_eq!(
2440            report.capability_limits,
2441            vec![CapabilityLimit::MissingCropSupport]
2442        );
2443        assert!(report.warnings.contains(&WarningCode::CapabilityLimited));
2444        assert_eq!(
2445            report.checks[0]
2446                .evidence
2447                .as_ref()
2448                .and_then(|e| e.crop_ref.as_deref()),
2449            None
2450        );
2451    }
2452
2453    #[test]
2454    fn stale_fingerprint_marks_checks_stale_and_gate_false() {
2455        let source = TestSource::default();
2456        let cfg = VerificationConfig::default_v1();
2457        let report = verify_claims(
2458            &source,
2459            CitationInput::Envelope(CitationEnvelope {
2460                document_fingerprint: Some(
2461                    "sha256:0000000000000000000000000000000000000000000000000000000000000000"
2462                        .into(),
2463                ),
2464                claims: vec![claim(
2465                    ClaimKind::Presence,
2466                    None,
2467                    Citation {
2468                        element_id: Some("e000002".into()),
2469                        ..Default::default()
2470                    },
2471                )],
2472            }),
2473            &cfg,
2474            "0".repeat(64),
2475        );
2476
2477        assert!(report.fingerprint_stale);
2478        assert!(!report.all_evidence_grounded);
2479        assert_eq!(report.checks[0].status, CheckStatus::Stale);
2480        assert_eq!(report.checks[0].reason, Some(CheckReason::StaleFingerprint));
2481    }
2482
2483    #[test]
2484    fn missing_citation_fingerprint_blocks_when_required() {
2485        let source = TestSource::default();
2486        let cfg = VerificationConfig::default_v1();
2487        let report = verify_claims(
2488            &source,
2489            CitationInput::Envelope(CitationEnvelope {
2490                document_fingerprint: None,
2491                claims: vec![claim(
2492                    ClaimKind::Presence,
2493                    None,
2494                    Citation {
2495                        element_id: Some("e000002".into()),
2496                        ..Default::default()
2497                    },
2498                )],
2499            }),
2500            &cfg,
2501            "0".repeat(64),
2502        );
2503
2504        assert!(!report.fingerprint_stale);
2505        assert!(!report.all_evidence_grounded);
2506        assert_eq!(report.checks[0].status, CheckStatus::Stale);
2507        assert_eq!(
2508            report.checks[0].reason,
2509            Some(CheckReason::MissingCitationFingerprint)
2510        );
2511    }
2512
2513    #[test]
2514    fn unsupported_claim_kinds_are_explicit() {
2515        let source = TestSource::default();
2516        let report = verify(
2517            &source,
2518            vec![claim(
2519                ClaimKind::Region,
2520                None,
2521                Citation {
2522                    element_id: Some("e000002".into()),
2523                    ..Default::default()
2524                },
2525            )],
2526        );
2527
2528        assert!(!report.all_evidence_grounded);
2529        assert_eq!(report.checks[0].status, CheckStatus::UnsupportedClaimKind);
2530        assert_eq!(
2531            report.checks[0].reason,
2532            Some(CheckReason::UnsupportedClaimKind)
2533        );
2534        assert_eq!(report.unsupported_claim_kinds, vec!["region"]);
2535    }
2536
2537    #[test]
2538    fn non_v1_claim_kinds_are_deduped_and_keep_gate_false() {
2539        let source = TestSource::default();
2540        let report = verify(
2541            &source,
2542            vec![
2543                claim(
2544                    ClaimKind::Presence,
2545                    None,
2546                    Citation {
2547                        page: Some("p0001".into()),
2548                        ..Default::default()
2549                    },
2550                ),
2551                claim(
2552                    ClaimKind::Region,
2553                    None,
2554                    Citation {
2555                        element_id: Some("e000002".into()),
2556                        ..Default::default()
2557                    },
2558                ),
2559                claim(
2560                    ClaimKind::Other,
2561                    Some("$12.4M equals 12400000"),
2562                    Citation {
2563                        element_id: Some("e000002".into()),
2564                        ..Default::default()
2565                    },
2566                ),
2567                claim(
2568                    ClaimKind::Region,
2569                    None,
2570                    Citation {
2571                        page: Some("p0001".into()),
2572                        ..Default::default()
2573                    },
2574                ),
2575            ],
2576        );
2577
2578        assert!(!report.all_evidence_grounded);
2579        assert_eq!(report.checks[0].status, CheckStatus::Grounded);
2580        assert_eq!(report.checks[1].status, CheckStatus::UnsupportedClaimKind);
2581        assert_eq!(report.checks[2].status, CheckStatus::UnsupportedClaimKind);
2582        assert_eq!(report.checks[3].status, CheckStatus::UnsupportedClaimKind);
2583        assert_eq!(report.checks[1].match_method, MatchMethod::None);
2584        assert_eq!(report.checks[2].match_method, MatchMethod::None);
2585        assert_eq!(report.checks[3].match_method, MatchMethod::None);
2586        assert_eq!(
2587            report.checks[1].reason,
2588            Some(CheckReason::UnsupportedClaimKind)
2589        );
2590        assert_eq!(
2591            report.checks[2].reason,
2592            Some(CheckReason::UnsupportedClaimKind)
2593        );
2594        assert_eq!(
2595            report.checks[3].reason,
2596            Some(CheckReason::UnsupportedClaimKind)
2597        );
2598        assert!(report.checks[1].evidence.is_none());
2599        assert!(report.checks[2].evidence.is_none());
2600        assert!(report.checks[3].evidence.is_none());
2601        assert!(report.checks[1].warnings.is_empty());
2602        assert!(report.checks[2].warnings.is_empty());
2603        assert!(report.checks[3].warnings.is_empty());
2604        assert!(!report.checks[1].semantic_unverified);
2605        assert!(!report.checks[2].semantic_unverified);
2606        assert!(!report.checks[3].semantic_unverified);
2607        assert_eq!(report.unsupported_claim_kinds, vec!["region", "other"]);
2608    }
2609
2610    /// Checks that a citation locating evidence by span id is capability-blocked when the
2611    /// grounding source reports no span support, and that the expected capability limits
2612    /// and the `CapabilityLimited` warning are reported.
2613    #[test]
2614    fn missing_span_capability_blocks_span_locator() {
2615        // Every capability flag is off and the coordinate origin is unknown.
2616        let bare_caps = Capabilities {
2617            spans: false,
2618            char_offsets: false,
2619            tables: false,
2620            fingerprint: false,
2621            coordinate_origin: CoordinateOrigin::Unknown,
2622            crop_support: false,
2623        };
2624        let source = TestSource {
2625            caps: bare_caps,
2626            fingerprint: None,
2627            crop_ref: None,
2628        };
2629
2630        // A presence claim whose only locator is a span id.
2631        let span_citation = Citation {
2632            span_id: Some("s000002".into()),
2633            ..Default::default()
2634        };
2635        let presence = claim(ClaimKind::Presence, None, span_citation);
2636        let report = verify(&source, vec![presence]);
2637
2638        assert!(!report.all_evidence_grounded);
2639        let check = &report.checks[0];
2640        assert_eq!(check.status, CheckStatus::CapabilityBlocked);
2641        assert_eq!(check.reason, Some(CheckReason::MissingSpanCapability));
2642
2643        // Equality against a `vec!` pins both the reported limits and their order.
2644        let expected_limits = vec![
2645            CapabilityLimit::MissingFingerprint,
2646            CapabilityLimit::MissingSpans,
2647            CapabilityLimit::MissingCharOffsets,
2648            CapabilityLimit::MissingTables,
2649            CapabilityLimit::UnknownCoordinateOrigin,
2650        ];
2651        assert_eq!(report.capability_limits, expected_limits);
2652
2653        // The warning must appear report-wide and on the blocked check itself.
2654        assert!(report.warnings.contains(&WarningCode::CapabilityLimited));
2655        assert!(check.warnings.contains(&WarningCode::CapabilityLimited));
2656    }
2657
2658    /// Checks that when the citation envelope carries a document fingerprint but the source
2659    /// has none, the check is capability-blocked and `fingerprint_stale` stays false.
2660    #[test]
2661    fn citation_fingerprint_without_source_fingerprint_blocks_checks() {
2662        // Start from the default test source and remove only its fingerprint support.
2663        let caps = Capabilities {
2664            fingerprint: false,
2665            ..TestSource::default().caps
2666        };
2667        let source = TestSource {
2668            caps,
2669            fingerprint: None,
2670            ..TestSource::default()
2671        };
2672        let cfg = VerificationConfig::default_v1();
2673
2674        // The envelope, not the source, is what supplies a document fingerprint here.
2675        let cited_fingerprint =
2676            "sha256:b5d30710d0c25cc38d8dec924ecaf57ae4f81276dd5dc14d75cb3b5b6bde62d3";
2677        let element_citation = Citation {
2678            element_id: Some("e000002".into()),
2679            ..Default::default()
2680        };
2681        let envelope = CitationEnvelope {
2682            document_fingerprint: Some(cited_fingerprint.into()),
2683            claims: vec![claim(ClaimKind::Presence, None, element_citation)],
2684        };
2685        let report = verify_claims(
2686            &source,
2687            CitationInput::Envelope(envelope),
2688            &cfg,
2689            "0".repeat(64),
2690        );
2691
2692        // A missing source fingerprint blocks the check; it is not reported as stale.
2693        assert!(!report.fingerprint_stale);
2694        assert!(!report.all_evidence_grounded);
2695        let check = &report.checks[0];
2696        assert_eq!(check.status, CheckStatus::CapabilityBlocked);
2697        assert_eq!(check.reason, Some(CheckReason::MissingSourceFingerprint));
2698        let expected_limits = vec![CapabilityLimit::MissingFingerprint];
2699        assert_eq!(report.capability_limits, expected_limits);
2700
2701        // The warning must appear report-wide and on the blocked check itself.
2702        assert!(report.warnings.contains(&WarningCode::CapabilityLimited));
2703        assert!(check.warnings.contains(&WarningCode::CapabilityLimited));
2704    }
2705
2706    /// Checks that a quote claim built with `None` as the second `claim` argument
2707    /// (presumably the quoted text -- confirm against the `claim` helper) yields an
2708    /// `Error` check with reason `MissingRequiredText` and no match method.
2709    #[test]
2710    fn missing_text_is_error_for_library_callers() {
2711        let source = TestSource::default();
2712        // The citation itself is populated: it locates the evidence by element id.
2713        let element_citation = Citation {
2714            element_id: Some("e000002".into()),
2715            ..Default::default()
2716        };
2717        let quote_claim = claim(ClaimKind::Quote, None, element_citation);
2718        let report = verify(&source, vec![quote_claim]);
2719
2720        // The report as a whole must not count as grounded.
2721        assert!(!report.all_evidence_grounded);
2722
2723        // The single check is an error, and no match method is recorded for it.
2724        let check = &report.checks[0];
2725        assert_eq!(check.status, CheckStatus::Error);
2726        assert_eq!(check.reason, Some(CheckReason::MissingRequiredText));
2727        assert_eq!(check.match_method, MatchMethod::None);
2728    }
2729
2730    #[test]
2731    fn quote_normalization_is_ascii_whitespace_only() {
2732        assert_eq!("a b", normalize_quote("  a\r\n\t b  ")); // ASCII runs collapse, ends trim
2733        assert_eq!("a\u{00a0}b", normalize_quote("a\u{00a0}b")); // U+00A0 is left untouched
2734    }
2735
2736    /// Serializes a one-claim report with `serde_json` and spot-checks the resulting JSON:
2737    /// the grounding parser name, the `fingerprint_stale` flag, and the number of checks.
2738    #[test]
2739    fn report_serializes_to_schema_shape() {
2740        let source = TestSource::default();
2741        let element_citation = Citation {
2742            element_id: Some("e000002".into()),
2743            ..Default::default()
2744        };
2745        let presence = claim(ClaimKind::Presence, None, element_citation);
2746        let report = verify(&source, vec![presence]);
2747
2748        let json = serde_json::to_value(&report).unwrap();
2749        let parser_name = json["grounding"]["parser"]["name"].as_str();
2750        assert_eq!(parser_name, Some("test-parser"));
2751        assert_eq!(json["fingerprint_stale"].as_bool(), Some(false));
2752        // `checks` must be a JSON array holding exactly one entry for the single claim.
2753        assert_eq!(json["checks"].as_array().map(Vec::len), Some(1));
2754    }
2755}