Skip to main content

ethos_verify/
lib.rs

1/*
2 * Copyright 2026 The Ethos maintainers
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *     http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17//! # ethos-verify (Milestone A skeleton → B alpha → D v1)
18//!
19//! Parser-agnostic citation evidence verification. Consumes any parser's output through
20//! [`ethos_core::grounding::GroundingSource`] — Ethos itself is just another grounding
21//! source behind an adapter (PRD §1.5, §5.4).
22//!
23//! **Scope discipline:** verification is evidence grounding — the cited region exists,
24//! its text matches by a declared literal method, the fingerprint is fresh. It is never
25//! pixel-level, semantic, or arithmetic proof (PRD §14).
26//!
27//! The WS-VERIFY check engine intentionally supports only literal quote/value,
28//! presence, and table-cell lookup claims. Unsupported claim kinds remain
29//! explicit; no fuzzy, semantic, arithmetic, crop, OCR, layout, or
30//! parser-internal behavior belongs here.
31
32#![forbid(unsafe_code)]
33#![warn(missing_docs)]
34
35use std::collections::BTreeMap;
36
37use ethos_core::codes::WarningCode;
38use ethos_core::grounding::{
39    CoordinateOrigin, GroundingCell, GroundingElement, GroundingSource, GroundingSpan,
40    GroundingTable, PageGeometry,
41};
42use ethos_core::verify_types::{
43    compute_all_evidence_grounded, CapabilityLimit, Check, CheckReason, CheckStatus, Claim,
44    ClaimKind, Evidence, GroundingMeta, MatchMethod, TextNormalization, VerificationConfig,
45    VerificationReport,
46};
47use serde::{Deserialize, Serialize};
48
49/// Citation input accepted by the alpha verifier.
50///
51/// The public CLI accepts either a bare array of [`Claim`] objects or this envelope
52/// form. `document_fingerprint`, when present, is compared with the grounding
53/// source fingerprint under the active staleness policy.
54#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
55#[serde(untagged)]
56pub enum CitationInput {
57    /// Bare claim list.
58    Claims(Vec<Claim>),
59    /// Claim list with optional fingerprint anchor.
60    Envelope(CitationEnvelope),
61}
62
63/// Envelope form of citation input.
64#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
65#[serde(deny_unknown_fields)]
66pub struct CitationEnvelope {
67    /// Fingerprint the citations were produced against.
68    #[serde(default)]
69    pub document_fingerprint: Option<String>,
70    /// Claims to verify, in deterministic input order.
71    pub claims: Vec<Claim>,
72}
73
74impl CitationInput {
75    /// Claims in deterministic input order.
76    pub fn claims(&self) -> &[Claim] {
77        match self {
78            CitationInput::Claims(claims) => claims,
79            CitationInput::Envelope(envelope) => &envelope.claims,
80        }
81    }
82
83    /// Fingerprint anchor declared by the citation envelope, when present.
84    pub fn document_fingerprint(&self) -> Option<&str> {
85        match self {
86            CitationInput::Claims(_) => None,
87            CitationInput::Envelope(envelope) => envelope.document_fingerprint.as_deref(),
88        }
89    }
90
91    fn into_parts(self) -> (Option<String>, Vec<Claim>) {
92        match self {
93            CitationInput::Claims(claims) => (None, claims),
94            CitationInput::Envelope(envelope) => (envelope.document_fingerprint, envelope.claims),
95        }
96    }
97}
98
99/// Compute the capability-downgrade warnings for a source under a config (PRD §5.5):
100/// every missing capability the run would rely on surfaces as `capability_limited` —
101/// explicitly, never as silent approximation.
102pub fn capability_warnings(
103    source: &dyn GroundingSource,
104    config: &VerificationConfig,
105) -> Vec<WarningCode> {
106    if capability_limits(source, config).is_empty() {
107        Vec::new()
108    } else {
109        vec![WarningCode::CapabilityLimited]
110    }
111}
112
113/// Compute structured capability gaps for the run. These explain the stable
114/// `capability_limited` warning without minting parser-warning codes for every
115/// verification capability.
116pub fn capability_limits(
117    source: &dyn GroundingSource,
118    config: &VerificationConfig,
119) -> Vec<CapabilityLimit> {
120    capability_limits_for(source.capabilities(), config)
121}
122
123fn capability_limits_for(
124    caps: ethos_core::grounding::Capabilities,
125    config: &VerificationConfig,
126) -> Vec<CapabilityLimit> {
127    let mut limits = Vec::new();
128    if !caps.fingerprint && config.staleness.require_fingerprint_match {
129        limits.push(CapabilityLimit::MissingFingerprint);
130    }
131    if !caps.spans {
132        limits.push(CapabilityLimit::MissingSpans);
133    }
134    if !caps.char_offsets {
135        limits.push(CapabilityLimit::MissingCharOffsets);
136    }
137    if !caps.tables && config.claim_kinds.contains(&ClaimKind::TableCell) {
138        limits.push(CapabilityLimit::MissingTables);
139    }
140    if caps.coordinate_origin == CoordinateOrigin::Unknown {
141        limits.push(CapabilityLimit::UnknownCoordinateOrigin);
142    }
143    if config.evidence.is_some_and(|e| e.include_crops) && !caps.crop_support {
144        limits.push(CapabilityLimit::MissingCropSupport);
145    }
146    limits
147}
148
149fn push_warning(warnings: &mut Vec<WarningCode>, warning: WarningCode) {
150    if !warnings.contains(&warning) {
151        warnings.push(warning);
152    }
153}
154
155/// Verify citation claims over a parser-agnostic [`GroundingSource`].
156pub fn verify_claims(
157    source: &dyn GroundingSource,
158    citations: CitationInput,
159    config: &VerificationConfig,
160    config_sha256: String,
161) -> VerificationReport {
162    let (citation_fingerprint, claims) = citations.into_parts();
163    let index = SourceIndex::new(source);
164    let source_fingerprint = source.fingerprint();
165    let capability_limits = capability_limits_for(index.capabilities, config);
166    let warnings = if capability_limits.is_empty() {
167        Vec::new()
168    } else {
169        vec![WarningCode::CapabilityLimited]
170    };
171    let fingerprint_stale = config.staleness.require_fingerprint_match
172        && matches!(
173            (citation_fingerprint.as_deref(), source_fingerprint.as_deref()),
174            (Some(expected), Some(actual)) if expected != actual
175        );
176    let fingerprint_unverifiable = config.staleness.require_fingerprint_match
177        && citation_fingerprint.is_some()
178        && source_fingerprint.is_none();
179    let citation_fingerprint_missing = config.staleness.require_fingerprint_match
180        && citation_fingerprint.is_none()
181        && source_fingerprint.is_some();
182    let include_text = config.evidence.is_some_and(|e| e.include_text);
183    let include_crops = config.evidence.is_some_and(|e| e.include_crops);
184    let mut unsupported = Vec::new();
185    let checks: Vec<Check> = claims
186        .into_iter()
187        .enumerate()
188        .map(|(idx, claim)| {
189            check_claim(
190                idx + 1,
191                source,
192                &index,
193                claim,
194                config,
195                CheckContext {
196                    fingerprint_stale,
197                    fingerprint_unverifiable,
198                    citation_fingerprint_missing,
199                    include_text,
200                    include_crops,
201                },
202                &mut unsupported,
203            )
204        })
205        .collect();
206
207    VerificationReport {
208        schema_version: ethos_core::SCHEMA_VERSION.to_string(),
209        document_fingerprint: source_fingerprint,
210        verification_config_sha256: config_sha256,
211        grounding: GroundingMeta {
212            parser: source.parser(),
213            capabilities: index.capabilities,
214        },
215        capability_limits,
216        fingerprint_stale,
217        all_evidence_grounded: compute_all_evidence_grounded(
218            &checks,
219            &unsupported,
220            fingerprint_stale,
221        ),
222        checks,
223        unsupported_claim_kinds: unsupported,
224        warnings,
225    }
226}
227
/// Run-level facts shared by every per-claim check; computed once per
/// verification run and copied into each check.
#[derive(Debug, Clone, Copy)]
struct CheckContext {
    /// Citation and source fingerprints are both present and differ.
    fingerprint_stale: bool,
    /// The citation declares a fingerprint but the source provides none.
    fingerprint_unverifiable: bool,
    /// The source provides a fingerprint but the citation declares none.
    citation_fingerprint_missing: bool,
    /// Whether evidence should carry the matched text.
    include_text: bool,
    /// Whether evidence should carry a crop reference when the source supports it.
    include_crops: bool,
}
236
237fn check_claim(
238    id: usize,
239    source: &dyn GroundingSource,
240    index: &SourceIndex,
241    claim: Claim,
242    config: &VerificationConfig,
243    context: CheckContext,
244    unsupported: &mut Vec<String>,
245) -> Check {
246    let mut warnings = Vec::new();
247    let check_id = format!("v{id:04}");
248
249    if !claim.citation.has_locator() {
250        return Check {
251            id: check_id,
252            claim,
253            status: CheckStatus::Error,
254            reason: Some(CheckReason::MissingLocator),
255            match_method: MatchMethod::None,
256            semantic_unverified: false,
257            evidence: None,
258            warnings,
259        };
260    }
261
262    if !is_supported_kind(claim.kind) || !config.claim_kinds.contains(&claim.kind) {
263        push_unsupported(unsupported, claim.kind);
264        return Check {
265            id: check_id,
266            claim,
267            status: CheckStatus::UnsupportedClaimKind,
268            reason: Some(CheckReason::UnsupportedClaimKind),
269            match_method: MatchMethod::None,
270            semantic_unverified: false,
271            evidence: None,
272            warnings,
273        };
274    }
275
276    if requires_text(claim.kind)
277        && claim
278            .text
279            .as_deref()
280            .is_none_or(|text| text.trim().is_empty())
281    {
282        return Check {
283            id: check_id,
284            claim,
285            status: CheckStatus::Error,
286            reason: Some(CheckReason::MissingRequiredText),
287            match_method: MatchMethod::None,
288            semantic_unverified: false,
289            evidence: None,
290            warnings,
291        };
292    }
293
294    if context.fingerprint_stale {
295        return Check {
296            id: check_id,
297            claim,
298            status: CheckStatus::Stale,
299            reason: Some(CheckReason::StaleFingerprint),
300            match_method: MatchMethod::None,
301            semantic_unverified: false,
302            evidence: None,
303            warnings,
304        };
305    }
306
307    if context.fingerprint_unverifiable {
308        push_warning(&mut warnings, WarningCode::CapabilityLimited);
309        return Check {
310            id: check_id,
311            claim,
312            status: CheckStatus::CapabilityBlocked,
313            reason: Some(CheckReason::MissingSourceFingerprint),
314            match_method: MatchMethod::None,
315            semantic_unverified: false,
316            evidence: None,
317            warnings,
318        };
319    }
320
321    if context.citation_fingerprint_missing {
322        return Check {
323            id: check_id,
324            claim,
325            status: CheckStatus::Stale,
326            reason: Some(CheckReason::MissingCitationFingerprint),
327            match_method: MatchMethod::None,
328            semantic_unverified: false,
329            evidence: None,
330            warnings,
331        };
332    }
333
334    let mut target = match resolve_target(index, &claim, config) {
335        TargetResolution::Found(target) => target,
336        TargetResolution::NotFound(reason) => {
337            return Check {
338                id: check_id,
339                claim,
340                status: CheckStatus::NotFound,
341                reason: Some(reason),
342                match_method: MatchMethod::None,
343                semantic_unverified: false,
344                evidence: None,
345                warnings,
346            };
347        }
348        TargetResolution::Invalid(reason) => {
349            return Check {
350                id: check_id,
351                claim,
352                status: CheckStatus::Error,
353                reason: Some(reason),
354                match_method: MatchMethod::None,
355                semantic_unverified: false,
356                evidence: None,
357                warnings,
358            };
359        }
360        TargetResolution::CapabilityBlocked(reason) => {
361            push_warning(&mut warnings, WarningCode::CapabilityLimited);
362            return Check {
363                id: check_id,
364                claim,
365                status: CheckStatus::CapabilityBlocked,
366                reason: Some(reason),
367                match_method: MatchMethod::None,
368                semantic_unverified: false,
369                evidence: None,
370                warnings,
371            };
372        }
373    };
374
375    if let Some(adjacent_target) = adjacent_quote_target(index, &claim, &target, config) {
376        target = adjacent_target;
377    }
378
379    let evidence = make_evidence(source, &target, context.include_text, context.include_crops);
380    let (status, match_method, reason) =
381        check_resolved_claim(claim.kind, claim.text.as_deref(), &target, config);
382    Check {
383        id: check_id,
384        claim,
385        status,
386        reason,
387        match_method,
388        semantic_unverified: false,
389        evidence,
390        warnings,
391    }
392}
393
394fn check_resolved_claim(
395    kind: ClaimKind,
396    expected_text: Option<&str>,
397    target: &FoundTarget,
398    config: &VerificationConfig,
399) -> (CheckStatus, MatchMethod, Option<CheckReason>) {
400    match kind {
401        ClaimKind::Presence => check_presence_claim(),
402        ClaimKind::Quote | ClaimKind::Value | ClaimKind::TableCell => {
403            check_text_claim(kind, expected_text, target, config)
404        }
405        _ => unreachable!("unsupported kinds returned before matching"),
406    }
407}
408
409fn check_presence_claim() -> (CheckStatus, MatchMethod, Option<CheckReason>) {
410    (CheckStatus::Grounded, MatchMethod::PresenceOnly, None)
411}
412
413fn check_text_claim(
414    kind: ClaimKind,
415    expected_text: Option<&str>,
416    target: &FoundTarget,
417    config: &VerificationConfig,
418) -> (CheckStatus, MatchMethod, Option<CheckReason>) {
419    let match_method = if target.from_table_cell {
420        MatchMethod::TableCellLookup
421    } else {
422        text_match_method(kind, config)
423    };
424    let (status, reason) = match (expected_text, target.text.as_deref()) {
425        (Some(expected), Some(actual)) if text_matches(kind, expected, actual, config) => {
426            (CheckStatus::Grounded, None)
427        }
428        _ => (CheckStatus::Mismatch, Some(CheckReason::TextMismatch)),
429    };
430    (status, match_method, reason)
431}
432
433fn is_supported_kind(kind: ClaimKind) -> bool {
434    matches!(
435        kind,
436        ClaimKind::Quote | ClaimKind::Value | ClaimKind::Presence | ClaimKind::TableCell
437    )
438}
439
440fn requires_text(kind: ClaimKind) -> bool {
441    matches!(
442        kind,
443        ClaimKind::Quote | ClaimKind::Value | ClaimKind::TableCell
444    )
445}
446
447fn push_unsupported(unsupported: &mut Vec<String>, kind: ClaimKind) {
448    let name = claim_kind_name(kind).to_string();
449    if !unsupported.contains(&name) {
450        unsupported.push(name);
451    }
452}
453
454fn claim_kind_name(kind: ClaimKind) -> &'static str {
455    match kind {
456        ClaimKind::Quote => "quote",
457        ClaimKind::Value => "value",
458        ClaimKind::Presence => "presence",
459        ClaimKind::TableCell => "table_cell",
460        ClaimKind::Region => "region",
461        ClaimKind::Other => "other",
462    }
463}
464
/// The grounding region a citation resolved to, flattened to what matching and
/// evidence construction need.
#[derive(Debug, Clone)]
struct FoundTarget {
    /// Identifier of the page the region is on, when known.
    page: Option<String>,
    /// Bounding box of the region, when known.
    bbox: Option<[i64; 4]>,
    /// Text carried by the region; `None` for page-only targets and for elements
    /// without text.
    text: Option<String>,
    /// `true` when the target came from a table-cell lookup; this selects the
    /// `TableCellLookup` match method.
    from_table_cell: bool,
    /// Position in the source index's element list when the target is a single
    /// element; used to look at neighbouring elements for quote claims.
    element_index: Option<usize>,
}
473
474/// Per-run grounding snapshot used to avoid cloning full entity collections per claim.
475///
476/// The lookup maps intentionally preserve first-match-by-id behavior, matching the trait default
477/// and current native/ODL adapters. If an adapter gives `element_by_id` different semantics, update
478/// this index at the same time so verifier resolution does not silently diverge.
479struct SourceIndex {
480    capabilities: ethos_core::grounding::Capabilities,
481    pages: Vec<PageGeometry>,
482    elements: Vec<GroundingElement>,
483    spans: Vec<GroundingSpan>,
484    tables: Vec<GroundingTable>,
485    element_by_id: BTreeMap<String, usize>,
486    span_by_id: BTreeMap<String, usize>,
487    table_by_id: BTreeMap<String, usize>,
488}
489
490impl SourceIndex {
491    fn new(source: &dyn GroundingSource) -> Self {
492        let capabilities = source.capabilities();
493        let pages = source.pages();
494        let elements = source.elements();
495        let spans = if capabilities.spans {
496            source.spans()
497        } else {
498            Vec::new()
499        };
500        let tables = if capabilities.tables {
501            source.tables()
502        } else {
503            Vec::new()
504        };
505        let element_by_id = index_elements(&elements);
506        let span_by_id = index_spans(&spans);
507        let table_by_id = index_tables(&tables);
508
509        SourceIndex {
510            capabilities,
511            pages,
512            elements,
513            spans,
514            tables,
515            element_by_id,
516            span_by_id,
517            table_by_id,
518        }
519    }
520
521    fn span(&self, id: &str) -> Option<&GroundingSpan> {
522        self.span_by_id
523            .get(id)
524            .and_then(|index| self.spans.get(*index))
525    }
526
527    fn table(&self, id: &str) -> Option<&GroundingTable> {
528        self.table_by_id
529            .get(id)
530            .and_then(|index| self.tables.get(*index))
531    }
532}
533
534fn index_elements(elements: &[GroundingElement]) -> BTreeMap<String, usize> {
535    let mut index = BTreeMap::new();
536    for (position, element) in elements.iter().enumerate() {
537        index.entry(element.id.clone()).or_insert(position);
538    }
539    index
540}
541
542fn index_spans(spans: &[GroundingSpan]) -> BTreeMap<String, usize> {
543    let mut index = BTreeMap::new();
544    for (position, span) in spans.iter().enumerate() {
545        index.entry(span.id.clone()).or_insert(position);
546    }
547    index
548}
549
550fn index_tables(tables: &[GroundingTable]) -> BTreeMap<String, usize> {
551    let mut index = BTreeMap::new();
552    for (position, table) in tables.iter().enumerate() {
553        index.entry(table.id.clone()).or_insert(position);
554    }
555    index
556}
557
558enum TargetResolution {
559    Found(FoundTarget),
560    NotFound(CheckReason),
561    Invalid(CheckReason),
562    CapabilityBlocked(CheckReason),
563}
564
565fn resolve_target(
566    index: &SourceIndex,
567    claim: &Claim,
568    config: &VerificationConfig,
569) -> TargetResolution {
570    if claim.kind == ClaimKind::TableCell
571        || claim.citation.table_id.is_some()
572        || claim.citation.cell.is_some()
573    {
574        return resolve_table_cell(index, claim);
575    }
576
577    if let Some(span_id) = claim.citation.span_id.as_deref() {
578        if !index.capabilities.spans {
579            return TargetResolution::CapabilityBlocked(CheckReason::MissingSpanCapability);
580        }
581        return index
582            .span(span_id)
583            .map(target_from_span)
584            .map(TargetResolution::Found)
585            .unwrap_or(TargetResolution::NotFound(CheckReason::SpanNotFound));
586    }
587
588    if let Some(element_id) = claim.citation.element_id.as_deref() {
589        return index
590            .element_by_id
591            .get(element_id)
592            .and_then(|position| {
593                index
594                    .elements
595                    .get(*position)
596                    .map(|element| (*position, element))
597            })
598            .map(|(position, element)| target_from_element(element, Some(position)))
599            .map(TargetResolution::Found)
600            .unwrap_or(TargetResolution::NotFound(CheckReason::ElementNotFound));
601    }
602
603    if let (Some(page), Some(bbox)) = (claim.citation.page.as_deref(), claim.citation.bbox) {
604        if index.capabilities.coordinate_origin == CoordinateOrigin::Unknown {
605            return TargetResolution::CapabilityBlocked(CheckReason::UnknownCoordinateOrigin);
606        }
607        let tolerance = config.matching.bbox_containment_tolerance_q.unwrap_or(0);
608        return index
609            .elements
610            .iter()
611            .enumerate()
612            .filter(|(_, element)| {
613                element.page == page && contains_bbox(element.bbox, bbox, tolerance)
614            })
615            .min_by_key(|(position, element)| (bbox_area(element.bbox), *position))
616            .map(|(position, element)| target_from_element(element, Some(position)))
617            .map(TargetResolution::Found)
618            .unwrap_or(TargetResolution::NotFound(CheckReason::BboxNotFound));
619    }
620
621    if claim.citation.bbox.is_some() {
622        return TargetResolution::Invalid(CheckReason::MissingPageForBbox);
623    }
624
625    if let Some(page) = claim.citation.page.as_deref() {
626        return index
627            .pages
628            .iter()
629            .find(|candidate| candidate.id == page)
630            .map(|found| {
631                TargetResolution::Found(FoundTarget {
632                    page: Some(found.id.clone()),
633                    bbox: Some([0, 0, found.width, found.height]),
634                    text: None,
635                    from_table_cell: false,
636                    element_index: None,
637                })
638            })
639            .unwrap_or(TargetResolution::NotFound(CheckReason::PageNotFound));
640    }
641
642    TargetResolution::NotFound(CheckReason::MissingLocator)
643}
644
645fn target_from_element(element: &GroundingElement, element_index: Option<usize>) -> FoundTarget {
646    FoundTarget {
647        page: Some(element.page.clone()),
648        bbox: Some(element.bbox),
649        text: element.text.clone(),
650        from_table_cell: false,
651        element_index,
652    }
653}
654
655fn target_from_span(span: &GroundingSpan) -> FoundTarget {
656    FoundTarget {
657        page: Some(span.page.clone()),
658        bbox: Some(span.bbox),
659        text: Some(span.text.clone()),
660        from_table_cell: false,
661        element_index: None,
662    }
663}
664
665fn resolve_table_cell(index: &SourceIndex, claim: &Claim) -> TargetResolution {
666    let Some(table_id) = claim.citation.table_id.as_deref() else {
667        return TargetResolution::Invalid(CheckReason::MissingTableCellLocator);
668    };
669    let Some(cell_ref) = claim.citation.cell else {
670        return TargetResolution::Invalid(CheckReason::MissingTableCellLocator);
671    };
672    if !index.capabilities.tables {
673        return TargetResolution::CapabilityBlocked(CheckReason::MissingTableCapability);
674    }
675    let Some(table) = index.table(table_id) else {
676        return TargetResolution::NotFound(CheckReason::TableNotFound);
677    };
678    target_from_table_cell(table, cell_ref.row, cell_ref.col)
679        .map(TargetResolution::Found)
680        .unwrap_or(TargetResolution::NotFound(CheckReason::TableCellNotFound))
681}
682
683fn target_from_table_cell(table: &GroundingTable, row: u32, col: u32) -> Option<FoundTarget> {
684    table
685        .cells
686        .iter()
687        .find(|cell| table_cell_covers(cell, row, col))
688        .map(|cell| target_from_cell(&table.page, cell))
689}
690
691fn table_cell_covers(cell: &GroundingCell, row: u32, col: u32) -> bool {
692    let row_end = cell.row.saturating_add(cell.row_span.max(1));
693    let col_end = cell.col.saturating_add(cell.col_span.max(1));
694    row >= cell.row && row < row_end && col >= cell.col && col < col_end
695}
696
697fn target_from_cell(page: &str, cell: &GroundingCell) -> FoundTarget {
698    FoundTarget {
699        page: Some(page.to_string()),
700        bbox: Some(cell.bbox),
701        text: Some(cell.text.clone()),
702        from_table_cell: true,
703        element_index: None,
704    }
705}
706
707fn adjacent_quote_target(
708    index: &SourceIndex,
709    claim: &Claim,
710    target: &FoundTarget,
711    config: &VerificationConfig,
712) -> Option<FoundTarget> {
713    if claim.kind != ClaimKind::Quote {
714        return None;
715    }
716    let expected = claim.text.as_deref()?;
717    if target
718        .text
719        .as_deref()
720        .is_some_and(|actual| text_matches(ClaimKind::Quote, expected, actual, config))
721    {
722        return None;
723    }
724
725    if claim.citation.bbox.is_some() {
726        return None;
727    }
728
729    if claim.citation.element_id.is_some() {
730        if let Some(position) = target.element_index {
731            return adjacent_text_pair_for_element(index, position, expected, config);
732        }
733    }
734
735    None
736}
737
738fn adjacent_text_pair_for_element(
739    index: &SourceIndex,
740    position: usize,
741    expected: &str,
742    config: &VerificationConfig,
743) -> Option<FoundTarget> {
744    let current = index.elements.get(position)?;
745    if let Some(second) = position
746        .checked_add(1)
747        .and_then(|next| index.elements.get(next))
748    {
749        if let Some(target) = adjacent_text_pair_target(current, second, expected, config) {
750            return Some(target);
751        }
752    }
753    position
754        .checked_sub(1)
755        .and_then(|previous| index.elements.get(previous))
756        .and_then(|first| adjacent_text_pair_target(first, current, expected, config))
757}
758
759fn adjacent_text_pair_target(
760    first: &GroundingElement,
761    second: &GroundingElement,
762    expected: &str,
763    config: &VerificationConfig,
764) -> Option<FoundTarget> {
765    if first.page != second.page {
766        return None;
767    }
768    if !element_bboxes_are_adjacent(first.bbox, second.bbox) {
769        return None;
770    }
771    let first_text = first.text.as_deref()?;
772    let second_text = second.text.as_deref()?;
773    let joined = join_adjacent_text(first_text, second_text, config);
774    if text_matches(ClaimKind::Quote, expected, first_text, config)
775        || text_matches(ClaimKind::Quote, expected, second_text, config)
776        || !text_matches(ClaimKind::Quote, expected, &joined, config)
777    {
778        return None;
779    }
780
781    Some(FoundTarget {
782        page: Some(first.page.clone()),
783        bbox: Some(union_bbox(first.bbox, second.bbox)),
784        text: Some(joined),
785        from_table_cell: false,
786        element_index: None,
787    })
788}
789
790fn join_adjacent_text(first: &str, second: &str, config: &VerificationConfig) -> String {
791    let joined = format!("{first} {second}");
792    match config.matching.text_normalization {
793        TextNormalization::None => joined,
794        TextNormalization::CollapseWhitespace => normalize_quote(&joined),
795    }
796}
797
/// Area of a bounding box whose entries are `[min, min, max, max]` along the two
/// axes, as a `u128`.
///
/// An inverted box (max below min on either axis) has zero extent on that axis and
/// therefore zero area. Extents are computed in `i128`, so a box wider than
/// `i64::MAX` keeps its exact width instead of being clamped; clamping would make
/// distinct oversized boxes compare equal when ranking containers by area.
fn bbox_area(bbox: [i64; 4]) -> u128 {
    let extent = |low: i64, high: i64| -> u128 {
        // The difference of two i64 values always fits in i128; negative
        // differences fail the conversion and count as zero.
        u128::try_from(i128::from(high) - i128::from(low)).unwrap_or(0)
    };
    let width = extent(bbox[0], bbox[2]);
    let height = extent(bbox[1], bbox[3]);
    // Each extent is below 2^64, so the product always fits; saturate defensively.
    width.saturating_mul(height)
}
803
804fn element_bboxes_are_adjacent(first: [i64; 4], second: [i64; 4]) -> bool {
805    let same_line =
806        ranges_overlap_i64(first[1], first[3], second[1], second[3]) && first[2] == second[0];
807    let stacked =
808        ranges_overlap_i64(first[0], first[2], second[0], second[2]) && first[3] == second[1];
809    same_line || stacked
810}
811
/// Whether the ranges `a_start..a_end` and `b_start..b_end` strictly overlap.
/// Ranges that merely touch at an endpoint do not count as overlapping.
fn ranges_overlap_i64(a_start: i64, a_end: i64, b_start: i64, b_end: i64) -> bool {
    let a_is_entirely_after_b = a_start >= b_end;
    let b_is_entirely_after_a = b_start >= a_end;
    !(a_is_entirely_after_b || b_is_entirely_after_a)
}
815
/// Smallest box enclosing both inputs: the component-wise minimum of the first two
/// entries and the component-wise maximum of the last two.
fn union_bbox(left: [i64; 4], right: [i64; 4]) -> [i64; 4] {
    let [left_x0, left_y0, left_x1, left_y1] = left;
    let [right_x0, right_y0, right_x1, right_y1] = right;

    let x0 = left_x0.min(right_x0);
    let y0 = left_y0.min(right_y0);
    let x1 = left_x1.max(right_x1);
    let y1 = left_y1.max(right_y1);
    [x0, y0, x1, y1]
}
824
825fn make_evidence(
826    source: &dyn GroundingSource,
827    target: &FoundTarget,
828    include_text: bool,
829    include_crops: bool,
830) -> Option<Evidence> {
831    let crop_ref = if include_crops && source.capabilities().crop_support {
832        target
833            .page
834            .as_deref()
835            .zip(target.bbox)
836            .and_then(|(page, bbox)| source.crop_ref(page, bbox))
837    } else {
838        None
839    };
840    Some(Evidence {
841        text: include_text.then(|| target.text.clone()).flatten(),
842        page: target.page.clone(),
843        bbox: target.bbox,
844        crop_ref,
845    })
846}
847
/// Whether `inner` lies within `container` after `container` is grown by
/// `tolerance` on every side. Boxes are `[min, min, max, max]` along the two axes.
///
/// The grown bounds use saturating arithmetic. With plain `+`/`-`, a container
/// edge near `i64::MIN`/`i64::MAX` overflowed: a panic in debug builds, and a
/// wrapped bound with the wrong answer in release builds.
fn contains_bbox(container: [i64; 4], inner: [i64; 4], tolerance: i64) -> bool {
    let [min_x, min_y, max_x, max_y] = container;
    inner[0] >= min_x.saturating_sub(tolerance)
        && inner[1] >= min_y.saturating_sub(tolerance)
        && inner[2] <= max_x.saturating_add(tolerance)
        && inner[3] <= max_y.saturating_add(tolerance)
}
854
855fn text_match_method(kind: ClaimKind, config: &VerificationConfig) -> MatchMethod {
856    match (kind, config.matching.text_normalization) {
857        (ClaimKind::Quote, TextNormalization::None) => MatchMethod::ExactTextContains,
858        (ClaimKind::Quote, TextNormalization::CollapseWhitespace) => {
859            MatchMethod::NormalizedTextContains
860        }
861        (_, TextNormalization::None) => MatchMethod::ExactText,
862        (_, TextNormalization::CollapseWhitespace) => MatchMethod::NormalizedText,
863    }
864}
865
866fn text_matches(
867    kind: ClaimKind,
868    expected: &str,
869    actual: &str,
870    config: &VerificationConfig,
871) -> bool {
872    let (mut expected, mut actual) = match config.matching.text_normalization {
873        TextNormalization::None => (expected.to_string(), actual.to_string()),
874        TextNormalization::CollapseWhitespace => {
875            (normalize_quote(expected), normalize_quote(actual))
876        }
877    };
878    if !config.matching.case_sensitive {
879        expected = expected.to_lowercase();
880        actual = actual.to_lowercase();
881    }
882    if kind == ClaimKind::Quote {
883        actual.contains(&expected)
884    } else {
885        actual == expected
886    }
887}
888
/// Normalize a quote for literal matching: collapse every run of ASCII whitespace
/// to one ASCII space, then trim.
///
/// Line endings need no separate pass: `\r`, `\n`, and `\r\n` are ASCII
/// whitespace, so they collapse like any other run. Non-ASCII whitespace (for
/// example U+00A0) inside the text is preserved as-is. The final trim uses
/// [`str::trim`], which strips Unicode whitespace at both ends.
pub fn normalize_quote(input: &str) -> String {
    let mut out = String::with_capacity(input.len());
    let mut in_ascii_ws = false;
    for ch in input.chars() {
        if ch.is_ascii_whitespace() {
            // Emit one space for the first character of a run; skip the rest.
            if !in_ascii_ws {
                out.push(' ');
                in_ascii_ws = true;
            }
        } else {
            out.push(ch);
            in_ascii_ws = false;
        }
    }
    out.trim().to_string()
}
908
909#[cfg(test)]
910mod tests {
911    use super::*;
912    use ethos_core::grounding::{
913        Capabilities, GroundingCell, GroundingElement, GroundingSpan, GroundingTable, PageGeometry,
914        ParserIdentity,
915    };
916    use ethos_core::verify_types::{CapabilityLimit, CellRef, Citation, Claim};
917
918    #[derive(Clone)]
919    struct TestSource {
920        caps: Capabilities,
921        fingerprint: Option<String>,
922        crop_ref: Option<String>,
923    }
924
925    impl Default for TestSource {
926        fn default() -> Self {
927            Self {
928                caps: Capabilities {
929                    spans: true,
930                    char_offsets: true,
931                    tables: true,
932                    fingerprint: true,
933                    coordinate_origin: CoordinateOrigin::TopLeft,
934                    crop_support: false,
935                },
936                fingerprint: Some(
937                    "sha256:b5d30710d0c25cc38d8dec924ecaf57ae4f81276dd5dc14d75cb3b5b6bde62d3"
938                        .into(),
939                ),
940                crop_ref: None,
941            }
942        }
943    }
944
945    impl GroundingSource for TestSource {
946        fn parser(&self) -> ParserIdentity {
947            ParserIdentity {
948                name: "test-parser".into(),
949                version: "0.1.0".into(),
950                adapter: None,
951                adapter_version: None,
952            }
953        }
954        fn capabilities(&self) -> Capabilities {
955            self.caps
956        }
957        fn fingerprint(&self) -> Option<String> {
958            self.fingerprint.clone()
959        }
960        fn pages(&self) -> Vec<PageGeometry> {
961            vec![PageGeometry {
962                id: "p0001".into(),
963                index: 1,
964                width: 61200,
965                height: 79200,
966                rotation: 0,
967            }]
968        }
969        fn elements(&self) -> Vec<GroundingElement> {
970            vec![
971                GroundingElement {
972                    id: "e000002".into(),
973                    page: "p0001".into(),
974                    bbox: [7200, 10100, 54000, 11500],
975                    kind: "text_block".into(),
976                    text: Some(
977                        "Revenue grew to $12.4M in Q3 2025, driven by enterprise expansion.".into(),
978                    ),
979                },
980                GroundingElement {
981                    id: "e000003".into(),
982                    page: "p0001".into(),
983                    bbox: [7200, 13000, 54000, 20000],
984                    kind: "table".into(),
985                    text: None,
986                },
987            ]
988        }
989        fn spans(&self) -> Vec<GroundingSpan> {
990            vec![GroundingSpan {
991                id: "s000002".into(),
992                page: "p0001".into(),
993                bbox: [7200, 10100, 54000, 11500],
994                text: "Revenue grew to $12.4M in Q3 2025".into(),
995                element: Some("e000002".into()),
996                char_start: Some(0),
997                char_end: Some(34),
998            }]
999        }
1000        fn tables(&self) -> Vec<GroundingTable> {
1001            vec![GroundingTable {
1002                id: "t0001".into(),
1003                page: "p0001".into(),
1004                bbox: [7200, 13000, 54000, 20000],
1005                cells: vec![
1006                    GroundingCell {
1007                        row: 0,
1008                        col: 0,
1009                        row_span: 1,
1010                        col_span: 1,
1011                        bbox: [7200, 13000, 30600, 16500],
1012                        text: "Metric".into(),
1013                    },
1014                    GroundingCell {
1015                        row: 1,
1016                        col: 1,
1017                        row_span: 1,
1018                        col_span: 1,
1019                        bbox: [30600, 16500, 54000, 20000],
1020                        text: "$12.4M".into(),
1021                    },
1022                ],
1023            }]
1024        }
1025        fn crop_ref(&self, page: &str, bbox: [i64; 4]) -> Option<String> {
1026            if page == "p0001" && bbox == [7200, 10100, 54000, 11500] {
1027                self.crop_ref.clone()
1028            } else {
1029                None
1030            }
1031        }
1032    }
1033
1034    struct ElementSource {
1035        elements: Vec<GroundingElement>,
1036    }
1037
1038    impl GroundingSource for ElementSource {
1039        fn parser(&self) -> ParserIdentity {
1040            ParserIdentity {
1041                name: "element-test-parser".into(),
1042                version: "0.1.0".into(),
1043                adapter: None,
1044                adapter_version: None,
1045            }
1046        }
1047        fn capabilities(&self) -> Capabilities {
1048            Capabilities {
1049                spans: true,
1050                char_offsets: true,
1051                tables: true,
1052                fingerprint: true,
1053                coordinate_origin: CoordinateOrigin::TopLeft,
1054                crop_support: false,
1055            }
1056        }
1057        fn fingerprint(&self) -> Option<String> {
1058            Some("sha256:b5d30710d0c25cc38d8dec924ecaf57ae4f81276dd5dc14d75cb3b5b6bde62d3".into())
1059        }
1060        fn pages(&self) -> Vec<PageGeometry> {
1061            vec![
1062                PageGeometry {
1063                    id: "p0001".into(),
1064                    index: 1,
1065                    width: 61200,
1066                    height: 79200,
1067                    rotation: 0,
1068                },
1069                PageGeometry {
1070                    id: "p0002".into(),
1071                    index: 2,
1072                    width: 61200,
1073                    height: 79200,
1074                    rotation: 0,
1075                },
1076            ]
1077        }
1078        fn elements(&self) -> Vec<GroundingElement> {
1079            self.elements.clone()
1080        }
1081        fn spans(&self) -> Vec<GroundingSpan> {
1082            Vec::new()
1083        }
1084        fn tables(&self) -> Vec<GroundingTable> {
1085            Vec::new()
1086        }
1087    }
1088
1089    fn claim(kind: ClaimKind, text: Option<&str>, citation: Citation) -> Claim {
1090        Claim {
1091            kind,
1092            text: text.map(str::to_string),
1093            citation,
1094        }
1095    }
1096
1097    fn input(source: &TestSource, claims: Vec<Claim>) -> CitationInput {
1098        CitationInput::Envelope(CitationEnvelope {
1099            document_fingerprint: source.fingerprint(),
1100            claims,
1101        })
1102    }
1103
1104    fn verify(source: &TestSource, claims: Vec<Claim>) -> VerificationReport {
1105        let cfg = VerificationConfig::default_v1();
1106        verify_claims(source, input(source, claims), &cfg, "0".repeat(64))
1107    }
1108
1109    fn verify_with_config(
1110        source: &TestSource,
1111        claims: Vec<Claim>,
1112        cfg: &VerificationConfig,
1113    ) -> VerificationReport {
1114        verify_claims(source, input(source, claims), cfg, "0".repeat(64))
1115    }
1116
1117    fn element(id: &str, page: &str, bbox: [i64; 4], text: Option<&str>) -> GroundingElement {
1118        GroundingElement {
1119            id: id.into(),
1120            page: page.into(),
1121            bbox,
1122            kind: "text_block".into(),
1123            text: text.map(str::to_string),
1124        }
1125    }
1126
1127    fn verify_elements(elements: Vec<GroundingElement>, claims: Vec<Claim>) -> VerificationReport {
1128        let source = ElementSource { elements };
1129        let cfg = VerificationConfig::default_v1();
1130        let citations = CitationInput::Envelope(CitationEnvelope {
1131            document_fingerprint: source.fingerprint(),
1132            claims,
1133        });
1134        verify_claims(&source, citations, &cfg, "0".repeat(64))
1135    }
1136
1137    #[test]
1138    fn quote_and_presence_claims_ground_with_literal_matching() {
1139        let source = TestSource::default();
1140        let report = verify(
1141            &source,
1142            vec![
1143                claim(
1144                    ClaimKind::Quote,
1145                    Some("Revenue grew to $12.4M in Q3 2025"),
1146                    Citation {
1147                        element_id: Some("e000002".into()),
1148                        ..Default::default()
1149                    },
1150                ),
1151                claim(
1152                    ClaimKind::Presence,
1153                    None,
1154                    Citation {
1155                        span_id: Some("s000002".into()),
1156                        ..Default::default()
1157                    },
1158                ),
1159            ],
1160        );
1161
1162        assert!(report.all_evidence_grounded);
1163        assert_eq!(report.checks.len(), 2);
1164        assert_eq!(report.capability_limits, Vec::<CapabilityLimit>::new());
1165        assert_eq!(report.checks[0].status, CheckStatus::Grounded);
1166        assert_eq!(
1167            report.checks[0].match_method,
1168            MatchMethod::NormalizedTextContains
1169        );
1170        assert_eq!(report.checks[1].status, CheckStatus::Grounded);
1171        assert_eq!(report.checks[1].match_method, MatchMethod::PresenceOnly);
1172        assert_eq!(
1173            report.checks[0]
1174                .evidence
1175                .as_ref()
1176                .and_then(|e| e.text.as_deref()),
1177            Some("Revenue grew to $12.4M in Q3 2025, driven by enterprise expansion.")
1178        );
1179        assert_eq!(report.warnings, Vec::<WarningCode>::new());
1180    }
1181
1182    #[test]
1183    fn quote_claim_grounds_across_adjacent_element_text_fragments() {
1184        let report = verify_elements(
1185            vec![
1186                element(
1187                    "split-a",
1188                    "p0001",
1189                    [100, 100, 400, 200],
1190                    Some("The alpha trust loop verifies "),
1191                ),
1192                element(
1193                    "split-b",
1194                    "p0001",
1195                    [400, 100, 700, 200],
1196                    Some("grounded evidence"),
1197                ),
1198            ],
1199            vec![claim(
1200                ClaimKind::Quote,
1201                Some("The alpha trust loop verifies grounded evidence"),
1202                Citation {
1203                    element_id: Some("split-a".into()),
1204                    ..Default::default()
1205                },
1206            )],
1207        );
1208
1209        assert!(report.all_evidence_grounded);
1210        assert_eq!(report.checks[0].status, CheckStatus::Grounded);
1211        assert_eq!(
1212            report.checks[0].match_method,
1213            MatchMethod::NormalizedTextContains
1214        );
1215        assert_eq!(
1216            report.checks[0]
1217                .evidence
1218                .as_ref()
1219                .and_then(|e| e.text.as_deref()),
1220            Some("The alpha trust loop verifies grounded evidence")
1221        );
1222        assert_eq!(
1223            report.checks[0].evidence.as_ref().and_then(|e| e.bbox),
1224            Some([100, 100, 700, 200])
1225        );
1226    }
1227
1228    #[test]
1229    fn quote_claim_page_only_locator_does_not_search_adjacent_fragments() {
1230        let report = verify_elements(
1231            vec![
1232                element(
1233                    "split-a",
1234                    "p0001",
1235                    [100, 100, 400, 200],
1236                    Some("The alpha trust loop verifies "),
1237                ),
1238                element(
1239                    "split-b",
1240                    "p0001",
1241                    [400, 100, 700, 200],
1242                    Some("grounded evidence"),
1243                ),
1244            ],
1245            vec![claim(
1246                ClaimKind::Quote,
1247                Some("The alpha trust loop verifies grounded evidence"),
1248                Citation {
1249                    page: Some("p0001".into()),
1250                    ..Default::default()
1251                },
1252            )],
1253        );
1254
1255        assert!(!report.all_evidence_grounded);
1256        assert_eq!(report.checks[0].status, CheckStatus::Mismatch);
1257        assert_eq!(report.checks[0].reason, Some(CheckReason::TextMismatch));
1258    }
1259
1260    #[test]
1261    fn quote_claim_grounds_when_element_id_points_to_second_adjacent_fragment() {
1262        let report = verify_elements(
1263            vec![
1264                element(
1265                    "split-a",
1266                    "p0001",
1267                    [100, 100, 400, 200],
1268                    Some("The alpha trust loop verifies "),
1269                ),
1270                element(
1271                    "split-b",
1272                    "p0001",
1273                    [400, 100, 700, 200],
1274                    Some("grounded evidence"),
1275                ),
1276            ],
1277            vec![claim(
1278                ClaimKind::Quote,
1279                Some("The alpha trust loop verifies grounded evidence"),
1280                Citation {
1281                    element_id: Some("split-b".into()),
1282                    ..Default::default()
1283                },
1284            )],
1285        );
1286
1287        assert!(report.all_evidence_grounded);
1288        assert_eq!(report.checks[0].status, CheckStatus::Grounded);
1289        assert_eq!(
1290            report.checks[0]
1291                .evidence
1292                .as_ref()
1293                .and_then(|e| e.text.as_deref()),
1294            Some("The alpha trust loop verifies grounded evidence")
1295        );
1296        assert_eq!(
1297            report.checks[0].evidence.as_ref().and_then(|e| e.bbox),
1298            Some([100, 100, 700, 200])
1299        );
1300    }
1301
1302    #[test]
1303    fn quote_claim_does_not_stitch_non_touching_element_bboxes() {
1304        let report = verify_elements(
1305            vec![
1306                element(
1307                    "split-a",
1308                    "p0001",
1309                    [100, 100, 390, 200],
1310                    Some("The alpha trust loop verifies "),
1311                ),
1312                element(
1313                    "split-b",
1314                    "p0001",
1315                    [400, 100, 700, 200],
1316                    Some("grounded evidence"),
1317                ),
1318            ],
1319            vec![claim(
1320                ClaimKind::Quote,
1321                Some("The alpha trust loop verifies grounded evidence"),
1322                Citation {
1323                    element_id: Some("split-a".into()),
1324                    ..Default::default()
1325                },
1326            )],
1327        );
1328
1329        assert!(!report.all_evidence_grounded);
1330        assert_eq!(report.checks[0].status, CheckStatus::Mismatch);
1331        assert_eq!(report.checks[0].reason, Some(CheckReason::TextMismatch));
1332    }
1333
1334    #[test]
1335    fn quote_claim_bbox_locator_does_not_expand_outside_cited_region() {
1336        let report = verify_elements(
1337            vec![
1338                element(
1339                    "split-a",
1340                    "p0001",
1341                    [100, 100, 400, 200],
1342                    Some("The alpha trust loop verifies "),
1343                ),
1344                element(
1345                    "split-b",
1346                    "p0001",
1347                    [400, 100, 700, 200],
1348                    Some("grounded evidence"),
1349                ),
1350            ],
1351            vec![claim(
1352                ClaimKind::Quote,
1353                Some("The alpha trust loop verifies grounded evidence"),
1354                Citation {
1355                    page: Some("p0001".into()),
1356                    bbox: Some([120, 120, 380, 180]),
1357                    ..Default::default()
1358                },
1359            )],
1360        );
1361
1362        assert!(!report.all_evidence_grounded);
1363        assert_eq!(report.checks[0].status, CheckStatus::Mismatch);
1364        assert_eq!(report.checks[0].reason, Some(CheckReason::TextMismatch));
1365        assert_eq!(
1366            report.checks[0]
1367                .evidence
1368                .as_ref()
1369                .and_then(|e| e.text.as_deref()),
1370            Some("The alpha trust loop verifies ")
1371        );
1372        assert_eq!(
1373            report.checks[0].evidence.as_ref().and_then(|e| e.bbox),
1374            Some([100, 100, 400, 200])
1375        );
1376    }
1377
1378    #[test]
1379    fn bbox_locator_prefers_smallest_containing_element() {
1380        let report = verify_elements(
1381            vec![
1382                element(
1383                    "container",
1384                    "p0001",
1385                    [0, 0, 1000, 1000],
1386                    Some("outer wrapper text"),
1387                ),
1388                element(
1389                    "inner",
1390                    "p0001",
1391                    [100, 100, 400, 200],
1392                    Some("The exact cited quote"),
1393                ),
1394            ],
1395            vec![claim(
1396                ClaimKind::Quote,
1397                Some("The exact cited quote"),
1398                Citation {
1399                    page: Some("p0001".into()),
1400                    bbox: Some([120, 120, 380, 180]),
1401                    ..Default::default()
1402                },
1403            )],
1404        );
1405
1406        assert!(report.all_evidence_grounded);
1407        assert_eq!(report.checks[0].status, CheckStatus::Grounded);
1408        assert_eq!(
1409            report.checks[0]
1410                .evidence
1411                .as_ref()
1412                .and_then(|e| e.text.as_deref()),
1413            Some("The exact cited quote")
1414        );
1415        assert_eq!(
1416            report.checks[0].evidence.as_ref().and_then(|e| e.bbox),
1417            Some([100, 100, 400, 200])
1418        );
1419    }
1420
1421    #[test]
1422    fn quote_claim_does_not_ground_across_non_adjacent_or_wrong_page_fragments() {
1423        let non_adjacent = verify_elements(
1424            vec![
1425                element(
1426                    "split-a",
1427                    "p0001",
1428                    [100, 100, 400, 200],
1429                    Some("The alpha trust loop verifies "),
1430                ),
1431                element(
1432                    "between",
1433                    "p0001",
1434                    [100, 220, 700, 320],
1435                    Some("separate evidence"),
1436                ),
1437                element(
1438                    "split-b",
1439                    "p0001",
1440                    [400, 100, 700, 200],
1441                    Some("grounded evidence"),
1442                ),
1443            ],
1444            vec![claim(
1445                ClaimKind::Quote,
1446                Some("The alpha trust loop verifies grounded evidence"),
1447                Citation {
1448                    element_id: Some("split-a".into()),
1449                    ..Default::default()
1450                },
1451            )],
1452        );
1453        assert!(!non_adjacent.all_evidence_grounded);
1454        assert_eq!(non_adjacent.checks[0].status, CheckStatus::Mismatch);
1455        assert_eq!(
1456            non_adjacent.checks[0].reason,
1457            Some(CheckReason::TextMismatch)
1458        );
1459
1460        let wrong_page = verify_elements(
1461            vec![
1462                element(
1463                    "split-a",
1464                    "p0001",
1465                    [100, 100, 400, 200],
1466                    Some("The alpha trust loop verifies "),
1467                ),
1468                element(
1469                    "split-b",
1470                    "p0002",
1471                    [400, 100, 700, 200],
1472                    Some("grounded evidence"),
1473                ),
1474            ],
1475            vec![claim(
1476                ClaimKind::Quote,
1477                Some("The alpha trust loop verifies grounded evidence"),
1478                Citation {
1479                    page: Some("p0001".into()),
1480                    ..Default::default()
1481                },
1482            )],
1483        );
1484        assert!(!wrong_page.all_evidence_grounded);
1485        assert_eq!(wrong_page.checks[0].status, CheckStatus::Mismatch);
1486        assert_eq!(wrong_page.checks[0].reason, Some(CheckReason::TextMismatch));
1487    }
1488
1489    #[test]
1490    fn mismatch_and_not_found_keep_gate_false() {
1491        let source = TestSource::default();
1492        let report = verify(
1493            &source,
1494            vec![
1495                claim(
1496                    ClaimKind::Quote,
1497                    Some("Revenue fell to $1"),
1498                    Citation {
1499                        element_id: Some("e000002".into()),
1500                        ..Default::default()
1501                    },
1502                ),
1503                claim(
1504                    ClaimKind::Presence,
1505                    None,
1506                    Citation {
1507                        element_id: Some("missing".into()),
1508                        ..Default::default()
1509                    },
1510                ),
1511            ],
1512        );
1513
1514        assert!(!report.all_evidence_grounded);
1515        assert_eq!(report.checks[0].status, CheckStatus::Mismatch);
1516        assert_eq!(report.checks[0].reason, Some(CheckReason::TextMismatch));
1517        assert_eq!(report.checks[1].status, CheckStatus::NotFound);
1518        assert_eq!(report.checks[1].reason, Some(CheckReason::ElementNotFound));
1519    }
1520
1521    #[test]
1522    fn value_claims_use_literal_text_matching() {
1523        let source = TestSource::default();
1524        let report = verify(
1525            &source,
1526            vec![claim(
1527                ClaimKind::Value,
1528                Some("Revenue grew to $12.4M in Q3 2025, driven by enterprise expansion."),
1529                Citation {
1530                    element_id: Some("e000002".into()),
1531                    ..Default::default()
1532                },
1533            )],
1534        );
1535
1536        assert!(report.all_evidence_grounded);
1537        assert_eq!(report.unsupported_claim_kinds, Vec::<String>::new());
1538        assert_eq!(report.checks[0].status, CheckStatus::Grounded);
1539        assert_eq!(report.checks[0].match_method, MatchMethod::NormalizedText);
1540    }
1541
1542    #[test]
1543    fn value_substrings_do_not_ground() {
1544        let source = TestSource::default();
1545        let report = verify(
1546            &source,
1547            vec![claim(
1548                ClaimKind::Value,
1549                Some("1"),
1550                Citation {
1551                    element_id: Some("e000002".into()),
1552                    ..Default::default()
1553                },
1554            )],
1555        );
1556
1557        assert!(!report.all_evidence_grounded);
1558        assert_eq!(report.checks[0].status, CheckStatus::Mismatch);
1559        assert_eq!(report.checks[0].reason, Some(CheckReason::TextMismatch));
1560        assert_eq!(report.checks[0].match_method, MatchMethod::NormalizedText);
1561    }
1562
1563    #[test]
1564    fn table_cell_claims_lookup_cell_and_match_text() {
1565        let source = TestSource::default();
1566        let report = verify(
1567            &source,
1568            vec![claim(
1569                ClaimKind::TableCell,
1570                Some("$12.4M"),
1571                Citation {
1572                    table_id: Some("t0001".into()),
1573                    cell: Some(CellRef { row: 1, col: 1 }),
1574                    ..Default::default()
1575                },
1576            )],
1577        );
1578
1579        assert!(report.all_evidence_grounded);
1580        assert_eq!(report.unsupported_claim_kinds, Vec::<String>::new());
1581        assert_eq!(report.checks[0].status, CheckStatus::Grounded);
1582        assert_eq!(report.checks[0].match_method, MatchMethod::TableCellLookup);
1583        assert_eq!(
1584            report.checks[0]
1585                .evidence
1586                .as_ref()
1587                .and_then(|e| e.text.as_deref()),
1588            Some("$12.4M")
1589        );
1590    }
1591
1592    #[test]
1593    fn table_cell_missing_cell_is_not_found() {
1594        let source = TestSource::default();
1595        let report = verify(
1596            &source,
1597            vec![claim(
1598                ClaimKind::TableCell,
1599                Some("$12.4M"),
1600                Citation {
1601                    table_id: Some("t0001".into()),
1602                    cell: Some(CellRef { row: 9, col: 9 }),
1603                    ..Default::default()
1604                },
1605            )],
1606        );
1607
1608        assert!(!report.all_evidence_grounded);
1609        assert_eq!(report.checks[0].status, CheckStatus::NotFound);
1610        assert_eq!(
1611            report.checks[0].reason,
1612            Some(CheckReason::TableCellNotFound)
1613        );
1614        assert_eq!(report.checks[0].match_method, MatchMethod::None);
1615    }
1616
1617    #[test]
1618    fn empty_table_collection_is_not_found_when_tables_are_supported() {
1619        let source = TestSource {
1620            caps: Capabilities {
1621                tables: true,
1622                ..TestSource::default().caps
1623            },
1624            ..TestSource::default()
1625        };
1626        struct NoTables(TestSource);
1627        impl GroundingSource for NoTables {
1628            fn parser(&self) -> ParserIdentity {
1629                self.0.parser()
1630            }
1631            fn capabilities(&self) -> Capabilities {
1632                self.0.capabilities()
1633            }
1634            fn fingerprint(&self) -> Option<String> {
1635                self.0.fingerprint()
1636            }
1637            fn pages(&self) -> Vec<PageGeometry> {
1638                self.0.pages()
1639            }
1640            fn elements(&self) -> Vec<GroundingElement> {
1641                self.0.elements()
1642            }
1643            fn spans(&self) -> Vec<GroundingSpan> {
1644                self.0.spans()
1645            }
1646            fn tables(&self) -> Vec<GroundingTable> {
1647                Vec::new()
1648            }
1649        }
1650        let report = verify(
1651            &source,
1652            vec![claim(
1653                ClaimKind::TableCell,
1654                Some("$12.4M"),
1655                Citation {
1656                    table_id: Some("missing".into()),
1657                    cell: Some(CellRef { row: 1, col: 1 }),
1658                    ..Default::default()
1659                },
1660            )],
1661        );
1662        assert_eq!(report.checks[0].status, CheckStatus::NotFound);
1663
1664        let no_tables = NoTables(source);
1665        let cfg = VerificationConfig::default_v1();
1666        let report = verify_claims(
1667            &no_tables,
1668            CitationInput::Envelope(CitationEnvelope {
1669                document_fingerprint: no_tables.fingerprint(),
1670                claims: vec![claim(
1671                    ClaimKind::TableCell,
1672                    Some("$12.4M"),
1673                    Citation {
1674                        table_id: Some("missing".into()),
1675                        cell: Some(CellRef { row: 1, col: 1 }),
1676                        ..Default::default()
1677                    },
1678                )],
1679            }),
1680            &cfg,
1681            "0".repeat(64),
1682        );
1683        assert_eq!(report.checks[0].status, CheckStatus::NotFound);
1684    }
1685
1686    #[test]
1687    fn missing_table_capability_blocks_table_cell_claims() {
1688        let source = TestSource {
1689            caps: Capabilities {
1690                tables: false,
1691                ..TestSource::default().caps
1692            },
1693            ..TestSource::default()
1694        };
1695        let report = verify(
1696            &source,
1697            vec![claim(
1698                ClaimKind::TableCell,
1699                Some("$12.4M"),
1700                Citation {
1701                    table_id: Some("t0001".into()),
1702                    cell: Some(CellRef { row: 1, col: 1 }),
1703                    ..Default::default()
1704                },
1705            )],
1706        );
1707
1708        assert_eq!(report.checks[0].status, CheckStatus::CapabilityBlocked);
1709        assert_eq!(
1710            report.checks[0].reason,
1711            Some(CheckReason::MissingTableCapability)
1712        );
1713        assert_eq!(
1714            report.capability_limits,
1715            vec![CapabilityLimit::MissingTables]
1716        );
1717        assert!(report.checks[0]
1718            .warnings
1719            .contains(&WarningCode::CapabilityLimited));
1720    }
1721
1722    #[test]
1723    fn crop_refs_are_echoed_only_when_requested_and_supported() {
1724        let source = TestSource {
1725            caps: Capabilities {
1726                crop_support: true,
1727                ..TestSource::default().caps
1728            },
1729            crop_ref: Some("crop://p0001/e000002.png".into()),
1730            ..TestSource::default()
1731        };
1732        let claim = claim(
1733            ClaimKind::Quote,
1734            Some("Revenue grew to $12.4M in Q3 2025"),
1735            Citation {
1736                element_id: Some("e000002".into()),
1737                ..Default::default()
1738            },
1739        );
1740
1741        let mut cfg = VerificationConfig::default_v1();
1742        cfg.evidence.as_mut().unwrap().include_crops = true;
1743        let with_crops = verify_with_config(&source, vec![claim.clone()], &cfg);
1744        assert_eq!(
1745            with_crops.checks[0]
1746                .evidence
1747                .as_ref()
1748                .and_then(|e| e.crop_ref.as_deref()),
1749            Some("crop://p0001/e000002.png")
1750        );
1751
1752        cfg.evidence.as_mut().unwrap().include_crops = false;
1753        let without_crops = verify_with_config(&source, vec![claim], &cfg);
1754        assert_eq!(
1755            without_crops.checks[0]
1756                .evidence
1757                .as_ref()
1758                .and_then(|e| e.crop_ref.as_deref()),
1759            None
1760        );
1761    }
1762
1763    #[test]
1764    fn requested_crop_refs_without_source_support_remain_capability_limited() {
1765        let source = TestSource {
1766            crop_ref: Some("crop://p0001/e000002.png".into()),
1767            ..TestSource::default()
1768        };
1769        let mut cfg = VerificationConfig::default_v1();
1770        cfg.evidence.as_mut().unwrap().include_crops = true;
1771
1772        let report = verify_with_config(
1773            &source,
1774            vec![claim(
1775                ClaimKind::Quote,
1776                Some("Revenue grew to $12.4M in Q3 2025"),
1777                Citation {
1778                    element_id: Some("e000002".into()),
1779                    ..Default::default()
1780                },
1781            )],
1782            &cfg,
1783        );
1784
1785        assert_eq!(report.checks[0].status, CheckStatus::Grounded);
1786        assert_eq!(
1787            report.capability_limits,
1788            vec![CapabilityLimit::MissingCropSupport]
1789        );
1790        assert!(report.warnings.contains(&WarningCode::CapabilityLimited));
1791        assert_eq!(
1792            report.checks[0]
1793                .evidence
1794                .as_ref()
1795                .and_then(|e| e.crop_ref.as_deref()),
1796            None
1797        );
1798    }
1799
1800    #[test]
1801    fn stale_fingerprint_marks_checks_stale_and_gate_false() {
1802        let source = TestSource::default();
1803        let cfg = VerificationConfig::default_v1();
1804        let report = verify_claims(
1805            &source,
1806            CitationInput::Envelope(CitationEnvelope {
1807                document_fingerprint: Some(
1808                    "sha256:0000000000000000000000000000000000000000000000000000000000000000"
1809                        .into(),
1810                ),
1811                claims: vec![claim(
1812                    ClaimKind::Presence,
1813                    None,
1814                    Citation {
1815                        element_id: Some("e000002".into()),
1816                        ..Default::default()
1817                    },
1818                )],
1819            }),
1820            &cfg,
1821            "0".repeat(64),
1822        );
1823
1824        assert!(report.fingerprint_stale);
1825        assert!(!report.all_evidence_grounded);
1826        assert_eq!(report.checks[0].status, CheckStatus::Stale);
1827        assert_eq!(report.checks[0].reason, Some(CheckReason::StaleFingerprint));
1828    }
1829
1830    #[test]
1831    fn missing_citation_fingerprint_blocks_when_required() {
1832        let source = TestSource::default();
1833        let cfg = VerificationConfig::default_v1();
1834        let report = verify_claims(
1835            &source,
1836            CitationInput::Envelope(CitationEnvelope {
1837                document_fingerprint: None,
1838                claims: vec![claim(
1839                    ClaimKind::Presence,
1840                    None,
1841                    Citation {
1842                        element_id: Some("e000002".into()),
1843                        ..Default::default()
1844                    },
1845                )],
1846            }),
1847            &cfg,
1848            "0".repeat(64),
1849        );
1850
1851        assert!(!report.fingerprint_stale);
1852        assert!(!report.all_evidence_grounded);
1853        assert_eq!(report.checks[0].status, CheckStatus::Stale);
1854        assert_eq!(
1855            report.checks[0].reason,
1856            Some(CheckReason::MissingCitationFingerprint)
1857        );
1858    }
1859
1860    #[test]
1861    fn unsupported_claim_kinds_are_explicit() {
1862        let source = TestSource::default();
1863        let report = verify(
1864            &source,
1865            vec![claim(
1866                ClaimKind::Region,
1867                None,
1868                Citation {
1869                    element_id: Some("e000002".into()),
1870                    ..Default::default()
1871                },
1872            )],
1873        );
1874
1875        assert!(!report.all_evidence_grounded);
1876        assert_eq!(report.checks[0].status, CheckStatus::UnsupportedClaimKind);
1877        assert_eq!(
1878            report.checks[0].reason,
1879            Some(CheckReason::UnsupportedClaimKind)
1880        );
1881        assert_eq!(report.unsupported_claim_kinds, vec!["region"]);
1882    }
1883
1884    #[test]
1885    fn non_v1_claim_kinds_are_deduped_and_keep_gate_false() {
1886        let source = TestSource::default();
1887        let report = verify(
1888            &source,
1889            vec![
1890                claim(
1891                    ClaimKind::Presence,
1892                    None,
1893                    Citation {
1894                        page: Some("p0001".into()),
1895                        ..Default::default()
1896                    },
1897                ),
1898                claim(
1899                    ClaimKind::Region,
1900                    None,
1901                    Citation {
1902                        element_id: Some("e000002".into()),
1903                        ..Default::default()
1904                    },
1905                ),
1906                claim(
1907                    ClaimKind::Other,
1908                    Some("$12.4M equals 12400000"),
1909                    Citation {
1910                        element_id: Some("e000002".into()),
1911                        ..Default::default()
1912                    },
1913                ),
1914                claim(
1915                    ClaimKind::Region,
1916                    None,
1917                    Citation {
1918                        page: Some("p0001".into()),
1919                        ..Default::default()
1920                    },
1921                ),
1922            ],
1923        );
1924
1925        assert!(!report.all_evidence_grounded);
1926        assert_eq!(report.checks[0].status, CheckStatus::Grounded);
1927        assert_eq!(report.checks[1].status, CheckStatus::UnsupportedClaimKind);
1928        assert_eq!(report.checks[2].status, CheckStatus::UnsupportedClaimKind);
1929        assert_eq!(report.checks[3].status, CheckStatus::UnsupportedClaimKind);
1930        assert_eq!(report.checks[1].match_method, MatchMethod::None);
1931        assert_eq!(report.checks[2].match_method, MatchMethod::None);
1932        assert_eq!(report.checks[3].match_method, MatchMethod::None);
1933        assert_eq!(
1934            report.checks[1].reason,
1935            Some(CheckReason::UnsupportedClaimKind)
1936        );
1937        assert_eq!(
1938            report.checks[2].reason,
1939            Some(CheckReason::UnsupportedClaimKind)
1940        );
1941        assert_eq!(
1942            report.checks[3].reason,
1943            Some(CheckReason::UnsupportedClaimKind)
1944        );
1945        assert!(report.checks[1].evidence.is_none());
1946        assert!(report.checks[2].evidence.is_none());
1947        assert!(report.checks[3].evidence.is_none());
1948        assert!(report.checks[1].warnings.is_empty());
1949        assert!(report.checks[2].warnings.is_empty());
1950        assert!(report.checks[3].warnings.is_empty());
1951        assert!(!report.checks[1].semantic_unverified);
1952        assert!(!report.checks[2].semantic_unverified);
1953        assert!(!report.checks[3].semantic_unverified);
1954        assert_eq!(report.unsupported_claim_kinds, vec!["region", "other"]);
1955    }
1956
1957    #[test]
1958    fn missing_span_capability_blocks_span_locator() {
1959        let source = TestSource {
1960            caps: Capabilities {
1961                spans: false,
1962                char_offsets: false,
1963                tables: false,
1964                fingerprint: false,
1965                coordinate_origin: CoordinateOrigin::Unknown,
1966                crop_support: false,
1967            },
1968            fingerprint: None,
1969            crop_ref: None,
1970        };
1971        let report = verify(
1972            &source,
1973            vec![claim(
1974                ClaimKind::Presence,
1975                None,
1976                Citation {
1977                    span_id: Some("s000002".into()),
1978                    ..Default::default()
1979                },
1980            )],
1981        );
1982
1983        assert!(!report.all_evidence_grounded);
1984        assert_eq!(report.checks[0].status, CheckStatus::CapabilityBlocked);
1985        assert_eq!(
1986            report.checks[0].reason,
1987            Some(CheckReason::MissingSpanCapability)
1988        );
1989        assert_eq!(
1990            report.capability_limits,
1991            vec![
1992                CapabilityLimit::MissingFingerprint,
1993                CapabilityLimit::MissingSpans,
1994                CapabilityLimit::MissingCharOffsets,
1995                CapabilityLimit::MissingTables,
1996                CapabilityLimit::UnknownCoordinateOrigin
1997            ]
1998        );
1999        assert!(report.warnings.contains(&WarningCode::CapabilityLimited));
2000        assert!(report.checks[0]
2001            .warnings
2002            .contains(&WarningCode::CapabilityLimited));
2003    }
2004
2005    #[test]
2006    fn citation_fingerprint_without_source_fingerprint_blocks_checks() {
2007        let source = TestSource {
2008            caps: Capabilities {
2009                fingerprint: false,
2010                ..TestSource::default().caps
2011            },
2012            fingerprint: None,
2013            ..TestSource::default()
2014        };
2015        let cfg = VerificationConfig::default_v1();
2016        let report = verify_claims(
2017            &source,
2018            CitationInput::Envelope(CitationEnvelope {
2019                document_fingerprint: Some(
2020                    "sha256:b5d30710d0c25cc38d8dec924ecaf57ae4f81276dd5dc14d75cb3b5b6bde62d3"
2021                        .into(),
2022                ),
2023                claims: vec![claim(
2024                    ClaimKind::Presence,
2025                    None,
2026                    Citation {
2027                        element_id: Some("e000002".into()),
2028                        ..Default::default()
2029                    },
2030                )],
2031            }),
2032            &cfg,
2033            "0".repeat(64),
2034        );
2035
2036        assert!(!report.fingerprint_stale);
2037        assert!(!report.all_evidence_grounded);
2038        assert_eq!(report.checks[0].status, CheckStatus::CapabilityBlocked);
2039        assert_eq!(
2040            report.checks[0].reason,
2041            Some(CheckReason::MissingSourceFingerprint)
2042        );
2043        assert_eq!(
2044            report.capability_limits,
2045            vec![CapabilityLimit::MissingFingerprint]
2046        );
2047        assert!(report.warnings.contains(&WarningCode::CapabilityLimited));
2048        assert!(report.checks[0]
2049            .warnings
2050            .contains(&WarningCode::CapabilityLimited));
2051    }
2052
2053    #[test]
2054    fn missing_text_is_error_for_library_callers() {
2055        let source = TestSource::default();
2056        let report = verify(
2057            &source,
2058            vec![claim(
2059                ClaimKind::Quote,
2060                None,
2061                Citation {
2062                    element_id: Some("e000002".into()),
2063                    ..Default::default()
2064                },
2065            )],
2066        );
2067
2068        assert!(!report.all_evidence_grounded);
2069        assert_eq!(report.checks[0].status, CheckStatus::Error);
2070        assert_eq!(
2071            report.checks[0].reason,
2072            Some(CheckReason::MissingRequiredText)
2073        );
2074        assert_eq!(report.checks[0].match_method, MatchMethod::None);
2075    }
2076
2077    #[test]
2078    fn quote_normalization_is_ascii_whitespace_only() {
2079        assert_eq!(normalize_quote("  a\r\n\t b  "), "a b");
2080        assert_eq!(normalize_quote("a\u{00a0}b"), "a\u{00a0}b");
2081    }
2082
2083    #[test]
2084    fn report_serializes_to_schema_shape() {
2085        let source = TestSource::default();
2086        let report = verify(
2087            &source,
2088            vec![claim(
2089                ClaimKind::Presence,
2090                None,
2091                Citation {
2092                    element_id: Some("e000002".into()),
2093                    ..Default::default()
2094                },
2095            )],
2096        );
2097        let v = serde_json::to_value(&report).unwrap();
2098        assert_eq!(v["grounding"]["parser"]["name"], "test-parser");
2099        assert_eq!(v["fingerprint_stale"], false);
2100        assert_eq!(v["checks"].as_array().unwrap().len(), 1);
2101    }
2102}