Skip to main content

ethos_core/
codes.rs

1/*
2 * Copyright 2026 The Ethos maintainers
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *     http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
//! Stable warning codes (PRD §10). New codes are `contract-change` events; renames are
//! breaking. Error codes live in [`crate::error`] (full feature) because only the parser
//! emits them; warning codes are shared with verification reports (verify-types feature).
20
21use serde::{Deserialize, Serialize};
22
23/// The 11 stable warning codes. Wire format is snake_case.
24#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
25#[serde(rename_all = "snake_case")]
26pub enum WarningCode {
27    /// Reading-order heuristic below confidence threshold.
28    LowConfidenceReadingOrder,
29    /// Table-structure heuristic below confidence threshold.
30    LowConfidenceTableStructure,
31    /// Hidden text detected (excluded from default chunks).
32    HiddenTextDetected,
33    /// Text positioned off the page box (excluded from default chunks).
34    OffPageTextDetected,
35    /// Low-contrast (e.g. white-on-white) text detected (excluded from default chunks).
36    LowContrastTextDetected,
37    /// Annotations present on a page.
38    AnnotationsPresent,
39    /// External links present (never followed).
40    ExternalLinksPresent,
41    /// Page has no extractable text (image-only).
42    ImageOnlyPage,
43    /// Annotation subtype not supported.
44    UnsupportedAnnotation,
45    /// Parse completed partially (e.g. a page-level failure inside limits).
46    PartialParse,
47    /// A grounding/verification capability was missing; result downgraded explicitly.
48    CapabilityLimited,
49}
50
51impl WarningCode {
52    /// All codes, in contract order.
53    pub const ALL: [WarningCode; 11] = [
54        WarningCode::LowConfidenceReadingOrder,
55        WarningCode::LowConfidenceTableStructure,
56        WarningCode::HiddenTextDetected,
57        WarningCode::OffPageTextDetected,
58        WarningCode::LowContrastTextDetected,
59        WarningCode::AnnotationsPresent,
60        WarningCode::ExternalLinksPresent,
61        WarningCode::ImageOnlyPage,
62        WarningCode::UnsupportedAnnotation,
63        WarningCode::PartialParse,
64        WarningCode::CapabilityLimited,
65    ];
66
67    /// Stable wire string (snake_case), identical to the serde form.
68    pub fn as_str(self) -> &'static str {
69        match self {
70            WarningCode::LowConfidenceReadingOrder => "low_confidence_reading_order",
71            WarningCode::LowConfidenceTableStructure => "low_confidence_table_structure",
72            WarningCode::HiddenTextDetected => "hidden_text_detected",
73            WarningCode::OffPageTextDetected => "off_page_text_detected",
74            WarningCode::LowContrastTextDetected => "low_contrast_text_detected",
75            WarningCode::AnnotationsPresent => "annotations_present",
76            WarningCode::ExternalLinksPresent => "external_links_present",
77            WarningCode::ImageOnlyPage => "image_only_page",
78            WarningCode::UnsupportedAnnotation => "unsupported_annotation",
79            WarningCode::PartialParse => "partial_parse",
80            WarningCode::CapabilityLimited => "capability_limited",
81        }
82    }
83
84    /// Security-class codes route to `security_warnings` / the security report;
85    /// the rest are parser warnings (determinism contract §8).
86    pub fn is_security(self) -> bool {
87        matches!(
88            self,
89            WarningCode::HiddenTextDetected
90                | WarningCode::OffPageTextDetected
91                | WarningCode::LowContrastTextDetected
92                | WarningCode::AnnotationsPresent
93                | WarningCode::ExternalLinksPresent
94                | WarningCode::UnsupportedAnnotation
95                | WarningCode::ImageOnlyPage
96        )
97    }
98
99    /// Security-report codes that currently require inventory data not present in the
100    /// canonical document warning lane.
101    pub fn is_inventory_backed_security(self) -> bool {
102        matches!(
103            self,
104            WarningCode::AnnotationsPresent
105                | WarningCode::ExternalLinksPresent
106                | WarningCode::UnsupportedAnnotation
107        )
108    }
109
110    /// Security-report codes backed by text spans and excluded from default chunks.
111    pub fn is_text_backed_security(self) -> bool {
112        matches!(
113            self,
114            WarningCode::HiddenTextDetected
115                | WarningCode::OffPageTextDetected
116                | WarningCode::LowContrastTextDetected
117        )
118    }
119
120    /// Security-report codes that require a page reference.
121    pub fn is_page_backed_security(self) -> bool {
122        self.is_text_backed_security() || matches!(self, WarningCode::ImageOnlyPage)
123    }
124
125    /// Warning codes that must never appear in default chunk artifacts.
126    pub fn excludes_from_default_chunks(self) -> bool {
127        self.is_text_backed_security()
128    }
129
130    /// Fixed message template for security-report findings.
131    pub fn security_report_message_template(self) -> Option<&'static str> {
132        match self {
133            WarningCode::HiddenTextDetected => {
134                Some("hidden text detected: excluded from default chunks")
135            }
136            WarningCode::OffPageTextDetected => {
137                Some("off-page text detected: excluded from default chunks")
138            }
139            WarningCode::LowContrastTextDetected => {
140                Some("low-contrast text detected: excluded from default chunks")
141            }
142            WarningCode::AnnotationsPresent => Some("annotations present on page"),
143            WarningCode::ExternalLinksPresent => Some("external links present on page"),
144            WarningCode::UnsupportedAnnotation => Some("unsupported annotation ignored"),
145            WarningCode::ImageOnlyPage => Some("image-only page"),
146            _ => None,
147        }
148    }
149}
150
151impl core::fmt::Display for WarningCode {
152    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
153        f.write_str(self.as_str())
154    }
155}
156
#[cfg(test)]
mod tests {
    use super::*;

    /// Wire names of the codes in `WarningCode::ALL` accepted by `keep`, in `ALL` order.
    fn wire_names(keep: impl Fn(WarningCode) -> bool) -> Vec<&'static str> {
        WarningCode::ALL
            .iter()
            .copied()
            .filter(|&code| keep(code))
            .map(WarningCode::as_str)
            .collect()
    }

    #[test]
    fn wire_format_is_snake_case_and_stable() {
        for code in WarningCode::ALL.iter().copied() {
            // Serialized form must be the quoted `as_str` value.
            let encoded = serde_json::to_string(&code).unwrap();
            let expected = format!("\"{}\"", code.as_str());
            assert_eq!(encoded, expected);

            // And it must deserialize back to the same variant.
            let decoded: WarningCode = serde_json::from_str(&encoded).unwrap();
            assert_eq!(decoded, code);
        }
    }

    #[test]
    fn security_split_matches_contract() {
        assert_eq!(
            wire_names(WarningCode::is_security),
            [
                "hidden_text_detected",
                "off_page_text_detected",
                "low_contrast_text_detected",
                "annotations_present",
                "external_links_present",
                "image_only_page",
                "unsupported_annotation",
            ]
        );
    }

    #[test]
    fn security_report_warning_policy_matches_contract() {
        assert_eq!(
            wire_names(WarningCode::is_inventory_backed_security),
            [
                "annotations_present",
                "external_links_present",
                "unsupported_annotation",
            ]
        );

        let text_backed = wire_names(WarningCode::is_text_backed_security);
        assert_eq!(
            text_backed,
            [
                "hidden_text_detected",
                "off_page_text_detected",
                "low_contrast_text_detected",
            ]
        );

        assert_eq!(
            wire_names(WarningCode::is_page_backed_security),
            [
                "hidden_text_detected",
                "off_page_text_detected",
                "low_contrast_text_detected",
                "image_only_page",
            ]
        );

        // Default-chunk exclusion must coincide with the text-backed set.
        assert_eq!(
            wire_names(WarningCode::excludes_from_default_chunks),
            text_backed
        );
    }

    #[test]
    fn security_report_message_templates_match_contract() {
        let templates: Vec<(&str, &str)> = WarningCode::ALL
            .iter()
            .copied()
            .filter_map(|code| {
                let message = code.security_report_message_template()?;
                Some((code.as_str(), message))
            })
            .collect();

        let expected = [
            (
                "hidden_text_detected",
                "hidden text detected: excluded from default chunks",
            ),
            (
                "off_page_text_detected",
                "off-page text detected: excluded from default chunks",
            ),
            (
                "low_contrast_text_detected",
                "low-contrast text detected: excluded from default chunks",
            ),
            ("annotations_present", "annotations present on page"),
            ("external_links_present", "external links present on page"),
            ("image_only_page", "image-only page"),
            ("unsupported_annotation", "unsupported annotation ignored"),
        ];
        assert_eq!(templates, expected);
    }
}