Skip to main content

ethos_core/
config.rs

1/*
2 * Copyright 2026 The Ethos maintainers
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *     http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17//! Parse configuration: page selection (`--pages 1-5,9`), resource limits, and the
18//! effective-config hash subset (determinism contract §7).
19
20use serde::{Deserialize, Serialize};
21
22use crate::error::{ErrorCode, EthosError};
23
/// Which pages of a document a parse covers, in canonical form.
///
/// The selection feeds `config_sha256`: choosing a different range legitimately
/// produces a different canonical output.
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub enum PageSelection {
    /// Every page. Canonical form: the string `"all"`.
    #[default]
    All,
    /// 1-based inclusive `(lo, hi)` ranges, merged and sorted ascending when built by
    /// `PageSelection::parse`. Canonical form: `[[lo, hi], …]`.
    Ranges(Vec<(u32, u32)>),
}
34
35impl PageSelection {
36    /// Parse `--pages` syntax: comma-separated 1-based pages or inclusive ranges,
37    /// e.g. `1-5,9`. Overlapping/adjacent ranges merge; order normalizes ascending.
38    ///
39    /// Syntax and out-of-range failures are [`PageSelectionError`] — *usage* errors the
40    /// CLI maps to exit code 2, deliberately distinct from the 10 stable parse-failure
41    /// codes (those describe the document, not the invocation).
42    pub fn parse(input: &str) -> Result<Self, PageSelectionError> {
43        let trimmed = input.trim();
44        if trimmed.is_empty() {
45            return Err(PageSelectionError::new("empty page selection"));
46        }
47        if trimmed == "all" {
48            return Ok(PageSelection::All);
49        }
50        let mut ranges: Vec<(u32, u32)> = Vec::new();
51        for part in trimmed.split(',') {
52            let part = part.trim();
53            if part.is_empty() {
54                return Err(PageSelectionError::new("empty segment in page selection"));
55            }
56            let (lo, hi) = match part.split_once('-') {
57                Some((a, b)) => (parse_page_number(a)?, parse_page_number(b)?),
58                None => {
59                    let n = parse_page_number(part)?;
60                    (n, n)
61                }
62            };
63            if lo > hi {
64                return Err(PageSelectionError::new(
65                    "descending range in page selection",
66                ));
67            }
68            ranges.push((lo, hi));
69        }
70        ranges.sort_unstable();
71        let mut merged: Vec<(u32, u32)> = Vec::with_capacity(ranges.len());
72        for (lo, hi) in ranges {
73            match merged.last_mut() {
74                Some((_, prev_hi)) if lo <= prev_hi.saturating_add(1) => {
75                    *prev_hi = (*prev_hi).max(hi);
76                }
77                _ => merged.push((lo, hi)),
78            }
79        }
80        Ok(PageSelection::Ranges(merged))
81    }
82
83    /// True when `page` (1-based) is selected.
84    pub fn contains(&self, page: u32) -> bool {
85        match self {
86            PageSelection::All => true,
87            PageSelection::Ranges(rs) => rs.iter().any(|&(lo, hi)| page >= lo && page <= hi),
88        }
89    }
90
91    /// Highest selected page, when bounded.
92    pub fn max_page(&self) -> Option<u32> {
93        match self {
94            PageSelection::All => None,
95            PageSelection::Ranges(rs) => rs.last().map(|&(_, hi)| hi),
96        }
97    }
98
99    /// Validate against the document's page count: out-of-range selection is a stable
100    /// error (`page_limit_exceeded` is about limits; out-of-range selection is usage —
101    /// PRD §16 wants a stable error on out-of-range, mapped here to `invalid` usage error).
102    pub fn validate_against(&self, page_count: u32) -> Result<(), PageSelectionError> {
103        if let Some(max) = self.max_page() {
104            if max > page_count {
105                return Err(PageSelectionError::new(
106                    "page selection out of document range",
107                ));
108            }
109        }
110        Ok(())
111    }
112
113    /// Canonical JSON form for the config hash (contract §7):
114    /// `"all"` or `[[lo, hi], …]`.
115    #[cfg(feature = "full")]
116    pub fn canonical_value(&self) -> serde_json::Value {
117        match self {
118            PageSelection::All => serde_json::Value::String("all".to_string()),
119            PageSelection::Ranges(rs) => serde_json::Value::Array(
120                rs.iter()
121                    .map(|&(lo, hi)| {
122                        serde_json::Value::Array(vec![
123                            serde_json::Value::from(lo),
124                            serde_json::Value::from(hi),
125                        ])
126                    })
127                    .collect(),
128            ),
129        }
130    }
131}
132
/// Error for bad page-selection syntax or a selection that fails validation.
///
/// This is a *usage* error (CLI exit 2), kept distinct from the stable parse-failure
/// codes.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct PageSelectionError {
    /// Deterministic message describing the failure.
    pub message: String,
}
140
141impl PageSelectionError {
142    fn new(message: impl Into<String>) -> Self {
143        PageSelectionError {
144            message: message.into(),
145        }
146    }
147}
148
149impl core::fmt::Display for PageSelectionError {
150    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
151        f.write_str(&self.message)
152    }
153}
154impl std::error::Error for PageSelectionError {}
155
156fn parse_page_number(s: &str) -> Result<u32, PageSelectionError> {
157    let s = s.trim();
158    if s.is_empty() || !s.bytes().all(|b| b.is_ascii_digit()) {
159        return Err(PageSelectionError::new("malformed page number"));
160    }
161    let n: u32 = s
162        .parse()
163        .map_err(|_| PageSelectionError::new("page number out of range"))?;
164    if n == 0 {
165        return Err(PageSelectionError::new("pages are 1-based"));
166    }
167    Ok(n)
168}
169
170/// Resource limits (PRD §10 base requirements). Defaults are deliberately conservative.
171#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
172pub struct Limits {
173    /// Max input file size in bytes.
174    pub max_file_bytes: u64,
175    /// Max page count.
176    pub max_pages: u32,
177    /// Max wall-time per parse, milliseconds.
178    pub max_parse_ms: u64,
179}
180
181impl Default for Limits {
182    fn default() -> Self {
183        Limits {
184            max_file_bytes: 256 * 1024 * 1024,
185            max_pages: 5000,
186            max_parse_ms: 120_000,
187        }
188    }
189}
190
191/// Effective parse configuration.
192#[derive(Debug, Clone, Default, PartialEq, Eq)]
193pub struct ParseConfig {
194    /// Page selection.
195    pub pages: PageSelection,
196    /// Resource limits.
197    pub limits: Limits,
198}
199
200impl ParseConfig {
201    /// The config-hash subset (contract §7): exactly the profile's `config_hash_inputs`
202    /// (v1: `pages`), as a canonical JSON object.
203    #[cfg(feature = "full")]
204    pub fn config_hash_subset(&self) -> serde_json::Value {
205        let mut map = serde_json::Map::new();
206        map.insert("pages".to_string(), self.pages.canonical_value());
207        serde_json::Value::Object(map)
208    }
209
210    /// `config_sha256` over c14n of the hash subset.
211    #[cfg(feature = "full")]
212    pub fn config_sha256(&self) -> Result<String, EthosError> {
213        crate::c14n::sha256_hex(&self.config_hash_subset())
214            .map_err(|e| EthosError::new(ErrorCode::InternalError, e.to_string()))
215    }
216}
217
/// Unit and property tests for page-selection parsing, validation and config hashing.
#[cfg(test)]
mod tests {
    use super::*;
    use proptest::prelude::*;

    /// The PRD examples parse, and overlapping/adjacent input merges.
    #[test]
    fn parses_prd_syntax() {
        assert_eq!(
            PageSelection::parse("1-5,9").unwrap(),
            PageSelection::Ranges(vec![(1, 5), (9, 9)])
        );
        assert_eq!(PageSelection::parse("all").unwrap(), PageSelection::All);
        assert_eq!(
            PageSelection::parse("3").unwrap(),
            PageSelection::Ranges(vec![(3, 3)])
        );
        // merge: overlap and adjacency normalize to one canonical form
        assert_eq!(
            PageSelection::parse("4-6,1-3").unwrap(),
            PageSelection::Ranges(vec![(1, 6)])
        );
        assert_eq!(
            PageSelection::parse("2,1,3").unwrap(),
            PageSelection::Ranges(vec![(1, 3)])
        );
    }

    /// Empty, zero, descending, non-numeric and half-open inputs are all rejected.
    #[test]
    fn rejects_malformed() {
        for bad in ["", "0", "5-2", "1,,2", "a-b", "1-", "-3", "1.5"] {
            assert!(PageSelection::parse(bad).is_err(), "should reject {bad:?}");
        }
    }

    /// Validation compares the highest selected page with the document's page count.
    #[test]
    fn validates_document_range() {
        let s = PageSelection::parse("1-5,9").unwrap();
        assert!(s.validate_against(9).is_ok());
        assert!(s.validate_against(8).is_err());
        assert!(PageSelection::All.validate_against(1).is_ok());
    }

    /// Equivalent spellings share one canonical value and one hash.
    ///
    /// Gated on `full`: `canonical_value` and `config_sha256` only exist with that
    /// feature, so without the gate this module would not compile when it is off.
    #[cfg(feature = "full")]
    #[test]
    fn canonical_value_and_hash_are_stable() {
        let s = PageSelection::parse("9,1-5").unwrap();
        assert_eq!(s.canonical_value().to_string(), "[[1,5],[9,9]]");
        let cfg = ParseConfig {
            pages: s,
            ..Default::default()
        };
        // equivalent input spellings hash identically
        let cfg2 = ParseConfig {
            pages: PageSelection::parse("1-3,4-5,9").unwrap(),
            ..Default::default()
        };
        assert_eq!(cfg.config_sha256().unwrap(), cfg2.config_sha256().unwrap());
        // and the default ("all") differs
        assert_ne!(
            cfg.config_sha256().unwrap(),
            ParseConfig::default().config_sha256().unwrap()
        );
    }

    proptest! {
        /// Parse, render the canonical ranges back to syntax, re-parse: a fixed point.
        #[test]
        fn parse_is_idempotent_through_canonical_form(
            ranges in proptest::collection::vec((1u32..200, 0u32..20), 1..6)
        ) {
            // build an arbitrary syntax string, parse, render canonical, re-parse: fixed point
            let syntax = ranges
                .iter()
                .map(|&(lo, span)| {
                    if span == 0 {
                        format!("{lo}")
                    } else {
                        format!("{lo}-{}", lo + span)
                    }
                })
                .collect::<Vec<_>>()
                .join(",");
            let parsed = PageSelection::parse(&syntax).unwrap();
            if let PageSelection::Ranges(rs) = &parsed {
                let rendered = rs
                    .iter()
                    .map(|&(lo, hi)| {
                        if lo == hi {
                            format!("{lo}")
                        } else {
                            format!("{lo}-{hi}")
                        }
                    })
                    .collect::<Vec<_>>()
                    .join(",");
                prop_assert_eq!(PageSelection::parse(&rendered).unwrap(), parsed);
            }
        }
    }
}