1use serde::{Deserialize, Serialize};
21
22use crate::error::{ErrorCode, EthosError};
23
24#[derive(Debug, Clone, PartialEq, Eq, Default)]
27pub enum PageSelection {
28 #[default]
30 All,
31 Ranges(Vec<(u32, u32)>),
33}
34
35impl PageSelection {
36 pub fn parse(input: &str) -> Result<Self, PageSelectionError> {
43 let trimmed = input.trim();
44 if trimmed.is_empty() {
45 return Err(PageSelectionError::new("empty page selection"));
46 }
47 if trimmed == "all" {
48 return Ok(PageSelection::All);
49 }
50 let mut ranges: Vec<(u32, u32)> = Vec::new();
51 for part in trimmed.split(',') {
52 let part = part.trim();
53 if part.is_empty() {
54 return Err(PageSelectionError::new("empty segment in page selection"));
55 }
56 let (lo, hi) = match part.split_once('-') {
57 Some((a, b)) => (parse_page_number(a)?, parse_page_number(b)?),
58 None => {
59 let n = parse_page_number(part)?;
60 (n, n)
61 }
62 };
63 if lo > hi {
64 return Err(PageSelectionError::new(
65 "descending range in page selection",
66 ));
67 }
68 ranges.push((lo, hi));
69 }
70 ranges.sort_unstable();
71 let mut merged: Vec<(u32, u32)> = Vec::with_capacity(ranges.len());
72 for (lo, hi) in ranges {
73 match merged.last_mut() {
74 Some((_, prev_hi)) if lo <= prev_hi.saturating_add(1) => {
75 *prev_hi = (*prev_hi).max(hi);
76 }
77 _ => merged.push((lo, hi)),
78 }
79 }
80 Ok(PageSelection::Ranges(merged))
81 }
82
83 pub fn contains(&self, page: u32) -> bool {
85 match self {
86 PageSelection::All => true,
87 PageSelection::Ranges(rs) => rs.iter().any(|&(lo, hi)| page >= lo && page <= hi),
88 }
89 }
90
91 pub fn max_page(&self) -> Option<u32> {
93 match self {
94 PageSelection::All => None,
95 PageSelection::Ranges(rs) => rs.last().map(|&(_, hi)| hi),
96 }
97 }
98
99 pub fn validate_against(&self, page_count: u32) -> Result<(), PageSelectionError> {
103 if let Some(max) = self.max_page() {
104 if max > page_count {
105 return Err(PageSelectionError::new(
106 "page selection out of document range",
107 ));
108 }
109 }
110 Ok(())
111 }
112
113 #[cfg(feature = "full")]
116 pub fn canonical_value(&self) -> serde_json::Value {
117 match self {
118 PageSelection::All => serde_json::Value::String("all".to_string()),
119 PageSelection::Ranges(rs) => serde_json::Value::Array(
120 rs.iter()
121 .map(|&(lo, hi)| {
122 serde_json::Value::Array(vec![
123 serde_json::Value::from(lo),
124 serde_json::Value::from(hi),
125 ])
126 })
127 .collect(),
128 ),
129 }
130 }
131}
132
/// Error produced when a page selection cannot be parsed or does not fit the
/// document it is validated against.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PageSelectionError {
    /// Human-readable description of the problem; also the `Display` output.
    pub message: String,
}

impl PageSelectionError {
    /// Builds an error from anything convertible into a `String`.
    fn new(message: impl Into<String>) -> Self {
        let message = message.into();
        Self { message }
    }
}

impl std::fmt::Display for PageSelectionError {
    /// Writes the stored message verbatim.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{}", self.message)
    }
}

impl std::error::Error for PageSelectionError {}
155
156fn parse_page_number(s: &str) -> Result<u32, PageSelectionError> {
157 let s = s.trim();
158 if s.is_empty() || !s.bytes().all(|b| b.is_ascii_digit()) {
159 return Err(PageSelectionError::new("malformed page number"));
160 }
161 let n: u32 = s
162 .parse()
163 .map_err(|_| PageSelectionError::new("page number out of range"))?;
164 if n == 0 {
165 return Err(PageSelectionError::new("pages are 1-based"));
166 }
167 Ok(n)
168}
169
170#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
172pub struct Limits {
173 pub max_file_bytes: u64,
175 pub max_pages: u32,
177 pub max_parse_ms: u64,
179}
180
181impl Default for Limits {
182 fn default() -> Self {
183 Limits {
184 max_file_bytes: 256 * 1024 * 1024,
185 max_pages: 5000,
186 max_parse_ms: 120_000,
187 }
188 }
189}
190
191#[derive(Debug, Clone, Default, PartialEq, Eq)]
193pub struct ParseConfig {
194 pub pages: PageSelection,
196 pub limits: Limits,
198}
199
200impl ParseConfig {
201 #[cfg(feature = "full")]
204 pub fn config_hash_subset(&self) -> serde_json::Value {
205 let mut map = serde_json::Map::new();
206 map.insert("pages".to_string(), self.pages.canonical_value());
207 serde_json::Value::Object(map)
208 }
209
210 #[cfg(feature = "full")]
212 pub fn config_sha256(&self) -> Result<String, EthosError> {
213 crate::c14n::sha256_hex(&self.config_hash_subset())
214 .map_err(|e| EthosError::new(ErrorCode::InternalError, e.to_string()))
215 }
216}
217
/// Unit and property tests for page-selection parsing and config hashing.
#[cfg(test)]
mod tests {
    use super::*;
    use proptest::prelude::*;

    /// Accepted syntax, including sorting and merging of ranges.
    #[test]
    fn parses_prd_syntax() {
        assert_eq!(
            PageSelection::parse("1-5,9").unwrap(),
            PageSelection::Ranges(vec![(1, 5), (9, 9)])
        );
        assert_eq!(PageSelection::parse("all").unwrap(), PageSelection::All);
        assert_eq!(
            PageSelection::parse("3").unwrap(),
            PageSelection::Ranges(vec![(3, 3)])
        );
        assert_eq!(
            PageSelection::parse("4-6,1-3").unwrap(),
            PageSelection::Ranges(vec![(1, 6)])
        );
        assert_eq!(
            PageSelection::parse("2,1,3").unwrap(),
            PageSelection::Ranges(vec![(1, 3)])
        );
    }

    /// Empty, zero, descending, and non-numeric inputs must all be rejected.
    #[test]
    fn rejects_malformed() {
        for bad in ["", "0", "5-2", "1,,2", "a-b", "1-", "-3", "1.5"] {
            assert!(PageSelection::parse(bad).is_err(), "should reject {bad:?}");
        }
    }

    /// The highest selected page must not exceed the document's page count.
    #[test]
    fn validates_document_range() {
        let s = PageSelection::parse("1-5,9").unwrap();
        assert!(s.validate_against(9).is_ok());
        assert!(s.validate_against(8).is_err());
        assert!(PageSelection::All.validate_against(1).is_ok());
    }

    /// Equivalent selections must produce the same canonical JSON and hash.
    // `canonical_value` and `config_sha256` exist only with the `full`
    // feature, so this test is gated the same way; otherwise the test build
    // fails to compile when the feature is disabled.
    #[cfg(feature = "full")]
    #[test]
    fn canonical_value_and_hash_are_stable() {
        let s = PageSelection::parse("9,1-5").unwrap();
        assert_eq!(s.canonical_value().to_string(), "[[1,5],[9,9]]");
        let cfg = ParseConfig {
            pages: s,
            ..Default::default()
        };
        let cfg2 = ParseConfig {
            pages: PageSelection::parse("1-3,4-5,9").unwrap(),
            ..Default::default()
        };
        assert_eq!(cfg.config_sha256().unwrap(), cfg2.config_sha256().unwrap());
        assert_ne!(
            cfg.config_sha256().unwrap(),
            ParseConfig::default().config_sha256().unwrap()
        );
    }

    proptest! {
        /// Rendering a parsed selection back to text and re-parsing it must
        /// give the same selection.
        #[test]
        fn parse_is_idempotent_through_canonical_form(
            ranges in proptest::collection::vec((1u32..200, 0u32..20), 1..6)
        ) {
            let syntax = ranges.iter()
                .map(|&(lo, span)| if span == 0 { format!("{lo}") } else { format!("{lo}-{}", lo + span) })
                .collect::<Vec<_>>()
                .join(",");
            let parsed = PageSelection::parse(&syntax).unwrap();
            if let PageSelection::Ranges(rs) = &parsed {
                let rendered = rs.iter().map(|&(lo, hi)| if lo == hi { format!("{lo}") } else { format!("{lo}-{hi}") }).collect::<Vec<_>>().join(",");
                prop_assert_eq!(PageSelection::parse(&rendered).unwrap(), parsed);
            }
        }
    }
}