Skip to main content

openjd_expr/
range_expr.rs

1// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2// Copyright by contributors to this project.
3// SPDX-License-Identifier: (Apache-2.0 OR MIT)
4
5//! Integer range expression parsing.
6//!
7//! Mirrors Python `openjd.expr._range_expr`. Parses expressions like
8//! `"1-10"`, `"1-10:2"`, `"1-5,10-15"` into sorted, non-overlapping ranges.
9
10use crate::error::ExpressionError;
11use std::fmt;
12
13/// Maximum number of comma-separated sub-ranges in a single range expression.
14///
15/// Each sub-range becomes one `IntRange` entry in the parsed `RangeExpr`.
16/// Real-world range expressions (frame ranges, task chunks) contain at most
17/// a few dozen sub-ranges; 10,000 is two orders of magnitude above any
18/// plausible legitimate use. Rejecting larger inputs at parse time prevents
19/// an attacker from forcing a multi-megabyte `Vec<IntRange>` allocation
20/// through a parameter value before any downstream resource-bounding
21/// (e.g. the evaluator's memory limit) applies.
22///
23/// This cap targets the source-text and heap dimensions of a `RangeExpr`.
24/// It does **not** cap the logical element count of a single chunk —
25/// `RangeExpr` stores chunks symbolically (`start`, `end`, `step`), so a
26/// single-chunk range `"1-100000000000"` allocates only one `IntRange`
27/// regardless of its logical length. Downstream materialization (e.g.
28/// `list(range_expr)`) is already bounded by the evaluator's per-element
29/// operation charge and memory limit.
30///
31/// See `specs/expr/range-expr.md` (Defensive caps) for rationale.
32pub const MAX_RANGE_EXPR_CHUNKS: usize = 10_000;
33
34/// Error raised when parsing a range expression fails.
35#[derive(Debug, Clone)]
36pub struct RangeExprError {
37    pub expr: String,
38    pub message: String,
39    pub position: Option<usize>,
40}
41
42impl RangeExprError {
43    pub fn new(expr: impl Into<String>, message: impl Into<String>) -> Self {
44        Self {
45            expr: expr.into(),
46            message: message.into(),
47            position: None,
48        }
49    }
50    pub fn at(expr: impl Into<String>, message: impl Into<String>, position: usize) -> Self {
51        Self {
52            expr: expr.into(),
53            message: message.into(),
54            position: Some(position),
55        }
56    }
57}
58
59impl fmt::Display for RangeExprError {
60    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
61        if let Some(pos) = self.position {
62            write!(
63                f,
64                "{} in '{}' after '{}'",
65                self.message,
66                self.expr,
67                &self.expr[..pos]
68            )
69        } else {
70            write!(f, "{}: '{}'", self.message, self.expr)
71        }
72    }
73}
74
75impl std::error::Error for RangeExprError {}
76
77impl From<RangeExprError> for ExpressionError {
78    /// Lift a range-expression parse error into an `ExpressionError`.
79    ///
80    /// If the error carries a `position`, attach the source string with a
81    /// span covering the single character at that offset so the standard
82    /// caret renderer points at the failure. Without a position, fall
83    /// back to the stringified `Display` form.
84    fn from(e: RangeExprError) -> Self {
85        let msg = e.to_string();
86        let err = ExpressionError::parse_error(msg);
87        match e.position {
88            Some(pos) if pos < e.expr.len() => err.with_span(&e.expr, pos, pos + 1),
89            _ => err,
90        }
91    }
92}
93
94/// A single contiguous range of integers with a step.
95#[derive(Debug, Clone, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)]
96pub struct IntRange {
97    pub start: i64,
98    pub end: i64,
99    pub step: i64,
100}
101
102impl IntRange {
103    /// Create a range, normalizing descending ranges to ascending form.
104    /// After construction, `start <= end` and `step > 0` always hold.
105    pub fn new(start: i64, end: i64, step: i64) -> Result<Self, ExpressionError> {
106        if step == 0 {
107            return Err(ExpressionError::parse_error("Range: step must not be zero"));
108        }
109        if step > 0 && start > end {
110            return Err(ExpressionError::parse_error(
111                "Range: a descending range must have a negative step",
112            ));
113        }
114        if step < 0 && start < end {
115            return Err(ExpressionError::parse_error(
116                "Range: an ascending range must have a positive step",
117            ));
118        }
119        if step < 0 {
120            // Normalize descending to ascending form (matching Python _IntRange)
121            let count = ((start - end) / (-step)) + 1;
122            let last = start + (count - 1) * step; // smallest value
123            Ok(Self {
124                start: last,
125                end: start,
126                step: -step,
127            })
128        } else {
129            // Normalize end to actual last value in the range
130            let count = (end - start) / step + 1;
131            let actual_end = start + (count - 1) * step;
132            Ok(Self {
133                start,
134                end: actual_end,
135                step,
136            })
137        }
138    }
139
140    /// Number of integers in this range.
141    pub fn len(&self) -> usize {
142        // After normalization, start <= end and step > 0 always
143        ((self.end - self.start) / self.step + 1) as usize
144    }
145
146    /// Returns `true` if the range contains no elements.
147    pub fn is_empty(&self) -> bool {
148        self.len() == 0
149    }
150
151    /// Test whether `value` is a member of this range.
152    pub fn contains(&self, value: i64) -> bool {
153        if value < self.start || value > self.end {
154            return false;
155        }
156        (value - self.start) % self.step == 0
157    }
158
159    /// Iterate over all values in ascending order.
160    pub fn iter(&self) -> impl Iterator<Item = i64> + '_ {
161        (0..self.len() as i64).map(move |i| self.start + i * self.step)
162    }
163
164    /// Get element by zero-based index, or `None` if out of bounds.
165    pub fn get(&self, index: usize) -> Option<i64> {
166        if index < self.len() {
167            Some(self.start + index as i64 * self.step)
168        } else {
169            None
170        }
171    }
172}
173
174impl std::fmt::Display for IntRange {
175    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
176        let len = self.len();
177        if len == 1 {
178            write!(f, "{}", self.start)
179        } else if len == 2 {
180            write!(f, "{},{}", self.start, self.end)
181        } else if self.step == 1 {
182            write!(f, "{}-{}", self.start, self.end)
183        } else {
184            write!(f, "{}-{}:{}", self.start, self.end, self.step)
185        }
186    }
187}
188
189/// A range expression: a sorted set of non-overlapping integer ranges.
190#[derive(Debug, Clone, Eq, serde::Serialize)]
191pub struct RangeExpr {
192    ranges: Vec<IntRange>,
193    /// Cumulative length indices for O(log n) getitem. Entry i = total elements in ranges[0..=i].
194    cumulative_lengths: Vec<usize>,
195    /// Packed: lower 63 bits = length, MSB = contiguous display flag.
196    /// Contiguous flag only affects Display; not preserved through constructors.
197    #[serde(serialize_with = "serialize_length")]
198    length: usize,
199}
200
201const CONTIGUOUS_BIT: usize = 1 << (usize::BITS - 1);
202const LENGTH_MASK: usize = !CONTIGUOUS_BIT;
203
204fn serialize_length<S: serde::Serializer>(length: &usize, s: S) -> Result<S::Ok, S::Error> {
205    s.serialize_u64((length & LENGTH_MASK) as u64)
206}
207
208impl<'de> serde::Deserialize<'de> for RangeExpr {
209    fn deserialize<D: serde::Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
210        #[derive(serde::Deserialize)]
211        struct RangeExprHelper {
212            ranges: Vec<IntRange>,
213        }
214        let helper = RangeExprHelper::deserialize(deserializer)?;
215        Self::from_ranges(helper.ranges).map_err(serde::de::Error::custom)
216    }
217}
218
219impl PartialEq for RangeExpr {
220    fn eq(&self, other: &Self) -> bool {
221        self.ranges == other.ranges
222    }
223}
224
225impl std::hash::Hash for RangeExpr {
226    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
227        self.ranges.hash(state);
228    }
229}
230
231impl std::str::FromStr for RangeExpr {
232    type Err = ExpressionError;
233
234    fn from_str(expr: &str) -> Result<Self, Self::Err> {
235        parse_range_expr(expr)
236    }
237}
238
239impl RangeExpr {
240    /// Set contiguous display mode. When true, Display uses "{start}-{end}" format
241    /// even for single values (e.g., "5-5"). Only meaningful for contiguous chunks.
242    #[must_use]
243    pub fn with_contiguous(mut self, contiguous: bool) -> Self {
244        if contiguous {
245            self.length |= CONTIGUOUS_BIT;
246        } else {
247            self.length &= LENGTH_MASK;
248        }
249        self
250    }
251
252    /// Create a RangeExpr from a list of individual values.
253    pub fn from_values(mut values: Vec<i64>) -> Self {
254        if values.is_empty() {
255            return Self {
256                ranges: Vec::new(),
257                cumulative_lengths: Vec::new(),
258                length: 0,
259            };
260        }
261        // Sort and deduplicate (matching Python from_list)
262        values.sort();
263        values.dedup();
264        let length = values.len();
265        let mut ranges = Vec::new();
266        let mut i = 0;
267        while i < values.len() {
268            let start = values[i];
269            // Detect step: check if next values form an arithmetic sequence
270            if i + 1 < values.len() {
271                let step = values[i + 1] - values[i];
272                if step != 0 {
273                    let mut j = i + 1;
274                    while j < values.len() && values[j] == start + (j - i) as i64 * step {
275                        j += 1;
276                    }
277                    if j > i + 1 {
278                        let end = values[j - 1];
279                        ranges.push(IntRange { start, end, step });
280                        i = j;
281                        continue;
282                    }
283                }
284            }
285            ranges.push(IntRange {
286                start,
287                end: start,
288                step: 1,
289            });
290            i += 1;
291        }
292        let cumulative_lengths = build_cumulative(&ranges);
293        Self {
294            ranges,
295            cumulative_lengths,
296            length,
297        }
298    }
299
300    /// Create from pre-built `IntRange`s. Sorts, merges adjacent ranges, and validates no overlaps.
301    ///
302    /// Returns an error if the number of input ranges exceeds
303    /// [`MAX_RANGE_EXPR_CHUNKS`].
304    pub fn from_ranges(mut ranges: Vec<IntRange>) -> Result<Self, ExpressionError> {
305        if ranges.is_empty() {
306            return Err(ExpressionError::parse_error(
307                "Range expression cannot be empty",
308            ));
309        }
310        if ranges.len() > MAX_RANGE_EXPR_CHUNKS {
311            return Err(ExpressionError::parse_error(format!(
312                "Range expression has too many sub-ranges ({}); maximum is {}",
313                ranges.len(),
314                MAX_RANGE_EXPR_CHUNKS,
315            )));
316        }
317        // Sort by start
318        ranges.sort_by_key(|r| (r.start, r.end));
319        // Merge adjacent ranges with same step
320        let mut merged = vec![ranges[0].clone()];
321        for r in &ranges[1..] {
322            let last = merged.last().unwrap();
323            if last.step == r.step && last.end + r.step == r.start {
324                let new_end = r.end;
325                let last_start = last.start;
326                let step = last.step;
327                *merged.last_mut().unwrap() = IntRange {
328                    start: last_start,
329                    end: new_end,
330                    step,
331                };
332            } else {
333                merged.push(r.clone());
334            }
335        }
336        // Validate no overlaps
337        for i in 1..merged.len() {
338            if merged[i].start <= merged[i - 1].end {
339                return Err(ExpressionError::parse_error(format!(
340                    "Range expression has overlapping ranges: {} and {}",
341                    merged[i - 1],
342                    merged[i]
343                )));
344            }
345        }
346        // Use saturating arithmetic when summing per-chunk lengths so that a
347        // multi-chunk range whose combined logical length exceeds `usize`
348        // capacity saturates at `usize::MAX` rather than wrapping to a small
349        // value that would corrupt `len()`/`is_empty()`. `RangeExpr` stores
350        // chunks symbolically, so an enormous logical length is not itself a
351        // DoS vector — only the chunk count is capped via
352        // [`MAX_RANGE_EXPR_CHUNKS`].
353        let length: usize = merged
354            .iter()
355            .fold(0usize, |acc, r| acc.saturating_add(r.len()));
356        let cumulative_lengths = build_cumulative(&merged);
357        Ok(Self {
358            ranges: merged,
359            cumulative_lengths,
360            length,
361        })
362    }
363
364    /// Total number of integers across all sub-ranges.
365    pub fn len(&self) -> usize {
366        self.length & LENGTH_MASK
367    }
368
369    /// Returns `true` if the range expression contains no elements.
370    pub fn is_empty(&self) -> bool {
371        self.length & LENGTH_MASK == 0
372    }
373
374    /// Test membership via binary search on sub-range endpoints. O(log m).
375    pub fn contains(&self, value: i64) -> bool {
376        // Binary search on range ends (mirrors Python's bisect_left on _ends)
377        let idx = self.ranges.partition_point(|r| r.end < value);
378        idx < self.ranges.len() && self.ranges[idx].contains(value)
379    }
380
381    /// Get element by index (supports negative indexing like Python).
382    pub fn get(&self, index: i64) -> Option<i64> {
383        let len = self.length & LENGTH_MASK;
384        let idx = if index < 0 { len as i64 + index } else { index } as usize;
385        if idx >= len {
386            return None;
387        }
388        // Binary search on cumulative lengths (mirrors Python's bisect on _range_length_indices)
389        let range_idx = self.cumulative_lengths.partition_point(|&cum| cum <= idx);
390        let offset = if range_idx == 0 {
391            idx
392        } else {
393            idx - self.cumulative_lengths[range_idx - 1]
394        };
395        self.ranges[range_idx].get(offset)
396    }
397
398    /// The underlying sub-ranges (sorted, non-overlapping).
399    pub fn ranges(&self) -> &[IntRange] {
400        &self.ranges
401    }
402
403    /// Cumulative element counts per sub-range (for index mapping).
404    pub fn cumulative_lengths(&self) -> &[usize] {
405        &self.cumulative_lengths
406    }
407
408    /// Iterate over all values in ascending order across all sub-ranges.
409    pub fn iter(&self) -> impl Iterator<Item = i64> + '_ {
410        self.ranges.iter().flat_map(|r| r.iter())
411    }
412
413    pub fn to_vec(&self) -> Vec<i64> {
414        self.iter().collect()
415    }
416
417    /// Slice this range expression with a positive step.
418    ///
419    /// `start`, `stop`, `step` are already-normalized indices into the
420    /// flattened element sequence. `step` must be positive. Returns a new
421    /// `RangeExpr` without materializing any elements.
422    ///
423    /// Runs in O(m) where m is the number of sub-ranges, regardless of
424    /// how many elements are selected. Each sub-range's intersection with
425    /// the slice index sequence is computed as pure arithmetic.
426    ///
427    /// For negative step (reverse slices), callers should use `get()` to
428    /// collect elements into a list, since `RangeExpr` cannot represent
429    /// descending sequences.
430    pub fn slice(&self, start: i64, stop: i64, step: i64) -> Result<RangeExpr, ExpressionError> {
431        if step <= 0 {
432            return Err(ExpressionError::parse_error(
433                "RangeExpr::slice requires a positive step",
434            ));
435        }
436        let total_len = self.len() as i64;
437        // Clamp to valid range
438        let start = start.max(0).min(total_len);
439        let stop = stop.max(0).min(total_len);
440        if start >= stop {
441            return Ok(RangeExpr {
442                ranges: Vec::new(),
443                cumulative_lengths: Vec::new(),
444                length: 0,
445            });
446        }
447
448        let mut result_ranges = Vec::new();
449        let mut cum_start: i64 = 0; // global index where current sub-range begins
450
451        for r in &self.ranges {
452            let r_len = r.len() as i64;
453            let cum_end = cum_start + r_len; // exclusive end of this sub-range's global indices
454
455            // The slice selects global indices: start, start+step, start+2*step, ...
456            // Find the first slice index >= cum_start and the last < cum_end,
457            // intersected with [start, stop).
458
459            // First slice index that falls within this sub-range:
460            // We need the smallest k such that start + k*step >= cum_start and start + k*step < cum_end
461            let first_global = if start >= cum_start {
462                start
463            } else {
464                // First multiple of step at or after cum_start
465                let offset = cum_start - start;
466                let k = (offset + step - 1) / step; // ceil division
467                start + k * step
468            };
469
470            // Must also be < stop and < cum_end
471            let range_stop = stop.min(cum_end);
472            if first_global >= range_stop {
473                cum_start = cum_end;
474                continue;
475            }
476
477            // Verify it's aligned to the slice stride
478            debug_assert!((first_global - start) % step == 0);
479
480            // Local offset within this IntRange
481            let first_local = (first_global - cum_start) as usize;
482            // How many slice-selected indices fall in this sub-range?
483            let count = (range_stop - first_global - 1) / step + 1;
484            let last_local = first_local + (count as usize - 1) * step as usize;
485
486            // Map local indices to values: value = r.start + local * r.step
487            let new_start = r.start + first_local as i64 * r.step;
488            let new_end = r.start + last_local as i64 * r.step;
489            let new_step = r.step * step;
490
491            if count == 1 {
492                result_ranges.push(IntRange {
493                    start: new_start,
494                    end: new_start,
495                    step: 1,
496                });
497            } else {
498                result_ranges.push(IntRange {
499                    start: new_start,
500                    end: new_end,
501                    step: new_step,
502                });
503            }
504
505            cum_start = cum_end;
506        }
507
508        if result_ranges.is_empty() {
509            return Ok(RangeExpr {
510                ranges: Vec::new(),
511                cumulative_lengths: Vec::new(),
512                length: 0,
513            });
514        }
515        let length = result_ranges.iter().map(|r| r.len()).sum();
516        let cumulative_lengths = build_cumulative(&result_ranges);
517        Ok(RangeExpr {
518            ranges: result_ranges,
519            cumulative_lengths,
520            length,
521        })
522    }
523
524    /// Heap allocation size (for memory tracking).
525    pub fn heap_size(&self) -> usize {
526        use std::mem::size_of;
527        self.ranges.capacity() * size_of::<IntRange>()
528            + self.cumulative_lengths.capacity() * size_of::<usize>()
529    }
530}
531
532fn build_cumulative(ranges: &[IntRange]) -> Vec<usize> {
533    let mut cum = Vec::with_capacity(ranges.len());
534    let mut total = 0;
535    for r in ranges {
536        total += r.len();
537        cum.push(total);
538    }
539    cum
540}
541
542impl std::fmt::Display for RangeExpr {
543    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
544        if self.length & CONTIGUOUS_BIT != 0 {
545            // Contiguous chunk format: always "{start}-{end}", even for single values
546            if self.ranges.len() == 1 && self.ranges[0].step == 1 {
547                return write!(f, "{}-{}", self.ranges[0].start, self.ranges[0].end);
548            }
549            let len = self.length & LENGTH_MASK;
550            if len == 1 {
551                let val = self.ranges[0].start;
552                return write!(f, "{val}-{val}");
553            }
554            // Multiple ranges: fall through to normal display
555        }
556        let parts: Vec<String> = self.ranges.iter().map(|r| r.to_string()).collect();
557        write!(f, "{}", parts.join(","))
558    }
559}
560
561/// Parse a range expression string.
562fn parse_range_expr(expr: &str) -> Result<RangeExpr, ExpressionError> {
563    let expr = expr.trim();
564    if expr.is_empty() {
565        return Err(ExpressionError::parse_error("Empty expression"));
566    }
567
568    let mut ranges = Vec::new();
569    let mut pos = 0;
570    let bytes = expr.as_bytes();
571
572    loop {
573        // Bail out early if the input is producing more sub-ranges than
574        // we are willing to handle. `from_ranges` would reject the same
575        // input, but the early check lets us stop consuming source
576        // characters as soon as the limit is hit.
577        if ranges.len() > MAX_RANGE_EXPR_CHUNKS {
578            return Err(ExpressionError::parse_error(format!(
579                "Range expression has too many sub-ranges (> {MAX_RANGE_EXPR_CHUNKS}): '{expr}'",
580            )));
581        }
582        // Skip whitespace
583        while pos < bytes.len() && bytes[pos].is_ascii_whitespace() {
584            pos += 1;
585        }
586        if pos >= bytes.len() {
587            // If we got here after a comma, that's a trailing comma error
588            if !ranges.is_empty() && pos > 0 {
589                // Check if the last non-whitespace char before end was a comma
590                let last_content = expr.trim_end();
591                if last_content.ends_with(',') {
592                    return Err(ExpressionError::parse_error(format!(
593                        "Trailing comma in range expression: '{expr}'"
594                    )));
595                }
596            }
597            break;
598        }
599
600        // Parse integer (possibly negative)
601        let start = parse_integer(expr, &mut pos)?;
602
603        // Skip whitespace
604        while pos < bytes.len() && bytes[pos].is_ascii_whitespace() {
605            pos += 1;
606        }
607
608        if pos >= bytes.len() || bytes[pos] == b',' {
609            // Single value
610            ranges.push(IntRange::new(start, start, 1)?);
611            if pos < bytes.len() {
612                pos += 1;
613            } // skip comma
614            continue;
615        }
616
617        if bytes[pos] != b'-' {
618            return Err(ExpressionError::parse_error(format!(
619                "Unexpected '{}' in '{expr}'",
620                bytes[pos] as char
621            )));
622        }
623        pos += 1; // skip '-'
624
625        // Skip whitespace
626        while pos < bytes.len() && bytes[pos].is_ascii_whitespace() {
627            pos += 1;
628        }
629
630        let end = parse_integer(expr, &mut pos)?;
631
632        // Skip whitespace
633        while pos < bytes.len() && bytes[pos].is_ascii_whitespace() {
634            pos += 1;
635        }
636
637        if pos >= bytes.len() || bytes[pos] == b',' {
638            // Range without step
639            if start <= end {
640                ranges.push(IntRange::new(start, end, 1)?);
641            } else {
642                // Descending range without step is invalid per spec
643                return Err(ExpressionError::parse_error(format!(
644                    "Descending range {start}-{end} requires a negative step"
645                )));
646            }
647            if pos < bytes.len() {
648                pos += 1;
649            } // skip comma
650            continue;
651        }
652
653        if bytes[pos] != b':' {
654            return Err(ExpressionError::parse_error(format!(
655                "Expected ':' or ',' in '{expr}'"
656            )));
657        }
658        pos += 1; // skip ':'
659
660        // Skip whitespace
661        while pos < bytes.len() && bytes[pos].is_ascii_whitespace() {
662            pos += 1;
663        }
664
665        let step = parse_integer(expr, &mut pos)?;
666        if step == 0 {
667            return Err(ExpressionError::parse_error("Step must not be zero"));
668        }
669
670        ranges.push(IntRange::new(start, end, step)?);
671
672        // Skip whitespace
673        while pos < bytes.len() && bytes[pos].is_ascii_whitespace() {
674            pos += 1;
675        }
676
677        if pos < bytes.len() {
678            if bytes[pos] == b',' {
679                pos += 1;
680            } else {
681                return Err(ExpressionError::parse_error(format!(
682                    "Unexpected '{}' in '{expr}'",
683                    bytes[pos] as char
684                )));
685            }
686        }
687    }
688
689    if ranges.is_empty() {
690        return Err(ExpressionError::parse_error("Empty expression"));
691    }
692
693    RangeExpr::from_ranges(ranges)
694}
695
696fn parse_integer(expr: &str, pos: &mut usize) -> Result<i64, ExpressionError> {
697    let bytes = expr.as_bytes();
698    if *pos >= bytes.len() {
699        return Err(ExpressionError::parse_error(format!(
700            "Unexpected end of expression: '{expr}'"
701        )));
702    }
703
704    let negative = bytes[*pos] == b'-';
705    if negative {
706        *pos += 1;
707    }
708
709    if *pos >= bytes.len() || !bytes[*pos].is_ascii_digit() {
710        return Err(ExpressionError::parse_error(format!(
711            "Expected integer in '{expr}'"
712        )));
713    }
714
715    let start = *pos;
716    while *pos < bytes.len() && bytes[*pos].is_ascii_digit() {
717        *pos += 1;
718    }
719
720    let num_str = &expr[start..*pos];
721    let value: i64 = num_str.parse().map_err(|_| {
722        ExpressionError::parse_error(format!("Invalid integer '{num_str}' in '{expr}'"))
723    })?;
724
725    Ok(if negative { -value } else { value })
726}
727
728#[cfg(test)]
729mod tests {
730    use super::*;
731
732    #[test]
733    fn simple_range() {
734        let r = "1-5".parse::<RangeExpr>().unwrap();
735        assert_eq!(r.to_vec(), vec![1, 2, 3, 4, 5]);
736    }
737
738    #[test]
739    fn stepped_range() {
740        let r = "1-10:2".parse::<RangeExpr>().unwrap();
741        assert_eq!(r.to_vec(), vec![1, 3, 5, 7, 9]);
742    }
743
744    #[test]
745    fn multiple_ranges() {
746        let r = "1-3,10-12".parse::<RangeExpr>().unwrap();
747        assert_eq!(r.to_vec(), vec![1, 2, 3, 10, 11, 12]);
748    }
749
750    #[test]
751    fn single_value() {
752        let r = "42".parse::<RangeExpr>().unwrap();
753        assert_eq!(r.to_vec(), vec![42]);
754    }
755
756    #[test]
757    fn negative_range() {
758        let r = "-3 - 2".parse::<RangeExpr>().unwrap();
759        assert_eq!(r.to_vec(), vec![-3, -2, -1, 0, 1, 2]);
760    }
761
762    #[test]
763    fn overlap_error() {
764        assert!("1-5,3-7".parse::<RangeExpr>().is_err());
765    }
766
767    #[test]
768    fn zero_step_error() {
769        assert!("1-5:0".parse::<RangeExpr>().is_err());
770    }
771
772    #[test]
773    fn empty_error() {
774        assert!("".parse::<RangeExpr>().is_err());
775    }
776
777    #[test]
778    fn descending_without_step_error() {
779        assert!("5-1".parse::<RangeExpr>().is_err());
780    }
781
782    // ── slice() tests ──
783
784    #[test]
785    fn slice_basic() {
786        let r = "1-10".parse::<RangeExpr>().unwrap();
787        assert_eq!(r.slice(2, 5, 1).unwrap().to_vec(), vec![3, 4, 5]);
788    }
789
790    #[test]
791    fn slice_from_start() {
792        let r = "1-10".parse::<RangeExpr>().unwrap();
793        assert_eq!(r.slice(0, 3, 1).unwrap().to_vec(), vec![1, 2, 3]);
794    }
795
796    #[test]
797    fn slice_to_end() {
798        let r = "1-5".parse::<RangeExpr>().unwrap();
799        assert_eq!(r.slice(3, 5, 1).unwrap().to_vec(), vec![4, 5]);
800    }
801
802    #[test]
803    fn slice_with_step() {
804        let r = "1-10".parse::<RangeExpr>().unwrap();
805        assert_eq!(r.slice(0, 10, 2).unwrap().to_vec(), vec![1, 3, 5, 7, 9]);
806    }
807
808    #[test]
809    fn slice_reverse_returns_error() {
810        let r = "1-5".parse::<RangeExpr>().unwrap();
811        assert!(r.slice(4, -1, -1).is_err());
812    }
813
814    #[test]
815    fn slice_empty_result() {
816        let r = "1-5".parse::<RangeExpr>().unwrap();
817        assert!(r.slice(5, 10, 1).unwrap().is_empty());
818    }
819
820    #[test]
821    fn slice_stepped_source() {
822        // Source: 1,3,5,7,9 (step 2). Slice [1:4] → elements at indices 1,2,3 → 3,5,7
823        let r = "1-10:2".parse::<RangeExpr>().unwrap();
824        assert_eq!(r.slice(1, 4, 1).unwrap().to_vec(), vec![3, 5, 7]);
825    }
826
827    #[test]
828    fn slice_stepped_source_with_step() {
829        // Source: 1,3,5,7,9. Slice [::2] → indices 0,2,4 → 1,5,9
830        let r = "1-10:2".parse::<RangeExpr>().unwrap();
831        assert_eq!(r.slice(0, 5, 2).unwrap().to_vec(), vec![1, 5, 9]);
832    }
833
834    #[test]
835    fn slice_multi_range() {
836        // Source: 1,2,3,10,11,12. Slice [1:5] → indices 1,2,3,4 → 2,3,10,11
837        let r = "1-3,10-12".parse::<RangeExpr>().unwrap();
838        assert_eq!(r.slice(1, 5, 1).unwrap().to_vec(), vec![2, 3, 10, 11]);
839    }
840
841    #[test]
842    fn slice_multi_range_reverse_returns_error() {
843        let r = "1-3,10-12".parse::<RangeExpr>().unwrap();
844        assert!(r.slice(5, -1, -1).is_err());
845    }
846
847    #[test]
848    fn slice_large_range_no_materialization() {
849        // 1 billion elements — should complete instantly
850        let r = RangeExpr::from_ranges(vec![IntRange {
851            start: 1,
852            end: 1_000_000_000,
853            step: 1,
854        }])
855        .unwrap();
856        assert_eq!(r.slice(0, 3, 1).unwrap().to_vec(), vec![1, 2, 3]);
857    }
858
859    #[test]
860    fn slice_large_range_tail() {
861        let r = RangeExpr::from_ranges(vec![IntRange {
862            start: 1,
863            end: 1_000_000_000,
864            step: 1,
865        }])
866        .unwrap();
867        let len = r.len() as i64;
868        assert_eq!(
869            r.slice(len - 3, len, 1).unwrap().to_vec(),
870            vec![999_999_998, 999_999_999, 1_000_000_000]
871        );
872    }
873
874    #[test]
875    fn slice_large_range_with_step() {
876        let r = RangeExpr::from_ranges(vec![IntRange {
877            start: 1,
878            end: 1_000_000_000,
879            step: 1,
880        }])
881        .unwrap();
882        // Every 100 millionth element, first 3
883        assert_eq!(
884            r.slice(0, 1_000_000_000, 100_000_000).unwrap().to_vec(),
885            vec![
886                1,
887                100_000_001,
888                200_000_001,
889                300_000_001,
890                400_000_001,
891                500_000_001,
892                600_000_001,
893                700_000_001,
894                800_000_001,
895                900_000_001
896            ]
897        );
898    }
899
900    #[test]
901    fn slice_zero_step_error() {
902        let r = "1-5".parse::<RangeExpr>().unwrap();
903        assert!(r.slice(0, 5, 0).is_err());
904    }
905
906    #[test]
907    fn slice_negative_step_error() {
908        let r = "1-5".parse::<RangeExpr>().unwrap();
909        assert!(r.slice(4, -1, -1).is_err());
910    }
911
912    #[test]
913    fn slice_single_element() {
914        let r = "1-10".parse::<RangeExpr>().unwrap();
915        assert_eq!(r.slice(3, 4, 1).unwrap().to_vec(), vec![4]);
916    }
917
918    // ── Defensive caps (SEC-2026-4) ──
919
920    #[test]
921    fn reject_too_many_chunks_from_str() {
922        // MAX_RANGE_EXPR_CHUNKS + 1 single values: 0,1,2,...
923        let expr = (0..=MAX_RANGE_EXPR_CHUNKS as i64)
924            .map(|i| i.to_string())
925            .collect::<Vec<_>>()
926            .join(",");
927        let err = expr.parse::<RangeExpr>().unwrap_err().to_string();
928        assert!(err.contains("too many sub-ranges"), "got: {err}");
929    }
930
931    #[test]
932    fn accept_max_chunks_from_str() {
933        // Exactly MAX_RANGE_EXPR_CHUNKS non-contiguous single values:
934        // 0,2,4,...,(2*N-2). Stride of 2 prevents adjacent-range merging,
935        // so we end up with exactly N sub-ranges after from_ranges.
936        let expr = (0..MAX_RANGE_EXPR_CHUNKS as i64)
937            .map(|i| (i * 2).to_string())
938            .collect::<Vec<_>>()
939            .join(",");
940        let r = expr.parse::<RangeExpr>().unwrap();
941        assert_eq!(r.ranges().len(), MAX_RANGE_EXPR_CHUNKS);
942    }
943
944    #[test]
945    fn reject_too_many_chunks_from_ranges() {
946        let ranges: Vec<IntRange> = (0..=MAX_RANGE_EXPR_CHUNKS as i64)
947            .map(|i| IntRange::new(i * 2, i * 2, 1).unwrap())
948            .collect();
949        let err = RangeExpr::from_ranges(ranges).unwrap_err().to_string();
950        assert!(err.contains("too many sub-ranges"), "got: {err}");
951    }
952
953    #[test]
954    fn accept_single_huge_chunk() {
955        // A single chunk with a very large logical length is allowed —
956        // `RangeExpr` stores chunks symbolically, so the heap cost is O(1)
957        // regardless of `end - start`. Only chunk count is capped.
958        let expr = "1-100000000000";
959        let r = expr.parse::<RangeExpr>().unwrap();
960        assert_eq!(r.ranges().len(), 1);
961        assert_eq!(r.len(), 100_000_000_000);
962    }
963
964    #[test]
965    fn length_uses_saturating_sum() {
966        // `from_ranges` uses `saturating_add` when summing per-chunk lengths,
967        // so a multi-chunk range whose combined logical length would exceed
968        // `usize::MAX` cannot wrap to a small value that would corrupt
969        // `len()` / `is_empty()`. This test confirms the happy-path summation
970        // matches the expected total; the saturating behavior is exercised
971        // only in the arithmetic failure mode and is a defensive guard.
972        let ranges = vec![
973            IntRange::new(0, 1_000_000, 1).unwrap(),
974            IntRange::new(2_000_000, 3_000_000, 1).unwrap(),
975        ];
976        let r = RangeExpr::from_ranges(ranges).unwrap();
977        assert_eq!(r.ranges().len(), 2);
978        assert_eq!(r.len(), 1_000_001 + 1_000_001);
979    }
980
981    #[test]
982    fn chunk_cap_parse_does_not_hang() {
983        // A pathological input with 100,000 comma-separated values. The parser
984        // should reject it in well under a second without building the full
985        // vector. This guards against regressions that would remove the
986        // in-loop cap check.
987        let start = std::time::Instant::now();
988        let expr = (0..100_000i64)
989            .map(|i| i.to_string())
990            .collect::<Vec<_>>()
991            .join(",");
992        let err = expr.parse::<RangeExpr>().unwrap_err().to_string();
993        assert!(err.contains("too many sub-ranges"), "got: {err}");
994        // Generous budget (2 seconds) to avoid flakes on loaded CI machines.
995        assert!(
996            start.elapsed() < std::time::Duration::from_secs(2),
997            "parser took too long on 100k-chunk input: {:?}",
998            start.elapsed(),
999        );
1000    }
1001}