range_split/
str.rs

1//! Utilities for validating ranges on UTF-8 strings.
2
3use core::ops::{Bound, RangeBounds};
4
/// Asserts that a range can be applied to a string slice.
///
/// The macro takes two arguments: first a value of a type implementing
/// `AsRef<str>`, then a value of a type implementing the standard
/// library trait `RangeBounds<usize>`. Both arguments are evaluated
/// once and are only borrowed by the check.
///
/// A range is accepted when it fits within the slice and each of its
/// bounds falls on a UTF-8 code point boundary. When either condition
/// is violated, `panic!` is invoked with a description of the failure.
///
/// # Examples
///
/// ```
/// # use range_split::assert_str_range;
/// let s = "Hello";
/// assert_str_range!(s, ..0);
/// assert_str_range!(s, 5..);
///
/// let r = (..=2);
/// assert_str_range!(s, r);
/// let (head, tail) = s.as_bytes().split_at(r.end + 1);
/// ```
///
/// ```should_panic
/// # use range_split::assert_str_range;
/// let s = "Привет".to_string();
/// assert_str_range!(s, ..1); // fails due to splitting a UTF-8 sequence
/// ```
#[macro_export]
macro_rules! assert_str_range {
    ($s:expr, $r:expr) => {{
        // Borrow both operands so the caller keeps ownership of them.
        let subject = &$s;
        let bounds = &$r;
        let is_valid = $crate::str::is_valid_range(subject, bounds);
        if !is_valid {
            // Diverges: the failure path panics with the diagnosis.
            $crate::str::range_fail(subject, bounds)
        }
    }};
}
43
44/// Checks that `range` is valid for splitting the string slice `s`.
45///
46/// The range is valid if it fits within the slice and its bounds are
47/// on UTF-8 code point boundaries.
48pub fn is_valid_range<S, R>(s: S, range: &R) -> bool
49where
50    S: AsRef<str>,
51    R: RangeBounds<usize>,
52{
53    let s = s.as_ref();
54    validate_start_bound(s, range.start_bound()).is_ok()
55        && validate_end_bound(s, range.end_bound()).is_ok()
56}
57
/// Reason why a single bound of a range was rejected for a string slice.
enum InvalidBound {
    /// The position denoted by the bound lies past the end of the string.
    OutOfBuffer,
    /// The position is inside the string, but it is not on a UTF-8
    /// character boundary.
    NotCharBoundary,
}
62
63#[inline]
64fn validate_start_bound(
65    s: &str,
66    bound: Bound<&usize>,
67) -> Result<(), InvalidBound> {
68    use Bound::*;
69
70    match bound {
71        Unbounded => Ok(()),
72        Included(index) => validate_index(s, *index),
73        Excluded(index) => validate_next_index(s, *index),
74    }
75}
76
77#[inline]
78fn validate_end_bound(
79    s: &str,
80    bound: Bound<&usize>,
81) -> Result<(), InvalidBound> {
82    use Bound::*;
83
84    match bound {
85        Unbounded => Ok(()),
86        Excluded(index) => validate_index(s, *index),
87        Included(index) => validate_next_index(s, *index),
88    }
89}
90
91#[inline]
92fn validate_index(s: &str, index: usize) -> Result<(), InvalidBound> {
93    use InvalidBound::*;
94
95    // .is_char_boundary() fails on OOB as well, but we check it as
96    // the fast path first and discern the failure cause later.
97    if s.is_char_boundary(index) {
98        Ok(())
99    } else if index > s.len() {
100        Err(OutOfBuffer)
101    } else {
102        Err(NotCharBoundary)
103    }
104}
105
106#[inline]
107fn validate_next_index(s: &str, index: usize) -> Result<(), InvalidBound> {
108    use InvalidBound::*;
109
110    // The check for OOB also rules out integer overflow in index + 1
111    if index >= s.len() {
112        Err(OutOfBuffer)
113    } else if s.is_char_boundary(index + 1) {
114        Ok(())
115    } else {
116        Err(NotCharBoundary)
117    }
118}
119
120#[doc(hidden)]
121#[cold]
122pub fn range_fail<S, R>(s: S, range: &R) -> !
123where
124    S: AsRef<str>,
125    R: RangeBounds<usize>,
126{
127    range_fail_internal(s.as_ref(), range.start_bound(), range.end_bound())
128}
129
130fn range_fail_internal(
131    s: &str,
132    start_bound: Bound<&usize>,
133    end_bound: Bound<&usize>,
134) -> ! {
135    use InvalidBound::*;
136
137    let start_validity = validate_start_bound(s, start_bound);
138    let end_validity = validate_end_bound(s, end_bound);
139    let r = (start_bound, end_bound);
140    match (start_validity, end_validity) {
141        (Err(OutOfBuffer), _) | (_, Err(OutOfBuffer)) => {
142            panic!("range {:?} is out of bounds", r)
143        }
144        (Err(NotCharBoundary), _) | (_, Err(NotCharBoundary)) => {
145            panic!("range {:?} does not split on a UTF-8 boundary", r)
146        }
147        (Ok(()), Ok(())) => unreachable!("there was no problem with the range"),
148    }
149}