strtools/
lib.rs

1//! This crate provides the [`StrTools`] trait which exposes a variety of helper functions for
2//! handling strings for use cases like handling user input.
3//!
4//! # Examples
5//! ```
6//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
7//! use strtools::StrTools;
8//!
9//! // split a string by some separator but ignore escaped ones
10//! let parts: Vec<_> = r"this string\ is split by\ spaces and commas, unless they are\ escaped"
11//!     .split_non_escaped_sanitize('\\', [' ', ','])?
12//!     .collect();
13//!
14//! assert_eq!(
15//!     parts,
16//!     [
17//!         "this",
18//!         "string is",
19//!         "split",
20//!         "by spaces",
21//!         "and",
22//!         "commas",
23//!         "",
24//!         "unless",
25//!         "they",
26//!         "are escaped"
27//!     ]
28//! );
29//! # Ok(())
30//! # }
31//! ```
32//! ```
33//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
34//! use strtools::StrTools;
35//!
36//! let parts: Vec<_> = r"\.\/.*s(\d\d)e(\d\d[a-d])/S$1E$2/gu"
37//!     .split_non_escaped_sanitize('\\', '/')?
38//!     .collect();
39//!
40//! // parsing user input regex rules like `<rule>/<replace>/<flags>`
41//! // the rule contained an escaped separator but we don't want to
42//! // actually escape it for the regex engine
43//! assert_eq!(parts, [r"\./.*s(\d\d)e(\d\d[a-d])", "S$1E$2", "gu"]);
44//! # Ok(())
45//! # }
46//! ```
47// keep the nightly features set small in hopes that all used features are stabilized by the time
48// this crate will stabilize
49#![feature(
50    associated_type_defaults,
51    cow_is_borrowed,
52    // https://github.com/rust-lang/rust/issues/57349
53    // this should be fine, the only listed regression is a very niche use case, but this would
54    // block stabilization
55    const_mut_refs,
56    decl_macro,
57    is_sorted,
58    let_chains
59)]
60// check for missing documentation
61#![warn(
62    missing_docs,
63    clippy::missing_panics_doc,
64    clippy::missing_errors_doc,
65    clippy::missing_safety_doc
66)]
67// reduce unsafe scopes to their minimum
68#![deny(unsafe_op_in_unsafe_fn)]
69
70use parse::{FromStrBack, FromStrFront};
71use util::Sorted;
72
73pub mod escape;
74pub mod find;
75pub mod parse;
76pub mod split;
77pub mod util;
78
79/// The main trait of this crate, providing various extension methods for [`str`].
80/// See the individual function documentation for more info. **The methods on this trait are subject
81/// to change during the development of the crates core functionality.**
82pub trait StrTools: util::sealed::Sealed {
83    /// Behaves similar to [`str::split`] but generic of the the amount of indices.
84    ///
85    /// # Panics
86    /// Panics if the last index is out of bounds:
87    /// `indices.last() > Some(input.len)`
88    ///
89    /// # Examples
90    /// ```
91    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
92    /// use strtools::StrTools;
93    ///
94    /// let value = "0123456789ab";
95    /// let ([first, second], third) = value.split_n_times(&[4, 8].try_into()?);
96    ///
97    /// assert_eq!(first, "0123");
98    /// assert_eq!(second, "4567");
99    /// assert_eq!(third, "89ab");
100    /// # Ok(())
101    /// # }
102    /// ```
103    fn split_n_times<const N: usize>(&self, indices: &Sorted<usize, N>) -> ([&str; N], &str);
104
105    /// Splits a [`str`] by the given delimiters unless they are preceded by an escape.
106    /// Escapes before significant chars are removed, significant chars are the delimiters and the
107    /// escape itself. Trailing escapes are ignored as if followed by a non-significant char.
108    /// `delims` single char or an array of chars, which will be sorted, see the
109    /// [free version][free] of this function for more control over delimiter sorting.
110    ///
111    /// # Errors
112    /// Returns an error if:
113    /// - `esc == delim`
114    ///
115    /// # Complexity
116    /// This algorithm requires `O(n * max(log m, 1))` time where `n` is the length of the input
117    /// string and `m` is the length of the delimiters.
118    ///
119    /// # Allocation
120    /// If no escapes are encountered in a part, no allocations are done and the part is borrowed,
121    /// otherwise a [`String`] and all but the escape chars before delimiters are copied over.
122    ///
123    /// # Examples
124    /// ```
125    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
126    /// use strtools::StrTools;
127    ///
128    /// let value = r"Pa\rt0:Part1:Part2\:StillPart2";
129    /// let parts: Vec<_> = value.split_non_escaped_sanitize('\\', ':')?.collect();
130    ///
131    /// // notice that the escape char was removed in Part2 but not in Part1 as it's just used as
132    /// // an indicator for escaping the delimiters or escapes themselves
133    /// assert_eq!(parts, [r"Pa\rt0", "Part1", "Part2:StillPart2"]);
134    /// # Ok(())
135    /// # }
136    /// ```
137    ///
138    /// [free]: split::non_escaped_sanitize
139    fn split_non_escaped_sanitize<D: Into<Sorted<char, N>>, const N: usize>(
140        &self,
141        esc: char,
142        delims: D,
143    ) -> Result<split::NonEscapedSanitize<'_, N>, split::NonEscapedError>;
144
145    /// Splits a [`str`] by the given delimiters unless they are preceded by an escape.
146    /// Escapes before significant chars are removed, significant chars are the delimiters and the
147    /// escape itself. Trailing escapes are ignored as if followed by a non-significant char.
148    /// `delims` single char or an array of chars, which will be sorted, see the
149    /// [free version][free] of this function for more control over delimiter sorting.
150    ///
151    /// # Errors
152    /// Returns an error if:
153    /// - `esc == delim`
154    ///
155    /// # Complexity
156    /// This algorithm requires `O(n * max(log m, 1))` time where `n` is the length of the input
157    /// string and `m` is the length of the delimiters.
158    ///
159    /// # Allocation
160    /// No allocations are done.
161    ///
162    /// # Examples
163    /// ```
164    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
165    /// use strtools::StrTools;
166    ///
167    /// let value = r"Pa\rt0:Part1:Part2\:StillPart2";
168    /// let parts: Vec<_> = value.split_non_escaped('\\', ':')?.collect();
169    ///
170    /// // no sanitization is done here the separators are simply ignored
171    /// assert_eq!(parts, [r"Pa\rt0", "Part1", r"Part2\:StillPart2"]);
172    /// # Ok(())
173    /// # }
174    /// ```
175    ///
176    /// [free]: split::non_escaped
177    fn split_non_escaped<D: Into<Sorted<char, N>>, const N: usize>(
178        &self,
179        esc: char,
180        delims: D,
181    ) -> Result<split::NonEscaped<'_, N>, split::NonEscapedError>;
182
183    /// Attempts to parse `T` from the beginning of the [`str`], returns the rest of the `input` and
184    /// `T` if parsing succeeded.
185    ///
186    /// # Errors
187    /// Returns an error if:
188    /// - the start of `input` contain any valid representation of `Self`
189    /// - `input` did not contain a complete representation of `Self`
190    ///
191    /// # Examples
192    /// ```
193    /// use strtools::StrTools;
194    ///
195    /// let result = "-128 Look mom, no error!".parse_front::<i8>();
196    /// assert_eq!(result, Ok((-128, " Look mom, no error!")));
197    /// ```
198    fn parse_front<T: FromStrFront>(&self) -> Result<(T, &str), T::Error>;
199
200    /// Attempts to parse `T` from the end of the [`str`], returns the rest of the `input` and T` if
201    /// parsing succeeded.
202    ///
203    /// # Errors
204    /// Returns an error if:
205    /// - the start of `input` contain any valid representation of `Self`
206    /// - `input` did not contain a complete representation of `Self`
207    ///
208    /// # Examples
209    /// ```
210    /// use strtools::StrTools;
211    ///
212    /// let result = "Look mom, no error! -128".parse_back::<i8>();
213    /// assert_eq!(result, Ok((-128, "Look mom, no error! ")));
214    /// ```
215    fn parse_back<T: FromStrBack>(&self) -> Result<(T, &str), T::Error>;
216}
217
218impl StrTools for str {
219    fn split_n_times<const N: usize>(&self, indices: &Sorted<usize, N>) -> ([&str; N], &str) {
220        split::n_times(self, indices)
221    }
222
223    fn split_non_escaped_sanitize<D: Into<Sorted<char, N>>, const N: usize>(
224        &self,
225        esc: char,
226        delims: D,
227    ) -> Result<split::NonEscapedSanitize<'_, N>, split::NonEscapedError> {
228        split::non_escaped_sanitize(self, esc, delims.into())
229    }
230
231    fn split_non_escaped<D: Into<Sorted<char, N>>, const N: usize>(
232        &self,
233        esc: char,
234        delims: D,
235    ) -> Result<split::NonEscaped<'_, N>, split::NonEscapedError> {
236        split::non_escaped(self, esc, delims.into())
237    }
238
239    fn parse_front<T: FromStrFront>(&self) -> Result<(T, &str), T::Error> {
240        T::from_str_front(self)
241    }
242
243    fn parse_back<T: FromStrBack>(&self) -> Result<(T, &str), T::Error> {
244        T::from_str_back(self)
245    }
246}