// string_iter/lib.rs

#![no_std]
//! An overly designed [`&str`] iterator made
//! with zero-copy parsing in mind, with an emphasis on ergonomics.
//!
//! # Usage
//!
//! `StringIter` offers iteration and pattern matching methods
//! as well as methods normally found in string types
//! that would make sense for an iterator.
//!
//! The standard `StringIter` yields a char in both its [`char`] and [`&str`]
//! representations, allowing easy storage in its [`&str`] or [`Cow<str>`](std::borrow::Cow) form.
//!
//! * Trimming
//!
//! ```
//! # use string_iter::prelude::*;
//! let mut iter = "  !#$@!foo&*  ".str_iter();
//! iter.trim();
//! assert_eq!(iter.as_str(), "!#$@!foo&*");
//! iter.trim_start_by(|x: char| !x.is_alphabetic());
//! assert_eq!(iter.as_str(), "foo&*");
//! iter.trim_end_by(|x: char| !x.is_alphabetic());
//! assert_eq!(iter.as_str(), "foo");
//! ```
//!
//! * Peeking
//!
//! ```
//! # use string_iter::prelude::*;
//! let mut iter = "bar".str_iter();
//! assert_eq!(iter.peek(), Some(('b', "b")));
//! assert_eq!(iter.peek_back(), Some(('r', "r")));
//! assert_eq!(iter.peekn(2), Ok("ba"));
//! assert_eq!(iter.peekn_back(2), Ok("ar"));
//! assert_eq!(iter.peekn(4), Err("bar"));
//! assert_eq!(iter.peekn_back(4), Err("bar"));
//! ```
//!
//! * Iterating
//!
//! ```
//! # use string_iter::prelude::*;
//! let chars = [('😀', "😀"), ('🙁', "🙁"), ('😡', "😡"), ('😱', "😱")];
//! for (a, b) in "😀🙁😡😱".str_iter().zip(chars.into_iter()) {
//!     assert_eq!(a, b);
//! }
//! ```
//!
//! * Look-ahead
//!
//! ```
//! # use string_iter::prelude::*;
//! let mut iter = "蟹🦀a🚀𓄇ë".str_iter().look_ahead(2).strs();
//! assert_eq!(iter.next(), Some("蟹🦀"));
//! assert_eq!(iter.next(), Some("🦀a"));
//! assert_eq!(iter.next(), Some("a🚀"));
//! assert_eq!(iter.next(), Some("🚀𓄇"));
//! assert_eq!(iter.next(), Some("𓄇ë"));
//! assert_eq!(iter.next(), Some("ë"));
//! assert_eq!(iter.next(), None);
//! ```
//!
//! * Slice by pattern
//!
//! ```
//! # use string_iter::prelude::*;
//! let mut iter = "{{foo}bar}baz".str_iter();
//! let mut count = 0;
//! let s = iter.next_slice((|x| {
//!     match x {
//!         '{' => count += 1,
//!         '}' => count -= 1,
//!         _ => (),
//!     };
//!     count == 0
//! }).sep_with(Sep::Yield));
//! assert_eq!(s, Some("{{foo}bar}"));
//! assert_eq!(iter.as_str(), "baz");
//! ```
//!
//! * Splitting
//!
//! ```
//! # use string_iter::prelude::*;
//! let mut iter = "thisIsCamelCase"
//!     .str_iter()
//!     .into_substrs(|c: char| c.is_uppercase());
//! assert_eq!(iter.next(), Some("this"));
//! assert_eq!(iter.next(), Some("Is"));
//! assert_eq!(iter.next(), Some("Camel"));
//! assert_eq!(iter.next(), Some("Case"));
//! assert_eq!(iter.next(), None);
//! ```
//!
//! # Patterns
//!
//! We use [`Patterns`](Pattern) in [`trim`](StringIter::trim_by),
//! [`slice`](StringIter::try_next_slice) and
//! [`split`](StringIter::into_substrs).
//!
//! In [`trim`](StringIter::trim_by), the pattern matches until a false value is found.
//!
//! In [`slice`](StringIter::try_next_slice) and
//! [`split`](StringIter::into_substrs), the pattern matches until a true value is found.
//!
//! See [`Sep`] and [`sep_with()`](SetSep) for dealing with the corner case.
//!
//! ## Supported Patterns
//!
//! * [`isize`]
//!
//! Matches once on the nth `char`.
//!
//! * `..isize`
//!
//! Matches the first `n` `char`s.
//! This is useful with [`trim`](StringIter::trim_by).
//!
//! * [`char`]
//!
//! Matches a char.
//!
//! * [`&str`]
//!
//! Matches a `&str` by looking ahead.
//!
//! * `&[char]` or `[char;N]`
//!
//! Matches any char in the set.
//!
//! * `char..=char`
//!
//! Matches a char in range,
//! we only support inclusive ranges to avoid errors.
//!
//! * `FnMut(char) -> FallibleBool`
//!
//! Matches any char that makes the function return true.
//!
//! [`FallibleBool`] can be [`bool`], [`Option<bool>`] or [`Result<bool, E: Debug>`].
//!
//! * `(FnMut(&str) -> FallibleBool).expecting(n)`
//!
//! Matches any [`&str`] that makes the function return true
//! by looking ahead for `n` `char`s.
//!
//! * `(FnMut(char, &str) -> FallibleBool).expecting(n)`
//!
//! Matches any [`&str`] that makes the function return true
//! by looking ahead for `n` `char`s.
//!
//! `char` is the first [`char`] in [`&str`].
//!
//! * [`Interval`](patterns::Interval) or [`interval!()`](`interval!`)
//!
//! Match repeatedly by an interval.
//!
//! * [`pat!()`](pat!)
//!
//! A macro that turns `match` patterns into [`Pattern`]s.
//!
//! * Custom implementations of [`Pattern`]
//!
//! You can write your own pattern types!
//!
//! # Examples
//!
//! Getting an ASCII identifier from a string:
//! ```
//! # use string_iter::prelude::*;
//! let foo = r#"  ferris123@crab.io "#;
//! let mut iter = foo.str_iter();
//! iter.trim_start();
//! let mut quotes = 0;
//! let slice = match iter.peek() {
//!     Some(('a'..='z'|'A'..='Z'|'_', _)) => {
//!         iter.next_slice(pat!(!'a'..='z'|'A'..='Z'|'0'..='9'|'_'))
//!     }
//!     _ => panic!("expected ident")
//! };
//! assert_eq!(slice, Some("ferris123"));
//!
//! // note @ is still in the iterator
//! assert_eq!(iter.as_str(), "@crab.io ");
//! ```
//!
//! Getting a string literal "foo" from a string:
//! ```
//! # use string_iter::prelude::*;
//! let foo = r#"    "foo"  bar "#;
//! let mut iter = foo.str_iter();
//! iter.trim_start();
//! let mut quotes = 0;
//! let slice = iter.next_slice((|c| match c {
//!     '"' =>  {
//!         quotes += 1;
//!         quotes == 2
//!     }
//!     _ => false,
//! }).sep_with(Sep::Yield));
//! assert_eq!(slice, Some("\"foo\""));
//! assert_eq!(iter.as_str(), "  bar ");
//! ```
//!
//! # Performance
//!
//! This crate is comparable in speed to [`str::chars()`].
//!
//! If operating on [`char`]s alone, [`str::chars()`] is faster.
//!
//! But [`StringIter`] can be faster than [`str::chars()`]
//! if you need to convert the [`char`] back into UTF-8.
//!
//! # Safety
//!
//! This crate uses **a lot** of unsafe code to take advantage of the
//! UTF-8 invariant and bypass some bounds checks and UTF-8 checks.
//!
//! In addition, we do not guarantee memory safety if given invalid UTF-8 input.
//!
//! Please file an issue if you find any soundness problem.

use core::{borrow::Borrow, fmt::Display};
mod slice;
mod merge;
mod split;
mod iter_fns;
mod interval;
mod pattern;
mod iterators;
mod string_ext;

pub use merge::Merge;
pub use string_ext::{StringExt, StringIndex};

pub use pattern::{
    Pattern,
    Sep,
    SetSep,
    Never,
    FallibleBool,
    CharStrPredicate, StrPredicate
};

244pub mod iter {
245    //! Misallenious iterators used in this crate.
246    //! 
247    //! Mapped iterators share regular methods with [`StringIter`](crate::StringIter)
248    //! and are functionally identical.
249    pub use crate::iterators::*;
250    pub use crate::merge::MergeIter;
251    pub use crate::split::SplitIter;
252}
253pub mod patterns {
254    //! Misallenious patterns used in this crate.
255    pub use crate::pattern:: {
256        SizedCharStrPredicate, 
257        SizedStrPredicate,
258        SepConfig,
259    };
260    pub use crate::interval::Interval;
261}
262
263
264pub mod prelude {
265    //! Convenience re-export of common members
266    //! ```
267    //! use string_iter::prelude::*;
268    //! ```
269    #[doc(no_inline)]
270    pub use crate::StringIterable;
271    #[doc(no_inline)]
272    pub use crate::string_ext::StringExt;
273    #[doc(no_inline)]
274    pub use crate::pattern::{Sep, SetSep, CharStrPredicate, StrPredicate};
275    #[doc(no_inline)]
276    pub use crate::merge::Merge;
277    pub use crate::interval;
278    pub use crate::pat;
279}
280
281/// A struct that can be iterated with a [`StringIter`]
282pub trait StringIterable {
283    /// Construct a new [`StringIter`]
284    fn str_iter<'t>(&'t self) -> StringIter<'t>;
285}
286
287impl<T> StringIterable for T where T: AsRef<str>{
288    fn str_iter<'t>(&'t self) -> StringIter<'t> {
289        StringIter { str: self.as_ref() }
290    }
291}
292
/// A double ended, UTF-8 [`char`] based [`Iterator`] for [`&str`]s that
/// supports iterating, looking ahead, trimming, pattern matching, splitting
/// and other common string operations.
///
/// Also a drop-in replacement for an [`AsRef<str>`] or a [`Borrow<str>`].
///
/// The iterator is a transparent wrapper around the `&str` slice that
/// remains to be iterated.
#[repr(transparent)]
#[derive(Debug, Clone)]
pub struct StringIter<'t> {
    /// The part of the input that has not been consumed yet.
    str: &'t str,
}

304impl<'t> StringIter<'t> {
305    /// Construct a new StringIter from a &str
306    pub const fn new(s: &'t str) -> Self {
307        StringIter {
308            str: s
309        }
310    }
311
312    /// Returns the length of the underlying [`str`] in bytes.
313    pub const fn len(&self) -> usize{
314        self.str.len()
315    }
316
317    /// Returns `true` if the underlying [`str`] has a length of zero bytes.
318    pub const fn is_empty(&self) -> bool{
319        self.str.is_empty()
320    }
321
322    /// Returns the underlying [`str`] of this [`StringIter`]
323    pub const fn as_str(&self) -> &'t str {
324        self.str
325    }
326
327    /// Returns the underlying `[u8]` of this [`StringIter`]
328    pub const fn as_bytes(&self) -> &'t[u8] {
329        self.str.as_bytes()
330    }
331
332    unsafe fn slice_front_ptr(&self, ptr: *const u8) -> &'t str{
333        let len = ptr as usize - self.str.as_ptr() as usize;
334        self.str.get_unchecked(..len)
335    }
336
337    unsafe fn slice_back_ptr(&self, ptr: *const u8) -> &'t str{
338        let len = ptr as usize - self.str.as_ptr() as usize;
339        self.str.get_unchecked(len..)
340    }
341
342    /// Returns true if the given [`&str`] matches the prefix of the underlying [`str`]
343    ///
344    /// Returns false if it does not.
345    pub fn startswith(&self, s: &str) -> bool{
346        self.str.starts_with(s)
347    }
348
349    /// Returns true if the given [`&str`] matches the suffix of the underlying [`str`]
350    ///
351    /// Returns false if it does not.
352    pub fn endswith(&self, s: &str) -> bool{
353        self.str.ends_with(s)
354    }
355
356    /// Removes leading and trailing whitespaces from this [`StringIter`]
357    pub fn trim(&mut self){
358        self.str = self.str.trim()
359    }
360
361    /// Removes leading whitespaces from this [`StringIter`]
362    pub fn trim_start(&mut self){
363        self.str = self.str.trim_start()
364    }
365
366    /// Removes trailing whitespaces from this [`StringIter`]
367    pub fn trim_end(&mut self){
368        self.str = self.str.trim_end()
369    }
370    
371    /// Skip `n` leading [`char`]s from this [`StringIter`], 
372    /// returns `true` if the string is empty afterwards.
373    pub fn skip_front(&mut self, n: usize) -> bool{
374        for _ in 0..n{
375            self.next();
376        }
377        self.is_empty()
378    }
379
380    /// Skip `n` trailing [`char`]s from this [`StringIter`], 
381    /// returns `true` if the string is empty afterwards.
382    pub fn skip_back(&mut self, n: usize) -> bool{
383        for _ in 0..n{
384            self.next_back();
385        }
386        self.is_empty()
387    }
388}
389
390impl AsRef<str> for StringIter<'_> {
391    fn as_ref(&self) -> &str {
392        self.str
393    }
394}
395
396impl Borrow<str> for StringIter<'_> {
397    fn borrow(&self) -> &str {
398        self.str
399    }
400}
401
402impl<'t> From<&'t str> for StringIter<'t> {
403    fn from(value: &'t str) -> Self {
404        Self { str: value }
405    }
406}
407
408impl<'t> Into<&'t str> for StringIter<'t> {
409    fn into(self) -> &'t str {
410        self.str
411    }
412}
413
414impl<'t> Display for StringIter<'t> {
415    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
416        f.write_str(self.str)
417    }
418}
419
420impl PartialEq<str> for StringIter<'_> {
421    fn eq(&self, other: &str) -> bool {
422        self.str == other
423    }
424}
425
426impl PartialOrd<str> for StringIter<'_> {
427    fn partial_cmp(&self, other: &str) -> Option<core::cmp::Ordering> {
428        self.str.partial_cmp(other)
429    }
430}
431
432
433impl PartialEq<&str> for StringIter<'_> {
434    fn eq(&self, other: &&str) -> bool {
435        self.str == *other
436    }
437}
438
439impl PartialOrd<&str> for StringIter<'_> {
440    fn partial_cmp(&self, other: &&str) -> Option<core::cmp::Ordering> {
441        self.str.partial_cmp(other)
442    }
443}
444
// Conversions to and from the owned / shared string types of `alloc`.
//
// They live in an anonymous `const` so that the `alloc` imports stay local
// to this block and the whole group can be gated on the `std` feature.
// Conversions out of `StringIter` are written as `From` impls on the target
// type; the matching `Into` impls come from the blanket impl.
#[cfg(feature="std")]
const _: () = {
    extern crate alloc;
    use alloc::boxed::Box;
    use alloc::rc::Rc;
    use alloc::string::String;
    use alloc::borrow::Cow;
    use alloc::sync::Arc;

    /// Copies the remaining [`str`] into a new [`String`].
    impl<'t> From<StringIter<'t>> for String {
        fn from(value: StringIter<'t>) -> Self {
            value.str.into()
        }
    }

    /// Copies the remaining [`str`] into a new `Box<str>`.
    impl<'t> From<StringIter<'t>> for Box<str> {
        fn from(value: StringIter<'t>) -> Self {
            value.str.into()
        }
    }

    /// Copies the remaining [`str`] into a new `Rc<str>`.
    impl<'t> From<StringIter<'t>> for Rc<str> {
        fn from(value: StringIter<'t>) -> Self {
            value.str.into()
        }
    }

    /// Copies the remaining [`str`] into a new `Arc<str>`.
    impl<'t> From<StringIter<'t>> for Arc<str> {
        fn from(value: StringIter<'t>) -> Self {
            value.str.into()
        }
    }

    /// Wraps the remaining [`str`] in a borrowed [`Cow`] without copying.
    impl<'t> From<StringIter<'t>> for Cow<'t, str> {
        fn from(value: StringIter<'t>) -> Self {
            Cow::Borrowed(value.str)
        }
    }

    /// Iterates over the contents of a borrowed [`String`].
    impl<'t> From<&'t String> for StringIter<'t> {
        fn from(s: &'t String) -> Self {
            Self::new(s)
        }
    }

    /// Iterates over the contents of a borrowed `Box<str>`.
    impl<'t> From<&'t Box<str>> for StringIter<'t> {
        fn from(s: &'t Box<str>) -> Self {
            Self::new(s)
        }
    }

    /// Iterates over the contents of a borrowed `Rc<str>`.
    impl<'t> From<&'t Rc<str>> for StringIter<'t> {
        fn from(s: &'t Rc<str>) -> Self {
            Self::new(s)
        }
    }

    /// Iterates over the contents of a borrowed `Arc<str>`.
    impl<'t> From<&'t Arc<str>> for StringIter<'t> {
        fn from(s: &'t Arc<str>) -> Self {
            Self::new(s)
        }
    }

    /// Iterates over the contents of a borrowed [`Cow`], owned or not.
    impl<'a, 't: 'a> From<&'t Cow<'a, str>> for StringIter<'t> {
        fn from(s: &'t Cow<'a, str>) -> Self {
            Self::new(s)
        }
    }

    /// This conversion only works if the [`Cow`] is Borrowed:
    /// an owned [`Cow`] would be dropped while the iterator still
    /// needs its contents, so `Err(())` is returned for it.
    impl<'t> TryFrom<Cow<'t, str>> for StringIter<'t> {
        type Error = ();
        fn try_from(cow: Cow<'t, str>) -> Result<Self, ()> {
            match cow {
                Cow::Borrowed(s) => Ok(Self { str: s }),
                Cow::Owned(_) => Err(()),
            }
        }
    }
};