string_iter/lib.rs
1#![no_std]
2//! An overly designed [`&str`] iterator made
3//! with zero-copy parsing in mind, with an emphasis on ergonomics.
4//!
5//! # Usage
6//!
7//! `StringIter` offers iteration and pattern matching methods
8//! as well as methods normally found in string types
9//! that would make sense for an iterator.
10//!
11//! The standard StringIter yields a char in both its [`char`] and [`&str`]
//! representations, allowing easy storage in its [`&str`] or [`Cow<str>`](std::borrow::Cow) form.
13//!
14//! * Trimming
15//!
16//! ```
17//! # use string_iter::prelude::*;
18//! let mut iter = " !#$@!foo&* ".str_iter();
19//! iter.trim();
20//! assert_eq!(iter.as_str(), "!#$@!foo&*");
21//! iter.trim_start_by(|x: char| !x.is_alphabetic());
22//! assert_eq!(iter.as_str(), "foo&*");
23//! iter.trim_end_by(|x: char| !x.is_alphabetic());
24//! assert_eq!(iter.as_str(), "foo");
25//! ```
26//!
27//! * Peeking
28//!
29//! ```
30//! # use string_iter::prelude::*;
31//! let mut iter = "bar".str_iter();
32//! assert_eq!(iter.peek(), Some(('b', "b")));
33//! assert_eq!(iter.peek_back(), Some(('r', "r")));
34//! assert_eq!(iter.peekn(2), Ok("ba"));
35//! assert_eq!(iter.peekn_back(2), Ok("ar"));
36//! assert_eq!(iter.peekn(4), Err("bar"));
37//! assert_eq!(iter.peekn_back(4), Err("bar"));
38//! ```
39//! * Iterating
40//!
41//! ```
42//! # use string_iter::prelude::*;
43//! let chars = [('😀', "😀"), ('🙁', "🙁"), ('😡', "😡"), ('😱', "😱")];
44//! for (a, b) in "😀🙁😡😱".str_iter().zip(chars.into_iter()) {
45//! assert_eq!(a, b);
46//! }
47//! ```
48//!
49//! * Look-ahead
50//!
51//! ```
52//! # use string_iter::prelude::*;
53//! let mut iter = "蟹🦀a🚀𓄇ë".str_iter().look_ahead(2).strs();
54//! assert_eq!(iter.next(), Some("蟹🦀"));
55//! assert_eq!(iter.next(), Some("🦀a"));
56//! assert_eq!(iter.next(), Some("a🚀"));
57//! assert_eq!(iter.next(), Some("🚀𓄇"));
58//! assert_eq!(iter.next(), Some("𓄇ë"));
59//! assert_eq!(iter.next(), Some("ë"));
60//! assert_eq!(iter.next(), None);
61//! ```
62//!
63//! * Slice by pattern
64//! ```
65//! # use string_iter::prelude::*;
66//! let mut iter = "{{foo}bar}baz".str_iter();
67//! let mut count = 0;
68//! let s = iter.next_slice((|x| {
69//! match x {
70//! '{' => count += 1,
71//! '}' => count -= 1,
72//! _ => (),
73//! };
74//! count == 0
75//! }).sep_with(Sep::Yield));
76//! assert_eq!(s, Some("{{foo}bar}"));
77//! assert_eq!(iter.as_str(), "baz");
78//! ```
79//!
80//! * Splitting
81//!
82//! ```
83//! # use string_iter::prelude::*;
84//! let mut iter = "thisIsCamelCase"
85//! .str_iter()
86//! .into_substrs(|c: char| c.is_uppercase());
87//! assert_eq!(iter.next(), Some("this"));
88//! assert_eq!(iter.next(), Some("Is"));
89//! assert_eq!(iter.next(), Some("Camel"));
90//! assert_eq!(iter.next(), Some("Case"));
91//! assert_eq!(iter.next(), None);
92//! ```
93//!
94//! # Patterns
95//!
96//! We use [`Patterns`](Pattern) in [`trim`](StringIter::trim_by),
97//! [`slice`](StringIter::try_next_slice) and
98//! [`split`](StringIter::into_substrs).
99//!
100//! In [`trim`](StringIter::trim_by), the pattern matches until a false value is found.
101//!
102//! In [`slice`](StringIter::try_next_slice) and
103//! [`split`](StringIter::into_substrs), the pattern matches until a true value is found.
104//!
105//! See [`Sep`] and [`sep_with()`](SetSep) for dealing with the corner case.
106//!
107//! ## Supported Patterns
108//!
109//! * [`isize`]
110//!
111//! Matches once on the nth `char`.
112//!
113//! * `..isize`
114//!
115//! Matches the first `n` `char`s.
116//! This is useful with [`trim`](StringIter::trim_by).
117//!
118//! * [`char`]
119//!
120//! Matches a char.
121//!
122//! * [`&str`]
123//!
//! Matches an `&str` by looking ahead.
125//!
126//! * `&[char]` or `[char;N]`
127//!
128//! Matches any char in the set.
129//!
130//! * `char..=char`
131//!
132//! Matches a char in range,
133//! we only support inclusive ranges to avoid errors.
134//!
135//! * `FnMut(char) -> FallibleBool`
136//!
137//! Matches any char that makes the function return true.
138//!
139//! [`FallibleBool`] can be [`bool`], [`Option<bool>`] or [`Result<bool, E: Debug>`]
140//!
141//! * `(FnMut(&str) -> FallibleBool).expecting(n)`
142//!
143//! Matches any [`&str`] that makes the function return true
144//! by looking ahead for `n` `char`s.
145//!
146//! * `(FnMut(char, &str) -> FallibleBool).expecting(n)`
147//!
148//! Matches any [`&str`] that makes the function return true
149//! by looking ahead for `n` `char`s.
150//!
151//! `char` is the first [`char`] in [`&str`]
152//!
153//! * [`Interval`](patterns::Interval) or [`interval!()`](`interval!`)
154//!
155//! Match repeatedly by an interval.
156//!
157//! * [`pat!()`](pat!)
158//!
159//! A macro that turns `match` patterns into [`Pattern`]s.
160//!
161//! * Custom implementations of [`Pattern`]
162//!
163//! You can write your own pattern types!
164//!
165//! # Examples
166//!
167//! Getting an ascii identifier from a string
168//! ```
169//! # use string_iter::prelude::*;
170//! let foo = r#" ferris123@crab.io "#;
171//! let mut iter = foo.str_iter();
172//! iter.trim_start();
173//! let mut quotes = 0;
174//! let slice = match iter.peek() {
175//! Some(('a'..='z'|'A'..='Z'|'_', _)) => {
176//! iter.next_slice(pat!(!'a'..='z'|'A'..='Z'|'0'..='9'|'_'))
177//! }
178//! _ => panic!("expected ident")
179//! };
180//! assert_eq!(slice, Some("ferris123"));
181//!
182//! // note @ is still in the iterator
183//! assert_eq!(iter.as_str(), "@crab.io ");
184//! ```
185//!
186//! Getting a string literal "foo" from a string:
187//! ```
188//! # use string_iter::prelude::*;
189//! let foo = r#" "foo" bar "#;
190//! let mut iter = foo.str_iter();
191//! iter.trim_start();
192//! let mut quotes = 0;
193//! let slice = iter.next_slice((|c| match c {
194//! '"' => {
195//! quotes += 1;
196//! quotes == 2
197//! }
198//! _ => false,
199//! }).sep_with(Sep::Yield));
200//! assert_eq!(slice, Some("\"foo\""));
201//! assert_eq!(iter.as_str(), " bar ");
202//! ```
203//!
204//! # Performance
205//!
206//! This crate is comparable in speed to [`str::chars()`].
207//!
208//! If operating on [`char`]s alone, [`str::chars()`] is faster.
209//!
210//! But [`StringIter`] can be faster than [`str::chars()`]
211//! if you need to convert the [`char`] back into UTF-8.
212//!
213//! # Safety
214//!
215//! This crate uses **a lot** of unsafe code to take advantage of the
//! UTF-8 invariant and bypass some bounds checks and UTF-8 checks.
217//!
218//! In addition we do not guarantee memory safety if given invalid UTF-8 input.
219//!
220//! Please file an issue if you find any soundness problem.
221
222use core::{borrow::Borrow, fmt::Display};
223mod slice;
224mod merge;
225mod split;
226mod iter_fns;
227mod interval;
228mod pattern;
229mod iterators;
230mod string_ext;
231
232pub use merge::Merge;
233pub use string_ext::{StringExt, StringIndex};
234
235pub use pattern::{
236 Pattern,
237 Sep,
238 SetSep,
239 Never,
240 FallibleBool,
241 CharStrPredicate, StrPredicate
242};
243
pub mod iter {
    //! Miscellaneous iterators used in this crate.
    //!
    //! Mapped iterators share regular methods with [`StringIter`](crate::StringIter)
    //! and are functionally identical.

    // Everything below is a re-export; the implementations live in the
    // private `iterators`, `merge` and `split` modules.
    pub use crate::iterators::*;
    pub use crate::merge::MergeIter;
    pub use crate::split::SplitIter;
}
pub mod patterns {
    //! Miscellaneous patterns used in this crate.

    // Re-exports only; the implementations live in the private
    // `pattern` and `interval` modules.
    pub use crate::pattern:: {
        SizedCharStrPredicate,
        SizedStrPredicate,
        SepConfig,
    };
    pub use crate::interval::Interval;
}
262
263
pub mod prelude {
    //! Convenience re-export of common members.
    //!
    //! ```
    //! use string_iter::prelude::*;
    //! ```

    // `#[doc(no_inline)]` keeps rustdoc from duplicating the item docs here;
    // the prelude page only links back to the original definitions.
    #[doc(no_inline)]
    pub use crate::StringIterable;
    #[doc(no_inline)]
    pub use crate::string_ext::StringExt;
    #[doc(no_inline)]
    pub use crate::pattern::{Sep, SetSep, CharStrPredicate, StrPredicate};
    #[doc(no_inline)]
    pub use crate::merge::Merge;
    // NOTE(review): presumably these re-export the `interval!` and `pat!`
    // macros referenced by the crate docs — confirm against their definitions.
    pub use crate::interval;
    pub use crate::pat;
}
280
/// A struct that can be iterated with a [`StringIter`]
///
/// The returned iterator borrows from `self`, so it cannot outlive
/// the value it was created from.
pub trait StringIterable {
    /// Construct a new [`StringIter`]
    fn str_iter<'t>(&'t self) -> StringIter<'t>;
}
286
287impl<T> StringIterable for T where T: AsRef<str>{
288 fn str_iter<'t>(&'t self) -> StringIter<'t> {
289 StringIter { str: self.as_ref() }
290 }
291}
292
/// A double ended, UTF-8 [`char`] based [`Iterator`] for [`&str`]s that
/// supports iterating, looking ahead, trimming, pattern matching, splitting
/// and other common string operations.
///
/// Also a drop in replacement for an [`AsRef<str>`] or a [`Borrow<str>`].
// `repr(transparent)`: the wrapper has the same layout as its single
// `&'t str` field.
#[repr(transparent)]
#[derive(Debug, Clone)]
pub struct StringIter<'t>{
    /// The string slice this iterator currently views; methods such as
    /// `trim` replace it with a narrower sub-slice.
    str: &'t str,
}
303
304impl<'t> StringIter<'t> {
305 /// Construct a new StringIter from a &str
306 pub const fn new(s: &'t str) -> Self {
307 StringIter {
308 str: s
309 }
310 }
311
312 /// Returns the length of the underlying [`str`] in bytes.
313 pub const fn len(&self) -> usize{
314 self.str.len()
315 }
316
317 /// Returns `true` if the underlying [`str`] has a length of zero bytes.
318 pub const fn is_empty(&self) -> bool{
319 self.str.is_empty()
320 }
321
322 /// Returns the underlying [`str`] of this [`StringIter`]
323 pub const fn as_str(&self) -> &'t str {
324 self.str
325 }
326
327 /// Returns the underlying `[u8]` of this [`StringIter`]
328 pub const fn as_bytes(&self) -> &'t[u8] {
329 self.str.as_bytes()
330 }
331
332 unsafe fn slice_front_ptr(&self, ptr: *const u8) -> &'t str{
333 let len = ptr as usize - self.str.as_ptr() as usize;
334 self.str.get_unchecked(..len)
335 }
336
337 unsafe fn slice_back_ptr(&self, ptr: *const u8) -> &'t str{
338 let len = ptr as usize - self.str.as_ptr() as usize;
339 self.str.get_unchecked(len..)
340 }
341
342 /// Returns true if the given [`&str`] matches the prefix of the underlying [`str`]
343 ///
344 /// Returns false if it does not.
345 pub fn startswith(&self, s: &str) -> bool{
346 self.str.starts_with(s)
347 }
348
349 /// Returns true if the given [`&str`] matches the suffix of the underlying [`str`]
350 ///
351 /// Returns false if it does not.
352 pub fn endswith(&self, s: &str) -> bool{
353 self.str.ends_with(s)
354 }
355
356 /// Removes leading and trailing whitespaces from this [`StringIter`]
357 pub fn trim(&mut self){
358 self.str = self.str.trim()
359 }
360
361 /// Removes leading whitespaces from this [`StringIter`]
362 pub fn trim_start(&mut self){
363 self.str = self.str.trim_start()
364 }
365
366 /// Removes trailing whitespaces from this [`StringIter`]
367 pub fn trim_end(&mut self){
368 self.str = self.str.trim_end()
369 }
370
371 /// Skip `n` leading [`char`]s from this [`StringIter`],
372 /// returns `true` if the string is empty afterwards.
373 pub fn skip_front(&mut self, n: usize) -> bool{
374 for _ in 0..n{
375 self.next();
376 }
377 self.is_empty()
378 }
379
380 /// Skip `n` trailing [`char`]s from this [`StringIter`],
381 /// returns `true` if the string is empty afterwards.
382 pub fn skip_back(&mut self, n: usize) -> bool{
383 for _ in 0..n{
384 self.next_back();
385 }
386 self.is_empty()
387 }
388}
389
390impl AsRef<str> for StringIter<'_> {
391 fn as_ref(&self) -> &str {
392 self.str
393 }
394}
395
396impl Borrow<str> for StringIter<'_> {
397 fn borrow(&self) -> &str {
398 self.str
399 }
400}
401
402impl<'t> From<&'t str> for StringIter<'t> {
403 fn from(value: &'t str) -> Self {
404 Self { str: value }
405 }
406}
407
408impl<'t> Into<&'t str> for StringIter<'t> {
409 fn into(self) -> &'t str {
410 self.str
411 }
412}
413
414impl<'t> Display for StringIter<'t> {
415 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
416 f.write_str(self.str)
417 }
418}
419
420impl PartialEq<str> for StringIter<'_> {
421 fn eq(&self, other: &str) -> bool {
422 self.str == other
423 }
424}
425
426impl PartialOrd<str> for StringIter<'_> {
427 fn partial_cmp(&self, other: &str) -> Option<core::cmp::Ordering> {
428 self.str.partial_cmp(other)
429 }
430}
431
432
433impl PartialEq<&str> for StringIter<'_> {
434 fn eq(&self, other: &&str) -> bool {
435 self.str == *other
436 }
437}
438
439impl PartialOrd<&str> for StringIter<'_> {
440 fn partial_cmp(&self, other: &&str) -> Option<core::cmp::Ordering> {
441 self.str.partial_cmp(other)
442 }
443}
444
// Conversions to and from owned / smart-pointer string types.
// Only compiled with the `std` feature; the anonymous `const` keeps the
// `extern crate` and `use` items out of the crate namespace.
#[cfg(feature="std")]
const _: () = {
    extern crate alloc;
    use alloc::borrow::Cow;
    use alloc::boxed::Box;
    use alloc::rc::Rc;
    use alloc::string::String;
    use alloc::sync::Arc;

    // The outgoing conversions are written as `From` impls on the target
    // types rather than hand-written `Into` impls: the blanket
    // `impl<T, U: From<T>> Into<U> for T` keeps every existing `.into()` call
    // working and additionally enables `String::from(iter)` and friends.

    /// Copies the underlying [`str`] into a new [`String`].
    impl<'t> From<StringIter<'t>> for String {
        fn from(iter: StringIter<'t>) -> Self {
            iter.str.into()
        }
    }

    /// Copies the underlying [`str`] into a new `Box<str>`.
    impl<'t> From<StringIter<'t>> for Box<str> {
        fn from(iter: StringIter<'t>) -> Self {
            iter.str.into()
        }
    }

    /// Copies the underlying [`str`] into a new `Rc<str>`.
    impl<'t> From<StringIter<'t>> for Rc<str> {
        fn from(iter: StringIter<'t>) -> Self {
            iter.str.into()
        }
    }

    /// Copies the underlying [`str`] into a new `Arc<str>`.
    impl<'t> From<StringIter<'t>> for Arc<str> {
        fn from(iter: StringIter<'t>) -> Self {
            iter.str.into()
        }
    }

    /// Wraps the underlying [`str`] in a borrowed [`Cow`] without copying.
    impl<'t> From<StringIter<'t>> for Cow<'t, str> {
        fn from(iter: StringIter<'t>) -> Self {
            Cow::Borrowed(iter.str)
        }
    }

    /// Borrows a [`String`] as a [`StringIter`].
    impl<'t> From<&'t String> for StringIter<'t> {
        fn from(s: &'t String) -> Self {
            Self::new(s.as_ref())
        }
    }

    /// Borrows a `Box<str>` as a [`StringIter`].
    impl<'t> From<&'t Box<str>> for StringIter<'t> {
        fn from(s: &'t Box<str>) -> Self {
            Self::new(s.as_ref())
        }
    }

    /// Borrows an `Rc<str>` as a [`StringIter`].
    impl<'t> From<&'t Rc<str>> for StringIter<'t> {
        fn from(s: &'t Rc<str>) -> Self {
            Self::new(s.as_ref())
        }
    }

    /// Borrows an `Arc<str>` as a [`StringIter`].
    impl<'t> From<&'t Arc<str>> for StringIter<'t> {
        fn from(s: &'t Arc<str>) -> Self {
            Self::new(s.as_ref())
        }
    }

    /// Borrows a [`Cow`] (owned or borrowed) as a [`StringIter`].
    impl<'a, 't: 'a> From<&'t Cow<'a, str>> for StringIter<'t> {
        fn from(s: &'t Cow<'a, str>) -> Self {
            Self::new(s.as_ref())
        }
    }

    /// This conversion only works if the [`Cow`] is Borrowed
    impl<'t> TryFrom<Cow<'t, str>> for StringIter<'t> {
        type Error = ();
        fn try_from(cow: Cow<'t, str>) -> Result<Self, ()> {
            match cow {
                Cow::Borrowed(s) => Ok(Self { str: s }),
                // An owned `Cow` would be dropped at the end of this call,
                // leaving nothing with lifetime `'t` to borrow from.
                Cow::Owned(_) => Err(()),
            }
        }
    }
};