stry_common/utils/fenn/slice.rs
1//! Types allowing for the 'lazy' slicing of `&str`s while keeping a single lifetime.
2
3use std::{
4 fmt::{Display, Error, Formatter},
5 ops::{Range, RangeFrom, RangeFull, RangeInclusive, RangeTo, RangeToInclusive},
6};
7
/// A 'lazy' [`str`] slice using [`Range`]s.
///
/// A `Slice` keeps a reference to the original string together with the byte
/// range it currently covers. Operations that return a new `Slice` only build
/// a new range; the source is sliced when a method needs the actual text.
///
/// The derived `PartialEq`/`Hash` look at both fields, so two slices compare
/// equal only when their sources have the same contents and their ranges match.
#[derive(Debug, Hash, PartialEq, Eq)]
pub struct Slice<'s> {
    /// The string this slice was created from.
    source: &'s str,
    /// Byte range of `source` that this slice currently covers.
    range: Range<usize>,
}
14
15impl<'s> Slice<'s> {
16 pub const fn new(source: &'s str) -> Slice<'s> {
17 Slice {
18 range: 0..source.len(),
19 source,
20 }
21 }
22
23 /// Returns the length of `self`.
24 ///
25 /// This length is in bytes, not [`char`]s or graphemes. In other words,
26 /// it may not be what a human considers the length of the string.
27 ///
28 /// [`char`]: prim@char
29 ///
30 /// # Examples
31 ///
32 /// Basic usage:
33 ///
34 /// ```
35 /// # use lazy_slice::Slice;
36 /// let len = Slice::new("foo").len();
37 /// assert_eq!(3, len);
38 ///
39 /// assert_eq!(Slice::new("ƒoo").len(), 4); // fancy f!
40 /// # // assert_eq!(Slice::new("ƒoo").chars().count(), 3);
41 /// ```
42 pub const fn len(&self) -> usize {
43 self.range.end - self.range.start
44 }
45
46 /// Returns `true` if `self` has a length of zero bytes.
47 ///
48 /// # Examples
49 ///
50 /// Basic usage:
51 ///
52 /// ```
53 /// # use lazy_slice::Slice;
54 /// let s = Slice::new("");
55 /// assert!(s.is_empty());
56 ///
57 /// let s = Slice::new("not empty");
58 /// assert!(!s.is_empty());
59 /// ```
60 pub const fn is_empty(&self) -> bool {
61 self.range.start == self.range.end
62 }
63
64 //
65
66 /// Returns `true` if the given pattern matches a prefix of this
67 /// string slice.
68 ///
69 /// Returns `false` if it does not.
70 ///
71 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
72 /// function or closure that determines if a character matches.
73 ///
74 /// [`char`]: prim@char
75 /// [pattern]: std::str::pattern::Pattern
76 ///
77 /// # Examples
78 ///
79 /// Basic usage:
80 ///
81 /// ```
82 /// # use lazy_slice::Slice;
83 /// let bananas = Slice::new("bananas");
84 ///
85 /// assert!(bananas.starts_with("bana"));
86 /// assert!(!bananas.starts_with("nana"));
87 /// ```
88 pub fn starts_with<'r, P, F>(&self, pat: P) -> bool
89 where
90 P: Into<Pattern<'r, F>>,
91 F: FnMut(char) -> bool,
92 {
93 let pat: Pattern<'r, F> = pat.into();
94
95 let slice = &self.source[self.range.start..self.range.end];
96
97 match pat {
98 Pattern::Char(pat) => slice.starts_with(pat),
99 Pattern::CharArrayRef(pat) => slice.starts_with(pat),
100 Pattern::Function(pat) => slice.starts_with(pat),
101 Pattern::Str(pat) => slice.starts_with(pat),
102 Pattern::StrRef(pat) => slice.starts_with(pat),
103 Pattern::StringRef(pat) => slice.starts_with(pat),
104 }
105 }
106
107 /// Returns `true` if the given pattern matches a suffix of this
108 /// string slice.
109 ///
110 /// Returns `false` if it does not.
111 ///
112 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
113 /// function or closure that determines if a character matches.
114 ///
115 /// [`char`]: prim@char
116 /// [pattern]: std::str::pattern::Pattern
117 ///
118 /// # Examples
119 ///
120 /// Basic usage:
121 ///
122 /// ```
123 /// # use lazy_slice::Slice;
124 /// let bananas = Slice::new("bananas");
125 ///
126 /// assert!(bananas.ends_with("anas"));
127 /// assert!(!bananas.ends_with("nana"));
128 /// ```
129 pub fn ends_with<'r, P, F>(&self, pat: P) -> bool
130 where
131 P: Into<Pattern<'r, F>>,
132 F: FnMut(char) -> bool,
133 {
134 let pat: Pattern<'r, F> = pat.into();
135
136 let slice = &self.source[self.range.start..self.range.end];
137
138 match pat {
139 Pattern::Char(pat) => slice.ends_with(pat),
140 Pattern::CharArrayRef(pat) => slice.ends_with(pat),
141 Pattern::Function(pat) => slice.ends_with(pat),
142 Pattern::Str(pat) => slice.ends_with(pat),
143 Pattern::StrRef(pat) => slice.ends_with(pat),
144 Pattern::StringRef(pat) => slice.ends_with(pat),
145 }
146 }
147
148 /// Returns `true` if the given pattern matches a sub-slice of
149 /// this string slice.
150 ///
151 /// Returns `false` if it does not.
152 ///
153 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
154 /// function or closure that determines if a character matches.
155 ///
156 /// [`char`]: prim@char
157 /// [pattern]: std::str::pattern::Pattern
158 ///
159 /// # Examples
160 ///
161 /// Basic usage:
162 ///
163 /// ```
164 /// # use lazy_slice::Slice;
165 /// let bananas = Slice::new("bananas");
166 ///
167 /// assert!(bananas.contains("nana"));
168 /// assert!(!bananas.contains("apples"));
169 /// ```
170 pub fn contains<'r, P, F>(&self, pat: P) -> bool
171 where
172 P: Into<Pattern<'r, F>>,
173 F: FnMut(char) -> bool,
174 {
175 let pat: Pattern<'r, F> = pat.into();
176
177 let slice = &self.source[self.range.start..self.range.end];
178
179 match pat {
180 Pattern::Char(pat) => slice.contains(pat),
181 Pattern::CharArrayRef(pat) => slice.contains(pat),
182 Pattern::Function(pat) => slice.contains(pat),
183 Pattern::Str(pat) => slice.contains(pat),
184 Pattern::StrRef(pat) => slice.contains(pat),
185 Pattern::StringRef(pat) => slice.contains(pat),
186 }
187 }
188
189 //
190
191 // pub fn lines(&self) -> Lines<'s> {
192 // self.split('\n');
193
194 // todo!()
195 // }
196
197 //
198
199 // pub fn split<'r, P, F>(&self, pat: P)
200 // where
201 // P: Into<Pattern<'r, F>>,
202 // F: FnMut(char) -> bool,
203 // {
204 // }
205
206 //
207
208 /// Returns a string slice with leading and trailing whitespace removed.
209 ///
210 /// 'Whitespace' is defined according to the terms of the Unicode Derived
211 /// Core Property `White_Space`.
212 ///
213 /// # Examples
214 ///
215 /// Basic usage:
216 ///
217 /// ```
218 /// # use lazy_slice::Slice;
219 /// let s = Slice::new(" Hello\tworld\t");
220 ///
221 /// assert_eq!("Hello\tworld", s.trim().slice());
222 /// ```
223 pub fn trim(&self) -> Slice<'s> {
224 self.trim_start().trim_end()
225 }
226
227 /// Returns a string slice with leading whitespace removed.
228 ///
229 /// 'Whitespace' is defined according to the terms of the Unicode Derived
230 /// Core Property `White_Space`.
231 ///
232 /// # Text directionality
233 ///
234 /// A string is a sequence of bytes. `start` in this context means the first
235 /// position of that byte string; for a left-to-right language like English or
236 /// Russian, this will be left side, and for right-to-left languages like
237 /// Arabic or Hebrew, this will be the right side.
238 ///
239 /// # Examples
240 ///
241 /// Basic usage:
242 ///
243 /// ```
244 /// # use lazy_slice::Slice;
245 /// let s = Slice::new(" Hello\tworld\t");
246 /// assert_eq!("Hello\tworld\t", s.trim_start().slice());
247 /// ```
248 ///
249 /// Directionality:
250 ///
251 /// ```
252 /// # use lazy_slice::Slice;
253 /// let s = Slice::new(" English ");
254 /// assert!(Some('E') == s.trim_start().slice().chars().next());
255 ///
256 /// let s = Slice::new(" עברית ");
257 /// assert!(Some('ע') == s.trim_start().slice().chars().next());
258 /// ```
259 pub fn trim_start(&self) -> Slice<'s> {
260 let slice = &self.source[self.range.start..self.range.end];
261
262 if !slice.starts_with(|c: char| c.is_whitespace()) {
263 // return early if no whitespace
264 return Slice {
265 source: self.source,
266 range: self.range.start..self.range.end,
267 };
268 }
269
270 let mut up_to = 0;
271
272 for (i, c) in slice.char_indices() {
273 if !c.is_whitespace() {
274 break;
275 }
276
277 up_to = i;
278 }
279
280 // last index points to the start of the last whitespace
281 // we need to remove it
282 up_to += 1;
283
284 Slice {
285 source: self.source,
286 range: (self.range.start + up_to)..self.range.end,
287 }
288 }
289
290 /// Returns a string slice with trailing whitespace removed.
291 ///
292 /// 'Whitespace' is defined according to the terms of the Unicode Derived
293 /// Core Property `White_Space`.
294 ///
295 /// # Text directionality
296 ///
297 /// A string is a sequence of bytes. `end` in this context means the last
298 /// position of that byte string; for a left-to-right language like English or
299 /// Russian, this will be right side, and for right-to-left languages like
300 /// Arabic or Hebrew, this will be the left side.
301 ///
302 /// # Examples
303 ///
304 /// Basic usage:
305 ///
306 /// ```
307 /// # use lazy_slice::Slice;
308 /// let s = Slice::new(" Hello\tworld\t");
309 /// assert_eq!(" Hello\tworld", s.trim_end().slice());
310 /// ```
311 ///
312 /// Directionality:
313 ///
314 /// ```
315 /// # use lazy_slice::Slice;
316 /// let s = Slice::new(" English ");
317 /// assert!(Some('h') == s.trim_end().slice().chars().rev().next());
318 ///
319 /// let s = Slice::new(" עברית ");
320 /// assert!(Some('ת') == s.trim_end().slice().chars().rev().next());
321 /// ```
322 pub fn trim_end(&self) -> Slice<'s> {
323 let slice = &self.source[self.range.start..self.range.end];
324
325 if !slice.ends_with(|c: char| c.is_whitespace()) {
326 // return early if no whitespace
327 return Slice {
328 source: self.source,
329 range: self.range.start..self.range.end,
330 };
331 }
332
333 let mut down_to = 0;
334
335 for (i, c) in slice.char_indices().rev() {
336 if !c.is_whitespace() {
337 break;
338 }
339
340 down_to = i;
341 }
342
343 if down_to == 0 {
344 // there were no whitespace
345 down_to = self.range.end;
346 }
347
348 Slice {
349 source: self.source,
350 range: self.range.start..(down_to + self.range.start),
351 }
352 }
353
354 //
355
356 /// It isn't possible to return a owned [`Slice`] from a [`Index`], so you have to use this function.
357 ///
358 /// [`Index`]: std::ops::Index
359 pub fn index<R>(&self, range: R) -> Slice<'s>
360 where
361 R: Into<Ranges>,
362 {
363 Slice {
364 source: self.source,
365 range: Self::normalize_ranges((self.range.start)..(self.range.end), range.into()),
366 }
367 }
368
369 #[inline]
370 const fn normalize_ranges(base: Range<usize>, range: Ranges) -> Range<usize> {
371 match range {
372 // ..
373 Ranges::RangeFull(_) => (base.start)..(base.end),
374 // <num>..<num>
375 Ranges::Range(range) => Self::convert_range((base.start)..(base.end), range),
376 // <num>..=<num>
377 Ranges::RangeInclusive(range) => Self::convert_range(
378 (base.start)..(base.end),
379 (*range.start())..((*range.end()) + 1),
380 ),
381 // ..<num>
382 Ranges::RangeTo(range) => Self::convert_range_to((base.start)..(base.end), range),
383 // ..=<num>
384 Ranges::RangeToInclusive(range) => {
385 Self::convert_range_to((base.start)..(base.end), ..(range.end + 1))
386 }
387 // <num>..
388 Ranges::RangeFrom(range) => (base.start + range.start)..(base.end),
389 }
390 }
391
392 #[inline]
393 const fn convert_range(base: Range<usize>, other: Range<usize>) -> Range<usize> {
394 (base.start + other.start)..(if other.end == 0 {
395 base.end
396 } else {
397 base.start + other.end
398 })
399 }
400
401 #[inline]
402 const fn convert_range_to(base: Range<usize>, other: RangeTo<usize>) -> Range<usize> {
403 (base.start)..(if other.end == 0 {
404 base.end
405 } else {
406 base.start + other.end
407 })
408 }
409
410 //
411
412 /// Consume and 'run' the slice, returning the given range of the source [`str`].
413 pub fn slice(mut self) -> &'s str {
414 self.source = &self.source[self.range.start..self.range.end];
415
416 self.source
417 }
418}
419
420impl<'s> Clone for Slice<'s> {
421 fn clone(&self) -> Self {
422 Slice {
423 source: self.source,
424 range: self.range.start..self.range.end,
425 }
426 }
427}
428
429impl<'s> Display for Slice<'s> {
430 fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
431 write!(f, "{}", &self.source[self.range.start..self.range.end])
432 }
433}
434
435// pub struct Split<'s> {
436// source: &'s str,
437// }
438
439// impl<'s> Iterator for Split<'s> {
440// type Item = Slice<'s>;
441
442// fn next(&mut self) -> Option<Self::Item> {
443// todo!()
444// }
445// }
446
447// pub struct Lines<'s> {
448// source: &'s str,
449// }
450
451// impl<'s> Iterator for Lines<'s> {
452// type Item = Slice<'s>;
453
454// fn next(&mut self) -> Option<Self::Item> {
455// todo!()
456// }
457// }
458
/// Every range form accepted by [`Slice::index`], gathered behind one type.
///
/// Each variant wraps the matching `std::ops` range over `usize`; the `From`
/// impls below let callers pass range literals directly.
pub enum Ranges {
    /// `start..end`
    Range(Range<usize>),
    /// `start..`
    RangeFrom(RangeFrom<usize>),
    /// `..`
    RangeFull(RangeFull),
    /// `start..=end`
    RangeInclusive(RangeInclusive<usize>),
    /// `..end`
    RangeTo(RangeTo<usize>),
    /// `..=end`
    RangeToInclusive(RangeToInclusive<usize>),
}
467
468impl From<Range<usize>> for Ranges {
469 fn from(range: Range<usize>) -> Ranges {
470 Ranges::Range(range)
471 }
472}
473
474impl From<RangeFrom<usize>> for Ranges {
475 fn from(range: RangeFrom<usize>) -> Ranges {
476 Ranges::RangeFrom(range)
477 }
478}
479
480impl From<RangeFull> for Ranges {
481 fn from(range: RangeFull) -> Ranges {
482 Ranges::RangeFull(range)
483 }
484}
485
486impl From<RangeInclusive<usize>> for Ranges {
487 fn from(range: RangeInclusive<usize>) -> Ranges {
488 Ranges::RangeInclusive(range)
489 }
490}
491
492impl From<RangeTo<usize>> for Ranges {
493 fn from(range: RangeTo<usize>) -> Ranges {
494 Ranges::RangeTo(range)
495 }
496}
497
498impl From<RangeToInclusive<usize>> for Ranges {
499 fn from(range: RangeToInclusive<usize>) -> Ranges {
500 Ranges::RangeToInclusive(range)
501 }
502}
503
/// A horrible wrapper around the unstable [Pattern API](https://github.com/rust-lang/rust/issues/56345).
///
/// Each variant holds one kind of value that the matching methods on
/// [`Slice`] forward to the corresponding [`str`] method. Values are normally
/// created through the `From` impls below rather than by naming a variant.
///
/// `F` is the closure type used by [`Pattern::Function`]; the non-closure
/// `From` impls fix it to `fn(char) -> bool`.
pub enum Pattern<'r, F>
where
    F: FnMut(char) -> bool,
{
    /// A single character.
    Char(char),
    /// A borrowed slice of characters.
    CharArrayRef(&'r [char]),
    /// A function or closure that is called with a `char` and returns a `bool`.
    Function(F),
    /// A string slice.
    Str(&'r str),
    /// A reference to a string slice.
    StrRef(&'r &'r str),
    /// A reference to an owned [`String`].
    StringRef(&'r String),
}
516
517impl From<char> for Pattern<'_, fn(char) -> bool> {
518 fn from(pat: char) -> Self {
519 Pattern::Char(pat)
520 }
521}
522
523impl<'r> From<&'r [char]> for Pattern<'r, fn(char) -> bool> {
524 fn from(pat: &'r [char]) -> Self {
525 Pattern::CharArrayRef(pat)
526 }
527}
528
529impl<F> From<F> for Pattern<'_, F>
530where
531 F: FnMut(char) -> bool,
532{
533 fn from(pat: F) -> Self {
534 Pattern::Function(pat)
535 }
536}
537
538impl<'r> From<&'r str> for Pattern<'r, fn(char) -> bool> {
539 fn from(pat: &'r str) -> Self {
540 Pattern::Str(pat)
541 }
542}
543
544impl<'r> From<&'r &'r str> for Pattern<'r, fn(char) -> bool> {
545 fn from(pat: &'r &'r str) -> Self {
546 Pattern::StrRef(pat)
547 }
548}
549
550impl<'r> From<&'r String> for Pattern<'r, fn(char) -> bool> {
551 fn from(pat: &'r String) -> Self {
552 Pattern::StringRef(pat)
553 }
554}
555
#[cfg(test)]
mod test {
    use super::*;

    /// A plain string prefix is recognised.
    #[test]
    fn test_starts_with() {
        let greeting = Slice::new("Hello World!");

        assert!(greeting.starts_with("Hello"), "`starts_with` is true");
    }

    /// `trim` leaves clean input alone and strips both ends otherwise.
    #[test]
    fn test_trim() {
        let untouched = Slice::new("Hello World!").trim();
        assert_eq!(
            "Hello World!",
            untouched.slice(),
            "`trim` without any whitespace",
        );

        let trimmed = Slice::new(" Hello World! ").trim();
        assert_eq!("Hello World!", trimmed.slice(), "`trim` with whitespace");
    }

    /// `trim_start` only touches the front of the slice.
    #[test]
    fn test_trim_start() {
        let untouched = Slice::new("Hello World!").trim_start();
        assert_eq!(
            "Hello World!",
            untouched.slice(),
            "`trim_start` without any whitespace",
        );

        let trimmed = Slice::new(" Hello World!").trim_start();
        assert_eq!(
            "Hello World!",
            trimmed.slice(),
            "`trim_start` with whitespace",
        );
    }

    /// `trim_end` only touches the back of the slice.
    #[test]
    fn test_trim_end() {
        let untouched = Slice::new("Hello World!").trim_end();
        assert_eq!(
            "Hello World!",
            untouched.slice(),
            "`trim_end` without any whitespace",
        );

        let trimmed = Slice::new("Hello World! ").trim_end();
        assert_eq!(
            "Hello World!",
            trimmed.slice(),
            "`trim_end` with whitespace",
        );
    }

    /// Different range forms can be chained; each is relative to the previous view.
    #[test]
    fn test_index() {
        let source = Slice::new("Hello World!");

        let hello_world = source.index(..11); // "Hello World"
        let world = hello_world.index(6..); // "World"
        let still_world = world.index(..); // "World"
        let or = still_world.index(1..=2); // "or"
        let slice = or.index(..);

        assert_eq!("or", slice.slice());
    }

    /// Repeated full ranges never change the view.
    #[test]
    fn test_index_range_full() {
        let mut slice = Slice::new("Hello World!");

        for _ in 0..5 {
            slice = slice.index(..);
        }

        assert_eq!("Hello World!", slice.slice());
    }

    /// Half-open ranges are applied relative to the current view.
    #[test]
    fn test_index_range() {
        let source = Slice::new("Hello World!");

        let step_one = source.index(0..11); // "Hello World"
        let step_two = step_one.index(1..11); // "ello World"
        let slice = step_two.index(2..8); // "lo Wor"

        assert_eq!("lo Wor", slice.slice());
    }

    /// Inclusive ranges include their end index.
    #[test]
    fn test_index_range_inclusive() {
        let source = Slice::new("Hello World!");

        let step_one = source.index(0..=11); // "Hello World!"
        let step_two = step_one.index(1..=8); // "ello Wor"
        let slice = step_two.index(0..=4); // "ello "

        assert_eq!("ello ", slice.slice());
    }
}