pipe_chain/
str.rs

1//! [str] related combinators
2use crate::{
3    take_atom, take_while, Incomplete, InvalidRepetition, LenBytes, Pipe, Repetition,
4    Tag, TakeAtom,
5};
6use std::{error::Error as StdError, str::CharIndices};
7use tuplify::PushBack;
8use unicode_segmentation::{
9    GraphemeIndices, USentenceBoundIndices, UWordBoundIndices, UnicodeSegmentation,
10};
11
12impl LenBytes for char {
13    fn len_bytes(&self) -> usize { self.len_utf8() }
14}
15
16impl LenBytes for &str {
17    fn len_bytes(&self) -> usize { self.len() }
18}
19
/// Splits an [str] in chars
///
/// Holds the whole source string together with its [CharIndices] iterator.
pub struct CharAtom<'a>(&'a str, CharIndices<'a>);

impl<'a> From<&'a str> for CharAtom<'a> {
    /// Wraps `value` and starts iterating over its chars from the beginning
    fn from(value: &'a str) -> Self {
        let indices = value.char_indices();
        Self(value, indices)
    }
}
26
27impl<'a> TakeAtom for CharAtom<'a> {
28    type Atom = char;
29    type Container = &'a str;
30
31    fn next(&mut self) -> Option<(usize, Self::Atom)> { self.1.next() }
32
33    fn split_at(self, index: usize) -> (Self::Container, Self::Container) {
34        (&self.0[index..], &self.0[..index])
35    }
36}
37
#[cfg(feature = "unicode")]
/// Splits an [str] in extended grapheme clusters
/// see: [UnicodeSegmentation::grapheme_indices]
pub struct GraphemeAtom<'a>(&'a str, GraphemeIndices<'a>);

#[cfg(feature = "unicode")]
impl<'a> From<&'a str> for GraphemeAtom<'a> {
    /// Wraps `value` and starts iterating over its extended grapheme clusters
    fn from(value: &'a str) -> Self {
        let indices = value.grapheme_indices(true);
        Self(value, indices)
    }
}
46
#[cfg(feature = "unicode")]
impl<'a> TakeAtom for GraphemeAtom<'a> {
    type Atom = &'a str;
    type Container = &'a str;

    /// Yields the next grapheme cluster along with its byte offset in the source string
    fn next(&mut self) -> Option<(usize, Self::Atom)> {
        let Self(_, indices) = self;
        indices.next()
    }

    /// Cuts the source string at byte `index`
    ///
    /// The first element is the part starting at `index`,
    /// the second element is the part before `index`.
    fn split_at(self, index: usize) -> (Self::Container, Self::Container) {
        let source = self.0;
        let tail = &source[index..];
        let head = &source[..index];
        (tail, head)
    }
}
58
#[cfg(feature = "unicode")]
/// Splits an [str] at word boundaries
/// see: [UnicodeSegmentation::split_word_bound_indices]
pub struct WordAtom<'a>(&'a str, UWordBoundIndices<'a>);

#[cfg(feature = "unicode")]
impl<'a> From<&'a str> for WordAtom<'a> {
    /// Wraps `value` and starts iterating over its word-boundary segments
    fn from(value: &'a str) -> Self {
        let indices = value.split_word_bound_indices();
        Self(value, indices)
    }
}
67
#[cfg(feature = "unicode")]
impl<'a> TakeAtom for WordAtom<'a> {
    type Atom = &'a str;
    type Container = &'a str;

    /// Yields the next word-boundary segment along with its byte offset in the source string
    fn next(&mut self) -> Option<(usize, Self::Atom)> {
        let Self(_, indices) = self;
        indices.next()
    }

    /// Cuts the source string at byte `index`
    ///
    /// The first element is the part starting at `index`,
    /// the second element is the part before `index`.
    fn split_at(self, index: usize) -> (Self::Container, Self::Container) {
        let source = self.0;
        let tail = &source[index..];
        let head = &source[..index];
        (tail, head)
    }
}
79
#[cfg(feature = "unicode")]
/// Splits an [str] at sentence boundaries
/// see: [UnicodeSegmentation::split_sentence_bound_indices]
pub struct SentenceAtom<'a>(&'a str, USentenceBoundIndices<'a>);

#[cfg(feature = "unicode")]
impl<'a> From<&'a str> for SentenceAtom<'a> {
    /// Wraps `value` and starts iterating over its sentence-boundary segments
    fn from(value: &'a str) -> Self {
        let indices = value.split_sentence_bound_indices();
        Self(value, indices)
    }
}
90
#[cfg(feature = "unicode")]
impl<'a> TakeAtom for SentenceAtom<'a> {
    type Atom = &'a str;
    type Container = &'a str;

    /// Yields the next sentence-boundary segment along with its byte offset in the source string
    fn next(&mut self) -> Option<(usize, Self::Atom)> {
        let Self(_, indices) = self;
        indices.next()
    }

    /// Cuts the source string at byte `index`
    ///
    /// The first element is the part starting at `index`,
    /// the second element is the part before `index`.
    fn split_at(self, index: usize) -> (Self::Container, Self::Container) {
        let source = self.0;
        let tail = &source[index..];
        let head = &source[..index];
        (tail, head)
    }
}
102
103/// Takes the given quantity of ascii whitespaces
104pub fn whitespaces<'a, E>(
105    qty: impl TryInto<Repetition, Error = impl Into<InvalidRepetition>>,
106) -> impl Pipe<&'a str, (&'a str,), E>
107where
108    Incomplete: Into<E>,
109    E: StdError,
110{
111    let qty = qty.try_into().map_err(Into::into).unwrap();
112    move |i: &'a str| {
113        take_while(|x: char| x.is_ascii_whitespace(), qty).apply(CharAtom::from(i))
114    }
115}
116
117/// Takes the given quantity of ascii digits
118pub fn digits<'a, E>(
119    qty: impl TryInto<Repetition, Error = impl Into<InvalidRepetition>>,
120) -> impl Pipe<&'a str, (&'a str,), E>
121where
122    Incomplete: Into<E>,
123    E: StdError,
124{
125    let qty = qty.try_into().map_err(Into::into).unwrap();
126    move |i: &'a str| {
127        take_while(|x: char| x.is_ascii_digit(), qty).apply(CharAtom::from(i))
128    }
129}
130
131/// Takes the given quantity of ascii hexadecimal digits
132pub fn hex_digits<'a, E>(
133    qty: impl TryInto<Repetition, Error = impl Into<InvalidRepetition>>,
134) -> impl Pipe<&'a str, (&'a str,), E>
135where
136    Incomplete: Into<E>,
137    E: StdError,
138{
139    let qty = qty.try_into().map_err(Into::into).unwrap();
140    move |i: &'a str| {
141        take_while(|x: char| x.is_ascii_hexdigit(), qty).apply(CharAtom::from(i))
142    }
143}
144
145/// Takes the given quantity of ascii octal digits
146pub fn oct_digits<'a, E>(
147    qty: impl TryInto<Repetition, Error = impl Into<InvalidRepetition>>,
148) -> impl Pipe<&'a str, (&'a str,), E>
149where
150    Incomplete: Into<E>,
151    E: StdError,
152{
153    let qty = qty.try_into().map_err(Into::into).unwrap();
154    move |i: &'a str| {
155        take_while(|x: char| matches!(x, '0'..='7'), qty).apply(CharAtom::from(i))
156    }
157}
158
159/// Takes the given quantity of ascii binary digits
160pub fn bin_digits<'a, E>(
161    qty: impl TryInto<Repetition, Error = impl Into<InvalidRepetition>>,
162) -> impl Pipe<&'a str, (&'a str,), E>
163where
164    Incomplete: Into<E>,
165    E: StdError,
166{
167    let qty = qty.try_into().map_err(Into::into).unwrap();
168    move |i: &'a str| {
169        take_while(|x: char| matches!(x, '0'..='1'), qty).apply(CharAtom::from(i))
170    }
171}
172
/// Error of a [str] tag: holds the expected string, then the string actually found
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TagStrError(
    /// the string the tag expected
    pub String,
    /// the string found in the input instead
    pub String,
);

impl std::fmt::Display for TagStrError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self(expected, found) = self;
        write!(f, "Tag: expected: '{expected}' got: '{found}'")
    }
}

impl std::error::Error for TagStrError {}
184
185impl<'a, 'b, E> Tag<&'a str, E> for &'b str
186where
187    E: StdError,
188    Incomplete: Into<E>,
189    TagStrError: Into<E>,
190{
191    type Output = &'a str;
192
193    fn strip_from(&self, input: &'a str) -> Result<(&'a str, (Self::Output,)), E> {
194        if let Some(x) = input.strip_prefix(self) {
195            Ok((x, (&input[..self.len()],)))
196        } else {
197            Err(if self.starts_with(input) {
198                Incomplete::Size(self.len() - input.len()).into()
199            } else {
200                let end = if input.len() < self.len() {
201                    input.len()
202                } else {
203                    input.ceil_char_boundary(self.len())
204                };
205                TagStrError(self.to_string(), input[..end].to_string()).into()
206            })
207        }
208    }
209}
210
/// Error of a [char] tag: holds the expected char, then the char actually found
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TagCharError(
    /// the char the tag expected
    pub char,
    /// the char found in the input instead
    pub char,
);

impl std::fmt::Display for TagCharError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self(expected, found) = self;
        write!(f, "Tag: expected: '{expected}' got: '{found}'")
    }
}

impl std::error::Error for TagCharError {}
222
223impl<'a, E> Tag<&'a str, E> for char
224where
225    E: StdError,
226    Incomplete: Into<E>,
227    TagCharError: Into<E>,
228{
229    type Output = char;
230
231    fn strip_from(&self, input: &'a str) -> Result<(&'a str, (Self::Output,)), E> {
232        if let Some(x) = input.strip_prefix(*self) {
233            Ok((x, (*self,)))
234        } else {
235            Err(if input.len() < self.len_utf8() {
236                Incomplete::Size(self.len_utf8() - input.len()).into()
237            } else {
238                TagCharError(*self, input.chars().next().unwrap()).into()
239            })
240        }
241    }
242}
243
244/// takes `qty` of chars from an input
245///
246/// ```rust
247/// # use pipe_chain::{
248/// #   str::chars,
249/// #   Incomplete, Pipe,
250/// # };
251/// assert_eq!(
252///     chars::<_, Incomplete>(..5).unwrap().apply("aỹe"),
253///     Ok(("", (vec!['a', 'y', '\u{0303}', 'e'],)))
254/// );
255/// ```
256pub fn chars<'a, E, E2>(
257    qty: impl TryInto<Repetition, Error = E>,
258) -> Result<impl Pipe<&'a str, (Vec<char>,), E2>, E>
259where
260    Incomplete: Into<E2>,
261{
262    let qty = qty.try_into()?;
263    Ok(move |input| take_atom(qty).unwrap().apply(CharAtom::from(input)))
264}
265
#[cfg(feature = "unicode")]
/// Builds a pipe collecting `qty` grapheme clusters from the start of an input
///
/// Only available with the `unicode` feature, like the [GraphemeAtom] it
/// relies on.
///
/// Returns the conversion error when `qty` can't be converted into a
/// [Repetition].
///
/// ```rust
/// # use pipe_chain::{
/// #   str::graphemes,
/// #   Incomplete, Pipe,
/// # };
/// assert_eq!(
///     graphemes::<_, Incomplete>(..5).unwrap().apply("ay\u{0303}e"),
///     Ok(("", (vec!["a", "y\u{0303}", "e"],)))
/// );
/// ```
pub fn graphemes<'a, E, E2>(
    qty: impl TryInto<Repetition, Error = E>,
) -> Result<impl Pipe<&'a str, (Vec<&'a str>,), E2>, E>
where
    Incomplete: Into<E2>,
{
    let qty = qty.try_into()?;
    Ok(move |input| take_atom(qty).unwrap().apply(GraphemeAtom::from(input)))
}
287
#[cfg(feature = "unicode")]
/// Builds a pipe collecting `qty` word-boundary segments from the start of an input
///
/// Segments between words (such as spaces) count as items too, as shown in
/// the example below.
///
/// Only available with the `unicode` feature, like the [WordAtom] it relies
/// on.
///
/// Returns the conversion error when `qty` can't be converted into a
/// [Repetition].
///
/// ```rust
/// # use pipe_chain::{
/// #   str::words,
/// #   Incomplete, Pipe,
/// # };
/// assert_eq!(
///     words::<_, Incomplete>(..5)
///         .unwrap()
///         .apply("Pack my box with five dozen liquor jugs."),
///     Ok((" with five dozen liquor jugs.", (vec!["Pack", " ", "my", " ", "box",],)))
/// );
/// ```
pub fn words<'a, E, E2>(
    qty: impl TryInto<Repetition, Error = E>,
) -> Result<impl Pipe<&'a str, (Vec<&'a str>,), E2>, E>
where
    Incomplete: Into<E2>,
{
    let qty = qty.try_into()?;
    Ok(move |input| take_atom(qty).unwrap().apply(WordAtom::from(input)))
}
311
#[cfg(feature = "unicode")]
/// Builds a pipe collecting `qty` sentences from the start of an input
///
/// Only available with the `unicode` feature, like the [SentenceAtom] it
/// relies on.
///
/// Returns the conversion error when `qty` can't be converted into a
/// [Repetition].
///
/// ```rust
/// # use pipe_chain::{
/// #   str::sentences,
/// #   Incomplete, Pipe,
/// # };
/// assert_eq!(
///     sentences::<_, Incomplete>(..2)
///         .unwrap()
///         .apply("Sentence 1. Sentence 2. Sentence 3. Sentence 4"),
///     Ok(("Sentence 3. Sentence 4", (vec!["Sentence 1. ", "Sentence 2. "],)))
/// );
/// assert_eq!(
///     sentences::<_, Incomplete>(..)
///         .unwrap()
///         .apply("Sentence 1. Sentence 2. Sentence 3. Sentence 4"),
///     Ok(("", (vec!["Sentence 1. ", "Sentence 2. ", "Sentence 3. ", "Sentence 4"],)))
/// );
/// ```
pub fn sentences<'a, E, E2>(
    qty: impl TryInto<Repetition, Error = E>,
) -> Result<impl Pipe<&'a str, (Vec<&'a str>,), E2>, E>
where
    Incomplete: Into<E2>,
{
    let qty = qty.try_into()?;
    Ok(move |input| take_atom(qty).unwrap().apply(SentenceAtom::from(input)))
}
342
343/// Returns the consumed input instead of the pipe result
344/// ```rust
345/// # use fatal_error::FatalError;
346/// # use pipe_chain::{Pipe, AndExt, AndThenExt, tag, str::{consumed, TagStrError}, Incomplete};
347/// # #[derive(Debug, PartialEq, Eq)]
348/// # enum Error {
349/// #     Incomplete(Incomplete),
350/// #     Tag(TagStrError)
351/// # }
352/// #
353/// # impl std::fmt::Display for Error {
354/// #     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
355/// #         write!(f, "{self:?}")
356/// #     }
357/// # }
358/// #
359/// # impl From<Incomplete> for Error {
360/// #     fn from(value: Incomplete) -> Self { Error::Incomplete(value) }
361/// # }
362/// # impl From<TagStrError> for Error {
363/// #     fn from(value: TagStrError) -> Self { Error::Tag(value) }
364/// # }
365/// #
366/// # impl std::error::Error for Error {}
367/// assert_eq!(
368///     consumed(tag::<Error, _, _>("foo").and(tag("bar"))).apply("foobarbaz"),
369///     Ok(("baz", ("foobar",)))
370/// );
371/// ```
372pub fn consumed<'a, O, E>(
373    mut p: impl Pipe<&'a str, O, E>,
374) -> impl Pipe<&'a str, (&'a str,), E> {
375    move |x: &'a str| {
376        let (i, _) = p.apply(x)?;
377        Ok((i, (&x[..x.len() - i.len()],)))
378    }
379}
380
381/// Get the consumed offset in bytes in addition to the output of the given [Pipe]
382/// ```rust
383/// # use pipe_chain::{
384/// #     str::{consumed, with_offset, TagStrError},
385/// #     tag, AndExt, Incomplete, Pipe,
386/// # };
387/// # use fatal_error::FatalError;
388/// #
389/// # #[derive(Debug, PartialEq, Eq)]
390/// # enum Error {
391/// #     Incomplete(Incomplete),
392/// #     Tag(TagStrError),
393/// # }
394/// #
395/// # impl std::fmt::Display for Error {
396/// #     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
397/// #         write!(f, "{self:?}")
398/// #     }
399/// # }
400/// #
401/// # impl From<Incomplete> for Error {
402/// #     fn from(value: Incomplete) -> Self { Error::Incomplete(value) }
403/// # }
404/// #
405/// # impl From<TagStrError> for Error {
406/// #     fn from(value: TagStrError) -> Self { Error::Tag(value) }
407/// # }
408/// #
409/// # impl std::error::Error for Error {}
410/// assert_eq!(
411///     with_offset(tag::<Error, _, _>("foo").and(tag("bar"))).apply("foobarbaz"),
412///     Ok(("baz", ("foo", "bar", 6)))
413/// );
414/// ```
415pub fn with_offset<'a, O: PushBack<usize>, E>(
416    mut p: impl Pipe<&'a str, O, E>,
417) -> impl Pipe<&'a str, O::Output, E> {
418    move |x: &'a str| {
419        let (i, o) = p.apply(x)?;
420        Ok((i, (o.push_back(x.len() - i.len()))))
421    }
422}
423
// `sentences` relies on `SentenceAtom`, which only exists with the `unicode`
// feature, so these tests are compiled only when it is enabled.
#[cfg(all(test, feature = "unicode"))]
mod test {

    use crate::{str::sentences, Incomplete, Pipe};

    /// An input made of a single sentence is returned as one item,
    /// leaving no remaining input.
    #[test]
    fn test_unicode() {
        assert_eq!(
            sentences::<_, Incomplete>(..)
                .unwrap()
                .apply("Pack my box with five dozen liquor jugs."),
            Ok(("", (vec!["Pack my box with five dozen liquor jugs.",],)))
        );
    }
}