1use crate::{
3 take_atom, take_while, Incomplete, InvalidRepetition, LenBytes, Pipe, Repetition,
4 Tag, TakeAtom,
5};
6use std::{error::Error as StdError, str::CharIndices};
7use tuplify::PushBack;
8use unicode_segmentation::{
9 GraphemeIndices, USentenceBoundIndices, UWordBoundIndices, UnicodeSegmentation,
10};
11
12impl LenBytes for char {
13 fn len_bytes(&self) -> usize { self.len_utf8() }
14}
15
16impl LenBytes for &str {
17 fn len_bytes(&self) -> usize { self.len() }
18}
19
/// Cursor over the `char`s of a string slice.
///
/// Field `0` keeps the whole slice; field `1` is the `CharIndices` iterator
/// created from that same slice.
pub struct CharAtom<'a>(&'a str, CharIndices<'a>);

impl<'a> From<&'a str> for CharAtom<'a> {
    /// Pairs `text` with a fresh `char_indices` iterator over it.
    fn from(text: &'a str) -> Self {
        let indices = text.char_indices();
        Self(text, indices)
    }
}
26
27impl<'a> TakeAtom for CharAtom<'a> {
28 type Atom = char;
29 type Container = &'a str;
30
31 fn next(&mut self) -> Option<(usize, Self::Atom)> { self.1.next() }
32
33 fn split_at(self, index: usize) -> (Self::Container, Self::Container) {
34 (&self.0[index..], &self.0[..index])
35 }
36}
37
/// Cursor over the grapheme clusters of a string slice.
///
/// Field `0` keeps the whole slice; field `1` is the `GraphemeIndices`
/// iterator created from that same slice.
#[cfg(feature = "unicode")]
pub struct GraphemeAtom<'a>(&'a str, GraphemeIndices<'a>);

#[cfg(feature = "unicode")]
impl<'a> From<&'a str> for GraphemeAtom<'a> {
    /// Pairs `text` with `text.grapheme_indices(true)`; the `true` argument
    /// is the `is_extended` flag of `unicode_segmentation`.
    fn from(text: &'a str) -> Self {
        let indices = text.grapheme_indices(true);
        Self(text, indices)
    }
}
46
#[cfg(feature = "unicode")]
impl<'a> TakeAtom for GraphemeAtom<'a> {
    type Atom = &'a str;
    type Container = &'a str;

    /// Advances the wrapped `GraphemeIndices` and hands back its next
    /// `(byte offset, grapheme)` pair, or `None` once it is exhausted.
    fn next(&mut self) -> Option<(usize, Self::Atom)> {
        let GraphemeAtom(_, indices) = self;
        indices.next()
    }

    /// Cuts the wrapped slice at byte `index`.
    ///
    /// The returned pair is `(text[index..], text[..index])`: the part after
    /// the cut comes first, the part before it second. Slicing panics if
    /// `index` is out of range or not on a char boundary.
    fn split_at(self, index: usize) -> (Self::Container, Self::Container) {
        let GraphemeAtom(text, _) = self;
        let after = &text[index..];
        let before = &text[..index];
        (after, before)
    }
}
58
/// Cursor over the word-boundary segments of a string slice.
///
/// Field `0` keeps the whole slice; field `1` is the `UWordBoundIndices`
/// iterator created from that same slice.
#[cfg(feature = "unicode")]
pub struct WordAtom<'a>(&'a str, UWordBoundIndices<'a>);

#[cfg(feature = "unicode")]
impl<'a> From<&'a str> for WordAtom<'a> {
    /// Pairs `text` with `text.split_word_bound_indices()`.
    fn from(text: &'a str) -> Self {
        let indices = text.split_word_bound_indices();
        Self(text, indices)
    }
}
67
#[cfg(feature = "unicode")]
impl<'a> TakeAtom for WordAtom<'a> {
    type Atom = &'a str;
    type Container = &'a str;

    /// Advances the wrapped `UWordBoundIndices` and hands back its next
    /// `(byte offset, segment)` pair, or `None` once it is exhausted.
    fn next(&mut self) -> Option<(usize, Self::Atom)> {
        let WordAtom(_, indices) = self;
        indices.next()
    }

    /// Cuts the wrapped slice at byte `index`.
    ///
    /// The returned pair is `(text[index..], text[..index])`: the part after
    /// the cut comes first, the part before it second. Slicing panics if
    /// `index` is out of range or not on a char boundary.
    fn split_at(self, index: usize) -> (Self::Container, Self::Container) {
        let WordAtom(text, _) = self;
        let after = &text[index..];
        let before = &text[..index];
        (after, before)
    }
}
79
/// Cursor over the sentence-boundary segments of a string slice.
///
/// Field `0` keeps the whole slice; field `1` is the `USentenceBoundIndices`
/// iterator created from that same slice.
#[cfg(feature = "unicode")]
pub struct SentenceAtom<'a>(&'a str, USentenceBoundIndices<'a>);

#[cfg(feature = "unicode")]
impl<'a> From<&'a str> for SentenceAtom<'a> {
    /// Pairs `text` with `text.split_sentence_bound_indices()`.
    fn from(text: &'a str) -> Self {
        let indices = text.split_sentence_bound_indices();
        Self(text, indices)
    }
}
90
#[cfg(feature = "unicode")]
impl<'a> TakeAtom for SentenceAtom<'a> {
    type Atom = &'a str;
    type Container = &'a str;

    /// Advances the wrapped `USentenceBoundIndices` and hands back its next
    /// `(byte offset, segment)` pair, or `None` once it is exhausted.
    fn next(&mut self) -> Option<(usize, Self::Atom)> {
        let SentenceAtom(_, indices) = self;
        indices.next()
    }

    /// Cuts the wrapped slice at byte `index`.
    ///
    /// The returned pair is `(text[index..], text[..index])`: the part after
    /// the cut comes first, the part before it second. Slicing panics if
    /// `index` is out of range or not on a char boundary.
    fn split_at(self, index: usize) -> (Self::Container, Self::Container) {
        let SentenceAtom(text, _) = self;
        let after = &text[index..];
        let before = &text[..index];
        (after, before)
    }
}
102
103pub fn whitespaces<'a, E>(
105 qty: impl TryInto<Repetition, Error = impl Into<InvalidRepetition>>,
106) -> impl Pipe<&'a str, (&'a str,), E>
107where
108 Incomplete: Into<E>,
109 E: StdError,
110{
111 let qty = qty.try_into().map_err(Into::into).unwrap();
112 move |i: &'a str| {
113 take_while(|x: char| x.is_ascii_whitespace(), qty).apply(CharAtom::from(i))
114 }
115}
116
117pub fn digits<'a, E>(
119 qty: impl TryInto<Repetition, Error = impl Into<InvalidRepetition>>,
120) -> impl Pipe<&'a str, (&'a str,), E>
121where
122 Incomplete: Into<E>,
123 E: StdError,
124{
125 let qty = qty.try_into().map_err(Into::into).unwrap();
126 move |i: &'a str| {
127 take_while(|x: char| x.is_ascii_digit(), qty).apply(CharAtom::from(i))
128 }
129}
130
131pub fn hex_digits<'a, E>(
133 qty: impl TryInto<Repetition, Error = impl Into<InvalidRepetition>>,
134) -> impl Pipe<&'a str, (&'a str,), E>
135where
136 Incomplete: Into<E>,
137 E: StdError,
138{
139 let qty = qty.try_into().map_err(Into::into).unwrap();
140 move |i: &'a str| {
141 take_while(|x: char| x.is_ascii_hexdigit(), qty).apply(CharAtom::from(i))
142 }
143}
144
145pub fn oct_digits<'a, E>(
147 qty: impl TryInto<Repetition, Error = impl Into<InvalidRepetition>>,
148) -> impl Pipe<&'a str, (&'a str,), E>
149where
150 Incomplete: Into<E>,
151 E: StdError,
152{
153 let qty = qty.try_into().map_err(Into::into).unwrap();
154 move |i: &'a str| {
155 take_while(|x: char| matches!(x, '0'..='7'), qty).apply(CharAtom::from(i))
156 }
157}
158
159pub fn bin_digits<'a, E>(
161 qty: impl TryInto<Repetition, Error = impl Into<InvalidRepetition>>,
162) -> impl Pipe<&'a str, (&'a str,), E>
163where
164 Incomplete: Into<E>,
165 E: StdError,
166{
167 let qty = qty.try_into().map_err(Into::into).unwrap();
168 move |i: &'a str| {
169 take_while(|x: char| matches!(x, '0'..='1'), qty).apply(CharAtom::from(i))
170 }
171}
172
/// Error describing a string tag mismatch.
///
/// Field `0` is the expected text and field `1` the text that was found,
/// matching the order in which `Display` prints them.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TagStrError(pub String, pub String);

impl std::fmt::Display for TagStrError {
    /// Renders the error as `Tag: expected: '<0>' got: '<1>'`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self(expected, got) = self;
        write!(f, "Tag: expected: '{}' got: '{}'", expected, got)
    }
}

impl std::error::Error for TagStrError {}
184
185impl<'a, 'b, E> Tag<&'a str, E> for &'b str
186where
187 E: StdError,
188 Incomplete: Into<E>,
189 TagStrError: Into<E>,
190{
191 type Output = &'a str;
192
193 fn strip_from(&self, input: &'a str) -> Result<(&'a str, (Self::Output,)), E> {
194 if let Some(x) = input.strip_prefix(self) {
195 Ok((x, (&input[..self.len()],)))
196 } else {
197 Err(if self.starts_with(input) {
198 Incomplete::Size(self.len() - input.len()).into()
199 } else {
200 let end = if input.len() < self.len() {
201 input.len()
202 } else {
203 input.ceil_char_boundary(self.len())
204 };
205 TagStrError(self.to_string(), input[..end].to_string()).into()
206 })
207 }
208 }
209}
210
/// Error describing a character tag mismatch.
///
/// Field `0` is the expected character and field `1` the character that was
/// found, matching the order in which `Display` prints them.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TagCharError(pub char, pub char);

impl std::fmt::Display for TagCharError {
    /// Renders the error as `Tag: expected: '<0>' got: '<1>'`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self(expected, got) = self;
        write!(f, "Tag: expected: '{}' got: '{}'", expected, got)
    }
}

impl std::error::Error for TagCharError {}
222
223impl<'a, E> Tag<&'a str, E> for char
224where
225 E: StdError,
226 Incomplete: Into<E>,
227 TagCharError: Into<E>,
228{
229 type Output = char;
230
231 fn strip_from(&self, input: &'a str) -> Result<(&'a str, (Self::Output,)), E> {
232 if let Some(x) = input.strip_prefix(*self) {
233 Ok((x, (*self,)))
234 } else {
235 Err(if input.len() < self.len_utf8() {
236 Incomplete::Size(self.len_utf8() - input.len()).into()
237 } else {
238 TagCharError(*self, input.chars().next().unwrap()).into()
239 })
240 }
241 }
242}
243
244pub fn chars<'a, E, E2>(
257 qty: impl TryInto<Repetition, Error = E>,
258) -> Result<impl Pipe<&'a str, (Vec<char>,), E2>, E>
259where
260 Incomplete: Into<E2>,
261{
262 let qty = qty.try_into()?;
263 Ok(move |input| take_atom(qty).unwrap().apply(CharAtom::from(input)))
264}
265
/// Builds a pipe that feeds a `GraphemeAtom` over its input into
/// `take_atom(qty)`; the pipe's output is a one-element tuple holding a
/// `Vec<&str>` of grapheme slices.
///
/// Only available with the `unicode` feature, because `GraphemeAtom` itself
/// only exists when that feature is enabled.
///
/// # Errors
///
/// Returns the conversion error when `qty` cannot be turned into a
/// `Repetition`.
///
/// # Panics
///
/// The returned pipe unwraps the result of `take_atom` on every call.
/// NOTE(review): presumably this cannot fail because `qty` is already a
/// valid `Repetition` at that point — confirm against `take_atom`.
#[cfg(feature = "unicode")]
pub fn graphemes<'a, E, E2>(
    qty: impl TryInto<Repetition, Error = E>,
) -> Result<impl Pipe<&'a str, (Vec<&'a str>,), E2>, E>
where
    Incomplete: Into<E2>,
{
    let qty = qty.try_into()?;
    Ok(move |input| take_atom(qty).unwrap().apply(GraphemeAtom::from(input)))
}
287
/// Builds a pipe that feeds a `WordAtom` over its input into
/// `take_atom(qty)`; the pipe's output is a one-element tuple holding a
/// `Vec<&str>` of word-boundary segments.
///
/// Only available with the `unicode` feature, because `WordAtom` itself
/// only exists when that feature is enabled.
///
/// # Errors
///
/// Returns the conversion error when `qty` cannot be turned into a
/// `Repetition`.
///
/// # Panics
///
/// The returned pipe unwraps the result of `take_atom` on every call.
/// NOTE(review): presumably this cannot fail because `qty` is already a
/// valid `Repetition` at that point — confirm against `take_atom`.
#[cfg(feature = "unicode")]
pub fn words<'a, E, E2>(
    qty: impl TryInto<Repetition, Error = E>,
) -> Result<impl Pipe<&'a str, (Vec<&'a str>,), E2>, E>
where
    Incomplete: Into<E2>,
{
    let qty = qty.try_into()?;
    Ok(move |input| take_atom(qty).unwrap().apply(WordAtom::from(input)))
}
311
/// Builds a pipe that feeds a `SentenceAtom` over its input into
/// `take_atom(qty)`; the pipe's output is a one-element tuple holding a
/// `Vec<&str>` of sentence-boundary segments.
///
/// Only available with the `unicode` feature, because `SentenceAtom` itself
/// only exists when that feature is enabled.
///
/// # Errors
///
/// Returns the conversion error when `qty` cannot be turned into a
/// `Repetition`.
///
/// # Panics
///
/// The returned pipe unwraps the result of `take_atom` on every call.
/// NOTE(review): presumably this cannot fail because `qty` is already a
/// valid `Repetition` at that point — confirm against `take_atom`.
#[cfg(feature = "unicode")]
pub fn sentences<'a, E, E2>(
    qty: impl TryInto<Repetition, Error = E>,
) -> Result<impl Pipe<&'a str, (Vec<&'a str>,), E2>, E>
where
    Incomplete: Into<E2>,
{
    let qty = qty.try_into()?;
    Ok(move |input| take_atom(qty).unwrap().apply(SentenceAtom::from(input)))
}
342
343pub fn consumed<'a, O, E>(
373 mut p: impl Pipe<&'a str, O, E>,
374) -> impl Pipe<&'a str, (&'a str,), E> {
375 move |x: &'a str| {
376 let (i, _) = p.apply(x)?;
377 Ok((i, (&x[..x.len() - i.len()],)))
378 }
379}
380
381pub fn with_offset<'a, O: PushBack<usize>, E>(
416 mut p: impl Pipe<&'a str, O, E>,
417) -> impl Pipe<&'a str, O::Output, E> {
418 move |x: &'a str| {
419 let (i, o) = p.apply(x)?;
420 Ok((i, (o.push_back(x.len() - i.len()))))
421 }
422}
423
// `sentences` relies on `SentenceAtom`, which only exists with the `unicode`
// feature, so this module is compiled only when that feature is enabled.
#[cfg(all(test, feature = "unicode"))]
mod test {

    use crate::{str::sentences, Incomplete, Pipe};

    /// A single sentence with an unbounded repetition (`..`) is returned
    /// whole, leaving an empty remainder.
    #[test]
    fn test_unicode() {
        assert_eq!(
            sentences::<_, Incomplete>(..)
                .unwrap()
                .apply("Pack my box with five dozen liquor jugs."),
            Ok(("", (vec!["Pack my box with five dozen liquor jugs.",],)))
        );
    }
}