use crate::{
take_atom, take_while, Incomplete, InvalidRepetition, LenBytes, Pipe, Repetition,
Tag, TakeAtom,
};
use std::{error::Error as StdError, str::CharIndices};
use tuplify::PushBack;
use unicode_segmentation::{
GraphemeIndices, USentenceBoundIndices, UWordBoundIndices, UnicodeSegmentation,
};
/// Byte length of a `char`: the number of bytes of its UTF-8 encoding.
impl LenBytes for char {
    fn len_bytes(&self) -> usize {
        char::len_utf8(*self)
    }
}
/// Byte length of a string slice, as reported by [`str::len`].
impl LenBytes for &str {
    fn len_bytes(&self) -> usize {
        str::len(self)
    }
}
/// Views a `&str` as a sequence of `char` atoms.
///
/// Field `0` is the full source string, field `1` is the [`CharIndices`]
/// iterator walking over that same string.
pub struct CharAtom<'a>(&'a str, CharIndices<'a>);

impl<'a> From<&'a str> for CharAtom<'a> {
    /// Wraps `source` together with a fresh `char_indices` iterator over it.
    fn from(source: &'a str) -> Self {
        let indices = source.char_indices();
        Self(source, indices)
    }
}
impl<'a> TakeAtom for CharAtom<'a> {
    type Atom = char;
    type Container = &'a str;

    /// Yields the next `(byte offset, char)` pair of the wrapped string.
    fn next(&mut self) -> Option<(usize, Self::Atom)> {
        let Self(_, indices) = self;
        indices.next()
    }

    /// Splits the wrapped string at byte `index`.
    ///
    /// The first element of the returned pair is the part starting at `index`,
    /// the second is the part before `index`.
    fn split_at(self, index: usize) -> (Self::Container, Self::Container) {
        let Self(text, _) = self;
        let after = &text[index..];
        let before = &text[..index];
        (after, before)
    }
}
/// Views a `&str` as a sequence of grapheme-cluster atoms.
///
/// Field `0` is the full source string, field `1` is the [`GraphemeIndices`]
/// iterator walking over that same string.
#[cfg(feature = "unicode")]
pub struct GraphemeAtom<'a>(&'a str, GraphemeIndices<'a>);

#[cfg(feature = "unicode")]
impl<'a> From<&'a str> for GraphemeAtom<'a> {
    /// Wraps `source` together with a `grapheme_indices(true)` iterator over it
    /// (`true` requests extended grapheme clusters per the `unicode-segmentation` API).
    fn from(source: &'a str) -> Self {
        let indices = source.grapheme_indices(true);
        Self(source, indices)
    }
}
#[cfg(feature = "unicode")]
impl<'a> TakeAtom for GraphemeAtom<'a> {
    type Atom = &'a str;
    type Container = &'a str;

    /// Yields the next `(byte offset, grapheme)` pair of the wrapped string.
    fn next(&mut self) -> Option<(usize, Self::Atom)> {
        let Self(_, indices) = self;
        indices.next()
    }

    /// Splits the wrapped string at byte `index`.
    ///
    /// The first element of the returned pair is the part starting at `index`,
    /// the second is the part before `index`.
    fn split_at(self, index: usize) -> (Self::Container, Self::Container) {
        let Self(text, _) = self;
        let after = &text[index..];
        let before = &text[..index];
        (after, before)
    }
}
/// Views a `&str` as a sequence of word-boundary-delimited atoms.
///
/// Field `0` is the full source string, field `1` is the [`UWordBoundIndices`]
/// iterator walking over that same string.
#[cfg(feature = "unicode")]
pub struct WordAtom<'a>(&'a str, UWordBoundIndices<'a>);

#[cfg(feature = "unicode")]
impl<'a> From<&'a str> for WordAtom<'a> {
    /// Wraps `source` together with a `split_word_bound_indices` iterator over it.
    fn from(source: &'a str) -> Self {
        let indices = source.split_word_bound_indices();
        Self(source, indices)
    }
}
#[cfg(feature = "unicode")]
impl<'a> TakeAtom for WordAtom<'a> {
    type Atom = &'a str;
    type Container = &'a str;

    /// Yields the next `(byte offset, segment)` pair produced by the word-bound iterator.
    fn next(&mut self) -> Option<(usize, Self::Atom)> {
        let Self(_, indices) = self;
        indices.next()
    }

    /// Splits the wrapped string at byte `index`.
    ///
    /// The first element of the returned pair is the part starting at `index`,
    /// the second is the part before `index`.
    fn split_at(self, index: usize) -> (Self::Container, Self::Container) {
        let Self(text, _) = self;
        let after = &text[index..];
        let before = &text[..index];
        (after, before)
    }
}
/// Views a `&str` as a sequence of sentence-boundary-delimited atoms.
///
/// Field `0` is the full source string, field `1` is the
/// [`USentenceBoundIndices`] iterator walking over that same string.
#[cfg(feature = "unicode")]
pub struct SentenceAtom<'a>(&'a str, USentenceBoundIndices<'a>);

#[cfg(feature = "unicode")]
impl<'a> From<&'a str> for SentenceAtom<'a> {
    /// Wraps `source` together with a `split_sentence_bound_indices` iterator over it.
    fn from(source: &'a str) -> Self {
        let indices = source.split_sentence_bound_indices();
        Self(source, indices)
    }
}
#[cfg(feature = "unicode")]
impl<'a> TakeAtom for SentenceAtom<'a> {
    type Atom = &'a str;
    type Container = &'a str;

    /// Yields the next `(byte offset, segment)` pair produced by the sentence-bound iterator.
    fn next(&mut self) -> Option<(usize, Self::Atom)> {
        let Self(_, indices) = self;
        indices.next()
    }

    /// Splits the wrapped string at byte `index`.
    ///
    /// The first element of the returned pair is the part starting at `index`,
    /// the second is the part before `index`.
    fn split_at(self, index: usize) -> (Self::Container, Self::Container) {
        let Self(text, _) = self;
        let after = &text[index..];
        let before = &text[..index];
        (after, before)
    }
}
/// Builds a pipe that runs [`take_while`] over the characters of the input
/// with an ASCII-whitespace predicate ([`char::is_ascii_whitespace`]) and the
/// repetition `qty`.
///
/// # Panics
///
/// Panics when `qty` cannot be converted into a [`Repetition`].
pub fn whitespaces<'a, E>(
    qty: impl TryInto<Repetition, Error = impl Into<InvalidRepetition>>,
) -> impl Pipe<&'a str, (&'a str,), E>
where
    Incomplete: Into<E>,
    E: StdError,
{
    // The conversion is done once, eagerly; an invalid repetition aborts here.
    let reps = qty.try_into().map_err(Into::into).unwrap();
    move |input: &'a str| {
        let atoms = CharAtom::from(input);
        take_while(|c: char| c.is_ascii_whitespace(), reps).apply(atoms)
    }
}
/// Builds a pipe that runs [`take_while`] over the characters of the input
/// with an ASCII-decimal-digit predicate ([`char::is_ascii_digit`]) and the
/// repetition `qty`.
///
/// # Panics
///
/// Panics when `qty` cannot be converted into a [`Repetition`].
pub fn digits<'a, E>(
    qty: impl TryInto<Repetition, Error = impl Into<InvalidRepetition>>,
) -> impl Pipe<&'a str, (&'a str,), E>
where
    Incomplete: Into<E>,
    E: StdError,
{
    // The conversion is done once, eagerly; an invalid repetition aborts here.
    let reps = qty.try_into().map_err(Into::into).unwrap();
    move |input: &'a str| {
        let atoms = CharAtom::from(input);
        take_while(|c: char| c.is_ascii_digit(), reps).apply(atoms)
    }
}
/// Builds a pipe that runs [`take_while`] over the characters of the input
/// with an ASCII-hexadecimal-digit predicate ([`char::is_ascii_hexdigit`]) and
/// the repetition `qty`.
///
/// # Panics
///
/// Panics when `qty` cannot be converted into a [`Repetition`].
pub fn hex_digits<'a, E>(
    qty: impl TryInto<Repetition, Error = impl Into<InvalidRepetition>>,
) -> impl Pipe<&'a str, (&'a str,), E>
where
    Incomplete: Into<E>,
    E: StdError,
{
    // The conversion is done once, eagerly; an invalid repetition aborts here.
    let reps = qty.try_into().map_err(Into::into).unwrap();
    move |input: &'a str| {
        let atoms = CharAtom::from(input);
        take_while(|c: char| c.is_ascii_hexdigit(), reps).apply(atoms)
    }
}
/// Builds a pipe that runs [`take_while`] over the characters of the input
/// with a predicate accepting the octal digits `'0'` through `'7'`, and the
/// repetition `qty`.
///
/// # Panics
///
/// Panics when `qty` cannot be converted into a [`Repetition`].
pub fn oct_digits<'a, E>(
    qty: impl TryInto<Repetition, Error = impl Into<InvalidRepetition>>,
) -> impl Pipe<&'a str, (&'a str,), E>
where
    Incomplete: Into<E>,
    E: StdError,
{
    // The conversion is done once, eagerly; an invalid repetition aborts here.
    let reps = qty.try_into().map_err(Into::into).unwrap();
    move |input: &'a str| {
        let atoms = CharAtom::from(input);
        take_while(|c: char| ('0'..='7').contains(&c), reps).apply(atoms)
    }
}
/// Builds a pipe that runs [`take_while`] over the characters of the input
/// with a predicate accepting the binary digits `'0'` and `'1'`, and the
/// repetition `qty`.
///
/// # Panics
///
/// Panics when `qty` cannot be converted into a [`Repetition`].
pub fn bin_digits<'a, E>(
    qty: impl TryInto<Repetition, Error = impl Into<InvalidRepetition>>,
) -> impl Pipe<&'a str, (&'a str,), E>
where
    Incomplete: Into<E>,
    E: StdError,
{
    // The conversion is done once, eagerly; an invalid repetition aborts here.
    let reps = qty.try_into().map_err(Into::into).unwrap();
    move |input: &'a str| {
        let atoms = CharAtom::from(input);
        take_while(|c: char| ('0'..='1').contains(&c), reps).apply(atoms)
    }
}
/// Error reported when a string tag does not match the input.
///
/// Field `0` is the expected tag, field `1` is the text found instead.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TagStrError(pub String, pub String);

impl std::fmt::Display for TagStrError {
    /// Renders the error as `Tag: expected: '<expected>' got: '<found>'`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self(expected, found) = self;
        write!(f, "Tag: expected: '{}' got: '{}'", expected, found)
    }
}

impl std::error::Error for TagStrError {}
/// Uses a string slice as a tag to be stripped from the front of a `&str` input.
impl<'a, 'b, E> Tag<&'a str, E> for &'b str
where
    E: StdError,
    Incomplete: Into<E>,
    TagStrError: Into<E>,
{
    type Output = &'a str;

    /// Strips this tag from the start of `input`.
    ///
    /// * On a match, returns the remaining input and the matched prefix
    ///   (sliced from `input`, not from the tag).
    /// * When `input` is a prefix of the tag (including empty input), fails
    ///   with [`Incomplete::Size`] carrying the number of missing bytes.
    /// * Otherwise fails with a [`TagStrError`] holding the tag and the
    ///   leading part of `input` that was compared against it.
    fn strip_from(&self, input: &'a str) -> Result<(&'a str, (Self::Output,)), E> {
        let tag: &str = *self;

        if let Some(rest) = input.strip_prefix(tag) {
            return Ok((rest, (&input[..tag.len()],)));
        }

        // The input ran out while still agreeing with the tag.
        if tag.starts_with(input) {
            return Err(Incomplete::Size(tag.len() - input.len()).into());
        }

        // Report at most a tag-length worth of input, widened to the next
        // char boundary so the slice below never cuts a code point in half.
        let reported_len = if input.len() < tag.len() {
            input.len()
        } else {
            input.ceil_char_boundary(tag.len())
        };
        let found = input[..reported_len].to_string();
        Err(TagStrError(tag.to_string(), found).into())
    }
}
/// Error reported when a `char` tag does not match the input.
///
/// Field `0` is the expected character, field `1` is the character found instead.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TagCharError(pub char, pub char);

impl std::fmt::Display for TagCharError {
    /// Renders the error as `Tag: expected: '<expected>' got: '<found>'`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self(expected, found) = self;
        write!(f, "Tag: expected: '{}' got: '{}'", expected, found)
    }
}

impl std::error::Error for TagCharError {}
/// Uses a single `char` as a tag to be stripped from the front of a `&str` input.
impl<'a, E> Tag<&'a str, E> for char
where
    E: StdError,
    Incomplete: Into<E>,
    TagCharError: Into<E>,
{
    type Output = char;

    /// Strips this character from the start of `input`.
    ///
    /// * On a match, returns the remaining input and the character itself.
    /// * When `input` is empty, fails with [`Incomplete::Size`] carrying the
    ///   UTF-8 length of the expected character.
    /// * Otherwise fails with a [`TagCharError`] holding the expected
    ///   character and the first character actually found.
    fn strip_from(&self, input: &'a str) -> Result<(&'a str, (Self::Output,)), E> {
        if let Some(rest) = input.strip_prefix(*self) {
            return Ok((rest, (*self,)));
        }

        // A `&str` is always valid UTF-8, so it can never end in the middle of
        // a character: a non-empty input already exposes a complete first
        // character, and if that one differs no amount of extra data can turn
        // it into a match. Only empty input is genuinely incomplete.
        Err(match input.chars().next() {
            None => Incomplete::Size(self.len_utf8()).into(),
            Some(found) => TagCharError(*self, found).into(),
        })
    }
}
/// Builds a pipe that applies [`take_atom`] with the repetition `qty` to the
/// characters of the input, producing them as a `Vec<char>`.
///
/// # Errors
///
/// Returns the conversion error when `qty` cannot be turned into a [`Repetition`].
///
/// # Panics
///
/// The returned pipe unwraps the `Result` of `take_atom` on every application.
pub fn chars<'a, E, E2>(
    qty: impl TryInto<Repetition, Error = E>,
) -> Result<impl Pipe<&'a str, (Vec<char>,), E2>, E>
where
    Incomplete: Into<E2>,
{
    let reps = qty.try_into()?;
    Ok(move |input: &'a str| {
        let atoms = CharAtom::from(input);
        take_atom(reps).unwrap().apply(atoms)
    })
}
/// Builds a pipe that applies [`take_atom`] with the repetition `qty` to the
/// grapheme clusters of the input, producing them as a `Vec<&str>`.
///
/// Only available with the `unicode` feature, because it is built on
/// [`GraphemeAtom`], which is itself gated behind that feature.
///
/// # Errors
///
/// Returns the conversion error when `qty` cannot be turned into a [`Repetition`].
///
/// # Panics
///
/// The returned pipe unwraps the `Result` of `take_atom` on every application.
#[cfg(feature = "unicode")]
pub fn graphemes<'a, E, E2>(
    qty: impl TryInto<Repetition, Error = E>,
) -> Result<impl Pipe<&'a str, (Vec<&'a str>,), E2>, E>
where
    Incomplete: Into<E2>,
{
    let qty = qty.try_into()?;
    Ok(move |input| take_atom(qty).unwrap().apply(GraphemeAtom::from(input)))
}
/// Builds a pipe that applies [`take_atom`] with the repetition `qty` to the
/// word-boundary segments of the input, producing them as a `Vec<&str>`.
///
/// Only available with the `unicode` feature, because it is built on
/// [`WordAtom`], which is itself gated behind that feature.
///
/// # Errors
///
/// Returns the conversion error when `qty` cannot be turned into a [`Repetition`].
///
/// # Panics
///
/// The returned pipe unwraps the `Result` of `take_atom` on every application.
#[cfg(feature = "unicode")]
pub fn words<'a, E, E2>(
    qty: impl TryInto<Repetition, Error = E>,
) -> Result<impl Pipe<&'a str, (Vec<&'a str>,), E2>, E>
where
    Incomplete: Into<E2>,
{
    let qty = qty.try_into()?;
    Ok(move |input| take_atom(qty).unwrap().apply(WordAtom::from(input)))
}
/// Builds a pipe that applies [`take_atom`] with the repetition `qty` to the
/// sentence-boundary segments of the input, producing them as a `Vec<&str>`.
///
/// Only available with the `unicode` feature, because it is built on
/// [`SentenceAtom`], which is itself gated behind that feature.
///
/// # Errors
///
/// Returns the conversion error when `qty` cannot be turned into a [`Repetition`].
///
/// # Panics
///
/// The returned pipe unwraps the `Result` of `take_atom` on every application.
#[cfg(feature = "unicode")]
pub fn sentences<'a, E, E2>(
    qty: impl TryInto<Repetition, Error = E>,
) -> Result<impl Pipe<&'a str, (Vec<&'a str>,), E2>, E>
where
    Incomplete: Into<E2>,
{
    let qty = qty.try_into()?;
    Ok(move |input| take_atom(qty).unwrap().apply(SentenceAtom::from(input)))
}
/// Wraps `p` so that its own output is discarded and replaced by the slice of
/// the input that `p` consumed.
///
/// The consumed slice is the leading part of the input whose length is the
/// difference between the input length and the length of what `p` left over.
/// Errors from `p` are passed through.
pub fn consumed<'a, O, E>(
    mut p: impl Pipe<&'a str, O, E>,
) -> impl Pipe<&'a str, (&'a str,), E> {
    move |input: &'a str| {
        p.apply(input).map(move |(rest, _)| {
            let taken = input.len() - rest.len();
            (rest, (&input[..taken],))
        })
    }
}
/// Wraps `p` so that the number of bytes it consumed is appended to its output
/// tuple.
///
/// The appended value is the difference between the input length and the
/// length of what `p` left over. Errors from `p` are passed through.
pub fn with_offset<'a, O: PushBack<usize>, E>(
    mut p: impl Pipe<&'a str, O, E>,
) -> impl Pipe<&'a str, O::Output, E> {
    move |input: &'a str| {
        p.apply(input).map(move |(rest, out)| {
            let offset = input.len() - rest.len();
            (rest, out.push_back(offset))
        })
    }
}
#[cfg(test)]
mod test {
    /// A single sentence must come back as exactly one segment with nothing
    /// left over.
    ///
    /// Sentence segmentation is backed by the `unicode` feature, so this test
    /// (and its imports) are only compiled when that feature is enabled.
    #[cfg(feature = "unicode")]
    #[test]
    fn test_unicode() {
        use crate::{str::sentences, Incomplete, Pipe};

        assert_eq!(
            sentences::<_, Incomplete>(..)
                .unwrap()
                .apply("Pack my box with five dozen liquor jugs."),
            Ok(("", (vec!["Pack my box with five dozen liquor jugs.",],)))
        );
    }
}