use crate::prelude::*;
use alloc::string::ToString;
use super::*;
/// A sealed trait for token types that can be classified as text characters.
///
/// This file implements it for `char`, `u8` and `&Grapheme`; the `Sealed`
/// supertrait bound is what keeps the set of implementors closed.
pub trait Char: Copy + PartialEq + Sealed {
/// Returns `true` if the token is inline whitespace. The implementations in
/// this file accept exactly a space or a horizontal tab.
fn is_inline_whitespace(&self) -> bool;
/// Returns `true` if the token is considered whitespace.
fn is_whitespace(&self) -> bool;
/// Returns `true` if the token is considered a newline.
fn is_newline(&self) -> bool;
/// Returns the token that represents the digit `0`.
fn digit_zero() -> Self;
/// Returns `true` if the token is a digit in the given `radix`.
fn is_digit(&self, radix: u32) -> bool;
/// Returns `true` if the token may start an identifier.
fn is_ident_start(&self) -> bool;
/// Returns `true` if the token may appear inside an identifier.
fn is_ident_continue(&self) -> bool;
/// Returns the token as a single byte, or `None` when the implementation
/// cannot represent it that way (the `char` and `&Grapheme` implementations
/// return `None` for anything that is not ASCII).
fn to_ascii(&self) -> Option<u8>;
}
impl Sealed for &Grapheme {}

/// Character classification for a grapheme, which may span several code points.
impl Char for &Grapheme {
    /// True only for a grapheme that is exactly a space or a tab.
    fn is_inline_whitespace(&self) -> bool {
        matches!(self.as_str(), " " | "\t")
    }

    /// True when every code point of the grapheme is whitespace.
    fn is_whitespace(&self) -> bool {
        self.as_str().chars().all(unicode::is_whitespace)
    }

    /// True when the grapheme is one of the recognised line terminators
    /// (`\r\n` counts as a single grapheme here).
    fn is_newline(&self) -> bool {
        matches!(
            self.as_str(),
            "\r\n" | "\n" | "\r" | "\x0B" | "\x0C" | "\u{0085}" | "\u{2028}" | "\u{2029}"
        )
    }

    fn digit_zero() -> Self {
        Grapheme::digit_zero()
    }

    /// True when the grapheme is a single code point that is a digit in `radix`.
    fn is_digit(&self, radix: u32) -> bool {
        let mut code_points = self.as_str().chars();
        code_points
            .next()
            // Only a grapheme made of exactly one code point can be a digit.
            .map_or(false, |c| code_points.next().is_none() && c.is_digit(radix))
    }

    /// Returns the grapheme's byte when it consists of exactly one ASCII byte.
    fn to_ascii(&self) -> Option<u8> {
        match self.as_bytes() {
            [byte] if byte.is_ascii() => Some(*byte),
            _ => None,
        }
    }

    /// True when the first code point is `_` or XID_Start and every following
    /// code point is XID_Continue.
    fn is_ident_start(&self) -> bool {
        let (head, tail) = self.split();
        (head == '_' || unicode_ident::is_xid_start(head))
            && tail.chars().all(unicode_ident::is_xid_continue)
    }

    /// True when every code point of the grapheme is XID_Continue.
    fn is_ident_continue(&self) -> bool {
        self.as_str().chars().all(unicode_ident::is_xid_continue)
    }
}
impl Sealed for char {}

/// Character classification for Unicode scalar values.
impl Char for char {
    /// True for a space or a horizontal tab.
    fn is_inline_whitespace(&self) -> bool {
        matches!(*self, ' ' | '\t')
    }

    /// Delegates to [`unicode::is_whitespace`].
    fn is_whitespace(&self) -> bool {
        let c = *self;
        unicode::is_whitespace(c)
    }

    /// True for any of the recognised single-character line terminators.
    fn is_newline(&self) -> bool {
        matches!(
            *self,
            '\n' | '\r' | '\x0B' | '\x0C' | '\u{0085}' | '\u{2028}' | '\u{2029}'
        )
    }

    fn digit_zero() -> Self {
        '0'
    }

    fn is_digit(&self, radix: u32) -> bool {
        // Call the inherent method by path: `self.is_digit(..)` on `&char`
        // would pick this trait method again.
        char::is_digit(*self, radix)
    }

    /// Returns the character as a byte when it is ASCII, `None` otherwise.
    fn to_ascii(&self) -> Option<u8> {
        if self.is_ascii() {
            Some(*self as u8)
        } else {
            None
        }
    }

    /// True for `_` and for any XID_Start character.
    fn is_ident_start(&self) -> bool {
        let c = *self;
        c == '_' || unicode_ident::is_xid_start(c)
    }

    /// True for any XID_Continue character.
    fn is_ident_continue(&self) -> bool {
        let c = *self;
        unicode_ident::is_xid_continue(c)
    }
}
impl Sealed for u8 {}

/// Character classification for raw bytes.
///
/// Whitespace and newline checks are ASCII-only. The digit and identifier
/// checks widen the byte to `char` with `as` and reuse the `char` logic.
impl Char for u8 {
    /// True for an ASCII space or horizontal tab.
    fn is_inline_whitespace(&self) -> bool {
        matches!(*self, b' ' | b'\t')
    }

    /// True for ASCII whitespace, as defined by [`u8::is_ascii_whitespace`].
    fn is_whitespace(&self) -> bool {
        self.is_ascii_whitespace()
    }

    /// True for line feed, carriage return, vertical tab and form feed.
    fn is_newline(&self) -> bool {
        matches!(*self, b'\n' | b'\r' | b'\x0B' | b'\x0C')
    }

    fn digit_zero() -> Self {
        b'0'
    }

    /// True when the byte, read as a `char`, is a digit in `radix`.
    fn is_digit(&self, radix: u32) -> bool {
        (*self as char).is_digit(radix)
    }

    /// Returns the byte when it is ASCII, `None` otherwise.
    ///
    /// Bytes `0x80..=0xFF` are not ASCII, so they yield `None`, matching the
    /// `char` and `&Grapheme` implementations.
    fn to_ascii(&self) -> Option<u8> {
        self.is_ascii().then_some(*self)
    }

    /// True when the byte, read as a `char`, may start an identifier.
    // NOTE(review): `u8 as char` maps 0x80..=0xFF onto U+0080..=U+00FF, so some
    // non-ASCII bytes may be accepted here — confirm this is intended.
    fn is_ident_start(&self) -> bool {
        (*self as char).is_ident_start()
    }

    /// True when the byte, read as a `char`, may continue an identifier.
    fn is_ident_continue(&self) -> bool {
        (*self as char).is_ident_continue()
    }
}
/// A parser wrapper around an inner parser `A`.
///
/// Its `Parser` implementation in this file skips whitespace tokens before and
/// after running the inner parser.
#[derive(Copy, Clone)]
pub struct Padded<A> {
// The wrapped parser whose output is returned unchanged.
pub(crate) parser: A,
}
impl<'src, I, O, E, A> Parser<'src, I, O, E> for Padded<A>
where
    I: Input<'src>,
    E: ParserExtra<'src, I>,
    I::Token: Char,
    A: Parser<'src, I, O, E>,
{
    #[doc(hidden)]
    #[cfg(feature = "debug")]
    fn node_info(&self, scope: &mut debug::NodeScope) -> debug::NodeInfo {
        let inner = self.parser.node_info(scope);
        debug::NodeInfo::Padded(Box::new(inner))
    }

    /// Skips leading whitespace, runs the inner parser and, only if it
    /// succeeded, skips trailing whitespace as well.
    fn go<M: Mode>(&self, inp: &mut InputRef<'src, '_, I, E>) -> PResult<M, O> {
        inp.skip_while(|tok| tok.is_whitespace());
        let parsed = self.parser.go::<M>(inp);
        if parsed.is_ok() {
            // A failed inner parse returns immediately without touching the
            // trailing input.
            inp.skip_while(|tok| tok.is_whitespace());
        }
        parsed
    }

    go_extra!(O);
}
/// Labels describing what a text parser expected to find.
///
/// `Slice` is the payload type carried by [`TextExpected::Identifier`]; the
/// parsers in this file that never name a specific identifier use `()`.
#[derive(Clone, Debug)]
#[non_exhaustive]
pub enum TextExpected<Slice> {
/// Label used by the `whitespace` parser.
Whitespace,
/// Label used by the `inline_whitespace` parser.
InlineWhitespace,
/// Label used by the `newline` parser.
Newline,
/// A digit; `digits` builds this as `Digit(0, radix)`.
// NOTE(review): presumably the two fields bound the accepted digit values — confirm.
Digit(u32, u32),
/// Label used by the `ident` parsers: any identifier.
AnyIdentifier,
/// Label used by the `keyword` parsers: one specific identifier.
Identifier(Slice),
/// Label used by the `int` parser.
Int,
}
// Written by hand so that `Copy` is available exactly when `Slice: Copy`.
impl<Slice: Copy> Copy for TextExpected<Slice> {}
/// Builds a repeated parser over single whitespace tokens whose output is ignored.
///
/// Each token is accepted when [`Char::is_whitespace`] returns `true`; a
/// rejected token is labelled [`TextExpected::Whitespace`].
pub fn whitespace<'src, I, E>() -> Repeated<impl Parser<'src, I, (), E> + Copy, (), I, E>
where
    I: StrInput<'src>,
    I::Token: Char + 'src,
    E: ParserExtra<'src, I>,
    E::Error: LabelError<'src, I, TextExpected<()>>,
{
    let single = any()
        .filter(|tok: &I::Token| tok.is_whitespace())
        .labelled_with(|| TextExpected::Whitespace)
        .as_builtin()
        .ignored();
    single.repeated()
}
/// Builds a repeated parser over single inline-whitespace tokens whose output
/// is ignored.
///
/// Each token is accepted when [`Char::is_inline_whitespace`] returns `true`; a
/// rejected token is labelled [`TextExpected::InlineWhitespace`].
pub fn inline_whitespace<'src, I, E>() -> Repeated<impl Parser<'src, I, (), E> + Copy, (), I, E>
where
    I: StrInput<'src>,
    I::Token: Char + 'src,
    E: ParserExtra<'src, I>,
    E::Error: LabelError<'src, I, TextExpected<()>>,
{
    let single = any()
        .filter(|tok: &I::Token| tok.is_inline_whitespace())
        .labelled_with(|| TextExpected::InlineWhitespace)
        .as_builtin()
        .ignored();
    single.repeated()
}
/// Builds a parser that consumes one newline sequence and outputs `()`.
///
/// A carriage return is consumed together with an immediately following line
/// feed, if there is one. Otherwise a single token is consumed and must satisfy
/// [`Char::is_newline`]. Failures are reported as [`TextExpected::Newline`].
#[must_use]
pub fn newline<'src, I, E>() -> impl Parser<'src, I, (), E> + Copy
where
I: StrInput<'src>,
I::Token: Char + 'src,
E: ParserExtra<'src, I>,
&'src str: OrderedSeq<'src, I::Token>,
E::Error: LabelError<'src, I, TextExpected<()>>,
{
custom(|inp| {
// Remember where we started so the error span covers the consumed token.
let before = inp.cursor();
// Case 1: a carriage return, optionally followed by a line feed.
if inp
.peek()
.map_or(false, |c: I::Token| c.to_ascii() == Some(b'\r'))
{
inp.skip();
if inp
.peek()
.map_or(false, |c: I::Token| c.to_ascii() == Some(b'\n'))
{
inp.skip();
}
Ok(())
} else {
// Case 2: any other single token; it is consumed before being checked.
let c = inp.next();
if c.map_or(false, |c: I::Token| c.is_newline()) {
Ok(())
} else {
// Not a newline (or end of input): report what was found, if anything.
let span = inp.span_since(&before);
Err(LabelError::expected_found(
[TextExpected::Newline],
c.map(MaybeRef::Val),
span,
))
}
}
})
.labelled_with(|| TextExpected::Newline)
.as_builtin()
}
#[must_use]
pub fn digits<'src, I, E>(
radix: u32,
) -> Repeated<impl Parser<'src, I, <I as Input<'src>>::Token, E> + Copy, I::Token, I, E>
where
I: StrInput<'src>,
I::Token: Char + 'src,
E: ParserExtra<'src, I>,
E::Error: LabelError<'src, I, TextExpected<()>>,
{
any()
.filter(move |c: &I::Token| c.is_digit(radix))
.labelled_with(move || TextExpected::Digit(0, radix))
.as_builtin()
.map_err(move |mut err: E::Error| {
err.label_with(TextExpected::Digit(0, radix));
err
})
.repeated()
.at_least(1)
}
/// Builds a parser for a non-negative integer in the given `radix`, yielding
/// the matched slice.
///
/// The accepted forms are a non-zero digit followed by any number of digits, or
/// a lone zero digit. The result is labelled [`TextExpected::Int`].
#[must_use]
pub fn int<'src, I, E>(radix: u32) -> impl Parser<'src, I, <I as SliceInput<'src>>::Slice, E> + Copy
where
    I: StrInput<'src>,
    I::Token: Char + 'src,
    E: ParserExtra<'src, I>,
    E::Error: LabelError<'src, I, TextExpected<()>> + LabelError<'src, I, MaybeRef<'src, I::Token>>,
{
    // First digit of a multi-digit number: any digit except zero.
    let leading = any()
        .filter(move |tok: &I::Token| tok.is_digit(radix) && *tok != I::Token::digit_zero());
    // Remaining digits, zero allowed.
    let trailing = any()
        .filter(move |tok: &I::Token| tok.is_digit(radix))
        .repeated();
    // Fallback: the single digit zero.
    let lone_zero = just(I::Token::digit_zero()).ignored();

    leading
        .then(trailing)
        .ignored()
        .or(lone_zero)
        .to_slice()
        .labelled_with(|| TextExpected::Int)
        .as_builtin()
}
/// Parsers for ASCII-only identifiers and keywords.
pub mod ascii {
    use super::*;

    /// Builds a parser for an ASCII identifier, yielding the matched slice.
    ///
    /// The first token must be an ASCII letter or `_`; the following tokens may
    /// be ASCII letters, ASCII digits or `_`. The result is labelled
    /// [`TextExpected::AnyIdentifier`].
    #[must_use]
    pub fn ident<'src, I, E>() -> impl Parser<'src, I, <I as SliceInput<'src>>::Slice, E> + Copy
    where
        I: StrInput<'src>,
        I::Token: Char + 'src,
        E: ParserExtra<'src, I>,
        E::Error: LabelError<'src, I, TextExpected<()>>,
    {
        let head = any().filter(|tok: &I::Token| {
            matches!(tok.to_ascii(), Some(b) if b.is_ascii_alphabetic() || b == b'_')
        });
        let tail = any()
            .filter(|tok: &I::Token| {
                matches!(tok.to_ascii(), Some(b) if b.is_ascii_alphanumeric() || b == b'_')
            })
            .repeated();

        head.then(tail)
            .to_slice()
            .labelled_with(|| TextExpected::AnyIdentifier)
            .as_builtin()
    }

    /// Builds a parser that accepts an ASCII identifier only when it equals
    /// `keyword`, yielding the matched slice.
    ///
    /// A whole identifier is parsed first and then compared, so a longer
    /// identifier that merely starts with `keyword` is rejected. Mismatches are
    /// reported as `TextExpected::Identifier(keyword)`.
    #[track_caller]
    pub fn keyword<'src, I, S, E>(
        keyword: S,
    ) -> impl Parser<'src, I, <I as SliceInput<'src>>::Slice, E> + Clone + 'src
    where
        I: StrInput<'src>,
        I::Token: Char + fmt::Debug + 'src,
        S: PartialEq<I::Slice> + Clone + 'src,
        E: ParserExtra<'src, I> + 'src,
        E::Error: LabelError<'src, I, TextExpected<()>> + LabelError<'src, I, TextExpected<S>>,
    {
        ident()
            .try_map({
                // The closure owns its own copy; the original is moved into the
                // label below.
                let keyword = keyword.clone();
                move |found: I::Slice, span| {
                    if keyword == found {
                        return Ok(());
                    }
                    Err(LabelError::expected_found(
                        [TextExpected::Identifier(keyword.clone())],
                        None,
                        span,
                    ))
                }
            })
            .to_slice()
            .labelled(TextExpected::Identifier(keyword))
            .as_builtin()
    }
}
pub use unicode::*;
pub mod unicode {
use super::*;
use core::str::{Bytes, Chars};
use unicode_segmentation::UnicodeSegmentation;
/// A `str`-backed token holding one grapheme.
///
/// Values are only created through the private `Grapheme::new`, which does not
/// validate its argument.
#[derive(PartialEq, Eq)]
#[repr(transparent)]
pub struct Grapheme {
    inner: str,
}

impl Grapheme {
    /// Reinterprets a string slice as a grapheme without inspecting it.
    fn new(inner: &str) -> &Self {
        let raw: *const str = inner;
        // SAFETY: `Grapheme` is `#[repr(transparent)]` over `str`, so the two
        // pointer types share layout and metadata; the returned reference
        // inherits the lifetime of `inner`.
        unsafe { &*(raw as *const Self) }
    }

    /// Returns the grapheme for the digit `0`.
    pub fn digit_zero() -> &'static Self {
        Self::new("0")
    }

    /// Iterates over the code points of the grapheme.
    pub fn code_points(&self) -> Chars<'_> {
        self.as_str().chars()
    }

    /// Iterates over the UTF-8 bytes of the grapheme.
    pub fn bytes(&self) -> Bytes<'_> {
        self.as_str().bytes()
    }

    /// Views the grapheme as a string slice.
    pub fn as_str(&self) -> &str {
        &self.inner
    }

    /// Views the grapheme as UTF-8 bytes.
    pub fn as_bytes(&self) -> &[u8] {
        self.as_str().as_bytes()
    }

    /// Splits the grapheme into its first code point and the remaining text.
    ///
    /// # Panics
    ///
    /// Panics if the underlying string is empty.
    pub fn split(&self) -> (char, &str) {
        let mut code_points = self.inner.chars();
        let head = code_points.next().unwrap();
        (head, code_points.as_str())
    }
}

impl fmt::Debug for Grapheme {
    /// Writes `g'…'` with every code point escaped via `char::escape_debug`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str("g'")?;
        self.inner
            .chars()
            .try_for_each(|c| write!(f, "{}", c.escape_debug()))?;
        f.write_str("'")
    }
}

impl fmt::Display for Grapheme {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        <str as fmt::Display>::fmt(&self.inner, f)
    }
}

impl AsRef<str> for Grapheme {
    fn as_ref(&self) -> &str {
        &self.inner
    }
}

impl AsRef<[u8]> for Grapheme {
    fn as_ref(&self) -> &[u8] {
        self.inner.as_bytes()
    }
}

impl AsRef<Grapheme> for Grapheme {
    fn as_ref(&self) -> &Grapheme {
        self
    }
}

impl Borrow<str> for Grapheme {
    fn borrow(&self) -> &str {
        &self.inner
    }
}

impl Borrow<[u8]> for Grapheme {
    fn borrow(&self) -> &[u8] {
        self.inner.as_bytes()
    }
}

impl<'src> From<&'src Grapheme> for Box<Grapheme> {
    /// Copies the grapheme into a new heap allocation.
    fn from(value: &'src Grapheme) -> Self {
        let boxed_str: Box<str> = value.as_str().into();
        let raw = Box::into_raw(boxed_str) as *mut Grapheme;
        // SAFETY: `raw` comes from `Box::into_raw` on a `Box<str>`, and
        // `Grapheme` is `#[repr(transparent)]` over `str`, so the allocation
        // has the layout `Box<Grapheme>` expects.
        unsafe { Box::from_raw(raw) }
    }
}

impl From<Box<Grapheme>> for Box<str> {
    /// Reuses the allocation; no bytes are copied.
    fn from(value: Box<Grapheme>) -> Self {
        let raw = Box::into_raw(value) as *mut str;
        // SAFETY: the inverse of the cast above; `Grapheme` is
        // `#[repr(transparent)]` over `str`.
        unsafe { Box::from_raw(raw) }
    }
}

impl From<Box<Grapheme>> for Box<[u8]> {
    fn from(value: Box<Grapheme>) -> Self {
        let as_str: Box<str> = value.into();
        as_str.into()
    }
}
#[derive(PartialEq, Eq)]
#[repr(transparent)]
pub struct Graphemes {
inner: str,
}
impl Graphemes {
pub fn new(inner: &str) -> &Self {
unsafe { &*(inner as *const str as *const Self) }
}
pub fn iter(&self) -> GraphemesIter<'_> {
self.into_iter()
}
pub fn code_points(&self) -> Chars<'_> {
self.inner.chars()
}
pub fn bytes(&self) -> Bytes<'_> {
self.inner.bytes()
}
pub fn as_str(&self) -> &str {
&self.inner
}
pub fn as_bytes(&self) -> &[u8] {
self.inner.as_bytes()
}
}
impl fmt::Debug for Graphemes {
    /// Writes `g` followed by the `Debug` form of the underlying string.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str("g")?;
        <str as fmt::Debug>::fmt(&self.inner, f)
    }
}

impl fmt::Display for Graphemes {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        <str as fmt::Display>::fmt(&self.inner, f)
    }
}

impl AsRef<str> for Graphemes {
    fn as_ref(&self) -> &str {
        &self.inner
    }
}

impl AsRef<[u8]> for Graphemes {
    fn as_ref(&self) -> &[u8] {
        self.inner.as_bytes()
    }
}

impl AsRef<Graphemes> for Graphemes {
    fn as_ref(&self) -> &Graphemes {
        self
    }
}

impl Borrow<str> for Graphemes {
    fn borrow(&self) -> &str {
        &self.inner
    }
}

impl Borrow<[u8]> for Graphemes {
    fn borrow(&self) -> &[u8] {
        self.inner.as_bytes()
    }
}
impl<'src> From<&'src str> for &'src Graphemes {
    fn from(value: &'src str) -> Self {
        Graphemes::new(value)
    }
}

impl<'src> From<&'src Graphemes> for &'src str {
    fn from(value: &'src Graphemes) -> Self {
        value.as_str()
    }
}

impl<'src> From<&'src Graphemes> for Box<Graphemes> {
    /// Copies the sequence into a new heap allocation.
    fn from(value: &'src Graphemes) -> Self {
        Box::<Graphemes>::from(value.as_str())
    }
}

impl<'src> From<&'src str> for Box<Graphemes> {
    /// Copies the string into a new heap allocation.
    fn from(value: &'src str) -> Self {
        let owned: Box<str> = Box::from(value);
        Box::<Graphemes>::from(owned)
    }
}

impl From<Box<str>> for Box<Graphemes> {
    /// Reuses the allocation; no bytes are copied.
    fn from(value: Box<str>) -> Self {
        let raw = Box::into_raw(value) as *mut Graphemes;
        // SAFETY: `raw` comes from `Box::into_raw`, and `Graphemes` is
        // `#[repr(transparent)]` over `str`, so the allocation has the layout
        // `Box<Graphemes>` expects.
        unsafe { Box::from_raw(raw) }
    }
}

impl From<Box<Graphemes>> for Box<str> {
    /// Reuses the allocation; no bytes are copied.
    fn from(value: Box<Graphemes>) -> Self {
        let raw = Box::into_raw(value) as *mut str;
        // SAFETY: the inverse of the cast above; `Graphemes` is
        // `#[repr(transparent)]` over `str`.
        unsafe { Box::from_raw(raw) }
    }
}

impl From<Box<Graphemes>> for Box<[u8]> {
    fn from(value: Box<Graphemes>) -> Self {
        let as_str: Box<str> = value.into();
        Box::<[u8]>::from(as_str)
    }
}

impl<'src> IntoIterator for &'src Graphemes {
    type Item = &'src Grapheme;
    type IntoIter = GraphemesIter<'src>;

    /// Iterates over the graphemes of the sequence.
    fn into_iter(self) -> Self::IntoIter {
        GraphemesIter::new(self)
    }
}
impl Sealed for &'_ Graphemes {}

impl<'src> StrInput<'src> for &'src Graphemes {
    /// Renders a slice of the input as an owned string via its `Display` impl.
    #[doc(hidden)]
    fn stringify(slice: Self::Slice) -> String {
        ToString::to_string(slice)
    }
}
/// Lets a grapheme sequence be used as parser input.
///
/// The cursor is a byte offset into the underlying string, and each token is
/// one grapheme borrowed from it.
impl<'src> Input<'src> for &'src Graphemes {
type Cursor = usize;
type Span = SimpleSpan<usize>;
type Token = &'src Grapheme;
type MaybeToken = &'src Grapheme;
type Cache = Self;
// Parsing starts at byte offset 0, with the input itself as the cache.
#[inline]
fn begin(self) -> (Self::Cursor, Self::Cache) {
(0, self)
}
#[inline]
fn cursor_location(cursor: &Self::Cursor) -> usize {
*cursor
}
// Returns the grapheme starting at `cursor` and advances the cursor by its
// byte length, or returns `None` once the cursor reaches the end.
// NOTE(review): the unchecked slice presumably relies on the caller passing
// a cursor that lies on a character boundary — confirm against the trait's
// safety contract.
#[inline(always)]
unsafe fn next_maybe(
this: &mut Self::Cache,
cursor: &mut Self::Cursor,
) -> Option<Self::MaybeToken> {
if *cursor < this.as_str().len() {
// The remaining text is non-empty here, so the grapheme iterator is
// expected to yield at least one item.
let c = this
.as_str()
.get_unchecked(*cursor..)
.graphemes(true)
.next()
.unwrap_unchecked();
*cursor += c.len();
Some(Grapheme::new(c))
} else {
None
}
}
// Builds a span directly from the two byte offsets.
#[inline(always)]
unsafe fn span(_this: &mut Self::Cache, range: Range<&Self::Cursor>) -> Self::Span {
(*range.start..*range.end).into()
}
}
impl<'src> ExactSizeInput<'src> for &'src Graphemes {
#[inline(always)]
unsafe fn span_from(this: &mut Self::Cache, range: RangeFrom<&Self::Cursor>) -> Self::Span {
(*range.start..this.as_str().len()).into()
}
}
impl<'src> ValueInput<'src> for &'src Graphemes {
#[inline(always)]
unsafe fn next(this: &mut Self::Cache, cursor: &mut Self::Cursor) -> Option<Self::Token> {
Self::next_maybe(this, cursor)
}
}
impl<'src> SliceInput<'src> for &'src Graphemes {
type Slice = Self;
#[inline(always)]
fn full_slice(this: &mut Self::Cache) -> Self::Slice {
*this
}
#[inline(always)]
unsafe fn slice(this: &mut Self::Cache, range: Range<&Self::Cursor>) -> Self::Slice {
Graphemes::new(&this.as_str()[*range.start..*range.end])
}
#[inline(always)]
unsafe fn slice_from(
this: &mut Self::Cache,
from: RangeFrom<&Self::Cursor>,
) -> Self::Slice {
Graphemes::new(&this.as_str()[*from.start..])
}
}
#[derive(Debug, Clone)]
pub struct GraphemesIter<'src> {
iter: unicode_segmentation::Graphemes<'src>,
}
impl<'src> GraphemesIter<'src> {
pub fn new(graphemes: &'src Graphemes) -> Self {
Self {
iter: graphemes.as_str().graphemes(true),
}
}
pub fn as_str(self) -> &'src str {
self.iter.as_str()
}
}
impl<'src> Iterator for GraphemesIter<'src> {
type Item = &'src Grapheme;
#[inline]
fn size_hint(&self) -> (usize, Option<usize>) {
self.iter.size_hint()
}
#[inline]
fn next(&mut self) -> Option<Self::Item> {
self.iter.next().map(Grapheme::new)
}
}
impl DoubleEndedIterator for GraphemesIter<'_> {
#[inline]
fn next_back(&mut self) -> Option<Self::Item> {
self.iter.next_back().map(Grapheme::new)
}
}
/// Builds a parser for an identifier, yielding the matched slice.
///
/// The first token must satisfy [`Char::is_ident_start`] and each following
/// token [`Char::is_ident_continue`]. The result is labelled
/// [`TextExpected::AnyIdentifier`].
#[must_use]
pub fn ident<'src, I, E>() -> impl Parser<'src, I, <I as SliceInput<'src>>::Slice, E> + Copy
where
    I: StrInput<'src>,
    I::Token: Char + 'src,
    E: ParserExtra<'src, I>,
    E::Error: LabelError<'src, I, TextExpected<()>>,
{
    let head = any().filter(|tok: &I::Token| tok.is_ident_start());
    let tail = any()
        .filter(|tok: &I::Token| tok.is_ident_continue())
        .repeated();

    head.then(tail)
        .to_slice()
        .labelled(TextExpected::AnyIdentifier)
        .as_builtin()
}
/// Builds a parser that accepts an identifier only when it equals `keyword`,
/// yielding the matched slice.
///
/// A whole identifier is parsed first (see `ident` in this module) and then
/// compared, so a longer identifier that merely starts with `keyword` is
/// rejected. Mismatches are reported as `TextExpected::Identifier(keyword)`.
#[track_caller]
pub fn keyword<'src, I, S, E>(
    keyword: S,
) -> impl Parser<'src, I, <I as SliceInput<'src>>::Slice, E> + Clone + 'src
where
    I: StrInput<'src>,
    I::Slice: PartialEq,
    I::Token: Char + fmt::Debug + 'src,
    S: PartialEq<I::Slice> + Clone + 'src,
    E: ParserExtra<'src, I> + 'src,
    E::Error: LabelError<'src, I, TextExpected<()>> + LabelError<'src, I, TextExpected<S>>,
{
    ident()
        .try_map({
            // The closure owns its own copy so the original can be moved into
            // the label below.
            let keyword = keyword.clone();
            move |found: I::Slice, span| {
                if keyword == found {
                    return Ok(());
                }
                Err(LabelError::expected_found(
                    [TextExpected::Identifier(keyword.clone())],
                    None,
                    span,
                ))
            }
        })
        .to_slice()
        // Last use of `keyword`: move it instead of cloning it again.
        .labelled(TextExpected::Identifier(keyword))
        .as_builtin()
}
/// Like [`char::is_whitespace`], but always rejects the characters
/// U+202A..=U+202E and U+2066..=U+2069.
pub fn is_whitespace(c: char) -> bool {
    match c {
        // These two ranges are never treated as whitespace.
        '\u{202A}'..='\u{202E}' | '\u{2066}'..='\u{2069}' => false,
        _ => c.is_whitespace(),
    }
}
}
#[cfg(test)]
mod tests {
    use crate::prelude::*;
    use std::fmt;

    /// Builds an ASCII keyword parser; used only to check that it can be built.
    fn make_ascii_kw_parser<'src, I>(s: I::Slice) -> impl Parser<'src, I, ()>
    where
        I: crate::StrInput<'src>,
        I::Slice: PartialEq,
        I::Token: crate::Char + fmt::Debug + 'src,
    {
        text::ascii::keyword(s).ignored()
    }

    /// Builds a Unicode keyword parser; used only to check that it can be built.
    fn make_unicode_kw_parser<'src, I>(s: I::Slice) -> impl Parser<'src, I, ()>
    where
        I: crate::StrInput<'src>,
        I::Slice: PartialEq,
        I::Token: crate::Char + fmt::Debug + 'src,
    {
        text::unicode::keyword(s).ignored()
    }

    /// Asserts that `parser` consumes all of `input` and outputs it unchanged.
    fn test_ok<'src, P: Parser<'src, &'src str, &'src str>>(parser: P, input: &'src str) {
        let expected = ParseResult {
            output: Some(input),
            errs: vec![],
        };
        assert_eq!(parser.parse(input), expected);
    }

    /// Asserts that `parser` rejects `input` with a single default error.
    fn test_err<'src, P: Parser<'src, &'src str, &'src str>>(parser: P, input: &'src str) {
        let expected = ParseResult {
            output: None,
            errs: vec![EmptyErr::default()],
        };
        assert_eq!(parser.parse(input), expected);
    }

    #[test]
    fn keyword_good() {
        // NOTE(review): `_42` is exercised twice; the second call looks redundant.
        for kw in ["hello", "_42", "_42"] {
            make_ascii_kw_parser::<&str>(kw);
        }
        for kw in ["שלום", "привет", "你好"] {
            make_unicode_kw_parser::<&str>(kw);
        }
    }

    #[test]
    fn ident() {
        let ident = text::ident::<&str, extra::Default>();

        for accepted in ["foo", "foo_bar", "foo_", "_foo", "_", "__", "__init__"] {
            test_ok(ident, accepted);
        }
        for rejected in ["", ".", "123"] {
            test_err(ident, rejected);
        }
    }

    #[test]
    fn whitespace() {
        use crate::{whitespace, LabelError, TextExpected};

        let parser = whitespace::<&str, extra::Err<Rich<_>>>().exactly(1);
        // Empty input: exactly one whitespace token was required but none found.
        let expected_err = LabelError::<&str, _>::expected_found(
            vec![TextExpected::<&str>::Whitespace],
            None,
            SimpleSpan::new((), 0..0),
        );
        assert_eq!(
            parser.parse("").into_output_errors(),
            (None, vec![expected_err])
        );
    }
}