use core::{borrow::Borrow, marker::PhantomData};
use crate::{
container::OrderedSeq,
derive::parser,
error::{Error, Span},
input::{Input, InputType, StrInput},
parser::ParserExtras,
prelude::Parser,
primitive::*,
IResult,
};
// Non-public module that holds the `Sealed` marker trait. `Char` lists
// `Sealed` as a supertrait, so a type can only implement `Char` if it can
// also implement (and therefore name) this trait.
mod private {
/// Marker trait with no items; this file implements it for `char` and `u8`.
pub trait Sealed {}
}
/// Abstraction over the character-like token types the text parsers in this
/// file operate on.
///
/// The trait requires the `Sealed` marker as a supertrait; this file
/// implements it for `char` and `u8`.
pub trait Char: Sized + Copy + PartialEq + core::fmt::Debug + Sealed + 'static {
/// The unsized string type made of these characters (`str` for `char`,
/// `[u8]` for `u8` in the implementations in this file).
type Str: ?Sized + AsRef<[u8]> + AsRef<Self::Str> + 'static;
/// Builds a character from a byte. The implementations in this file perform a
/// plain widening/identity conversion and do not validate that `c` is ASCII.
fn from_ascii(c: u8) -> Self;
/// Returns `true` for whitespace that stays on the current line (space and
/// horizontal tab in the implementations in this file).
fn is_inline_whitespace(&self) -> bool;
/// Returns `true` if the character counts as whitespace for this type.
fn is_whitespace(&self) -> bool;
/// Returns the character representing the digit zero.
fn digit_zero() -> Self;
/// Returns `true` if the character is a digit in the given `radix`.
fn is_digit(&self, radix: u32) -> bool;
/// Returns `true` if the character may begin an identifier.
fn is_ident_start(&self) -> bool;
/// Returns `true` if the character may appear after the first character of an
/// identifier.
fn is_ident_continue(&self) -> bool;
/// Converts the character to a Rust `char`.
fn to_char(&self) -> char;
/// Iterator type returned by [`Char::str_to_chars`].
type StrCharIter<'a>: Iterator<Item = Self>;
/// Iterates over the characters of a string of this character type.
fn str_to_chars(s: &Self::Str) -> Self::StrCharIter<'_>;
}
// `char` satisfies the `Sealed` supertrait so that it can implement `Char`.
impl Sealed for char {}
/// `Char` implementation for Unicode scalar values, with `str` as the string
/// type.
impl Char for char {
    type Str = str;
    type StrCharIter<'a> = core::str::Chars<'a>;

    /// Widens the byte to the `char` with the same code point.
    fn from_ascii(c: u8) -> Self {
        char::from(c)
    }

    /// A `char` already is a `char`; returns a copy.
    fn to_char(&self) -> char {
        *self
    }

    fn digit_zero() -> Self {
        '0'
    }

    /// Only space and horizontal tab count as inline whitespace.
    fn is_inline_whitespace(&self) -> bool {
        matches!(*self, ' ' | '\t')
    }

    // The explicit `char::` paths below call the inherent methods; plain method
    // syntax on `&char` would resolve to this trait's own method instead.
    fn is_whitespace(&self) -> bool {
        let ch = *self;
        char::is_whitespace(ch)
    }

    fn is_digit(&self, radix: u32) -> bool {
        let ch = *self;
        char::is_digit(ch, radix)
    }

    /// Identifier start is decided by `unicode_ident::is_xid_start`.
    fn is_ident_start(&self) -> bool {
        let ch = *self;
        unicode_ident::is_xid_start(ch)
    }

    /// Identifier continuation is decided by `unicode_ident::is_xid_continue`.
    fn is_ident_continue(&self) -> bool {
        let ch = *self;
        unicode_ident::is_xid_continue(ch)
    }

    fn str_to_chars(s: &Self::Str) -> Self::StrCharIter<'_> {
        str::chars(s)
    }
}
// `u8` satisfies the `Sealed` supertrait so that it can implement `Char`.
impl Sealed for u8 {}
impl Char for u8 {
type Str = [u8];
fn from_ascii(c: u8) -> Self {
c
}
fn is_inline_whitespace(&self) -> bool {
*self == b' ' || *self == b'\t'
}
fn is_whitespace(&self) -> bool {
self.is_ascii_whitespace()
}
fn digit_zero() -> Self {
b'0'
}
fn is_digit(&self, radix: u32) -> bool {
(*self as char).is_digit(radix)
}
fn to_char(&self) -> char {
*self as char
}
type StrCharIter<'a> = core::iter::Copied<core::slice::Iter<'a, u8>>;
fn str_to_chars(s: &Self::Str) -> Self::StrCharIter<'_> {
s.iter().copied()
}
fn is_ident_start(&self) -> bool {
self.to_char().is_ident_start()
}
fn is_ident_continue(&self) -> bool {
self.to_char().is_ident_continue()
}
}
/// Parsers for identifiers and keywords restricted to ASCII letters, ASCII
/// digits and underscores.
pub mod ascii {
    use super::*;

    /// Parses an ASCII identifier and returns the slice covering all of it.
    ///
    /// The first token must be an ASCII letter or `_`; every following token
    /// that is an ASCII letter, ASCII digit or `_` is consumed as well.
    ///
    /// # Errors
    ///
    /// Propagates the error of `any` when no token is available. If the first
    /// token is not a valid identifier start, an "expected token, found ..."
    /// error is returned together with the input positioned after that token.
    pub fn ident<'a, I: InputType + StrInput<'a, C> + 'a, C: Char, E: ParserExtras<I> + 'a>(
        inp: Input<'_, I, E>,
    ) -> IResult<'_, I, E, &'a C::Str> {
        let before = inp.offset;
        let (mut inp, first) = any(inp)?;
        let first_char = first.to_char();
        if !(first_char.is_ascii_alphabetic() || first_char == '_') {
            let span = inp.span_since(before);
            return Err((
                inp,
                Error::expected_token_found(
                    Span::new_usize(span),
                    vec![],
                    crate::MaybeDeref::Val(first),
                ),
            ));
        }
        inp.skip_while(|c: &C| {
            let c = c.to_char();
            c.is_ascii_alphanumeric() || c == '_'
        });
        // Slice from the saved start offset so the first character is part of
        // the result; slicing only the trailing run would drop it.
        let slice = inp.input.slice(inp.span_since(before));
        Ok((inp, slice))
    }

    /// Builds a parser that accepts exactly the ASCII identifier `keyword`.
    ///
    /// The returned parser reads a whole identifier with [`ident`] and then
    /// compares it with `keyword`, so a longer identifier that merely starts
    /// with the keyword is rejected.
    ///
    /// # Panics
    ///
    /// When compiled with debug assertions, panics if `keyword` is empty, if
    /// its first character is not an ASCII letter or `_`, or if any later
    /// character is not an ASCII letter, ASCII digit or `_`.
    ///
    /// # Errors
    ///
    /// The returned parser fails with the error of [`ident`], or with an
    /// "expected token, found ..." error carrying the first character of the
    /// identifier that was found when it differs from `keyword`.
    #[track_caller]
    pub fn keyword<
        'a,
        C: Char + core::fmt::Debug + 'a,
        I: InputType + StrInput<'a, C> + 'a,
        E: ParserExtras<I> + 'a,
        Str: AsRef<C::Str> + 'a + Clone,
    >(
        keyword: Str,
    ) -> impl Fn(Input<'_, I, E>) -> IResult<'_, I, E, &'a C::Str>
    where
        C::Str: PartialEq,
    {
        #[cfg(debug_assertions)]
        {
            let mut cs = C::str_to_chars(keyword.as_ref());
            if let Some(c) = cs.next() {
                assert!(c.to_char().is_ascii_alphabetic() || c.to_char() == '_', "The first character of a keyword must be ASCII alphabetic or an underscore, not {c:?}");
            } else {
                panic!("Keyword must have at least one character");
            }
            for c in cs {
                assert!(c.to_char().is_ascii_alphanumeric() || c.to_char() == '_', "Trailing characters of a keyword must be ASCII alphanumeric or an underscore, not {c:?}");
            }
        }
        move |input| {
            let before = input.offset;
            let (input, found) = ident(input)?;
            if found == keyword.as_ref() {
                return Ok((input, found));
            }
            let span = input.span_since(before);
            // `ident` only succeeds after consuming at least one token, so the
            // slice it returns has a first character.
            let first = C::str_to_chars(found)
                .next()
                .expect("a parsed identifier contains at least one character");
            let err = Error::expected_token_found(
                Span::new_usize(span),
                vec![],
                crate::MaybeDeref::Val(first),
            );
            Err((input, err))
        }
    }
}
/// Single characters that `newline` accepts as a line terminator when the
/// input did not match a line feed (optionally preceded by a carriage return).
static NEWLINE_CHARACTERS_AFTER_CRLF: [char; 6] = [
    '\u{000D}', // carriage return
    '\u{000B}', // vertical tab
    '\u{000C}', // form feed
    '\u{0085}', // next line
    '\u{2028}', // line separator
    '\u{2029}', // paragraph separator
];
/// Builds a parser that consumes one line terminator and discards it.
///
/// Two alternatives are tried in order: a line feed optionally preceded by a
/// carriage return, then any single token whose `char` value is listed in
/// `NEWLINE_CHARACTERS_AFTER_CRLF`.
pub fn newline<I: InputType, E: ParserExtras<I>>() -> impl Parser<I, (), E>
where
    I::Token: Char + PartialEq,
{
    let lf_or_crlf = cr.optional().ignore_then(lf);
    let other_terminator = any.filter(|tok: &I::Token| {
        let as_char = tok.to_char();
        NEWLINE_CHARACTERS_AFTER_CRLF.contains(&as_char)
    });
    lf_or_crlf.or(other_terminator).ignored()
}
/// Parses a single line-feed token (`\n`) and returns it.
#[parser(extras = E)]
pub fn lf<I: InputType, E: ParserExtras<I>>(input: I) -> I::Token
where
    I::Token: Char + PartialEq,
{
    let line_feed = Char::from_ascii(b'\n');
    let parser = just(line_feed);
    parser(input)
}
/// Parses the two-token sequence carriage return, line feed (`\r\n`) and
/// returns both tokens.
#[parser(extras = E)]
pub fn crlf<I: InputType, E: ParserExtras<I>>(input: I) -> [I::Token; 2]
where
    I::Token: Char + PartialEq,
{
    let carriage_return = Char::from_ascii(b'\r');
    let line_feed = Char::from_ascii(b'\n');
    let parser = just([carriage_return, line_feed]);
    parser(input)
}
/// Parses a single carriage-return token (`\r`) and returns it.
#[parser(extras = E)]
pub fn cr<I: InputType, E: ParserExtras<I>>(input: I) -> I::Token
where
    I::Token: Char + PartialEq,
{
    let carriage_return = Char::from_ascii(b'\r');
    let parser = just(carriage_return);
    parser(input)
}
pub fn just_ignore_case<
'a,
'parse,
I: InputType,
E: ParserExtras<I>,
T: OrderedSeq<'a, I::Token> + Clone,
>(
seq: T,
) -> impl Fn(Input<'parse, I, E>) -> IResult<'parse, I, E, T>
where
I::Token: Char + PartialEq + Clone + 'static,
{
move |mut input| {
if let Some(err) = seq.seq_iter().find_map(|next| {
let befunge = input.offset;
let next = T::to_maybe_ref(next);
match input.next_inner() {
(_, Some(token))
if next.borrow_as_t().to_char().eq_ignore_ascii_case(
&token.borrow().to_char(),
) =>
{
None
}
(_, found) => Some(Error::expected_token_found_or_eof(
Span::new_usize(input.span_since(befunge)),
vec![next.into_clone()],
found.map(crate::MaybeDeref::Val),
)),
}
}) {
Err((input, err))
} else {
Ok((input, seq.clone()))
}
}
}
pub use unicode::*;
use self::private::Sealed;
/// Parsers for identifiers and keywords based on `Char::is_ident_start` and
/// `Char::is_ident_continue`.
pub mod unicode {
    use super::*;

    /// Parses an identifier and returns the slice covering it.
    ///
    /// The first token must satisfy `Char::is_ident_start`; all following
    /// tokens that satisfy `Char::is_ident_continue` are consumed as well.
    ///
    /// # Errors
    ///
    /// Propagates the error of the start-character check when the first token
    /// is missing or is not a valid identifier start.
    #[parser(extras = E)]
    pub fn ident<'a, I: InputType + StrInput<'a, C> + 'a, C: Char, E: ParserExtras<I> + 'a>(
        input: I,
    ) -> &'a C::Str {
        let before = input.offset;
        let (mut input, ()) = filter(|c: &C| c.is_ident_start()).check(input)?;
        input.skip_while(|c: &C| c.is_ident_continue());
        let slice = input.input.slice(input.span_since(before));
        Ok((input, slice))
    }

    /// Builds a parser that accepts exactly the identifier `keyword`.
    ///
    /// The returned parser reads a whole identifier with [`ident`] and then
    /// compares it with `keyword`, so a longer identifier that merely starts
    /// with the keyword is rejected.
    ///
    /// # Panics
    ///
    /// When compiled with debug assertions, panics if `keyword` is empty, if
    /// its first character fails `Char::is_ident_start`, or if any later
    /// character fails `Char::is_ident_continue`.
    ///
    /// # Errors
    ///
    /// The returned parser fails with the error of [`ident`], or with an
    /// "expected token, found ..." error carrying the first character of the
    /// identifier that was found when it differs from `keyword`.
    #[track_caller]
    pub fn keyword<
        'a,
        I: InputType + StrInput<'a, C> + 'a,
        C: Char,
        Str: AsRef<C::Str> + Clone,
        E: ParserExtras<I> + 'a,
    >(
        keyword: Str,
    ) -> impl Fn(Input<'_, I, E>) -> IResult<'_, I, E, &'a C::Str>
    where
        C::Str: PartialEq,
    {
        #[cfg(debug_assertions)]
        {
            let mut cs = C::str_to_chars(keyword.as_ref());
            if let Some(c) = cs.next() {
                assert!(c.is_ident_start(), "The first character of a keyword must be a valid unicode XID_START, not {c:?}");
            } else {
                panic!("Keyword must have at least one character");
            }
            for c in cs {
                assert!(c.is_ident_continue(), "Trailing characters of a keyword must be valid as unicode XID_CONTINUE, not {c:?}");
            }
        }
        move |input| {
            let before = input.offset;
            let (input, found) = ident(input)?;
            if found == keyword.as_ref() {
                return Ok((input, found));
            }
            // The span is only needed for the error, so it is computed here.
            let span = input.span_since(before);
            // `ident` only succeeds after consuming a start character, so the
            // slice it returns has a first character.
            let first = C::str_to_chars(found)
                .next()
                .expect("a parsed identifier contains at least one character");
            let err = Error::expected_token_found(
                Span::new_usize(span),
                vec![],
                crate::MaybeDeref::Val(first),
            );
            Err((input, err))
        }
    }
}
/// Builds a parser for a run of one or more digits in the given `radix`.
///
/// A token counts as a digit when `Char::is_digit(radix)` returns `true`.
#[must_use]
pub fn digits<C, I, E>(radix: u32) -> Repeated<impl Parser<I, C, E>, C>
where
    C: Char,
    I: InputType<Token = C>,
    E: ParserExtras<I>,
{
    let is_radix_digit = move |token: &C| token.is_digit(radix);
    let single_digit = any.filter(is_radix_digit);
    single_digit.repeated().at_least(1)
}
/// Builds a parser for a non-negative integer literal in the given `radix`,
/// returning the slice that was matched.
///
/// An integer is either a single zero digit, or a non-zero digit followed by
/// any number of further digits. A leading zero is therefore parsed on its
/// own and any digits after it are left in the input.
///
/// # Errors
///
/// Propagates the error of `any` when no token is available. If the first
/// token is not a digit in `radix`, an "expected token, found ..." error
/// spanning that token is returned.
pub fn int<'a, I: InputType + StrInput<'a, C>, C: Char, E: ParserExtras<I>>(
    radix: u32,
) -> impl Fn(Input<'_, I, E>) -> IResult<'_, I, E, &'a C::Str> {
    move |input| {
        with_slice(input, move |input| {
            // Remember where the literal starts so an error span covers the
            // offending token instead of being empty.
            let before = input.offset;
            let (input, first) = any(input)?;
            // A lone zero is a complete integer; handle it before the
            // non-zero-digit rule, which would otherwise reject it.
            if first == C::digit_zero() {
                return Ok((input, ()));
            }
            if !first.is_digit(radix) {
                let err = Error::expected_token_found(
                    Span::new_usize(input.span_since(before)),
                    vec![],
                    crate::MaybeDeref::Val(first),
                );
                return Err((input, err));
            }
            any.filter(move |c: &C| c.is_digit(radix))
                .repeated()
                .ignored()
                .check(input)
        })
    }
}
/// Parser wrapper created by `padded`. Its `Parser` implementation in this
/// file skips whitespace before and after running the wrapped parser `A`.
/// `C` records the character type without storing a value of it.
#[derive(Copy, Clone)]
pub struct Padded<A, C>(A, PhantomData<C>);
/// Wraps `parser` in a [`Padded`], which skips whitespace on both sides of
/// whatever `parser` matches.
pub fn padded<
    'a,
    I: InputType + StrInput<'a, C>,
    E: ParserExtras<I>,
    C: Char,
    O,
    A: Parser<I, O, E>,
>(
    parser: A,
) -> Padded<A, C> {
    let char_marker = PhantomData::<C>;
    Padded(parser, char_marker)
}
/// Runs the wrapped parser with whitespace (as defined by
/// `Char::is_whitespace`) skipped before and after it.
impl<
        'a,
        I: InputType + StrInput<'a, C>,
        E: ParserExtras<I>,
        C: Char,
        O,
        A: Parser<I, O, E>,
    > Parser<I, O, E> for Padded<A, C>
{
    /// Checks the wrapped parser without producing its output.
    fn check<'parse>(&self, mut input: Input<'parse, I, E>) -> IResult<'parse, I, E, ()> {
        input.skip_while(Char::is_whitespace);
        match self.0.check(input) {
            Ok((mut rest, ())) => {
                // Trailing whitespace is only skipped after a successful match.
                rest.skip_while(Char::is_whitespace);
                Ok((rest, ()))
            }
            Err(failure) => Err(failure),
        }
    }

    /// Runs the wrapped parser and returns its output.
    fn parse<'parse>(&self, mut input: Input<'parse, I, E>) -> IResult<'parse, I, E, O> {
        input.skip_while(Char::is_whitespace);
        match self.0.parse(input) {
            Ok((mut rest, output)) => {
                // Trailing whitespace is only skipped after a successful match.
                rest.skip_while(Char::is_whitespace);
                Ok((rest, output))
            }
            Err(failure) => Err(failure),
        }
    }
}
/// Builds a parser that consumes a repeated run of whitespace tokens (as
/// defined by `Char::is_whitespace`) and yields `()`.
pub fn whitespace<'a, C: Char, I: InputType + StrInput<'a, C>, E: ParserExtras<I>>(
) -> impl Parser<I, (), E> {
    let one_whitespace = any
        .filter(|token: &I::Token| token.is_whitespace())
        .ignored();
    one_whitespace.repeated().ignored()
}
/// Builds a repeated parser over inline whitespace tokens (as defined by
/// `Char::is_inline_whitespace`), each of which is discarded.
pub fn inline_whitespace<'a, C: Char, I: InputType + StrInput<'a, C>, E: ParserExtras<I>>(
) -> Repeated<impl Parser<I, (), E>, (), ()> {
    let one_inline_whitespace = any
        .filter(|token: &I::Token| token.is_inline_whitespace())
        .ignored();
    one_inline_whitespace.repeated_custom()
}