use super::*;
use core::{convert::Infallible, ops::Not, slice::SliceIndex};
/// Errors reported by the low-level character readers of the parser.
#[derive(Debug, Clone, Copy)]
pub enum ParserError {
/// There is no character to read at the requested position.
EOF,
}
/// A value paired with a `usize` position; the parser uses the second field
/// as the offset at which reading should continue.
pub type AtWith<T> = (T, usize);
/// A borrowed slice of the source plus the position that follows it.
pub type AtStr<'a> = AtWith<&'a str>;
/// A parsed [`Token`] plus the position that follows it.
pub type AtToken<'a, Custom = Infallible> = AtWith<Token<'a, Custom>>;
/// A parsed [`Text`] plus the position that follows it.
pub type AtText<'a> = AtWith<Text<'a>>;
// Signature of the user hook for custom tokens: it receives the source and a
// position, and returns a token with its end position, or `None` to decline.
type CustomFn<'a, Custom, S> = fn(&'a S, usize) -> Option<AtToken<'a, Custom>>;
/// Markdown tokenizer implemented as default methods on top of two primitives.
///
/// Implementors supply [`Parser::next_char`] and [`Parser::get_range_str`];
/// every `parse_*` and `consume_*` method is built from those. A position
/// (`at`, and the `usize` half of every returned pair) advances by the UTF-8
/// length of each consumed character, and the same positions are used as the
/// bounds of the ranges handed to [`Parser::get_range_str`].
pub trait Parser {
    /// Tokenizes the whole source and returns the tokens in a fresh vector.
    fn parse_md(&self) -> Vec<Token<'_>> {
        let mut tokens = Vec::new();
        self.parse_md_with_buf(&mut tokens);
        tokens
    }

    /// Tokenizes the whole source, appending every token to `buf`.
    ///
    /// No custom hook is installed: the hook given to [`Parser::parse_token`]
    /// always declines.
    fn parse_md_with_buf<'a>(&'a self, buf: &mut Vec<Token<'a>>) {
        let mut at = 0;
        while let Some((token, next)) = self.parse_token(at, |_, _| None) {
            buf.push(token);
            at = next;
        }
    }

    /// Like [`Parser::parse_md`], but `custom` gets a chance to produce
    /// [`Token::Custom`] (or any other token) before headers, list items and
    /// text are tried.
    fn parse_md_custom<'a, Custom>(
        &'a self,
        custom: CustomFn<'a, Custom, Self>,
    ) -> Vec<Token<'a, Custom>> {
        let mut tokens = Vec::new();
        self.parse_md_with_buf_custom(&mut tokens, custom);
        tokens
    }

    /// Like [`Parser::parse_md_with_buf`], but with a custom token hook.
    fn parse_md_with_buf_custom<'a, Custom>(
        &'a self,
        buf: &mut Vec<Token<'a, Custom>>,
        custom: CustomFn<'a, Custom, Self>,
    ) {
        let mut at = 0;
        while let Some((token, next)) = self.parse_token(at, custom) {
            buf.push(token);
            at = next;
        }
    }

    /// Parses one token starting at `at`.
    ///
    /// Returns `None` at end of input. Otherwise the candidates are tried in
    /// this order: line break, `custom`, header, list item, then inline
    /// content. The first four are tried after skipping leading non-newline
    /// whitespace; inline content is parsed from `at` itself, so that
    /// whitespace stays part of the resulting text.
    fn parse_token<'a, Custom>(
        &'a self,
        at: usize,
        custom: CustomFn<'a, Custom, Self>,
    ) -> Option<AtToken<'a, Custom>> {
        if self.eof(at) {
            return None;
        }
        let (_, after_ws) = self.consume_whitespace(at)?;
        self.parse_line_break(after_ws)
            .or_else(|| custom(self, after_ws))
            .or_else(|| self.parse_header(after_ws))
            .or_else(|| self.parse_list_item(after_ws))
            .or_else(|| self.parse_texty(at))
    }

    /// Parses inline content at `at`: code, then `<url>`, then styled or
    /// plain text.
    #[inline(always)]
    fn parse_texty<Custom>(&self, at: usize) -> Option<AtToken<'_, Custom>> {
        self.parse_code(at)
            .or_else(|| self.parse_inline_url(at))
            .or_else(|| self.parse_text(at).map(|(t, at)| (t.into_token(), at)))
    }

    /// Parses a code construct introduced by a run of backticks at `at`.
    ///
    /// A run of exactly three backticks opens a fence, a single backtick opens
    /// inline code, and any other run length is rejected. A run that reaches
    /// end of input is rejected as well.
    fn parse_code<Custom>(&self, at: usize) -> Option<AtToken<'_, Custom>> {
        let (ticks, nat) = self.consume_while(at, is_backtick).ok().flatten()?;
        match ticks.len() {
            3 => self.parse_code_fence(nat),
            1 => self.parse_inline_code(nat),
            _ => None,
        }
    }

    /// Parses the body of inline code; `at` is just past the opening backtick.
    ///
    /// Fails when the body is empty or no closing backtick exists. The
    /// returned position is past the closing backtick.
    fn parse_inline_code<Custom>(&self, at: usize) -> Option<AtToken<'_, Custom>> {
        let (value, end) = self
            .consume_while(at, |c| is_backtick(c).not())
            .ok()
            .flatten()?;
        Some((Text::code(value).into_token(), end + 1))
    }

    /// Parses a list marker at `at`.
    ///
    /// Accepts `-`, `+` or `*` (unordered, `ListItem(None)`) or ASCII digits
    /// followed by `.` (ordered, `ListItem(Some(n))`). The marker must be
    /// followed by at least one non-newline whitespace character, which is
    /// consumed too.
    fn parse_list_item<Custom>(&self, at: usize) -> Option<AtToken<'_, Custom>> {
        let bullet = self
            .consume_char_if(at, |c| matches!(c, '-' | '+' | '*'))
            .map(|nat| (None, nat));
        let (place, after_marker) = bullet.or_else(|| {
            let (digits, nat) = self
                .consume_while(at, |c| c.is_ascii_digit())
                .ok()
                .flatten()?;
            let nat = self.consume_char_if(nat, |c| c == '.')?;
            // Digits that do not fit in `usize` make this "not a list item".
            let place = digits.parse::<usize>().ok()?;
            Some((Some(place), nat))
        })?;
        let (gap, nat) = self.consume_whitespace(after_marker)?;
        gap.is_empty().not().then(|| (Token::ListItem(place), nat))
    }

    /// Parses the body of a code fence; `at` is just past the opening "```".
    ///
    /// Everything up to the next "```" is the fence content. Its first line
    /// becomes `attrs`; the remainder, with all leading newlines removed,
    /// becomes `code`. Fails when the content is empty or the fence is never
    /// closed.
    fn parse_code_fence<Custom>(&self, at: usize) -> Option<AtToken<'_, Custom>> {
        let (v, end) = self.consume_until_str(at, "```").ok().flatten()?;
        // `split` always yields at least one item, so the fallback is never hit.
        let attrs = v.split('\n').next().unwrap_or("");
        let code = v[attrs.len()..].trim_start_matches('\n');
        Some((Token::CodeFence { code, attrs }, end + 3))
    }

    /// Parses a header marker at `at`: one to six `#` followed by at least one
    /// non-newline whitespace character. The header's text is left for the
    /// following tokens.
    fn parse_header<Custom>(&self, at: usize) -> Option<AtToken<'_, Custom>> {
        let (_, hashes_end) = self.consume_while(at, |c| c == '#').ok().flatten()?;
        let (gap, nat) = self.consume_whitespace(hashes_end)?;
        let level = hashes_end - at;
        (gap.is_empty().not() && (1..=6).contains(&level)).then(|| (Token::Header(level), nat))
    }

    /// Parses an autolink of the form `<url>` at `at`.
    ///
    /// Fails when the URL is empty or the closing `>` is missing.
    fn parse_inline_url<Custom>(&self, at: usize) -> Option<AtToken<'_, Custom>> {
        let open_end = self.consume_char_if(at, |c| c == '<')?;
        let (url, url_end) = self.consume_while(open_end, |c| c != '>').ok().flatten()?;
        let token = Token::Url {
            name: None,
            is_image: false,
            url,
        };
        Some((token, url_end + 1))
    }

    /// Parses emphasised or plain text at `at`.
    ///
    /// * If `at` starts a run of `*`, closing delimiters are searched for from
    ///   the full run length down to a single `*`; the first length found
    ///   wins. A one-star closer yields italic, a two-star closer bold, any
    ///   longer closer both. When the closer is shorter than the opener, the
    ///   surplus opening stars stay in the text value.
    /// * Otherwise the longest run of characters other than `\n`, `<`, `` ` ``
    ///   and `*` is returned as naked text.
    /// * If neither applies and `at` holds one of `<`, `` ` `` or `*` (an
    ///   unmatched marker), that character and the plain run following it are
    ///   returned as naked text. Without this the caller would receive `None`
    ///   in the middle of the input and drop everything after it.
    fn parse_text(&self, at: usize) -> Option<AtText<'_>> {
        let is_plain = |c: char| matches!(c, '\n' | '<' | '`' | '*').not();
        self.consume_while(at, |c| c == '*')
            .ok()
            .flatten()
            .and_then(|(stars, nat)| {
                let count = stars.len();
                (1..=count).rev().find_map(|search| {
                    // `**text*`: treat as italic whose value keeps one star.
                    let check_italic = count == 2 && search == 1;
                    // Re-scan the surplus opening stars as part of the value.
                    let offset = if check_italic { 0 } else { count - search };
                    let (s, nnat) = self
                        .consume_until_str(nat - offset, &stars[0..search])
                        .ok()
                        .flatten()?;
                    let value = if check_italic {
                        self.get_range_str(nat - 1..nnat)
                    } else {
                        s
                    };
                    let text = Text {
                        value,
                        bold: search != 1,
                        italic: search != 2,
                        code: false,
                    };
                    Some((text, nnat + search))
                })
            })
            .or_else(|| {
                self.consume_while(at, is_plain)
                    .map_or_else(try_handle_err, |v| v.map(|(s, nat)| (Text::naked(s), nat)))
            })
            .or_else(|| {
                // Unmatched marker: keep it literally so parsing always advances.
                let marker_end = self.consume_char_if(at, |c| matches!(c, '<' | '`' | '*'))?;
                let end = self
                    .consume_while(marker_end, is_plain)
                    .unwrap_or_else(|(_, partial)| partial)
                    .map_or(marker_end, |(_, end)| end);
                Some((Text::naked(self.get_range_str(at..end)), end))
            })
    }

    /// Parses a single `\n` at `at`.
    fn parse_line_break<Custom>(&self, at: usize) -> Option<AtToken<'_, Custom>> {
        self.consume_char_if(at, |c| c == '\n')
            .map(|nat| (Token::LineBreak, nat))
    }

    /// Skips whitespace other than `\n` starting at `at`.
    ///
    /// Always returns `Some`: when nothing is skipped the result is
    /// `("", at)`. Whitespace running up to end of input is still returned.
    fn consume_whitespace(&self, at: usize) -> Option<AtStr<'_>> {
        self.consume_while(at, |c| c != '\n' && c.is_whitespace())
            .unwrap_or_else(|(err, maybe_info)| match err {
                ParserError::EOF => maybe_info,
            })
            .or(Some(("", at)))
    }

    /// Consumes the character at `at` if `f` accepts it, returning the
    /// position after it.
    #[inline(always)]
    fn consume_char_if<F: Fn(char) -> bool>(&self, at: usize, f: F) -> Option<usize> {
        let (c, nat) = self.consume_char(at).ok()?;
        f(c).then(|| nat)
    }

    /// Consumes characters while `f` accepts them.
    ///
    /// See [`Parser::consume_until`] for the meaning of the result.
    #[inline(always)]
    fn consume_while<F: Fn(char) -> bool>(
        &self,
        at: usize,
        f: F,
    ) -> Result<Option<AtStr<'_>>, (ParserError, Option<AtStr<'_>>)> {
        self.consume_until(at, |c, _, _| f(c).not())
    }

    /// Consumes characters until `f(ch, next, current)` returns `true`.
    ///
    /// `f` receives each character, the position after it and the position it
    /// starts at. The stopping character is not consumed.
    ///
    /// * `Ok(Some((text, end)))`: `f` fired after at least one character.
    /// * `Ok(None)`: `f` fired on the very first character.
    /// * `Err((err, partial))`: input ended before `f` fired; `partial` holds
    ///   what was read so far, or `None` if nothing was.
    fn consume_until<F: Fn(char, usize, usize) -> bool>(
        &self,
        mut at: usize,
        f: F,
    ) -> Result<Option<AtStr<'_>>, (ParserError, Option<AtStr<'_>>)> {
        let start = at;
        loop {
            match self.consume_char(at) {
                Ok((ch, nat)) => {
                    if f(ch, nat, at) {
                        let content = self.get_range_str(start..at);
                        return Ok(content.is_empty().not().then(|| (content, at)));
                    }
                    at = nat;
                }
                Err(err) => {
                    let content = self.get_range_str(start..at);
                    return Err((err, content.is_empty().not().then(|| (content, at))));
                }
            }
        }
    }

    /// Consumes characters until the remaining source starts with `s`.
    ///
    /// `s` itself is not consumed; see [`Parser::consume_until`] for the
    /// meaning of the result.
    #[inline(always)]
    fn consume_until_str(
        &self,
        at: usize,
        s: &str,
    ) -> Result<Option<AtStr<'_>>, (ParserError, Option<AtStr<'_>>)> {
        self.consume_until(at, |_, _, at| self.get_range_str(at..).starts_with(s))
    }

    /// Returns `true` when there is no character to read at `at`.
    #[inline(always)]
    fn eof(&self, at: usize) -> bool {
        self.next_char(at).is_err()
    }

    /// Reads the character at `at` and returns it with the position after it
    /// (`at` plus the character's UTF-8 length).
    #[inline(always)]
    fn consume_char(&self, at: usize) -> Result<(char, usize), ParserError> {
        self.next_char(at).map(|c| (c, at + char_bytes(c)))
    }

    /// Returns the part of the source selected by `range`.
    ///
    /// The default methods pass ranges whose bounds are positions obtained
    /// from [`Parser::consume_char`].
    fn get_range_str<S: SliceIndex<str>>(&self, range: S) -> &S::Output;

    /// Returns the character at position `at`, or [`ParserError::EOF`] when
    /// there is none.
    ///
    /// Because [`Parser::consume_char`] advances by UTF-8 length, `at` must be
    /// interpreted as a byte offset for non-ASCII input to be read correctly.
    fn next_char(&self, at: usize) -> Result<char, ParserError>;
}
/// Parses a borrowed string slice; positions are byte offsets into it.
impl<'a> Parser for &'a str {
    /// Returns the character that starts at byte offset `at`.
    ///
    /// Yields [`ParserError::EOF`] when `at` is at or past the end of the
    /// string, and also when `at` does not fall on a character boundary.
    #[inline(always)]
    fn next_char(&self, at: usize) -> Result<char, ParserError> {
        // The trait advances positions by UTF-8 length and slices with them,
        // so `at` is a byte offset. Looking it up as a character index would
        // skip or misread characters as soon as the input is not pure ASCII.
        self.get(at..)
            .and_then(|rest| rest.chars().next())
            .ok_or(ParserError::EOF)
    }

    /// Slices the string by `range`; panics like ordinary `str` indexing when
    /// the range is out of bounds or not on character boundaries.
    #[inline(always)]
    fn get_range_str<S: SliceIndex<str>>(&self, range: S) -> &S::Output {
        &self[range]
    }
}
/// Parses an owned string; positions are byte offsets into it.
impl Parser for String {
    /// Returns the character that starts at byte offset `at`.
    ///
    /// Yields [`ParserError::EOF`] when `at` is at or past the end of the
    /// string, and also when `at` does not fall on a character boundary.
    #[inline(always)]
    fn next_char(&self, at: usize) -> Result<char, ParserError> {
        // The trait advances positions by UTF-8 length and slices with them,
        // so `at` is a byte offset. Looking it up as a character index would
        // skip or misread characters as soon as the input is not pure ASCII.
        self.as_str()
            .get(at..)
            .and_then(|rest| rest.chars().next())
            .ok_or(ParserError::EOF)
    }

    /// Slices the string by `range`; panics like ordinary `str` indexing when
    /// the range is out of bounds or not on character boundaries.
    #[inline(always)]
    fn get_range_str<S: SliceIndex<str>>(&self, range: S) -> &S::Output {
        &self.as_str()[range]
    }
}
/// Turns a failed consume into naked text when anything was read before the
/// failure; yields `None` when nothing was read.
#[inline(always)]
fn try_handle_err(err: (ParserError, Option<AtStr<'_>>)) -> Option<AtText<'_>> {
    // `EOF` is the only error kind, so this pattern cannot fail to match.
    let (ParserError::EOF, partial) = err;
    let (s, at) = partial?;
    Some((Text::naked(s), at))
}
/// Number of bytes `c` occupies when encoded as UTF-8 (1 to 4).
#[inline(always)]
fn char_bytes(c: char) -> usize {
    c.len_utf8()
}
/// Returns `true` only for the backtick character.
#[inline(always)]
const fn is_backtick(c: char) -> bool {
    matches!(c, '`')
}
/// One element of the token stream produced by the parser.
///
/// `Custom` is the payload type of [`Token::Custom`]; it defaults to
/// [`Infallible`]. String fields borrow from the parsed source.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Token<'a, Custom: 'a = Infallible> {
/// A run of inline text with its styling flags.
Text(Text<'a>),
/// A link. The parser in this file only emits the `<url>` form, with
/// `name: None` and `is_image: false`.
Url {
/// Optional display text of the link.
name: Option<Text<'a>>,
/// The link target as written in the source.
url: &'a str,
/// Whether the link denotes an image.
is_image: bool,
},
/// A header marker; the value is the number of `#` characters (the parser
/// emits 1 through 6).
Header(usize),
/// A list marker: `None` for `-`/`+`/`*` bullets, `Some(n)` for `n.`.
ListItem(Option<usize>),
/// A fenced code block: `attrs` is the first line after the opening fence,
/// `code` is the rest.
CodeFence { code: &'a str, attrs: &'a str },
/// A single newline.
LineBreak,
/// A token produced by a user-supplied hook.
Custom(Custom),
}
/// A slice of source text together with its inline styling flags.
#[derive(Default, Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Text<'a> {
/// The text content, borrowed from the source.
pub value: &'a str,
/// Set for bold text.
pub bold: bool,
/// Set for italic text.
pub italic: bool,
/// Set for inline code.
pub code: bool,
}
impl<'a> Text<'a> {
pub const fn code(value: &'a str) -> Self {
Self {
value,
code: true,
italic: false,
bold: false,
}
}
pub const fn naked(value: &'a str) -> Self {
Self {
value,
code: false,
italic: false,
bold: false,
}
}
pub const fn into_token<Custom>(self) -> Token<'a, Custom> {
Token::Text(self)
}
}