1use nom::character::{complete::space0, streaming::anychar};
2#[allow(unused_imports)]
3use nom::{
4 branch::alt,
5 bytes::complete::{escaped, tag, take_till, take_until, take_while, take_while_m_n},
6 character::{
7 complete::{alphanumeric1, char as cchar, multispace0, multispace1, none_of, one_of},
8 is_alphabetic, is_newline, is_space,
9 streaming::space1,
10 },
11 combinator::{fail, map, map_res, opt, value},
12 number::complete::be_u8,
13 sequence::{delimited, tuple},
14 IResult,
15};
16
17use crate::lexer::Literal;
18
19#[derive(Debug, PartialEq, Eq, Clone, Copy)]
20pub enum Token<'a> {
21 OpenTag,
22 EndTag,
23 CloseTag,
24 NewLine,
25 Eof,
26 Literal(Literal<'a>),
27}
28
29impl<'a> Token<'a> {
30 pub fn ident(&self) -> Option<&'a str> {
31 self.raw_string()
33 }
34
35 pub fn raw_string(&self) -> Option<&'a str> {
36 self.literal().map(|l| l.raw)
37 }
38
39 pub fn literal(&self) -> Option<Literal<'a>> {
40 match self {
41 Self::Literal(l) => Some(*l),
42 _ => None,
43 }
44 }
45}
46
47fn opentag(input: &[u8]) -> IResult<&[u8], Token> {
48 value(Token::OpenTag, tag(b"<"))(input)
49}
50
51fn endtag(input: &[u8]) -> IResult<&[u8], Token> {
52 value(Token::EndTag, tag(b">"))(input)
53}
54
55fn newline(input: &[u8]) -> IResult<&[u8], Token> {
56 value(Token::NewLine, tuple((opt(tag(b"\r")), tag(b"\n"))))(input)
57}
58
59fn closetag(input: &[u8]) -> IResult<&[u8], Token> {
60 value(Token::CloseTag, tag(b"</"))(input)
61}
62
63fn comment(input: &[u8]) -> IResult<&[u8], &[u8]> {
64 map(
65 tuple((
66 tag("#"),
67 take_till(is_newline),
68 opt(tag(b"\r")),
69 opt(tag(b"\n")),
70 )),
71 |x| x.1,
72 )(input)
73}
74
75fn literal(input: &[u8]) -> IResult<&[u8], Token> {
76 let (_, mut first) = be_u8(input)?;
77
78 let (input, raw) = match first {
79 b'"' => map_res(
80 map(
82 delimited(
83 cchar(first as _),
84 opt(escaped(none_of(r#"\""#), '\\', anychar)),
85 cchar(first as _),
86 ),
87 Option::unwrap_or_default,
88 ),
89 std::str::from_utf8,
90 )(input),
91 b'\'' => map_res(
92 map(
94 delimited(
95 cchar(first as _),
96 opt(escaped(none_of(r#"\'"#), '\\', anychar)),
97 cchar(first as _),
98 ),
99 Option::unwrap_or_default,
100 ),
101 std::str::from_utf8,
102 )(input),
103 _ => {
104 first = 0;
105 map_res(
106 escaped(none_of(" \t\r\n<>'\"\\"), '\\', anychar),
107 std::str::from_utf8,
108 )(input)
109 }
110 }?;
111 Ok((input, Token::Literal(Literal { raw, quote: first })))
112}
113
114pub fn tokenizer(input: &[u8]) -> IResult<&[u8], Token> {
115 inner_tokenizer::<false>(input)
116}
117
118fn space_and_comment<const NL: bool>(input: &[u8]) -> IResult<&[u8], Option<&[u8]>> {
119 let space = if NL { space0 } else { multispace0 };
120 map(tuple((space, opt(comment))), |x| x.1)(input)
121}
122
123pub fn inner_tokenizer<const NL: bool>(mut input: &[u8]) -> IResult<&[u8], Token> {
124 loop {
125 let (rest, cmt) = space_and_comment::<NL>(input)?;
126 input = rest;
127 if cmt.is_some() {
128 } else {
130 break;
131 }
132 }
133 if input.len() == 0 {
134 return Ok((input, Token::Eof));
135 }
136 alt((closetag, opentag, endtag, newline, literal))(input)
137}