udled_tokenizers/
ident.rs1use udled::{any, token::Spanned, Lex, Span, StringExt, Tokenizer};
2
/// Tokenizer that recognises a plain identifier and yields it as a [`Lex`].
///
/// The type carries no state, so it is cheap to copy and can be created with
/// `Ident` or `Ident::default()`.
#[derive(Clone, Copy, Debug, Default)]
pub struct Ident;
6
7impl Tokenizer for Ident {
8 type Token<'a> = Lex<'a>;
9
10 fn to_token<'a>(
11 &self,
12 reader: &mut udled::Reader<'_, 'a>,
13 ) -> Result<Self::Token<'a>, udled::Error> {
14 let start_idx = reader.position();
15
16 let mut end_idx = start_idx;
17
18 let Some(first) = reader.peek_ch() else {
19 return Err(reader.error("expected identifier"));
20 };
21
22 if !first.is_alphabetic() && first != "_" {
23 return Err(reader.error("expected identifier"));
24 }
25
26 loop {
27 let Some(ch) = reader.peek_ch() else {
28 break;
29 };
30
31 if ch == "\0" {
32 break;
33 }
34
35 if !ch.is_ascii_alphanumeric() && ch != "_" {
36 break;
37 }
38
39 end_idx += 1;
40
41 reader.eat_ch()?;
42 }
43
44 if start_idx == end_idx {
45 return Err(reader.error("expected identifier"));
46 }
47
48 let ret = &reader.source()[start_idx..reader.position()];
49
50 Ok(Lex::new(ret, Span::new(start_idx, reader.position())))
51 }
52
53 fn peek<'a>(&self, reader: &mut udled::Reader<'_, '_>) -> Result<bool, udled::Error> {
54 let ch = reader.eat_ch()?;
55 Ok(ch.is_alphabetic() || ch == "_")
56 }
57}
58
/// Tokenizer for XML-style names such as `div` or `custom-tag`, yielded as a
/// [`Lex`].
///
/// The type carries no state; it derives the same traits as [`Ident`] so both
/// tokenizers can be copied, defaulted and debug-printed alike.
#[derive(Debug, Clone, Copy, Default)]
pub struct XmlIdent;
63
64impl Tokenizer for XmlIdent {
65 type Token<'a> = Lex<'a>;
66
67 fn to_token<'a>(
68 &self,
69 reader: &mut udled::Reader<'_, 'a>,
70 ) -> Result<Self::Token<'a>, udled::Error> {
71 let start_tokenizer = any!(
72 ':',
73 'a'..='z',
74 'A'..='Z',
75 '\u{2070}'..='\u{218F}',
76 '\u{2C00}'..='\u{2FEF}',
77 '\u{3001}'..='\u{D7FF}',
78 '\u{F900}'..='\u{FDCF}',
79 '\u{FDF0}'..='\u{FFFD}'
80 );
81 let rest_tokenizer = any!(
82 '0'..='9',
83 '-',
84 ".",
85 '_',
86 '\u{00B7}',
87 '\u{0300}'..='\u{036F}',
88 '\u{203F}'..='\u{2040}'
89 );
90
91 let all = any!(&start_tokenizer, rest_tokenizer);
92
93 let start = reader.parse(Spanned(&start_tokenizer))?;
94 let mut end = start;
95
96 loop {
97 if reader.eof() {
98 break;
99 }
100
101 if !reader.peek(&all)? {
102 break;
103 }
104
105 end = reader.parse(Spanned(&all))?;
106 }
107
108 let span = start + end;
109
110 if let Some(content) = span.slice(reader.source()) {
111 Ok(Lex::new(content, span))
112 } else {
113 Err(reader.error("Invalid range"))
114 }
115 }
116
117 fn peek(&self, reader: &mut udled::Reader<'_, '_>) -> Result<bool, udled::Error> {
118 reader.peek(any!(
119 ':',
120 'a'..='z',
121 'A'..='Z',
122 '\u{2070}'..='\u{218F}',
123 '\u{2C00}'..='\u{2FEF}',
124 '\u{3001}'..='\u{D7FF}',
125 '\u{F900}'..='\u{FDCF}',
126 '\u{FDF0}'..='\u{FFFD}'
127 ))
128 }
129}
130
#[cfg(test)]
mod test {
    use udled::{token::Ws, Input, Lex, Span};

    use super::{Ident, XmlIdent};

    /// Three XML names separated by whitespace are each returned with their
    /// text and span; the whitespace tokens come back as bare spans.
    #[test]
    fn xml_ident() {
        let mut input = Input::new("div custom-tag data-id2");

        let parsed = input.parse((XmlIdent, Ws, XmlIdent, Ws, XmlIdent)).unwrap();

        let expected = (
            Lex::new("div", Span::new(0, 3)),
            Span::new(3, 4),
            Lex::new("custom-tag", Span::new(4, 14)),
            Span::new(14, 15),
            Lex::new("data-id2", Span::new(15, 23)),
        );

        assert_eq!(parsed, expected);
    }

    /// `Ident` stops at the first character that cannot be part of an
    /// identifier (here the space) and reports the span of what it consumed.
    #[test]
    fn ident() {
        let mut input = Input::new("Ident other");

        let parsed = input.parse(Ident).unwrap();

        assert_eq!(
            parsed,
            Lex {
                value: "Ident",
                span: Span { start: 0, end: 5 }
            }
        );
    }
}