1use std::{fmt::Debug, ops::Range};
2
3use crate::{expected::ExpectedData, lang::CompiledLang, vec::RawVec};
4
5use super::{err::ParseError, lang::Lang};
6use ansi_term::Colour::{Blue, Green, Red};
7
/// Half-open range of `usize` offsets used to locate lexemes and nodes in the
/// source text.
pub type Span = Range<usize>;
9
/// A single token occurrence in the tree: where it is, what kind it is, and an
/// owned copy of its text.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Lexeme<L: Lang> {
    /// Start and end offsets of the token.
    pub span: Span,
    /// Token kind, as defined by the language `L`.
    pub kind: L::Token,
    /// Owned text of the token.
    pub text: String,
}
16
/// Plain, `Copy`able record describing a lexeme by numbers only, with a C
/// compatible field layout.
///
/// `Lexeme::<CompiledLang>::from_data` turns one of these into an owned
/// [`Lexeme`] by slicing the source text with `start..end`.
// NOTE(review): presumably this mirrors a struct produced by generated/native
// parser code — confirm before reordering or resizing any field.
#[repr(C)]
#[derive(Debug, Clone, PartialEq, Eq, Copy)]
pub struct LexemeData {
    /// Numeric token kind.
    pub kind: usize,
    /// Offset of the first byte of the token in the source string.
    pub start: usize,
    /// Offset one past the last byte of the token in the source string.
    pub end: usize,
}
24
25impl Lexeme<CompiledLang> {
26 pub fn from_data(value: LexemeData, src: &str) -> Self {
27 Lexeme {
28 span: value.start..value.end,
29 kind: value.kind as u32,
30 text: src[value.start..value.end].to_string(),
31 }
32 }
33}
34
/// An interior tree node: a syntax kind plus its ordered children.
#[derive(Debug)]
pub struct Group<L: Lang> {
    /// Syntax kind of this group, as defined by the language `L`.
    pub kind: L::Syntax,
    /// Child nodes, in order.
    pub children: Vec<Node<L>>,
}
40
/// Any element of the tree: an interior group, a single token, or a parse
/// error recorded in place.
#[derive(Debug)]
pub enum Node<L: Lang> {
    /// Interior node with a syntax kind and children.
    Group(Group<L>),
    /// Leaf node holding one token.
    Lexeme(Lexeme<L>),
    /// A parse error stored as a node of the tree.
    Err(ParseError<L>),
}
47
48impl<L: Lang> Node<L> {
49 pub fn push_tok(&mut self, lexeme: Lexeme<L>) {
50 let Node::Group(Group { children, .. }) = self else {
51 panic!("Expected a group")
52 };
53 children.push(Node::Lexeme(lexeme))
54 }
55
56 fn debug_at(&self, offset: usize, errors: bool, tokens: bool, lang: &L) {
57 fn print_offset(n: usize) {
58 for _ in 0..n {
59 print!(" ");
60 }
61 }
62 match self {
63 Node::Group(Group { kind, children }) => {
64 print_offset(offset);
65 println!("{}", Green.paint(lang.syntax_name(kind)));
66 for child in children.iter() {
67 child.debug_at(offset + 1, errors, tokens, lang);
68 }
69 }
70 Node::Lexeme(lexeme) => {
71 if tokens {
72 print_offset(offset);
73 println!(
74 "{}: {:?}",
75 Blue.paint(lang.token_name(&lexeme.kind)),
76 lexeme.text
77 )
78 }
79 }
80 Node::Err(err_group) => {
81 if errors {
82 print_offset(offset);
83 err_group.debug_at(offset, lang)
84 }
85 }
86 }
87 }
88
89 pub fn as_group(&self) -> &Group<L> {
90 let Node::Group(group) = self else {
91 panic!("Expected a group");
92 };
93 group
94 }
95
96 pub fn debug_print(&self, errors: bool, tokens: bool, lang: &L) {
97 self.debug_at(0, errors, tokens, lang);
98 }
99
100 pub fn name(&self) -> L::Syntax {
101 match self {
102 Node::Group(Group { kind, .. }) => kind.clone(),
103 Node::Lexeme(_) => panic!("Lexeme has no name"),
104 Node::Err(_) => panic!("ErrGroup has no name"),
105 }
106 }
107
108 pub fn green_children(&self) -> impl Iterator<Item = &Group<L>> {
109 match self {
110 Node::Group(Group { children, .. }) => children.iter().filter_map(|it| match it {
111 Node::Group(group) => Some(group),
112 Node::Lexeme(_) => None,
113 Node::Err(_) => None,
114 }),
115 Node::Lexeme(_) => panic!("Lexeme has no children"),
116 Node::Err(_) => panic!("ErrGroup has no children"),
117 }
118 }
119
120 pub fn at_offset(&self, offset: usize) -> Option<&Node<L>> {
121 match self {
122 Node::Group(group) => group.children.iter().find_map(|it| it.at_offset(offset)),
123 Node::Lexeme(Lexeme { span, .. }) if span.start <= offset && offset <= span.end => {
124 Some(self)
125 }
126 Node::Err(err) if err.span().start <= offset && offset <= err.span().end => Some(self),
127 _ => None,
128 }
129 }
130}
131
132impl<L: Lang> Group<L> {
133 pub fn name(&self) -> L::Syntax {
134 self.kind.clone()
135 }
136
137 pub fn green_children(&self) -> impl Iterator<Item = &Group<L>> {
138 self.children.iter().filter_map(|it| match it {
139 Node::Group(group) => Some(group),
140 Node::Lexeme(_) => None,
141 Node::Err(_) => None,
142 })
143 }
144
145 pub fn green_node_by_name(&self, name: L::Syntax) -> Option<&Group<L>> {
146 self.green_children().find(|it| it.kind == name)
147 }
148
149 pub fn lexeme_by_kind(&self, name: L::Token) -> Option<&Lexeme<L>> {
150 self.children.iter().find_map(|it| {
151 if let Node::Lexeme(l) = it
152 && l.kind == name
153 {
154 Some(l)
155 } else {
156 None
157 }
158 })
159 }
160}
161
162impl<L: Lang> ParseError<L> {
163 fn debug_at(&self, offset: usize, lang: &L) {
164 match self {
166 ParseError::MissingError { expected, .. } => {
167 let expected = expected
168 .iter()
169 .map(|it| it.debug_name(lang))
170 .collect::<Vec<_>>()
171 .join(",");
172 println!("Missing: {expected}");
173 }
174 ParseError::Unexpected { actual, .. } => {
175 println!("Unexpected:");
176 for token in actual {
177 for _ in 0..offset {
178 print!(" ");
179 }
180 println!(
181 " {}: {:?}",
182 Red.paint(lang.token_name(&token.kind)),
183 token.text
184 )
185 }
186 }
187 }
188 }
189}
190
191impl<L: Lang> Node<L> {
192 pub fn lexemes(&self) -> LexemeIter<'_, L> {
194 LexemeIter { stack: vec![self] }
195 }
196
197 pub fn errors(&self) -> ErrorIter<'_, L> {
198 ErrorIter {
199 stack: vec![self],
200 offset: 0,
201 }
202 }
203
204 pub fn start_offset(&self) -> usize {
205 match self {
206 Node::Group(group) => group.start_offset(),
207 Node::Lexeme(lexeme) => lexeme.span.start,
208 Node::Err(parse_error) => parse_error.start(),
209 }
210 }
211
212 pub fn end_offset(&self) -> usize {
213 match self {
214 Node::Group(group) => group.end_offset(),
215 Node::Lexeme(lexeme) => lexeme.span.end,
216 Node::Err(parse_error) => parse_error
217 .actual()
218 .last()
219 .map(|it| it.span.end)
220 .unwrap_or(parse_error.start()),
221 }
222 }
223
224 pub fn span(&self) -> Span {
225 self.start_offset()..self.end_offset()
226 }
227
228 pub fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
229 match self {
230 Node::Group(group) => group.fmt(f),
231 Node::Lexeme(lexeme) => write!(f, "{}", &lexeme.text),
232 Node::Err(parse_error) => {
233 for lexeme in parse_error.actual() {
234 write!(f, "{}", &lexeme.text)?
235 }
236 Ok(())
237 }
238 }
239 }
240}
241
242impl<L: Lang> Group<L> {
243 pub fn errors(&self) -> ErrorIter<'_, L> {
244 let mut stack = vec![];
245 for child in self.children.iter().rev() {
246 stack.push(child);
247 }
248 ErrorIter { stack, offset: 0 }
249 }
250
251 pub fn start_offset(&self) -> usize {
252 if let Some(first) = self.children.first() {
253 first.start_offset()
254 } else {
255 0
256 }
257 }
258
259 pub fn end_offset(&self) -> usize {
260 if let Some(first) = self.children.last() {
261 first.end_offset()
262 } else {
263 0
264 }
265 }
266
267 pub fn span(&self) -> Span {
268 self.start_offset()..self.end_offset()
269 }
270
271 pub fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
272 for child in self.children.iter() {
273 child.fmt(f)?
274 }
275 Ok(())
276 }
277
278 pub fn lexemes(&self) -> impl Iterator<Item = &Lexeme<L>> {
279 self.children.iter().flat_map(|it| it.lexemes())
280 }
281}
282
/// Iterator over the lexemes of a subtree, driven by an explicit stack.
pub struct LexemeIter<'a, L: Lang> {
    /// Nodes still to be visited; the next node to visit is at the back.
    stack: Vec<&'a Node<L>>,
}
286
287impl<'a, L: Lang> Iterator for LexemeIter<'a, L> {
288 type Item = &'a Lexeme<L>;
289
290 fn next(&mut self) -> Option<Self::Item> {
291 while let Some(node) = self.stack.pop() {
292 match node {
293 Node::Lexeme(l) => return Some(l),
294 Node::Group(g) => {
295 for child in g.children.iter().rev() {
297 self.stack.push(child);
298 }
299 }
300 Node::Err(_) => {
301 }
303 }
304 }
305 None
306 }
307}
308
/// Iterator over the error nodes of a subtree, driven by an explicit stack.
///
/// Each error is yielded together with the end offset of the most recently
/// visited lexeme.
pub struct ErrorIter<'a, L: Lang> {
    /// Nodes still to be visited; the next node to visit is at the back.
    stack: Vec<&'a Node<L>>,
    /// End offset of the last lexeme visited so far.
    offset: usize,
}
313
314impl<'a, L: Lang> Iterator for ErrorIter<'a, L> {
315 type Item = (usize, &'a ParseError<L>);
316
317 fn next(&mut self) -> Option<Self::Item> {
318 while let Some(node) = self.stack.pop() {
319 match node {
320 Node::Lexeme(l) => {
321 self.offset = l.span.end;
322 }
323 Node::Group(g) => {
324 for child in g.children.iter().rev() {
326 self.stack.push(child);
327 }
328 }
329 Node::Err(e) => return Some((self.offset, e)),
330 }
331 }
332 None
333 }
334}
335
/// Raw, C-layout record for one tree node: a tag, a group kind, and a payload
/// whose active field depends on the tag.
///
/// `Node::<CompiledLang>::from_data` interprets `kind` as follows:
/// `0` lexeme, `1` group, `2` unexpected-tokens error, `3` missing-item error.
#[repr(C)]
#[derive(Clone, Copy)]
pub struct NodeData {
    /// Tag selecting which `payload` field is read.
    kind: u32,
    /// Syntax kind of the group; read only when `kind` is `1`.
    group_kind: u32,
    /// Untagged payload; see [`NodeDataPayload`].
    payload: NodeDataPayload,
}
343
/// Untagged payload of a [`NodeData`]; the owning record's `kind` decides
/// which field is read.
#[derive(Clone, Copy)]
#[repr(C)]
pub union NodeDataPayload {
    /// Read when the node is a single lexeme (`kind == 0`).
    pub lexeme: LexemeData,
    /// Read when the node is a group (`kind == 1`): its child records.
    pub node_vec: RawVec<NodeData>,
    /// Read for an unexpected-tokens error (`kind == 2`): the offending tokens.
    pub lexeme_vec: RawVec<LexemeData>,
    /// Read for a missing-item error (`kind == 3`): what was expected.
    pub expected_vec: RawVec<ExpectedData>,
}
352
353impl Node<CompiledLang> {
354 pub fn from_data(value: NodeData, src: &str, offset: &mut usize) -> Self {
355 match value.kind {
356 0 => {
357 let payload = unsafe { value.payload.lexeme };
358 *offset = payload.end;
359 Node::Lexeme(Lexeme {
360 span: payload.start..payload.end,
361 kind: payload.kind as u32,
362 text: src[payload.start..payload.end].to_string(),
363 })
364 }
365
366 1 => unsafe {
367 let children = Vec::from(value.payload.node_vec);
368 Node::Group(Group {
369 kind: value.group_kind,
370 children: children
371 .into_iter()
372 .map(|it| Node::from_data(it, src, offset))
373 .collect(),
374 })
375 },
376 2 => unsafe {
377 let tokens = Vec::from(value.payload.lexeme_vec)
378 .into_iter()
379 .map(|it| Lexeme::from_data(it, src))
380 .collect::<Vec<_>>();
381 if let Some(last) = tokens.last() {
382 *offset = last.span.end;
383 }
384
385 Node::Err(ParseError::Unexpected {
386 actual: tokens,
387 start: *offset,
388 })
389 },
390 3 => unsafe {
391 let expected = Vec::from(value.payload.expected_vec);
392 Node::Err(ParseError::MissingError {
393 start: *offset,
394 expected: expected.into_iter().map(|it| it.into()).collect(),
395 })
396 },
397 id => panic!("Unexpected node id '{id}'"),
398 }
399 }
400}