1#![allow(dead_code)]
2#![cfg_attr(not(feature = "std"), no_std)]
3
4use crate::termcolors::*;
5
6use core::cmp::{min,max};
7use core::fmt::Write;
8
/// Re-exports of the `alloc` items used by this file, so that the same short
/// names are available when the crate is built without the `std` feature.
#[cfg(not(feature = "std"))]
pub mod mystd {
    extern crate alloc;

    pub use alloc::{
        boxed::Box,
        format,
        string::{String, ToString},
        vec,
        vec::Vec,
    };
}
19
20#[cfg(not(feature = "std"))]
21use mystd::*;
22
/// Mutable state of a parser that pulls one token at a time from a string.
///
/// * `'b` - lifetime of the input text.
/// * `T` - token type produced by the tokenizer.
/// * `E` - error payload type.
/// * `Context` - caller-supplied data that is handed to the tokenizer on
///   every call.
pub struct ParserState<'b, T, E, Context = ()>
where
    T: PartialEq + core::fmt::Debug,
{
    /// Token that has been fetched from the tokenizer but not consumed yet.
    token: Option<T>,
    /// Number of tokens consumed so far.
    token_count: usize,
    /// Position reported by the tokenizer for the current token (or for the
    /// start of a tokenizer error). It is asserted to be at least
    /// `reader.len()`, i.e. it is measured as a distance to the end of input.
    token_pos: usize,
    /// Tokenizer callback. It advances `reader` and returns either the token
    /// together with its position, or an error with a start and end position.
    #[allow(clippy::type_complexity)]
    tokenize: fn(reader: &mut &'b str, context: &'_ mut Context) -> Result<(T, usize), (E, usize, usize)>,
    /// Input that has not been handed out by the tokenizer yet.
    pub reader: &'b str,
    /// A token mismatch that happens while `token_count` equals this value is
    /// reported as a lookahead backtrack instead of a hard error.
    generate_backtrack_token_count: usize,
    /// Caller-supplied data, passed to the tokenizer by mutable reference.
    pub context: Context,
    /// Remaining nesting budget; reaching zero yields a depth-limit status.
    depth_remaining: usize,
}
37impl<'b,T,E,Context> core::fmt::Debug for ParserState<'b,T,E,Context>
38where
39T: core::fmt::Debug + PartialEq,
40{
41 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
42 write!(f, "[{}={:?}@{:?} err_on={}]", self.token_count, self.token, self.token_pos, self.generate_backtrack_token_count)
43 }
44}
45
/// Pair of positions `(start, end)` attached to a parser error.
type TokenPos = (usize, usize);

/// Reasons for which a parser path can stop without producing a value.
#[derive(Debug)]
pub enum ParserStatus<E> {
    /// The path did not match at the current lookahead token; the caller may
    /// try an alternative.
    LookaheadBacktrack(),
    /// The nesting budget of the parser was exhausted.
    DepthLimitReached(),
    /// A hard error with its payload and the positions it refers to.
    Error(E, TokenPos),
}

/// Result type returned by every parser path.
pub type ParserResult<T, E> = Result<T, ParserStatus<E>>;
56
/// Location of a token, stored as distances to the end of the input.
#[derive(Copy, Clone)]
pub struct TokenInfo {
    /// Value of the parser's token counter when this info was captured.
    token_count: usize,
    /// Distance from the start of the token to the end of the input.
    token_start_until_end_of_input: usize,
    /// Distance from the end of the token to the end of the input.
    token_end_until_end_of_input: usize,
}

impl TokenInfo {
    /// Offset of the token start from the beginning of an input of
    /// `length_of_whole_input` bytes.
    pub fn get_start(&self, length_of_whole_input: usize) -> usize {
        let remaining = self.start_to_end_of_input();
        length_of_whole_input - remaining
    }

    /// Offset of the token end from the beginning of an input of
    /// `length_of_whole_input` bytes.
    pub fn get_end(&self, length_of_whole_input: usize) -> usize {
        let remaining = self.end_to_end_of_input();
        length_of_whole_input - remaining
    }

    /// Distance from the start of the token to the end of the input.
    pub fn start_to_end_of_input(&self) -> usize {
        self.token_start_until_end_of_input
    }

    /// Distance from the end of the token to the end of the input.
    pub fn end_to_end_of_input(&self) -> usize {
        self.token_end_until_end_of_input
    }

    /// Length of the token (start distance minus end distance).
    pub fn get_length(&self) -> usize {
        self.start_to_end_of_input() - self.end_to_end_of_input()
    }

    /// Returns a copy in which both distances are rebuilt from this token's
    /// end distance: the new start distance is `end distance + start` and the
    /// new end distance is `end distance + end`. The token count is kept.
    #[must_use]
    pub fn bound(&self, start: usize, end: usize) -> TokenInfo {
        let base = self.token_end_until_end_of_input;
        let mut bounded = *self;
        bounded.token_start_until_end_of_input = base + start;
        bounded.token_end_until_end_of_input = base + end;
        bounded
    }
}
90
91type ParserPath<'b,T,E,S,Context> = fn (reader: &mut ParserState<'b,T,E,Context>) -> ParserResult<S,E>;
92type ParserPathWithState<'b,T,E,S,Context,State> = fn (parser: &mut ParserState<'b,T,E,Context>, state: &mut State) -> ParserResult<S,E>;
93
94impl<'b,T,E,Context> ParserState<'b,T,E,Context>
95where
96T: PartialEq + Copy + Clone + core::fmt::Debug,
97E: Copy + Clone + core::fmt::Debug,
98{
99 pub fn repeat<P>(&mut self, mut path: P) -> ParserResult<(),E>
100 where P: FnMut(&mut Self) -> ParserResult<bool,E>,
101 {
102 if self.depth_remaining == 0 {
103 return Err(ParserStatus::DepthLimitReached());
104 }
105 self.depth_remaining -= 1;
106 let getc = self.generate_backtrack_token_count;
107 loop {
108 self.generate_backtrack_token_count = self.token_count;
109 match path(self) {
110 Ok(false) => {
111 break;
112 },
113 Ok(_) => {
114 if self.token_count <= self.generate_backtrack_token_count {
116 break;
117 }
118 },
119 Err(ParserStatus::LookaheadBacktrack()) => {
120 break;
121 }
122 Err(e) => {
123 self.depth_remaining += 1;
124 return Err(e);
125 }
126 }
127 }
128 self.depth_remaining += 1;
129 self.generate_backtrack_token_count = getc;
130 Ok(())
131 }
132
133 pub fn opt<S, P>(&mut self, path: P) -> ParserResult<Option<S>,E>
134 where P: FnOnce(&mut Self) -> ParserResult<S,E>,
135 {
136 let getc = self.generate_backtrack_token_count;
137 self.generate_backtrack_token_count = self.token_count;
138 match path(self) {
139 Ok(v) => {
140 Ok(Some(v))
141 },
142 Err(ParserStatus::LookaheadBacktrack()) => {
143 self.generate_backtrack_token_count = getc;
144 Ok(None)
145 },
146 Err(e) => {
147 Err(e)
148 },
149 }
150 }
151
152 pub fn choose<S>(&mut self, paths: &[ParserPath<'b,T,E,S,Context>], err: fn () -> E) -> ParserResult<S,E> {
153 debug_assert!(!paths.is_empty());
154 if self.depth_remaining == 0 {
155 return Err(ParserStatus::DepthLimitReached());
156 }
157 self.depth_remaining -= 1;
158 let getc = self.generate_backtrack_token_count;
159 self.generate_backtrack_token_count = self.token_count;
160 for p in paths {
161 match p(self) {
162 Err(ParserStatus::LookaheadBacktrack()) => {
163 },
164 Ok(_) if self.token_count <= self.generate_backtrack_token_count => {
166 },
168 v => {
169 self.depth_remaining += 1;
170 return v;
171 },
172 }
173 }
174 self.depth_remaining += 1;
175 self.generate_backtrack_token_count = getc;
176 if self.generate_backtrack_token_count == self.token_count {
177 return Err(ParserStatus::LookaheadBacktrack());
178 }
179 Err(ParserStatus::Error(err(), (self.token_pos, self.reader.len())))
180 }
181
182 pub fn choose_with_state<S,State>(&mut self, paths: &[ParserPathWithState<'b,T,E,S,Context,State>], state: &mut State, err: fn () -> E) -> ParserResult<S,E> {
183 debug_assert!(!paths.is_empty());
184 if self.depth_remaining == 0 {
185 return Err(ParserStatus::DepthLimitReached());
186 }
187 self.depth_remaining -= 1;
188 let getc = self.generate_backtrack_token_count;
189 self.generate_backtrack_token_count = self.token_count;
190 for p in paths {
191 match p(self, state) {
192 Err(ParserStatus::LookaheadBacktrack()) => {
193 },
194 Ok(_) if self.token_count <= self.generate_backtrack_token_count => {
196 },
198 v => {
199 self.depth_remaining += 1;
200 return v;
201 },
202 }
203 }
204 self.depth_remaining += 1;
205 self.generate_backtrack_token_count = getc;
206 if self.generate_backtrack_token_count == self.token_count {
207 return Err(ParserStatus::LookaheadBacktrack());
208 }
209 Err(ParserStatus::Error(err(), (self.token_pos, self.reader.len())))
210 }
211
212 pub fn call<S>(&mut self, path: ParserPath<'b,T,E,S,Context>) -> ParserResult<S,E> {
214 if self.depth_remaining == 0 {
215 return Err(ParserStatus::DepthLimitReached());
216 }
217 self.depth_remaining -= 1;
218 let result = path(self);
219 self.depth_remaining += 1;
220 result
221 }
222
223 fn _peek(&mut self) -> Result<T,ParserStatus<E>> {
224 if let Some(x) = &self.token {
225 return Ok(*x);
226 }
227 let (t,token_until_end) = (self.tokenize)(&mut self.reader, &mut self.context).map_err(|(x,error_start,error_end)| {
228 self.token_pos = error_start;
229 debug_assert!(self.token_pos >= self.reader.len());
230 ParserStatus::Error(x, (error_start, error_end))
231 })?;
232 self.token = Some(t);
234 self.token_pos = token_until_end;
235 debug_assert!(self.token_pos >= self.reader.len());
236 Ok(t)
237 }
238
239 fn _token_info(&self) -> TokenInfo {
240 TokenInfo {
241 token_count: self.token_count,
242 token_start_until_end_of_input: self.token_pos,
243 token_end_until_end_of_input: self.reader.len(),
244 }
245 }
246
247 fn _next(&mut self) -> TokenInfo {
248 let info = self._token_info();
249 self.token = None;
250 self.token_count += 1;
251 info
252 }
253
254 pub fn accept<F>(&mut self, expected: T, info: Option<&TokenInfo>, err: F) -> ParserResult<TokenInfo,E>
256 where F: Fn () -> E,
257 {
258 match self._peek()? {
259 t if expected == t => {
260 Ok(self._next())
261 },
262 _ if self.generate_backtrack_token_count == self.token_count => {
263 Err(ParserStatus::LookaheadBacktrack())
264 },
265 _ => {
266 let (start, end) = if let Some(TokenInfo{token_count: _, token_start_until_end_of_input, token_end_until_end_of_input}) = info { (*token_start_until_end_of_input, *token_end_until_end_of_input) } else { (self.token_pos, self.reader.len()) };
268 Err(ParserStatus::Error(err(), (start, end)))
269 }
270 }
271 }
272
273 pub fn get(&mut self) -> ParserResult<(T,TokenInfo),E> {
274 let retval = self._peek()?;
275 Ok((retval, self._next()))
276 }
277
278 pub fn undo_get(&mut self, token: T, info: TokenInfo) {
279 debug_assert!(self.token.is_none());
280 self.token = Some(token);
281 self.token_count = info.token_count;
282 }
283
284 pub fn error_token<F>(&mut self, token: T, info: TokenInfo, err: F) -> ParserStatus<E>
287 where F: FnOnce(&mut Self) -> E,
288 {
289 if self.generate_backtrack_token_count == info.token_count {
290 self.token = Some(token);
291 self.token_count = info.token_count;
292 ParserStatus::LookaheadBacktrack()
293 } else {
294 ParserStatus::Error(err(self), (info.token_start_until_end_of_input, info.token_end_until_end_of_input))
295 }
296 }
297
298 pub fn error_other(&mut self, info: &TokenInfo, err: E) -> ParserStatus<E> {
300 ParserStatus::Error(err, (info.token_start_until_end_of_input, info.token_end_until_end_of_input))
301 }
302
303 pub fn token_info(&mut self) -> ParserResult<TokenInfo,E> {
304 self._peek()?;
305 Ok(self._token_info())
306 }
307
308 pub fn context(&mut self) -> &mut Context {
309 &mut self.context
310 }
311
312 pub fn consume(self) -> Context {
313 self.context
314 }
315
316 #[allow(clippy::type_complexity)]
317 pub fn new_with(reader: &'b str, tokenize: fn(reader: &mut &'b str, context: &'_ mut Context) -> Result<(T,usize), (E,usize,usize)>, context: Context) -> Self {
318 const MAX_DEPTH: usize = 128;
319 Self {
320 token: None,
321 token_count: 0,
322 token_pos: 0,
323 reader,
324 tokenize,
325 generate_backtrack_token_count: usize::MAX,
326 context,
327 depth_remaining: MAX_DEPTH,
328 }
329 }
330
331 #[allow(clippy::type_complexity)]
332 pub fn parse<R>(&mut self, f: fn(&mut Self) -> ParserResult<R,E>, unexpected_token: E, depth_limit_reached: E) -> Result<R,(E,usize,usize,Option<(usize,usize)>)> {
333 f(self).map_err(|x| {
334 match x {
336 ParserStatus::LookaheadBacktrack() => {
337 (unexpected_token, self.token_pos, self.reader.len(), None)
338 },
339 ParserStatus::Error(err, token_info) => {
340 let extra = if self.token_pos != token_info.0 {
341 Some((self.token_pos, self.reader.len()))
342 } else {
343 None
344 };
345 (err, token_info.0, token_info.1, extra)
346 }
347 ParserStatus::DepthLimitReached() => {
348 (depth_limit_reached, self.token_pos, self.reader.len(), None)
349 },
350 }
351 })
352 }
353}
354
/// A stateful character matcher that cuts a prefix off a string.
pub trait Spanner {
    /// Feeds the next character; returns `false` for the first character that
    /// does not belong to the span.
    fn next(&mut self, v: char) -> bool;

    /// Called once with the byte length of the candidate span; returning
    /// `false` rejects it. Accepts everything by default.
    fn valid(&mut self, _len: usize) -> bool {
        true
    }

    /// Feeds the characters of `reader` to [`next`](Self::next) until one is
    /// rejected or the input ends. If [`valid`](Self::valid) accepts the
    /// resulting length, the prefix is removed from `reader` and returned;
    /// otherwise `reader` is left untouched and `None` is returned.
    fn span<'b>(&mut self, reader: &mut &'b str) -> Option<&'b str> {
        let text: &'b str = *reader;
        let mut cut = text.len();
        for (offset, ch) in text.char_indices() {
            if !self.next(ch) {
                cut = offset;
                break;
            }
        }
        if !self.valid(cut) {
            return None;
        }
        let (head, tail) = text.split_at(cut);
        *reader = tail;
        Some(head)
    }
}
370
371pub struct StringLiteralSpanner {
372 first: bool,
373 end: bool,
374 prev: char,
375 pub unescape_needed: bool,
376 delim: char,
377}
378
379impl Spanner for StringLiteralSpanner {
380 fn next(&mut self, b: char) -> bool {
381 if self.end {
382 return false;
383 }
384 if self.first {
385 self.first = false;
386 return b == self.delim;
387 }
388 if b == self.delim && self.prev != '\\' {
389 self.end = true;
390 return true;
391 }
392 if self.prev == '\\' {
393 self.prev = 0 as char;
394 self.unescape_needed = true;
395 } else {
396 self.prev = b;
397 }
398 true
399 }
400 fn valid(&mut self, _len: usize) -> bool {
401 self.end
402 }
403}
404impl StringLiteralSpanner {
405 pub fn new(delim: char) -> Self {
406 Self {
407 first: true,
408 end: false,
409 prev: 0 as char,
410 unescape_needed: false,
411 delim,
412 }
413 }
414}
415
416pub struct NumberSpanner {
417 first: bool,
418 prev: char,
419 pub float: bool,
420 has_digits: bool,
421}
422
423impl Spanner for NumberSpanner {
424 fn next(&mut self, b: char) -> bool {
425 if self.first && !(b.is_ascii_digit() || b == '-') {
426 return false;
427 }
428 if !self.first && !(b.is_ascii_digit() || b == '.' || b == 'e' || b == 'E' || ((self.prev == 'e' || self.prev == 'E') && (b == '+' || b == '-'))) {
429 return false;
430 }
431 self.first = false;
432 self.float = self.float || b == '.' || b == 'e' || b == 'E';
433 self.has_digits = self.has_digits || b.is_ascii_digit();
434 self.prev = b;
435 true
436 }
437 fn valid(&mut self, _len: usize) -> bool {
438 self.has_digits
439 }
440}
441
442impl NumberSpanner {
443 pub fn new() -> Self {
444 Self {
445 first: true,
446 prev: 0 as char,
447 float: false,
448 has_digits: false,
449 }
450 }
451}
452
453impl Default for NumberSpanner {
454 fn default() -> Self {
455 Self::new()
456 }
457}
458
459pub struct HairyTemplateTagContentSpanner<const INCLUDE_OUTER_TAGS: bool> {
463 prev_was_escape: bool,
464 string: bool,
465 nested: u32,
466 len: usize,
467}
468
469impl<const INCLUDE_OUTER_TAGS: bool> Spanner for HairyTemplateTagContentSpanner<INCLUDE_OUTER_TAGS> {
470 fn next(&mut self, b: char) -> bool {
471 if INCLUDE_OUTER_TAGS && self.nested == 0 && !self.string && self.len > 0 {
472 return false;
473 }
474 if !self.prev_was_escape && !self.string && b == '{' {
475 self.nested += 1;
476 }
477 if !self.prev_was_escape && !self.string && b == '}' {
478 if self.nested == 0 {
479 return false;
480 }
481 self.nested -= 1;
482 }
483 if !self.prev_was_escape && b == '"' {
484 self.string = !self.string;
485 }
486 self.prev_was_escape = !self.prev_was_escape && b == '\\';
487 self.len += 1;
488 true
489 }
490 fn valid(&mut self, _len: usize) -> bool {
491 self.nested == 0 && !self.string && !self.prev_was_escape
492 }
493}
494impl<const INCLUDE_OUTER_TAGS: bool> HairyTemplateTagContentSpanner<INCLUDE_OUTER_TAGS> {
495 pub fn new() -> Self {
496 Self {
497 prev_was_escape: false,
498 string: false,
499 nested: 0,
500 len: 0,
501 }
502 }
503}
504
505impl<const INCLUDE_OUTER_TAGS: bool> Default for HairyTemplateTagContentSpanner<INCLUDE_OUTER_TAGS> {
506 fn default() -> Self {
507 Self::new()
508 }
509}
510pub trait Acceptor<'a> {
511 fn accept(self, expected: &str) -> bool;
512 fn span_fn<M>(self, matcher: &mut M) -> Option<&'a str>
513 where
514 M: FnMut(char) -> bool;
515 fn span<P: Spanner>(self, spanner: &mut P) -> Option<&'a str>;
516}
517impl<'a> Acceptor<'a> for &mut &'a str {
518 fn accept(self, expected: &str) -> bool {
519 if let Some(remaining) = self.strip_prefix(expected) {
520 *self = remaining;
521 true
522 } else {
523 false
524 }
525 }
526
527 fn span_fn<M>(self, matcher: &mut M) -> Option<&'a str>
529 where
530 M: FnMut(char) -> bool,
531 {
532 let index = self.char_indices().find(|(_pos, c)| !matcher(*c)).map(|(pos, _c)| pos).unwrap_or(self.len());
533 if index == 0 {
534 return None;
535 }
536 let value = &self[0..index];
537 *self = &self[index..];
538 Some(value)
539 }
540 fn span<P: Spanner>(self, spanner: &mut P) -> Option<&'a str> {
541 spanner.span(self)
542 }
543}
544
545#[derive(Debug, Clone)]
546pub struct LineContext {
547 offsets: Vec<u32>,
548}
549
550impl LineContext {
551 pub fn empty() -> Self {
552 Self { offsets: Vec::new(), }
553 }
554 pub fn new(reader: &str) -> Self {
555 let mut offsets : Vec<u32> = Vec::new();
556 offsets.push(0);
557 for (i,c) in reader.char_indices() {
558 if c == '\n' {
559 offsets.push(i as u32 + 1);
560 }
561 }
562 offsets.push(reader.len() as u32 + 1);
563 Self {
564 offsets
565 }
566 }
567 pub fn format_error_context_short(&self, reader: &str, start: usize, end: usize) -> Result<String,core::fmt::Error> {
568 let (start, end) = (reader.len() - start, reader.len() - end);
570 let (_, _, line_start, line_end) = self.position_to_line_info(start as u32);
571 let line = &reader[line_start as usize..line_end as usize];
572 let (start_in_line, end_in_line) = (start - line_start as usize, max(start+1, min(end, line_end as usize)) - line_start as usize);
573
574 const UNDERLINE : &str = "↑"; let mut retval = String::new();
577
578 let mut highlight = false;
579 for (i,c) in line.char_indices() {
580 if i >= end_in_line {
581 if highlight {
582 write!(retval, "")?;
583 highlight = false;
584 }
585 } else if i >= start_in_line && !highlight {
586 write!(retval, "")?;
587 highlight = true;
588 }
589 write!(retval, "{}", c)?;
590 }
591 writeln!(retval)?;
592
593 let mut pos = 0;
594 for (i,c) in line.char_indices() {
595 pos = i+1;
596 if i >= end_in_line {
597 break;
598 } else if i >= start_in_line {
599 write!(retval, "{}", UNDERLINE)?;
600 } else if c == '\t' {
601 write!(retval, "\t")?;
602 } else {
603 write!(retval, " ")?;
604 }
605 }
606 for _ in pos..end_in_line {
607 write!(retval, "{}", UNDERLINE)?;
608 }
609 Ok(retval)
610 }
611 pub fn format_error_context_html(&self, reader: &str, start: usize, end: usize, extra_line_no: u32) -> Result<(u32, String),core::fmt::Error> {
612 let (start, end) = (reader.len() - start, reader.len() - end);
614 let (line_no, _, line_start, line_end) = self.position_to_line_info(start as u32);
615 let line = &reader[line_start as usize..line_end as usize];
616 let (start_in_line, end_in_line) = (start - line_start as usize, max(start+1, min(end, line_end as usize)) - line_start as usize);
617
618 let line_no_text = format!("{}", line_no+extra_line_no);
619 let mut retval = String::new();
620
621 write!(retval, "<code><span class=\"lineno\">{}</span>", line_no_text)?;
622 let mut highlight = false;
623 for (i,c) in line.char_indices() {
624 if i >= end_in_line {
625 if highlight {
626 write!(retval, "</span>")?;
627 highlight = false;
628 }
629 } else if i >= start_in_line && !highlight {
630 write!(retval, "<span class=\"highlight\">")?;
631 highlight = true;
632 }
633 write!(retval, "{}", c)?;
634 }
635 if highlight {
636 write!(retval, "</span>")?;
637 }
638 write!(retval, "</code>")?;
639 Ok((line_no+extra_line_no, retval))
640 }
641 pub fn format_error_context_console(&self, reader: &str, start: usize, end: usize, extra_line_no: u32) -> Result<(u32, String,String),core::fmt::Error> {
642 let (start, end) = (reader.len() - start, reader.len() - end);
644 let (line_no, _, line_start, line_end) = self.position_to_line_info(start as u32);
645 let line = &reader[line_start as usize..line_end as usize];
646 let (start_in_line, end_in_line) = (start - line_start as usize, max(start+1, min(end, line_end as usize)) - line_start as usize);
647
648 const DELIM : &str = " │ "; const DELIM_ALT : &str = " ┿ "; const DELIM_ALT2 : &str = " ├ "; const UNDERLINE : &str = "▔"; let line_no_text = format!("{:>3}", line_no+extra_line_no);
654 let prefix = format!("{}{TERM_BRIGHT_BLACK}{}{TERM_RESET}", [' '].iter().cycle().take(line_no_text.len()).collect::<String>(), DELIM);
655 let mut retval = String::new();
656
657 write!(retval, "{TERM_BRIGHT_BLACK}{}{}{TERM_RESET}", line_no_text, DELIM_ALT)?;
658 let mut highlight = false;
659 for (i,c) in line.char_indices() {
660 if i >= end_in_line {
661 if highlight {
662 write!(retval, "{TERM_RESET}{TERM_DIM_DEFAULT}")?;
663 highlight = false;
664 }
665 } else if i >= start_in_line && !highlight {
666 write!(retval, "{TERM_BRIGHT_YELLOW}")?;
667 highlight = true;
668 }
669 write!(retval, "{}", c)?;
670 }
671 writeln!(retval, "{TERM_RESET}")?;
672
673 write!(retval, "{}{TERM_BRIGHT_RED}", prefix)?;
674 let mut pos = 0;
675 for (i,c) in line.char_indices() {
676 pos = i+1;
677 if i >= end_in_line {
678 break;
679 } else if i >= start_in_line {
680 write!(retval, "{}", UNDERLINE)?;
681 } else if c == '\t' {
682 write!(retval, "\t")?;
683 } else {
684 write!(retval, " ")?;
685 }
686 }
687 for _ in pos..end_in_line {
688 write!(retval, "{}", UNDERLINE)?;
689 }
690 writeln!(retval, "{TERM_RESET}")?;
691 Ok((line_no+extra_line_no, prefix, retval))
692 }
693 pub fn remaining_to_line_info(&self, remaining: u32) -> (u32, u32, u32, u32) {
694 if let Some(last) = self.offsets.last() {
695 self.position_to_line_info(last - 1 - remaining)
696 } else {
697 (0, 0, 0, 0)
698 }
699 }
700 pub fn position_to_line_info(&self, pos: u32) -> (u32, u32, u32, u32) {
702 let i = self.offsets.partition_point(|x| *x <= pos);
703 if i > 0 && i < self.offsets.len() {
704 let start_of_this_line = self.offsets[i-1];
705 let start_of_next_line = self.offsets[i];
706 return (i as u32, pos - start_of_this_line, start_of_this_line, start_of_next_line-1) }
708 (0, 0, 0, 0) }
710
711}
712
713
#[cfg(test)]
mod tests {
    use crate::*;

    /// A decimal literal is spanned and flagged as a float.
    #[test]
    fn number_spanner() {
        let mut input = "3.14";
        let mut number = NumberSpanner::new();
        let matched = number.span(&mut input);
        assert!(matched.is_some());
        assert!(number.float);
    }
}