1use std::ops::Index;
2use std::rc::Rc;
3use std::str::Bytes;
4use std::{fmt::Debug, iter};
5
/// Map type used for variable lookups in this module; an alias for `rustc_hash::FxHashMap`.
type HashMap<K, V> = rustc_hash::FxHashMap<K, V>;
7
8use super::{ParseError, Result};
9
/// A location inside a source text.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
pub struct Position {
    /// Byte offset from the start of the source.
    pub absolute: usize,
    /// Zero-based line index.
    pub line: usize,
    /// Byte offset at which the line containing this position begins.
    pub line_beginning: usize,
}

impl Position {
    /// Builds a position from its three components.
    ///
    /// # Panics
    ///
    /// Panics if `line` or `line_beginning` is greater than `absolute`; neither can
    /// exceed the number of bytes that precede the position.
    pub fn new(absolute: usize, line: usize, line_beginning: usize) -> Self {
        assert!(line <= absolute);
        assert!(line_beginning <= absolute);
        Position {
            absolute,
            line,
            line_beginning,
        }
    }
}
31
/// A region of a source file, delimited by two [`Position`]s.
///
/// Besides the two endpoints, a span carries shared (`Rc`) handles to the name and the
/// content of the file it points into.
#[derive(Clone, PartialEq, Eq, Hash)]
pub struct Span {
    /// Position of the first byte of the region.
    pub start: Position,
    /// Position just past the region; used as the exclusive bound when slicing text.
    pub end: Position,
    /// Name of the file this span refers to.
    pub file_name: Rc<str>,
    /// Content of the file this span refers to.
    pub file_content: Rc<str>,
}
39
40impl Debug for Span {
41 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
42 f.debug_struct("Span")
43 .field("start", &self.start)
44 .field("end", &self.end)
45 .field("file_name", &self.file_name)
46 .field("file_content [len]", &self.file_content.len())
47 .finish()
48 }
49}
50
51impl Default for Span {
52 fn default() -> Self {
53 Self {
54 start: Position::default(),
55 end: Position::default(),
56 file_name: Rc::from(""),
57 file_content: Rc::from(""),
58 }
59 }
60}
61
62impl Span {
63 pub fn new(start: Position, end: Position, file_name: Rc<str>, file_content: Rc<str>) -> Span {
64 assert!(start.absolute <= end.absolute);
65 assert!(start.line <= end.line);
66 Span {
67 start,
68 end,
69 file_name,
70 file_content,
71 }
72 }
73
74 pub fn cover(&self, other: &Span) -> Span {
75 assert!(self.file_name == other.file_name);
76
77 let start: Position = if self.start() <= other.start() {
78 self.start
79 } else {
80 other.start
81 };
82
83 let end: Position = if self.end() >= other.end() {
84 self.end
85 } else {
86 other.end
87 };
88
89 Span::new(
90 start,
91 end,
92 self.file_name.clone(),
93 self.file_content.clone(),
94 )
95 }
96
97 pub fn start(&self) -> usize {
98 self.start.absolute
99 }
100
101 pub fn end(&self) -> usize {
102 self.end.absolute
103 }
104
105 pub fn file_name(&self) -> String {
106 self.file_name.clone().to_string()
107 }
108
109 pub fn file_content(&self) -> String {
110 self.file_content.clone().to_string()
111 }
112}
113
114impl Index<Span> for str {
115 type Output = str;
116 fn index(&self, span: Span) -> &Self::Output {
117 &self[span.start()..span.end()]
118 }
119}
120
121impl Index<Span> for String {
122 type Output = str;
123 fn index(&self, span: Span) -> &Self::Output {
124 &self[span.start()..span.end()]
125 }
126}
127
128#[derive(Debug, Clone, PartialEq, Eq, Hash)]
129pub struct Spanned<T> {
130 pub t: T,
131 pub span: Span,
132}
133
134impl<T> Spanned<T> {
135 pub fn new(t: T, span: Span) -> Spanned<T> {
136 Spanned { t, span }
137 }
138}
139
140#[derive(Clone, PartialEq, Eq, Hash)]
141pub enum SExpr {
144 Atom(Spanned<String>),
145 List(Spanned<Vec<SExpr>>),
146}
147
148impl SExpr {
149 pub fn atom<'a>(&'a self, vars: Option<&'a HashMap<String, SExpr>>) -> Option<&'a str> {
150 match self {
151 SExpr::Atom(a) => {
152 let s = a.t.as_str();
153 match (s.strip_prefix('$'), vars) {
154 (Some(varname), Some(vars)) => match vars.get(varname) {
155 Some(var) => {
156 #[cfg(feature = "lsp")]
157 super::LSP_VARIABLE_REFERENCES.with_borrow_mut(|refs| {
158 refs.push(varname, a.span.clone());
159 });
160 var.atom(Some(vars))
161 }
162 None => Some(s),
163 },
164 _ => Some(s),
165 }
166 }
167 _ => None,
168 }
169 }
170
171 pub fn list<'a>(&'a self, vars: Option<&'a HashMap<String, SExpr>>) -> Option<&'a [SExpr]> {
172 match self {
173 SExpr::List(l) => Some(&l.t),
174 SExpr::Atom(a) => match (a.t.strip_prefix('$'), vars) {
175 (Some(varname), Some(vars)) => match vars.get(varname) {
176 Some(var) => {
177 #[cfg(feature = "lsp")]
178 super::LSP_VARIABLE_REFERENCES.with_borrow_mut(|refs| {
179 refs.push(varname, a.span.clone());
180 });
181 var.list(Some(vars))
182 }
183 None => None,
184 },
185 _ => None,
186 },
187 }
188 }
189
190 pub fn span_list<'a>(
191 &'a self,
192 vars: Option<&'a HashMap<String, SExpr>>,
193 ) -> Option<&'a Spanned<Vec<SExpr>>> {
194 match self {
195 SExpr::List(l) => Some(l),
196 SExpr::Atom(a) => match (a.t.strip_prefix('$'), vars) {
197 (Some(varname), Some(vars)) => match vars.get(varname) {
198 Some(var) => {
199 #[cfg(feature = "lsp")]
200 super::LSP_VARIABLE_REFERENCES.with_borrow_mut(|refs| {
201 refs.push(varname, a.span.clone());
202 });
203 var.span_list(Some(vars))
204 }
205 None => None,
206 },
207 _ => None,
208 },
209 }
210 }
211
212 pub fn span(&self) -> Span {
213 match self {
214 SExpr::Atom(a) => a.span.clone(),
215 SExpr::List(l) => l.span.clone(),
216 }
217 }
218}
219
220impl std::fmt::Debug for SExpr {
221 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
222 match self {
223 SExpr::Atom(a) => write!(f, "{}", &a.t),
224 SExpr::List(l) => {
225 write!(f, "(")?;
226 for i in 0..l.t.len() - 1 {
227 write!(f, "{:?} ", &l.t[i])?;
228 }
229 if let Some(last) = &l.t.last() {
230 write!(f, "{last:?}")?;
231 }
232 write!(f, ")")?;
233 Ok(())
234 }
235 }
236 }
237}
238
239#[derive(Clone, PartialEq, Eq, Debug)]
240pub enum SExprMetaData {
242 LineComment(Spanned<String>),
243 BlockComment(Spanned<String>),
244 Whitespace(Spanned<String>),
245}
246
247impl SExprMetaData {
248 pub fn span(&self) -> Span {
249 match self {
250 Self::LineComment(x) => x.span.clone(),
251 Self::BlockComment(x) => x.span.clone(),
252 Self::Whitespace(x) => x.span.clone(),
253 }
254 }
255}
256
/// The kinds of token produced by the lexer.
#[derive(Debug)]
enum Token {
    /// An opening parenthesis `(`.
    Open,
    /// A closing parenthesis `)`.
    Close,
    /// A bare word, a `"quoted"` string, or an `r#"..."#` multi-line string.
    StringTok,
    /// A `#| ... |#` comment.
    BlockComment,
    /// A `;;` comment running to the end of the line.
    LineComment,
    /// A run of ASCII whitespace.
    Whitespace,
}
266
267#[derive(Clone)]
268struct PositionCountingBytesIterator<'a> {
270 bytes: Bytes<'a>,
271 source_length: usize,
272 line: usize,
273 line_beginning: usize,
274}
275
276impl<'a> PositionCountingBytesIterator<'a> {
277 fn new(s: &'a str) -> Self {
278 Self {
279 bytes: s.bytes(),
280 source_length: s.len(),
281 line: 0,
282 line_beginning: 0,
283 }
284 }
285
286 fn pos(&self) -> Position {
287 let absolute = self.source_length - self.bytes.len();
288 Position::new(absolute, self.line, self.line_beginning)
289 }
290}
291
292impl Iterator for PositionCountingBytesIterator<'_> {
293 type Item = u8;
294
295 fn next(&mut self) -> Option<Self::Item> {
296 self.bytes.next().inspect(|&b| {
297 if b == b'\n' {
298 self.line += 1;
299 self.line_beginning = self.source_length - self.bytes.len()
300 }
301 })
302 }
303}
304
/// Splits source text into tokens.
pub struct Lexer<'a> {
    /// Remaining input, with line tracking.
    bytes: PositionCountingBytesIterator<'a>,
    /// When `true`, whitespace and comment tokens are consumed but not emitted.
    ignore_whitespace_and_comments: bool,
}
309
/// Returns `true` for bytes that end a bare word: parentheses, a double quote, or ASCII
/// whitespace.
fn is_start(b: u8) -> bool {
    match b {
        b'(' | b')' | b'"' => true,
        other => other.is_ascii_whitespace(),
    }
}
313
/// Outcome of lexing one token: the token kind, or an error message.
type TokenRes = std::result::Result<Token, String>;
315
316impl<'a> Lexer<'a> {
317 #[allow(clippy::new_ret_no_self)]
318 fn new(
321 source: &'a str,
322 file_name: &'a str,
323 ignore_whitespace_and_comments: bool,
324 ) -> impl Iterator<Item = Spanned<TokenRes>> + 'a {
325 let _bytes = source.bytes().next();
326
327 let mut lexer = Lexer {
328 bytes: PositionCountingBytesIterator::new(source),
329 ignore_whitespace_and_comments,
330 };
331 let file_name: Rc<str> = Rc::from(file_name);
332 let file_content: Rc<str> = Rc::from(source);
333 iter::from_fn(move || {
334 lexer.next_token().map(|(start, t)| {
335 let end = lexer.bytes.pos();
336 Spanned::new(
337 t,
338 Span::new(start, end, file_name.clone(), file_content.clone()),
339 )
340 })
341 })
342 }
343
344 fn next_while(&mut self, f: impl Fn(u8) -> bool) {
345 for b in self.bytes.clone() {
346 if f(b) {
347 self.bytes.next().expect("iter lag");
349 } else {
350 break;
351 }
352 }
353 }
354
355 fn read_until_multiline_string_end(&mut self) -> TokenRes {
357 for b2 in self.bytes.clone().skip(1) {
358 let b1 = self.bytes.next().expect("iter lag");
361 if b1 == b'"' && b2 == b'#' {
362 self.bytes.next();
363 return Ok(Token::StringTok);
364 }
365 }
366 Err("Unterminated multiline string. Add \"# after the end of your string.".to_string())
367 }
368
369 fn read_until_multiline_comment_end(&mut self) -> TokenRes {
371 for b2 in self.bytes.clone().skip(1) {
372 let b1 = self.bytes.next().expect("iter lag");
375 if b1 == b'|' && b2 == b'#' {
376 self.bytes.next();
377 return Ok(Token::BlockComment);
378 }
379 }
380 Err("Unterminated multiline comment. Add |# after the end of your comment.".to_string())
381 }
382
383 fn next_token(&mut self) -> Option<(Position, TokenRes)> {
384 use Token::*;
385 loop {
386 let start = self.bytes.pos();
387 break match self.bytes.next() {
388 Some(b) => Some((
389 start,
390 Ok(match b {
391 b'(' => Open,
392 b')' => Close,
393 b'"' => {
394 self.next_while(|b| b != b'"' && b != b'\n');
395 match self.bytes.next() {
396 Some(b'"') => StringTok,
397 _ => return Some((start, Err("Unterminated string".to_string()))),
398 }
399 }
400 b';' => match self.bytes.clone().next() {
401 Some(b';') => {
402 self.next_while(|b| b != b'\n');
403 self.bytes.next();
405 if self.ignore_whitespace_and_comments {
406 continue;
407 }
408 Token::LineComment
409 }
410 _ => self.next_string(),
411 },
412 b'r' => {
413 match (self.bytes.clone().next(), self.bytes.clone().nth(1)) {
414 (Some(b'#'), Some(b'"')) => {
415 self.bytes.next();
417 self.bytes.next();
418 let tok: Token = match self.read_until_multiline_string_end() {
419 Ok(t) => t,
420 e @ Err(_) => return Some((start, e)),
421 };
422 tok
423 }
424 _ => self.next_string(),
425 }
426 }
427 b'#' => match self.bytes.clone().next() {
428 Some(b'|') => {
429 self.bytes.next();
431 let tok: Token = match self.read_until_multiline_comment_end() {
432 Ok(t) => t,
433 e @ Err(_) => return Some((start, e)),
434 };
435 if self.ignore_whitespace_and_comments {
436 continue;
437 }
438 tok
439 }
440 _ => self.next_string(),
441 },
442 b if b.is_ascii_whitespace() => {
443 let tok = self.next_whitespace();
444 if self.ignore_whitespace_and_comments {
445 continue;
446 }
447 tok
448 }
449 _ => self.next_string(),
450 }),
451 )),
452 None => None,
453 };
454 }
455 }
456
457 fn next_string(&mut self) -> Token {
458 self.next_while(|b| !is_start(b));
460 Token::StringTok
461 }
462
463 fn next_whitespace(&mut self) -> Token {
464 self.next_while(|b| b.is_ascii_whitespace());
465 Token::Whitespace
466 }
467}
468
/// A list found at the outermost nesting level of the source: its elements plus its span.
pub type TopLevel = Spanned<Vec<SExpr>>;
470
471pub fn parse(cfg: &str, file_name: &str) -> std::result::Result<Vec<TopLevel>, ParseError> {
472 let ignore_whitespace_and_comments = true;
473 parse_(cfg, file_name, ignore_whitespace_and_comments).map(|(x, _)| x)
474}
475
476pub fn parse_(
477 cfg: &str,
478 file_name: &str,
479 ignore_whitespace_and_comments: bool,
480) -> Result<(Vec<TopLevel>, Vec<SExprMetaData>)> {
481 let cfg = strip_utf8_bom(cfg);
482 parse_with(
483 cfg,
484 Lexer::new(cfg, file_name, ignore_whitespace_and_comments),
485 )
486 .map_err(|e| {
487 if e.msg.contains("Unterminated multiline comment") {
488 if let Some(mut span) = e.span {
489 span.end = span.start;
490 span.end.absolute += 2;
491 ParseError::new(span, e.msg)
492 } else {
493 e
494 }
495 } else {
496 e
497 }
498 })
499}
500
/// Returns `s` without a leading UTF-8 byte-order mark, or `s` unchanged if it has none.
///
/// Only a single mark at the very start is removed.
fn strip_utf8_bom(s: &str) -> &str {
    // U+FEFF is encoded in UTF-8 as the bytes EF BB BF.
    s.strip_prefix('\u{feff}').unwrap_or(s)
}
507
508fn parse_with(
509 s: &str,
510 mut tokens: impl Iterator<Item = Spanned<TokenRes>>,
511) -> Result<(Vec<TopLevel>, Vec<SExprMetaData>)> {
512 use Token::*;
513 let mut stack = vec![Spanned::new(vec![], Span::default())];
514 let mut metadata: Vec<SExprMetaData> = vec![];
515 loop {
516 match tokens.next() {
517 None => break,
518 Some(Spanned { t, span }) => match t.map_err(|s| ParseError::new(span.clone(), s))? {
519 Open => stack.push(Spanned::new(vec![], span)),
520 Close => {
521 let Spanned {
522 t: exprs,
523 span: stack_span,
524 } = stack.pop().expect("placeholder unpopped");
527 if stack.is_empty() {
528 return Err(ParseError::new(span, "Unexpected closing parenthesis"));
529 }
530 let expr = SExpr::List(Spanned::new(exprs, stack_span.cover(&span)));
531 stack.last_mut().expect("not empty").t.push(expr);
532 }
533 StringTok => stack
534 .last_mut()
535 .expect("not empty")
536 .t
537 .push(SExpr::Atom(Spanned::new(s[span.clone()].to_string(), span))),
538 BlockComment => metadata.push(SExprMetaData::BlockComment(Spanned::new(
539 s[span.clone()].to_string(),
540 span,
541 ))),
542 LineComment => metadata.push(SExprMetaData::LineComment(Spanned::new(
543 s[span.clone()].to_string(),
544 span,
545 ))),
546 Whitespace => metadata.push(SExprMetaData::Whitespace(Spanned::new(
547 s[span.clone()].to_string(),
548 span,
549 ))),
550 },
551 }
552 }
553 let Spanned { t: exprs, span: sp } = stack.pop().expect("placeholder unpopped");
556 if !stack.is_empty() {
557 return Err(ParseError::new(sp, "Unclosed opening parenthesis"));
558 }
559 let exprs = exprs
560 .into_iter()
561 .map(|expr| match expr {
562 SExpr::List(es) => Ok(es),
563 SExpr::Atom(s) => Err(ParseError::new(s.span, "Everything must be in a list")),
564 })
565 .collect::<Result<_>>()?;
566 Ok((exprs, metadata))
567}
568
569use miette::{Diagnostic, SourceSpan};
570use thiserror::Error;
571
/// Diagnostic for a syntax error in the configuration, with the message
/// "Error in configuration syntax".
#[derive(Error, Debug, Diagnostic)]
#[error("Error in configuration syntax")]
#[diagnostic()]
pub struct LexError {
    /// Region of the source to highlight; labelled "Here".
    #[label("Here")]
    pub err_span: SourceSpan,
    /// Help text attached to the diagnostic.
    #[help]
    pub help_msg: String,
}