1use std::{borrow::Cow, collections::HashSet};
2
3use decursion::FutureExt;
4use kast_util::*;
5
6mod lexer;
7mod peek2;
8mod syntax;
9
10pub use lexer::{is_punctuation, lex, StringType, Token};
11pub use syntax::{Associativity, Priority, Syntax, SyntaxDefinition, SyntaxDefinitionPart};
12
13use lexer::*;
14use syntax::{BindingPower, Edge};
15
16#[derive(Debug, Clone, PartialEq, Eq, Hash)]
17pub enum Ast<Data = Span> {
18 Simple {
19 token: Token,
20 data: Data,
21 },
22 Complex {
23 definition: Parc<SyntaxDefinition>,
24 values: Tuple<Self>,
25 data: Data,
26 },
27 SyntaxDefinition {
28 def: Parc<SyntaxDefinition>,
29 data: Data,
30 },
31 FromScratch {
32 next: Option<Box<Self>>,
33 data: Data,
34 },
35}
36
37impl<Data> Ast<Data> {
38 pub fn data(&self) -> &Data {
39 match self {
40 Ast::Simple { data, .. }
41 | Ast::Complex { data, .. }
42 | Ast::SyntaxDefinition { data, .. }
43 | Ast::FromScratch { data, .. } => data,
44 }
45 }
46 pub fn data_mut(&mut self) -> &mut Data {
47 match self {
48 Ast::Simple { data, .. }
49 | Ast::Complex { data, .. }
50 | Ast::SyntaxDefinition { data, .. }
51 | Ast::FromScratch { data, .. } => data,
52 }
53 }
54 pub fn map_data<NewData>(self, f: impl Fn(Data) -> NewData + Copy) -> Ast<NewData> {
55 match self {
56 Ast::Simple { token, data } => Ast::Simple {
57 token,
58 data: f(data),
59 },
60 Ast::Complex {
61 definition,
62 values,
63 data,
64 } => Ast::Complex {
65 definition,
66 values: values.map(|ast| ast.map_data(f)),
67 data: f(data),
68 },
69 Ast::SyntaxDefinition { def, data } => Ast::SyntaxDefinition { def, data: f(data) },
70 Ast::FromScratch { next, data } => Ast::FromScratch {
71 next: next.map(|ast| Box::new(ast.map_data(f))),
72 data: f(data),
73 },
74 }
75 }
76 pub fn as_ident(&self) -> Option<&str> {
77 match self {
78 Ast::Simple {
79 token: Token::Ident { name, .. },
80 ..
81 } => Some(name),
82 _ => None,
83 }
84 }
85}
86
87pub trait HasSpan {
88 fn span(&self) -> &Span;
89}
90
91impl HasSpan for Span {
92 fn span(&self) -> &Span {
93 self
94 }
95}
96
97impl<Data: HasSpan> Ast<Data> {
98 pub fn show_short(&self) -> impl std::fmt::Display + '_ {
99 struct Show<'a, Data>(&'a Ast<Data>);
100 impl<Data: HasSpan> std::fmt::Display for Show<'_, Data> {
101 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
102 match &self.0 {
103 Ast::Simple { token, data: _ } => write!(f, "token {token}")?,
104 Ast::Complex {
105 definition,
106 values: _,
107 data: _,
108 } => write!(f, "{:?}", definition.name)?,
109 Ast::SyntaxDefinition { def: _, data: _ } => write!(f, "syntax definition")?,
110 Ast::FromScratch { next: _, data: _ } => write!(f, "syntax from scratch")?,
111 }
112 write!(f, " at {}", self.0.data().span())
113 }
114 }
115 Show(self)
116 }
117}
118
119impl<Data> std::fmt::Display for Ast<Data> {
120 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
121 match self {
122 Ast::Simple { token, data: _ } => write!(f, "{:?}", token.raw()),
123 Ast::Complex {
124 definition,
125 values,
126 data: _,
127 } => {
128 write!(f, "{}", values.fmt_with_name(&definition.name))
129 }
130 Ast::SyntaxDefinition { def, data: _ } => write!(f, "syntax {:?}", def.name),
131 Ast::FromScratch { next, data: _ } => {
132 write!(f, "syntax from scratch")?;
133 if let Some(next) = next {
134 write!(f, "{next}")?;
135 }
136 Ok(())
137 }
138 }
139 }
140}
141
142pub fn parse(syntax: &Syntax, source: SourceFile) -> Result<Option<Ast>, Error> {
143 let mut parser = Parser {
144 reader: lex(source)?,
145 };
146 let result = parser.read_all(syntax);
147 result.map_err(|msg| {
148 msg.at(match parser.reader.peek() {
149 Some(peek) => peek.span.clone(),
150 None => Span {
151 start: parser.reader.position(),
152 end: parser.reader.position(),
153 filename: parser.reader.filename().to_owned(),
154 },
155 })
156 })
157}
158
159pub fn read_syntax(source: SourceFile) -> Result<Syntax, Error> {
160 let ast = parse(&Syntax::empty(), source)?;
161 let mut syntax = Syntax::empty();
162 fn collect_syntax_definitions(syntax: &mut Syntax, ast: Ast) -> Result<(), Error> {
163 match ast {
164 Ast::Simple { .. } => {}
165 Ast::Complex {
166 definition: _,
167 values,
168 data: _,
169 } => {
170 for (_name, value) in values {
171 collect_syntax_definitions(syntax, value)?;
172 }
173 }
174 Ast::SyntaxDefinition { def, data: span } => {
175 syntax
176 .insert(def)
177 .map_err(|ErrorMessage(message)| Error { message, span })?;
178 }
179 Ast::FromScratch { next, data: _ } => {
180 if let Some(next) = next {
181 collect_syntax_definitions(syntax, *next)?;
182 }
183 }
184 }
185 Ok(())
186 }
187 if let Some(ast) = ast {
188 collect_syntax_definitions(&mut syntax, ast)?;
189 }
190 Ok(syntax)
191}
192
193struct Parser {
194 reader: peek2::Reader<SpannedToken>,
195}
196
197enum ParsedSyntax {
198 Definition(SyntaxDefinition),
199 FromScratch,
200}
201
202fn read_syntax_def(reader: &mut peek2::Reader<SpannedToken>) -> Result<(ParsedSyntax, Span)> {
203 let Span {
204 start,
205 end: _,
206 filename,
207 } = match reader.peek().unwrap() {
208 token if token.raw() == "syntax" => reader.next().unwrap().span,
209 token => return error!("expected a syntax definition, got {}", token.token),
210 };
211 let name_token = reader.next().expect("expected a name for the syntax");
212 if name_token.raw() == "from" {
213 let scratch_token = reader.next().expect("expected 'scratch'");
214 if scratch_token.raw() != "scratch" {
215 return error!("expected 'scratch'");
216 }
217 return Ok((
218 ParsedSyntax::FromScratch,
219 Span {
220 start,
221 end: scratch_token.span.end,
222 filename: filename.clone(),
223 },
224 ));
225 }
226 let name = match name_token.token {
227 Token::Ident { name, .. } => name,
228 _ => return error!("name for the syntax must be an identifier"),
229 };
230 let associativity = match reader.next().expect("expected a associativity").token {
231 Token::Punctuation { raw } if raw == "<-" => Associativity::Left,
232 Token::Punctuation { raw } if raw == "->" => Associativity::Right,
233 _ => return error!("expected associativity (<- or ->)"),
234 };
235 let priority = match reader.next().expect("expected a priority").token {
236 Token::Number { raw } | Token::String { contents: raw, .. } => {
237 Priority::new(match raw.parse() {
238 Ok(number) => number,
239 Err(e) => return error!("failed to parse priority: {e}"),
240 })
241 }
242 _ => return error!("syntax priority must be a number"),
243 };
244 if reader.next().map(|spanned| spanned.token.raw().to_owned()) != Some("=".to_owned()) {
245 return error!("expected a =");
246 }
247 let mut parts = Vec::new();
248 let mut end = None;
249 while let Some(token) = reader.peek() {
250 parts.push(match &token.token {
251 Token::Ident { name, .. } => {
252 if name == "_" {
253 SyntaxDefinitionPart::UnnamedBinding
254 } else {
255 SyntaxDefinitionPart::NamedBinding(name.clone())
256 }
257 }
258 Token::String { contents, .. } => SyntaxDefinitionPart::Keyword(contents.clone()),
259 _ => break,
260 });
261 end = Some(reader.next().unwrap().span.end);
262 }
263 Ok((
264 ParsedSyntax::Definition(SyntaxDefinition {
265 name,
266 priority,
267 associativity,
268 parts,
269 }),
270 Span {
271 start,
272 end: end.unwrap(),
273 filename: filename.clone(),
274 },
275 ))
276}
277
278enum ProgressPart {
282 Keyword(String, Span),
283 Value(Ast),
284}
285
286impl ProgressPart {
287 pub fn span(&self) -> &Span {
288 match self {
289 ProgressPart::Keyword(_, span) => span,
290 ProgressPart::Value(ast) => ast.data(),
291 }
292 }
293 pub fn into_keyword(self) -> Option<String> {
294 match self {
295 ProgressPart::Keyword(keyword, _) => Some(keyword),
296 ProgressPart::Value(_) => None,
297 }
298 }
299 pub fn into_value(self) -> Option<Ast> {
300 match self {
301 ProgressPart::Keyword(_, _) => None,
302 ProgressPart::Value(value) => Some(value),
303 }
304 }
305 fn format(progress: &[Self]) -> impl std::fmt::Display + '_ {
306 struct Format<'a>(&'a [ProgressPart]);
307 impl std::fmt::Display for Format<'_> {
308 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
309 if self.0.is_empty() {
310 write!(f, "<no progress>")?;
311 } else {
312 write!(f, "\"")?;
313 for (index, part) in self.0.iter().enumerate() {
314 if index != 0 {
315 write!(f, " ")?;
316 }
317 write!(f, "{part}")?;
318 }
319 write!(f, "\"")?;
320 }
321 Ok(())
322 }
323 }
324 Format(progress)
325 }
326}
327
328impl std::fmt::Display for ProgressPart {
329 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
330 match self {
331 Self::Keyword(keyword, _) => write!(f, "{keyword}"),
332 Self::Value(_) => write!(f, "_"),
333 }
334 }
335}
336
337enum ReadOneResult {
339 Progress(Ast),
341 NoProgress(Option<Ast>),
343}
344
345impl Parser {
346 fn skip_comments(&mut self) {
347 while self.reader.peek().unwrap().is_comment() {
348 self.reader.next().unwrap();
349 }
350 }
351
352 async fn read_one(
358 &mut self,
359 syntax: &Syntax,
360 continuation_keywords: &HashSet<&str>,
361 start_value: Option<Ast>,
362 outer_bp: Option<BindingPower>,
363 ) -> Result<ReadOneResult> {
364 tracing::trace!(
365 "start reading one with outer_bp={outer_bp:?}, start_value={}",
366 display_option(&start_value),
367 );
368 if start_value.is_none() {
370 self.skip_comments();
371 let peek = self.reader.peek().unwrap();
372 let raw = peek.raw();
373 if raw == "syntax" {
374 tracing::trace!("see syntax keyword, parsing syntax definition...");
375 let (parsed, span) = read_syntax_def(&mut self.reader)?;
376 match parsed {
377 ParsedSyntax::Definition(def) => {
378 return Ok(ReadOneResult::Progress(Ast::SyntaxDefinition {
379 def: Parc::new(def),
380 data: span,
381 }));
382 }
383 ParsedSyntax::FromScratch => {
384 let next = self
385 .read_expr(&Syntax::empty(), &Default::default(), None)
386 .decurse()
387 .await?;
388 return Ok(ReadOneResult::Progress(Ast::FromScratch {
389 data: Span {
390 start: span.start,
391 end: next
392 .as_ref()
393 .map(|next| next.data().end)
394 .unwrap_or(span.end),
395 filename: span.filename,
396 },
397 next: next.map(Box::new),
398 }));
399 }
400 }
401 }
402 if !syntax.keywords.contains(raw)
403 && matches!(
404 peek.token,
405 Token::String { .. } | Token::Number { .. } | Token::Ident { .. }
406 )
407 {
408 let SpannedToken { token, span } = self.reader.next().unwrap();
409 tracing::trace!("seeing a simple value {token}");
410 return Ok(ReadOneResult::Progress(Ast::Simple { token, data: span }));
411 }
412 }
413 let mut current_node;
414 let mut parsed_parts = Vec::new();
415 if let Some(start_value) = start_value {
416 parsed_parts.push(ProgressPart::Value(start_value));
417 current_node = &syntax.root_with_start_value;
418 } else {
419 current_node = &syntax.root_without_start_value;
420 }
421 let mut made_progress = false;
422 loop {
423 self.skip_comments();
424 let peek = self.reader.peek().unwrap();
425 let raw = peek.raw();
426 tracing::trace!("peek = {raw}");
427 if let Some(next_node) = current_node
428 .next
429 .get(&Edge::Keyword(raw.to_owned()))
430 .filter(|_| !continuation_keywords.contains(raw))
431 {
432 if !should_resume(outer_bp, next_node.binding_power) {
433 tracing::trace!("not continuing with keyword {raw:?} because of outer_bp");
434 break;
435 }
436 tracing::trace!("continuing with keyword {raw:?}");
437 parsed_parts.push({
438 let keyword = self.reader.next().unwrap();
439 ProgressPart::Keyword(keyword.token.into_raw(), keyword.span)
440 });
441 current_node = next_node;
442 made_progress = true;
443 } else if let Some(next_node) = current_node.next.get(&Edge::Value) {
444 if !should_resume(outer_bp, next_node.binding_power) {
445 tracing::trace!("not trying to read a value because of outer_bp");
446 break;
447 }
448 let (current_bp, inner_continuation_keywords) = if current_node.is_open_paren {
449 tracing::trace!(
450 "since we just opened a paren, we reset bp and continuation keywords",
451 );
452 (None, HashSet::new())
453 } else {
454 let mut keywords = continuation_keywords.clone();
455 for edge in next_node.next.keys() {
456 if edge.is_open_bracket() {
457 continue;
459 }
460 if let Edge::Keyword(keyword) = edge {
461 keywords.insert(keyword);
462 }
463 }
464 let bp = if !made_progress {
465 next_node.binding_power
466 } else {
467 current_node.binding_power
468 };
469 (bp, keywords)
470 };
471 tracing::trace!("trying to read a value to continue with");
472 tracing::trace!("current_bp={current_bp:?}");
473 tracing::trace!("inner_continuation_keywords={inner_continuation_keywords:?})");
474 match self
475 .read_expr(syntax, &inner_continuation_keywords, current_bp)
476 .decurse()
477 .await?
478 {
479 Some(value) => {
480 tracing::trace!("continuing with a value");
481 parsed_parts.push(ProgressPart::Value(value));
482 current_node = next_node;
483 made_progress = true;
484 }
485 None => {
486 tracing::trace!("did not read a value, stopping");
487 break;
488 }
489 }
490 } else {
491 break;
492 }
493 }
494 if !made_progress {
495 tracing::trace!("no progress was made, returning start_value back");
496 assert!(parsed_parts.len() <= 1);
497 let start_value = parsed_parts.pop().map(|part| match part {
498 ProgressPart::Keyword(_, _) => unreachable!(),
499 ProgressPart::Value(value) => value,
500 });
501 return Ok(ReadOneResult::NoProgress(start_value));
502 }
503 let Some(definition) = ¤t_node.finish else {
504 return error!(
505 "Can not finish parsing {}, expected {}",
506 ProgressPart::format(&parsed_parts),
507 current_node.format_possible_continuations(),
508 );
509 };
510 tracing::trace!("read one finished, collecting progress");
511 let span = Span {
512 filename: self.reader.filename().to_owned(),
513 start: parsed_parts[0].span().start,
514 end: parsed_parts.last().unwrap().span().end,
515 };
516 Ok(ReadOneResult::Progress(Ast::Complex {
517 definition: definition.clone(),
518 values: assign_progress(definition, parsed_parts).expect("Failed to assign values"),
519 data: span,
520 }))
521 }
522}
523
524impl Parser {
525 async fn read_expr(
528 &mut self,
529 syntax: &Syntax,
530 continuation_keywords: &HashSet<&str>,
531 outer_bp: Option<BindingPower>,
532 ) -> Result<Option<Ast>> {
533 if self.reader.peek().unwrap().is_eof() {
534 return Ok(None);
535 }
536 tracing::trace!("starting to read expr with outer_bp={outer_bp:?}");
537 let mut syntax = Cow::Borrowed(syntax);
538 let mut already_parsed = None;
539 loop {
540 tracing::trace!(
541 "trying to read one more node with already_parsed={}",
542 display_option(&already_parsed),
543 );
544 match self
545 .read_one(&syntax, continuation_keywords, already_parsed, outer_bp)
546 .await?
547 {
548 ReadOneResult::Progress(value) => {
549 if let Ast::SyntaxDefinition { def, data: _ } = &value {
550 let mut new_syntax = syntax.into_owned();
551 new_syntax.insert(def.clone())?;
552 syntax = Cow::Owned(new_syntax);
553 }
554 already_parsed = Some(value);
555 }
556 ReadOneResult::NoProgress(value) => {
557 match &value {
558 Some(value) => tracing::trace!("read expr - done! parsed {value}"),
559 None => tracing::trace!("read expr - done! nothing was parsed"),
560 }
561 return Ok(value);
562 }
563 }
564 }
565 }
566
567 fn read_all(&mut self, syntax: &Syntax) -> Result<Option<Ast>> {
568 let result = decursion::run_decursing(self.read_expr(syntax, &HashSet::new(), None))?;
569 let peek = self.reader.peek().unwrap();
570 if !peek.is_eof() {
571 return error!("unexpected token {:?}", peek.raw());
572 }
573 Ok(result)
574 }
575}
576
577fn assign_progress(
578 definition: &SyntaxDefinition,
579 values: impl IntoIterator<Item = ProgressPart>,
580) -> Result<Tuple<Ast>> {
581 let mut result = Tuple::empty();
582 let mut progress = values.into_iter();
583 for part in &definition.parts {
584 let progress = progress
585 .next()
586 .ok_or_else(|| error_fmt!("not enough progress was made"))?;
587 match part {
588 SyntaxDefinitionPart::Keyword(expected) => {
589 assert_eq!(
590 expected.as_str(),
591 progress
592 .into_keyword()
593 .ok_or_else(|| error_fmt!("expected a keyword"))?
594 .as_str(),
595 );
596 }
597 SyntaxDefinitionPart::UnnamedBinding => {
598 result.add_unnamed(
599 progress
600 .into_value()
601 .ok_or_else(|| error_fmt!("expected a value"))?,
602 );
603 }
604 SyntaxDefinitionPart::NamedBinding(name) => {
605 result.add_named(
606 name.clone(),
607 progress
608 .into_value()
609 .ok_or_else(|| error_fmt!("expected a value"))?,
610 );
611 }
612 }
613 }
614 if progress.next().is_some() {
615 return error!("too many values");
616 }
617 Ok(result)
618}
619
620fn should_resume(outer_bp: Option<BindingPower>, with: Option<BindingPower>) -> bool {
621 let Some(outer_bp) = outer_bp else {
622 return true;
623 };
624 match with {
625 None => false,
626 Some(with) => match outer_bp.priority.cmp(&with.priority) {
627 std::cmp::Ordering::Equal => {
628 if outer_bp.associativity != with.associativity {
629 panic!("same priority different associativity: {outer_bp:?} & {with:?}");
630 }
631 match outer_bp.associativity {
632 Associativity::Left => false,
633 Associativity::Right => true,
634 }
635 }
636 std::cmp::Ordering::Less => true,
637 std::cmp::Ordering::Greater => false,
638 },
639 }
640}