kast_ast/
lib.rs

1use std::{borrow::Cow, collections::HashSet};
2
3use decursion::FutureExt;
4use kast_util::*;
5
6mod lexer;
7mod peek2;
8mod syntax;
9
10pub use lexer::{is_punctuation, lex, StringType, Token};
11pub use syntax::{Associativity, Priority, Syntax, SyntaxDefinition, SyntaxDefinitionPart};
12
13use lexer::*;
14use syntax::{BindingPower, Edge};
15
16#[derive(Debug, Clone, PartialEq, Eq, Hash)]
17pub enum Ast<Data = Span> {
18    Simple {
19        token: Token,
20        data: Data,
21    },
22    Complex {
23        definition: Parc<SyntaxDefinition>,
24        values: Tuple<Self>,
25        data: Data,
26    },
27    SyntaxDefinition {
28        def: Parc<SyntaxDefinition>,
29        data: Data,
30    },
31    FromScratch {
32        next: Option<Box<Self>>,
33        data: Data,
34    },
35}
36
37impl<Data> Ast<Data> {
38    pub fn data(&self) -> &Data {
39        match self {
40            Ast::Simple { data, .. }
41            | Ast::Complex { data, .. }
42            | Ast::SyntaxDefinition { data, .. }
43            | Ast::FromScratch { data, .. } => data,
44        }
45    }
46    pub fn data_mut(&mut self) -> &mut Data {
47        match self {
48            Ast::Simple { data, .. }
49            | Ast::Complex { data, .. }
50            | Ast::SyntaxDefinition { data, .. }
51            | Ast::FromScratch { data, .. } => data,
52        }
53    }
54    pub fn map_data<NewData>(self, f: impl Fn(Data) -> NewData + Copy) -> Ast<NewData> {
55        match self {
56            Ast::Simple { token, data } => Ast::Simple {
57                token,
58                data: f(data),
59            },
60            Ast::Complex {
61                definition,
62                values,
63                data,
64            } => Ast::Complex {
65                definition,
66                values: values.map(|ast| ast.map_data(f)),
67                data: f(data),
68            },
69            Ast::SyntaxDefinition { def, data } => Ast::SyntaxDefinition { def, data: f(data) },
70            Ast::FromScratch { next, data } => Ast::FromScratch {
71                next: next.map(|ast| Box::new(ast.map_data(f))),
72                data: f(data),
73            },
74        }
75    }
76    pub fn as_ident(&self) -> Option<&str> {
77        match self {
78            Ast::Simple {
79                token: Token::Ident { name, .. },
80                ..
81            } => Some(name),
82            _ => None,
83        }
84    }
85}
86
87pub trait HasSpan {
88    fn span(&self) -> &Span;
89}
90
91impl HasSpan for Span {
92    fn span(&self) -> &Span {
93        self
94    }
95}
96
97impl<Data: HasSpan> Ast<Data> {
98    pub fn show_short(&self) -> impl std::fmt::Display + '_ {
99        struct Show<'a, Data>(&'a Ast<Data>);
100        impl<Data: HasSpan> std::fmt::Display for Show<'_, Data> {
101            fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
102                match &self.0 {
103                    Ast::Simple { token, data: _ } => write!(f, "token {token}")?,
104                    Ast::Complex {
105                        definition,
106                        values: _,
107                        data: _,
108                    } => write!(f, "{:?}", definition.name)?,
109                    Ast::SyntaxDefinition { def: _, data: _ } => write!(f, "syntax definition")?,
110                    Ast::FromScratch { next: _, data: _ } => write!(f, "syntax from scratch")?,
111                }
112                write!(f, " at {}", self.0.data().span())
113            }
114        }
115        Show(self)
116    }
117}
118
119impl<Data> std::fmt::Display for Ast<Data> {
120    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
121        match self {
122            Ast::Simple { token, data: _ } => write!(f, "{:?}", token.raw()),
123            Ast::Complex {
124                definition,
125                values,
126                data: _,
127            } => {
128                write!(f, "{}", values.fmt_with_name(&definition.name))
129            }
130            Ast::SyntaxDefinition { def, data: _ } => write!(f, "syntax {:?}", def.name),
131            Ast::FromScratch { next, data: _ } => {
132                write!(f, "syntax from scratch")?;
133                if let Some(next) = next {
134                    write!(f, "{next}")?;
135                }
136                Ok(())
137            }
138        }
139    }
140}
141
142pub fn parse(syntax: &Syntax, source: SourceFile) -> Result<Option<Ast>, Error> {
143    let mut parser = Parser {
144        reader: lex(source)?,
145    };
146    let result = parser.read_all(syntax);
147    result.map_err(|msg| {
148        msg.at(match parser.reader.peek() {
149            Some(peek) => peek.span.clone(),
150            None => Span {
151                start: parser.reader.position(),
152                end: parser.reader.position(),
153                filename: parser.reader.filename().to_owned(),
154            },
155        })
156    })
157}
158
159pub fn read_syntax(source: SourceFile) -> Result<Syntax, Error> {
160    let ast = parse(&Syntax::empty(), source)?;
161    let mut syntax = Syntax::empty();
162    fn collect_syntax_definitions(syntax: &mut Syntax, ast: Ast) -> Result<(), Error> {
163        match ast {
164            Ast::Simple { .. } => {}
165            Ast::Complex {
166                definition: _,
167                values,
168                data: _,
169            } => {
170                for (_name, value) in values {
171                    collect_syntax_definitions(syntax, value)?;
172                }
173            }
174            Ast::SyntaxDefinition { def, data: span } => {
175                syntax
176                    .insert(def)
177                    .map_err(|ErrorMessage(message)| Error { message, span })?;
178            }
179            Ast::FromScratch { next, data: _ } => {
180                if let Some(next) = next {
181                    collect_syntax_definitions(syntax, *next)?;
182                }
183            }
184        }
185        Ok(())
186    }
187    if let Some(ast) = ast {
188        collect_syntax_definitions(&mut syntax, ast)?;
189    }
190    Ok(syntax)
191}
192
193struct Parser {
194    reader: peek2::Reader<SpannedToken>,
195}
196
197enum ParsedSyntax {
198    Definition(SyntaxDefinition),
199    FromScratch,
200}
201
202fn read_syntax_def(reader: &mut peek2::Reader<SpannedToken>) -> Result<(ParsedSyntax, Span)> {
203    let Span {
204        start,
205        end: _,
206        filename,
207    } = match reader.peek().unwrap() {
208        token if token.raw() == "syntax" => reader.next().unwrap().span,
209        token => return error!("expected a syntax definition, got {}", token.token),
210    };
211    let name_token = reader.next().expect("expected a name for the syntax");
212    if name_token.raw() == "from" {
213        let scratch_token = reader.next().expect("expected 'scratch'");
214        if scratch_token.raw() != "scratch" {
215            return error!("expected 'scratch'");
216        }
217        return Ok((
218            ParsedSyntax::FromScratch,
219            Span {
220                start,
221                end: scratch_token.span.end,
222                filename: filename.clone(),
223            },
224        ));
225    }
226    let name = match name_token.token {
227        Token::Ident { name, .. } => name,
228        _ => return error!("name for the syntax must be an identifier"),
229    };
230    let associativity = match reader.next().expect("expected a associativity").token {
231        Token::Punctuation { raw } if raw == "<-" => Associativity::Left,
232        Token::Punctuation { raw } if raw == "->" => Associativity::Right,
233        _ => return error!("expected associativity (<- or ->)"),
234    };
235    let priority = match reader.next().expect("expected a priority").token {
236        Token::Number { raw } | Token::String { contents: raw, .. } => {
237            Priority::new(match raw.parse() {
238                Ok(number) => number,
239                Err(e) => return error!("failed to parse priority: {e}"),
240            })
241        }
242        _ => return error!("syntax priority must be a number"),
243    };
244    if reader.next().map(|spanned| spanned.token.raw().to_owned()) != Some("=".to_owned()) {
245        return error!("expected a =");
246    }
247    let mut parts = Vec::new();
248    let mut end = None;
249    while let Some(token) = reader.peek() {
250        parts.push(match &token.token {
251            Token::Ident { name, .. } => {
252                if name == "_" {
253                    SyntaxDefinitionPart::UnnamedBinding
254                } else {
255                    SyntaxDefinitionPart::NamedBinding(name.clone())
256                }
257            }
258            Token::String { contents, .. } => SyntaxDefinitionPart::Keyword(contents.clone()),
259            _ => break,
260        });
261        end = Some(reader.next().unwrap().span.end);
262    }
263    Ok((
264        ParsedSyntax::Definition(SyntaxDefinition {
265            name,
266            priority,
267            associativity,
268            parts,
269        }),
270        Span {
271            start,
272            end: end.unwrap(),
273            filename: filename.clone(),
274        },
275    ))
276}
277
278// My wife yelled at me for forgetting to lock the front door last night, but later apologized. She
279// wanted to be safe, then sorry.
280
281enum ProgressPart {
282    Keyword(String, Span),
283    Value(Ast),
284}
285
286impl ProgressPart {
287    pub fn span(&self) -> &Span {
288        match self {
289            ProgressPart::Keyword(_, span) => span,
290            ProgressPart::Value(ast) => ast.data(),
291        }
292    }
293    pub fn into_keyword(self) -> Option<String> {
294        match self {
295            ProgressPart::Keyword(keyword, _) => Some(keyword),
296            ProgressPart::Value(_) => None,
297        }
298    }
299    pub fn into_value(self) -> Option<Ast> {
300        match self {
301            ProgressPart::Keyword(_, _) => None,
302            ProgressPart::Value(value) => Some(value),
303        }
304    }
305    fn format(progress: &[Self]) -> impl std::fmt::Display + '_ {
306        struct Format<'a>(&'a [ProgressPart]);
307        impl std::fmt::Display for Format<'_> {
308            fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
309                if self.0.is_empty() {
310                    write!(f, "<no progress>")?;
311                } else {
312                    write!(f, "\"")?;
313                    for (index, part) in self.0.iter().enumerate() {
314                        if index != 0 {
315                            write!(f, " ")?;
316                        }
317                        write!(f, "{part}")?;
318                    }
319                    write!(f, "\"")?;
320                }
321                Ok(())
322            }
323        }
324        Format(progress)
325    }
326}
327
328impl std::fmt::Display for ProgressPart {
329    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
330        match self {
331            Self::Keyword(keyword, _) => write!(f, "{keyword}"),
332            Self::Value(_) => write!(f, "_"),
333        }
334    }
335}
336
337/// Bеst viewers on https://www.twitch.tv/kuviman
338enum ReadOneResult {
339    /// Progress was made, contains the parsed expr
340    Progress(Ast),
341    /// No progress could be made, contains original start value
342    NoProgress(Option<Ast>),
343}
344
345impl Parser {
346    fn skip_comments(&mut self) {
347        while self.reader.peek().unwrap().is_comment() {
348            self.reader.next().unwrap();
349        }
350    }
351
352    /// Read a single ast node (combine start value with smth if given),
353    /// without trying to combine it with the rest of the tokens
354    ///
355    /// If currently parsing an inner value inside another expr,
356    /// binding power of the outer expr must be taken into consideration
357    async fn read_one(
358        &mut self,
359        syntax: &Syntax,
360        continuation_keywords: &HashSet<&str>,
361        start_value: Option<Ast>,
362        outer_bp: Option<BindingPower>,
363    ) -> Result<ReadOneResult> {
364        tracing::trace!(
365            "start reading one with outer_bp={outer_bp:?}, start_value={}",
366            display_option(&start_value),
367        );
368        // first lets see if we can just read a simple value (or a syntax def)
369        if start_value.is_none() {
370            self.skip_comments();
371            let peek = self.reader.peek().unwrap();
372            let raw = peek.raw();
373            if raw == "syntax" {
374                tracing::trace!("see syntax keyword, parsing syntax definition...");
375                let (parsed, span) = read_syntax_def(&mut self.reader)?;
376                match parsed {
377                    ParsedSyntax::Definition(def) => {
378                        return Ok(ReadOneResult::Progress(Ast::SyntaxDefinition {
379                            def: Parc::new(def),
380                            data: span,
381                        }));
382                    }
383                    ParsedSyntax::FromScratch => {
384                        let next = self
385                            .read_expr(&Syntax::empty(), &Default::default(), None)
386                            .decurse()
387                            .await?;
388                        return Ok(ReadOneResult::Progress(Ast::FromScratch {
389                            data: Span {
390                                start: span.start,
391                                end: next
392                                    .as_ref()
393                                    .map(|next| next.data().end)
394                                    .unwrap_or(span.end),
395                                filename: span.filename,
396                            },
397                            next: next.map(Box::new),
398                        }));
399                    }
400                }
401            }
402            if !syntax.keywords.contains(raw)
403                && matches!(
404                    peek.token,
405                    Token::String { .. } | Token::Number { .. } | Token::Ident { .. }
406                )
407            {
408                let SpannedToken { token, span } = self.reader.next().unwrap();
409                tracing::trace!("seeing a simple value {token}");
410                return Ok(ReadOneResult::Progress(Ast::Simple { token, data: span }));
411            }
412        }
413        let mut current_node;
414        let mut parsed_parts = Vec::new();
415        if let Some(start_value) = start_value {
416            parsed_parts.push(ProgressPart::Value(start_value));
417            current_node = &syntax.root_with_start_value;
418        } else {
419            current_node = &syntax.root_without_start_value;
420        }
421        let mut made_progress = false;
422        loop {
423            self.skip_comments();
424            let peek = self.reader.peek().unwrap();
425            let raw = peek.raw();
426            tracing::trace!("peek = {raw}");
427            if let Some(next_node) = current_node
428                .next
429                .get(&Edge::Keyword(raw.to_owned()))
430                .filter(|_| !continuation_keywords.contains(raw))
431            {
432                if !should_resume(outer_bp, next_node.binding_power) {
433                    tracing::trace!("not continuing with keyword {raw:?} because of outer_bp");
434                    break;
435                }
436                tracing::trace!("continuing with keyword {raw:?}");
437                parsed_parts.push({
438                    let keyword = self.reader.next().unwrap();
439                    ProgressPart::Keyword(keyword.token.into_raw(), keyword.span)
440                });
441                current_node = next_node;
442                made_progress = true;
443            } else if let Some(next_node) = current_node.next.get(&Edge::Value) {
444                if !should_resume(outer_bp, next_node.binding_power) {
445                    tracing::trace!("not trying to read a value because of outer_bp");
446                    break;
447                }
448                let (current_bp, inner_continuation_keywords) = if current_node.is_open_paren {
449                    tracing::trace!(
450                        "since we just opened a paren, we reset bp and continuation keywords",
451                    );
452                    (None, HashSet::new())
453                } else {
454                    let mut keywords = continuation_keywords.clone();
455                    for edge in next_node.next.keys() {
456                        if edge.is_open_bracket() {
457                            // TODO maybe is hack?
458                            continue;
459                        }
460                        if let Edge::Keyword(keyword) = edge {
461                            keywords.insert(keyword);
462                        }
463                    }
464                    let bp = if !made_progress {
465                        next_node.binding_power
466                    } else {
467                        current_node.binding_power
468                    };
469                    (bp, keywords)
470                };
471                tracing::trace!("trying to read a value to continue with");
472                tracing::trace!("current_bp={current_bp:?}");
473                tracing::trace!("inner_continuation_keywords={inner_continuation_keywords:?})");
474                match self
475                    .read_expr(syntax, &inner_continuation_keywords, current_bp)
476                    .decurse()
477                    .await?
478                {
479                    Some(value) => {
480                        tracing::trace!("continuing with a value");
481                        parsed_parts.push(ProgressPart::Value(value));
482                        current_node = next_node;
483                        made_progress = true;
484                    }
485                    None => {
486                        tracing::trace!("did not read a value, stopping");
487                        break;
488                    }
489                }
490            } else {
491                break;
492            }
493        }
494        if !made_progress {
495            tracing::trace!("no progress was made, returning start_value back");
496            assert!(parsed_parts.len() <= 1);
497            let start_value = parsed_parts.pop().map(|part| match part {
498                ProgressPart::Keyword(_, _) => unreachable!(),
499                ProgressPart::Value(value) => value,
500            });
501            return Ok(ReadOneResult::NoProgress(start_value));
502        }
503        let Some(definition) = &current_node.finish else {
504            return error!(
505                "Can not finish parsing {}, expected {}",
506                ProgressPart::format(&parsed_parts),
507                current_node.format_possible_continuations(),
508            );
509        };
510        tracing::trace!("read one finished, collecting progress");
511        let span = Span {
512            filename: self.reader.filename().to_owned(),
513            start: parsed_parts[0].span().start,
514            end: parsed_parts.last().unwrap().span().end,
515        };
516        Ok(ReadOneResult::Progress(Ast::Complex {
517            definition: definition.clone(),
518            values: assign_progress(definition, parsed_parts).expect("Failed to assign values"),
519            data: span,
520        }))
521    }
522}
523
524impl Parser {
525    /// Try to read an expr, maybe inside another expr with given binding power
526    /// (can only use stronger binding power then)
527    async fn read_expr(
528        &mut self,
529        syntax: &Syntax,
530        continuation_keywords: &HashSet<&str>,
531        outer_bp: Option<BindingPower>,
532    ) -> Result<Option<Ast>> {
533        if self.reader.peek().unwrap().is_eof() {
534            return Ok(None);
535        }
536        tracing::trace!("starting to read expr with outer_bp={outer_bp:?}");
537        let mut syntax = Cow::Borrowed(syntax);
538        let mut already_parsed = None;
539        loop {
540            tracing::trace!(
541                "trying to read one more node with already_parsed={}",
542                display_option(&already_parsed),
543            );
544            match self
545                .read_one(&syntax, continuation_keywords, already_parsed, outer_bp)
546                .await?
547            {
548                ReadOneResult::Progress(value) => {
549                    if let Ast::SyntaxDefinition { def, data: _ } = &value {
550                        let mut new_syntax = syntax.into_owned();
551                        new_syntax.insert(def.clone())?;
552                        syntax = Cow::Owned(new_syntax);
553                    }
554                    already_parsed = Some(value);
555                }
556                ReadOneResult::NoProgress(value) => {
557                    match &value {
558                        Some(value) => tracing::trace!("read expr - done! parsed {value}"),
559                        None => tracing::trace!("read expr - done! nothing was parsed"),
560                    }
561                    return Ok(value);
562                }
563            }
564        }
565    }
566
567    fn read_all(&mut self, syntax: &Syntax) -> Result<Option<Ast>> {
568        let result = decursion::run_decursing(self.read_expr(syntax, &HashSet::new(), None))?;
569        let peek = self.reader.peek().unwrap();
570        if !peek.is_eof() {
571            return error!("unexpected token {:?}", peek.raw());
572        }
573        Ok(result)
574    }
575}
576
577fn assign_progress(
578    definition: &SyntaxDefinition,
579    values: impl IntoIterator<Item = ProgressPart>,
580) -> Result<Tuple<Ast>> {
581    let mut result = Tuple::empty();
582    let mut progress = values.into_iter();
583    for part in &definition.parts {
584        let progress = progress
585            .next()
586            .ok_or_else(|| error_fmt!("not enough progress was made"))?;
587        match part {
588            SyntaxDefinitionPart::Keyword(expected) => {
589                assert_eq!(
590                    expected.as_str(),
591                    progress
592                        .into_keyword()
593                        .ok_or_else(|| error_fmt!("expected a keyword"))?
594                        .as_str(),
595                );
596            }
597            SyntaxDefinitionPart::UnnamedBinding => {
598                result.add_unnamed(
599                    progress
600                        .into_value()
601                        .ok_or_else(|| error_fmt!("expected a value"))?,
602                );
603            }
604            SyntaxDefinitionPart::NamedBinding(name) => {
605                result.add_named(
606                    name.clone(),
607                    progress
608                        .into_value()
609                        .ok_or_else(|| error_fmt!("expected a value"))?,
610                );
611            }
612        }
613    }
614    if progress.next().is_some() {
615        return error!("too many values");
616    }
617    Ok(result)
618}
619
620fn should_resume(outer_bp: Option<BindingPower>, with: Option<BindingPower>) -> bool {
621    let Some(outer_bp) = outer_bp else {
622        return true;
623    };
624    match with {
625        None => false,
626        Some(with) => match outer_bp.priority.cmp(&with.priority) {
627            std::cmp::Ordering::Equal => {
628                if outer_bp.associativity != with.associativity {
629                    panic!("same priority different associativity: {outer_bp:?} & {with:?}");
630                }
631                match outer_bp.associativity {
632                    Associativity::Left => false,
633                    Associativity::Right => true,
634                }
635            }
636            std::cmp::Ordering::Less => true,
637            std::cmp::Ordering::Greater => false,
638        },
639    }
640}