Skip to main content

oak_wolfram/parser/
mod.rs

1//! Parser implementation for the Wolfram language.
2
3/// Wolfram element types.
4pub mod element_type;
5
6use crate::{
7    language::WolframLanguage,
8    lexer::{WolframLexer, token_type::WolframTokenType},
9    parser::element_type::WolframElementType,
10};
11use oak_core::{
12    parser::{OperatorInfo, ParseCache, ParseOutput, Parser, ParserState, Pratt, PrattParser, binary, parse_with_lexer, postfix, unary},
13    source::{Source, TextEdit},
14    tree::GreenNode,
15};
16
17pub(crate) type State<'a, S> = ParserState<'a, WolframLanguage, S>;
18
19/// Parser for the Wolfram language.
20#[derive(Debug, Clone)]
21pub struct WolframParser<'config> {
22    /// The Wolfram language configuration.
23    config: &'config WolframLanguage,
24}
25
26impl<'config> WolframParser<'config> {
27    /// Creates a new `WolframParser` with the given configuration.
28    pub fn new(config: &'config WolframLanguage) -> Self {
29        Self { config }
30    }
31}
32
33impl<'config> Parser<WolframLanguage> for WolframParser<'config> {
34    fn parse<'a, S: Source + ?Sized>(&self, text: &'a S, edits: &[TextEdit], cache: &'a mut impl ParseCache<WolframLanguage>) -> ParseOutput<'a, WolframLanguage> {
35        let lexer = WolframLexer::new(&self.config);
36        parse_with_lexer(&lexer, text, edits, cache, |state| {
37            let checkpoint = state.checkpoint();
38
39            while state.not_at_end() {
40                self.parse_expression(state);
41            }
42
43            Ok(state.finish_at(checkpoint, WolframElementType::Root))
44        })
45    }
46}
47
48impl<'config> WolframParser<'config> {
49    fn parse_expression<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) {
50        self.parse_pratt(state, 0);
51    }
52
53    fn parse_pratt<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>, min_precedence: u8) -> &'a GreenNode<'a, WolframLanguage> {
54        PrattParser::new(self.clone()).parse_expr(state, min_precedence)
55    }
56
57    fn parse_arguments<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) {
58        let checkpoint = state.checkpoint();
59        state.bump(); // [
60
61        while state.not_at(WolframTokenType::RightBracket) && state.not_at_end() {
62            self.parse_expression(state);
63            if state.at(WolframTokenType::Comma) {
64                state.bump();
65            }
66        }
67
68        if state.at(WolframTokenType::RightBracket) {
69            state.bump();
70        }
71        state.finish_at(checkpoint, WolframElementType::Arguments);
72    }
73
74    fn parse_list<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) {
75        let checkpoint = state.checkpoint();
76        state.bump(); // {
77
78        while state.not_at(WolframTokenType::RightBrace) && state.not_at_end() {
79            self.parse_expression(state);
80            if state.at(WolframTokenType::Comma) {
81                state.bump();
82            }
83        }
84
85        if state.at(WolframTokenType::RightBrace) {
86            state.bump();
87        }
88        state.finish_at(checkpoint, WolframElementType::List);
89    }
90}
91
92impl<'config> Pratt<WolframLanguage> for WolframParser<'config> {
93    fn primary<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> &'a GreenNode<'a, WolframLanguage> {
94        let checkpoint = state.checkpoint();
95
96        if state.at(WolframTokenType::Identifier) {
97            state.bump();
98            // Check if it's a function call f[...]
99            while state.at(WolframTokenType::LeftBracket) {
100                self.parse_arguments(state);
101                state.finish_at(checkpoint, WolframElementType::Call);
102            }
103            if state.checkpoint() == checkpoint { state.finish_at(checkpoint, WolframElementType::Symbol) } else { state.finish_at(checkpoint, WolframElementType::Call) }
104        }
105        else if state.at(WolframTokenType::Integer) || state.at(WolframTokenType::Real) || state.at(WolframTokenType::String) {
106            state.bump();
107            state.finish_at(checkpoint, WolframElementType::Literal)
108        }
109        else if state.at(WolframTokenType::LeftBrace) {
110            self.parse_list(state);
111            state.finish_at(checkpoint, WolframElementType::List)
112        }
113        else if state.at(WolframTokenType::Slot) || state.at(WolframTokenType::SlotSequence) {
114            state.bump();
115            state.finish_at(checkpoint, WolframElementType::Symbol)
116        }
117        else if state.at(WolframTokenType::LeftParen) {
118            state.bump();
119            self.parse_expression(state);
120            if state.at(WolframTokenType::RightParen) {
121                state.bump();
122            }
123            state.finish_at(checkpoint, WolframElementType::Expression)
124        }
125        else {
126            // Error handling
127            state.bump();
128            state.finish_at(checkpoint, WolframElementType::Error)
129        }
130    }
131
132    fn prefix<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> &'a GreenNode<'a, WolframLanguage> {
133        let kind = match state.peek_kind() {
134            Some(k) => k,
135            None => return self.primary(state),
136        };
137
138        let info = match kind {
139            WolframTokenType::Minus => Some(OperatorInfo::right(150)),     // Unary minus
140            WolframTokenType::Factorial => Some(OperatorInfo::right(150)), // ! (Not)
141            _ => None,
142        };
143
144        if let Some(info) = info { unary(state, kind, info.precedence, WolframElementType::PrefixExpr, |s, p| self.parse_pratt(s, p)) } else { self.primary(state) }
145    }
146
147    fn infix<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>, left: &'a GreenNode<'a, WolframLanguage>, min_precedence: u8) -> Option<&'a GreenNode<'a, WolframLanguage>> {
148        let kind = state.peek_kind()?;
149
150        // Postfix operators
151        let postfix_info = match kind {
152            WolframTokenType::Ampersand => Some(OperatorInfo::left(10)),  // body &
153            WolframTokenType::Factorial => Some(OperatorInfo::left(160)), // x!
154            _ => None,
155        };
156
157        if let Some(info) = postfix_info {
158            if info.precedence < min_precedence {
159                return None;
160            }
161            return Some(postfix(state, left, kind, WolframElementType::PostfixExpr));
162        }
163
164        // Binary/Infix operators
165        let info = match kind {
166            WolframTokenType::Assign | WolframTokenType::Set | WolframTokenType::SetDelayed => Some(OperatorInfo::right(20)),
167            WolframTokenType::Rule | WolframTokenType::RuleDelayed | WolframTokenType::Arrow => Some(OperatorInfo::right(30)),
168            WolframTokenType::SlashSlash => Some(OperatorInfo::left(40)), // x // f
169            WolframTokenType::Or => Some(OperatorInfo::left(50)),
170            WolframTokenType::And => Some(OperatorInfo::left(60)),
171            WolframTokenType::Equal | WolframTokenType::NotEqual | WolframTokenType::Less | WolframTokenType::Greater | WolframTokenType::LessEqual | WolframTokenType::GreaterEqual => Some(OperatorInfo::none(70)),
172            WolframTokenType::Plus | WolframTokenType::Minus => Some(OperatorInfo::left(80)),
173            WolframTokenType::Times | WolframTokenType::Divide => Some(OperatorInfo::left(90)),
174            WolframTokenType::At => Some(OperatorInfo::right(100)),                 // f @ x
175            WolframTokenType::MapOperator => Some(OperatorInfo::right(110)),        // f /@ list
176            WolframTokenType::ApplyOperator => Some(OperatorInfo::right(110)),      // f @@ expr
177            WolframTokenType::ApplyLevelOperator => Some(OperatorInfo::right(110)), // f @@@ expr
178            WolframTokenType::MapAllOperator => Some(OperatorInfo::right(110)),     // f //@ list
179            WolframTokenType::Power => Some(OperatorInfo::right(120)),
180            _ => None,
181        }?;
182
183        if info.precedence < min_precedence {
184            return None;
185        }
186
187        Some(binary(state, left, kind, info.precedence, info.associativity, WolframElementType::BinaryExpr, |s, p| self.parse_pratt(s, p)))
188    }
189}