Skip to main content

oak_wolfram/parser/
mod.rs

1pub mod element_type;
2
3use crate::{
4    language::WolframLanguage,
5    lexer::{WolframLexer, token_type::WolframTokenType},
6    parser::element_type::WolframElementType,
7};
8use oak_core::{
9    parser::{Associativity, OperatorInfo, ParseCache, ParseOutput, Parser, ParserState, Pratt, PrattParser, binary, parse_with_lexer, postfix, unary},
10    source::{Source, TextEdit},
11    tree::GreenNode,
12};
13
/// Shorthand for the `oak_core` parser state specialised to [`WolframLanguage`],
/// generic over the source type `S`.
pub(crate) type State<'a, S> = ParserState<'a, WolframLanguage, S>;
15
/// Parser for Wolfram Language source text.
///
/// The parser only borrows its language configuration, so cloning it copies
/// a single reference.
#[derive(Debug, Clone)]
pub struct WolframParser<'config> {
    /// Language configuration; handed to the lexer each time `parse` runs.
    config: &'config WolframLanguage,
}
21
impl<'config> WolframParser<'config> {
    /// Creates a parser that borrows `config` for the lifetime `'config`.
    pub fn new(config: &'config WolframLanguage) -> Self {
        Self { config }
    }
}
27
28impl<'config> Parser<WolframLanguage> for WolframParser<'config> {
29    fn parse<'a, S: Source + ?Sized>(&self, text: &'a S, edits: &[TextEdit], cache: &'a mut impl ParseCache<WolframLanguage>) -> ParseOutput<'a, WolframLanguage> {
30        let lexer = WolframLexer::new(&self.config);
31        parse_with_lexer(&lexer, text, edits, cache, |state| {
32            let checkpoint = state.checkpoint();
33
34            while state.not_at_end() {
35                self.parse_expression(state);
36            }
37
38            Ok(state.finish_at(checkpoint, WolframElementType::Root))
39        })
40    }
41}
42
43impl<'config> WolframParser<'config> {
44    fn parse_expression<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) {
45        self.parse_pratt(state, 0);
46    }
47
48    fn parse_pratt<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>, min_precedence: u8) -> &'a GreenNode<'a, WolframLanguage> {
49        PrattParser::new(self.clone()).parse_expr(state, min_precedence)
50    }
51
52    fn parse_arguments<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) {
53        let checkpoint = state.checkpoint();
54        state.bump(); // [
55
56        while state.not_at(WolframTokenType::RightBracket) && state.not_at_end() {
57            self.parse_expression(state);
58            if state.at(WolframTokenType::Comma) {
59                state.bump();
60            }
61        }
62
63        if state.at(WolframTokenType::RightBracket) {
64            state.bump();
65        }
66        state.finish_at(checkpoint, WolframElementType::Arguments);
67    }
68
69    fn parse_list<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) {
70        let checkpoint = state.checkpoint();
71        state.bump(); // {
72
73        while state.not_at(WolframTokenType::RightBrace) && state.not_at_end() {
74            self.parse_expression(state);
75            if state.at(WolframTokenType::Comma) {
76                state.bump();
77            }
78        }
79
80        if state.at(WolframTokenType::RightBrace) {
81            state.bump();
82        }
83        state.finish_at(checkpoint, WolframElementType::List);
84    }
85}
86
87impl<'config> Pratt<WolframLanguage> for WolframParser<'config> {
88    fn primary<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> &'a GreenNode<'a, WolframLanguage> {
89        let checkpoint = state.checkpoint();
90
91        if state.at(WolframTokenType::Identifier) {
92            state.bump();
93            // 检查是否是函数调用 f[...]
94            while state.at(WolframTokenType::LeftBracket) {
95                self.parse_arguments(state);
96                state.finish_at(checkpoint, WolframElementType::Call);
97            }
98            if state.checkpoint() == checkpoint { state.finish_at(checkpoint, WolframElementType::Symbol) } else { state.finish_at(checkpoint, WolframElementType::Call) }
99        }
100        else if state.at(WolframTokenType::Integer) || state.at(WolframTokenType::Real) || state.at(WolframTokenType::String) {
101            state.bump();
102            state.finish_at(checkpoint, WolframElementType::Literal)
103        }
104        else if state.at(WolframTokenType::LeftBrace) {
105            self.parse_list(state);
106            state.finish_at(checkpoint, WolframElementType::List)
107        }
108        else if state.at(WolframTokenType::Slot) || state.at(WolframTokenType::SlotSequence) {
109            state.bump();
110            state.finish_at(checkpoint, WolframElementType::Symbol)
111        }
112        else if state.at(WolframTokenType::LeftParen) {
113            state.bump();
114            self.parse_expression(state);
115            if state.at(WolframTokenType::RightParen) {
116                state.bump();
117            }
118            state.finish_at(checkpoint, WolframElementType::Expression)
119        }
120        else {
121            // 容错处理
122            state.bump();
123            state.finish_at(checkpoint, WolframElementType::Error)
124        }
125    }
126
127    fn prefix<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> &'a GreenNode<'a, WolframLanguage> {
128        let kind = match state.peek_kind() {
129            Some(k) => k,
130            None => return self.primary(state),
131        };
132
133        let info = match kind {
134            WolframTokenType::Minus => Some(OperatorInfo::right(150)),     // Unary minus
135            WolframTokenType::Factorial => Some(OperatorInfo::right(150)), // ! (Not)
136            _ => None,
137        };
138
139        if let Some(info) = info { unary(state, kind, info.precedence, WolframElementType::PrefixExpr, |s, p| self.parse_pratt(s, p)) } else { self.primary(state) }
140    }
141
142    fn infix<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>, left: &'a GreenNode<'a, WolframLanguage>, min_precedence: u8) -> Option<&'a GreenNode<'a, WolframLanguage>> {
143        let kind = state.peek_kind()?;
144
145        // 后缀运算符
146        let postfix_info = match kind {
147            WolframTokenType::Ampersand => Some(OperatorInfo::left(10)),  // body &
148            WolframTokenType::Factorial => Some(OperatorInfo::left(160)), // x!
149            _ => None,
150        };
151
152        if let Some(info) = postfix_info {
153            if info.precedence < min_precedence {
154                return None;
155            }
156            return Some(postfix(state, left, kind, WolframElementType::PostfixExpr));
157        }
158
159        // 二元/中缀运算符
160        let info = match kind {
161            WolframTokenType::Assign | WolframTokenType::Set | WolframTokenType::SetDelayed => Some(OperatorInfo::right(20)),
162            WolframTokenType::Rule | WolframTokenType::RuleDelayed | WolframTokenType::Arrow => Some(OperatorInfo::right(30)),
163            WolframTokenType::SlashSlash => Some(OperatorInfo::left(40)), // x // f
164            WolframTokenType::Or => Some(OperatorInfo::left(50)),
165            WolframTokenType::And => Some(OperatorInfo::left(60)),
166            WolframTokenType::Equal | WolframTokenType::NotEqual | WolframTokenType::Less | WolframTokenType::Greater | WolframTokenType::LessEqual | WolframTokenType::GreaterEqual => Some(OperatorInfo::none(70)),
167            WolframTokenType::Plus | WolframTokenType::Minus => Some(OperatorInfo::left(80)),
168            WolframTokenType::Times | WolframTokenType::Divide => Some(OperatorInfo::left(90)),
169            WolframTokenType::At => Some(OperatorInfo::right(100)),                 // f @ x
170            WolframTokenType::MapOperator => Some(OperatorInfo::right(110)),        // f /@ list
171            WolframTokenType::ApplyOperator => Some(OperatorInfo::right(110)),      // f @@ expr
172            WolframTokenType::ApplyLevelOperator => Some(OperatorInfo::right(110)), // f @@@ expr
173            WolframTokenType::MapAllOperator => Some(OperatorInfo::right(110)),     // f //@ list
174            WolframTokenType::Power => Some(OperatorInfo::right(120)),
175            _ => None,
176        }?;
177
178        if info.precedence < min_precedence {
179            return None;
180        }
181
182        Some(binary(state, left, kind, info.precedence, info.associativity, WolframElementType::BinaryExpr, |s, p| self.parse_pratt(s, p)))
183    }
184}