Skip to main content

oak_bash/parser/
mod.rs

1#![doc = include_str!("readme.md")]
2/// Bash element types and role definitions.
3pub mod element_type;
4
5pub use element_type::BashElementType;
6
7use crate::{
8    language::BashLanguage,
9    lexer::{BashLexer, BashTokenType},
10};
11use oak_core::{
12    OakError, TextEdit,
13    parser::{ParseCache, Parser, ParserState},
14    source::Source,
15};
16
/// Shorthand for the `oak_core` [`ParserState`] specialised to [`BashLanguage`].
///
/// `S` is the source type being parsed; every parsing method in this module
/// takes a `&mut State<'a, S>` as its cursor.
pub(crate) type State<'a, S> = ParserState<'a, BashLanguage, S>;
18
/// Parser for the Bash language.
///
/// The parser holds only a borrowed [`BashLanguage`] configuration, which it
/// passes to [`BashLexer::new`] whenever a parse is started.
pub struct BashParser<'config> {
    /// Language configuration shared with the lexer.
    pub(crate) config: &'config BashLanguage,
}
23
24impl<'config> BashParser<'config> {
25    /// Creates a new `BashParser` instance.
26    pub fn new(config: &'config BashLanguage) -> Self {
27        Self { config }
28    }
29
30    pub(crate) fn parse_statement<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
31        if state.at(BashTokenType::Keyword) {
32            let text = state.peek_text();
33            match text.as_deref() {
34                Some("if") => self.parse_if(state),
35                Some("while") => self.parse_while(state),
36                Some("for") => self.parse_for(state),
37                Some("function") => self.parse_function(state),
38                _ => self.parse_command_or_pipeline(state),
39            }
40        }
41        else if state.at(BashTokenType::Identifier) && state.peek_kind_at(1) == Some(BashTokenType::Delimiter) && state.peek_at(1).map(|t| state.source.get_text_in(t.span)).as_deref() == Some("(") {
42            self.parse_function(state)
43        }
44        else if state.at(BashTokenType::Identifier) && state.peek_kind_at(1) == Some(BashTokenType::Operator) && state.peek_at(1).map(|t| state.source.get_text_in(t.span)).as_deref() == Some("=") {
45            self.parse_variable_assignment(state)
46        }
47        else {
48            self.parse_command_or_pipeline(state)
49        }
50    }
51
52    fn parse_variable_assignment<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
53        let checkpoint = state.checkpoint();
54        state.expect(BashTokenType::Identifier)?;
55        state.expect(BashTokenType::Operator)?; // =
56
57        // Parse the value
58        while state.not_at_end() && !state.at(BashTokenType::Newline) && !state.at(BashTokenType::Delimiter) {
59            state.bump();
60        }
61
62        state.finish_at(checkpoint, BashElementType::VariableAssignment);
63        Ok(())
64    }
65
66    fn parse_command_or_pipeline<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
67        let checkpoint = state.checkpoint();
68        self.parse_command(state)?;
69
70        if state.peek_text().as_deref() == Some("|") {
71            while state.peek_text().as_deref() == Some("|") {
72                state.bump();
73                self.parse_command(state)?;
74            }
75            state.finish_at(checkpoint, BashElementType::Pipeline);
76        }
77
78        Ok(())
79    }
80
81    fn parse_if<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
82        let checkpoint = state.checkpoint();
83        state.expect(BashTokenType::Keyword)?; // if
84
85        // Parse condition
86        while state.not_at_end() && state.peek_text().as_deref() != Some("then") {
87            state.bump();
88        }
89
90        if state.peek_text().as_deref() == Some("then") {
91            state.bump();
92        }
93
94        // Parse body
95        while state.not_at_end() {
96            let text = state.peek_text();
97            if matches!(text.as_deref(), Some("elif" | "else" | "fi")) {
98                break;
99            }
100            self.parse_statement(state).ok();
101        }
102
103        while state.peek_text().as_deref() == Some("elif") {
104            state.bump();
105            while state.not_at_end() && state.peek_text().as_deref() != Some("then") {
106                state.bump();
107            }
108            if state.peek_text().as_deref() == Some("then") {
109                state.bump();
110            }
111            while state.not_at_end() {
112                let text = state.peek_text();
113                if matches!(text.as_deref(), Some("elif" | "else" | "fi")) {
114                    break;
115                }
116                self.parse_statement(state).ok();
117            }
118        }
119
120        if state.peek_text().as_deref() == Some("else") {
121            state.bump();
122            while state.not_at_end() && state.peek_text().as_deref() != Some("fi") {
123                self.parse_statement(state).ok();
124            }
125        }
126
127        if state.peek_text().as_deref() == Some("fi") {
128            state.bump();
129        }
130
131        state.finish_at(checkpoint, BashElementType::IfStatement);
132        Ok(())
133    }
134
135    fn parse_while<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
136        let checkpoint = state.checkpoint();
137        state.expect(BashTokenType::Keyword)?; // while
138
139        while state.not_at_end() && state.peek_text().as_deref() != Some("do") {
140            state.bump();
141        }
142
143        if state.peek_text().as_deref() == Some("do") {
144            state.bump();
145        }
146
147        while state.not_at_end() && state.peek_text().as_deref() != Some("done") {
148            self.parse_statement(state).ok();
149        }
150
151        if state.peek_text().as_deref() == Some("done") {
152            state.bump();
153        }
154
155        state.finish_at(checkpoint, BashElementType::WhileStatement);
156        Ok(())
157    }
158
159    fn parse_for<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
160        let checkpoint = state.checkpoint();
161        state.expect(BashTokenType::Keyword)?; // for
162
163        while state.not_at_end() && state.peek_text().as_deref() != Some("do") {
164            state.bump();
165        }
166
167        if state.peek_text().as_deref() == Some("do") {
168            state.bump();
169        }
170
171        while state.not_at_end() && state.peek_text().as_deref() != Some("done") {
172            self.parse_statement(state).ok();
173        }
174
175        if state.peek_text().as_deref() == Some("done") {
176            state.bump();
177        }
178
179        state.finish_at(checkpoint, BashElementType::ForStatement);
180        Ok(())
181    }
182
183    fn parse_function<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
184        let checkpoint = state.checkpoint();
185        if state.peek_text().as_deref() == Some("function") {
186            state.bump();
187        }
188
189        state.expect(BashTokenType::Identifier).ok();
190
191        if state.peek_text().as_deref() == Some("(") {
192            state.bump();
193            if state.peek_text().as_deref() == Some(")") {
194                state.bump();
195            }
196        }
197
198        // Bash functions usually followed by a compound command, often a brace group
199        if state.peek_text().as_deref() == Some("{") {
200            state.bump();
201            while state.not_at_end() && state.peek_text().as_deref() != Some("}") {
202                self.parse_statement(state).ok();
203            }
204            if state.peek_text().as_deref() == Some("}") {
205                state.bump();
206            }
207        }
208        else {
209            self.parse_statement(state).ok();
210        }
211
212        state.finish_at(checkpoint, BashElementType::FunctionDefinition);
213        Ok(())
214    }
215
216    pub(crate) fn parse_command<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
217        let checkpoint = state.checkpoint();
218        while state.not_at_end() && !state.at(BashTokenType::Newline) && !state.at(BashTokenType::Delimiter) && state.peek_text().as_deref() != Some("|") {
219            if matches!(state.peek_text().as_deref(), Some(">" | ">>" | "<" | "<<")) {
220                self.parse_redirection(state)?;
221            }
222            else {
223                state.bump();
224            }
225        }
226        state.finish_at(checkpoint, BashElementType::CommandStatement);
227        Ok(())
228    }
229
230    fn parse_redirection<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
231        let checkpoint = state.checkpoint();
232        state.bump(); // The redirection operator
233
234        // Skip whitespace if any
235        while state.at(BashTokenType::Whitespace) {
236            state.bump();
237        }
238
239        // The target file or descriptor
240        if state.not_at_end() && !state.at(BashTokenType::Newline) && !state.at(BashTokenType::Delimiter) {
241            state.bump();
242        }
243
244        state.finish_at(checkpoint, BashElementType::Redirection);
245        Ok(())
246    }
247}
248
249impl<'config> Parser<BashLanguage> for BashParser<'config> {
250    fn parse<'a, S: Source + ?Sized>(&self, text: &'a S, edits: &[TextEdit], cache: &'a mut impl ParseCache<BashLanguage>) -> oak_core::ParseOutput<'a, BashLanguage> {
251        let lexer = BashLexer::new(self.config);
252        oak_core::parser::parse_with_lexer(&lexer, text, edits, cache, |state| {
253            let checkpoint = state.checkpoint();
254
255            while state.not_at_end() && !state.at(BashTokenType::Eof) {
256                if state.at(BashTokenType::Newline) || state.at(BashTokenType::Delimiter) {
257                    state.bump()
258                }
259                else {
260                    self.parse_statement(state).ok();
261                }
262            }
263
264            Ok(state.finish_at(checkpoint, BashElementType::Root))
265        })
266    }
267}