1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
//! Bash Parser
//!
//! Parses token stream from lexer into bash AST.
//! Implements recursive descent parsing for bash syntax.
use super::ast::*;
use super::lexer::{Lexer, LexerError, Token};
use thiserror::Error;
// Re-export error display functions that were extracted to parser_error_display.rs
pub use super::parser_error_display::format_parse_diagnostic;
#[derive(Error, Debug)]
pub enum ParseError {
#[error("Lexer error: {0}")]
LexerError(#[from] LexerError),
#[error("Unexpected token: expected {expected}, found {found} at line {line}")]
UnexpectedToken {
expected: String,
found: String,
line: usize,
},
#[error("Unexpected end of file")]
UnexpectedEof,
#[error("Invalid syntax: {0}")]
InvalidSyntax(String),
}
pub type ParseResult<T> = Result<T, ParseError>;
impl ParseError {
/// Extract line number from any parse error variant
pub fn line(&self) -> Option<usize> {
match self {
Self::UnexpectedToken { line, .. } => Some(*line),
Self::LexerError(LexerError::UnexpectedChar(_, line, _)) => Some(*line),
Self::LexerError(LexerError::UnterminatedString(line, _)) => Some(*line),
_ => None,
}
}
/// Extract column number from any parse error variant
pub fn column(&self) -> Option<usize> {
match self {
Self::LexerError(LexerError::UnexpectedChar(_, _, col)) => Some(*col),
Self::LexerError(LexerError::UnterminatedString(_, col)) => Some(*col),
_ => None,
}
}
}
pub struct BashParser {
pub(crate) tokens: Vec<Token>,
/// Character positions of each token in the source string
pub(crate) token_positions: Vec<usize>,
pub(crate) position: usize,
pub(crate) current_line: usize,
pub(crate) tracer: Option<crate::tracing::TraceManager>,
/// Original source code, stored for error diagnostics
pub(crate) source: String,
}
impl BashParser {
/// Create a new Bash parser from source code.
///
/// Tokenizes the input and prepares the parser for AST generation.
///
/// # Arguments
///
/// * `input` - Bash script source code as a string
///
/// # Returns
///
/// * `Ok(BashParser)` - Ready to parse the script
/// * `Err(ParseError)` - Lexer error (invalid tokens)
///
/// # Examples
///
/// ## Basic usage
///
/// ```
/// use bashrs::bash_parser::BashParser;
///
/// let script = "echo hello";
/// let parser = BashParser::new(script).unwrap();
/// // Parser is ready to call parse()
/// ```
///
/// ## Variable assignment
///
/// ```
/// use bashrs::bash_parser::BashParser;
///
/// let script = "x=42\ny=hello";
/// let mut parser = BashParser::new(script).unwrap();
/// let ast = parser.parse().unwrap();
/// assert_eq!(ast.statements.len(), 2);
/// ```
///
/// ## Invalid syntax
///
/// ```
/// use bashrs::bash_parser::BashParser;
///
/// // Unclosed string
/// let script = r#"echo "unclosed"#;
/// let result = BashParser::new(script);
/// // Lexer detects the error
/// assert!(result.is_err());
/// ```
pub fn new(input: &str) -> ParseResult<Self> {
let mut lexer = Lexer::new(input);
let (tokens, token_positions) = lexer.tokenize_with_positions()?;
Ok(Self {
tokens,
token_positions,
position: 0,
current_line: 1,
tracer: None,
source: input.to_string(),
})
}
/// Get the original source code (for error diagnostics)
pub fn source(&self) -> &str {
&self.source
}
/// Enable tracing for this parser
///
/// Allows instrumentation of parsing events for debugging and analysis.
/// Zero overhead when not called (tracer remains None).
pub fn with_tracer(mut self, tracer: crate::tracing::TraceManager) -> Self {
self.tracer = Some(tracer);
self
}
/// Parse the tokenized script into a Bash AST.
///
/// Performs recursive descent parsing to build an abstract syntax tree
/// representing the structure of the bash script.
///
/// # Returns
///
/// * `Ok(BashAst)` - Successfully parsed AST
/// * `Err(ParseError)` - Syntax error or unexpected token
///
/// # Examples
///
/// ## Simple command
///
/// ```
/// use bashrs::bash_parser::BashParser;
///
/// let script = "echo hello";
/// let mut parser = BashParser::new(script).unwrap();
/// let ast = parser.parse().unwrap();
/// assert_eq!(ast.statements.len(), 1);
/// ```
///
/// ## Multiple statements
///
/// ```
/// use bashrs::bash_parser::BashParser;
///
/// let script = r#"
/// x=10
/// echo $x
/// y=20
/// "#;
/// let mut parser = BashParser::new(script).unwrap();
/// let ast = parser.parse().unwrap();
/// assert_eq!(ast.statements.len(), 3);
/// ```
///
/// ## Control flow
///
/// ```
/// use bashrs::bash_parser::BashParser;
///
/// let script = r#"
/// if [ "$x" = "5" ]; then
/// echo "five"
/// fi
/// "#;
/// let mut parser = BashParser::new(script).unwrap();
/// let ast = parser.parse().unwrap();
/// // Should parse the if statement
/// assert!(!ast.statements.is_empty());
/// ```
pub fn parse(&mut self) -> ParseResult<BashAst> {
// Contract: parser-soundness-v1.yaml precondition (pv codegen)
contract_pre_parse!(self.tokens);
let start_time = std::time::Instant::now();
// Emit ParseStart trace event
if let Some(ref tracer) = self.tracer {
tracer.emit_parse(crate::tracing::ParseEvent::ParseStart {
source: String::from("<input>"),
line: 1,
col: 1,
});
}
let mut statements = Vec::new();
let parse_result = (|| -> ParseResult<BashAst> {
while !self.is_at_end() {
// Skip newlines and semicolons between statements
while self.check(&Token::Newline) || self.check(&Token::Semicolon) {
self.advance();
}
if self.is_at_end() {
break;
}
let stmt = self.parse_statement()?;
// Emit ParseNode trace event for each statement
if let Some(ref tracer) = self.tracer {
tracer.emit_parse(crate::tracing::ParseEvent::ParseNode {
node_type: stmt.node_type().to_string(),
span: stmt.span(),
});
}
statements.push(stmt);
// Skip newlines and semicolons after statement
while self.check(&Token::Newline) || self.check(&Token::Semicolon) {
self.advance();
}
}
let duration = start_time.elapsed();
let parse_time_ms = duration.as_millis() as u64;
// Emit ParseComplete trace event
if let Some(ref tracer) = self.tracer {
tracer.emit_parse(crate::tracing::ParseEvent::ParseComplete {
node_count: statements.len(),
duration,
});
}
Ok(BashAst {
statements,
metadata: AstMetadata {
source_file: None,
line_count: self.current_line,
parse_time_ms,
},
})
})();
// Emit ParseError trace event if parsing failed
if let Err(ref err) = parse_result {
if let Some(ref tracer) = self.tracer {
tracer.emit_parse(crate::tracing::ParseEvent::ParseError {
error: err.to_string(),
span: crate::tracing::Span::single_line(self.current_line, 1, 1),
});
}
}
parse_result
}
pub(crate) fn parse_statement(&mut self) -> ParseResult<BashStmt> {
// Skip comments and collect them
if let Some(Token::Comment(text)) = self.peek() {
let comment = text.clone();
self.advance();
return Ok(BashStmt::Comment {
text: comment,
span: Span::new(self.current_line, 0, self.current_line, 0),
});
}
// Parse first statement (could be part of pipeline)
let first_stmt = match self.peek() {
// Bash allows keywords as variable names (e.g., fi=1, for=2, while=3)
// Check for assignment pattern first before treating as control structure
Some(t) if Self::is_keyword_token(t) && self.peek_ahead(1) == Some(&Token::Assign) => {
self.parse_assignment(false)
}
// Control flow statements (if/for/while/until/case/select)
Some(Token::If) => self.parse_if(),
Some(Token::While) => self.parse_while(),
Some(Token::Until) => self.parse_until(),
Some(Token::For) => self.parse_for(),
Some(Token::Select) => self.parse_select(), // F017: select statement
Some(Token::Case) => self.parse_case(),
// Declaration statements (function/return/export/local/coproc)
Some(Token::Function) => self.parse_function(),
Some(Token::Return) => self.parse_return(),
Some(Token::Export) => self.parse_export(),
Some(Token::Local) => self.parse_local(),
Some(Token::Coproc) => self.parse_coproc(), // BUG-018
// Identifiers: assignment, function def shorthand, or command
Some(Token::Identifier(_)) => self.parse_identifier_statement(),
// Issue #67: Handle standalone arithmetic ((expr)) as a command
Some(Token::ArithmeticExpansion(_)) => self.parse_standalone_arithmetic(),
// Compound commands: brace group, subshell, test, extended test
Some(Token::LeftBrace) => self.parse_brace_group(),
Some(Token::LeftParen) => self.parse_subshell(),
Some(Token::LeftBracket) => self.parse_test_command(),
Some(Token::DoubleLeftBracket) => self.parse_extended_test_command(),
_ => self.parse_command(),
}?;
// Handle pipeline, logical operators, and background
self.parse_statement_tail(first_stmt)
}
/// Check if a token is a keyword that can also serve as a variable name in assignments
fn is_keyword_token(token: &Token) -> bool {
matches!(
token,
Token::If
| Token::Then
| Token::Elif
| Token::Else
| Token::Fi
| Token::While
| Token::Until
| Token::For
| Token::Do
| Token::Done
| Token::Case
| Token::Esac
| Token::In
| Token::Function
| Token::Return
)
}
/// Parse an identifier that could be an assignment, function definition, or command
fn parse_identifier_statement(&mut self) -> ParseResult<BashStmt> {
// Could be assignment, function, or command
// BUG-012 FIX: Also handle += for array append
// F019 FIX: Also handle array element assignment: name[index]=value
if self.peek_ahead(1) == Some(&Token::Assign)
|| matches!(self.peek_ahead(1), Some(Token::Identifier(s)) if s == "+=")
{
self.parse_assignment(false)
} else if self.peek_ahead(1) == Some(&Token::LeftBracket)
&& self.peek_ahead(3) == Some(&Token::RightBracket)
&& self.peek_ahead(4) == Some(&Token::Assign)
{
// F019: Array element assignment: hash[key]=value
// Must have pattern: name[index]=value (with ] followed by =)
self.parse_assignment(false)
} else if self.peek_ahead(1) == Some(&Token::LeftParen)
&& self.peek_ahead(2) == Some(&Token::RightParen)
{
// This is a function definition: name() { ... }
self.parse_function_shorthand()
} else {
self.parse_command()
}
}
/// Issue #67: Handle standalone arithmetic ((expr)) as a command
fn parse_standalone_arithmetic(&mut self) -> ParseResult<BashStmt> {
let arith_expr = match self.peek() {
Some(Token::ArithmeticExpansion(expr)) => expr.clone(),
_ => return Err(self.syntax_error("arithmetic expansion")),
};
self.advance();
Ok(BashStmt::Command {
name: ":".to_string(),
args: vec![BashExpr::Literal(format!("$(({}))", arith_expr))],
redirects: vec![],
span: Span::new(self.current_line, 0, self.current_line, 0),
})
}
/// Parse pipeline, logical operators (&&, ||), and background (&) after the first statement
fn parse_statement_tail(&mut self, first_stmt: BashStmt) -> ParseResult<BashStmt> {
// Check for pipeline: cmd1 | cmd2 | cmd3
let stmt = if self.check(&Token::Pipe) {
let mut commands = vec![first_stmt];
// Collect all piped commands
while self.check(&Token::Pipe) {
self.advance(); // consume '|'
// Skip newlines after pipe
self.skip_newlines();
// Parse next command in pipeline — compound commands
// are valid on the right side of a pipe:
// cmd | while read line; do ...; done
// cmd | if ...; then ...; fi
// cmd | { cmd1; cmd2; }
let next_cmd = self.parse_pipeline_rhs()?;
commands.push(next_cmd);
}
// Return pipeline with all collected commands
BashStmt::Pipeline {
commands,
span: Span::new(self.current_line, 0, self.current_line, 0),
}
} else {
first_stmt
};
// Issue #59: Check for logical AND (&&) or OR (||) operators
// These have lower precedence than pipes, so we check after pipeline handling
// cmd1 | cmd2 && cmd3 parses as (cmd1 | cmd2) && cmd3
if self.check(&Token::And) {
self.advance(); // consume '&&'
self.skip_newlines(); // allow newlines after &&
// Parse right side (which could itself be a pipeline or logical list)
let right = self.parse_statement()?;
return Ok(BashStmt::AndList {
left: Box::new(stmt),
right: Box::new(right),
span: Span::new(self.current_line, 0, self.current_line, 0),
});
}
if self.check(&Token::Or) {
self.advance(); // consume '||'
self.skip_newlines(); // allow newlines after ||
// Parse right side (which could itself be a pipeline or logical list)
let right = self.parse_statement()?;
return Ok(BashStmt::OrList {
left: Box::new(stmt),
right: Box::new(right),
span: Span::new(self.current_line, 0, self.current_line, 0),
});
}
// Consume trailing & (background operator) — acts as statement terminator
if self.check(&Token::Ampersand) {
self.advance();
}
// Not a pipeline or logical list, return the statement
Ok(stmt)
}
}