bashrs 6.66.0

Rust-to-Shell transpiler for deterministic bootstrap scripts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
impl Lexer {
    /// Lex a plain here-document (`<<DELIMITER`).
    ///
    /// Reads the delimiter word, discards whatever remains on the current
    /// line, then gathers the body without tab stripping.
    ///
    /// # Errors
    /// Propagates the error from `read_heredoc_delimiter` when no delimiter
    /// word can be read.
    fn read_heredoc(&mut self) -> Result<Token, LexerError> {
        let tag = self.read_heredoc_delimiter()?;

        // The body starts on the line after the redirection operator.
        self.skip_to_next_line();

        let body = self.read_heredoc_content(&tag, false);
        Ok(Token::Heredoc {
            delimiter: tag,
            content: body,
        })
    }

    /// BUG-007 FIX: Lex an indented here-document (`<<-DELIMITER`).
    ///
    /// Same flow as the plain form, except the body is read in tab-stripping
    /// mode: leading tabs are removed from content lines, and the delimiter
    /// line itself may be indented with tabs.
    ///
    /// # Errors
    /// Propagates the error from `read_heredoc_delimiter` when no delimiter
    /// word can be read.
    fn read_heredoc_indented(&mut self) -> Result<Token, LexerError> {
        let tag = self.read_heredoc_delimiter()?;

        // The body starts on the line after the redirection operator.
        self.skip_to_next_line();

        let body = self.read_heredoc_content(&tag, true);
        Ok(Token::Heredoc {
            delimiter: tag,
            content: body,
        })
    }

    /// Read the delimiter word of a here-document, with or without quotes
    /// (`<<EOF`, `<<'EOF'` or `<<"EOF"`).
    /// BUG-006 FIX: Handle quoted delimiters.
    ///
    /// Leading spaces and tabs are skipped. A quoted delimiter runs up to the
    /// matching closing quote (the quotes themselves are not part of the
    /// result); an unquoted one consists of alphanumeric characters and `_`.
    ///
    /// # Errors
    /// Returns `LexerError::UnexpectedChar` when the delimiter would be empty.
    /// The reported character is the one at the current position, or `'\0'`
    /// at end of input.
    fn read_heredoc_delimiter(&mut self) -> Result<String, LexerError> {
        // Blanks between the operator and the delimiter are not significant.
        while !self.is_at_end() && matches!(self.current_char(), ' ' | '\t') {
            self.advance();
        }

        // Remember which quote (if any) opens the delimiter and step past it.
        let mut quote_char = None;
        if !self.is_at_end() && matches!(self.current_char(), '\'' | '"') {
            quote_char = Some(self.current_char());
            self.advance();
        }

        let mut delimiter = String::new();
        while !self.is_at_end() {
            let ch = self.current_char();
            match quote_char {
                // Closing quote: consume it and stop.
                Some(q) if ch == q => {
                    self.advance();
                    break;
                }
                // Inside quotes every other character belongs to the delimiter.
                Some(_) => delimiter.push(self.advance()),
                // Unquoted: only word characters are accepted.
                None if ch.is_alphanumeric() || ch == '_' => delimiter.push(self.advance()),
                None => break,
            }
        }

        if !delimiter.is_empty() {
            return Ok(delimiter);
        }

        let offending = if self.is_at_end() {
            '\0'
        } else {
            self.current_char()
        };
        Err(LexerError::UnexpectedChar(
            offending,
            self.line,
            self.column,
        ))
    }

    /// Discard the remainder of the current line, including its terminating
    /// newline. Stops early if the input ends first.
    fn skip_to_next_line(&mut self) {
        loop {
            if self.is_at_end() {
                return;
            }
            // Check before advancing so the newline itself is also consumed.
            let at_newline = self.current_char() == '\n';
            self.advance();
            if at_newline {
                return;
            }
        }
    }

    /// Read heredoc content lines until a line matches the delimiter.
    ///
    /// Lines are joined with `\n`; no newline is appended after the last line.
    /// If `strip_tabs` is true (`<<-` mode), leading tabs are stripped from
    /// each content line and from the candidate delimiter line. Otherwise a
    /// line is compared to the delimiter after trimming surrounding
    /// whitespace, while content lines are stored unmodified.
    ///
    /// The newline that follows the delimiter line is left unconsumed. If the
    /// input ends before a delimiter line is found, everything read so far is
    /// returned, including a final line that has no trailing newline.
    ///
    /// Blank lines at the start of the body are preserved: the line separator
    /// is driven by whether a line has already been emitted, not by whether
    /// `content` is still empty, so leading empty lines are no longer dropped.
    fn read_heredoc_content(&mut self, delimiter: &str, strip_tabs: bool) -> String {
        // Form of `line` that is compared against the delimiter.
        fn delimiter_candidate(line: &str, strip_tabs: bool) -> &str {
            if strip_tabs {
                line.trim_start_matches('\t')
            } else {
                line.trim()
            }
        }

        // Append one body line, separating it from any previously emitted line.
        fn push_line(content: &mut String, emitted: &mut bool, line: &str, strip_tabs: bool) {
            if *emitted {
                content.push('\n');
            }
            content.push_str(if strip_tabs {
                line.trim_start_matches('\t')
            } else {
                line
            });
            *emitted = true;
        }

        let mut content = String::new();
        let mut current_line = String::new();
        // True once at least one line (possibly empty) has been written.
        let mut emitted = false;

        while !self.is_at_end() {
            if self.current_char() != '\n' {
                current_line.push(self.advance());
                continue;
            }

            if delimiter_candidate(&current_line, strip_tabs) == delimiter {
                // Don't consume the trailing newline — let it become a
                // Token::Newline so the parser sees the statement boundary.
                break;
            }

            push_line(&mut content, &mut emitted, &current_line, strip_tabs);
            current_line.clear();
            self.advance(); // skip newline
        }

        // Last line without a trailing newline (or the delimiter line we broke
        // out on): keep it only when it is not the delimiter.
        if !current_line.is_empty() && delimiter_candidate(&current_line, strip_tabs) != delimiter {
            push_line(&mut content, &mut emitted, &current_line, strip_tabs);
        }

        content
    }

    /// Issue #61: Lex the word that follows a here-string operator (`<<<`).
    ///
    /// A here-string supplies a single word/string on stdin, e.g.
    ///   cat <<< "hello world"
    ///   read word <<< hello
    ///   cmd <<< "$variable"
    ///
    /// Quoted words run to the matching quote; inside double quotes a
    /// backslash is dropped and the character after it is taken literally.
    /// Unquoted words end at whitespace or at `|`, `;` or `&`.
    ///
    /// # Errors
    /// Returns `LexerError::UnexpectedChar('\0', ..)` when the input ends
    /// right after the operator (and any blanks).
    fn read_herestring(&mut self) -> Result<Token, LexerError> {
        // Blanks between `<<<` and the word carry no meaning.
        while !self.is_at_end() && matches!(self.current_char(), ' ' | '\t') {
            self.advance();
        }

        if self.is_at_end() {
            return Err(LexerError::UnexpectedChar('\0', self.line, self.column));
        }

        let mut content = String::new();
        let first = self.current_char();

        if matches!(first, '"' | '\'') {
            let quote = first;
            self.advance(); // step over the opening quote

            while !self.is_at_end() {
                let c = self.current_char();
                if c == quote {
                    self.advance(); // step over the closing quote
                    break;
                }
                if c == '\\' && quote == '"' {
                    // Drop the backslash; the following character (if any)
                    // is appended verbatim below.
                    self.advance();
                    if self.is_at_end() {
                        break;
                    }
                }
                content.push(self.advance());
            }
        } else {
            // Bare word (or $variable): stop at whitespace or a shell
            // metacharacter.
            while !self.is_at_end() {
                let c = self.current_char();
                if c.is_whitespace() || matches!(c, '|' | ';' | '&') {
                    break;
                }
                content.push(self.advance());
            }
        }

        Ok(Token::HereString(content))
    }

    /// Lex a quoted string that starts at the current position with `quote`.
    ///
    /// Recognised escapes (`\n`, `\t`, `\r`, `\\`) are translated; any other
    /// backslash pair is kept as-is. While inside a `$(...)` command
    /// substitution, parentheses are counted and the quote character does not
    /// terminate the string (Issue #59).
    ///
    /// # Errors
    /// Returns `LexerError::UnterminatedString` carrying the line and column
    /// where the string started if the input ends before the closing quote.
    fn read_string(&mut self, quote: char) -> Result<Token, LexerError> {
        let (start_line, start_col) = (self.line, self.column);

        self.advance(); // step over the opening quote
        let mut string = String::new();

        // Number of currently open parentheses belonging to `$(`...`)`.
        let mut cmd_subst_depth = 0;

        loop {
            if self.is_at_end() {
                return Err(LexerError::UnterminatedString(start_line, start_col));
            }

            // The quote only closes the string outside command substitutions.
            if cmd_subst_depth == 0 && self.current_char() == quote {
                break;
            }

            let ch = self.advance();
            match ch {
                // Escape sequence: translate the known ones, keep the rest.
                '\\' if !self.is_at_end() => match self.advance() {
                    'n' => string.push('\n'),
                    't' => string.push('\t'),
                    'r' => string.push('\r'),
                    '\\' => string.push('\\'),
                    other => {
                        string.push('\\');
                        string.push(other);
                    }
                },
                // `$(` opens a command substitution.
                '$' if !self.is_at_end() && self.current_char() == '(' => {
                    string.push(ch);
                    string.push(self.advance()); // the '('
                    cmd_subst_depth += 1;
                }
                // Parentheses only matter while inside a substitution.
                '(' if cmd_subst_depth > 0 => {
                    string.push(ch);
                    cmd_subst_depth += 1;
                }
                ')' if cmd_subst_depth > 0 => {
                    string.push(ch);
                    cmd_subst_depth -= 1;
                }
                _ => string.push(ch),
            }
        }

        self.advance(); // step over the closing quote
        Ok(Token::String(string))
    }

    /// Lex a run of ASCII digits.
    ///
    /// Normally yields `Token::Number`. When the digits are immediately
    /// followed by `:` and another digit (port mapping `8080:8080`, version
    /// `1:2:3`), the whole digit/colon run is returned as
    /// `Token::Identifier` instead.
    ///
    /// # Errors
    /// Returns `LexerError::InvalidNumber` with the digit text when it cannot
    /// be parsed as an `i64`.
    fn read_number(&mut self) -> Result<Token, LexerError> {
        let mut num_str = String::new();
        while !self.is_at_end() && self.current_char().is_ascii_digit() {
            num_str.push(self.advance());
        }

        let colon_then_digit = !self.is_at_end()
            && self.current_char() == ':'
            && self.peek_char(1).is_some_and(|c| c.is_ascii_digit());

        if !colon_then_digit {
            return match num_str.parse::<i64>() {
                Ok(value) => Ok(Token::Number(value)),
                Err(_) => Err(LexerError::InvalidNumber(num_str)),
            };
        }

        // Colon-separated numeric word: take the ':' and then every further
        // digit or colon.
        num_str.push(self.advance());
        while !self.is_at_end() && matches!(self.current_char(), ':' | '0'..='9') {
            num_str.push(self.advance());
        }
        Ok(Token::Identifier(num_str))
    }

    /// Lex an identifier-like word and classify it.
    ///
    /// Alphanumerics and `_` are always accepted; path/glob characters and
    /// separators are accepted according to `is_ident_continuation_char` and
    /// `is_ident_separator_with_next`. The word becomes a keyword token only
    /// if it contains none of those special characters and matches
    /// `lookup_keyword`; otherwise it is a `Token::Identifier`.
    fn read_identifier_or_keyword(&mut self) -> Token {
        let mut ident = String::new();
        let mut has_special_chars = false;

        while !self.is_at_end() {
            let ch = self.current_char();
            let plain = ch.is_alphanumeric() || ch == '_';
            let accepted = plain
                || self.is_ident_continuation_char(ch)
                || self.is_ident_separator_with_next(ch);
            if !accepted {
                break;
            }
            has_special_chars |= !plain;
            ident.push(self.advance());
        }

        // A word containing special characters can never be a keyword.
        if has_special_chars {
            return Token::Identifier(ident);
        }
        Self::lookup_keyword(&ident).unwrap_or_else(|| Token::Identifier(ident))
    }

    /// Whether `ch` may always continue an identifier: `/` (paths) and the
    /// glob characters `*` and `?`.
    fn is_ident_continuation_char(&self, ch: char) -> bool {
        matches!(ch, '/' | '*' | '?')
    }

    /// Whether `ch` is a separator (`-`, `.`, `:` or `@`) that may stay inside
    /// an identifier because of the character that follows it.
    ///
    /// The following character must be alphanumeric; a `:` is additionally
    /// accepted when followed by `/` (URLs like http://...). At end of input
    /// the separator is rejected.
    /// BUG-010 FIX: Allow dashes in identifiers for function names like my-func.
    fn is_ident_separator_with_next(&self, ch: char) -> bool {
        let is_separator = matches!(ch, '-' | '.' | ':' | '@');
        is_separator
            && self
                .peek_char(1)
                .is_some_and(|next| next.is_alphanumeric() || (ch == ':' && next == '/'))
    }

    /// Map an identifier string to its keyword token.
    ///
    /// Returns `None` when `ident` is not one of the recognised keywords.
    /// Matching is exact and case-sensitive.
    fn lookup_keyword(ident: &str) -> Option<Token> {
        let token = match ident {
            "if" => Token::If,
            "then" => Token::Then,
            "elif" => Token::Elif,
            "else" => Token::Else,
            "fi" => Token::Fi,
            "for" => Token::For,
            "while" => Token::While,
            "until" => Token::Until,
            "select" => Token::Select,
            "do" => Token::Do,
            "done" => Token::Done,
            "case" => Token::Case,
            "esac" => Token::Esac,
            "in" => Token::In,
            "function" => Token::Function,
            "return" => Token::Return,
            "export" => Token::Export,
            "local" => Token::Local,
            "coproc" => Token::Coproc,
            _ => return None,
        };
        Some(token)
    }

    /// Lex an unquoted word and return it as `Token::Identifier`.
    ///
    /// Backslash escapes are kept verbatim (backslash plus the escaped
    /// character, e.g. `\;` in `find -exec ... \;`). Extended glob groups
    /// such as `@(...)`, `+(...)`, `?(...)` and `!(...)` are absorbed whole.
    /// The word ends at the first character that is neither of those nor
    /// accepted by `is_bare_word_char`.
    fn read_bare_word(&mut self) -> Token {
        let mut word = String::new();

        while !self.is_at_end() {
            let ch = self.current_char();
            match ch {
                '\\' => {
                    // Keep the backslash and, if present, the escaped char.
                    word.push(self.advance());
                    if !self.is_at_end() {
                        word.push(self.advance());
                    }
                }
                _ if self.is_extended_glob_start(ch) => {
                    self.read_inline_extended_glob(&mut word);
                }
                _ if Self::is_bare_word_char(ch) => word.push(self.advance()),
                _ => break,
            }
        }

        Token::Identifier(word)
    }

    /// Whether `ch` together with the next character opens an extended glob
    /// group: `@(`, `+(`, `?(` or `!(`.
    fn is_extended_glob_start(&self, ch: char) -> bool {
        match ch {
            '@' | '+' | '?' | '!' => self.peek_char(1) == Some('('),
            _ => false,
        }
    }

    /// Consume an extended glob group (`@(...)`, `+(...)`, etc.) and append
    /// its text to `word`.
    ///
    /// The first two characters (the operator and `(`) are taken
    /// unconditionally; after that, characters are copied until the
    /// parenthesis that balances the opening one, or until the input ends.
    fn read_inline_extended_glob(&mut self, word: &mut String) {
        word.push(self.advance()); // operator: @, +, ? or !
        word.push(self.advance()); // opening '('

        let mut depth = 1;
        while !self.is_at_end() {
            let c = self.current_char();
            word.push(self.advance());
            match c {
                '(' => depth += 1,
                ')' => {
                    depth -= 1;
                    if depth == 0 {
                        // Balancing parenthesis copied; the group is complete.
                        break;
                    }
                }
                _ => {}
            }
        }
    }

}

include!("lexer_read_operators.rs");