1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
use crate::error::{RableError, Result};
use crate::token::{Token, TokenType};
use super::Lexer;
impl Lexer {
/// Scans a single word token from the current input position.
///
/// Characters are appended to the token text until a metacharacter is
/// reached. Quotes, backslashes, `$` and backticks are handed to
/// `read_word_special`; extglob groups, `=(` array literals and mid-word
/// process substitutions are consumed through `read_matched_parens`.
///
/// `start` and `line` are forwarded unchanged to the produced token, or to
/// the parse error when no token can be produced.
///
/// # Errors
///
/// Returns an "unexpected character" parse error when no character was
/// collected, and propagates any error raised by the nested readers.
#[allow(clippy::too_many_lines)]
pub(super) fn read_word_token(&mut self, start: usize, line: usize) -> Result<Token> {
    let mut word = String::new();

    while let Some(ch) = self.peek_char() {
        match ch {
            // Quoting and expansion syntax is handled by a dedicated reader.
            '\'' | '"' | '\\' | '$' | '`' => {
                self.read_word_special(&mut word, ch)?;
            }
            // `<(` / `>(` glued onto a non-empty word is a process
            // substitution (e.g. `cat<(cmd)`) and stays part of the word.
            '<' | '>' if !word.is_empty() && self.input.get(self.pos + 1) == Some(&'(') => {
                self.read_process_sub_into(&mut word)?;
            }
            // `(` continues the word only directly after `=` (array
            // assignment, `arr=(...)`) or after an extglob prefix character.
            '(' => {
                let continues_word = match word.chars().next_back() {
                    Some('=' | '@' | '?' | '+' | '!') => true,
                    // A trailing `*` counts only while extglob mode is enabled.
                    Some('*') => self.config.extglob,
                    _ => false,
                };
                if !continues_word {
                    break;
                }
                self.advance_char();
                word.push('(');
                self.read_matched_parens(&mut word, 1)?;
            }
            // Any remaining metacharacter (including a bare `<` or `>`)
            // terminates the word.
            ' ' | '\t' | '\n' | '|' | '&' | ';' | ')' | '<' | '>' => break,
            // Extglob prefix immediately followed by `(`: `@(...)`, `?(...)`,
            // `+(...)`, `!(...)`; `*(...)` only when extglob mode is enabled.
            '@' | '?' | '+' | '!' | '*'
                if self.input.get(self.pos + 1) == Some(&'(')
                    && (ch != '*' || self.config.extglob) =>
            {
                self.read_extglob(&mut word, ch)?;
            }
            // Ordinary character: consume it and keep it verbatim.
            _ => {
                self.advance_char();
                word.push(ch);
            }
        }
    }

    if word.is_empty() {
        return Err(RableError::parse("unexpected character", start, line));
    }

    // Reserved words are only looked up while the lexer is in command position.
    let kind = if self.ctx.command_start {
        TokenType::reserved_word(&word).unwrap_or(TokenType::Word)
    } else {
        TokenType::Word
    };

    // The lexer stays in command position only when this token starts a
    // command or is one of the token types listed here.
    let reopens_command = matches!(
        kind,
        TokenType::Then | TokenType::Else | TokenType::Elif | TokenType::Do | TokenType::Semi
    );
    self.ctx.command_start = kind.starts_command() || reopens_command;

    Ok(Token::new(kind, word, start, line))
}
/// Handles one quoting or expansion construct inside a word.
///
/// `c` selects the construct and is expected to be the current, not yet
/// consumed character:
///
/// * `'`, `"` and `` ` `` are consumed, echoed into `value`, and the rest is
///   read by `read_single_quoted`, `read_double_quoted` or `read_backtick`.
/// * `\` followed by a newline is a line continuation: both characters are
///   consumed and nothing is appended. Otherwise the backslash and the
///   following character (if there is one) are appended.
/// * `$` is delegated entirely to `read_dollar`.
///
/// Any other character is ignored and nothing is consumed.
///
/// # Errors
///
/// Propagates any error returned by the delegated reader.
pub(super) fn read_word_special(&mut self, value: &mut String, c: char) -> Result<()> {
    match c {
        '$' => {
            self.read_dollar(value)?;
        }
        '\\' => {
            // Consume the backslash itself before inspecting what follows.
            self.advance_char();
            let line_continuation = self.peek_char() == Some('\n');
            if line_continuation {
                // Drop the newline; the escaped line break leaves no trace.
                self.advance_char();
            } else {
                value.push('\\');
                // Appends the escaped character, or nothing at end of input.
                value.extend(self.advance_char());
            }
        }
        '\'' | '"' | '`' => {
            // All three delimiters are consumed and echoed the same way;
            // only the reader for the remainder differs.
            self.advance_char();
            value.push(c);
            match c {
                '\'' => {
                    self.read_single_quoted(value)?;
                }
                '"' => {
                    self.read_double_quoted(value)?;
                }
                _ => {
                    self.read_backtick(value)?;
                }
            }
        }
        _ => {}
    }
    Ok(())
}
/// Consumes an extglob group such as `@(...)` or `?(...)` into `value`.
///
/// Two characters are consumed from the input. `prefix` and `(` are appended
/// to `value` in their place, on the assumption that the caller has already
/// checked the input holds exactly those two characters. The rest of the
/// group is then read by `read_matched_parens`, called with the value `1`.
///
/// # Errors
///
/// Propagates any error returned by `read_matched_parens`.
pub(super) fn read_extglob(&mut self, value: &mut String, prefix: char) -> Result<()> {
    // One input character is consumed for each opener that is appended.
    for opener in [prefix, '('] {
        self.advance_char();
        value.push(opener);
    }
    self.read_matched_parens(value, 1)
}
/// Appends a process substitution (`<(...)` or `>(...)`) to a word that is
/// already being built.
///
/// The direction character is taken from the input, falling back to `<` if
/// the input is exhausted. The next character is consumed and `(` is
/// appended in its place, then the remainder is read by
/// `read_matched_parens`, called with the value `1`.
///
/// # Errors
///
/// Propagates any error returned by `read_matched_parens`.
pub(super) fn read_process_sub_into(&mut self, value: &mut String) -> Result<()> {
    // Direction character: `<` or `>`.
    value.push(self.advance_char().unwrap_or('<'));
    // Consume the input character that follows and record `(` for it.
    self.advance_char();
    value.push('(');
    self.read_matched_parens(value, 1)
}
/// Lexes a process substitution, `<(...)` or `>(...)`, as a word token of
/// its own.
///
/// The text is collected by `read_process_sub_into` into a fresh buffer.
/// On success `self.ctx.command_start` is cleared and a `TokenType::Word`
/// token carrying `start` and `line` is returned.
///
/// # Errors
///
/// Propagates any error from reading the substitution; in that case
/// `self.ctx.command_start` is left untouched.
pub(super) fn read_process_sub_word(&mut self, start: usize, line: usize) -> Result<Token> {
    let mut text = String::new();
    // Direction character, `(`, and everything up to the matching `)`.
    self.read_process_sub_into(&mut text)?;
    self.ctx.command_start = false;
    Ok(Token::new(TokenType::Word, text, start, line))
}
}