cmd_lib_core/
parser.rs

1use std::collections::{VecDeque, HashMap};
2use crate::process::{GroupCmds, Cmds, Cmd, FdOrFile};
3
/// Parser state for a command source string.
///
/// Holds the raw source text plus the optional data used while parsing:
/// a queue of string-literal values and a symbol table for `$name`
/// substitution. `file` and `line` are only used in panic messages.
#[doc(hidden)]
#[derive(Debug, Clone)]
pub struct Parser {
    /// Replacement values for string literals, consumed front-to-back as
    /// literals are encountered in `src`. `None` means the literal text found
    /// in `src` is used as-is.
    str_lits: Option<VecDeque<String>>,
    /// Variable name -> value table used to expand `$name` / `${name}`.
    /// `None` disables expansion.
    sym_table: Option<HashMap<&'static str, String>>,

    /// Source file reported in panic messages.
    file: &'static str,
    /// Source line reported in panic messages.
    line: u32,

    /// The command text to parse.
    src: String,
}
14
15impl Parser {
16    pub fn new<S: Into<String>>(src: S) -> Self {
17        Self {
18            str_lits: None,
19            sym_table: None,
20            file: "",
21            line: 0,
22            src: src.into(),
23        }
24    }
25
26    pub fn with_lits(&mut self, str_lits: VecDeque<String>) -> &mut Self {
27        self.str_lits = Some(str_lits);
28        self
29    }
30
31    pub fn with_sym_table(&mut self, sym_table: HashMap<&'static str, String>) -> &mut Self {
32        self.sym_table = Some(sym_table);
33        self
34    }
35
36    pub fn with_location(&mut self, file: &'static str, line: u32) -> &mut Self {
37        self.file = file;
38        self.line = line;
39        self
40    }
41
42    fn resolve_name(&self, src: String) -> String {
43        if self.sym_table.is_none() {
44            return src;
45        }
46
47        let mut output = String::new();
48        let input: Vec<char> = src.chars().collect();
49        let len = input.len();
50
51        let mut i = 0;
52        while i < len {
53            if input[i] == '$' && (i == 0 || input[i - 1] != '\\') {
54                i += 1;
55                let with_bracket = i < len && input[i] == '{';
56                let mut var = String::new();
57                if with_bracket { i += 1; }
58                while i < len
59                    && ((input[i] >= 'a' && input[i] <= 'z')
60                        || (input[i] >= 'A' && input[i] <= 'Z')
61                        || (input[i] >= '0' && input[i] <= '9')
62                        || (input[i] == '_'))
63                {
64                    var.push(input[i]);
65                    i += 1;
66                }
67                if with_bracket {
68                    if input[i] != '}' {
69                        panic!("invalid name {}, {}:{}\n{}", var, self.file, self.line, src);
70                    }
71                } else {
72                    i -= 1; // back off 1 char
73                }
74                match self.sym_table.as_ref().unwrap().get(var.as_str()) {
75                    None => panic!("resolve {} failed, {}:{}\n{}", var, self.file, self.line, src),
76                    Some(v) => output += v,
77                };
78            } else {
79                output.push(input[i]);
80            }
81            i += 1;
82        }
83
84        output
85    }
86
87    pub fn parse(&mut self) -> GroupCmds {
88        let mut ret = GroupCmds::new();
89        let s: Vec<char> = self.src.chars().collect();
90        let len = s.len();
91        let mut i = 0;
92
93        // skip leading spaces
94        while i < len  && char::is_whitespace(s[i]) { i += 1; }
95        if i == len { return ret; }
96
97        // skip variables declaration part
98        if i < len && s[i] == '|' {
99            i += 1;
100            while i < len && s[i] != '|' { i += 1; }
101            i += 1;
102        }
103
104        // real commands parsing starts
105        while i < len {
106            while i < len && char::is_whitespace(s[i]) { i += 1; }
107            if i == len { break; }
108
109            let cmd = self.parse_cmd(&s, &mut i);
110            if !cmd.0.is_empty() {
111                ret.add(cmd.0, cmd.1);
112            }
113
114            // skip comments
115            while i < len  && char::is_whitespace(s[i]) { i += 1; }
116            if i == len { break; }
117            if i + 1 < len && s[i] == '/' && s[i + 1] == '/' {
118                i += 2;
119                while i < len && s[i] != '\n' { i += 1; }
120            }
121        }
122        ret
123    }
124
125    fn parse_cmd(&mut self, s: &Vec<char>, i: &mut usize) -> (Cmds, Option<Cmds>) {
126        let mut ret = vec![Cmds::new(), Cmds::new()];
127        let len = s.len();
128        for j in 0..2 {
129            while *i < len && s[*i] != ';' {
130                while *i < len && char::is_whitespace(s[*i]) { *i += 1; }
131                if *i == len { break; }
132
133                let cmd = self.parse_pipe(s, i);
134                if !cmd.is_empty() {
135                    ret[j].pipe(cmd);
136                }
137                if *i < len && s[*i] == '|' {
138                    break;
139                }
140            }
141            if *i < len && s[*i] == '|' {
142                assert_eq!(s[*i + 1], '|');
143                *i += 2;    // skip "||" operator
144            } else {
145                break;
146            }
147        }
148        if *i < len && s[*i] == ';' { *i += 1; }
149        let (ret1, ret0) = (ret.pop().unwrap(), ret.pop().unwrap());
150        (ret0, if ret1.is_empty() { None } else { Some(ret1) })
151    }
152
153    fn parse_pipe(&mut self, s: &Vec<char>, i: &mut usize) -> Cmd {
154        let mut ret = Cmd::new();
155        let len = s.len();
156        while *i < len && s[*i] != '|' && s[*i] != ';' {
157            while *i < len && char::is_whitespace(s[*i]) { *i += 1; }
158            if *i == len { break; }
159            let mut arg = String::new();
160            while *i < len &&
161                  !(s[*i] == '|' || s[*i] == ';' || char::is_whitespace(s[*i])) {
162                if s[*i] == 'r' || s[*i] == 'b' ||
163                   (s[*i] == '\"' && (*i == 0 || s[*i - 1] != '\\')) {
164                    arg += &self.parse_str_lit(s, i);
165                }
166
167                if *i < len && s[*i] == '>' {
168                    *i += 1;
169                    if !arg.is_empty() {
170                        if arg == "&" {     // "&> f" equals to ">&2 2>f"
171                            ret.set_redirect(2, self.parse_redirect(s, i));
172                            ret.set_redirect(1, FdOrFile::Fd(2, false));
173                            arg.clear();
174                        } else if let Ok(fd) = arg.parse::<i32>() {
175                            if fd != 1 && fd != 2 {
176                                panic!("fd redirect only support stdout(1) and stderr(2) {}:{}", self.file, self.line);
177                            }
178                            ret.set_redirect(fd, self.parse_redirect(s, i));
179                            arg.clear();
180                        } else {
181                            ret.set_redirect(1, self.parse_redirect(s, i));
182                        }
183                    } else {
184                        ret.set_redirect(1, self.parse_redirect(s, i));
185                    }
186                }
187
188                if *i < len && s[*i] == '<' {
189                    *i += 1;
190                    ret.set_redirect(0, self.parse_redirect(s, i));
191                }
192
193                let arg1 = self.parse_normal_arg(s, i);
194                arg += &self.resolve_name(arg1);
195            }
196            if !arg.is_empty() {
197                ret.add_arg(arg);
198            }
199        }
200        if *i < len && s[*i] == '|' {
201            if *i + 1 < len && s[*i + 1] != '|' {
202                *i += 1;
203            }
204        }
205        ret
206    }
207
208    fn parse_normal_arg(&mut self, s: &Vec<char>, i: &mut usize) -> String {
209        let mut arg = String::new();
210        let len = s.len();
211        while *i < len &&
212              !(s[*i] == '|' || s[*i] == ';' || char::is_whitespace(s[*i])) {
213            if s[*i] == '\"' && s[*i - 1] != '\\' { // normal string literal
214                break;
215            }
216
217            if s[*i] == 'r' || s[*i] == 'b' {
218                let mut j = *i + 1;
219                while j < len && s[j] == '#' { j += 1; }
220                if j < len && s[j] == '\"' {        // raw string literal
221                    break;
222                }
223            }
224
225            if s[*i] == '>' {                       // stdout redirect
226                break;
227            }
228
229            if s[*i] == '<' {                       // stdin redirect
230                break;
231            }
232
233            arg.push(s[*i]);
234            *i += 1;
235        }
236        arg
237    }
238
239    fn parse_redirect(&mut self, s: &Vec<char>, i: &mut usize) -> FdOrFile {
240        let mut append = false;
241        let len = s.len();
242
243        if *i < len && s[*i] == '>' {
244            append = true;
245            *i += 1;
246        }
247
248        if *i < len && s[*i] == '&' {
249            let mut fd_str = String::new();
250            *i += 1;
251            while *i < len && s[*i].is_digit(10) {
252                fd_str.push(s[*i]);
253                *i += 1;
254            }
255            return FdOrFile::Fd(fd_str.parse().unwrap(), append);
256        }
257
258        while *i < len && char::is_whitespace(s[*i]) {
259            *i += 1;
260        }
261
262        if s[*i] == '&' {
263            panic!("syntax error near unexpected token `&' at {}:{}", self.file, self.line);
264        }
265
266        if s[*i] == 'r' || s[*i] == 'b' ||
267           (s[*i] == '\"' && (*i == 0 || s[*i - 1] != '\\')) {
268            let file = self.parse_str_lit(s, i);
269            if !file.is_empty() {
270                return FdOrFile::File(file, append);
271            }
272        }
273
274        let file = self.parse_normal_arg(s, i);
275        FdOrFile::File(self.resolve_name(file), append)
276    }
277
278    fn parse_str_lit(&mut self, s: &Vec<char>, i: &mut usize) -> String {
279        let mut str_lit = String::new();
280        let len = s.len();
281        let mut is_str_lit = false;
282        let mut is_raw = false;
283        let mut cnt = 0;    // '#' counts for raw string literal
284        if s[*i] == 'r' || s[*i] == 'b' {
285            let mut j = *i + 1;
286            while j < len && s[j] == '#' { j += 1; }
287            if j < len && s[j] == '\"' {
288                is_str_lit = true;
289                is_raw = true;
290                cnt = j - *i - 1;
291                *i = j + 1;
292            }
293        } else if s[*i] == '\"' && (*i == 0 || s[*i - 1] != '\\') {
294            is_str_lit = true;
295            *i += 1;
296        }
297
298        if !is_str_lit {
299            return "".to_string();
300        }
301
302        let mut found_end = false;
303        while *i < len && !found_end {
304            if s[*i] == '\"' {
305                let mut cnt2 = cnt;
306                let mut j = *i + 1;
307                while j < len && cnt2 > 0 && s[j] == '#' {
308                    cnt2 -= 1;
309                    j += 1;
310                }
311                if cnt2 == 0 {
312                    found_end = true;
313                    *i = j;
314                    break;
315                }
316            }
317            str_lit.push(s[*i]);
318            *i += 1;
319        }
320        if !found_end {
321            panic!("invalid raw string literal at {}:{}", self.file, self.line);
322        }
323
324        if self.str_lits.is_none() {
325            return str_lit;
326        }
327
328        str_lit = self.str_lits.as_mut().unwrap().pop_front().unwrap().to_string();
329        if is_raw {
330            return str_lit; // don't resolve names for raw string literals
331        } else {
332            return self.resolve_name(str_lit);
333        }
334    }
335}
336
#[cfg(test)]
mod tests {
    use super::*;

    /// A failing command followed by `|| true` must still let the whole
    /// group run successfully.
    #[test]
    fn test_parser_or_cmd() {
        let script = "ls /nofile || true; echo continue";
        let outcome = Parser::new(script).parse().run_cmd();
        assert!(outcome.is_ok());
    }

    /// Both file and descriptor stdout redirections must parse; afterwards
    /// removing the target file is expected to succeed.
    #[test]
    fn test_parser_stdout_redirect() {
        for script in &["echo rust > /tmp/echo_rust", "echo rust >&2"] {
            Parser::new(*script).parse();
        }
        let cleanup = Parser::new("rm /tmp/echo_rust").parse().run_cmd();
        assert!(cleanup.is_ok());
    }
}
356