nu_parser/
lite_parser.rs

//! Lite parsing converts the flat stream of tokens produced by the lexer into a structure of
//! blocks, pipelines and commands that the full parser can then process.
3
4use crate::{Token, TokenContents};
5use itertools::{Either, Itertools};
6use nu_protocol::{ast::RedirectionSource, engine::StateWorkingSet, ParseError, Span};
7use std::mem;
8
/// Where a single redirection sends its output, recorded purely as token spans.
#[derive(Debug, Clone, Copy)]
pub enum LiteRedirectionTarget {
    /// Redirection into a file.
    File {
        /// Span of the redirection operator token.
        connector: Span,
        /// Span of the token that follows the operator and names the target.
        file: Span,
        /// `true` when the operator was one of the `...GreaterGreaterThan` tokens,
        /// `false` for the `...GreaterThan` ones (see `lite_parse`).
        append: bool,
    },
    /// Redirection into the next pipeline element (`ErrGreaterPipe` / `OutErrGreaterPipe`).
    Pipe {
        /// Span of the piping redirection operator token.
        connector: Span,
    },
}
20
21impl LiteRedirectionTarget {
22    pub fn connector(&self) -> Span {
23        match self {
24            LiteRedirectionTarget::File { connector, .. }
25            | LiteRedirectionTarget::Pipe { connector } => *connector,
26        }
27    }
28
29    pub fn spans(&self) -> impl Iterator<Item = Span> {
30        match *self {
31            LiteRedirectionTarget::File {
32                connector, file, ..
33            } => Either::Left([connector, file].into_iter()),
34            LiteRedirectionTarget::Pipe { connector } => Either::Right(std::iter::once(connector)),
35        }
36    }
37}
38
/// The redirection(s) attached to one `LiteCommand`.
#[derive(Debug, Clone)]
pub enum LiteRedirection {
    /// Exactly one redirection: `source` is sent to `target`.
    Single {
        /// Which stream(s) are being redirected.
        source: RedirectionSource,
        /// Where they are being sent.
        target: LiteRedirectionTarget,
    },
    /// Stdout and stderr are each redirected to their own target. Built by
    /// `LiteCommand::try_add_redirection` when a stdout redirection and a stderr redirection
    /// are both given for the same command.
    Separate {
        /// Target of the stdout redirection.
        out: LiteRedirectionTarget,
        /// Target of the stderr redirection.
        err: LiteRedirectionTarget,
    },
}
50
51impl LiteRedirection {
52    pub fn spans(&self) -> impl Iterator<Item = Span> {
53        match self {
54            LiteRedirection::Single { target, .. } => Either::Left(target.spans()),
55            LiteRedirection::Separate { out, err } => {
56                Either::Right(out.spans().chain(err.spans()).sorted())
57            }
58        }
59    }
60}
61
/// One command of a pipeline, stored as the spans of the tokens it is made of.
#[derive(Debug, Clone, Default)]
pub struct LiteCommand {
    /// Span of the pipe-like token (`Pipe`, `ErrGreaterPipe` or `OutErrGreaterPipe`) that
    /// `lite_parse` saw before this command's parts, if any.
    pub pipe: Option<Span>,
    /// Spans of the comment tokens attached to this command.
    pub comments: Vec<Span>,
    /// Spans of the tokens pushed onto this command, in push order. Attribute tokens come
    /// first when `attribute_idx` is non-empty.
    pub parts: Vec<Span>,
    /// Redirection(s) recorded for this command, if any.
    pub redirection: Option<LiteRedirection>,
    /// One-past-the-end indices into `parts` of each attribute. Everything from the last
    /// index onward is the command proper (see `command_parts`).
    pub attribute_idx: Vec<usize>,
}
71
72impl LiteCommand {
73    fn push(&mut self, span: Span) {
74        self.parts.push(span);
75    }
76
77    fn check_accepts_redirection(&self, span: Span) -> Option<ParseError> {
78        self.parts
79            .is_empty()
80            .then_some(ParseError::UnexpectedRedirection { span })
81    }
82
83    fn try_add_redirection(
84        &mut self,
85        source: RedirectionSource,
86        target: LiteRedirectionTarget,
87    ) -> Result<(), ParseError> {
88        let redirection = match (self.redirection.take(), source) {
89            (None, _) if self.parts.is_empty() => Err(ParseError::UnexpectedRedirection {
90                span: target.connector(),
91            }),
92            (None, source) => Ok(LiteRedirection::Single { source, target }),
93            (
94                Some(LiteRedirection::Single {
95                    source: RedirectionSource::Stdout,
96                    target: out,
97                }),
98                RedirectionSource::Stderr,
99            ) => Ok(LiteRedirection::Separate { out, err: target }),
100            (
101                Some(LiteRedirection::Single {
102                    source: RedirectionSource::Stderr,
103                    target: err,
104                }),
105                RedirectionSource::Stdout,
106            ) => Ok(LiteRedirection::Separate { out: target, err }),
107            (
108                Some(LiteRedirection::Single {
109                    source,
110                    target: first,
111                }),
112                _,
113            ) => Err(ParseError::MultipleRedirections(
114                source,
115                first.connector(),
116                target.connector(),
117            )),
118            (
119                Some(LiteRedirection::Separate { out, .. }),
120                RedirectionSource::Stdout | RedirectionSource::StdoutAndStderr,
121            ) => Err(ParseError::MultipleRedirections(
122                RedirectionSource::Stdout,
123                out.connector(),
124                target.connector(),
125            )),
126            (Some(LiteRedirection::Separate { err, .. }), RedirectionSource::Stderr) => {
127                Err(ParseError::MultipleRedirections(
128                    RedirectionSource::Stderr,
129                    err.connector(),
130                    target.connector(),
131                ))
132            }
133        }?;
134
135        self.redirection = Some(redirection);
136
137        Ok(())
138    }
139
140    pub fn parts_including_redirection(&self) -> impl Iterator<Item = Span> + '_ {
141        self.parts
142            .iter()
143            .copied()
144            .chain(
145                self.redirection
146                    .iter()
147                    .flat_map(|redirection| redirection.spans()),
148            )
149            .sorted_unstable_by_key(|a| (a.start, a.end))
150    }
151
152    pub fn command_parts(&self) -> &[Span] {
153        let command_start = self.attribute_idx.last().copied().unwrap_or(0);
154        &self.parts[command_start..]
155    }
156
157    pub fn has_attributes(&self) -> bool {
158        !self.attribute_idx.is_empty()
159    }
160
161    pub fn attribute_commands(&'_ self) -> impl Iterator<Item = LiteCommand> + '_ {
162        std::iter::once(0)
163            .chain(self.attribute_idx.iter().copied())
164            .tuple_windows()
165            .map(|(s, e)| LiteCommand {
166                parts: self.parts[s..e].to_owned(),
167                ..Default::default()
168            })
169    }
170}
171
/// A sequence of commands that belong to the same pipeline.
#[derive(Debug, Clone, Default)]
pub struct LitePipeline {
    /// The pipeline's commands, in the order they were pushed.
    pub commands: Vec<LiteCommand>,
}
176
177impl LitePipeline {
178    fn push(&mut self, element: &mut LiteCommand) {
179        if !element.parts.is_empty() || element.redirection.is_some() {
180            self.commands.push(mem::take(element));
181        }
182    }
183}
184
/// The result of lite parsing: a list of pipelines.
#[derive(Debug, Clone, Default)]
pub struct LiteBlock {
    /// The block's pipelines, in the order they were pushed.
    pub block: Vec<LitePipeline>,
}
189
190impl LiteBlock {
191    fn push(&mut self, pipeline: &mut LitePipeline) {
192        if !pipeline.commands.is_empty() {
193            self.block.push(mem::take(pipeline));
194        }
195    }
196}
197
198fn last_non_comment_token(tokens: &[Token], cur_idx: usize) -> Option<TokenContents> {
199    let mut expect = TokenContents::Comment;
200    for token in tokens.iter().take(cur_idx).rev() {
201        // skip ([Comment]+ [Eol]) pair
202        match (token.contents, expect) {
203            (TokenContents::Comment, TokenContents::Comment)
204            | (TokenContents::Comment, TokenContents::Eol) => expect = TokenContents::Eol,
205            (TokenContents::Eol, TokenContents::Eol) => expect = TokenContents::Comment,
206            (token, _) => return Some(token),
207        }
208    }
209    None
210}
211
/// Scanning state of `lite_parse`.
#[derive(PartialEq, Eq)]
enum Mode {
    /// Entered on an `AssignmentOperator` token. Until a `Semicolon`, or an `Eol` that does
    /// not continue a pipe, every token other than a comment is pushed as a plain part, so
    /// pipes and redirections are absorbed into the command.
    Assignment,
    /// Entered when an item starting with `@` follows an `Eol` or `Semicolon`. Tokens are
    /// pushed as parts until the next `Eol` or `Semicolon`, where the attribute boundary is
    /// recorded.
    Attribute,
    /// Default state: tokens are split into commands, pipelines and redirections.
    Normal,
}
218
/// Groups a flat token stream into a [`LiteBlock`] of pipelines and commands.
///
/// Tokens are consumed one at a time by a small state machine (see [`Mode`]). Each command
/// collects the spans of its tokens. Pipe tokens end a command, while `Semicolon` and `Eol`
/// (unless the line continues a pipe) end the whole pipeline. Redirection operators are
/// paired with the token that follows them and stored on the command.
///
/// `working_set` is only used to read a token's source bytes in order to detect items that
/// start with `@` (attributes).
///
/// Returns the block together with at most one error. Only the first error recorded during
/// the scan is kept. If `last_non_comment_token` over the whole stream reports a `Pipe`,
/// an `UnexpectedEof("pipeline missing end")` at the last token's span is returned instead
/// of any error recorded earlier.
pub fn lite_parse(
    tokens: &[Token],
    working_set: &StateWorkingSet,
) -> (LiteBlock, Option<ParseError>) {
    if tokens.is_empty() {
        return (LiteBlock::default(), None);
    }

    let mut block = LiteBlock::default();
    let mut pipeline = LitePipeline::default();
    let mut command = LiteCommand::default();

    // Starts as `Eol` so the first token is treated like the start of a line.
    let mut last_token = TokenContents::Eol;
    // A file redirection operator `(source, append, operator span)` still waiting for the
    // token that names its target.
    let mut file_redirection = None;
    // Comments found on their own lines, held until they can be attached to a command.
    let mut curr_comment: Option<Vec<Span>> = None;
    let mut mode = Mode::Normal;
    // Only the first error is kept: every update below goes through `error.or(..)`.
    let mut error = None;

    for (idx, token) in tokens.iter().enumerate() {
        match mode {
            Mode::Attribute => {
                match &token.contents {
                    // Consume until semicolon or terminating EOL. Attributes can't contain pipelines or redirections.
                    TokenContents::Eol | TokenContents::Semicolon => {
                        // Mark where this attribute ends inside `command.parts`.
                        command.attribute_idx.push(command.parts.len());
                        mode = Mode::Normal;
                        if matches!(last_token, TokenContents::Eol | TokenContents::Semicolon) {
                            // Two terminators in a row: drop any pending standalone comments
                            // and flush what has been collected so far.
                            curr_comment = None;
                            pipeline.push(&mut command);
                            block.push(&mut pipeline);
                        }
                    }
                    TokenContents::Comment => {
                        command.comments.push(token.span);
                        curr_comment = None;
                    }
                    _ => command.push(token.span),
                }
            }
            Mode::Assignment => {
                match &token.contents {
                    // Consume until semicolon or terminating EOL. Assignments absorb pipelines and
                    // redirections.
                    TokenContents::Eol => {
                        // Handle `[Command] [Pipe] ([Comment] | [Eol])+ [Command]`
                        //
                        // The `[Eol]` branch checks whether the previous significant token is a
                        // `[Pipe]`; intervening `[Comment] | [Eol]` tokens are ignored so that a
                        // pipeline can continue on the next line.
                        let actual_token = last_non_comment_token(tokens, idx);
                        if actual_token != Some(TokenContents::Pipe) {
                            mode = Mode::Normal;
                            pipeline.push(&mut command);
                            block.push(&mut pipeline);
                        }

                        if last_token == TokenContents::Eol {
                            // Blank line: drop any pending standalone comments.
                            curr_comment = None;
                        }
                    }
                    TokenContents::Semicolon => {
                        mode = Mode::Normal;
                        pipeline.push(&mut command);
                        block.push(&mut pipeline);
                    }
                    TokenContents::Comment => {
                        command.comments.push(token.span);
                        curr_comment = None;
                    }
                    _ => command.push(token.span),
                }
            }
            Mode::Normal => {
                // A file redirection operator was the previous token: this token must be its
                // target. In every failing case the operator span is pushed back as a plain
                // part so it is not lost.
                if let Some((source, append, span)) = file_redirection.take() {
                    match &token.contents {
                        TokenContents::PipePipe => {
                            error = error.or(Some(ParseError::ShellOrOr(token.span)));
                            command.push(span);
                            command.push(token.span);
                        }
                        TokenContents::Item => {
                            let target = LiteRedirectionTarget::File {
                                connector: span,
                                file: token.span,
                                append,
                            };
                            if let Err(err) = command.try_add_redirection(source, target) {
                                // Could not be recorded as a redirection: keep both tokens
                                // as plain parts.
                                error = error.or(Some(err));
                                command.push(span);
                                command.push(token.span)
                            }
                        }
                        TokenContents::AssignmentOperator => {
                            error = error
                                .or(Some(ParseError::Expected("redirection target", token.span)));
                            command.push(span);
                            command.push(token.span);
                        }
                        TokenContents::OutGreaterThan
                        | TokenContents::OutGreaterGreaterThan
                        | TokenContents::ErrGreaterThan
                        | TokenContents::ErrGreaterGreaterThan
                        | TokenContents::OutErrGreaterThan
                        | TokenContents::OutErrGreaterGreaterThan => {
                            error = error
                                .or(Some(ParseError::Expected("redirection target", token.span)));
                            command.push(span);
                            command.push(token.span);
                        }
                        TokenContents::Pipe
                        | TokenContents::ErrGreaterPipe
                        | TokenContents::OutErrGreaterPipe => {
                            error = error
                                .or(Some(ParseError::Expected("redirection target", token.span)));
                            command.push(span);
                            pipeline.push(&mut command);
                            command.pipe = Some(token.span);
                        }
                        TokenContents::Eol => {
                            error = error
                                .or(Some(ParseError::Expected("redirection target", token.span)));
                            command.push(span);
                            pipeline.push(&mut command);
                        }
                        TokenContents::Semicolon => {
                            error = error
                                .or(Some(ParseError::Expected("redirection target", token.span)));
                            command.push(span);
                            pipeline.push(&mut command);
                            block.push(&mut pipeline);
                        }
                        TokenContents::Comment => {
                            // Unlike the other cases, the error points at the operator span
                            // rather than at the current token.
                            error =
                                error.or(Some(ParseError::Expected("redirection target", span)));
                            command.push(span);
                            command.comments.push(token.span);
                            curr_comment = None;
                        }
                    }
                } else {
                    match &token.contents {
                        TokenContents::PipePipe => {
                            error = error.or(Some(ParseError::ShellOrOr(token.span)));
                            command.push(token.span);
                        }
                        TokenContents::Item => {
                            // FIXME: This is commented out to preserve old parser behavior,
                            // but we should probably error here.
                            //
                            // if element.redirection.is_some() {
                            //     error = error.or(Some(ParseError::LabeledError(
                            //         "Unexpected positional".into(),
                            //         "cannot add positional arguments after output redirection".into(),
                            //         token.span,
                            //     )));
                            // }
                            //
                            // For example, this is currently allowed: ^echo thing o> out.txt extra_arg

                            if working_set.get_span_contents(token.span).starts_with(b"@") {
                                // An `@...` item directly after a line or statement
                                // terminator starts an attribute; elsewhere it is an
                                // ordinary part.
                                if matches!(
                                    last_token,
                                    TokenContents::Eol | TokenContents::Semicolon
                                ) {
                                    mode = Mode::Attribute;
                                }
                                command.push(token.span);
                            } else {
                                // If we have a comment, go ahead and attach it
                                if let Some(curr_comment) = curr_comment.take() {
                                    command.comments = curr_comment;
                                }
                                command.push(token.span);
                            }
                        }
                        TokenContents::AssignmentOperator => {
                            // When in assignment mode, we'll just consume pipes or redirections as part of
                            // the command.
                            mode = Mode::Assignment;
                            if let Some(curr_comment) = curr_comment.take() {
                                command.comments = curr_comment;
                            }
                            command.push(token.span);
                        }
                        // File redirection operators: remember the operator and wait for the
                        // next token to supply the target.
                        TokenContents::OutGreaterThan => {
                            error = error.or(command.check_accepts_redirection(token.span));
                            file_redirection = Some((RedirectionSource::Stdout, false, token.span));
                        }
                        TokenContents::OutGreaterGreaterThan => {
                            error = error.or(command.check_accepts_redirection(token.span));
                            file_redirection = Some((RedirectionSource::Stdout, true, token.span));
                        }
                        TokenContents::ErrGreaterThan => {
                            error = error.or(command.check_accepts_redirection(token.span));
                            file_redirection = Some((RedirectionSource::Stderr, false, token.span));
                        }
                        TokenContents::ErrGreaterGreaterThan => {
                            error = error.or(command.check_accepts_redirection(token.span));
                            file_redirection = Some((RedirectionSource::Stderr, true, token.span));
                        }
                        TokenContents::OutErrGreaterThan => {
                            error = error.or(command.check_accepts_redirection(token.span));
                            file_redirection =
                                Some((RedirectionSource::StdoutAndStderr, false, token.span));
                        }
                        TokenContents::OutErrGreaterGreaterThan => {
                            error = error.or(command.check_accepts_redirection(token.span));
                            file_redirection =
                                Some((RedirectionSource::StdoutAndStderr, true, token.span));
                        }
                        // Piping redirections: record the redirection on the current command,
                        // then start the next command with this token as its pipe.
                        TokenContents::ErrGreaterPipe => {
                            let target = LiteRedirectionTarget::Pipe {
                                connector: token.span,
                            };
                            if let Err(err) =
                                command.try_add_redirection(RedirectionSource::Stderr, target)
                            {
                                error = error.or(Some(err));
                            }
                            pipeline.push(&mut command);
                            command.pipe = Some(token.span);
                        }
                        TokenContents::OutErrGreaterPipe => {
                            let target = LiteRedirectionTarget::Pipe {
                                connector: token.span,
                            };
                            if let Err(err) = command
                                .try_add_redirection(RedirectionSource::StdoutAndStderr, target)
                            {
                                error = error.or(Some(err));
                            }
                            pipeline.push(&mut command);
                            command.pipe = Some(token.span);
                        }
                        TokenContents::Pipe => {
                            pipeline.push(&mut command);
                            command.pipe = Some(token.span);
                        }
                        TokenContents::Eol => {
                            // Handle `[Command] [Pipe] ([Comment] | [Eol])+ [Command]`
                            //
                            // The `[Eol]` branch checks whether the previous significant token is
                            // a `[Pipe]`; intervening `[Comment] | [Eol]` tokens are ignored so
                            // that a pipeline can continue on the next line.
                            let actual_token = last_non_comment_token(tokens, idx);
                            if actual_token != Some(TokenContents::Pipe) {
                                pipeline.push(&mut command);
                                block.push(&mut pipeline);
                            }

                            if last_token == TokenContents::Eol {
                                // Blank line: drop any pending standalone comments.
                                curr_comment = None;
                            }
                        }
                        TokenContents::Semicolon => {
                            pipeline.push(&mut command);
                            block.push(&mut pipeline);
                        }
                        TokenContents::Comment => {
                            // Comment is beside something
                            if last_token != TokenContents::Eol {
                                command.comments.push(token.span);
                                curr_comment = None;
                            } else {
                                // Comment precedes something
                                if let Some(curr_comment) = &mut curr_comment {
                                    curr_comment.push(token.span);
                                } else {
                                    curr_comment = Some(vec![token.span]);
                                }
                            }
                        }
                    }
                }
            }
        }

        last_token = token.contents;
    }

    // The stream ended right after a file redirection operator: keep the operator as a part
    // and report the missing target.
    if let Some((_, _, span)) = file_redirection {
        command.push(span);
        error = error.or(Some(ParseError::Expected("redirection target", span)));
    }

    // The stream ended inside an attribute: close it at the current end of `parts`.
    if let Mode::Attribute = mode {
        command.attribute_idx.push(command.parts.len());
    }

    // Flush whatever is still being built.
    pipeline.push(&mut command);
    block.push(&mut pipeline);

    // A dangling pipe at the end takes precedence over any error recorded earlier.
    // Indexing is safe here: `tokens` was checked to be non-empty at the top.
    if last_non_comment_token(tokens, tokens.len()) == Some(TokenContents::Pipe) {
        (
            block,
            Some(ParseError::UnexpectedEof(
                "pipeline missing end".into(),
                tokens[tokens.len() - 1].span,
            )),
        )
    } else {
        (block, error)
    }
}