Skip to main content

nu_parser/
parser.rs

1#![allow(clippy::byte_char_slices)]
2
3use crate::{
4    Token, TokenContents,
5    lex::{LexState, is_assignment_operator, lex, lex_n_tokens, lex_signature},
6    lite_parser::{LiteCommand, LitePipeline, LiteRedirection, LiteRedirectionTarget, lite_parse},
7    parse_keywords::*,
8    parse_patterns::parse_pattern,
9    parse_shape_specs::{parse_completer, parse_shape_name, parse_type},
10    type_check::{self, check_range_types, math_result_type, type_compatible},
11};
12use itertools::Itertools;
13use log::trace;
14use nu_engine::DIR_VAR_PARSER_INFO;
15use nu_protocol::{
16    BlockId, DeclId, DidYouMean, ENV_VARIABLE_ID, FilesizeUnit, Flag, IN_VARIABLE_ID, ParseError,
17    PositionalArg, ShellError, Signature, Span, Spanned, SyntaxShape, Type, Value, VarId,
18    ast::*,
19    casing::Casing,
20    did_you_mean,
21    engine::{CommandType, StateWorkingSet},
22    eval_const::eval_constant,
23};
24use std::{
25    collections::{HashMap, HashSet},
26    str,
27    sync::Arc,
28};
29
30pub(crate) const PERCENT_FORCED_BUILTIN_PARSER_INFO: &str = "percent_forced_builtin";
31
32pub fn garbage(working_set: &mut StateWorkingSet, span: Span) -> Expression {
33    Expression::garbage(working_set, span)
34}
35
36pub fn garbage_pipeline(working_set: &mut StateWorkingSet, spans: &[Span]) -> Pipeline {
37    Pipeline::from_vec(vec![garbage(working_set, Span::concat(spans))])
38}
39
/// Returns `true` when `b` may appear in an identifier, i.e. when it is none of
/// the bytes `.[({+-*^%/=!<>&|`.
///
/// Takes `&u8` so it can be passed directly to iterator adaptors over `&[u8]`.
fn is_identifier_byte(b: &u8) -> bool {
    !matches!(
        *b,
        b'.' | b'['
            | b'('
            | b'{'
            | b'+'
            | b'-'
            | b'*'
            | b'^'
            | b'%'
            | b'/'
            | b'='
            | b'!'
            | b'<'
            | b'>'
            | b'&'
            | b'|'
    )
}
43
44pub fn is_math_expression_like(working_set: &mut StateWorkingSet, span: Span) -> bool {
45    let bytes = working_set.get_span_contents(span);
46    match bytes {
47        [] => return false,
48        b"true" | b"false" | b"null" | b"not" | b"if" | b"match" => return true,
49        [b'r', b'#', ..] => return true,
50        [b'(' | b'{' | b'[' | b'$' | b'"' | b'\'' | b'-', ..] => return true,
51        _ => {}
52    }
53
54    let starting_error_count = working_set.parse_errors.len();
55
56    // Number
57    parse_number(working_set, span);
58    if working_set.parse_errors.len() == starting_error_count {
59        return true;
60    }
61    working_set.parse_errors.truncate(starting_error_count);
62
63    // Filesize
64    parse_filesize(working_set, span);
65    if working_set.parse_errors.len() == starting_error_count {
66        return true;
67    }
68    working_set.parse_errors.truncate(starting_error_count);
69
70    parse_duration(working_set, span);
71    if working_set.parse_errors.len() == starting_error_count {
72        return true;
73    }
74    working_set.parse_errors.truncate(starting_error_count);
75
76    parse_datetime(working_set, span);
77    if working_set.parse_errors.len() == starting_error_count {
78        return true;
79    }
80    working_set.parse_errors.truncate(starting_error_count);
81
82    parse_binary(working_set, span);
83    // We need an additional negate match to check if the last error was unexpected
84    // or more specifically, if it was `ParseError::InvalidBinaryString`.
85    // If so, we suppress the error and stop parsing to the next (which is `parse_range()`).
86    if working_set.parse_errors.len() == starting_error_count {
87        return true;
88    } else if !matches!(
89        working_set.parse_errors.last(),
90        Some(ParseError::Expected(_, _))
91    ) {
92        working_set.parse_errors.truncate(starting_error_count);
93        return true;
94    }
95    working_set.parse_errors.truncate(starting_error_count);
96
97    let is_range = parse_range(working_set, span).is_some();
98    working_set.parse_errors.truncate(starting_error_count);
99    is_range
100}
101
/// Returns `true` when `bytes` is a non-empty name that starts with `_` or an
/// ASCII letter and continues with only ASCII alphanumerics or `_`.
fn is_env_variable_name(bytes: &[u8]) -> bool {
    let Some((&head, tail)) = bytes.split_first() else {
        return false;
    };

    let head_ok = head == b'_' || head.is_ascii_alphabetic();
    head_ok
        && tail
            .iter()
            .all(|&byte| byte == b'_' || byte.is_ascii_alphanumeric())
}
110
111fn is_identifier(bytes: &[u8]) -> bool {
112    bytes.iter().all(is_identifier_byte)
113}
114
115pub fn is_variable(bytes: &[u8]) -> bool {
116    match bytes {
117        [b'$', var @ ..] | var if !var.is_empty() => is_identifier(var),
118        _ => false,
119    }
120}
121
/// Strips one pair of matching surrounding quotes (`'`, `"` or `` ` ``) from
/// `bytes`.
///
/// The input is returned unchanged when it is shorter than two bytes or when
/// its first and last bytes are not the same quote character.
pub fn trim_quotes(bytes: &[u8]) -> &[u8] {
    if let [open, inner @ .., close] = bytes {
        let is_quote = matches!(*open, b'\'' | b'"' | b'`');
        if is_quote && open == close {
            return inner;
        }
    }
    bytes
}
131
/// String counterpart of [`trim_quotes`]: strips one pair of matching
/// surrounding quotes (`'`, `"` or `` ` ``) from `s`.
///
/// The input is returned unchanged when it is not wrapped in a matching pair.
pub fn trim_quotes_str(s: &str) -> &str {
    for quote in ['\'', '"', '`'] {
        let unwrapped = s
            .strip_prefix(quote)
            .and_then(|rest| rest.strip_suffix(quote));
        if let Some(inner) = unwrapped {
            return inner;
        }
    }
    s
}
141
/// Return type of `check_call`
#[derive(Debug, PartialEq, Eq)]
pub enum CallKind {
    /// The call was given the `help` flag; no further validation was done.
    Help,
    /// All required positionals and required flags were supplied.
    Valid,
    /// A required positional or flag is missing; a parse error was recorded.
    Invalid,
}
149
150pub(crate) fn check_call(
151    working_set: &mut StateWorkingSet,
152    command: Span,
153    sig: &Signature,
154    call: &Call,
155) -> CallKind {
156    // Allow the call to pass if they pass in the help flag
157    if call.named_iter().any(|(n, _, _)| n.item == "help") {
158        return CallKind::Help;
159    }
160
161    if call.positional_iter().count() < sig.required_positional.len() {
162        let end_offset = call
163            .positional_iter()
164            .last()
165            .map(|last| last.span.end)
166            .unwrap_or(command.end);
167        // Comparing the types of all signature positional arguments against the parsed
168        // expressions found in the call. If one type is not found then it could be assumed
169        // that positional argument is missing from the parsed call
170        for argument in &sig.required_positional {
171            let found = call.positional_iter().fold(false, |ac, expr| {
172                if argument.shape.to_type() == expr.ty || argument.shape == SyntaxShape::Any {
173                    true
174                } else {
175                    ac
176                }
177            });
178            if !found {
179                working_set.error(ParseError::MissingPositional(
180                    argument.name.clone(),
181                    Span::new(end_offset, end_offset),
182                    sig.call_signature(),
183                ));
184                return CallKind::Invalid;
185            }
186        }
187
188        let missing = &sig.required_positional[call.positional_iter().count()];
189        working_set.error(ParseError::MissingPositional(
190            missing.name.clone(),
191            Span::new(end_offset, end_offset),
192            sig.call_signature(),
193        ));
194        return CallKind::Invalid;
195    } else {
196        for req_flag in sig.named.iter().filter(|x| x.required) {
197            if call.named_iter().all(|(n, _, _)| n.item != req_flag.long) {
198                working_set.error(ParseError::MissingRequiredFlag(
199                    req_flag.long.clone(),
200                    command,
201                ));
202                return CallKind::Invalid;
203            }
204        }
205    }
206    CallKind::Valid
207}
208
209/// Parses an unknown argument for the given signature. This handles the parsing as appropriate to
210/// the rest type of the command.
211fn parse_unknown_arg(
212    working_set: &mut StateWorkingSet,
213    span: Span,
214    signature: &Signature,
215) -> Expression {
216    let shape = signature
217        .rest_positional
218        .as_ref()
219        .map(|arg| arg.shape.clone())
220        .unwrap_or(SyntaxShape::Any);
221
222    parse_value(working_set, span, &shape)
223}
224
225/// Parses a string in the arg or head position of an external call.
226///
227/// If the string begins with `r#`, it is parsed as a raw string. If it doesn't contain any quotes
228/// or parentheses, it is parsed as a glob pattern so that tilde and glob expansion can be handled
229/// by `run-external`. Otherwise, we use a custom state machine to put together an interpolated
230/// string, where each balanced pair of quotes is parsed as a separate part of the string, and then
231/// concatenated together.
232///
233/// For example, `-foo="bar\nbaz"` becomes `$"-foo=bar\nbaz"`
234fn parse_external_string(working_set: &mut StateWorkingSet, span: Span) -> Expression {
235    let contents = working_set.get_span_contents(span);
236
237    if contents.starts_with(b"r#") {
238        parse_raw_string(working_set, span)
239    } else if contents
240        .iter()
241        .any(|b| matches!(b, b'"' | b'\'' | b'(' | b')' | b'`'))
242    {
243        enum State {
244            Bare {
245                from: usize,
246            },
247            BackTickQuote {
248                from: usize,
249            },
250            Quote {
251                from: usize,
252                quote_char: u8,
253                escaped: bool,
254            },
255            Parenthesized {
256                from: usize,
257                depth: usize,
258            },
259        }
260        // Find the spans of parts of the string that can be parsed as their own strings for
261        // concatenation.
262        //
263        // By passing each of these parts to `parse_string()`, we can eliminate the quotes and also
264        // handle string interpolation.
265        let make_span = |from: usize, index: usize| Span {
266            start: span.start + from,
267            end: span.start + index,
268        };
269        let mut spans = vec![];
270        let mut state = State::Bare { from: 0 };
271        let mut index = 0;
272        while index < contents.len() {
273            let ch = contents[index];
274            match &mut state {
275                State::Bare { from } => match ch {
276                    b'"' | b'\'' => {
277                        // Push bare string
278                        if index != *from {
279                            spans.push(make_span(*from, index));
280                        }
281                        // then transition to other state
282                        state = State::Quote {
283                            from: index,
284                            quote_char: ch,
285                            escaped: false,
286                        };
287                    }
288                    b'$' => {
289                        if let Some(&quote_char @ (b'"' | b'\'')) = contents.get(index + 1) {
290                            // Start a dollar quote (interpolated string)
291                            if index != *from {
292                                spans.push(make_span(*from, index));
293                            }
294                            state = State::Quote {
295                                from: index,
296                                quote_char,
297                                escaped: false,
298                            };
299                            // Skip over two chars (the dollar sign and the quote)
300                            index += 2;
301                            continue;
302                        }
303                    }
304                    b'`' => {
305                        if index != *from {
306                            spans.push(make_span(*from, index))
307                        }
308                        state = State::BackTickQuote { from: index }
309                    }
310                    b'(' => {
311                        if index != *from {
312                            spans.push(make_span(*from, index))
313                        }
314                        state = State::Parenthesized {
315                            from: index,
316                            depth: 1,
317                        }
318                    }
319                    // Continue to consume
320                    _ => (),
321                },
322                State::Quote {
323                    from,
324                    quote_char,
325                    escaped,
326                } => match ch {
327                    ch if ch == *quote_char && !*escaped => {
328                        // quoted string ended, just make a new span for it.
329                        spans.push(make_span(*from, index + 1));
330                        // go back to Bare state.
331                        state = State::Bare { from: index + 1 };
332                    }
333                    b'\\' if !*escaped && *quote_char == b'"' => {
334                        // The next token is escaped so it doesn't count (only for double quote)
335                        *escaped = true;
336                    }
337                    _ => {
338                        *escaped = false;
339                    }
340                },
341                State::BackTickQuote { from } => {
342                    if ch == b'`' {
343                        spans.push(make_span(*from, index + 1));
344                        state = State::Bare { from: index + 1 };
345                    }
346                }
347                State::Parenthesized { from, depth } => {
348                    if ch == b')' {
349                        if *depth == 1 {
350                            spans.push(make_span(*from, index + 1));
351                            state = State::Bare { from: index + 1 };
352                        } else {
353                            *depth -= 1;
354                        }
355                    } else if ch == b'(' {
356                        *depth += 1;
357                    }
358                }
359            }
360            index += 1;
361        }
362
363        // Add the final span
364        match state {
365            State::Bare { from }
366            | State::Quote { from, .. }
367            | State::Parenthesized { from, .. }
368            | State::BackTickQuote { from, .. } => {
369                if from < contents.len() {
370                    spans.push(make_span(from, contents.len()));
371                }
372            }
373        }
374
375        // Log the spans that will be parsed
376        if log::log_enabled!(log::Level::Trace) {
377            let contents = spans
378                .iter()
379                .map(|span| String::from_utf8_lossy(working_set.get_span_contents(*span)))
380                .collect::<Vec<_>>();
381
382            trace!("parsing: external string, parts: {contents:?}")
383        }
384
385        // Check if the whole thing is quoted. If not, it should be a glob
386        let quoted =
387            (contents.len() >= 3 && contents.starts_with(b"$\"") && contents.ends_with(b"\""))
388                || is_quoted(contents);
389
390        // Parse each as its own string
391        let exprs: Vec<Expression> = spans
392            .into_iter()
393            .map(|span| parse_string(working_set, span))
394            .collect();
395
396        if exprs
397            .iter()
398            .all(|expr| matches!(expr.expr, Expr::String(..)))
399        {
400            // If the exprs are all strings anyway, just collapse into a single string.
401            let string = exprs
402                .into_iter()
403                .map(|expr| {
404                    let Expr::String(contents) = expr.expr else {
405                        unreachable!("already checked that this was a String")
406                    };
407                    contents
408                })
409                .collect::<String>();
410            if quoted {
411                Expression::new(working_set, Expr::String(string), span, Type::String)
412            } else {
413                Expression::new(
414                    working_set,
415                    Expr::GlobPattern(string, false),
416                    span,
417                    Type::Glob,
418                )
419            }
420        } else {
421            // Flatten any string interpolations contained with the exprs.
422            let exprs = exprs
423                .into_iter()
424                .flat_map(|expr| match expr.expr {
425                    Expr::StringInterpolation(subexprs) => subexprs,
426                    _ => vec![expr],
427                })
428                .collect();
429            // Make an interpolation out of the expressions. Use `GlobInterpolation` if it's a bare
430            // word, so that the unquoted state can get passed through to `run-external`.
431            if quoted {
432                Expression::new(
433                    working_set,
434                    Expr::StringInterpolation(exprs),
435                    span,
436                    Type::String,
437                )
438            } else {
439                Expression::new(
440                    working_set,
441                    Expr::GlobInterpolation(exprs, false),
442                    span,
443                    Type::Glob,
444                )
445            }
446        }
447    } else {
448        parse_glob_pattern(working_set, span)
449    }
450}
451
452fn parse_external_arg(working_set: &mut StateWorkingSet, span: Span) -> ExternalArgument {
453    let contents = working_set.get_span_contents(span);
454
455    if contents.len() > 3
456        && contents.starts_with(b"...")
457        && (contents[3] == b'$' || contents[3] == b'[' || contents[3] == b'(')
458    {
459        ExternalArgument::Spread(parse_value(
460            working_set,
461            Span::new(span.start + 3, span.end),
462            &SyntaxShape::List(Box::new(SyntaxShape::Any)),
463        ))
464    } else {
465        ExternalArgument::Regular(parse_regular_external_arg(working_set, span))
466    }
467}
468
469fn parse_regular_external_arg(working_set: &mut StateWorkingSet, span: Span) -> Expression {
470    match working_set.get_span_contents(span) {
471        [b'$', ..] => parse_dollar_expr(working_set, span),
472        [b'(', ..] => parse_paren_expr(working_set, span, &SyntaxShape::Any),
473        [b'[', ..] => parse_list_expression(working_set, span, &SyntaxShape::Any),
474        _ => parse_external_string(working_set, span),
475    }
476}
477
478pub fn parse_external_call(
479    working_set: &mut StateWorkingSet,
480    spans: &[Span],
481    call_span: Span,
482) -> Expression {
483    trace!("parse external");
484
485    let head_span = spans[0];
486
487    let head_contents = working_set.get_span_contents(head_span);
488
489    let head = if let [b'$' | b'(', ..] = head_contents {
490        // the expression is inside external_call, so it's a subexpression
491        let arg = parse_expression(working_set, &[head_span]);
492        Box::new(arg)
493    } else {
494        Box::new(parse_external_string(working_set, head_span))
495    };
496
497    let args = spans[1..]
498        .iter()
499        .map(|&span| parse_external_arg(working_set, span))
500        .collect();
501
502    Expression::new(
503        working_set,
504        Expr::ExternalCall(head, args),
505        call_span,
506        Type::Any,
507    )
508}
509
510fn ensure_flag_arg_type(
511    working_set: &mut StateWorkingSet,
512    arg_name: String,
513    arg: Expression,
514    arg_shape: &SyntaxShape,
515    long_name_span: Span,
516) -> (Spanned<String>, Expression) {
517    if !type_compatible(&arg.ty, &arg_shape.to_type()) {
518        working_set.error(ParseError::TypeMismatch(
519            arg_shape.to_type(),
520            arg.ty,
521            arg.span,
522        ));
523        (
524            Spanned {
525                item: arg_name,
526                span: long_name_span,
527            },
528            Expression::garbage(working_set, arg.span),
529        )
530    } else {
531        (
532            Spanned {
533                item: arg_name,
534                span: long_name_span,
535            },
536            arg,
537        )
538    }
539}
540
541fn parse_long_flag(
542    working_set: &mut StateWorkingSet,
543    spans: &[Span],
544    spans_idx: &mut usize,
545    sig: &Signature,
546) -> (Option<Spanned<String>>, Option<Expression>) {
547    let arg_span = spans[*spans_idx];
548    let arg_contents = working_set.get_span_contents(arg_span);
549
550    if arg_contents.starts_with(b"--") {
551        // FIXME: only use the first flag you find?
552        let split: Vec<_> = arg_contents.split(|x| *x == b'=').collect();
553        let long_name = String::from_utf8(split[0].into());
554        if let Ok(long_name) = long_name {
555            let long_name = long_name[2..].to_string();
556            if let Some(flag) = sig.get_long_flag(&long_name) {
557                if let Some(arg_shape) = &flag.arg {
558                    if split.len() > 1 {
559                        // and we also have the argument
560                        let long_name_len = long_name.len();
561                        let mut span = arg_span;
562                        span.start += long_name_len + 3; //offset by long flag and '='
563
564                        let arg = parse_value(working_set, span, arg_shape);
565                        let (arg_name, val_expression) = ensure_flag_arg_type(
566                            working_set,
567                            long_name,
568                            arg,
569                            arg_shape,
570                            Span::new(arg_span.start, arg_span.start + long_name_len + 2),
571                        );
572                        (Some(arg_name), Some(val_expression))
573                    } else if let Some(arg) = spans.get(*spans_idx + 1) {
574                        let arg = parse_value(working_set, *arg, arg_shape);
575
576                        *spans_idx += 1;
577                        let (arg_name, val_expression) =
578                            ensure_flag_arg_type(working_set, long_name, arg, arg_shape, arg_span);
579                        (Some(arg_name), Some(val_expression))
580                    } else {
581                        working_set.error(ParseError::MissingFlagParam(
582                            arg_shape.to_string(),
583                            arg_span,
584                        ));
585                        // NOTE: still need to cover this incomplete flag in the final expression
586                        // see https://github.com/nushell/nushell/issues/16375
587                        (
588                            Some(Spanned {
589                                item: long_name,
590                                span: arg_span,
591                            }),
592                            None,
593                        )
594                    }
595                } else {
596                    // A flag with no argument
597                    // It can also takes a boolean value like --x=true
598                    if split.len() > 1 {
599                        // and we also have the argument
600                        let long_name_len = long_name.len();
601                        let mut span = arg_span;
602                        span.start += long_name_len + 3; //offset by long flag and '='
603
604                        let arg = parse_value(working_set, span, &SyntaxShape::Boolean);
605
606                        let (arg_name, val_expression) = ensure_flag_arg_type(
607                            working_set,
608                            long_name,
609                            arg,
610                            &SyntaxShape::Boolean,
611                            Span::new(arg_span.start, arg_span.start + long_name_len + 2),
612                        );
613                        (Some(arg_name), Some(val_expression))
614                    } else {
615                        (
616                            Some(Spanned {
617                                item: long_name,
618                                span: arg_span,
619                            }),
620                            None,
621                        )
622                    }
623                }
624            } else {
625                let suggestion = did_you_mean(sig.get_names(), &long_name)
626                    .map(|name| format!("Did you mean: `--{name}`?"))
627                    .unwrap_or("Use `--help` to see available flags".to_owned());
628                working_set.error(ParseError::UnknownFlag(
629                    sig.name.clone(),
630                    long_name.clone(),
631                    arg_span,
632                    suggestion,
633                ));
634                (
635                    Some(Spanned {
636                        item: long_name.clone(),
637                        span: arg_span,
638                    }),
639                    None,
640                )
641            }
642        } else {
643            working_set.error(ParseError::NonUtf8(arg_span));
644            (
645                Some(Spanned {
646                    item: "--".into(),
647                    span: arg_span,
648                }),
649                None,
650            )
651        }
652    } else {
653        (None, None)
654    }
655}
656
657fn parse_short_flags(
658    working_set: &mut StateWorkingSet,
659    spans: &[Span],
660    spans_idx: &mut usize,
661    positional_idx: usize,
662    sig: &Signature,
663) -> Option<Vec<Flag>> {
664    let arg_span = spans[*spans_idx];
665
666    let arg_contents = working_set.get_span_contents(arg_span);
667
668    if let Ok(arg_contents_uft8_ref) = str::from_utf8(arg_contents) {
669        if arg_contents_uft8_ref.starts_with('-') && arg_contents_uft8_ref.len() > 1 {
670            let short_flags = &arg_contents_uft8_ref[1..];
671            let num_chars = short_flags.chars().count();
672            let mut found_short_flags = vec![];
673            let mut unmatched_short_flags = vec![];
674            for (offset, short_flag) in short_flags.char_indices() {
675                let short_flag_span = Span::new(
676                    arg_span.start + 1 + offset,
677                    arg_span.start + 1 + offset + short_flag.len_utf8(),
678                );
679                if let Some(flag) = sig.get_short_flag(short_flag) {
680                    // Allow args in short flag batches as long as it is the last flag.
681                    if flag.arg.is_some() && offset < num_chars - 1 {
682                        working_set
683                            .error(ParseError::OnlyLastFlagInBatchCanTakeArg(short_flag_span));
684                        break;
685                    }
686                    found_short_flags.push(flag);
687                } else {
688                    unmatched_short_flags.push(short_flag_span);
689                }
690            }
691
692            if found_short_flags.is_empty()
693                // check to see if we have a negative number
694                && matches!(
695                    sig.get_positional(positional_idx),
696                    Some(PositionalArg {
697                        shape: SyntaxShape::Int | SyntaxShape::Number | SyntaxShape::Float,
698                        ..
699                    })
700                )
701                && String::from_utf8_lossy(working_set.get_span_contents(arg_span))
702                    .parse::<f64>()
703                    .is_ok()
704            {
705                return None;
706            } else if let Some(first) = unmatched_short_flags.first() {
707                let contents = working_set.get_span_contents(*first);
708                working_set.error(ParseError::UnknownFlag(
709                    sig.name.clone(),
710                    format!("-{}", String::from_utf8_lossy(contents)),
711                    *first,
712                    "Use `--help` to see available flags".to_owned(),
713                ));
714            }
715
716            Some(found_short_flags)
717        } else {
718            None
719        }
720    } else {
721        working_set.error(ParseError::NonUtf8(arg_span));
722        None
723    }
724}
725
726fn first_kw_idx(
727    working_set: &StateWorkingSet,
728    signature: &Signature,
729    spans: &[Span],
730    spans_idx: usize,
731    positional_idx: usize,
732) -> (Option<usize>, usize) {
733    for idx in (positional_idx + 1)..signature.num_positionals() {
734        if let Some(PositionalArg {
735            shape: SyntaxShape::Keyword(kw, ..),
736            ..
737        }) = signature.get_positional(idx)
738        {
739            for (span_idx, &span) in spans.iter().enumerate().skip(spans_idx) {
740                let contents = working_set.get_span_contents(span);
741
742                if contents == kw {
743                    return (Some(idx), span_idx);
744                }
745            }
746        }
747    }
748    (None, spans.len())
749}
750
751fn calculate_end_span(
752    working_set: &StateWorkingSet,
753    signature: &Signature,
754    spans: &[Span],
755    spans_idx: usize,
756    positional_idx: usize,
757) -> usize {
758    if signature.rest_positional.is_some() {
759        spans.len()
760    } else {
761        let (kw_pos, kw_idx) =
762            first_kw_idx(working_set, signature, spans, spans_idx, positional_idx);
763
764        if let Some(kw_pos) = kw_pos {
765            // We found a keyword. Keywords, once found, create a guidepost to
766            // show us where the positionals will lay into the arguments. Because they're
767            // keywords, they get to set this by being present
768
769            let positionals_between = kw_pos - positional_idx - 1;
770            if positionals_between >= (kw_idx - spans_idx) {
771                kw_idx
772            } else {
773                kw_idx - positionals_between
774            }
775        } else {
776            // Make space for the remaining require positionals, if we can
777            // spans_idx < spans.len() is an invariant
778            let remaining_spans = spans.len() - (spans_idx + 1);
779            // positional_idx can be larger than required_positional.len() if we have optional args
780            let remaining_positional = signature
781                .required_positional
782                .len()
783                .saturating_sub(positional_idx + 1);
784            // Saturates to 0 when we have too few args
785            let extra_spans = remaining_spans.saturating_sub(remaining_positional);
786            spans_idx + 1 + extra_spans
787        }
788    }
789}
790
791fn parse_oneof(
792    working_set: &mut StateWorkingSet,
793    spans: &[Span],
794    spans_idx: &mut usize,
795    possible_shapes: &Vec<SyntaxShape>,
796    multispan: bool,
797) -> Expression {
798    let starting_spans_idx = *spans_idx;
799    let mut best_guess = None;
800    let mut best_guess_errors = Vec::new();
801    let mut max_first_error_offset = 0;
802    let mut propagate_error = false;
803    for shape in possible_shapes {
804        let starting_error_count = working_set.parse_errors.len();
805        *spans_idx = starting_spans_idx;
806        let value = match multispan {
807            true => parse_multispan_value(working_set, spans, spans_idx, shape),
808            false => parse_value(working_set, spans[*spans_idx], shape),
809        };
810
811        let new_errors = working_set.parse_errors[starting_error_count..].to_vec();
812        // no new errors found means success
813        let Some(first_error_offset) = new_errors.iter().map(|e| e.span().start).min() else {
814            return value;
815        };
816
817        if first_error_offset > max_first_error_offset {
818            // while trying the possible shapes, ignore Expected type errors
819            // unless they're inside a block, closure, or expression
820            propagate_error = match working_set.parse_errors.last() {
821                Some(ParseError::Expected(_, error_span))
822                | Some(ParseError::ExpectedWithStringMsg(_, error_span)) => {
823                    matches!(
824                        shape,
825                        SyntaxShape::Block | SyntaxShape::Closure(_) | SyntaxShape::Expression
826                    ) && *error_span != spans[*spans_idx]
827                }
828                _ => true,
829            };
830            max_first_error_offset = first_error_offset;
831            best_guess = Some(value);
832            best_guess_errors = new_errors;
833        }
834        working_set.parse_errors.truncate(starting_error_count);
835    }
836
837    // if best_guess results in new errors further than current span, then accept it
838    // or propagate_error is marked as true for it
839    if max_first_error_offset > spans[starting_spans_idx].start || propagate_error {
840        working_set.parse_errors.extend(best_guess_errors);
841        best_guess.expect("best_guess should not be None here!")
842    } else {
843        working_set.error(ParseError::ExpectedWithStringMsg(
844            format!("one of a list of accepted shapes: {possible_shapes:?}"),
845            spans[starting_spans_idx],
846        ));
847        Expression::garbage(working_set, spans[starting_spans_idx])
848    }
849}
850
851pub fn parse_multispan_value(
852    working_set: &mut StateWorkingSet,
853    spans: &[Span],
854    spans_idx: &mut usize,
855    shape: &SyntaxShape,
856) -> Expression {
857    trace!("parse multispan value");
858    match shape {
859        SyntaxShape::VarWithOptType => {
860            trace!("parsing: var with opt type");
861
862            parse_var_with_opt_type(working_set, spans, spans_idx, false).0
863        }
864        SyntaxShape::RowCondition => {
865            trace!("parsing: row condition");
866            let arg = parse_row_condition(working_set, &spans[*spans_idx..]);
867            *spans_idx = spans.len() - 1;
868
869            arg
870        }
871        SyntaxShape::MathExpression => {
872            trace!("parsing: math expression");
873
874            let arg = parse_math_expression(working_set, &spans[*spans_idx..], None);
875            *spans_idx = spans.len() - 1;
876
877            arg
878        }
879        SyntaxShape::OneOf(possible_shapes) => {
880            parse_oneof(working_set, spans, spans_idx, possible_shapes, true)
881        }
882
883        SyntaxShape::Expression => {
884            trace!("parsing: expression");
885
886            // is it subexpression?
887            // Not sure, but let's make it not, so the behavior is the same as previous version of nushell.
888            let arg = parse_expression(working_set, &spans[*spans_idx..]);
889            *spans_idx = spans.len().saturating_sub(1);
890
891            arg
892        }
893        SyntaxShape::Signature => {
894            trace!("parsing: signature");
895
896            let sig = parse_full_signature(working_set, &spans[*spans_idx..], false);
897            *spans_idx = spans.len().saturating_sub(1);
898
899            sig
900        }
901        SyntaxShape::ExternalSignature => {
902            trace!("parsing: external signature");
903
904            let sig = parse_full_signature(working_set, &spans[*spans_idx..], true);
905            *spans_idx = spans.len().saturating_sub(1);
906
907            sig
908        }
909        SyntaxShape::Keyword(keyword, arg) => {
910            trace!(
911                "parsing: keyword({}) {:?}",
912                String::from_utf8_lossy(keyword),
913                arg
914            );
915            let arg_span = spans[*spans_idx];
916
917            let arg_contents = working_set.get_span_contents(arg_span);
918
919            if arg_contents != keyword {
920                // When keywords mismatch, this is a strong indicator of something going wrong.
921                // We won't often override the current error, but as this is a strong indicator
922                // go ahead and override the current error and tell the user about the missing
923                // keyword/literal.
924                working_set.error(ParseError::ExpectedKeyword(
925                    String::from_utf8_lossy(keyword).into(),
926                    arg_span,
927                ))
928            }
929
930            *spans_idx += 1;
931            if *spans_idx >= spans.len() {
932                working_set.error(ParseError::KeywordMissingArgument(
933                    arg.to_string(),
934                    String::from_utf8_lossy(keyword).into(),
935                    Span::new(spans[*spans_idx - 1].end, spans[*spans_idx - 1].end),
936                ));
937                let keyword = Keyword {
938                    keyword: keyword.as_slice().into(),
939                    span: spans[*spans_idx - 1],
940                    expr: Expression::garbage(working_set, arg_span),
941                };
942                return Expression::new(
943                    working_set,
944                    Expr::Keyword(Box::new(keyword)),
945                    arg_span,
946                    Type::Any,
947                );
948            }
949
950            let keyword = Keyword {
951                keyword: keyword.as_slice().into(),
952                span: spans[*spans_idx - 1],
953                expr: parse_multispan_value(working_set, spans, spans_idx, arg),
954            };
955
956            Expression::new(
957                working_set,
958                Expr::Keyword(Box::new(keyword.clone())),
959                keyword.span.merge(keyword.expr.span),
960                keyword.expr.ty,
961            )
962        }
963        _ => {
964            // All other cases are single-span values
965            let arg_span = spans[*spans_idx];
966
967            parse_value(working_set, arg_span, shape)
968        }
969    }
970}
971
972pub struct ParsedInternalCall {
973    pub call: Box<Call>,
974    pub output: Type,
975    pub call_kind: CallKind,
976}
977
/// Controls how many positional arguments `parse_internal_call` parses itself.
///
/// Some internal commands (e.g. `parse_module`) parse their arguments in dedicated functions.
/// Such callers pass a restricted level so the same argument is not parsed twice.
///
/// The default is [`ArgumentParsingLevel::Full`].
#[derive(Default)]
pub enum ArgumentParsingLevel {
    /// Parse every argument.
    #[default]
    Full,
    /// Parse only the first `k` positional arguments; later positionals are filled with
    /// garbage placeholder expressions instead of being parsed.
    FirstK { k: usize },
}
989
990pub fn parse_internal_call(
991    working_set: &mut StateWorkingSet,
992    command_span: Span,
993    spans: &[Span],
994    decl_id: DeclId,
995    arg_parsing_level: ArgumentParsingLevel,
996) -> ParsedInternalCall {
997    trace!("parsing: internal call (decl id: {})", decl_id.get());
998
999    let mut call = Call::new(command_span);
1000    call.decl_id = decl_id;
1001    call.head = command_span;
1002    let _ = working_set.add_span(call.head);
1003
1004    let decl = working_set.get_decl(decl_id);
1005    let signature = working_set.get_signature(decl);
1006    let output = signature.get_output_type();
1007
1008    let deprecation = decl.deprecation_info();
1009
1010    // storing the var ID for later due to borrowing issues
1011    let lib_dirs_var_id = match decl.name() {
1012        "use" | "overlay use" | "source-env" if decl.is_keyword() => {
1013            find_dirs_var(working_set, LIB_DIRS_VAR)
1014        }
1015        "nu-check" if decl.is_builtin() => find_dirs_var(working_set, LIB_DIRS_VAR),
1016        _ => None,
1017    };
1018
1019    // The index into the positional parameter in the definition
1020    let mut positional_idx = 0;
1021
1022    // The index into the spans of argument data given to parse
1023    // Starting at the first argument
1024    let mut spans_idx = 0;
1025
1026    if let Some(alias) = decl.as_alias() {
1027        if let Expression {
1028            expr: Expr::Call(wrapped_call),
1029            ..
1030        } = &alias.wrapped_call
1031        {
1032            // Replace this command's call with the aliased call, but keep the alias name
1033            call = *wrapped_call.clone();
1034            call.head = command_span;
1035            // Skip positionals passed to aliased call
1036            positional_idx = call.positional_iter().count();
1037        } else {
1038            working_set.error(ParseError::UnknownState(
1039                "Alias does not point to internal call.".to_string(),
1040                command_span,
1041            ));
1042            return ParsedInternalCall {
1043                call: Box::new(call),
1044                output: Type::Any,
1045                call_kind: CallKind::Invalid,
1046            };
1047        }
1048    }
1049
1050    if let Some(var_id) = lib_dirs_var_id {
1051        call.set_parser_info(
1052            DIR_VAR_PARSER_INFO.to_owned(),
1053            Expression::new(working_set, Expr::Var(var_id), call.head, Type::Any),
1054        );
1055    }
1056
1057    if signature.creates_scope {
1058        working_set.enter_scope();
1059    }
1060
1061    while spans_idx < spans.len() {
1062        let arg_span = spans[spans_idx];
1063
1064        let starting_error_count = working_set.parse_errors.len();
1065        // Check if we're on a long flag, if so, parse
1066        let (long_name, arg) = parse_long_flag(working_set, spans, &mut spans_idx, &signature);
1067
1068        if let Some(long_name) = long_name {
1069            // We found a long flag, like --bar
1070            if working_set.parse_errors[starting_error_count..]
1071                .iter()
1072                .any(|x| matches!(x, ParseError::UnknownFlag(_, _, _, _)))
1073                && signature.allows_unknown_args
1074            {
1075                working_set.parse_errors.truncate(starting_error_count);
1076                let arg = parse_unknown_arg(working_set, arg_span, &signature);
1077
1078                call.add_unknown(arg);
1079            } else {
1080                call.add_named((long_name, None, arg));
1081            }
1082
1083            spans_idx += 1;
1084            continue;
1085        }
1086
1087        let starting_error_count = working_set.parse_errors.len();
1088
1089        // Check if we're on a short flag or group of short flags, if so, parse
1090        let short_flags = parse_short_flags(
1091            working_set,
1092            spans,
1093            &mut spans_idx,
1094            positional_idx,
1095            &signature,
1096        );
1097
1098        if let Some(mut short_flags) = short_flags {
1099            if short_flags.is_empty() {
1100                // workaround for completions (PR #6067)
1101                short_flags.push(Flag {
1102                    long: "".to_string(),
1103                    short: Some('a'),
1104                    arg: None,
1105                    required: false,
1106                    desc: "".to_string(),
1107                    var_id: None,
1108                    default_value: None,
1109                    completion: None,
1110                })
1111            }
1112
1113            if working_set.parse_errors[starting_error_count..]
1114                .iter()
1115                .any(|x| matches!(x, ParseError::UnknownFlag(_, _, _, _)))
1116                && signature.allows_unknown_args
1117            {
1118                working_set.parse_errors.truncate(starting_error_count);
1119                let arg = parse_unknown_arg(working_set, arg_span, &signature);
1120
1121                call.add_unknown(arg);
1122            } else {
1123                for flag in short_flags {
1124                    let _ = working_set.add_span(spans[spans_idx]);
1125
1126                    if let Some(arg_shape) = flag.arg {
1127                        if let Some(arg) = spans.get(spans_idx + 1) {
1128                            let arg = parse_value(working_set, *arg, &arg_shape);
1129                            let (arg_name, val_expression) = ensure_flag_arg_type(
1130                                working_set,
1131                                flag.long.clone(),
1132                                arg.clone(),
1133                                &arg_shape,
1134                                spans[spans_idx],
1135                            );
1136
1137                            if flag.long.is_empty() {
1138                                if let Some(short) = flag.short {
1139                                    call.add_named((
1140                                        arg_name,
1141                                        Some(Spanned {
1142                                            item: short.to_string(),
1143                                            span: spans[spans_idx],
1144                                        }),
1145                                        Some(val_expression),
1146                                    ));
1147                                }
1148                            } else {
1149                                call.add_named((arg_name, None, Some(val_expression)));
1150                            }
1151                            spans_idx += 1;
1152                        } else {
1153                            working_set.error(ParseError::MissingFlagParam(
1154                                arg_shape.to_string(),
1155                                arg_span,
1156                            ));
1157                            // NOTE: still need to cover this incomplete flag in the final expression
1158                            // see https://github.com/nushell/nushell/issues/16375
1159                            call.add_named((
1160                                Spanned {
1161                                    item: String::new(),
1162                                    span: spans[spans_idx],
1163                                },
1164                                None,
1165                                None,
1166                            ));
1167                        }
1168                    } else if flag.long.is_empty() {
1169                        if let Some(short) = flag.short {
1170                            call.add_named((
1171                                Spanned {
1172                                    item: String::new(),
1173                                    span: spans[spans_idx],
1174                                },
1175                                Some(Spanned {
1176                                    item: short.to_string(),
1177                                    span: spans[spans_idx],
1178                                }),
1179                                None,
1180                            ));
1181                        }
1182                    } else {
1183                        call.add_named((
1184                            Spanned {
1185                                item: flag.long.clone(),
1186                                span: spans[spans_idx],
1187                            },
1188                            None,
1189                            None,
1190                        ));
1191                    }
1192                }
1193            }
1194
1195            spans_idx += 1;
1196            continue;
1197        }
1198
1199        {
1200            let contents = working_set.get_span_contents(spans[spans_idx]);
1201
1202            if contents.len() > 3
1203                && contents.starts_with(b"...")
1204                && (contents[3] == b'$' || contents[3] == b'[' || contents[3] == b'(')
1205            {
1206                if signature.rest_positional.is_none() && !signature.allows_unknown_args {
1207                    working_set.error(ParseError::UnexpectedSpreadArg(
1208                        signature.call_signature(),
1209                        arg_span,
1210                    ));
1211                    call.add_positional(Expression::garbage(working_set, arg_span));
1212                } else if positional_idx < signature.required_positional.len() {
1213                    working_set.error(ParseError::MissingPositional(
1214                        signature.required_positional[positional_idx].name.clone(),
1215                        Span::new(spans[spans_idx].start, spans[spans_idx].start),
1216                        signature.call_signature(),
1217                    ));
1218                    call.add_positional(Expression::garbage(working_set, arg_span));
1219                } else {
1220                    let rest_shape = match &signature.rest_positional {
1221                        Some(arg) if matches!(arg.shape, SyntaxShape::ExternalArgument) => {
1222                            // External args aren't parsed inside lists in spread position.
1223                            SyntaxShape::Any
1224                        }
1225                        Some(arg) => arg.shape.clone(),
1226                        None => SyntaxShape::Any,
1227                    };
1228                    // Parse list of arguments to be spread
1229                    let args = parse_value(
1230                        working_set,
1231                        Span::new(arg_span.start + 3, arg_span.end),
1232                        &SyntaxShape::List(Box::new(rest_shape)),
1233                    );
1234
1235                    call.add_spread(args);
1236                    // Let the parser know that it's parsing rest arguments now
1237                    positional_idx =
1238                        signature.required_positional.len() + signature.optional_positional.len();
1239                }
1240
1241                spans_idx += 1;
1242                continue;
1243            }
1244        }
1245
1246        // Parse a positional arg if there is one
1247        if let Some(positional) = signature.get_positional(positional_idx) {
1248            let end = calculate_end_span(working_set, &signature, spans, spans_idx, positional_idx);
1249
1250            // Missing arguments before next keyword
1251            if end == spans_idx {
1252                let prev_span = if spans_idx == 0 {
1253                    command_span
1254                } else {
1255                    spans[spans_idx - 1]
1256                };
1257                let whitespace_span = Span::new(prev_span.end, spans[spans_idx].start);
1258                working_set.error(ParseError::MissingPositional(
1259                    positional.name.clone(),
1260                    whitespace_span,
1261                    signature.call_signature(),
1262                ));
1263                call.add_positional(Expression::garbage(working_set, whitespace_span));
1264                positional_idx += 1;
1265                continue;
1266            }
1267            debug_assert!(end <= spans.len());
1268
1269            if spans[..end].is_empty() || spans_idx == end {
1270                working_set.error(ParseError::MissingPositional(
1271                    positional.name.clone(),
1272                    Span::new(spans[spans_idx].end, spans[spans_idx].end),
1273                    signature.call_signature(),
1274                ));
1275                positional_idx += 1;
1276                continue;
1277            }
1278
1279            let compile_error_count = working_set.compile_errors.len();
1280
1281            // HACK: avoid repeated parsing of argument values in special cases
1282            // see https://github.com/nushell/nushell/issues/16398
1283            let arg = match arg_parsing_level {
1284                ArgumentParsingLevel::FirstK { k } if k <= positional_idx => {
1285                    Expression::garbage(working_set, spans[spans_idx])
1286                }
1287                _ => parse_multispan_value(
1288                    working_set,
1289                    &spans[..end],
1290                    &mut spans_idx,
1291                    &positional.shape,
1292                ),
1293            };
1294
1295            // HACK: try-catch's signature defines the catch block as a Closure, even though it's
1296            // used like a Block. Because closures are compiled eagerly, this ends up making the
1297            // following code technically invalid:
1298            // ```nu
1299            // loop { try { } catch {|e| break } }
1300            // ```
1301            // Thus, we discard the compilation error here
1302            if let SyntaxShape::OneOf(ref shapes) = positional.shape {
1303                for one_shape in shapes {
1304                    if let SyntaxShape::Keyword(keyword, ..) = one_shape
1305                        && keyword == b"catch"
1306                        && let [nu_protocol::CompileError::NotInALoop { .. }] =
1307                            &working_set.compile_errors[compile_error_count..]
1308                    {
1309                        working_set.compile_errors.truncate(compile_error_count);
1310                    }
1311                }
1312            }
1313
1314            let arg = if !type_compatible(&positional.shape.to_type(), &arg.ty) {
1315                working_set.error(ParseError::TypeMismatch(
1316                    positional.shape.to_type(),
1317                    arg.ty,
1318                    arg.span,
1319                ));
1320                Expression::garbage(working_set, arg.span)
1321            } else {
1322                arg
1323            };
1324
1325            call.add_positional(arg);
1326            positional_idx += 1;
1327        } else if signature.allows_unknown_args {
1328            let arg = parse_unknown_arg(working_set, arg_span, &signature);
1329
1330            call.add_unknown(arg);
1331        } else {
1332            call.add_positional(Expression::garbage(working_set, arg_span));
1333            working_set.error(ParseError::ExtraPositional(
1334                signature.call_signature(),
1335                arg_span,
1336            ))
1337        }
1338
1339        spans_idx += 1;
1340    }
1341
1342    // TODO: Inline `check_call`,
1343    // move missing positional checking into the while loop above with two pointers.
1344    // Maybe more `CallKind::Invalid` if errors found during argument parsing.
1345    let call_kind = check_call(working_set, command_span, &signature, &call);
1346
1347    deprecation
1348        .into_iter()
1349        .filter_map(|entry| entry.parse_warning(&signature.name, &call))
1350        .for_each(|warning| {
1351            // FIXME: if two flags are deprecated and both are used in one command,
1352            // the second flag's deprecation won't show until the first flag is removed
1353            // (but it won't be flagged as reported until it is actually reported)
1354            working_set.warning(warning);
1355        });
1356
1357    if signature.creates_scope {
1358        working_set.exit_scope();
1359    }
1360
1361    ParsedInternalCall {
1362        call: Box::new(call),
1363        output,
1364        call_kind,
1365    }
1366}
1367
1368pub fn parse_call(working_set: &mut StateWorkingSet, spans: &[Span], head: Span) -> Expression {
1369    trace!("parsing: call");
1370    let call_span = Span::concat(spans);
1371
1372    if spans.is_empty() {
1373        working_set.error(ParseError::UnknownState(
1374            "Encountered command with zero spans".into(),
1375            call_span,
1376        ));
1377        return garbage(working_set, head);
1378    }
1379
1380    let call_sigil = match working_set.get_span_contents(spans[0]).first() {
1381        Some(b'^') => Some(b'^'),
1382        Some(b'%') => Some(b'%'),
1383        _ => None,
1384    };
1385
1386    let mut adjusted_spans = Vec::new();
1387    let resolution_spans = match call_sigil {
1388        Some(b'^') | Some(b'%') => {
1389            adjusted_spans.reserve(spans.len());
1390            adjusted_spans.push(Span::new(spans[0].start + 1, spans[0].end));
1391            adjusted_spans.extend_from_slice(&spans[1..]);
1392            adjusted_spans.as_slice()
1393        }
1394        _ => spans,
1395    };
1396
1397    // `^` always forces external command parsing and must bypass declaration
1398    // resolution, even when an internal command with the same name exists.
1399    if call_sigil == Some(b'^') {
1400        trace!("parsing: forced external call");
1401        return parse_external_call(working_set, resolution_spans, call_span);
1402    }
1403
1404    // Check if we have a percent sigil with a dynamic head (variable or expression).
1405    // Supports two token layouts:
1406    //   - single token: `%$cmd` or `%($cmd)` — stripping `%` leaves `$cmd` / `($cmd)` in [0]
1407    //   - two tokens:   `%` and `($cmd)`    — stripping `%` leaves an empty span in [0]; head is [1]
1408    // If so, defer builtin validation to runtime (the IR compiler will rewrite to `run-internal`).
1409    if call_sigil == Some(b'%') && !resolution_spans.is_empty() {
1410        // Locate the actual head span, skipping an empty leading span.
1411        let (head_idx, head_span) = {
1412            let first = working_set.get_span_contents(resolution_spans[0]);
1413            if first.is_empty() && resolution_spans.len() > 1 {
1414                (1, resolution_spans[1])
1415            } else {
1416                (0, resolution_spans[0])
1417            }
1418        };
1419
1420        let dynamic_head_contents = working_set.get_span_contents(head_span);
1421        let is_dynamic_head = !dynamic_head_contents.is_empty()
1422            && (dynamic_head_contents[0] == b'$' || dynamic_head_contents[0] == b'(');
1423
1424        if is_dynamic_head {
1425            trace!("parsing: dynamic percent builtin dispatch");
1426
1427            let head_expr = parse_expression(working_set, &[head_span]);
1428
1429            // Create a placeholder call; the IR compiler will rewrite this to `run-internal`.
1430            let mut call = Call::new(call_span);
1431            call.decl_id = DeclId::new(0);
1432
1433            // Store the head expression for the IR compiler to pick up.
1434            call.set_parser_info(PERCENT_FORCED_BUILTIN_PARSER_INFO.to_string(), head_expr);
1435
1436            // Mirror the dynamic external-call path by preserving `...expr` as an explicit spread
1437            // argument so runtime dispatch can forward it without flattening first.
1438            for arg_span in resolution_spans.iter().skip(head_idx + 1) {
1439                let contents = working_set.get_span_contents(*arg_span);
1440                if contents.len() > 3
1441                    && contents.starts_with(b"...")
1442                    && (contents[3] == b'$' || contents[3] == b'[' || contents[3] == b'(')
1443                {
1444                    let spread_expr = parse_value(
1445                        working_set,
1446                        Span::new(arg_span.start + 3, arg_span.end),
1447                        &SyntaxShape::List(Box::new(SyntaxShape::Any)),
1448                    );
1449                    call.arguments.push(Argument::Spread(spread_expr));
1450                } else {
1451                    let arg_expr = parse_value(working_set, *arg_span, &SyntaxShape::Any);
1452                    call.arguments.push(Argument::Positional(arg_expr));
1453                }
1454            }
1455
1456            return Expression::new(
1457                working_set,
1458                Expr::Call(Box::new(call)),
1459                call_span,
1460                Type::Any,
1461            );
1462        }
1463    }
1464
1465    let (cmd_start, pos, _name, maybe_decl_id) = if call_sigil == Some(b'%') {
1466        find_longest_decl_with_command_type(working_set, resolution_spans, CommandType::Builtin)
1467    } else {
1468        find_longest_decl(working_set, resolution_spans)
1469    };
1470
1471    if let Some(decl_id) = maybe_decl_id {
1472        // Before the internal parsing we check if there is no let or alias declarations
1473        // that are missing their name, e.g.: let = 1 or alias = 2
1474        if resolution_spans.len() > 1 {
1475            let test_equal = working_set.get_span_contents(resolution_spans[1]);
1476
1477            if test_equal == [b'='] {
1478                trace!("incomplete statement");
1479
1480                working_set.error(ParseError::UnknownState(
1481                    "Incomplete statement".into(),
1482                    call_span,
1483                ));
1484                return garbage(working_set, call_span);
1485            }
1486        }
1487
1488        let decl = working_set.get_decl(decl_id);
1489
1490        let parsed_call = if let Some(alias) = decl.as_alias() {
1491            if let Expression {
1492                expr: Expr::ExternalCall(head, args),
1493                span: _,
1494                span_id: _,
1495                ty,
1496            } = &alias.clone().wrapped_call
1497            {
1498                trace!("parsing: alias of external call");
1499
1500                let mut head = head.clone();
1501                head.span = Span::concat(&resolution_spans[cmd_start..pos]); // replacing the spans preserves syntax highlighting
1502
1503                let mut final_args = args.clone().into_vec();
1504                for arg_span in &resolution_spans[pos..] {
1505                    let arg = parse_external_arg(working_set, *arg_span);
1506                    final_args.push(arg);
1507                }
1508
1509                let expression = Expression::new(
1510                    working_set,
1511                    Expr::ExternalCall(head, final_args.into()),
1512                    Span::concat(spans),
1513                    ty.clone(),
1514                );
1515
1516                return expression;
1517            } else {
1518                trace!("parsing: alias of internal call");
1519                parse_internal_call(
1520                    working_set,
1521                    Span::concat(&resolution_spans[cmd_start..pos]),
1522                    &resolution_spans[pos..],
1523                    decl_id,
1524                    ArgumentParsingLevel::Full,
1525                )
1526            }
1527        } else {
1528            trace!("parsing: internal call");
1529            parse_internal_call(
1530                working_set,
1531                Span::concat(&resolution_spans[cmd_start..pos]),
1532                &resolution_spans[pos..],
1533                decl_id,
1534                ArgumentParsingLevel::Full,
1535            )
1536        };
1537
1538        Expression::new(
1539            working_set,
1540            Expr::Call(parsed_call.call),
1541            call_span,
1542            parsed_call.output,
1543        )
1544    } else {
1545        if call_sigil == Some(b'%') {
1546            working_set.error(ParseError::LabeledErrorWithHelp {
1547                error: "percent sigil requires a built-in command".into(),
1548                label: "unknown built-in command".into(),
1549                help:
1550                    "remove `%` to use normal resolution, or use `^` to run an external command explicitly".into(),
1551                span: resolution_spans[0],
1552            });
1553
1554            // Preserve expression shape for features like completion while retaining the parse error.
1555            return parse_external_call(working_set, spans, call_span);
1556        }
1557
1558        // We might be parsing left-unbounded range ("..10")
1559        let bytes = working_set.get_span_contents(spans[0]);
1560        trace!("parsing: range {bytes:?}");
1561        if let (Some(b'.'), Some(b'.')) = (bytes.first(), bytes.get(1)) {
1562            trace!("-- found leading range indicator");
1563            let starting_error_count = working_set.parse_errors.len();
1564
1565            if let Some(range_expr) = parse_range(working_set, spans[0]) {
1566                trace!("-- successfully parsed range");
1567                return range_expr;
1568            }
1569            working_set.parse_errors.truncate(starting_error_count);
1570        }
1571        trace!("parsing: external call");
1572
1573        // Otherwise, try external command
1574        parse_external_call(working_set, spans, call_span)
1575    }
1576}
1577
1578fn find_decl_with_command_type(
1579    working_set: &StateWorkingSet<'_>,
1580    name: &[u8],
1581    command_type: CommandType,
1582) -> Option<DeclId> {
1583    // Search all known declarations so `%cmd` can still resolve a built-in even when
1584    // a custom command with the same name shadows it in normal visibility lookup.
1585    for idx in (0..working_set.num_decls()).rev() {
1586        let decl_id = DeclId::new(idx);
1587        let decl = working_set.get_decl(decl_id);
1588        if decl.command_type() == command_type && decl.name().as_bytes() == name {
1589            return Some(decl_id);
1590        }
1591    }
1592
1593    None
1594}
1595
1596// Build a command name from spaced spans, preserving the existing parser command-name behavior.
1597fn command_name_from_spans(
1598    working_set: &StateWorkingSet<'_>,
1599    spans: &[Span],
1600    prefix: &[u8],
1601) -> Vec<u8> {
1602    let mut name = Vec::with_capacity(prefix.len() + spans.len() * 2);
1603    name.extend(prefix);
1604
1605    for span in spans {
1606        let name_part = working_set.get_span_contents(*span);
1607        if name.is_empty() {
1608            name.extend(name_part);
1609        } else {
1610            name.push(b' ');
1611            name.extend(name_part);
1612        }
1613    }
1614
1615    name
1616}
1617
1618// Variant of `find_longest_decl` that constrains matches to a specific command type.
1619fn find_longest_decl_with_command_type(
1620    working_set: &StateWorkingSet<'_>,
1621    spans: &[Span],
1622    command_type: CommandType,
1623) -> (
1624    usize,
1625    usize,
1626    Vec<u8>,
1627    Option<nu_protocol::Id<nu_protocol::marker::Decl>>,
1628) {
1629    let mut pos = spans.len();
1630    let cmd_start = 0;
1631    let mut name_spans = spans.to_vec();
1632
1633    let mut name = command_name_from_spans(working_set, &name_spans, b"");
1634
1635    let mut maybe_decl_id = find_decl_with_command_type(working_set, &name, command_type);
1636
1637    while maybe_decl_id.is_none() {
1638        if name_spans.len() <= 1 {
1639            break;
1640        }
1641
1642        name_spans.pop();
1643        pos -= 1;
1644
1645        name = command_name_from_spans(working_set, &name_spans, b"");
1646
1647        maybe_decl_id = find_decl_with_command_type(working_set, &name, command_type);
1648    }
1649
1650    (cmd_start, pos, name, maybe_decl_id)
1651}
1652
1653pub fn find_longest_decl(
1654    working_set: &mut StateWorkingSet<'_>,
1655    spans: &[Span],
1656) -> (
1657    usize,
1658    usize,
1659    Vec<u8>,
1660    Option<nu_protocol::Id<nu_protocol::marker::Decl>>,
1661) {
1662    find_longest_decl_with_prefix(working_set, spans, b"")
1663}
1664
1665pub fn find_longest_decl_with_prefix(
1666    working_set: &mut StateWorkingSet<'_>,
1667    spans: &[Span],
1668    prefix: &[u8],
1669) -> (
1670    usize,
1671    usize,
1672    Vec<u8>,
1673    Option<nu_protocol::Id<nu_protocol::marker::Decl>>,
1674) {
1675    let mut pos = 0;
1676    let cmd_start = pos;
1677    let mut name_spans = vec![];
1678
1679    for word_span in spans[cmd_start..].iter() {
1680        // Find the longest group of words that could form a command
1681
1682        name_spans.push(*word_span);
1683
1684        pos += 1;
1685    }
1686
1687    let mut name = command_name_from_spans(working_set, &name_spans, prefix);
1688
1689    let mut maybe_decl_id = working_set.find_decl(&name);
1690
1691    while maybe_decl_id.is_none() {
1692        // Find the longest command match
1693        if name_spans.len() <= 1 {
1694            // Keep the first word even if it does not match -- could be external command
1695            break;
1696        }
1697
1698        name_spans.pop();
1699        pos -= 1;
1700
1701        name = command_name_from_spans(working_set, &name_spans, prefix);
1702        maybe_decl_id = working_set.find_decl(&name);
1703    }
1704
1705    // If there is a declaration and there are remaining spans, check if it's an alias.
1706    // If it is, try to see if there are sub commands
1707    if let Some(decl_id) = maybe_decl_id
1708        && pos < spans.len()
1709    {
1710        let decl = working_set.get_decl(decl_id);
1711        if let Some(alias) = decl.as_alias() {
1712            // Extract the command name from the alias
1713            // The wrapped_call should be a Call expression for internal commands
1714            if let Expression {
1715                expr: Expr::Call(call),
1716                ..
1717            } = &alias.wrapped_call
1718            {
1719                let aliased_decl_id = call.decl_id;
1720                let aliased_name = working_set.get_decl(aliased_decl_id).name().to_string();
1721
1722                // Try to find a longer match using the aliased command name with remaining spans
1723                let (_, new_pos, new_name, new_decl_id) = find_longest_decl_with_prefix(
1724                    working_set,
1725                    &spans[pos..],
1726                    aliased_name.as_bytes(),
1727                );
1728
1729                // If we find a sub command, use it instead.
1730                if new_decl_id.is_some() && new_pos > 0 {
1731                    let total_pos = pos + new_pos;
1732                    return (cmd_start, total_pos, new_name, new_decl_id);
1733                }
1734            }
1735        }
1736    }
1737
1738    (cmd_start, pos, name, maybe_decl_id)
1739}
1740
1741pub fn parse_attribute(
1742    working_set: &mut StateWorkingSet,
1743    lite_command: &LiteCommand,
1744) -> (Attribute, Option<String>) {
1745    let _ = lite_command
1746        .parts
1747        .first()
1748        .filter(|s| working_set.get_span_contents(**s).starts_with(b"@"))
1749        .expect("Attributes always start with an `@`");
1750
1751    assert!(
1752        lite_command.attribute_idx.is_empty(),
1753        "attributes can't have attributes"
1754    );
1755
1756    let mut spans = lite_command.parts.clone();
1757    if let Some(first) = spans.first_mut() {
1758        first.start += 1;
1759    }
1760    let spans = spans.as_slice();
1761    let attr_span = Span::concat(spans);
1762
1763    let (cmd_start, cmd_end, mut name, decl_id) =
1764        find_longest_decl_with_prefix(working_set, spans, b"attr");
1765
1766    debug_assert!(name.starts_with(b"attr "));
1767    let _ = name.drain(..(b"attr ".len()));
1768
1769    let name_span = Span::concat(&spans[cmd_start..cmd_end]);
1770
1771    let Ok(name) = String::from_utf8(name) else {
1772        working_set.error(ParseError::NonUtf8(name_span));
1773        return (
1774            Attribute {
1775                expr: garbage(working_set, attr_span),
1776            },
1777            None,
1778        );
1779    };
1780
1781    let Some(decl_id) = decl_id else {
1782        working_set.error(ParseError::UnknownCommand(name_span));
1783        return (
1784            Attribute {
1785                expr: garbage(working_set, attr_span),
1786            },
1787            None,
1788        );
1789    };
1790
1791    let decl = working_set.get_decl(decl_id);
1792
1793    let parsed_call = match decl.as_alias() {
1794        // TODO: Once `const def` is available, we should either disallow aliases as attributes OR
1795        // allow them but rather than using the aliases' name, use the name of the aliased command
1796        Some(alias) => match &alias.clone().wrapped_call {
1797            Expression {
1798                expr: Expr::ExternalCall(..),
1799                ..
1800            } => {
1801                let shell_error = ShellError::NotAConstCommand { span: name_span };
1802                working_set.error(shell_error.wrap(working_set, attr_span));
1803                return (
1804                    Attribute {
1805                        expr: garbage(working_set, Span::concat(spans)),
1806                    },
1807                    None,
1808                );
1809            }
1810            _ => {
1811                trace!("parsing: alias of internal call");
1812                parse_internal_call(
1813                    working_set,
1814                    name_span,
1815                    &spans[cmd_end..],
1816                    decl_id,
1817                    ArgumentParsingLevel::Full,
1818                )
1819            }
1820        },
1821        None => {
1822            trace!("parsing: internal call");
1823            parse_internal_call(
1824                working_set,
1825                name_span,
1826                &spans[cmd_end..],
1827                decl_id,
1828                ArgumentParsingLevel::Full,
1829            )
1830        }
1831    };
1832
1833    (
1834        Attribute {
1835            expr: Expression::new(
1836                working_set,
1837                Expr::Call(parsed_call.call),
1838                Span::concat(spans),
1839                parsed_call.output,
1840            ),
1841        },
1842        Some(name),
1843    )
1844}
1845
1846pub fn parse_binary(working_set: &mut StateWorkingSet, span: Span) -> Expression {
1847    trace!("parsing: binary");
1848    let contents = working_set.get_span_contents(span);
1849    if contents.starts_with(b"0x[") {
1850        parse_binary_with_base(working_set, span, 16, 2, b"0x[", b"]")
1851    } else if contents.starts_with(b"0o[") {
1852        parse_binary_with_base(working_set, span, 8, 3, b"0o[", b"]")
1853    } else if contents.starts_with(b"0b[") {
1854        parse_binary_with_base(working_set, span, 2, 8, b"0b[", b"]")
1855    } else {
1856        working_set.error(ParseError::Expected("binary", span));
1857        garbage(working_set, span)
1858    }
1859}
1860
1861fn parse_binary_with_base(
1862    working_set: &mut StateWorkingSet,
1863    span: Span,
1864    base: u32,
1865    min_digits_per_byte: usize,
1866    prefix: &[u8],
1867    suffix: &[u8],
1868) -> Expression {
1869    let token = working_set.get_span_contents(span);
1870
1871    if let Some(token) = token.strip_prefix(prefix)
1872        && let Some(token) = token.strip_suffix(suffix)
1873    {
1874        let (lexed, err) = lex(
1875            token,
1876            span.start + prefix.len(),
1877            &[b',', b'\r', b'\n'],
1878            &[],
1879            true,
1880        );
1881        if let Some(err) = err {
1882            working_set.error(err);
1883        }
1884
1885        let mut binary_value = vec![];
1886        for token in lexed {
1887            match token.contents {
1888                TokenContents::Item => {
1889                    let contents = working_set.get_span_contents(token.span);
1890
1891                    binary_value.extend_from_slice(contents);
1892                }
1893                TokenContents::Pipe
1894                | TokenContents::PipePipe
1895                | TokenContents::ErrGreaterPipe
1896                | TokenContents::OutGreaterThan
1897                | TokenContents::OutErrGreaterPipe
1898                | TokenContents::OutGreaterGreaterThan
1899                | TokenContents::ErrGreaterThan
1900                | TokenContents::ErrGreaterGreaterThan
1901                | TokenContents::OutErrGreaterThan
1902                | TokenContents::OutErrGreaterGreaterThan
1903                | TokenContents::AssignmentOperator => {
1904                    working_set.error(ParseError::Expected("binary", span));
1905                    return garbage(working_set, span);
1906                }
1907                TokenContents::Comment | TokenContents::Semicolon | TokenContents::Eol => {}
1908            }
1909        }
1910
1911        let required_padding =
1912            (min_digits_per_byte - binary_value.len() % min_digits_per_byte) % min_digits_per_byte;
1913
1914        if required_padding != 0 {
1915            binary_value = {
1916                let mut tail = binary_value;
1917                let mut binary_value: Vec<u8> = vec![b'0'; required_padding];
1918                binary_value.append(&mut tail);
1919                binary_value
1920            };
1921        }
1922
1923        let str = String::from_utf8_lossy(&binary_value).to_string();
1924
1925        match decode_with_base(&str, base, min_digits_per_byte) {
1926            Ok(v) => return Expression::new(working_set, Expr::Binary(v), span, Type::Binary),
1927            Err(help) => {
1928                working_set.error(ParseError::InvalidBinaryString(span, help.to_string()));
1929                return garbage(working_set, span);
1930            }
1931        }
1932    }
1933
1934    working_set.error(ParseError::Expected("binary", span));
1935    garbage(working_set, span)
1936}
1937
/// Decodes a string of digits in radix `base` into bytes, `digits_per_byte` digits at a time.
///
/// Each group of `digits_per_byte` characters must consist solely of digits valid in `base`
/// and must denote a value that fits in a `u8`. Sign characters are rejected: a group such as
/// `+1` is not a valid byte, even though `u8::from_str_radix` would accept it.
///
/// Returns the decoded bytes, or a help message describing what the literal may contain.
/// An empty input decodes to an empty vector.
///
/// # Panics
///
/// Panics if `digits_per_byte` is zero or `base` is not a radix accepted by
/// `char::to_digit`.
fn decode_with_base(s: &str, base: u32, digits_per_byte: usize) -> Result<Vec<u8>, &str> {
    let help: &'static str = match base {
        2 => "binary strings may contain only 0 or 1.",
        8 => "octal strings must have a length that is a multiple of three and contain values between 0o000 and 0o377.",
        16 => "hexadecimal strings may contain only the characters 0–9 and A–F.",
        _ => "internal error: radix other than 2, 8, or 16 is not allowed.",
    };

    // Valid digits are ASCII, so chunking bytes matches chunking characters for every
    // accepted input; any non-ASCII byte fails the digit check below.
    s.as_bytes()
        .chunks(digits_per_byte)
        .map(|chunk| {
            chunk
                .iter()
                .try_fold(0u32, |acc, &byte| {
                    let digit = char::from(byte).to_digit(base)?;
                    acc.checked_mul(base)?.checked_add(digit)
                })
                .and_then(|value| u8::try_from(value).ok())
                .ok_or(help)
        })
        .collect()
}
1953
/// Returns `token` as text with every `_` removed.
///
/// Bytes that are not valid UTF-8 are replaced with U+FFFD, as done by
/// `String::from_utf8_lossy`.
fn strip_underscores(token: &[u8]) -> String {
    String::from_utf8_lossy(token).replace('_', "")
}
1960
1961pub fn parse_int(working_set: &mut StateWorkingSet, span: Span) -> Expression {
1962    let token = working_set.get_span_contents(span);
1963
1964    fn extract_int(
1965        working_set: &mut StateWorkingSet,
1966        token: &str,
1967        span: Span,
1968        radix: u32,
1969    ) -> Expression {
1970        // Parse as a u64, then cast to i64, otherwise, for numbers like "0xffffffffffffffef",
1971        // you'll get `Error parsing hex string: number too large to fit in target type`.
1972        if let Ok(num) = u64::from_str_radix(token, radix).map(|val| val as i64) {
1973            Expression::new(working_set, Expr::Int(num), span, Type::Int)
1974        } else {
1975            working_set.error(ParseError::InvalidLiteral(
1976                format!("invalid digits for radix {radix}"),
1977                "int".into(),
1978                span,
1979            ));
1980
1981            garbage(working_set, span)
1982        }
1983    }
1984
1985    let token = strip_underscores(token);
1986
1987    if token.is_empty() {
1988        working_set.error(ParseError::Expected("int", span));
1989        return garbage(working_set, span);
1990    }
1991
1992    if let Some(num) = token.strip_prefix("0b") {
1993        extract_int(working_set, num, span, 2)
1994    } else if let Some(num) = token.strip_prefix("0o") {
1995        extract_int(working_set, num, span, 8)
1996    } else if let Some(num) = token.strip_prefix("0x") {
1997        extract_int(working_set, num, span, 16)
1998    } else if let Ok(num) = token.parse::<i64>() {
1999        Expression::new(working_set, Expr::Int(num), span, Type::Int)
2000    } else {
2001        working_set.error(ParseError::Expected("int", span));
2002        garbage(working_set, span)
2003    }
2004}
2005
2006pub fn parse_float(working_set: &mut StateWorkingSet, span: Span) -> Expression {
2007    let token = working_set.get_span_contents(span);
2008    let token = strip_underscores(token);
2009
2010    if let Ok(x) = token.parse::<f64>() {
2011        Expression::new(working_set, Expr::Float(x), span, Type::Float)
2012    } else {
2013        working_set.error(ParseError::Expected("float", span));
2014
2015        garbage(working_set, span)
2016    }
2017}
2018
2019pub fn parse_number(working_set: &mut StateWorkingSet, span: Span) -> Expression {
2020    let starting_error_count = working_set.parse_errors.len();
2021
2022    let result = parse_int(working_set, span);
2023    if starting_error_count == working_set.parse_errors.len() {
2024        return result;
2025    } else if let Some(ParseError::Expected(_, _)) = working_set.parse_errors.last() {
2026        working_set.parse_errors.truncate(starting_error_count);
2027    }
2028
2029    let result = parse_float(working_set, span);
2030
2031    if starting_error_count == working_set.parse_errors.len() {
2032        return result;
2033    }
2034    working_set.parse_errors.truncate(starting_error_count);
2035
2036    working_set.error(ParseError::Expected("number", span));
2037    garbage(working_set, span)
2038}
2039
2040pub fn parse_range(working_set: &mut StateWorkingSet, span: Span) -> Option<Expression> {
2041    trace!("parsing: range");
2042    let starting_error_count = working_set.parse_errors.len();
2043
2044    // Range follows the following syntax: [<from>][<next_operator><next>]<range_operator>[<to>]
2045    //   where <next_operator> is ".."
2046    //   and  <range_operator> is "..", "..=" or "..<"
2047    //   and one of the <from> or <to> bounds must be present (just '..' is not allowed since it
2048    //     looks like parent directory)
2049    //bugbug range cannot be [..] because that looks like parent directory
2050
2051    let contents = working_set.get_span_contents(span);
2052
2053    let Ok(token) = String::from_utf8(contents.into()) else {
2054        working_set.error(ParseError::NonUtf8(span));
2055        return None;
2056    };
2057
2058    if token.starts_with("...") {
2059        working_set.error(ParseError::Expected(
2060            "range operator ('..'), got spread ('...')",
2061            span,
2062        ));
2063        return None;
2064    }
2065
2066    if !token.contains("..") {
2067        working_set.error(ParseError::Expected("at least one range bound set", span));
2068        return None;
2069    }
2070
2071    let dotdot_pos: Vec<_> = token
2072        .match_indices("..")
2073        .filter_map(|(pos, _)| {
2074            // paren_depth = count of unclosed parens prior to pos
2075            let before = &token[..pos];
2076            let paren_opened = before.chars().filter(|&c| c == '(').count();
2077            let paren_closed = before.chars().filter(|&c| c == ')').count();
2078            let paren_depth = paren_opened.checked_sub(paren_closed)?;
2079            (paren_depth == 0).then_some(pos)
2080        })
2081        .collect();
2082
2083    let (next_op_pos, range_op_pos) = match dotdot_pos.len() {
2084        1 => (None, dotdot_pos[0]),
2085        2 => (Some(dotdot_pos[0]), dotdot_pos[1]),
2086        _ => {
2087            working_set.error(ParseError::Expected(
2088                "one range operator ('..' or '..<') and optionally one next operator ('..')",
2089                span,
2090            ));
2091            return None;
2092        }
2093    };
2094    // Avoid calling sub-parsers on unmatched parens, to prevent quadratic time on things like ((((1..2))))
2095    // No need to call the expensive parse_value on "((((1"
2096    if dotdot_pos[0] > 0 {
2097        let (_tokens, err) = lex(
2098            &contents[..dotdot_pos[0]],
2099            span.start,
2100            &[],
2101            &[b'.', b'?', b'!'],
2102            true,
2103        );
2104        if let Some(_err) = err {
2105            working_set.error(ParseError::Expected("Valid expression before ..", span));
2106            return None;
2107        }
2108    }
2109
2110    let (inclusion, range_op_str, range_op_span) = if let Some(pos) = token.find("..<") {
2111        if pos == range_op_pos {
2112            let op_str = "..<";
2113            let op_span = Span::new(
2114                span.start + range_op_pos,
2115                span.start + range_op_pos + op_str.len(),
2116            );
2117            (RangeInclusion::RightExclusive, "..<", op_span)
2118        } else {
2119            working_set.error(ParseError::Expected(
2120                "inclusive operator preceding second range bound",
2121                span,
2122            ));
2123            return None;
2124        }
2125    } else {
2126        let op_str = if token[range_op_pos..].starts_with("..=") {
2127            "..="
2128        } else {
2129            ".."
2130        };
2131
2132        let op_span = Span::new(
2133            span.start + range_op_pos,
2134            span.start + range_op_pos + op_str.len(),
2135        );
2136        (RangeInclusion::Inclusive, op_str, op_span)
2137    };
2138
2139    // Now, based on the operator positions, figure out where the bounds & next are located and
2140    // parse them
2141    // TODO: Actually parse the next number in the range
2142    let from = if token.starts_with("..") {
2143        // token starts with either next operator, or range operator -- we don't care which one
2144        None
2145    } else {
2146        let from_span = Span::new(span.start, span.start + dotdot_pos[0]);
2147        Some(parse_value(working_set, from_span, &SyntaxShape::Number))
2148    };
2149
2150    let to = if token.ends_with(range_op_str) {
2151        None
2152    } else {
2153        let to_span = Span::new(range_op_span.end, span.end);
2154        Some(parse_value(working_set, to_span, &SyntaxShape::Number))
2155    };
2156
2157    trace!("-- from: {from:?} to: {to:?}");
2158
2159    if let (None, None) = (&from, &to) {
2160        working_set.error(ParseError::Expected("at least one range bound set", span));
2161        return None;
2162    }
2163
2164    let (next, next_op_span) = if let Some(pos) = next_op_pos {
2165        let next_op_span = Span::new(span.start + pos, span.start + pos + "..".len());
2166        let next_span = Span::new(next_op_span.end, range_op_span.start);
2167
2168        (
2169            Some(parse_value(working_set, next_span, &SyntaxShape::Number)),
2170            next_op_span,
2171        )
2172    } else {
2173        (None, span)
2174    };
2175
2176    if working_set.parse_errors.len() != starting_error_count {
2177        return None;
2178    }
2179
2180    let operator = RangeOperator {
2181        inclusion,
2182        span: range_op_span,
2183        next_op_span,
2184    };
2185
2186    let mut range = Range {
2187        from,
2188        next,
2189        to,
2190        operator,
2191    };
2192
2193    check_range_types(working_set, &mut range);
2194
2195    Some(Expression::new(
2196        working_set,
2197        Expr::Range(Box::new(range)),
2198        span,
2199        Type::Range,
2200    ))
2201}
2202
2203pub(crate) fn parse_dollar_expr(working_set: &mut StateWorkingSet, span: Span) -> Expression {
2204    trace!("parsing: dollar expression");
2205    let contents = working_set.get_span_contents(span);
2206
2207    if contents.starts_with(b"$\"") || contents.starts_with(b"$'") {
2208        parse_string_interpolation(working_set, span)
2209    } else if contents.starts_with(b"$.") {
2210        parse_simple_cell_path(working_set, Span::new(span.start + 2, span.end))
2211    } else {
2212        let starting_error_count = working_set.parse_errors.len();
2213
2214        if let Some(expr) = parse_range(working_set, span) {
2215            expr
2216        } else {
2217            working_set.parse_errors.truncate(starting_error_count);
2218            parse_full_cell_path(working_set, None, span)
2219        }
2220    }
2221}
2222
2223pub fn parse_raw_string(working_set: &mut StateWorkingSet, span: Span) -> Expression {
2224    trace!("parsing: raw-string, with required delimiters");
2225
2226    let bytes = working_set.get_span_contents(span);
2227
2228    let prefix_sharp_cnt = if bytes.starts_with(b"r#") {
2229        // actually `sharp_cnt` is always `index - 1`
2230        // but create a variable here to make it clearer.
2231        let mut sharp_cnt = 1;
2232        let mut index = 2;
2233        while index < bytes.len() && bytes[index] == b'#' {
2234            index += 1;
2235            sharp_cnt += 1;
2236        }
2237        sharp_cnt
2238    } else {
2239        working_set.error(ParseError::Expected("r#", span));
2240        return garbage(working_set, span);
2241    };
2242    let expect_postfix_sharp_cnt = prefix_sharp_cnt;
2243    // check the length of whole raw string.
2244    // the whole raw string should contains at least
2245    // 1(r) + prefix_sharp_cnt + 1(') + 1(') + postfix_sharp characters
2246    if bytes.len() < prefix_sharp_cnt + expect_postfix_sharp_cnt + 3 {
2247        working_set.error(ParseError::Unclosed('\''.into(), span));
2248        return garbage(working_set, span);
2249    }
2250
2251    // check for unbalanced # and single quotes.
2252    let postfix_bytes = &bytes[bytes.len() - expect_postfix_sharp_cnt..bytes.len()];
2253    if postfix_bytes.iter().any(|b| *b != b'#') {
2254        working_set.error(ParseError::Unbalanced(
2255            "prefix #".to_string(),
2256            "postfix #".to_string(),
2257            span,
2258        ));
2259        return garbage(working_set, span);
2260    }
2261    // check for unblanaced single quotes.
2262    if bytes[1 + prefix_sharp_cnt] != b'\''
2263        || bytes[bytes.len() - expect_postfix_sharp_cnt - 1] != b'\''
2264    {
2265        working_set.error(ParseError::Unclosed('\''.into(), span));
2266        return garbage(working_set, span);
2267    }
2268
2269    let bytes = &bytes[prefix_sharp_cnt + 1 + 1..bytes.len() - 1 - prefix_sharp_cnt];
2270    if let Ok(token) = String::from_utf8(bytes.into()) {
2271        Expression::new(working_set, Expr::RawString(token), span, Type::String)
2272    } else {
2273        working_set.error(ParseError::Expected("utf8 raw-string", span));
2274        garbage(working_set, span)
2275    }
2276}
2277
2278pub fn parse_paren_expr(
2279    working_set: &mut StateWorkingSet,
2280    span: Span,
2281    shape: &SyntaxShape,
2282) -> Expression {
2283    let starting_error_count = working_set.parse_errors.len();
2284
2285    if let Some(expr) = parse_range(working_set, span) {
2286        return expr;
2287    }
2288
2289    working_set.parse_errors.truncate(starting_error_count);
2290
2291    if let SyntaxShape::Signature = shape {
2292        return parse_signature(working_set, span, false);
2293    }
2294
2295    if let SyntaxShape::ExternalSignature = shape {
2296        return parse_signature(working_set, span, true);
2297    }
2298
2299    let fcp_expr = parse_full_cell_path(working_set, None, span);
2300    let fcp_error_count = working_set.parse_errors.len();
2301    if fcp_error_count > starting_error_count {
2302        let malformed_subexpr = working_set.parse_errors[starting_error_count..]
2303            .first()
2304            .is_some_and(|e| match e {
2305                ParseError::Unclosed(right, _) if (right == ")") => true,
2306                ParseError::Unbalanced(left, right, _) if left == "(" && right == ")" => true,
2307                _ => false,
2308            });
2309        if malformed_subexpr {
2310            working_set.parse_errors.truncate(starting_error_count);
2311            parse_string_interpolation(working_set, span)
2312        } else {
2313            fcp_expr
2314        }
2315    } else {
2316        fcp_expr
2317    }
2318}
2319
2320pub fn parse_brace_expr(
2321    working_set: &mut StateWorkingSet,
2322    span: Span,
2323    shape: &SyntaxShape,
2324) -> Expression {
2325    // Try to detect what kind of value we're about to parse
2326    // FIXME: In the future, we should work over the token stream so we only have to do this once
2327    // before parsing begins
2328
2329    // FIXME: we're still using the shape because we rely on it to know how to handle syntax where
2330    // the parse is ambiguous. We'll need to update the parts of the grammar where this is ambiguous
2331    // and then revisit the parsing.
2332
2333    if span.end <= (span.start + 1) {
2334        working_set.error(ParseError::ExpectedWithStringMsg(
2335            format!("non-block value: {shape}"),
2336            span,
2337        ));
2338        return Expression::garbage(working_set, span);
2339    }
2340    let bytes = working_set.get_span_contents(Span::new(span.start + 1, span.end - 1));
2341    let (tokens, _) = lex(bytes, span.start + 1, &[b'\r', b'\n', b'\t'], &[b':'], true);
2342
2343    match tokens.as_slice() {
2344        // If we're empty, that means an empty record or closure
2345        [] => match shape {
2346            SyntaxShape::Closure(_) => parse_closure_expression(working_set, shape, span),
2347            SyntaxShape::Block => parse_block_expression(working_set, span),
2348            SyntaxShape::MatchBlock => parse_match_block_expression(working_set, span),
2349            _ => parse_record(working_set, span),
2350        },
2351        [
2352            Token {
2353                contents: TokenContents::Pipe | TokenContents::PipePipe,
2354                ..
2355            },
2356            ..,
2357        ] => {
2358            if let SyntaxShape::Block = shape {
2359                working_set.error(ParseError::Mismatch("block".into(), "closure".into(), span));
2360                return Expression::garbage(working_set, span);
2361            }
2362            parse_closure_expression(working_set, shape, span)
2363        }
2364        [_, third, ..] if working_set.get_span_contents(third.span) == b":" => {
2365            parse_full_cell_path(working_set, None, span)
2366        }
2367        [second, ..] => {
2368            let second_bytes = working_set.get_span_contents(second.span);
2369            match shape {
2370                SyntaxShape::Closure(_) => parse_closure_expression(working_set, shape, span),
2371                SyntaxShape::Block => parse_block_expression(working_set, span),
2372                SyntaxShape::MatchBlock => parse_match_block_expression(working_set, span),
2373                // For edge case of `{}.foo?`, #17896
2374                _ if second_bytes == b"}" => parse_full_cell_path(working_set, None, span),
2375                _ if second_bytes.starts_with(b"...")
2376                    && second_bytes.get(3).is_some_and(|c| b"${(".contains(c)) =>
2377                {
2378                    parse_record(working_set, span)
2379                }
2380                SyntaxShape::Any => parse_closure_expression(working_set, shape, span),
2381                _ => {
2382                    working_set.error(ParseError::ExpectedWithStringMsg(
2383                        format!("non-block value: {shape}"),
2384                        span,
2385                    ));
2386
2387                    Expression::garbage(working_set, span)
2388                }
2389            }
2390        }
2391    }
2392}
2393
/// Parses a string interpolation into an `Expr::StringInterpolation` expression.
///
/// Three input forms are recognised from the first bytes of `span`:
/// * `$"..."`: double-quoted; literal segments are run through `unescape_string`
///   and a `(` preceded by an odd number of backslashes does not start an expression.
/// * `$'...'`: single-quoted; literal segments are taken verbatim.
/// * anything else: the whole span is scanned, with segments taken verbatim.
///
/// The result is a list of parts in source order: `Expr::String` for literal text
/// and whatever `parse_full_cell_path` returns for each parenthesised section.
/// Errors are recorded on `working_set`; a garbage expression is returned only when
/// `check_string_no_trailing_tokens` rejects the input.
pub fn parse_string_interpolation(working_set: &mut StateWorkingSet, span: Span) -> Expression {
    // Whether the scanner is currently inside literal text or inside a `( ... )` section.
    #[derive(PartialEq, Eq, Debug)]
    enum InterpolationMode {
        String,
        Expression,
    }

    let contents = working_set.get_span_contents(span);

    // Only the `$"` form processes escape sequences.
    let mut double_quote = false;

    // `start..end` is the region between the quotes (or the whole span when unquoted).
    let (start, end) = if contents.starts_with(b"$\"") {
        double_quote = true;

        // NOTE(review): presumably rejects text following the closing quote — confirm
        // against `check_string_no_trailing_tokens`.
        if let Err(err) = check_string_no_trailing_tokens(contents, span, 1, b'\"') {
            working_set.error(err);
            return garbage(working_set, span);
        }

        // Drop the closing quote when present; `len() > 2` keeps a bare `$"` from
        // treating its opening quote as the closing one.
        let end = if contents.ends_with(b"\"") && contents.len() > 2 {
            span.end - 1
        } else {
            span.end
        };
        (span.start + 2, end)
    } else if contents.starts_with(b"$'") {
        if let Err(err) = check_string_no_trailing_tokens(contents, span, 1, b'\'') {
            working_set.error(err);
            return garbage(working_set, span);
        }

        let end = if contents.ends_with(b"'") && contents.len() > 2 {
            span.end - 1
        } else {
            span.end
        };
        (span.start + 2, end)
    } else {
        (span.start, span.end)
    };

    let inner_span = Span::new(start, end);
    // Copied so the buffer can be indexed while `working_set` is mutated below.
    let contents = working_set.get_span_contents(inner_span).to_vec();

    // Parts of the interpolation, in source order.
    let mut output = vec![];
    let mut mode = InterpolationMode::String;
    // Absolute offset where the part currently being scanned begins.
    let mut token_start = start;

    // Bytes that open or close a nested region inside an expression section.
    #[repr(u8)]
    #[derive(Clone, Copy, PartialEq, Eq)]
    enum Delimiter {
        SingleQuote = b'\'',
        DoubleQuote = b'"',
        Backtick = b'`',
        ParenLeft = b'(',
        ParenRight = b')',
    }

    impl Delimiter {
        /// Maps a byte to its delimiter, or `None` for any other byte.
        const fn from_u8(b: u8) -> Option<Self> {
            Some(match b {
                b'\'' => Self::SingleQuote,
                b'"' => Self::DoubleQuote,
                b'`' => Self::Backtick,
                b'(' => Self::ParenLeft,
                b')' => Self::ParenRight,
                _ => return None,
            })
        }
        /// True for either parenthesis.
        const fn is_paren(self) -> bool {
            matches!(self, Self::ParenLeft | Self::ParenRight)
        }
        /// The delimiter that closes `self`: parentheses swap, quotes close themselves.
        const fn pair(self) -> Self {
            match self {
                Self::ParenLeft => Self::ParenRight,
                Self::ParenRight => Self::ParenLeft,
                _ => self,
            }
        }
    }
    // Stack of the *closing* delimiters still awaited inside the current expression.
    let mut delimiter_stack: Vec<Delimiter> = vec![];

    // Length of the run of backslashes immediately before the current byte
    // (tracked in String mode only).
    let mut consecutive_backslashes: usize = 0;

    // Absolute offset of the byte being examined.
    let mut b = start;

    while b != end {
        let current_byte = contents[b - start];

        if mode == InterpolationMode::String {
            let preceding_consecutive_backslashes = consecutive_backslashes;

            let is_backslash = current_byte == b'\\';
            consecutive_backslashes = if is_backslash {
                preceding_consecutive_backslashes + 1
            } else {
                0
            };

            // A `(` starts an expression unless, in a double-quoted string, it is
            // escaped by an odd number of backslashes.
            if current_byte == b'('
                && (!double_quote || preceding_consecutive_backslashes.is_multiple_of(2))
            {
                mode = InterpolationMode::Expression;
                // Flush the literal text collected before this `(`.
                if token_start < b {
                    let span = Span::new(token_start, b);
                    let str_contents = working_set.get_span_contents(span);

                    let (str_contents, err) = if double_quote {
                        unescape_string(str_contents, span)
                    } else {
                        (str_contents.to_vec(), None)
                    };
                    if let Some(err) = err {
                        working_set.error(err);
                    }

                    output.push(Expression::new(
                        working_set,
                        Expr::String(String::from_utf8_lossy(&str_contents).to_string()),
                        span,
                        Type::String,
                    ));
                    token_start = b;
                }
            }
        }

        // Deliberately not an `else`: the `(` that switched modes above is processed
        // here in the same iteration so that its closing `)` is pushed on the stack.
        if mode == InterpolationMode::Expression {
            let byte = Delimiter::from_u8(current_byte);
            match (delimiter_stack.last().copied(), byte) {
                // Inside a quoted region: only the matching quote is significant.
                (Some(d), Some(byte)) if !d.is_paren() => {
                    if byte == d {
                        delimiter_stack.pop();
                    }
                }
                // Any opener (`(` or a quote) outside a quoted region: await its closer.
                (_, Some(byte)) if byte != Delimiter::ParenRight => {
                    delimiter_stack.push(byte.pair())
                }
                // A `)` outside a quoted region.
                (d, Some(Delimiter::ParenRight)) => {
                    if let Some(Delimiter::ParenRight) = d {
                        delimiter_stack.pop();
                    }
                    // The outermost parenthesis has closed: the expression is complete.
                    if delimiter_stack.is_empty() {
                        mode = InterpolationMode::String;

                        if token_start < b {
                            // `b + 1` so the span includes the closing `)`.
                            let span = Span::new(token_start, b + 1);

                            let expr = parse_full_cell_path(working_set, None, span);
                            output.push(expr);
                        }

                        token_start = b + 1;
                        // `b` is not advanced here: the `)` is visited once more in
                        // String mode, which resets the backslash count before the
                        // `b += 1` below steps past it.
                        continue;
                    }
                }
                _ => (),
            }
        }
        b += 1;
    }

    // Flush whatever is left after the last mode switch.
    match mode {
        InterpolationMode::String => {
            if token_start < end {
                let span = Span::new(token_start, end);
                let str_contents = working_set.get_span_contents(span);

                let (str_contents, err) = if double_quote {
                    unescape_string(str_contents, span)
                } else {
                    (str_contents.to_vec(), None)
                };
                if let Some(err) = err {
                    working_set.error(err);
                }

                output.push(Expression::new(
                    working_set,
                    Expr::String(String::from_utf8_lossy(&str_contents).to_string()),
                    span,
                    Type::String,
                ));
            }
        }
        // Input ended inside an expression; the unterminated remainder is handed to
        // `parse_full_cell_path` as-is.
        InterpolationMode::Expression => {
            if token_start < end {
                let span = Span::new(token_start, end);
                let expr = parse_full_cell_path(working_set, None, span);
                output.push(expr);
            }
        }
    }

    Expression::new(
        working_set,
        Expr::StringInterpolation(output),
        span,
        Type::String,
    )
}
2595
2596pub fn parse_variable_expr(working_set: &mut StateWorkingSet, span: Span) -> Expression {
2597    let contents = working_set.get_span_contents(span);
2598
2599    if contents == b"$nu" {
2600        return Expression::new(
2601            working_set,
2602            Expr::Var(nu_protocol::NU_VARIABLE_ID),
2603            span,
2604            Type::Any,
2605        );
2606    } else if contents == b"$in" {
2607        return Expression::new(
2608            working_set,
2609            Expr::Var(nu_protocol::IN_VARIABLE_ID),
2610            span,
2611            Type::Any,
2612        );
2613    } else if contents == b"$env" {
2614        return Expression::new(
2615            working_set,
2616            Expr::Var(nu_protocol::ENV_VARIABLE_ID),
2617            span,
2618            Type::Any,
2619        );
2620    }
2621
2622    let name = if contents.starts_with(b"$") {
2623        String::from_utf8_lossy(&contents[1..]).to_string()
2624    } else {
2625        String::from_utf8_lossy(contents).to_string()
2626    };
2627
2628    let bytes = working_set.get_span_contents(span);
2629    let suggestion = || {
2630        DidYouMean::new(
2631            &working_set.list_variables(),
2632            working_set.get_span_contents(span),
2633        )
2634    };
2635    if !is_variable(bytes) {
2636        working_set.error(ParseError::ExpectedWithDidYouMean(
2637            "valid variable name",
2638            suggestion(),
2639            span,
2640        ));
2641        garbage(working_set, span)
2642    } else if let Some(id) = working_set.find_variable(bytes) {
2643        Expression::new(
2644            working_set,
2645            Expr::Var(id),
2646            span,
2647            working_set.get_variable(id).ty.clone(),
2648        )
2649    } else if working_set.get_env_var(&name).is_some() {
2650        working_set.error(ParseError::EnvVarNotVar(name, span));
2651        garbage(working_set, span)
2652    } else {
2653        working_set.error(ParseError::VariableNotFound(suggestion(), span));
2654        garbage(working_set, span)
2655    }
2656}
2657
/// Parses a stream of lexed tokens into the members of a cell path.
///
/// The tokens are expected to alternate between path members (an integer or a
/// string) and single-character separators/modifiers: `.` introduces the next
/// member, `?` marks the preceding member optional and `!` marks it
/// case-insensitive.
///
/// * `tokens` - the tokens to consume, in source order.
/// * `expect_dot` - when `true` the first token must be a `.`; when `false` the
///   first token is itself a path member.
///
/// Returns the members parsed so far. On the first unexpected token a parse error
/// is recorded on `working_set` and the partial list is returned.
pub fn parse_cell_path(
    working_set: &mut StateWorkingSet,
    tokens: impl Iterator<Item = Token>,
    expect_dot: bool,
) -> Vec<PathMember> {
    // What the state machine will accept as the next token.
    enum TokenType {
        Dot,              // .
        DotOrSign,        // . or ? or !
        DotOrExclamation, // . or !
        DotOrQuestion,    // . or ?
        PathMember,       // an int or string, like `1` or `foo`
    }

    // How an accepted token changes the most recently parsed member.
    enum ModifyMember {
        No,
        Optional,
        Insensitive,
    }

    impl TokenType {
        /// Advances the state for `byte`, returning the modifier to apply to the
        /// previous member, or a description of what was expected instead.
        fn expect(&mut self, byte: u8) -> Result<ModifyMember, &'static str> {
            match (&*self, byte) {
                // The loop below handles the `PathMember` state itself and never
                // calls `expect` in it; this arm keeps the match exhaustive.
                (Self::PathMember, _) => {
                    *self = Self::DotOrSign;
                    Ok(ModifyMember::No)
                }
                // A dot is valid after a member or after any modifier.
                (
                    Self::Dot | Self::DotOrSign | Self::DotOrExclamation | Self::DotOrQuestion,
                    b'.',
                ) => {
                    *self = Self::PathMember;
                    Ok(ModifyMember::No)
                }
                // First modifier after a member: the other modifier may still follow.
                (Self::DotOrSign, b'!') => {
                    *self = Self::DotOrQuestion;
                    Ok(ModifyMember::Insensitive)
                }
                (Self::DotOrSign, b'?') => {
                    *self = Self::DotOrExclamation;
                    Ok(ModifyMember::Optional)
                }
                (Self::DotOrSign, _) => Err(". or ! or ?"),
                // Second modifier: after it only a dot is accepted.
                (Self::DotOrExclamation, b'!') => {
                    *self = Self::Dot;
                    Ok(ModifyMember::Insensitive)
                }
                (Self::DotOrExclamation, _) => Err(". or !"),
                (Self::DotOrQuestion, b'?') => {
                    *self = Self::Dot;
                    Ok(ModifyMember::Optional)
                }
                (Self::DotOrQuestion, _) => Err(". or ?"),
                (Self::Dot, _) => Err("."),
            }
        }
    }

    // Parsing a cell path is essentially a state machine, and this is the state
    let mut expected_token = if expect_dot {
        TokenType::Dot
    } else {
        TokenType::PathMember
    };

    let mut tail = vec![];

    for path_element in tokens {
        let bytes = working_set.get_span_contents(path_element.span);

        // both parse_int and parse_string require their source to be non-empty
        // all cases where `bytes` is empty is an error
        let Some((&first, rest)) = bytes.split_first() else {
            working_set.error(ParseError::Expected("string", path_element.span));
            return tail;
        };
        let single_char = rest.is_empty();

        if let TokenType::PathMember = expected_token {
            let starting_error_count = working_set.parse_errors.len();

            // Try an integer index first; errors from that attempt are discarded
            // so that a non-integer can fall back to a string member.
            let expr = parse_int(working_set, path_element.span);
            working_set.parse_errors.truncate(starting_error_count);

            match expr {
                Expression {
                    expr: Expr::Int(val),
                    span,
                    ..
                } => tail.push(PathMember::Int {
                    // NOTE(review): `as usize` would wrap a negative value — confirm
                    // whether `parse_int` can yield one here.
                    val: val as usize,
                    span,
                    optional: false,
                }),
                _ => {
                    let result = parse_string(working_set, path_element.span);
                    match result {
                        Expression {
                            expr: Expr::String(string),
                            span,
                            ..
                        } => {
                            tail.push(PathMember::String {
                                val: string,
                                span,
                                optional: false,
                                casing: Casing::Sensitive,
                            });
                        }
                        _ => {
                            working_set.error(ParseError::Expected("string", path_element.span));
                            return tail;
                        }
                    }
                }
            }
            expected_token = TokenType::DotOrSign;
        } else {
            // Separators and modifiers are single bytes; a longer token is passed
            // as a space, which no state accepts, so it is reported as an error.
            match expected_token.expect(if single_char { first } else { b' ' }) {
                Ok(modify) => {
                    if let Some(last) = tail.last_mut() {
                        match modify {
                            ModifyMember::No => {}
                            ModifyMember::Optional => last.make_optional(),
                            ModifyMember::Insensitive => last.make_insensitive(),
                        }
                    };
                }
                Err(expected) => {
                    working_set.error(ParseError::Expected(expected, path_element.span));
                    return tail;
                }
            }
        }
    }

    tail
}
2795
2796pub fn parse_simple_cell_path(working_set: &mut StateWorkingSet, span: Span) -> Expression {
2797    let source = working_set.get_span_contents(span);
2798
2799    let (tokens, err) = lex(
2800        source,
2801        span.start,
2802        &[b'\n', b'\r'],
2803        &[b'.', b'?', b'!'],
2804        true,
2805    );
2806    if let Some(err) = err {
2807        working_set.error(err)
2808    }
2809
2810    let tokens = tokens.into_iter().peekable();
2811
2812    let cell_path = parse_cell_path(working_set, tokens, false);
2813
2814    Expression::new(
2815        working_set,
2816        Expr::CellPath(CellPath { members: cell_path }),
2817        span,
2818        Type::CellPath,
2819    )
2820}
2821
2822pub fn parse_full_cell_path(
2823    working_set: &mut StateWorkingSet,
2824    implicit_head: Option<VarId>,
2825    span: Span,
2826) -> Expression {
2827    trace!("parsing: full cell path");
2828    let full_cell_span = span;
2829    let source = working_set.get_span_contents(span);
2830
2831    let (tokens, err) = lex(
2832        source,
2833        span.start,
2834        &[b'\n', b'\r'],
2835        &[b'.', b'?', b'!'],
2836        true,
2837    );
2838    if let Some(err) = err {
2839        working_set.error(err)
2840    }
2841
2842    let mut tokens = tokens.into_iter().peekable();
2843    if let Some(head) = tokens.peek() {
2844        let bytes = working_set.get_span_contents(head.span);
2845        let (head, expect_dot) = if bytes.starts_with(b"(") {
2846            trace!("parsing: paren-head of full cell path");
2847
2848            let head_span = head.span;
2849            let mut start = head.span.start;
2850            let mut end = head.span.end;
2851            let mut is_closed = true;
2852
2853            if bytes.starts_with(b"(") {
2854                start += 1;
2855            }
2856            if bytes.ends_with(b")") {
2857                end -= 1;
2858            } else {
2859                working_set.error(ParseError::Unclosed(")".into(), Span::new(end, end)));
2860                is_closed = false;
2861            }
2862
2863            let span = Span::new(start, end);
2864
2865            let source = working_set.get_span_contents(span);
2866
2867            let (output, err) = lex(source, span.start, &[b'\n', b'\r'], &[], true);
2868            if let Some(err) = err {
2869                working_set.error(err)
2870            }
2871
2872            // Creating a Type scope to parse the new block. This will keep track of
2873            // the previous input type found in that block
2874            let output = parse_block(working_set, &output, span, is_closed, true);
2875
2876            let ty = output.output_type();
2877
2878            let block_id = working_set.add_block(Arc::new(output));
2879            tokens.next();
2880
2881            (
2882                Expression::new(working_set, Expr::Subexpression(block_id), head_span, ty),
2883                true,
2884            )
2885        } else if bytes.starts_with(b"[") {
2886            trace!("parsing: table head of full cell path");
2887
2888            let output = parse_table_expression(working_set, head.span, &SyntaxShape::Any);
2889
2890            tokens.next();
2891
2892            (output, true)
2893        } else if bytes.starts_with(b"{") {
2894            trace!("parsing: record head of full cell path");
2895            let output = parse_record(working_set, head.span);
2896
2897            tokens.next();
2898
2899            (output, true)
2900        } else if bytes.starts_with(b"$") {
2901            trace!("parsing: $variable head of full cell path");
2902
2903            let out = parse_variable_expr(working_set, head.span);
2904
2905            tokens.next();
2906
2907            (out, true)
2908        } else if let Some(var_id) = implicit_head {
2909            trace!("parsing: implicit head of full cell path");
2910            (
2911                Expression::new(working_set, Expr::Var(var_id), head.span, Type::Any),
2912                false,
2913            )
2914        } else {
2915            working_set.error(ParseError::Mismatch(
2916                "variable or subexpression".into(),
2917                String::from_utf8_lossy(bytes).to_string(),
2918                span,
2919            ));
2920            return garbage(working_set, span);
2921        };
2922
2923        let tail = parse_cell_path(working_set, tokens, expect_dot);
2924        let ty = if !tail.is_empty() {
2925            if nu_experimental::CELL_PATH_TYPES.get() {
2926                head.ty
2927                    .follow_cell_path(&tail)
2928                    .map(|ty| ty.into_owned())
2929                    .unwrap_or(Type::Any)
2930            } else {
2931                Type::Any
2932            }
2933        } else {
2934            head.ty.clone()
2935        };
2936
2937        Expression::new(
2938            working_set,
2939            Expr::FullCellPath(Box::new(FullCellPath { head, tail })),
2940            full_cell_span,
2941            ty,
2942        )
2943    } else {
2944        garbage(working_set, span)
2945    }
2946}
2947
2948enum PathLikeKind {
2949    Directory,
2950    Filepath,
2951    Glob,
2952}
2953
2954impl PathLikeKind {
2955    /// Returns the name used for trace logging during parsing.
2956    fn trace_name(&self) -> &'static str {
2957        match self {
2958            PathLikeKind::Directory => "directory",
2959            PathLikeKind::Filepath => "filepath",
2960            PathLikeKind::Glob => "glob pattern",
2961        }
2962    }
2963
2964    /// Returns the error message displayed when parsing fails.
2965    fn error_msg(&self) -> &'static str {
2966        match self {
2967            PathLikeKind::Directory => "directory",
2968            PathLikeKind::Filepath => "filepath",
2969            PathLikeKind::Glob => "glob pattern string",
2970        }
2971    }
2972
2973    /// Constructs the appropriate `Expr` and its corresponding `Type` for a simple (non-interpolated) path.
2974    fn to_expr(&self, token: String, quoted: bool) -> (Expr, Type) {
2975        match self {
2976            PathLikeKind::Directory => (Expr::Directory(token, quoted), Type::String),
2977            PathLikeKind::Filepath => (Expr::Filepath(token, quoted), Type::String),
2978            PathLikeKind::Glob => (Expr::GlobPattern(token, quoted), Type::Glob),
2979        }
2980    }
2981
2982    /// Constructs the appropriate interpolation `Expr` for a path containing subexpressions.
2983    fn to_interpolation_expr(&self, exprs: Vec<Expression>, quoted: bool) -> Expr {
2984        match self {
2985            PathLikeKind::Directory | PathLikeKind::Filepath => Expr::StringInterpolation(exprs),
2986            PathLikeKind::Glob => Expr::GlobInterpolation(exprs, quoted),
2987        }
2988    }
2989}
2990
2991/// Common helper for parsing path-like expressions (filepath, directory, glob pattern).
2992///
2993/// This function consolidates the repetitive logic for parsing path types, including:
2994/// - Bare word interpolation detection
2995/// - Escape sequence processing
2996/// - Quote state tracking
2997/// - Error handling
2998///
2999/// # Arguments
3000///
3001/// * `working_set` - The current parser state
3002/// * `span` - The source span of the expression
3003/// * `kind` - The kind of path-like expression to parse
3004fn parse_path_like(
3005    working_set: &mut StateWorkingSet,
3006    span: Span,
3007    kind: PathLikeKind,
3008) -> Expression {
3009    let bytes = working_set.get_span_contents(span);
3010    let quoted = is_quoted(bytes);
3011    trace!("parsing: {}", kind.trace_name());
3012
3013    // Check for bare word interpolation
3014    if is_bare_string_interpolation(bytes) {
3015        let interpolation_expr = parse_string_interpolation(working_set, span);
3016
3017        // Convert StringInterpolation to the appropriate interpolation type
3018        if let Expr::StringInterpolation(exprs) = interpolation_expr.expr {
3019            return Expression::new(
3020                working_set,
3021                kind.to_interpolation_expr(exprs, quoted),
3022                span,
3023                interpolation_expr.ty.clone(),
3024            );
3025        }
3026
3027        return interpolation_expr;
3028    }
3029
3030    let (token, err) = unescape_unquote_string(bytes, span);
3031    let is_quoted_internal = is_quoted(bytes);
3032
3033    if err.is_none() {
3034        trace!("-- found {token}");
3035
3036        let (expr, ty) = kind.to_expr(token, is_quoted_internal);
3037
3038        Expression::new(working_set, expr, span, ty)
3039    } else {
3040        working_set.error(ParseError::Expected(kind.error_msg(), span));
3041
3042        garbage(working_set, span)
3043    }
3044}
3045
/// Returns `true` when `bytes` is unquoted text that contains a `(`.
///
/// Input that begins with `'`, `"` or a backtick is never considered a bare
/// interpolation, and neither is empty input.
fn is_bare_string_interpolation(bytes: &[u8]) -> bool {
    let starts_with_quote = matches!(bytes.first(), Some(b'\'' | b'"' | b'`'));
    !starts_with_quote && bytes.contains(&b'(')
}
3053
/// Parses `span` as a directory argument.
///
/// Delegates to `parse_path_like` with `PathLikeKind::Directory`.
pub fn parse_directory(working_set: &mut StateWorkingSet, span: Span) -> Expression {
    parse_path_like(working_set, span, PathLikeKind::Directory)
}
3057
/// Parses `span` as a file path argument.
///
/// Delegates to `parse_path_like` with `PathLikeKind::Filepath`.
pub fn parse_filepath(working_set: &mut StateWorkingSet, span: Span) -> Expression {
    parse_path_like(working_set, span, PathLikeKind::Filepath)
}
3061
3062/// Parse a datetime type, eg '2022-02-02'
3063pub fn parse_datetime(working_set: &mut StateWorkingSet, span: Span) -> Expression {
3064    trace!("parsing: datetime");
3065
3066    let bytes = working_set.get_span_contents(span);
3067
3068    if bytes.len() < 6
3069        || !bytes[0].is_ascii_digit()
3070        || !bytes[1].is_ascii_digit()
3071        || !bytes[2].is_ascii_digit()
3072        || !bytes[3].is_ascii_digit()
3073        || bytes[4] != b'-'
3074    {
3075        working_set.error(ParseError::Expected("datetime", span));
3076        return garbage(working_set, span);
3077    }
3078
3079    let token = String::from_utf8_lossy(bytes).to_string();
3080
3081    if let Ok(datetime) = chrono::DateTime::parse_from_rfc3339(&token) {
3082        return Expression::new(working_set, Expr::DateTime(datetime), span, Type::Date);
3083    }
3084
3085    // Just the date
3086    let just_date = token.clone() + "T00:00:00+00:00";
3087    if let Ok(datetime) = chrono::DateTime::parse_from_rfc3339(&just_date) {
3088        return Expression::new(working_set, Expr::DateTime(datetime), span, Type::Date);
3089    }
3090
3091    // Date and time, assume UTC
3092    let datetime = token + "+00:00";
3093    if let Ok(datetime) = chrono::DateTime::parse_from_rfc3339(&datetime) {
3094        return Expression::new(working_set, Expr::DateTime(datetime), span, Type::Date);
3095    }
3096
3097    working_set.error(ParseError::Expected("datetime", span));
3098
3099    garbage(working_set, span)
3100}
3101
3102/// Parse a duration type, eg '10day'
3103pub fn parse_duration(working_set: &mut StateWorkingSet, span: Span) -> Expression {
3104    trace!("parsing: duration");
3105
3106    let bytes = working_set.get_span_contents(span);
3107
3108    match parse_unit_value(bytes, span, DURATION_UNIT_GROUPS, Type::Duration, |x| x) {
3109        Some(Ok(expr)) => {
3110            let span_id = working_set.add_span(span);
3111            expr.with_span_id(span_id)
3112        }
3113        Some(Err(mk_err_for)) => {
3114            working_set.error(mk_err_for("duration"));
3115            garbage(working_set, span)
3116        }
3117        None => {
3118            working_set.error(ParseError::Expected("duration with valid units", span));
3119            garbage(working_set, span)
3120        }
3121    }
3122}
3123
3124/// Parse a unit type, eg '10kb'
3125pub fn parse_filesize(working_set: &mut StateWorkingSet, span: Span) -> Expression {
3126    trace!("parsing: filesize");
3127
3128    let bytes = working_set.get_span_contents(span);
3129
3130    // the hex digit `b` might be mistaken for the unit `b`, so check that first
3131    if bytes.starts_with(b"0x") {
3132        working_set.error(ParseError::Expected("filesize with valid units", span));
3133        return garbage(working_set, span);
3134    }
3135
3136    match parse_unit_value(bytes, span, FILESIZE_UNIT_GROUPS, Type::Filesize, |x| {
3137        x.to_ascii_uppercase()
3138    }) {
3139        Some(Ok(expr)) => {
3140            let span_id = working_set.add_span(span);
3141            expr.with_span_id(span_id)
3142        }
3143        Some(Err(mk_err_for)) => {
3144            working_set.error(mk_err_for("filesize"));
3145            garbage(working_set, span)
3146        }
3147        None => {
3148            working_set.error(ParseError::Expected("filesize with valid units", span));
3149            garbage(working_set, span)
3150        }
3151    }
3152}
3153
/// Outcome of `parse_unit_value` once a unit suffix has matched: either the parsed
/// expression, or a boxed closure that builds the `ParseError` when given the name
/// of the value kind being parsed (callers pass e.g. `"duration"` or `"filesize"`).
type ParseUnitResult<'res> = Result<Expression, Box<dyn Fn(&'res str) -> ParseError>>;
/// One entry of a unit table: the unit, the textual suffix that selects it, and an
/// optional `(unit, multiplier)` pair used to re-express the value in another unit.
type UnitGroup<'unit> = (Unit, &'unit str, Option<(Unit, i64)>);
3156
3157pub fn parse_unit_value<'res>(
3158    bytes: &[u8],
3159    span: Span,
3160    unit_groups: &[UnitGroup],
3161    ty: Type,
3162    transform: fn(String) -> String,
3163) -> Option<ParseUnitResult<'res>> {
3164    if bytes.len() < 2
3165        || !(bytes[0].is_ascii_digit()
3166            || bytes[0] == b'.'
3167            || (bytes[0] == b'-' && bytes[1].is_ascii_digit()))
3168    {
3169        return None;
3170    }
3171
3172    // Bail if not UTF-8
3173    let value = transform(str::from_utf8(bytes).ok()?.into());
3174
3175    if let Some((unit, name, convert)) = unit_groups.iter().find(|x| value.ends_with(x.1)) {
3176        let lhs_len = value.len() - name.len();
3177        let lhs = strip_underscores(&value.as_bytes()[..lhs_len]);
3178        let lhs_span = Span::new(span.start, span.start + lhs_len);
3179        let unit_span = Span::new(span.start + lhs_len, span.end);
3180        if lhs.ends_with('$') {
3181            // If `parse_unit_value` has higher precedence over `parse_range`,
3182            // a variable with the name of a unit could otherwise not be used as the end of a range.
3183            return None;
3184        }
3185
3186        let (decimal_part, number_part) = modf(match lhs.parse::<f64>() {
3187            Ok(it) => it,
3188            Err(_) => {
3189                let mk_err = move |name| {
3190                    ParseError::LabeledError(
3191                        format!("{name} value must be a number"),
3192                        "not a number".into(),
3193                        lhs_span,
3194                    )
3195                };
3196                return Some(Err(Box::new(mk_err)));
3197            }
3198        });
3199
3200        let mut unit = match convert {
3201            Some(convert_to) => convert_to.0,
3202            None => *unit,
3203        };
3204
3205        let num_float = match convert {
3206            Some(convert_to) => {
3207                (number_part * convert_to.1 as f64) + (decimal_part * convert_to.1 as f64)
3208            }
3209            None => number_part,
3210        };
3211
3212        // Convert all durations to nanoseconds, and filesizes to bytes,
3213        // to minimize loss of precision
3214        let factor = match ty {
3215            Type::Filesize => unit_to_byte_factor(&unit),
3216            Type::Duration => unit_to_ns_factor(&unit),
3217            _ => None,
3218        };
3219
3220        let num = match factor {
3221            Some(factor) => {
3222                let num_base = num_float * factor;
3223                if i64::MIN as f64 <= num_base && num_base <= i64::MAX as f64 {
3224                    unit = if ty == Type::Filesize {
3225                        Unit::Filesize(FilesizeUnit::B)
3226                    } else {
3227                        Unit::Nanosecond
3228                    };
3229                    num_base as i64
3230                } else {
3231                    // not safe to convert, because of the overflow
3232                    num_float as i64
3233                }
3234            }
3235            None => num_float as i64,
3236        };
3237
3238        trace!("-- found {num} {unit:?}");
3239        let value = ValueWithUnit {
3240            expr: Expression::new_unknown(Expr::Int(num), lhs_span, Type::Number),
3241            unit: Spanned {
3242                item: unit,
3243                span: unit_span,
3244            },
3245        };
3246        let expr = Expression::new_unknown(Expr::ValueWithUnit(Box::new(value)), span, ty);
3247
3248        Some(Ok(expr))
3249    } else {
3250        None
3251    }
3252}
3253
/// Unit table for filesize literals, consumed by `parse_unit_value`.
///
/// Each entry is `(unit, suffix, conversion)`, where `conversion` names the
/// next smaller unit and how many of it make up one of `unit`.
///
/// Suffixes are written in upper case (`"KIB"`, not `"KiB"`) because
/// `parse_filesize` upper-cases the input before the lookup. The lookup takes
/// the first entry whose suffix matches the end of the text, so the plain `"B"`
/// entry must stay last: every other suffix also ends in `B`.
pub const FILESIZE_UNIT_GROUPS: &[UnitGroup] = &[
    // Decimal (powers of 1000) units.
    (
        Unit::Filesize(FilesizeUnit::KB),
        "KB",
        Some((Unit::Filesize(FilesizeUnit::B), 1000)),
    ),
    (
        Unit::Filesize(FilesizeUnit::MB),
        "MB",
        Some((Unit::Filesize(FilesizeUnit::KB), 1000)),
    ),
    (
        Unit::Filesize(FilesizeUnit::GB),
        "GB",
        Some((Unit::Filesize(FilesizeUnit::MB), 1000)),
    ),
    (
        Unit::Filesize(FilesizeUnit::TB),
        "TB",
        Some((Unit::Filesize(FilesizeUnit::GB), 1000)),
    ),
    (
        Unit::Filesize(FilesizeUnit::PB),
        "PB",
        Some((Unit::Filesize(FilesizeUnit::TB), 1000)),
    ),
    (
        Unit::Filesize(FilesizeUnit::EB),
        "EB",
        Some((Unit::Filesize(FilesizeUnit::PB), 1000)),
    ),
    // Binary (powers of 1024) units.
    (
        Unit::Filesize(FilesizeUnit::KiB),
        "KIB",
        Some((Unit::Filesize(FilesizeUnit::B), 1024)),
    ),
    (
        Unit::Filesize(FilesizeUnit::MiB),
        "MIB",
        Some((Unit::Filesize(FilesizeUnit::KiB), 1024)),
    ),
    (
        Unit::Filesize(FilesizeUnit::GiB),
        "GIB",
        Some((Unit::Filesize(FilesizeUnit::MiB), 1024)),
    ),
    (
        Unit::Filesize(FilesizeUnit::TiB),
        "TIB",
        Some((Unit::Filesize(FilesizeUnit::GiB), 1024)),
    ),
    (
        Unit::Filesize(FilesizeUnit::PiB),
        "PIB",
        Some((Unit::Filesize(FilesizeUnit::TiB), 1024)),
    ),
    (
        Unit::Filesize(FilesizeUnit::EiB),
        "EIB",
        Some((Unit::Filesize(FilesizeUnit::PiB), 1024)),
    ),
    // Plain bytes: no conversion, and matched only after all longer suffixes.
    (Unit::Filesize(FilesizeUnit::B), "B", None),
];
3317
/// Table of duration units recognised in literals.
///
/// Each entry is `(unit, name, conversion)`, where `conversion` optionally expresses the unit
/// as an integer multiple of a smaller unit (for example a minute is 60 seconds). The
/// nanosecond is the base unit and has no conversion. Microseconds are listed three times so
/// that the ASCII spelling `us` and both Unicode "mu" spellings map to the same unit.
3318pub const DURATION_UNIT_GROUPS: &[UnitGroup] = &[
3319    (Unit::Nanosecond, "ns", None),
3320    // todo start adding aliases for duration units here
3321    (Unit::Microsecond, "us", Some((Unit::Nanosecond, 1000))),
3322    (
3323        // µ Micro Sign
3324        Unit::Microsecond,
3325        "\u{00B5}s",
3326        Some((Unit::Nanosecond, 1000)),
3327    ),
3328    (
3329        // μ Greek small letter Mu
3330        Unit::Microsecond,
3331        "\u{03BC}s",
3332        Some((Unit::Nanosecond, 1000)),
3333    ),
3334    (Unit::Millisecond, "ms", Some((Unit::Microsecond, 1000))),
3335    (Unit::Second, "sec", Some((Unit::Millisecond, 1000))),
3336    (Unit::Minute, "min", Some((Unit::Second, 60))),
3337    (Unit::Hour, "hr", Some((Unit::Minute, 60))),
    // A day is expressed in minutes (24 * 60 = 1440) rather than in hours.
3338    (Unit::Day, "day", Some((Unit::Minute, 1440))),
3339    (Unit::Week, "wk", Some((Unit::Day, 7))),
3340];
3341
3342fn unit_to_ns_factor(unit: &Unit) -> Option<f64> {
3343    match unit {
3344        Unit::Nanosecond => Some(1.0),
3345        Unit::Microsecond => Some(1_000.0),
3346        Unit::Millisecond => Some(1_000_000.0),
3347        Unit::Second => Some(1_000_000_000.0),
3348        Unit::Minute => Some(60.0 * 1_000_000_000.0),
3349        Unit::Hour => Some(60.0 * 60.0 * 1_000_000_000.0),
3350        Unit::Day => Some(24.0 * 60.0 * 60.0 * 1_000_000_000.0),
3351        Unit::Week => Some(7.0 * 24.0 * 60.0 * 60.0 * 1_000_000_000.0),
3352        _ => None,
3353    }
3354}
3355
3356fn unit_to_byte_factor(unit: &Unit) -> Option<f64> {
3357    match unit {
3358        Unit::Filesize(FilesizeUnit::B) => Some(1.0),
3359        Unit::Filesize(FilesizeUnit::KB) => Some(1_000.0),
3360        Unit::Filesize(FilesizeUnit::MB) => Some(1_000_000.0),
3361        Unit::Filesize(FilesizeUnit::GB) => Some(1_000_000_000.0),
3362        Unit::Filesize(FilesizeUnit::TB) => Some(1_000_000_000_000.0),
3363        Unit::Filesize(FilesizeUnit::PB) => Some(1_000_000_000_000_000.0),
3364        Unit::Filesize(FilesizeUnit::EB) => Some(1_000_000_000_000_000_000.0),
3365        Unit::Filesize(FilesizeUnit::KiB) => Some(1024.0),
3366        Unit::Filesize(FilesizeUnit::MiB) => Some(1024.0 * 1024.0),
3367        Unit::Filesize(FilesizeUnit::GiB) => Some(1024.0 * 1024.0 * 1024.0),
3368        Unit::Filesize(FilesizeUnit::TiB) => Some(1024.0 * 1024.0 * 1024.0 * 1024.0),
3369        Unit::Filesize(FilesizeUnit::PiB) => Some(1024.0 * 1024.0 * 1024.0 * 1024.0 * 1024.0),
3370        Unit::Filesize(FilesizeUnit::EiB) => {
3371            Some(1024.0 * 1024.0 * 1024.0 * 1024.0 * 1024.0 * 1024.0)
3372        }
3373        _ => None,
3374    }
3375}
3376
// Borrowed from libm at https://github.com/rust-lang/libm/blob/master/src/math/modf.rs
//
// Splits `x` into `(fractional, integral)` parts. Both parts carry the sign of `x`, so a
// value without a fractional (or integral) part yields a correspondingly signed zero.
fn modf(x: f64) -> (f64, f64) {
    const SIGN_BIT: u64 = 1 << 63;
    // All 52 mantissa bits set.
    const MANTISSA_BITS: u64 = !0 >> 12;

    let bits = x.to_bits();
    // Zero carrying the sign of `x`.
    let signed_zero = f64::from_bits(bits & SIGN_BIT);
    // Unbiased binary exponent.
    let exponent = (((bits >> 52) & 0x7ff) as i32) - 0x3ff;

    /* no fractional part */
    if exponent >= 52 {
        if exponent == 0x400 && (bits << 12) != 0 {
            /* nan */
            return (x, x);
        }
        return (signed_zero, x);
    }

    /* no integral part */
    if exponent < 0 {
        return (x, signed_zero);
    }

    // Mantissa bits that encode the fractional part for this exponent.
    let fraction_mask = MANTISSA_BITS >> exponent;
    if bits & fraction_mask == 0 {
        // `x` is already a whole number.
        return (signed_zero, x);
    }

    let integral = f64::from_bits(bits & !fraction_mask);
    (x - integral, integral)
}
3411
3412pub fn parse_glob_pattern(working_set: &mut StateWorkingSet, span: Span) -> Expression {
3413    parse_path_like(working_set, span, PathLikeKind::Glob)
3414}
3415
3416/// Parse a hex escape sequence in the form `\xHH` (exactly 2 hex digits).
3417///
3418/// Returns the parsed byte value and the new index position.
3419fn parse_hex_escape(bytes: &[u8], start_idx: usize, span: Span) -> Result<(u8, usize), ParseError> {
3420    let hex_digits = bytes.get(start_idx + 1..start_idx + 3).ok_or_else(|| {
3421        ParseError::InvalidLiteral(
3422            "incomplete hex escape '\\xHH', expected 2 hex digits".into(),
3423            "string".into(),
3424            Span::new(span.start + start_idx, span.end),
3425        )
3426    })?;
3427    if !hex_digits.iter().all(u8::is_ascii_hexdigit) {
3428        return Err(ParseError::InvalidLiteral(
3429            "invalid hex escape '\\xHH', expected exactly 2 hex digits".into(),
3430            "string".into(),
3431            Span::new(span.start + start_idx, span.end),
3432        ));
3433    }
3434    str::from_utf8(hex_digits)
3435        .ok()
3436        .and_then(|s| u8::from_str_radix(s, 0x10).ok())
3437        .map(|byte_val| (byte_val, start_idx + 3))
3438        .ok_or_else(|| {
3439            ParseError::InvalidLiteral(
3440                "invalid hex escape '\\xHH'".into(),
3441                "string".into(),
3442                Span::new(span.start + start_idx, span.end),
3443            )
3444        })
3445}
3446
3447/// Parse a Unicode escape sequence in the form `\u{XXXXXX}` (1-6 hex digits, max 0x10FFFF).
3448///
3449/// Returns the UTF-8 encoded bytes of the Unicode character and the new index position.
3450fn parse_unicode_escape(
3451    bytes: &[u8],
3452    start_idx: usize,
3453    span: Span,
3454) -> Result<(char, usize), ParseError> {
3455    let mut slice = &bytes[(start_idx + 1)..];
3456    let mut current_idx = start_idx + 1;
3457
3458    // NOTE: this is a more defensive approach meant to avoid reading too much, but requires
3459    //       changing error messages
3460    // read no more than 8 bytes "{xxxxxx}"
3461    // slice = &slice[..(8.min(slice.len()))];
3462
3463    slice = slice.strip_prefix(b"{").ok_or_else(|| {
3464        ParseError::InvalidLiteral(
3465            "invalid unicode escape '\\u{...}', must be 1-6 hex digits, max codepoint 0x10FFFF"
3466                .into(),
3467            "string".into(),
3468            Span::new(span.start + start_idx, span.end),
3469        )
3470    })?;
3471    current_idx += 1;
3472
3473    let end = slice.iter().position(|b| *b == b'}').ok_or_else(|| {
3474        ParseError::InvalidLiteral(
3475            "incomplete unicode escape '\\u{...}', missing closing '}'".into(),
3476            "string".into(),
3477            Span::new(span.start + start_idx, span.end),
3478        )
3479    })?;
3480    let digits = &slice[..end];
3481    current_idx += end; // the digits
3482    current_idx += 1; // closing brace
3483    let current_idx = current_idx;
3484
3485    let ch = Some(digits)
3486        .filter(|b| (1..=6).contains(&b.len()))
3487        .and_then(|b| str::from_utf8(b).ok())
3488        .and_then(|s| u32::from_str_radix(s, 0x10).ok())
3489        .and_then(char::from_u32)
3490        .ok_or_else(|| {
3491            ParseError::InvalidLiteral(
3492                "invalid unicode escape '\\u{...}', must be 1-6 hex digits, max codepoint 0x10FFFF"
3493                    .into(),
3494                "string".into(),
3495                Span::new(span.start + start_idx, span.end),
3496            )
3497        })?;
3498
3499    Ok((ch, current_idx))
3500}
3501
3502/// Parse and process POSIX escape sequences in a byte string.
3503///
3504/// This function handles the following escape sequences:
3505/// - Simple: `\n`, `\r`, `\t`, `\\`, `\"`, `\'`
3506/// - Control: `\0`, `\a`, `\b`, `\e`, `\f`
3507/// - Hex: `\xHH` (exactly 2 hex digits)
3508/// - Unicode: `\u{XXXXXX}` (1-6 hex digits, max 0x10FFFF)
3509/// - Special: `\/`, `\(`, `\)`, `\{`, `\}`, `\$`, `\^`, `\#`, `\|`, `\~`
3510///
3511/// The function processes escapes in a single pass. If no backslashes are present,
3512/// the input is returned as-is for efficiency.
3513///
3514/// # Returns
3515///
3516/// A tuple of `(processed_bytes, parse_error)` where:
3517/// - `processed_bytes` contains the unescaped content
3518/// - `parse_error` is `Some` if an invalid escape sequence was encountered, `None` otherwise
3519pub fn unescape_string(bytes: &[u8], span: Span) -> (Vec<u8>, Option<ParseError>) {
3520    let mut output = Vec::new();
3521    let mut error = None;
3522
3523    let mut idx = 0;
3524
3525    if !bytes.contains(&b'\\') {
3526        return (bytes.to_vec(), None);
3527    }
3528
3529    'us_loop: while idx < bytes.len() {
3530        if bytes[idx] == b'\\' {
3531            // We're in an escape
3532            idx += 1;
3533
3534            match bytes.get(idx) {
3535                Some(b'"') => {
3536                    output.push(b'"');
3537                    idx += 1;
3538                }
3539                Some(b'\'') => {
3540                    output.push(b'\'');
3541                    idx += 1;
3542                }
3543                Some(b'\\') => {
3544                    output.push(b'\\');
3545                    idx += 1;
3546                }
3547                Some(b'/') => {
3548                    output.push(b'/');
3549                    idx += 1;
3550                }
3551                Some(b'(') => {
3552                    output.push(b'(');
3553                    idx += 1;
3554                }
3555                Some(b')') => {
3556                    output.push(b')');
3557                    idx += 1;
3558                }
3559                Some(b'{') => {
3560                    output.push(b'{');
3561                    idx += 1;
3562                }
3563                Some(b'}') => {
3564                    output.push(b'}');
3565                    idx += 1;
3566                }
3567                Some(b'$') => {
3568                    output.push(b'$');
3569                    idx += 1;
3570                }
3571                Some(b'^') => {
3572                    output.push(b'^');
3573                    idx += 1;
3574                }
3575                Some(b'#') => {
3576                    output.push(b'#');
3577                    idx += 1;
3578                }
3579                Some(b'|') => {
3580                    output.push(b'|');
3581                    idx += 1;
3582                }
3583                Some(b'~') => {
3584                    output.push(b'~');
3585                    idx += 1;
3586                }
3587                Some(b'a') => {
3588                    output.push(0x7);
3589                    idx += 1;
3590                }
3591                Some(b'b') => {
3592                    output.push(0x8);
3593                    idx += 1;
3594                }
3595                Some(b'e') => {
3596                    output.push(0x1b);
3597                    idx += 1;
3598                }
3599                Some(b'f') => {
3600                    output.push(0xc);
3601                    idx += 1;
3602                }
3603                Some(b'n') => {
3604                    output.push(b'\n');
3605                    idx += 1;
3606                }
3607                Some(b'r') => {
3608                    output.push(b'\r');
3609                    idx += 1;
3610                }
3611                Some(b't') => {
3612                    output.push(b'\t');
3613                    idx += 1;
3614                }
3615                Some(b'0') => {
3616                    output.push(b'\0');
3617                    idx += 1;
3618                }
3619                Some(b'x') => {
3620                    // Hex escape: \xHH (exactly 2 hex digits)
3621                    match parse_hex_escape(bytes, idx, span) {
3622                        Ok((byte_val, new_idx)) => {
3623                            output.push(byte_val);
3624                            idx = new_idx;
3625                        }
3626                        Err(err) => {
3627                            error = error.or(Some(err));
3628                            break 'us_loop;
3629                        }
3630                    }
3631                }
3632                Some(b'u') => {
3633                    // Unicode escape: \u{XXXXXX} (1-6 hex digits, max 0x10FFFF)
3634                    match parse_unicode_escape(bytes, idx, span) {
3635                        Ok((ch, new_idx)) => {
3636                            let mut ch_buf = [0u8; 4];
3637                            output.extend(ch.encode_utf8(&mut ch_buf).as_bytes());
3638                            idx = new_idx;
3639                        }
3640                        Err(err) => {
3641                            error = error.or(Some(err));
3642                            break 'us_loop;
3643                        }
3644                    }
3645                }
3646
3647                Some(other) => {
3648                    error = error.or(Some(ParseError::InvalidLiteral(
3649                        format!("unrecognized escape sequence '\\{}'", *other as char),
3650                        "string".into(),
3651                        Span::new(span.start + idx, span.end),
3652                    )));
3653                    break 'us_loop;
3654                }
3655                None => {
3656                    error = error.or(Some(ParseError::InvalidLiteral(
3657                        "incomplete escape sequence after '\\'".into(),
3658                        "string".into(),
3659                        Span::new(span.end.saturating_sub(1), span.end),
3660                    )));
3661                    break 'us_loop;
3662                }
3663            }
3664        } else {
3665            output.push(bytes[idx]);
3666            idx += 1;
3667        }
3668    }
3669
3670    (output, error)
3671}
3672
3673/// Unescapes and unquotes a string, returning the content and any parse errors.
3674///
3675/// This function handles both quoted and unquoted strings, processing POSIX escape
3676/// sequences only within double-quoted strings. Single-quoted and unquoted strings
3677/// are returned as-is after removing their delimiters.
3678///
3679/// # Returns
3680///
3681/// A tuple of `(unescaped_string, parse_error)` where:
3682/// - `unescaped_string` contains the processed content
3683/// - `parse_error` is `Some` if an invalid escape sequence was encountered, `None` otherwise
3684pub fn unescape_unquote_string(bytes: &[u8], span: Span) -> (String, Option<ParseError>) {
3685    if bytes.starts_with(b"\"") {
3686        // Needs unescaping
3687        let bytes = trim_quotes(bytes);
3688
3689        let (bytes, err) = unescape_string(bytes, span);
3690
3691        if let Ok(token) = String::from_utf8(bytes) {
3692            (token, err)
3693        } else {
3694            (String::new(), Some(ParseError::Expected("string", span)))
3695        }
3696    } else {
3697        let bytes = trim_quotes(bytes);
3698
3699        if let Ok(token) = String::from_utf8(bytes.into()) {
3700            (token, None)
3701        } else {
3702            (String::new(), Some(ParseError::Expected("string", span)))
3703        }
3704    }
3705}
3706
3707fn check_string_no_trailing_tokens(
3708    bytes: &[u8],
3709    span: Span,
3710    opening_quote_pos: usize,
3711    quote: u8,
3712) -> Result<(), ParseError> {
3713    let pos = bytes
3714        .iter()
3715        .rposition(|ch| *ch == quote)
3716        .expect("string begins with quote");
3717    if pos == bytes.len() - 1 {
3718        Ok(())
3719    } else if pos == opening_quote_pos {
3720        // this may look like an error, but it's not:
3721        // some code, like completions, requires allowing
3722        // unterminated strings at this stage.
3723        Ok(())
3724    } else {
3725        let span = Span::new(span.start + pos + 1, span.end);
3726        Err(ParseError::ExtraTokensAfterClosingDelimiter(span))
3727    }
3728}
3729
3730pub fn parse_string(working_set: &mut StateWorkingSet, span: Span) -> Expression {
3731    trace!("parsing: string");
3732
3733    let bytes = working_set.get_span_contents(span);
3734
3735    if bytes.is_empty() {
3736        working_set.error(ParseError::Expected("String", span));
3737        return Expression::garbage(working_set, span);
3738    }
3739
3740    // Check for bare word interpolation
3741    if is_bare_string_interpolation(bytes) {
3742        return parse_string_interpolation(working_set, span);
3743    }
3744
3745    // Check for unbalanced quotes:
3746    for quote in [b'\"', b'\''] {
3747        if bytes[0] == quote
3748            && let Err(err) = check_string_no_trailing_tokens(bytes, span, 0, quote)
3749        {
3750            working_set.error(err);
3751            return garbage(working_set, span);
3752        }
3753    }
3754
3755    let (s, err) = unescape_unquote_string(bytes, span);
3756    if let Some(err) = err {
3757        working_set.error(err);
3758    }
3759
3760    Expression::new(working_set, Expr::String(s), span, Type::String)
3761}
3762
/// Check if a byte sequence is quoted with either single or double quotes.
///
/// Returns `true` if the bytes start and end with the same quote character (either `"` or
/// `'`). The opening and closing quote must be distinct bytes, so the shortest accepted
/// inputs are the empty quoted strings `""` and `''`; a lone quote is not considered quoted.
fn is_quoted(bytes: &[u8]) -> bool {
    match bytes {
        [first, .., last] => first == last && matches!(first, b'\'' | b'"'),
        _ => false,
    }
}
3770
3771pub fn parse_string_strict(working_set: &mut StateWorkingSet, span: Span) -> Expression {
3772    trace!("parsing: string, with required delimiters");
3773
3774    let bytes = working_set.get_span_contents(span);
3775
3776    // Check for unbalanced quotes:
3777    {
3778        let bytes = if bytes.starts_with(b"$") {
3779            &bytes[1..]
3780        } else {
3781            bytes
3782        };
3783        if bytes.starts_with(b"\"") && (bytes.len() == 1 || !bytes.ends_with(b"\"")) {
3784            working_set.error(ParseError::Unclosed("\"".into(), span));
3785            return garbage(working_set, span);
3786        }
3787        if bytes.starts_with(b"\'") && (bytes.len() == 1 || !bytes.ends_with(b"\'")) {
3788            working_set.error(ParseError::Unclosed("\'".into(), span));
3789            return garbage(working_set, span);
3790        }
3791        if bytes.starts_with(b"r#") && (bytes.len() == 1 || !bytes.ends_with(b"#")) {
3792            working_set.error(ParseError::Unclosed("r#".into(), span));
3793            return garbage(working_set, span);
3794        }
3795    }
3796
3797    let (bytes, quoted) = if (bytes.starts_with(b"\"") && bytes.ends_with(b"\"") && bytes.len() > 1)
3798        || (bytes.starts_with(b"\'") && bytes.ends_with(b"\'") && bytes.len() > 1)
3799    {
3800        (&bytes[1..(bytes.len() - 1)], true)
3801    } else if (bytes.starts_with(b"$\"") && bytes.ends_with(b"\"") && bytes.len() > 2)
3802        || (bytes.starts_with(b"$\'") && bytes.ends_with(b"\'") && bytes.len() > 2)
3803    {
3804        (&bytes[2..(bytes.len() - 1)], true)
3805    } else {
3806        (bytes, false)
3807    };
3808
3809    if let Ok(token) = String::from_utf8(bytes.into()) {
3810        trace!("-- found {token}");
3811
3812        if quoted {
3813            Expression::new(working_set, Expr::String(token), span, Type::String)
3814        } else if token.contains(' ') {
3815            working_set.error(ParseError::Expected("string", span));
3816
3817            garbage(working_set, span)
3818        } else {
3819            Expression::new(working_set, Expr::String(token), span, Type::String)
3820        }
3821    } else {
3822        working_set.error(ParseError::Expected("string", span));
3823        garbage(working_set, span)
3824    }
3825}
3826
3827pub fn parse_import_pattern<'a>(
3828    working_set: &mut StateWorkingSet,
3829    mut arg_iter: impl Iterator<Item = &'a Expression>,
3830    spans: &[Span],
3831) -> Expression {
3832    let Some(head_expr) = arg_iter.next() else {
3833        working_set.error(ParseError::WrongImportPattern(
3834            "needs at least one component of import pattern".to_string(),
3835            Span::concat(spans),
3836        ));
3837        return garbage(working_set, Span::concat(spans));
3838    };
3839
3840    let (maybe_module_id, head_name) = match eval_constant(working_set, head_expr) {
3841        Ok(Value::Nothing { .. }) => {
3842            return Expression::new(
3843                working_set,
3844                Expr::Nothing,
3845                Span::concat(spans),
3846                Type::Nothing,
3847            );
3848        }
3849        Ok(val) => match val.coerce_into_string() {
3850            Ok(s) => (working_set.find_module(s.as_bytes()), s.into_bytes()),
3851            Err(err) => {
3852                working_set.error(err.wrap(working_set, Span::concat(spans)));
3853                return garbage(working_set, Span::concat(spans));
3854            }
3855        },
3856        Err(err) => {
3857            working_set.error(err.wrap(working_set, Span::concat(spans)));
3858            return garbage(working_set, Span::concat(spans));
3859        }
3860    };
3861
3862    let mut import_pattern = ImportPattern {
3863        head: ImportPatternHead {
3864            name: head_name,
3865            id: maybe_module_id,
3866            span: head_expr.span,
3867        },
3868        members: vec![],
3869        hidden: HashSet::new(),
3870        constants: vec![],
3871    };
3872
3873    let mut leaf_member_expr: Option<(&str, Span)> = None;
3874
3875    // TODO: box pattern syntax is experimental @rust v1.89.0
3876    let handle_list_items =
3877        |items: &Vec<ListItem>,
3878         span,
3879         working_set: &mut StateWorkingSet<'_>,
3880         import_pattern: &mut ImportPattern,
3881         leaf_member_expr: &mut Option<(&str, Span)>| {
3882            let mut output = vec![];
3883
3884            for item in items.iter() {
3885                match item {
3886                    ListItem::Item(expr) => {
3887                        if let Some(name) = expr.as_string() {
3888                            output.push((name.as_bytes().to_vec(), expr.span));
3889                        }
3890                    }
3891                    ListItem::Spread(_, spread) => {
3892                        working_set.error(ParseError::WrongImportPattern(
3893                            "cannot spread in an import pattern".into(),
3894                            spread.span,
3895                        ))
3896                    }
3897                }
3898            }
3899
3900            import_pattern
3901                .members
3902                .push(ImportPatternMember::List { names: output });
3903
3904            *leaf_member_expr = Some(("list", span));
3905        };
3906
3907    for tail_expr in arg_iter {
3908        if let Some((what, prev_span)) = leaf_member_expr {
3909            working_set.error(ParseError::WrongImportPattern(
3910                format!("{what} member can be only at the end of an import pattern"),
3911                prev_span,
3912            ));
3913            return Expression::new(
3914                working_set,
3915                Expr::ImportPattern(Box::new(import_pattern)),
3916                prev_span,
3917                Type::List(Box::new(Type::String)),
3918            );
3919        }
3920
3921        match &tail_expr.expr {
3922            Expr::String(name) => {
3923                let span = tail_expr.span;
3924                if name == "*" {
3925                    import_pattern
3926                        .members
3927                        .push(ImportPatternMember::Glob { span });
3928
3929                    leaf_member_expr = Some(("glob", span));
3930                } else {
3931                    import_pattern.members.push(ImportPatternMember::Name {
3932                        name: name.as_bytes().to_vec(),
3933                        span,
3934                    });
3935                }
3936            }
3937            Expr::FullCellPath(fcp) => {
3938                if let Expr::List(items) = &fcp.head.expr {
3939                    handle_list_items(
3940                        items,
3941                        fcp.head.span,
3942                        working_set,
3943                        &mut import_pattern,
3944                        &mut leaf_member_expr,
3945                    );
3946                }
3947            }
3948            Expr::List(items) => {
3949                handle_list_items(
3950                    items,
3951                    tail_expr.span,
3952                    working_set,
3953                    &mut import_pattern,
3954                    &mut leaf_member_expr,
3955                );
3956            }
3957            _ => {
3958                working_set.error(ParseError::WrongImportPattern(
3959                    "Wrong type of import pattern, only String and List<String> are allowed."
3960                        .into(),
3961                    tail_expr.span,
3962                ));
3963            }
3964        };
3965    }
3966
3967    Expression::new(
3968        working_set,
3969        Expr::ImportPattern(Box::new(import_pattern)),
3970        Span::concat(&spans[1..]),
3971        Type::List(Box::new(Type::String)),
3972    )
3973}
3974
3975/// Parse `spans[spans_idx..]` into a variable, with optional type annotation.
3976/// If the name of the variable ends with a colon (no space in-between allowed), then a type annotation
3977/// can appear after the variable, in which case the colon is stripped from the name of the variable.
3978/// `spans_idx` is updated to point to the last span that has been parsed.
3979pub fn parse_var_with_opt_type(
3980    working_set: &mut StateWorkingSet,
3981    spans: &[Span],
3982    spans_idx: &mut usize,
3983    mutable: bool,
3984) -> (Expression, Option<Type>) {
3985    let name_span = spans[*spans_idx];
3986    let bytes = working_set.get_span_contents(name_span).to_vec();
3987
3988    if bytes.contains(&b' ')
3989        || bytes.contains(&b'"')
3990        || bytes.contains(&b'\'')
3991        || bytes.contains(&b'`')
3992    {
3993        working_set.error(ParseError::VariableNotValid(spans[*spans_idx]));
3994        return (garbage(working_set, spans[*spans_idx]), None);
3995    }
3996
3997    if bytes.ends_with(b":") {
3998        let name_span = Span::new(name_span.start, name_span.end - 1);
3999        let var_name = bytes[0..(bytes.len() - 1)].to_vec();
4000
4001        // We end with colon, so the next span should be the type
4002        if *spans_idx + 1 < spans.len() {
4003            *spans_idx += 1;
4004            // signature like record<a: int b: int> is broken into multiple spans due to
4005            // whitespaces. Collect the rest into one span and work on it
4006            let full_span = Span::concat(&spans[*spans_idx..]);
4007            let type_bytes = working_set.get_span_contents(full_span).to_vec();
4008
4009            let (tokens, parse_error) =
4010                lex_signature(&type_bytes, full_span.start, &[], &[b','], true);
4011
4012            if let Some(parse_error) = parse_error {
4013                working_set.error(parse_error);
4014            }
4015
4016            let ty = parse_type(working_set, &type_bytes, tokens[0].span);
4017            *spans_idx = spans.len() - 1;
4018
4019            if !is_variable(&var_name) {
4020                working_set.error(ParseError::Expected(
4021                    "valid variable name",
4022                    spans[*spans_idx - 1],
4023                ));
4024                return (garbage(working_set, spans[*spans_idx - 1]), None);
4025            }
4026
4027            ensure_not_reserved_variable_name(working_set, &var_name, name_span);
4028
4029            let id = working_set.add_variable(var_name, spans[*spans_idx - 1], ty.clone(), mutable);
4030
4031            (
4032                Expression::new(working_set, Expr::VarDecl(id), name_span, ty.clone()),
4033                Some(ty),
4034            )
4035        } else {
4036            if !is_variable(&var_name) {
4037                working_set.error(ParseError::Expected(
4038                    "valid variable name",
4039                    spans[*spans_idx],
4040                ));
4041                return (garbage(working_set, spans[*spans_idx]), None);
4042            }
4043
4044            ensure_not_reserved_variable_name(working_set, &var_name, name_span);
4045
4046            let id = working_set.add_variable(var_name, spans[*spans_idx], Type::Any, mutable);
4047
4048            working_set.error(ParseError::MissingType(spans[*spans_idx]));
4049            (
4050                Expression::new(working_set, Expr::VarDecl(id), spans[*spans_idx], Type::Any),
4051                None,
4052            )
4053        }
4054    } else {
4055        let var_name = bytes;
4056
4057        if !is_variable(&var_name) {
4058            working_set.error(ParseError::Expected(
4059                "valid variable name",
4060                spans[*spans_idx],
4061            ));
4062            return (garbage(working_set, spans[*spans_idx]), None);
4063        }
4064
4065        ensure_not_reserved_variable_name(working_set, &var_name, name_span);
4066
4067        let id = working_set.add_variable(
4068            var_name,
4069            Span::concat(&spans[*spans_idx..*spans_idx + 1]),
4070            Type::Any,
4071            mutable,
4072        );
4073
4074        (
4075            Expression::new(working_set, Expr::VarDecl(id), spans[*spans_idx], Type::Any),
4076            None,
4077        )
4078    }
4079}
4080
4081const RESERVED_VARIABLE_NAMES: [&[u8]; 3] = [b"in", b"nu", b"env"];
4082
4083pub(crate) fn ensure_not_reserved_variable_name(
4084    working_set: &mut StateWorkingSet,
4085    name: &[u8],
4086    span: Span,
4087) {
4088    let var_name = name.strip_prefix(b"$").unwrap_or(name);
4089
4090    if RESERVED_VARIABLE_NAMES.contains(&var_name) {
4091        working_set.error(ParseError::NameIsBuiltinVar(
4092            String::from_utf8_lossy(var_name).to_string(),
4093            span,
4094        ))
4095    }
4096}
4097
4098pub fn expand_to_cell_path(
4099    working_set: &mut StateWorkingSet,
4100    expression: &mut Expression,
4101    var_id: VarId,
4102) {
4103    trace!("parsing: expanding to cell path");
4104    if let Expression {
4105        expr: Expr::String(_),
4106        span,
4107        ..
4108    } = expression
4109    {
4110        // Re-parse the string as if it were a cell-path
4111        let new_expression = parse_full_cell_path(working_set, Some(var_id), *span);
4112
4113        *expression = new_expression;
4114    }
4115
4116    if let Expression {
4117        expr: Expr::UnaryNot(inner),
4118        ..
4119    } = expression
4120    {
4121        expand_to_cell_path(working_set, inner, var_id);
4122    }
4123}
4124
4125pub fn parse_input_output_types(
4126    working_set: &mut StateWorkingSet,
4127    spans: &[Span],
4128) -> Vec<(Type, Type)> {
4129    let mut full_span = Span::concat(spans);
4130
4131    let mut bytes = working_set.get_span_contents(full_span);
4132
4133    if bytes.starts_with(b"[") {
4134        bytes = &bytes[1..];
4135        full_span.start += 1;
4136    }
4137
4138    if bytes.ends_with(b"]") {
4139        bytes = &bytes[..(bytes.len() - 1)];
4140        full_span.end -= 1;
4141    }
4142
4143    let (tokens, parse_error) =
4144        lex_signature(bytes, full_span.start, &[b'\n', b'\r', b','], &[], true);
4145
4146    if let Some(parse_error) = parse_error {
4147        working_set.error(parse_error);
4148    }
4149
4150    let mut output = vec![];
4151
4152    let mut idx = 0;
4153    while idx < tokens.len() {
4154        let type_bytes = working_set.get_span_contents(tokens[idx].span).to_vec();
4155        let input_type = parse_type(working_set, &type_bytes, tokens[idx].span);
4156
4157        idx += 1;
4158        if idx >= tokens.len() {
4159            working_set.error(ParseError::Expected(
4160                "arrow (->)",
4161                Span::new(tokens[idx - 1].span.end, tokens[idx - 1].span.end),
4162            ));
4163            break;
4164        }
4165
4166        let arrow = working_set.get_span_contents(tokens[idx].span);
4167        if arrow != b"->" {
4168            working_set.error(ParseError::Expected("arrow (->)", tokens[idx].span));
4169        }
4170
4171        idx += 1;
4172        if idx >= tokens.len() {
4173            working_set.error(ParseError::MissingType(Span::new(
4174                tokens[idx - 1].span.end,
4175                tokens[idx - 1].span.end,
4176            )));
4177            break;
4178        }
4179
4180        let type_bytes = working_set.get_span_contents(tokens[idx].span).to_vec();
4181        let output_type = parse_type(working_set, &type_bytes, tokens[idx].span);
4182
4183        output.push((input_type, output_type));
4184
4185        idx += 1;
4186    }
4187
4188    output
4189}
4190
4191pub fn parse_full_signature(
4192    working_set: &mut StateWorkingSet,
4193    spans: &[Span],
4194    is_external: bool,
4195) -> Expression {
4196    match spans.len() {
4197        // This case should never happen. It corresponds to declarations like `def foo {}`,
4198        // which should throw a 'Missing required positional argument.' before getting to this point
4199        0 => {
4200            working_set.error(ParseError::InternalError(
4201                "failed to catch missing positional arguments".to_string(),
4202                Span::concat(spans),
4203            ));
4204            garbage(working_set, Span::concat(spans))
4205        }
4206
4207        // e.g. `[ b"[foo: string]" ]`
4208        1 => parse_signature(working_set, spans[0], is_external),
4209
4210        // This case is needed to distinguish between e.g.
4211        // `[ b"[]", b"{ true }" ]` vs `[ b"[]:", b"int" ]`
4212        2 if working_set.get_span_contents(spans[1]).starts_with(b"{") => {
4213            parse_signature(working_set, spans[0], is_external)
4214        }
4215
4216        // This should handle every other case, e.g.
4217        // `[ b"[]:", b"int" ]`
4218        // `[ b"[]", b":", b"int" ]`
4219        // `[ b"[]", b":", b"int", b"->", b"bool" ]`
4220        _ => {
4221            let (mut arg_signature, input_output_types_pos) =
4222                if working_set.get_span_contents(spans[0]).ends_with(b":") {
4223                    (
4224                        parse_signature(
4225                            working_set,
4226                            Span::new(spans[0].start, spans[0].end.saturating_sub(1)),
4227                            is_external,
4228                        ),
4229                        1,
4230                    )
4231                } else if working_set.get_span_contents(spans[1]) == b":" {
4232                    (parse_signature(working_set, spans[0], is_external), 2)
4233                } else {
4234                    // This should be an error case, but we call parse_signature anyway
4235                    // so it can handle the various possible errors
4236                    // e.g. `[ b"[]", b"int" ]` or `[
4237                    working_set.error(ParseError::Expected(
4238                        "colon (:) before type signature",
4239                        Span::concat(&spans[1..]),
4240                    ));
4241                    // (garbage(working_set, Span::concat(spans)), 1)
4242
4243                    (parse_signature(working_set, spans[0], is_external), 1)
4244                };
4245
4246            let input_output_types =
4247                parse_input_output_types(working_set, &spans[input_output_types_pos..]);
4248
4249            if let Expression {
4250                expr: Expr::Signature(sig),
4251                span: expr_span,
4252                ..
4253            } = &mut arg_signature
4254            {
4255                sig.input_output_types = input_output_types;
4256                expr_span.end = Span::concat(&spans[input_output_types_pos..]).end;
4257            }
4258            arg_signature
4259        }
4260    }
4261}
4262
4263pub fn parse_row_condition(working_set: &mut StateWorkingSet, spans: &[Span]) -> Expression {
4264    let pos = spans.first().map(|s| s.start).unwrap_or(0);
4265    // New scope in case where there's already a variable named `$it`
4266    working_set.enter_scope();
4267    let var_id = working_set.add_variable(b"$it".to_vec(), Span::new(pos, pos), Type::Any, false);
4268    let expression = parse_math_expression(working_set, spans, Some(var_id));
4269    let span = Span::concat(spans);
4270
4271    let block_id = match expression.expr {
4272        Expr::Block(block_id) => block_id,
4273        Expr::Closure(block_id) => block_id,
4274        Expr::FullCellPath(ref box_fcp) if box_fcp.head.as_var().is_some_and(|id| id != var_id) => {
4275            let mut expression = expression;
4276            expression.ty = Type::Any;
4277            working_set.exit_scope();
4278            return expression;
4279        }
4280        Expr::Var(arg_var_id) if arg_var_id != var_id => {
4281            let mut expression = expression;
4282            expression.ty = Type::Any;
4283            working_set.exit_scope();
4284            return expression;
4285        }
4286        _ => {
4287            // We have an expression, check that it's compatible with bool
4288            if !type_compatible(&Type::Bool, &expression.ty) {
4289                working_set.error(ParseError::TypeMismatch(
4290                    Type::Bool,
4291                    expression.ty.clone(),
4292                    expression.span,
4293                ));
4294                working_set.exit_scope();
4295                return Expression::garbage(working_set, expression.span);
4296            }
4297
4298            // Convert this expression into a block.
4299            let mut block = Block::new();
4300            let mut pipeline = Pipeline::new();
4301            pipeline.elements.push(PipelineElement {
4302                pipe: None,
4303                expr: expression,
4304                redirection: None,
4305            });
4306
4307            block.pipelines.push(pipeline);
4308
4309            block.signature.required_positional.push(PositionalArg {
4310                name: "$it".into(),
4311                desc: "row condition".into(),
4312                shape: SyntaxShape::Any,
4313                var_id: Some(var_id),
4314                default_value: None,
4315                completion: None,
4316            });
4317
4318            compile_block(working_set, &mut block);
4319
4320            working_set.add_block(Arc::new(block))
4321        }
4322    };
4323    working_set.exit_scope();
4324
4325    Expression::new(working_set, Expr::RowCondition(block_id), span, Type::Bool)
4326}
4327
4328pub fn parse_signature(
4329    working_set: &mut StateWorkingSet,
4330    span: Span,
4331    is_external: bool,
4332) -> Expression {
4333    let bytes = working_set.get_span_contents(span);
4334
4335    let mut start = span.start;
4336    let mut end = span.end;
4337
4338    let mut has_paren = false;
4339
4340    if bytes.starts_with(b"[") {
4341        start += 1;
4342    } else if bytes.starts_with(b"(") {
4343        has_paren = true;
4344        start += 1;
4345    } else {
4346        working_set.error(ParseError::Expected("[ or (", Span::new(start, start + 1)));
4347        return garbage(working_set, span);
4348    }
4349
4350    if (has_paren && bytes.ends_with(b")")) || (!has_paren && bytes.ends_with(b"]")) {
4351        end -= 1;
4352    } else {
4353        working_set.error(ParseError::Unclosed("] or )".into(), Span::new(end, end)));
4354    }
4355
4356    let sig = parse_signature_helper(working_set, Span::new(start, end), is_external);
4357
4358    Expression::new(working_set, Expr::Signature(sig), span, Type::Any)
4359}
4360
4361pub fn parse_signature_helper(
4362    working_set: &mut StateWorkingSet,
4363    span: Span,
4364    is_external: bool,
4365) -> Box<Signature> {
4366    enum ParseMode {
4367        Arg,
4368        AfterCommaArg,
4369        Type,
4370        AfterType,
4371        DefaultValue,
4372    }
4373
4374    #[derive(Debug)]
4375    enum Arg {
4376        Positional {
4377            arg: PositionalArg,
4378            required: bool,
4379            type_annotated: bool,
4380        },
4381        RestPositional(PositionalArg),
4382        Flag {
4383            flag: Flag,
4384            type_annotated: bool,
4385        },
4386    }
4387
4388    let source = working_set.get_span_contents(span);
4389
4390    let (output, err) = lex_signature(
4391        source,
4392        span.start,
4393        &[b'\n', b'\r'],
4394        &[b':', b'=', b','],
4395        false,
4396    );
4397    if let Some(err) = err {
4398        working_set.error(err);
4399    }
4400
4401    let mut args: Vec<Arg> = vec![];
4402    let mut parse_mode = ParseMode::Arg;
4403    // Track variables whose name→VarId mappings have not yet been inserted
4404    // into the overlay scope
4405    //
4406    // We defer all insertions until the entire signature is parsed so that
4407    // default value expressions always resolve to outer scope variables,
4408    // not to sibling parameters
4409    //
4410    // See #15306
4411    let mut pending_scope_inserts: Vec<(Vec<u8>, VarId)> = vec![];
4412
4413    for (index, token) in output.iter().enumerate() {
4414        let last_token = index == output.len() - 1;
4415
4416        match token {
4417            Token {
4418                contents: crate::TokenContents::Item | crate::TokenContents::AssignmentOperator,
4419                span,
4420            } => {
4421                let span = *span;
4422                let contents = working_set.get_span_contents(span).to_vec();
4423
4424                // The : symbol separates types
4425                if contents == b":" {
4426                    match parse_mode {
4427                        ParseMode::Arg if last_token => working_set
4428                            .error(ParseError::Expected("type", Span::new(span.end, span.end))),
4429                        ParseMode::Arg => {
4430                            parse_mode = ParseMode::Type;
4431                        }
4432                        ParseMode::AfterCommaArg | ParseMode::AfterType => {
4433                            working_set.error(ParseError::Expected("parameter or flag", span));
4434                        }
4435                        ParseMode::Type | ParseMode::DefaultValue => {
4436                            // We're seeing two types for the same thing for some reason, error
4437                            working_set.error(ParseError::Expected("type", span));
4438                        }
4439                    }
4440                }
4441                // The = symbol separates a variable from its default value
4442                else if contents == b"=" {
4443                    match parse_mode {
4444                        ParseMode::Arg | ParseMode::AfterType if last_token => working_set.error(
4445                            ParseError::Expected("default value", Span::new(span.end, span.end)),
4446                        ),
4447                        ParseMode::Arg | ParseMode::AfterType => {
4448                            parse_mode = ParseMode::DefaultValue;
4449                        }
4450                        ParseMode::Type => {
4451                            working_set.error(ParseError::Expected("type", span));
4452                        }
4453                        ParseMode::AfterCommaArg => {
4454                            working_set.error(ParseError::Expected("parameter or flag", span));
4455                        }
4456                        ParseMode::DefaultValue => {
4457                            // We're seeing two default values for some reason, error
4458                            working_set.error(ParseError::Expected("default value", span));
4459                        }
4460                    }
4461                }
4462                // The , symbol separates params only
4463                else if contents == b"," {
4464                    match parse_mode {
4465                        ParseMode::Arg | ParseMode::AfterType => {
4466                            parse_mode = ParseMode::AfterCommaArg
4467                        }
4468                        ParseMode::AfterCommaArg => {
4469                            working_set.error(ParseError::Expected("parameter or flag", span));
4470                        }
4471                        ParseMode::Type => {
4472                            working_set.error(ParseError::Expected("type", span));
4473                        }
4474                        ParseMode::DefaultValue => {
4475                            working_set.error(ParseError::Expected("default value", span));
4476                        }
4477                    }
4478                } else {
4479                    let mut check_and_add_variable =
4480                        |working_set: &mut StateWorkingSet,
4481                         var_name: Vec<u8>,
4482                         ty: Type,
4483                         span: Span| {
4484                            if is_external {
4485                                None
4486                            } else {
4487                                ensure_not_reserved_variable_name(working_set, &var_name, span);
4488                                let var_id =
4489                                    working_set.add_variable_without_scope(span, ty, false);
4490                                pending_scope_inserts.push((var_name, var_id));
4491                                Some(var_id)
4492                            }
4493                        };
4494
4495                    match parse_mode {
4496                        ParseMode::Arg | ParseMode::AfterCommaArg | ParseMode::AfterType => {
4497                            // Long flag with optional short form following with no whitespace, e.g. --output, --age(-a)
4498                            if contents.starts_with(b"--") && contents.len() > 2 {
4499                                // Split the long flag from the short flag with the ( character as delimiter.
4500                                // The trailing ) is removed further down.
4501                                let flags: Vec<_> = contents.split(|x| x == &b'(').collect();
4502
4503                                let long = String::from_utf8_lossy(&flags[0][2..]).to_string();
4504                                let mut variable_name = flags[0][2..].to_vec();
4505                                // Replace the '-' in a variable name with '_'
4506                                for byte in variable_name.iter_mut() {
4507                                    if *byte == b'-' {
4508                                        *byte = b'_';
4509                                    }
4510                                }
4511
4512                                if !is_variable(&variable_name) {
4513                                    working_set.error(ParseError::Expected(
4514                                        "valid variable name for this long flag",
4515                                        span,
4516                                    ))
4517                                }
4518
4519                                let var_id = check_and_add_variable(
4520                                    working_set,
4521                                    variable_name,
4522                                    Type::Bool,
4523                                    span,
4524                                );
4525
4526                                // If there's no short flag, exit now. Otherwise, parse it.
4527                                if flags.len() == 1 {
4528                                    args.push(Arg::Flag {
4529                                        flag: Flag {
4530                                            arg: None,
4531                                            desc: String::new(),
4532                                            long,
4533                                            short: None,
4534                                            required: false,
4535                                            var_id,
4536                                            default_value: None,
4537                                            completion: None,
4538                                        },
4539                                        type_annotated: false,
4540                                    });
4541                                } else if flags.len() >= 3 {
4542                                    working_set.error(ParseError::Expected(
4543                                        "only one short flag alternative",
4544                                        span,
4545                                    ));
4546                                } else {
4547                                    let short_flag = &flags[1];
4548                                    let short_flag = if !short_flag.starts_with(b"-")
4549                                        || !short_flag.ends_with(b")")
4550                                    {
4551                                        working_set.error(ParseError::Expected(
4552                                            "short flag alternative for the long flag",
4553                                            span,
4554                                        ));
4555                                        short_flag
4556                                    } else {
4557                                        // Obtain the flag's name by removing the starting - and trailing )
4558                                        &short_flag[1..(short_flag.len() - 1)]
4559                                    };
4560                                    // Note that it is currently possible to make a short flag with non-alphanumeric characters,
4561                                    // like -).
4562
4563                                    let short_flag =
4564                                        String::from_utf8_lossy(short_flag).to_string();
4565                                    let chars: Vec<char> = short_flag.chars().collect();
4566
4567                                    if chars.len() == 1 {
4568                                        args.push(Arg::Flag {
4569                                            flag: Flag {
4570                                                arg: None,
4571                                                desc: String::new(),
4572                                                long,
4573                                                short: Some(chars[0]),
4574                                                required: false,
4575                                                var_id,
4576                                                default_value: None,
4577                                                completion: None,
4578                                            },
4579                                            type_annotated: false,
4580                                        });
4581                                    } else {
4582                                        working_set.error(ParseError::Expected("short flag", span));
4583                                    }
4584                                }
4585                                parse_mode = ParseMode::Arg;
4586                            }
4587                            // Mandatory short flag, e.g. -e (must be one character)
4588                            else if contents.starts_with(b"-") && contents.len() > 1 {
4589                                let short_flag = &contents[1..];
4590                                let short_flag = String::from_utf8_lossy(short_flag).to_string();
4591                                let chars: Vec<char> = short_flag.chars().collect();
4592
4593                                if chars.len() > 1 {
4594                                    working_set.error(ParseError::Expected("short flag", span));
4595                                }
4596
4597                                let mut encoded_var_name = [0u8; 4];
4598                                let len = chars[0].encode_utf8(&mut encoded_var_name).len();
4599                                let variable_name = encoded_var_name[0..len].to_vec();
4600
4601                                if !is_variable(&variable_name) {
4602                                    working_set.error(ParseError::Expected(
4603                                        "valid variable name for this short flag",
4604                                        span,
4605                                    ))
4606                                }
4607
4608                                let var_id = check_and_add_variable(
4609                                    working_set,
4610                                    variable_name,
4611                                    Type::Bool,
4612                                    span,
4613                                );
4614
4615                                args.push(Arg::Flag {
4616                                    flag: Flag {
4617                                        arg: None,
4618                                        desc: String::new(),
4619                                        long: String::new(),
4620                                        short: Some(chars[0]),
4621                                        required: false,
4622                                        var_id,
4623                                        default_value: None,
4624                                        completion: None,
4625                                    },
4626                                    type_annotated: false,
4627                                });
4628                                parse_mode = ParseMode::Arg;
4629                            }
4630                            // Short flag alias for long flag, e.g. --b (-a)
4631                            // This is the same as the short flag in --b(-a)
4632                            else if let Some(short_flag) = contents.strip_prefix(b"(-") {
4633                                if let ParseMode::AfterCommaArg = parse_mode {
4634                                    working_set
4635                                        .error(ParseError::Expected("parameter or flag", span));
4636                                }
4637
4638                                let short_flag = if !short_flag.ends_with(b")") {
4639                                    working_set.error(ParseError::Expected("short flag", span));
4640                                    short_flag
4641                                } else {
4642                                    &short_flag[..(short_flag.len() - 1)]
4643                                };
4644
4645                                let short_flag = String::from_utf8_lossy(short_flag).to_string();
4646                                let chars: Vec<char> = short_flag.chars().collect();
4647
4648                                if chars.len() == 1 {
4649                                    match args.last_mut() {
4650                                        Some(Arg::Flag { flag, .. }) => {
4651                                            if flag.short.is_some() {
4652                                                working_set.error(ParseError::Expected(
4653                                                    "one short flag",
4654                                                    span,
4655                                                ));
4656                                            } else {
4657                                                flag.short = Some(chars[0]);
4658                                            }
4659                                        }
4660                                        _ => {
4661                                            working_set
4662                                                .error(ParseError::Expected("unknown flag", span));
4663                                        }
4664                                    }
4665                                } else {
4666                                    working_set.error(ParseError::Expected("short flag", span));
4667                                }
4668                            }
4669                            // Positional arg, optional
4670                            else if let Some(optional_param) = contents.strip_suffix(b"?") {
4671                                let name = String::from_utf8_lossy(optional_param).to_string();
4672
4673                                if !is_variable(optional_param) {
4674                                    working_set.error(ParseError::Expected(
4675                                        "valid variable name for this optional parameter",
4676                                        span,
4677                                    ))
4678                                }
4679
4680                                let var_id = check_and_add_variable(
4681                                    working_set,
4682                                    optional_param.to_vec(),
4683                                    Type::Any,
4684                                    span,
4685                                );
4686
4687                                args.push(Arg::Positional {
4688                                    arg: PositionalArg {
4689                                        desc: String::new(),
4690                                        name,
4691                                        shape: SyntaxShape::Any,
4692                                        var_id,
4693                                        default_value: None,
4694                                        completion: None,
4695                                    },
4696                                    required: false,
4697                                    type_annotated: false,
4698                                });
4699                                parse_mode = ParseMode::Arg;
4700                            }
4701                            // Rest param
4702                            else if let Some(contents) = contents.strip_prefix(b"...") {
4703                                let name = String::from_utf8_lossy(contents).to_string();
4704                                let contents_vec: Vec<u8> = contents.to_vec();
4705
4706                                if !is_variable(&contents_vec) {
4707                                    working_set.error(ParseError::Expected(
4708                                        "valid variable name for this rest parameter",
4709                                        span,
4710                                    ))
4711                                }
4712
4713                                let var_id = check_and_add_variable(
4714                                    working_set,
4715                                    contents_vec,
4716                                    Type::Any,
4717                                    span,
4718                                );
4719
4720                                args.push(Arg::RestPositional(PositionalArg {
4721                                    desc: String::new(),
4722                                    name,
4723                                    shape: SyntaxShape::Any,
4724                                    var_id,
4725                                    default_value: None,
4726                                    completion: None,
4727                                }));
4728                                parse_mode = ParseMode::Arg;
4729                            }
4730                            // Normal param
4731                            else {
4732                                let name = String::from_utf8_lossy(&contents).to_string();
4733                                let contents_vec = contents.to_vec();
4734
4735                                if !is_variable(&contents_vec) {
4736                                    working_set.error(ParseError::Expected(
4737                                        "valid variable name for this parameter",
4738                                        span,
4739                                    ))
4740                                }
4741
4742                                let var_id = check_and_add_variable(
4743                                    working_set,
4744                                    contents_vec,
4745                                    Type::Any,
4746                                    span,
4747                                );
4748
4749                                // Positional arg, required
4750                                args.push(Arg::Positional {
4751                                    arg: PositionalArg {
4752                                        desc: String::new(),
4753                                        name,
4754                                        shape: SyntaxShape::Any,
4755                                        var_id,
4756                                        default_value: None,
4757                                        completion: None,
4758                                    },
4759                                    required: true,
4760                                    type_annotated: false,
4761                                });
4762                                parse_mode = ParseMode::Arg;
4763                            }
4764                        }
4765                        ParseMode::Type => {
4766                            if let Some(last) = args.last_mut() {
4767                                let (syntax_shape, completer) = contents
4768                                    .iter()
4769                                    .position(|b| *b == b'@')
4770                                    .and_then(|idx| {
4771                                        let (shape, completer) = contents.split_at_checked(idx)?;
4772                                        let (shape_span, completer_span) = span.split_at(idx)?;
4773
4774                                        let completer = completer.strip_prefix(b"@")?;
4775                                        let (_, completer_span) = completer_span.split_at(1)?;
4776
4777                                        Some((
4778                                            parse_shape_name(working_set, shape, shape_span),
4779                                            parse_completer(working_set, completer, completer_span),
4780                                        ))
4781                                    })
4782                                    .unwrap_or_else(|| {
4783                                        (parse_shape_name(working_set, &contents, span), None)
4784                                    });
4785
4786                                //TODO check if we're replacing a custom parameter already
4787                                match last {
4788                                    Arg::Positional {
4789                                        arg:
4790                                            PositionalArg {
4791                                                shape,
4792                                                var_id,
4793                                                completion,
4794                                                ..
4795                                            },
4796                                        required: _,
4797                                        type_annotated,
4798                                    } => {
4799                                        if !is_external {
4800                                            working_set.set_variable_type(
4801                                                var_id.expect(
4802                                                    "internal error: all custom parameters must have \
4803                                                    var_ids",
4804                                                ),
4805                                                syntax_shape.to_type(),
4806                                            );
4807                                        }
4808                                        *completion = completer;
4809                                        *shape = syntax_shape;
4810                                        *type_annotated = true;
4811                                    }
4812                                    Arg::RestPositional(PositionalArg {
4813                                        shape,
4814                                        var_id,
4815                                        completion,
4816                                        ..
4817                                    }) => {
4818                                        if !is_external {
4819                                            working_set.set_variable_type(
4820                                                var_id.expect(
4821                                                    "internal error: all custom parameters must have \
4822                                                    var_ids",
4823                                                ),
4824                                                Type::List(Box::new(syntax_shape.to_type())),
4825                                            );
4826                                        }
4827                                        *completion = completer;
4828                                        *shape = syntax_shape;
4829                                    }
4830                                    Arg::Flag {
4831                                        flag:
4832                                            Flag {
4833                                                arg,
4834                                                var_id,
4835                                                completion,
4836                                                ..
4837                                            },
4838                                        type_annotated,
4839                                    } => {
4840                                        if !is_external {
4841                                            working_set.set_variable_type(var_id.expect("internal error: all custom parameters must have var_ids"), syntax_shape.to_type());
4842                                        }
4843                                        if syntax_shape == SyntaxShape::Boolean {
4844                                            working_set.error(ParseError::LabeledError(
4845                                                "Type annotations are not allowed for boolean switches.".to_string(),
4846                                                "Remove the `: bool` type annotation.".to_string(),
4847                                                span,
4848                                            ));
4849                                        }
4850                                        *completion = completer;
4851                                        *arg = Some(syntax_shape);
4852                                        *type_annotated = true;
4853                                    }
4854                                }
4855                            }
4856                            parse_mode = ParseMode::AfterType;
4857                        }
4858                        ParseMode::DefaultValue => {
4859                            if !is_external && let Some(last) = args.last_mut() {
4860                                let shape = match last {
4861                                    Arg::Positional { arg, .. } => arg.shape.clone(),
4862                                    Arg::RestPositional(arg) => arg.shape.clone(),
4863                                    Arg::Flag { flag, .. } => {
4864                                        flag.arg.clone().unwrap_or(SyntaxShape::Any)
4865                                    }
4866                                };
4867
4868                                let expression = parse_value(working_set, span, &shape);
4869
4870                                //TODO check if we're replacing a custom parameter already
4871                                match last {
4872                                    Arg::Positional {
4873                                        arg:
4874                                            PositionalArg {
4875                                                shape,
4876                                                var_id,
4877                                                default_value,
4878                                                ..
4879                                            },
4880                                        required,
4881                                        type_annotated,
4882                                    } => {
4883                                        let var_id = var_id.expect("internal error: all custom parameters must have var_ids");
4884                                        let var_type = &working_set.get_variable(var_id).ty;
4885                                        if var_type == &Type::Any && !*type_annotated {
4886                                            working_set
4887                                                .set_variable_type(var_id, expression.ty.clone());
4888                                        }
4889
4890                                        *default_value = if let Ok(constant) =
4891                                            eval_constant(working_set, &expression)
4892                                        {
4893                                            Some(constant)
4894                                        } else {
4895                                            working_set.error(ParseError::NonConstantDefaultValue(
4896                                                expression.span,
4897                                            ));
4898                                            None
4899                                        };
4900
4901                                        if !*type_annotated {
4902                                            *shape = expression.ty.to_shape();
4903                                        }
4904                                        *required = false;
4905                                    }
4906                                    Arg::RestPositional(..) => {
4907                                        working_set.error(ParseError::AssignmentMismatch(
4908                                            "Rest parameter was given a default value".into(),
4909                                            "can't have default value".into(),
4910                                            expression.span,
4911                                        ))
4912                                    }
4913                                    Arg::Flag {
4914                                        flag:
4915                                            Flag {
4916                                                arg,
4917                                                var_id,
4918                                                default_value,
4919                                                ..
4920                                            },
4921                                        type_annotated,
4922                                    } => {
4923                                        let expression_span = expression.span;
4924
4925                                        *default_value = if let Ok(value) =
4926                                            eval_constant(working_set, &expression)
4927                                        {
4928                                            Some(value)
4929                                        } else {
4930                                            working_set.error(ParseError::NonConstantDefaultValue(
4931                                                expression_span,
4932                                            ));
4933                                            None
4934                                        };
4935
4936                                        let var_id = var_id.expect("internal error: all custom parameters must have var_ids");
4937                                        let expression_ty = expression.ty.clone();
4938
4939                                        // Flags without type annotations are present/not-present
4940                                        // switches *except* when they have a default value
4941                                        // assigned. In that case they are regular flags and take
4942                                        // on the type of their default value.
4943                                        if !*type_annotated {
4944                                            *arg = Some(expression_ty.to_shape());
4945                                            working_set.set_variable_type(var_id, expression_ty);
4946                                        }
4947                                    }
4948                                }
4949                            }
4950                            parse_mode = ParseMode::Arg;
4951                        }
4952                    }
4953                }
4954            }
4955            Token {
4956                contents: crate::TokenContents::Comment,
4957                span,
4958            } => {
4959                let contents = working_set.get_span_contents(Span::new(span.start + 1, span.end));
4960
4961                let mut contents = String::from_utf8_lossy(contents).to_string();
4962                contents = contents.trim().into();
4963
4964                if let Some(last) = args.last_mut() {
4965                    match last {
4966                        Arg::Flag { flag, .. } => {
4967                            if !flag.desc.is_empty() {
4968                                flag.desc.push('\n');
4969                            }
4970                            flag.desc.push_str(&contents);
4971                        }
4972                        Arg::Positional {
4973                            arg: positional, ..
4974                        } => {
4975                            if !positional.desc.is_empty() {
4976                                positional.desc.push('\n');
4977                            }
4978                            positional.desc.push_str(&contents);
4979                        }
4980                        Arg::RestPositional(positional) => {
4981                            if !positional.desc.is_empty() {
4982                                positional.desc.push('\n');
4983                            }
4984                            positional.desc.push_str(&contents);
4985                        }
4986                    }
4987                }
4988            }
4989            _ => {}
4990        }
4991    }
4992
4993    for (name, var_id) in pending_scope_inserts {
4994        working_set.insert_variable_into_scope(name, var_id);
4995    }
4996
4997    let mut sig = Signature::new(String::new());
4998
4999    for arg in args {
5000        match arg {
5001            Arg::Positional {
5002                arg: positional,
5003                required,
5004                ..
5005            } => {
5006                if required {
5007                    if !sig.optional_positional.is_empty() {
5008                        working_set.error(ParseError::RequiredAfterOptional(
5009                            positional.name.clone(),
5010                            span,
5011                        ))
5012                    }
5013                    sig.required_positional.push(positional)
5014                } else {
5015                    sig.optional_positional.push(positional)
5016                }
5017            }
5018            Arg::Flag { flag, .. } => sig.named.push(flag),
5019            Arg::RestPositional(positional) => {
5020                if positional.name.is_empty() {
5021                    working_set.error(ParseError::RestNeedsName(span))
5022                } else if sig.rest_positional.is_none() {
5023                    sig.rest_positional = Some(PositionalArg {
5024                        name: positional.name,
5025                        ..positional
5026                    })
5027                } else {
5028                    // Too many rest params
5029                    working_set.error(ParseError::MultipleRestParams(span))
5030                }
5031            }
5032        }
5033    }
5034
5035    Box::new(sig)
5036}
5037
5038pub fn parse_list_expression(
5039    working_set: &mut StateWorkingSet,
5040    span: Span,
5041    element_shape: &SyntaxShape,
5042) -> Expression {
5043    let bytes = working_set.get_span_contents(span);
5044
5045    let mut start = span.start;
5046    let mut end = span.end;
5047
5048    if bytes.starts_with(b"[") {
5049        start += 1;
5050    }
5051    if bytes.ends_with(b"]") {
5052        end -= 1;
5053    } else {
5054        working_set.error(ParseError::Unclosed("]".into(), Span::new(end, end)));
5055    }
5056
5057    let inner_span = Span::new(start, end);
5058    let source = working_set.get_span_contents(inner_span);
5059
5060    let (output, err) = lex(source, inner_span.start, &[b'\n', b'\r', b','], &[], true);
5061    if let Some(err) = err {
5062        working_set.error(err)
5063    }
5064
5065    let (mut output, err) = lite_parse(&output, working_set);
5066    if let Some(err) = err {
5067        working_set.error(err)
5068    }
5069
5070    let mut args = vec![];
5071
5072    let mut contained_type: Option<Type> = None;
5073
5074    if !output.block.is_empty() {
5075        for mut command in output.block.remove(0).commands {
5076            let mut spans_idx = 0;
5077
5078            while spans_idx < command.parts.len() {
5079                let curr_span = command.parts[spans_idx];
5080                let curr_tok = working_set.get_span_contents(curr_span);
5081                let (arg, ty) = if curr_tok.starts_with(b"...")
5082                    && curr_tok.len() > 3
5083                    && (curr_tok[3] == b'$' || curr_tok[3] == b'[' || curr_tok[3] == b'(')
5084                {
5085                    // Parse the spread operator
5086                    // Remove "..." before parsing argument to spread operator
5087                    command.parts[spans_idx] = Span::new(curr_span.start + 3, curr_span.end);
5088                    let spread_arg = parse_multispan_value(
5089                        working_set,
5090                        &command.parts,
5091                        &mut spans_idx,
5092                        &SyntaxShape::List(Box::new(element_shape.clone())),
5093                    );
5094                    let elem_ty = match &spread_arg.ty {
5095                        Type::List(elem_ty) => *elem_ty.clone(),
5096                        _ => Type::Any,
5097                    };
5098                    let span = Span::new(curr_span.start, curr_span.start + 3);
5099                    (ListItem::Spread(span, spread_arg), elem_ty)
5100                } else {
5101                    let arg = parse_multispan_value(
5102                        working_set,
5103                        &command.parts,
5104                        &mut spans_idx,
5105                        element_shape,
5106                    );
5107                    let ty = arg.ty.clone();
5108                    (ListItem::Item(arg), ty)
5109                };
5110
5111                contained_type = match contained_type {
5112                    Some(ctype) => Some(ctype.widen(ty)),
5113                    None => Some(ty),
5114                };
5115
5116                args.push(arg);
5117
5118                spans_idx += 1;
5119            }
5120        }
5121    }
5122
5123    Expression::new(
5124        working_set,
5125        Expr::List(args),
5126        span,
5127        Type::List(Box::new(if let Some(ty) = contained_type {
5128            ty
5129        } else {
5130            Type::Any
5131        })),
5132    )
5133}
5134
5135fn parse_table_row(
5136    working_set: &mut StateWorkingSet,
5137    span: Span,
5138) -> Result<(Vec<Expression>, Span), Span> {
5139    let list = parse_list_expression(working_set, span, &SyntaxShape::Any);
5140    let Expression {
5141        expr: Expr::List(list),
5142        span,
5143        ..
5144    } = list
5145    else {
5146        unreachable!("the item must be a list")
5147    };
5148
5149    list.into_iter()
5150        .map(|item| match item {
5151            ListItem::Item(expr) => Ok(expr),
5152            ListItem::Spread(_, spread) => Err(spread.span),
5153        })
5154        .collect::<Result<_, _>>()
5155        .map(|exprs| (exprs, span))
5156}
5157
5158fn parse_table_expression(
5159    working_set: &mut StateWorkingSet,
5160    span: Span,
5161    list_element_shape: &SyntaxShape,
5162) -> Expression {
5163    let bytes = working_set.get_span_contents(span);
5164    let inner_span = {
5165        let start = if bytes.starts_with(b"[") {
5166            span.start + 1
5167        } else {
5168            span.start
5169        };
5170
5171        let end = if bytes.ends_with(b"]") {
5172            span.end - 1
5173        } else {
5174            let end = span.end;
5175            working_set.error(ParseError::Unclosed("]".into(), Span::new(end, end)));
5176            span.end
5177        };
5178
5179        Span::new(start, end)
5180    };
5181
5182    let source = working_set.get_span_contents(inner_span);
5183    let (tokens, err) = lex(source, inner_span.start, &[b'\n', b'\r', b','], &[], true);
5184    if let Some(err) = err {
5185        working_set.error(err);
5186    }
5187
5188    // Check that we have all arguments first, before trying to parse the first
5189    // in order to avoid exponential parsing time
5190    let [first, second, rest @ ..] = &tokens[..] else {
5191        return parse_list_expression(working_set, span, list_element_shape);
5192    };
5193    if !working_set.get_span_contents(first.span).starts_with(b"[")
5194        || second.contents != TokenContents::Semicolon
5195        || rest.is_empty()
5196    {
5197        return parse_list_expression(working_set, span, list_element_shape);
5198    };
5199    let head = parse_table_row(working_set, first.span);
5200
5201    let errors = working_set.parse_errors.len();
5202
5203    let (head, rows) = match head {
5204        Ok((head, _)) => {
5205            let rows = rest
5206                .iter()
5207                .filter_map(|it| {
5208                    use std::cmp::Ordering;
5209
5210                    match working_set.get_span_contents(it.span) {
5211                        b"," => None,
5212                        text if !text.starts_with(b"[") => {
5213                            let err = ParseError::LabeledErrorWithHelp {
5214                                error: String::from("Table item not list"),
5215                                label: String::from("not a list"),
5216                                span: it.span,
5217                                help: String::from("All table items must be lists"),
5218                            };
5219                            working_set.error(err);
5220                            None
5221                        }
5222                        _ => match parse_table_row(working_set, it.span) {
5223                            Ok((list, span)) => {
5224                                match list.len().cmp(&head.len()) {
5225                                    Ordering::Less => {
5226                                        let err = ParseError::MissingColumns(head.len(), span);
5227                                        working_set.error(err);
5228                                    }
5229                                    Ordering::Greater => {
5230                                        let span = {
5231                                            let start = list[head.len()].span.start;
5232                                            let end = span.end;
5233                                            Span::new(start, end)
5234                                        };
5235                                        let err = ParseError::ExtraColumns(head.len(), span);
5236                                        working_set.error(err);
5237                                    }
5238                                    Ordering::Equal => {}
5239                                }
5240                                Some(list)
5241                            }
5242                            Err(span) => {
5243                                let err = ParseError::LabeledError(
5244                                    String::from("Cannot spread in a table row"),
5245                                    String::from("invalid spread here"),
5246                                    span,
5247                                );
5248                                working_set.error(err);
5249                                None
5250                            }
5251                        },
5252                    }
5253                })
5254                .collect();
5255
5256            (head, rows)
5257        }
5258        Err(span) => {
5259            let err = ParseError::LabeledError(
5260                String::from("Cannot spread in a table row"),
5261                String::from("invalid spread here"),
5262                span,
5263            );
5264            working_set.error(err);
5265            (Vec::new(), Vec::new())
5266        }
5267    };
5268
5269    let ty = if working_set.parse_errors.len() == errors {
5270        let (ty, errs) = table_type(&head, &rows);
5271        working_set.parse_errors.extend(errs);
5272        ty
5273    } else {
5274        Type::table()
5275    };
5276
5277    let table = Table {
5278        columns: head.into(),
5279        rows: rows.into_iter().map(Into::into).collect(),
5280    };
5281
5282    Expression::new(working_set, Expr::Table(table), span, ty)
5283}
5284
5285fn table_type(head: &[Expression], rows: &[Vec<Expression>]) -> (Type, Vec<ParseError>) {
5286    let mut errors = vec![];
5287    let mut rows: Vec<_> = rows.iter().map(|row| row.iter()).collect();
5288
5289    let column_types = std::iter::from_fn(move || {
5290        let column = rows
5291            .iter_mut()
5292            .filter_map(|row| row.next())
5293            .map(|col| col.ty.clone());
5294        Some(Type::supertype_of(column).unwrap_or(Type::Any))
5295    });
5296
5297    let mk_error = |span| ParseError::LabeledErrorWithHelp {
5298        error: "Table column name not string".into(),
5299        label: "must be a string".into(),
5300        help: "Table column names should be able to be converted into strings".into(),
5301        span,
5302    };
5303
5304    let ty: Box<[(String, Type)]> = head
5305        .iter()
5306        .zip(column_types)
5307        .filter_map(|(expr, col_ty)| {
5308            if !Type::String.is_subtype_of(&expr.ty) {
5309                errors.push(mk_error(expr.span));
5310                None
5311            } else {
5312                expr.as_string().zip(Some(col_ty))
5313            }
5314        })
5315        .collect();
5316
5317    (Type::Table(ty), errors)
5318}
5319
5320pub fn parse_block_expression(working_set: &mut StateWorkingSet, span: Span) -> Expression {
5321    trace!("parsing: block expression");
5322
5323    let bytes = working_set.get_span_contents(span);
5324
5325    let mut start = span.start;
5326    let mut end = span.end;
5327    let mut is_closed = true;
5328
5329    if bytes.starts_with(b"{") {
5330        start += 1;
5331    } else {
5332        working_set.error(ParseError::Expected("block", span));
5333        return garbage(working_set, span);
5334    }
5335    if bytes.ends_with(b"}") {
5336        end -= 1;
5337    } else {
5338        working_set.error(ParseError::Unclosed("}".into(), Span::new(end, end)));
5339        is_closed = false;
5340    }
5341
5342    let inner_span = Span::new(start, end);
5343
5344    let source = working_set.get_span_contents(inner_span);
5345
5346    let (output, err) = lex(source, start, &[], &[], false);
5347    if let Some(err) = err {
5348        working_set.error(err);
5349    }
5350
5351    working_set.enter_scope();
5352
5353    // Check to see if we have parameters
5354    let (signature, amt_to_skip): (Option<(Box<Signature>, Span)>, usize) = match output.first() {
5355        Some(Token {
5356            contents: TokenContents::Pipe,
5357            span,
5358        }) => {
5359            working_set.error(ParseError::Expected("block but found closure", *span));
5360            (None, 0)
5361        }
5362        _ => (None, 0),
5363    };
5364
5365    let mut output = parse_block(working_set, &output[amt_to_skip..], span, false, false);
5366
5367    if let Some(signature) = signature {
5368        output.signature = signature.0;
5369    }
5370
5371    output.span = Some(span);
5372
5373    if is_closed {
5374        working_set.exit_scope();
5375    }
5376
5377    let block_id = working_set.add_block(Arc::new(output));
5378
5379    Expression::new(working_set, Expr::Block(block_id), span, Type::Block)
5380}
5381
5382pub fn parse_match_block_expression(working_set: &mut StateWorkingSet, span: Span) -> Expression {
5383    let bytes = working_set.get_span_contents(span);
5384
5385    let mut start = span.start;
5386    let mut end = span.end;
5387    let mut is_closed = true;
5388
5389    if bytes.starts_with(b"{") {
5390        start += 1;
5391    } else {
5392        working_set.error(ParseError::Expected("closure", span));
5393        return garbage(working_set, span);
5394    }
5395    if bytes.ends_with(b"}") {
5396        end -= 1;
5397    } else {
5398        working_set.error(ParseError::Unclosed("}".into(), Span::new(end, end)));
5399        is_closed = false;
5400    }
5401
5402    let inner_span = Span::new(start, end);
5403
5404    let source = working_set.get_span_contents(inner_span);
5405
5406    let (output, err) = lex(source, start, &[b' ', b'\r', b'\n', b',', b'|'], &[], true);
5407    if let Some(err) = err {
5408        working_set.error(err);
5409    }
5410
5411    let mut position = 0;
5412
5413    let mut output_matches = vec![];
5414
5415    while position < output.len() {
5416        // Each match gets its own scope
5417
5418        working_set.enter_scope();
5419
5420        // First parse the pattern
5421        let mut pattern = parse_pattern(working_set, output[position].span);
5422
5423        position += 1;
5424
5425        if position >= output.len() {
5426            working_set.error(ParseError::Mismatch(
5427                "=>".into(),
5428                "end of input".into(),
5429                Span::new(output[position - 1].span.end, output[position - 1].span.end),
5430            ));
5431
5432            working_set.exit_scope();
5433            break;
5434        }
5435
5436        let mut connector = working_set.get_span_contents(output[position].span);
5437
5438        // Multiple patterns connected by '|'
5439        if connector == b"|" && position < output.len() {
5440            let mut or_pattern = vec![pattern];
5441
5442            while connector == b"|" && position < output.len() {
5443                connector = b"";
5444
5445                position += 1;
5446
5447                if position >= output.len() {
5448                    working_set.error(ParseError::Mismatch(
5449                        "pattern".into(),
5450                        "end of input".into(),
5451                        Span::new(output[position - 1].span.end, output[position - 1].span.end),
5452                    ));
5453                    break;
5454                }
5455
5456                let pattern = parse_pattern(working_set, output[position].span);
5457                or_pattern.push(pattern);
5458
5459                position += 1;
5460                if position >= output.len() {
5461                    working_set.error(ParseError::Mismatch(
5462                        "=>".into(),
5463                        "end of input".into(),
5464                        Span::new(output[position - 1].span.end, output[position - 1].span.end),
5465                    ));
5466                    break;
5467                } else {
5468                    connector = working_set.get_span_contents(output[position].span);
5469                }
5470            }
5471
5472            let start = or_pattern
5473                .first()
5474                .expect("internal error: unexpected state of or-pattern")
5475                .span
5476                .start;
5477            let end = or_pattern
5478                .last()
5479                .expect("internal error: unexpected state of or-pattern")
5480                .span
5481                .end;
5482
5483            pattern = MatchPattern {
5484                pattern: Pattern::Or(or_pattern),
5485                guard: None,
5486                span: Span::new(start, end),
5487            }
5488        }
5489        // A match guard
5490        if connector == b"if" {
5491            let if_end = {
5492                let end = output[position].span.end;
5493                Span::new(end, end)
5494            };
5495
5496            position += 1;
5497
5498            let mk_err = || ParseError::LabeledErrorWithHelp {
5499                error: "Match guard without an expression".into(),
5500                label: "expected an expression".into(),
5501                help: "The `if` keyword must be followed with an expression".into(),
5502                span: if_end,
5503            };
5504
5505            if output.get(position).is_none() {
5506                working_set.error(mk_err());
5507                return garbage(working_set, span);
5508            };
5509
5510            let (tokens, found) = if let Some((pos, _)) = output[position..]
5511                .iter()
5512                .find_position(|t| working_set.get_span_contents(t.span) == b"=>")
5513            {
5514                if position + pos == position {
5515                    working_set.error(mk_err());
5516                    return garbage(working_set, span);
5517                }
5518
5519                (&output[position..position + pos], true)
5520            } else {
5521                (&output[position..], false)
5522            };
5523
5524            let mut start = 0;
5525            let guard = parse_multispan_value(
5526                working_set,
5527                &tokens.iter().map(|tok| tok.span).collect_vec(),
5528                &mut start,
5529                &SyntaxShape::MathExpression,
5530            );
5531
5532            pattern.guard = Some(Box::new(guard));
5533            position += if found { start + 1 } else { start };
5534            connector = working_set.get_span_contents(output[position].span);
5535        }
5536        // Then the `=>` arrow
5537        if connector != b"=>" {
5538            working_set.error(ParseError::Mismatch(
5539                "=>".into(),
5540                "end of input".into(),
5541                Span::new(output[position - 1].span.end, output[position - 1].span.end),
5542            ));
5543        } else {
5544            position += 1;
5545        }
5546
5547        // Finally, the value/expression/block that we will run to produce the result
5548        if position >= output.len() {
5549            working_set.error(ParseError::Mismatch(
5550                "match result".into(),
5551                "end of input".into(),
5552                Span::new(output[position - 1].span.end, output[position - 1].span.end),
5553            ));
5554
5555            working_set.exit_scope();
5556            break;
5557        }
5558
5559        let result = parse_multispan_value(
5560            working_set,
5561            &[output[position].span],
5562            &mut 0,
5563            &SyntaxShape::OneOf(vec![SyntaxShape::Block, SyntaxShape::Expression]),
5564        );
5565        position += 1;
5566        if is_closed {
5567            working_set.exit_scope();
5568        }
5569
5570        output_matches.push((pattern, result));
5571    }
5572
5573    Expression::new(
5574        working_set,
5575        Expr::MatchBlock(output_matches),
5576        span,
5577        Type::Any,
5578    )
5579}
5580
5581pub fn parse_closure_expression(
5582    working_set: &mut StateWorkingSet,
5583    shape: &SyntaxShape,
5584    span: Span,
5585) -> Expression {
5586    trace!("parsing: closure expression");
5587
5588    let bytes = working_set.get_span_contents(span);
5589
5590    let mut start = span.start;
5591    let mut end = span.end;
5592    let mut is_closed = true;
5593
5594    if bytes.starts_with(b"{") {
5595        start += 1;
5596    } else {
5597        working_set.error(ParseError::Expected("closure", span));
5598        return garbage(working_set, span);
5599    }
5600    if bytes.ends_with(b"}") {
5601        end -= 1;
5602    } else {
5603        working_set.error(ParseError::Unclosed("}".into(), Span::new(end, end)));
5604        is_closed = false;
5605    }
5606
5607    let inner_span = Span::new(start, end);
5608
5609    let source = working_set.get_span_contents(inner_span);
5610
5611    let (output, err) = lex(source, start, &[], &[], false);
5612    if let Some(err) = err {
5613        working_set.error(err);
5614    }
5615
5616    working_set.enter_scope();
5617
5618    // Check to see if we have parameters
5619    let (signature, amt_to_skip): (Option<(Box<Signature>, Span)>, usize) = match output.first() {
5620        Some(Token {
5621            contents: TokenContents::Pipe,
5622            span,
5623        }) => {
5624            // We've found a parameter list
5625            let start_point = span.start;
5626            let mut token_iter = output.iter().enumerate().skip(1);
5627            let mut end_span = None;
5628            let mut amt_to_skip = 1;
5629
5630            for token in &mut token_iter {
5631                if let Token {
5632                    contents: TokenContents::Pipe,
5633                    span,
5634                } = token.1
5635                {
5636                    end_span = Some(span);
5637                    amt_to_skip += token.0;
5638                    break;
5639                }
5640            }
5641
5642            let end_point = if let Some(span) = end_span {
5643                span.end
5644            } else {
5645                working_set.error(ParseError::Unclosed("|".into(), Span::new(end, end)));
5646                end
5647            };
5648
5649            let signature_span = Span::new(start_point, end_point);
5650            let signature = parse_signature_helper(working_set, signature_span, false);
5651
5652            (Some((signature, signature_span)), amt_to_skip)
5653        }
5654        Some(Token {
5655            contents: TokenContents::PipePipe,
5656            span,
5657        }) => (
5658            Some((Box::new(Signature::new("closure".to_string())), *span)),
5659            1,
5660        ),
5661        _ => (None, 0),
5662    };
5663
5664    // TODO: Finish this
5665    if let SyntaxShape::Closure(Some(v)) = shape
5666        && let Some((sig, sig_span)) = &signature
5667    {
5668        if sig.num_positionals() > v.len() {
5669            working_set.error(ParseError::ExpectedWithStringMsg(
5670                format!(
5671                    "{} closure parameter{}",
5672                    v.len(),
5673                    if v.len() > 1 { "s" } else { "" }
5674                ),
5675                *sig_span,
5676            ));
5677        }
5678
5679        for (expected, PositionalArg { name, shape, .. }) in
5680            v.iter().zip(sig.required_positional.iter())
5681        {
5682            if expected != shape && *shape != SyntaxShape::Any {
5683                working_set.error(ParseError::ParameterMismatchType(
5684                    name.to_owned(),
5685                    expected.to_string(),
5686                    shape.to_string(),
5687                    *sig_span,
5688                ));
5689            }
5690        }
5691    }
5692
5693    let mut output = parse_block(working_set, &output[amt_to_skip..], span, false, false);
5694
5695    // NOTE: closures need to be compiled eagerly due to these reasons:
5696    //  - their `Block`s (which contains their `IrBlock`) are stored in the working_set
5697    //  - Ir compiler does not have mutable access to the working_set and can't attach `IrBlock`s
5698    //  to existing `Block`s
5699    // so they can't be compiled as part of their parent `Block`'s compilation
5700    //
5701    // If the compiler used a mechanism similar to the `EngineState`/`StateWorkingSet` divide, we
5702    // could defer all compilation and apply the generated delta to `StateWorkingSet` afterwards.
5703    if working_set.parse_errors.is_empty() {
5704        compile_block(working_set, &mut output);
5705    }
5706
5707    if let Some(signature) = signature {
5708        output.signature = signature.0;
5709    }
5710
5711    output.span = Some(span);
5712
5713    if is_closed {
5714        working_set.exit_scope();
5715    }
5716
5717    let block_id = working_set.add_block(Arc::new(output));
5718
5719    Expression::new(working_set, Expr::Closure(block_id), span, Type::Closure)
5720}
5721
5722pub fn parse_value(
5723    working_set: &mut StateWorkingSet,
5724    span: Span,
5725    shape: &SyntaxShape,
5726) -> Expression {
5727    trace!("parsing: value: {shape}");
5728
5729    let bytes = working_set.get_span_contents(span);
5730
5731    if bytes.is_empty() {
5732        working_set.error(ParseError::IncompleteParser(span));
5733        return garbage(working_set, span);
5734    }
5735
5736    match bytes[0] {
5737        b'$' => return parse_dollar_expr(working_set, span),
5738        b'(' => return parse_paren_expr(working_set, span, shape),
5739        b'{' => return parse_brace_expr(working_set, span, shape),
5740        b'[' => match shape {
5741            SyntaxShape::Any
5742            | SyntaxShape::List(_)
5743            | SyntaxShape::Table(_)
5744            | SyntaxShape::Signature
5745            | SyntaxShape::ExternalSignature
5746            | SyntaxShape::Filepath
5747            | SyntaxShape::String
5748            | SyntaxShape::GlobPattern
5749            | SyntaxShape::ExternalArgument => {}
5750            SyntaxShape::OneOf(possible_shapes) => {
5751                if !possible_shapes
5752                    .iter()
5753                    .any(|s| matches!(s, SyntaxShape::List(_)))
5754                {
5755                    working_set.error(ParseError::ExpectedWithStringMsg(shape.to_string(), span));
5756                    return Expression::garbage(working_set, span);
5757                }
5758            }
5759            _ => {
5760                working_set.error(ParseError::ExpectedWithStringMsg(shape.to_string(), span));
5761                return Expression::garbage(working_set, span);
5762            }
5763        },
5764        b'r' if bytes.len() > 1 && bytes[1] == b'#' => {
5765            return parse_raw_string(working_set, span);
5766        }
5767        _ => {}
5768    }
5769
5770    match shape {
5771        SyntaxShape::Number => parse_number(working_set, span),
5772        SyntaxShape::Float => parse_float(working_set, span),
5773        SyntaxShape::Int => parse_int(working_set, span),
5774        SyntaxShape::Duration => parse_duration(working_set, span),
5775        SyntaxShape::DateTime => parse_datetime(working_set, span),
5776        SyntaxShape::Filesize => parse_filesize(working_set, span),
5777        SyntaxShape::Range => {
5778            parse_range(working_set, span).unwrap_or_else(|| garbage(working_set, span))
5779        }
5780        // Check for reserved keyword values
5781        SyntaxShape::Nothing | SyntaxShape::Any if bytes == b"null" => {
5782            Expression::new(working_set, Expr::Nothing, span, Type::Nothing)
5783        }
5784        SyntaxShape::Boolean | SyntaxShape::Any if bytes == b"true" => {
5785            Expression::new(working_set, Expr::Bool(true), span, Type::Bool)
5786        }
5787        SyntaxShape::Boolean | SyntaxShape::Any if bytes == b"false" => {
5788            Expression::new(working_set, Expr::Bool(false), span, Type::Bool)
5789        }
5790        SyntaxShape::Filepath
5791        | SyntaxShape::Directory
5792        | SyntaxShape::GlobPattern
5793        // TODO: this serves for backward compatibility.
5794        // As a consequence, for commands like `def foo [foo: string] {}`,
5795        // it forbids usage like `foo true`, have to call it explicitly with `foo "true"`.
5796        // On the other hand, given current `SyntaxShape` based `parse_value`, `foo 10.0` doesn't raise any error.
5797        // We want to fix this discrepancy in the future.
5798        | SyntaxShape::String
5799            if matches!(bytes, b"true" | b"false" | b"null") =>
5800        {
5801            working_set.error(ParseError::ExpectedWithStringMsg(shape.to_string(), span));
5802            garbage(working_set, span)
5803        }
5804        SyntaxShape::Filepath => parse_filepath(working_set, span),
5805        SyntaxShape::Directory => parse_directory(working_set, span),
5806        SyntaxShape::GlobPattern => parse_glob_pattern(working_set, span),
5807        SyntaxShape::String => parse_string(working_set, span),
5808        SyntaxShape::Binary => parse_binary(working_set, span),
5809        SyntaxShape::Signature if bytes.starts_with(b"[") => parse_signature(working_set, span, false),
5810        SyntaxShape::ExternalSignature if bytes.starts_with(b"[") => parse_signature(working_set, span, true),
5811        SyntaxShape::List(elem) if bytes.starts_with(b"[") => {
5812            parse_table_expression(working_set, span, elem)
5813        }
5814        SyntaxShape::Table(_) if bytes.starts_with(b"[") => {
5815            parse_table_expression(working_set, span, &SyntaxShape::Any)
5816        }
5817        SyntaxShape::CellPath => parse_simple_cell_path(working_set, span),
5818
5819        // Be sure to return ParseError::Expected(..) if invoked for one of these shapes, but lex
5820        // stream doesn't start with '{'} -- parsing in SyntaxShape::Any arm depends on this error variant.
5821        SyntaxShape::Block | SyntaxShape::Closure(..) | SyntaxShape::Record(_) => {
5822            working_set.error(ParseError::Expected("block, closure or record", span));
5823
5824            Expression::garbage(working_set, span)
5825        }
5826
5827        SyntaxShape::ExternalArgument => parse_regular_external_arg(working_set, span),
5828        SyntaxShape::OneOf(possible_shapes) => {
5829            parse_oneof(working_set, &[span], &mut 0, possible_shapes, false)
5830        }
5831
5832        SyntaxShape::Any => {
5833            if bytes.starts_with(b"[") {
5834                //parse_value(working_set, span, &SyntaxShape::Table)
5835                parse_full_cell_path(working_set, None, span)
5836            } else {
5837                let shapes = [
5838                    SyntaxShape::Binary,
5839                    SyntaxShape::Range,
5840                    SyntaxShape::Filesize,
5841                    SyntaxShape::Duration,
5842                    SyntaxShape::DateTime,
5843                    SyntaxShape::Int,
5844                    SyntaxShape::Number,
5845                    SyntaxShape::String,
5846                ];
5847                for shape in shapes.iter() {
5848                    let starting_error_count = working_set.parse_errors.len();
5849
5850                    let s = parse_value(working_set, span, shape);
5851
5852                    if starting_error_count == working_set.parse_errors.len() {
5853                        return s;
5854                    } else {
5855                        match working_set.parse_errors.get(starting_error_count) {
5856                            Some(
5857                                ParseError::Expected(_, _)
5858                                | ParseError::ExpectedWithStringMsg(_, _),
5859                            ) => {
5860                                working_set.parse_errors.truncate(starting_error_count);
5861                                continue;
5862                            }
5863                            _ => {
5864                                return s;
5865                            }
5866                        }
5867                    }
5868                }
5869                working_set.error(ParseError::Expected("any shape", span));
5870                garbage(working_set, span)
5871            }
5872        }
5873        _ => {
5874            working_set.error(ParseError::ExpectedWithStringMsg(shape.to_string(), span));
5875            garbage(working_set, span)
5876        }
5877    }
5878}
5879
5880pub fn parse_assignment_operator(working_set: &mut StateWorkingSet, span: Span) -> Expression {
5881    let contents = working_set.get_span_contents(span);
5882
5883    let operator = match contents {
5884        b"=" => Operator::Assignment(Assignment::Assign),
5885        b"+=" => Operator::Assignment(Assignment::AddAssign),
5886        b"-=" => Operator::Assignment(Assignment::SubtractAssign),
5887        b"*=" => Operator::Assignment(Assignment::MultiplyAssign),
5888        b"/=" => Operator::Assignment(Assignment::DivideAssign),
5889        b"++=" => Operator::Assignment(Assignment::ConcatenateAssign),
5890        _ => {
5891            working_set.error(ParseError::Expected("assignment operator", span));
5892            return garbage(working_set, span);
5893        }
5894    };
5895
5896    Expression::new(working_set, Expr::Operator(operator), span, Type::Any)
5897}
5898
5899pub fn parse_assignment_expression(
5900    working_set: &mut StateWorkingSet,
5901    spans: &[Span],
5902) -> Expression {
5903    trace!("parsing: assignment expression");
5904    let expr_span = Span::concat(spans);
5905
5906    // Assignment always has the most precedence, and its right-hand side can be a pipeline
5907    let Some(op_index) = spans
5908        .iter()
5909        .position(|span| is_assignment_operator(working_set.get_span_contents(*span)))
5910    else {
5911        working_set.error(ParseError::Expected("assignment expression", expr_span));
5912        return garbage(working_set, expr_span);
5913    };
5914
5915    let lhs_spans = &spans[0..op_index];
5916    let op_span = spans[op_index];
5917    let rhs_spans = &spans[(op_index + 1)..];
5918
5919    if lhs_spans.is_empty() {
5920        working_set.error(ParseError::Expected(
5921            "left hand side of assignment",
5922            op_span,
5923        ));
5924        return garbage(working_set, expr_span);
5925    }
5926
5927    if rhs_spans.is_empty() {
5928        working_set.error(ParseError::Expected(
5929            "right hand side of assignment",
5930            op_span,
5931        ));
5932        return garbage(working_set, expr_span);
5933    }
5934
5935    // Parse the lhs and operator as usual for a math expression
5936    let mut lhs = parse_expression(working_set, lhs_spans);
5937    // make sure that lhs is a mutable variable.
5938    match &lhs.expr {
5939        Expr::FullCellPath(p) => {
5940            if let Expr::Var(var_id) = p.head.expr
5941                && var_id != nu_protocol::ENV_VARIABLE_ID
5942                && !working_set.get_variable(var_id).mutable
5943            {
5944                working_set.error(ParseError::AssignmentRequiresMutableVar(lhs.span))
5945            }
5946        }
5947        _ => working_set.error(ParseError::AssignmentRequiresVar(lhs.span)),
5948    }
5949
5950    let mut operator = parse_assignment_operator(working_set, op_span);
5951
5952    // Re-parse the right-hand side as a subexpression
5953    let rhs_span = Span::concat(rhs_spans);
5954
5955    let (rhs_tokens, rhs_error) = lex(
5956        working_set.get_span_contents(rhs_span),
5957        rhs_span.start,
5958        &[],
5959        &[],
5960        false,
5961    );
5962    working_set.parse_errors.extend(rhs_error);
5963
5964    trace!("parsing: assignment right-hand side subexpression");
5965    let rhs_block = parse_block(working_set, &rhs_tokens, rhs_span, false, true);
5966    let rhs_ty = rhs_block.output_type();
5967
5968    // TEMP: double-check that if the RHS block starts with an external call, it must start with a
5969    // caret. This is to mitigate the change in assignment parsing introduced in 0.97.0 which could
5970    // result in unintentional execution of commands.
5971    if let Some(Expr::ExternalCall(head, ..)) = rhs_block
5972        .pipelines
5973        .first()
5974        .and_then(|pipeline| pipeline.elements.first())
5975        .map(|element| &element.expr.expr)
5976    {
5977        let contents = working_set.get_span_contents(Span {
5978            start: head.span.start - 1,
5979            end: head.span.end,
5980        });
5981        if !contents.starts_with(b"^") {
5982            working_set.parse_errors.push(ParseError::LabeledErrorWithHelp {
5983                error: "External command calls must be explicit in assignments".into(),
5984                label: "add a caret (^) before the command name if you intended to run and capture its output".into(),
5985                help: "the parsing of assignments was changed in 0.97.0, and this would have previously been treated as a string. Alternatively, quote the string with single or double quotes to avoid it being interpreted as a command name. This restriction may be removed in a future release.".into(),
5986                span: head.span,
5987            });
5988        }
5989    }
5990
5991    let rhs_block_id = working_set.add_block(Arc::new(rhs_block));
5992    let mut rhs = Expression::new(
5993        working_set,
5994        Expr::Subexpression(rhs_block_id),
5995        rhs_span,
5996        rhs_ty,
5997    );
5998
5999    let (result_ty, err) = math_result_type(working_set, &mut lhs, &mut operator, &mut rhs);
6000    if let Some(err) = err {
6001        working_set.parse_errors.push(err);
6002    }
6003
6004    Expression::new(
6005        working_set,
6006        Expr::BinaryOp(Box::new(lhs), Box::new(operator), Box::new(rhs)),
6007        expr_span,
6008        result_ty,
6009    )
6010}
6011
6012pub fn parse_operator(working_set: &mut StateWorkingSet, span: Span) -> Expression {
6013    let contents = working_set.get_span_contents(span);
6014
6015    let operator = match contents {
6016        b"==" => Operator::Comparison(Comparison::Equal),
6017        b"!=" => Operator::Comparison(Comparison::NotEqual),
6018        b"<" => Operator::Comparison(Comparison::LessThan),
6019        b"<=" => Operator::Comparison(Comparison::LessThanOrEqual),
6020        b">" => Operator::Comparison(Comparison::GreaterThan),
6021        b">=" => Operator::Comparison(Comparison::GreaterThanOrEqual),
6022        b"=~" | b"like" => Operator::Comparison(Comparison::RegexMatch),
6023        b"!~" | b"not-like" => Operator::Comparison(Comparison::NotRegexMatch),
6024        b"in" => Operator::Comparison(Comparison::In),
6025        b"not-in" => Operator::Comparison(Comparison::NotIn),
6026        b"has" => Operator::Comparison(Comparison::Has),
6027        b"not-has" => Operator::Comparison(Comparison::NotHas),
6028        b"starts-with" => Operator::Comparison(Comparison::StartsWith),
6029        b"not-starts-with" => Operator::Comparison(Comparison::NotStartsWith),
6030        b"ends-with" => Operator::Comparison(Comparison::EndsWith),
6031        b"not-ends-with" => Operator::Comparison(Comparison::NotEndsWith),
6032        b"+" => Operator::Math(Math::Add),
6033        b"-" => Operator::Math(Math::Subtract),
6034        b"*" => Operator::Math(Math::Multiply),
6035        b"/" => Operator::Math(Math::Divide),
6036        b"//" => Operator::Math(Math::FloorDivide),
6037        b"mod" => Operator::Math(Math::Modulo),
6038        b"**" => Operator::Math(Math::Pow),
6039        b"++" => Operator::Math(Math::Concatenate),
6040        b"bit-or" => Operator::Bits(Bits::BitOr),
6041        b"bit-xor" => Operator::Bits(Bits::BitXor),
6042        b"bit-and" => Operator::Bits(Bits::BitAnd),
6043        b"bit-shl" => Operator::Bits(Bits::ShiftLeft),
6044        b"bit-shr" => Operator::Bits(Bits::ShiftRight),
6045        b"or" => Operator::Boolean(Boolean::Or),
6046        b"xor" => Operator::Boolean(Boolean::Xor),
6047        b"and" => Operator::Boolean(Boolean::And),
6048        // WARNING: not actual operators below! Error handling only
6049        pow @ (b"^" | b"pow") => {
6050            working_set.error(ParseError::UnknownOperator(
6051                match pow {
6052                    b"^" => "^",
6053                    b"pow" => "pow",
6054                    _ => unreachable!(),
6055                },
6056                "Use '**' for exponentiation or 'bit-xor' for bitwise XOR.",
6057                span,
6058            ));
6059            return garbage(working_set, span);
6060        }
6061        equality @ (b"is" | b"===") => {
6062            working_set.error(ParseError::UnknownOperator(
6063                match equality {
6064                    b"is" => "is",
6065                    b"===" => "===",
6066                    _ => unreachable!(),
6067                },
6068                "Did you mean '=='?",
6069                span,
6070            ));
6071            return garbage(working_set, span);
6072        }
6073        b"contains" => {
6074            working_set.error(ParseError::UnknownOperator(
6075                "contains",
6076                "Did you mean 'has'?",
6077                span,
6078            ));
6079            return garbage(working_set, span);
6080        }
6081        b"%" => {
6082            working_set.error(ParseError::UnknownOperator(
6083                "%",
6084                "Did you mean 'mod'?",
6085                span,
6086            ));
6087            return garbage(working_set, span);
6088        }
6089        b"&" => {
6090            working_set.error(ParseError::UnknownOperator(
6091                "&",
6092                "Did you mean 'bit-and'?",
6093                span,
6094            ));
6095            return garbage(working_set, span);
6096        }
6097        b"<<" => {
6098            working_set.error(ParseError::UnknownOperator(
6099                "<<",
6100                "Did you mean 'bit-shl'?",
6101                span,
6102            ));
6103            return garbage(working_set, span);
6104        }
6105        b">>" => {
6106            working_set.error(ParseError::UnknownOperator(
6107                ">>",
6108                "Did you mean 'bit-shr'?",
6109                span,
6110            ));
6111            return garbage(working_set, span);
6112        }
6113        bits @ (b"bits-and" | b"bits-xor" | b"bits-or" | b"bits-shl" | b"bits-shr") => {
6114            working_set.error(ParseError::UnknownOperator(
6115                match bits {
6116                    b"bits-and" => "bits-and",
6117                    b"bits-xor" => "bits-xor",
6118                    b"bits-or" => "bits-or",
6119                    b"bits-shl" => "bits-shl",
6120                    b"bits-shr" => "bits-shr",
6121                    _ => unreachable!(),
6122                },
6123                match bits {
6124                    b"bits-and" => "Did you mean 'bit-and'?",
6125                    b"bits-xor" => "Did you mean 'bit-xor'?",
6126                    b"bits-or" => "Did you mean 'bit-or'?",
6127                    b"bits-shl" => "Did you mean 'bit-shl'?",
6128                    b"bits-shr" => "Did you mean 'bit-shr'?",
6129                    _ => unreachable!(),
6130                },
6131                span,
6132            ));
6133            return garbage(working_set, span);
6134        }
6135        op if is_assignment_operator(op) => {
6136            working_set.error(ParseError::Expected("a non-assignment operator", span));
6137            return garbage(working_set, span);
6138        }
6139        _ => {
6140            working_set.error(ParseError::Expected("operator", span));
6141            return garbage(working_set, span);
6142        }
6143    };
6144
6145    Expression::new(working_set, Expr::Operator(operator), span, Type::Any)
6146}
6147
/// Parses `spans` as a math expression: operands separated by binary operators, where each
/// operand may be prefixed by any number of `not` keywords.
///
/// Special cases handled here:
/// - If the first span is `if` or `match`, the whole span list is handed to `parse_call`
///   (or an error is reported when nothing follows the keyword).
/// - If an operand position holds `if` or `match`, the remaining spans are handed to
///   `parse_call` and become the final right-hand operand.
/// - A trailing operator with no operand (`1 +`) records `IncompleteMathExpression` and fills
///   the hole with garbage expressions.
///
/// When `lhs_row_var_id` is `Some`, `expand_to_cell_path` is applied to each left-hand operand
/// (and to a lone operand) with that variable id.
/// NOTE(review): presumably this resolves bare column names against a row variable — confirm
/// against `expand_to_cell_path`.
///
/// `spans` must be non-empty: the first element is indexed unconditionally.
pub fn parse_math_expression(
    working_set: &mut StateWorkingSet,
    spans: &[Span],
    lhs_row_var_id: Option<VarId>,
) -> Expression {
    trace!("parsing: math expression");

    // As the expr_stack grows, we increase the required precedence to grow larger
    // If, at any time, the operator we're looking at is the same or lower precedence
    // of what is in the expression stack, we collapse the expression stack.
    //
    // This leads to an expression stack that grows under increasing precedence and collapses
    // under decreasing/sustained precedence
    //
    // The end result is a stack that we can fold into binary operations as right associations
    // safely.

    // Holds alternating entries: operand, operator, operand, operator, operand, ...
    let mut expr_stack: Vec<Expression> = vec![];

    let mut idx = 0;
    // Precedence of the most recently pushed operator; starts at the maximum.
    let mut last_prec = u8::MAX;

    let first_span = working_set.get_span_contents(spans[0]);

    // Start offsets of pending `not` keywords that still have to be wrapped around the next
    // operand.
    let mut not_start_spans = vec![];

    if first_span == b"if" || first_span == b"match" {
        // `if`/`match` expression: parsed as a call, but only if something follows the keyword
        if spans.len() > 1 {
            return parse_call(working_set, spans, spans[0]);
        } else {
            working_set.error(ParseError::Expected(
                "expression",
                Span::new(spans[0].end, spans[0].end),
            ));
            return garbage(working_set, spans[0]);
        }
    } else if first_span == b"not" {
        // Consume the whole run of leading `not`s, remembering where each one starts.
        not_start_spans.push(spans[idx].start);
        idx += 1;
        while idx < spans.len() {
            let next_value = working_set.get_span_contents(spans[idx]);

            if next_value == b"not" {
                not_start_spans.push(spans[idx].start);
                idx += 1;
            } else {
                break;
            }
        }

        // Nothing but `not`s: there is no operand to negate.
        if idx == spans.len() {
            working_set.error(ParseError::Expected(
                "expression",
                Span::new(spans[idx - 1].end, spans[idx - 1].end),
            ));
            return garbage(working_set, spans[idx - 1]);
        }
    }

    let mut lhs = parse_value(working_set, spans[idx], &SyntaxShape::Any);

    // Wrap the operand in one `UnaryNot` per pending `not`, innermost (last seen) first, so each
    // wrapper's span starts at its own `not` keyword.
    for not_start_span in not_start_spans.iter().rev() {
        lhs = Expression::new(
            working_set,
            Expr::UnaryNot(Box::new(lhs)),
            Span::new(*not_start_span, spans[idx].end),
            Type::Bool,
        );
    }
    not_start_spans.clear();

    idx += 1;

    if idx >= spans.len() {
        // We already found the one part of our expression, so let's expand
        if let Some(row_var_id) = lhs_row_var_id {
            expand_to_cell_path(working_set, &mut lhs, row_var_id);
        }
    }

    expr_stack.push(lhs);

    // Each iteration consumes one operator followed by one operand.
    while idx < spans.len() {
        let op = parse_operator(working_set, spans[idx]);

        let op_prec = op.precedence();

        idx += 1;

        if idx == spans.len() {
            // Handle broken math expr `1 +` etc
            working_set.error(ParseError::IncompleteMathExpression(spans[idx - 1]));

            // Push a garbage operator and a garbage (empty-span) operand so the stack keeps its
            // operand/operator/operand shape for the final fold.
            expr_stack.push(Expression::garbage(working_set, spans[idx - 1]));
            let missing_span = Span::new(spans[idx - 1].end, spans[idx - 1].end);
            expr_stack.push(Expression::garbage(working_set, missing_span));

            break;
        }

        let content = working_set.get_span_contents(spans[idx]);
        // allow `if` (and `match`) to be a special value in operand position: everything from
        // here to the end of `spans` becomes the right-hand operand.

        if content == b"if" || content == b"match" {
            let rhs = parse_call(working_set, &spans[idx..], spans[0]);
            expr_stack.push(op);
            expr_stack.push(rhs);
            break;
        } else if content == b"not" {
            // Same `not`-run handling as for the first operand.
            not_start_spans.push(spans[idx].start);
            idx += 1;
            while idx < spans.len() {
                let next_value = working_set.get_span_contents(spans[idx]);

                if next_value == b"not" {
                    not_start_spans.push(spans[idx].start);
                    idx += 1;
                } else {
                    break;
                }
            }

            if idx == spans.len() {
                working_set.error(ParseError::Expected(
                    "expression",
                    Span::new(spans[idx - 1].end, spans[idx - 1].end),
                ));
                return garbage(working_set, spans[idx - 1]);
            }
        }
        let mut rhs = parse_value(working_set, spans[idx], &SyntaxShape::Any);

        for not_start_span in not_start_spans.iter().rev() {
            rhs = Expression::new(
                working_set,
                Expr::UnaryNot(Box::new(rhs)),
                Span::new(*not_start_span, spans[idx].end),
                Type::Bool,
            );
        }
        not_start_spans.clear();

        // Parsing power must be right-associative unlike most operations which are left
        // Hence, we should not collapse if the last and current operations are both power
        let is_left_associative =
            op.expr != Expr::Operator(Operator::Math(Math::Pow)) && op_prec <= last_prec;

        // Collapse stacked operations whose precedence is at least that of the new operator
        // before pushing the new operator and operand.
        while is_left_associative && expr_stack.len() > 1 {
            // Collapse the right associated operations first
            // so that we can get back to a stack with a lower precedence
            let mut rhs = expr_stack
                .pop()
                .expect("internal error: expression stack empty");
            let mut op = expr_stack
                .pop()
                .expect("internal error: expression stack empty");

            last_prec = op.precedence();

            // The stacked operator binds less tightly than the new one: put it back and stop.
            if last_prec < op_prec {
                expr_stack.push(op);
                expr_stack.push(rhs);
                break;
            }

            let mut lhs = expr_stack
                .pop()
                .expect("internal error: expression stack empty");

            if let Some(row_var_id) = lhs_row_var_id {
                expand_to_cell_path(working_set, &mut lhs, row_var_id);
            }

            let (result_ty, err) = math_result_type(working_set, &mut lhs, &mut op, &mut rhs);
            if let Some(err) = err {
                working_set.error(err);
            }

            let op_span = Span::append(lhs.span, rhs.span);
            expr_stack.push(Expression::new(
                working_set,
                Expr::BinaryOp(Box::new(lhs), Box::new(op), Box::new(rhs)),
                op_span,
                result_ty,
            ));
        }
        expr_stack.push(op);
        expr_stack.push(rhs);

        last_prec = op_prec;

        idx += 1;
    }

    // Fold whatever is left on the stack from the right, three entries at a time, until a
    // single expression remains.
    while expr_stack.len() != 1 {
        let mut rhs = expr_stack
            .pop()
            .expect("internal error: expression stack empty");
        let mut op = expr_stack
            .pop()
            .expect("internal error: expression stack empty");
        let mut lhs = expr_stack
            .pop()
            .expect("internal error: expression stack empty");

        if let Some(row_var_id) = lhs_row_var_id {
            expand_to_cell_path(working_set, &mut lhs, row_var_id);
        }

        let (result_ty, err) = math_result_type(working_set, &mut lhs, &mut op, &mut rhs);
        if let Some(err) = err {
            working_set.error(err)
        }

        let binary_op_span = Span::append(lhs.span, rhs.span);
        expr_stack.push(Expression::new(
            working_set,
            Expr::BinaryOp(Box::new(lhs), Box::new(op), Box::new(rhs)),
            binary_op_span,
            result_ty,
        ));
    }

    expr_stack
        .pop()
        .expect("internal error: expression stack empty")
}
6376
6377pub fn parse_expression(working_set: &mut StateWorkingSet, spans: &[Span]) -> Expression {
6378    trace!("parsing: expression");
6379
6380    let mut pos = 0;
6381    let mut shorthand = vec![];
6382
6383    while pos < spans.len() {
6384        // Check if there is any environment shorthand
6385        let name = working_set.get_span_contents(spans[pos]);
6386
6387        let split: Vec<_> = name.splitn(2, |x| *x == b'=').collect();
6388        if split.len() != 2 || !is_env_variable_name(split[0]) {
6389            break;
6390        }
6391
6392        let point = split[0].len() + 1;
6393        let starting_error_count = working_set.parse_errors.len();
6394
6395        let rhs = if spans[pos].start + point < spans[pos].end {
6396            let rhs_span = Span::new(spans[pos].start + point, spans[pos].end);
6397            if split[1].starts_with(b"$") {
6398                parse_dollar_expr(working_set, rhs_span)
6399            } else {
6400                parse_string_strict(working_set, rhs_span)
6401            }
6402        } else {
6403            Expression::new(
6404                working_set,
6405                Expr::String(String::new()),
6406                Span::unknown(),
6407                Type::Nothing,
6408            )
6409        };
6410
6411        let lhs_span = Span::new(spans[pos].start, spans[pos].start + point - 1);
6412        let lhs = parse_string_strict(working_set, lhs_span);
6413
6414        if starting_error_count == working_set.parse_errors.len() {
6415            shorthand.push((lhs, rhs));
6416            pos += 1;
6417        } else {
6418            working_set.parse_errors.truncate(starting_error_count);
6419            break;
6420        }
6421    }
6422
6423    if pos == spans.len() {
6424        working_set.error(ParseError::UnknownCommand(spans[0]));
6425        return garbage(working_set, Span::concat(spans));
6426    }
6427
6428    let output = if spans[pos..]
6429        .iter()
6430        .any(|span| is_assignment_operator(working_set.get_span_contents(*span)))
6431    {
6432        parse_assignment_expression(working_set, &spans[pos..])
6433    } else if is_math_expression_like(working_set, spans[pos]) {
6434        parse_math_expression(working_set, &spans[pos..], None)
6435    } else {
6436        let bytes = working_set.get_span_contents(spans[pos]).to_vec();
6437
6438        // For now, check for special parses of certain keywords
6439        match bytes.as_slice() {
6440            b"def" | b"extern" | b"for" | b"module" | b"use" | b"source" | b"alias" | b"export"
6441            | b"export-env" | b"hide" => {
6442                working_set.error(ParseError::BuiltinCommandInPipeline(
6443                    String::from_utf8(bytes)
6444                        .expect("builtin commands bytes should be able to convert to string"),
6445                    spans[0],
6446                ));
6447
6448                parse_call(working_set, &spans[pos..], spans[0])
6449            }
6450            b"const" | b"mut" => {
6451                working_set.error(ParseError::AssignInPipeline(
6452                    String::from_utf8(bytes)
6453                        .expect("builtin commands bytes should be able to convert to string"),
6454                    String::from_utf8_lossy(match spans.len() {
6455                        1..=3 => b"value",
6456                        _ => working_set.get_span_contents(spans[3]),
6457                    })
6458                    .to_string(),
6459                    String::from_utf8_lossy(match spans.len() {
6460                        1 => b"variable",
6461                        _ => working_set.get_span_contents(spans[1]),
6462                    })
6463                    .to_string(),
6464                    spans[0],
6465                ));
6466                parse_call(working_set, &spans[pos..], spans[0])
6467            }
6468            b"overlay" => {
6469                if spans.len() > 1 && working_set.get_span_contents(spans[1]) == b"list" {
6470                    // whitelist 'overlay list'
6471                    parse_call(working_set, &spans[pos..], spans[0])
6472                } else {
6473                    working_set.error(ParseError::BuiltinCommandInPipeline(
6474                        "overlay".into(),
6475                        spans[0],
6476                    ));
6477
6478                    parse_call(working_set, &spans[pos..], spans[0])
6479                }
6480            }
6481            b"where" => parse_where_expr(working_set, &spans[pos..]),
6482            #[cfg(feature = "plugin")]
6483            b"plugin" => {
6484                if spans.len() > 1 && working_set.get_span_contents(spans[1]) == b"use" {
6485                    // only 'plugin use' is banned
6486                    working_set.error(ParseError::BuiltinCommandInPipeline(
6487                        "plugin use".into(),
6488                        spans[0],
6489                    ));
6490                }
6491
6492                parse_call(working_set, &spans[pos..], spans[0])
6493            }
6494
6495            _ => parse_call(working_set, &spans[pos..], spans[0]),
6496        }
6497    };
6498
6499    if !shorthand.is_empty() {
6500        let with_env = working_set.find_decl(b"with-env");
6501        if let Some(decl_id) = with_env {
6502            let mut block = Block::default();
6503            let ty = output.ty.clone();
6504            block.pipelines = vec![Pipeline::from_vec(vec![output])];
6505            block.span = Some(Span::concat(spans));
6506
6507            compile_block(working_set, &mut block);
6508
6509            let block_id = working_set.add_block(Arc::new(block));
6510
6511            let mut env_vars = vec![];
6512            for sh in shorthand {
6513                env_vars.push(RecordItem::Pair(sh.0, sh.1));
6514            }
6515
6516            let arguments = vec![
6517                Argument::Positional(Expression::new(
6518                    working_set,
6519                    Expr::Record(env_vars),
6520                    Span::concat(&spans[..pos]),
6521                    Type::Any,
6522                )),
6523                Argument::Positional(Expression::new(
6524                    working_set,
6525                    Expr::Closure(block_id),
6526                    Span::concat(&spans[pos..]),
6527                    Type::Closure,
6528                )),
6529            ];
6530
6531            let expr = Expr::Call(Box::new(Call {
6532                head: Span::unknown(),
6533                decl_id,
6534                arguments,
6535                parser_info: HashMap::new(),
6536            }));
6537
6538            Expression::new(working_set, expr, Span::concat(spans), ty)
6539        } else {
6540            output
6541        }
6542    } else {
6543        output
6544    }
6545}
6546
/// Parses a single [`LiteCommand`] into a [`Pipeline`], dispatching parser keywords
/// (`def`, `let`, `use`, `overlay`, ...) to their dedicated parsers.
///
/// The work happens in three stages:
///
/// 1. If the first part is neither math-expression-like nor an unaliasable parser
///    keyword, and it resolves to an alias, the command is parsed as a call. When
///    that call resolves to `overlay hide`, `overlay new` or `overlay use`, the
///    matching overlay parser is run and its pipeline is returned.
/// 2. Otherwise the first command part is matched against the known keywords.
/// 3. Anything else is parsed as an ordinary pipeline element; if that element
///    turns out to be a call to one of the overlay keywords above, the overlay
///    parser is run so that its side effects are applied.
pub fn parse_builtin_commands(
    working_set: &mut StateWorkingSet,
    lite_command: &LiteCommand,
) -> Pipeline {
    trace!("parsing: builtin commands");
    if !is_math_expression_like(working_set, lite_command.parts[0])
        && !is_unaliasable_parser_keyword(working_set, &lite_command.parts)
    {
        trace!("parsing: not math expression or unaliasable parser keyword");
        let name = working_set.get_span_contents(lite_command.parts[0]);
        if let Some(decl_id) = working_set.find_decl(name) {
            let cmd = working_set.get_decl(decl_id);
            if cmd.is_alias() {
                // Parse keywords that can be aliased. Note that we check for "unaliasable" keywords
                // because alias can have any name, therefore, we can't check for "aliasable" keywords.
                let call_expr = parse_call(working_set, &lite_command.parts, lite_command.parts[0]);

                // Only a resolved call that is not marked as a percent-forced builtin
                // is inspected for keyword side effects.
                if let Expression {
                    expr: Expr::Call(call),
                    ..
                } = call_expr
                    && !call
                        .parser_info
                        .contains_key(PERCENT_FORCED_BUILTIN_PARSER_INFO)
                {
                    // Apply parse keyword side effects
                    let cmd = working_set.get_decl(call.decl_id);
                    match cmd.name() {
                        "overlay hide" => return parse_overlay_hide(working_set, call),
                        "overlay new" => return parse_overlay_new(working_set, call),
                        "overlay use" => return parse_overlay_use(working_set, call),
                        _ => { /* this alias is not a parser keyword */ }
                    }
                }
            }
        }
    }

    trace!("parsing: checking for keywords");
    // Name of the command itself; an empty name falls through to the default arm.
    let name = lite_command
        .command_parts()
        .first()
        .map(|s| working_set.get_span_contents(*s))
        .unwrap_or(b"");

    // NOTE: arm order matters. The `has_attributes` guard arm must stay after the
    // keywords that handle attributes themselves and before all the others.
    match name {
        // `parse_def` and `parse_extern` work both with and without attributes
        b"def" => parse_def(working_set, lite_command, None).0,
        b"extern" => parse_extern(working_set, lite_command, None),
        // `parse_export_in_block` also handles attributes by itself
        b"export" => parse_export_in_block(working_set, lite_command),
        b"export-env" => parse_export_env(working_set, &lite_command.parts).0,
        // Other definitions can't have attributes, so we handle attributes here with parse_attribute_block
        _ if lite_command.has_attributes() => parse_attribute_block(working_set, lite_command),
        b"let" => parse_let(
            working_set,
            &lite_command
                .parts_including_redirection()
                .collect::<Vec<Span>>(),
        ),
        b"const" => parse_const(working_set, &lite_command.parts).0,
        b"mut" => parse_mut(
            working_set,
            &lite_command
                .parts_including_redirection()
                .collect::<Vec<Span>>(),
        ),
        b"for" => {
            let expr = parse_for(working_set, lite_command);
            Pipeline::from_vec(vec![expr])
        }
        b"alias" => parse_alias(working_set, lite_command, None),
        b"module" => parse_module(working_set, lite_command, None).0,
        b"use" => parse_use(working_set, lite_command, None).0,
        b"overlay" => {
            // Redirecting an `overlay` command is reported as an error
            if let Some(redirection) = lite_command.redirection.as_ref() {
                working_set.error(redirecting_builtin_error("overlay", redirection));
                return garbage_pipeline(working_set, &lite_command.parts);
            }
            parse_keyword(working_set, lite_command)
        }
        b"source" | b"source-env" => parse_source(working_set, lite_command),
        b"hide" => parse_hide(working_set, lite_command),
        b"where" => parse_where(working_set, lite_command),
        // Only "plugin use" is a keyword
        #[cfg(feature = "plugin")]
        b"plugin"
            if lite_command
                .parts
                .get(1)
                .is_some_and(|span| working_set.get_span_contents(*span) == b"use") =>
        {
            // Redirecting `plugin use` is reported as an error
            if let Some(redirection) = lite_command.redirection.as_ref() {
                working_set.error(redirecting_builtin_error("plugin use", redirection));
                return garbage_pipeline(working_set, &lite_command.parts);
            }
            parse_keyword(working_set, lite_command)
        }
        _ => {
            let element = parse_pipeline_element(working_set, lite_command);

            // There is still a chance to make `parse_pipeline_element` parse into
            // some keyword that should apply side effects first, Example:
            //
            // module a { export alias b = overlay use first.nu };
            // use a
            // a b
            //
            // In this case, `a b` will be parsed as a pipeline element, which leads
            // to the `overlay use` command.
            // In this case, we need to ensure that the side effects of these keywords
            // are applied.
            if let Expression {
                expr: Expr::Call(call),
                ..
            } = &element.expr
            {
                // Dynamic percent dispatch stores a placeholder call plus parser
                // metadata for later IR rewrite. Skip parser-keyword side-effects lookup here,
                // because there is no declaration to resolve yet.
                if call
                    .parser_info
                    .contains_key(PERCENT_FORCED_BUILTIN_PARSER_INFO)
                {
                    return Pipeline {
                        elements: vec![element],
                    };
                }

                // Apply parse keyword side effects
                let cmd = working_set.get_decl(call.decl_id);
                match cmd.name() {
                    "overlay hide" => return parse_overlay_hide(working_set, call.clone()),
                    "overlay new" => return parse_overlay_new(working_set, call.clone()),
                    "overlay use" => return parse_overlay_use(working_set, call.clone()),
                    _ => { /* this alias is not a parser keyword */ }
                }
            }
            // Ordinary command: the pipeline consists of just this element
            Pipeline {
                elements: vec![element],
            }
        }
    }
}
6691
6692fn check_record_key_or_value(
6693    working_set: &StateWorkingSet,
6694    expr: &Expression,
6695    position: &str,
6696) -> Option<ParseError> {
6697    let bareword_error = |string_value: &Expression| {
6698        working_set
6699            .get_span_contents(string_value.span)
6700            .iter()
6701            .find_position(|b| **b == b':')
6702            .map(|(i, _)| {
6703                let colon_position = i + string_value.span.start;
6704                ParseError::InvalidLiteral(
6705                    "colon".to_string(),
6706                    format!("bare word specifying record {position}"),
6707                    Span::new(colon_position, colon_position + 1),
6708                )
6709            })
6710    };
6711    let value_span = working_set.get_span_contents(expr.span);
6712    match expr.expr {
6713        Expr::String(_) => {
6714            if ![b'"', b'\'', b'`'].contains(&value_span[0]) {
6715                bareword_error(expr)
6716            } else {
6717                None
6718            }
6719        }
6720        Expr::StringInterpolation(ref expressions) => {
6721            if value_span[0] != b'$' {
6722                expressions
6723                    .iter()
6724                    .filter(|expr| matches!(expr.expr, Expr::String(_)))
6725                    .filter_map(bareword_error)
6726                    .next()
6727            } else {
6728                None
6729            }
6730        }
6731        _ => None,
6732    }
6733}
6734
/// Parses a record literal (`{key: value, ...}`) located at `span`.
///
/// The contents of `span` must start with `{`; otherwise an `Expected` error is
/// reported and a garbage expression is returned. A missing closing `}` is
/// reported as `Unclosed`, but parsing of the contents still proceeds.
///
/// The inner text is lexed into key / `:` / value token triples, where newlines,
/// carriage returns and commas count as whitespace. A token that starts with
/// `...` followed by `$`, `{` or `(` is treated as a spread and stands alone.
///
/// The resulting expression has type `Type::Record` built from the collected
/// field types, or `Type::Any` when a key yields no string via `as_string()` or a
/// spread's type is not a record.
pub fn parse_record(working_set: &mut StateWorkingSet, span: Span) -> Expression {
    let bytes = working_set.get_span_contents(span);

    let mut start = span.start;
    let mut end = span.end;

    if bytes.starts_with(b"{") {
        start += 1;
    } else {
        working_set.error(ParseError::Expected("{", Span::new(start, start + 1)));
        return garbage(working_set, span);
    }

    let mut unclosed = false;
    let mut extra_tokens = false;
    if bytes.ends_with(b"}") {
        end -= 1;
    } else {
        unclosed = true;
    }

    // Everything between the braces (or up to the end of the span when unclosed)
    let inner_span = Span::new(start, end);

    // First pass: lex the inner text into tokens, one key/colon/value group per
    // loop iteration.
    let mut lex_state = LexState {
        input: working_set.get_span_contents(inner_span),
        output: Vec::new(),
        error: None,
        span_offset: start,
    };
    while !lex_state.input.is_empty() {
        // An unbalanced `{`/`}` error from the lexer is handled as "extra tokens
        // after the closing delimiter" instead of "unclosed".
        if let Some(ParseError::Unbalanced(left, right, _)) = lex_state.error.as_ref()
            && left == "{"
            && right == "}"
        {
            extra_tokens = true;
            unclosed = false;
            break;
        }
        let additional_whitespace = &[b'\n', b'\r', b','];
        // Get token for key (or a spread)
        if lex_n_tokens(&mut lex_state, additional_whitespace, &[b':'], true, 1) < 1 {
            break;
        };
        let span = lex_state
            .output
            .last()
            .expect("should have gotten 1 token")
            .span;
        let contents = working_set.get_span_contents(span);
        if contents.len() > 3
            && contents.starts_with(b"...")
            && (contents[3] == b'$' || contents[3] == b'{' || contents[3] == b'(')
        {
            // This was a spread operator, so there's no value
            continue;
        }
        // Get token for colon
        if lex_n_tokens(&mut lex_state, additional_whitespace, &[b':'], true, 1) < 1 {
            break;
        };
        // Get token for value
        if lex_n_tokens(&mut lex_state, additional_whitespace, &[], true, 1) < 1 {
            break;
        };
    }
    let (tokens, err) = (lex_state.output, lex_state.error);

    if unclosed {
        working_set.error(ParseError::Unclosed("}".into(), Span::new(end, end)));
    } else if extra_tokens {
        working_set.error(ParseError::ExtraTokensAfterClosingDelimiter(Span::new(
            lex_state.span_offset,
            end,
        )));
    }

    if let Some(err) = err {
        working_set.error(err);
    }

    // Second pass: turn the tokens into record items.
    let mut output = vec![];
    let mut idx = 0;

    // `Some` while every field's name and type is known; reset to `None` as soon
    // as one is not, which makes the record's type fall back to `Type::Any`.
    let mut field_types = Some(vec![]);
    while idx < tokens.len() {
        let curr_span = tokens[idx].span;
        let curr_tok = working_set.get_span_contents(curr_span);
        if curr_tok.starts_with(b"...")
            && curr_tok.len() > 3
            && (curr_tok[3] == b'$' || curr_tok[3] == b'{' || curr_tok[3] == b'(')
        {
            // Parse spread operator
            let inner = parse_value(
                working_set,
                Span::new(curr_span.start + 3, curr_span.end),
                &SyntaxShape::Record(vec![]),
            );
            idx += 1;

            match &inner.ty {
                Type::Record(inner_fields) => {
                    if let Some(fields) = &mut field_types {
                        for (field, ty) in inner_fields.as_ref() {
                            fields.push((field.clone(), ty.clone()));
                        }
                    }
                }
                _ => {
                    // We can't properly see all the field types
                    // so fall back to the Any type later
                    field_types = None;
                }
            }
            // The spread item records the span of the `...` itself plus the
            // spread expression.
            output.push(RecordItem::Spread(
                Span::new(curr_span.start, curr_span.start + 3),
                inner,
            ));
        } else {
            // Normal key-value pair
            let field_token = &tokens[idx];
            let field = if field_token.contents != TokenContents::Item {
                working_set.error(ParseError::Expected(
                    "item in record key position",
                    Span::new(field_token.span.start, field_token.span.end),
                ));
                garbage(working_set, curr_span)
            } else {
                let field = parse_value(working_set, curr_span, &SyntaxShape::String);
                if let Some(error) = check_record_key_or_value(working_set, &field, "key") {
                    working_set.error(error);
                    garbage(working_set, field.span)
                } else {
                    field
                }
            };

            idx += 1;
            // The key was the last token: the colon is missing
            if idx == tokens.len() {
                working_set.error(ParseError::Expected(
                    "':'",
                    Span::new(curr_span.end, curr_span.end),
                ));
                output.push(RecordItem::Pair(
                    garbage(working_set, curr_span),
                    garbage(working_set, Span::new(curr_span.end, curr_span.end)),
                ));
                break;
            }
            let colon_span = tokens[idx].span;
            let colon = working_set.get_span_contents(colon_span);
            idx += 1;
            // The token after the key is not a colon: everything from there to the
            // last token becomes a garbage value.
            if colon != b":" {
                working_set.error(ParseError::Expected(
                    "':'",
                    Span::new(colon_span.start, colon_span.start),
                ));
                output.push(RecordItem::Pair(
                    field,
                    garbage(
                        working_set,
                        Span::new(colon_span.start, tokens[tokens.len() - 1].span.end),
                    ),
                ));
                break;
            }
            // The colon was the last token: the value is missing
            if idx == tokens.len() {
                working_set.error(ParseError::Expected(
                    "value for record field",
                    Span::new(colon_span.end, colon_span.end),
                ));
                output.push(RecordItem::Pair(
                    garbage(working_set, Span::new(curr_span.start, colon_span.end)),
                    garbage(
                        working_set,
                        Span::new(colon_span.end, tokens[tokens.len() - 1].span.end),
                    ),
                ));
                break;
            }

            let value_token = &tokens[idx];
            let value = if value_token.contents != TokenContents::Item {
                working_set.error(ParseError::Expected(
                    "item in record value position",
                    Span::new(value_token.span.start, value_token.span.end),
                ));
                garbage(
                    working_set,
                    Span::new(value_token.span.start, value_token.span.end),
                )
            } else {
                let value = parse_value(working_set, tokens[idx].span, &SyntaxShape::Any);
                if let Some(parse_error) = check_record_key_or_value(working_set, &value, "value") {
                    working_set.error(parse_error);
                    garbage(working_set, value.span)
                } else {
                    value
                }
            };
            idx += 1;

            if let Some(field) = field.as_string() {
                if let Some(fields) = &mut field_types {
                    fields.push((field, value.ty.clone()));
                }
            } else {
                // We can't properly see all the field types
                // so fall back to the Any type later
                field_types = None;
            }
            output.push(RecordItem::Pair(field, value));
        }
    }

    Expression::new(
        working_set,
        Expr::Record(output),
        span,
        if let Some(fields) = field_types {
            Type::Record(fields.into())
        } else {
            Type::Any
        },
    )
}
6959
6960fn parse_redirection_target(
6961    working_set: &mut StateWorkingSet,
6962    target: &LiteRedirectionTarget,
6963) -> RedirectionTarget {
6964    match target {
6965        LiteRedirectionTarget::File {
6966            connector,
6967            file,
6968            append,
6969        } => RedirectionTarget::File {
6970            expr: parse_value(working_set, *file, &SyntaxShape::Any),
6971            append: *append,
6972            span: *connector,
6973        },
6974        LiteRedirectionTarget::Pipe { connector } => RedirectionTarget::Pipe { span: *connector },
6975    }
6976}
6977
6978pub(crate) fn parse_redirection(
6979    working_set: &mut StateWorkingSet,
6980    target: &LiteRedirection,
6981) -> PipelineRedirection {
6982    match target {
6983        LiteRedirection::Single { source, target } => PipelineRedirection::Single {
6984            source: *source,
6985            target: parse_redirection_target(working_set, target),
6986        },
6987        LiteRedirection::Separate { out, err } => PipelineRedirection::Separate {
6988            out: parse_redirection_target(working_set, out),
6989            err: parse_redirection_target(working_set, err),
6990        },
6991    }
6992}
6993
6994fn parse_pipeline_element(
6995    working_set: &mut StateWorkingSet,
6996    command: &LiteCommand,
6997) -> PipelineElement {
6998    trace!("parsing: pipeline element");
6999
7000    let expr = parse_expression(working_set, &command.parts);
7001
7002    let redirection = command
7003        .redirection
7004        .as_ref()
7005        .map(|r| parse_redirection(working_set, r));
7006
7007    PipelineElement {
7008        pipe: command.pipe,
7009        expr,
7010        redirection,
7011    }
7012}
7013
7014pub(crate) fn redirecting_builtin_error(
7015    name: &'static str,
7016    redirection: &LiteRedirection,
7017) -> ParseError {
7018    match redirection {
7019        LiteRedirection::Single { target, .. } => {
7020            ParseError::RedirectingBuiltinCommand(name, target.connector(), None)
7021        }
7022        LiteRedirection::Separate { out, err } => ParseError::RedirectingBuiltinCommand(
7023            name,
7024            out.connector().min(err.connector()),
7025            Some(out.connector().max(err.connector())),
7026        ),
7027    }
7028}
7029
7030pub fn parse_pipeline(working_set: &mut StateWorkingSet, pipeline: &LitePipeline) -> Pipeline {
7031    if pipeline.commands.len() > 1 {
7032        // Parse a normal multi command pipeline
7033        let elements: Vec<_> = pipeline
7034            .commands
7035            .iter()
7036            .enumerate()
7037            .map(|(index, element)| {
7038                let element = parse_pipeline_element(working_set, element);
7039                // Handle $in for pipeline elements beyond the first one
7040                if index > 0 && element.has_in_variable(working_set) {
7041                    wrap_element_with_collect(working_set, element.clone())
7042                } else {
7043                    element
7044                }
7045            })
7046            .collect();
7047
7048        Pipeline { elements }
7049    } else {
7050        // If there's only one command in the pipeline, this could be a builtin command
7051        parse_builtin_commands(working_set, &pipeline.commands[0])
7052    }
7053}
7054
7055pub fn parse_block(
7056    working_set: &mut StateWorkingSet,
7057    tokens: &[Token],
7058    span: Span,
7059    scoped: bool,
7060    is_subexpression: bool,
7061) -> Block {
7062    let (lite_block, err) = lite_parse(tokens, working_set);
7063    if let Some(err) = err {
7064        working_set.error(err);
7065    }
7066
7067    trace!("parsing block: {lite_block:?}");
7068
7069    if scoped {
7070        working_set.enter_scope();
7071    }
7072
7073    // Pre-declare any definition so that definitions
7074    // that share the same block can see each other
7075    for pipeline in &lite_block.block {
7076        if pipeline.commands.len() == 1 {
7077            parse_def_predecl(working_set, pipeline.commands[0].command_parts())
7078        }
7079    }
7080
7081    let mut block = Block::new_with_capacity(lite_block.block.len());
7082    block.span = Some(span);
7083
7084    for lite_pipeline in &lite_block.block {
7085        let pipeline = parse_pipeline(working_set, lite_pipeline);
7086        block.pipelines.push(pipeline);
7087    }
7088
7089    // If this is not a subexpression and there are any pipelines where the first element has $in,
7090    // we can wrap the whole block in collect so that they all reference the same $in
7091    if !is_subexpression
7092        && block
7093            .pipelines
7094            .iter()
7095            .flat_map(|pipeline| pipeline.elements.first())
7096            .any(|element| element.has_in_variable(working_set))
7097    {
7098        // Move the block out to prepare it to become a subexpression
7099        let inner_block = std::mem::take(&mut block);
7100        block.span = inner_block.span;
7101        let ty = inner_block.output_type();
7102        let block_id = working_set.add_block(Arc::new(inner_block));
7103
7104        // Now wrap it in a Collect expression, and put it in the block as the only pipeline
7105        let subexpression = Expression::new(working_set, Expr::Subexpression(block_id), span, ty);
7106        let collect = wrap_expr_with_collect(working_set, subexpression);
7107
7108        block.pipelines.push(Pipeline {
7109            elements: vec![PipelineElement {
7110                pipe: None,
7111                expr: collect,
7112                redirection: None,
7113            }],
7114        });
7115    }
7116
7117    if scoped {
7118        working_set.exit_scope();
7119    }
7120
7121    let errors = type_check::check_block_input_output(working_set, &block);
7122    if !errors.is_empty() {
7123        working_set.parse_errors.extend_from_slice(&errors);
7124    }
7125
7126    block
7127}
7128
7129/// Compile an [IrBlock][nu_protocol::ir::IrBlock] for the [Block], adding a compile error on
7130/// failure.
7131///
7132/// To compile a block that's already in the [StateWorkingSet] use [compile_block_with_id]
7133pub fn compile_block(working_set: &mut StateWorkingSet<'_>, block: &mut Block) {
7134    if !working_set.parse_errors.is_empty() {
7135        // This means there might be a bug in the parser, since calling this function while parse
7136        // errors are present is a logic error. However, it's not fatal and it's best to continue
7137        // without doing anything.
7138        log::error!("compile_block called with parse errors");
7139        return;
7140    }
7141
7142    match nu_engine::compile(working_set, block) {
7143        Ok(ir_block) => {
7144            block.ir_block = Some(ir_block);
7145        }
7146        Err(err) => working_set.compile_errors.push(err),
7147    }
7148}
7149
7150/// Compile an [IrBlock][nu_protocol::ir::IrBlock] for a [Block] that's already in the
7151/// [StateWorkingSet] using its id, adding a compile error on failure.
7152pub fn compile_block_with_id(working_set: &mut StateWorkingSet<'_>, block_id: BlockId) {
7153    if !working_set.parse_errors.is_empty() {
7154        // This means there might be a bug in the parser, since calling this function while parse
7155        // errors are present is a logic error. However, it's not fatal and it's best to continue
7156        // without doing anything.
7157        log::error!("compile_block_with_id called with parse errors");
7158        return;
7159    }
7160
7161    match nu_engine::compile(working_set, working_set.get_block(block_id)) {
7162        Ok(ir_block) => {
7163            working_set.get_block_mut(block_id).ir_block = Some(ir_block);
7164        }
7165        Err(err) => {
7166            working_set.compile_errors.push(err);
7167        }
7168    };
7169}
7170
7171pub fn discover_captures_in_closure(
7172    working_set: &StateWorkingSet,
7173    block: &Block,
7174    seen: &mut Vec<VarId>,
7175    seen_blocks: &mut HashMap<BlockId, Vec<(VarId, Span)>>,
7176    output: &mut Vec<(VarId, Span)>,
7177) -> Result<(), ParseError> {
7178    for flag in &block.signature.named {
7179        if let Some(var_id) = flag.var_id {
7180            seen.push(var_id);
7181        }
7182    }
7183
7184    for positional in &block.signature.required_positional {
7185        if let Some(var_id) = positional.var_id {
7186            seen.push(var_id);
7187        }
7188    }
7189    for positional in &block.signature.optional_positional {
7190        if let Some(var_id) = positional.var_id {
7191            seen.push(var_id);
7192        }
7193    }
7194    if let Some(positional) = &block.signature.rest_positional
7195        && let Some(var_id) = positional.var_id
7196    {
7197        seen.push(var_id);
7198    }
7199
7200    for pipeline in &block.pipelines {
7201        discover_captures_in_pipeline(working_set, pipeline, seen, seen_blocks, output)?;
7202    }
7203
7204    Ok(())
7205}
7206
7207fn discover_captures_in_pipeline(
7208    working_set: &StateWorkingSet,
7209    pipeline: &Pipeline,
7210    seen: &mut Vec<VarId>,
7211    seen_blocks: &mut HashMap<BlockId, Vec<(VarId, Span)>>,
7212    output: &mut Vec<(VarId, Span)>,
7213) -> Result<(), ParseError> {
7214    for element in &pipeline.elements {
7215        discover_captures_in_pipeline_element(working_set, element, seen, seen_blocks, output)?;
7216    }
7217
7218    Ok(())
7219}
7220
7221// Closes over captured variables
7222pub fn discover_captures_in_pipeline_element(
7223    working_set: &StateWorkingSet,
7224    element: &PipelineElement,
7225    seen: &mut Vec<VarId>,
7226    seen_blocks: &mut HashMap<BlockId, Vec<(VarId, Span)>>,
7227    output: &mut Vec<(VarId, Span)>,
7228) -> Result<(), ParseError> {
7229    discover_captures_in_expr(working_set, &element.expr, seen, seen_blocks, output)?;
7230
7231    if let Some(redirection) = element.redirection.as_ref() {
7232        match redirection {
7233            PipelineRedirection::Single { target, .. } => {
7234                if let Some(expr) = target.expr() {
7235                    discover_captures_in_expr(working_set, expr, seen, seen_blocks, output)?;
7236                }
7237            }
7238            PipelineRedirection::Separate { out, err } => {
7239                if let Some(expr) = out.expr() {
7240                    discover_captures_in_expr(working_set, expr, seen, seen_blocks, output)?;
7241                }
7242                if let Some(expr) = err.expr() {
7243                    discover_captures_in_expr(working_set, expr, seen, seen_blocks, output)?;
7244                }
7245            }
7246        }
7247    }
7248
7249    Ok(())
7250}
7251
7252pub fn discover_captures_in_pattern(pattern: &MatchPattern, seen: &mut Vec<VarId>) {
7253    match &pattern.pattern {
7254        Pattern::Variable(var_id) => seen.push(*var_id),
7255        Pattern::List(items) => {
7256            for item in items {
7257                discover_captures_in_pattern(item, seen)
7258            }
7259        }
7260        Pattern::Record(items) => {
7261            for item in items {
7262                discover_captures_in_pattern(&item.1, seen)
7263            }
7264        }
7265        Pattern::Or(patterns) => {
7266            for pattern in patterns {
7267                discover_captures_in_pattern(pattern, seen)
7268            }
7269        }
7270        Pattern::Rest(var_id) => seen.push(*var_id),
7271        Pattern::Expression(_)
7272        | Pattern::Value(_)
7273        | Pattern::IgnoreValue
7274        | Pattern::IgnoreRest
7275        | Pattern::Garbage => {}
7276    }
7277}
7278
7279// Closes over captured variables
7280pub fn discover_captures_in_expr(
7281    working_set: &StateWorkingSet,
7282    expr: &Expression,
7283    seen: &mut Vec<VarId>,
7284    seen_blocks: &mut HashMap<BlockId, Vec<(VarId, Span)>>,
7285    output: &mut Vec<(VarId, Span)>,
7286) -> Result<(), ParseError> {
7287    match &expr.expr {
7288        Expr::AttributeBlock(ab) => {
7289            discover_captures_in_expr(working_set, &ab.item, seen, seen_blocks, output)?;
7290        }
7291        Expr::BinaryOp(lhs, _, rhs) => {
7292            discover_captures_in_expr(working_set, lhs, seen, seen_blocks, output)?;
7293            discover_captures_in_expr(working_set, rhs, seen, seen_blocks, output)?;
7294        }
7295        Expr::UnaryNot(expr) => {
7296            discover_captures_in_expr(working_set, expr, seen, seen_blocks, output)?;
7297        }
7298        Expr::Closure(block_id) => {
7299            let block = working_set.get_block(*block_id);
7300            let results = {
7301                let mut seen = vec![];
7302                let mut results = vec![];
7303
7304                discover_captures_in_closure(
7305                    working_set,
7306                    block,
7307                    &mut seen,
7308                    seen_blocks,
7309                    &mut results,
7310                )?;
7311
7312                for (var_id, span) in results.iter() {
7313                    if !seen.contains(var_id)
7314                        && let Some(variable) = working_set.get_variable_if_possible(*var_id)
7315                        && variable.mutable
7316                    {
7317                        return Err(ParseError::CaptureOfMutableVar(*span));
7318                    }
7319                }
7320
7321                results
7322            };
7323            seen_blocks.insert(*block_id, results.clone());
7324            for (var_id, span) in results.into_iter() {
7325                if !seen.contains(&var_id) {
7326                    output.push((var_id, span))
7327                }
7328            }
7329        }
7330        Expr::Block(block_id) => {
7331            let block = working_set.get_block(*block_id);
7332            // FIXME: is this correct?
7333            let results = {
7334                let mut seen = vec![];
7335                let mut results = vec![];
7336                discover_captures_in_closure(
7337                    working_set,
7338                    block,
7339                    &mut seen,
7340                    seen_blocks,
7341                    &mut results,
7342                )?;
7343                results
7344            };
7345
7346            seen_blocks.insert(*block_id, results.clone());
7347            for (var_id, span) in results.into_iter() {
7348                if !seen.contains(&var_id) {
7349                    output.push((var_id, span))
7350                }
7351            }
7352        }
7353        Expr::Binary(_) => {}
7354        Expr::Bool(_) => {}
7355        Expr::Call(call) => {
7356            if let Some(head_expr) = call.parser_info.get(PERCENT_FORCED_BUILTIN_PARSER_INFO) {
7357                discover_captures_in_expr(working_set, head_expr, seen, seen_blocks, output)?;
7358            } else {
7359                let decl = working_set.get_decl(call.decl_id);
7360                if let Some(block_id) = decl.block_id() {
7361                    match seen_blocks.get(&block_id) {
7362                        Some(capture_list) => {
7363                            // Push captures onto the outer closure that aren't created by that outer closure
7364                            for capture in capture_list {
7365                                if !seen.contains(&capture.0) {
7366                                    output.push(*capture);
7367                                }
7368                            }
7369                        }
7370                        None => {
7371                            let block = working_set.get_block(block_id);
7372                            if !block.captures.is_empty() {
7373                                for (capture, span) in &block.captures {
7374                                    if !seen.contains(capture) {
7375                                        output.push((*capture, *span));
7376                                    }
7377                                }
7378                            } else {
7379                                let result = {
7380                                    let mut seen = vec![];
7381                                    seen_blocks.insert(block_id, vec![]);
7382
7383                                    let mut result = vec![];
7384                                    discover_captures_in_closure(
7385                                        working_set,
7386                                        block,
7387                                        &mut seen,
7388                                        seen_blocks,
7389                                        &mut result,
7390                                    )?;
7391
7392                                    result
7393                                };
7394                                // Push captures onto the outer closure that aren't created by that outer closure
7395                                for capture in &result {
7396                                    if !seen.contains(&capture.0) {
7397                                        output.push(*capture);
7398                                    }
7399                                }
7400
7401                                seen_blocks.insert(block_id, result);
7402                            }
7403                        }
7404                    }
7405                }
7406            }
7407
7408            for arg in &call.arguments {
7409                match arg {
7410                    Argument::Named(named) => {
7411                        if let Some(arg) = &named.2 {
7412                            discover_captures_in_expr(working_set, arg, seen, seen_blocks, output)?;
7413                        }
7414                    }
7415                    Argument::Positional(expr)
7416                    | Argument::Unknown(expr)
7417                    | Argument::Spread(expr) => {
7418                        discover_captures_in_expr(working_set, expr, seen, seen_blocks, output)?;
7419                    }
7420                }
7421            }
7422        }
7423        Expr::CellPath(_) => {}
7424        Expr::DateTime(_) => {}
7425        Expr::ExternalCall(head, args) => {
7426            discover_captures_in_expr(working_set, head, seen, seen_blocks, output)?;
7427
7428            for ExternalArgument::Regular(expr) | ExternalArgument::Spread(expr) in args.as_ref() {
7429                discover_captures_in_expr(working_set, expr, seen, seen_blocks, output)?;
7430            }
7431        }
7432        Expr::Filepath(_, _) => {}
7433        Expr::Directory(_, _) => {}
7434        Expr::Float(_) => {}
7435        Expr::FullCellPath(cell_path) => {
7436            discover_captures_in_expr(working_set, &cell_path.head, seen, seen_blocks, output)?;
7437        }
7438        Expr::ImportPattern(_) => {}
7439        Expr::Overlay(_) => {}
7440        Expr::Garbage => {}
7441        Expr::Nothing => {}
7442        Expr::GlobPattern(_, _) => {}
7443        Expr::Int(_) => {}
7444        Expr::Keyword(kw) => {
7445            discover_captures_in_expr(working_set, &kw.expr, seen, seen_blocks, output)?;
7446        }
7447        Expr::List(list) => {
7448            for item in list {
7449                discover_captures_in_expr(working_set, item.expr(), seen, seen_blocks, output)?;
7450            }
7451        }
7452        Expr::Operator(_) => {}
7453        Expr::Range(range) => {
7454            if let Some(from) = &range.from {
7455                discover_captures_in_expr(working_set, from, seen, seen_blocks, output)?;
7456            }
7457            if let Some(next) = &range.next {
7458                discover_captures_in_expr(working_set, next, seen, seen_blocks, output)?;
7459            }
7460            if let Some(to) = &range.to {
7461                discover_captures_in_expr(working_set, to, seen, seen_blocks, output)?;
7462            }
7463        }
7464        Expr::Record(items) => {
7465            for item in items {
7466                match item {
7467                    RecordItem::Pair(field_name, field_value) => {
7468                        discover_captures_in_expr(
7469                            working_set,
7470                            field_name,
7471                            seen,
7472                            seen_blocks,
7473                            output,
7474                        )?;
7475                        discover_captures_in_expr(
7476                            working_set,
7477                            field_value,
7478                            seen,
7479                            seen_blocks,
7480                            output,
7481                        )?;
7482                    }
7483                    RecordItem::Spread(_, record) => {
7484                        discover_captures_in_expr(working_set, record, seen, seen_blocks, output)?;
7485                    }
7486                }
7487            }
7488        }
7489        Expr::Signature(sig) => {
7490            // Something with a declaration, similar to a var decl, will introduce more VarIds into the stack at eval
7491            for pos in &sig.required_positional {
7492                if let Some(var_id) = pos.var_id {
7493                    seen.push(var_id);
7494                }
7495            }
7496            for pos in &sig.optional_positional {
7497                if let Some(var_id) = pos.var_id {
7498                    seen.push(var_id);
7499                }
7500            }
7501            if let Some(rest) = &sig.rest_positional
7502                && let Some(var_id) = rest.var_id
7503            {
7504                seen.push(var_id);
7505            }
7506            for named in &sig.named {
7507                if let Some(var_id) = named.var_id {
7508                    seen.push(var_id);
7509                }
7510            }
7511        }
7512        Expr::String(_) => {}
7513        Expr::RawString(_) => {}
7514        Expr::StringInterpolation(exprs) | Expr::GlobInterpolation(exprs, _) => {
7515            for expr in exprs {
7516                discover_captures_in_expr(working_set, expr, seen, seen_blocks, output)?;
7517            }
7518        }
7519        Expr::MatchBlock(match_block) => {
7520            for match_ in match_block {
7521                discover_captures_in_pattern(&match_.0, seen);
7522                discover_captures_in_expr(working_set, &match_.1, seen, seen_blocks, output)?;
7523            }
7524        }
7525        Expr::Collect(var_id, expr) => {
7526            seen.push(*var_id);
7527            discover_captures_in_expr(working_set, expr, seen, seen_blocks, output)?
7528        }
7529        Expr::RowCondition(block_id) | Expr::Subexpression(block_id) => {
7530            let block = working_set.get_block(*block_id);
7531
7532            let results = {
7533                let mut results = vec![];
7534                let mut seen = vec![];
7535                discover_captures_in_closure(
7536                    working_set,
7537                    block,
7538                    &mut seen,
7539                    seen_blocks,
7540                    &mut results,
7541                )?;
7542                results
7543            };
7544
7545            seen_blocks.insert(*block_id, results.clone());
7546            for (var_id, span) in results.into_iter() {
7547                if !seen.contains(&var_id) {
7548                    output.push((var_id, span))
7549                }
7550            }
7551        }
7552        Expr::Table(table) => {
7553            for header in table.columns.as_ref() {
7554                discover_captures_in_expr(working_set, header, seen, seen_blocks, output)?;
7555            }
7556            for row in table.rows.as_ref() {
7557                for cell in row.as_ref() {
7558                    discover_captures_in_expr(working_set, cell, seen, seen_blocks, output)?;
7559                }
7560            }
7561        }
7562        Expr::ValueWithUnit(value) => {
7563            discover_captures_in_expr(working_set, &value.expr, seen, seen_blocks, output)?;
7564        }
7565        Expr::Var(var_id) => {
7566            if (*var_id > ENV_VARIABLE_ID || *var_id == IN_VARIABLE_ID) && !seen.contains(var_id) {
7567                output.push((*var_id, expr.span));
7568            }
7569        }
7570        Expr::VarDecl(var_id) => {
7571            seen.push(*var_id);
7572        }
7573    }
7574    Ok(())
7575}
7576
7577fn wrap_redirection_with_collect(
7578    working_set: &mut StateWorkingSet,
7579    target: RedirectionTarget,
7580) -> RedirectionTarget {
7581    match target {
7582        RedirectionTarget::File { expr, append, span } => RedirectionTarget::File {
7583            expr: wrap_expr_with_collect(working_set, expr),
7584            span,
7585            append,
7586        },
7587        RedirectionTarget::Pipe { span } => RedirectionTarget::Pipe { span },
7588    }
7589}
7590
7591fn wrap_element_with_collect(
7592    working_set: &mut StateWorkingSet,
7593    element: PipelineElement,
7594) -> PipelineElement {
7595    PipelineElement {
7596        pipe: element.pipe,
7597        expr: wrap_expr_with_collect(working_set, element.expr),
7598        redirection: element.redirection.map(|r| match r {
7599            PipelineRedirection::Single { source, target } => PipelineRedirection::Single {
7600                source,
7601                target: wrap_redirection_with_collect(working_set, target),
7602            },
7603            PipelineRedirection::Separate { out, err } => PipelineRedirection::Separate {
7604                out: wrap_redirection_with_collect(working_set, out),
7605                err: wrap_redirection_with_collect(working_set, err),
7606            },
7607        }),
7608    }
7609}
7610
7611fn wrap_expr_with_collect(working_set: &mut StateWorkingSet, expr: Expression) -> Expression {
7612    let span = expr.span;
7613
7614    // IN_VARIABLE_ID should get replaced with a unique variable, so that we don't have to
7615    // execute as a closure
7616    let var_id = working_set.add_variable(
7617        b"$in".into(),
7618        Span::new(span.start, span.start),
7619        Type::Any,
7620        false,
7621    );
7622    let mut expr = expr.clone();
7623    expr.replace_in_variable(working_set, var_id);
7624
7625    // Bind the custom `$in` variable for that particular expression
7626    let ty = expr.ty.clone();
7627    Expression::new(
7628        working_set,
7629        Expr::Collect(var_id, Box::new(expr)),
7630        span,
7631        // We can expect it to have the same result type
7632        ty,
7633    )
7634}
7635
7636// Parses a vector of u8 to create an AST Block. If a file name is given, then
7637// the name is stored in the working set. When parsing a source without a file
7638// name, the source of bytes is stored as "source"
7639pub fn parse(
7640    working_set: &mut StateWorkingSet,
7641    fname: Option<&str>,
7642    contents: &[u8],
7643    scoped: bool,
7644) -> Arc<Block> {
7645    trace!("parse");
7646
7647    let file_id = {
7648        let fname = fname.map(nu_path::expand_to_real_path);
7649        let fname = fname.as_deref().map(|p| p.to_string_lossy());
7650        let name = fname.as_deref().unwrap_or("source");
7651        working_set.add_file(name, contents)
7652    };
7653
7654    let new_span = working_set.get_span_for_file(file_id);
7655
7656    let previously_parsed_block = working_set.find_block_by_span(new_span);
7657
7658    let mut output = {
7659        if let Some(block) = previously_parsed_block {
7660            return block;
7661        } else {
7662            let (output, err) = lex(contents, new_span.start, &[], &[], false);
7663            if let Some(err) = err {
7664                working_set.error(err)
7665            }
7666
7667            Arc::new(parse_block(working_set, &output, new_span, scoped, false))
7668        }
7669    };
7670
7671    // Top level `Block`s are compiled eagerly, as they don't have a parent which would cause them
7672    // to be compiled later.
7673    if working_set.parse_errors.is_empty() {
7674        compile_block(working_set, Arc::make_mut(&mut output));
7675    }
7676
7677    let mut seen = vec![];
7678    let mut seen_blocks = HashMap::new();
7679
7680    let mut captures = vec![];
7681    match discover_captures_in_closure(
7682        working_set,
7683        &output,
7684        &mut seen,
7685        &mut seen_blocks,
7686        &mut captures,
7687    ) {
7688        Ok(_) => {
7689            Arc::make_mut(&mut output).captures = captures;
7690        }
7691        Err(err) => working_set.error(err),
7692    }
7693
7694    // Also check other blocks that might have been imported
7695    let mut errors = vec![];
7696    for (block_idx, block) in working_set.delta.blocks.iter().enumerate() {
7697        let block_id = block_idx + working_set.permanent_state.num_blocks();
7698        let block_id = BlockId::new(block_id);
7699
7700        if !seen_blocks.contains_key(&block_id) {
7701            let mut captures = vec![];
7702
7703            match discover_captures_in_closure(
7704                working_set,
7705                block,
7706                &mut seen,
7707                &mut seen_blocks,
7708                &mut captures,
7709            ) {
7710                Ok(_) => {
7711                    seen_blocks.insert(block_id, captures);
7712                }
7713                Err(err) => {
7714                    errors.push(err);
7715                }
7716            }
7717        }
7718    }
7719    for err in errors {
7720        working_set.error(err)
7721    }
7722
7723    for (block_id, captures) in seen_blocks.into_iter() {
7724        // In theory, we should only be updating captures where we have new information
7725        // the only place where this is possible would be blocks that are newly created
7726        // by our working set delta. If we ever tried to modify the permanent state, we'd
7727        // panic (again, in theory, this shouldn't be possible)
7728        let block = working_set.get_block(block_id);
7729        let block_captures_empty = block.captures.is_empty();
7730        // need to check block_id >= working_set.permanent_state.num_blocks()
7731        // to avoid mutate a block that is in the permanent state.
7732        // this can happened if user defines a function with recursive call
7733        // and pipe a variable to the command, e.g:
7734        // def px [] { if true { 42 } else { px } };    # the block px is saved in permanent state.
7735        // let x = 3
7736        // $x | px
7737        // If we don't guard for `block_id`, it will change captures of `px`, which is
7738        // already saved in permanent state
7739        if !captures.is_empty()
7740            && block_captures_empty
7741            && block_id.get() >= working_set.permanent_state.num_blocks()
7742        {
7743            let block = working_set.get_block_mut(block_id);
7744            block.captures = captures;
7745        }
7746    }
7747
7748    output
7749}