Skip to main content

nu_parser/
parse_literals.rs

1#![allow(clippy::byte_char_slices)]
2
3use crate::{
4    Token, TokenContents,
5    lex::lex,
6    parse_helpers::{
7        SPREAD_OPERATOR_STR, extract_spread_record, garbage, is_variable, trim_quotes,
8    },
9    parse_pipelines::parse_block,
10    type_check::check_range_types,
11};
12use itertools::Itertools;
13use log::trace;
14use nu_protocol::{
15    DidYouMean, FilesizeUnit, IntoSpanned, ParseError, Span, Spanned, SyntaxShape, Type, Unit,
16    VarId, ast::*, casing::Casing, engine::StateWorkingSet,
17};
18use std::sync::Arc;
19
20use crate::parse_expressions::{
21    parse_block_expression, parse_closure_expression, parse_match_block_expression, parse_record,
22    parse_table_expression,
23};
24use crate::parse_signatures::parse_signature;
25
26pub fn parse_binary(working_set: &mut StateWorkingSet, span: Span) -> Expression {
27    trace!("parsing: binary");
28    let contents = working_set.get_span_contents(span);
29    if contents.starts_with(b"0x[") {
30        parse_binary_with_base(working_set, span, 16, 2, b"0x[", b"]")
31    } else if contents.starts_with(b"0o[") {
32        parse_binary_with_base(working_set, span, 8, 3, b"0o[", b"]")
33    } else if contents.starts_with(b"0b[") {
34        parse_binary_with_base(working_set, span, 2, 8, b"0b[", b"]")
35    } else {
36        working_set.error(ParseError::Expected("binary", span));
37        garbage(working_set, span)
38    }
39}
40
41fn parse_binary_with_base(
42    working_set: &mut StateWorkingSet,
43    span: Span,
44    base: u32,
45    min_digits_per_byte: usize,
46    prefix: &[u8],
47    suffix: &[u8],
48) -> Expression {
49    let token = working_set.get_span_contents(span);
50
51    if let Some(token) = token.strip_prefix(prefix)
52        && let Some(token) = token.strip_suffix(suffix)
53    {
54        let (lexed, err) = lex(
55            token,
56            span.start + prefix.len(),
57            &[b',', b'\r', b'\n'],
58            &[],
59            true,
60        );
61        if let Some(err) = err {
62            working_set.error(err);
63        }
64
65        let mut binary_value = vec![];
66        for token in lexed {
67            match token.contents {
68                TokenContents::Item => {
69                    let contents = working_set.get_span_contents(token.span);
70
71                    binary_value.extend_from_slice(contents);
72                }
73                TokenContents::Pipe
74                | TokenContents::PipePipe
75                | TokenContents::ErrGreaterPipe
76                | TokenContents::OutGreaterThan
77                | TokenContents::OutErrGreaterPipe
78                | TokenContents::OutGreaterGreaterThan
79                | TokenContents::ErrGreaterThan
80                | TokenContents::ErrGreaterGreaterThan
81                | TokenContents::OutErrGreaterThan
82                | TokenContents::OutErrGreaterGreaterThan
83                | TokenContents::AssignmentOperator => {
84                    working_set.error(ParseError::Expected("binary", span));
85                    return garbage(working_set, span);
86                }
87                TokenContents::Comment | TokenContents::Semicolon | TokenContents::Eol => {}
88            }
89        }
90
91        let required_padding =
92            (min_digits_per_byte - binary_value.len() % min_digits_per_byte) % min_digits_per_byte;
93
94        if required_padding != 0 {
95            binary_value = {
96                let mut tail = binary_value;
97                let mut binary_value: Vec<u8> = vec![b'0'; required_padding];
98                binary_value.append(&mut tail);
99                binary_value
100            };
101        }
102
103        let str = String::from_utf8_lossy(&binary_value).to_string();
104
105        match decode_with_base(&str, base, min_digits_per_byte) {
106            Ok(v) => return Expression::new(working_set, Expr::Binary(v), span, Type::Binary),
107            Err(help) => {
108                working_set.error(ParseError::InvalidBinaryString(span, help.to_string()));
109                return garbage(working_set, span);
110            }
111        }
112    }
113
114    working_set.error(ParseError::Expected("binary", span));
115    garbage(working_set, span)
116}
117
/// Decodes a string of digits in radix `base` into bytes, consuming
/// `digits_per_byte` characters per output byte (a shorter final chunk is
/// decoded on its own).
///
/// Returns a static help message describing the accepted digits when any chunk
/// contains a non-digit character or does not fit in a `u8`.
///
/// # Panics
///
/// Panics if `digits_per_byte` is zero or `base` is outside `2..=36`.
fn decode_with_base(s: &str, base: u32, digits_per_byte: usize) -> Result<Vec<u8>, &'static str> {
    let help = match base {
        2 => "binary strings may contain only 0 or 1.",
        8 => "octal strings must have a length that is a multiple of three and contain values between 0o000 and 0o377.",
        16 => "hexadecimal strings may contain only the characters 0–9 and A–F.",
        _ => "internal error: radix other than 2, 8, or 16 is not allowed.",
    };

    let digits: Vec<char> = s.chars().collect();
    digits
        .chunks(digits_per_byte)
        .map(|chunk| {
            // `from_str_radix` tolerates a leading `+`, which is not a digit of
            // any radix, so validate every character before delegating to it.
            if !chunk.iter().all(|c| c.is_digit(base)) {
                return Err(help);
            }
            let text: String = chunk.iter().collect();
            u8::from_str_radix(&text, base).map_err(|_| help)
        })
        .collect()
}
133
/// Returns `token` as text with every `_` removed.
///
/// Invalid UTF-8 sequences are replaced lossily during the conversion.
fn strip_underscores(token: &[u8]) -> String {
    let text = String::from_utf8_lossy(token);
    text.replace('_', "")
}
140
141pub fn parse_int(working_set: &mut StateWorkingSet, span: Span) -> Expression {
142    let token = working_set.get_span_contents(span);
143
144    fn extract_int(
145        working_set: &mut StateWorkingSet,
146        token: &str,
147        span: Span,
148        radix: u32,
149    ) -> Expression {
150        // Parse as a u64, then cast to i64, otherwise, for numbers like "0xffffffffffffffef",
151        // you'll get `Error parsing hex string: number too large to fit in target type`.
152        if let Ok(num) = u64::from_str_radix(token, radix).map(|val| val as i64) {
153            Expression::new(working_set, Expr::Int(num), span, Type::Int)
154        } else {
155            working_set.error(ParseError::InvalidLiteral(
156                format!("invalid digits for radix {radix}"),
157                "int".into(),
158                span,
159            ));
160
161            garbage(working_set, span)
162        }
163    }
164
165    let token = strip_underscores(token);
166
167    if token.is_empty() {
168        working_set.error(ParseError::Expected("int", span));
169        return garbage(working_set, span);
170    }
171
172    if let Some(num) = token.strip_prefix("0b") {
173        extract_int(working_set, num, span, 2)
174    } else if let Some(num) = token.strip_prefix("0o") {
175        extract_int(working_set, num, span, 8)
176    } else if let Some(num) = token.strip_prefix("0x") {
177        extract_int(working_set, num, span, 16)
178    } else if let Ok(num) = token.parse::<i64>() {
179        Expression::new(working_set, Expr::Int(num), span, Type::Int)
180    } else {
181        working_set.error(ParseError::Expected("int", span));
182        garbage(working_set, span)
183    }
184}
185
186pub fn parse_float(working_set: &mut StateWorkingSet, span: Span) -> Expression {
187    let token = working_set.get_span_contents(span);
188    let token = strip_underscores(token);
189
190    if let Ok(x) = token.parse::<f64>() {
191        Expression::new(working_set, Expr::Float(x), span, Type::Float)
192    } else {
193        working_set.error(ParseError::Expected("float", span));
194
195        garbage(working_set, span)
196    }
197}
198
199pub fn parse_number(working_set: &mut StateWorkingSet, span: Span) -> Expression {
200    let starting_error_count = working_set.parse_errors.len();
201
202    let result = parse_int(working_set, span);
203    if starting_error_count == working_set.parse_errors.len() {
204        return result;
205    } else if let Some(ParseError::Expected(_, _)) = working_set.parse_errors.last() {
206        working_set.parse_errors.truncate(starting_error_count);
207    }
208
209    let result = parse_float(working_set, span);
210
211    if starting_error_count == working_set.parse_errors.len() {
212        return result;
213    }
214    working_set.parse_errors.truncate(starting_error_count);
215
216    working_set.error(ParseError::Expected("number", span));
217    garbage(working_set, span)
218}
219
220pub fn parse_range(working_set: &mut StateWorkingSet, span: Span) -> Option<Expression> {
221    trace!("parsing: range");
222    let starting_error_count = working_set.parse_errors.len();
223
224    // Range follows the following syntax: [<from>][<next_operator><next>]<range_operator>[<to>]
225    //   where <next_operator> is ".."
226    //   and  <range_operator> is "..", "..=" or "..<"
227    //   and one of the <from> or <to> bounds must be present (just '..' is not allowed since it
228    //     looks like parent directory)
229    //bugbug range cannot be [..] because that looks like parent directory
230
231    let contents = working_set.get_span_contents(span);
232
233    let Ok(token) = String::from_utf8(contents.into()) else {
234        working_set.error(ParseError::NonUtf8(span));
235        return None;
236    };
237
238    if token.starts_with(SPREAD_OPERATOR_STR) {
239        working_set.error(ParseError::Expected(
240            "range operator ('..'), got spread ('...')",
241            span,
242        ));
243        return None;
244    }
245
246    if !token.contains("..") {
247        working_set.error(ParseError::Expected("at least one range bound set", span));
248        return None;
249    }
250
251    let dotdot_pos: Vec<_> = token
252        .match_indices("..")
253        .filter_map(|(pos, _)| {
254            // paren_depth = count of unclosed parens prior to pos
255            let before = &token[..pos];
256            let paren_opened = before.chars().filter(|&c| c == '(').count();
257            let paren_closed = before.chars().filter(|&c| c == ')').count();
258            let paren_depth = paren_opened.checked_sub(paren_closed)?;
259            (paren_depth == 0).then_some(pos)
260        })
261        .collect();
262
263    let (next_op_pos, range_op_pos) = match dotdot_pos.len() {
264        1 => (None, dotdot_pos[0]),
265        2 => (Some(dotdot_pos[0]), dotdot_pos[1]),
266        _ => {
267            working_set.error(ParseError::Expected(
268                "one range operator ('..' or '..<') and optionally one next operator ('..')",
269                span,
270            ));
271            return None;
272        }
273    };
274    // Avoid calling sub-parsers on unmatched parens, to prevent quadratic time on things like ((((1..2))))
275    // No need to call the expensive parse_value on "((((1"
276    if dotdot_pos[0] > 0 {
277        let (_tokens, err) = lex(
278            &contents[..dotdot_pos[0]],
279            span.start,
280            &[],
281            &[b'.', b'?', b'!'],
282            true,
283        );
284        if let Some(_err) = err {
285            working_set.error(ParseError::Expected("Valid expression before ..", span));
286            return None;
287        }
288    }
289
290    let (inclusion, range_op_str, range_op_span) = if let Some(pos) = token.find("..<") {
291        if pos == range_op_pos {
292            let op_str = "..<";
293            let op_span = Span::new(
294                span.start + range_op_pos,
295                span.start + range_op_pos + op_str.len(),
296            );
297            (RangeInclusion::RightExclusive, "..<", op_span)
298        } else {
299            working_set.error(ParseError::Expected(
300                "inclusive operator preceding second range bound",
301                span,
302            ));
303            return None;
304        }
305    } else {
306        let op_str = if token[range_op_pos..].starts_with("..=") {
307            "..="
308        } else {
309            ".."
310        };
311
312        let op_span = Span::new(
313            span.start + range_op_pos,
314            span.start + range_op_pos + op_str.len(),
315        );
316        (RangeInclusion::Inclusive, op_str, op_span)
317    };
318
319    // Now, based on the operator positions, figure out where the bounds & next are located and
320    // parse them
321    // TODO: Actually parse the next number in the range
322    let from = if token.starts_with("..") {
323        // token starts with either next operator, or range operator -- we don't care which one
324        None
325    } else {
326        let from_span = Span::new(span.start, span.start + dotdot_pos[0]);
327        Some(crate::parser::parse_value(
328            working_set,
329            from_span,
330            &SyntaxShape::Number,
331            None,
332        ))
333    };
334
335    let to = if token.ends_with(range_op_str) {
336        None
337    } else {
338        let to_span = Span::new(range_op_span.end, span.end);
339        Some(crate::parser::parse_value(
340            working_set,
341            to_span,
342            &SyntaxShape::Number,
343            None,
344        ))
345    };
346
347    trace!("-- from: {from:?} to: {to:?}");
348
349    if let (None, None) = (&from, &to) {
350        working_set.error(ParseError::Expected("at least one range bound set", span));
351        return None;
352    }
353
354    let (next, next_op_span) = if let Some(pos) = next_op_pos {
355        let next_op_span = Span::new(span.start + pos, span.start + pos + "..".len());
356        let next_span = Span::new(next_op_span.end, range_op_span.start);
357
358        (
359            Some(crate::parser::parse_value(
360                working_set,
361                next_span,
362                &SyntaxShape::Number,
363                None,
364            )),
365            next_op_span,
366        )
367    } else {
368        (None, span)
369    };
370
371    if working_set.parse_errors.len() != starting_error_count {
372        return None;
373    }
374
375    let operator = RangeOperator {
376        inclusion,
377        span: range_op_span,
378        next_op_span,
379    };
380
381    let mut range = Range {
382        from,
383        next,
384        to,
385        operator,
386    };
387
388    check_range_types(working_set, &mut range);
389
390    Some(Expression::new(
391        working_set,
392        Expr::Range(Box::new(range)),
393        span,
394        Type::Range,
395    ))
396}
397
398pub(crate) fn parse_dollar_expr(
399    working_set: &mut StateWorkingSet,
400    span: Span,
401    shape: &SyntaxShape,
402    input_type: Option<&Type>,
403) -> Expression {
404    trace!("parsing: dollar expression");
405    let contents = working_set.get_span_contents(span);
406
407    if contents.starts_with(b"$\"") || contents.starts_with(b"$'") {
408        if matches!(shape, SyntaxShape::GlobPattern) && is_bare_string_interpolation(contents) {
409            parse_glob_pattern(working_set, span)
410        } else {
411            parse_string_interpolation(working_set, span)
412        }
413    } else if contents.starts_with(b"$.") {
414        parse_simple_cell_path(working_set, Span::new(span.start + 2, span.end))
415    } else {
416        let starting_error_count = working_set.parse_errors.len();
417
418        if let Some(expr) = parse_range(working_set, span) {
419            expr
420        } else {
421            working_set.parse_errors.truncate(starting_error_count);
422            parse_full_cell_path(working_set, None, span, input_type)
423        }
424    }
425}
426
427pub fn parse_raw_string(working_set: &mut StateWorkingSet, span: Span) -> Expression {
428    trace!("parsing: raw-string, with required delimiters");
429
430    let bytes = working_set.get_span_contents(span);
431
432    let prefix_sharp_cnt = if bytes.starts_with(b"r#") {
433        // actually `sharp_cnt` is always `index - 1`
434        // but create a variable here to make it clearer.
435        let mut sharp_cnt = 1;
436        let mut index = 2;
437        while index < bytes.len() && bytes[index] == b'#' {
438            index += 1;
439            sharp_cnt += 1;
440        }
441        sharp_cnt
442    } else {
443        working_set.error(ParseError::Expected("r#", span));
444        return garbage(working_set, span);
445    };
446    let expect_postfix_sharp_cnt = prefix_sharp_cnt;
447    // check the length of whole raw string.
448    // the whole raw string should contains at least
449    // 1(r) + prefix_sharp_cnt + 1(') + 1(') + postfix_sharp characters
450    if bytes.len() < prefix_sharp_cnt + expect_postfix_sharp_cnt + 3 {
451        working_set.error(ParseError::Unclosed("'", span));
452        return garbage(working_set, span);
453    }
454
455    // check for unbalanced # and single quotes.
456    let postfix_bytes = &bytes[bytes.len() - expect_postfix_sharp_cnt..bytes.len()];
457    if postfix_bytes.iter().any(|b| *b != b'#') {
458        working_set.error(ParseError::Unbalanced("prefix #", "postfix #", span));
459        return garbage(working_set, span);
460    }
461    // check for unblanaced single quotes.
462    if bytes[1 + prefix_sharp_cnt] != b'\''
463        || bytes[bytes.len() - expect_postfix_sharp_cnt - 1] != b'\''
464    {
465        working_set.error(ParseError::Unclosed("'", span));
466        return garbage(working_set, span);
467    }
468
469    let bytes = &bytes[prefix_sharp_cnt + 1 + 1..bytes.len() - 1 - prefix_sharp_cnt];
470    if let Ok(token) = String::from_utf8(bytes.into()) {
471        Expression::new(working_set, Expr::RawString(token), span, Type::String)
472    } else {
473        working_set.error(ParseError::Expected("utf8 raw-string", span));
474        garbage(working_set, span)
475    }
476}
477
478pub fn parse_paren_expr(
479    working_set: &mut StateWorkingSet,
480    span: Span,
481    shape: &SyntaxShape,
482) -> Expression {
483    let starting_error_count = working_set.parse_errors.len();
484
485    if let Some(expr) = parse_range(working_set, span) {
486        return expr;
487    }
488
489    working_set.parse_errors.truncate(starting_error_count);
490
491    if let SyntaxShape::Signature = shape {
492        return parse_signature(working_set, span, false);
493    }
494
495    if let SyntaxShape::ExternalSignature = shape {
496        return parse_signature(working_set, span, true);
497    }
498
499    let fcp_expr = parse_full_cell_path(working_set, None, span, None);
500    let fcp_error_count = working_set.parse_errors.len();
501    if fcp_error_count > starting_error_count {
502        let malformed_subexpr = working_set.parse_errors[starting_error_count..]
503            .first()
504            .is_some_and(|e| match e {
505                ParseError::Unclosed(right, _) if (*right == ")") => true,
506                ParseError::Unbalanced(left, right, _) if *left == "(" && *right == ")" => true,
507                _ => false,
508            });
509        if malformed_subexpr {
510            working_set.parse_errors.truncate(starting_error_count);
511            if matches!(shape, SyntaxShape::GlobPattern) {
512                parse_glob_pattern(working_set, span)
513            } else {
514                parse_string_interpolation(working_set, span)
515            }
516        } else {
517            fcp_expr
518        }
519    } else {
520        fcp_expr
521    }
522}
523
524pub fn parse_brace_expr(
525    working_set: &mut StateWorkingSet,
526    span: Span,
527    shape: &SyntaxShape,
528    input_type: Option<&Type>,
529) -> Expression {
530    // Try to detect what kind of value we're about to parse
531    // FIXME: In the future, we should work over the token stream so we only have to do this once
532    // before parsing begins
533
534    // FIXME: we're still using the shape because we rely on it to know how to handle syntax where
535    // the parse is ambiguous. We'll need to update the parts of the grammar where this is ambiguous
536    // and then revisit the parsing.
537
538    if span.end <= (span.start + 1) {
539        working_set.error(ParseError::ExpectedWithStringMsg(
540            format!("non-block value: {shape}"),
541            span,
542        ));
543        return Expression::garbage(working_set, span);
544    }
545    let bytes = working_set.get_span_contents(Span::new(span.start + 1, span.end - 1));
546    let (tokens, _) = lex(bytes, span.start + 1, &[b'\r', b'\n', b'\t'], &[b':'], true);
547
548    match tokens.as_slice() {
549        // If we're empty, that means an empty record or closure
550        [] => match shape {
551            SyntaxShape::Closure(_) => {
552                parse_closure_expression(working_set, shape, span, input_type)
553            }
554            SyntaxShape::Block => parse_block_expression(working_set, span, input_type),
555            SyntaxShape::MatchBlock => parse_match_block_expression(working_set, span, input_type),
556            _ => parse_record(working_set, span),
557        },
558        [
559            Token {
560                contents: TokenContents::Pipe | TokenContents::PipePipe,
561                ..
562            },
563            ..,
564        ] => {
565            if let SyntaxShape::Block = shape {
566                working_set.error(ParseError::Mismatch("block".into(), "closure".into(), span));
567                return Expression::garbage(working_set, span);
568            }
569            parse_closure_expression(working_set, shape, span, input_type)
570        }
571        [_, third, ..] if working_set.get_span_contents(third.span) == b":" => {
572            parse_full_cell_path(working_set, None, span, None)
573        }
574        [second, ..] => {
575            let second_bytes = working_set.get_span_contents(second.span);
576            match shape {
577                SyntaxShape::Closure(_) => {
578                    parse_closure_expression(working_set, shape, span, input_type)
579                }
580                SyntaxShape::Block => parse_block_expression(working_set, span, input_type),
581                SyntaxShape::MatchBlock => {
582                    parse_match_block_expression(working_set, span, input_type)
583                }
584                // For edge case of `{}.foo?`, #17896
585                _ if second_bytes == b"}" => parse_full_cell_path(working_set, None, span, None),
586                _ if extract_spread_record(second_bytes.into_spanned(second.span)).is_some() => {
587                    parse_record(working_set, span)
588                }
589                SyntaxShape::Any => parse_closure_expression(working_set, shape, span, input_type),
590                _ => {
591                    working_set.error(ParseError::ExpectedWithStringMsg(
592                        format!("non-block value: {shape}"),
593                        span,
594                    ));
595
596                    Expression::garbage(working_set, span)
597                }
598            }
599        }
600    }
601}
602
603pub fn parse_string_interpolation(working_set: &mut StateWorkingSet, span: Span) -> Expression {
604    #[derive(PartialEq, Eq, Debug)]
605    enum InterpolationMode {
606        String,
607        Expression,
608    }
609
610    let contents = working_set.get_span_contents(span);
611
612    let mut double_quote = false;
613
614    let (start, end) = if contents.starts_with(b"$\"") {
615        double_quote = true;
616
617        if let Err(err) = check_string_no_trailing_tokens(contents, span, 1, b'\"') {
618            working_set.error(err);
619            return garbage(working_set, span);
620        }
621
622        let end = if contents.ends_with(b"\"") && contents.len() > 2 {
623            span.end - 1
624        } else {
625            span.end
626        };
627        (span.start + 2, end)
628    } else if contents.starts_with(b"$'") {
629        if let Err(err) = check_string_no_trailing_tokens(contents, span, 1, b'\'') {
630            working_set.error(err);
631            return garbage(working_set, span);
632        }
633
634        let end = if contents.ends_with(b"'") && contents.len() > 2 {
635            span.end - 1
636        } else {
637            span.end
638        };
639        (span.start + 2, end)
640    } else {
641        (span.start, span.end)
642    };
643
644    let inner_span = Span::new(start, end);
645    let contents = working_set.get_span_contents(inner_span).to_vec();
646
647    let mut output = vec![];
648    let mut mode = InterpolationMode::String;
649    let mut token_start = start;
650
651    #[repr(u8)]
652    #[derive(Clone, Copy, PartialEq, Eq)]
653    enum Delimiter {
654        SingleQuote = b'\'',
655        DoubleQuote = b'"',
656        Backtick = b'`',
657        ParenLeft = b'(',
658        ParenRight = b')',
659    }
660
661    impl Delimiter {
662        const fn from_u8(b: u8) -> Option<Self> {
663            Some(match b {
664                b'\'' => Self::SingleQuote,
665                b'"' => Self::DoubleQuote,
666                b'`' => Self::Backtick,
667                b'(' => Self::ParenLeft,
668                b')' => Self::ParenRight,
669                _ => return None,
670            })
671        }
672        const fn is_paren(self) -> bool {
673            matches!(self, Self::ParenLeft | Self::ParenRight)
674        }
675        const fn pair(self) -> Self {
676            match self {
677                Self::ParenLeft => Self::ParenRight,
678                Self::ParenRight => Self::ParenLeft,
679                _ => self,
680            }
681        }
682    }
683    let mut delimiter_stack: Vec<Delimiter> = vec![];
684
685    let mut consecutive_backslashes: usize = 0;
686
687    let mut b = start;
688
689    while b != end {
690        let current_byte = contents[b - start];
691
692        if mode == InterpolationMode::String {
693            let preceding_consecutive_backslashes = consecutive_backslashes;
694
695            let is_backslash = current_byte == b'\\';
696            consecutive_backslashes = if is_backslash {
697                preceding_consecutive_backslashes + 1
698            } else {
699                0
700            };
701
702            if current_byte == b'('
703                && (!double_quote || preceding_consecutive_backslashes.is_multiple_of(2))
704            {
705                mode = InterpolationMode::Expression;
706                if token_start < b {
707                    let span = Span::new(token_start, b);
708                    let str_contents = working_set.get_span_contents(span);
709
710                    let (str_contents, err) = if double_quote {
711                        unescape_string(str_contents, span)
712                    } else {
713                        (str_contents.to_vec(), None)
714                    };
715                    if let Some(err) = err {
716                        working_set.error(err);
717                    }
718
719                    output.push(Expression::new(
720                        working_set,
721                        Expr::String(String::from_utf8_lossy(&str_contents).to_string()),
722                        span,
723                        Type::String,
724                    ));
725                    token_start = b;
726                }
727            }
728        }
729
730        if mode == InterpolationMode::Expression {
731            let byte = Delimiter::from_u8(current_byte);
732            match (delimiter_stack.last().copied(), byte) {
733                (Some(d), Some(byte)) if !d.is_paren() => {
734                    if byte == d {
735                        delimiter_stack.pop();
736                    }
737                }
738                (_, Some(byte)) if byte != Delimiter::ParenRight => {
739                    delimiter_stack.push(byte.pair())
740                }
741                (d, Some(Delimiter::ParenRight)) => {
742                    if let Some(Delimiter::ParenRight) = d {
743                        delimiter_stack.pop();
744                    }
745                    if delimiter_stack.is_empty() {
746                        mode = InterpolationMode::String;
747
748                        if token_start < b {
749                            let span = Span::new(token_start, b + 1);
750
751                            let expr = parse_full_cell_path(working_set, None, span, None);
752                            output.push(expr);
753                        }
754
755                        token_start = b + 1;
756                        continue;
757                    }
758                }
759                _ => (),
760            }
761        }
762        b += 1;
763    }
764
765    match mode {
766        InterpolationMode::String => {
767            if token_start < end {
768                let span = Span::new(token_start, end);
769                let str_contents = working_set.get_span_contents(span);
770
771                let (str_contents, err) = if double_quote {
772                    unescape_string(str_contents, span)
773                } else {
774                    (str_contents.to_vec(), None)
775                };
776                if let Some(err) = err {
777                    working_set.error(err);
778                }
779
780                output.push(Expression::new(
781                    working_set,
782                    Expr::String(String::from_utf8_lossy(&str_contents).to_string()),
783                    span,
784                    Type::String,
785                ));
786            }
787        }
788        InterpolationMode::Expression => {
789            if token_start < end {
790                let span = Span::new(token_start, end);
791                let expr = parse_full_cell_path(working_set, None, span, None);
792                output.push(expr);
793            }
794        }
795    }
796
797    Expression::new(
798        working_set,
799        Expr::StringInterpolation(output),
800        span,
801        Type::String,
802    )
803}
804
805pub fn parse_variable_expr(
806    working_set: &mut StateWorkingSet,
807    span: Span,
808    input_type: Option<&Type>,
809) -> Expression {
810    let contents = working_set.get_span_contents(span);
811
812    if contents == b"$nu" {
813        return Expression::new(
814            working_set,
815            Expr::Var(nu_protocol::NU_VARIABLE_ID),
816            span,
817            Type::Any,
818        );
819    } else if contents == b"$in" {
820        return Expression::new(
821            working_set,
822            Expr::Var(nu_protocol::IN_VARIABLE_ID),
823            span,
824            input_type.cloned().unwrap_or(Type::Any),
825        );
826    } else if contents == b"$env" {
827        return Expression::new(
828            working_set,
829            Expr::Var(nu_protocol::ENV_VARIABLE_ID),
830            span,
831            Type::Any,
832        );
833    }
834
835    let name = if contents.starts_with(b"$") {
836        String::from_utf8_lossy(&contents[1..]).to_string()
837    } else {
838        String::from_utf8_lossy(contents).to_string()
839    };
840
841    let bytes = working_set.get_span_contents(span);
842    let suggestion = || {
843        DidYouMean::new(
844            &working_set.list_variables(),
845            working_set.get_span_contents(span),
846        )
847    };
848    if !is_variable(bytes) {
849        working_set.error(ParseError::ExpectedWithDidYouMean(
850            "valid variable name",
851            suggestion(),
852            span,
853        ));
854        garbage(working_set, span)
855    } else if let Some(id) = working_set.find_variable(bytes) {
856        Expression::new(
857            working_set,
858            Expr::Var(id),
859            span,
860            working_set.get_variable(id).ty.clone(),
861        )
862    } else if working_set.get_env_var(&name).is_some() {
863        working_set.error(ParseError::EnvVarNotVar(name, span));
864        garbage(working_set, span)
865    } else {
866        working_set.error(ParseError::VariableNotFound(suggestion(), span));
867        garbage(working_set, span)
868    }
869}
870
871pub fn parse_cell_path(
872    working_set: &mut StateWorkingSet,
873    tokens: impl Iterator<Item = Token>,
874    expect_dot: bool,
875) -> Vec<PathMember> {
876    enum TokenType {
877        Dot,              // .
878        DotOrSign,        // . or ? or !
879        DotOrExclamation, // . or !
880        DotOrQuestion,    // . or ?
881        PathMember,       // an int or string, like `1` or `foo`
882    }
883
884    enum ModifyMember {
885        No,
886        Optional,
887        Insensitive,
888    }
889
890    impl TokenType {
891        fn expect(&mut self, byte: u8) -> Result<ModifyMember, &'static str> {
892            match (&*self, byte) {
893                (Self::PathMember, _) => {
894                    *self = Self::DotOrSign;
895                    Ok(ModifyMember::No)
896                }
897                (
898                    Self::Dot | Self::DotOrSign | Self::DotOrExclamation | Self::DotOrQuestion,
899                    b'.',
900                ) => {
901                    *self = Self::PathMember;
902                    Ok(ModifyMember::No)
903                }
904                (Self::DotOrSign, b'!') => {
905                    *self = Self::DotOrQuestion;
906                    Ok(ModifyMember::Insensitive)
907                }
908                (Self::DotOrSign, b'?') => {
909                    *self = Self::DotOrExclamation;
910                    Ok(ModifyMember::Optional)
911                }
912                (Self::DotOrSign, _) => Err(". or ! or ?"),
913                (Self::DotOrExclamation, b'!') => {
914                    *self = Self::Dot;
915                    Ok(ModifyMember::Insensitive)
916                }
917                (Self::DotOrExclamation, _) => Err(". or !"),
918                (Self::DotOrQuestion, b'?') => {
919                    *self = Self::Dot;
920                    Ok(ModifyMember::Optional)
921                }
922                (Self::DotOrQuestion, _) => Err(". or ?"),
923                (Self::Dot, _) => Err("."),
924            }
925        }
926    }
927
928    // Parsing a cell path is essentially a state machine, and this is the state
929    let mut expected_token = if expect_dot {
930        TokenType::Dot
931    } else {
932        TokenType::PathMember
933    };
934
935    let mut tail = vec![];
936
937    for path_element in tokens {
938        let bytes = working_set.get_span_contents(path_element.span);
939
940        // both parse_int and parse_string require their source to be non-empty
941        // all cases where `bytes` is empty is an error
942        let Some((&first, rest)) = bytes.split_first() else {
943            working_set.error(ParseError::Expected("string", path_element.span));
944            return tail;
945        };
946        let single_char = rest.is_empty();
947
948        if let TokenType::PathMember = expected_token {
949            let starting_error_count = working_set.parse_errors.len();
950
951            let expr = parse_int(working_set, path_element.span);
952            working_set.parse_errors.truncate(starting_error_count);
953
954            match expr {
955                Expression {
956                    expr: Expr::Int(val),
957                    span,
958                    ..
959                } => {
960                    if val < 0 {
961                        working_set.error(ParseError::InvalidLiteral(
962                            "negative index is not supported".into(),
963                            "cell path".into(),
964                            span,
965                        ));
966                        return tail;
967                    }
968                    tail.push(PathMember::Int {
969                        val: val as usize,
970                        span,
971                        optional: false,
972                    })
973                }
974                _ => {
975                    let result = parse_string(working_set, path_element.span);
976                    match result {
977                        Expression {
978                            expr: Expr::String(string),
979                            span,
980                            ..
981                        } => {
982                            tail.push(PathMember::String {
983                                val: string,
984                                span,
985                                optional: false,
986                                casing: Casing::Sensitive,
987                            });
988                        }
989                        _ => {
990                            working_set.error(ParseError::Expected("string", path_element.span));
991                            return tail;
992                        }
993                    }
994                }
995            }
996            expected_token = TokenType::DotOrSign;
997        } else {
998            match expected_token.expect(if single_char { first } else { b' ' }) {
999                Ok(modify) => {
1000                    if let Some(last) = tail.last_mut() {
1001                        match modify {
1002                            ModifyMember::No => {}
1003                            ModifyMember::Optional => last.make_optional(),
1004                            ModifyMember::Insensitive => last.make_insensitive(),
1005                        }
1006                    };
1007                }
1008                Err(expected) => {
1009                    working_set.error(ParseError::Expected(expected, path_element.span));
1010                    return tail;
1011                }
1012            }
1013        }
1014    }
1015
1016    tail
1017}
1018
1019pub fn parse_simple_cell_path(working_set: &mut StateWorkingSet, span: Span) -> Expression {
1020    let source = working_set.get_span_contents(span);
1021
1022    let (tokens, err) = lex(
1023        source,
1024        span.start,
1025        &[b'\n', b'\r'],
1026        &[b'.', b'?', b'!'],
1027        true,
1028    );
1029    if let Some(err) = err {
1030        working_set.error(err)
1031    }
1032
1033    let tokens = tokens.into_iter().peekable();
1034
1035    let cell_path = parse_cell_path(working_set, tokens, false);
1036
1037    Expression::new(
1038        working_set,
1039        Expr::CellPath(CellPath { members: cell_path }),
1040        span,
1041        Type::CellPath,
1042    )
1043}
1044
1045pub fn parse_full_cell_path(
1046    working_set: &mut StateWorkingSet,
1047    implicit_head: Option<VarId>,
1048    span: Span,
1049    input_type: Option<&Type>,
1050) -> Expression {
1051    trace!("parsing: full cell path");
1052    let full_cell_span = span;
1053    let source = working_set.get_span_contents(span);
1054
1055    let (tokens, err) = lex(
1056        source,
1057        span.start,
1058        &[b'\n', b'\r'],
1059        &[b'.', b'?', b'!'],
1060        true,
1061    );
1062    if let Some(err) = err {
1063        working_set.error(err)
1064    }
1065
1066    let mut tokens = tokens.into_iter().peekable();
1067    if let Some(head) = tokens.peek() {
1068        let bytes = working_set.get_span_contents(head.span);
1069        let (head, expect_dot) = if bytes.starts_with(b"(") {
1070            trace!("parsing: paren-head of full cell path");
1071
1072            let head_span = head.span;
1073            let mut start = head.span.start;
1074            let mut end = head.span.end;
1075            let mut is_closed = true;
1076
1077            if bytes.starts_with(b"(") {
1078                start += 1;
1079            }
1080            if bytes.ends_with(b")") {
1081                end -= 1;
1082            } else {
1083                working_set.error(ParseError::Unclosed(")", Span::new(end, end)));
1084                is_closed = false;
1085            }
1086
1087            let span = Span::new(start, end);
1088
1089            let source = working_set.get_span_contents(span);
1090
1091            let (output, err) = lex(source, span.start, &[b'\n', b'\r'], &[], true);
1092            if let Some(err) = err {
1093                working_set.error(err)
1094            }
1095
1096            // Creating a Type scope to parse the new block. This will keep track of
1097            // the previous input type found in that block
1098            let output = parse_block(working_set, &output, span, is_closed, true, None);
1099
1100            let ty = output.output_type();
1101
1102            let block_id = working_set.add_block(Arc::new(output));
1103            tokens.next();
1104
1105            (
1106                Expression::new(working_set, Expr::Subexpression(block_id), head_span, ty),
1107                true,
1108            )
1109        } else if bytes.starts_with(b"[") {
1110            trace!("parsing: table head of full cell path");
1111
1112            let output = parse_table_expression(working_set, head.span, &SyntaxShape::Any);
1113
1114            tokens.next();
1115
1116            (output, true)
1117        } else if bytes.starts_with(b"{") {
1118            trace!("parsing: record head of full cell path");
1119            let output = parse_record(working_set, head.span);
1120
1121            tokens.next();
1122
1123            (output, true)
1124        } else if bytes.starts_with(b"$") {
1125            trace!("parsing: $variable head of full cell path");
1126
1127            let out = parse_variable_expr(working_set, head.span, input_type);
1128
1129            tokens.next();
1130
1131            (out, true)
1132        } else if let Some(var_id) = implicit_head {
1133            trace!("parsing: implicit head of full cell path");
1134            (
1135                Expression::new(working_set, Expr::Var(var_id), head.span, Type::Any),
1136                false,
1137            )
1138        } else {
1139            working_set.error(ParseError::Mismatch(
1140                "variable or subexpression".into(),
1141                String::from_utf8_lossy(bytes).to_string(),
1142                span,
1143            ));
1144            return garbage(working_set, span);
1145        };
1146
1147        let tail = parse_cell_path(working_set, tokens, expect_dot);
1148        let ty = if !tail.is_empty() {
1149            if nu_experimental::CELL_PATH_TYPES.get() {
1150                head.ty
1151                    .follow_cell_path(&tail)
1152                    .map(|ty| ty.into_owned())
1153                    .unwrap_or(Type::Any)
1154            } else {
1155                Type::Any
1156            }
1157        } else {
1158            head.ty.clone()
1159        };
1160
1161        Expression::new(
1162            working_set,
1163            Expr::FullCellPath(Box::new(FullCellPath { head, tail })),
1164            full_cell_span,
1165            ty,
1166        )
1167    } else {
1168        garbage(working_set, span)
1169    }
1170}
1171
/// The kinds of path-like literal that `parse_path_like` can produce.
enum PathLikeKind {
    /// Parsed into `Expr::Directory`.
    Directory,
    /// Parsed into `Expr::Filepath`.
    Filepath,
    /// Parsed into `Expr::GlobPattern` (or `Expr::GlobInterpolation`).
    Glob,
}
1177
1178impl PathLikeKind {
1179    /// Returns the name used for trace logging during parsing.
1180    fn trace_name(&self) -> &'static str {
1181        match self {
1182            PathLikeKind::Directory => "directory",
1183            PathLikeKind::Filepath => "filepath",
1184            PathLikeKind::Glob => "glob pattern",
1185        }
1186    }
1187
1188    /// Returns the error message displayed when parsing fails.
1189    fn error_msg(&self) -> &'static str {
1190        match self {
1191            PathLikeKind::Directory => "directory",
1192            PathLikeKind::Filepath => "filepath",
1193            PathLikeKind::Glob => "glob pattern string",
1194        }
1195    }
1196
1197    /// Constructs the appropriate `Expr` and its corresponding `Type` for a simple (non-interpolated) path.
1198    fn to_expr(&self, token: String, quoted: bool) -> (Expr, Type) {
1199        match self {
1200            PathLikeKind::Directory => (Expr::Directory(token, quoted), Type::String),
1201            PathLikeKind::Filepath => (Expr::Filepath(token, quoted), Type::String),
1202            PathLikeKind::Glob => (Expr::GlobPattern(token, quoted), Type::Glob),
1203        }
1204    }
1205
1206    /// Constructs the appropriate interpolation `Expr` for a path containing subexpressions.
1207    fn to_interpolation_expr(&self, exprs: Vec<Expression>, quoted: bool) -> Expr {
1208        match self {
1209            PathLikeKind::Directory | PathLikeKind::Filepath => Expr::StringInterpolation(exprs),
1210            PathLikeKind::Glob => Expr::GlobInterpolation(exprs, quoted),
1211        }
1212    }
1213}
1214
1215/// Common helper for parsing path-like expressions (filepath, directory, glob pattern).
1216///
1217/// This function consolidates the repetitive logic for parsing path types, including:
1218/// - Bare word interpolation detection
1219/// - Escape sequence processing
1220/// - Quote state tracking
1221/// - Error handling
1222///
1223/// # Arguments
1224///
1225/// * `working_set` - The current parser state
1226/// * `span` - The source span of the expression
1227/// * `kind` - The kind of path-like expression to parse
1228fn parse_path_like(
1229    working_set: &mut StateWorkingSet,
1230    span: Span,
1231    kind: PathLikeKind,
1232) -> Expression {
1233    let bytes = working_set.get_span_contents(span);
1234    let quoted = is_quoted(bytes);
1235    trace!("parsing: {}", kind.trace_name());
1236
1237    // Check for bare word interpolation
1238    if is_bare_string_interpolation(bytes) {
1239        let interpolation_expr = parse_string_interpolation(working_set, span);
1240
1241        // Convert StringInterpolation to the appropriate interpolation type
1242        if let Expr::StringInterpolation(exprs) = interpolation_expr.expr {
1243            return Expression::new(
1244                working_set,
1245                kind.to_interpolation_expr(exprs, quoted),
1246                span,
1247                interpolation_expr.ty.clone(),
1248            );
1249        }
1250
1251        return interpolation_expr;
1252    }
1253
1254    let (token, err) = unescape_unquote_string(bytes, span);
1255    let is_quoted_internal = is_quoted(bytes);
1256
1257    if err.is_none() {
1258        trace!("-- found {token}");
1259
1260        let (expr, ty) = kind.to_expr(token, is_quoted_internal);
1261
1262        Expression::new(working_set, expr, span, ty)
1263    } else {
1264        working_set.error(ParseError::Expected(kind.error_msg(), span));
1265
1266        garbage(working_set, span)
1267    }
1268}
1269
/// Returns `true` when `bytes` is an unquoted word that contains a `(`.
///
/// Empty input, and input whose first byte is a single quote, double quote
/// or backtick, is never treated as a bare interpolation.
fn is_bare_string_interpolation(bytes: &[u8]) -> bool {
    let starts_with_quote = matches!(bytes.first(), Some(b'\'' | b'"' | b'`'));
    !starts_with_quote && bytes.contains(&b'(')
}
1277
/// Parses `span` as a directory path by delegating to `parse_path_like`
/// with `PathLikeKind::Directory`.
pub fn parse_directory(working_set: &mut StateWorkingSet, span: Span) -> Expression {
    parse_path_like(working_set, span, PathLikeKind::Directory)
}
1281
/// Parses `span` as a file path by delegating to `parse_path_like`
/// with `PathLikeKind::Filepath`.
pub fn parse_filepath(working_set: &mut StateWorkingSet, span: Span) -> Expression {
    parse_path_like(working_set, span, PathLikeKind::Filepath)
}
1285
1286pub fn parse_datetime(working_set: &mut StateWorkingSet, span: Span) -> Expression {
1287    trace!("parsing: datetime");
1288
1289    let bytes = working_set.get_span_contents(span);
1290
1291    if bytes.len() < 6
1292        || !bytes[0].is_ascii_digit()
1293        || !bytes[1].is_ascii_digit()
1294        || !bytes[2].is_ascii_digit()
1295        || !bytes[3].is_ascii_digit()
1296        || bytes[4] != b'-'
1297    {
1298        working_set.error(ParseError::Expected("datetime", span));
1299        return garbage(working_set, span);
1300    }
1301
1302    let token = String::from_utf8_lossy(bytes).to_string();
1303
1304    if let Ok(datetime) = chrono::DateTime::parse_from_rfc3339(&token) {
1305        return Expression::new(working_set, Expr::DateTime(datetime), span, Type::Date);
1306    }
1307
1308    // Just the date
1309    let just_date = token.clone() + "T00:00:00+00:00";
1310    if let Ok(datetime) = chrono::DateTime::parse_from_rfc3339(&just_date) {
1311        return Expression::new(working_set, Expr::DateTime(datetime), span, Type::Date);
1312    }
1313
1314    // Date and time, assume UTC
1315    let datetime = token + "+00:00";
1316    if let Ok(datetime) = chrono::DateTime::parse_from_rfc3339(&datetime) {
1317        return Expression::new(working_set, Expr::DateTime(datetime), span, Type::Date);
1318    }
1319
1320    working_set.error(ParseError::Expected("datetime", span));
1321
1322    garbage(working_set, span)
1323}
1324
1325/// Parse a duration type, eg '10day'
1326pub fn parse_duration(working_set: &mut StateWorkingSet, span: Span) -> Expression {
1327    trace!("parsing: duration");
1328
1329    let bytes = working_set.get_span_contents(span);
1330
1331    match parse_unit_value(bytes, span, DURATION_UNIT_GROUPS, Type::Duration, |x| x) {
1332        Some(Ok(expr)) => {
1333            let span_id = working_set.add_span(span);
1334            expr.with_span_id(span_id)
1335        }
1336        Some(Err(mk_err_for)) => {
1337            working_set.error(mk_err_for("duration"));
1338            garbage(working_set, span)
1339        }
1340        None => {
1341            working_set.error(ParseError::Expected("duration with valid units", span));
1342            garbage(working_set, span)
1343        }
1344    }
1345}
1346
1347/// Parse a unit type, eg '10kb'
1348pub fn parse_filesize(working_set: &mut StateWorkingSet, span: Span) -> Expression {
1349    trace!("parsing: filesize");
1350
1351    let bytes = working_set.get_span_contents(span);
1352
1353    // the hex digit `b` might be mistaken for the unit `b`, so check that first
1354    if bytes.starts_with(b"0x") {
1355        working_set.error(ParseError::Expected("filesize with valid units", span));
1356        return garbage(working_set, span);
1357    }
1358
1359    match parse_unit_value(bytes, span, FILESIZE_UNIT_GROUPS, Type::Filesize, |x| {
1360        x.to_ascii_uppercase()
1361    }) {
1362        Some(Ok(expr)) => {
1363            let span_id = working_set.add_span(span);
1364            expr.with_span_id(span_id)
1365        }
1366        Some(Err(mk_err_for)) => {
1367            working_set.error(mk_err_for("filesize"));
1368            garbage(working_set, span)
1369        }
1370        None => {
1371            working_set.error(ParseError::Expected("filesize with valid units", span));
1372            garbage(working_set, span)
1373        }
1374    }
1375}
1376
/// Outcome of `parse_unit_value` once a unit suffix has matched: either the
/// parsed expression, or a closure that builds the `ParseError` when given
/// the name of the value kind (callers pass e.g. "duration" or "filesize").
type ParseUnitResult<'res> = Result<Expression, Box<dyn Fn(&'res str) -> ParseError>>;
/// One row of a unit table: the unit, the textual suffix that selects it, and
/// optionally a `(unit, factor)` pair used to express the value in a smaller
/// unit by multiplying by `factor`.
type UnitGroup<'unit> = (Unit, &'unit str, Option<(Unit, i64)>);
1379
1380pub fn parse_unit_value<'res>(
1381    bytes: &[u8],
1382    span: Span,
1383    unit_groups: &[UnitGroup],
1384    ty: Type,
1385    transform: fn(String) -> String,
1386) -> Option<ParseUnitResult<'res>> {
1387    if bytes.len() < 2
1388        || !(bytes[0].is_ascii_digit()
1389            || (bytes[0] == b'.' && bytes[1].is_ascii_digit())
1390            || (bytes[0] == b'-' && bytes[1].is_ascii_digit()))
1391    {
1392        return None;
1393    }
1394
1395    // Bail if not UTF-8
1396    let value = transform(str::from_utf8(bytes).ok()?.into());
1397
1398    if let Some((unit, name, convert)) = unit_groups.iter().find(|x| value.ends_with(x.1)) {
1399        let lhs_len = value.len() - name.len();
1400        let lhs = strip_underscores(&value.as_bytes()[..lhs_len]);
1401        let lhs_span = Span::new(span.start, span.start + lhs_len);
1402        let unit_span = Span::new(span.start + lhs_len, span.end);
1403        if lhs.ends_with('$') {
1404            // If `parse_unit_value` has higher precedence over `parse_range`,
1405            // a variable with the name of a unit could otherwise not be used as the end of a range.
1406            return None;
1407        }
1408
1409        let (decimal_part, number_part) = modf(match lhs.parse::<f64>() {
1410            Ok(it) => it,
1411            Err(_) => {
1412                let mk_err = move |name| {
1413                    ParseError::LabeledError(
1414                        format!("{name} value must be a number"),
1415                        "not a number".into(),
1416                        lhs_span,
1417                    )
1418                };
1419                return Some(Err(Box::new(mk_err)));
1420            }
1421        });
1422
1423        let mut unit = match convert {
1424            Some(convert_to) => convert_to.0,
1425            None => *unit,
1426        };
1427
1428        let num_float = match convert {
1429            Some(convert_to) => {
1430                (number_part * convert_to.1 as f64) + (decimal_part * convert_to.1 as f64)
1431            }
1432            None => number_part,
1433        };
1434
1435        // Convert all durations to nanoseconds, and filesizes to bytes,
1436        // to minimize loss of precision
1437        let factor = match ty {
1438            Type::Filesize => unit_to_byte_factor(&unit),
1439            Type::Duration => unit_to_ns_factor(&unit),
1440            _ => None,
1441        };
1442
1443        let num = match factor {
1444            Some(factor) => {
1445                let num_base = num_float * factor;
1446                if i64::MIN as f64 <= num_base && num_base <= i64::MAX as f64 {
1447                    unit = if ty == Type::Filesize {
1448                        Unit::Filesize(FilesizeUnit::B)
1449                    } else {
1450                        Unit::Nanosecond
1451                    };
1452                    num_base as i64
1453                } else {
1454                    // not safe to convert, because of the overflow
1455                    num_float as i64
1456                }
1457            }
1458            None => num_float as i64,
1459        };
1460
1461        trace!("-- found {num} {unit:?}");
1462        let value = ValueWithUnit {
1463            expr: Expression::new_unknown(Expr::Int(num), lhs_span, Type::Number),
1464            unit: Spanned {
1465                item: unit,
1466                span: unit_span,
1467            },
1468        };
1469        let expr = Expression::new_unknown(Expr::ValueWithUnit(Box::new(value)), span, ty);
1470
1471        Some(Ok(expr))
1472    } else {
1473        None
1474    }
1475}
1476
/// Unit table for filesize literals, consumed by `parse_unit_value`.
///
/// Suffixes are written in upper case because `parse_filesize` upper-cases
/// the token before matching. Each entry except `B` carries a conversion to
/// the next smaller unit of the same family (factor 1000 for the decimal
/// units, 1024 for the binary ones).
///
/// Order matters: `parse_unit_value` takes the first entry whose suffix
/// matches the end of the token, so the bare `B` entry must stay last or it
/// would match every other suffix as well.
pub const FILESIZE_UNIT_GROUPS: &[UnitGroup] = &[
    (
        Unit::Filesize(FilesizeUnit::KB),
        "KB",
        Some((Unit::Filesize(FilesizeUnit::B), 1000)),
    ),
    (
        Unit::Filesize(FilesizeUnit::MB),
        "MB",
        Some((Unit::Filesize(FilesizeUnit::KB), 1000)),
    ),
    (
        Unit::Filesize(FilesizeUnit::GB),
        "GB",
        Some((Unit::Filesize(FilesizeUnit::MB), 1000)),
    ),
    (
        Unit::Filesize(FilesizeUnit::TB),
        "TB",
        Some((Unit::Filesize(FilesizeUnit::GB), 1000)),
    ),
    (
        Unit::Filesize(FilesizeUnit::PB),
        "PB",
        Some((Unit::Filesize(FilesizeUnit::TB), 1000)),
    ),
    (
        Unit::Filesize(FilesizeUnit::EB),
        "EB",
        Some((Unit::Filesize(FilesizeUnit::PB), 1000)),
    ),
    (
        Unit::Filesize(FilesizeUnit::KiB),
        "KIB",
        Some((Unit::Filesize(FilesizeUnit::B), 1024)),
    ),
    (
        Unit::Filesize(FilesizeUnit::MiB),
        "MIB",
        Some((Unit::Filesize(FilesizeUnit::KiB), 1024)),
    ),
    (
        Unit::Filesize(FilesizeUnit::GiB),
        "GIB",
        Some((Unit::Filesize(FilesizeUnit::MiB), 1024)),
    ),
    (
        Unit::Filesize(FilesizeUnit::TiB),
        "TIB",
        Some((Unit::Filesize(FilesizeUnit::GiB), 1024)),
    ),
    (
        Unit::Filesize(FilesizeUnit::PiB),
        "PIB",
        Some((Unit::Filesize(FilesizeUnit::TiB), 1024)),
    ),
    (
        Unit::Filesize(FilesizeUnit::EiB),
        "EIB",
        Some((Unit::Filesize(FilesizeUnit::PiB), 1024)),
    ),
    (Unit::Filesize(FilesizeUnit::B), "B", None),
];
1540
/// Unit table for duration literals, consumed by `parse_unit_value`.
///
/// Each entry except `ns` carries a conversion to a smaller unit and the
/// factor to multiply by. Microseconds are accepted under three spellings:
/// `us` and the two Unicode "mu" characters followed by `s`.
pub const DURATION_UNIT_GROUPS: &[UnitGroup] = &[
    (Unit::Nanosecond, "ns", None),
    // TODO: start adding aliases for duration units here
    (Unit::Microsecond, "us", Some((Unit::Nanosecond, 1000))),
    (
        // µ Micro Sign
        Unit::Microsecond,
        "\u{00B5}s",
        Some((Unit::Nanosecond, 1000)),
    ),
    (
        // μ Greek small letter Mu
        Unit::Microsecond,
        "\u{03BC}s",
        Some((Unit::Nanosecond, 1000)),
    ),
    (Unit::Millisecond, "ms", Some((Unit::Microsecond, 1000))),
    (Unit::Second, "sec", Some((Unit::Millisecond, 1000))),
    (Unit::Minute, "min", Some((Unit::Second, 60))),
    (Unit::Hour, "hr", Some((Unit::Minute, 60))),
    // A day converts straight to minutes (24 * 60 = 1440).
    (Unit::Day, "day", Some((Unit::Minute, 1440))),
    (Unit::Week, "wk", Some((Unit::Day, 7))),
];
1564
1565fn unit_to_ns_factor(unit: &Unit) -> Option<f64> {
1566    match unit {
1567        Unit::Nanosecond => Some(1.0),
1568        Unit::Microsecond => Some(1_000.0),
1569        Unit::Millisecond => Some(1_000_000.0),
1570        Unit::Second => Some(1_000_000_000.0),
1571        Unit::Minute => Some(60.0 * 1_000_000_000.0),
1572        Unit::Hour => Some(60.0 * 60.0 * 1_000_000_000.0),
1573        Unit::Day => Some(24.0 * 60.0 * 60.0 * 1_000_000_000.0),
1574        Unit::Week => Some(7.0 * 24.0 * 60.0 * 60.0 * 1_000_000_000.0),
1575        _ => None,
1576    }
1577}
1578
1579fn unit_to_byte_factor(unit: &Unit) -> Option<f64> {
1580    match unit {
1581        Unit::Filesize(FilesizeUnit::B) => Some(1.0),
1582        Unit::Filesize(FilesizeUnit::KB) => Some(1_000.0),
1583        Unit::Filesize(FilesizeUnit::MB) => Some(1_000_000.0),
1584        Unit::Filesize(FilesizeUnit::GB) => Some(1_000_000_000.0),
1585        Unit::Filesize(FilesizeUnit::TB) => Some(1_000_000_000_000.0),
1586        Unit::Filesize(FilesizeUnit::PB) => Some(1_000_000_000_000_000.0),
1587        Unit::Filesize(FilesizeUnit::EB) => Some(1_000_000_000_000_000_000.0),
1588        Unit::Filesize(FilesizeUnit::KiB) => Some(1024.0),
1589        Unit::Filesize(FilesizeUnit::MiB) => Some(1024.0 * 1024.0),
1590        Unit::Filesize(FilesizeUnit::GiB) => Some(1024.0 * 1024.0 * 1024.0),
1591        Unit::Filesize(FilesizeUnit::TiB) => Some(1024.0 * 1024.0 * 1024.0 * 1024.0),
1592        Unit::Filesize(FilesizeUnit::PiB) => Some(1024.0 * 1024.0 * 1024.0 * 1024.0 * 1024.0),
1593        Unit::Filesize(FilesizeUnit::EiB) => {
1594            Some(1024.0 * 1024.0 * 1024.0 * 1024.0 * 1024.0 * 1024.0)
1595        }
1596        _ => None,
1597    }
1598}
1599
// Borrowed from libm at https://github.com/rust-lang/libm/blob/master/src/math/modf.rs
/// Splits `x` into `(fractional part, integral part)`, both carrying the
/// sign of `x`. For NaN both parts are `x`.
fn modf(x: f64) -> (f64, f64) {
    const SIGN_BIT: u64 = 1 << 63;

    let bits = x.to_bits();
    // Unbiased binary exponent.
    let exponent = (((bits >> 52) & 0x7ff) as i32) - 0x3ff;
    // A zero carrying the sign of `x`.
    let signed_zero = f64::from_bits(bits & SIGN_BIT);

    /* no fractional part */
    if exponent >= 52 {
        let is_nan = exponent == 0x400 && (bits << 12) != 0;
        return if is_nan { (x, x) } else { (signed_zero, x) };
    }

    /* no integral part */
    if exponent < 0 {
        return (x, signed_zero);
    }

    // Mantissa bits that sit below the binary point.
    let fraction_mask = (u64::MAX >> 12) >> exponent;
    if bits & fraction_mask == 0 {
        // Already an integer.
        return (signed_zero, x);
    }

    let integral = f64::from_bits(bits & !fraction_mask);
    (x - integral, integral)
}
1634
/// Parses `span` as a glob pattern by delegating to `parse_path_like`
/// with `PathLikeKind::Glob`.
pub fn parse_glob_pattern(working_set: &mut StateWorkingSet, span: Span) -> Expression {
    parse_path_like(working_set, span, PathLikeKind::Glob)
}
1638
1639fn parse_hex_escape(bytes: &[u8], start_idx: usize, span: Span) -> Result<(u8, usize), ParseError> {
1640    let hex_digits = bytes.get(start_idx + 1..start_idx + 3).ok_or_else(|| {
1641        ParseError::InvalidLiteral(
1642            "incomplete hex escape '\\xHH', expected 2 hex digits".into(),
1643            "string".into(),
1644            Span::new(span.start + start_idx, span.end),
1645        )
1646    })?;
1647    if !hex_digits.iter().all(u8::is_ascii_hexdigit) {
1648        return Err(ParseError::InvalidLiteral(
1649            "invalid hex escape '\\xHH', expected exactly 2 hex digits".into(),
1650            "string".into(),
1651            Span::new(span.start + start_idx, span.end),
1652        ));
1653    }
1654    str::from_utf8(hex_digits)
1655        .ok()
1656        .and_then(|s| u8::from_str_radix(s, 0x10).ok())
1657        .map(|byte_val| (byte_val, start_idx + 3))
1658        .ok_or_else(|| {
1659            ParseError::InvalidLiteral(
1660                "invalid hex escape '\\xHH'".into(),
1661                "string".into(),
1662                Span::new(span.start + start_idx, span.end),
1663            )
1664        })
1665}
1666
1667fn parse_unicode_escape(
1668    bytes: &[u8],
1669    start_idx: usize,
1670    span: Span,
1671) -> Result<(char, usize), ParseError> {
1672    let mut slice = &bytes[(start_idx + 1)..];
1673    let mut current_idx = start_idx + 1;
1674
1675    // NOTE: this is a more defensive approach meant to avoid reading too much, but requires
1676    //       changing error messages
1677    // read no more than 8 bytes "{xxxxxx}"
1678    // slice = &slice[..(8.min(slice.len()))];
1679
1680    slice = slice.strip_prefix(b"{").ok_or_else(|| {
1681        ParseError::InvalidLiteral(
1682            "invalid unicode escape '\\u{...}', must be 1-6 hex digits, max codepoint 0x10FFFF"
1683                .into(),
1684            "string".into(),
1685            Span::new(span.start + start_idx, span.end),
1686        )
1687    })?;
1688    current_idx += 1;
1689
1690    let end = slice.iter().position(|b| *b == b'}').ok_or_else(|| {
1691        ParseError::InvalidLiteral(
1692            "incomplete unicode escape '\\u{...}', missing closing '}'".into(),
1693            "string".into(),
1694            Span::new(span.start + start_idx, span.end),
1695        )
1696    })?;
1697    let digits = &slice[..end];
1698    current_idx += end; // the digits
1699    current_idx += 1; // closing brace
1700    let current_idx = current_idx;
1701
1702    let ch = Some(digits)
1703        .filter(|b| (1..=6).contains(&b.len()))
1704        .and_then(|b| str::from_utf8(b).ok())
1705        .and_then(|s| u32::from_str_radix(s, 0x10).ok())
1706        .and_then(char::from_u32)
1707        .ok_or_else(|| {
1708            ParseError::InvalidLiteral(
1709                "invalid unicode escape '\\u{...}', must be 1-6 hex digits, max codepoint 0x10FFFF"
1710                    .into(),
1711                "string".into(),
1712                Span::new(span.start + start_idx, span.end),
1713            )
1714        })?;
1715
1716    Ok((ch, current_idx))
1717}
1718
1719pub fn unescape_string(bytes: &[u8], span: Span) -> (Vec<u8>, Option<ParseError>) {
1720    let mut output = Vec::new();
1721    let mut error = None;
1722
1723    let mut idx = 0;
1724
1725    if !bytes.contains(&b'\\') {
1726        return (bytes.to_vec(), None);
1727    }
1728
1729    'us_loop: while idx < bytes.len() {
1730        if bytes[idx] == b'\\' {
1731            // We're in an escape
1732            idx += 1;
1733
1734            match bytes.get(idx) {
1735                Some(b'"') => {
1736                    output.push(b'"');
1737                    idx += 1;
1738                }
1739                Some(b'\'') => {
1740                    output.push(b'\'');
1741                    idx += 1;
1742                }
1743                Some(b'\\') => {
1744                    output.push(b'\\');
1745                    idx += 1;
1746                }
1747                Some(b'/') => {
1748                    output.push(b'/');
1749                    idx += 1;
1750                }
1751                Some(b'(') => {
1752                    output.push(b'(');
1753                    idx += 1;
1754                }
1755                Some(b')') => {
1756                    output.push(b')');
1757                    idx += 1;
1758                }
1759                Some(b'{') => {
1760                    output.push(b'{');
1761                    idx += 1;
1762                }
1763                Some(b'}') => {
1764                    output.push(b'}');
1765                    idx += 1;
1766                }
1767                Some(b'$') => {
1768                    output.push(b'$');
1769                    idx += 1;
1770                }
1771                Some(b'^') => {
1772                    output.push(b'^');
1773                    idx += 1;
1774                }
1775                Some(b'#') => {
1776                    output.push(b'#');
1777                    idx += 1;
1778                }
1779                Some(b'|') => {
1780                    output.push(b'|');
1781                    idx += 1;
1782                }
1783                Some(b'~') => {
1784                    output.push(b'~');
1785                    idx += 1;
1786                }
1787                Some(b'a') => {
1788                    output.push(0x7);
1789                    idx += 1;
1790                }
1791                Some(b'b') => {
1792                    output.push(0x8);
1793                    idx += 1;
1794                }
1795                Some(b'e') => {
1796                    output.push(0x1b);
1797                    idx += 1;
1798                }
1799                Some(b'f') => {
1800                    output.push(0xc);
1801                    idx += 1;
1802                }
1803                Some(b'n') => {
1804                    output.push(b'\n');
1805                    idx += 1;
1806                }
1807                Some(b'r') => {
1808                    output.push(b'\r');
1809                    idx += 1;
1810                }
1811                Some(b't') => {
1812                    output.push(b'\t');
1813                    idx += 1;
1814                }
1815                Some(b'0') => {
1816                    output.push(b'\0');
1817                    idx += 1;
1818                }
1819                Some(b'x') => {
1820                    // Hex escape: \xHH (exactly 2 hex digits)
1821                    match parse_hex_escape(bytes, idx, span) {
1822                        Ok((byte_val, new_idx)) => {
1823                            output.push(byte_val);
1824                            idx = new_idx;
1825                        }
1826                        Err(err) => {
1827                            error = error.or(Some(err));
1828                            break 'us_loop;
1829                        }
1830                    }
1831                }
1832                Some(b'u') => {
1833                    // Unicode escape: \u{XXXXXX} (1-6 hex digits, max 0x10FFFF)
1834                    match parse_unicode_escape(bytes, idx, span) {
1835                        Ok((ch, new_idx)) => {
1836                            let mut ch_buf = [0u8; 4];
1837                            output.extend(ch.encode_utf8(&mut ch_buf).as_bytes());
1838                            idx = new_idx;
1839                        }
1840                        Err(err) => {
1841                            error = error.or(Some(err));
1842                            break 'us_loop;
1843                        }
1844                    }
1845                }
1846
1847                Some(other) => {
1848                    error = error.or(Some(ParseError::InvalidLiteral(
1849                        format!("unrecognized escape sequence '\\{}'", *other as char),
1850                        "string".into(),
1851                        Span::new(span.start + idx, span.end),
1852                    )));
1853                    break 'us_loop;
1854                }
1855                None => {
1856                    error = error.or(Some(ParseError::InvalidLiteral(
1857                        "incomplete escape sequence after '\\'".into(),
1858                        "string".into(),
1859                        Span::new(span.end.saturating_sub(1), span.end),
1860                    )));
1861                    break 'us_loop;
1862                }
1863            }
1864        } else {
1865            output.push(bytes[idx]);
1866            idx += 1;
1867        }
1868    }
1869
1870    (output, error)
1871}
1872
1873pub fn unescape_unquote_string(bytes: &[u8], span: Span) -> (String, Option<ParseError>) {
1874    if bytes.starts_with(b"\"") {
1875        // Needs unescaping
1876        let bytes = trim_quotes(bytes);
1877
1878        let (bytes, err) = unescape_string(bytes, span);
1879
1880        if let Ok(token) = String::from_utf8(bytes) {
1881            (token, err)
1882        } else {
1883            (String::new(), Some(ParseError::Expected("string", span)))
1884        }
1885    } else {
1886        let bytes = trim_quotes(bytes);
1887
1888        if let Ok(token) = String::from_utf8(bytes.into()) {
1889            (token, None)
1890        } else {
1891            (String::new(), Some(ParseError::Expected("string", span)))
1892        }
1893    }
1894}
1895
1896fn check_string_no_trailing_tokens(
1897    bytes: &[u8],
1898    span: Span,
1899    opening_quote_pos: usize,
1900    quote: u8,
1901) -> Result<(), ParseError> {
1902    let pos = bytes
1903        .iter()
1904        .rposition(|ch| *ch == quote)
1905        .expect("string begins with quote");
1906    if pos == bytes.len() - 1 {
1907        Ok(())
1908    } else if pos == opening_quote_pos {
1909        // this may look like an error, but it's not:
1910        // some code, like completions, requires allowing
1911        // unterminated strings at this stage.
1912        Ok(())
1913    } else {
1914        let span = Span::new(span.start + pos + 1, span.end);
1915        Err(ParseError::ExtraTokensAfterClosingDelimiter(span))
1916    }
1917}
1918
1919pub fn parse_string(working_set: &mut StateWorkingSet, span: Span) -> Expression {
1920    trace!("parsing: string");
1921
1922    let bytes = working_set.get_span_contents(span);
1923
1924    if bytes.is_empty() {
1925        working_set.error(ParseError::Expected("String", span));
1926        return Expression::garbage(working_set, span);
1927    }
1928
1929    // Check for bare word interpolation
1930    if is_bare_string_interpolation(bytes) {
1931        return parse_string_interpolation(working_set, span);
1932    }
1933
1934    // Check for unbalanced quotes:
1935    for quote in [b'\"', b'\''] {
1936        if bytes[0] == quote
1937            && let Err(err) = check_string_no_trailing_tokens(bytes, span, 0, quote)
1938        {
1939            working_set.error(err);
1940            return garbage(working_set, span);
1941        }
1942    }
1943
1944    let (s, err) = unescape_unquote_string(bytes, span);
1945    if let Some(err) = err {
1946        working_set.error(err);
1947    }
1948
1949    Expression::new(working_set, Expr::String(s), span, Type::String)
1950}
1951
/// Returns `true` when `bytes` is at least two bytes long and both begins and
/// ends with the same quote character (`'` or `"`).
fn is_quoted(bytes: &[u8]) -> bool {
    // A lone quote character is not a quoted string.
    if bytes.len() < 2 {
        return false;
    }

    let opening = bytes[0];
    let closing = bytes[bytes.len() - 1];
    opening == closing && (opening == b'\'' || opening == b'"')
}
1955
1956pub fn parse_string_strict(working_set: &mut StateWorkingSet, span: Span) -> Expression {
1957    trace!("parsing: string, with required delimiters");
1958
1959    let bytes = working_set.get_span_contents(span);
1960
1961    // Check for unbalanced quotes:
1962    {
1963        let bytes = if bytes.starts_with(b"$") {
1964            &bytes[1..]
1965        } else {
1966            bytes
1967        };
1968        if bytes.starts_with(b"\"") && (bytes.len() == 1 || !bytes.ends_with(b"\"")) {
1969            working_set.error(ParseError::Unclosed("\"", span));
1970            return garbage(working_set, span);
1971        }
1972        if bytes.starts_with(b"\'") && (bytes.len() == 1 || !bytes.ends_with(b"\'")) {
1973            working_set.error(ParseError::Unclosed("\'", span));
1974            return garbage(working_set, span);
1975        }
1976        if bytes.starts_with(b"r#") && (bytes.len() == 1 || !bytes.ends_with(b"#")) {
1977            working_set.error(ParseError::Unclosed("r#", span));
1978            return garbage(working_set, span);
1979        }
1980    }
1981
1982    let (bytes, quoted) = if (bytes.starts_with(b"\"") && bytes.ends_with(b"\"") && bytes.len() > 1)
1983        || (bytes.starts_with(b"\'") && bytes.ends_with(b"\'") && bytes.len() > 1)
1984    {
1985        (&bytes[1..(bytes.len() - 1)], true)
1986    } else if (bytes.starts_with(b"$\"") && bytes.ends_with(b"\"") && bytes.len() > 2)
1987        || (bytes.starts_with(b"$\'") && bytes.ends_with(b"\'") && bytes.len() > 2)
1988    {
1989        (&bytes[2..(bytes.len() - 1)], true)
1990    } else {
1991        (bytes, false)
1992    };
1993
1994    if let Ok(token) = String::from_utf8(bytes.into()) {
1995        trace!("-- found {token}");
1996
1997        if quoted {
1998            Expression::new(working_set, Expr::String(token), span, Type::String)
1999        } else if token.contains(' ') {
2000            working_set.error(ParseError::Expected("string", span));
2001
2002            garbage(working_set, span)
2003        } else {
2004            Expression::new(working_set, Expr::String(token), span, Type::String)
2005        }
2006    } else {
2007        working_set.error(ParseError::Expected("string", span));
2008        garbage(working_set, span)
2009    }
2010}