zshrs 0.9.2

The first compiled Unix shell — bytecode VM, worker pool, AOP intercept, SQLite caching
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
//! Constants used in the programmatic representation of fish code.

use crate::prelude::*;
use fish_fallback::{fish_wcswidth, fish_wcwidth};

/// Alias for the 32-bit unsigned integer type; by its name, intended for offsets into source
/// text.
pub type SourceOffset = u32;

/// The largest value a [`SourceOffset`] can hold, widened to `usize`.
/// NOTE(review): presumably used as an "invalid offset" marker; no use is visible in this file.
pub const SOURCE_OFFSET_INVALID: usize = SourceOffset::MAX as _;
/// Sentinel `source_start` value (`usize::MAX`) meaning the location is unknown.
/// `parse_error_offset_source_start` leaves locations equal to this value untouched.
pub const SOURCE_LOCATION_UNKNOWN: usize = usize::MAX;

/// Boolean options controlling how a parse tree is built. Every flag defaults to `false`.
#[derive(Copy, Clone, Default)]
pub struct ParseTreeFlags {
    /// Attempt to build a "parse tree" no matter what. This may result in a 'forest' of
    /// disconnected trees. This is intended to be used by syntax highlighting.
    pub continue_after_error: bool,
    /// Include comment tokens.
    pub include_comments: bool,
    /// Indicate that the tokenizer should accept incomplete tokens.
    pub accept_incomplete_tokens: bool,
    /// Indicate that the parser should not generate the terminate token, allowing an 'unfinished'
    /// tree where some nodes may have no productions.
    pub leave_unterminated: bool,
    /// Indicate that the parser should generate job_list entries for blank lines.
    pub show_blank_lines: bool,
    /// Indicate that extra semis should be generated.
    pub show_extra_semis: bool,
}

/// Represents parse issues found during validation.
/// If this is returned as the error of a Result, then either `error` or `incomplete` (or both) is set.
/// The `Default` value has both flags cleared.
#[derive(Copy, Clone, Debug, Default, Eq, PartialEq)]
pub struct ParseIssue {
    /// An error was found.
    pub error: bool,
    /// Incomplete input, such as unclosed block or pipe.
    pub incomplete: bool,
}

impl ParseIssue {
    pub const ERROR: Result<(), Self> = Err(Self {
        error: true,
        incomplete: false,
    });

    pub const INCOMPLETE: Result<(), Self> = Err(Self {
        error: false,
        incomplete: true,
    });
}

/// A range of source code, stored as a 32-bit start offset plus a 32-bit length.
#[derive(PartialEq, Eq, Clone, Copy, Debug, Default)]
pub struct SourceRange {
    /// Offset at which the range begins.
    pub start: u32,
    /// Number of positions covered; `start + length` is one past the end of the range.
    pub length: u32,
}

impl SourceRange {
    /// Convert this range into a standard half-open `Range<usize>` via the `From` conversion.
    pub fn as_usize(self) -> std::ops::Range<usize> {
        std::ops::Range::<usize>::from(self)
    }
}

/// The type of a parse token. `Invalid` is the default and has the explicit discriminant 1;
/// the remaining variants are numbered consecutively after it.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum ParseTokenType {
    /// The default value.
    #[default]
    Invalid = 1,

    // Terminal types.
    String,
    Pipe,
    LeftBrace,
    RightBrace,
    Redirection,
    Background,
    AndAnd,
    OrOr,
    End,
    /// Special terminal type that means no more tokens forthcoming.
    Terminate,
    // Very special terminal types that don't appear in the production list.
    Error,
    TokenizerError,
    Comment,
}

/// The keywords recognized in source text, plus the `None` sentinel.
/// The textual spelling of each keyword is given by `ParseKeyword::to_wstr`.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum ParseKeyword {
    /// 'None' is not a keyword, it is a sentinel indicating nothing.
    /// Note it proves convenient to keep this as a value rather than using Option.
    #[default]
    None,
    And,
    Begin,
    Builtin,
    Case,
    Command,
    Else,
    End,
    /// The `!` keyword.
    Exclam,
    Exec,
    For,
    Function,
    If,
    In,
    Not,
    Or,
    Switch,
    Time,
    While,
}

/// Statement decorations like 'command' or 'exec'.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum StatementDecoration {
    /// No decoration.
    None,
    Command,
    Builtin,
    Exec,
}

/// Parse error code list.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum ParseErrorCode {
    /// The default code.
    #[default]
    None,

    // Matching values from enum parser_error.
    Syntax,
    CmdSubst,

    /// Unclassified error types.
    Generic,

    // Tokenizer errors.
    TokenizerUnterminatedQuote,
    TokenizerUnterminatedSubshell,
    TokenizerUnterminatedSlice,
    TokenizerUnterminatedEscape,
    TokenizerOther,

    /// `end` outside of block.
    UnbalancingEnd,
    /// `else` outside of if.
    UnbalancingElse,
    /// `case` outside of switch.
    UnbalancingCase,
    /// `}` outside of `{`.
    UnbalancingBrace,
    /// `a=b` without command.
    BareVariableAssignment,
    /// "and" or "or" after a pipe.
    AndOrInPipeline,
}

/// The position of a command relative to a pipeline.
#[derive(Clone, Copy, Eq, PartialEq)]
pub enum PipelinePosition {
    /// Not part of a pipeline.
    None,
    /// First command in a pipeline.
    First,
    /// Second or further command in a pipeline.
    Subsequent,
}

impl SourceRange {
    pub fn new(start: usize, length: usize) -> Self {
        SourceRange {
            start: start.try_into().unwrap(),
            length: length.try_into().unwrap(),
        }
    }
    pub fn start(self) -> usize {
        self.start.try_into().unwrap()
    }
    pub fn length(self) -> usize {
        self.length.try_into().unwrap()
    }
    pub fn end(self) -> usize {
        self.start
            .checked_add(self.length)
            .expect("Overflow")
            .try_into()
            .unwrap()
    }
    pub fn combine(self, other: Self) -> Self {
        let start = std::cmp::min(self.start, other.start);
        SourceRange {
            start,
            length: std::cmp::max(self.end(), other.end())
                .checked_sub(start.try_into().unwrap())
                .expect("Overflow")
                .try_into()
                .unwrap(),
        }
    }

    // Return true if a location is in this range, including one-past-the-end.
    pub fn contains_inclusive(self, loc: usize) -> bool {
        self.start() <= loc && loc - self.start() <= self.length()
    }
}

impl From<SourceRange> for std::ops::Range<usize> {
    /// Convert to the half-open range running from `start()` to `end()`.
    fn from(value: SourceRange) -> Self {
        let first = value.start();
        let past_last = value.end();
        first..past_last
    }
}

impl ParseTokenType {
    /// Return a string describing the token type.
    pub fn to_wstr(self) -> &'static wstr {
        match self {
            ParseTokenType::Comment => L!("ParseTokenType::comment"),
            ParseTokenType::Error => L!("ParseTokenType::error"),
            ParseTokenType::TokenizerError => L!("ParseTokenType::tokenizer_error"),
            ParseTokenType::Background => L!("ParseTokenType::background"),
            ParseTokenType::End => L!("ParseTokenType::end"),
            ParseTokenType::Pipe => L!("ParseTokenType::pipe"),
            ParseTokenType::LeftBrace => L!("ParseTokenType::lbrace"),
            ParseTokenType::RightBrace => L!("ParseTokenType::rbrace"),
            ParseTokenType::Redirection => L!("ParseTokenType::redirection"),
            ParseTokenType::String => L!("ParseTokenType::string"),
            ParseTokenType::AndAnd => L!("ParseTokenType::andand"),
            ParseTokenType::OrOr => L!("ParseTokenType::oror"),
            ParseTokenType::Terminate => L!("ParseTokenType::terminate"),
            ParseTokenType::Invalid => L!("ParseTokenType::invalid"),
        }
    }
}

impl ParseKeyword {
    /// Return the keyword as a string.
    pub fn to_wstr(self) -> &'static wstr {
        match self {
            ParseKeyword::And => L!("and"),
            ParseKeyword::Begin => L!("begin"),
            ParseKeyword::Builtin => L!("builtin"),
            ParseKeyword::Case => L!("case"),
            ParseKeyword::Command => L!("command"),
            ParseKeyword::Else => L!("else"),
            ParseKeyword::End => L!("end"),
            ParseKeyword::Exclam => L!("!"),
            ParseKeyword::Exec => L!("exec"),
            ParseKeyword::For => L!("for"),
            ParseKeyword::Function => L!("function"),
            ParseKeyword::If => L!("if"),
            ParseKeyword::In => L!("in"),
            ParseKeyword::Not => L!("not"),
            ParseKeyword::Or => L!("or"),
            ParseKeyword::Switch => L!("switch"),
            ParseKeyword::Time => L!("time"),
            ParseKeyword::While => L!("while"),
            _ => L!("unknown_keyword"),
        }
    }
}

impl fish_printf::ToArg<'static> for ParseKeyword {
    /// Pass the keyword to the printf machinery as its static wide-string spelling.
    fn to_arg(self) -> fish_printf::Arg<'static> {
        let spelling = self.to_wstr();
        fish_printf::Arg::WStr(spelling)
    }
}

impl From<&wstr> for ParseKeyword {
    fn from(s: &wstr) -> Self {
        // Note this is called in hot loops.
        let c0 = s.as_char_slice().first().copied().unwrap_or('\0');
        match c0 {
            '!' if s == L!("!") => ParseKeyword::Exclam,
            'a' if s == L!("and") => ParseKeyword::And,
            'b' if s == L!("begin") => ParseKeyword::Begin,
            'b' if s == L!("builtin") => ParseKeyword::Builtin,
            'c' if s == L!("case") => ParseKeyword::Case,
            'c' if s == L!("command") => ParseKeyword::Command,
            'e' if s == L!("else") => ParseKeyword::Else,
            'e' if s == L!("end") => ParseKeyword::End,
            'e' if s == L!("exec") => ParseKeyword::Exec,
            'f' if s == L!("for") => ParseKeyword::For,
            'f' if s == L!("function") => ParseKeyword::Function,
            'i' if s == L!("if") => ParseKeyword::If,
            'i' if s == L!("in") => ParseKeyword::In,
            'n' if s == L!("not") => ParseKeyword::Not,
            'o' if s == L!("or") => ParseKeyword::Or,
            's' if s == L!("switch") => ParseKeyword::Switch,
            't' if s == L!("time") => ParseKeyword::Time,
            'w' if s == L!("while") => ParseKeyword::While,
            _ => ParseKeyword::None,
        }
    }
}

/// A single parse error: message text, error code, and the source span that triggered it.
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub struct ParseError {
    /// Text of the error.
    pub text: WString,
    /// Code for the error.
    pub code: ParseErrorCode,
    /// Offset of the token in the source code that triggered this error.
    /// May be `SOURCE_LOCATION_UNKNOWN` when the location is not known.
    pub source_start: usize,
    /// Length of the token in the source code that triggered this error.
    pub source_length: usize,
}

impl ParseError {
    /// Return a string describing the error, suitable for presentation to the user.
    ///
    /// This is `describe_with_prefix` with an empty prefix and `skip_caret == false`: the
    /// offending source line and a caret marker are appended, except when `is_interactive` is
    /// true and the error starts at offset 0 of `src`.
    pub fn describe(self: &ParseError, src: &wstr, is_interactive: bool) -> WString {
        self.describe_with_prefix(src, L!(""), is_interactive, false)
    }

    /// Return a string describing the error, suitable for presentation to the user, with the given
    /// prefix. If skip_caret is false, the offending line with a caret is printed as well.
    ///
    /// * `src` - the source text that `source_start` / `source_length` index into.
    /// * `prefix` - if non-empty, the message is formatted as `"<prefix>: <text>"`.
    /// * `is_interactive` - if true and the error starts at offset 0, the source line and caret
    ///   are omitted.
    /// * `skip_caret` - if true, only the (possibly prefixed) message is returned; an empty
    ///   message then yields an empty string.
    ///
    /// A location past the end of `src` (including `SOURCE_LOCATION_UNKNOWN`) is clamped to the
    /// last character of `src`. An empty `src` is handled without panicking.
    pub fn describe_with_prefix(
        self: &ParseError,
        src: &wstr,
        prefix: &wstr,
        is_interactive: bool,
        skip_caret: bool,
    ) -> WString {
        if skip_caret && self.text.is_empty() {
            return L!("").to_owned();
        }

        let mut result = if prefix.is_empty() {
            self.text.clone()
        } else {
            wgettext_fmt!("%s: %s", prefix, &self.text)
        };

        if skip_caret {
            return result;
        }

        let mut start = self.source_start;
        let mut len = self.source_length;
        if start >= src.len() {
            // If we are past the source, we clamp it to the end.
            // `saturating_sub` keeps an empty `src` from underflowing.
            start = src.len().saturating_sub(1);
            len = 0;
        }

        // Trim the range so it does not run past the end of the source. `start <= src.len()`
        // holds here, so the subtraction cannot underflow, and no `start + len` sum is formed
        // that could overflow for a huge `source_length`.
        len = len.min(src.len() - start);

        // Locate the beginning of this line of source.
        // Look for a newline prior to source_start. If we don't find one, start at the beginning of
        // the string; otherwise start one past the newline. Note that source_start may itself point
        // at a newline; we want to find the newline before it.
        let line_start = src.as_char_slice()[..start]
            .iter()
            .rposition(|c| *c == '\n')
            .map_or(0, |newline_idx| newline_idx + 1);

        // Look for the newline after the source range. If the source range itself includes a
        // newline, that's the one we want, so start just before the end of the range.
        let last_char_in_range = if len == 0 { start } else { start + len - 1 };
        let line_end = src.as_char_slice()[last_char_in_range..]
            .iter()
            .position(|c| *c == '\n')
            .map_or(src.len(), |pos| pos + last_char_in_range);
        // We can only report squiggles on one line
        if start + len > line_end {
            len = line_end - start;
        }

        assert!(line_end >= line_start);
        assert!(start >= line_start);

        // Don't include the caret and line if we're interactive and this is the first line, because
        // then it's obvious.
        let interactive_skip_caret = is_interactive && start == 0;
        if interactive_skip_caret {
            return result;
        }

        // Append the line of text.
        if !result.is_empty() {
            result += "\n";
        }
        result += wstr::from_char_slice(&src.as_char_slice()[line_start..line_end]);

        // Append the caret line. The input source may include tabs; for that reason we
        // construct a "caret line" that has tabs in corresponding positions.
        let mut caret_space_line = WString::new();
        caret_space_line.reserve(start - line_start);
        for &wc in &src.as_char_slice()[line_start..start] {
            if wc == '\t' {
                caret_space_line += "\t";
            } else if wc == '\n' {
                // It's possible that the start points at a newline itself. In that case,
                // pretend it's a space. We only expect this to be at the end of the string.
                caret_space_line += " ";
            } else if let Some(width) = fish_wcwidth(wc) {
                // Pad by the character's display width so the caret lines up under
                // wide characters; characters without a width contribute nothing.
                caret_space_line += " ".repeat(width).as_str();
            }
        }
        result += "\n";
        result.push_utfstr(&caret_space_line);
        result += "^";
        if len > 1 {
            // Add a squiggle under the error location.
            // We do it like this
            //               ^~~^
            // With a "^" under the start and end, and squiggles in-between.
            let width = fish_wcswidth(&src[start..start + len]).unwrap_or_default();
            if width >= 2 {
                // Subtract one for each of the carets - this is important in case
                // the starting char has a width of > 1.
                result += "~".repeat(width - 2).as_str();
                result += "^";
            }
        }
        result
    }
}

pub fn token_type_user_presentable_description(
    type_: ParseTokenType,
    keyword: ParseKeyword,
) -> WString {
    if keyword != ParseKeyword::None {
        return sprintf!("keyword: '%s'", keyword.to_wstr());
    }
    match type_ {
        ParseTokenType::String => L!("a string").to_owned(),
        ParseTokenType::Pipe => L!("a pipe").to_owned(),
        ParseTokenType::Redirection => L!("a redirection").to_owned(),
        ParseTokenType::Background => L!("a '&'").to_owned(),
        ParseTokenType::LeftBrace => L!("a '{'").to_owned(),
        ParseTokenType::RightBrace => L!("a '}'").to_owned(),
        ParseTokenType::AndAnd => L!("'&&'").to_owned(),
        ParseTokenType::OrOr => L!("'||'").to_owned(),
        ParseTokenType::End => L!("end of the statement").to_owned(),
        ParseTokenType::Terminate => L!("end of the input").to_owned(),
        ParseTokenType::Error => L!("a parse error").to_owned(),
        ParseTokenType::TokenizerError => L!("an incomplete token").to_owned(),
        ParseTokenType::Comment => L!("a comment").to_owned(),
        _ => sprintf!("a %s", type_.to_wstr()),
    }
}

/// A list of parse errors.
pub type ParseErrorList = Vec<ParseError>;

/// Helper function to offset error positions by the given amount. This is used when determining
/// errors in a substring of a larger source buffer.
pub fn parse_error_offset_source_start(errors: &mut ParseErrorList, amt: usize) {
    if amt > 0 {
        for ref mut error in errors.iter_mut() {
            // Preserve the special meaning of -1 as 'unknown'.
            if error.source_start != SOURCE_LOCATION_UNKNOWN {
                error.source_start += amt;
            }
        }
    }
}

/// Maximum number of function calls.
pub const FISH_MAX_STACK_DEPTH: isize = 128;

/// Maximum number of nested string substitutions (in lieu of evals)
/// Reduced under TSAN: our CI test creates 500 jobs and this is very slow with TSAN.
#[cfg(feature = "tsan")]
pub const FISH_MAX_EVAL_DEPTH: isize = 250;
/// Maximum number of nested string substitutions (in lieu of evals).
/// This is the value used when the `tsan` feature is not enabled.
#[cfg(not(feature = "tsan"))]
pub const FISH_MAX_EVAL_DEPTH: isize = 500;

// Message constants declared through `localizable_consts!` (the macro is defined outside this
// file). Each entry below is a doc comment, a `pub` name, and the message text; several
// messages contain printf-style `%s` / `%c` specifiers.
localizable_consts!(
    /// Error message on a function that calls itself immediately.
    pub INFINITE_FUNC_RECURSION_ERR_MSG
    "The function '%s' calls itself immediately, which would result in an infinite loop."

    /// Error message on reaching maximum call stack depth.
    pub CALL_STACK_LIMIT_EXCEEDED_ERR_MSG
    "The call stack limit has been exceeded. Do you have an accidental infinite loop?"

    /// Error message when encountering an unknown builtin name.
    pub UNKNOWN_BUILTIN_ERR_MSG
    "Unknown builtin '%s'"

    /// Error message when encountering a failed expansion, e.g. for the variable name in for loops.
    pub FAILED_EXPANSION_VARIABLE_NAME_ERR_MSG
    "Unable to expand variable name '%s'"

    /// Error message when encountering an illegal file descriptor.
    pub ILLEGAL_FD_ERR_MSG
    "Illegal file descriptor in redirection '%s'"

    /// Error message for wildcards with no matches.
    pub WILDCARD_ERR_MSG
    "No matches for wildcard '%s'. See `help %s`."

    /// Error when using break outside of loop.
    pub INVALID_BREAK_ERR_MSG
    "'break' while not inside of loop"

    /// Error when using continue outside of loop.
    pub INVALID_CONTINUE_ERR_MSG
    "'continue' while not inside of loop"

    /// Error message when a command may not be in a pipeline.
    pub INVALID_PIPELINE_CMD_ERR_MSG
    "The '%s' command can not be used in a pipeline"

    // Error messages. A numeric suffix on a name (e.g. `ERROR_BAD_VAR_CHAR1`) is a reminder of
    // how many format specifiers the message contains.

    /// Error for $^.
    pub ERROR_BAD_VAR_CHAR1
    "$%c is not a valid variable in fish."

    /// Error for ${a}.
    pub ERROR_BRACKETED_VARIABLE1
    "Variables cannot be bracketed. In fish, please use {$%s}."

    /// Error for "${a}".
    pub ERROR_BRACKETED_VARIABLE_QUOTED1
    "Variables cannot be bracketed. In fish, please use \"$%s\"."

    /// Error issued on $?.
    pub ERROR_NOT_STATUS
    "$? is not the exit status. In fish, please use $status."

    /// Error issued on $$.
    pub ERROR_NOT_PID
    "$$ is not the pid. In fish, please use $fish_pid."

    /// Error issued on $#.
    pub ERROR_NOT_ARGV_COUNT
    "$# is not supported. In fish, please use 'count $argv'."

    /// Error issued on $@.
    pub ERROR_NOT_ARGV_AT
    "$@ is not supported. In fish, please use $argv."

    /// Error issued on $*.
    pub ERROR_NOT_ARGV_STAR
    "$* is not supported. In fish, please use $argv."

    /// Error issued on $.
    pub ERROR_NO_VAR_NAME
    "Expected a variable name after this $."

    /// Error message for Posix-style assignment: foo=bar.
    pub ERROR_BAD_COMMAND_ASSIGN_ERR_MSG
    "Unsupported use of '='. In fish, please use 'set %s %s'."

    /// Error message for a command like `time foo &`.
    pub ERROR_TIME_BACKGROUND
    "'time' is not supported for background jobs. Consider using 'command time'."
);