kaish-kernel 0.8.2

Core kernel for kaish: lexer, parser, interpreter, and runtime
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
//! AST type definitions.

use std::fmt;

/// A complete kaish program is a sequence of statements.
#[derive(Debug, Clone, PartialEq)]
pub struct Program {
    /// The program's statements, held as an ordered sequence of [`Stmt`].
    pub statements: Vec<Stmt>,
}

/// A single statement in kaish.
///
/// Each variant is one syntactic form. Compound forms wrap a dedicated node
/// type (for example [`IfStmt`] or [`ForLoop`]); the chain and env-prefix
/// variants hold their child statements directly, boxed because the type is
/// recursive. [`Stmt::kind_name`] maps every variant to a short label.
#[derive(Debug, Clone, PartialEq)]
pub enum Stmt {
    /// Variable assignment: `NAME=value` or `local NAME = value`
    Assignment(Assignment),
    /// Simple command: `tool arg1 arg2`
    Command(Command),
    /// Pipeline: `a | b | c`
    Pipeline(Pipeline),
    /// Conditional: `if cond; then ...; fi`
    If(IfStmt),
    /// Loop: `for X in items; do ...; done`
    For(ForLoop),
    /// While loop: `while cond; do ...; done`
    While(WhileLoop),
    /// Case statement: `case expr in pattern) ... ;; esac`
    Case(CaseStmt),
    /// Break out of loop: `break` or `break N`.
    /// The payload is the optional `N` (presumably `None` for a bare `break`).
    Break(Option<usize>),
    /// Continue to next iteration: `continue` or `continue N`.
    /// The payload is the optional `N` (presumably `None` for a bare `continue`).
    Continue(Option<usize>),
    /// Return from tool: `return` or `return expr`.
    /// The payload is the optional expression.
    Return(Option<Box<Expr>>),
    /// Exit the script: `exit` or `exit code`.
    /// The payload is the optional exit-code expression.
    Exit(Option<Box<Expr>>),
    /// Tool definition: `tool name(params) { body }`
    ToolDef(ToolDef),
    /// Test expression: `[[ -f path ]]` or `[[ $X == "value" ]]`
    Test(TestExpr),
    /// Statement chain with `&&`: run right only if left succeeds
    AndChain { left: Box<Stmt>, right: Box<Stmt> },
    /// Statement chain with `||`: run right only if left fails
    OrChain { left: Box<Stmt>, right: Box<Stmt> },
    /// Inline env prefix: `NAME=value... command`. The assignments are exported
    /// for the duration of `body` only (bash-style command-scoped environment)
    /// and do not persist after it — distinct from a plain `Assignment`, which
    /// is persistent. `body` is always a command or pipeline.
    EnvScoped { assignments: Vec<Assignment>, body: Box<Stmt> },
    /// Empty statement (newline or semicolon only)
    Empty,
}

impl Stmt {
    /// Human-readable variant name for tracing spans.
    pub fn kind_name(&self) -> &'static str {
        match self {
            Stmt::Assignment(_) => "assignment",
            Stmt::Command(_) => "command",
            Stmt::Pipeline(_) => "pipeline",
            Stmt::If(_) => "if",
            Stmt::For(_) => "for",
            Stmt::While(_) => "while",
            Stmt::Case(_) => "case",
            Stmt::Break(_) => "break",
            Stmt::Continue(_) => "continue",
            Stmt::Return(_) => "return",
            Stmt::Exit(_) => "exit",
            Stmt::ToolDef(_) => "tooldef",
            Stmt::Test(_) => "test",
            Stmt::AndChain { .. } => "and_chain",
            Stmt::OrChain { .. } => "or_chain",
            Stmt::EnvScoped { .. } => "env_scoped",
            Stmt::Empty => "empty",
        }
    }
}

/// Variable assignment: `NAME=value` (bash-style) or `local NAME = value` (scoped)
#[derive(Debug, Clone, PartialEq)]
pub struct Assignment {
    /// Name of the variable being assigned (`NAME` in the forms above).
    pub name: String,
    /// Expression whose value is assigned to the variable.
    pub value: Expr,
    /// True if declared with `local` keyword (explicit local scope)
    pub local: bool,
}

/// A command invocation with arguments and redirections.
#[derive(Debug, Clone, PartialEq)]
pub struct Command {
    /// Name of the command (tool) being invoked.
    pub name: String,
    /// Arguments passed to the command; see [`Arg`] for the supported forms.
    pub args: Vec<Arg>,
    /// I/O redirections attached to this command; see [`Redirect`].
    pub redirects: Vec<Redirect>,
}

/// A pipeline of commands connected by pipes.
#[derive(Debug, Clone, PartialEq)]
pub struct Pipeline {
    /// The commands of the pipeline (`a`, `b`, `c` in `a | b | c`).
    pub commands: Vec<Command>,
    /// Background flag for the whole pipeline.
    /// NOTE(review): presumably set by a trailing `&` — confirm against the parser.
    pub background: bool,
}

/// Conditional statement: `if cond; then ...; fi`.
#[derive(Debug, Clone, PartialEq)]
pub struct IfStmt {
    /// Condition expression that selects the branch.
    pub condition: Box<Expr>,
    /// Statements of the `then` branch.
    pub then_branch: Vec<Stmt>,
    /// Statements of the `else` branch, if one is present.
    pub else_branch: Option<Vec<Stmt>>,
}

/// For loop over items: `for X in items; do ...; done`.
#[derive(Debug, Clone, PartialEq)]
pub struct ForLoop {
    /// Name of the loop variable (`X` in the form above).
    pub variable: String,
    /// Items to iterate over. Each is evaluated, then word-split for iteration.
    pub items: Vec<Expr>,
    /// Statements making up the loop body.
    pub body: Vec<Stmt>,
}

/// While loop with condition: `while cond; do ...; done`.
#[derive(Debug, Clone, PartialEq)]
pub struct WhileLoop {
    /// Loop condition expression.
    pub condition: Box<Expr>,
    /// Statements making up the loop body.
    pub body: Vec<Stmt>,
}

/// Case statement for pattern matching.
///
/// Holds the scrutinee expression and a list of [`CaseBranch`]es.
///
/// ```kaish
/// case $VAR in
///     pattern1) commands ;;
///     pattern2|pattern3) commands ;;
///     *) default ;;
/// esac
/// ```
#[derive(Debug, Clone, PartialEq)]
pub struct CaseStmt {
    /// The expression to match against
    pub expr: Expr,
    /// The pattern branches
    pub branches: Vec<CaseBranch>,
}

/// A single branch in a case statement: `pattern1|pattern2) commands ;;`.
#[derive(Debug, Clone, PartialEq)]
pub struct CaseBranch {
    /// Glob patterns to match (separated by `|` in source; presumably one
    /// entry per alternative here).
    pub patterns: Vec<String>,
    /// Commands to execute if matched
    pub body: Vec<Stmt>,
}

/// User-defined tool: `tool name(params) { body }`.
#[derive(Debug, Clone, PartialEq)]
pub struct ToolDef {
    /// Name of the tool being defined.
    pub name: String,
    /// Declared parameters; see [`ParamDef`].
    pub params: Vec<ParamDef>,
    /// Statements making up the tool body.
    pub body: Vec<Stmt>,
}

/// Parameter definition for a tool.
#[derive(Debug, Clone, PartialEq)]
pub struct ParamDef {
    /// Parameter name.
    pub name: String,
    /// Type annotation, if one was given.
    pub param_type: Option<ParamType>,
    /// Default value expression, if one was given.
    pub default: Option<Expr>,
}

/// Parameter type annotation.
///
/// Used by [`ParamDef::param_type`]. The source spelling of each annotation
/// is not defined in this file.
#[derive(Debug, Clone, PartialEq)]
pub enum ParamType {
    /// String parameter. (This is the enum variant, not `std::string::String`.)
    String,
    /// Integer parameter.
    Int,
    /// Floating-point parameter.
    Float,
    /// Boolean parameter.
    Bool,
}

/// A command argument (positional or named).
///
/// `Named` and `WordAssign` both carry a key/value pair but are routed
/// differently: see the notes on each variant.
#[derive(Debug, Clone, PartialEq)]
pub enum Arg {
    /// Positional argument: `value`
    Positional(Expr),
    /// Long flag with attached value: `--key=value`. Always routes through
    /// `tool_args.named` regardless of the receiving command.
    Named { key: String, value: Expr },
    /// Bareword shell-assignment in argv position: `key=value`.
    ///
    /// Only commands on the kernel's shell-assignment allowlist (`export`,
    /// `alias`) consume this as a named arg; for every other command it's
    /// stringified to a positional `"key=value"`. This matches bash:
    /// `cat foo=bar` opens a file named `foo=bar`, not a magical key=value.
    WordAssign { key: String, value: Expr },
    /// Short flag: `-l`, `-v` (boolean flag)
    ShortFlag(String),
    /// Long flag: `--force`, `--verbose` (boolean flag)
    LongFlag(String),
    /// Double-dash marker: `--` - signals end of flags
    DoubleDash,
}

/// I/O redirection: a [`RedirectKind`] plus its operand.
#[derive(Debug, Clone, PartialEq)]
pub struct Redirect {
    /// Which redirection operator this is.
    pub kind: RedirectKind,
    /// Operand of the redirection, as an expression.
    /// NOTE(review): presumably a file path for the file forms and the body or
    /// word for here-doc / here-string; what the merge forms store here is not
    /// visible in this file — confirm against the parser.
    pub target: Expr,
}

/// Type of redirection.
///
/// The `Display` impl renders each variant as its operator token; `HereDoc`
/// prints as `<<` and `MergeStdout` prints as `1>&2`.
#[derive(Debug, Clone, PartialEq)]
pub enum RedirectKind {
    /// `>` stdout to file (overwrite)
    StdoutOverwrite,
    /// `>>` stdout to file (append)
    StdoutAppend,
    /// `<` stdin from file
    Stdin,
    /// `<<EOF ... EOF` stdin from here-doc
    HereDoc,
    /// `<<< word` stdin from here-string (bash-style)
    HereString,
    /// `2>` stderr to file
    Stderr,
    /// `&>` both stdout and stderr to file
    Both,
    /// `2>&1` merge stderr into stdout
    MergeStderr,
    /// `1>&2` or `>&2` merge stdout into stderr
    MergeStdout,
}

/// A `StringPart` together with its byte offset in the original source.
///
/// Used by [`Expr::HereDocBody`] so the validator and interpreter can attribute
/// diagnostics to a precise location inside an interpolated heredoc body.
/// Double-quoted strings continue to use the spanless [`Expr::Interpolated`];
/// universal spanning is a separate, larger refactor (see plan
/// `make-heredocs-precious-puzzle`).
///
/// `offset` and `len` are both measured in bytes, so together they describe
/// the source byte range `offset..offset + len`.
#[derive(Debug, Clone, PartialEq)]
pub struct SpannedPart {
    /// The part itself.
    pub part: StringPart,
    /// Byte offset of this part in the original source string.
    pub offset: usize,
    /// Byte length of the part's source representation.
    pub len: usize,
}

/// An expression that evaluates to a value.
///
/// Several expansion forms here (`VarWithDefault`, `VarLength`, `Positional`,
/// `AllArgs`, `ArgCount`, `Arithmetic`, `CommandSubst`, `LastExitCode`,
/// `CurrentPid`) have a same-named counterpart in [`StringPart`] for use
/// inside interpolated strings.
#[derive(Debug, Clone, PartialEq)]
pub enum Expr {
    /// Literal value
    Literal(Value),
    /// Variable reference: `${VAR}` or `${VAR.field}` or `$VAR`
    VarRef(VarPath),
    /// String with interpolation: `"hello ${NAME}"` or `"hello $NAME"`
    Interpolated(Vec<StringPart>),
    /// Interpolated heredoc body with per-part spans for diagnostic precision.
    ///
    /// Heredoc bodies use this variant; double-quoted strings still use
    /// `Interpolated` to keep the existing path untouched. `strip_tabs` is
    /// `true` for the `<<-EOF` form — leading tabs on each body line are
    /// stripped from `StringPart::Literal` content at materialization time
    /// (POSIX semantics); offsets in `parts` reference the verbatim source
    /// so spans remain meaningful.
    HereDocBody {
        parts: Vec<SpannedPart>,
        strip_tabs: bool,
    },
    /// Binary operation: `a && b`, `a || b`
    BinaryOp {
        left: Box<Expr>,
        op: BinaryOp,
        right: Box<Expr>,
    },
    /// Command substitution: `$(...)` — runs a statement block (the full grammar:
    /// pipelines, `&&`/`||` chains, `;`/newline sequences, `#` comments) and
    /// returns its accumulated stdout. A single `$(cmd)` is a one-statement block.
    CommandSubst(Vec<Stmt>),
    /// Test expression: `[[ -f path ]]` or `[[ $X == "value" ]]`
    Test(Box<TestExpr>),
    /// Positional parameter: `$0` through `$9`
    Positional(usize),
    /// All positional arguments: `$@`
    AllArgs,
    /// Argument count: `$#`
    ArgCount,
    /// Variable string length: `${#VAR}`
    VarLength(String),
    /// Variable with default: `${VAR:-default}` - use default if VAR is unset or empty
    /// The default can contain nested variable expansions and command substitutions
    VarWithDefault { name: String, default: Vec<StringPart> },
    /// Arithmetic expansion: `$((expr))` - evaluates to integer.
    /// The expression is carried as a string at this level (presumably the
    /// text between `$((` and `))`, evaluated later — TODO confirm).
    Arithmetic(String),
    /// Command as condition: `if grep -q pattern file; then` - exit code determines truthiness
    Command(Command),
    /// Last exit code: `$?`
    LastExitCode,
    /// Current shell PID: `$$`
    CurrentPid,
    /// Bare glob pattern: `*.txt`, `src/**/*.rs` — expanded during arg building
    GlobPattern(String),
}

/// Test expression for `[[ ... ]]` conditionals.
///
/// Leaf tests (`FileTest`, `StringTest`, `Comparison`) take [`Expr`] operands;
/// the logical variants (`And`, `Or`, `Not`) nest further `TestExpr`s.
#[derive(Debug, Clone, PartialEq)]
pub enum TestExpr {
    /// File test: `[[ -f path ]]`, `[[ -d path ]]`, etc.
    FileTest { op: FileTestOp, path: Box<Expr> },
    /// String test: `[[ -z str ]]`, `[[ -n str ]]`
    StringTest { op: StringTestOp, value: Box<Expr> },
    /// Comparison: `[[ $X == "value" ]]`, `[[ $NUM -gt 5 ]]`
    Comparison { left: Box<Expr>, op: TestCmpOp, right: Box<Expr> },
    /// Logical AND: `[[ -f a && -d b ]]` (short-circuit evaluation)
    And { left: Box<TestExpr>, right: Box<TestExpr> },
    /// Logical OR: `[[ -f a || -d b ]]` (short-circuit evaluation)
    Or { left: Box<TestExpr>, right: Box<TestExpr> },
    /// Logical NOT: `[[ ! -f file ]]`
    Not { expr: Box<TestExpr> },
}

/// File test operators for `[[ ]]`.
///
/// The `Display` impl renders each variant as its flag (for example `IsFile`
/// prints as `-f`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FileTestOp {
    /// `-e` - exists
    Exists,
    /// `-f` - is regular file
    IsFile,
    /// `-d` - is directory
    IsDir,
    /// `-r` - is readable
    Readable,
    /// `-w` - is writable
    Writable,
    /// `-x` - is executable
    Executable,
}

/// String test operators for `[[ ]]`.
///
/// The `Display` impl renders `IsEmpty` as `-z` and `IsNonEmpty` as `-n`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum StringTestOp {
    /// `-z` - string is empty
    IsEmpty,
    /// `-n` - string is non-empty
    IsNonEmpty,
}

/// Comparison operators for `[[ ]]` tests.
///
/// Follows bash-style `[[ ]]` conventions (`[[ ]]` is a shell extension, not
/// part of POSIX `test`): `==`/`!=`/`>`/`<`/`>=`/`<=` are string
/// (lexicographic) comparisons, while `-eq`/`-ne`/`-gt`/`-lt`/`-ge`/`-le`
/// are arithmetic comparisons that coerce string operands to numbers.
/// `=~` and `!~` are regex match and non-match.
///
/// The `Display` impl renders each variant as its operator token; `Eq`
/// prints as `==`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TestCmpOp {
    /// `==` / `=` — string equality
    Eq,
    /// `!=` — string inequality
    NotEq,
    /// `=~` — regex match
    Match,
    /// `!~` — regex not match
    NotMatch,
    /// `>` — string greater than (lexicographic)
    Gt,
    /// `<` — string less than (lexicographic)
    Lt,
    /// `>=` — string greater than or equal (lexicographic)
    GtEq,
    /// `<=` — string less than or equal (lexicographic)
    LtEq,
    /// `-eq` — numeric equality
    NumEq,
    /// `-ne` — numeric inequality
    NumNotEq,
    /// `-gt` — numeric greater than
    NumGt,
    /// `-lt` — numeric less than
    NumLt,
    /// `-ge` — numeric greater than or equal
    NumGtEq,
    /// `-le` — numeric less than or equal
    NumLtEq,
}

// Value and BlobRef live in kaish-types.
pub use kaish_types::{BlobRef, Value};

/// Path of a variable reference: `${VAR}` or `${VAR.field}`.
///
/// `$?` resolves to the previous command's exit code as an int. The validator
/// rejects field access on `$?` (use `kaish-last` for structured data).
/// Array indexing is not supported — use `jq` for JSON processing.
#[derive(Debug, Clone, PartialEq)]
pub struct VarPath {
    /// Path components; a plain `${VAR}` has a single segment holding the name.
    pub segments: Vec<VarSegment>,
}

impl VarPath {
    /// Build a reference to a plain variable: a path with exactly one
    /// [`VarSegment::Field`] holding `name`.
    pub fn simple(name: impl Into<String>) -> Self {
        let root = VarSegment::Field(name.into());
        VarPath {
            segments: vec![root],
        }
    }
}

/// One component of a [`VarPath`].
#[derive(Debug, Clone, PartialEq)]
pub enum VarSegment {
    /// A named component: the initial variable name, or a `.field` access.
    ///
    /// NOTE(review): the previous comment here said field access is only
    /// supported for special variables like `$?`, while the `VarPath` docs say
    /// the validator rejects field access on `$?` — confirm which is current.
    Field(String),
}

/// Part of an interpolated string.
///
/// Used as the element type of [`Expr::Interpolated`], wrapped with a span in
/// [`SpannedPart`], and nested inside the `default` of `VarWithDefault`.
/// Apart from `Literal` and `Var`, the variants share their names with the
/// expansion forms of [`Expr`].
#[derive(Debug, Clone, PartialEq)]
pub enum StringPart {
    /// Literal text
    Literal(String),
    /// Variable interpolation: `${VAR}` or `$VAR`
    Var(VarPath),
    /// Variable with default: `${VAR:-default}` where default can contain nested expansions
    VarWithDefault { name: String, default: Vec<StringPart> },
    /// Variable string length: `${#VAR}`
    VarLength(String),
    /// Positional parameter: `$0`, `$1`, ..., `$9`
    Positional(usize),
    /// All arguments: `$@`
    AllArgs,
    /// Argument count: `$#`
    ArgCount,
    /// Arithmetic expansion: `$((expr))`
    Arithmetic(String),
    /// Command substitution: `$(...)` embedded in a string — runs a statement
    /// block (full grammar; see `Expr::CommandSubst`) and inlines its stdout.
    CommandSubst(Vec<Stmt>),
    /// Last exit code: `$?`
    LastExitCode,
    /// Current shell PID: `$$`
    CurrentPid,
}

/// Logical connectives that chain command/test conditions: `&&` and `||`.
///
/// Value-level comparisons (`==`, `-eq`, `-gt`, …) are not part of this enum;
/// they live on [`TestCmpOp`] inside `[[ ]]`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BinaryOp {
    /// `&&` - logical and (short-circuit)
    And,
    /// `||` - logical or (short-circuit)
    Or,
}

/// Renders the operator as its source token.
impl fmt::Display for BinaryOp {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Pick the token first, then emit it with a single write.
        let token = match self {
            Self::And => "&&",
            Self::Or => "||",
        };
        f.write_str(token)
    }
}

impl fmt::Display for RedirectKind {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            RedirectKind::StdoutOverwrite => write!(f, ">"),
            RedirectKind::StdoutAppend => write!(f, ">>"),
            RedirectKind::Stdin => write!(f, "<"),
            RedirectKind::HereDoc => write!(f, "<<"),
            RedirectKind::HereString => write!(f, "<<<"),
            RedirectKind::Stderr => write!(f, "2>"),
            RedirectKind::Both => write!(f, "&>"),
            RedirectKind::MergeStderr => write!(f, "2>&1"),
            RedirectKind::MergeStdout => write!(f, "1>&2"),
        }
    }
}

impl fmt::Display for FileTestOp {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            FileTestOp::Exists => write!(f, "-e"),
            FileTestOp::IsFile => write!(f, "-f"),
            FileTestOp::IsDir => write!(f, "-d"),
            FileTestOp::Readable => write!(f, "-r"),
            FileTestOp::Writable => write!(f, "-w"),
            FileTestOp::Executable => write!(f, "-x"),
        }
    }
}

impl fmt::Display for StringTestOp {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            StringTestOp::IsEmpty => write!(f, "-z"),
            StringTestOp::IsNonEmpty => write!(f, "-n"),
        }
    }
}

impl fmt::Display for TestCmpOp {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            TestCmpOp::Eq => write!(f, "=="),
            TestCmpOp::NotEq => write!(f, "!="),
            TestCmpOp::Match => write!(f, "=~"),
            TestCmpOp::NotMatch => write!(f, "!~"),
            TestCmpOp::Gt => write!(f, ">"),
            TestCmpOp::Lt => write!(f, "<"),
            TestCmpOp::GtEq => write!(f, ">="),
            TestCmpOp::LtEq => write!(f, "<="),
            TestCmpOp::NumEq => write!(f, "-eq"),
            TestCmpOp::NumNotEq => write!(f, "-ne"),
            TestCmpOp::NumGt => write!(f, "-gt"),
            TestCmpOp::NumLt => write!(f, "-lt"),
            TestCmpOp::NumGtEq => write!(f, "-ge"),
            TestCmpOp::NumLtEq => write!(f, "-le"),
        }
    }
}