kaish_kernel/scheduler/
pipeline.rs

1//! Pipeline execution for kaish.
2//!
3//! Executes a sequence of commands connected by pipes, where the stdout
4//! of each command becomes the stdin of the next.
5//!
6//! Also handles scatter/gather pipelines for parallel execution.
7
8use std::sync::Arc;
9
10use std::collections::HashMap;
11
12use crate::arithmetic;
13use crate::ast::{Arg, Command, Expr, Redirect, RedirectKind, Value};
14use crate::dispatch::{CommandDispatcher, PipelinePosition};
15use crate::interpreter::ExecResult;
16use crate::tools::{ExecContext, ToolArgs, ToolRegistry, ToolSchema};
17use tokio::io::AsyncWriteExt;
18
19use super::pipe_stream::pipe_stream_default;
20use super::scatter::{
21    parse_gather_options, parse_scatter_options, ScatterGatherRunner,
22};
23
24/// Apply redirects to an execution result.
25///
26/// Pre-execution redirects (Stdin, HereDoc) should be handled before calling.
27/// Post-execution redirects (stdout/stderr to file, merge) applied here.
28/// Redirects are processed left-to-right per POSIX.
29async fn apply_redirects(
30    mut result: ExecResult,
31    redirects: &[Redirect],
32    ctx: &ExecContext,
33) -> ExecResult {
34    // Defer materialization of OutputData → result.out to individual redirect
35    // handlers. File redirects (Overwrite/Append) can stream OutputData directly
36    // to disk via write_canonical(), avoiding OOM on large structured output.
37    // Merge redirects and the fallthrough path materialize on demand.
38    for redir in redirects {
39        match redir.kind {
40            RedirectKind::MergeStderr => {
41                // 2>&1 - append stderr to stdout
42                // Ensure output is materialized for merge
43                result.materialize();
44                if !result.err.is_empty() {
45                    let err = std::mem::take(&mut result.err);
46                    result.push_out(&err);
47                }
48            }
49            RedirectKind::MergeStdout => {
50                // 1>&2 or >&2 - append stdout to stderr
51                result.materialize();
52                if !result.text_out().is_empty() {
53                    let out = result.text_out().into_owned();
54                    result.err.push_str(&out);
55                    result.clear_out();
56                }
57            }
58            RedirectKind::StdoutOverwrite => {
59                let path = match eval_redirect_target(&redir.target, ctx).await {
60                    Ok(p) => p,
61                    Err(e) => return ExecResult::failure(1, format!("redirect: {e}")),
62                };
63                // Stream OutputData directly to file if available
64                if let Some(output) = result.take_output_for_stream() {
65                    let mut buf = Vec::new();
66                    if let Err(e) = output.write_canonical(&mut buf, None) {
67                        return ExecResult::failure(1, format!("redirect: {e}"));
68                    }
69                    if let Err(e) = redirect_write(ctx, &path, &buf).await {
70                        return ExecResult::failure(1, format!("redirect: {e}"));
71                    }
72                } else {
73                    if let Err(e) = redirect_write(ctx, &path, result.text_out().as_bytes()).await {
74                        return ExecResult::failure(1, format!("redirect: {e}"));
75                    }
76                }
77                result.clear_out();
78                result.set_output(None);
79            }
80            RedirectKind::StdoutAppend => {
81                let path = match eval_redirect_target(&redir.target, ctx).await {
82                    Ok(p) => p,
83                    Err(e) => return ExecResult::failure(1, format!("redirect: {e}")),
84                };
85                // Stream OutputData directly if available
86                if let Some(output) = result.take_output_for_stream() {
87                    let mut buf = Vec::new();
88                    if let Err(e) = output.write_canonical(&mut buf, None) {
89                        return ExecResult::failure(1, format!("redirect: {e}"));
90                    }
91                    if let Err(e) = redirect_append(ctx, &path, &buf).await {
92                        return ExecResult::failure(1, format!("redirect: {e}"));
93                    }
94                } else {
95                    if let Err(e) = redirect_append(ctx, &path, result.text_out().as_bytes()).await {
96                        return ExecResult::failure(1, format!("redirect: {e}"));
97                    }
98                }
99                result.clear_out();
100                result.set_output(None);
101            }
102            RedirectKind::Stderr => {
103                let path = match eval_redirect_target(&redir.target, ctx).await {
104                    Ok(p) => p,
105                    Err(e) => return ExecResult::failure(1, format!("redirect: {e}")),
106                };
107                if let Err(e) = redirect_write(ctx, &path, result.err.as_bytes()).await {
108                    return ExecResult::failure(1, format!("redirect: {e}"));
109                }
110                result.err.clear();
111            }
112            RedirectKind::Both => {
113                let path = match eval_redirect_target(&redir.target, ctx).await {
114                    Ok(p) => p,
115                    Err(e) => return ExecResult::failure(1, format!("redirect: {e}")),
116                };
117                let combined = format!("{}{}", result.text_out(), result.err);
118                if let Err(e) = redirect_write(ctx, &path, combined.as_bytes()).await {
119                    return ExecResult::failure(1, format!("redirect: {e}"));
120                }
121                result.clear_out();
122                result.set_output(None);
123                result.err.clear();
124            }
125            // Pre-execution redirects - already handled before command execution
126            RedirectKind::Stdin | RedirectKind::HereDoc | RedirectKind::HereString => {}
127        }
128    }
129    // Materialize any remaining OutputData into result.out.
130    // Callers (accumulate_result, pipeline piping) expect .out to be populated
131    // after apply_redirects returns. File redirects above consume .output directly
132    // via streaming; this only fires when no redirect consumed it.
133    result.materialize();
134    result
135}
136
137/// Evaluate a redirect target expression to get the file path (or heredoc body).
138///
139/// Routes through `ctx.dispatcher` so command substitution (`$(...)`) in the
140/// target runs — e.g. `cat < $(echo f)`, `echo x > $(echo f)`, and `$(...)`
141/// inside a heredoc body. Falls back to the sync evaluator (which skips
142/// command substitution) only when no dispatcher is attached.
143async fn eval_redirect_target(expr: &Expr, ctx: &ExecContext) -> Result<String, String> {
144    if let Some(dispatcher) = &ctx.dispatcher {
145        dispatcher
146            .eval_expr(expr, ctx)
147            .await
148            .map(|v| value_to_string(&v))
149            .map_err(|e| e.to_string())
150    } else {
151        eval_simple_expr(expr, ctx)
152            .map(|v| value_to_string(&v))
153            .ok_or_else(|| "could not evaluate redirect target".to_string())
154    }
155}
156
157/// Write data to a file via the VFS backend.
158///
159/// The redirect target is resolved against `ctx.cwd` (like every other path
160/// operand — see `cat`/`cp`/etc.), so a relative `> f` write and a later
161/// relative read agree on the same `$PWD/f`. Without this the router would
162/// normalize a bare relative path to `/f`, diverging from cwd-resolved reads.
163async fn redirect_write(ctx: &ExecContext, path: &str, data: &[u8]) -> Result<(), String> {
164    use crate::backend::WriteMode;
165    let resolved = ctx.resolve_path(path);
166    ctx.backend.write(&resolved, data, WriteMode::Overwrite).await.map_err(|e| e.to_string())
167}
168
169/// Append data to a file via the VFS backend.
170///
171/// Resolves the target against `ctx.cwd` for the same reason as `redirect_write`.
172async fn redirect_append(ctx: &ExecContext, path: &str, data: &[u8]) -> Result<(), String> {
173    let resolved = ctx.resolve_path(path);
174    ctx.backend.append(&resolved, data).await.map_err(|e| e.to_string())
175}
176
177/// Set up stdin from redirects (< file, <<heredoc).
178/// Called before command execution.
179///
180/// `< file` reads through the VFS backend (not the host filesystem) with the
181/// target resolved against `ctx.cwd`, mirroring how `cat` and the output
182/// redirects resolve their operands. A missing/unreadable file or non-UTF-8
183/// content is a hard error — we never silently feed the command empty stdin.
184async fn setup_stdin_redirects(cmd: &Command, ctx: &mut ExecContext) -> Result<(), String> {
185    use std::path::Path;
186    for redir in &cmd.redirects {
187        match &redir.kind {
188            RedirectKind::Stdin => {
189                let path = eval_redirect_target(&redir.target, ctx).await?;
190                let resolved = ctx.resolve_path(&path);
191                let data = ctx
192                    .backend
193                    .read(Path::new(&resolved), None)
194                    .await
195                    .map_err(|e| format!("redirect: {path}: {e}"))?;
196                let content = String::from_utf8(data)
197                    .map_err(|_| format!("redirect: {path}: invalid UTF-8"))?;
198                ctx.set_stdin(content);
199            }
200            RedirectKind::HereDoc => {
201                match &redir.target {
202                    Expr::Literal(Value::String(content)) => {
203                        ctx.set_stdin(content.clone());
204                    }
205                    // Heredoc bodies may contain `$(...)`; route through the
206                    // dispatcher so command substitution runs.
207                    expr => {
208                        let body = eval_redirect_target(expr, ctx).await?;
209                        ctx.set_stdin(body);
210                    }
211                }
212            }
213            RedirectKind::HereString => {
214                // Per bash, here-strings append a trailing newline to the
215                // expanded word so the command receives a terminated line.
216                let mut s = eval_redirect_target(&redir.target, ctx).await?;
217                s.push('\n');
218                ctx.set_stdin(s);
219            }
220            _ => {}
221        }
222    }
223    Ok(())
224}
225
226/// Runs pipelines by spawning tasks and connecting them via channels.
227#[derive(Clone)]
228pub struct PipelineRunner {
229    tools: Arc<ToolRegistry>,
230}
231
232impl PipelineRunner {
233    /// Create a new pipeline runner with the given tool registry.
234    pub fn new(tools: Arc<ToolRegistry>) -> Self {
235        Self { tools }
236    }
237
238    /// Execute a pipeline of commands.
239    ///
240    /// Each command's stdout becomes the next command's stdin.
241    /// If the pipeline contains scatter/gather, delegates to ScatterGatherRunner.
242    /// Returns the result of the last command in the pipeline.
243    ///
244    /// The `dispatcher` handles the full command resolution chain (user tools,
245    /// builtins, scripts, external commands, backend tools). The runner handles
246    /// I/O routing: stdin redirects, piping between commands, and output redirects.
247    #[tracing::instrument(level = "debug", skip(self, commands, ctx, dispatcher), fields(command_count = commands.len()))]
248    pub async fn run(
249        &self,
250        commands: &[Command],
251        ctx: &mut ExecContext,
252        dispatcher: &dyn CommandDispatcher,
253    ) -> ExecResult {
254        if commands.is_empty() {
255            return ExecResult::success("");
256        }
257
258        // Check for scatter/gather pipeline
259        if let Some((scatter_idx, gather_idx)) = find_scatter_gather(commands) {
260            return self.run_scatter_gather(commands, scatter_idx, gather_idx, ctx, dispatcher).await;
261        }
262
263        self.run_sequential(commands, ctx, dispatcher).await
264    }
265
266    /// Execute commands sequentially without scatter/gather detection.
267    ///
268    /// Used by `ScatterGatherRunner` for pre_scatter, post_gather, and parallel
269    /// workers. Breaks the async recursion chain (`run` → scatter → `run`).
270    #[tracing::instrument(level = "debug", skip(self, commands, ctx, dispatcher), fields(command_count = commands.len()))]
271    pub async fn run_sequential(
272        &self,
273        commands: &[Command],
274        ctx: &mut ExecContext,
275        dispatcher: &dyn CommandDispatcher,
276    ) -> ExecResult {
277        if commands.is_empty() {
278            return ExecResult::success("");
279        }
280
281        if commands.len() == 1 {
282            // Single command, no piping needed
283            return self.run_single(&commands[0], ctx, None, dispatcher).await;
284        }
285
286        // Multi-command pipeline
287        self.run_pipeline(commands, ctx, dispatcher).await
288    }
289
290    /// Run a scatter/gather pipeline.
291    async fn run_scatter_gather(
292        &self,
293        commands: &[Command],
294        scatter_idx: usize,
295        gather_idx: usize,
296        ctx: &mut ExecContext,
297        dispatcher: &dyn CommandDispatcher,
298    ) -> ExecResult {
299        // Split pipeline into parts
300        let pre_scatter = &commands[..scatter_idx];
301        let scatter_cmd = &commands[scatter_idx];
302        let parallel = &commands[scatter_idx + 1..gather_idx];
303        let gather_cmd = &commands[gather_idx];
304        let post_gather = &commands[gather_idx + 1..];
305
306        // Parse options from scatter and gather commands
307        // These are builtins with simple key=value syntax, no schema-driven parsing needed
308        let scatter_schema = self.tools.get("scatter").map(|t| t.schema());
309        let gather_schema = self.tools.get("gather").map(|t| t.schema());
310        let scatter_opts = parse_scatter_options(&build_tool_args(&scatter_cmd.args, ctx, scatter_schema.as_ref()));
311        let gather_opts = parse_gather_options(&build_tool_args(&gather_cmd.args, ctx, gather_schema.as_ref()));
312
313        // We need an `Arc<dyn CommandDispatcher>` to hand to `ScatterGatherRunner`.
314        // `fork_attached` produces a subkernel whose cancellation token is a
315        // child of the parent's, so a parent timeout/cancel cascades into
316        // the scatter pipeline (and into worker children via further forks).
317        let sequential_dispatcher: Arc<dyn CommandDispatcher> = dispatcher.fork_attached().await;
318
319        let runner = ScatterGatherRunner::new(self.tools.clone(), sequential_dispatcher);
320        runner
321            .run(
322                pre_scatter,
323                scatter_opts,
324                parallel,
325                gather_opts,
326                post_gather,
327                ctx,
328            )
329            .await
330    }
331
332    /// Run a single command with optional stdin.
333    ///
334    /// The dispatcher handles arg parsing, schema lookup, output format, and execution.
335    /// The runner handles stdin setup (redirects + pipeline) and output redirects.
336    #[tracing::instrument(level = "debug", skip(self, cmd, ctx, stdin, dispatcher), fields(command = %cmd.name))]
337    async fn run_single(
338        &self,
339        cmd: &Command,
340        ctx: &mut ExecContext,
341        stdin: Option<String>,
342        dispatcher: &dyn CommandDispatcher,
343    ) -> ExecResult {
344        // Set up stdin from redirects (< file, <<heredoc)
345        if let Err(e) = setup_stdin_redirects(cmd, ctx).await {
346            return ExecResult::failure(1, e);
347        }
348
349        // Set stdin from pipeline (overrides redirect stdin)
350        if let Some(input) = stdin {
351            ctx.set_stdin(input);
352        }
353
354        // Set pipeline position for stdio inheritance decisions
355        ctx.pipeline_position = PipelinePosition::Only;
356
357        // Execute via dispatcher (full resolution chain)
358        let result = match dispatcher.dispatch(cmd, ctx).await {
359            Ok(result) => result,
360            Err(e) => ExecResult::failure(1, e.to_string()),
361        };
362
363        // Apply post-execution redirects
364        apply_redirects(result, &cmd.redirects, ctx).await
365    }
366
367    /// Run a multi-command pipeline concurrently.
368    ///
369    /// Each stage runs in its own tokio task, connected by bounded pipe streams
370    /// (64KB ring buffers with backpressure). This provides:
371    /// - Bounded memory usage (no buffering entire outputs)
372    /// - Backpressure (fast producers wait for slow consumers)
373    /// - Early termination (e.g., `seq 1 1000000 | head -n 5`)
374    ///
375    /// Structured data (`stdin_data`) is passed via oneshot channels alongside pipes.
376    #[tracing::instrument(level = "debug", skip(self, commands, ctx, dispatcher), fields(stage_count = commands.len()))]
377    async fn run_pipeline(
378        &self,
379        commands: &[Command],
380        ctx: &mut ExecContext,
381        dispatcher: &dyn CommandDispatcher,
382    ) -> ExecResult {
383        let stage_count = commands.len();
384        let last_idx = stage_count - 1;
385
386        // Create N-1 pipe pairs connecting adjacent stages
387        let mut pipe_writers: Vec<Option<super::pipe_stream::PipeWriter>> = Vec::new();
388        let mut pipe_readers: Vec<Option<super::pipe_stream::PipeReader>> = Vec::new();
389
390        for _ in 0..last_idx {
391            let (writer, reader) = pipe_stream_default();
392            pipe_writers.push(Some(writer));
393            pipe_readers.push(Some(reader));
394        }
395
396        // Create N-1 oneshot channels for structured data sideband
397        let mut data_senders: Vec<Option<tokio::sync::oneshot::Sender<Option<Value>>>> = Vec::new();
398        let mut data_receivers: Vec<Option<tokio::sync::oneshot::Receiver<Option<Value>>>> = Vec::new();
399
400        for _ in 0..last_idx {
401            let (tx, rx) = tokio::sync::oneshot::channel();
402            data_senders.push(Some(tx));
403            data_receivers.push(Some(rx));
404        }
405
406        let mut handles: Vec<tokio::task::JoinHandle<(ExecResult, ExecContext)>> = Vec::with_capacity(stage_count);
407
408        for (i, cmd) in commands.iter().enumerate() {
409            let mut stage_ctx = ctx.child_for_pipeline();
410            let cmd = cmd.clone();
411
412            // Fork attached: each concurrent pipeline stage needs independent
413            // mutable state, but cancellation should still cascade from the
414            // parent (so a request timeout kills externals running in any
415            // stage, not just the foreground one).
416            let task_dispatcher: Arc<dyn CommandDispatcher> = dispatcher.fork_attached().await;
417
418            // Set up stdin from redirects on the child context. A failure here
419            // (e.g. `cmd < missing`) fails this stage; surface it from inside
420            // the spawned task so the normal join/collection path reports it.
421            let stdin_setup = setup_stdin_redirects(&cmd, &mut stage_ctx).await;
422
423            // Wire pipe_stdin: stage 0 gets parent stdin (if no redirect), others get pipe reader
424            if i == 0 {
425                // First stage inherits the parent's stdin, but only if redirects didn't
426                // already set stdin (e.g., heredoc). Don't overwrite redirect-provided stdin.
427                if stage_ctx.stdin.is_none() {
428                    stage_ctx.stdin = ctx.stdin.take();
429                }
430                if stage_ctx.stdin_data.is_none() {
431                    stage_ctx.stdin_data = ctx.stdin_data.take();
432                }
433            } else {
434                // Intermediate/last stages read from pipe
435                stage_ctx.pipe_stdin = pipe_readers[i - 1].take();
436                // Structured data received via oneshot (resolved at start of execution)
437            }
438
439            // Wire pipe_stdout: last stage writes to ExecResult, others write to pipe
440            if i < last_idx {
441                stage_ctx.pipe_stdout = pipe_writers[i].take();
442            }
443
444            // Set pipeline position
445            stage_ctx.pipeline_position = match i {
446                0 => PipelinePosition::First,
447                n if n == last_idx => PipelinePosition::Last,
448                _ => PipelinePosition::Middle,
449            };
450
451            let data_sender = if i < last_idx { data_senders[i].take() } else { None };
452            let data_receiver = if i > 0 { data_receivers[i - 1].take() } else { None };
453
454            // Propagate the embedder's trace context across the spawn boundary
455            // so each concurrent stage's spans stay in the same trace.
456            let handle: tokio::task::JoinHandle<(ExecResult, ExecContext)> =
457                tokio::spawn(crate::telemetry::bind_current_context(async move {
458                // A stdin-redirect setup failure short-circuits this stage.
459                if let Err(e) = stdin_setup {
460                    return (ExecResult::failure(1, e), stage_ctx);
461                }
462
463                // Receive structured data from previous stage (non-blocking).
464                // Using try_recv avoids a deadlock: streaming builtins (e.g. grep)
465                // write to their pipe_stdout during dispatch. If we blocked here
466                // waiting for the upstream's oneshot (sent after dispatch), the
467                // downstream couldn't start draining the pipe → circular wait.
468                // Builtins that use stdin_data (e.g. jq) fall back to pipe text.
469                if let Some(mut rx) = data_receiver {
470                    if let Ok(data) = rx.try_recv() {
471                        stage_ctx.stdin_data = data;
472                    }
473                    // Err → not ready yet; builtin will read from pipe text
474                }
475
476                // Execute the command
477                let mut result = match task_dispatcher.dispatch(&cmd, &mut stage_ctx).await {
478                    Ok(result) => result,
479                    Err(e) => ExecResult::failure(1, e.to_string()),
480                };
481
482                // Apply post-execution redirects
483                result = apply_redirects(result, &cmd.redirects, &stage_ctx).await;
484
485                // Flush buffered stderr to the kernel's stderr stream.
486                // This delivers error output from intermediate pipeline stages
487                // in real-time (via the kernel drain) instead of silently discarding it.
488                // Redirects like 2>&1 have already cleared result.err, so merged
489                // stderr goes through the pipe as expected.
490                if !result.err.is_empty() {
491                    if let Some(ref stderr) = stage_ctx.stderr {
492                        stderr.write_str(&result.err);
493                        result.err.clear();
494                    }
495                }
496
497                // Send structured data to next stage via oneshot BEFORE pipe write.
498                // The pipe write may block on backpressure (>64KB output), and the
499                // consumer awaits this oneshot before starting execution. Sending
500                // first prevents a circular wait (producer blocked on pipe write,
501                // consumer blocked on oneshot).
502                if let Some(tx) = data_sender {
503                    let _ = tx.send(result.data.clone());
504                }
505
506                // Write output to pipe for next stage (if not last).
507                // Consumer is now unblocked and can drain concurrently.
508                if let Some(mut pipe_out) = stage_ctx.pipe_stdout.take() {
509                    let text = result.text_out();
510                    if !text.is_empty() {
511                        // Write result to pipe; ignore broken pipe (reader dropped early)
512                        let _ = pipe_out.write_all(text.as_bytes()).await;
513                        let _ = pipe_out.shutdown().await;
514                    }
515                    // Drop pipe_out signals EOF to next stage's reader
516                }
517
518                (result, stage_ctx)
519            }));
520
521            handles.push(handle);
522        }
523
524        // Await all stages and return last stage's result.
525        // Sync the last stage's scope back to the parent context so that
526        // variable assignments in the last pipeline stage are visible
527        // (e.g., `echo "Alice" | read NAME`).
528        let mut last_result = ExecResult::success("");
529        let mut panics: Vec<String> = Vec::new();
530        for (i, handle) in handles.into_iter().enumerate() {
531            match handle.await {
532                Ok((result, stage_ctx)) => {
533                    if i == last_idx {
534                        last_result = result;
535                        // Sync last stage's scope and cwd changes back
536                        ctx.scope = stage_ctx.scope;
537                        ctx.cwd = stage_ctx.cwd;
538                        ctx.prev_cwd = stage_ctx.prev_cwd;
539                        ctx.aliases = stage_ctx.aliases;
540                    }
541                }
542                Err(e) => {
543                    panics.push(format!("stage {}: {}", i, e));
544                }
545            }
546        }
547
548        if !panics.is_empty() {
549            last_result = ExecResult::failure(
550                1,
551                format!("pipeline stage(s) panicked: {}", panics.join("; ")),
552            );
553        }
554
555        last_result
556    }
557}
558
559/// Extract parameter types from a tool schema.
560///
561/// Returns a map from param name → param type (e.g., "verbose" → "bool", "output" → "string").
562/// Build a map from flag name → (canonical param name, param type).
563///
564/// Includes both primary names and aliases (with dashes stripped).
565/// For short flags like `-n` aliased to `lines`, maps `"n"` → `("lines", "int", 1)`.
566/// The third tuple slot is `consumes`: how many positionals the flag pulls
567/// per occurrence (1 for standard `--flag value`, 2 for jq's `--arg NAME VAL`).
568///
569/// Positional params (`positional: true`) are excluded — they're not flags,
570/// and including them would mis-route `cat --paths foo.txt` from positional
571/// to named, regressing builtins that read from `args.positional`.
572/// Walk leading positionals to select the active subcommand leaf of a schema.
573///
574/// A flat tool (`schema.subcommands` empty) returns the root immediately —
575/// today's single-leaf behavior. For a subcommand-aware tool each leading
576/// positional, in order, must name a child (by `name` or a command-level
577/// alias) to descend; the first positional that names no child is the leaf's
578/// own argument, and selection stops there. Multi-level trees fall out by
579/// construction (`block edit insert` → two descents).
580///
581/// Routing is **literal-only**: a subcommand selector must be a bareword or
582/// quoted string (both parse to `Expr::Literal(Value::String)`). A *computed*
583/// positional (`$(…)`, `$VAR`, a glob) sitting where a subcommand is required
584/// is an **error**, not a silent guess — kaish can't see its value at parse
585/// time, so picking a leaf from it would misroute the flags that bind against
586/// the leaf's params. The fix is to spell the subcommand out, or use the
587/// `--flag=value` form (which binds without any schema lookup).
588///
589/// Returned leaf borrows from `schema`, so its `params`/`subcommands` outlive
590/// any `schema_param_lookup` taken from it.
591///
592/// **Global value flags.** A space-form value flag declared on the *root*
593/// (e.g. kj's global `--confirm <nonce>`) can legitimately precede the
594/// subcommand path. Its value is a positional in the AST, so routing must not
595/// mistake it for a subcommand selector — `select_leaf` skips the value of any
596/// root-declared non-bool flag it sees. Leaf-specific value flags can't precede
597/// their own subcommand by construction, so only the root's flags need this.
598pub fn select_leaf<'a>(schema: &'a ToolSchema, args: &[Arg]) -> anyhow::Result<&'a ToolSchema> {
599    // Names + aliases of root-declared value (non-bool, non-positional) flags,
600    // whose space-form value is a positional we must skip while routing.
601    let root_lookup = schema_param_lookup(schema);
602    let is_root_value_flag = |name: &str| -> bool {
603        root_lookup.get(name).is_some_and(|(_, typ, _)| !is_bool_type(typ))
604    };
605
606    let mut node = schema;
607    let mut skip_next_positional = false;
608    for arg in args {
609        match arg {
610            // Tokens past `--` are raw data, never subcommand selectors.
611            Arg::DoubleDash => break,
612            // A root value flag in space form consumes the next positional as
613            // its value — don't route on that positional.
614            Arg::LongFlag(name) if is_root_value_flag(name) => skip_next_positional = true,
615            Arg::ShortFlag(name) if is_root_value_flag(name) => skip_next_positional = true,
616            Arg::Positional(expr) => {
617                if skip_next_positional {
618                    skip_next_positional = false;
619                    continue; // this positional is the preceding flag's value
620                }
621                if node.subcommands.is_empty() {
622                    break; // leaf reached — remaining positionals are its args
623                }
624                match classify_subcommand_positional(expr) {
625                    SubcommandWord::Word(word) => {
626                        match node.subcommands.iter().find(|c| c.matches_command(word)) {
627                            Some(child) => node = child, // descend
628                            None => break,               // not a subcommand → leaf's own arg
629                        }
630                    }
631                    // A non-string literal (number/bool) can't be a subcommand
632                    // name but its value *is* known; treat it as the leaf's own
633                    // positional and stop — no misroute risk.
634                    SubcommandWord::OtherLiteral => break,
635                    SubcommandWord::Computed(kind) => anyhow::bail!(
636                        "{}: a subcommand name is required here, but got {kind}. \
637                         Subcommands must be literal words — spell it out \
638                         (e.g. `{} <subcommand> …`) or use the `--flag=value` form.",
639                        node.name,
640                        schema.name
641                    ),
642                }
643            }
644            // Flags are skipped during routing; they bind against the leaf.
645            _ => {}
646        }
647    }
648    Ok(node)
649}
650
651/// How a positional reads when a subcommand selector is expected.
652enum SubcommandWord<'a> {
653    /// A literal word that may name a child.
654    Word(&'a str),
655    /// A literal but non-string value — a known value, never a subcommand.
656    OtherLiteral,
657    /// A value computed at runtime; `kind` describes it for the error.
658    Computed(&'static str),
659}
660
661fn classify_subcommand_positional(expr: &Expr) -> SubcommandWord<'_> {
662    match expr {
663        Expr::Literal(Value::String(s)) => SubcommandWord::Word(s),
664        Expr::Literal(_) => SubcommandWord::OtherLiteral,
665        Expr::CommandSubst(_) | Expr::Command(_) => SubcommandWord::Computed("a command substitution `$(…)`"),
666        Expr::VarRef(_)
667        | Expr::VarWithDefault { .. }
668        | Expr::VarLength(_)
669        | Expr::Positional(_)
670        | Expr::AllArgs
671        | Expr::ArgCount
672        | Expr::CurrentPid
673        | Expr::LastExitCode => SubcommandWord::Computed("a variable reference"),
674        Expr::Interpolated(_) | Expr::HereDocBody { .. } => SubcommandWord::Computed("an interpolated string"),
675        Expr::GlobPattern(_) => SubcommandWord::Computed("a glob pattern"),
676        Expr::Arithmetic(_) => SubcommandWord::Computed("an arithmetic expansion"),
677        _ => SubcommandWord::Computed("a value computed at runtime"),
678    }
679}
680
681pub fn schema_param_lookup(schema: &ToolSchema) -> HashMap<String, (&str, &str, usize)> {
682    let mut map = HashMap::new();
683    for p in schema.params.iter().filter(|p| !p.positional) {
684        map.insert(p.name.clone(), (p.name.as_str(), p.param_type.as_str(), p.consumes));
685        for alias in &p.aliases {
686            let stripped = alias.trim_start_matches('-');
687            map.insert(stripped.to_string(), (p.name.as_str(), p.param_type.as_str(), p.consumes));
688        }
689    }
690    map
691}
692
/// Check if a type is considered boolean.
///
/// Accepts `bool` and `boolean` in any letter case.
pub fn is_bool_type(param_type: &str) -> bool {
    ["bool", "boolean"]
        .iter()
        .any(|spelling| param_type.eq_ignore_ascii_case(spelling))
}
697
698/// Build ToolArgs from AST Args, evaluating expressions.
699///
700/// If a schema is provided, uses it to determine argument types:
701/// - For `--flag` where schema says type is non-bool: consume next positional as value
702/// - For `--flag` where schema says type is bool (or unknown): treat as boolean flag
703///
704/// This enables natural shell syntax like `mcp_tool --query "test" --limit 10`.
705pub fn build_tool_args(args: &[Arg], ctx: &ExecContext, schema: Option<&ToolSchema>) -> ToolArgs {
706    let mut tool_args = ToolArgs::new();
707    let param_lookup = schema.map(schema_param_lookup).unwrap_or_default();
708    let accepts_word_assign = schema
709        .map(|s| crate::tools::accepts_word_assign(s.name.as_str()))
710        .unwrap_or(false);
711
712    // Track which positional indices have been consumed as flag values
713    let mut consumed_positionals: std::collections::HashSet<usize> = std::collections::HashSet::new();
714    let mut past_double_dash = false;
715
716    // First pass: find positional args and their indices
717    let mut positional_indices: Vec<(usize, &Expr)> = Vec::new();
718    for (i, arg) in args.iter().enumerate() {
719        if let Arg::Positional(expr) = arg {
720            positional_indices.push((i, expr));
721        }
722    }
723
724    // Second pass: process all args
725    let mut i = 0;
726    while i < args.len() {
727        let arg = &args[i];
728
729        match arg {
730            Arg::DoubleDash => {
731                past_double_dash = true;
732            }
733            Arg::Positional(expr) => {
734                // Check if this positional was consumed by a preceding flag
735                if !consumed_positionals.contains(&i)
736                    && let Some(value) = eval_simple_expr(expr, ctx)
737                {
738                    tool_args.positional.push(value);
739                }
740            }
741            Arg::Named { key, value } => {
742                if let Some(val) = eval_simple_expr(value, ctx) {
743                    tool_args.named.insert(key.clone(), val);
744                }
745            }
746            Arg::WordAssign { key, value } => {
747                if let Some(val) = eval_simple_expr(value, ctx) {
748                    if accepts_word_assign {
749                        tool_args.named.insert(key.clone(), val);
750                    } else {
751                        let val_str = crate::interpreter::value_to_string(&val);
752                        tool_args.positional.push(Value::String(format!("{key}={val_str}")));
753                    }
754                }
755            }
756            Arg::ShortFlag(name) => {
757                if past_double_dash {
758                    tool_args.positional.push(Value::String(format!("-{name}")));
759                } else if name.len() == 1 {
760                    // Single-char short flag: look up schema to check if it takes a value.
761                    // e.g., `-n 5` where `-n` is an alias for `lines` (type: int)
762                    let flag_name = name.as_str();
763                    let lookup = param_lookup.get(flag_name);
764                    let is_bool = lookup
765                        .map(|(_, typ, _)| is_bool_type(typ))
766                        .unwrap_or(true);
767
768                    if is_bool {
769                        tool_args.flags.insert(flag_name.to_string());
770                    } else {
771                        // Non-bool: consume next positional as value, insert under canonical name
772                        let canonical = lookup.map(|(n, _, _)| *n).unwrap_or(flag_name);
773                        let next_positional = positional_indices
774                            .iter()
775                            .find(|(idx, _)| *idx > i && !consumed_positionals.contains(idx));
776
777                        if let Some((pos_idx, expr)) = next_positional {
778                            if let Some(value) = eval_simple_expr(expr, ctx) {
779                                tool_args.named.insert(canonical.to_string(), value);
780                                consumed_positionals.insert(*pos_idx);
781                            } else {
782                                tool_args.flags.insert(flag_name.to_string());
783                            }
784                        } else {
785                            tool_args.flags.insert(flag_name.to_string());
786                        }
787                    }
788                } else if let Some(&(canonical, typ, _)) = param_lookup.get(name.as_str()) {
789                    // Multi-char short flag matches a schema param (POSIX style: -name value)
790                    if is_bool_type(typ) {
791                        tool_args.flags.insert(canonical.to_string());
792                    } else {
793                        let next_positional = positional_indices
794                            .iter()
795                            .find(|(idx, _)| *idx > i && !consumed_positionals.contains(idx));
796                        if let Some((pos_idx, expr)) = next_positional {
797                            if let Some(value) = eval_simple_expr(expr, ctx) {
798                                tool_args.named.insert(canonical.to_string(), value);
799                                consumed_positionals.insert(*pos_idx);
800                            } else {
801                                tool_args.flags.insert(name.clone());
802                            }
803                        } else {
804                            tool_args.flags.insert(name.clone());
805                        }
806                    }
807                } else {
808                    // Multi-char combined flags like -la: always boolean
809                    for c in name.chars() {
810                        tool_args.flags.insert(c.to_string());
811                    }
812                }
813            }
814            Arg::LongFlag(name) => {
815                if past_double_dash {
816                    tool_args.positional.push(Value::String(format!("--{name}")));
817                } else {
818                    // Look up type in schema (checks name and aliases)
819                    let lookup = param_lookup.get(name.as_str());
820                    let is_bool = lookup
821                        .map(|(_, typ, _)| is_bool_type(typ))
822                        .unwrap_or(true); // Unknown params default to bool
823
824                    if is_bool {
825                        tool_args.flags.insert(name.clone());
826                    } else {
827                        // Non-bool: consume next positional as value, insert under canonical name
828                        // Note: the sync build_tool_args does NOT honor `consumes > 1`. The async
829                        // build_args_async in kernel.rs is the only path that supports multi-consume
830                        // flags. Sync callers (scheduler pipelines for --json-marker plumbing) don't
831                        // yet need that; if they ever do, lift the logic via a shared helper.
832                        let canonical = lookup.map(|(n, _, _)| *n).unwrap_or(name.as_str());
833                        let next_positional = positional_indices
834                            .iter()
835                            .find(|(idx, _)| *idx > i && !consumed_positionals.contains(idx));
836
837                        if let Some((pos_idx, expr)) = next_positional {
838                            if let Some(value) = eval_simple_expr(expr, ctx) {
839                                tool_args.named.insert(canonical.to_string(), value);
840                                consumed_positionals.insert(*pos_idx);
841                            } else {
842                                tool_args.flags.insert(name.clone());
843                            }
844                        } else {
845                            tool_args.flags.insert(name.clone());
846                        }
847                    }
848                }
849            }
850        }
851        i += 1;
852    }
853
854    // Map remaining positionals to unfilled non-bool schema params (in order).
855    // This enables `drift_push "abc" "hello"` → named["target_ctx"] = "abc", named["content"] = "hello"
856    // Positionals that appeared after `--` are never mapped (they're raw data).
857    // Only for MCP/external tools (map_positionals=true). Builtins handle their own positionals.
858    if let Some(schema) = schema.filter(|s| s.map_positionals) {
859        // Count how many positionals were added before `--`
860        let pre_dash_count = if past_double_dash {
861            // Find where the double-dash was in the original args to count pre-dash positionals
862            let dash_pos = args.iter().position(|a| matches!(a, Arg::DoubleDash)).unwrap_or(args.len());
863            // Count unconsumed positionals before the double-dash
864            positional_indices.iter()
865                .filter(|(idx, _)| *idx < dash_pos && !consumed_positionals.contains(idx))
866                .count()
867        } else {
868            tool_args.positional.len()
869        };
870
871        let mut remaining = Vec::new();
872        let mut positional_iter = tool_args.positional.drain(..).enumerate();
873
874        for param in &schema.params {
875            if tool_args.named.contains_key(&param.name) || tool_args.flags.contains(&param.name) {
876                continue; // Already filled by a flag or named arg
877            }
878            if is_bool_type(&param.param_type) {
879                continue; // Bool params should only be set by flags
880            }
881            // Take from pre-dash positionals only
882            loop {
883                match positional_iter.next() {
884                    Some((idx, val)) if idx < pre_dash_count => {
885                        tool_args.named.insert(param.name.clone(), val);
886                        break;
887                    }
888                    Some((_, val)) => {
889                        remaining.push(val); // Post-dash or past limit, keep as positional
890                    }
891                    None => break,
892                }
893            }
894        }
895
896        // Any leftover positionals stay positional (e.g. `cat file1 file2`)
897        remaining.extend(positional_iter.map(|(_, v)| v));
898        tool_args.positional = remaining;
899    }
900
901    tool_args
902}
903
904/// Simple expression evaluation for args (without full scope access).
905pub(crate) fn eval_simple_expr(expr: &Expr, ctx: &ExecContext) -> Option<Value> {
906    match expr {
907        Expr::Literal(value) => Some(eval_literal(value, ctx)),
908        Expr::VarRef(path) => ctx.scope.resolve_path(path),
909        Expr::Interpolated(parts) => {
910            let mut result = String::new();
911            for part in parts {
912                match part {
913                    crate::ast::StringPart::Literal(s) => result.push_str(s),
914                    crate::ast::StringPart::Var(path) => {
915                        if let Some(value) = ctx.scope.resolve_path(path) {
916                            result.push_str(&value_to_string(&value));
917                        }
918                    }
919                    crate::ast::StringPart::VarWithDefault { name, default } => {
920                        match ctx.scope.get(name) {
921                            Some(value) => {
922                                let s = value_to_string(value);
923                                if s.is_empty() {
924                                    result.push_str(&eval_string_parts_sync(default, ctx));
925                                } else {
926                                    result.push_str(&s);
927                                }
928                            }
929                            None => result.push_str(&eval_string_parts_sync(default, ctx)),
930                        }
931                    }
932                    crate::ast::StringPart::VarLength(name) => {
933                        let len = match ctx.scope.get(name) {
934                            Some(value) => value_to_string(value).len(),
935                            None => 0,
936                        };
937                        result.push_str(&len.to_string());
938                    }
939                    crate::ast::StringPart::Positional(n) => {
940                        if let Some(s) = ctx.scope.get_positional(*n) {
941                            result.push_str(s);
942                        }
943                    }
944                    crate::ast::StringPart::AllArgs => {
945                        result.push_str(&ctx.scope.all_args().join(" "));
946                    }
947                    crate::ast::StringPart::ArgCount => {
948                        result.push_str(&ctx.scope.arg_count().to_string());
949                    }
950                    crate::ast::StringPart::Arithmetic(expr) => {
951                        // Evaluate arithmetic in pipeline context
952                        if let Ok(value) = arithmetic::eval_arithmetic(expr, &ctx.scope) {
953                            result.push_str(&value.to_string());
954                        }
955                    }
956                    crate::ast::StringPart::CommandSubst(_) => {
957                        // Command substitution requires async - skip in sync context
958                    }
959                    crate::ast::StringPart::LastExitCode => {
960                        result.push_str(&ctx.scope.last_result().code.to_string());
961                    }
962                    crate::ast::StringPart::CurrentPid => {
963                        result.push_str(&ctx.scope.pid().to_string());
964                    }
965                }
966            }
967            Some(Value::String(result))
968        }
969        Expr::GlobPattern(s) => Some(Value::String(s.clone())),
970        Expr::HereDocBody { parts, strip_tabs } => {
971            // Heredoc body materialization for redirect targets. Reuses the
972            // shared sync part-walker; tab stripping is applied after the
973            // body is assembled, matching the interpreter's eval path.
974            let unwrapped: Vec<crate::ast::StringPart> =
975                parts.iter().map(|sp| sp.part.clone()).collect();
976            let raw = eval_string_parts_sync(&unwrapped, ctx);
977            let body = if *strip_tabs {
978                crate::interpreter::strip_leading_tabs(&raw)
979            } else {
980                raw
981            };
982            Some(Value::String(body))
983        }
984        _ => None, // Binary ops and command subst need more context
985    }
986}
987
988/// Evaluate a literal value.
989fn eval_literal(value: &Value, _ctx: &ExecContext) -> Value {
990    value.clone()
991}
992
993/// Convert a value to a string for interpolation.
994fn value_to_string(value: &Value) -> String {
995    match value {
996        Value::Null => "".to_string(),
997        Value::Bool(b) => b.to_string(),
998        Value::Int(i) => i.to_string(),
999        Value::Float(f) => f.to_string(),
1000        Value::String(s) => s.clone(),
1001        Value::Json(json) => json.to_string(),
1002        Value::Blob(blob) => format!("[blob: {} {}]", blob.formatted_size(), blob.content_type),
1003    }
1004}
1005
1006/// Evaluate string parts synchronously (for pipeline context).
1007/// Command substitutions are skipped as they require async.
1008fn eval_string_parts_sync(parts: &[crate::ast::StringPart], ctx: &ExecContext) -> String {
1009    let mut result = String::new();
1010    for part in parts {
1011        match part {
1012            crate::ast::StringPart::Literal(s) => result.push_str(s),
1013            crate::ast::StringPart::Var(path) => {
1014                if let Some(value) = ctx.scope.resolve_path(path) {
1015                    result.push_str(&value_to_string(&value));
1016                }
1017            }
1018            crate::ast::StringPart::VarWithDefault { name, default } => {
1019                match ctx.scope.get(name) {
1020                    Some(value) => {
1021                        let s = value_to_string(value);
1022                        if s.is_empty() {
1023                            result.push_str(&eval_string_parts_sync(default, ctx));
1024                        } else {
1025                            result.push_str(&s);
1026                        }
1027                    }
1028                    None => result.push_str(&eval_string_parts_sync(default, ctx)),
1029                }
1030            }
1031            crate::ast::StringPart::VarLength(name) => {
1032                let len = match ctx.scope.get(name) {
1033                    Some(value) => value_to_string(value).len(),
1034                    None => 0,
1035                };
1036                result.push_str(&len.to_string());
1037            }
1038            crate::ast::StringPart::Positional(n) => {
1039                if let Some(s) = ctx.scope.get_positional(*n) {
1040                    result.push_str(s);
1041                }
1042            }
1043            crate::ast::StringPart::AllArgs => {
1044                result.push_str(&ctx.scope.all_args().join(" "));
1045            }
1046            crate::ast::StringPart::ArgCount => {
1047                result.push_str(&ctx.scope.arg_count().to_string());
1048            }
1049            crate::ast::StringPart::Arithmetic(expr) => {
1050                if let Ok(value) = arithmetic::eval_arithmetic(expr, &ctx.scope) {
1051                    result.push_str(&value.to_string());
1052                }
1053            }
1054            crate::ast::StringPart::CommandSubst(_) => {
1055                // Command substitution requires async - skip in sync context
1056            }
1057            crate::ast::StringPart::LastExitCode => {
1058                result.push_str(&ctx.scope.last_result().code.to_string());
1059            }
1060            crate::ast::StringPart::CurrentPid => {
1061                result.push_str(&ctx.scope.pid().to_string());
1062            }
1063        }
1064    }
1065    result
1066}
1067
1068/// Find scatter and gather commands in a pipeline.
1069///
1070/// Returns Some((scatter_index, gather_index)) if both are found with scatter before gather.
1071/// Returns None if the pipeline doesn't have a valid scatter/gather pattern.
1072fn find_scatter_gather(commands: &[Command]) -> Option<(usize, usize)> {
1073    let scatter_idx = commands.iter().position(|c| c.name == "scatter")?;
1074    let gather_idx = commands.iter().position(|c| c.name == "gather")?;
1075
1076    // Gather must come after scatter
1077    if gather_idx > scatter_idx {
1078        Some((scatter_idx, gather_idx))
1079    } else {
1080        None
1081    }
1082}
1083
#[cfg(test)]
mod select_leaf_tests {
    use super::*;
    use crate::tools::ParamSchema;

    /// `kj`-shaped fixture: kj → context (alias ctx) → {list (alias ls), create}.
    ///
    /// The root has a global `--confirm <nonce>` value flag and a `--verbose`
    /// bool; `create` has its own `--type` value flag. That is enough to cover
    /// skipping global flags during routing and binding flags at the leaf.
    fn kj_schema() -> ToolSchema {
        let list = ToolSchema::new("list", "list").with_command_aliases(["ls"]);
        let create = ToolSchema::new("create", "create")
            .param(ParamSchema::new("type", "string").with_aliases(["t"]));
        let context = ToolSchema::new("context", "context ops")
            .with_command_aliases(["ctx"])
            .subcommand(list)
            .subcommand(create);

        ToolSchema::new("kj", "kaijutsu")
            .param(ParamSchema::new("confirm", "string"))
            .param(ParamSchema::new("verbose", "bool"))
            .subcommand(context)
    }

    /// A positional argument holding a literal string word.
    fn word(s: &str) -> Arg {
        Arg::Positional(Expr::Literal(Value::String(s.to_owned())))
    }

    /// A positional argument holding an (empty) command substitution.
    fn command_subst() -> Arg {
        let pipeline = crate::ast::Pipeline { commands: vec![], background: false };
        Arg::Positional(Expr::CommandSubst(Box::new(pipeline)))
    }

    #[test]
    fn flat_tool_returns_root() {
        let path_param = ParamSchema::required("path", "string", "f").positional();
        let schema = ToolSchema::new("cat", "concat").param(path_param);
        let args = [word("foo.txt")];
        let leaf = select_leaf(&schema, &args).expect("flat ok");
        assert_eq!(leaf.name, "cat");
    }

    #[test]
    fn single_hop() {
        let schema = kj_schema();
        let args = [word("context")];
        let leaf = select_leaf(&schema, &args).expect("ok");
        assert_eq!(leaf.name, "context");
    }

    #[test]
    fn two_hops() {
        let schema = kj_schema();
        let args = [word("context"), word("create")];
        let leaf = select_leaf(&schema, &args).expect("ok");
        assert_eq!(leaf.name, "create");
        assert!(leaf.params.iter().any(|p| p.name == "type"), "leaf has --type");
    }

    #[test]
    fn alias_hops_route() {
        // `kj ctx ls` resolves to context.list through the command aliases.
        let schema = kj_schema();
        let args = [word("ctx"), word("ls")];
        let leaf = select_leaf(&schema, &args).expect("ok");
        assert_eq!(leaf.name, "list");
    }

    #[test]
    fn unknown_subcommand_stops_at_current_node() {
        // `context nonesuch`: `nonesuch` is not a child of context, so routing
        // stops at context and `nonesuch` is its own positional. Not an error.
        let schema = kj_schema();
        let args = [word("context"), word("nonesuch")];
        let leaf = select_leaf(&schema, &args).expect("ok");
        assert_eq!(leaf.name, "context");
    }

    #[test]
    fn root_bool_flag_before_path_does_not_disrupt_routing() {
        // `kj --verbose context create`: the root bool flag is skipped and
        // both positionals route down to create.
        let schema = kj_schema();
        let args = [Arg::LongFlag("verbose".into()), word("context"), word("create")];
        let leaf = select_leaf(&schema, &args).expect("ok");
        assert_eq!(leaf.name, "create");
    }

    #[test]
    fn root_value_flag_space_form_before_path_skips_its_value() {
        // `kj --confirm nonce context create`: `nonce` belongs to --confirm,
        // it is NOT a subcommand selector, so routing skips it and reaches create.
        let schema = kj_schema();
        let args = [
            Arg::LongFlag("confirm".into()),
            word("nonce"),
            word("context"),
            word("create"),
        ];
        let leaf = select_leaf(&schema, &args).expect("ok");
        assert_eq!(leaf.name, "create");
    }

    #[test]
    fn leaf_value_flag_after_path_routes_to_leaf() {
        // `kj context create --type x`: path first, leaf flag afterwards.
        // Routing reaches create; --type is then bound against create.
        let schema = kj_schema();
        let args = [
            word("context"),
            word("create"),
            Arg::LongFlag("type".into()),
            word("x"),
        ];
        let leaf = select_leaf(&schema, &args).expect("ok");
        assert_eq!(leaf.name, "create");
        assert!(leaf.params.iter().any(|p| p.name == "type"));
    }

    #[test]
    fn double_dash_stops_routing() {
        // `kj -- context`: past `--`, `context` is raw data rather than a subcommand.
        let schema = kj_schema();
        let args = [Arg::DoubleDash, word("context")];
        let leaf = select_leaf(&schema, &args).expect("ok");
        assert_eq!(leaf.name, "kj");
    }

    #[test]
    fn computed_subcommand_selector_errors() {
        // `kj $(echo context)`: a command substitution in subcommand position
        // has to fail loudly instead of silently settling on a leaf.
        let schema = kj_schema();
        let args = [command_subst()];
        let err = select_leaf(&schema, &args).expect_err("must error");
        let msg = err.to_string();
        assert!(msg.contains("subcommand name is required"), "got: {msg}");
        assert!(msg.contains("command substitution"), "names the cause: {msg}");
    }

    #[test]
    fn variable_subcommand_selector_errors() {
        let schema = kj_schema();
        let args = [Arg::Positional(Expr::VarRef(crate::ast::VarPath::simple("sub")))];
        let err = select_leaf(&schema, &args).expect_err("must error");
        assert!(err.to_string().contains("variable reference"), "got: {err}");
    }

    #[test]
    fn computed_positional_after_leaf_is_fine() {
        // `kj context list $(echo x)`: list has no children, so routing has
        // already stopped and the computed positional is an ordinary argument.
        let schema = kj_schema();
        let args = [word("context"), word("list"), command_subst()];
        let leaf = select_leaf(&schema, &args).expect("ok");
        assert_eq!(leaf.name, "list");
    }
}
1240
1241#[cfg(test)]
1242mod tests {
1243    use super::*;
1244    use crate::dispatch::BackendDispatcher;
1245    use crate::tools::register_builtins;
1246    use crate::vfs::{Filesystem, MemoryFs, VfsRouter};
1247    use std::path::Path;
1248
1249    async fn make_runner_and_ctx() -> (PipelineRunner, ExecContext, BackendDispatcher) {
1250        let mut tools = ToolRegistry::new();
1251        register_builtins(&mut tools);
1252        let tools = Arc::new(tools);
1253        let runner = PipelineRunner::new(tools.clone());
1254        let dispatcher = BackendDispatcher::new(tools.clone());
1255
1256        let mut vfs = VfsRouter::new();
1257        let mem = MemoryFs::new();
1258        mem.write(Path::new("test.txt"), b"hello\nworld\nfoo").await.unwrap();
1259        vfs.mount("/", mem);
1260        let ctx = ExecContext::with_vfs_and_tools(Arc::new(vfs), tools);
1261
1262        (runner, ctx, dispatcher)
1263    }
1264
1265    fn make_cmd(name: &str, args: Vec<&str>) -> Command {
1266        Command {
1267            name: name.to_string(),
1268            args: args.iter().map(|s| Arg::Positional(Expr::Literal(Value::String(s.to_string())))).collect(),
1269            redirects: vec![],
1270        }
1271    }
1272
1273    #[tokio::test]
1274    async fn test_single_command() {
1275        let (runner, mut ctx, dispatcher) = make_runner_and_ctx().await;
1276        let cmd = make_cmd("echo", vec!["hello"]);
1277
1278        let result = runner.run(&[cmd], &mut ctx, &dispatcher).await;
1279        assert!(result.ok());
1280        assert_eq!(result.text_out().trim(), "hello");
1281    }
1282
1283    #[tokio::test]
1284    async fn test_pipeline_echo_grep() {
1285        let (runner, mut ctx, dispatcher) = make_runner_and_ctx().await;
1286
1287        // echo "hello\nworld" | grep pattern="world"
1288        let echo_cmd = Command {
1289            name: "echo".to_string(),
1290            args: vec![Arg::Positional(Expr::Literal(Value::String("hello\nworld".to_string())))],
1291            redirects: vec![],
1292        };
1293        let grep_cmd = Command {
1294            name: "grep".to_string(),
1295            args: vec![Arg::Positional(Expr::Literal(Value::String("world".to_string())))],
1296            redirects: vec![],
1297        };
1298
1299        let result = runner.run(&[echo_cmd, grep_cmd], &mut ctx, &dispatcher).await;
1300        assert!(result.ok());
1301        assert_eq!(result.text_out().trim(), "world");
1302    }
1303
1304    #[tokio::test]
1305    async fn test_pipeline_cat_grep() {
1306        let (runner, mut ctx, dispatcher) = make_runner_and_ctx().await;
1307
1308        // cat /test.txt | grep pattern="hello"
1309        let cat_cmd = make_cmd("cat", vec!["/test.txt"]);
1310        let grep_cmd = Command {
1311            name: "grep".to_string(),
1312            args: vec![Arg::Positional(Expr::Literal(Value::String("hello".to_string())))],
1313            redirects: vec![],
1314        };
1315
1316        let result = runner.run(&[cat_cmd, grep_cmd], &mut ctx, &dispatcher).await;
1317        assert!(result.ok());
1318        assert!(result.text_out().contains("hello"));
1319    }
1320
1321    #[tokio::test]
1322    async fn test_command_not_found() {
1323        let (runner, mut ctx, dispatcher) = make_runner_and_ctx().await;
1324        let cmd = make_cmd("nonexistent", vec![]);
1325
1326        let result = runner.run(&[cmd], &mut ctx, &dispatcher).await;
1327        assert!(!result.ok());
1328        assert_eq!(result.code, 127);
1329        assert!(result.err.contains("not found"));
1330    }
1331
1332    #[tokio::test]
1333    async fn test_pipeline_continues_on_failure() {
1334        // Standard shell semantics: pipeline runs all commands,
1335        // exit code comes from the last command
1336        let (runner, mut ctx, dispatcher) = make_runner_and_ctx().await;
1337
1338        // cat /nonexistent | grep "hello"
1339        // cat fails but grep still runs (on empty input), grep returns 1 (no match)
1340        let cat_cmd = make_cmd("cat", vec!["/nonexistent"]);
1341        let grep_cmd = Command {
1342            name: "grep".to_string(),
1343            args: vec![Arg::Positional(Expr::Literal(Value::String("hello".to_string())))],
1344            redirects: vec![],
1345        };
1346
1347        let result = runner.run(&[cat_cmd, grep_cmd], &mut ctx, &dispatcher).await;
1348        // Exit code comes from last command (grep), not from cat
1349        assert!(!result.ok());
1350    }
1351
1352    #[tokio::test]
1353    async fn test_pipeline_last_command_exit_code() {
1354        // echo hello | cat — both succeed, pipeline succeeds
1355        let (runner, mut ctx, dispatcher) = make_runner_and_ctx().await;
1356
1357        let echo_cmd = make_cmd("echo", vec!["hello"]);
1358        let cat_cmd = make_cmd("cat", vec![]);
1359
1360        let result = runner.run(&[echo_cmd, cat_cmd], &mut ctx, &dispatcher).await;
1361        assert!(result.ok());
1362        assert!(result.text_out().contains("hello"));
1363    }
1364
1365    #[tokio::test]
1366    async fn test_empty_pipeline() {
1367        let (runner, mut ctx, dispatcher) = make_runner_and_ctx().await;
1368        let result = runner.run(&[], &mut ctx, &dispatcher).await;
1369        assert!(result.ok());
1370    }
1371
1372    // === Scatter/Gather Tests ===
1373
1374    #[test]
1375    fn test_find_scatter_gather_both_present() {
1376        let commands = vec![
1377            make_cmd("echo", vec!["a"]),
1378            make_cmd("scatter", vec![]),
1379            make_cmd("process", vec![]),
1380            make_cmd("gather", vec![]),
1381        ];
1382        let result = find_scatter_gather(&commands);
1383        assert_eq!(result, Some((1, 3)));
1384    }
1385
1386    #[test]
1387    fn test_find_scatter_gather_no_scatter() {
1388        let commands = vec![
1389            make_cmd("echo", vec!["a"]),
1390            make_cmd("gather", vec![]),
1391        ];
1392        let result = find_scatter_gather(&commands);
1393        assert!(result.is_none());
1394    }
1395
1396    #[test]
1397    fn test_find_scatter_gather_no_gather() {
1398        let commands = vec![
1399            make_cmd("echo", vec!["a"]),
1400            make_cmd("scatter", vec![]),
1401        ];
1402        let result = find_scatter_gather(&commands);
1403        assert!(result.is_none());
1404    }
1405
1406    #[test]
1407    fn test_find_scatter_gather_wrong_order() {
1408        let commands = vec![
1409            make_cmd("gather", vec![]),
1410            make_cmd("scatter", vec![]),
1411        ];
1412        let result = find_scatter_gather(&commands);
1413        assert!(result.is_none());
1414    }
1415
1416    #[tokio::test]
1417    async fn test_scatter_gather_simple() {
1418        let (runner, mut ctx, dispatcher) = make_runner_and_ctx().await;
1419
1420        // split "a b c" | scatter | echo ${ITEM} | gather
1421        let split_cmd = Command {
1422            name: "split".to_string(),
1423            args: vec![Arg::Positional(Expr::Literal(Value::String("a b c".to_string())))],
1424            redirects: vec![],
1425        };
1426        let scatter_cmd = make_cmd("scatter", vec![]);
1427        let process_cmd = Command {
1428            name: "echo".to_string(),
1429            args: vec![Arg::Positional(Expr::VarRef(crate::ast::VarPath::simple("ITEM")))],
1430            redirects: vec![],
1431        };
1432        let gather_cmd = make_cmd("gather", vec![]);
1433
1434        let result = runner.run(&[split_cmd, scatter_cmd, process_cmd, gather_cmd], &mut ctx, &dispatcher).await;
1435        assert!(result.ok(), "scatter with structured data should succeed: {}", result.err);
1436        // Each echo should output the item
1437        assert!(result.text_out().contains("a"));
1438        assert!(result.text_out().contains("b"));
1439        assert!(result.text_out().contains("c"));
1440    }
1441
1442    #[tokio::test]
1443    async fn test_scatter_gather_empty_input() {
1444        let (runner, mut ctx, dispatcher) = make_runner_and_ctx().await;
1445
1446        // echo "" | scatter | echo ${ITEM} | gather
1447        let echo_cmd = Command {
1448            name: "echo".to_string(),
1449            args: vec![Arg::Positional(Expr::Literal(Value::String("".to_string())))],
1450            redirects: vec![],
1451        };
1452        let scatter_cmd = make_cmd("scatter", vec![]);
1453        let process_cmd = Command {
1454            name: "echo".to_string(),
1455            args: vec![Arg::Positional(Expr::VarRef(crate::ast::VarPath::simple("ITEM")))],
1456            redirects: vec![],
1457        };
1458        let gather_cmd = make_cmd("gather", vec![]);
1459
1460        let result = runner.run(&[echo_cmd, scatter_cmd, process_cmd, gather_cmd], &mut ctx, &dispatcher).await;
1461        assert!(result.ok());
1462        assert!(result.text_out().trim().is_empty());
1463    }
1464
1465    #[tokio::test]
1466    async fn test_scatter_gather_with_structured_stdin() {
1467        let (runner, mut ctx, dispatcher) = make_runner_and_ctx().await;
1468
1469        // Set structured stdin data (as if piped from split/seq)
1470        let data = Value::Json(serde_json::json!(["x", "y", "z"]));
1471        ctx.set_stdin_with_data("x\ny\nz".to_string(), Some(data));
1472
1473        let scatter_cmd = make_cmd("scatter", vec![]);
1474        let process_cmd = Command {
1475            name: "echo".to_string(),
1476            args: vec![Arg::Positional(Expr::VarRef(crate::ast::VarPath::simple("ITEM")))],
1477            redirects: vec![],
1478        };
1479        let gather_cmd = make_cmd("gather", vec![]);
1480
1481        let result = runner.run(&[scatter_cmd, process_cmd, gather_cmd], &mut ctx, &dispatcher).await;
1482        assert!(result.ok(), "scatter with structured stdin should succeed: {}", result.err);
1483        assert!(result.text_out().contains("x"));
1484        assert!(result.text_out().contains("y"));
1485        assert!(result.text_out().contains("z"));
1486    }
1487
1488    #[tokio::test]
1489    async fn test_scatter_gather_json_input() {
1490        let (runner, mut ctx, dispatcher) = make_runner_and_ctx().await;
1491
1492        // Structured JSON array input (as if from split/seq)
1493        let data = Value::Json(serde_json::json!(["one", "two", "three"]));
1494        ctx.set_stdin_with_data(r#"["one", "two", "three"]"#.to_string(), Some(data));
1495
1496        let scatter_cmd = make_cmd("scatter", vec![]);
1497        let process_cmd = Command {
1498            name: "echo".to_string(),
1499            args: vec![Arg::Positional(Expr::VarRef(crate::ast::VarPath::simple("ITEM")))],
1500            redirects: vec![],
1501        };
1502        let gather_cmd = make_cmd("gather", vec![]);
1503
1504        let result = runner.run(&[scatter_cmd, process_cmd, gather_cmd], &mut ctx, &dispatcher).await;
1505        assert!(result.ok(), "scatter with JSON data should succeed: {}", result.err);
1506        assert!(result.text_out().contains("one"));
1507        assert!(result.text_out().contains("two"));
1508        assert!(result.text_out().contains("three"));
1509    }
1510
1511    #[tokio::test]
1512    async fn test_scatter_gather_with_post_gather() {
1513        let (runner, mut ctx, dispatcher) = make_runner_and_ctx().await;
1514
1515        // split "a b" | scatter | echo ${ITEM} | gather | grep "a"
1516        let split_cmd = Command {
1517            name: "split".to_string(),
1518            args: vec![Arg::Positional(Expr::Literal(Value::String("a b".to_string())))],
1519            redirects: vec![],
1520        };
1521        let scatter_cmd = make_cmd("scatter", vec![]);
1522        let process_cmd = Command {
1523            name: "echo".to_string(),
1524            args: vec![Arg::Positional(Expr::VarRef(crate::ast::VarPath::simple("ITEM")))],
1525            redirects: vec![],
1526        };
1527        let gather_cmd = make_cmd("gather", vec![]);
1528        let grep_cmd = Command {
1529            name: "grep".to_string(),
1530            args: vec![Arg::Positional(Expr::Literal(Value::String("a".to_string())))],
1531            redirects: vec![],
1532        };
1533
1534        let result = runner.run(&[split_cmd, scatter_cmd, process_cmd, gather_cmd, grep_cmd], &mut ctx, &dispatcher).await;
1535        assert!(result.ok(), "scatter with post_gather should succeed: {}", result.err);
1536        assert!(result.text_out().contains("a"));
1537        assert!(!result.text_out().contains("b"));
1538    }
1539
1540    #[tokio::test]
1541    async fn test_scatter_custom_var_name() {
1542        let (runner, mut ctx, dispatcher) = make_runner_and_ctx().await;
1543
1544        // Provide structured data (as if from split/seq)
1545        let data = Value::Json(serde_json::json!(["test1", "test2"]));
1546        ctx.set_stdin_with_data("test1\ntest2".to_string(), Some(data));
1547
1548        // scatter --as URL | echo ${URL} | gather
1549        let scatter_cmd = Command {
1550            name: "scatter".to_string(),
1551            args: vec![Arg::Named {
1552                key: "as".to_string(),
1553                value: Expr::Literal(Value::String("URL".to_string())),
1554            }],
1555            redirects: vec![],
1556        };
1557        let process_cmd = Command {
1558            name: "echo".to_string(),
1559            args: vec![Arg::Positional(Expr::VarRef(crate::ast::VarPath::simple("URL")))],
1560            redirects: vec![],
1561        };
1562        let gather_cmd = make_cmd("gather", vec![]);
1563
1564        let result = runner.run(&[scatter_cmd, process_cmd, gather_cmd], &mut ctx, &dispatcher).await;
1565        assert!(result.ok(), "scatter with custom var should succeed: {}", result.err);
1566        assert!(result.text_out().contains("test1"));
1567        assert!(result.text_out().contains("test2"));
1568    }
1569
1570    // === Backend Routing Tests ===
1571
1572    #[tokio::test]
1573    async fn test_pipeline_routes_through_backend() {
1574        use crate::backend::testing::MockBackend;
1575        use std::sync::atomic::Ordering;
1576
1577        // Create mock backend
1578        let (backend, call_count) = MockBackend::new();
1579        let backend: std::sync::Arc<dyn crate::backend::KernelBackend> = std::sync::Arc::new(backend);
1580
1581        // Create context with mock backend
1582        let mut ctx = crate::tools::ExecContext::with_backend(backend);
1583
1584        // BackendDispatcher routes through backend.call_tool()
1585        let tools = std::sync::Arc::new(ToolRegistry::new());
1586        let runner = PipelineRunner::new(tools.clone());
1587        let dispatcher = BackendDispatcher::new(tools);
1588
1589        // Single command should route through backend
1590        let cmd = make_cmd("test-tool", vec!["arg1"]);
1591        let result = runner.run(&[cmd], &mut ctx, &dispatcher).await;
1592
1593        assert!(result.ok(), "Mock backend should return success");
1594        assert_eq!(call_count.load(Ordering::SeqCst), 1, "call_tool should be invoked once");
1595        assert!(result.text_out().contains("mock executed"), "Output should be from mock backend");
1596    }
1597
1598    #[tokio::test]
1599    async fn test_multi_command_pipeline_routes_through_backend() {
1600        use crate::backend::testing::MockBackend;
1601        use std::sync::atomic::Ordering;
1602
1603        let (backend, call_count) = MockBackend::new();
1604        let backend: std::sync::Arc<dyn crate::backend::KernelBackend> = std::sync::Arc::new(backend);
1605        let mut ctx = crate::tools::ExecContext::with_backend(backend);
1606
1607        let tools = std::sync::Arc::new(ToolRegistry::new());
1608        let runner = PipelineRunner::new(tools.clone());
1609        let dispatcher = BackendDispatcher::new(tools);
1610
1611        // Pipeline with 3 commands
1612        let cmd1 = make_cmd("tool1", vec![]);
1613        let cmd2 = make_cmd("tool2", vec![]);
1614        let cmd3 = make_cmd("tool3", vec![]);
1615
1616        let result = runner.run(&[cmd1, cmd2, cmd3], &mut ctx, &dispatcher).await;
1617
1618        assert!(result.ok());
1619        assert_eq!(call_count.load(Ordering::SeqCst), 3, "call_tool should be invoked for each command");
1620    }
1621
1622    // === Schema-Aware Argument Parsing Tests ===
1623
1624    use crate::tools::{ParamSchema, ToolSchema};
1625
1626    fn make_test_schema() -> ToolSchema {
1627        ToolSchema::new("test-tool", "A test tool for schema-aware parsing")
1628            .param(ParamSchema::required("query", "string", "Search query"))
1629            .param(ParamSchema::optional("limit", "int", Value::Int(10), "Max results"))
1630            .param(ParamSchema::optional("verbose", "bool", Value::Bool(false), "Verbose output"))
1631            .param(ParamSchema::optional("output", "string", Value::String("stdout".into()), "Output destination"))
1632            .with_positional_mapping()
1633    }
1634
1635    fn make_minimal_ctx() -> ExecContext {
1636        let mut vfs = VfsRouter::new();
1637        vfs.mount("/", MemoryFs::new());
1638        ExecContext::new(Arc::new(vfs))
1639    }
1640
1641    #[test]
1642    fn test_schema_aware_string_arg() {
1643        // --query "test" should become named: {"query": "test"}
1644        let args = vec![
1645            Arg::LongFlag("query".to_string()),
1646            Arg::Positional(Expr::Literal(Value::String("test".to_string()))),
1647        ];
1648        let schema = make_test_schema();
1649        let ctx = make_minimal_ctx();
1650
1651        let tool_args = build_tool_args(&args, &ctx, Some(&schema));
1652
1653        assert!(tool_args.flags.is_empty(), "No flags should be set");
1654        assert!(tool_args.positional.is_empty(), "No positionals - consumed by --query");
1655        assert_eq!(
1656            tool_args.named.get("query"),
1657            Some(&Value::String("test".to_string())),
1658            "--query should consume 'test' as its value"
1659        );
1660    }
1661
1662    #[test]
1663    fn test_schema_aware_bool_flag() {
1664        // --verbose should remain a flag since schema says bool
1665        let args = vec![
1666            Arg::LongFlag("verbose".to_string()),
1667        ];
1668        let schema = make_test_schema();
1669        let ctx = make_minimal_ctx();
1670
1671        let tool_args = build_tool_args(&args, &ctx, Some(&schema));
1672
1673        assert!(tool_args.flags.contains("verbose"), "--verbose should be a flag");
1674        assert!(tool_args.named.is_empty(), "No named args");
1675        assert!(tool_args.positional.is_empty(), "No positionals");
1676    }
1677
1678    #[test]
1679    fn test_schema_aware_mixed() {
1680        // mcp_tool file.txt --output out.txt --verbose
1681        // file.txt maps to "query" (first unfilled non-bool schema param)
1682        let args = vec![
1683            Arg::Positional(Expr::Literal(Value::String("file.txt".to_string()))),
1684            Arg::LongFlag("output".to_string()),
1685            Arg::Positional(Expr::Literal(Value::String("out.txt".to_string()))),
1686            Arg::LongFlag("verbose".to_string()),
1687        ];
1688        let schema = make_test_schema();
1689        let ctx = make_minimal_ctx();
1690
1691        let tool_args = build_tool_args(&args, &ctx, Some(&schema));
1692
1693        assert!(tool_args.positional.is_empty(), "file.txt consumed as query param");
1694        assert_eq!(
1695            tool_args.named.get("query"),
1696            Some(&Value::String("file.txt".to_string()))
1697        );
1698        assert_eq!(
1699            tool_args.named.get("output"),
1700            Some(&Value::String("out.txt".to_string()))
1701        );
1702        assert!(tool_args.flags.contains("verbose"));
1703    }
1704
1705    #[test]
1706    fn test_schema_aware_multiple_string_args() {
1707        // --query "test" --output "result.json" --verbose --limit 5
1708        let args = vec![
1709            Arg::LongFlag("query".to_string()),
1710            Arg::Positional(Expr::Literal(Value::String("test".to_string()))),
1711            Arg::LongFlag("output".to_string()),
1712            Arg::Positional(Expr::Literal(Value::String("result.json".to_string()))),
1713            Arg::LongFlag("verbose".to_string()),
1714            Arg::LongFlag("limit".to_string()),
1715            Arg::Positional(Expr::Literal(Value::Int(5))),
1716        ];
1717        let schema = make_test_schema();
1718        let ctx = make_minimal_ctx();
1719
1720        let tool_args = build_tool_args(&args, &ctx, Some(&schema));
1721
1722        assert!(tool_args.positional.is_empty(), "All positionals consumed");
1723        assert_eq!(
1724            tool_args.named.get("query"),
1725            Some(&Value::String("test".to_string()))
1726        );
1727        assert_eq!(
1728            tool_args.named.get("output"),
1729            Some(&Value::String("result.json".to_string()))
1730        );
1731        assert_eq!(
1732            tool_args.named.get("limit"),
1733            Some(&Value::Int(5))
1734        );
1735        assert!(tool_args.flags.contains("verbose"));
1736    }
1737
1738    #[test]
1739    fn test_schema_aware_double_dash() {
1740        // --output out.txt -- --this-is-data
1741        // After --, everything is positional
1742        let args = vec![
1743            Arg::LongFlag("output".to_string()),
1744            Arg::Positional(Expr::Literal(Value::String("out.txt".to_string()))),
1745            Arg::DoubleDash,
1746            Arg::Positional(Expr::Literal(Value::String("--this-is-data".to_string()))),
1747        ];
1748        let schema = make_test_schema();
1749        let ctx = make_minimal_ctx();
1750
1751        let tool_args = build_tool_args(&args, &ctx, Some(&schema));
1752
1753        assert_eq!(
1754            tool_args.named.get("output"),
1755            Some(&Value::String("out.txt".to_string()))
1756        );
1757        // After --, the --this-is-data is treated as a positional (it's a Positional in the args)
1758        assert_eq!(
1759            tool_args.positional,
1760            vec![Value::String("--this-is-data".to_string())]
1761        );
1762    }
1763
1764    #[test]
1765    fn test_no_schema_fallback() {
1766        // Without schema, all --flags are treated as bool flags
1767        let args = vec![
1768            Arg::LongFlag("query".to_string()),
1769            Arg::Positional(Expr::Literal(Value::String("test".to_string()))),
1770        ];
1771        let ctx = make_minimal_ctx();
1772
1773        let tool_args = build_tool_args(&args, &ctx, None);
1774
1775        // Without schema, --query is a flag and "test" is a positional
1776        assert!(tool_args.flags.contains("query"), "--query should be a flag");
1777        assert_eq!(
1778            tool_args.positional,
1779            vec![Value::String("test".to_string())],
1780            "'test' should be a positional"
1781        );
1782    }
1783
1784    #[test]
1785    fn test_unknown_flag_in_schema() {
1786        // --unknown-flag value: --unknown is bool (not in schema), "value" maps to query
1787        let args = vec![
1788            Arg::LongFlag("unknown".to_string()),
1789            Arg::Positional(Expr::Literal(Value::String("value".to_string()))),
1790        ];
1791        let schema = make_test_schema();
1792        let ctx = make_minimal_ctx();
1793
1794        let tool_args = build_tool_args(&args, &ctx, Some(&schema));
1795
1796        assert!(tool_args.flags.contains("unknown"));
1797        assert!(tool_args.positional.is_empty(), "value consumed as query param");
1798        assert_eq!(
1799            tool_args.named.get("query"),
1800            Some(&Value::String("value".to_string()))
1801        );
1802    }
1803
1804    #[test]
1805    fn test_named_args_unchanged() {
1806        // key=value syntax should work regardless of schema
1807        let args = vec![
1808            Arg::Named {
1809                key: "query".to_string(),
1810                value: Expr::Literal(Value::String("test".to_string())),
1811            },
1812            Arg::LongFlag("verbose".to_string()),
1813        ];
1814        let schema = make_test_schema();
1815        let ctx = make_minimal_ctx();
1816
1817        let tool_args = build_tool_args(&args, &ctx, Some(&schema));
1818
1819        assert_eq!(
1820            tool_args.named.get("query"),
1821            Some(&Value::String("test".to_string()))
1822        );
1823        assert!(tool_args.flags.contains("verbose"));
1824    }
1825
1826    #[test]
1827    fn test_short_flags_unchanged() {
1828        // Short flags -la should expand regardless of schema; file.txt maps to query
1829        let args = vec![
1830            Arg::ShortFlag("la".to_string()),
1831            Arg::Positional(Expr::Literal(Value::String("file.txt".to_string()))),
1832        ];
1833        let schema = make_test_schema();
1834        let ctx = make_minimal_ctx();
1835
1836        let tool_args = build_tool_args(&args, &ctx, Some(&schema));
1837
1838        assert!(tool_args.flags.contains("l"));
1839        assert!(tool_args.flags.contains("a"));
1840        assert!(tool_args.positional.is_empty(), "file.txt consumed as query param");
1841        assert_eq!(
1842            tool_args.named.get("query"),
1843            Some(&Value::String("file.txt".to_string()))
1844        );
1845    }
1846
1847    #[test]
1848    fn test_flag_at_end_no_value() {
1849        // --output at end with no value available - treat as flag (lenient)
1850        // file.txt maps to query (first unfilled non-bool param)
1851        let args = vec![
1852            Arg::Positional(Expr::Literal(Value::String("file.txt".to_string()))),
1853            Arg::LongFlag("output".to_string()),
1854        ];
1855        let schema = make_test_schema();
1856        let ctx = make_minimal_ctx();
1857
1858        let tool_args = build_tool_args(&args, &ctx, Some(&schema));
1859
1860        // output expects a value but none available after it, so it becomes a flag
1861        assert!(tool_args.flags.contains("output"));
1862        assert!(tool_args.positional.is_empty(), "file.txt consumed as query param");
1863        assert_eq!(
1864            tool_args.named.get("query"),
1865            Some(&Value::String("file.txt".to_string()))
1866        );
1867    }
1868
1869    #[test]
1870    fn test_positional_skips_bool_params() {
1871        // Schema: [query: string, verbose: bool, output: string]
1872        // Args: "val1" "val2"
1873        // Expected: query="val1", verbose unset, output="val2"
1874        let schema = ToolSchema::new("test", "")
1875            .param(ParamSchema::required("query", "string", ""))
1876            .param(ParamSchema::optional(
1877                "verbose",
1878                "bool",
1879                Value::Bool(false),
1880                "",
1881            ))
1882            .param(ParamSchema::optional(
1883                "output",
1884                "string",
1885                Value::Null,
1886                "",
1887            ))
1888            .with_positional_mapping();
1889        let args = vec![
1890            Arg::Positional(Expr::Literal(Value::String("val1".to_string()))),
1891            Arg::Positional(Expr::Literal(Value::String("val2".to_string()))),
1892        ];
1893        let ctx = make_minimal_ctx();
1894
1895        let tool_args = build_tool_args(&args, &ctx, Some(&schema));
1896
1897        assert_eq!(
1898            tool_args.named.get("query"),
1899            Some(&Value::String("val1".to_string()))
1900        );
1901        assert_eq!(
1902            tool_args.named.get("output"),
1903            Some(&Value::String("val2".to_string()))
1904        );
1905        assert!(!tool_args.flags.contains("verbose"));
1906        assert!(tool_args.positional.is_empty());
1907    }
1908
1909    #[test]
1910    fn test_positionals_fill_available_slots() {
1911        // Schema has query (string), limit (int), verbose (bool), output (string).
1912        // Three positionals fill the 3 non-bool slots.
1913        let args = vec![
1914            Arg::Positional(Expr::Literal(Value::String("val1".to_string()))),
1915            Arg::Positional(Expr::Literal(Value::String("val2".to_string()))),
1916            Arg::Positional(Expr::Literal(Value::String("val3".to_string()))),
1917        ];
1918        let schema = make_test_schema(); // query, limit(int), verbose(bool), output
1919        let ctx = make_minimal_ctx();
1920
1921        let tool_args = build_tool_args(&args, &ctx, Some(&schema));
1922
1923        // val1 → query, val2 → limit (int param but receives string — tool decides),
1924        // val3 → output
1925        assert_eq!(
1926            tool_args.named.get("query"),
1927            Some(&Value::String("val1".to_string()))
1928        );
1929        assert_eq!(
1930            tool_args.named.get("limit"),
1931            Some(&Value::String("val2".to_string()))
1932        );
1933        assert_eq!(
1934            tool_args.named.get("output"),
1935            Some(&Value::String("val3".to_string()))
1936        );
1937        assert!(tool_args.positional.is_empty());
1938    }
1939
1940    #[test]
1941    fn test_truly_excess_positionals() {
1942        // More positionals than non-bool schema params — leftovers stay positional
1943        let schema = ToolSchema::new("test", "")
1944            .param(ParamSchema::required("name", "string", ""))
1945            .with_positional_mapping();
1946        let args = vec![
1947            Arg::Positional(Expr::Literal(Value::String("first".to_string()))),
1948            Arg::Positional(Expr::Literal(Value::String("second".to_string()))),
1949            Arg::Positional(Expr::Literal(Value::String("third".to_string()))),
1950        ];
1951        let ctx = make_minimal_ctx();
1952
1953        let tool_args = build_tool_args(&args, &ctx, Some(&schema));
1954
1955        assert_eq!(
1956            tool_args.named.get("name"),
1957            Some(&Value::String("first".to_string()))
1958        );
1959        assert_eq!(
1960            tool_args.positional,
1961            vec![
1962                Value::String("second".to_string()),
1963                Value::String("third".to_string()),
1964            ]
1965        );
1966    }
1967
1968    #[test]
1969    fn test_double_dash_positional_not_mapped() {
1970        // `tool val1 -- val2` — val1 maps to query, val2 stays positional (post-dash)
1971        let args = vec![
1972            Arg::Positional(Expr::Literal(Value::String("val1".to_string()))),
1973            Arg::DoubleDash,
1974            Arg::Positional(Expr::Literal(Value::String("val2".to_string()))),
1975        ];
1976        let schema = make_test_schema();
1977        let ctx = make_minimal_ctx();
1978
1979        let tool_args = build_tool_args(&args, &ctx, Some(&schema));
1980
1981        assert_eq!(
1982            tool_args.named.get("query"),
1983            Some(&Value::String("val1".to_string()))
1984        );
1985        // val2 is after --, should NOT be mapped even though schema has unfilled params
1986        assert_eq!(
1987            tool_args.positional,
1988            vec![Value::String("val2".to_string())]
1989        );
1990    }
1991
1992    #[test]
1993    fn test_all_params_filled_by_flags() {
1994        // All schema params satisfied by explicit flags — no positional mapping needed
1995        let args = vec![
1996            Arg::LongFlag("query".to_string()),
1997            Arg::Positional(Expr::Literal(Value::String("search".to_string()))),
1998            Arg::LongFlag("output".to_string()),
1999            Arg::Positional(Expr::Literal(Value::String("out.txt".to_string()))),
2000            Arg::LongFlag("verbose".to_string()),
2001        ];
2002        let schema = make_test_schema();
2003        let ctx = make_minimal_ctx();
2004
2005        let tool_args = build_tool_args(&args, &ctx, Some(&schema));
2006
2007        assert_eq!(
2008            tool_args.named.get("query"),
2009            Some(&Value::String("search".to_string()))
2010        );
2011        assert_eq!(
2012            tool_args.named.get("output"),
2013            Some(&Value::String("out.txt".to_string()))
2014        );
2015        assert!(tool_args.flags.contains("verbose"));
2016        assert!(tool_args.positional.is_empty());
2017    }
2018
2019    #[test]
2020    fn test_mixed_flags_and_positional_fill() {
2021        // --output foo val1 — output is explicit, val1 maps to query
2022        let args = vec![
2023            Arg::LongFlag("output".to_string()),
2024            Arg::Positional(Expr::Literal(Value::String("foo".to_string()))),
2025            Arg::Positional(Expr::Literal(Value::String("val1".to_string()))),
2026        ];
2027        let schema = make_test_schema();
2028        let ctx = make_minimal_ctx();
2029
2030        let tool_args = build_tool_args(&args, &ctx, Some(&schema));
2031
2032        assert_eq!(
2033            tool_args.named.get("output"),
2034            Some(&Value::String("foo".to_string()))
2035        );
2036        assert_eq!(
2037            tool_args.named.get("query"),
2038            Some(&Value::String("val1".to_string()))
2039        );
2040        assert!(tool_args.positional.is_empty());
2041    }
2042
2043    #[test]
2044    fn test_alias_flag_prevents_mapping_overwrite() {
2045        // -q "search" "out.txt" — -q is alias for query, so out.txt should map to output
2046        let schema = ToolSchema::new("test", "")
2047            .param(ParamSchema::required("query", "string", "").with_aliases(["-q"]))
2048            .param(ParamSchema::required("output", "string", ""))
2049            .with_positional_mapping();
2050        let args = vec![
2051            Arg::ShortFlag("q".to_string()),
2052            Arg::Positional(Expr::Literal(Value::String("search".to_string()))),
2053            Arg::Positional(Expr::Literal(Value::String("out.txt".to_string()))),
2054        ];
2055        let ctx = make_minimal_ctx();
2056
2057        let tool_args = build_tool_args(&args, &ctx, Some(&schema));
2058
2059        assert_eq!(
2060            tool_args.named.get("query"),
2061            Some(&Value::String("search".to_string()))
2062        );
2063        assert_eq!(
2064            tool_args.named.get("output"),
2065            Some(&Value::String("out.txt".to_string()))
2066        );
2067        assert!(tool_args.positional.is_empty());
2068    }
2069
2070    #[test]
2071    fn test_builtin_schema_no_positional_mapping() {
2072        // Builtins have map_positionals=false — positionals stay positional
2073        let schema = ToolSchema::new("echo", "")
2074            .param(ParamSchema::optional("args", "any", Value::Null, ""))
2075            .param(ParamSchema::optional("no_newline", "bool", Value::Bool(false), ""));
2076        // Note: no .with_positional_mapping() — this is a builtin
2077        let args = vec![
2078            Arg::Positional(Expr::Literal(Value::String("hello".to_string()))),
2079            Arg::Positional(Expr::Literal(Value::String("world".to_string()))),
2080        ];
2081        let ctx = make_minimal_ctx();
2082
2083        let tool_args = build_tool_args(&args, &ctx, Some(&schema));
2084
2085        // Positionals should NOT be consumed as named params
2086        assert_eq!(
2087            tool_args.positional,
2088            vec![
2089                Value::String("hello".to_string()),
2090                Value::String("world".to_string()),
2091            ]
2092        );
2093        assert!(tool_args.named.get("args").is_none());
2094    }
2095
2096    #[test]
2097    fn test_short_flag_with_alias_consumes_value() {
2098        // `-n 5` where `-n` is aliased to `lines` (type: int)
2099        // Should produce named: {"lines": 5}, not flags: {"n"} + positional: [5]
2100        let schema = ToolSchema::new("head", "Output first part of files")
2101            .param(ParamSchema::optional("lines", "int", Value::Int(10), "Number of lines")
2102                .with_aliases(["-n"]));
2103        let args = vec![
2104            Arg::ShortFlag("n".to_string()),
2105            Arg::Positional(Expr::Literal(Value::Int(5))),
2106            Arg::Positional(Expr::Literal(Value::String("/tmp/file.txt".to_string()))),
2107        ];
2108        let ctx = make_minimal_ctx();
2109
2110        let tool_args = build_tool_args(&args, &ctx, Some(&schema));
2111
2112        assert!(tool_args.flags.is_empty(), "no boolean flags: {:?}", tool_args.flags);
2113        assert_eq!(tool_args.named.get("lines"), Some(&Value::Int(5)), "should resolve alias to canonical name");
2114        assert_eq!(tool_args.positional, vec![Value::String("/tmp/file.txt".to_string())]);
2115    }
2116
2117    // === Redirect Execution Tests ===
2118
2119    #[tokio::test]
2120    async fn test_merge_stderr_redirect() {
2121        // Test that 2>&1 merges stderr into stdout
2122        let result = ExecResult::from_output(0, "stdout content", "stderr content");
2123
2124        let redirects = vec![Redirect {
2125            kind: RedirectKind::MergeStderr,
2126            target: Expr::Literal(Value::Null),
2127        }];
2128
2129        let ctx = make_minimal_ctx();
2130        let result = apply_redirects(result, &redirects, &ctx).await;
2131
2132        assert_eq!(&*result.text_out(), "stdout contentstderr content");
2133        assert!(result.err.is_empty());
2134    }
2135
2136    #[tokio::test]
2137    async fn test_merge_stderr_with_empty_stderr() {
2138        // Test that 2>&1 handles empty stderr gracefully
2139        let result = ExecResult::from_output(0, "stdout only", "");
2140
2141        let redirects = vec![Redirect {
2142            kind: RedirectKind::MergeStderr,
2143            target: Expr::Literal(Value::Null),
2144        }];
2145
2146        let ctx = make_minimal_ctx();
2147        let result = apply_redirects(result, &redirects, &ctx).await;
2148
2149        assert_eq!(&*result.text_out(), "stdout only");
2150        assert!(result.err.is_empty());
2151    }
2152
2153    #[tokio::test]
2154    async fn test_merge_stderr_order_matters() {
2155        // Test redirect ordering: 2>&1 > file means:
2156        // 1. First merge stderr into stdout
2157        // 2. Then write stdout to file (leaving both empty for piping)
2158        // This verifies left-to-right processing
2159        let result = ExecResult::from_output(0, "stdout\n", "stderr\n");
2160
2161        // Just 2>&1 - should merge
2162        let redirects = vec![Redirect {
2163            kind: RedirectKind::MergeStderr,
2164            target: Expr::Literal(Value::Null),
2165        }];
2166
2167        let ctx = make_minimal_ctx();
2168        let result = apply_redirects(result, &redirects, &ctx).await;
2169
2170        assert_eq!(&*result.text_out(), "stdout\nstderr\n");
2171        assert!(result.err.is_empty());
2172    }
2173
2174    #[tokio::test]
2175    async fn test_redirect_with_command_execution() {
2176        let (runner, mut ctx, dispatcher) = make_runner_and_ctx().await;
2177
2178        // echo "hello" with 2>&1 redirect
2179        let cmd = Command {
2180            name: "echo".to_string(),
2181            args: vec![Arg::Positional(Expr::Literal(Value::String("hello".to_string())))],
2182            redirects: vec![Redirect {
2183                kind: RedirectKind::MergeStderr,
2184                target: Expr::Literal(Value::Null),
2185            }],
2186        };
2187
2188        let result = runner.run(&[cmd], &mut ctx, &dispatcher).await;
2189        assert!(result.ok());
2190        // echo produces no stderr, so this just validates the redirect doesn't break anything
2191        assert!(result.text_out().contains("hello"));
2192    }
2193
2194    #[tokio::test]
2195    async fn test_merge_stderr_in_pipeline() {
2196        let (runner, mut ctx, dispatcher) = make_runner_and_ctx().await;
2197
2198        // echo "output" 2>&1 | grep "output"
2199        // The 2>&1 should be applied to echo's result, then piped to grep
2200        let echo_cmd = Command {
2201            name: "echo".to_string(),
2202            args: vec![Arg::Positional(Expr::Literal(Value::String("output".to_string())))],
2203            redirects: vec![Redirect {
2204                kind: RedirectKind::MergeStderr,
2205                target: Expr::Literal(Value::Null),
2206            }],
2207        };
2208        let grep_cmd = Command {
2209            name: "grep".to_string(),
2210            args: vec![Arg::Positional(Expr::Literal(Value::String("output".to_string())))],
2211            redirects: vec![],
2212        };
2213
2214        let result = runner.run(&[echo_cmd, grep_cmd], &mut ctx, &dispatcher).await;
2215        assert!(result.ok(), "result failed: code={}, err={}", result.code, result.err);
2216        assert!(result.text_out().contains("output"));
2217    }
2218}
kaish_kernel/scheduler/pipeline.rs

kaish_kernel/scheduler/
pipeline.rs