Skip to main content

a3s_code_core/tools/
program_tool.rs

1//! Tool wrapper for programmatic tool calling.
2
3use crate::program::ProgramCatalog;
4use crate::text::truncate_utf8;
5use crate::tools::types::{Tool, ToolContext, ToolOutput};
6use crate::tools::ToolRegistry;
7use anyhow::{anyhow, Result};
8use async_trait::async_trait;
9use rquickjs::function::{Async, Func};
10use rquickjs::{async_with, AsyncContext, AsyncRuntime, CatchResultExt, Error as JsError, Promise};
11use serde::Deserialize;
12use std::collections::HashSet;
13use std::sync::Arc;
14use std::time::Instant;
15use tokio::sync::Mutex;
16use tokio::time::{timeout, Duration};
17
/// Default script timeout in milliseconds, used when `limits.timeoutMs` is not
/// given and the script cannot call a delegation tool.
const DEFAULT_SCRIPT_TIMEOUT_MS: u64 = 30_000;
/// Scripts allowed to delegate (`task`/`parallel_task`) run child agents that
/// each take a full LLM turn, so they need a far more generous default timeout.
const DELEGATION_SCRIPT_TIMEOUT_MS: u64 = 600_000;
/// Default number of host tool calls a script may make when
/// `limits.maxToolCalls` is not given.
const DEFAULT_SCRIPT_MAX_TOOL_CALLS: usize = 20;
/// Default byte cap applied to each tool output handed back to the script when
/// `limits.maxOutputBytes` is not given.
const DEFAULT_SCRIPT_MAX_OUTPUT_BYTES: usize = 64 * 1024;
/// Largest script source, in bytes, that is accepted for execution.
const MAX_SCRIPT_SOURCE_BYTES: usize = 64 * 1024;
25
26pub struct ProgramTool {
27    registry: Arc<ToolRegistry>,
28}
29
30impl ProgramTool {
31    pub fn new(registry: Arc<ToolRegistry>) -> Self {
32        Self { registry }
33    }
34
35    pub fn with_catalog(registry: Arc<ToolRegistry>, _catalog: ProgramCatalog) -> Self {
36        Self { registry }
37    }
38}
39
40#[async_trait]
41impl Tool for ProgramTool {
42    fn name(&self) -> &str {
43        "program"
44    }
45
46    fn description(&self) -> &str {
47        "Run a sandboxed JavaScript PTC script. The script defines async function run(ctx, inputs) and may call only allowed ctx tools."
48    }
49
50    fn parameters(&self) -> serde_json::Value {
51        serde_json::json!({
52            "type": "object",
53            "additionalProperties": false,
54            "properties": {
55                "type": {
56                    "type": "string",
57                    "description": "Required. Program kind. Only \"script\" is supported.",
58                    "enum": ["script"]
59                },
60                "inputs": {
61                    "type": "object",
62                    "description": "Optional. JSON inputs passed to the script as the second argument."
63                },
64                "language": {
65                    "type": "string",
66                    "description": "Script language. Only JavaScript is supported.",
67                    "enum": ["javascript"]
68                },
69                "source": {
70                    "type": "string",
71                    "description": "Inline JavaScript source defining async function run(ctx, inputs)."
72                },
73                "path": {
74                    "type": "string",
75                    "description": "Workspace-relative path to a .js or .mjs script defining async function run(ctx, inputs). Used when source is omitted."
76                },
77                "allowed_tools": {
78                    "type": "array",
79                    "description": "Tool names the script may call through ctx. Defaults to all registered tools except program.",
80                    "items": { "type": "string" }
81                },
82                "limits": {
83                    "type": "object",
84                    "description": "Optional timeoutMs, maxToolCalls, and maxOutputBytes.",
85                    "additionalProperties": false,
86                    "properties": {
87                        "timeoutMs": { "type": "integer", "minimum": 1 },
88                        "maxToolCalls": { "type": "integer", "minimum": 1 },
89                        "maxOutputBytes": { "type": "integer", "minimum": 1 }
90                    }
91                }
92            },
93            "required": ["type"]
94        })
95    }
96
97    async fn execute(&self, args: &serde_json::Value, ctx: &ToolContext) -> Result<ToolOutput> {
98        let Some(kind) = args.get("type").and_then(|value| value.as_str()) else {
99            return Ok(ToolOutput::error("type parameter is required"));
100        };
101        if kind != "script" {
102            return Ok(ToolOutput::error(format!(
103                "Unsupported program type: {kind}. Only \"script\" is supported."
104            )));
105        }
106        let inputs = args
107            .get("inputs")
108            .cloned()
109            .unwrap_or_else(|| serde_json::json!({}));
110
111        execute_script_program(args, inputs, Arc::clone(&self.registry), ctx).await
112    }
113}
114
/// Optional per-run overrides read from the `limits` argument.
///
/// Fields use camelCase in JSON (`timeoutMs`, `maxToolCalls`,
/// `maxOutputBytes`). A `None` field means the built-in default applies.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
struct ScriptLimits {
    /// Script timeout in milliseconds.
    timeout_ms: Option<u64>,
    /// Maximum number of host tool calls the script may make.
    max_tool_calls: Option<usize>,
    /// Byte cap applied to each tool output returned to the script.
    max_output_bytes: Option<usize>,
}
122
/// Summary of one host tool call made by a script, kept for reporting.
#[derive(Debug, Clone)]
struct ScriptCallRecord {
    /// Name of the tool the script asked for.
    tool_name: String,
    /// Whether the call is considered successful (recorded as `exit_code == 0`).
    success: bool,
    /// Exit code reported by the tool.
    exit_code: i32,
    /// Length in bytes of the output handed back to the script.
    output_bytes: usize,
    /// Metadata reported by the tool, if any.
    metadata: Option<serde_json::Value>,
}
131
132async fn execute_script_program(
133    args: &serde_json::Value,
134    inputs: serde_json::Value,
135    registry: Arc<ToolRegistry>,
136    ctx: &ToolContext,
137) -> Result<ToolOutput> {
138    let language = args
139        .get("language")
140        .and_then(|value| value.as_str())
141        .unwrap_or("javascript");
142    if language != "javascript" {
143        return Ok(ToolOutput::error(format!(
144            "Unsupported script language: {language}"
145        )));
146    }
147
148    let source = match load_script_source(args, ctx).await {
149        Ok(source) => source,
150        Err(message) => return Ok(ToolOutput::error(message)),
151    };
152    if source.len() > MAX_SCRIPT_SOURCE_BYTES {
153        return Ok(ToolOutput::error(format!(
154            "script source is too large: {} bytes exceeds {} bytes",
155            source.len(),
156            MAX_SCRIPT_SOURCE_BYTES
157        )));
158    }
159    if let Err(message) = validate_script_source(&source) {
160        return Ok(ToolOutput::error(message));
161    }
162
163    let allowed_tools = script_allowed_tools(args, &registry);
164    let limits = script_limits(args);
165    match run_quickjs_script(
166        &source,
167        inputs,
168        registry,
169        ctx.clone(),
170        allowed_tools,
171        limits,
172    )
173    .await
174    {
175        Ok(output) => Ok(output),
176        Err(err) => Ok(ToolOutput::error(format!("program script failed: {err}"))),
177    }
178}
179
180async fn load_script_source(
181    args: &serde_json::Value,
182    ctx: &ToolContext,
183) -> std::result::Result<String, String> {
184    if let Some(source) = args.get("source").and_then(|value| value.as_str()) {
185        return Ok(source.to_string());
186    }
187
188    let Some(path) = args.get("path").and_then(|value| value.as_str()) else {
189        return Err("program script requires either source or path".to_string());
190    };
191    if !(path.ends_with(".js") || path.ends_with(".mjs")) {
192        return Err("program script path must point to a .js or .mjs file".to_string());
193    }
194
195    let workspace_path = ctx
196        .resolve_workspace_path(path)
197        .map_err(|err| format!("failed to resolve script path: {err}"))?;
198    ctx.workspace_services
199        .fs()
200        .read_text(&workspace_path)
201        .await
202        .map_err(|err| format!("failed to read script path '{}': {err}", path))
203}
204
205fn script_allowed_tools(args: &serde_json::Value, registry: &ToolRegistry) -> HashSet<String> {
206    let mut allowed = args
207        .get("allowed_tools")
208        .and_then(|value| value.as_array())
209        .map(|items| {
210            items
211                .iter()
212                .filter_map(|item| item.as_str())
213                .map(ToString::to_string)
214                .collect::<HashSet<_>>()
215        })
216        .unwrap_or_else(|| registry.list().into_iter().collect());
217
218    allowed.remove("program");
219    // `task`/`parallel_task` ARE allowed in PTC scripts now: host tool calls run
220    // on the outer multi-threaded runtime (see execute_host_tool_json), so
221    // `ctx.tool("parallel_task", …)` fans out child agents in parallel.
222    allowed
223}
224
225fn script_limits(args: &serde_json::Value) -> ScriptLimits {
226    args.get("limits")
227        .cloned()
228        .and_then(|value| serde_json::from_value(value).ok())
229        .unwrap_or(ScriptLimits {
230            timeout_ms: None,
231            max_tool_calls: None,
232            max_output_bytes: None,
233        })
234}
235
/// Rejects scripts that textually reference capabilities PTC scripts must not use.
///
/// A forbidden needle only counts when it starts a token: it sits at the very
/// beginning of the source, or the character before it is not an ASCII
/// identifier character (`A-Z`, `a-z`, `0-9`, `_`, `$`). This keeps harmless
/// names such as `myFunction(`, `prefetch(` or `retrieval(` from being
/// rejected, while `eval(`, `globalThis.eval(` and `new Function(` are still
/// refused.
///
/// This is a best-effort lexical check on the raw text; it also matches inside
/// comments and string literals.
///
/// # Errors
///
/// Returns the message for the first forbidden construct found, checked in the
/// order of the table below.
fn validate_script_source(source: &str) -> std::result::Result<(), String> {
    const FORBIDDEN: [(&str, &str); 7] = [
        ("import ", "imports are not allowed inside PTC scripts"),
        (
            "import(",
            "dynamic imports are not allowed inside PTC scripts",
        ),
        ("eval(", "eval is not allowed inside PTC scripts"),
        (
            "Function(",
            "Function constructor is not allowed inside PTC scripts",
        ),
        ("Worker(", "Worker is not allowed inside PTC scripts"),
        ("WebSocket", "WebSocket is not allowed inside PTC scripts"),
        (
            "fetch(",
            "fetch is not allowed inside PTC scripts; use ctx tools instead",
        ),
    ];

    /// True for characters that would make the needle the tail of a longer
    /// identifier. Non-ASCII characters deliberately do not qualify, so
    /// unusual input stays rejected.
    fn continues_identifier(ch: char) -> bool {
        ch.is_ascii_alphanumeric() || ch == '_' || ch == '$'
    }

    for (needle, message) in FORBIDDEN {
        let starts_token = source.match_indices(needle).any(|(at, _)| {
            !source[..at]
                .chars()
                .next_back()
                .map_or(false, continues_identifier)
        });
        if starts_token {
            return Err(message.to_string());
        }
    }
    Ok(())
}
263
/// Runs `source` in the embedded QuickJS VM and turns the outcome into a
/// `ToolOutput`.
///
/// The VM is driven on a blocking thread with its own current-thread runtime,
/// and the whole run is wrapped in a wall-clock timeout. Script errors, VM
/// thread failures, and timeouts are all returned as error `ToolOutput`s inside
/// `Ok`; the only `Err` path is a source without a recognizable `run`
/// entry point.
async fn run_quickjs_script(
    source: &str,
    inputs: serde_json::Value,
    registry: Arc<ToolRegistry>,
    ctx: ToolContext,
    allowed_tools: HashSet<String>,
    limits: ScriptLimits,
) -> Result<ToolOutput> {
    // A script that can delegate runs child agents (each a full LLM turn, often
    // 30s to several minutes), so the 30s default is far too short and silently
    // times out real workflows. Default delegation-capable scripts to a generous
    // timeout; pure compute/search scripts keep the short default. An explicit
    // limits.timeoutMs always wins.
    let delegating = allowed_tools.contains("parallel_task") || allowed_tools.contains("task");
    let timeout_ms = limits.timeout_ms.unwrap_or(if delegating {
        DELEGATION_SCRIPT_TIMEOUT_MS
    } else {
        DEFAULT_SCRIPT_TIMEOUT_MS
    });
    let max_tool_calls = limits
        .max_tool_calls
        .unwrap_or(DEFAULT_SCRIPT_MAX_TOOL_CALLS);
    let max_output_bytes = limits
        .max_output_bytes
        .unwrap_or(DEFAULT_SCRIPT_MAX_OUTPUT_BYTES);
    let executable_source = script_source_with_host_entrypoint(source)?;
    // Captured on the outer multi-threaded runtime (we're async here, before the
    // VM's nested single-thread runtime is built) so host tool calls fan out.
    let outer = tokio::runtime::Handle::current();
    // Shared between this function (which reads the call records afterwards) and
    // the host-tool callback running inside the VM.
    let state = Arc::new(Mutex::new(ScriptVmState {
        registry,
        ctx,
        allowed_tools,
        max_tool_calls,
        max_output_bytes,
        tool_calls: 0,
        records: Vec::new(),
        outer,
    }));

    let vm_state = Arc::clone(&state);
    // The same `timeout_ms` bounds the wait here and is passed into the VM,
    // where it feeds the interrupt handler.
    let result = timeout(
        Duration::from_millis(timeout_ms),
        tokio::task::spawn_blocking(move || {
            let runtime = tokio::runtime::Builder::new_current_thread()
                .enable_all()
                .build()
                .map_err(|err| anyhow!("failed to create program VM runtime: {err}"))?;
            runtime.block_on(run_embedded_script(
                executable_source,
                inputs,
                vm_state,
                timeout_ms,
            ))
        }),
    )
    .await;

    // `result` has three layers: the outer timeout, the blocking task's join
    // result, and the script's own result.
    match result {
        // Script finished: report the result together with the recorded tool calls.
        Ok(Ok(Ok(result))) => {
            let records = state.lock().await.records.clone();
            let output = render_script_output(&result, &records, "");
            Ok(ToolOutput::success(output).with_metadata(serde_json::json!({
                "program": {
                    "name": "script",
                    "language": "javascript",
                    "runtime": "embedded-quickjs",
                    "success": true,
                    "tool_calls": records.iter().map(script_record_to_value).collect::<Vec<_>>(),
                },
                "script_result": result,
            })))
        }
        // Script error that `is_quickjs_timeout` classifies as a timeout.
        Ok(Ok(Err(err))) if is_quickjs_timeout(&err) => Ok(ToolOutput::error(format!(
            "program script timed out after {timeout_ms} ms"
        ))),
        // Any other script error.
        Ok(Ok(Err(err))) => Ok(ToolOutput::error(format!("program script error:\n{err}"))),
        // The blocking task itself could not be joined.
        Ok(Err(err)) => Ok(ToolOutput::error(format!(
            "program VM thread failed: {err}"
        ))),
        // The outer wall-clock timeout elapsed first.
        Err(_) => Ok(ToolOutput::error(format!(
            "program script timed out after {timeout_ms} ms"
        ))),
    }
}
349
350fn script_source_with_host_entrypoint(source: &str) -> Result<String> {
351    let rewritten = if source.contains("export default async function run") {
352        source.replacen("export default async function run", "async function run", 1)
353    } else if source.contains("export default function run") {
354        source.replacen("export default function run", "function run", 1)
355    } else if source.contains("async function run") || source.contains("function run") {
356        source.to_string()
357    } else {
358        return Err(anyhow!(
359            "PTC script must define async function run(ctx, inputs)"
360        ));
361    };
362
363    Ok(format!(
364        r#"{rewritten}
365
366globalThis.__a3sResultJson = (async () => JSON.stringify(await run(globalThis.__a3sCtx, globalThis.__a3sInputs)))();
367"#
368    ))
369}
370
/// Evaluates the prepared script inside a fresh QuickJS runtime and returns
/// the JSON value produced by its `run` function.
///
/// `source` is expected to already contain the host entry-point call. The
/// bootstrap prelude (with `inputs` serialized into it) is prepended before
/// evaluation.
///
/// # Errors
///
/// Fails when the runtime or context cannot be created, `inputs` cannot be
/// serialized, the host tool cannot be installed, the script fails to evaluate
/// or its promise rejects, or the result is not valid JSON.
async fn run_embedded_script(
    source: String,
    inputs: serde_json::Value,
    state: Arc<Mutex<ScriptVmState>>,
    timeout_ms: u64,
) -> Result<serde_json::Value> {
    let runtime = AsyncRuntime::new()?;
    let started = Instant::now();
    // The handler reports `true` once `timeout_ms` has elapsed since the
    // runtime was created; presumably that makes QuickJS interrupt the running
    // script — confirm against the rquickjs documentation.
    runtime
        .set_interrupt_handler(Some(Box::new(move || {
            started.elapsed() >= Duration::from_millis(timeout_ms)
        })))
        .await;
    // Resource caps: 64 MiB of memory and 512 KiB of stack.
    runtime.set_memory_limit(64 * 1024 * 1024).await;
    runtime.set_max_stack_size(512 * 1024).await;

    let context = AsyncContext::full(&runtime).await?;
    let inputs_json = serde_json::to_string(&inputs)?;
    // Bootstrap prelude first, then the user script with its entry-point call.
    let script = format!("{}\n{}", embedded_script_bootstrap(&inputs_json), source);
    let result_json = async_with!(context => |ctx| {
        let state = Arc::clone(&state);
        // Bridge from JavaScript to the host: each call gets its own handle to
        // the shared VM state.
        let host_tool = move |tool: String, args_json: String| {
            let state = Arc::clone(&state);
            async move { execute_host_tool_json(state, tool, args_json).await }
        };
        if let Err(err) = ctx.globals().set("__a3sHostTool", Func::from(Async(host_tool))) {
            return Err(format!("failed to install program host tool: {err}"));
        }
        // The script is expected to evaluate to a promise.
        let promise: Promise = match ctx.eval(script) {
            Ok(promise) => promise,
            Err(err) => return Err(format!("failed to evaluate program script: {err}")),
        };
        // Await the promise as a string; a JS exception becomes an error message.
        promise
            .into_future::<String>()
            .await
            .catch(&ctx)
            .map_err(|err| err.to_string())
    })
    .await
    .map_err(anyhow::Error::msg)?;

    serde_json::from_str(&result_json)
        .map_err(|err| anyhow!("program script returned invalid JSON: {err}"))
}
415
/// State shared between the script runner and the host-tool callback for one
/// script run.
struct ScriptVmState {
    /// Registry used to execute the tools a script asks for.
    registry: Arc<ToolRegistry>,
    /// Tool context passed to every host tool call.
    ctx: ToolContext,
    /// Names of the tools this script may call.
    allowed_tools: HashSet<String>,
    /// Maximum number of tool calls before further calls are refused.
    max_tool_calls: usize,
    /// Byte cap applied to each tool output returned to the script.
    max_output_bytes: usize,
    /// Number of tool calls attempted so far.
    tool_calls: usize,
    /// One record per tool call that returned a result.
    records: Vec<ScriptCallRecord>,
    /// Handle to the OUTER multi-threaded session runtime. The script VM runs on
    /// a nested single-thread runtime; host tool calls are dispatched here so
    /// delegation tools (`parallel_task`/`task`) can actually fan out children.
    outer: tokio::runtime::Handle,
}
429
430fn embedded_script_bootstrap(inputs_json: &str) -> String {
431    format!(
432        r#"
433const __a3sCallTool = async (tool, args = {{}}) => {{
434  const response = await globalThis.__a3sHostTool(String(tool), JSON.stringify(args ?? {{}}));
435  return JSON.parse(response);
436}};
437
438const __a3sCtx = Object.freeze({{
439  tool: __a3sCallTool,
440  readFile: (path) => __a3sCallTool("read", {{ file_path: path }}).then((r) => r.output),
441  read: (path) => __a3sCallTool("read", {{ file_path: path }}),
442  grep: (pattern, options = {{}}) => __a3sCallTool("grep", {{ pattern, ...options }}).then((r) => r.output),
443  glob: (pattern, options = {{}}) => __a3sCallTool("glob", {{ pattern, ...options }}).then((r) => r.output),
444  ls: (path = ".") => __a3sCallTool("ls", {{ path }}).then((r) => r.output),
445  bash: (command) => __a3sCallTool("bash", {{ command }}).then((r) => r.output),
446  git: (args = {{}}) => __a3sCallTool("git", args),
447  webSearch: (params) => __a3sCallTool("web_search", params),
448  verify: (args) => __a3sCallTool("bash", args),
449}});
450
451Object.defineProperty(globalThis, "__a3sCtx", {{ value: __a3sCtx, configurable: false }});
452Object.defineProperty(globalThis, "__a3sInputs", {{ value: {inputs_json}, configurable: false }});
453Object.defineProperty(globalThis, "fetch", {{ value: undefined, configurable: false, writable: false }});
454Object.defineProperty(globalThis, "WebSocket", {{ value: undefined, configurable: false, writable: false }});
455Object.defineProperty(globalThis, "Worker", {{ value: undefined, configurable: false, writable: false }});
456"#
457    )
458}
459
460async fn execute_host_tool_json(
461    state: Arc<Mutex<ScriptVmState>>,
462    tool: String,
463    args_json: String,
464) -> rquickjs::Result<String> {
465    let args = serde_json::from_str(&args_json).map_err(|err| {
466        JsError::new_from_js_message("string", "object", format!("invalid tool args JSON: {err}"))
467    })?;
468    let (registry, ctx, max_output_bytes, outer) = {
469        let mut script = state.lock().await;
470        if !script.allowed_tools.contains(&tool) {
471            return Err(JsError::new_from_js_message(
472                "tool",
473                "allowed tool",
474                format!("tool '{tool}' is not allowed for this PTC script"),
475            ));
476        }
477        script.tool_calls += 1;
478        if script.tool_calls > script.max_tool_calls {
479            return Err(JsError::new_from_js_message(
480                "tool call",
481                "limited tool call",
482                format!("PTC script exceeded maxToolCalls={}", script.max_tool_calls),
483            ));
484        }
485        (
486            Arc::clone(&script.registry),
487            script.ctx.clone(),
488            script.max_output_bytes,
489            script.outer.clone(),
490        )
491    };
492
493    // Run the tool on the OUTER multi-threaded runtime (not this nested
494    // single-thread VM runtime) so delegation tools can spawn child agents that
495    // actually run in parallel — `ctx.tool("parallel_task", …)` now fans out.
496    let tool_for_spawn = tool.clone();
497    let result = outer
498        .spawn(async move {
499            registry
500                .execute_with_context(&tool_for_spawn, &args, &ctx)
501                .await
502        })
503        .await
504        .map_err(|err| JsError::new_from_js_message("tool", "spawn", err.to_string()))?
505        .map_err(|err| JsError::new_from_js_message("tool", "result", err.to_string()))?;
506    let mut output = result.output;
507    if output.len() > max_output_bytes {
508        output = truncate_utf8(&output, max_output_bytes).to_string();
509    }
510    let success = result.exit_code == 0;
511    let metadata = result.metadata.clone();
512    let exit_code = result.exit_code;
513    let name = result.name;
514
515    {
516        let mut script = state.lock().await;
517        script.records.push(ScriptCallRecord {
518            tool_name: tool,
519            success,
520            exit_code,
521            output_bytes: output.len(),
522            metadata: metadata.clone(),
523        });
524    }
525
526    serde_json::to_string(&serde_json::json!({
527        "name": name,
528        "output": output,
529        "exitCode": exit_code,
530        "metadata": metadata,
531    }))
532    .map_err(|err| JsError::new_from_js_message("tool result", "json", err.to_string()))
533}
534
535fn is_quickjs_timeout(err: &anyhow::Error) -> bool {
536    let text = err.to_string();
537    text.contains("interrupted") || text.contains("InternalError")
538}
539
540fn script_record_to_value(record: &ScriptCallRecord) -> serde_json::Value {
541    serde_json::json!({
542        "tool_name": record.tool_name,
543        "success": record.success,
544        "exit_code": record.exit_code,
545        "output_bytes": record.output_bytes,
546        "metadata": record.metadata,
547    })
548}
549
550fn render_script_output(
551    result: &serde_json::Value,
552    records: &[ScriptCallRecord],
553    stderr: &str,
554) -> String {
555    let mut output = String::from("Program script completed.");
556    if let Some(summary) = result.get("summary").and_then(|value| value.as_str()) {
557        output.push('\n');
558        output.push_str(summary);
559    }
560
561    output.push_str(&format!("\n\nTool calls: {}", records.len()));
562    for (index, record) in records.iter().enumerate() {
563        output.push_str(&format!(
564            "\n{}. {} ({}, exit_code={}, output_bytes={})",
565            index + 1,
566            record.tool_name,
567            if record.success { "ok" } else { "failed" },
568            record.exit_code,
569            record.output_bytes
570        ));
571    }
572
573    output.push_str("\n\nResult:\n");
574    output.push_str(&serde_json::to_string_pretty(result).unwrap_or_else(|_| result.to_string()));
575
576    if !stderr.is_empty() {
577        output.push_str("\n\nstderr:\n");
578        output.push_str(stderr);
579    }
580
581    output
582}
583
#[cfg(test)]
mod tests {
    use super::*;
    use async_trait::async_trait;
    use std::path::PathBuf;

    /// Test tool that answers with `echo:<message>`.
    struct EchoTool;

    #[async_trait]
    impl Tool for EchoTool {
        fn name(&self) -> &str {
            "echo"
        }

        fn description(&self) -> &str {
            "Echo test tool"
        }

        fn parameters(&self) -> serde_json::Value {
            serde_json::json!({
                "type": "object",
                "properties": {
                    "message": { "type": "string" }
                }
            })
        }

        async fn execute(
            &self,
            args: &serde_json::Value,
            _ctx: &ToolContext,
        ) -> Result<ToolOutput> {
            let message = args["message"].as_str().unwrap_or("");
            Ok(ToolOutput::success(format!("echo:{message}")))
        }
    }

    /// Workspace root shared by the tests that do not touch the filesystem.
    fn tmp_root() -> PathBuf {
        PathBuf::from("/tmp")
    }

    /// Registry rooted at `/tmp` with no tools registered.
    fn empty_registry() -> Arc<ToolRegistry> {
        Arc::new(ToolRegistry::new(tmp_root()))
    }

    /// Registry rooted at `/tmp` that only knows the echo tool.
    fn echo_registry() -> Arc<ToolRegistry> {
        let registry = empty_registry();
        registry.register(Arc::new(EchoTool));
        registry
    }

    /// Runs the program tool with `args` in a context rooted at `workspace`.
    async fn run_program(
        registry: Arc<ToolRegistry>,
        workspace: PathBuf,
        args: serde_json::Value,
    ) -> ToolOutput {
        ProgramTool::new(registry)
            .execute(&args, &ToolContext::new(workspace))
            .await
            .unwrap()
    }

    #[tokio::test]
    async fn program_tool_rejects_non_script_type() {
        let args = serde_json::json!({ "type": "program_code_search" });
        let output = run_program(empty_registry(), tmp_root(), args).await;

        assert!(!output.success);
        assert!(output.content.contains("Only \"script\" is supported"));
    }

    #[tokio::test]
    async fn program_tool_rejects_missing_script_source_and_path() {
        let args = serde_json::json!({ "type": "script" });
        let output = run_program(empty_registry(), tmp_root(), args).await;

        assert!(!output.success);
        assert!(output.content.contains("requires either source or path"));
    }

    #[tokio::test]
    async fn program_tool_rejects_unsupported_language() {
        let args = serde_json::json!({
            "type": "script",
            "language": "typescript",
            "source": "async function run() { return {}; }"
        });
        let output = run_program(empty_registry(), tmp_root(), args).await;

        assert!(!output.success);
        assert!(output.content.contains("Unsupported script language"));
    }

    #[tokio::test]
    async fn program_tool_rejects_unsupported_script_path() {
        let dir = tempfile::tempdir().unwrap();
        let workspace = dir.path().to_path_buf();
        std::fs::write(workspace.join("script.txt"), "async function run() {}").unwrap();

        let registry = Arc::new(ToolRegistry::new(workspace.clone()));
        let args = serde_json::json!({
            "type": "script",
            "path": "script.txt"
        });
        let output = run_program(registry, workspace, args).await;

        assert!(!output.success);
        assert!(output.content.contains(".js or .mjs file"));
    }

    #[test]
    fn program_tool_default_allowed_tools_include_registry_tools_except_program() {
        let registry = ToolRegistry::new(tmp_root());
        registry.register(Arc::new(EchoTool));
        registry.register_builtin(Arc::new(ProgramTool::new(empty_registry())));

        let allowed = script_allowed_tools(&serde_json::json!({}), &registry);

        assert!(allowed.contains("echo"));
        assert!(!allowed.contains("program"));
    }

    #[test]
    fn program_tool_allows_delegation_tools_in_scripts() {
        // Delegation tools are allowed in PTC scripts again (host tool calls run
        // on the outer multi-threaded runtime, so they fan out). Only `program`
        // stays stripped (no nested PTC recursion).
        let registry = ToolRegistry::new(tmp_root());
        let args = serde_json::json!({
            "allowed_tools": ["parallel_task", "task", "program", "echo"]
        });

        let allowed = script_allowed_tools(&args, &registry);

        for expected in ["parallel_task", "task", "echo"] {
            assert!(allowed.contains(expected));
        }
        assert!(!allowed.contains("program"));
    }

    #[tokio::test]
    async fn program_tool_source_uses_default_all_registered_tools() {
        let args = serde_json::json!({
            "type": "script",
            "source": r#"
                        async function run(ctx, inputs) {
                            const result = await ctx.tool("echo", { message: inputs.message });
                            return { summary: result.output, result };
                        }
                    "#,
            "inputs": { "message": "hello" }
        });
        let output = run_program(echo_registry(), tmp_root(), args).await;

        assert!(output.success, "{}", output.content);
        assert!(output.content.contains("echo:hello"));
        let metadata = output.metadata.unwrap();
        assert_eq!(metadata["program"]["runtime"], "embedded-quickjs");
        assert_eq!(metadata["script_result"]["summary"], "echo:hello");
    }

    #[tokio::test]
    async fn program_tool_explicit_allowed_tools_restrict_default_tools() {
        let args = serde_json::json!({
            "type": "script",
            "source": r#"
                        async function run(ctx) {
                            await ctx.tool("echo", { message: "blocked" });
                            return {};
                        }
                    "#,
            "allowed_tools": ["read"]
        });
        let output = run_program(echo_registry(), tmp_root(), args).await;

        assert!(!output.success);
        assert!(output.content.contains("tool 'echo' is not allowed"));
    }

    #[tokio::test]
    async fn program_tool_enforces_max_tool_calls() {
        let args = serde_json::json!({
            "type": "script",
            "source": r#"
                        async function run(ctx) {
                            await ctx.tool("echo", { message: "one" });
                            await ctx.tool("echo", { message: "two" });
                            return {};
                        }
                    "#,
            "limits": { "maxToolCalls": 1 }
        });
        let output = run_program(echo_registry(), tmp_root(), args).await;

        assert!(!output.success);
        assert!(output.content.contains("exceeded maxToolCalls=1"));
    }

    #[test]
    fn program_tool_rejects_fetch_source_access() {
        let script = "export default async function run() { return fetch('/'); }";
        let err = validate_script_source(script).unwrap_err();
        assert!(err.contains("fetch is not allowed"));
    }

    #[test]
    fn program_tool_accepts_plain_function_run_entrypoint() {
        let script = "async function run(ctx, inputs) { return { summary: inputs.message }; }";
        let source = script_source_with_host_entrypoint(script).unwrap();

        assert!(source.contains("globalThis.__a3sResultJson"));
        assert!(source.contains("async function run"));
    }

    #[test]
    fn program_tool_renders_result_summary_and_tool_records() {
        let record = ScriptCallRecord {
            tool_name: "echo".to_string(),
            success: true,
            exit_code: 0,
            output_bytes: 8,
            metadata: Some(serde_json::json!({ "kind": "test" })),
        };
        let result = serde_json::json!({ "summary": "done", "items": [1] });

        let output = render_script_output(&result, &[record], "");

        for expected in ["Program script completed.", "done", "echo (ok", "\"items\""] {
            assert!(output.contains(expected));
        }
    }
}