agentwerk/tools/
tool.rs

1//! Core tool infrastructure: the `ToolLike` trait, the ad-hoc `Tool` struct, and the registry the loop consults before each provider call.
2
3use std::collections::HashSet;
4use std::future::Future;
5use std::path::{Path, PathBuf};
6use std::pin::Pin;
7use std::sync::atomic::{AtomicBool, Ordering};
8use std::sync::{Arc, Mutex};
9
10use serde::{Deserialize, Serialize};
11use serde_json::Value;
12
13use crate::agents::knowledge::Knowledge;
14use crate::agents::tickets::TicketSystem;
15use crate::providers::types::ContentBlock;
16use crate::providers::{ProviderResult, ProviderToolDefinition};
17
18use super::error::ToolError;
19
/// Largest tool result, in bytes, that is handed to the model inline.
/// A bigger output is written to `<ticket-dir>/outputs/<tool_use_id>.txt`
/// and the model receives a stub in its place. Roughly 12.5K tokens
/// under the `bytes/4` estimator.
const PER_TOOL_CAP: usize = 50_000;

/// Byte budget for all tool results of a single turn combined. Several
/// parallel tools can each stay under the per-tool cap and still exceed
/// this budget; the largest results are then offloaded until the turn
/// fits. Roughly 50K tokens.
const PER_TURN_CAP: usize = 200_000;

/// Size of the preview window kept inside a stub, measured in bytes
/// (not `char`s, despite the name). The preview ends at the last
/// newline inside the window when there is one; otherwise it is floored
/// to a UTF-8 boundary so no multi-byte code point is split.
const PREVIEW_CHARS: usize = 2_000;
35
36/// Context passed to tool execution. `tool_registry` and the ticket-side
37/// fields are ambient internals — only the built-in `ToolSearchTool` and
38/// the ticket tools (`Read`/`Write`/`Manage`) read them. External tool
39/// authors use `dir`, `interrupt_signal`, and
40/// `wait_for_cancel`.
41#[derive(Clone)]
42pub struct ToolContext {
43    /// Directory the tool runs in. Tools that touch the filesystem
44    /// should resolve relative paths against this.
45    pub dir: PathBuf,
46    pub interrupt_signal: Arc<AtomicBool>,
47    pub(crate) tool_registry: Option<Arc<ToolRegistry>>,
48    pub(crate) ticket_system: Option<Arc<TicketSystem>>,
49    pub(crate) agent_name: Option<String>,
50    pub(crate) ticket_key: Option<String>,
51    pub(crate) knowledge: Option<Arc<Knowledge>>,
52}
53
54impl ToolContext {
55    /// A fresh context rooted at `dir`, with no registry handle and
56    /// a fresh never-firing cancel signal. Tools that search the registry
57    /// need a context installed by the loop; bare contexts are for
58    /// standalone use and tests.
59    pub fn new(dir: PathBuf) -> Self {
60        Self {
61            dir,
62            interrupt_signal: Arc::new(AtomicBool::new(false)),
63            tool_registry: None,
64            ticket_system: None,
65            agent_name: None,
66            ticket_key: None,
67            knowledge: None,
68        }
69    }
70
71    /// Override the cancel signal — typically the one shared by the loop's
72    /// `TicketSystem` so tools cooperate with run-level cancellation.
73    pub fn interrupt_signal(mut self, signal: Arc<AtomicBool>) -> Self {
74        self.interrupt_signal = signal;
75        self
76    }
77
78    pub(crate) fn registry(mut self, registry: Arc<ToolRegistry>) -> Self {
79        self.tool_registry = Some(registry);
80        self
81    }
82
83    pub(crate) fn ticket_system(mut self, system: Arc<TicketSystem>) -> Self {
84        self.ticket_system = Some(system);
85        self
86    }
87
88    pub(crate) fn agent_name(mut self, name: String) -> Self {
89        self.agent_name = Some(name);
90        self
91    }
92
93    pub(crate) fn ticket_key(mut self, key: String) -> Self {
94        self.ticket_key = Some(key);
95        self
96    }
97
98    pub(crate) fn knowledge(mut self, knowledge: Arc<Knowledge>) -> Self {
99        self.knowledge = Some(knowledge);
100        self
101    }
102
103    pub(crate) fn ticket_system_handle(&self) -> Option<&Arc<TicketSystem>> {
104        self.ticket_system.as_ref()
105    }
106
107    pub(crate) fn agent_name_str(&self) -> Option<&str> {
108        self.agent_name.as_deref()
109    }
110
111    /// Future that resolves once the current run is cancelled. Pair with
112    /// `tokio::select!` to drop the losing branch on cancel; dropped futures
113    /// cascade to `reqwest` aborts and (with `kill_on_drop(true)`) subprocess
114    /// kills. With a fresh context the future stays pending forever and the
115    /// `select!` degrades to a plain await.
116    pub async fn wait_for_cancel(&self) {
117        const POLL: std::time::Duration = std::time::Duration::from_millis(50);
118        loop {
119            if self.interrupt_signal.load(Ordering::Relaxed) {
120                return;
121            }
122            tokio::time::sleep(POLL).await;
123        }
124    }
125
126    pub(crate) fn mark_tool_discovered(&self, name: &str) {
127        if let Some(registry) = self.tool_registry.as_ref() {
128            registry.mark_discovered(name);
129        }
130    }
131}
132
133impl std::fmt::Debug for ToolContext {
134    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
135        f.debug_struct("ToolContext")
136            .field("dir", &self.dir)
137            .field("has_registry", &self.tool_registry.is_some())
138            .field("has_ticket_system", &self.ticket_system.is_some())
139            .finish()
140    }
141}
142
143/// A tool call extracted from a provider response.
144#[derive(Debug, Clone)]
145pub struct ToolCall {
146    /// Provider-assigned call id, echoed back in the tool result block.
147    pub id: String,
148    /// Name of the tool the model chose.
149    pub name: String,
150    /// Arguments the model supplied, conforming to the tool's input schema.
151    pub input: Value,
152}
153
154/// Outcome of a tool execution: a success payload, a generic error
155/// message, or a schema-validation failure. All three flow back to
156/// the model as ordinary content blocks; the [`SchemaError`] variant
157/// is distinguished so the loop can apply
158/// `policies.max_schema_retries` to it specifically.
159///
160/// [`SchemaError`]: ToolResult::SchemaError
161#[derive(Debug, Clone, Serialize, Deserialize)]
162pub enum ToolResult {
163    /// Tool ran and produced this text payload.
164    Success(String),
165    /// Tool failed; the message is shown to the model so it can recover.
166    Error(String),
167    /// Tool rejected the input because it failed schema validation.
168    /// The message lists the violations so the model can fix them.
169    SchemaError(String),
170}
171
172impl ToolResult {
173    /// Build a successful result from a text payload.
174    pub fn success(content: impl Into<String>) -> Self {
175        Self::Success(content.into())
176    }
177
178    /// Build an error result. The message is visible to the model.
179    pub fn error(content: impl Into<String>) -> Self {
180        Self::Error(content.into())
181    }
182
183    /// Build a schema-validation failure. Mapped into
184    /// [`ToolError::SchemaValidationFailed`] by the registry and
185    /// counted against `policies.max_schema_retries`.
186    pub fn schema_error(content: impl Into<String>) -> Self {
187        Self::SchemaError(content.into())
188    }
189}
190
191/// The core tool interface. Object-safe via boxed futures.
192///
193/// Implement this on any type you want an agent to be able to invoke. For
194/// ad-hoc tools defined inline, use the [`Tool`] struct's builder
195/// (`Tool::new(name, description).schema(...).handler(...)`).
196pub trait ToolLike: Send + Sync {
197    /// Unique name the model uses to call the tool.
198    fn name(&self) -> &str;
199
200    /// Human-readable description shown to the model.
201    fn description(&self) -> &str;
202
203    /// JSON Schema describing the tool's arguments.
204    fn input_schema(&self) -> Value;
205
206    /// Whether this tool has no side effects. Read-only tools in the same
207    /// turn run concurrently; non-read-only tools run serially. Default: `false`.
208    fn is_read_only(&self) -> bool {
209        false
210    }
211
212    /// Whether the tool's full definition is hidden until it is discovered
213    /// via `ToolSearchTool`. Deferred tools appear to the model as
214    /// name-only stubs. Default: `false`.
215    fn should_defer(&self) -> bool {
216        false
217    }
218
219    /// Run the tool. The future is held by the agent loop and dropped on
220    /// cancellation; pair long-running work with [`ToolContext::wait_for_cancel`]
221    /// in a `tokio::select!` to drop the losing branch promptly.
222    fn call<'a>(
223        &'a self,
224        input: Value,
225        ctx: &'a ToolContext,
226    ) -> Pin<Box<dyn Future<Output = ProviderResult<ToolResult>> + Send + 'a>>;
227}
228
229/// Registry of tools available to an agent. Also owns the set of deferred
230/// tools discovered during the run: cloning the registry starts with an
231/// empty set.
232pub(crate) struct ToolRegistry {
233    pub(crate) tools: Vec<Arc<dyn ToolLike>>,
234    pub(crate) discovered: Mutex<HashSet<String>>,
235}
236
237impl std::fmt::Debug for ToolRegistry {
238    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
239        let names: Vec<&str> = self.tools.iter().map(|t| t.name()).collect();
240        f.debug_struct("ToolRegistry")
241            .field("tools", &names)
242            .finish()
243    }
244}
245
246impl Default for ToolRegistry {
247    fn default() -> Self {
248        Self {
249            tools: Vec::new(),
250            discovered: Mutex::new(HashSet::new()),
251        }
252    }
253}
254
255impl ToolRegistry {
256    pub(crate) fn register(&mut self, tool: impl ToolLike + 'static) {
257        self.tools.push(Arc::new(tool));
258    }
259
260    pub(crate) fn get(&self, name: &str) -> Option<Arc<dyn ToolLike>> {
261        let name = name.trim();
262        self.tools.iter().find(|t| t.name() == name).cloned()
263    }
264
265    pub(crate) fn mark_discovered(&self, name: &str) {
266        self.discovered.lock().unwrap().insert(name.to_string());
267    }
268
269    /// Tool definitions sent to the provider. Deferred tools that haven't
270    /// been discovered yet get name-only stubs; all others get full
271    /// definitions.
272    pub(crate) fn definitions(&self) -> Vec<ProviderToolDefinition> {
273        let discovered = self.discovered.lock().unwrap();
274        self.tools
275            .iter()
276            .map(|t| {
277                if t.should_defer() && !discovered.contains(t.name()) {
278                    ProviderToolDefinition {
279                        name: t.name().to_string(),
280                        description: String::new(),
281                        input_schema: serde_json::json!({}),
282                    }
283                } else {
284                    ProviderToolDefinition {
285                        name: t.name().to_string(),
286                        description: t.description().to_string(),
287                        input_schema: t.input_schema(),
288                    }
289                }
290            })
291            .collect()
292    }
293
294    /// Search tools by query string. Returns matches sorted by relevance
295    /// (highest first).
296    pub(crate) fn search(&self, query: &str) -> Vec<ProviderToolDefinition> {
297        let query_lower = query.to_lowercase();
298        let mut scored: Vec<(ProviderToolDefinition, u32)> = self
299            .tools
300            .iter()
301            .filter_map(|t| {
302                let mut score = 0u32;
303                let name = t.name().to_lowercase();
304                let desc = t.description().to_lowercase();
305
306                if name == query_lower {
307                    score += 100;
308                } else if name.contains(&query_lower) {
309                    score += 50;
310                }
311
312                if desc.contains(&query_lower) {
313                    score += 25;
314                }
315
316                if score > 0 {
317                    Some((
318                        ProviderToolDefinition {
319                            name: t.name().to_string(),
320                            description: t.description().to_string(),
321                            input_schema: t.input_schema(),
322                        },
323                        score,
324                    ))
325                } else {
326                    None
327                }
328            })
329            .collect();
330
331        scored.sort_by(|a, b| b.1.cmp(&a.1));
332        scored.into_iter().map(|(def, _)| def).collect()
333    }
334
335    /// Execute tool calls with concurrent read-only batching and serial
336    /// write execution.
337    ///
338    /// Returns `(ContentBlock, Result<String, ToolError>, Option<PathBuf>)`
339    /// triples so the caller can read the model-visible result block,
340    /// the typed verdict, and the offload path for oversized outputs.
341    pub(crate) async fn execute(
342        &self,
343        calls: &[ToolCall],
344        ctx: &ToolContext,
345    ) -> Vec<(
346        ContentBlock,
347        std::result::Result<String, ToolError>,
348        Option<PathBuf>,
349    )> {
350        let batches = partition_tool_calls(calls, self);
351        let mut results: Vec<(
352            ContentBlock,
353            std::result::Result<String, ToolError>,
354            Option<PathBuf>,
355        )> = Vec::new();
356        let semaphore = Arc::new(tokio::sync::Semaphore::new(10));
357
358        for batch in batches {
359            match batch {
360                ToolBatch::Concurrent(calls) => {
361                    let mut set = tokio::task::JoinSet::new();
362                    for call in calls {
363                        let sem = semaphore.clone();
364                        let ctx = ctx.clone();
365                        let tool_arc = self.get(&call.name);
366                        let call_id = call.id.clone();
367                        let call_name = call.name.clone();
368                        let input = call.input.clone();
369
370                        set.spawn(async move {
371                            let _permit = sem.acquire().await.unwrap();
372                            let outcome = invoke(tool_arc, &call_name, input, &ctx).await;
373                            let outcome = replace_empty_output(outcome, &call_name);
374                            let (outcome, path) =
375                                cap_oversized_result(outcome, &ctx, &call_id, PER_TOOL_CAP);
376                            (call_id, outcome, path)
377                        });
378                    }
379
380                    while let Some(join_result) = set.join_next().await {
381                        if let Ok((id, outcome, path)) = join_result {
382                            let block = content_block_for(&id, &outcome);
383                            results.push((block, outcome, path));
384                        }
385                    }
386                }
387                ToolBatch::Serial(call) => {
388                    let outcome =
389                        invoke(self.get(&call.name), &call.name, call.input.clone(), ctx).await;
390                    let outcome = replace_empty_output(outcome, &call.name);
391                    let (outcome, path) =
392                        cap_oversized_result(outcome, ctx, &call.id, PER_TOOL_CAP);
393                    let block = content_block_for(&call.id, &outcome);
394                    results.push((block, outcome, path));
395                }
396            }
397        }
398
399        cap_aggregate_outputs(&mut results, ctx, PER_TURN_CAP);
400
401        results
402    }
403}
404
405impl Clone for ToolRegistry {
406    fn clone(&self) -> Self {
407        Self {
408            tools: self.tools.clone(),
409            discovered: Mutex::new(HashSet::new()),
410        }
411    }
412}
413
414type ToolHandler = Box<
415    dyn Fn(
416            Value,
417            &ToolContext,
418        ) -> Pin<Box<dyn Future<Output = ProviderResult<ToolResult>> + Send + '_>>
419        + Send
420        + Sync,
421>;
422
423/// Ad-hoc tool defined inline with a closure handler.
424///
425/// Chain builder methods to configure, then hand the tool to an agent:
426/// ```ignore
427/// let greet = Tool::new("greet", "Say hello")
428///     .schema(serde_json::json!({"type": "object", "properties": {}}))
429///     .handler(|_input, _ctx| async {
430///         Ok(ToolResult::success("hi"))
431///     });
432/// ```
433///
434/// A handler is required — omitting [`Tool::handler`] causes the first
435/// invocation to panic. For tools with complex state, implement
436/// [`ToolLike`] directly on your own type instead.
437pub struct Tool {
438    name: String,
439    description: String,
440    schema: Value,
441    read_only: bool,
442    defer: bool,
443    handler: Option<ToolHandler>,
444}
445
446impl Tool {
447    /// A new tool with an empty-object input schema and no handler. Set the
448    /// handler with [`Tool::handler`] before handing the tool to an agent.
449    pub fn new(name: impl Into<String>, description: impl Into<String>) -> Self {
450        Self {
451            name: name.into(),
452            description: description.into(),
453            schema: serde_json::json!({"type": "object", "properties": {}}),
454            read_only: false,
455            defer: false,
456            handler: None,
457        }
458    }
459
460    /// Construct a `Tool` from a `.tool.json` definition. The returned tool
461    /// has its name, rendered description, input schema, and read-only flag
462    /// populated from the JSON; attach a handler via [`Tool::handler`]
463    /// before registering it with an agent. Panics on malformed JSON.
464    pub fn from_tool_file(json: &str) -> Self {
465        let tf = super::tool_file::ToolFile::parse(json);
466        Tool::new(tf.name.clone(), tf.render_markdown())
467            .schema(tf.input_schema.clone())
468            .read_only(tf.read_only)
469    }
470
471    /// Replace the input schema (JSON Schema). Defaults to an empty-object
472    /// schema.
473    pub fn schema(mut self, schema: Value) -> Self {
474        self.schema = schema;
475        self
476    }
477
478    /// Mark the tool read-only so the loop runs it concurrently with other
479    /// read-only calls in the same turn.
480    pub fn read_only(mut self, read_only: bool) -> Self {
481        self.read_only = read_only;
482        self
483    }
484
485    /// Hide the tool's full definition until it is discovered via
486    /// `ToolSearchTool`.
487    pub fn defer(mut self, defer: bool) -> Self {
488        self.defer = defer;
489        self
490    }
491
492    /// Install the closure that runs when the model calls this tool.
493    /// The closure may be a bare `async` block — the builder boxes the
494    /// returned future internally. Required: omitting this causes the
495    /// first invocation to panic.
496    pub fn handler<F, Fut>(mut self, f: F) -> Self
497    where
498        F: Fn(Value, ToolContext) -> Fut + Send + Sync + 'static,
499        Fut: Future<Output = ProviderResult<ToolResult>> + Send + 'static,
500    {
501        self.handler = Some(Box::new(move |v, c| Box::pin(f(v, c.clone()))));
502        self
503    }
504}
505
506impl ToolLike for Tool {
507    fn name(&self) -> &str {
508        &self.name
509    }
510
511    fn description(&self) -> &str {
512        &self.description
513    }
514
515    fn input_schema(&self) -> Value {
516        self.schema.clone()
517    }
518
519    fn is_read_only(&self) -> bool {
520        self.read_only
521    }
522
523    fn should_defer(&self) -> bool {
524        self.defer
525    }
526
527    fn call<'a>(
528        &'a self,
529        input: Value,
530        ctx: &'a ToolContext,
531    ) -> Pin<Box<dyn Future<Output = ProviderResult<ToolResult>> + Send + 'a>> {
532        let handler = self
533            .handler
534            .as_ref()
535            .expect("Tool requires a handler — set one via `.handler(...)` before use");
536        (handler)(input, ctx)
537    }
538}
539
540async fn invoke(
541    tool: Option<Arc<dyn ToolLike>>,
542    name: &str,
543    input: Value,
544    ctx: &ToolContext,
545) -> std::result::Result<String, ToolError> {
546    let Some(t) = tool else {
547        return Err(ToolError::ToolNotFound {
548            tool_name: name.into(),
549        });
550    };
551    match t.call(input, ctx).await {
552        Ok(ToolResult::Success(s)) => Ok(s),
553        Ok(ToolResult::Error(s)) => Err(ToolError::ExecutionFailed {
554            tool_name: name.into(),
555            message: s,
556        }),
557        Ok(ToolResult::SchemaError(s)) => Err(ToolError::SchemaValidationFailed {
558            tool_name: name.into(),
559            message: s,
560        }),
561        Err(e) => Err(ToolError::ExecutionFailed {
562            tool_name: name.into(),
563            message: e.to_string(),
564        }),
565    }
566}
567
568/// Substitute a placeholder when a tool returns an empty success
569/// payload. Empty `tool_result.content` has triggered provider sampling
570/// edge cases; the placeholder keeps the assistant turn well-formed.
571/// Errors and non-empty successes pass through.
572fn replace_empty_output(
573    outcome: std::result::Result<String, ToolError>,
574    tool_name: &str,
575) -> std::result::Result<String, ToolError> {
576    match outcome {
577        Ok(s) if s.is_empty() => Ok(format!("({tool_name} completed with no output)")),
578        other => other,
579    }
580}
581
582fn content_block_for(
583    tool_use_id: &str,
584    outcome: &std::result::Result<String, ToolError>,
585) -> ContentBlock {
586    let (content, succeeded) = match outcome {
587        Ok(s) => (s.clone(), true),
588        Err(e) => (e.message(), false),
589    };
590    ContentBlock::ToolResult {
591        tool_use_id: tool_use_id.to_string(),
592        content,
593        succeeded,
594    }
595}
596
597enum ToolBatch {
598    Concurrent(Vec<ToolCall>),
599    Serial(ToolCall),
600}
601
602fn partition_tool_calls(calls: &[ToolCall], registry: &ToolRegistry) -> Vec<ToolBatch> {
603    let mut batches: Vec<ToolBatch> = Vec::new();
604    let mut concurrent_batch: Vec<ToolCall> = Vec::new();
605
606    for call in calls {
607        let is_read_only = registry.get(&call.name).is_some_and(|t| t.is_read_only());
608
609        if is_read_only {
610            concurrent_batch.push(call.clone());
611        } else {
612            if !concurrent_batch.is_empty() {
613                batches.push(ToolBatch::Concurrent(std::mem::take(&mut concurrent_batch)));
614            }
615            batches.push(ToolBatch::Serial(call.clone()));
616        }
617    }
618
619    if !concurrent_batch.is_empty() {
620        batches.push(ToolBatch::Concurrent(concurrent_batch));
621    }
622
623    batches
624}
625
626/// Replace `Ok(s)` with a stub when `s.len()` exceeds `per_tool_cap`,
627/// persisting the original output under the ticket's outputs folder.
628/// Returns the outcome plus the persisted file path (when offload
629/// happened). `Err(_)` outcomes pass through: tool errors are short by
630/// construction and never need offloading. When persistence fails (no
631/// ticket bound, write error), the raw content passes through
632/// unstubbed.
633fn cap_oversized_result(
634    outcome: std::result::Result<String, ToolError>,
635    ctx: &ToolContext,
636    tool_use_id: &str,
637    per_tool_cap: usize,
638) -> (std::result::Result<String, ToolError>, Option<PathBuf>) {
639    match outcome {
640        Err(e) => (Err(e), None),
641        Ok(content) if content.len() <= per_tool_cap => (Ok(content), None),
642        Ok(content) => match persist_output(ctx, tool_use_id, &content) {
643            None => (Ok(content), None),
644            Some(p) => {
645                let preview = truncate_preview(&content);
646                let stub = format_oversized_tool_result(content.len(), &p.display, preview);
647                (Ok(stub), Some(p.rel))
648            }
649        },
650    }
651}
652
653/// Aggregate-cap pass: while the total `ContentBlock::ToolResult` bytes
654/// in `results` exceed `per_turn_cap`, replace the largest non-stub
655/// result with a stub. Stops when no further offload would help (either
656/// the cap is met or persistence has failed for every remaining
657/// candidate).
658fn cap_aggregate_outputs(
659    results: &mut [(
660        ContentBlock,
661        std::result::Result<String, ToolError>,
662        Option<PathBuf>,
663    )],
664    ctx: &ToolContext,
665    per_turn_cap: usize,
666) {
667    loop {
668        let total: usize = results
669            .iter()
670            .map(|(b, _, _)| match b {
671                ContentBlock::ToolResult { content, .. } => content.len(),
672                _ => 0,
673            })
674            .sum();
675        if total <= per_turn_cap {
676            return;
677        }
678        let mut largest: Option<(usize, usize)> = None;
679        for (i, (b, _, _)) in results.iter().enumerate() {
680            if let ContentBlock::ToolResult { content, .. } = b {
681                if content.starts_with(OVERSIZED_STUB_TAG_OPEN) {
682                    continue;
683                }
684                let len = content.len();
685                if largest.is_none_or(|(_, max_len)| len > max_len) {
686                    largest = Some((i, len));
687                }
688            }
689        }
690        let Some((i, _)) = largest else {
691            return;
692        };
693        let ContentBlock::ToolResult {
694            tool_use_id,
695            content,
696            succeeded,
697        } = &results[i].0
698        else {
699            return;
700        };
701        let tool_use_id = tool_use_id.clone();
702        let original = content.clone();
703        let succeeded = *succeeded;
704        let Some(p) = persist_output(ctx, &tool_use_id, &original) else {
705            // Persistence failed; nothing further this pass can do.
706            return;
707        };
708        let preview = truncate_preview(&original);
709        let stub = format_oversized_tool_result(original.len(), &p.display, preview);
710        results[i].0 = ContentBlock::ToolResult {
711            tool_use_id,
712            content: stub.clone(),
713            succeeded,
714        };
715        if results[i].1.is_ok() {
716            results[i].1 = Ok(stub);
717        }
718        results[i].2 = Some(p.rel);
719    }
720}
721
722/// Offload `content` to the ticket's outputs folder via the bound
723/// `TicketSystem`. Returns the path relative to the tickets dir (for
724/// the comment transcript) and the on-disk path (for the model-facing
725/// stub); `None` when no ticket key is present on the context, no
726/// ticket system is bound, or the write fails. Best-effort, matching
727/// the surrounding observational-persistence contract.
728fn persist_output(ctx: &ToolContext, tool_use_id: &str, content: &str) -> Option<PersistedOutput> {
729    let system = ctx.ticket_system.as_ref()?;
730    let key = ctx.ticket_key.as_deref()?;
731    let rel = system.write_tool_output(key, tool_use_id, content)?;
732    let display = system.dir_value().join(&rel);
733    Some(PersistedOutput { rel, display })
734}
735
/// Where an offloaded tool output ended up.
struct PersistedOutput {
    /// Path relative to the tickets dir.
    rel: PathBuf,
    /// `rel` joined onto the ticket system's directory; this is the
    /// path shown to the model in the stub.
    display: PathBuf,
}
740
/// Opening tag of an oversized-output stub. Content starting with this
/// tag is treated as already offloaded by the aggregate-cap pass.
const OVERSIZED_STUB_TAG_OPEN: &str = "<persisted-output>";
/// Closing tag of an oversized-output stub.
const OVERSIZED_STUB_TAG_CLOSE: &str = "</persisted-output>";
743
744/// Render the stub the model sees in place of an oversized tool result.
745/// Wraps the size summary, the offload path, and a leading preview in
746/// `<persisted-output>...</persisted-output>` tags.
747fn format_oversized_tool_result(original_len: usize, path: &Path, preview: &str) -> String {
748    let size = format_bytes(original_len);
749    let preview_size = format_bytes(preview.len());
750    format!(
751        "{OVERSIZED_STUB_TAG_OPEN}Output too large ({size}). Full output saved to: {path}\n\
752         Preview (first {preview_size}):\n\
753         {preview}\n\
754         {OVERSIZED_STUB_TAG_CLOSE}",
755        path = path.display(),
756    )
757}
758
759/// Return a leading slice of `content` up to `PREVIEW_CHARS` bytes,
760/// snapped to the last newline within that window. Falls back to a
761/// UTF-8 boundary floor when no newline is present.
762fn truncate_preview(content: &str) -> &str {
763    let window = PREVIEW_CHARS.min(content.len());
764    let cut = content[..window]
765        .rfind('\n')
766        .map(|i| i + 1)
767        .unwrap_or_else(|| utf8_boundary_floor(content, window));
768    &content[..cut]
769}
770
/// Render a byte count for humans: plain bytes below 1024, otherwise
/// KB or MB (powers of 1024) with one decimal place.
fn format_bytes(n: usize) -> String {
    const KIB: usize = 1 << 10;
    const MIB: usize = 1 << 20;

    if n < KIB {
        return format!("{n} B");
    }

    let (divisor, unit) = if n < MIB { (KIB, "KB") } else { (MIB, "MB") };
    format!("{:.1} {unit}", n as f64 / divisor as f64)
}
782
/// Largest UTF-8 char boundary of `s` that is `<= i`.
///
/// Returns `i` itself when it already is a boundary. For an index
/// inside the string the search steps back at most three bytes; an
/// index past the end is walked back to `s.len()`.
fn utf8_boundary_floor(s: &str, i: usize) -> usize {
    // Offset 0 is always a boundary, so the search cannot come up empty.
    (0..=i)
        .rev()
        .find(|&idx| s.is_char_boundary(idx))
        .unwrap_or(0)
}
792
793#[cfg(test)]
794mod tests {
795    use super::*;
796
797    /// Tiny mock used across registry tests.
798    struct MockTool {
799        name: String,
800        read_only: bool,
801        result: String,
802    }
803
804    impl MockTool {
805        fn new(name: &str, read_only: bool, result: &str) -> Self {
806            Self {
807                name: name.into(),
808                read_only,
809                result: result.into(),
810            }
811        }
812    }
813
814    impl ToolLike for MockTool {
815        fn name(&self) -> &str {
816            &self.name
817        }
818        fn description(&self) -> &str {
819            "mock"
820        }
821        fn input_schema(&self) -> Value {
822            serde_json::json!({"type": "object"})
823        }
824        fn is_read_only(&self) -> bool {
825            self.read_only
826        }
827        fn call<'a>(
828            &'a self,
829            _input: Value,
830            _ctx: &'a ToolContext,
831        ) -> Pin<Box<dyn Future<Output = ProviderResult<ToolResult>> + Send + 'a>> {
832            let result = self.result.clone();
833            Box::pin(async move { Ok(ToolResult::success(result)) })
834        }
835    }
836
837    struct DeferredMockTool {
838        name: String,
839    }
840
841    impl DeferredMockTool {
842        fn new(name: &str) -> Self {
843            Self { name: name.into() }
844        }
845    }
846
847    impl ToolLike for DeferredMockTool {
848        fn name(&self) -> &str {
849            &self.name
850        }
851        fn description(&self) -> &str {
852            "deferred mock"
853        }
854        fn input_schema(&self) -> Value {
855            serde_json::json!({"type": "object"})
856        }
857        fn should_defer(&self) -> bool {
858            true
859        }
860        fn call<'a>(
861            &'a self,
862            _input: Value,
863            _ctx: &'a ToolContext,
864        ) -> Pin<Box<dyn Future<Output = ProviderResult<ToolResult>> + Send + 'a>> {
865            Box::pin(async { Ok(ToolResult::success("ok")) })
866        }
867    }
868
869    fn test_ctx() -> ToolContext {
870        ToolContext::new(std::env::current_dir().unwrap())
871    }
872
873    #[test]
874    fn registry_register_and_get() {
875        let mut registry = ToolRegistry::default();
876        registry.register(MockTool::new("read_file", true, "file contents"));
877        assert!(registry.get("read_file").is_some());
878        assert!(registry.get("nonexistent").is_none());
879    }
880
881    #[test]
882    fn from_tool_file_populates_name_description_schema_read_only() {
883        let json = r#"{
884            "name": "demo_tool",
885            "summary": ["Do the demo thing."],
886            "constraints": ["Returns nothing useful."],
887            "read_only": true,
888            "input_schema": {
889                "type": "object",
890                "properties": {"x": {"type": "string"}},
891                "required": ["x"]
892            }
893        }"#;
894        let tool = Tool::from_tool_file(json);
895        assert_eq!(tool.name(), "demo_tool");
896        assert!(tool.description().contains("Do the demo thing."));
897        assert!(tool.description().contains("- Returns nothing useful."));
898        assert!(tool.is_read_only());
899        let schema = tool.input_schema();
900        assert_eq!(schema["properties"]["x"]["type"], "string");
901        assert_eq!(schema["required"][0], "x");
902    }
903
904    #[test]
905    fn registry_definitions() {
906        let mut registry = ToolRegistry::default();
907        registry.register(MockTool::new("read", true, "ok"));
908        registry.register(MockTool::new("write", false, "ok"));
909
910        let defs = registry.definitions();
911        assert_eq!(defs.len(), 2);
912        assert_eq!(defs[0].name, "read");
913        assert_eq!(defs[1].name, "write");
914    }
915
916    #[test]
917    fn registry_definitions_deferred() {
918        let mut registry = ToolRegistry::default();
919        registry.register(MockTool::new("always_visible", true, "ok"));
920        registry.register(DeferredMockTool::new("deferred_tool"));
921
922        let defs = registry.definitions();
923        assert_eq!(defs.len(), 2);
924        let deferred = defs.iter().find(|d| d.name == "deferred_tool").unwrap();
925        assert!(deferred.description.is_empty());
926        assert_eq!(deferred.input_schema, serde_json::json!({}));
927
928        registry.mark_discovered("deferred_tool");
929        let defs = registry.definitions();
930        let deferred = defs.iter().find(|d| d.name == "deferred_tool").unwrap();
931        assert!(!deferred.description.is_empty());
932    }
933
934    #[test]
935    fn registry_search_by_name() {
936        let mut registry = ToolRegistry::default();
937        registry.register(MockTool::new("read_file", true, "ok"));
938        registry.register(MockTool::new("write_file", false, "ok"));
939
940        let results = registry.search("read");
941        assert_eq!(results.len(), 1);
942        assert_eq!(results[0].name, "read_file");
943    }
944
945    #[test]
946    fn registry_clone() {
947        let mut registry = ToolRegistry::default();
948        registry.register(MockTool::new("t", true, "ok"));
949        let cloned = registry.clone();
950        assert_eq!(cloned.definitions().len(), 1);
951    }
952
953    #[tokio::test]
954    async fn execute_unknown_tool_returns_error() {
955        let registry = ToolRegistry::default();
956        let ctx = test_ctx();
957        let calls = vec![ToolCall {
958            id: "c1".into(),
959            name: "nonexistent".into(),
960            input: serde_json::json!({}),
961        }];
962
963        let results = registry.execute(&calls, &ctx).await;
964        assert_eq!(results.len(), 1);
965        match &results[0].0 {
966            ContentBlock::ToolResult {
967                succeeded, content, ..
968            } => {
969                assert!(!succeeded);
970                assert!(content.contains("Unknown tool"));
971            }
972            other => panic!("Expected ToolResult, got {other:?}"),
973        }
974    }
975
976    #[tokio::test]
977    async fn execute_read_only_tools_concurrently() {
978        let mut registry = ToolRegistry::default();
979        registry.register(MockTool::new("read1", true, "result1"));
980        registry.register(MockTool::new("read2", true, "result2"));
981        let ctx = test_ctx();
982
983        let calls = vec![
984            ToolCall {
985                id: "c1".into(),
986                name: "read1".into(),
987                input: serde_json::json!({}),
988            },
989            ToolCall {
990                id: "c2".into(),
991                name: "read2".into(),
992                input: serde_json::json!({}),
993            },
994        ];
995
996        let results = registry.execute(&calls, &ctx).await;
997        assert_eq!(results.len(), 2);
998    }
999
1000    #[tokio::test]
1001    async fn execute_serial_tool() {
1002        let mut registry = ToolRegistry::default();
1003        registry.register(MockTool::new("write_file", false, "written"));
1004        let ctx = test_ctx();
1005
1006        let calls = vec![ToolCall {
1007            id: "c1".into(),
1008            name: "write_file".into(),
1009            input: serde_json::json!({"path": "/tmp/test"}),
1010        }];
1011
1012        let results = registry.execute(&calls, &ctx).await;
1013        assert_eq!(results.len(), 1);
1014        match &results[0].0 {
1015            ContentBlock::ToolResult {
1016                content, succeeded, ..
1017            } => {
1018                assert!(succeeded);
1019                assert_eq!(content, "written");
1020            }
1021            other => panic!("Expected ToolResult, got {other:?}"),
1022        }
1023    }
1024
1025    #[test]
1026    fn tool_basic() {
1027        let tool = Tool::new("echo", "Echoes input")
1028            .schema(
1029                serde_json::json!({"type": "object", "properties": {"text": {"type": "string"}}}),
1030            )
1031            .read_only(true)
1032            .handler(|input, _ctx| async move {
1033                let text = input["text"].as_str().unwrap_or("").to_string();
1034                Ok(ToolResult::success(text))
1035            });
1036
1037        assert_eq!(tool.name(), "echo");
1038        assert!(tool.is_read_only());
1039    }
1040
1041    #[test]
1042    fn tool_defer_builder() {
1043        let tool = Tool::new("advanced", "Advanced tool")
1044            .defer(true)
1045            .handler(|_input, _ctx| async { Ok(ToolResult::success("ok")) });
1046
1047        assert!(tool.should_defer());
1048    }
1049
1050    #[tokio::test]
1051    #[should_panic(expected = "requires a handler")]
1052    async fn tool_panics_without_handler() {
1053        let tool = Tool::new("no_handler", "missing");
1054        let ctx = test_ctx();
1055        let _ = tool.call(serde_json::json!({}), &ctx).await;
1056    }
1057
1058    // ---- Layer 1: result-cap helpers ----
1059
1060    fn ticket_ctx() -> (
1061        ToolContext,
1062        Arc<TicketSystem>,
1063        String,
1064        crate::test_util::TempDir,
1065    ) {
1066        let dir = crate::test_util::TempDir::new().unwrap();
1067        let system = TicketSystem::new();
1068        system.dir(dir.path().to_path_buf());
1069        system.task("seed");
1070        let key = "TICKET-1".to_string();
1071        let ctx = test_ctx()
1072            .ticket_system(Arc::clone(&system))
1073            .ticket_key(key.clone());
1074        (ctx, system, key, dir)
1075    }
1076
1077    fn tool_result_block(id: &str, content: &str) -> ContentBlock {
1078        ContentBlock::ToolResult {
1079            tool_use_id: id.into(),
1080            content: content.into(),
1081            succeeded: true,
1082        }
1083    }
1084
1085    fn relative_outputs_path(key: &str, tool_use_id: &str) -> PathBuf {
1086        PathBuf::from("tickets")
1087            .join(key)
1088            .join("outputs")
1089            .join(format!("{tool_use_id}.txt"))
1090    }
1091
1092    fn absolute_outputs_path(dir: &std::path::Path, key: &str, tool_use_id: &str) -> PathBuf {
1093        dir.join(relative_outputs_path(key, tool_use_id))
1094    }
1095
1096    #[test]
1097    fn write_tool_output_stores_relative_path_in_comment() {
1098        let (ctx, _system, key, _dir) = ticket_ctx();
1099        let (_outcome, path) = cap_oversized_result(Ok("z".repeat(500)), &ctx, "call-rel", 100);
1100        let stored = path.expect("offload happened");
1101        assert_eq!(stored, relative_outputs_path(&key, "call-rel"));
1102        assert!(
1103            stored.is_relative(),
1104            "comment path must stay portable: {}",
1105            stored.display()
1106        );
1107    }
1108
1109    #[test]
1110    fn persisted_output_renders_absolute_path_for_model() {
1111        let (ctx, _system, key, dir) = ticket_ctx();
1112        let (outcome, _path) = cap_oversized_result(Ok("y".repeat(500)), &ctx, "call-abs", 100);
1113        let stub = outcome.unwrap();
1114        let absolute = absolute_outputs_path(dir.path(), &key, "call-abs");
1115        assert!(
1116            stub.contains(&absolute.display().to_string()),
1117            "stub must give the model the joinable on-disk path: {stub}"
1118        );
1119    }
1120
1121    #[test]
1122    fn cap_oversized_result_passes_through_under_cap() {
1123        let ctx = test_ctx();
1124        let (outcome, path) = cap_oversized_result(Ok("hello".into()), &ctx, "call-1", 100);
1125        assert_eq!(outcome.unwrap(), "hello");
1126        assert!(path.is_none());
1127    }
1128
1129    #[test]
1130    fn cap_oversized_result_replaces_oversized_ok_with_stub() {
1131        let (ctx, _system, key, dir) = ticket_ctx();
1132        let (outcome, path) = cap_oversized_result(Ok("a".repeat(500)), &ctx, "call-xyz", 100);
1133        let stub = outcome.unwrap();
1134        assert!(stub.starts_with("<persisted-output>"));
1135        assert!(stub.contains("Output too large"));
1136        assert!(stub.contains("Full output saved to:"));
1137        let absolute = absolute_outputs_path(dir.path(), &key, "call-xyz");
1138        assert!(
1139            stub.contains(&absolute.display().to_string()),
1140            "stub must name the absolute path so the model can read the file: {stub}"
1141        );
1142        assert!(stub.contains("Preview (first"));
1143        assert!(stub.ends_with("</persisted-output>"));
1144        let path = path.expect("offload path");
1145        assert_eq!(path, relative_outputs_path(&key, "call-xyz"));
1146        let body = std::fs::read_to_string(&absolute).unwrap();
1147        assert_eq!(body, "a".repeat(500));
1148    }
1149
1150    #[test]
1151    fn cap_oversized_result_passes_through_errs() {
1152        let ctx = test_ctx();
1153        let (outcome, path) = cap_oversized_result(
1154            Err(ToolError::ExecutionFailed {
1155                tool_name: "tool".into(),
1156                message: "boom".into(),
1157            }),
1158            &ctx,
1159            "call-1",
1160            10,
1161        );
1162        assert!(matches!(outcome, Err(ToolError::ExecutionFailed { .. })));
1163        assert!(path.is_none());
1164    }
1165
1166    #[test]
1167    fn cap_oversized_result_returns_raw_when_no_ticket_key() {
1168        let ctx = test_ctx();
1169        let payload = "x".repeat(500);
1170        let (outcome, path) = cap_oversized_result(Ok(payload.clone()), &ctx, "call-1", 100);
1171        assert_eq!(outcome.unwrap(), payload);
1172        assert!(path.is_none(), "no ticket key means no offload");
1173    }
1174
1175    #[test]
1176    fn cap_aggregate_offloads_largest_first() {
1177        let (ctx, _system, key, dir) = ticket_ctx();
1178        // Sizes chosen so the stub's own bytes (~200) don't dominate.
1179        let small = "a".repeat(40_000);
1180        let big = "b".repeat(80_000);
1181        let tiny = "c".repeat(30_000);
1182        let mut results = vec![
1183            (tool_result_block("c1", &small), Ok(small.clone()), None),
1184            (tool_result_block("c2", &big), Ok(big.clone()), None),
1185            (tool_result_block("c3", &tiny), Ok(tiny.clone()), None),
1186        ];
1187        cap_aggregate_outputs(&mut results, &ctx, 100_000);
1188        // c2 (the largest) was offloaded; the other two stayed inline.
1189        match &results[1].0 {
1190            ContentBlock::ToolResult { content, .. } => {
1191                assert!(content.starts_with("<persisted-output>"));
1192                assert!(content.contains("Full output saved to:"));
1193            }
1194            _ => panic!("expected ToolResult"),
1195        }
1196        let big_path = results[1].2.clone().expect("c2 path recorded");
1197        assert_eq!(big_path, relative_outputs_path(&key, "c2"));
1198        let body = std::fs::read_to_string(absolute_outputs_path(dir.path(), &key, "c2")).unwrap();
1199        assert_eq!(body, big);
1200
1201        assert!(matches!(
1202            &results[0].0,
1203            ContentBlock::ToolResult { content, .. } if content.len() == 40_000
1204        ));
1205        assert!(matches!(
1206            &results[2].0,
1207            ContentBlock::ToolResult { content, .. } if content.len() == 30_000
1208        ));
1209        assert!(results[0].2.is_none());
1210        assert!(results[2].2.is_none());
1211    }
1212
1213    #[test]
1214    fn cap_aggregate_stops_when_only_small_results_remain() {
1215        let (ctx, _system, _key, _dir) = ticket_ctx();
1216        // Many small results whose total far exceeds the cap, but
1217        // each is already a stub-marked block. Aggregate should bail
1218        // rather than spin: stubs are skipped, so no candidates.
1219        let mut results: Vec<(
1220            ContentBlock,
1221            std::result::Result<String, ToolError>,
1222            Option<PathBuf>,
1223        )> = (0..5)
1224            .map(|i| {
1225                let id = format!("c{i}");
1226                let stub = format!("<persisted-output>already stubbed {i}</persisted-output>");
1227                (tool_result_block(&id, &stub), Ok(stub), None)
1228            })
1229            .collect();
1230        let before: Vec<String> = results
1231            .iter()
1232            .map(|(b, _, _)| match b {
1233                ContentBlock::ToolResult { content, .. } => content.clone(),
1234                _ => String::new(),
1235            })
1236            .collect();
1237        cap_aggregate_outputs(&mut results, &ctx, 10);
1238        let after: Vec<String> = results
1239            .iter()
1240            .map(|(b, _, _)| match b {
1241                ContentBlock::ToolResult { content, .. } => content.clone(),
1242                _ => String::new(),
1243            })
1244            .collect();
1245        assert_eq!(
1246            before, after,
1247            "aggregate must be a no-op when only stubs remain"
1248        );
1249    }
1250
1251    #[test]
1252    fn format_oversized_tool_result_renders_template() {
1253        let path = PathBuf::from("/tmp/agentwerk/tickets/TICKET-1/outputs/call-1.txt");
1254        let stub = format_oversized_tool_result(1_048_576, &path, "preview-body");
1255        assert!(stub.starts_with("<persisted-output>"));
1256        assert!(stub.contains("Output too large (1.0 MB)."));
1257        assert!(stub
1258            .contains("Full output saved to: /tmp/agentwerk/tickets/TICKET-1/outputs/call-1.txt"));
1259        assert!(stub.contains("Preview (first 12 B):"));
1260        assert!(stub.contains("preview-body"));
1261        assert!(stub.ends_with("</persisted-output>"));
1262    }
1263
1264    #[test]
1265    fn truncate_preview_snaps_at_last_newline_in_window() {
1266        let mut content = String::new();
1267        // Build a payload where the last newline within PREVIEW_CHARS is
1268        // at byte 1_900.
1269        content.push_str(&"a".repeat(1_900));
1270        content.push('\n');
1271        content.push_str(&"b".repeat(500));
1272        let preview = truncate_preview(&content);
1273        assert_eq!(preview.len(), 1_901);
1274        assert!(preview.ends_with('\n'));
1275    }
1276
1277    #[test]
1278    fn truncate_preview_falls_back_to_utf8_boundary_when_no_newline() {
1279        let content = "x".repeat(3_000);
1280        let preview = truncate_preview(&content);
1281        assert_eq!(preview.len(), PREVIEW_CHARS);
1282        assert!(content.is_char_boundary(preview.len()));
1283    }
1284
1285    #[test]
1286    fn replace_empty_output_substitutes_placeholder() {
1287        let outcome: std::result::Result<String, ToolError> = Ok(String::new());
1288        let outcome = replace_empty_output(outcome, "bash");
1289        assert_eq!(outcome.unwrap(), "(bash completed with no output)");
1290    }
1291
1292    #[test]
1293    fn replace_empty_output_passes_non_empty_through() {
1294        let outcome: std::result::Result<String, ToolError> = Ok("hello".into());
1295        let outcome = replace_empty_output(outcome, "bash");
1296        assert_eq!(outcome.unwrap(), "hello");
1297    }
1298
1299    #[test]
1300    fn replace_empty_output_passes_errors_through() {
1301        let outcome: std::result::Result<String, ToolError> = Err(ToolError::ExecutionFailed {
1302            tool_name: "bash".into(),
1303            message: "boom".into(),
1304        });
1305        let outcome = replace_empty_output(outcome, "bash");
1306        assert!(matches!(outcome, Err(ToolError::ExecutionFailed { .. })));
1307    }
1308}
agentwerk/tools/tool.rs

agentwerk/tools/
tool.rs