1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
//! `ToolExecutor` implementation that dispatches secretary tools.
//!
//! Sprint 8 wired a shared [`ToolRegistry`] through this executor so the
//! `enable_tools` meta-tool can mutate the registry in place. Every other
//! tool still routes through the stateless [`crate::tools::dispatch_tool`].
use std::sync::{Arc, Mutex};
use crate::{ToolError, ToolExecutor};
use serde_json::{json, Value};
use crate::tool_groups::{ToolGroup, ToolRegistry};
use crate::tools::dispatch_tool;
/// Executor for the main Claudette runtime. Holds an `Arc<Mutex<ToolRegistry>>`
/// shared with the `OllamaApiClient` so that `enable_tools` calls mutate the
/// same registry the client reads from on the next request.
///
/// The main runtime must build the registry once and share it, so it always
/// goes through [`Self::with_registry`]. [`Self::stateless`] — which is also
/// what [`Self::new`] and `Default::default()` return — carries no registry
/// and is meant for tests and agents; it is never a substitute for the wired
/// executor in the main runtime.
pub struct SecretaryToolExecutor {
/// Shared registry. `None` = agents / tests that don't want the
/// `enable_tools` feature; in that mode `enable_tools` calls return an
/// error so the model can adapt.
registry: Option<Arc<Mutex<ToolRegistry>>>,
}
impl SecretaryToolExecutor {
    /// Create an executor bound to a shared tool registry.
    ///
    /// The executor keeps the given `Arc`, so `enable_tools` calls mutate the
    /// very registry every other holder of that `Arc<Mutex<_>>` reads — for
    /// example subsequent `OllamaApiClient::build_chat_body` calls.
    #[must_use]
    pub fn with_registry(registry: Arc<Mutex<ToolRegistry>>) -> Self {
        let registry = Some(registry);
        Self { registry }
    }

    /// Create an executor with no registry attached.
    ///
    /// `enable_tools` is not wired up in this mode. Intended for tests, for
    /// agents (which have a fixed tool allowlist), and for the old
    /// single-shot path that pre-dates Sprint 8.
    #[must_use]
    pub fn stateless() -> Self {
        let registry = None;
        Self { registry }
    }

    /// Back-compat constructor kept from the old API. It produces the same
    /// registry-less executor as [`Self::stateless`]; the main runtime always
    /// goes through [`Self::with_registry`] instead.
    #[must_use]
    pub fn new() -> Self {
        Self { registry: None }
    }
}
/// `Default` yields the registry-less executor — the same value
/// [`SecretaryToolExecutor::stateless`] builds — so `enable_tools` is not
/// available on a defaulted instance.
impl Default for SecretaryToolExecutor {
    fn default() -> Self {
        Self { registry: None }
    }
}
impl ToolExecutor for SecretaryToolExecutor {
    /// Run one tool call and return its textual result.
    ///
    /// `enable_tools` is handled locally against the (optional) shared
    /// registry. Every other tool goes through `dispatch_tool`, and a
    /// successful result is prefixed with `[tool:<name>] `. Failures from
    /// either path are wrapped in [`ToolError`].
    fn execute(&mut self, tool_name: &str, input: &str) -> Result<String, ToolError> {
        // The meta-tool short-circuits before any transcript handling: it is
        // a ReadOnly tool and never appears in the action transcript.
        if tool_name == "enable_tools" {
            let registry = self.registry.as_ref();
            return run_enable_tools(registry, input).map_err(ToolError::new);
        }

        let outcome = match dispatch_tool(tool_name, input) {
            Ok(output) => Ok(format!("[tool:{tool_name}] {output}")),
            Err(cause) => Err(ToolError::new(cause)),
        };

        // Drain the pending undo ref (the trash / pre-image ref the tool may
        // have left behind) on EVERY call, success or failure, so a ref from
        // this call can never end up on the next call's transcript line.
        let undo_ref = crate::transcript::take_pending_undo();

        // Transcript rules, applied only after dispatch so the log reflects
        // what actually happened:
        //   * only mutating tools are logged — ReadOnly reads would be noise
        //     and a privacy footgun;
        //   * a failed call is still logged when it left an undo ref (e.g.
        //     snapshot succeeded, write failed), because that pre-image is
        //     exactly what the user needs to know about.
        let mutating = should_record(tool_name);
        let noteworthy = outcome.is_ok() || undo_ref.is_some();
        if mutating && noteworthy {
            crate::transcript::record(tool_name, input, undo_ref);
        }

        outcome
    }
}
/// Decide whether a tool call is written to
/// `~/.claudette/transcript/actions.jsonl`.
///
/// Returns `true` for every tool whose required permission mode is anything
/// other than `ReadOnly`. Unknown tools are documented to default to
/// `DangerFullAccess` in `required_mode_for`, so they are recorded as well —
/// the fail-safe choice for an audit log.
fn should_record(tool_name: &str) -> bool {
    // Built by `build_permission_policy` on first use, then reused by every
    // later call.
    static POLICY: std::sync::OnceLock<crate::PermissionPolicy> = std::sync::OnceLock::new();

    let policy = POLICY.get_or_init(crate::run::build_permission_policy);
    let required = policy.required_mode_for(tool_name);
    required != crate::PermissionMode::ReadOnly
}
/// Handle a call to the synthetic `enable_tools` meta-tool.
///
/// Parses the `group` argument from the JSON `input`, flips the enabled bit
/// on the shared registry, and returns a JSON document (as a string) listing
/// the tools that are now available so the model knows what to call on the
/// next turn.
///
/// A missing, non-string, empty, or whitespace-only `group` is not an error:
/// it enables the coding core instead (see the comment in the body).
///
/// # Errors
///
/// Returns a human-readable message when
/// * `registry` is `None` (stateless executor),
/// * `input` is not valid JSON, or
/// * `group` names a group that `ToolGroup::parse` does not recognise; the
///   message lists every available group.
fn run_enable_tools(
    registry: Option<&Arc<Mutex<ToolRegistry>>>,
    input: &str,
) -> Result<String, String> {
    let Some(registry) = registry else {
        return Err(
            "enable_tools is not available in this runtime (stateless executor)".to_string(),
        );
    };
    let v: Value = serde_json::from_str(input)
        .map_err(|e| format!("enable_tools: invalid JSON input ({e}): {input}"))?;
    let group_name = v
        .get("group")
        .and_then(Value::as_str)
        .map(str::trim)
        .unwrap_or_default();

    // Forgiving fallback. Small local brains routinely emit the call with the
    // `group` arg dropped entirely (`<function=enable_tools></function>`), then
    // spiral on a hard "missing 'group'" error until the turn times out (this
    // was the single biggest failure source in the v0.8.0 daily-driver eval).
    // Treat "I want tools but didn't name a group" as a request for the lean
    // coding core — the most universally useful actuation set — so even a bare
    // secretary session (which doesn't pre-enable it) makes progress instead of
    // looping. Explicit, valid group names below are honored exactly as before.
    if group_name.is_empty() {
        let mut reg = lock_registry(registry);
        let newly = reg.enable_coding_core();
        let groups: Vec<&str> = ToolGroup::coding_core().iter().map(|g| g.name()).collect();
        let tools: Vec<String> = ToolGroup::coding_core()
            .into_iter()
            .flat_map(|g| reg.group_tool_names(g))
            .collect();
        let current_count = reg.current_len();
        // Spell out the group names from `ToolGroup::coding_core()` rather
        // than a literal list, so the note cannot drift from the groups that
        // were actually enabled (same reasoning as the unknown-group error
        // below).
        let note = format!(
            "No 'group' was provided, so I enabled the coding core ({}). Call these tools directly on your next turn. For a different group (e.g. git, github) call enable_tools with {{\"group\":\"git\"}}.",
            groups.join(", ")
        );
        return Ok(json!({
            "ok": true,
            "note": note,
            "groups_enabled": groups,
            "newly_enabled_groups": newly,
            "tools_now_available": tools,
            "total_advertised_tools": current_count,
        })
        .to_string());
    }

    let group = ToolGroup::parse(group_name).ok_or_else(|| {
        // Dynamically enumerate all groups so adding a new one in
        // `tool_groups.rs` doesn't silently leave this error message
        // pointing at a stale list. A hard-coded subset was wrong
        // for months; never again.
        let available: Vec<&str> = ToolGroup::all().iter().map(|g| g.name()).collect();
        format!(
            "enable_tools: unknown group '{group_name}' — available: {}",
            available.join(", ")
        )
    })?;

    // The lock is taken only after the group name has been validated.
    let mut reg = lock_registry(registry);
    let newly_enabled = reg.enable(group);
    let tool_names = reg.group_tool_names(group);
    let current_count = reg.current_len();
    Ok(json!({
        "ok": true,
        "group": group.name(),
        "already_enabled": !newly_enabled,
        "tools_now_available": tool_names,
        "total_advertised_tools": current_count,
        "note": "The new tools take effect on the next model call — call them directly on your next turn.",
    })
    .to_string())
}
/// Acquire the shared registry lock, tolerating a poisoned mutex.
///
/// Poisoning means some thread panicked while holding the guard. Calls are
/// expected to be driven serially by the runtime, so in practice that should
/// only follow a panicking test. Rather than propagate the poison, hand back
/// the guard to the inner value so the executor stays operational.
fn lock_registry(registry: &Arc<Mutex<ToolRegistry>>) -> std::sync::MutexGuard<'_, ToolRegistry> {
    registry
        .lock()
        .unwrap_or_else(std::sync::PoisonError::into_inner)
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::ToolExecutor;

    /// Mutating calls land in the action transcript; ReadOnly calls never do.
    #[test]
    fn transcript_records_mutating_calls_but_never_readonly() {
        crate::with_temp_home(|home| {
            let mut ex = SecretaryToolExecutor::stateless();
            let tpath = home
                .join(".claudette")
                .join("transcript")
                .join("actions.jsonl");
            // ReadOnly tool → success, but NO transcript line (privacy:
            // reads are never logged).
            ex.execute("get_current_time", "{}").unwrap();
            assert!(!tpath.exists(), "ReadOnly call must not be recorded");
            // WorkspaceWrite tool → recorded.
            ex.execute("todo_add", r#"{"text":"transcript test"}"#)
                .unwrap();
            let raw = std::fs::read_to_string(&tpath).expect("transcript must exist now");
            assert!(raw.contains("todo_add"), "mutating call must be recorded");
            assert!(
                !raw.contains("get_current_time"),
                "the earlier ReadOnly call must not appear"
            );
        });
    }

    /// Without a registry, `enable_tools` fails with a "not available" error.
    #[test]
    fn stateless_executor_rejects_enable_tools() {
        let mut exec = SecretaryToolExecutor::stateless();
        let result = exec.execute("enable_tools", r#"{"group":"git"}"#);
        assert!(result.is_err());
        let err = result.unwrap_err().to_string();
        assert!(
            err.contains("not available"),
            "expected 'not available', got: {err}"
        );
    }

    /// Ordinary tools keep working on the stateless executor.
    #[test]
    fn stateless_executor_dispatches_core_tool() {
        let mut exec = SecretaryToolExecutor::stateless();
        let result = exec.execute("get_current_time", "{}");
        assert!(result.is_ok(), "core tools should still work: {result:?}");
        assert!(result.unwrap().contains("iso8601"));
    }

    /// An explicit group name flips that group on in the shared registry.
    #[test]
    fn wired_executor_enables_git_group() {
        let registry = Arc::new(Mutex::new(ToolRegistry::new()));
        let mut exec = SecretaryToolExecutor::with_registry(registry.clone());
        assert!(!registry.lock().unwrap().is_enabled(ToolGroup::Git));
        let result = exec.execute("enable_tools", r#"{"group":"git"}"#).unwrap();
        assert!(result.contains("\"ok\":true"));
        assert!(result.contains("git_status"));
        assert!(registry.lock().unwrap().is_enabled(ToolGroup::Git));
    }

    /// Enabling the same group twice reports `already_enabled` the second time.
    #[test]
    fn wired_executor_reports_already_enabled_on_second_call() {
        let registry = Arc::new(Mutex::new(ToolRegistry::new()));
        let mut exec = SecretaryToolExecutor::with_registry(registry);
        let first = exec.execute("enable_tools", r#"{"group":"ide"}"#).unwrap();
        assert!(first.contains("\"already_enabled\":false"));
        let second = exec.execute("enable_tools", r#"{"group":"ide"}"#).unwrap();
        assert!(second.contains("\"already_enabled\":true"));
    }

    /// An unknown group yields an error that lists every registered group.
    #[test]
    fn wired_executor_unknown_group_errors_clearly() {
        let registry = Arc::new(Mutex::new(ToolRegistry::new()));
        let mut exec = SecretaryToolExecutor::with_registry(registry);
        // Use a name that is not a valid group or alias.
        let err = exec
            .execute("enable_tools", r#"{"group":"does-not-exist-xyz"}"#)
            .unwrap_err()
            .to_string();
        assert!(err.contains("unknown group"), "got: {err}");
        // Every registered group must appear in the "available" list — guards
        // against the hardcoded-subset regression the dynamic formatter was
        // added to fix.
        for group in ToolGroup::all() {
            assert!(
                err.contains(group.name()),
                "error should list group '{}': {err}",
                group.name()
            );
        }
    }

    /// A call without a usable `group` enables the coding core instead of
    /// failing.
    #[test]
    fn wired_executor_missing_group_enables_coding_core() {
        // Forgiving fallback: a no-group call (the malformed shape small local
        // models emit) must NOT error — it enables the lean coding core so the
        // brain can keep working instead of spiraling.
        for input in ["{}", r#"{"group":""}"#, r#"{"group":" "}"#] {
            let registry = Arc::new(Mutex::new(ToolRegistry::new()));
            let mut exec = SecretaryToolExecutor::with_registry(Arc::clone(&registry));
            let out = exec
                .execute("enable_tools", input)
                .unwrap_or_else(|e| panic!("input {input:?} should not error: {e}"));
            assert!(out.contains("\"ok\":true"), "input {input:?}: {out}");
            // The actuation tools the brain needs must now be advertised.
            for tool in [
                "read_file",
                "write_file",
                "edit_file",
                "grep_search",
                "run_tests",
            ] {
                assert!(out.contains(tool), "input {input:?} missing {tool}: {out}");
            }
            // And the registry actually reflects the coding-core groups.
            let reg = registry.lock().unwrap();
            for g in ToolGroup::coding_core() {
                assert!(
                    reg.is_enabled(g),
                    "group {g:?} not enabled for input {input:?}"
                );
            }
        }
    }

    /// Input that is not JSON at all is rejected with an "invalid JSON" error.
    #[test]
    fn wired_executor_bad_json_errors() {
        let registry = Arc::new(Mutex::new(ToolRegistry::new()));
        let mut exec = SecretaryToolExecutor::with_registry(registry);
        let err = exec
            .execute("enable_tools", "not json at all")
            .unwrap_err()
            .to_string();
        assert!(err.contains("invalid JSON"), "got: {err}");
    }

    /// Attaching a registry does not change how ordinary tools are dispatched.
    #[test]
    fn wired_executor_still_dispatches_non_meta_tools() {
        let registry = Arc::new(Mutex::new(ToolRegistry::new()));
        let mut exec = SecretaryToolExecutor::with_registry(registry);
        let result = exec.execute("get_current_time", "{}").unwrap();
        assert!(result.contains("iso8601"));
    }
}