Skip to main content

devboy_executor/
enricher.rs

1//! Built-in enrichers for the executor.
2//!
3//! The `ToolEnricher` trait and `ToolSchema` live in `devboy-core`
4//! so that provider crates can implement enrichers without depending
5//! on the executor. This module provides built-in enrichers.
6
7use devboy_core::{PropertySchema, ToolCategory, ToolEnricher, ToolSchema};
8use serde_json::Value;
9
10// Re-export core enricher types for convenience
11pub use devboy_core::{ToolSchema as Schema, sanitize_field_name};
12
/// Names of the tools whose schemas [`FormatPipelineEnricher`] augments.
///
/// `enrich_schema` is a no-op for any tool name that does not appear here.
///
/// NOTE(review): besides the list-returning tools, the set also contains
/// singular getters (`get_issue`, `get_merge_request`,
/// `get_knowledge_base_page`) — presumably their responses go through the
/// same format pipeline; confirm before trimming the list.
const LIST_TOOLS: &[&str] = &[
    // Issues
    "get_issues",
    "get_issue",
    "get_issue_comments",
    // Merge requests
    "get_merge_requests",
    "get_merge_request",
    "get_merge_request_discussions",
    "get_merge_request_diffs",
    // Knowledge base
    "list_knowledge_base_pages",
    "search_knowledge_base",
    "get_knowledge_base_page",
];
26
27// =============================================================================
28// Safe parameter insertion
29// =============================================================================
30
31/// Find a free parameter name: try `preferred`, then `_preferred`, `__preferred`, etc.
32///
33/// If a tool already has a parameter with the same name (e.g., the tool defines
34/// its own `chunk`), the enricher uses a prefixed variant to avoid collisions.
35fn safe_param_name(schema: &ToolSchema, preferred: &str) -> String {
36    if !schema.properties.contains_key(preferred) {
37        return preferred.to_string();
38    }
39    // Prefix with underscores until we find a free name
40    let mut name = format!("_{preferred}");
41    while schema.properties.contains_key(&name) {
42        name = format!("_{name}");
43    }
44    name
45}
46
47/// Insert a property only if the preferred name (or a safe variant) is available.
48/// Returns the actual name used.
49fn safe_insert(schema: &mut ToolSchema, preferred: &str, prop: PropertySchema) -> String {
50    let name = safe_param_name(schema, preferred);
51    schema.add_property(&name, prop);
52    name
53}
54
55// =============================================================================
56// FormatPipelineEnricher
57// =============================================================================
58
/// Format pipeline enricher — adds pipeline-level parameters to list tools.
///
/// Adds these parameters (using safe naming to avoid collisions):
/// - `format` — output format (`toon`/`json`/`mckp`)
/// - `budget` — token budget for response size control
/// - `chunk`  — chunk number for navigating large results
///
/// API-level `limit`/`offset` are defined by individual tools where
/// the provider API supports pagination. The enricher does NOT add them.
///
/// The type is a field-less unit struct, so it derives the common cheap
/// traits (`Debug`, `Clone`, `Copy`, `Default`) for logging and for use in
/// generic or `Default`-constructed contexts.
#[derive(Debug, Clone, Copy, Default)]
pub struct FormatPipelineEnricher;
69
70impl ToolEnricher for FormatPipelineEnricher {
71    fn supported_categories(&self) -> &[ToolCategory] {
72        &[
73            ToolCategory::IssueTracker,
74            ToolCategory::GitRepository,
75            ToolCategory::KnowledgeBase,
76        ]
77    }
78
79    fn enrich_schema(&self, tool_name: &str, schema: &mut ToolSchema) {
80        if !LIST_TOOLS.contains(&tool_name) {
81            return;
82        }
83
84        // Output format
85        safe_insert(
86            schema,
87            "format",
88            PropertySchema::string_enum(
89                &["toon", "json", "mckp"],
90                "Output format. \
91                 `toon` (default) is the legacy token-optimised custom format; \
92                 `json` is the pretty-printed baseline; \
93                 `mckp` is the format-adaptive encoder from Paper 2 — best for \
94                 array/object payloads on `o200k_base` tokenizers, key-lossless.",
95            ),
96        );
97
98        // Token budget — LLM controls response size
99        safe_insert(
100            schema,
101            "budget",
102            PropertySchema::integer(
103                "Token budget for this response. Lower = less data + chunk index for navigation. \
104                 Higher = more data per call. Default: from server config.",
105                Some(100.0),
106                Some(100000.0),
107            ),
108        );
109
110        // Chunk navigation — for fetching specific chunks from large results
111        safe_insert(
112            schema,
113            "chunk",
114            PropertySchema::integer(
115                "Chunk number to fetch (from chunk index). \
116                 When a response exceeds budget, it returns chunk 1 + an index of all chunks. \
117                 Use this parameter to fetch a specific chunk by number.",
118                Some(1.0),
119                None,
120            ),
121        );
122    }
123
124    fn transform_args(&self, _tool_name: &str, args: &mut Value) {
125        // Convert `chunk` to `offset`/`limit` for the pipeline.
126        // The chunk index includes offset/limit per chunk, but the LLM
127        // just sends a chunk number. We resolve it at runtime in the handler.
128        // For now, pass through — the handler reads `chunk` from args directly.
129        let _ = args;
130    }
131}
132
133/// Backward-compatible alias.
134pub type PipelineFormatEnricher = FormatPipelineEnricher;
135
#[cfg(test)]
mod tests {
    use super::*;

    /// Names the enricher contributes when nothing in the schema collides.
    const PIPELINE_PARAMS: [&str; 3] = ["format", "budget", "chunk"];

    /// Run the enricher for `tool_name` against an initially empty schema.
    fn enriched(tool_name: &str) -> ToolSchema {
        let mut schema = ToolSchema::new();
        FormatPipelineEnricher.enrich_schema(tool_name, &mut schema);
        schema
    }

    #[test]
    fn test_safe_param_name_no_conflict() {
        let empty = ToolSchema::new();
        for wanted in ["budget", "chunk"] {
            assert_eq!(safe_param_name(&empty, wanted), wanted);
        }
    }

    #[test]
    fn test_safe_param_name_with_conflict() {
        let mut crowded = ToolSchema::new();

        // One collision -> one underscore.
        crowded.add_property("chunk", PropertySchema::string("existing"));
        assert_eq!(safe_param_name(&crowded, "chunk"), "_chunk");

        // Two collisions -> two underscores.
        crowded.add_property("_chunk", PropertySchema::string("also taken"));
        assert_eq!(safe_param_name(&crowded, "chunk"), "__chunk");
    }

    #[test]
    fn test_format_pipeline_enricher_adds_params() {
        let schema = enriched("get_issues");
        let props = &schema.properties;

        // `mckp` is advertised next to `toon`/`json` (issue #203 follow-up)
        // so the LLM can pick the Paper 2 encoder explicitly when its
        // tokenizer family makes it preferable to TOON.
        let format = props.get("format").unwrap();
        assert_eq!(
            format.enum_values,
            Some(vec!["toon".into(), "json".into(), "mckp".into()])
        );

        let budget = props.get("budget").unwrap();
        assert_eq!(budget.schema_type, "integer");
        assert_eq!(budget.minimum, Some(100.0));

        let chunk = props.get("chunk").unwrap();
        assert_eq!(chunk.schema_type, "integer");
        assert_eq!(chunk.minimum, Some(1.0));

        // `offset`/`limit` are API-level and owned by the tools themselves;
        // the enricher must never introduce them.
        for api_level in ["offset", "limit"] {
            assert!(!props.contains_key(api_level));
        }
    }

    #[test]
    fn test_enricher_skips_non_list_tools() {
        assert!(enriched("create_issue").properties.is_empty());
    }

    #[test]
    fn test_enricher_safe_naming_on_collision() {
        // The tool already owns a `chunk` parameter with a different meaning.
        let mut schema = ToolSchema::new();
        schema.add_property("chunk", PropertySchema::string("tool's own chunk param"));

        FormatPipelineEnricher.enrich_schema("get_merge_request_diffs", &mut schema);
        let props = &schema.properties;

        // The tool's definition survives untouched ...
        let tool_owned = props.get("chunk").unwrap();
        assert_eq!(tool_owned.schema_type, "string");

        // ... and the enricher's parameter lands under `_chunk` instead.
        let pipeline_owned = props.get("_chunk").unwrap();
        assert_eq!(pipeline_owned.schema_type, "integer");

        // The non-colliding parameters keep their plain names.
        assert!(props.contains_key("format"));
        assert!(props.contains_key("budget"));
    }

    #[test]
    fn test_enricher_categories() {
        let enricher = FormatPipelineEnricher;
        let supported = enricher.supported_categories();
        for expected in [
            ToolCategory::IssueTracker,
            ToolCategory::GitRepository,
            ToolCategory::KnowledgeBase,
        ] {
            assert!(supported.contains(&expected));
        }
    }

    #[test]
    fn test_format_pipeline_enricher_covers_kb_tools() {
        let schema = enriched("search_knowledge_base");
        for param in PIPELINE_PARAMS {
            assert!(schema.properties.contains_key(param));
        }
    }
}