Skip to main content

devboy_core/
enricher.rs

1//! Tool enrichment traits and schema utilities.
2//!
3//! This module defines the `ToolEnricher` trait and `ToolSchema` struct
4//! that enable dynamic modification of MCP tool schemas. Provider crates
5//! implement `ToolEnricher` to adapt tool schemas to their capabilities.
6
7use serde::{Deserialize, Serialize};
8use serde_json::Value;
9use std::collections::HashMap;
10
11use crate::tool_category::ToolCategory;
12use crate::tool_value_model::ToolValueModel;
13
14/// Trait for plugins that dynamically modify tool schemas and transform arguments.
15///
16/// Enrichers are executed in registration order by the `Executor`.
17/// Each enricher declares which tool categories it supports — only tools
18/// from those categories will be enriched and shown in `list_tools()`.
19pub trait ToolEnricher: Send + Sync {
20    /// Which tool categories this provider/enricher supports.
21    /// Tools from other categories won't be shown when this enricher is active.
22    fn supported_categories(&self) -> &[ToolCategory];
23
24    /// Modify the tool schema during `tools/list`.
25    fn enrich_schema(&self, tool_name: &str, schema: &mut ToolSchema);
26
27    /// Transform arguments before tool execution.
28    fn transform_args(&self, tool_name: &str, args: &mut Value);
29
30    /// Optional: provider-shipped value model for `tool_name`. Returned
31    /// models are merged into `AdaptiveConfig.tools` at startup so the
32    /// Paper 3 enrichment planner can read them via
33    /// `effective_tool_value_model`.
34    ///
35    /// Default impl returns `None` — built-in enrichers that do not
36    /// participate in the planner can ignore the method entirely.
37    fn value_model(&self, _tool_name: &str) -> Option<ToolValueModel> {
38        None
39    }
40
41    /// Build the JSON arguments for a *speculatively pre-fetched*
42    /// follow-up call.
43    ///
44    /// Given the tool that just produced `prev_result` (`prev_tool`),
45    /// the follow-up tool's `FollowUpLink` (with `projection` /
46    /// `projection_arg` set), the host asks the enricher: "what `args`
47    /// should I pass to `<follow-up tool>`?"
48    ///
49    /// Returns:
50    ///
51    /// - `Some(json)` — emit one prefetch request per object in the
52    ///   returned array (planner caps at `max_parallel_prefetches`).
53    ///   Top-level shape is `[{ <args1> }, { <args2> }, …]`.
54    /// - `None` (default) — provider has no opinion; the host falls
55    ///   back to the generic projection in `link.projection_arg`.
56    ///
57    /// Built-in enrichers should override this for the high-volume
58    /// follow-up chains identified in `paper3_corpus_findings.md`
59    /// (Glob → Read, Grep → Read, WebSearch → WebFetch, …).
60    fn project_args(
61        &self,
62        _prev_tool: &str,
63        _prev_result: &Value,
64        _link: &crate::tool_value_model::FollowUpLink,
65    ) -> Option<Value> {
66        None
67    }
68
69    /// Optional dynamic rate-limit host for `tool_name`, derived from
70    /// runtime `args`. Provider returns the network host the call
71    /// will hit (e.g. `Some("api.github.com")`) so the speculative
72    /// dispatcher can cap concurrent in-flight prefetches per host.
73    ///
74    /// Default: `None` — host falls back to
75    /// `ToolValueModel::rate_limit_host` (the static configuration
76    /// value), and if that is also `None` the prefetch is uncapped.
77    ///
78    /// Override this for tools whose target host is per-call —
79    /// `WebFetch` (host from `url` arg), `WebSearch` against multiple
80    /// search engines, MCP wrappers around generic HTTP clients.
81    fn rate_limit_host(&self, _tool_name: &str, _args: &Value) -> Option<String> {
82        None
83    }
84}
85
86/// JSON Schema property definition for a tool parameter.
87#[derive(Debug, Clone, Serialize, Deserialize)]
88pub struct PropertySchema {
89    /// JSON Schema type: "string", "number", "integer", "boolean", "array", "object".
90    /// Empty when [`Self::any_of`] is set — JSON Schema treats `type`
91    /// and `anyOf` as alternatives, and the serializer skips empty
92    /// `type` on the wire so the rendered schema stays valid for
93    /// LLM tool-call validators.
94    #[serde(rename = "type", default, skip_serializing_if = "String::is_empty")]
95    pub schema_type: String,
96
97    /// Human-readable description of this parameter.
98    #[serde(skip_serializing_if = "Option::is_none")]
99    pub description: Option<String>,
100
101    /// Allowed values (enum constraint).
102    #[serde(rename = "enum", skip_serializing_if = "Option::is_none")]
103    pub enum_values: Option<Vec<String>>,
104
105    #[serde(skip_serializing_if = "Option::is_none")]
106    pub default: Option<Value>,
107
108    /// Minimum value (for number/integer).
109    #[serde(skip_serializing_if = "Option::is_none")]
110    pub minimum: Option<f64>,
111
112    /// Maximum value (for number/integer).
113    #[serde(skip_serializing_if = "Option::is_none")]
114    pub maximum: Option<f64>,
115
116    /// Items schema (for array type).
117    #[serde(skip_serializing_if = "Option::is_none")]
118    pub items: Option<Box<PropertySchema>>,
119
120    /// Schema alternatives — used when a parameter accepts shapes
121    /// that can't be unified under one `type` (e.g. a Jira
122    /// customfield that's a select on Project A and free text on
123    /// Project B). Mutually exclusive with `schema_type` per JSON
124    /// Schema's `anyOf` semantics — when set, [`Self::schema_type`]
125    /// is empty and the serializer skips it.
126    #[serde(rename = "anyOf", default, skip_serializing_if = "Option::is_none")]
127    pub any_of: Option<Vec<PropertySchema>>,
128
129    /// Marker that this field was added/modified by an enricher.
130    #[serde(rename = "x-enriched", skip_serializing_if = "Option::is_none")]
131    pub enriched: Option<bool>,
132}
133
134impl PropertySchema {
135    /// Create a string property.
136    pub fn string(description: &str) -> Self {
137        Self {
138            schema_type: "string".into(),
139            description: Some(description.into()),
140            ..Default::default()
141        }
142    }
143
144    /// Create a string property with enum values.
145    pub fn string_enum(values: &[&str], description: &str) -> Self {
146        Self {
147            schema_type: "string".into(),
148            description: Some(description.into()),
149            enum_values: Some(values.iter().map(|s| s.to_string()).collect()),
150            enriched: Some(true),
151            ..Default::default()
152        }
153    }
154
155    /// Create a number property.
156    pub fn number(description: &str) -> Self {
157        Self {
158            schema_type: "number".into(),
159            description: Some(description.into()),
160            ..Default::default()
161        }
162    }
163
164    /// Create an integer property with optional min/max.
165    pub fn integer(description: &str, min: Option<f64>, max: Option<f64>) -> Self {
166        Self {
167            schema_type: "integer".into(),
168            description: Some(description.into()),
169            minimum: min,
170            maximum: max,
171            ..Default::default()
172        }
173    }
174
175    /// Create a boolean property.
176    pub fn boolean(description: &str) -> Self {
177        Self {
178            schema_type: "boolean".into(),
179            description: Some(description.into()),
180            ..Default::default()
181        }
182    }
183
184    /// Create an array property with items schema.
185    pub fn array(items: PropertySchema, description: &str) -> Self {
186        Self {
187            schema_type: "array".into(),
188            description: Some(description.into()),
189            items: Some(Box::new(items)),
190            ..Default::default()
191        }
192    }
193
194    /// Create a schema that accepts any of several alternatives —
195    /// JSON Schema's `anyOf`. Used when a parameter can take
196    /// shapes that don't fit under a single `type` (e.g. a custom
197    /// field with different option lists across projects). The
198    /// outer schema carries the description and `anyOf` array;
199    /// `schema_type` is left empty so the wire format is a valid
200    /// `anyOf`-only schema.
201    pub fn any_of(description: &str, schemas: Vec<PropertySchema>) -> Self {
202        Self {
203            schema_type: String::new(),
204            description: Some(description.into()),
205            any_of: Some(schemas),
206            enriched: Some(true),
207            ..Default::default()
208        }
209    }
210}
211
212impl Default for PropertySchema {
213    fn default() -> Self {
214        Self {
215            schema_type: "string".into(),
216            description: None,
217            enum_values: None,
218            default: None,
219            minimum: None,
220            maximum: None,
221            items: None,
222            any_of: None,
223            enriched: None,
224        }
225    }
226}
227
228/// Tool input schema with typed property definitions.
229///
230/// Represents a JSON Schema `{ type: "object", properties: {...}, required: [...] }`.
231/// Uses `PropertySchema` for type-safe parameter definitions.
232#[derive(Debug, Clone, Serialize, Deserialize)]
233pub struct ToolSchema {
234    /// Parameter definitions keyed by parameter name.
235    pub properties: HashMap<String, PropertySchema>,
236    /// List of required parameter names.
237    #[serde(default, skip_serializing_if = "Vec::is_empty")]
238    pub required: Vec<String>,
239}
240
241impl ToolSchema {
242    /// Create an empty schema.
243    pub fn new() -> Self {
244        Self {
245            properties: HashMap::new(),
246            required: Vec::new(),
247        }
248    }
249
250    /// Create from a JSON Schema value (for backward compatibility).
251    pub fn from_json(schema: &Value) -> Self {
252        serde_json::from_value::<ToolSchema>(schema.clone()).unwrap_or_else(|_| {
253            // Fallback: manual parsing for non-standard JSON
254            let properties = schema
255                .get("properties")
256                .and_then(|p| {
257                    serde_json::from_value::<HashMap<String, PropertySchema>>(p.clone()).ok()
258                })
259                .unwrap_or_default();
260            let required = schema
261                .get("required")
262                .and_then(|r| r.as_array())
263                .map(|arr| {
264                    arr.iter()
265                        .filter_map(|v| v.as_str().map(String::from))
266                        .collect()
267                })
268                .unwrap_or_default();
269            Self {
270                properties,
271                required,
272            }
273        })
274    }
275
276    /// Convert to a JSON Schema value.
277    pub fn to_json(&self) -> Value {
278        let mut schema = serde_json::json!({
279            "type": "object",
280            "properties": self.properties,
281        });
282        if !self.required.is_empty() {
283            schema["required"] = serde_json::json!(self.required);
284        }
285        schema
286    }
287
288    /// Add a string parameter with enum values.
289    pub fn add_enum_param(&mut self, name: &str, values: &[&str], description: &str) {
290        self.properties.insert(
291            name.into(),
292            PropertySchema::string_enum(values, description),
293        );
294    }
295
296    /// Set enum values on an existing parameter.
297    pub fn set_enum(&mut self, param: &str, values: &[String]) {
298        if let Some(prop) = self.properties.get_mut(param) {
299            prop.enum_values = Some(values.to_vec());
300            prop.enriched = Some(true);
301        }
302    }
303
304    /// Add a typed property.
305    pub fn add_property(&mut self, name: &str, prop: PropertySchema) {
306        self.properties.insert(name.into(), prop);
307    }
308
309    /// Add a parameter with a raw JSON Schema value (backward compat).
310    pub fn add_param(&mut self, name: &str, schema: Value) {
311        if let Ok(prop) = serde_json::from_value::<PropertySchema>(schema) {
312            self.properties.insert(name.into(), prop);
313        }
314    }
315
316    /// Remove parameters not supported by the current provider.
317    pub fn remove_params(&mut self, names: &[&str]) {
318        for name in names {
319            self.properties.remove(*name);
320            self.required.retain(|r| r != *name);
321        }
322    }
323
324    /// Set whether a parameter is required.
325    pub fn set_required(&mut self, param: &str, required: bool) {
326        if required {
327            if !self.required.contains(&param.to_string()) {
328                self.required.push(param.into());
329            }
330        } else {
331            self.required.retain(|r| r != param);
332        }
333    }
334
335    /// Update a parameter's description.
336    pub fn set_description(&mut self, param: &str, desc: &str) {
337        if let Some(prop) = self.properties.get_mut(param) {
338            prop.description = Some(desc.into());
339        }
340    }
341
342    /// Set a default value for a parameter.
343    pub fn set_default(&mut self, param: &str, value: Value) {
344        if let Some(prop) = self.properties.get_mut(param) {
345            prop.default = Some(value);
346        }
347    }
348}
349
350impl Default for ToolSchema {
351    fn default() -> Self {
352        Self::new()
353    }
354}
355
/// Turn a human-readable field name into a safe `cf_`-prefixed
/// parameter name.
///
/// ASCII letters and digits are kept (letters lowercased). Every run
/// of other characters — including non-ASCII ones — acts as a single
/// `_` separator, and separators at either end are dropped. A name
/// with no ASCII alphanumerics therefore yields just `"cf_"`.
///
/// Examples:
/// - `"Story Points"` → `"cf_story_points"`
/// - `"Risk Level"` → `"cf_risk_level"`
pub fn sanitize_field_name(name: &str) -> String {
    const PREFIX: &str = "cf_";

    let mut out = String::with_capacity(PREFIX.len() + name.len());
    out.push_str(PREFIX);

    // Set once a separator run has been seen since the last kept char.
    let mut pending_separator = false;
    for ch in name.chars() {
        if !ch.is_ascii_alphanumeric() {
            pending_separator = true;
            continue;
        }
        // Emit the separator lazily so leading and trailing runs vanish.
        if pending_separator && out.len() > PREFIX.len() {
            out.push('_');
        }
        pending_separator = false;
        out.push(ch.to_ascii_lowercase());
    }
    out
}
379
#[cfg(test)]
mod tests {
    use super::*;

    /// Build an owned `Vec<String>` from string literals.
    fn owned(values: &[&str]) -> Vec<String> {
        values.iter().map(|v| v.to_string()).collect()
    }

    #[test]
    fn test_sanitize_field_name() {
        assert_eq!(sanitize_field_name("Story Points"), "cf_story_points");
        assert_eq!(sanitize_field_name("Risk Level"), "cf_risk_level");
        assert_eq!(
            sanitize_field_name("My Custom Field!"),
            "cf_my_custom_field"
        );
        assert_eq!(sanitize_field_name("simple"), "cf_simple");
        // Non-ASCII characters count as separators, so a name made
        // only of them collapses to the bare prefix.
        assert_eq!(sanitize_field_name("Приоритет"), "cf_");
    }

    #[test]
    fn test_property_schema_constructors() {
        let text = PropertySchema::string("A description");
        assert_eq!(text.schema_type, "string");
        assert_eq!(text.description.as_deref(), Some("A description"));

        let choice = PropertySchema::string_enum(&["a", "b"], "Pick one");
        assert_eq!(choice.enum_values, Some(owned(&["a", "b"])));
        assert_eq!(choice.enriched, Some(true));

        let count = PropertySchema::number("Count");
        assert_eq!(count.schema_type, "number");

        let limit = PropertySchema::integer("Limit", Some(1.0), Some(100.0));
        assert_eq!(limit.minimum, Some(1.0));
        assert_eq!(limit.maximum, Some(100.0));

        let flag = PropertySchema::boolean("Flag");
        assert_eq!(flag.schema_type, "boolean");

        let list = PropertySchema::array(PropertySchema::string("item"), "List");
        assert_eq!(list.schema_type, "array");
        assert!(list.items.is_some());
    }

    /// The `any_of` constructor leaves the outer `type` empty, so the
    /// wire shape is `{"description": ..., "anyOf": [...]}` — the form
    /// JSON Schema validators expect for alternatives.
    #[test]
    fn test_property_schema_any_of_constructor() {
        let project_a = PropertySchema::string_enum(&["High", "Medium", "Low"], "Project A");
        let project_b = PropertySchema::string_enum(&["P1", "P2", "P3"], "Project B");
        let alt = PropertySchema::any_of(
            "Severity (varies per project)",
            vec![project_a, project_b],
        );

        assert_eq!(alt.schema_type, "");
        assert_eq!(
            alt.description.as_deref(),
            Some("Severity (varies per project)")
        );
        assert_eq!(alt.enriched, Some(true));

        let variants = alt.any_of.as_ref().expect("anyOf set");
        assert_eq!(variants.len(), 2);
        assert_eq!(variants[0].enum_values.as_ref().unwrap()[0], "High");
        assert_eq!(variants[1].enum_values.as_ref().unwrap()[0], "P1");
    }

    /// An empty `schema_type` must not be serialized, otherwise a
    /// stray `"type": ""` would invalidate the `anyOf`-only schema.
    /// Only the outer object is checked for the absence of `type`,
    /// because the inner variants legitimately carry their own.
    #[test]
    fn test_property_schema_any_of_serialization_omits_empty_type() {
        let alt = PropertySchema::any_of(
            "alt",
            vec![PropertySchema::string("a"), PropertySchema::number("b")],
        );
        let value = serde_json::to_value(&alt).unwrap();
        let outer = value.as_object().expect("object");
        assert!(
            !outer.contains_key("type"),
            "outer object must not have type: {value}"
        );
        assert!(outer.contains_key("anyOf"), "missing anyOf: {value}");

        // The variants themselves still declare a `type`.
        let variants = outer["anyOf"].as_array().unwrap();
        assert_eq!(variants[0]["type"], "string");
        assert_eq!(variants[1]["type"], "number");
    }

    #[test]
    fn test_tool_schema_add_enum_param() {
        let mut schema = ToolSchema::new();
        schema.add_enum_param("status", &["open", "closed"], "Issue status");

        let status = schema.properties.get("status").unwrap();
        assert_eq!(status.schema_type, "string");
        assert_eq!(status.enum_values, Some(owned(&["open", "closed"])));
        assert_eq!(status.enriched, Some(true));
    }

    #[test]
    fn test_tool_schema_remove_params() {
        let raw = serde_json::json!({
            "type": "object",
            "properties": {
                "title": { "type": "string" },
                "priority": { "type": "string" },
            },
            "required": ["title", "priority"],
        });
        let mut schema = ToolSchema::from_json(&raw);

        schema.remove_params(&["priority"]);

        assert!(!schema.properties.contains_key("priority"));
        assert_eq!(schema.required, vec!["title"]);
    }

    #[test]
    fn test_tool_schema_roundtrip() {
        let mut schema = ToolSchema::new();
        schema.add_property("title", PropertySchema::string("Title"));
        schema.set_required("title", true);

        let rendered = schema.to_json();
        assert_eq!(rendered["properties"]["title"]["type"], "string");
        assert_eq!(rendered["required"], serde_json::json!(["title"]));

        let restored = ToolSchema::from_json(&rendered);
        assert!(restored.properties.contains_key("title"));
        assert_eq!(restored.required, vec!["title"]);
    }

    #[test]
    fn test_tool_schema_set_enum() {
        let mut schema = ToolSchema::new();
        schema.add_property("state", PropertySchema::string("Filter by state"));
        schema.set_enum("state", &owned(&["opened", "closed", "merged"]));

        let state = schema.properties.get("state").unwrap();
        assert_eq!(
            state.enum_values,
            Some(owned(&["opened", "closed", "merged"]))
        );
        assert_eq!(state.enriched, Some(true));
        // Setting the enum must leave the description untouched.
        assert_eq!(state.description.as_deref(), Some("Filter by state"));
    }

    #[test]
    fn test_tool_schema_set_required() {
        let mut schema = ToolSchema::new();
        schema.required = vec!["title".into()];

        schema.set_required("description", true);
        assert_eq!(schema.required, vec!["title", "description"]);

        schema.set_required("title", false);
        assert_eq!(schema.required, vec!["description"]);

        // Requiring an already-required parameter changes nothing.
        schema.set_required("description", true);
        assert_eq!(schema.required, vec!["description"]);
    }

    #[test]
    fn test_tool_schema_set_default() {
        let mut schema = ToolSchema::new();
        schema.add_property("limit", PropertySchema::integer("Max results", None, None));
        schema.set_default("limit", serde_json::json!(20));

        let limit = schema.properties.get("limit").unwrap();
        assert_eq!(limit.default, Some(serde_json::json!(20)));
    }

    #[test]
    fn test_tool_schema_add_param_from_json() {
        let raw = serde_json::json!({
            "type": "string",
            "enum": ["Low", "Medium", "High"],
            "description": "Risk level",
            "x-enriched": true,
        });
        let mut schema = ToolSchema::new();
        schema.add_param("cf_risk", raw);

        let risk = schema.properties.get("cf_risk").unwrap();
        assert_eq!(risk.schema_type, "string");
        assert_eq!(risk.enum_values, Some(owned(&["Low", "Medium", "High"])));
    }

    #[test]
    fn test_from_json_backward_compat() {
        let raw = serde_json::json!({
            "type": "object",
            "properties": {
                "state": {
                    "type": "string",
                    "enum": ["open", "closed"],
                    "description": "Issue state"
                },
                "limit": {
                    "type": "integer",
                    "minimum": 1,
                    "maximum": 100
                }
            },
            "required": ["state"]
        });

        let schema = ToolSchema::from_json(&raw);
        assert_eq!(schema.properties.len(), 2);
        assert_eq!(schema.required, vec!["state"]);

        let state = schema.properties.get("state").unwrap();
        assert_eq!(state.schema_type, "string");
        assert_eq!(state.enum_values, Some(owned(&["open", "closed"])));

        let limit = schema.properties.get("limit").unwrap();
        assert_eq!(limit.schema_type, "integer");
        assert_eq!(limit.minimum, Some(1.0));
        assert_eq!(limit.maximum, Some(100.0));
    }
}