apcore_toolkit/
ai_enhancer.rs

1// AI-driven metadata enhancement using local SLMs.
2//
3// Uses an OpenAI-compatible local API (e.g., Ollama, vLLM, LM Studio) to fill
4// metadata gaps that static analysis cannot resolve.
5//
6// All AI-generated fields are tagged with `x-generated-by: slm` in the module's
7// metadata for auditability.
8
9use std::env;
10use std::time::Duration;
11
12use serde_json::{json, Value};
13use thiserror::Error;
14use tracing::warn;
15
16use apcore::module::ModuleAnnotations;
17
18use crate::types::ScannedModule;
19
20const DEFAULT_ENDPOINT: &str = "http://localhost:11434/v1";
21const DEFAULT_MODEL: &str = "qwen:0.6b";
22const DEFAULT_THRESHOLD: f64 = 0.7;
23const DEFAULT_BATCH_SIZE: usize = 5;
24const DEFAULT_TIMEOUT: u64 = 30;
25
26/// Derive the list of annotation field names the SLM may assign confidence
27/// scores for by serializing a default `ModuleAnnotations` and inspecting
28/// its object keys.
29///
30/// This mirrors the dynamic-template approach used by the Python SDK
31/// (`dataclasses.fields(ModuleAnnotations)`) and the TypeScript SDK
32/// (`Object.entries(DEFAULT_ANNOTATIONS)`). Using runtime reflection keeps
33/// the SLM prompt template automatically in sync when upstream
34/// `apcore::module::ModuleAnnotations` gains or loses fields, eliminating
35/// the drift risk of a hardcoded list.
36///
37/// The `extra` open-extension map is excluded (matches TS behavior at
38/// `apcore-toolkit-typescript/src/ai-enhancer.ts`).
39fn annotation_field_names() -> Vec<String> {
40    match serde_json::to_value(ModuleAnnotations::default()) {
41        Ok(Value::Object(map)) => map
42            .into_iter()
43            .map(|(k, _)| k)
44            .filter(|k| k != "extra")
45            .collect(),
46        _ => Vec::new(),
47    }
48}
49
50/// Errors returned by [`AIEnhancer`] operations.
51#[derive(Debug, Error)]
52pub enum AIEnhancerError {
53    /// Invalid configuration value.
54    #[error("invalid config: {0}")]
55    Config(String),
56    /// Failed to reach the SLM endpoint.
57    #[error("connection failed: {0}")]
58    Connection(String),
59    /// SLM returned an unparseable response.
60    #[error("bad response: {0}")]
61    Response(String),
62    /// Failed to build the HTTP agent.
63    #[error("agent build failed: {0}")]
64    AgentBuild(String),
65}
66
67/// Protocol for pluggable metadata enhancement.
68///
69/// # Blocking / async compatibility
70///
71/// `enhance` is a synchronous method. The bundled [`AIEnhancer`] performs
72/// blocking HTTP requests via `ureq`, so each call may park the current
73/// thread for up to `APCORE_AI_TIMEOUT` seconds (default 30) per module.
74/// **Do not call `enhance` directly from an async task** on a Tokio (or
75/// other async) runtime — it will block a runtime worker thread and can
76/// stall the scheduler under concurrent load.
77///
78/// From an async context, wrap the call in
79/// [`tokio::task::spawn_blocking`]:
80///
81/// ```ignore
82/// let enhanced = tokio::task::spawn_blocking(move || enhancer.enhance(modules)).await?;
83/// ```
84///
85/// Enhancement is a one-shot scanning-phase operation (not per-request),
86/// so this is typically invoked once during framework adapter bootstrap.
87pub trait Enhancer {
88    /// Enhance a list of ScannedModules by filling metadata gaps.
89    ///
90    /// Synchronous and potentially long-running. See the trait-level doc
91    /// comment for guidance on invoking from async contexts.
92    fn enhance(&self, modules: Vec<ScannedModule>) -> Vec<ScannedModule>;
93}
94
95/// Enhances ScannedModule metadata using a local SLM.
96///
97/// Configuration is read from environment variables or constructor parameters:
98/// - `APCORE_AI_ENABLED`: Enable enhancement (default: false).
99/// - `APCORE_AI_ENDPOINT`: OpenAI-compatible API URL.
100/// - `APCORE_AI_MODEL`: Model name.
101/// - `APCORE_AI_THRESHOLD`: Confidence threshold (0.0–1.0).
102/// - `APCORE_AI_BATCH_SIZE`: Modules per API call.
103/// - `APCORE_AI_TIMEOUT`: Timeout in seconds per API call.
104#[derive(Debug)]
105pub struct AIEnhancer {
106    pub endpoint: String,
107    pub model: String,
108    pub threshold: f64,
109    pub batch_size: usize,
110    pub timeout: u64,
111    // Reused across all call_llm() invocations to avoid rebuilding config per call.
112    agent: ureq::Agent,
113}
114
115impl AIEnhancer {
116    /// Create a new AIEnhancer with optional overrides.
117    ///
118    /// Falls back to environment variables, then defaults.
119    pub fn new(
120        endpoint: Option<String>,
121        model: Option<String>,
122        threshold: Option<f64>,
123        batch_size: Option<usize>,
124        timeout: Option<u64>,
125    ) -> Result<Self, AIEnhancerError> {
126        let endpoint = endpoint.unwrap_or_else(|| {
127            env::var("APCORE_AI_ENDPOINT").unwrap_or_else(|_| DEFAULT_ENDPOINT.into())
128        });
129        validate_endpoint_scheme(&endpoint)?;
130        let model = model.unwrap_or_else(|| {
131            env::var("APCORE_AI_MODEL").unwrap_or_else(|_| DEFAULT_MODEL.into())
132        });
133        let threshold =
134            threshold.unwrap_or_else(|| parse_float_env("APCORE_AI_THRESHOLD", DEFAULT_THRESHOLD));
135        let batch_size = batch_size
136            .unwrap_or_else(|| parse_usize_env("APCORE_AI_BATCH_SIZE", DEFAULT_BATCH_SIZE));
137        let timeout =
138            timeout.unwrap_or_else(|| parse_u64_env("APCORE_AI_TIMEOUT", DEFAULT_TIMEOUT));
139
140        if !(0.0..=1.0).contains(&threshold) {
141            return Err(AIEnhancerError::Config(
142                "APCORE_AI_THRESHOLD must be between 0.0 and 1.0".into(),
143            ));
144        }
145        if batch_size == 0 {
146            return Err(AIEnhancerError::Config(
147                "APCORE_AI_BATCH_SIZE must be a positive integer".into(),
148            ));
149        }
150        if timeout == 0 {
151            return Err(AIEnhancerError::Config(
152                "APCORE_AI_TIMEOUT must be a positive integer".into(),
153            ));
154        }
155
156        let agent = ureq::Agent::config_builder()
157            .timeout_global(Some(Duration::from_secs(timeout)))
158            .build()
159            .new_agent();
160
161        Ok(Self {
162            endpoint,
163            model,
164            threshold,
165            batch_size,
166            timeout,
167            agent,
168        })
169    }
170
171    /// Check whether AI enhancement is enabled via environment.
172    pub fn is_enabled() -> bool {
173        env::var("APCORE_AI_ENABLED")
174            .map(|v| matches!(v.to_lowercase().as_str(), "true" | "1" | "yes"))
175            .unwrap_or(false)
176    }
177
178    /// Identify which metadata fields are missing or at defaults.
179    fn identify_gaps(&self, module: &ScannedModule) -> Vec<String> {
180        let mut gaps: Vec<String> = Vec::new();
181
182        if module.description.is_empty() || module.description == module.module_id {
183            gaps.push("description".into());
184        }
185        if module.documentation.is_none() {
186            gaps.push("documentation".into());
187        }
188        if module.annotations.is_none()
189            || module
190                .annotations
191                .as_ref()
192                .is_some_and(is_default_annotations)
193        {
194            gaps.push("annotations".into());
195        }
196        if module
197            .input_schema
198            .get("properties")
199            .and_then(|p| p.as_object())
200            .map(|o| o.is_empty())
201            .unwrap_or(true)
202        {
203            gaps.push("input_schema".into());
204        }
205
206        gaps
207    }
208
209    /// Build a structured prompt for the SLM.
210    fn build_prompt(&self, module: &ScannedModule, gaps: &[String]) -> String {
211        let mut parts = vec![
212            "You are analyzing a function to generate metadata for an AI-perceivable module system.".into(),
213            String::new(),
214            format!("Module ID: {}", module.module_id),
215            format!("Target: {}", module.target),
216        ];
217
218        if !module.description.is_empty() {
219            parts.push(format!("Current description: {}", module.description));
220        }
221
222        parts.push(String::new());
223        parts.push("Please provide the following missing metadata as JSON:".into());
224        parts.push("{".into());
225
226        for gap in gaps {
227            match gap.as_str() {
228                "description" => {
229                    parts.push(
230                        r#"  "description": "<≤200 chars, what this function does>","#.into(),
231                    );
232                }
233                "documentation" => {
234                    parts.push(r#"  "documentation": "<detailed Markdown explanation>","#.into());
235                }
236                "annotations" => {
237                    parts.push(r#"  "annotations": {"#.into());
238                    parts.push(r#"    "readonly": <true if no side effects>,"#.into());
239                    parts.push(r#"    "destructive": <true if deletes/overwrites data>,"#.into());
240                    parts.push(r#"    "idempotent": <true if safe to retry>,"#.into());
241                    parts.push(r#"    "requires_approval": <true if dangerous operation>,"#.into());
242                    parts.push(r#"    "open_world": <true if calls external systems>,"#.into());
243                    parts
244                        .push(r#"    "streaming": <true if yields results incrementally>,"#.into());
245                    parts.push(r#"    "cacheable": <true if results can be cached>,"#.into());
246                    parts.push(r#"    "cache_ttl": <seconds, 0 for no expiry>,"#.into());
247                    parts.push(r#"    "cache_key_fields": <list of input field names for cache key, or null for all>,"#.into());
248                    parts.push(r#"    "paginated": <true if supports pagination>,"#.into());
249                    parts
250                        .push(r#"    "pagination_style": <"cursor" or "offset" or "page">"#.into());
251                    parts.push("  },".into());
252                }
253                "input_schema" => {
254                    parts.push(
255                        r#"  "input_schema": <JSON Schema object for function parameters>,"#.into(),
256                    );
257                }
258                _ => {}
259            }
260        }
261
262        let confidence_keys: serde_json::Value = annotation_field_names()
263            .into_iter()
264            .map(|field| (field, serde_json::json!(0.0)))
265            .collect::<serde_json::Map<_, _>>()
266            .into();
267        let confidence_str =
268            serde_json::to_string_pretty(&confidence_keys).unwrap_or_else(|_| "{}".into());
269        parts.push(format!(r#"  "confidence": {confidence_str}"#));
270        parts.push("}".into());
271        parts.push(String::new());
272        parts.push("Respond with ONLY valid JSON, no markdown fences or explanation.".into());
273
274        parts.join("\n")
275    }
276
277    /// Call the OpenAI-compatible API and return the response text.
278    fn call_llm(&self, prompt: &str) -> Result<String, AIEnhancerError> {
279        let url = format!("{}/chat/completions", self.endpoint.trim_end_matches('/'));
280        let payload = json!({
281            "model": self.model,
282            "messages": [{"role": "user", "content": prompt}],
283            "temperature": 0.1,
284        });
285
286        let body: Value = self
287            .agent
288            .post(&url)
289            .header("Content-Type", "application/json")
290            .send_json(&payload)
291            .map_err(|e| AIEnhancerError::Connection(format!("Failed to reach SLM at {url}: {e}")))?
292            .body_mut()
293            .read_json()
294            .map_err(|e| AIEnhancerError::Response(format!("Failed to parse SLM response: {e}")))?;
295
296        body["choices"][0]["message"]["content"]
297            .as_str()
298            .map(|s| s.to_string())
299            .ok_or_else(|| AIEnhancerError::Response("Unexpected API response structure".into()))
300    }
301
302    /// Parse the SLM response as JSON, stripping markdown fences if present.
303    fn parse_response(response: &str) -> Result<Value, AIEnhancerError> {
304        let mut text = response.trim().to_string();
305
306        // Strip markdown code fences if the response is more than one line
307        if text.starts_with("```") {
308            let lines: Vec<&str> = text.split('\n').collect();
309            if lines.len() > 1 {
310                let start = if lines[0].starts_with("```") { 1 } else { 0 };
311                let end = if lines.last().map(|l| l.trim()) == Some("```") {
312                    lines.len() - 1
313                } else {
314                    lines.len()
315                };
316                text = lines[start..end].join("\n");
317            }
318        }
319
320        serde_json::from_str(&text)
321            .map_err(|e| AIEnhancerError::Response(format!("SLM returned invalid JSON: {e}")))
322    }
323
324    /// Enhance a single module by calling the SLM.
325    fn enhance_module(
326        &self,
327        module: &ScannedModule,
328        gaps: &[String],
329    ) -> Result<ScannedModule, AIEnhancerError> {
330        let prompt = self.build_prompt(module, gaps);
331        let response = self.call_llm(&prompt)?;
332        let parsed = Self::parse_response(&response)?;
333
334        let mut result = module.clone();
335        let mut confidence: serde_json::Map<String, Value> = serde_json::Map::new();
336
337        // Apply description
338        if gaps.iter().any(|g| g == "description") {
339            if let Some(desc) = parsed.get("description").and_then(|v| v.as_str()) {
340                let conf = parsed
341                    .get("confidence")
342                    .and_then(|c| c.get("description"))
343                    .and_then(|v| v.as_f64())
344                    .unwrap_or(0.0);
345                confidence.insert("description".into(), json!(conf));
346                if conf >= self.threshold {
347                    result.description = clamp_str(desc, 500, &module.module_id, "description");
348                } else {
349                    result.warnings.push(format!(
350                        "Low confidence ({conf:.2}) for description — skipped. Review manually."
351                    ));
352                }
353            }
354        }
355
356        // Apply documentation
357        if gaps.iter().any(|g| g == "documentation") {
358            if let Some(doc) = parsed.get("documentation").and_then(|v| v.as_str()) {
359                let conf = parsed
360                    .get("confidence")
361                    .and_then(|c| c.get("documentation"))
362                    .and_then(|v| v.as_f64())
363                    .unwrap_or(0.0);
364                confidence.insert("documentation".into(), json!(conf));
365                if conf >= self.threshold {
366                    result.documentation = Some(strip_ansi(&clamp_str(
367                        doc,
368                        2000,
369                        &module.module_id,
370                        "documentation",
371                    )));
372                } else {
373                    result.warnings.push(format!(
374                        "Low confidence ({conf:.2}) for documentation — skipped. Review manually."
375                    ));
376                }
377            }
378        }
379
380        // Apply annotations if above threshold (per-field confidence)
381        if gaps.iter().any(|g| g == "annotations") {
382            if let Some(ann_data) = parsed.get("annotations").and_then(|v| v.as_object()) {
383                let ann_conf = parsed
384                    .get("confidence")
385                    .and_then(|v| v.as_object())
386                    .cloned()
387                    .unwrap_or_default();
388                let mut base = module.annotations.clone().unwrap_or_default();
389                let mut any_accepted = false;
390
391                // Iterate boolean fields supplied by the SLM directly.
392                // `set_bool_annotation` validates each field's existence
393                // on `ModuleAnnotations` via a serde round-trip, so the
394                // set of known bool fields lives in one place — the
395                // upstream struct — and new fields added upstream are
396                // picked up automatically.
397                for (field, field_val) in ann_data.iter() {
398                    let Some(bool_val) = field_val.as_bool() else {
399                        continue;
400                    };
401                    let field_conf = get_annotation_confidence(&ann_conf, field);
402                    confidence.insert(format!("annotations.{field}"), json!(field_conf));
403                    if field_conf >= self.threshold {
404                        if set_bool_annotation(&mut base, field, bool_val) {
405                            any_accepted = true;
406                        } else {
407                            result.warnings.push(format!(
408                                "SLM returned unknown bool annotation '{field}' — ignored."
409                            ));
410                        }
411                    } else {
412                        result.warnings.push(format!(
413                            "Low confidence ({field_conf:.2}) for annotations.{field} — skipped. Review manually."
414                        ));
415                    }
416                }
417
418                // Integer fields: cache_ttl
419                if let Some(val) = ann_data.get("cache_ttl").and_then(|v| v.as_u64()) {
420                    let field_conf = get_annotation_confidence(&ann_conf, "cache_ttl");
421                    confidence.insert("annotations.cache_ttl".into(), json!(field_conf));
422                    if field_conf >= self.threshold {
423                        base.cache_ttl = val;
424                        any_accepted = true;
425                    } else {
426                        result.warnings.push(format!(
427                            "Low confidence ({field_conf:.2}) for annotations.cache_ttl — skipped. Review manually."
428                        ));
429                    }
430                }
431
432                // String fields: pagination_style
433                if let Some(val) = ann_data.get("pagination_style").and_then(|v| v.as_str()) {
434                    let field_conf = get_annotation_confidence(&ann_conf, "pagination_style");
435                    confidence.insert("annotations.pagination_style".into(), json!(field_conf));
436                    if field_conf >= self.threshold {
437                        base.pagination_style = val.to_string();
438                        any_accepted = true;
439                    } else {
440                        result.warnings.push(format!(
441                            "Low confidence ({field_conf:.2}) for annotations.pagination_style — skipped. Review manually."
442                        ));
443                    }
444                }
445
446                // List fields: cache_key_fields
447                if let Some(arr) = ann_data.get("cache_key_fields").and_then(|v| v.as_array()) {
448                    let field_conf = get_annotation_confidence(&ann_conf, "cache_key_fields");
449                    confidence.insert("annotations.cache_key_fields".into(), json!(field_conf));
450                    if field_conf >= self.threshold {
451                        let keys: Vec<String> = arr
452                            .iter()
453                            .filter_map(|v| v.as_str().map(|s| s.to_string()))
454                            .collect();
455                        base.cache_key_fields = Some(keys);
456                        any_accepted = true;
457                    } else {
458                        result.warnings.push(format!(
459                            "Low confidence ({field_conf:.2}) for annotations.cache_key_fields — skipped. Review manually."
460                        ));
461                    }
462                }
463
464                if any_accepted {
465                    result.annotations = Some(base);
466                }
467            }
468        }
469
470        // Apply input_schema if above threshold
471        if gaps.iter().any(|g| g == "input_schema") {
472            if let Some(schema) = parsed.get("input_schema") {
473                let conf = parsed
474                    .get("confidence")
475                    .and_then(|c| c.get("input_schema"))
476                    .and_then(|v| v.as_f64())
477                    .unwrap_or(0.0);
478                confidence.insert("input_schema".into(), json!(conf));
479                if conf >= self.threshold {
480                    result.input_schema = schema.clone();
481                } else {
482                    result.warnings.push(format!(
483                        "Low confidence ({conf:.2}) for input_schema — skipped. Review manually."
484                    ));
485                }
486            }
487        }
488
489        // Tag AI-generated fields
490        if !confidence.is_empty() {
491            result
492                .metadata
493                .insert("x-generated-by".into(), Value::String("slm".into()));
494            result
495                .metadata
496                .insert("x-ai-confidence".into(), Value::Object(confidence));
497        }
498
499        Ok(result)
500    }
501}
502
503impl Enhancer for AIEnhancer {
504    fn enhance(&self, modules: Vec<ScannedModule>) -> Vec<ScannedModule> {
505        let mut results: Vec<ScannedModule> = Vec::with_capacity(modules.len());
506
507        let mut pending: Vec<(usize, Vec<String>)> = Vec::new();
508        for (idx, module) in modules.iter().enumerate() {
509            let gaps = self.identify_gaps(module);
510            results.push(module.clone());
511            if !gaps.is_empty() {
512                pending.push((idx, gaps));
513            }
514        }
515
516        for batch in pending.chunks(self.batch_size) {
517            for (idx, gaps) in batch {
518                match self.enhance_module(&modules[*idx], gaps) {
519                    Ok(enhanced) => results[*idx] = enhanced,
520                    Err(e) => {
521                        warn!("AI enhancement failed for {}: {e}", modules[*idx].module_id);
522                    }
523                }
524            }
525        }
526
527        results
528    }
529}
530
531/// Check whether annotations are at their default values.
532///
533/// Uses `serde_json` round-trip equality so the comparison automatically
534/// covers any new field added to `apcore::module::ModuleAnnotations` upstream
535/// (including the `extra` extension map). `ModuleAnnotations` does not
536/// implement `PartialEq`, so direct `==` is unavailable.
537fn is_default_annotations(ann: &ModuleAnnotations) -> bool {
538    match (
539        serde_json::to_value(ann),
540        serde_json::to_value(ModuleAnnotations::default()),
541    ) {
542        (Ok(a), Ok(b)) => a == b,
543        _ => false,
544    }
545}
546
547/// Get confidence for an annotation field, checking both `annotations.<field>` and `<field>` keys.
548fn get_annotation_confidence(conf: &serde_json::Map<String, Value>, field: &str) -> f64 {
549    conf.get(&format!("annotations.{field}"))
550        .or_else(|| conf.get(field))
551        .and_then(|v| v.as_f64())
552        .unwrap_or(0.0)
553}
554
555/// Set a boolean field on `ModuleAnnotations` by name via a serde
556/// round-trip. Returns `true` if the field exists on the struct and is a
557/// boolean; `false` if the field is unknown, non-boolean, or the
558/// round-trip fails. Using serde rather than a hardcoded match removes
559/// the two-list drift risk — new bool fields added to
560/// `apcore::module::ModuleAnnotations` upstream are picked up
561/// automatically.
562fn set_bool_annotation(ann: &mut ModuleAnnotations, field: &str, value: bool) -> bool {
563    let mut serialized = match serde_json::to_value(&ann) {
564        Ok(v) => v,
565        Err(e) => {
566            warn!("set_bool_annotation: serialize failed: {e}");
567            return false;
568        }
569    };
570    let Some(obj) = serialized.as_object_mut() else {
571        return false;
572    };
573    match obj.get(field) {
574        Some(Value::Bool(_)) => {
575            obj.insert(field.to_string(), Value::Bool(value));
576        }
577        // Field absent, or present but not a bool — reject rather than
578        // fabricate a new key (serde would happily accept unknown keys
579        // via `#[serde(extra)]` on ModuleAnnotations, but misclassifying
580        // a non-bool field as bool would corrupt the struct).
581        _ => return false,
582    }
583    match serde_json::from_value::<ModuleAnnotations>(serialized) {
584        Ok(new_ann) => {
585            *ann = new_ann;
586            true
587        }
588        Err(e) => {
589            warn!("set_bool_annotation: deserialize failed: {e}");
590            false
591        }
592    }
593}
594
595/// Validate that an endpoint URL uses an HTTP(S) scheme.
596///
597/// Matches the construction-time scheme validation performed by the Python
598/// and TypeScript SDKs (see `apcore-toolkit-python/src/apcore_toolkit/ai_enhancer.py`
599/// and `apcore-toolkit-typescript/src/ai-enhancer.ts`). Rejecting non-HTTP
600/// schemes (e.g. `file://`, `ftp://`) at construction prevents misleading
601/// connection errors later inside `call_llm` and removes a small but real
602/// SSRF-adjacent surface.
603fn validate_endpoint_scheme(endpoint: &str) -> Result<(), AIEnhancerError> {
604    // Manual parsing: we cannot rely on the `url` crate because it would
605    // add a transitive dependency when the optional `http-proxy` feature
606    // is disabled. The check is intentionally simple — extract the part
607    // before "://" and compare case-insensitively against the allowed set.
608    let Some(scheme_end) = endpoint.find("://") else {
609        return Err(AIEnhancerError::Config(format!(
610            "Invalid endpoint URL (missing scheme): {endpoint}"
611        )));
612    };
613    let scheme = &endpoint[..scheme_end];
614    if scheme.is_empty() {
615        return Err(AIEnhancerError::Config(format!(
616            "Invalid endpoint URL (empty scheme): {endpoint}"
617        )));
618    }
619    let scheme_lower = scheme.to_ascii_lowercase();
620    if scheme_lower != "http" && scheme_lower != "https" {
621        return Err(AIEnhancerError::Config(format!(
622            "Invalid endpoint URL scheme: {scheme}"
623        )));
624    }
625    Ok(())
626}
627
628fn parse_float_env(name: &str, default: f64) -> f64 {
629    match env::var(name) {
630        Ok(v) => v.parse().unwrap_or_else(|_| {
631            warn!(env_var = name, value = %v, "unparseable float env var — using default {default}");
632            default
633        }),
634        Err(_) => default,
635    }
636}
637
638fn parse_usize_env(name: &str, default: usize) -> usize {
639    match env::var(name) {
640        Ok(v) => v.parse().unwrap_or_else(|_| {
641            warn!(env_var = name, value = %v, "unparseable usize env var — using default {default}");
642            default
643        }),
644        Err(_) => default,
645    }
646}
647
648fn parse_u64_env(name: &str, default: u64) -> u64 {
649    match env::var(name) {
650        Ok(v) => v.parse().unwrap_or_else(|_| {
651            warn!(env_var = name, value = %v, "unparseable u64 env var — using default {default}");
652            default
653        }),
654        Err(_) => default,
655    }
656}
657
658/// Clamp an SLM-supplied string to `max_chars` bytes, warning if truncated.
659fn clamp_str(s: &str, max_chars: usize, module_id: &str, field: &str) -> String {
660    if s.len() <= max_chars {
661        return s.to_string();
662    }
663    // Truncate at a char boundary.
664    let truncated = &s[..s
665        .char_indices()
666        .take_while(|(i, _)| *i < max_chars)
667        .last()
668        .map(|(i, c)| i + c.len_utf8())
669        .unwrap_or(max_chars)];
670    tracing::warn!(
671        module_id = %module_id,
672        field = %field,
673        original_len = s.len(),
674        clamped_len = truncated.len(),
675        "SLM-supplied field truncated to prevent oversized output"
676    );
677    truncated.to_string()
678}
679
/// Strip ANSI CSI escape sequences from a string.
///
/// A CSI sequence is `ESC [` followed by parameter and intermediate bytes
/// and terminated by one final byte in the range `0x40..=0x7E` (`@` through
/// `~`). The whole sequence, final byte included, is removed. Stopping only
/// on ASCII letters would miss finals such as `~` or `@` and swallow the
/// text that follows them.
///
/// An `ESC` that is not followed by `[` is copied through unchanged. A
/// sequence left unterminated at the end of the input is dropped.
fn strip_ansi(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    let mut chars = s.chars().peekable();
    while let Some(c) = chars.next() {
        if c == '\x1b' && chars.peek() == Some(&'[') {
            chars.next(); // consume '['
            // Skip everything up to and including the final byte.
            for c2 in chars.by_ref() {
                if ('\x40'..='\x7e').contains(&c2) {
                    break;
                }
            }
        } else {
            out.push(c);
        }
    }
    out
}
698
699#[cfg(test)]
700mod tests {
701    use super::*;
702    use apcore::module::ModuleAnnotations;
703    use serde_json::json;
704
705    #[test]
706    fn test_ai_enhancer_new_defaults() {
707        let enhancer = AIEnhancer::new(None, None, None, None, None).unwrap();
708        assert_eq!(enhancer.endpoint, DEFAULT_ENDPOINT);
709        assert_eq!(enhancer.model, DEFAULT_MODEL);
710        assert!((enhancer.threshold - DEFAULT_THRESHOLD).abs() < f64::EPSILON);
711        assert_eq!(enhancer.batch_size, DEFAULT_BATCH_SIZE);
712        assert_eq!(enhancer.timeout, DEFAULT_TIMEOUT);
713    }
714
715    #[test]
716    fn test_ai_enhancer_new_with_overrides() {
717        let enhancer = AIEnhancer::new(
718            Some("http://custom:8080".into()),
719            Some("llama3".into()),
720            Some(0.5),
721            Some(10),
722            Some(60),
723        )
724        .unwrap();
725        assert_eq!(enhancer.endpoint, "http://custom:8080");
726        assert_eq!(enhancer.model, "llama3");
727        assert!((enhancer.threshold - 0.5).abs() < f64::EPSILON);
728    }
729
730    #[test]
731    fn test_ai_enhancer_threshold_validation() {
732        let result = AIEnhancer::new(None, None, Some(1.5), None, None);
733        assert!(result.is_err());
734    }
735
736    #[test]
737    fn test_ai_enhancer_batch_size_validation() {
738        let result = AIEnhancer::new(None, None, None, Some(0), None);
739        assert!(result.is_err());
740    }
741
742    #[test]
743    fn test_identify_gaps_complete_module() {
744        let enhancer = AIEnhancer::new(None, None, None, None, None).unwrap();
745        let mut module = ScannedModule::new(
746            "test".into(),
747            "A real description".into(),
748            json!({"type": "object", "properties": {"x": {"type": "string"}}}),
749            json!({}),
750            vec![],
751            "app:func".into(),
752        );
753        module.documentation = Some("Full docs".into());
754        module.annotations = Some(ModuleAnnotations {
755            readonly: true,
756            ..Default::default()
757        });
758        let gaps = enhancer.identify_gaps(&module);
759        assert!(gaps.is_empty());
760    }
761
762    #[test]
763    fn test_identify_gaps_missing_fields() {
764        let enhancer = AIEnhancer::new(None, None, None, None, None).unwrap();
765        let module = ScannedModule::new(
766            "test".into(),
767            String::new(),
768            json!({"type": "object"}),
769            json!({}),
770            vec![],
771            "app:func".into(),
772        );
773        let gaps = enhancer.identify_gaps(&module);
774        assert!(gaps.iter().any(|g| g == "description"));
775        assert!(gaps.iter().any(|g| g == "documentation"));
776        assert!(gaps.iter().any(|g| g == "annotations"));
777        assert!(gaps.iter().any(|g| g == "input_schema"));
778    }
779
780    #[test]
781    fn test_parse_response_valid_json() {
782        let response = r#"{"description": "hello", "confidence": {"description": 0.9}}"#;
783        let result = AIEnhancer::parse_response(response).unwrap();
784        assert_eq!(result["description"], "hello");
785    }
786
787    #[test]
788    fn test_parse_response_with_fences() {
789        let response = "```json\n{\"key\": \"value\"}\n```";
790        let result = AIEnhancer::parse_response(response).unwrap();
791        assert_eq!(result["key"], "value");
792    }
793
794    #[test]
795    fn test_parse_response_invalid() {
796        let result = AIEnhancer::parse_response("not json");
797        assert!(result.is_err());
798    }
799
800    #[test]
801    fn test_is_enabled_default() {
802        // Assuming env var is not set in test environment
803        env::remove_var("APCORE_AI_ENABLED");
804        assert!(!AIEnhancer::is_enabled());
805    }
806
807    #[test]
808    fn test_build_prompt_contains_module_info() {
809        let enhancer = AIEnhancer::new(None, None, None, None, None).unwrap();
810        let module = ScannedModule::new(
811            "users.get".into(),
812            "Get user".into(),
813            json!({}),
814            json!({}),
815            vec![],
816            "app:get_user".into(),
817        );
818        let prompt = enhancer.build_prompt(&module, &["description".into()]);
819        assert!(prompt.contains("users.get"));
820        assert!(prompt.contains("app:get_user"));
821        assert!(prompt.contains("description"));
822    }
823
824    #[test]
825    fn test_identify_gaps_description_equals_module_id() {
826        let enhancer = AIEnhancer::new(None, None, None, None, None).unwrap();
827        let module = ScannedModule::new(
828            "my_module".into(),
829            "my_module".into(), // description == module_id
830            json!({"type": "object", "properties": {"x": {"type": "string"}}}),
831            json!({}),
832            vec![],
833            "app:func".into(),
834        );
835        let gaps = enhancer.identify_gaps(&module);
836        assert!(
837            gaps.iter().any(|g| g == "description"),
838            "description matching module_id should be identified as a gap"
839        );
840    }
841
842    #[test]
843    fn test_ai_enhancer_timeout_validation() {
844        let result = AIEnhancer::new(None, None, None, None, Some(0));
845        assert!(result.is_err());
846        let err = result.unwrap_err();
847        assert!(err
848            .to_string()
849            .contains("APCORE_AI_TIMEOUT must be a positive integer"));
850    }
851
852    // All is_enabled tests are combined into one to prevent env var races
853    // when tests run in parallel (env vars are process-global).
854    #[test]
855    fn test_is_enabled_variants() {
856        use std::sync::Mutex;
857        static ENV_LOCK: Mutex<()> = Mutex::new(());
858        let _guard = ENV_LOCK.lock().unwrap();
859
860        // Default (unset) → disabled
861        unsafe { env::remove_var("APCORE_AI_ENABLED") };
862        assert!(!AIEnhancer::is_enabled(), "should be disabled by default");
863
864        // "true" → enabled
865        unsafe { env::set_var("APCORE_AI_ENABLED", "true") };
866        assert!(AIEnhancer::is_enabled(), "\"true\" should enable");
867
868        // "yes" → enabled
869        unsafe { env::set_var("APCORE_AI_ENABLED", "yes") };
870        assert!(AIEnhancer::is_enabled(), "\"yes\" should enable");
871
872        // "1" → enabled
873        unsafe { env::set_var("APCORE_AI_ENABLED", "1") };
874        assert!(AIEnhancer::is_enabled(), "\"1\" should enable");
875
876        // "false" → disabled
877        unsafe { env::set_var("APCORE_AI_ENABLED", "false") };
878        assert!(!AIEnhancer::is_enabled(), "\"false\" should disable");
879
880        // Cleanup
881        unsafe { env::remove_var("APCORE_AI_ENABLED") };
882    }
883
884    #[test]
885    fn test_parse_response_strips_json_fence() {
886        let response = "```json\n{\"description\": \"hello world\"}\n```";
887        let result = AIEnhancer::parse_response(response).unwrap();
888        assert_eq!(result["description"], "hello world");
889    }
890
891    #[test]
892    fn test_build_prompt_requests_annotations() {
893        let enhancer = AIEnhancer::new(None, None, None, None, None).unwrap();
894        let module = ScannedModule::new(
895            "test".into(),
896            "desc".into(),
897            json!({}),
898            json!({}),
899            vec![],
900            "app:func".into(),
901        );
902        let prompt = enhancer.build_prompt(&module, &["annotations".into()]);
903        assert!(
904            prompt.contains("readonly"),
905            "prompt should mention annotations fields"
906        );
907        assert!(prompt.contains("destructive"));
908        assert!(prompt.contains("idempotent"));
909    }
910
911    #[test]
912    fn test_build_prompt_requests_input_schema() {
913        let enhancer = AIEnhancer::new(None, None, None, None, None).unwrap();
914        let module = ScannedModule::new(
915            "test".into(),
916            "desc".into(),
917            json!({}),
918            json!({}),
919            vec![],
920            "app:func".into(),
921        );
922        let prompt = enhancer.build_prompt(&module, &["input_schema".into()]);
923        assert!(
924            prompt.contains("input_schema"),
925            "prompt should mention input_schema"
926        );
927        assert!(prompt.contains("JSON Schema"));
928    }
929
930    #[test]
931    fn test_build_prompt_requests_documentation() {
932        let enhancer = AIEnhancer::new(None, None, None, None, None).unwrap();
933        let module = ScannedModule::new(
934            "test".into(),
935            "desc".into(),
936            json!({}),
937            json!({}),
938            vec![],
939            "app:func".into(),
940        );
941        let prompt = enhancer.build_prompt(&module, &["documentation".into()]);
942        assert!(
943            prompt.contains("documentation"),
944            "prompt should mention documentation"
945        );
946        assert!(prompt.contains("Markdown"));
947    }
948
949    #[test]
950    fn test_parse_response_single_line_fence_does_not_panic() {
951        // A single-line ``` response used to panic with lines[1..0].
952        let response = "```";
953        let result = AIEnhancer::parse_response(response);
954        assert!(result.is_err(), "single-line fence is not valid JSON");
955    }
956
957    #[test]
958    fn test_parse_response_backtick_only_line_treated_as_json() {
959        // Regression: must not panic, must return an error gracefully.
960        let response = "```\n```";
961        let result = AIEnhancer::parse_response(response);
962        // Empty string after stripping is invalid JSON.
963        assert!(result.is_err());
964    }
965
966    // ---- set_bool_annotation (serde round-trip, D4-1 regression guards) ----
967
968    #[test]
969    fn test_set_bool_annotation_readonly() {
970        let mut ann = ModuleAnnotations::default();
971        assert!(set_bool_annotation(&mut ann, "readonly", true));
972        assert!(ann.readonly);
973    }
974
975    #[test]
976    fn test_set_bool_annotation_destructive() {
977        let mut ann = ModuleAnnotations::default();
978        assert!(set_bool_annotation(&mut ann, "destructive", true));
979        assert!(ann.destructive);
980    }
981
982    #[test]
983    fn test_set_bool_annotation_unknown_field_rejected() {
984        let mut ann = ModuleAnnotations::default();
985        assert!(!set_bool_annotation(
986            &mut ann,
987            "nonexistent_field_xyz",
988            true
989        ));
990        // Annotations unchanged.
991        assert!(is_default_annotations(&ann));
992    }
993
994    #[test]
995    fn test_set_bool_annotation_non_bool_field_rejected() {
996        let mut ann = ModuleAnnotations::default();
997        // `cache_ttl` is an integer field on ModuleAnnotations.
998        // Round-trip rejects setting it to a bool.
999        assert!(!set_bool_annotation(&mut ann, "cache_ttl", true));
1000        assert_eq!(ann.cache_ttl, 0); // unchanged default
1001    }
1002
1003    #[test]
1004    fn test_set_bool_annotation_preserves_other_fields() {
1005        let mut ann = ModuleAnnotations {
1006            destructive: true,
1007            cache_ttl: 99,
1008            ..Default::default()
1009        };
1010        assert!(set_bool_annotation(&mut ann, "readonly", true));
1011        // Original fields survive the serde round-trip.
1012        assert!(ann.readonly);
1013        assert!(ann.destructive);
1014        assert_eq!(ann.cache_ttl, 99);
1015    }
1016
1017    #[test]
1018    fn test_clamp_str_under_limit() {
1019        let s = "hello";
1020        assert_eq!(clamp_str(s, 500, "mod", "desc"), "hello");
1021    }
1022
1023    #[test]
1024    fn test_clamp_str_over_limit_truncates() {
1025        let s = "a".repeat(600);
1026        let result = clamp_str(&s, 500, "mod", "desc");
1027        assert_eq!(result.len(), 500);
1028    }
1029
1030    #[test]
1031    fn test_clamp_str_unicode_boundary() {
1032        // "é" is 2 bytes — ensure we don't split it
1033        let s = "é".repeat(300); // 600 bytes
1034        let result = clamp_str(&s, 500, "mod", "desc");
1035        assert!(result.len() <= 500);
1036        assert!(std::str::from_utf8(result.as_bytes()).is_ok());
1037    }
1038
1039    #[test]
1040    fn test_strip_ansi_no_sequences() {
1041        assert_eq!(strip_ansi("hello world"), "hello world");
1042    }
1043
1044    #[test]
1045    fn test_strip_ansi_removes_color_codes() {
1046        let input = "\x1b[31mred text\x1b[0m";
1047        assert_eq!(strip_ansi(input), "red text");
1048    }
1049
1050    #[test]
1051    fn test_strip_ansi_mixed_content() {
1052        let input = "normal \x1b[1mbold\x1b[0m text";
1053        assert_eq!(strip_ansi(input), "normal bold text");
1054    }
1055
1056    // ---- Endpoint scheme validation (A-D-003 parity with Python/TS) ----
1057
1058    #[test]
1059    fn test_ai_enhancer_rejects_file_scheme() {
1060        let result = AIEnhancer::new(Some("file:///etc/passwd".into()), None, None, None, None);
1061        assert!(result.is_err(), "file:// scheme must be rejected");
1062        let err = result.unwrap_err().to_string();
1063        assert!(
1064            err.contains("Invalid endpoint URL scheme"),
1065            "error should call out invalid scheme, got: {err}"
1066        );
1067    }
1068
1069    #[test]
1070    fn test_ai_enhancer_rejects_ftp_scheme() {
1071        let result = AIEnhancer::new(Some("ftp://example.com".into()), None, None, None, None);
1072        assert!(result.is_err(), "ftp:// scheme must be rejected");
1073    }
1074
1075    #[test]
1076    fn test_ai_enhancer_rejects_missing_scheme() {
1077        let result = AIEnhancer::new(Some("localhost:11434".into()), None, None, None, None);
1078        assert!(result.is_err(), "URL without scheme must be rejected");
1079    }
1080
1081    #[test]
1082    fn test_ai_enhancer_accepts_http_scheme() {
1083        let result = AIEnhancer::new(
1084            Some("http://localhost:11434/v1".into()),
1085            None,
1086            None,
1087            None,
1088            None,
1089        );
1090        assert!(result.is_ok(), "http:// must be accepted");
1091    }
1092
1093    #[test]
1094    fn test_ai_enhancer_accepts_https_scheme() {
1095        let result = AIEnhancer::new(
1096            Some("https://api.example.com/v1".into()),
1097            None,
1098            None,
1099            None,
1100            None,
1101        );
1102        assert!(result.is_ok(), "https:// must be accepted");
1103    }
1104
1105    // ---- Dynamic annotation field discovery (A-D-002 parity with Python/TS) ----
1106
1107    #[test]
1108    fn test_annotation_field_names_match_struct() {
1109        let names = annotation_field_names();
1110        // Must include real ModuleAnnotations fields.
1111        assert!(names.iter().any(|n| n == "readonly"));
1112        assert!(names.iter().any(|n| n == "destructive"));
1113        assert!(names.iter().any(|n| n == "idempotent"));
1114        assert!(names.iter().any(|n| n == "cacheable"));
1115        assert!(names.iter().any(|n| n == "cache_ttl"));
1116        assert!(names.iter().any(|n| n == "paginated"));
1117        // Must NOT include phantom fields from the stale const list.
1118        assert!(!names.iter().any(|n| n == "tags"));
1119        assert!(!names.iter().any(|n| n == "version"));
1120        assert!(!names.iter().any(|n| n == "category"));
1121        assert!(!names.iter().any(|n| n == "requires_confirmation"));
1122        assert!(!names.iter().any(|n| n == "long_running"));
1123        // Must NOT include the open-extension map key.
1124        assert!(!names.iter().any(|n| n == "extra"));
1125        // No duplicates.
1126        let mut sorted = names.clone();
1127        sorted.sort();
1128        sorted.dedup();
1129        assert_eq!(sorted.len(), names.len(), "field names must be unique");
1130    }
1131}
apcore_toolkit/ai_enhancer.rs

apcore_toolkit/
ai_enhancer.rs