// zeph_skills/proactive.rs

// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
// SPDX-License-Identifier: MIT OR Apache-2.0

//! Proactive world-knowledge exploration (#3320).
//!
//! [`ProactiveExplorer`] classifies incoming queries against a keyword map of recognisable
//! technology domains and, for domains with no existing SKILL.md, generates one. The skill
//! is written to disk and registered in the [`SkillRegistry`] immediately, but becomes
//! **visible to [`crate::matcher::SkillMatcher`]** only on the next turn — this is an intentional MVP
//! trade-off that avoids an expensive synchronous re-embed on the hot path.
//!
//! # Domain keyword map (MVP)
//!
//! The classifier uses a static keyword → domain table. Any query word that is an exact
//! lowercase match for an entry in the table produces a [`DomainLabel`]. Only the first
//! matching domain is returned; no disambiguation is attempted.
//!
//! # Evaluator gate
//!
//! When constructed with an `Option<Arc<SkillEvaluator>>`, each generated skill is scored
//! before being written to disk. On evaluator rejection the method returns `Ok(())` with
//! a `tracing::info!` log — rejection is a normal outcome, not a fault.

use std::path::PathBuf;
use std::sync::Arc;

use crate::error::SkillError;
use crate::evaluator::{SkillEvaluationRequest, SkillEvaluator, SkillVerdict};
use crate::generator::{SkillGenerationRequest, SkillGenerator};
use crate::registry::SkillRegistry;

/// Keyword → domain mapping used by [`ProactiveExplorer::classify`].
///
/// Each entry is `(keyword, domain_slug)`. Keyword matching is case-insensitive
/// and performed on whitespace-separated tokens in the query.
///
/// Several keywords may share one slug (`"k8s"` and `"kubernetes"` both map to
/// `"kubernetes"`), so aliases collapse onto a single generated skill. All keywords
/// are stored in lowercase because the classifier lowercases the query before
/// comparing.
static DOMAIN_KEYWORDS: &[(&str, &str)] = &[
    ("rust", "rust"),
    ("python", "python"),
    ("docker", "docker"),
    ("git", "git"),
    ("sql", "sql"),
    ("http", "http"),
    ("kubernetes", "kubernetes"),
    ("k8s", "kubernetes"),
    ("typescript", "typescript"),
    ("go", "go"),
    ("golang", "go"),
    ("terraform", "terraform"),
    ("react", "react"),
    ("postgres", "postgres"),
    ("postgresql", "postgres"),
    ("bash", "bash"),
    ("shell", "bash"),
    ("yaml", "yaml"),
    ("json", "json"),
    ("toml", "toml"),
    ("grpc", "grpc"),
    ("redis", "redis"),
    ("kafka", "kafka"),
    ("aws", "aws"),
    ("gcp", "gcp"),
    ("azure", "azure"),
];

/// A canonical domain identifier produced by [`ProactiveExplorer::classify`].
///
/// Wraps a lowercase slug like `"rust"` or `"kubernetes"`. The inner string is public
/// and is not validated on construction, so callers building a label by hand are
/// responsible for supplying a sensible slug.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DomainLabel(pub String);

impl DomainLabel {
    /// Return the canonical skill name for this domain: `"world-knowledge-{slug}"`.
    ///
    /// The slug is appended verbatim after the fixed `world-knowledge-` prefix.
    ///
    /// # Examples
    ///
    /// ```
    /// use zeph_skills::proactive::DomainLabel;
    /// let d = DomainLabel("rust".into());
    /// assert_eq!(d.to_skill_name(), "world-knowledge-rust");
    /// ```
    #[must_use]
    pub fn to_skill_name(&self) -> String {
        format!("world-knowledge-{}", self.0)
    }
}

87/// Classifies queries and generates world-knowledge SKILL.md files on demand.
88///
89/// Constructed by the agent builder when
90/// `config.skills.proactive_exploration.enabled = true`. Attach an evaluator via the
91/// constructor to apply the quality gate (Feature B, #3319) to generated skills.
92///
93/// # Examples
94///
95/// ```rust,no_run
96/// use std::path::PathBuf;
97/// use std::sync::Arc;
98/// use zeph_skills::proactive::ProactiveExplorer;
99/// use zeph_skills::generator::SkillGenerator;
100///
101/// # async fn demo(provider: zeph_llm::any::AnyProvider, registry: &zeph_skills::registry::SkillRegistry) {
102/// let generator = SkillGenerator::new(provider, PathBuf::from("/tmp/skills"));
103/// let explorer = ProactiveExplorer::new(generator, None, PathBuf::from("/tmp/skills"), 8_000, 30_000, vec![]);
104/// if let Some(domain) = explorer.classify("how do I use docker volumes?") {
105///     if !explorer.has_knowledge(registry, &domain) {
106///         explorer.explore(&domain).await.ok();
107///     }
108/// }
109/// # }
110/// ```
111pub struct ProactiveExplorer {
112    generator: SkillGenerator,
113    evaluator: Option<Arc<SkillEvaluator>>,
114    output_dir: PathBuf,
115    max_chars: usize,
116    timeout_ms: u64,
117    excluded_domains: Vec<String>,
118}
119
120impl ProactiveExplorer {
121    /// Create a new explorer.
122    ///
123    /// - `generator`: drives SKILL.md generation.
124    /// - `evaluator`: optional quality gate (Feature B).
125    /// - `output_dir`: where generated skills are written.
126    /// - `max_chars`: approximate target size hint passed in the generation prompt.
127    /// - `timeout_ms`: per-exploration timeout covering the full generate → write path.
128    /// - `excluded_domains`: domain slugs to skip (e.g. `["rust"]`).
129    #[must_use]
130    pub fn new(
131        generator: SkillGenerator,
132        evaluator: Option<Arc<SkillEvaluator>>,
133        output_dir: PathBuf,
134        max_chars: usize,
135        timeout_ms: u64,
136        excluded_domains: Vec<String>,
137    ) -> Self {
138        Self {
139            generator,
140            evaluator,
141            output_dir,
142            max_chars,
143            timeout_ms,
144            excluded_domains,
145        }
146    }
147
148    /// Expose the configured timeout so callers can set `tokio::time::timeout` correctly.
149    #[must_use]
150    pub fn timeout_ms(&self) -> u64 {
151        self.timeout_ms
152    }
153
154    /// Classify `query` against the keyword map.
155    ///
156    /// Returns `None` when no keyword in the query matches a known domain.
157    /// Returns the first matching [`DomainLabel`] otherwise.
158    #[tracing::instrument(name = "core.proactive.classify", skip_all)]
159    pub fn classify(&self, query: &str) -> Option<DomainLabel> {
160        let lower = query.to_lowercase();
161        for token in lower.split_whitespace() {
162            // Strip trailing punctuation from tokens.
163            let token = token.trim_end_matches(|c: char| !c.is_alphanumeric());
164            for &(keyword, domain) in DOMAIN_KEYWORDS {
165                if token == keyword {
166                    return Some(DomainLabel(domain.to_string()));
167                }
168            }
169        }
170        None
171    }
172
173    /// Return `true` if the registry already contains a skill for `domain`.
174    #[must_use]
175    pub fn has_knowledge(&self, registry: &SkillRegistry, domain: &DomainLabel) -> bool {
176        let name = domain.to_skill_name();
177        registry.all_meta().iter().any(|m| m.name == name)
178    }
179
180    /// Return `true` if `domain` is in the configured exclusion list.
181    #[must_use]
182    pub fn is_excluded(&self, domain: &DomainLabel) -> bool {
183        self.excluded_domains.iter().any(|e| e == &domain.0)
184    }
185
186    /// Generate and persist a SKILL.md for `domain`.
187    ///
188    /// Applies the evaluator gate when configured. On evaluator rejection returns
189    /// `Ok(())` with an info-level log — rejection is not an error.
190    ///
191    /// # Errors
192    ///
193    /// Returns [`SkillError`] if SKILL.md generation or the filesystem write fails.
194    #[tracing::instrument(name = "core.proactive.explore", skip_all, fields(domain = %domain.0))]
195    pub async fn explore(&self, domain: &DomainLabel) -> Result<(), SkillError> {
196        let description = format!(
197            "World-knowledge reference skill for {domain}. \
198             Provide concise, authoritative quick-reference information about {domain}: \
199             key commands, idioms, and best practices. Keep the body under {max_chars} characters.",
200            domain = domain.0,
201            max_chars = self.max_chars,
202        );
203
204        let req = SkillGenerationRequest {
205            description: description.clone(),
206            category: Some("dev".into()),
207            allowed_tools: vec![],
208        };
209
210        let skill = self.generator.generate(req).await?;
211
212        // Evaluator gate (S3 fix — see arch spec §2.3).
213        if let Some(ref evaluator) = self.evaluator {
214            let eval_req = SkillEvaluationRequest {
215                name: &skill.name,
216                description: &skill.meta.description,
217                body: &skill.content,
218                original_intent: &description,
219            };
220            match evaluator.evaluate(&eval_req).await? {
221                SkillVerdict::Accept(_) | SkillVerdict::AcceptOnEvalError(_) => {}
222                SkillVerdict::Reject { score: _, reason } => {
223                    tracing::info!(
224                        domain = %domain.0,
225                        %reason,
226                        "proactive skill rejected by evaluator — skipping write"
227                    );
228                    return Ok(());
229                }
230            }
231        }
232
233        // Write SKILL.md to disk. Skip if already exists (idempotent).
234        let skill_dir = self.output_dir.join(&skill.name);
235        if skill_dir.exists() {
236            tracing::debug!(
237                domain = %domain.0,
238                skill = %skill.name,
239                "proactive skill already exists, skipping"
240            );
241            return Ok(());
242        }
243        tokio::fs::create_dir_all(&skill_dir).await?;
244        let skill_path = skill_dir.join("SKILL.md");
245        tokio::fs::write(&skill_path, &skill.content).await?;
246        tracing::info!(
247            domain = %domain.0,
248            skill = %skill.name,
249            path = %skill_path.display(),
250            "proactive skill written to disk"
251        );
252        Ok(())
253    }
254}
255
#[cfg(test)]
mod tests {
    use super::*;

    /// Build an explorer backed by the mock LLM provider and no evaluator.
    ///
    /// None of the tests below call `explore`, so the provider and the `/tmp` output
    /// directory are never actually used; only `excluded_domains` varies between tests.
    fn explorer_with(excluded_domains: Vec<String>) -> ProactiveExplorer {
        let generator = SkillGenerator::new(
            zeph_llm::any::AnyProvider::Mock(zeph_llm::mock::MockProvider::default()),
            PathBuf::from("/tmp"),
        );
        ProactiveExplorer::new(
            generator,
            None,
            PathBuf::from("/tmp"),
            8_000,
            30_000,
            excluded_domains,
        )
    }

    #[test]
    fn classify_rust_query() {
        let explorer = explorer_with(vec![]);

        let label = explorer.classify("how do I use rust async");
        assert_eq!(label, Some(DomainLabel("rust".into())));
    }

    #[test]
    fn classify_returns_none_for_unknown_domain() {
        let explorer = explorer_with(vec![]);

        assert_eq!(explorer.classify("how are you today"), None);
    }

    #[test]
    fn classify_docker_with_punctuation() {
        let explorer = explorer_with(vec![]);

        // Token "docker," with trailing comma — should still match.
        let label = explorer.classify("docker, how do I mount volumes?");
        assert_eq!(label, Some(DomainLabel("docker".into())));
    }

    #[test]
    fn is_excluded_matches_configured_domains() {
        let explorer = explorer_with(vec!["rust".into(), "go".into()]);

        assert!(explorer.is_excluded(&DomainLabel("rust".into())));
        assert!(explorer.is_excluded(&DomainLabel("go".into())));
        assert!(!explorer.is_excluded(&DomainLabel("python".into())));
    }

    #[test]
    fn domain_label_to_skill_name() {
        assert_eq!(
            DomainLabel("rust".into()).to_skill_name(),
            "world-knowledge-rust"
        );
        assert_eq!(
            DomainLabel("kubernetes".into()).to_skill_name(),
            "world-knowledge-kubernetes"
        );
    }

    #[test]
    fn has_knowledge_empty_registry() {
        let registry = SkillRegistry::load(&[] as &[std::path::PathBuf]);
        let explorer = explorer_with(vec![]);

        assert!(!explorer.has_knowledge(&registry, &DomainLabel("rust".into())));
    }
}