Skip to main content

matrixcode_core/prompt/
preprocess.rs

1//! Pre-processing Hook for Skills/Workflows Trigger Detection
2//!
3//! This module implements the **backend-side** trigger detection that was
4//! previously described in the prompt. By moving this logic to code:
5//! - Eliminates ambiguity in pattern matching
6//! - Provides deterministic behavior
7//! - Reduces prompt token cost (~100 lines removed from prompt)
8//! - Enables easier testing and debugging
9//!
10//! # Dynamic Trigger Loading
11//!
12//! Triggers are now loaded dynamically from skill files' `trigger` field,
13//! instead of being hardcoded. This allows skills to define their own
14//! trigger patterns without modifying this code.
15//!
16//! # Auto-loading Skills
17//!
18//! When a skill is triggered, the system can optionally auto-load the
19//! skill content, saving an extra round-trip.
20
21use regex::Regex;
22use std::collections::HashMap;
23use std::sync::Arc;
24
25use crate::skills::Skill;
26
/// Outcome of running trigger detection over a single user message.
#[derive(Clone, Debug, PartialEq)]
pub enum ProcessResult {
    /// A skill pattern matched the message.
    SkillTriggered {
        /// Identifier of the skill whose pattern matched.
        skill_id: String,
        /// Weight reported by the matching pattern.
        confidence: f32,
        /// Body of the originating skill, when the pattern carries one.
        skill_body: Option<String>,
    },
    /// A workflow keyword matched the message.
    WorkflowTriggered {
        /// Identifier of the workflow whose keyword matched.
        workflow_id: String,
        /// Inputs extracted from the message, keyed by input name.
        inputs: HashMap<String, String>,
    },
    /// Nothing matched; the caller should carry on with normal processing.
    Continue,
}
45
/// Category of trigger that was detected.
///
/// NOTE(review): this enum is not referenced elsewhere in the visible part of
/// this file; the variant descriptions below are inferred from their names and
/// should be confirmed against callers.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum TriggerType {
    /// A skill trigger.
    Skill,
    /// A workflow trigger.
    Workflow,
    /// Presumably a skill detected through a keyword — confirm.
    SkillKeyword,
    /// Presumably a workflow detected through a keyword — confirm.
    WorkflowKeyword,
}
54
55/// Skill trigger pattern
56#[derive(Debug, Clone)]
57pub struct SkillPattern {
58    /// Skill identifier (e.g., "code-review", "refactor")
59    pub skill_id: String,
60    /// Primary trigger patterns (regex or keyword)
61    pub patterns: Vec<String>,
62    /// Compiled regex patterns
63    pub compiled: Vec<Regex>,
64    /// Confidence weight (0.0 - 1.0)
65    pub weight: f32,
66    /// Reference to the skill (for auto-loading)
67    pub skill: Option<Skill>,
68}
69
70impl SkillPattern {
71    pub fn new(skill_id: impl Into<String>, patterns: Vec<&str>, weight: f32) -> Self {
72        let patterns: Vec<String> = patterns.into_iter().map(|s| s.to_string()).collect();
73        let compiled = patterns
74            .iter()
75            .filter_map(|p| Regex::new(&format!("(?i){}", p)).ok())
76            .collect();
77
78        Self {
79            skill_id: skill_id.into(),
80            patterns,
81            compiled,
82            weight,
83            skill: None,
84        }
85    }
86
87    /// Create from a Skill with trigger field
88    pub fn from_skill(skill: &Skill) -> Option<Self> {
89        let trigger = skill.trigger.as_ref()?;
90
91        // Parse trigger field: comma-separated patterns
92        // Format: "/review, 审查代码, review"
93        let patterns: Vec<&str> = trigger
94            .split(',')
95            .map(|s| s.trim())
96            .filter(|s| !s.is_empty())
97            .collect();
98
99        if patterns.is_empty() {
100            return None;
101        }
102
103        let compiled = patterns
104            .iter()
105            .filter_map(|p| {
106                // If pattern starts with '/', treat as slash command
107                // Otherwise, treat as keyword match
108                let regex_pattern = if p.starts_with('/') {
109                    // Exact match for slash commands
110                    format!("^{}(?:\\s|$)", p)
111                } else {
112                    // Contains match for keywords
113                    format!("(?i){}", p)
114                };
115                Regex::new(&regex_pattern).ok()
116            })
117            .collect();
118
119        Some(Self {
120            skill_id: skill.name.clone(),
121            patterns: patterns.iter().map(|s| s.to_string()).collect(),
122            compiled,
123            weight: 0.9, // High confidence for skill-defined triggers
124            skill: Some(skill.clone()),
125        })
126    }
127
128    /// Check if user message matches this skill
129    pub fn matches(&self, message: &str) -> Option<f32> {
130        for regex in &self.compiled {
131            if regex.is_match(message) {
132                return Some(self.weight);
133            }
134        }
135        None
136    }
137
138    /// Get skill body if available
139    pub fn get_skill_body(&self) -> Option<&str> {
140        self.skill.as_ref().map(|s| s.body.as_str())
141    }
142}
143
/// Keyword-based trigger configuration for one workflow.
#[derive(Clone, Debug)]
pub struct WorkflowTrigger {
    /// Identifier of the workflow.
    pub workflow_id: String,
    /// Keywords whose presence in a message triggers the workflow.
    pub keywords: Vec<String>,
    /// Names of the inputs that may be extracted from the message.
    pub extractable_inputs: Vec<String>,
}
154
155impl WorkflowTrigger {
156    pub fn new(workflow_id: impl Into<String>, keywords: Vec<&str>, inputs: Vec<&str>) -> Self {
157        Self {
158            workflow_id: workflow_id.into(),
159            keywords: keywords.into_iter().map(|s| s.to_string()).collect(),
160            extractable_inputs: inputs.into_iter().map(|s| s.to_string()).collect(),
161        }
162    }
163
164    /// Check if message triggers this workflow
165    pub fn matches(&self, message: &str) -> bool {
166        let msg_lower = message.to_lowercase();
167        self.keywords
168            .iter()
169            .any(|k| msg_lower.contains(&k.to_lowercase()))
170    }
171
172    /// Extract inputs from message (simple extraction)
173    pub fn extract_inputs(&self, message: &str) -> HashMap<String, String> {
174        let mut inputs = HashMap::new();
175
176        // Simple topic extraction for common patterns
177        if self.extractable_inputs.contains(&"topic".to_string()) {
178            // Pattern: "generate article about X" or "X article"
179            let patterns = [
180                r"(?i)(?:generate|create|write).*(?:article|post|content).*?about\s+(.+?)(?:\.|$)",
181                r"(?i)(?:article|post|content)\s+about\s+(.+?)(?:\.|$)",
182            ];
183
184            for pattern in patterns {
185                if let Ok(re) = Regex::new(pattern) {
186                    if let Some(caps) = re.captures(message) {
187                        if let Some(topic) = caps.get(1) {
188                            inputs.insert("topic".to_string(), topic.as_str().trim().to_string());
189                            break;
190                        }
191                    }
192                }
193            }
194        }
195
196        inputs
197    }
198}
199
/// Pre-processing hook for trigger detection.
///
/// Holds the registered skill patterns and workflow triggers together with
/// the minimum confidence a skill match must reach to be reported.
pub struct PreProcessHook {
    /// Registered skill patterns, checked in order.
    skills: Vec<SkillPattern>,
    /// Registered workflow triggers, checked in order after the skills.
    workflows: Vec<WorkflowTrigger>,
    /// Minimum confidence a skill match needs in order to be reported.
    confidence_threshold: f32,
}
209
210impl Default for PreProcessHook {
211    fn default() -> Self {
212        Self::new()
213    }
214}
215
216impl PreProcessHook {
217    /// Create with default patterns (fallback when no skills loaded)
218    pub fn new() -> Self {
219        Self {
220            skills: Self::default_skill_patterns(),
221            workflows: Self::default_workflow_triggers(),
222            confidence_threshold: 0.7,
223        }
224    }
225
226    /// Create from loaded skills (dynamic trigger loading)
227    /// This is the preferred way to create a PreProcessHook.
228    pub fn from_skills(skills: &[Skill]) -> Self {
229        // Convert skills with triggers to patterns
230        let skill_patterns: Vec<SkillPattern> = skills
231            .iter()
232            .filter_map(|s| SkillPattern::from_skill(s))
233            .collect();
234
235        // If no skills have triggers, fall back to default patterns
236        let skills = if skill_patterns.is_empty() {
237            Self::default_skill_patterns()
238        } else {
239            skill_patterns
240        };
241
242        Self {
243            skills,
244            workflows: Self::default_workflow_triggers(),
245            confidence_threshold: 0.7,
246        }
247    }
248
249    /// Default skill patterns based on analysis
250    fn default_skill_patterns() -> Vec<SkillPattern> {
251        vec![
252            // Code review skill
253            SkillPattern::new(
254                "code-review",
255                vec![
256                    r"/review",
257                    r"审查.*代码",
258                    r"检查.*代码",
259                    r"code\s*review",
260                    r"review.*code",
261                ],
262                0.9,
263            ),
264            // Refactor skill
265            SkillPattern::new(
266                "refactor",
267                vec![r"/refactor", r"重构.*代码", r"优化.*结构", r"refactor"],
268                0.9,
269            ),
270            // Debug skill
271            SkillPattern::new(
272                "debug",
273                vec![r"/debug", r"调试.*问题", r"排查.*问题", r"debug", r"调试"],
274                0.9,
275            ),
276            // Planning skill
277            SkillPattern::new(
278                "planning",
279                vec![r"/plan", r"规划.*方案", r"设计.*方案", r"plan"],
280                0.9,
281            ),
282            // Security review skill
283            SkillPattern::new(
284                "security-review",
285                vec![
286                    r"/security",
287                    r"安全.*审查",
288                    r"安全.*检查",
289                    r"security\s*review",
290                ],
291                0.9,
292            ),
293            // Demo skill
294            SkillPattern::new("demo", vec![r"/demo", r"演示", r"demo"], 0.8),
295            // Git commit skill
296            SkillPattern::new(
297                "git-commit",
298                vec![r"/commit", r"提交.*代码", r"commit"],
299                0.8,
300            ),
301        ]
302    }
303
304    /// Default workflow triggers based on analysis
305    fn default_workflow_triggers() -> Vec<WorkflowTrigger> {
306        vec![
307            // Image article workflow
308            WorkflowTrigger::new(
309                "image-article",
310                vec!["generate article", "生成文章", "create article", "图片文章"],
311                vec!["topic"],
312            ),
313            // Analysis workflow
314            WorkflowTrigger::new(
315                "code-analysis",
316                vec!["analyze code", "分析代码", "代码分析", "code analysis"],
317                vec!["target"],
318            ),
319            // Test workflow
320            WorkflowTrigger::new(
321                "test-runner",
322                vec!["run tests", "运行测试", "执行测试", "test suite"],
323                vec!["test_path"],
324            ),
325        ]
326    }
327
328    /// Process user message and detect triggers
329    pub fn process(&self, message: &str) -> ProcessResult {
330        // Step 1: Check for skill triggers
331        for skill in &self.skills {
332            if let Some(confidence) = skill.matches(message) {
333                if confidence >= self.confidence_threshold {
334                    // Auto-load skill body if available
335                    let skill_body = skill.get_skill_body().map(|s| s.to_string());
336                    return ProcessResult::SkillTriggered {
337                        skill_id: skill.skill_id.clone(),
338                        confidence,
339                        skill_body,
340                    };
341                }
342            }
343        }
344
345        // Step 2: Check for workflow triggers
346        for workflow in &self.workflows {
347            if workflow.matches(message) {
348                let inputs = workflow.extract_inputs(message);
349                return ProcessResult::WorkflowTriggered {
350                    workflow_id: workflow.workflow_id.clone(),
351                    inputs,
352                };
353            }
354        }
355
356        // Step 3: Continue normal processing
357        ProcessResult::Continue
358    }
359
360    /// Add a custom skill pattern
361    pub fn add_skill(&mut self, skill: SkillPattern) {
362        self.skills.push(skill);
363    }
364
365    /// Add a custom workflow trigger
366    pub fn add_workflow(&mut self, workflow: WorkflowTrigger) {
367        self.workflows.push(workflow);
368    }
369
370    /// Set confidence threshold
371    pub fn with_confidence_threshold(mut self, threshold: f32) -> Self {
372        self.confidence_threshold = threshold;
373        self
374    }
375
376    /// Check if message contains skill-like patterns (for heuristics)
377    pub fn has_skill_intent(&self, message: &str) -> bool {
378        let msg_lower = message.to_lowercase();
379
380        // Check for common skill indicators
381        let skill_indicators = [
382            "review", "refactor", "debug", "plan", "security", "审查", "重构", "调试", "规划",
383            "安全",
384        ];
385
386        skill_indicators.iter().any(|ind| msg_lower.contains(ind))
387    }
388
389    /// Check if message contains workflow-like patterns (multiple steps)
390    pub fn has_workflow_intent(&self, message: &str) -> bool {
391        let msg_lower = message.to_lowercase();
392
393        // Check for multi-step indicators
394        let workflow_indicators = [
395            "generate", "create", "analyze", "process", "batch", "生成", "创建", "分析", "处理",
396            "批量", "and then", "then", "after", "然后", "接着",
397        ];
398
399        // Count how many indicators are present
400        let count = workflow_indicators
401            .iter()
402            .filter(|ind| msg_lower.contains(*ind))
403            .count();
404
405        count >= 2
406    }
407
408    /// Get all registered skills
409    pub fn list_skills(&self) -> Vec<&str> {
410        self.skills.iter().map(|s| s.skill_id.as_str()).collect()
411    }
412
413    /// Get all registered workflows
414    pub fn list_workflows(&self) -> Vec<&str> {
415        self.workflows
416            .iter()
417            .map(|w| w.workflow_id.as_str())
418            .collect()
419    }
420}
421
422/// Global preprocessor instance
423static GLOBAL_PREPROCESSOR: std::sync::OnceLock<Arc<PreProcessHook>> = std::sync::OnceLock::new();
424
425/// Get the global preprocessor
426pub fn global_preprocessor() -> Arc<PreProcessHook> {
427    GLOBAL_PREPROCESSOR
428        .get_or_init(|| Arc::new(PreProcessHook::new()))
429        .clone()
430}
431
432/// Process message with global preprocessor (without skills)
433/// Use `preprocess_with_skills` for dynamic trigger loading.
434pub fn preprocess(message: &str) -> ProcessResult {
435    global_preprocessor().process(message)
436}
437
438/// Process message with skills for dynamic trigger loading.
439/// This is the preferred function when skills are available.
440///
441/// # Arguments
442/// * `message` - User message to process
443/// * `skills` - Loaded skills to extract triggers from
444///
445/// # Returns
446/// * `SkillTriggered` - If a skill trigger matches, includes auto-loaded skill body
447/// * `WorkflowTriggered` - If a workflow trigger matches
448/// * `Continue` - If no triggers match
449pub fn preprocess_with_skills(message: &str, skills: &[Skill]) -> ProcessResult {
450    let hook = PreProcessHook::from_skills(skills);
451    hook.process(message)
452}
453
#[cfg(test)]
mod tests {
    use super::*;

    /// Skill id carried by `result`, or `None` if it is not a skill trigger.
    fn skill_id_of(result: ProcessResult) -> Option<String> {
        match result {
            ProcessResult::SkillTriggered { skill_id, .. } => Some(skill_id),
            _ => None,
        }
    }

    /// Workflow id carried by `result`, or `None` if it is not a workflow trigger.
    fn workflow_id_of(result: ProcessResult) -> Option<String> {
        match result {
            ProcessResult::WorkflowTriggered { workflow_id, .. } => Some(workflow_id),
            _ => None,
        }
    }

    #[test]
    fn test_skill_trigger_slash_command() {
        let hook = PreProcessHook::new();

        assert_eq!(
            skill_id_of(hook.process("/review this code")).as_deref(),
            Some("code-review")
        );
        assert_eq!(
            skill_id_of(hook.process("/refactor the module")).as_deref(),
            Some("refactor")
        );
    }

    #[test]
    fn test_skill_trigger_chinese() {
        let hook = PreProcessHook::new();

        assert_eq!(
            skill_id_of(hook.process("审查这段代码")).as_deref(),
            Some("code-review")
        );
        assert_eq!(
            skill_id_of(hook.process("调试这个bug")).as_deref(),
            Some("debug")
        );
    }

    #[test]
    fn test_workflow_trigger() {
        let hook = PreProcessHook::new();

        assert_eq!(
            workflow_id_of(hook.process("generate article about Rust performance")).as_deref(),
            Some("image-article")
        );
    }

    #[test]
    fn test_continue_normal() {
        let hook = PreProcessHook::new();

        for message in ["What is the weather today?", "Help me write a function"] {
            assert_eq!(hook.process(message), ProcessResult::Continue);
        }
    }

    #[test]
    fn test_confidence_threshold() {
        // The built-in code-review weight (0.9) still clears a raised
        // threshold of 0.85.
        let hook = PreProcessHook::new().with_confidence_threshold(0.85);

        assert!(skill_id_of(hook.process("/review")).is_some());
    }

    #[test]
    fn test_custom_skill() {
        let mut hook = PreProcessHook::new();
        let custom = SkillPattern::new("custom", vec!["/custom", "custom skill"], 0.9);
        hook.add_skill(custom);

        assert_eq!(
            skill_id_of(hook.process("/custom task")).as_deref(),
            Some("custom")
        );
    }

    #[test]
    fn test_extract_inputs() {
        let hook = PreProcessHook::new();

        let result = hook.process("generate article about Rust async programming");
        let ProcessResult::WorkflowTriggered { inputs, .. } = result else {
            panic!("Expected WorkflowTriggered");
        };

        assert!(inputs.contains_key("topic"));
        assert!(inputs["topic"].to_lowercase().contains("rust"));
    }

    #[test]
    fn test_has_skill_intent() {
        let hook = PreProcessHook::new();

        for message in ["Please review my code", "审查代码"] {
            assert!(hook.has_skill_intent(message));
        }
        assert!(!hook.has_skill_intent("What's the time?"));
    }

    #[test]
    fn test_has_workflow_intent() {
        let hook = PreProcessHook::new();

        for message in [
            "Analyze the code and then generate a report",
            "分析代码,然后生成报告",
        ] {
            assert!(hook.has_workflow_intent(message));
        }
        assert!(!hook.has_workflow_intent("Just a simple question"));
    }

    #[test]
    fn test_list_skills() {
        let hook = PreProcessHook::new();
        let registered = hook.list_skills();

        for expected in ["code-review", "refactor", "debug"] {
            assert!(registered.contains(&expected));
        }
    }

    #[test]
    fn test_list_workflows() {
        let hook = PreProcessHook::new();
        let registered = hook.list_workflows();

        for expected in ["image-article", "code-analysis"] {
            assert!(registered.contains(&expected));
        }
    }
}