Skip to main content

oris_evolution/gep/
gene.rs

1//! GEP-compatible Gene definition.
2//!
3//! A Gene is a reusable evolution strategy that defines what signals it responds to,
4//! what steps to follow, and what safety constraints apply.
5
6use super::content_hash::{compute_asset_id, AssetIdError};
7use serde::{Deserialize, Serialize};
8
9/// Gene category - the intent type
10#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
11#[serde(rename_all = "lowercase")]
12pub enum GeneCategory {
13    /// Fix errors, restore stability, reduce failure rate
14    Repair,
15    /// Improve existing capabilities, increase success rate
16    Optimize,
17    /// Explore new strategies, break out of local optima
18    Innovate,
19}
20
21impl Default for GeneCategory {
22    fn default() -> Self {
23        Self::Repair
24    }
25}
26
27impl std::fmt::Display for GeneCategory {
28    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
29        match self {
30            GeneCategory::Repair => write!(f, "repair"),
31            GeneCategory::Optimize => write!(f, "optimize"),
32            GeneCategory::Innovate => write!(f, "innovate"),
33        }
34    }
35}
36
37/// Signal match pattern - supports substring, regex, and multi-language alias
38#[derive(Clone, Debug, Serialize, Deserialize)]
39#[serde(untagged)]
40pub enum SignalPattern {
41    /// Substring match (default)
42    Substring(String),
43    /// Regex pattern with flags
44    Regex(String),
45    /// Multi-language alias (pipe-delimited)
46    Alias(String),
47}
48
49impl SignalPattern {
50    /// Check if this pattern matches the given signal
51    pub fn matches(&self, signal: &str) -> bool {
52        match self {
53            SignalPattern::Substring(s) => signal.to_lowercase().contains(&s.to_lowercase()),
54            SignalPattern::Regex(pattern) => {
55                // Simple regex matching - in production use regex crate
56                if let Ok(re) = regex_lite::Regex::new(pattern) {
57                    re.is_match(signal)
58                } else {
59                    false
60                }
61            }
62            SignalPattern::Alias(aliases) => aliases.split('|').any(|lang| {
63                let lang = lang.trim().to_lowercase();
64                signal.to_lowercase().contains(&lang)
65            }),
66        }
67    }
68}
69
70impl From<String> for SignalPattern {
71    fn from(s: String) -> Self {
72        if s.starts_with('/') && s.ends_with('/') {
73            SignalPattern::Regex(s.trim_matches('/').to_string())
74        } else if s.contains('|') {
75            SignalPattern::Alias(s)
76        } else {
77            SignalPattern::Substring(s)
78        }
79    }
80}
81
82/// Gene constraints - safety limits for evolution
83#[derive(Clone, Debug, Serialize, Deserialize, Default)]
84pub struct GeneConstraints {
85    /// Maximum number of files that can be modified
86    #[serde(rename = "max_files")]
87    pub max_files: usize,
88    /// Paths that are forbidden to modify
89    #[serde(rename = "forbidden_paths")]
90    pub forbidden_paths: Vec<String>,
91}
92
93impl GeneConstraints {
94    pub fn new(max_files: usize) -> Self {
95        Self {
96            max_files,
97            forbidden_paths: vec![],
98        }
99    }
100
101    pub fn with_forbidden(mut self, paths: Vec<String>) -> Self {
102        self.forbidden_paths = paths;
103        self
104    }
105
106    /// Check if a file path is allowed
107    pub fn is_allowed(&self, path: &str) -> bool {
108        !self
109            .forbidden_paths
110            .iter()
111            .any(|forbidden| path.contains(forbidden))
112    }
113}
114
115/// Precondition for gene execution
116#[derive(Clone, Debug, Serialize, Deserialize)]
117pub struct GenePrecondition {
118    /// Description of the precondition
119    pub description: String,
120    /// Check command (optional)
121    #[serde(default)]
122    pub check: Option<String>,
123}
124
125/// Runtime behavioral modifiers
126#[derive(Clone, Debug, Serialize, Deserialize)]
127pub struct EpigeneticMark {
128    /// Mark name
129    pub name: String,
130    /// Mark value
131    pub value: serde_json::Value,
132    /// Whether this mark is active
133    #[serde(default = "default_true")]
134    pub active: bool,
135}
136
/// Serde default provider that always yields `true`.
fn default_true() -> bool {
    const DEFAULT: bool = true;
    DEFAULT
}
140
141/// GEP-compatible Gene definition
142#[derive(Clone, Debug, Serialize, Deserialize)]
143pub struct GepGene {
144    /// Asset type - always "Gene"
145    #[serde(rename = "type")]
146    pub gene_type: String,
147    /// Protocol schema version
148    #[serde(rename = "schema_version")]
149    pub schema_version: String,
150    /// Unique identifier
151    pub id: String,
152    /// Category - repair, optimize, or innovate
153    pub category: GeneCategory,
154    /// Patterns that trigger this gene
155    #[serde(rename = "signals_match")]
156    pub signals_match: Vec<SignalPattern>,
157    /// Conditions that must hold before use
158    #[serde(default)]
159    pub preconditions: Vec<GenePrecondition>,
160    /// Ordered, actionable steps
161    pub strategy: Vec<String>,
162    /// Safety constraints
163    pub constraints: GeneConstraints,
164    /// Commands to verify correctness after execution
165    pub validation: Vec<String>,
166    /// Runtime-applied behavioral modifiers
167    #[serde(default, rename = "epigenetic_marks")]
168    pub epigenetic_marks: Vec<EpigeneticMark>,
169    /// LLM model that produced this gene
170    #[serde(default)]
171    pub model_name: Option<String>,
172    /// Content-addressable hash
173    #[serde(rename = "asset_id")]
174    pub asset_id: String,
175}
176
177impl GepGene {
178    /// Create a new GEP Gene with computed asset_id
179    pub fn new(
180        id: String,
181        category: GeneCategory,
182        signals_match: Vec<String>,
183        strategy: Vec<String>,
184        validation: Vec<String>,
185    ) -> Result<Self, AssetIdError> {
186        let signals_match: Vec<SignalPattern> =
187            signals_match.into_iter().map(SignalPattern::from).collect();
188
189        let constraints = GeneConstraints::new(20); // Default max 20 files
190
191        let mut gene = Self {
192            gene_type: "Gene".to_string(),
193            schema_version: super::GEP_SCHEMA_VERSION.to_string(),
194            id,
195            category,
196            signals_match,
197            preconditions: vec![],
198            strategy,
199            constraints,
200            validation,
201            epigenetic_marks: vec![],
202            model_name: None,
203            asset_id: String::new(), // Will be computed
204        };
205
206        gene.asset_id = compute_asset_id(&gene, &["asset_id"])?;
207        Ok(gene)
208    }
209
210    /// Check if this gene matches the given signals
211    pub fn matches_signals(&self, signals: &[String]) -> usize {
212        let mut score = 0;
213        for signal in signals {
214            for pattern in &self.signals_match {
215                if pattern.matches(signal) {
216                    score += 1;
217                    break;
218                }
219            }
220        }
221        score
222    }
223
224    /// Validate the gene structure
225    pub fn validate(&self) -> Result<(), String> {
226        if self.id.is_empty() {
227            return Err("Gene id cannot be empty".to_string());
228        }
229        if self.strategy.is_empty() {
230            return Err("Gene strategy cannot be empty".to_string());
231        }
232        if self.validation.is_empty() {
233            return Err("Gene validation cannot be empty".to_string());
234        }
235        Ok(())
236    }
237}
238
239/// Convert from Oris core Gene to GEP Gene
240impl From<&crate::Gene> for GepGene {
241    fn from(oris_gene: &crate::Gene) -> Self {
242        let signals_match: Vec<SignalPattern> = oris_gene
243            .signals
244            .iter()
245            .map(|s| SignalPattern::from(s.clone()))
246            .collect();
247
248        let constraints = GeneConstraints::new(20);
249
250        GepGene {
251            gene_type: "Gene".to_string(),
252            schema_version: super::GEP_SCHEMA_VERSION.to_string(),
253            id: oris_gene.id.clone(),
254            category: GeneCategory::Repair, // Default category
255            signals_match,
256            preconditions: vec![],
257            strategy: oris_gene.strategy.clone(),
258            constraints,
259            validation: oris_gene.validation.clone(),
260            epigenetic_marks: vec![],
261            model_name: None,
262            asset_id: oris_gene.id.clone(), // Placeholder
263        }
264    }
265}
266
#[cfg(test)]
mod tests {
    use super::*;

    /// Substring patterns match case-insensitively anywhere in the signal.
    #[test]
    fn test_signal_pattern_substring() {
        let pattern = SignalPattern::from("timeout".to_string());
        assert!(pattern.matches("connection timeout error"));
        assert!(pattern.matches("TIMEOUT DETECTED"));
        assert!(!pattern.matches("time out"));
    }

    /// A `/.../`-delimited string is classified as a regex and matched as one.
    #[test]
    fn test_signal_pattern_regex() {
        let pattern = SignalPattern::from("/time.?out/".to_string());
        assert!(matches!(pattern, SignalPattern::Regex(_)));
        assert!(pattern.matches("connection timeout error"));
        // Unlike the plain substring "timeout", the regex tolerates one
        // arbitrary character between "time" and "out".
        assert!(pattern.matches("time out"));
        assert!(!pattern.matches("timing issue"));
    }

    /// A pipe-delimited string matches when any alias occurs in the signal.
    #[test]
    fn test_signal_pattern_alias() {
        let pattern = SignalPattern::from("en|zh|ja".to_string());
        assert!(pattern.matches("en: hello"));
        assert!(pattern.matches("zh: 你好"));
        assert!(!pattern.matches("fr: bonjour"));
    }

    /// Forbidden paths are rejected by substring containment.
    #[test]
    fn test_constraints_is_allowed() {
        let constraints = GeneConstraints::new(5)
            .with_forbidden(vec![".git".to_string(), "secrets/".to_string()]);

        assert_eq!(constraints.max_files, 5);
        assert!(constraints.is_allowed("src/lib.rs"));
        assert!(!constraints.is_allowed(".git/config"));
        assert!(!constraints.is_allowed("deploy/secrets/key.pem"));
    }

    /// `GepGene::new` fills in the fixed fields and a hashed asset id.
    #[test]
    fn test_gene_creation() {
        let gene = GepGene::new(
            "gene_test_001".to_string(),
            GeneCategory::Repair,
            vec!["timeout".to_string(), "error".to_string()],
            vec!["Analyze error".to_string(), "Fix issue".to_string()],
            vec!["cargo test".to_string()],
        )
        .unwrap();

        assert_eq!(gene.gene_type, "Gene");
        assert_eq!(gene.schema_version, "1.5.0");
        assert!(gene.asset_id.starts_with("sha256:")); // Should start with sha256:
    }

    /// Each signal counts once, however many patterns it matches.
    #[test]
    fn test_gene_matches_signals() {
        let gene = GepGene::new(
            "gene_test_002".to_string(),
            GeneCategory::Repair,
            vec!["timeout".to_string(), "error".to_string()],
            vec!["Fix".to_string()],
            vec!["test".to_string()],
        )
        .unwrap();

        let signals = vec![
            "error: connection timeout".to_string(),
            "perf_bottleneck".to_string(),
        ];

        assert_eq!(gene.matches_signals(&signals), 1);
    }

    /// An empty strategy is rejected; adding a step makes the gene valid.
    #[test]
    fn test_gene_validate() {
        let mut gene = GepGene::new(
            "gene_test_003".to_string(),
            GeneCategory::Repair,
            vec!["timeout".to_string()],
            vec![],
            vec!["test".to_string()],
        )
        .unwrap();

        assert!(gene.validate().is_err());

        gene.strategy.push("do something".to_string());
        assert!(gene.validate().is_ok());
    }
}