helix/dna/mds/
semantic.rs

1use std::collections::{HashMap, HashSet};
2use crate::dna::atp::ast::*;
3use crate::dna::atp::ast::AstVisitor;
4use crate::dna::atp::ast::HelixAst;
5use crate::dna::atp::ast::Declaration;
6use crate::dna::atp::ast::ProjectDecl;
7use crate::dna::atp::ast::AgentDecl;
8use crate::dna::atp::ast::WorkflowDecl;
9use crate::dna::atp::ast::MemoryDecl;
10use crate::dna::atp::ast::ContextDecl;
11use crate::dna::atp::ast::CrewDecl;
12use crate::dna::atp::ast::SectionDecl;
13use crate::dna::atp::ast::Expression;
14use crate::dna::atp::ast::PipelineNode;
15use crate::dna::atp::types::SecretRef;
/// A problem detected while semantically analysing a Helix AST.
///
/// Each variant carries the offending name or value together with the context
/// needed to render the message produced by its `Display` implementation.
#[derive(Debug, Clone)]
pub enum SemanticError {
    /// An agent name was referenced at `location` but no such agent is defined.
    UndefinedAgent { name: String, location: String },
    /// A workflow name was referenced at `location` but no such workflow is defined.
    UndefinedWorkflow { name: String, location: String },
    /// A step name was referenced inside `workflow` but that workflow has no such step.
    UndefinedStep { name: String, workflow: String },
    /// A generic reference could not be resolved at `location`.
    UndefinedReference { reference: String, location: String },
    /// Two declarations of the same `kind` share the same `name`.
    DuplicateDefinition { name: String, kind: String },
    /// A value at `location` has type `found` where `expected` was required.
    TypeMismatch { expected: String, found: String, location: String },
    /// The listed items depend on each other in a cycle (first item repeated last).
    CircularDependency { items: Vec<String> },
    /// A duration literal at `location` could not be accepted.
    InvalidDuration { value: String, location: String },
    /// A mandatory `field` is absent from `declaration`.
    MissingRequiredField { field: String, declaration: String },
    /// A workflow trigger is not one of the recognised trigger kinds.
    InvalidTriggerType { trigger: String },
    /// A crew process type is not one of the recognised process kinds.
    InvalidProcessType { process: String },
    /// A retry backoff strategy is not one of the recognised strategies.
    InvalidBackoffStrategy { strategy: String },
}

impl std::fmt::Display for SemanticError {
    /// Renders the user-facing diagnostic text for this error.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            SemanticError::UndefinedAgent { name, location } => {
                write!(f, "Undefined agent '{}' referenced in {}", name, location)
            }
            SemanticError::UndefinedWorkflow { name, location } => {
                write!(f, "Undefined workflow '{}' referenced in {}", name, location)
            }
            SemanticError::UndefinedStep { name, workflow } => {
                write!(f, "Undefined step '{}' in workflow '{}'", name, workflow)
            }
            SemanticError::UndefinedReference { reference, location } => {
                write!(f, "Undefined reference '{}' in {}", reference, location)
            }
            SemanticError::DuplicateDefinition { name, kind } => {
                write!(f, "Duplicate {} definition: '{}'", kind, name)
            }
            SemanticError::TypeMismatch { expected, found, location } => {
                write!(
                    f,
                    "Type mismatch in {}: expected {}, found {}",
                    location, expected, found
                )
            }
            SemanticError::CircularDependency { items } => {
                write!(f, "Circular dependency detected: {}", items.join(" -> "))
            }
            SemanticError::InvalidDuration { value, location } => {
                write!(f, "Invalid duration '{}' in {}", value, location)
            }
            SemanticError::MissingRequiredField { field, declaration } => {
                write!(f, "Missing required field '{}' in {}", field, declaration)
            }
            SemanticError::InvalidTriggerType { trigger } => {
                write!(f, "Invalid trigger type: '{}'", trigger)
            }
            SemanticError::InvalidProcessType { process } => {
                write!(
                    f,
                    "Invalid process type: '{}'. Must be one of: sequential, hierarchical, parallel, consensus",
                    process
                )
            }
            SemanticError::InvalidBackoffStrategy { strategy } => {
                write!(
                    f,
                    "Invalid backoff strategy: '{}'. Must be one of: fixed, linear, exponential",
                    strategy
                )
            }
        }
    }
}

// Implementing the standard error trait lets callers box this type as
// `Box<dyn std::error::Error>` and propagate it with `?`.
impl std::error::Error for SemanticError {}
84pub struct SemanticAnalyzer {
85    pub agents: HashMap<String, AgentDecl>,
86    pub workflows: HashMap<String, WorkflowDecl>,
87    pub contexts: HashMap<String, ContextDecl>,
88    pub crews: HashMap<String, CrewDecl>,
89    pub expected_env_vars: HashSet<String>,
90    pub _expected_memory_refs: HashSet<String>,
91    pub errors: Vec<SemanticError>,
92}
93impl AstVisitor for SemanticAnalyzer {
94    type Result = ();
95    fn visit_ast(&mut self, ast: &HelixAst) -> Self::Result {
96        for decl in &ast.declarations {
97            self.visit_declaration(decl);
98        }
99    }
100    fn visit_declaration(&mut self, decl: &Declaration) -> Self::Result {
101        match decl {
102            Declaration::Project(p) => self.visit_project(p),
103            Declaration::Agent(a) => self.visit_agent(a),
104            Declaration::Workflow(w) => self.visit_workflow(w),
105            Declaration::Memory(m) => self.visit_memory(m),
106            Declaration::Context(c) => self.visit_context(c),
107            Declaration::Crew(crew) => self.visit_crew(crew),
108            _ => {}
109        }
110    }
111    fn visit_project(&mut self, _project: &ProjectDecl) -> Self::Result {}
112    fn visit_agent(&mut self, _agent: &AgentDecl) -> Self::Result {}
113    fn visit_workflow(&mut self, _workflow: &WorkflowDecl) -> Self::Result {}
114    fn visit_memory(&mut self, memory: &MemoryDecl) -> Self::Result {
115        for (_key, expr) in &memory.properties {
116            self.visit_expression(expr);
117        }
118    }
119    fn visit_context(&mut self, _context: &ContextDecl) -> Self::Result {}
120    fn visit_crew(&mut self, _crew: &CrewDecl) -> Self::Result {}
121    fn visit_section(&mut self, section: &SectionDecl) -> Self::Result {
122        for (_key, expr) in &section.properties {
123            self.visit_expression(expr);
124        }
125    }
126    fn visit_expression(&mut self, expr: &Expression) -> Self::Result {
127        match expr {
128            Expression::Variable(var) => {
129                self.expected_env_vars.insert(var.clone());
130            }
131            Expression::Reference(ref_name) => {
132                self._expected_memory_refs.insert(ref_name.clone());
133            }
134            _ => {}
135        }
136    }
137}
138impl SemanticAnalyzer {
139    pub fn new() -> Self {
140        SemanticAnalyzer {
141            agents: HashMap::new(),
142            workflows: HashMap::new(),
143            contexts: HashMap::new(),
144            crews: HashMap::new(),
145            expected_env_vars: HashSet::new(),
146            _expected_memory_refs: HashSet::new(),
147            errors: Vec::new(),
148        }
149    }
150    pub fn analyze(&mut self, ast: &HelixAst) -> Result<(), Vec<SemanticError>> {
151        for decl in &ast.declarations {
152            match decl {
153                Declaration::Agent(agent) => self.visit_agent(agent),
154                Declaration::Workflow(workflow) => self.visit_workflow(workflow),
155                Declaration::Context(context) => self.visit_context(context),
156                Declaration::Crew(crew) => self.visit_crew(crew),
157                _ => {}
158            }
159        }
160        self.collect_definitions(ast)?;
161        self.validate_references(ast);
162        let type_checker = TypeChecker::new();
163        self.type_check_with_checker(ast, &type_checker);
164        self.analyze_dependencies(ast);
165        if !self.errors.is_empty() { Err(self.errors.clone()) } else { Ok(()) }
166    }
167    fn collect_definitions(&mut self, ast: &HelixAst) -> Result<(), Vec<SemanticError>> {
168        for decl in &ast.declarations {
169            match decl {
170                Declaration::Agent(agent) => {
171                    if self.agents.contains_key(&agent.name) {
172                        self.errors
173                            .push(SemanticError::DuplicateDefinition {
174                                name: agent.name.clone(),
175                                kind: "agent".to_string(),
176                            });
177                    } else {
178                        self.agents.insert(agent.name.clone(), agent.clone());
179                    }
180                }
181                Declaration::Workflow(workflow) => {
182                    if self.workflows.contains_key(&workflow.name) {
183                        self.errors
184                            .push(SemanticError::DuplicateDefinition {
185                                name: workflow.name.clone(),
186                                kind: "workflow".to_string(),
187                            });
188                    } else {
189                        self.workflows.insert(workflow.name.clone(), workflow.clone());
190                    }
191                }
192                Declaration::Context(context) => {
193                    if self.contexts.contains_key(&context.name) {
194                        self.errors
195                            .push(SemanticError::DuplicateDefinition {
196                                name: context.name.clone(),
197                                kind: "context".to_string(),
198                            });
199                    } else {
200                        if let Some(secrets) = &context.secrets {
201                            for (_key, secret_ref) in secrets {
202                                if let SecretRef::Environment(var) = secret_ref {
203                                    self.expected_env_vars.insert(var.clone());
204                                }
205                            }
206                        }
207                        self.contexts.insert(context.name.clone(), context.clone());
208                    }
209                }
210                Declaration::Crew(crew) => {
211                    if self.crews.contains_key(&crew.name) {
212                        self.errors
213                            .push(SemanticError::DuplicateDefinition {
214                                name: crew.name.clone(),
215                                kind: "crew".to_string(),
216                            });
217                    } else {
218                        self.crews.insert(crew.name.clone(), crew.clone());
219                    }
220                }
221                _ => {}
222            }
223        }
224        if !self.errors.is_empty() { Err(self.errors.clone()) } else { Ok(()) }
225    }
226    fn validate_references(&mut self, ast: &HelixAst) {
227        for decl in &ast.declarations {
228            match decl {
229                Declaration::Workflow(workflow) => {
230                    self.validate_workflow_references(workflow);
231                }
232                Declaration::Crew(crew) => {
233                    self.validate_crew_references(crew);
234                }
235                _ => {}
236            }
237        }
238    }
239    fn validate_workflow_references(&mut self, workflow: &WorkflowDecl) {
240        for step in &workflow.steps {
241            if let Some(agent_name) = &step.agent {
242                if !self.agents.contains_key(agent_name) {
243                    self.errors
244                        .push(SemanticError::UndefinedAgent {
245                            name: agent_name.clone(),
246                            location: format!(
247                                "workflow '{}', step '{}'", workflow.name, step.name
248                            ),
249                        });
250                }
251            }
252            if let Some(crew_agents) = &step.crew {
253                for agent_name in crew_agents {
254                    if !self.agents.contains_key(agent_name) {
255                        self.errors
256                            .push(SemanticError::UndefinedAgent {
257                                name: agent_name.clone(),
258                                location: format!(
259                                    "workflow '{}', step '{}'", workflow.name, step.name
260                                ),
261                            });
262                    }
263                }
264            }
265            if let Some(depends_on) = step.properties.get("depends_on") {
266                if let Some(deps) = depends_on.as_array() {
267                    for dep in deps {
268                        if let Some(dep_name) = dep.as_string() {
269                            let step_exists = workflow
270                                .steps
271                                .iter()
272                                .any(|s| s.name == dep_name);
273                            if !step_exists {
274                                self.errors
275                                    .push(SemanticError::UndefinedStep {
276                                        name: dep_name,
277                                        workflow: workflow.name.clone(),
278                                    });
279                            }
280                        }
281                    }
282                }
283            }
284        }
285        if let Some(pipeline) = &workflow.pipeline {
286            for node in &pipeline.flow {
287                if let PipelineNode::Step(step_name) = node {
288                    let step_exists = workflow
289                        .steps
290                        .iter()
291                        .any(|s| s.name == *step_name);
292                    if !step_exists {
293                        self.errors
294                            .push(SemanticError::UndefinedStep {
295                                name: step_name.clone(),
296                                workflow: workflow.name.clone(),
297                            });
298                    }
299                }
300            }
301        }
302        if let Some(trigger) = &workflow.trigger {
303            self.validate_trigger(trigger, &workflow.name);
304        }
305    }
306    fn validate_trigger(&mut self, trigger: &Expression, workflow_name: &str) {
307        match trigger {
308            Expression::String(s) | Expression::Identifier(s) => {
309                let valid_triggers = ["manual", "webhook", "event", "file_watch"];
310                if !valid_triggers.contains(&s.as_str()) && !s.starts_with("schedule:") {
311                    self.errors
312                        .push(SemanticError::InvalidTriggerType {
313                            trigger: s.clone(),
314                        });
315                }
316            }
317            Expression::Object(map) => {
318                if let Some(trigger_type) = map.get("type") {
319                    self.validate_trigger(trigger_type, workflow_name);
320                }
321            }
322            _ => {}
323        }
324    }
325    fn validate_crew_references(&mut self, crew: &CrewDecl) {
326        for agent_name in &crew.agents {
327            if !self.agents.contains_key(agent_name) {
328                self.errors
329                    .push(SemanticError::UndefinedAgent {
330                        name: agent_name.clone(),
331                        location: format!("crew '{}'", crew.name),
332                    });
333            }
334        }
335        if let Some(process_type) = &crew.process_type {
336            let valid_types = ["sequential", "hierarchical", "parallel", "consensus"];
337            if !valid_types.contains(&process_type.as_str()) {
338                self.errors
339                    .push(SemanticError::InvalidProcessType {
340                        process: process_type.clone(),
341                    });
342            }
343        }
344        if let Some(process) = &crew.process_type {
345            if process == "hierarchical" {
346                if let Some(manager) = crew.properties.get("manager") {
347                    if let Some(manager_name) = manager.as_string() {
348                        if !self.agents.contains_key(&manager_name) {
349                            self.errors
350                                .push(SemanticError::UndefinedAgent {
351                                    name: manager_name,
352                                    location: format!("crew '{}' manager", crew.name),
353                                });
354                        }
355                    }
356                } else {
357                    self.errors
358                        .push(SemanticError::MissingRequiredField {
359                            field: "manager".to_string(),
360                            declaration: format!("hierarchical crew '{}'", crew.name),
361                        });
362                }
363            }
364        }
365    }
366    #[allow(dead_code)]
367    fn type_check(&mut self, ast: &HelixAst) {
368        self.type_check_with_checker(ast, &TypeChecker::new());
369    }
370    fn type_check_with_checker(&mut self, ast: &HelixAst, checker: &TypeChecker) {
371        for decl in &ast.declarations {
372            match decl {
373                Declaration::Agent(agent) => {
374                    for (key, expr) in &agent.properties {
375                        if let Err(_msg) = checker.check_type(key, expr) {
376                            self.errors
377                                .push(SemanticError::TypeMismatch {
378                                    expected: "valid type".to_string(),
379                                    found: checker.infer_type(expr).to_string(),
380                                    location: format!("agent '{}'", agent.name),
381                                });
382                        }
383                    }
384                    self.type_check_agent(agent);
385                }
386                Declaration::Workflow(workflow) => {
387                    self.type_check_workflow(workflow);
388                }
389                _ => {}
390            }
391        }
392    }
393    fn type_check_agent(&mut self, agent: &AgentDecl) {
394        if let Some(temp) = agent.properties.get("temperature") {
395            if let Some(temp_val) = temp.as_number() {
396                if temp_val < 0.0 || temp_val > 2.0 {
397                    self.errors
398                        .push(SemanticError::TypeMismatch {
399                            expected: "number between 0 and 2".to_string(),
400                            found: format!("{}", temp_val),
401                            location: format!("agent '{}' temperature", agent.name),
402                        });
403                }
404            }
405        }
406        if let Some(tokens) = agent.properties.get("max_tokens") {
407            if let Some(tokens_val) = tokens.as_number() {
408                if tokens_val <= 0.0 {
409                    self.errors
410                        .push(SemanticError::TypeMismatch {
411                            expected: "positive number".to_string(),
412                            found: format!("{}", tokens_val),
413                            location: format!("agent '{}' max_tokens", agent.name),
414                        });
415                }
416            }
417        }
418    }
419    fn type_check_workflow(&mut self, workflow: &WorkflowDecl) {
420        for step in &workflow.steps {
421            if let Some(retry) = step.properties.get("retry") {
422                if let Some(retry_obj) = retry.as_object() {
423                    if let Some(max_attempts) = retry_obj.get("max_attempts") {
424                        if let Some(attempts) = max_attempts.as_number() {
425                            if attempts <= 0.0 {
426                                self.errors
427                                    .push(SemanticError::TypeMismatch {
428                                        expected: "positive number".to_string(),
429                                        found: format!("{}", attempts),
430                                        location: format!(
431                                            "workflow '{}', step '{}' retry.max_attempts", workflow
432                                            .name, step.name
433                                        ),
434                                    });
435                            }
436                        }
437                    }
438                    if let Some(backoff) = retry_obj.get("backoff") {
439                        if let Some(strategy) = backoff.as_string() {
440                            let valid_strategies = ["fixed", "linear", "exponential"];
441                            if !valid_strategies.contains(&strategy.as_str()) {
442                                self.errors
443                                    .push(SemanticError::InvalidBackoffStrategy {
444                                        strategy,
445                                    });
446                            }
447                        }
448                    }
449                }
450            }
451        }
452    }
453    fn analyze_dependencies(&mut self, ast: &HelixAst) {
454        for decl in &ast.declarations {
455            if let Declaration::Workflow(workflow) = decl {
456                self.check_circular_dependencies(workflow);
457            }
458        }
459    }
460    fn check_circular_dependencies(&mut self, workflow: &WorkflowDecl) {
461        let mut dependency_graph: HashMap<String, Vec<String>> = HashMap::new();
462        for step in &workflow.steps {
463            let deps = if let Some(depends_on) = step.properties.get("depends_on") {
464                if let Some(deps_array) = depends_on.as_array() {
465                    deps_array.iter().filter_map(|d| d.as_string()).collect()
466                } else {
467                    Vec::new()
468                }
469            } else {
470                Vec::new()
471            };
472            dependency_graph.insert(step.name.clone(), deps);
473        }
474        let mut visited = HashSet::new();
475        let mut rec_stack = HashSet::new();
476        for step in &workflow.steps {
477            if !visited.contains(&step.name) {
478                if let Some(cycle) = self
479                    .has_cycle(
480                        &step.name,
481                        &dependency_graph,
482                        &mut visited,
483                        &mut rec_stack,
484                        &mut Vec::new(),
485                    )
486                {
487                    self.errors
488                        .push(SemanticError::CircularDependency {
489                            items: cycle,
490                        });
491                    break;
492                }
493            }
494        }
495    }
496    fn has_cycle(
497        &self,
498        node: &str,
499        graph: &HashMap<String, Vec<String>>,
500        visited: &mut HashSet<String>,
501        rec_stack: &mut HashSet<String>,
502        path: &mut Vec<String>,
503    ) -> Option<Vec<String>> {
504        visited.insert(node.to_string());
505        rec_stack.insert(node.to_string());
506        path.push(node.to_string());
507        if let Some(neighbors) = graph.get(node) {
508            for neighbor in neighbors {
509                if !visited.contains(neighbor) {
510                    if let Some(cycle) = self
511                        .has_cycle(neighbor, graph, visited, rec_stack, path)
512                    {
513                        return Some(cycle);
514                    }
515                } else if rec_stack.contains(neighbor) {
516                    let cycle_start = path.iter().position(|n| n == neighbor).unwrap();
517                    let mut cycle = path[cycle_start..].to_vec();
518                    cycle.push(neighbor.clone());
519                    return Some(cycle);
520                }
521            }
522        }
523        rec_stack.remove(node);
524        path.pop();
525        None
526    }
527}
528pub struct TypeChecker {
529    expected_types: HashMap<String, ExpressionType>,
530}
/// The coarse type inferred for an expression.
///
/// `Array` carries its element type; `Any` stands for a type that is not
/// known statically.
#[derive(Debug, Clone, PartialEq)]
pub enum ExpressionType {
    String,
    Number,
    Bool,
    Duration,
    Array(Box<ExpressionType>),
    Object,
    Any,
}

impl std::fmt::Display for ExpressionType {
    /// Writes the lowercase type name; arrays render as `array<element>`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let label = match self {
            // Arrays are the only variant that needs formatting arguments.
            ExpressionType::Array(element) => return write!(f, "array<{}>", element),
            ExpressionType::String => "string",
            ExpressionType::Number => "number",
            ExpressionType::Bool => "boolean",
            ExpressionType::Duration => "duration",
            ExpressionType::Object => "object",
            ExpressionType::Any => "any",
        };
        f.write_str(label)
    }
}
554impl TypeChecker {
555    pub fn new() -> Self {
556        let mut expected_types = HashMap::new();
557        expected_types.insert("temperature".to_string(), ExpressionType::Number);
558        expected_types.insert("max_tokens".to_string(), ExpressionType::Number);
559        expected_types.insert("timeout".to_string(), ExpressionType::Duration);
560        expected_types.insert("debug".to_string(), ExpressionType::Bool);
561        expected_types.insert("parallel".to_string(), ExpressionType::Bool);
562        expected_types.insert("verbose".to_string(), ExpressionType::Bool);
563        expected_types.insert("persistence".to_string(), ExpressionType::Bool);
564        expected_types.insert("dimensions".to_string(), ExpressionType::Number);
565        expected_types.insert("batch_size".to_string(), ExpressionType::Number);
566        expected_types.insert("max_iterations".to_string(), ExpressionType::Number);
567        expected_types.insert("cache_size".to_string(), ExpressionType::Number);
568        TypeChecker { expected_types }
569    }
570    pub fn infer_type(&self, expr: &Expression) -> ExpressionType {
571        match expr {
572            Expression::String(_) | Expression::Identifier(_) => ExpressionType::String,
573            Expression::Number(_) => ExpressionType::Number,
574            Expression::Bool(_) => ExpressionType::Bool,
575            Expression::Duration(_) => ExpressionType::Duration,
576            Expression::Array(items) => {
577                if items.is_empty() {
578                    ExpressionType::Array(Box::new(ExpressionType::Any))
579                } else {
580                    let first_type = self.infer_type(&items[0]);
581                    ExpressionType::Array(Box::new(first_type))
582                }
583            }
584            Expression::Object(_) => ExpressionType::Object,
585            Expression::Variable(_) | Expression::Reference(_) => ExpressionType::Any,
586            _ => ExpressionType::Any,
587        }
588    }
589    pub fn check_type(&self, field: &str, expr: &Expression) -> Result<(), String> {
590        if let Some(expected) = self.expected_types.get(field) {
591            let actual = self.infer_type(expr);
592            if actual != *expected && actual != ExpressionType::Any {
593                return Err(
594                    format!(
595                        "Type mismatch for field '{}': expected {:?}, found {:?}", field,
596                        expected, actual
597                    ),
598                );
599            }
600        }
601        Ok(())
602    }
603}
#[cfg(test)]
mod tests {
    use super::*;

    /// Declaring the same agent name twice must be reported as a duplicate
    /// agent definition.
    #[test]
    fn test_duplicate_detection() {
        let mut analyzer = SemanticAnalyzer::new();
        let mut ast = HelixAst::new();
        ast.add_declaration(
            Declaration::Agent(AgentDecl {
                name: "test_agent".to_string(),
                properties: HashMap::new(),
                capabilities: None,
                backstory: None,
                tools: None,
            }),
        );
        ast.add_declaration(
            Declaration::Agent(AgentDecl {
                name: "test_agent".to_string(),
                properties: HashMap::new(),
                capabilities: None,
                backstory: None,
                tools: None,
            }),
        );
        let result = analyzer.analyze(&ast);
        assert!(result.is_err());
        if let Err(errors) = result {
            assert!(
                errors.iter().any(|e| matches!(
                    e,
                    SemanticError::DuplicateDefinition { name, kind }
                        if name == "test_agent" && kind == "agent"
                ))
            );
        }
    }

    /// A workflow step naming an agent that was never declared must be
    /// reported as an undefined agent.
    #[test]
    fn test_undefined_agent_reference() {
        let mut analyzer = SemanticAnalyzer::new();
        let mut ast = HelixAst::new();
        // The step is only moved into the workflow, so it needs no `mut`.
        let step = StepDecl {
            name: "test_step".to_string(),
            agent: Some("undefined_agent".to_string()),
            crew: None,
            task: None,
            properties: HashMap::new(),
        };
        ast.add_declaration(
            Declaration::Workflow(WorkflowDecl {
                name: "test_workflow".to_string(),
                trigger: None,
                steps: vec![step],
                pipeline: None,
                properties: HashMap::new(),
            }),
        );
        let result = analyzer.analyze(&ast);
        assert!(result.is_err());
        if let Err(errors) = result {
            assert!(
                errors.iter().any(|e| matches!(
                    e,
                    SemanticError::UndefinedAgent { name, .. } if name == "undefined_agent"
                ))
            );
        }
    }
}