helix_core/
semantic.rs

1use std::collections::{HashMap, HashSet};
2use super::ast::*;
3use crate::ast::AstVisitor;
/// A problem found while semantically analysing a Helix AST.
///
/// Every variant carries the strings needed to render its message through
/// the `Display` implementation. The type is comparable so that callers and
/// tests can check for an exact error value.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SemanticError {
    /// An agent name was referenced at `location` but never declared.
    UndefinedAgent { name: String, location: String },
    /// A workflow name was referenced at `location` but never declared.
    UndefinedWorkflow { name: String, location: String },
    /// A step name was used inside `workflow` but that workflow has no such step.
    UndefinedStep { name: String, workflow: String },
    /// A generic reference at `location` could not be resolved.
    UndefinedReference { reference: String, location: String },
    /// Two declarations of the same `kind` share `name`.
    DuplicateDefinition { name: String, kind: String },
    /// A value at `location` has type `found` where `expected` was required.
    TypeMismatch { expected: String, found: String, location: String },
    /// The listed items depend on each other in a loop.
    CircularDependency { items: Vec<String> },
    /// A duration literal at `location` is not valid.
    InvalidDuration { value: String, location: String },
    /// `declaration` lacks the mandatory `field`.
    MissingRequiredField { field: String, declaration: String },
    /// The trigger kind is not one of the recognised triggers.
    InvalidTriggerType { trigger: String },
    /// The crew process type is not one of the supported process types.
    InvalidProcessType { process: String },
    /// The retry backoff strategy is not one of the supported strategies.
    InvalidBackoffStrategy { strategy: String },
}
19impl std::fmt::Display for SemanticError {
20    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
21        match self {
22            SemanticError::UndefinedAgent { name, location } => {
23                write!(f, "Undefined agent '{}' referenced in {}", name, location)
24            }
25            SemanticError::UndefinedWorkflow { name, location } => {
26                write!(f, "Undefined workflow '{}' referenced in {}", name, location)
27            }
28            SemanticError::UndefinedStep { name, workflow } => {
29                write!(f, "Undefined step '{}' in workflow '{}'", name, workflow)
30            }
31            SemanticError::UndefinedReference { reference, location } => {
32                write!(f, "Undefined reference '{}' in {}", reference, location)
33            }
34            SemanticError::DuplicateDefinition { name, kind } => {
35                write!(f, "Duplicate {} definition: '{}'", kind, name)
36            }
37            SemanticError::TypeMismatch { expected, found, location } => {
38                write!(
39                    f, "Type mismatch in {}: expected {}, found {}", location, expected,
40                    found
41                )
42            }
43            SemanticError::CircularDependency { items } => {
44                write!(f, "Circular dependency detected: {}", items.join(" -> "))
45            }
46            SemanticError::InvalidDuration { value, location } => {
47                write!(f, "Invalid duration '{}' in {}", value, location)
48            }
49            SemanticError::MissingRequiredField { field, declaration } => {
50                write!(f, "Missing required field '{}' in {}", field, declaration)
51            }
52            SemanticError::InvalidTriggerType { trigger } => {
53                write!(f, "Invalid trigger type: '{}'", trigger)
54            }
55            SemanticError::InvalidProcessType { process } => {
56                write!(
57                    f,
58                    "Invalid process type: '{}'. Must be one of: sequential, hierarchical, parallel, consensus",
59                    process
60                )
61            }
62            SemanticError::InvalidBackoffStrategy { strategy } => {
63                write!(
64                    f,
65                    "Invalid backoff strategy: '{}'. Must be one of: fixed, linear, exponential",
66                    strategy
67                )
68            }
69        }
70    }
71}
72pub struct SemanticAnalyzer {
73    agents: HashMap<String, AgentDecl>,
74    workflows: HashMap<String, WorkflowDecl>,
75    contexts: HashMap<String, ContextDecl>,
76    crews: HashMap<String, CrewDecl>,
77    expected_env_vars: HashSet<String>,
78    _expected_memory_refs: HashSet<String>,
79    errors: Vec<SemanticError>,
80}
81impl AstVisitor for SemanticAnalyzer {
82    type Result = ();
83    fn visit_ast(&mut self, ast: &HelixAst) -> Self::Result {
84        for decl in &ast.declarations {
85            self.visit_declaration(decl);
86        }
87    }
88    fn visit_declaration(&mut self, decl: &Declaration) -> Self::Result {
89        match decl {
90            Declaration::Project(p) => self.visit_project(p),
91            Declaration::Agent(a) => self.visit_agent(a),
92            Declaration::Workflow(w) => self.visit_workflow(w),
93            Declaration::Memory(m) => self.visit_memory(m),
94            Declaration::Context(c) => self.visit_context(c),
95            Declaration::Crew(crew) => self.visit_crew(crew),
96            _ => {}
97        }
98    }
99    fn visit_project(&mut self, _project: &ProjectDecl) -> Self::Result {}
100    fn visit_agent(&mut self, _agent: &AgentDecl) -> Self::Result {}
101    fn visit_workflow(&mut self, _workflow: &WorkflowDecl) -> Self::Result {}
102    fn visit_memory(&mut self, memory: &MemoryDecl) -> Self::Result {
103        for (_key, expr) in &memory.properties {
104            self.visit_expression(expr);
105        }
106    }
107    fn visit_context(&mut self, _context: &ContextDecl) -> Self::Result {}
108    fn visit_crew(&mut self, _crew: &CrewDecl) -> Self::Result {}
109    fn visit_expression(&mut self, expr: &Expression) -> Self::Result {
110        match expr {
111            Expression::Variable(var) => {
112                self.expected_env_vars.insert(var.clone());
113            }
114            Expression::Reference(ref_name) => {
115                self._expected_memory_refs.insert(ref_name.clone());
116            }
117            _ => {}
118        }
119    }
120}
121impl SemanticAnalyzer {
122    pub fn new() -> Self {
123        SemanticAnalyzer {
124            agents: HashMap::new(),
125            workflows: HashMap::new(),
126            contexts: HashMap::new(),
127            crews: HashMap::new(),
128            expected_env_vars: HashSet::new(),
129            _expected_memory_refs: HashSet::new(),
130            errors: Vec::new(),
131        }
132    }
133    pub fn analyze(&mut self, ast: &HelixAst) -> Result<(), Vec<SemanticError>> {
134        for decl in &ast.declarations {
135            match decl {
136                Declaration::Agent(agent) => self.visit_agent(agent),
137                Declaration::Workflow(workflow) => self.visit_workflow(workflow),
138                Declaration::Context(context) => self.visit_context(context),
139                Declaration::Crew(crew) => self.visit_crew(crew),
140                _ => {}
141            }
142        }
143        self.collect_definitions(ast)?;
144        self.validate_references(ast);
145        let type_checker = TypeChecker::new();
146        self.type_check_with_checker(ast, &type_checker);
147        self.analyze_dependencies(ast);
148        if !self.errors.is_empty() { Err(self.errors.clone()) } else { Ok(()) }
149    }
150    fn collect_definitions(&mut self, ast: &HelixAst) -> Result<(), Vec<SemanticError>> {
151        for decl in &ast.declarations {
152            match decl {
153                Declaration::Agent(agent) => {
154                    if self.agents.contains_key(&agent.name) {
155                        self.errors
156                            .push(SemanticError::DuplicateDefinition {
157                                name: agent.name.clone(),
158                                kind: "agent".to_string(),
159                            });
160                    } else {
161                        self.agents.insert(agent.name.clone(), agent.clone());
162                    }
163                }
164                Declaration::Workflow(workflow) => {
165                    if self.workflows.contains_key(&workflow.name) {
166                        self.errors
167                            .push(SemanticError::DuplicateDefinition {
168                                name: workflow.name.clone(),
169                                kind: "workflow".to_string(),
170                            });
171                    } else {
172                        self.workflows.insert(workflow.name.clone(), workflow.clone());
173                    }
174                }
175                Declaration::Context(context) => {
176                    if self.contexts.contains_key(&context.name) {
177                        self.errors
178                            .push(SemanticError::DuplicateDefinition {
179                                name: context.name.clone(),
180                                kind: "context".to_string(),
181                            });
182                    } else {
183                        if let Some(secrets) = &context.secrets {
184                            for (_key, secret_ref) in secrets {
185                                if let SecretRef::Environment(var) = secret_ref {
186                                    self.expected_env_vars.insert(var.clone());
187                                }
188                            }
189                        }
190                        self.contexts.insert(context.name.clone(), context.clone());
191                    }
192                }
193                Declaration::Crew(crew) => {
194                    if self.crews.contains_key(&crew.name) {
195                        self.errors
196                            .push(SemanticError::DuplicateDefinition {
197                                name: crew.name.clone(),
198                                kind: "crew".to_string(),
199                            });
200                    } else {
201                        self.crews.insert(crew.name.clone(), crew.clone());
202                    }
203                }
204                _ => {}
205            }
206        }
207        if !self.errors.is_empty() { Err(self.errors.clone()) } else { Ok(()) }
208    }
209    fn validate_references(&mut self, ast: &HelixAst) {
210        for decl in &ast.declarations {
211            match decl {
212                Declaration::Workflow(workflow) => {
213                    self.validate_workflow_references(workflow);
214                }
215                Declaration::Crew(crew) => {
216                    self.validate_crew_references(crew);
217                }
218                _ => {}
219            }
220        }
221    }
222    fn validate_workflow_references(&mut self, workflow: &WorkflowDecl) {
223        for step in &workflow.steps {
224            if let Some(agent_name) = &step.agent {
225                if !self.agents.contains_key(agent_name) {
226                    self.errors
227                        .push(SemanticError::UndefinedAgent {
228                            name: agent_name.clone(),
229                            location: format!(
230                                "workflow '{}', step '{}'", workflow.name, step.name
231                            ),
232                        });
233                }
234            }
235            if let Some(crew_agents) = &step.crew {
236                for agent_name in crew_agents {
237                    if !self.agents.contains_key(agent_name) {
238                        self.errors
239                            .push(SemanticError::UndefinedAgent {
240                                name: agent_name.clone(),
241                                location: format!(
242                                    "workflow '{}', step '{}'", workflow.name, step.name
243                                ),
244                            });
245                    }
246                }
247            }
248            if let Some(depends_on) = step.properties.get("depends_on") {
249                if let Some(deps) = depends_on.as_array() {
250                    for dep in deps {
251                        if let Some(dep_name) = dep.as_string() {
252                            let step_exists = workflow
253                                .steps
254                                .iter()
255                                .any(|s| s.name == dep_name);
256                            if !step_exists {
257                                self.errors
258                                    .push(SemanticError::UndefinedStep {
259                                        name: dep_name,
260                                        workflow: workflow.name.clone(),
261                                    });
262                            }
263                        }
264                    }
265                }
266            }
267        }
268        if let Some(pipeline) = &workflow.pipeline {
269            for node in &pipeline.flow {
270                if let PipelineNode::Step(step_name) = node {
271                    let step_exists = workflow
272                        .steps
273                        .iter()
274                        .any(|s| s.name == *step_name);
275                    if !step_exists {
276                        self.errors
277                            .push(SemanticError::UndefinedStep {
278                                name: step_name.clone(),
279                                workflow: workflow.name.clone(),
280                            });
281                    }
282                }
283            }
284        }
285        if let Some(trigger) = &workflow.trigger {
286            self.validate_trigger(trigger, &workflow.name);
287        }
288    }
289    fn validate_trigger(&mut self, trigger: &Expression, workflow_name: &str) {
290        match trigger {
291            Expression::String(s) | Expression::Identifier(s) => {
292                let valid_triggers = ["manual", "webhook", "event", "file_watch"];
293                if !valid_triggers.contains(&s.as_str()) && !s.starts_with("schedule:") {
294                    self.errors
295                        .push(SemanticError::InvalidTriggerType {
296                            trigger: s.clone(),
297                        });
298                }
299            }
300            Expression::Object(map) => {
301                if let Some(trigger_type) = map.get("type") {
302                    self.validate_trigger(trigger_type, workflow_name);
303                }
304            }
305            _ => {}
306        }
307    }
308    fn validate_crew_references(&mut self, crew: &CrewDecl) {
309        for agent_name in &crew.agents {
310            if !self.agents.contains_key(agent_name) {
311                self.errors
312                    .push(SemanticError::UndefinedAgent {
313                        name: agent_name.clone(),
314                        location: format!("crew '{}'", crew.name),
315                    });
316            }
317        }
318        if let Some(process_type) = &crew.process_type {
319            let valid_types = ["sequential", "hierarchical", "parallel", "consensus"];
320            if !valid_types.contains(&process_type.as_str()) {
321                self.errors
322                    .push(SemanticError::InvalidProcessType {
323                        process: process_type.clone(),
324                    });
325            }
326        }
327        if let Some(process) = &crew.process_type {
328            if process == "hierarchical" {
329                if let Some(manager) = crew.properties.get("manager") {
330                    if let Some(manager_name) = manager.as_string() {
331                        if !self.agents.contains_key(&manager_name) {
332                            self.errors
333                                .push(SemanticError::UndefinedAgent {
334                                    name: manager_name,
335                                    location: format!("crew '{}' manager", crew.name),
336                                });
337                        }
338                    }
339                } else {
340                    self.errors
341                        .push(SemanticError::MissingRequiredField {
342                            field: "manager".to_string(),
343                            declaration: format!("hierarchical crew '{}'", crew.name),
344                        });
345                }
346            }
347        }
348    }
349    #[allow(dead_code)]
350    fn type_check(&mut self, ast: &HelixAst) {
351        self.type_check_with_checker(ast, &TypeChecker::new());
352    }
353    fn type_check_with_checker(&mut self, ast: &HelixAst, checker: &TypeChecker) {
354        for decl in &ast.declarations {
355            match decl {
356                Declaration::Agent(agent) => {
357                    for (key, expr) in &agent.properties {
358                        if let Err(_msg) = checker.check_type(key, expr) {
359                            self.errors
360                                .push(SemanticError::TypeMismatch {
361                                    expected: "valid type".to_string(),
362                                    found: checker.infer_type(expr).to_string(),
363                                    location: format!("agent '{}'", agent.name),
364                                });
365                        }
366                    }
367                    self.type_check_agent(agent);
368                }
369                Declaration::Workflow(workflow) => {
370                    self.type_check_workflow(workflow);
371                }
372                _ => {}
373            }
374        }
375    }
376    fn type_check_agent(&mut self, agent: &AgentDecl) {
377        if let Some(temp) = agent.properties.get("temperature") {
378            if let Some(temp_val) = temp.as_number() {
379                if temp_val < 0.0 || temp_val > 2.0 {
380                    self.errors
381                        .push(SemanticError::TypeMismatch {
382                            expected: "number between 0 and 2".to_string(),
383                            found: format!("{}", temp_val),
384                            location: format!("agent '{}' temperature", agent.name),
385                        });
386                }
387            }
388        }
389        if let Some(tokens) = agent.properties.get("max_tokens") {
390            if let Some(tokens_val) = tokens.as_number() {
391                if tokens_val <= 0.0 {
392                    self.errors
393                        .push(SemanticError::TypeMismatch {
394                            expected: "positive number".to_string(),
395                            found: format!("{}", tokens_val),
396                            location: format!("agent '{}' max_tokens", agent.name),
397                        });
398                }
399            }
400        }
401    }
402    fn type_check_workflow(&mut self, workflow: &WorkflowDecl) {
403        for step in &workflow.steps {
404            if let Some(retry) = step.properties.get("retry") {
405                if let Some(retry_obj) = retry.as_object() {
406                    if let Some(max_attempts) = retry_obj.get("max_attempts") {
407                        if let Some(attempts) = max_attempts.as_number() {
408                            if attempts <= 0.0 {
409                                self.errors
410                                    .push(SemanticError::TypeMismatch {
411                                        expected: "positive number".to_string(),
412                                        found: format!("{}", attempts),
413                                        location: format!(
414                                            "workflow '{}', step '{}' retry.max_attempts", workflow
415                                            .name, step.name
416                                        ),
417                                    });
418                            }
419                        }
420                    }
421                    if let Some(backoff) = retry_obj.get("backoff") {
422                        if let Some(strategy) = backoff.as_string() {
423                            let valid_strategies = ["fixed", "linear", "exponential"];
424                            if !valid_strategies.contains(&strategy.as_str()) {
425                                self.errors
426                                    .push(SemanticError::InvalidBackoffStrategy {
427                                        strategy,
428                                    });
429                            }
430                        }
431                    }
432                }
433            }
434        }
435    }
436    fn analyze_dependencies(&mut self, ast: &HelixAst) {
437        for decl in &ast.declarations {
438            if let Declaration::Workflow(workflow) = decl {
439                self.check_circular_dependencies(workflow);
440            }
441        }
442    }
443    fn check_circular_dependencies(&mut self, workflow: &WorkflowDecl) {
444        let mut dependency_graph: HashMap<String, Vec<String>> = HashMap::new();
445        for step in &workflow.steps {
446            let deps = if let Some(depends_on) = step.properties.get("depends_on") {
447                if let Some(deps_array) = depends_on.as_array() {
448                    deps_array.iter().filter_map(|d| d.as_string()).collect()
449                } else {
450                    Vec::new()
451                }
452            } else {
453                Vec::new()
454            };
455            dependency_graph.insert(step.name.clone(), deps);
456        }
457        let mut visited = HashSet::new();
458        let mut rec_stack = HashSet::new();
459        for step in &workflow.steps {
460            if !visited.contains(&step.name) {
461                if let Some(cycle) = self
462                    .has_cycle(
463                        &step.name,
464                        &dependency_graph,
465                        &mut visited,
466                        &mut rec_stack,
467                        &mut Vec::new(),
468                    )
469                {
470                    self.errors
471                        .push(SemanticError::CircularDependency {
472                            items: cycle,
473                        });
474                    break;
475                }
476            }
477        }
478    }
479    fn has_cycle(
480        &self,
481        node: &str,
482        graph: &HashMap<String, Vec<String>>,
483        visited: &mut HashSet<String>,
484        rec_stack: &mut HashSet<String>,
485        path: &mut Vec<String>,
486    ) -> Option<Vec<String>> {
487        visited.insert(node.to_string());
488        rec_stack.insert(node.to_string());
489        path.push(node.to_string());
490        if let Some(neighbors) = graph.get(node) {
491            for neighbor in neighbors {
492                if !visited.contains(neighbor) {
493                    if let Some(cycle) = self
494                        .has_cycle(neighbor, graph, visited, rec_stack, path)
495                    {
496                        return Some(cycle);
497                    }
498                } else if rec_stack.contains(neighbor) {
499                    let cycle_start = path.iter().position(|n| n == neighbor).unwrap();
500                    let mut cycle = path[cycle_start..].to_vec();
501                    cycle.push(neighbor.clone());
502                    return Some(cycle);
503                }
504            }
505        }
506        rec_stack.remove(node);
507        path.pop();
508        None
509    }
510}
511pub struct TypeChecker {
512    expected_types: HashMap<String, ExpressionType>,
513}
/// The type inferred for an expression, or expected for a field.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ExpressionType {
    /// Text value.
    String,
    /// Numeric value.
    Number,
    /// Boolean value.
    Bool,
    /// Duration value.
    Duration,
    /// Array whose elements have the boxed type.
    Array(Box<ExpressionType>),
    /// Key/value object.
    Object,
    /// Type that is not known statically.
    Any,
}
524impl std::fmt::Display for ExpressionType {
525    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
526        match self {
527            ExpressionType::String => write!(f, "string"),
528            ExpressionType::Number => write!(f, "number"),
529            ExpressionType::Bool => write!(f, "boolean"),
530            ExpressionType::Duration => write!(f, "duration"),
531            ExpressionType::Array(inner) => write!(f, "array<{}>", inner),
532            ExpressionType::Object => write!(f, "object"),
533            ExpressionType::Any => write!(f, "any"),
534        }
535    }
536}
537impl TypeChecker {
538    pub fn new() -> Self {
539        let mut expected_types = HashMap::new();
540        expected_types.insert("temperature".to_string(), ExpressionType::Number);
541        expected_types.insert("max_tokens".to_string(), ExpressionType::Number);
542        expected_types.insert("timeout".to_string(), ExpressionType::Duration);
543        expected_types.insert("debug".to_string(), ExpressionType::Bool);
544        expected_types.insert("parallel".to_string(), ExpressionType::Bool);
545        expected_types.insert("verbose".to_string(), ExpressionType::Bool);
546        expected_types.insert("persistence".to_string(), ExpressionType::Bool);
547        expected_types.insert("dimensions".to_string(), ExpressionType::Number);
548        expected_types.insert("batch_size".to_string(), ExpressionType::Number);
549        expected_types.insert("max_iterations".to_string(), ExpressionType::Number);
550        expected_types.insert("cache_size".to_string(), ExpressionType::Number);
551        TypeChecker { expected_types }
552    }
553    pub fn infer_type(&self, expr: &Expression) -> ExpressionType {
554        match expr {
555            Expression::String(_) | Expression::Identifier(_) => ExpressionType::String,
556            Expression::Number(_) => ExpressionType::Number,
557            Expression::Bool(_) => ExpressionType::Bool,
558            Expression::Duration(_) => ExpressionType::Duration,
559            Expression::Array(items) => {
560                if items.is_empty() {
561                    ExpressionType::Array(Box::new(ExpressionType::Any))
562                } else {
563                    let first_type = self.infer_type(&items[0]);
564                    ExpressionType::Array(Box::new(first_type))
565                }
566            }
567            Expression::Object(_) => ExpressionType::Object,
568            Expression::Variable(_) | Expression::Reference(_) => ExpressionType::Any,
569            _ => ExpressionType::Any,
570        }
571    }
572    pub fn check_type(&self, field: &str, expr: &Expression) -> Result<(), String> {
573        if let Some(expected) = self.expected_types.get(field) {
574            let actual = self.infer_type(expr);
575            if actual != *expected && actual != ExpressionType::Any {
576                return Err(
577                    format!(
578                        "Type mismatch for field '{}': expected {:?}, found {:?}", field,
579                        expected, actual
580                    ),
581                );
582            }
583        }
584        Ok(())
585    }
586}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an agent declaration that has only a name.
    fn bare_agent(name: &str) -> AgentDecl {
        AgentDecl {
            name: name.to_string(),
            properties: HashMap::new(),
            capabilities: None,
            backstory: None,
            tools: None,
        }
    }

    /// Declaring the same agent twice must be reported as a duplicate.
    #[test]
    fn test_duplicate_detection() {
        let mut analyzer = SemanticAnalyzer::new();
        let mut ast = HelixAst::new();
        ast.add_declaration(Declaration::Agent(bare_agent("test_agent")));
        ast.add_declaration(Declaration::Agent(bare_agent("test_agent")));

        let errors = analyzer
            .analyze(&ast)
            .expect_err("a duplicated agent must fail analysis");
        assert!(errors.iter().any(|e| matches!(
            e,
            SemanticError::DuplicateDefinition { name, kind }
                if name == "test_agent" && kind == "agent"
        )));
    }

    /// A step that names an undeclared agent must be reported.
    #[test]
    fn test_undefined_agent_reference() {
        let mut analyzer = SemanticAnalyzer::new();
        let mut ast = HelixAst::new();
        let step = StepDecl {
            name: "test_step".to_string(),
            agent: Some("undefined_agent".to_string()),
            crew: None,
            task: None,
            properties: HashMap::new(),
        };
        ast.add_declaration(Declaration::Workflow(WorkflowDecl {
            name: "test_workflow".to_string(),
            trigger: None,
            steps: vec![step],
            pipeline: None,
            properties: HashMap::new(),
        }));

        let errors = analyzer
            .analyze(&ast)
            .expect_err("an undefined agent must fail analysis");
        assert!(errors.iter().any(|e| matches!(
            e,
            SemanticError::UndefinedAgent { name, .. } if name == "undefined_agent"
        )));
    }
}