1use std::collections::{HashMap, HashSet};
2use super::ast::*;
3use crate::ast::AstVisitor;
/// All semantic problems the analyzer can report for a parsed program.
///
/// Each variant carries enough context (names plus a human-readable
/// location string) to render a self-explanatory diagnostic via `Display`.
#[derive(Debug, Clone)]
pub enum SemanticError {
    /// A step or crew names an agent that was never declared.
    UndefinedAgent { name: String, location: String },
    /// Something references a workflow that was never declared.
    UndefinedWorkflow { name: String, location: String },
    /// A `depends_on` or pipeline entry names a step missing from its workflow.
    UndefinedStep { name: String, workflow: String },
    /// A generic unresolved reference.
    UndefinedReference { reference: String, location: String },
    /// The same name was declared twice for the same kind of item.
    DuplicateDefinition { name: String, kind: String },
    /// A property value has the wrong type for its field.
    TypeMismatch { expected: String, found: String, location: String },
    /// Workflow steps form a dependency cycle; `items` is the cycle path.
    CircularDependency { items: Vec<String> },
    /// A duration literal could not be interpreted.
    InvalidDuration { value: String, location: String },
    /// A declaration is missing a field its kind requires.
    MissingRequiredField { field: String, declaration: String },
    /// A workflow trigger is not one of the recognized trigger kinds.
    InvalidTriggerType { trigger: String },
    /// A crew process type is not one of the recognized process kinds.
    InvalidProcessType { process: String },
    /// A retry backoff strategy is not one of the recognized strategies.
    InvalidBackoffStrategy { strategy: String },
}
impl std::fmt::Display for SemanticError {
    /// Renders a human-readable, single-line diagnostic for each error kind.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Local alias keeps the match arms compact.
        type E = SemanticError;
        match self {
            E::UndefinedAgent { name, location } => {
                write!(f, "Undefined agent '{}' referenced in {}", name, location)
            }
            E::UndefinedWorkflow { name, location } => {
                write!(f, "Undefined workflow '{}' referenced in {}", name, location)
            }
            E::UndefinedStep { name, workflow } => {
                write!(f, "Undefined step '{}' in workflow '{}'", name, workflow)
            }
            E::UndefinedReference { reference, location } => {
                write!(f, "Undefined reference '{}' in {}", reference, location)
            }
            E::DuplicateDefinition { name, kind } => {
                write!(f, "Duplicate {} definition: '{}'", kind, name)
            }
            E::TypeMismatch { expected, found, location } => {
                write!(f, "Type mismatch in {}: expected {}, found {}", location, expected, found)
            }
            E::CircularDependency { items } => {
                write!(f, "Circular dependency detected: {}", items.join(" -> "))
            }
            E::InvalidDuration { value, location } => {
                write!(f, "Invalid duration '{}' in {}", value, location)
            }
            E::MissingRequiredField { field, declaration } => {
                write!(f, "Missing required field '{}' in {}", field, declaration)
            }
            E::InvalidTriggerType { trigger } => {
                write!(f, "Invalid trigger type: '{}'", trigger)
            }
            E::InvalidProcessType { process } => {
                write!(
                    f,
                    "Invalid process type: '{}'. Must be one of: sequential, hierarchical, parallel, consensus",
                    process
                )
            }
            E::InvalidBackoffStrategy { strategy } => {
                write!(
                    f,
                    "Invalid backoff strategy: '{}'. Must be one of: fixed, linear, exponential",
                    strategy
                )
            }
        }
    }
}
/// Walks a parsed `HelixAst`, builds symbol tables for every named
/// declaration, and accumulates `SemanticError`s for duplicates, dangling
/// references, type mismatches, and circular step dependencies.
pub struct SemanticAnalyzer {
    // Agent declarations indexed by name (first definition wins on duplicates).
    agents: HashMap<String, AgentDecl>,
    // Workflow declarations indexed by name.
    workflows: HashMap<String, WorkflowDecl>,
    // Context declarations indexed by name.
    contexts: HashMap<String, ContextDecl>,
    // Crew declarations indexed by name.
    crews: HashMap<String, CrewDecl>,
    // Environment variables the program expects at runtime (harvested from
    // context secrets and `Expression::Variable` nodes).
    expected_env_vars: HashSet<String>,
    // Memory references seen in expressions; collected but not yet validated
    // (underscore prefix marks it intentionally unused for now).
    _expected_memory_refs: HashSet<String>,
    // Every error found so far; `analyze` returns a clone of this on failure.
    errors: Vec<SemanticError>,
}
81impl AstVisitor for SemanticAnalyzer {
82 type Result = ();
83 fn visit_ast(&mut self, ast: &HelixAst) -> Self::Result {
84 for decl in &ast.declarations {
85 self.visit_declaration(decl);
86 }
87 }
88 fn visit_declaration(&mut self, decl: &Declaration) -> Self::Result {
89 match decl {
90 Declaration::Project(p) => self.visit_project(p),
91 Declaration::Agent(a) => self.visit_agent(a),
92 Declaration::Workflow(w) => self.visit_workflow(w),
93 Declaration::Memory(m) => self.visit_memory(m),
94 Declaration::Context(c) => self.visit_context(c),
95 Declaration::Crew(crew) => self.visit_crew(crew),
96 _ => {}
97 }
98 }
99 fn visit_project(&mut self, _project: &ProjectDecl) -> Self::Result {}
100 fn visit_agent(&mut self, _agent: &AgentDecl) -> Self::Result {}
101 fn visit_workflow(&mut self, _workflow: &WorkflowDecl) -> Self::Result {}
102 fn visit_memory(&mut self, memory: &MemoryDecl) -> Self::Result {
103 for (_key, expr) in &memory.properties {
104 self.visit_expression(expr);
105 }
106 }
107 fn visit_context(&mut self, _context: &ContextDecl) -> Self::Result {}
108 fn visit_crew(&mut self, _crew: &CrewDecl) -> Self::Result {}
109 fn visit_expression(&mut self, expr: &Expression) -> Self::Result {
110 match expr {
111 Expression::Variable(var) => {
112 self.expected_env_vars.insert(var.clone());
113 }
114 Expression::Reference(ref_name) => {
115 self._expected_memory_refs.insert(ref_name.clone());
116 }
117 _ => {}
118 }
119 }
120}
121impl SemanticAnalyzer {
122 pub fn new() -> Self {
123 SemanticAnalyzer {
124 agents: HashMap::new(),
125 workflows: HashMap::new(),
126 contexts: HashMap::new(),
127 crews: HashMap::new(),
128 expected_env_vars: HashSet::new(),
129 _expected_memory_refs: HashSet::new(),
130 errors: Vec::new(),
131 }
132 }
133 pub fn analyze(&mut self, ast: &HelixAst) -> Result<(), Vec<SemanticError>> {
134 for decl in &ast.declarations {
135 match decl {
136 Declaration::Agent(agent) => self.visit_agent(agent),
137 Declaration::Workflow(workflow) => self.visit_workflow(workflow),
138 Declaration::Context(context) => self.visit_context(context),
139 Declaration::Crew(crew) => self.visit_crew(crew),
140 _ => {}
141 }
142 }
143 self.collect_definitions(ast)?;
144 self.validate_references(ast);
145 let type_checker = TypeChecker::new();
146 self.type_check_with_checker(ast, &type_checker);
147 self.analyze_dependencies(ast);
148 if !self.errors.is_empty() { Err(self.errors.clone()) } else { Ok(()) }
149 }
150 fn collect_definitions(&mut self, ast: &HelixAst) -> Result<(), Vec<SemanticError>> {
151 for decl in &ast.declarations {
152 match decl {
153 Declaration::Agent(agent) => {
154 if self.agents.contains_key(&agent.name) {
155 self.errors
156 .push(SemanticError::DuplicateDefinition {
157 name: agent.name.clone(),
158 kind: "agent".to_string(),
159 });
160 } else {
161 self.agents.insert(agent.name.clone(), agent.clone());
162 }
163 }
164 Declaration::Workflow(workflow) => {
165 if self.workflows.contains_key(&workflow.name) {
166 self.errors
167 .push(SemanticError::DuplicateDefinition {
168 name: workflow.name.clone(),
169 kind: "workflow".to_string(),
170 });
171 } else {
172 self.workflows.insert(workflow.name.clone(), workflow.clone());
173 }
174 }
175 Declaration::Context(context) => {
176 if self.contexts.contains_key(&context.name) {
177 self.errors
178 .push(SemanticError::DuplicateDefinition {
179 name: context.name.clone(),
180 kind: "context".to_string(),
181 });
182 } else {
183 if let Some(secrets) = &context.secrets {
184 for (_key, secret_ref) in secrets {
185 if let SecretRef::Environment(var) = secret_ref {
186 self.expected_env_vars.insert(var.clone());
187 }
188 }
189 }
190 self.contexts.insert(context.name.clone(), context.clone());
191 }
192 }
193 Declaration::Crew(crew) => {
194 if self.crews.contains_key(&crew.name) {
195 self.errors
196 .push(SemanticError::DuplicateDefinition {
197 name: crew.name.clone(),
198 kind: "crew".to_string(),
199 });
200 } else {
201 self.crews.insert(crew.name.clone(), crew.clone());
202 }
203 }
204 _ => {}
205 }
206 }
207 if !self.errors.is_empty() { Err(self.errors.clone()) } else { Ok(()) }
208 }
209 fn validate_references(&mut self, ast: &HelixAst) {
210 for decl in &ast.declarations {
211 match decl {
212 Declaration::Workflow(workflow) => {
213 self.validate_workflow_references(workflow);
214 }
215 Declaration::Crew(crew) => {
216 self.validate_crew_references(crew);
217 }
218 _ => {}
219 }
220 }
221 }
222 fn validate_workflow_references(&mut self, workflow: &WorkflowDecl) {
223 for step in &workflow.steps {
224 if let Some(agent_name) = &step.agent {
225 if !self.agents.contains_key(agent_name) {
226 self.errors
227 .push(SemanticError::UndefinedAgent {
228 name: agent_name.clone(),
229 location: format!(
230 "workflow '{}', step '{}'", workflow.name, step.name
231 ),
232 });
233 }
234 }
235 if let Some(crew_agents) = &step.crew {
236 for agent_name in crew_agents {
237 if !self.agents.contains_key(agent_name) {
238 self.errors
239 .push(SemanticError::UndefinedAgent {
240 name: agent_name.clone(),
241 location: format!(
242 "workflow '{}', step '{}'", workflow.name, step.name
243 ),
244 });
245 }
246 }
247 }
248 if let Some(depends_on) = step.properties.get("depends_on") {
249 if let Some(deps) = depends_on.as_array() {
250 for dep in deps {
251 if let Some(dep_name) = dep.as_string() {
252 let step_exists = workflow
253 .steps
254 .iter()
255 .any(|s| s.name == dep_name);
256 if !step_exists {
257 self.errors
258 .push(SemanticError::UndefinedStep {
259 name: dep_name,
260 workflow: workflow.name.clone(),
261 });
262 }
263 }
264 }
265 }
266 }
267 }
268 if let Some(pipeline) = &workflow.pipeline {
269 for node in &pipeline.flow {
270 if let PipelineNode::Step(step_name) = node {
271 let step_exists = workflow
272 .steps
273 .iter()
274 .any(|s| s.name == *step_name);
275 if !step_exists {
276 self.errors
277 .push(SemanticError::UndefinedStep {
278 name: step_name.clone(),
279 workflow: workflow.name.clone(),
280 });
281 }
282 }
283 }
284 }
285 if let Some(trigger) = &workflow.trigger {
286 self.validate_trigger(trigger, &workflow.name);
287 }
288 }
289 fn validate_trigger(&mut self, trigger: &Expression, workflow_name: &str) {
290 match trigger {
291 Expression::String(s) | Expression::Identifier(s) => {
292 let valid_triggers = ["manual", "webhook", "event", "file_watch"];
293 if !valid_triggers.contains(&s.as_str()) && !s.starts_with("schedule:") {
294 self.errors
295 .push(SemanticError::InvalidTriggerType {
296 trigger: s.clone(),
297 });
298 }
299 }
300 Expression::Object(map) => {
301 if let Some(trigger_type) = map.get("type") {
302 self.validate_trigger(trigger_type, workflow_name);
303 }
304 }
305 _ => {}
306 }
307 }
308 fn validate_crew_references(&mut self, crew: &CrewDecl) {
309 for agent_name in &crew.agents {
310 if !self.agents.contains_key(agent_name) {
311 self.errors
312 .push(SemanticError::UndefinedAgent {
313 name: agent_name.clone(),
314 location: format!("crew '{}'", crew.name),
315 });
316 }
317 }
318 if let Some(process_type) = &crew.process_type {
319 let valid_types = ["sequential", "hierarchical", "parallel", "consensus"];
320 if !valid_types.contains(&process_type.as_str()) {
321 self.errors
322 .push(SemanticError::InvalidProcessType {
323 process: process_type.clone(),
324 });
325 }
326 }
327 if let Some(process) = &crew.process_type {
328 if process == "hierarchical" {
329 if let Some(manager) = crew.properties.get("manager") {
330 if let Some(manager_name) = manager.as_string() {
331 if !self.agents.contains_key(&manager_name) {
332 self.errors
333 .push(SemanticError::UndefinedAgent {
334 name: manager_name,
335 location: format!("crew '{}' manager", crew.name),
336 });
337 }
338 }
339 } else {
340 self.errors
341 .push(SemanticError::MissingRequiredField {
342 field: "manager".to_string(),
343 declaration: format!("hierarchical crew '{}'", crew.name),
344 });
345 }
346 }
347 }
348 }
349 #[allow(dead_code)]
350 fn type_check(&mut self, ast: &HelixAst) {
351 self.type_check_with_checker(ast, &TypeChecker::new());
352 }
353 fn type_check_with_checker(&mut self, ast: &HelixAst, checker: &TypeChecker) {
354 for decl in &ast.declarations {
355 match decl {
356 Declaration::Agent(agent) => {
357 for (key, expr) in &agent.properties {
358 if let Err(_msg) = checker.check_type(key, expr) {
359 self.errors
360 .push(SemanticError::TypeMismatch {
361 expected: "valid type".to_string(),
362 found: checker.infer_type(expr).to_string(),
363 location: format!("agent '{}'", agent.name),
364 });
365 }
366 }
367 self.type_check_agent(agent);
368 }
369 Declaration::Workflow(workflow) => {
370 self.type_check_workflow(workflow);
371 }
372 _ => {}
373 }
374 }
375 }
376 fn type_check_agent(&mut self, agent: &AgentDecl) {
377 if let Some(temp) = agent.properties.get("temperature") {
378 if let Some(temp_val) = temp.as_number() {
379 if temp_val < 0.0 || temp_val > 2.0 {
380 self.errors
381 .push(SemanticError::TypeMismatch {
382 expected: "number between 0 and 2".to_string(),
383 found: format!("{}", temp_val),
384 location: format!("agent '{}' temperature", agent.name),
385 });
386 }
387 }
388 }
389 if let Some(tokens) = agent.properties.get("max_tokens") {
390 if let Some(tokens_val) = tokens.as_number() {
391 if tokens_val <= 0.0 {
392 self.errors
393 .push(SemanticError::TypeMismatch {
394 expected: "positive number".to_string(),
395 found: format!("{}", tokens_val),
396 location: format!("agent '{}' max_tokens", agent.name),
397 });
398 }
399 }
400 }
401 }
402 fn type_check_workflow(&mut self, workflow: &WorkflowDecl) {
403 for step in &workflow.steps {
404 if let Some(retry) = step.properties.get("retry") {
405 if let Some(retry_obj) = retry.as_object() {
406 if let Some(max_attempts) = retry_obj.get("max_attempts") {
407 if let Some(attempts) = max_attempts.as_number() {
408 if attempts <= 0.0 {
409 self.errors
410 .push(SemanticError::TypeMismatch {
411 expected: "positive number".to_string(),
412 found: format!("{}", attempts),
413 location: format!(
414 "workflow '{}', step '{}' retry.max_attempts", workflow
415 .name, step.name
416 ),
417 });
418 }
419 }
420 }
421 if let Some(backoff) = retry_obj.get("backoff") {
422 if let Some(strategy) = backoff.as_string() {
423 let valid_strategies = ["fixed", "linear", "exponential"];
424 if !valid_strategies.contains(&strategy.as_str()) {
425 self.errors
426 .push(SemanticError::InvalidBackoffStrategy {
427 strategy,
428 });
429 }
430 }
431 }
432 }
433 }
434 }
435 }
436 fn analyze_dependencies(&mut self, ast: &HelixAst) {
437 for decl in &ast.declarations {
438 if let Declaration::Workflow(workflow) = decl {
439 self.check_circular_dependencies(workflow);
440 }
441 }
442 }
443 fn check_circular_dependencies(&mut self, workflow: &WorkflowDecl) {
444 let mut dependency_graph: HashMap<String, Vec<String>> = HashMap::new();
445 for step in &workflow.steps {
446 let deps = if let Some(depends_on) = step.properties.get("depends_on") {
447 if let Some(deps_array) = depends_on.as_array() {
448 deps_array.iter().filter_map(|d| d.as_string()).collect()
449 } else {
450 Vec::new()
451 }
452 } else {
453 Vec::new()
454 };
455 dependency_graph.insert(step.name.clone(), deps);
456 }
457 let mut visited = HashSet::new();
458 let mut rec_stack = HashSet::new();
459 for step in &workflow.steps {
460 if !visited.contains(&step.name) {
461 if let Some(cycle) = self
462 .has_cycle(
463 &step.name,
464 &dependency_graph,
465 &mut visited,
466 &mut rec_stack,
467 &mut Vec::new(),
468 )
469 {
470 self.errors
471 .push(SemanticError::CircularDependency {
472 items: cycle,
473 });
474 break;
475 }
476 }
477 }
478 }
479 fn has_cycle(
480 &self,
481 node: &str,
482 graph: &HashMap<String, Vec<String>>,
483 visited: &mut HashSet<String>,
484 rec_stack: &mut HashSet<String>,
485 path: &mut Vec<String>,
486 ) -> Option<Vec<String>> {
487 visited.insert(node.to_string());
488 rec_stack.insert(node.to_string());
489 path.push(node.to_string());
490 if let Some(neighbors) = graph.get(node) {
491 for neighbor in neighbors {
492 if !visited.contains(neighbor) {
493 if let Some(cycle) = self
494 .has_cycle(neighbor, graph, visited, rec_stack, path)
495 {
496 return Some(cycle);
497 }
498 } else if rec_stack.contains(neighbor) {
499 let cycle_start = path.iter().position(|n| n == neighbor).unwrap();
500 let mut cycle = path[cycle_start..].to_vec();
501 cycle.push(neighbor.clone());
502 return Some(cycle);
503 }
504 }
505 }
506 rec_stack.remove(node);
507 path.pop();
508 None
509 }
510}
/// Checks well-known configuration field names against the value type each
/// one is expected to hold (see `TypeChecker::new` for the table).
pub struct TypeChecker {
    // Field name -> the `ExpressionType` its value must have. Fields absent
    // from this map are accepted unconditionally.
    expected_types: HashMap<String, ExpressionType>,
}
/// The value types the checker can infer for a configuration expression.
#[derive(Debug, Clone, PartialEq)]
pub enum ExpressionType {
    String,
    Number,
    Bool,
    Duration,
    /// Homogeneous array; the element type is inferred from the first item.
    Array(Box<ExpressionType>),
    Object,
    /// Unknown/wildcard type; compatible with every expected type.
    Any,
}
impl std::fmt::Display for ExpressionType {
    /// Renders the lowercase type name (e.g. `array<number>` for arrays).
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            ExpressionType::Array(inner) => write!(f, "array<{}>", inner),
            ExpressionType::String => f.write_str("string"),
            ExpressionType::Number => f.write_str("number"),
            ExpressionType::Bool => f.write_str("boolean"),
            ExpressionType::Duration => f.write_str("duration"),
            ExpressionType::Object => f.write_str("object"),
            ExpressionType::Any => f.write_str("any"),
        }
    }
}
537impl TypeChecker {
538 pub fn new() -> Self {
539 let mut expected_types = HashMap::new();
540 expected_types.insert("temperature".to_string(), ExpressionType::Number);
541 expected_types.insert("max_tokens".to_string(), ExpressionType::Number);
542 expected_types.insert("timeout".to_string(), ExpressionType::Duration);
543 expected_types.insert("debug".to_string(), ExpressionType::Bool);
544 expected_types.insert("parallel".to_string(), ExpressionType::Bool);
545 expected_types.insert("verbose".to_string(), ExpressionType::Bool);
546 expected_types.insert("persistence".to_string(), ExpressionType::Bool);
547 expected_types.insert("dimensions".to_string(), ExpressionType::Number);
548 expected_types.insert("batch_size".to_string(), ExpressionType::Number);
549 expected_types.insert("max_iterations".to_string(), ExpressionType::Number);
550 expected_types.insert("cache_size".to_string(), ExpressionType::Number);
551 TypeChecker { expected_types }
552 }
553 pub fn infer_type(&self, expr: &Expression) -> ExpressionType {
554 match expr {
555 Expression::String(_) | Expression::Identifier(_) => ExpressionType::String,
556 Expression::Number(_) => ExpressionType::Number,
557 Expression::Bool(_) => ExpressionType::Bool,
558 Expression::Duration(_) => ExpressionType::Duration,
559 Expression::Array(items) => {
560 if items.is_empty() {
561 ExpressionType::Array(Box::new(ExpressionType::Any))
562 } else {
563 let first_type = self.infer_type(&items[0]);
564 ExpressionType::Array(Box::new(first_type))
565 }
566 }
567 Expression::Object(_) => ExpressionType::Object,
568 Expression::Variable(_) | Expression::Reference(_) => ExpressionType::Any,
569 _ => ExpressionType::Any,
570 }
571 }
572 pub fn check_type(&self, field: &str, expr: &Expression) -> Result<(), String> {
573 if let Some(expected) = self.expected_types.get(field) {
574 let actual = self.infer_type(expr);
575 if actual != *expected && actual != ExpressionType::Any {
576 return Err(
577 format!(
578 "Type mismatch for field '{}': expected {:?}, found {:?}", field,
579 expected, actual
580 ),
581 );
582 }
583 }
584 Ok(())
585 }
586}
#[cfg(test)]
mod tests {
    use super::*;
    /// Declaring two agents with the same name must surface a
    /// `DuplicateDefinition { kind: "agent" }` error.
    #[test]
    fn test_duplicate_detection() {
        let mut analyzer = SemanticAnalyzer::new();
        let mut ast = HelixAst::new();
        ast.add_declaration(
            Declaration::Agent(AgentDecl {
                name: "test_agent".to_string(),
                properties: HashMap::new(),
                capabilities: None,
                backstory: None,
                tools: None,
            }),
        );
        ast.add_declaration(
            Declaration::Agent(AgentDecl {
                name: "test_agent".to_string(),
                properties: HashMap::new(),
                capabilities: None,
                backstory: None,
                tools: None,
            }),
        );
        let result = analyzer.analyze(&ast);
        assert!(result.is_err());
        if let Err(errors) = result {
            assert!(
                errors.iter().any(| e | matches!(e, SemanticError::DuplicateDefinition {
                name, kind } if name == "test_agent" && kind == "agent"))
            );
        }
    }
    /// A workflow step naming an undeclared agent must surface an
    /// `UndefinedAgent` error.
    #[test]
    fn test_undefined_agent_reference() {
        let mut analyzer = SemanticAnalyzer::new();
        let mut ast = HelixAst::new();
        // `mut` removed: the step is never modified after construction.
        let step = StepDecl {
            name: "test_step".to_string(),
            agent: Some("undefined_agent".to_string()),
            crew: None,
            task: None,
            properties: HashMap::new(),
        };
        ast.add_declaration(
            Declaration::Workflow(WorkflowDecl {
                name: "test_workflow".to_string(),
                trigger: None,
                steps: vec![step],
                pipeline: None,
                properties: HashMap::new(),
            }),
        );
        let result = analyzer.analyze(&ast);
        assert!(result.is_err());
        if let Err(errors) = result {
            assert!(
                errors.iter().any(| e | matches!(e, SemanticError::UndefinedAgent { name,
                .. } if name == "undefined_agent"))
            );
        }
    }
}