use oxify_model::{NodeKind, Workflow};
use std::collections::HashSet;
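/// Severity of a detected workflow health issue, ordered from least to most severe.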
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum HealthSeverity {
Info,
Warning,
Error,
Critical,
}
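/// A single finding produced by a health check: a category, a human-readable
/// description, a suggested remediation, and the affected node names or ids.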
#[derive(Debug, Clone)]
pub struct HealthIssue {
pub severity: HealthSeverity,
pub category: String,
pub description: String,
pub recommendation: String,
pub affected_nodes: Vec<String>,
}
impl HealthIssue {
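/// Creates an issue with no affected nodes recorded yet.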
pub fn new(
severity: HealthSeverity,
category: impl Into<String>,
description: impl Into<String>,
recommendation: impl Into<String>,
) -> Self {
Self {
severity,
category: category.into(),
description: description.into(),
recommendation: recommendation.into(),
affected_nodes: Vec::new(),
}
}
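/// Attaches the list of affected node names or ids to the issue (builder style).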
pub fn with_nodes(mut self, nodes: Vec<String>) -> Self {
self.affected_nodes = nodes;
self
}
}
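/// Aggregated result of running all health checks: the collected issues, a 0-100
/// health score, and a flag that is false when any Error or Critical issue is present.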
#[derive(Debug, Clone)]
pub struct HealthReport {
pub issues: Vec<HealthIssue>,
pub health_score: u8,
pub is_healthy: bool,
}
impl HealthReport {
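/// Builds a report from the collected issues, computing the health score and the
/// `is_healthy` flag.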
pub fn new(issues: Vec<HealthIssue>) -> Self {
let is_healthy = !issues
.iter()
.any(|i| matches!(i.severity, HealthSeverity::Error | HealthSeverity::Critical));
let health_score = Self::calculate_health_score(&issues);
Self {
issues,
health_score,
is_healthy,
}
}
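/// Starts from 100 and subtracts a per-issue penalty (Info 0, Warning 5, Error 15,
/// Critical 30), saturating at 0.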
fn calculate_health_score(issues: &[HealthIssue]) -> u8 {
let mut score: u8 = 100;
for issue in issues {
let penalty: u8 = match issue.severity {
HealthSeverity::Info => 0,
HealthSeverity::Warning => 5,
HealthSeverity::Error => 15,
HealthSeverity::Critical => 30,
};
score = score.saturating_sub(penalty);
}
score
}
pub fn get_issues_by_severity(&self, severity: HealthSeverity) -> Vec<&HealthIssue> {
self.issues
.iter()
.filter(|i| i.severity == severity)
.collect()
}
pub fn count_by_severity(&self, severity: HealthSeverity) -> usize {
self.issues
.iter()
.filter(|i| i.severity == severity)
.count()
}
}
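/// Runs a fixed set of structural, performance, resilience, and configuration checks
/// over a workflow.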
pub struct HealthChecker;
impl HealthChecker {
pub fn new() -> Self {
Self
}
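/// Runs every health check against the workflow and aggregates the findings.
///
/// A minimal usage sketch (the `workflow` value is assumed to be built elsewhere):
///
/// ```ignore
/// let checker = HealthChecker::new();
/// let report = checker.check(&workflow);
/// if !report.is_healthy {
///     for issue in &report.issues {
///         eprintln!("[{:?}] {}: {}", issue.severity, issue.category, issue.recommendation);
///     }
/// }
/// ```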
pub fn check(&self, workflow: &Workflow) -> HealthReport {
let mut issues = Vec::new();
self.check_node_count(workflow, &mut issues);
self.check_dag_depth(workflow, &mut issues);
self.check_parallel_width(workflow, &mut issues);
self.check_node_names(workflow, &mut issues);
self.check_isolated_nodes(workflow, &mut issues);
self.check_terminal_nodes(workflow, &mut issues);
self.check_loop_safety(workflow, &mut issues);
self.check_retry_config(workflow, &mut issues);
self.check_llm_configuration(workflow, &mut issues);
HealthReport::new(issues)
}
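/// Flags empty and single-node workflows and warns when the node count is large
/// enough (over 500, then over 1000) to affect performance.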
fn check_node_count(&self, workflow: &Workflow, issues: &mut Vec<HealthIssue>) {
let node_count = workflow.nodes.len();
if node_count == 0 {
issues.push(HealthIssue::new(
HealthSeverity::Critical,
"Structure",
"Workflow has no nodes",
"Add at least a start and end node to the workflow",
));
} else if node_count == 1 {
issues.push(HealthIssue::new(
HealthSeverity::Error,
"Structure",
"Workflow has only one node",
"Add processing nodes between start and end",
));
} else if node_count > 1000 {
issues.push(HealthIssue::new(
HealthSeverity::Warning,
"Performance",
format!(
"Workflow has {} nodes, which may impact performance",
node_count
),
"Consider splitting into sub-workflows or simplifying the workflow",
));
} else if node_count > 500 {
issues.push(HealthIssue::new(
HealthSeverity::Info,
"Performance",
format!("Workflow has {} nodes", node_count),
"Monitor execution performance and consider optimizations if needed",
));
}
}
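/// Estimates the longest Start-to-leaf chain with a single pass over the edges;
/// depths on diamond-shaped graphs are an approximation because each node is
/// expanded only once.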
fn check_dag_depth(&self, workflow: &Workflow, issues: &mut Vec<HealthIssue>) {
let mut depths = std::collections::HashMap::new();
let start_nodes: Vec<_> = workflow
.nodes
.iter()
.filter(|n| matches!(n.kind, NodeKind::Start))
.map(|n| n.id)
.collect();
for start_id in start_nodes {
depths.insert(start_id, 0);
}
let mut queue: Vec<_> = depths.keys().copied().collect();
let mut visited = HashSet::new();
while let Some(node_id) = queue.pop() {
if !visited.insert(node_id) {
continue;
}
let current_depth = *depths.get(&node_id).unwrap_or(&0);
for edge in &workflow.edges {
if edge.from == node_id {
let new_depth = current_depth + 1;
depths
.entry(edge.to)
.and_modify(|d| *d = (*d).max(new_depth))
.or_insert(new_depth);
queue.push(edge.to);
}
}
}
let max_depth = depths.values().max().copied().unwrap_or(0);
if max_depth > 50 {
issues.push(HealthIssue::new(
HealthSeverity::Warning,
"Performance",
format!(
"Workflow has a depth of {}, which may serialize execution",
max_depth
),
"Flatten the workflow by identifying opportunities for parallel execution",
));
} else if max_depth > 20 {
issues.push(HealthIssue::new(
HealthSeverity::Info,
"Performance",
format!("Workflow has a depth of {}", max_depth),
"Consider opportunities to reduce sequential dependencies",
));
}
}
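/// Approximates parallel width by grouping nodes by their in-degree as a rough proxy
/// for execution level, then warning when the largest group is very wide.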
fn check_parallel_width(&self, workflow: &Workflow, issues: &mut Vec<HealthIssue>) {
let mut levels = std::collections::HashMap::new();
for node in &workflow.nodes {
let incoming = workflow.edges.iter().filter(|e| e.to == node.id).count();
let level = incoming;
*levels.entry(level).or_insert(0) += 1;
}
let max_width = levels.values().max().copied().unwrap_or(0);
if max_width > 100 {
issues.push(HealthIssue::new(
HealthSeverity::Warning,
"Resource Usage",
format!(
"Workflow has up to {} parallel nodes at one level",
max_width
),
"Configure max_concurrent_nodes in ExecutionConfig to limit parallel execution",
));
} else if max_width > 50 {
issues.push(HealthIssue::new(
HealthSeverity::Info,
"Resource Usage",
format!(
"Workflow has up to {} parallel nodes at one level",
max_width
),
"Ensure sufficient system resources for parallel execution",
));
}
}
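/// Checks that every node has a non-empty, unique name so that logs and reports are
/// easy to trace back to the workflow definition.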
fn check_node_names(&self, workflow: &Workflow, issues: &mut Vec<HealthIssue>) {
let mut name_counts = std::collections::HashMap::new();
let mut empty_names = Vec::new();
for node in &workflow.nodes {
if node.name.trim().is_empty() {
empty_names.push(node.id.to_string());
} else {
*name_counts.entry(&node.name).or_insert(0) += 1;
}
}
if !empty_names.is_empty() {
issues.push(
HealthIssue::new(
HealthSeverity::Warning,
"Naming",
format!("{} nodes have empty names", empty_names.len()),
"Provide descriptive names for all nodes to improve debugging",
)
.with_nodes(empty_names),
);
}
for (name, count) in name_counts {
if count > 1 {
issues.push(HealthIssue::new(
HealthSeverity::Warning,
"Naming",
format!("Node name '{}' is used {} times", name, count),
"Use unique names for nodes to avoid confusion",
));
}
}
}
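/// Reports non-terminal nodes that have no incoming or outgoing edges and therefore
/// can never participate in execution.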
fn check_isolated_nodes(&self, workflow: &Workflow, issues: &mut Vec<HealthIssue>) {
let mut connected_nodes = HashSet::new();
for edge in &workflow.edges {
connected_nodes.insert(edge.from);
connected_nodes.insert(edge.to);
}
let isolated: Vec<_> = workflow
.nodes
.iter()
.filter(|n| !connected_nodes.contains(&n.id))
.filter(|n| !matches!(n.kind, NodeKind::Start | NodeKind::End))
.map(|n| n.name.clone())
.collect();
if !isolated.is_empty() {
issues.push(
HealthIssue::new(
HealthSeverity::Error,
"Structure",
format!("{} isolated nodes detected", isolated.len()),
"Connect all nodes to the workflow DAG or remove unused nodes",
)
.with_nodes(isolated),
);
}
}
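/// Verifies that the workflow has at least one Start node and at least one End node,
/// and comments on unusual counts of either.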
fn check_terminal_nodes(&self, workflow: &Workflow, issues: &mut Vec<HealthIssue>) {
let start_count = workflow
.nodes
.iter()
.filter(|n| matches!(n.kind, NodeKind::Start))
.count();
let end_count = workflow
.nodes
.iter()
.filter(|n| matches!(n.kind, NodeKind::End))
.count();
if start_count == 0 {
issues.push(HealthIssue::new(
HealthSeverity::Critical,
"Structure",
"Workflow has no Start node",
"Add a Start node to mark the workflow entry point",
));
} else if start_count > 1 {
issues.push(HealthIssue::new(
HealthSeverity::Warning,
"Structure",
format!("Workflow has {} Start nodes", start_count),
"Typically, workflows should have exactly one Start node",
));
}
if end_count == 0 {
issues.push(HealthIssue::new(
HealthSeverity::Error,
"Structure",
"Workflow has no End node",
"Add an End node to mark the workflow completion point",
));
} else if end_count > 1 {
issues.push(HealthIssue::new(
HealthSeverity::Info,
"Structure",
format!("Workflow has {} End nodes (multiple paths)", end_count),
"Multiple end nodes are valid for conditional workflows",
));
}
}
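/// Warns when the workflow contains an unusually large number of loop nodes.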
fn check_loop_safety(&self, workflow: &Workflow, issues: &mut Vec<HealthIssue>) {
let loop_nodes: Vec<_> = workflow
.nodes
.iter()
.filter(|n| matches!(n.kind, NodeKind::Loop(_)))
.collect();
if loop_nodes.len() > 10 {
issues.push(HealthIssue::new(
HealthSeverity::Warning,
"Complexity",
format!("Workflow has {} loop nodes", loop_nodes.len()),
"Many loops may indicate complex logic; consider simplifying or using sub-workflows",
));
}
}
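/// Suggests adding retry configuration when none is present and warns when a node's
/// max_retries is high enough to cause long failure delays.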
fn check_retry_config(&self, workflow: &Workflow, issues: &mut Vec<HealthIssue>) {
let nodes_with_retries: Vec<_> = workflow
.nodes
.iter()
.filter(|n| n.retry_config.is_some())
.collect();
if nodes_with_retries.is_empty() {
issues.push(HealthIssue::new(
HealthSeverity::Info,
"Resilience",
"No nodes have retry configuration",
"Consider adding retry logic to LLM and API nodes for improved resilience",
));
}
for node in nodes_with_retries {
if let Some(ref retry_config) = node.retry_config {
if retry_config.max_retries > 10 {
issues.push(HealthIssue::new(
HealthSeverity::Warning,
"Resilience",
format!(
"Node '{}' has {} max retries, which may cause long delays",
node.name, retry_config.max_retries
),
"Consider reducing max retries to 3-5 for faster failure detection",
));
}
}
}
}
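/// Validates LLM node configuration: empty prompt templates, very long prompts, and
/// high max_tokens settings.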
fn check_llm_configuration(&self, workflow: &Workflow, issues: &mut Vec<HealthIssue>) {
for node in &workflow.nodes {
if let NodeKind::LLM(ref config) = node.kind {
if config.prompt_template.trim().is_empty() {
issues.push(HealthIssue::new(
HealthSeverity::Error,
"Configuration",
format!("LLM node '{}' has empty prompt template", node.name),
"Provide a meaningful prompt template for LLM execution",
));
}
if config.prompt_template.len() > 10000 {
issues.push(HealthIssue::new(
HealthSeverity::Warning,
"Performance",
format!(
"LLM node '{}' has a very long prompt ({} chars)",
node.name,
config.prompt_template.len()
),
"Long prompts may hit token limits and increase costs",
));
}
if let Some(max_tokens) = config.max_tokens {
if max_tokens > 4000 {
issues.push(HealthIssue::new(
HealthSeverity::Info,
"Cost",
format!(
"LLM node '{}' has max_tokens set to {} (high cost)",
node.name, max_tokens
),
"Review if such high token limits are necessary",
));
}
}
}
}
}
}
impl Default for HealthChecker {
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
use super::*;
use oxify_model::{Edge, LlmConfig, Node, RetryConfig};
#[test]
fn test_empty_workflow() {
let workflow = Workflow::new("test".to_string());
let checker = HealthChecker::new();
let report = checker.check(&workflow);
assert!(!report.is_healthy);
assert!(report.count_by_severity(HealthSeverity::Critical) > 0);
}
#[test]
fn test_workflow_with_isolated_nodes() {
let mut workflow = Workflow::new("test".to_string());
let start = Node::new("start".to_string(), NodeKind::Start);
let end = Node::new("end".to_string(), NodeKind::End);
let start_id = start.id;
let end_id = end.id;
workflow.add_node(start);
workflow.add_node(end);
workflow.add_edge(Edge::new(start_id, end_id));
let isolated = Node::new(
"isolated".to_string(),
NodeKind::LLM(LlmConfig {
provider: "openai".to_string(),
model: "gpt-4".to_string(),
system_prompt: None,
prompt_template: "test".to_string(),
temperature: Some(0.7),
max_tokens: Some(100),
tools: Vec::new(),
images: Vec::new(),
extra_params: serde_json::Value::Null,
}),
);
workflow.add_node(isolated);
let checker = HealthChecker::new();
let report = checker.check(&workflow);
assert!(!report.is_healthy);
let errors = report.get_issues_by_severity(HealthSeverity::Error);
assert!(errors.iter().any(|i| i.category == "Structure"));
}
#[test]
fn test_workflow_with_duplicate_names() {
let mut workflow = Workflow::new("test".to_string());
let node1 = Node::new("duplicate".to_string(), NodeKind::Start);
let node2 = Node::new("duplicate".to_string(), NodeKind::End);
workflow.add_node(node1);
workflow.add_node(node2);
let checker = HealthChecker::new();
let report = checker.check(&workflow);
let warnings = report.get_issues_by_severity(HealthSeverity::Warning);
assert!(warnings.iter().any(|i| i.category == "Naming"));
}
#[test]
fn test_workflow_with_excessive_retries() {
let mut workflow = Workflow::new("test".to_string());
let mut node = Node::new("llm".to_string(), NodeKind::Start);
node.retry_config = Some(RetryConfig {
max_retries: 20,
initial_delay_ms: 1000,
backoff_multiplier: 2.0,
max_delay_ms: 60000,
});
workflow.add_node(node);
let checker = HealthChecker::new();
let report = checker.check(&workflow);
let warnings = report.get_issues_by_severity(HealthSeverity::Warning);
assert!(warnings.iter().any(|i| i.category == "Resilience"));
}
#[test]
fn test_workflow_with_empty_prompt() {
let mut workflow = Workflow::new("test".to_string());
let node = Node::new(
"llm".to_string(),
NodeKind::LLM(LlmConfig {
provider: "openai".to_string(),
model: "gpt-4".to_string(),
system_prompt: None,
prompt_template: "".to_string(),
temperature: Some(0.7),
max_tokens: Some(100),
tools: Vec::new(),
images: Vec::new(),
extra_params: serde_json::Value::Null,
}),
);
workflow.add_node(node);
let checker = HealthChecker::new();
let report = checker.check(&workflow);
let errors = report.get_issues_by_severity(HealthSeverity::Error);
assert!(errors.iter().any(|i| i.category == "Configuration"));
}
#[test]
fn test_health_score_calculation() {
let mut workflow = Workflow::new("test".to_string());
let start = Node::new("start".to_string(), NodeKind::Start);
let end = Node::new("end".to_string(), NodeKind::End);
let start_id = start.id;
let end_id = end.id;
workflow.add_node(start);
workflow.add_node(end);
workflow.add_edge(Edge::new(start_id, end_id));
let checker = HealthChecker::new();
let report = checker.check(&workflow);
assert!(report.health_score >= 80);
assert!(report.is_healthy);
}
}