mod common;
use common::{ContextBuilder, setup_test_memory};
use do_memory_core::{
ComplexityLevel, ExecutionResult, ExecutionStep, SelfLearningMemory, TaskContext, TaskOutcome,
TaskType,
};
use uuid::Uuid;
/// Starts an episode in `domain` and logs `num_decisions` validator/processor step pairs.
///
/// The episode is created with a Rust/tokio context of moderate complexity tagged
/// `decisions`. Each pair is a successful "validator" step followed by a successful
/// "processor" step; steps are numbered consecutively starting at 1. The episode is
/// not completed here, so callers can finish it with whatever outcome they need.
///
/// Returns the id of the started episode.
async fn create_episode_with_decision_points(
    memory: &SelfLearningMemory,
    domain: &str,
    num_decisions: usize,
) -> Uuid {
    let task_context = ContextBuilder::new(domain)
        .language("rust")
        .framework("tokio")
        .complexity(ComplexityLevel::Moderate)
        .tag("decisions")
        .build();
    let task_description = format!("Task with {num_decisions} decision points");

    let episode_id = memory
        .start_episode(task_description, task_context, TaskType::CodeGeneration)
        .await;

    // Builds a step that already carries a successful result with the given output.
    let succeeded = |number: usize, tool: &str, action: &str, output: &str| {
        let mut step = ExecutionStep::new(number, tool.to_string(), action.to_string());
        step.result = Some(ExecutionResult::Success {
            output: output.to_string(),
        });
        step
    };

    for pair in 0..num_decisions {
        // Pair `n` occupies step numbers `2n + 1` (check) and `2n + 2` (process).
        let check_number = pair * 2 + 1;

        let check = succeeded(
            check_number,
            "validator",
            "Check if input is valid",
            "Input is valid",
        );
        memory.log_step(episode_id, check).await;

        let process = succeeded(
            check_number + 1,
            "processor",
            "Process the data",
            "Processed",
        );
        memory.log_step(episode_id, process).await;
    }

    episode_id
}
/// Verifies that completing a successful episode extracts heuristics, and that each
/// heuristic retrieved for the same context is well formed.
///
/// The episode logs three validator/sanitizer step pairs before completing with
/// `TaskOutcome::Success`. Every retrieved heuristic must have a non-empty condition
/// and action, a non-negative confidence, a sample size of at least 2, a positive
/// success rate, and at least one supporting episode id.
#[tokio::test]
#[ignore = "slow integration test - run with --ignored or in release CI"]
async fn test_heuristic_extraction_from_episode() {
    // (tool, action, output) for the two steps logged in each round.
    const ROUND_STEPS: [(&str, &str, &str); 2] = [
        ("validator", "Check data integrity", "Data valid"),
        ("sanitizer", "Sanitize input", "Sanitized"),
    ];

    let memory = setup_test_memory();
    let context = ContextBuilder::new("data-validation")
        .language("rust")
        .complexity(ComplexityLevel::Moderate)
        .build();

    let episode_id = memory
        .start_episode(
            "Validate and process data".to_string(),
            context.clone(),
            TaskType::CodeGeneration,
        )
        .await;

    for round in 0..3 {
        for (offset, &(tool, action, output)) in ROUND_STEPS.iter().enumerate() {
            // Steps are numbered consecutively from 1 across all rounds.
            let step_number = round * 2 + offset + 1;
            let mut step = ExecutionStep::new(step_number, tool.to_string(), action.to_string());
            step.result = Some(ExecutionResult::Success {
                output: output.to_string(),
            });
            memory.log_step(episode_id, step).await;
        }
    }

    let outcome = TaskOutcome::Success {
        verdict: "Data validated and processed".to_string(),
        artifacts: vec![],
    };
    memory.complete_episode(episode_id, outcome).await.unwrap();

    // The completed episode itself must reference the heuristics it produced.
    let stored_episode = memory.get_episode(episode_id).await.unwrap();
    assert!(
        !stored_episode.heuristics.is_empty(),
        "Episode should have extracted heuristics. Got {} heuristics",
        stored_episode.heuristics.len()
    );

    let retrieved = memory.retrieve_relevant_heuristics(&context, 10).await;
    assert!(
        !retrieved.is_empty(),
        "Should retrieve extracted heuristics"
    );

    for item in retrieved.iter() {
        let evidence = &item.evidence;

        assert!(
            !item.condition.is_empty(),
            "Heuristic condition should not be empty"
        );
        assert!(
            !item.action.is_empty(),
            "Heuristic action should not be empty"
        );
        assert!(
            item.confidence >= 0.0,
            "Confidence should be non-negative, got {}",
            item.confidence
        );
        assert!(
            evidence.sample_size >= 2,
            "Sample size should meet minimum threshold of 2, got {}",
            evidence.sample_size
        );
        assert!(
            evidence.success_rate > 0.0,
            "Success rate should be positive for successful episode, got {}",
            evidence.success_rate
        );
        assert!(
            !evidence.episode_ids.is_empty(),
            "Evidence should contain episode IDs"
        );
    }
}
/// Verifies that every heuristic id recorded on a completed episode can be retrieved
/// again through `retrieve_relevant_heuristics`.
///
/// The episode is created by `create_episode_with_decision_points` in the
/// `api-validation` domain and completed successfully. The lookup context repeats the
/// domain, language, framework, complexity and tag that the helper sets.
#[tokio::test]
#[ignore = "slow integration test - run with --ignored or in release CI"]
async fn test_heuristic_storage_in_learning_cycle() {
    let memory = setup_test_memory();
    let episode_id = create_episode_with_decision_points(&memory, "api-validation", 3).await;

    memory
        .complete_episode(
            episode_id,
            TaskOutcome::Success {
                verdict: "API validation successful".to_string(),
                artifacts: vec![],
            },
        )
        .await
        .unwrap();

    let episode = memory.get_episode(episode_id).await.unwrap();
    assert!(
        !episode.heuristics.is_empty(),
        "Episode should have heuristics stored"
    );

    // The context and the retrieval do not depend on the heuristic being checked,
    // so they are done once up front instead of once per heuristic id.
    let context = TaskContext {
        domain: "api-validation".to_string(),
        language: Some("rust".to_string()),
        framework: Some("tokio".to_string()),
        complexity: ComplexityLevel::Moderate,
        tags: vec!["decisions".to_string()],
    };
    let heuristics = memory.retrieve_relevant_heuristics(&context, 10).await;

    for heuristic_id in &episode.heuristics {
        let found = heuristics.iter().any(|h| h.heuristic_id == *heuristic_id);
        assert!(
            found,
            "Should be able to retrieve stored heuristic {heuristic_id}"
        );
    }
}
/// Verifies that heuristics learned in different domains can be retrieved by context.
///
/// Completes three successful episodes (two in `web-api`, one in `database`), then
/// checks that both contexts yield at least one heuristic and that every `web-api`
/// heuristic reports a non-negative confidence.
#[tokio::test]
#[ignore = "slow integration test - run with --ignored or in release CI"]
async fn test_heuristic_retrieval_by_context() {
    let memory = setup_test_memory();

    let domains = ["web-api", "database", "web-api"];
    for domain in &domains {
        let episode_id = create_episode_with_decision_points(&memory, domain, 3).await;
        memory
            .complete_episode(
                episode_id,
                TaskOutcome::Success {
                    verdict: format!("Completed in {domain}"),
                    artifacts: vec![],
                },
            )
            .await
            .unwrap();
    }

    let web_api_context = ContextBuilder::new("web-api")
        .language("rust")
        .framework("tokio")
        .tag("decisions")
        .build();
    let web_api_heuristics = memory
        .retrieve_relevant_heuristics(&web_api_context, 10)
        .await;
    assert!(
        !web_api_heuristics.is_empty(),
        "Should retrieve heuristics for web-api context"
    );

    let database_context = ContextBuilder::new("database")
        .language("rust")
        .framework("tokio")
        .tag("decisions")
        .build();
    let database_heuristics = memory
        .retrieve_relevant_heuristics(&database_context, 10)
        .await;
    assert!(
        !database_heuristics.is_empty(),
        "Should retrieve heuristics for database context"
    );

    // Check every returned heuristic. Walking adjacent pairs would validate nothing
    // when exactly one heuristic comes back, and would re-check the middle ones twice.
    for heuristic in &web_api_heuristics {
        assert!(
            heuristic.confidence >= 0.0,
            "Confidence should be non-negative, got {}",
            heuristic.confidence
        );
    }
}
/// Verifies that `update_heuristic_confidence` folds new outcomes into a heuristic's
/// evidence and confidence.
///
/// After one successful `error-handling` episode, the heuristic returned for a limit
/// of 1 is updated with a success and then with a failure. The test asserts that:
/// - the sample size grows by one per update,
/// - the success rate does not drop after the success and drops after the failure,
/// - after the success, confidence is within 0.01 of `success_rate * sqrt(sample_size)`,
/// - the final confidence differs from the initial one.
#[tokio::test]
// Exact float comparison is intended: the last assertion only checks that the value changed.
#[allow(clippy::float_cmp)]
#[ignore = "slow integration test - run with --ignored or in release CI"]
async fn test_heuristic_confidence_updates() {
    let memory = setup_test_memory();
    let episode_id = create_episode_with_decision_points(&memory, "error-handling", 3).await;

    memory
        .complete_episode(
            episode_id,
            TaskOutcome::Success {
                verdict: "Error handling implemented".to_string(),
                artifacts: vec![],
            },
        )
        .await
        .unwrap();

    let context = ContextBuilder::new("error-handling")
        .language("rust")
        .framework("tokio")
        .tag("decisions")
        .build();

    let initial_heuristics = memory.retrieve_relevant_heuristics(&context, 1).await;
    assert!(
        !initial_heuristics.is_empty(),
        "Should have extracted initial heuristic"
    );

    // Snapshot the values that later assertions compare against.
    let heuristic_id = initial_heuristics[0].heuristic_id;
    let initial_confidence = initial_heuristics[0].confidence;
    let initial_sample_size = initial_heuristics[0].evidence.sample_size;
    let initial_success_rate = initial_heuristics[0].evidence.success_rate;

    let new_episode_id = Uuid::new_v4();
    memory
        .update_heuristic_confidence(
            heuristic_id,
            new_episode_id,
            TaskOutcome::Success {
                verdict: "Applied heuristic successfully".to_string(),
                artifacts: vec![],
            },
        )
        .await
        .unwrap();

    // The heuristic is located by id, so fetch more than one candidate: a limit of 1
    // would make the lookup depend on this heuristic still being returned first.
    let updated_heuristics = memory.retrieve_relevant_heuristics(&context, 10).await;
    let updated = updated_heuristics
        .iter()
        .find(|h| h.heuristic_id == heuristic_id)
        .expect("Should find updated heuristic");

    assert_eq!(
        updated.evidence.sample_size,
        initial_sample_size + 1,
        "Sample size should increase by 1"
    );
    assert!(
        updated.evidence.success_rate >= initial_success_rate,
        "Success rate should increase or stay same after successful outcome"
    );

    // Sample sizes in this test are tiny, so the integer-to-f32 cast loses nothing.
    #[allow(clippy::cast_precision_loss)]
    let expected_confidence =
        updated.evidence.success_rate * (updated.evidence.sample_size as f32).sqrt();
    assert!(
        (updated.confidence - expected_confidence).abs() < 0.01,
        "Confidence should be recalculated correctly. Expected ~{}, got {}",
        expected_confidence,
        updated.confidence
    );

    let failure_episode_id = Uuid::new_v4();
    memory
        .update_heuristic_confidence(
            heuristic_id,
            failure_episode_id,
            TaskOutcome::Failure {
                reason: "Heuristic didn't work".to_string(),
                error_details: None,
            },
        )
        .await
        .unwrap();

    // Same reasoning as above: look the heuristic up by id among several candidates.
    let final_heuristics = memory.retrieve_relevant_heuristics(&context, 10).await;
    let final_heuristic = final_heuristics
        .iter()
        .find(|h| h.heuristic_id == heuristic_id)
        .expect("Should find heuristic after failure update");

    assert_eq!(
        final_heuristic.evidence.sample_size,
        initial_sample_size + 2,
        "Sample size should increase by 2 after two updates"
    );
    assert!(
        final_heuristic.evidence.success_rate < updated.evidence.success_rate,
        "Success rate should decrease after failure"
    );
    assert_ne!(
        final_heuristic.confidence, initial_confidence,
        "Confidence should have changed from initial value"
    );
}
/// Verifies that only sufficiently confident heuristics are extracted.
///
/// Two episodes are completed:
/// - a `high-confidence` episode with four decision pairs and a `Success` outcome,
///   whose retrieved heuristics must all have confidence of at least 0.7;
/// - a `low-confidence` episode with a single step and a `PartialSuccess` outcome,
///   which must end up with no heuristics attached.
#[tokio::test]
#[ignore = "slow integration test - run with --ignored or in release CI"]
async fn test_heuristic_filtering_by_confidence() {
    let memory = setup_test_memory();

    let high_success_id = create_episode_with_decision_points(&memory, "high-confidence", 4).await;
    memory
        .complete_episode(
            high_success_id,
            TaskOutcome::Success {
                verdict: "Highly successful".to_string(),
                artifacts: vec![],
            },
        )
        .await
        .unwrap();

    let context = ContextBuilder::new("low-confidence")
        .language("rust")
        .complexity(ComplexityLevel::Simple)
        .build();
    // `context` is not needed after this call, so it is moved rather than cloned.
    let partial_id = memory
        .start_episode(
            "Partial success task".to_string(),
            context,
            TaskType::CodeGeneration,
        )
        .await;

    let mut decision_step =
        ExecutionStep::new(1, "validator".to_string(), "Check if valid".to_string());
    decision_step.result = Some(ExecutionResult::Success {
        output: "Valid".to_string(),
    });
    memory.log_step(partial_id, decision_step).await;

    memory
        .complete_episode(
            partial_id,
            TaskOutcome::PartialSuccess {
                verdict: "Partially successful".to_string(),
                completed: vec!["part1".to_string()],
                failed: vec!["part2".to_string()],
            },
        )
        .await
        .unwrap();

    let high_context = ContextBuilder::new("high-confidence")
        .language("rust")
        .framework("tokio")
        .tag("decisions")
        .build();
    let high_heuristics = memory.retrieve_relevant_heuristics(&high_context, 10).await;
    assert!(
        !high_heuristics.is_empty(),
        "Should extract heuristics from high-confidence episode"
    );
    for heuristic in &high_heuristics {
        assert!(
            heuristic.confidence >= 0.7,
            "Extracted heuristic should meet minimum confidence threshold of 0.7, got {}",
            heuristic.confidence
        );
    }

    let low_episode = memory.get_episode(partial_id).await.unwrap();
    assert!(
        low_episode.heuristics.is_empty(),
        "Low confidence episode should not have extracted heuristics"
    );
}
/// Exercises the full learning loop: learn from several episodes, then retrieve the
/// learned heuristics for a similar but not identical task.
///
/// Three successful `authentication` episodes are completed. Heuristics retrieved for
/// the matching context must have a sample size of at least 2 and a success rate of at
/// least 0.9. A second context that differs only in its tag (`security`) must still
/// return heuristics with substantive condition/action text and positive confidence,
/// and the first of them must mention one of a few expected keywords.
#[tokio::test]
#[ignore = "slow integration test - run with --ignored or in release CI"]
async fn test_end_to_end_heuristic_learning() {
    let memory = setup_test_memory();

    // Learning phase: three successful runs of the same kind of task.
    for run in 0..3 {
        let episode_id = create_episode_with_decision_points(&memory, "authentication", 3).await;
        let outcome = TaskOutcome::Success {
            verdict: format!("Authentication flow {run} completed"),
            artifacts: vec![],
        };
        memory.complete_episode(episode_id, outcome).await.unwrap();
    }

    let auth_context = ContextBuilder::new("authentication")
        .language("rust")
        .framework("tokio")
        .tag("decisions")
        .build();
    let learned_heuristics = memory.retrieve_relevant_heuristics(&auth_context, 10).await;
    assert!(
        !learned_heuristics.is_empty(),
        "Should have learned heuristics from multiple episodes"
    );

    for learned in learned_heuristics.iter() {
        let evidence = &learned.evidence;
        assert!(
            evidence.sample_size >= 2,
            "Heuristics should be based on multiple samples, got {}",
            evidence.sample_size
        );
        assert!(
            evidence.success_rate >= 0.9,
            "Success rate should be high for successful episodes, got {}",
            evidence.success_rate
        );
    }

    // Application phase: same domain/language/framework, different tag.
    let new_task_context = ContextBuilder::new("authentication")
        .language("rust")
        .framework("tokio")
        .tag("security")
        .build();
    let relevant = memory
        .retrieve_relevant_heuristics(&new_task_context, 5)
        .await;
    assert!(
        !relevant.is_empty(),
        "Should retrieve relevant learned heuristics for similar task"
    );

    for candidate in relevant.iter() {
        assert!(candidate.condition.len() > 10);
        assert!(candidate.action.len() > 10);
        assert!(candidate.confidence > 0.0);
    }

    let top = &relevant[0];
    let expected_keywords = ["authentication", "rust", "Check", "Validate"];
    let mentions_keyword = expected_keywords
        .iter()
        .any(|keyword| top.condition.contains(*keyword));
    assert!(
        mentions_keyword,
        "Heuristic condition should be contextually relevant: {}",
        top.condition
    );
}
/// Verifies that an episode which is started and has steps logged, but is never
/// completed, carries no heuristics and does not report itself as complete.
#[tokio::test]
async fn test_no_heuristic_extraction_from_incomplete_episode() {
    let memory = setup_test_memory();
    let context = ContextBuilder::new("incomplete-test")
        .language("rust")
        .build();

    // `context` is not used after this call, so it is moved rather than cloned.
    let episode_id = memory
        .start_episode(
            "Incomplete task".to_string(),
            context,
            TaskType::CodeGeneration,
        )
        .await;

    for i in 0..3 {
        let mut step =
            ExecutionStep::new(i + 1, "validator".to_string(), "Check validity".to_string());
        step.result = Some(ExecutionResult::Success {
            output: "Valid".to_string(),
        });
        memory.log_step(episode_id, step).await;
    }

    // Deliberately no `complete_episode` call.
    let episode = memory.get_episode(episode_id).await.unwrap();
    assert!(
        episode.heuristics.is_empty(),
        "Incomplete episode should have no heuristics"
    );
    assert!(!episode.is_complete());
}
/// Verifies that an episode completed with `TaskOutcome::Failure` yields no heuristics,
/// neither on the stored episode nor via retrieval for the `failed-task` context.
#[tokio::test]
#[ignore = "slow integration test - run with --ignored or in release CI"]
async fn test_no_heuristic_extraction_from_failed_episode() {
    let memory = setup_test_memory();
    let episode_id = create_episode_with_decision_points(&memory, "failed-task", 3).await;

    let failure = TaskOutcome::Failure {
        reason: "Task failed completely".to_string(),
        error_details: Some("Critical error".to_string()),
    };
    memory.complete_episode(episode_id, failure).await.unwrap();

    let stored_episode = memory.get_episode(episode_id).await.unwrap();
    assert!(
        stored_episode.heuristics.is_empty(),
        "Failed episode should not have extracted heuristics"
    );

    let lookup_context = ContextBuilder::new("failed-task")
        .language("rust")
        .framework("tokio")
        .build();
    let retrieved = memory
        .retrieve_relevant_heuristics(&lookup_context, 10)
        .await;
    assert!(
        retrieved.is_empty(),
        "Should not retrieve heuristics from failed episodes"
    );
}
/// Covers two cases in which the test expects no heuristics to be attached to an episode:
///
/// 1. a successful episode whose only step is a plain "reader" step;
/// 2. a failed episode in which every "validator" step ended with an error result.
#[tokio::test]
#[ignore = "slow integration test - run with --ignored or in release CI"]
async fn test_heuristic_edge_cases() {
    let memory = setup_test_memory();
    let context = ContextBuilder::new("no-decisions").language("rust").build();

    // Case 1: successful episode with a single non-decision step.
    // The clone is needed here because `context` is reused for the second episode.
    let no_decision_id = memory
        .start_episode(
            "Task without decisions".to_string(),
            context.clone(),
            TaskType::CodeGeneration,
        )
        .await;

    let mut step = ExecutionStep::new(1, "reader".to_string(), "Read file".to_string());
    step.result = Some(ExecutionResult::Success {
        output: "Read".to_string(),
    });
    memory.log_step(no_decision_id, step).await;

    memory
        .complete_episode(
            no_decision_id,
            TaskOutcome::Success {
                verdict: "Done".to_string(),
                artifacts: vec![],
            },
        )
        .await
        .unwrap();

    let episode = memory.get_episode(no_decision_id).await.unwrap();
    assert!(
        episode.heuristics.is_empty(),
        "Episode without decision points should have no heuristics"
    );

    // Case 2: every step errors and the episode fails.
    // This is the last use of `context`, so it is moved rather than cloned.
    let all_failed_id = memory
        .start_episode(
            "All failures".to_string(),
            context,
            TaskType::CodeGeneration,
        )
        .await;

    for i in 0..3 {
        let mut step =
            ExecutionStep::new(i + 1, "validator".to_string(), "Check if valid".to_string());
        step.result = Some(ExecutionResult::Error {
            message: "Validation failed".to_string(),
        });
        memory.log_step(all_failed_id, step).await;
    }

    memory
        .complete_episode(
            all_failed_id,
            TaskOutcome::Failure {
                reason: "All validations failed".to_string(),
                error_details: None,
            },
        )
        .await
        .unwrap();

    let failed_episode = memory.get_episode(all_failed_id).await.unwrap();
    assert!(
        failed_episode.heuristics.is_empty(),
        "Episode with all failed steps should have no heuristics"
    );
}