do-memory-core 0.1.30

//! Integration tests for `PREMem` (Pre-Storage Reasoning) Phase 1
//!
//! Tests quality assessment and salient feature extraction integration
//! into the `SelfLearningMemory` workflow.

use do_memory_core::ExecutionStep;
use do_memory_core::memory::SelfLearningMemory;
use do_memory_core::types::{
    ComplexityLevel, ExecutionResult, MemoryConfig, TaskContext, TaskOutcome, TaskType,
};

/// Helper to create a high-quality episode (complex, diverse, reflective)
fn create_high_quality_episode_data() -> (String, TaskContext, TaskType, Vec<ExecutionStep>) {
    let context = TaskContext {
        language: Some("rust".to_string()),
        framework: Some("tokio".to_string()),
        complexity: ComplexityLevel::Complex,
        domain: "async-web-api".to_string(),
        tags: vec!["http".to_string(), "rest".to_string()],
    };

    let task_description =
        "Implement comprehensive async REST API with authentication and validation".to_string();
    let task_type = TaskType::CodeGeneration;

    // Create diverse, complex steps
    let mut steps = Vec::new();

    // Step 1: Planning with decision
    let mut execution_step1 = ExecutionStep::new(
        1,
        "planner".to_string(),
        "Choose async implementation strategy".to_string(),
    );
    execution_step1.parameters = serde_json::json!({
        "strategy": "tokio-async",
        "approach": "layered-architecture"
    });
    execution_step1.result = Some(ExecutionResult::Success {
        output: "Strategy selected: async with layered architecture".to_string(),
    });
    execution_step1.latency_ms = 120;
    steps.push(execution_step1);

    // Step 2-4: Tool sequence
    for i in 2..=4 {
        let mut step = ExecutionStep::new(i, format!("builder_{i}"), format!("Build layer {i}"));
        step.result = Some(ExecutionResult::Success {
            output: format!("Layer {i} complete"),
        });
        step.latency_ms = 100 + (i as u64 * 10);
        steps.push(step);
    }

    // Step 5: Error
    let mut error_step = ExecutionStep::new(
        5,
        "validator".to_string(),
        "Validate API endpoints".to_string(),
    );
    error_step.result = Some(ExecutionResult::Error {
        message: "Validation failed: missing authentication header".to_string(),
    });
    error_step.latency_ms = 80;
    steps.push(error_step);

    // Step 6: Recovery
    let mut recovery_step = ExecutionStep::new(
        6,
        "validator".to_string(),
        "Add authentication header validation".to_string(),
    );
    recovery_step.result = Some(ExecutionResult::Success {
        output: "Authentication validation added".to_string(),
    });
    recovery_step.latency_ms = 150;
    steps.push(recovery_step);

    // Steps 7-10: More diverse work
    for i in 7..=10 {
        let mut step = ExecutionStep::new(
            i,
            format!("integrator_{}", i % 3),
            format!("Integration step {i}"),
        );
        step.result = Some(ExecutionResult::Success {
            output: format!("Integration {i} complete"),
        });
        step.latency_ms = 90 + (i as u64 * 5);
        steps.push(step);
    }

    (task_description, context, task_type, steps)
}

/// Helper to create a low-quality episode (simple, few steps, no diversity)
fn create_low_quality_episode_data() -> (String, TaskContext, TaskType, Vec<ExecutionStep>) {
    let context = TaskContext {
        language: Some("rust".to_string()),
        framework: None,
        complexity: ComplexityLevel::Simple,
        domain: "testing".to_string(),
        tags: vec![],
    };

    let task_description = "Test".to_string(); // Very short description
    let task_type = TaskType::Testing;

    // Only 2 simple steps with same tool
    let mut steps = Vec::new();
    for i in 1..=2 {
        let mut step =
            ExecutionStep::new(i, "simple_tool".to_string(), "Simple action".to_string());
        step.result = Some(ExecutionResult::Success {
            output: "OK".to_string(),
        });
        step.latency_ms = 10;
        steps.push(step);
    }

    (task_description, context, task_type, steps)
}

#[tokio::test]
async fn test_high_quality_episode_accepted() {
    // Create memory system with threshold set to 0.5 for testing
    // (The episode we create scores ~0.59, which is acceptable quality)
    let config = MemoryConfig {
        quality_threshold: 0.5,
        ..Default::default()
    };
    let memory = SelfLearningMemory::with_config(config);

    let (task_description, context, task_type, steps) = create_high_quality_episode_data();

    // Start episode
    let episode_id = memory
        .start_episode(task_description, context, task_type)
        .await;

    // Log all steps
    for step in steps {
        memory.log_step(episode_id, step).await;
    }

    // Flush steps before completing
    memory.flush_steps(episode_id).await.unwrap();

    // Complete with success
    let result = memory
        .complete_episode(
            episode_id,
            TaskOutcome::Success {
                verdict: "API implemented successfully with comprehensive authentication, validation, and error recovery patterns"
                    .to_string(),
                artifacts: vec![
                    "api.rs".to_string(),
                    "auth.rs".to_string(),
                    "validation.rs".to_string(),
                    "tests.rs".to_string(),
                ],
            },
        )
        .await;

    // Should succeed - high quality episode
    assert!(
        result.is_ok(),
        "High-quality episode should be accepted, got error: {:?}",
        result.err()
    );

    // Verify episode was stored with salient features
    let episode = memory.get_episode(episode_id).await.unwrap();
    assert!(episode.is_complete());
    assert!(episode.reward.is_some());
    assert!(episode.reflection.is_some());
    assert!(
        episode.salient_features.is_some(),
        "Salient features should be extracted for high-quality episode"
    );

    // Verify salient features contain meaningful data
    let features = episode.salient_features.as_ref().unwrap();
    assert!(!features.is_empty(), "Salient features should not be empty");
    assert!(features.count() > 0, "Should have extracted some features");

    // Should have critical decisions (from strategy choice)
    assert!(
        !features.critical_decisions.is_empty(),
        "Should have extracted critical decisions"
    );

    // Should have tool combinations (from multi-step sequence)
    assert!(
        !features.tool_combinations.is_empty(),
        "Should have extracted tool combinations"
    );

    // Should have error recovery patterns
    assert!(
        !features.error_recovery_patterns.is_empty(),
        "Should have extracted error recovery patterns"
    );

    // Should have key insights
    assert!(
        !features.key_insights.is_empty(),
        "Should have extracted key insights"
    );
}

#[tokio::test]
async fn test_low_quality_episode_rejected() {
    // Create memory system with default quality threshold (0.7)
    let memory = SelfLearningMemory::new();

    let (task_description, context, task_type, steps) = create_low_quality_episode_data();

    // Start episode
    let episode_id = memory
        .start_episode(task_description, context, task_type)
        .await;

    // Log steps
    for step in steps {
        memory.log_step(episode_id, step).await;
    }

    // Complete with minimal success
    let result = memory
        .complete_episode(
            episode_id,
            TaskOutcome::Success {
                verdict: "Done".to_string(),
                artifacts: vec![],
            },
        )
        .await;

    // Should fail - low quality episode
    assert!(result.is_err(), "Low-quality episode should be rejected");

    // Verify error is ValidationFailed
    match result.unwrap_err() {
        do_memory_core::Error::ValidationFailed(msg) => {
            assert!(msg.contains("quality score"));
            assert!(msg.contains("below threshold"));
        }
        other => panic!("Expected ValidationFailed error, got: {other:?}"),
    }
}

#[tokio::test]
async fn test_custom_quality_threshold() {
    // Create memory system with lower threshold (0.4)
    let config = MemoryConfig {
        quality_threshold: 0.4,
        ..Default::default()
    };
    let memory = SelfLearningMemory::with_config(config);

    let (task_description, context, task_type, steps) = create_low_quality_episode_data();

    // Start episode
    let episode_id = memory
        .start_episode(task_description, context, task_type)
        .await;

    // Log steps
    for step in steps {
        memory.log_step(episode_id, step).await;
    }

    // Complete
    let result = memory
        .complete_episode(
            episode_id,
            TaskOutcome::Success {
                verdict: "Done".to_string(),
                artifacts: vec![],
            },
        )
        .await;

    // With lower threshold, this episode might be accepted
    // (depends on exact quality score calculation)
    match result {
        Ok(()) => {
            let episode = memory.get_episode(episode_id).await.unwrap();
            assert!(episode.salient_features.is_some());
        }
        Err(err) => {
            // If still rejected, that's also valid - quality might still be below 0.4
            match err {
                do_memory_core::Error::ValidationFailed(_) => {
                    // Expected if quality is below 0.4
                }
                other => panic!("Unexpected error: {other:?}"),
            }
        }
    }
}

#[tokio::test]
async fn test_salient_features_storage_in_cache() {
    // This test would require actual storage backends (Turso + redb)
    // For now, we test that the in-memory storage includes salient features

    let config = MemoryConfig {
        quality_threshold: 0.5, // Lower threshold for testing
        ..Default::default()
    };
    let memory = SelfLearningMemory::with_config(config);

    let (task_description, context, task_type, steps) = create_high_quality_episode_data();

    let episode_id = memory
        .start_episode(task_description, context, task_type)
        .await;

    for step in steps {
        memory.log_step(episode_id, step).await;
    }

    memory
        .complete_episode(
            episode_id,
            TaskOutcome::Success {
                verdict: "Success".to_string(),
                artifacts: vec!["artifact.rs".to_string()],
            },
        )
        .await
        .unwrap();

    // Retrieve episode
    let episode = memory.get_episode(episode_id).await.unwrap();

    // Verify salient features are present and correctly deserialized
    assert!(episode.salient_features.is_some());
    let features = episode.salient_features.unwrap();

    // Verify individual feature types are extractable
    assert!(!features.critical_decisions.is_empty());
    for decision in &features.critical_decisions {
        assert!(!decision.is_empty(), "Decision should have content");
    }

    assert!(!features.tool_combinations.is_empty());
    for combo in &features.tool_combinations {
        assert!(
            combo.len() >= 2,
            "Tool combination should have at least 2 tools"
        );
    }

    assert!(!features.error_recovery_patterns.is_empty());
    for pattern in &features.error_recovery_patterns {
        assert!(
            pattern.contains("->"),
            "Recovery pattern should show error->recovery"
        );
    }
}

#[tokio::test]
async fn test_performance_overhead() {
    use std::time::Instant;

    // Measure overhead of quality assessment and salient extraction
    let config = MemoryConfig {
        quality_threshold: 0.5, // Lower threshold for testing
        ..Default::default()
    };
    let memory = SelfLearningMemory::with_config(config);

    let (task_description, context, task_type, steps) = create_high_quality_episode_data();

    let episode_id = memory
        .start_episode(task_description, context, task_type)
        .await;

    for step in steps {
        memory.log_step(episode_id, step).await;
    }

    let start = Instant::now();
    let result = memory
        .complete_episode(
            episode_id,
            TaskOutcome::Success {
                verdict: "Success".to_string(),
                artifacts: vec!["artifact.rs".to_string()],
            },
        )
        .await;
    let elapsed = start.elapsed();

    assert!(result.is_ok());

    // Verify overhead is reasonable (< 100ms for in-memory operations)
    assert!(
        elapsed.as_millis() < 100,
        "Complete episode with PREMem should take < 100ms, took: {}ms",
        elapsed.as_millis()
    );

    println!(
        "Complete episode with PREMem overhead: {}ms",
        elapsed.as_millis()
    );
}

#[tokio::test]
async fn test_rejection_logging() {
    // This test verifies that rejections are logged appropriately
    // In a real environment, you'd capture logs, but for now we just verify the error

    let memory = SelfLearningMemory::new();

    let (task_description, context, task_type, steps) = create_low_quality_episode_data();

    let episode_id = memory
        .start_episode(task_description, context, task_type)
        .await;

    for step in steps {
        memory.log_step(episode_id, step).await;
    }

    let result = memory
        .complete_episode(
            episode_id,
            TaskOutcome::Success {
                verdict: "Done".to_string(),
                artifacts: vec![],
            },
        )
        .await;

    // Verify rejection with clear error message
    assert!(result.is_err());
    let err = result.unwrap_err();
    let err_msg = format!("{err}");

    // Error message should include quality score and threshold
    assert!(err_msg.contains("quality score"));
    assert!(err_msg.contains("threshold"));

    // Should be able to parse the quality score from the message
    // Format: "Episode quality score (0.XX) below threshold (0.70)"
    assert!(err_msg.contains('(') && err_msg.contains(')'));
}