// codex-memory 3.0.15

use crate::common::TestDatabaseManager;
use anyhow::Result;
use codex_memory::{mcp_server::MCPHandlers, Storage};
use serde_json::json;
use std::sync::Arc;

/// Stores a memory while the LLM backend may be unreachable and checks that the
/// call either persists the content intact or returns an error value.
#[tokio::test]
async fn test_ollama_service_unavailable() -> Result<()> {
    let mut manager = TestDatabaseManager::new()?;
    let pool = manager.setup_test_database().await?;
    let storage = Arc::new(Storage::new(pool));
    let handlers = MCPHandlers::new(storage.clone());

    // The request supplies its own context and summary alongside the content.
    let expected_content = "This content should be stored even if Ollama is down";
    let request = json!({
        "content": expected_content,
        "context": "Test context for Ollama service unavailable test",
        "summary": "Test summary for Ollama service unavailable test",
        "tags": ["ollama-down", "fallback"]
    });

    let outcome = handlers.handle_tool_call("store_memory", request).await;

    match outcome {
        Err(err) => {
            // An error is tolerated: the test only requires that the failure
            // surfaces as an `Err` value rather than a panic.
            println!("Storage failed when LLM unavailable: {}", err);
        }
        Ok(reply) => {
            println!("Content stored successfully despite potential LLM unavailability");

            // Read-back is only possible when the reply exposes a string `id`.
            if let Some(raw_id) = reply["id"].as_str() {
                let memory_id = uuid::Uuid::parse_str(raw_id)?;
                let stored = storage
                    .get(memory_id)
                    .await?
                    .expect("Should retrieve stored content");

                assert_eq!(stored.content, expected_content);
                assert_eq!(stored.tags, vec!["ollama-down", "fallback"]);

                // Printed for manual inspection only; nothing is asserted
                // about the stored context or summary here.
                println!(
                    "Context: {:?}, Summary: {:?}",
                    stored.context, stored.summary
                );
            }
        }
    }

    manager.cleanup().await?;
    Ok(())
}

/// Feeds large or awkward payloads through `store_memory`, timing each call.
/// A success must round-trip the content length; a failure must carry an
/// error message that mentions one of the expected processing problems.
#[tokio::test]
async fn test_llm_timeout_scenarios() -> Result<()> {
    let mut manager = TestDatabaseManager::new()?;
    let pool = manager.setup_test_database().await?;
    let storage = Arc::new(Storage::new(pool));
    let handlers = MCPHandlers::new(storage.clone());

    // Calls slower than this only produce a warning, never a test failure.
    let slow_threshold = std::time::Duration::from_secs(60);
    // Substrings (lower-case) that mark an error as an acceptable failure.
    let acceptable_error_markers = ["timeout", "processing", "connection", "unavailable"];

    // Payloads that might cause the LLM to time out.
    let payloads = [
        // Extremely repetitive content
        "repeat ".repeat(10000),

        // Very long single sentence
        "This is an extremely long run-on sentence that goes on and on without any punctuation or breaks and might cause the LLM to struggle with processing or generating appropriate summaries or context ".repeat(500),

        // Mixed languages that might confuse the LLM
        "English text 中文内容 русский текст العربية मराठी ภาษาไทย 한국어 日本語 ".repeat(100),

        // Technical content with many special tokens
        "fn main() { let mut vec: Vec<Arc<RwLock<HashMap<String, Box<dyn Trait>>>>> = Vec::new(); }".repeat(1000),
    ];

    for (idx, payload) in payloads.iter().enumerate() {
        println!("Testing potential timeout content #{}", idx);

        let request = json!({
            "content": payload,
            "context": format!("Timeout test context {}", idx),
            "summary": format!("Timeout test summary {}", idx),
            "tags": [format!("timeout-test-{}", idx)]
        });

        let timer = std::time::Instant::now();
        let outcome = handlers.handle_tool_call("store_memory", request).await;
        let elapsed = timer.elapsed();

        println!("Processing took: {:?}", elapsed);

        match outcome {
            Err(err) => {
                println!("Content #{} failed (timeout expected): {}", idx, err);

                // The error text must point at a timeout/processing problem.
                let lowered = err.to_string().to_lowercase();
                let recognised = acceptable_error_markers
                    .iter()
                    .any(|marker| lowered.contains(*marker));
                assert!(
                    recognised,
                    "Error should indicate processing/timeout issue: {}",
                    err
                );
            }
            Ok(reply) => {
                println!("Content #{} processed successfully", idx);

                if elapsed > slow_threshold {
                    println!(
                        "WARNING: Processing took longer than expected: {:?}",
                        elapsed
                    );
                }

                // Read the record back and compare byte lengths.
                if let Some(raw_id) = reply["id"].as_str() {
                    let memory_id = uuid::Uuid::parse_str(raw_id)?;
                    let stored = storage
                        .get(memory_id)
                        .await?
                        .expect("Should retrieve content");

                    assert_eq!(stored.content.len(), payload.len());
                }
            }
        }
    }

    manager.cleanup().await?;
    Ok(())
}

/// Stores inputs containing quotes, JSON-like text, control characters and
/// whitespace-only content. On success the content must round-trip exactly and
/// context/summary must be non-empty and bounded; on failure the error text
/// must not mention a panic or an unwrap.
#[tokio::test]
async fn test_llm_malformed_response_handling() -> Result<()> {
    let mut manager = TestDatabaseManager::new()?;
    let pool = manager.setup_test_database().await?;
    let storage = Arc::new(Storage::new(pool));
    let handlers = MCPHandlers::new(storage.clone());

    // Inputs that might provoke malformed LLM responses.
    let tricky_inputs = [
        // Quotes and special characters that could break naive JSON handling
        r#"Content with "quotes" and 'apostrophes' and `backticks` and \backslashes"#,
        // Text that itself looks like JSON
        r#"{"this": "looks like json", "but": "it's actually content"}"#,
        // Control characters, including an embedded NUL
        "Content with\nnewlines\tand\ttabs\rand\0nulls",
        // Whitespace-only content
        "\n\n\n   \t\t\t   \n\n",
        // Text that might trigger safety filters
        "This content discusses security vulnerabilities and potential attack vectors",
    ];

    for (idx, input) in tricky_inputs.iter().copied().enumerate() {
        println!(
            "Testing problematic input #{}: {}",
            idx,
            input.escape_debug()
        );

        let request = json!({
            "content": input,
            "context": format!("Malformed test context {}", idx),
            "summary": format!("Malformed test summary {}", idx),
            "tags": [format!("malformed-test-{}", idx)]
        });

        let outcome = handlers.handle_tool_call("store_memory", request).await;

        match outcome {
            Err(err) => {
                println!("Problematic input #{} failed: {}", idx, err);

                // A failure is acceptable as long as the message does not
                // mention a panic or an unwrap.
                let message = err.to_string();
                let forbidden = [
                    ("panic", "Should not panic on malformed input"),
                    ("unwrap", "Should handle errors gracefully"),
                ];
                for (needle, failure_note) in forbidden.iter() {
                    assert!(!message.contains(*needle), "{}", failure_note);
                }
            }
            Ok(reply) => {
                println!("Problematic input #{} handled successfully", idx);

                if let Some(raw_id) = reply["id"].as_str() {
                    let memory_id = uuid::Uuid::parse_str(raw_id)?;
                    let stored = storage
                        .get(memory_id)
                        .await?
                        .expect("Should retrieve content");

                    // The content must come back exactly as it was sent.
                    assert_eq!(stored.content, input);

                    // Context must be present and of a sane size.
                    let context_len = stored.context.len();
                    assert!(
                        !stored.context.is_empty(),
                        "Context should not be empty string"
                    );
                    assert!(
                        context_len < 10000,
                        "Context should not be excessively long"
                    );

                    // Summary must be present and of a sane size.
                    let summary_len = stored.summary.len();
                    assert!(
                        !stored.summary.is_empty(),
                        "Summary should not be empty string"
                    );
                    assert!(
                        summary_len < 5000,
                        "Summary should not be excessively long"
                    );
                }
            }
        }
    }

    manager.cleanup().await?;
    Ok(())
}

/// Stores progressively larger documents built by repeating one paragraph.
/// A success must preserve the full content length and keep the summary much
/// shorter than the content; a failure must name a size or processing limit.
#[tokio::test]
async fn test_llm_context_length_exceeded() -> Result<()> {
    let mut manager = TestDatabaseManager::new()?;
    let pool = manager.setup_test_database().await?;
    let storage = Arc::new(Storage::new(pool));
    let handlers = MCPHandlers::new(storage.clone());

    // Paragraph repeated to build each oversized document.
    let base_text = "This is a sample paragraph that will be repeated many times to create content that exceeds the context window limits of most language models. It contains various concepts and ideas that might be interesting to summarize. ";

    // Substrings (lower-case) that mark an error as a size/processing limit.
    let limit_markers = ["too large", "context", "limit", "timeout", "memory"];

    // All documents are built up front; their exact sizes are printed below.
    let repeat_counts: [usize; 4] = [1000, 2000, 4000, 8000];
    let documents: Vec<(usize, String)> = repeat_counts
        .iter()
        .map(|&count| (count, base_text.repeat(count)))
        .collect();

    for (repeat_count, document) in documents {
        let document_len = document.len();
        println!(
            "Testing context length with {} repeats ({} chars)",
            repeat_count, document_len
        );

        let request = json!({
            "content": document,
            "context": format!("Context length test context {}", repeat_count),
            "summary": format!("Context length test summary {}", repeat_count),
            "tags": [format!("context-length-{}", repeat_count)]
        });

        let timer = std::time::Instant::now();
        let outcome = handlers.handle_tool_call("store_memory", request).await;
        let elapsed = timer.elapsed();

        println!("Processing took: {:?}", elapsed);

        match outcome {
            Err(err) => {
                println!("Large content ({} chars) failed: {}", document_len, err);

                // The failure must be attributable to a size/processing limit.
                let lowered = err.to_string().to_lowercase();
                let recognised = limit_markers
                    .iter()
                    .any(|marker| lowered.contains(*marker));
                assert!(
                    recognised,
                    "Error should indicate size/processing limitation: {}",
                    err
                );
            }
            Ok(reply) => {
                println!(
                    "Large content ({} chars) processed successfully",
                    document_len
                );

                if let Some(raw_id) = reply["id"].as_str() {
                    let memory_id = uuid::Uuid::parse_str(raw_id)?;
                    let stored = storage
                        .get(memory_id)
                        .await?
                        .expect("Should retrieve content");

                    // The whole document must have been preserved.
                    assert_eq!(stored.content.len(), document_len);

                    // Ratio of content bytes to summary bytes; the summary is
                    // expected to be more than ten times smaller.
                    let content_bytes = document_len as f64;
                    let summary_bytes = stored.summary.len() as f64;
                    let compression_ratio = content_bytes / summary_bytes;
                    println!("Compression ratio: {:.2}:1", compression_ratio);

                    assert!(
                        compression_ratio > 10.0,
                        "Summary should compress content significantly"
                    );
                }
            }
        }
    }

    manager.cleanup().await?;
    Ok(())
}

/// Fires 20 `store_memory` calls concurrently and checks that at least one
/// succeeds and that the whole batch finishes within a bounded time.
///
/// All tasks share a single deadline: the wait for the entire batch is capped
/// at `timeout_duration` rather than restarting the timeout for every handle.
/// Tasks that miss the deadline are aborted so they cannot keep using the
/// database while `manager.cleanup()` runs.
#[tokio::test]
async fn test_concurrent_llm_requests() -> Result<()> {
    let mut manager = TestDatabaseManager::new()?;
    let pool = manager.setup_test_database().await?;
    let storage = Arc::new(Storage::new(pool));
    let handlers = Arc::new(MCPHandlers::new(storage));

    // Launch many concurrent requests that require LLM processing
    let mut handles = vec![];

    for i in 0..20 {
        let handlers_clone = handlers.clone();
        let handle = tokio::spawn(async move {
            let content = format!(
                "Concurrent LLM test content #{} - This is a longer piece of text that should
                 trigger LLM processing for context and summary generation. It contains multiple
                 sentences and concepts that need to be analyzed and processed by the language model
                 to generate appropriate metadata and summaries.",
                i
            );

            let params = json!({
                "content": content,
                "context": format!("Concurrent LLM test context {}", i),
                "summary": format!("Concurrent LLM test summary {}", i),
                "tags": [format!("concurrent-llm-{}", i), "stress-test"]
            });

            handlers_clone
                .handle_tool_call("store_memory", params)
                .await
        });
        handles.push(handle);
    }

    // Wait for all requests against one shared deadline (generous for LLM).
    let timeout_duration = std::time::Duration::from_secs(120);
    let start = std::time::Instant::now();
    let deadline = tokio::time::Instant::now() + timeout_duration;

    let mut successes = 0;
    let mut failures = 0;
    let mut timeouts = 0;

    for mut handle in handles {
        // Await by mutable reference so the handle stays available for
        // `abort()`; bind the result first so that borrow ends before the match.
        let outcome = tokio::time::timeout_at(deadline, &mut handle).await;

        match outcome {
            Ok(Ok(Ok(_))) => successes += 1,
            Ok(Ok(Err(e))) => {
                println!("LLM request failed: {}", e);
                failures += 1;
            }
            Ok(Err(e)) => {
                // The spawned task itself failed (panic or cancellation).
                println!("Task failed: {}", e);
                failures += 1;
            }
            Err(_) => {
                println!("Request timed out");
                // Stop the straggler instead of leaving it detached.
                handle.abort();
                timeouts += 1;
            }
        }
    }

    let total_duration = start.elapsed();
    println!("Concurrent LLM requests completed in {:?}", total_duration);
    println!(
        "Results: {} succeeded, {} failed, {} timed out",
        successes, failures, timeouts
    );

    // At least some requests should succeed (allow for LLM service issues)
    assert!(
        successes > 0,
        "At least some concurrent LLM requests should succeed"
    );

    // Total time shouldn't be excessive (proper concurrent processing)
    assert!(
        total_duration < std::time::Duration::from_secs(300),
        "Concurrent requests should complete within reasonable time"
    );

    manager.cleanup().await?;
    Ok(())
}

/// Issues two `store_memory` calls two seconds apart and reports which of them
/// succeeded, then stores a third record without tags and verifies its content
/// when the call succeeds. No outcome combination fails the test by itself.
#[tokio::test]
async fn test_llm_service_recovery() -> Result<()> {
    let mut manager = TestDatabaseManager::new()?;
    let pool = manager.setup_test_database().await?;
    let storage = Arc::new(Storage::new(pool));
    let handlers = MCPHandlers::new(storage.clone());

    println!("Testing LLM service recovery scenarios");

    // First attempt: may fail if the service is currently unhealthy.
    let first_request = json!({
        "content": "First request to test service availability",
        "context": "Recovery test context for first request",
        "summary": "Recovery test summary for first request",
        "tags": ["recovery-test", "first"]
    });
    let first_outcome = handlers
        .handle_tool_call("store_memory", first_request)
        .await;

    // Give the service a short window in which it could recover.
    let recovery_window = std::time::Duration::from_secs(2);
    tokio::time::sleep(recovery_window).await;

    // Second attempt: shows whether the service is usable after the pause.
    let second_request = json!({
        "content": "Second request after potential recovery",
        "context": "Recovery test context for second request",
        "summary": "Recovery test summary for second request",
        "tags": ["recovery-test", "second"]
    });
    let second_outcome = handlers
        .handle_tool_call("store_memory", second_request)
        .await;

    // Report the observed pattern; every combination is accepted.
    match (first_outcome, second_outcome) {
        (Err(first_err), Err(second_err)) => {
            println!(
                "Service unavailable: both failed ({}, {})",
                first_err, second_err
            );
        }
        (Err(first_err), Ok(_)) => {
            println!(
                "Service recovered: first failed ({}), second succeeded",
                first_err
            );
        }
        (Ok(_), Err(second_err)) => {
            println!(
                "Service degraded: first succeeded, second failed ({})",
                second_err
            );
        }
        (Ok(_), Ok(_)) => {
            println!("Both requests succeeded - service is stable");
        }
    }

    // Final plain store without tags; the content is verified on success.
    let basic_content = "Basic storage test without LLM dependency";
    let basic_request = json!({
        "content": basic_content,
        "context": "Basic storage test context",
        "summary": "Basic storage test summary"
    });

    let basic_outcome = handlers
        .handle_tool_call("store_memory", basic_request)
        .await;

    match basic_outcome {
        Err(err) => {
            // Only reported: a failure here is logged but does not fail the test.
            println!("WARNING: Basic storage failed: {}", err);
        }
        Ok(reply) => {
            println!("Basic storage works independent of LLM service");

            if let Some(raw_id) = reply["id"].as_str() {
                let memory_id = uuid::Uuid::parse_str(raw_id)?;
                let stored = storage
                    .get(memory_id)
                    .await?
                    .expect("Should retrieve basic content");
                assert_eq!(stored.content, basic_content);
            }
        }
    }

    manager.cleanup().await?;
    Ok(())
}

/// Stores a set of awkward inputs (empty, nested quotes, emoji, code-like,
/// markdown-like) and validates the context and summary read back from
/// storage: the context must be free of control characters and bounded in
/// size, and the summary must be bounded and shorter than long inputs.
/// A failed store call is reported but does not fail the test.
#[tokio::test]
async fn test_llm_response_validation() -> Result<()> {
    let mut manager = TestDatabaseManager::new()?;
    let pool = manager.setup_test_database().await?;
    let storage = Arc::new(Storage::new(pool));
    let handlers = MCPHandlers::new(storage.clone());

    // Test content that might cause LLM to return invalid/unexpected responses
    let validation_tests = vec![
        ("Empty response trigger", ""),
        (
            "JSON breaking quotes",
            r#"Content with "nested "quotes" and 'mixed quotes'"#,
        ),
        ("Unicode stress test", "🎉🚀💾🔥⭐🎯📝🏷️🌈🔧✨🎨🎪🎭🎪🎨"),
        (
            "Code-like content",
            "fn main() { println!(\"Hello, world!\"); }",
        ),
        ("Markdown-like", "# Header\n- List item\n**bold** *italic*"),
    ];

    for (test_name, content) in validation_tests {
        println!("Testing LLM response validation: {}", test_name);

        let params = json!({
            "content": content,
            "context": "Validation test context",
            "summary": "Validation test summary",
            "tags": ["validation-test"]
        });

        let result = handlers.handle_tool_call("store_memory", params).await;

        match result {
            Ok(response) => {
                if let Some(id) = response["id"].as_str() {
                    let retrieved = storage
                        .get(uuid::Uuid::parse_str(id)?)
                        .await?
                        .expect("Should retrieve content");

                    // Inspect the stored context itself. The previous check
                    // looked at the input `content` and was short-circuited by
                    // `is_ascii()`, so it never examined the context.
                    assert!(
                        !retrieved.context.chars().any(|c| c.is_control()),
                        "Context should not contain control characters"
                    );
                    assert!(
                        retrieved.context.len() < 50000,
                        "Context should not be excessively long"
                    );

                    // Summary should be shorter than the original, unless the
                    // original is itself very short (100 bytes or fewer).
                    if content.len() > 100 {
                        assert!(
                            retrieved.summary.len() < content.len(),
                            "Summary should be shorter than original content"
                        );
                    }
                    assert!(
                        retrieved.summary.len() < 10000,
                        "Summary should not be excessively long"
                    );

                    println!("  ✅ {} passed validation", test_name);
                }
            }
            Err(e) => {
                println!("  ⚠️  {} failed: {}", test_name, e);
                // Failure is acceptable, but should be graceful
            }
        }
    }

    manager.cleanup().await?;
    Ok(())
}