// codex-memory 3.0.15
//
// A simple memory storage service with MCP interface for Claude Desktop.
// Documentation
use crate::common::test_db_manager::TestDatabaseManager;
use codex_memory::chunking::{ChunkingStrategy, FileChunker};
use codex_memory::error::Result;
use codex_memory::mcp_server::handlers::MCPHandlers;
use codex_memory::storage::Storage;
use serde_json::json;
use std::sync::Arc;
use tokio::fs;

/// Test the store_file MCP tool with different chunking strategies
#[tokio::test]
async fn test_store_file_basic_functionality() -> Result<()> {
    let mut db_manager = TestDatabaseManager::new()?;
    let pool = db_manager.setup_test_database().await?;
    let storage = Arc::new(Storage::new(pool));
    let handlers = MCPHandlers::new(storage.clone());

    // Create a test file with unique content
    let temp_dir = std::env::temp_dir();
    let unique_id = uuid::Uuid::new_v4();
    let test_file_path = temp_dir.join(format!("test_store_file_{}.txt", unique_id));
    
    // Create content that's long enough to be split into multiple chunks
    let mut test_content = format!("Test file content for basic functionality test. Unique ID: {}\n", unique_id);
    test_content.push_str(&"This is sample content that should be split into multiple chunks when processed. ".repeat(50));
    test_content.push_str(&format!("\nEnd of test content. ID: {}", unique_id));
    
    fs::write(&test_file_path, &test_content).await.unwrap();

    let params = json!({
        "file_path": test_file_path.to_string_lossy(),
        "chunk_size": 1500,
        "overlap": 200,
        "chunking_strategy": "hybrid",
        "tags": ["test", "document"]
    });

    let result = handlers.handle_tool_call("store_file", params).await?;
    
    // Verify response structure
    assert!(result["file_path"].is_string());
    assert!(result["file_size"].is_number());
    assert!(result["chunks_created"].is_number());
    assert!(result["chunk_ids"].is_array());
    assert!(result["chunking_strategy"].is_string());
    assert!(result["message"].is_string());

    let chunks_created = result["chunks_created"].as_u64().unwrap();
    assert!(chunks_created > 1, "Should create multiple chunks");

    let chunk_ids = result["chunk_ids"].as_array().unwrap();
    assert_eq!(chunk_ids.len() as u64, chunks_created);

    // Verify chunks were stored correctly
    for chunk_id_value in chunk_ids {
        let chunk_id = uuid::Uuid::parse_str(chunk_id_value.as_str().unwrap())
            .map_err(|e| codex_memory::error::Error::InternalError(format!("UUID parse error: {}", e)))?;
        let memory = storage.get(chunk_id).await?;
        assert!(memory.is_some(), "Chunk should be stored in database");
        
        let memory = memory.unwrap();
        assert!(memory.content.len() > 0, "Chunk content should not be empty");
        assert!(memory.tags.contains(&"test".to_string()), "Should contain test tag");
        assert!(memory.tags.contains(&"document".to_string()), "Should contain document tag");
        assert!(memory.tags.iter().any(|t| t.starts_with("chunk_")), "Should have chunk number tag");
        assert!(memory.tags.iter().any(|t| t.starts_with("strategy_")), "Should have strategy tag");
    }

    // Clean up
    fs::remove_file(&test_file_path).await.ok();
    db_manager.cleanup().await?;
    Ok(())
}

/// Stores the same markdown document once per chunking strategy and compares the results.
///
/// For each of `sentence`, `paragraph`, `semantic` and `hybrid` the test writes
/// a temporary `.md` file, stores it through the `store_file` tool, and checks
/// that the response echoes the requested strategy (case-insensitively).
/// Finally it requires that the strategies did not all yield the same chunk count.
#[tokio::test]
async fn test_chunking_strategies_comparison() -> Result<()> {
    let mut db_manager = TestDatabaseManager::new()?;
    let pool = db_manager.setup_test_database().await?;
    let storage = Arc::new(Storage::new(pool));
    let handlers = MCPHandlers::new(storage);

    // Test content with clear semantic boundaries (headings, paragraphs, a code fence).
    let test_content = "# Introduction\n\nThis is the introduction paragraph. It explains the purpose of the document.\n\n## First Section\n\nThis section contains detailed information. It has multiple sentences that provide context and examples.\n\n### Subsection\n\nA subsection with code:\n\n```python\ndef hello_world():\n    print(\"Hello, world!\")\n    return True\n```\n\n## Second Section\n\nThis is another major section. It provides additional information and concludes the document.\n\nThe end.";

    let temp_dir = std::env::temp_dir();
    // The process id keeps file names distinct between concurrently running test binaries.
    let run_id = std::process::id();

    let strategies = ["sentence", "paragraph", "semantic", "hybrid"];
    let mut results = Vec::with_capacity(strategies.len());

    for strategy in strategies {
        // Create unique content for each strategy to avoid deduplication
        let unique_content = format!("{}\n\n--- {} strategy test ---", test_content, strategy);
        let strategy_file_path =
            temp_dir.join(format!("test_chunking_strategy_{}_{}.md", strategy, run_id));
        fs::write(&strategy_file_path, &unique_content)
            .await
            .expect("failed to write temporary strategy file");

        let params = json!({
            "file_path": strategy_file_path.to_string_lossy(),
            "chunk_size": 2000,
            "overlap": 300,
            "chunking_strategy": strategy,
            "tags": [format!("test_{}", strategy)]
        });

        // Delete the strategy file before propagating any handler error so it never leaks.
        let outcome = handlers.handle_tool_call("store_file", params).await;
        fs::remove_file(&strategy_file_path).await.ok();
        let result = outcome?;

        let chunks_created = result["chunks_created"]
            .as_u64()
            .expect("chunks_created should be an unsigned integer");
        results.push((strategy, chunks_created));

        // Verify strategy is recorded correctly (case-insensitive check)
        let recorded_strategy = result["chunking_strategy"]
            .as_str()
            .expect("chunking_strategy should be a string")
            .to_lowercase();
        assert_eq!(recorded_strategy, strategy.to_lowercase());
    }

    // Different strategies should produce different chunk counts
    // (This is a heuristic - exact counts depend on implementation)
    // NOTE(review): the document is far shorter than the requested chunk_size of
    // 2000, so this presumably relies on at least one strategy splitting on
    // structure rather than size - confirm against the chunker.
    let unique_counts: std::collections::HashSet<u64> =
        results.iter().map(|&(_, count)| count).collect();
    assert!(
        unique_counts.len() >= 2,
        "Different strategies should produce different chunk counts: {:?}",
        results
    );

    // Clean up
    db_manager.cleanup().await?;
    Ok(())
}

/// Verifies that `store_file` rejects invalid file paths and parameters.
///
/// Each case is a parameter object that must make `handle_tool_call` return
/// `Err`: a non-existent path, a chunk size below and above the accepted range,
/// an overlap too large for the chunk size, and a missing `file_path`.
/// All calls are made first and the temporary file is removed before any
/// assertion runs, so a failing case cannot leave the file behind.
#[tokio::test]
async fn test_store_file_error_handling() -> Result<()> {
    let mut db_manager = TestDatabaseManager::new()?;
    let pool = db_manager.setup_test_database().await?;
    let storage = Arc::new(Storage::new(pool));
    let handlers = MCPHandlers::new(storage);

    // A real file for the cases that must fail on parameter validation rather
    // than on a missing file. The process id keeps the name unique per test run.
    let test_file_path =
        std::env::temp_dir().join(format!("test_error_handling_{}.txt", std::process::id()));
    fs::write(&test_file_path, "test content")
        .await
        .expect("failed to write temporary test file");

    let cases = [
        (
            // Test non-existent file
            json!({
                "file_path": "/path/that/does/not/exist.txt"
            }),
            "Should fail for non-existent file",
        ),
        (
            // Test invalid chunk size (too small)
            json!({
                "file_path": test_file_path.to_string_lossy(),
                "chunk_size": 100  // Too small (< 1024)
            }),
            "Should fail for chunk size too small",
        ),
        (
            // Test invalid chunk size (too large)
            json!({
                "file_path": test_file_path.to_string_lossy(),
                "chunk_size": 200000  // Too large (> 102400)
            }),
            "Should fail for chunk size too large",
        ),
        (
            // Test invalid overlap (too large relative to chunk size)
            json!({
                "file_path": test_file_path.to_string_lossy(),
                "chunk_size": 2000,
                "overlap": 1500  // >= chunk_size/2
            }),
            "Should fail for overlap too large",
        ),
        (
            // Test missing file_path parameter
            json!({
                "chunk_size": 2000
            }),
            "Should fail for missing file_path",
        ),
    ];

    // Run every case before asserting so cleanup below always happens.
    let mut outcomes = Vec::with_capacity(cases.len());
    for (params, expectation) in cases {
        let rejected = handlers
            .handle_tool_call("store_file", params)
            .await
            .is_err();
        outcomes.push((rejected, expectation));
    }

    fs::remove_file(&test_file_path).await.ok();

    for (rejected, expectation) in outcomes {
        assert!(rejected, "{}", expectation);
    }

    // Clean up
    db_manager.cleanup().await?;
    Ok(())
}

/// Drives `FileChunker` directly with each `ChunkingStrategy`, bypassing the MCP layer.
///
/// Every strategy must return at least one chunk for the sample text, sentence
/// chunks must pass a simple boundary heuristic, and the four strategies must
/// not all return the same number of chunks.
#[tokio::test]
async fn test_chunking_algorithms_directly() -> Result<()> {
    let test_content = "First sentence. Second sentence! Third question? \n\nNew paragraph starts here. It continues with more text. And even more content.\n\n```code\nlet x = 42;\nprintln!(\"{}\", x);\n```\n\nFinal paragraph.";

    // Test sentence chunking
    let sentence_chunker = FileChunker::with_strategy(100, 20, ChunkingStrategy::Sentence);
    let sentence_chunks = sentence_chunker.chunk_content(test_content)?;
    assert!(!sentence_chunks.is_empty(), "Sentence chunking should produce chunks");

    // Verify sentence chunks respect sentence boundaries
    for chunk in &sentence_chunks {
        let content = &chunk.content;
        // Only check substantial chunks
        if content.len() > 20 {
            // Heuristic: a chunk that ends with a space must, once trimmed, end
            // with sentence punctuation. Chunks not ending in a space pass as-is.
            let ends_cleanly =
                !content.ends_with(' ') || content.trim().ends_with(['.', '!', '?']);
            assert!(
                ends_cleanly,
                "Chunk should end at sentence boundary: '{}'",
                // Show the last 20 characters in reading order.
                content
                    .chars()
                    .skip(content.chars().count().saturating_sub(20))
                    .collect::<String>()
            );
        }
    }

    // Test paragraph chunking
    let paragraph_chunker = FileChunker::with_strategy(200, 0, ChunkingStrategy::Paragraph);
    let paragraph_chunks = paragraph_chunker.chunk_content(test_content)?;
    assert!(!paragraph_chunks.is_empty(), "Paragraph chunking should produce chunks");

    // Test semantic chunking
    let semantic_chunker = FileChunker::with_strategy(150, 30, ChunkingStrategy::Semantic);
    let semantic_chunks = semantic_chunker.chunk_content(test_content)?;
    assert!(!semantic_chunks.is_empty(), "Semantic chunking should produce chunks");

    // Test hybrid chunking
    let hybrid_chunker = FileChunker::with_strategy(120, 25, ChunkingStrategy::Hybrid);
    let hybrid_chunks = hybrid_chunker.chunk_content(test_content)?;
    assert!(!hybrid_chunks.is_empty(), "Hybrid chunking should produce chunks");

    // Verify all strategies produce different chunk patterns
    let sentence_count = sentence_chunks.len();
    let paragraph_count = paragraph_chunks.len();
    let semantic_count = semantic_chunks.len();
    let hybrid_count = hybrid_chunks.len();

    println!("Chunk counts - Sentence: {}, Paragraph: {}, Semantic: {}, Hybrid: {}",
             sentence_count, paragraph_count, semantic_count, hybrid_count);

    // At least some strategies should produce different counts
    let all_counts = [sentence_count, paragraph_count, semantic_count, hybrid_count];
    let unique_counts: std::collections::HashSet<usize> = all_counts.iter().copied().collect();
    assert!(unique_counts.len() >= 2, "Different chunking strategies should produce different results");

    Ok(())
}

/// Verifies that `store_file` refuses a file larger than the 10MB limit.
///
/// Writes an 11 MiB temporary file, expects `handle_tool_call` to return an
/// error, and checks that the error text mentions the size limit
/// ("file size" or "exceeds", case-insensitively).
#[tokio::test]
async fn test_file_size_limits() -> Result<()> {
    let mut db_manager = TestDatabaseManager::new()?;
    let pool = db_manager.setup_test_database().await?;
    let storage = Arc::new(Storage::new(pool));
    let handlers = MCPHandlers::new(storage);

    // Create a file that's too large (> 10MB). The process id keeps the name
    // unique between concurrently running test binaries.
    let large_file_path =
        std::env::temp_dir().join(format!("large_test_file_{}.txt", std::process::id()));

    // Create ~11MB of content
    let large_content = "a".repeat(11 * 1024 * 1024);
    fs::write(&large_file_path, large_content)
        .await
        .expect("failed to write large temporary test file");

    let params = json!({
        "file_path": large_file_path.to_string_lossy(),
        "chunk_size": 8000,
        "overlap": 200
    });

    let result = handlers.handle_tool_call("store_file", params).await;

    // Remove the 11 MiB file before asserting so a failure cannot leave it on disk.
    fs::remove_file(&large_file_path).await.ok();

    let error_msg = result
        .expect_err("Should fail for files larger than 10MB")
        .to_string();

    // Verify error message mentions file size limit
    let lowered = error_msg.to_lowercase();
    assert!(
        lowered.contains("file size") || lowered.contains("exceeds"),
        "Error should mention file size limit: {}",
        error_msg
    );

    // Clean up
    db_manager.cleanup().await?;
    Ok(())
}

/// Prints the word overlap between consecutive hybrid chunks of a short text.
///
/// The text is chunked with `FileChunker::with_strategy(50, 10, Hybrid)`. For
/// each adjacent pair of chunks the byte range, a 30-character preview and the
/// set of shared whitespace-separated words are printed.
///
/// NOTE(review): nothing is asserted here; the test only fails if
/// `chunk_content` returns an error. Consider asserting on the overlap once the
/// expected behaviour is pinned down.
#[tokio::test]
async fn test_chunk_overlap() -> Result<()> {
    use std::collections::HashSet;

    let test_content = "Word1 Word2 Word3 Word4 Word5 Word6 Word7 Word8 Word9 Word10 Word11 Word12 Word13 Word14 Word15 Word16 Word17 Word18 Word19 Word20";

    let chunker = FileChunker::with_strategy(50, 10, ChunkingStrategy::Hybrid);
    let chunks = chunker.chunk_content(test_content)?;

    // Pair each chunk with its successor; with fewer than two chunks there are
    // no pairs and the loop body never runs.
    let adjacent_pairs = chunks.iter().zip(chunks.iter().skip(1));

    for (index, (current_chunk, next_chunk)) in adjacent_pairs.enumerate() {
        // Shared words are only a rough overlap signal, since the exact overlap
        // depends on where the chunker places its boundaries.
        let current_words: HashSet<_> = current_chunk.content.split_whitespace().collect();
        let next_words: HashSet<_> = next_chunk.content.split_whitespace().collect();
        let intersection: Vec<_> = current_words.intersection(&next_words).collect();

        let current_preview: String = current_chunk.content.chars().take(30).collect();
        let next_preview: String = next_chunk.content.chars().take(30).collect();

        println!(
            "Chunk {} ({}..{}): '{}'",
            index, current_chunk.start_byte, current_chunk.end_byte, current_preview
        );
        println!(
            "Chunk {} ({}..{}): '{}'",
            index + 1,
            next_chunk.start_byte,
            next_chunk.end_byte,
            next_preview
        );
        println!("Overlapping words: {:?}", intersection);
    }

    Ok(())
}

/// Test single chunk files (files that fit in one chunk)
#[tokio::test]
async fn test_single_chunk_files() -> Result<()> {
    let mut db_manager = TestDatabaseManager::new()?;
    let pool = db_manager.setup_test_database().await?;
    let storage = Arc::new(Storage::new(pool));
    let handlers = MCPHandlers::new(storage.clone());

    // Create a small test file
    let temp_dir = std::env::temp_dir();
    let small_file_path = temp_dir.join("small_test_file.txt");
    let small_content = "This is a small file that should fit in a single chunk.";
    
    fs::write(&small_file_path, small_content).await.unwrap();

    let params = json!({
        "file_path": small_file_path.to_string_lossy(),
        "chunk_size": 8000,
        "overlap": 200,
        "chunking_strategy": "hybrid",
        "tags": ["small", "single_chunk"]
    });

    let result = handlers.handle_tool_call("store_file", params).await?;
    
    // Should create exactly one chunk
    let chunks_created = result["chunks_created"].as_u64().unwrap();
    assert_eq!(chunks_created, 1, "Small file should create exactly one chunk");

    let chunk_ids = result["chunk_ids"].as_array().unwrap();
    assert_eq!(chunk_ids.len(), 1);

    // Verify the chunk was stored correctly
    let chunk_id = uuid::Uuid::parse_str(chunk_ids[0].as_str().unwrap())
        .map_err(|e| codex_memory::error::Error::InternalError(format!("UUID parse error: {}", e)))?;
    let memory = storage.get(chunk_id).await?;
    assert!(memory.is_some());

    let memory = memory.unwrap();
    assert_eq!(memory.content.trim(), small_content.trim());
    assert!(memory.tags.contains(&"small".to_string()));
    assert!(memory.tags.contains(&"single_chunk".to_string()));

    // Clean up
    fs::remove_file(&small_file_path).await.ok();
    db_manager.cleanup().await?;
    Ok(())
}