// codex-memory 3.0.15
//
// A simple memory storage service with an MCP interface for Claude Desktop.
// Documentation
use tests::common::test_db_manager::TestDatabaseManager;
use codex_memory::storage::Storage;
use codex_memory::models::Memory;

// Comprehensive tests for context-aware deduplication, which implements the
// encoding specificity principle. Based on Tulving & Thomson (1973) research
// on memory encoding and retrieval.

/// Stores one piece of content under two different contexts and asserts that
/// the two results are kept apart: distinct IDs, distinct context
/// fingerprints, an identical content hash, and each context string returned
/// unchanged.
#[tokio::test]
async fn test_encoding_specificity_principle() -> Result<(), Box<dyn std::error::Error>> {
    // Isolated test database for this test.
    let mut manager = TestDatabaseManager::new()?;
    let pool = manager.setup_test_database().await?;
    let storage = Storage::new(pool.clone());

    let content = "The quick brown fox jumps over the lazy dog";

    // First encoding: the sentence used as a programming example.
    let programming_id = storage
        .store(
            content,
            String::from("Programming tutorial example"),
            String::from("Common text used in programming demonstrations"),
            Some(vec![String::from("programming"), String::from("tutorial")]),
        )
        .await?;

    // Second encoding: the very same sentence used for font testing.
    let typography_id = storage
        .store(
            content,
            String::from("Typography font testing"),
            String::from("Standard text for font rendering tests"),
            Some(vec![String::from("typography"), String::from("fonts")]),
        )
        .await?;

    // Identical content must not collapse the two entries into one.
    assert_ne!(
        programming_id, typography_id,
        "Same content in different contexts should create different memories"
    );

    // Load both stored records back.
    let programming = storage
        .get(programming_id)
        .await?
        .expect("Memory 1 should exist");
    let typography = storage
        .get(typography_id)
        .await?
        .expect("Memory 2 should exist");

    // The context fingerprint is what tells the two records apart...
    assert_ne!(
        programming.context_fingerprint, typography.context_fingerprint,
        "Different contexts should produce different fingerprints"
    );

    // ...while the content hash is shared between them.
    assert_eq!(
        programming.content_hash, typography.content_hash,
        "Same content should produce same content hash"
    );

    // Each record keeps the context it was stored with.
    assert_eq!(programming.context, "Programming tutorial example");
    assert_eq!(typography.context, "Typography font testing");

    manager.cleanup().await?;
    Ok(())
}

/// Stores the same content, context, summary and tags twice and asserts that
/// both calls return the same ID and that a lookup by content hash finds
/// exactly one memory.
#[tokio::test]
async fn test_exact_duplicate_prevention() -> Result<(), Box<dyn std::error::Error>> {
    let mut manager = TestDatabaseManager::new()?;
    let pool = manager.setup_test_database().await?;
    let storage = Storage::new(pool.clone());

    let content = "Duplicate prevention test";
    let context = "Testing exact duplicate detection";
    let summary = "Should not create duplicate entry";
    let tags = Some(vec![String::from("test")]);

    // Two store calls with byte-identical arguments.
    let first_id = storage
        .store(content, context.to_string(), summary.to_string(), tags.clone())
        .await?;
    let second_id = storage
        .store(content, context.to_string(), summary.to_string(), tags.clone())
        .await?;

    // The second call is expected to hand back the ID of the first.
    assert_eq!(first_id, second_id, "Exact duplicates should return same memory ID");

    // Build an in-memory record purely to obtain the content hash to query by.
    let probe = Memory::new(
        content.to_string(),
        context.to_string(),
        summary.to_string(),
        tags,
    );
    let matches = storage.find_similar_content(&probe.content_hash, 10).await?;

    assert_eq!(matches.len(), 1, "Should only have one memory for exact duplicate");

    manager.cleanup().await?;
    Ok(())
}

/// Stores one content string under four (context, summary) pairs and asserts
/// that every pair produced its own ID, that a lookup by content hash returns
/// all four, and that each original context string is among the results.
#[tokio::test]
async fn test_context_variations_preservation() -> Result<(), Box<dyn std::error::Error>> {
    let mut manager = TestDatabaseManager::new()?;
    let pool = manager.setup_test_database().await?;
    let storage = Storage::new(pool.clone());

    let content = "Machine learning model training";

    // (context, summary) pairs that all share the content above.
    let contexts = [
        ("Academic research project", "Research context for ML paper"),
        ("Production system deployment", "Real-world ML system in production"),
        ("Educational coursework", "Learning material for ML course"),
        ("Personal hobby project", "Individual exploration of ML concepts"),
    ];

    let mut memory_ids = Vec::with_capacity(contexts.len());
    for (context, summary) in &contexts {
        let stored_id = storage
            .store(
                content,
                context.to_string(),
                summary.to_string(),
                Some(vec![String::from("machine-learning")]),
            )
            .await?;
        memory_ids.push(stored_id);
    }

    // Collapsing the IDs into a set must not lose any of them.
    let distinct_ids = memory_ids
        .iter()
        .collect::<std::collections::HashSet<_>>();
    assert_eq!(
        distinct_ids.len(),
        contexts.len(),
        "All context variations should create unique memories"
    );

    // An in-memory record built from the first pair supplies the content hash.
    let (first_context, first_summary) = contexts[0];
    let base_memory = Memory::new(
        content.to_string(),
        first_context.to_string(),
        first_summary.to_string(),
        Some(vec![String::from("machine-learning")]),
    );

    let variations = storage
        .find_similar_content(&base_memory.content_hash, 10)
        .await?;
    assert_eq!(variations.len(), contexts.len(), "Should find all context variations");

    // Every context that was stored must show up among the results.
    for (expected_context, _) in &contexts {
        let preserved = variations.iter().any(|m| m.context == *expected_context);
        assert!(preserved, "Context '{}' should be preserved", expected_context);
    }

    manager.cleanup().await?;
    Ok(())
}

/// Exercises `Memory::is_semantically_similar` on in-memory records only.
///
/// Two records with identical content and closely related contexts must be
/// reported as similar at a threshold of 0.5; a record with unrelated content
/// must not be.
///
/// Nothing in this test awaits, so it runs as a plain synchronous `#[test]`
/// rather than starting an async runtime it would never use.
#[test]
fn test_semantic_similarity_detection() -> Result<(), Box<dyn std::error::Error>> {
    let memory1 = Memory::new(
        "The cat sat on the mat".to_string(),
        "Pet behavior observation".to_string(),
        "Observing feline resting habits".to_string(),
        Some(vec!["pets".to_string(), "cats".to_string()]),
    );

    // Same content as `memory1`, with a related but not identical context.
    let memory2 = Memory::new(
        "The cat sat on the mat".to_string(),
        "Pet behavior study".to_string(),
        "Research on domestic cat behavior".to_string(),
        Some(vec!["pets".to_string(), "research".to_string()]),
    );

    // Content, context, summary and tags all unrelated to `memory1`.
    let memory3 = Memory::new(
        "Quantum computing algorithms".to_string(),
        "Advanced computational theory".to_string(),
        "Exploring quantum algorithm design".to_string(),
        Some(vec!["quantum".to_string(), "computing".to_string()]),
    );

    // Same content, similar context should be semantically similar
    assert!(
        memory1.is_semantically_similar(&memory2, 0.5),
        "Similar content and context should be detected as semantically similar"
    );

    // Different content should not be similar
    assert!(
        !memory1.is_semantically_similar(&memory3, 0.5),
        "Different content should not be considered semantically similar"
    );

    Ok(())
}

/// Stores a parent document plus two chunks that share the same content but
/// differ in context, and asserts that the chunks get distinct IDs and context
/// fingerprints while keeping their own chunk index and context string.
#[tokio::test]
async fn test_chunked_content_context_preservation() -> Result<(), Box<dyn std::error::Error>> {
    let mut manager = TestDatabaseManager::new()?;
    let pool = manager.setup_test_database().await?;
    let storage = Storage::new(pool.clone());

    // The parent document whose ID both chunks are stored against.
    let parent_content = "Large document requiring chunking";
    let parent_id = storage
        .store(
            parent_content,
            String::from("Document processing pipeline"),
            String::from("Testing chunked document handling"),
            Some(vec![String::from("documents")]),
        )
        .await?;

    // First chunk. The two integers are the chunk index and, presumably, the
    // total chunk count (confirm against `Storage::store_chunk`).
    let first_chunk_id = storage
        .store_chunk(
            "Chunk 1 content",
            String::from("First section context"),
            String::from("Beginning of the document"),
            Some(vec![String::from("chunk"), String::from("first")]),
            1,
            3,
            parent_id,
        )
        .await?;

    // Second chunk: deliberately the same content, stored under another context.
    let second_chunk_id = storage
        .store_chunk(
            "Chunk 1 content",
            String::from("Second section context"),
            String::from("Middle of the document"),
            Some(vec![String::from("chunk"), String::from("second")]),
            2,
            3,
            parent_id,
        )
        .await?;

    // Matching content alone must not merge the two chunks.
    assert_ne!(
        first_chunk_id, second_chunk_id,
        "Same content in different chunk contexts should create different memories"
    );

    // Load both chunks back.
    let first_chunk = storage
        .get(first_chunk_id)
        .await?
        .expect("Chunk 1 should exist");
    let second_chunk = storage
        .get(second_chunk_id)
        .await?
        .expect("Chunk 2 should exist");

    // The fingerprints have to differ between the two chunks.
    assert_ne!(
        first_chunk.context_fingerprint, second_chunk.context_fingerprint,
        "Different chunk contexts should produce different fingerprints"
    );

    // Chunk index and context come back exactly as they were stored.
    assert_eq!(first_chunk.chunk_index, Some(1));
    assert_eq!(second_chunk.chunk_index, Some(2));
    assert_eq!(first_chunk.context, "First section context");
    assert_eq!(second_chunk.context, "Second section context");

    manager.cleanup().await?;
    Ok(())
}

/// Stores one content string under five contexts plus one unrelated
/// single-context memory, then asserts that the first entry returned by
/// `get_context_stats` reports 5 total variations and 5 unique contexts.
#[tokio::test]
async fn test_context_statistics() -> Result<(), Box<dyn std::error::Error>> {
    let mut manager = TestDatabaseManager::new()?;
    let pool = manager.setup_test_database().await?;
    let storage = Storage::new(pool.clone());

    let base_content = "Statistical analysis content";

    // Five numbered context/summary/tag variations of the same content.
    for variation in 1..=5 {
        let context = format!("Context variation {}", variation);
        let summary = format!("Summary for variation {}", variation);
        let tags = Some(vec![format!("tag-{}", variation)]);
        storage.store(base_content, context, summary, tags).await?;
    }

    // One further memory whose content appears under a single context only.
    storage
        .store(
            "Unique content without variations",
            String::from("Single context"),
            String::from("Content with no variations"),
            Some(vec![String::from("unique")]),
        )
        .await?;

    let stats = storage.get_context_stats().await?;

    // There must be at least one entry before the first one is inspected.
    assert!(!stats.is_empty(), "Should find content with multiple context variations");

    // Only the second and third tuple fields of the first entry are checked.
    let (_, variation_count, context_count) = &stats[0];
    assert_eq!(*variation_count, 5, "Should have 5 total variations");
    assert_eq!(*context_count, 5, "Should have 5 unique contexts");

    manager.cleanup().await?;
    Ok(())
}

/// Checks `Storage::exists_with_context` at three points: before anything is
/// stored (expected false), after storing the matching memory (expected true),
/// and for the same content under a different context (expected false).
#[tokio::test]
async fn test_exists_with_context() -> Result<(), Box<dyn std::error::Error>> {
    let mut manager = TestDatabaseManager::new()?;
    let pool = manager.setup_test_database().await?;
    let storage = Storage::new(pool.clone());

    let content = "Context existence test";
    let context = "Specific test context";
    let summary = "Testing context-aware existence checks";

    // In-memory record used only for its content hash and context fingerprint.
    let memory = Memory::new(
        String::from(content),
        String::from(context),
        String::from(summary),
        Some(vec![String::from("test")]),
    );

    // Nothing has been stored yet.
    let exists_before_store = storage
        .exists_with_context(&memory.content_hash, &memory.context_fingerprint)
        .await?;
    assert!(!exists_before_store, "Memory should not exist initially");

    // Persist a memory with the same content, context, summary and tags.
    storage
        .store(
            content,
            String::from(context),
            String::from(summary),
            Some(vec![String::from("test")]),
        )
        .await?;

    // The same hash/fingerprint pair must now be found.
    let exists_after_store = storage
        .exists_with_context(&memory.content_hash, &memory.context_fingerprint)
        .await?;
    assert!(exists_after_store, "Memory should exist after storage");

    // Same content, summary and tags, but a context that was never stored.
    let different_memory = Memory::new(
        String::from(content),
        String::from("Different context"),
        String::from(summary),
        Some(vec![String::from("test")]),
    );

    let other_context_exists = storage
        .exists_with_context(
            &different_memory.content_hash,
            &different_memory.context_fingerprint,
        )
        .await?;
    assert!(!other_context_exists, "Memory with different context should not exist");

    manager.cleanup().await?;
    Ok(())
}

/// Stores one content string under three contexts, each with its own tag set,
/// then looks all of them up by content hash and asserts that every context is
/// found and still carries all of the tags it was stored with.
#[tokio::test]
async fn test_retrieval_cue_effectiveness() -> Result<(), Box<dyn std::error::Error>> {
    let mut manager = TestDatabaseManager::new()?;
    let pool = manager.setup_test_database().await?;
    let storage = Storage::new(pool.clone());

    let content = "Python programming best practices";

    // (context, tags) pairs; both parts are checked again after retrieval.
    let contexts = [
        ("Web development project", ["python", "web", "django"]),
        ("Data science analysis", ["python", "data", "pandas"]),
        ("Machine learning model", ["python", "ml", "tensorflow"]),
    ];

    for (context, tags) in &contexts {
        let owned_tags = tags.iter().map(|s| s.to_string()).collect();
        storage
            .store(
                content,
                context.to_string(),
                format!("Best practices for {}", context),
                Some(owned_tags),
            )
            .await?;
    }

    // An in-memory record built from the first pair supplies the content hash.
    let (first_context, first_tags) = &contexts[0];
    let base_memory = Memory::new(
        content.to_string(),
        first_context.to_string(),
        format!("Best practices for {}", first_context),
        Some(first_tags.iter().map(|s| s.to_string()).collect()),
    );

    let all_variations = storage
        .find_similar_content(&base_memory.content_hash, 10)
        .await?;

    assert_eq!(all_variations.len(), 3, "Should retrieve all context variations");

    // Each stored pair must come back with its context and every one of its tags.
    for (expected_context, expected_tags) in &contexts {
        let memory = all_variations
            .iter()
            .find(|m| m.context == *expected_context)
            .unwrap_or_else(|| panic!("Should find memory with context '{}'", expected_context));

        for expected_tag in expected_tags {
            let has_tag = memory.tags.contains(&expected_tag.to_string());
            assert!(
                has_tag,
                "Memory should contain tag '{}' as retrieval cue", expected_tag
            );
        }
    }

    manager.cleanup().await?;
    Ok(())
}