use tests::common::test_db_manager::TestDatabaseManager;
use codex_memory::storage::Storage;
use codex_memory::models::Memory;
/// Checks that the same content stored under two different contexts is kept
/// as two separate memories.
///
/// One sentence is stored twice with different context, summary and tags.
/// The test asserts that the returned IDs differ, that the stored records
/// have different `context_fingerprint` values but equal `content_hash`
/// values, and that each record kept the context it was stored with.
#[tokio::test]
async fn test_encoding_specificity_principle() -> Result<(), Box<dyn std::error::Error>> {
    let mut manager = TestDatabaseManager::new()?;
    let pool = manager.setup_test_database().await?;
    let storage = Storage::new(pool.clone());

    // A single piece of text shared by both store calls; only the
    // surrounding context, summary and tags differ.
    let shared_text = "The quick brown fox jumps over the lazy dog";
    let programming_context = "Programming tutorial example";
    let typography_context = "Typography font testing";

    let programming_id = storage
        .store(
            shared_text,
            programming_context.to_string(),
            "Common text used in programming demonstrations".to_string(),
            Some(vec!["programming".to_string(), "tutorial".to_string()]),
        )
        .await?;
    let typography_id = storage
        .store(
            shared_text,
            typography_context.to_string(),
            "Standard text for font rendering tests".to_string(),
            Some(vec!["typography".to_string(), "fonts".to_string()]),
        )
        .await?;

    assert_ne!(
        programming_id, typography_id,
        "Same content in different contexts should create different memories"
    );

    // Read both records back to inspect their hashes and fingerprints.
    let programming_memory = storage
        .get(programming_id)
        .await?
        .expect("Memory 1 should exist");
    let typography_memory = storage
        .get(typography_id)
        .await?
        .expect("Memory 2 should exist");

    assert_ne!(
        programming_memory.context_fingerprint, typography_memory.context_fingerprint,
        "Different contexts should produce different fingerprints"
    );
    assert_eq!(
        programming_memory.content_hash, typography_memory.content_hash,
        "Same content should produce same content hash"
    );

    assert_eq!(programming_memory.context, programming_context);
    assert_eq!(typography_memory.context, typography_context);

    // Only reached when every step above succeeded.
    manager.cleanup().await?;
    Ok(())
}
/// Checks that storing the exact same content, context, summary and tags
/// twice does not create a second memory.
///
/// Both `store` calls must return the same ID, and a lookup by content hash
/// (limit 10) must return exactly one record.
#[tokio::test]
async fn test_exact_duplicate_prevention() -> Result<(), Box<dyn std::error::Error>> {
    let mut manager = TestDatabaseManager::new()?;
    let pool = manager.setup_test_database().await?;
    let storage = Storage::new(pool.clone());

    let content = "Duplicate prevention test";
    let context = "Testing exact duplicate detection";
    let summary = "Should not create duplicate entry";
    let tags = Some(vec!["test".to_string()]);

    // Store the identical payload twice.
    let first_id = storage
        .store(content, context.to_string(), summary.to_string(), tags.clone())
        .await?;
    let second_id = storage
        .store(content, context.to_string(), summary.to_string(), tags.clone())
        .await?;
    assert_eq!(
        first_id, second_id,
        "Exact duplicates should return same memory ID"
    );

    // Build an in-memory record from the same inputs purely to obtain the
    // content hash used for the lookup.
    let probe = Memory::new(
        content.to_string(),
        context.to_string(),
        summary.to_string(),
        tags,
    );
    let matches = storage
        .find_similar_content(&probe.content_hash, 10)
        .await?;
    assert_eq!(
        matches.len(),
        1,
        "Should only have one memory for exact duplicate"
    );

    // Only reached when every step above succeeded.
    manager.cleanup().await?;
    Ok(())
}
/// Checks that one piece of content stored under several contexts yields one
/// memory per context, and that all of them are found via the content hash.
///
/// Four (context, summary) pairs are stored with the same content and tag.
/// The test asserts that all returned IDs are unique, that a content-hash
/// lookup (limit 10) returns as many records as there are pairs, and that
/// each expected context string appears among the returned records.
#[tokio::test]
async fn test_context_variations_preservation() -> Result<(), Box<dyn std::error::Error>> {
    let mut manager = TestDatabaseManager::new()?;
    let pool = manager.setup_test_database().await?;
    let storage = Storage::new(pool.clone());

    let content = "Machine learning model training";
    // (context, summary) pairs that all share `content`.
    let contexts = [
        ("Academic research project", "Research context for ML paper"),
        ("Production system deployment", "Real-world ML system in production"),
        ("Educational coursework", "Learning material for ML course"),
        ("Personal hobby project", "Individual exploration of ML concepts"),
    ];

    let mut memory_ids = Vec::with_capacity(contexts.len());
    for (context, summary) in &contexts {
        let stored_id = storage
            .store(
                content,
                context.to_string(),
                summary.to_string(),
                Some(vec!["machine-learning".to_string()]),
            )
            .await?;
        memory_ids.push(stored_id);
    }

    // Collapsing into a set exposes any ID that was returned more than once.
    let distinct_ids: std::collections::HashSet<_> = memory_ids.iter().collect();
    assert_eq!(
        distinct_ids.len(),
        contexts.len(),
        "All context variations should create unique memories"
    );

    // An in-memory record built from the first pair supplies the content hash.
    let (first_context, first_summary) = contexts[0];
    let probe = Memory::new(
        content.to_string(),
        first_context.to_string(),
        first_summary.to_string(),
        Some(vec!["machine-learning".to_string()]),
    );
    let variations = storage
        .find_similar_content(&probe.content_hash, 10)
        .await?;
    assert_eq!(
        variations.len(),
        contexts.len(),
        "Should find all context variations"
    );

    for (expected_context, _) in &contexts {
        let preserved = variations
            .iter()
            .any(|memory| memory.context == *expected_context);
        assert!(
            preserved,
            "Context '{}' should be preserved",
            expected_context
        );
    }

    // Only reached when every step above succeeded.
    manager.cleanup().await?;
    Ok(())
}
/// Exercises `Memory::is_semantically_similar` on in-memory records only; no
/// database is set up.
///
/// Two records with identical content and closely related contexts must be
/// reported as similar at threshold 0.5, while a record with unrelated
/// content and context must not.
#[tokio::test]
async fn test_semantic_similarity_detection() -> Result<(), Box<dyn std::error::Error>> {
    let threshold = 0.5;

    let cat_observation = Memory::new(
        "The cat sat on the mat".to_string(),
        "Pet behavior observation".to_string(),
        "Observing feline resting habits".to_string(),
        Some(vec!["pets".to_string(), "cats".to_string()]),
    );
    // Same content as above, with a nearby context and overlapping tags.
    let cat_study = Memory::new(
        "The cat sat on the mat".to_string(),
        "Pet behavior study".to_string(),
        "Research on domestic cat behavior".to_string(),
        Some(vec!["pets".to_string(), "research".to_string()]),
    );
    // Unrelated content, context and tags.
    let quantum_notes = Memory::new(
        "Quantum computing algorithms".to_string(),
        "Advanced computational theory".to_string(),
        "Exploring quantum algorithm design".to_string(),
        Some(vec!["quantum".to_string(), "computing".to_string()]),
    );

    let related = cat_observation.is_semantically_similar(&cat_study, threshold);
    assert!(
        related,
        "Similar content and context should be detected as semantically similar"
    );

    let unrelated_matches = cat_observation.is_semantically_similar(&quantum_notes, threshold);
    assert!(
        !unrelated_matches,
        "Different content should not be considered semantically similar"
    );

    Ok(())
}
/// Checks that two chunks with the same text but different contexts are
/// stored as separate memories under one parent.
///
/// A parent memory is stored first, then two chunks (indices 1 and 2 of 3)
/// are stored against its ID via `store_chunk`. The test asserts that the
/// chunk IDs differ, that their `context_fingerprint` values differ, and
/// that each chunk kept its own `chunk_index` and context.
#[tokio::test]
async fn test_chunked_content_context_preservation() -> Result<(), Box<dyn std::error::Error>> {
    let mut manager = TestDatabaseManager::new()?;
    let pool = manager.setup_test_database().await?;
    let storage = Storage::new(pool.clone());

    let parent_id = storage
        .store(
            "Large document requiring chunking",
            "Document processing pipeline".to_string(),
            "Testing chunked document handling".to_string(),
            Some(vec!["documents".to_string()]),
        )
        .await?;

    // Both chunks deliberately carry the same text: the assertion below is
    // about identical content in different chunk contexts.
    let chunk_text = "Chunk 1 content";
    let total_chunks = 3;

    let first_chunk_id = storage
        .store_chunk(
            chunk_text,
            "First section context".to_string(),
            "Beginning of the document".to_string(),
            Some(vec!["chunk".to_string(), "first".to_string()]),
            1,
            total_chunks,
            parent_id,
        )
        .await?;
    let second_chunk_id = storage
        .store_chunk(
            chunk_text,
            "Second section context".to_string(),
            "Middle of the document".to_string(),
            Some(vec!["chunk".to_string(), "second".to_string()]),
            2,
            total_chunks,
            parent_id,
        )
        .await?;

    assert_ne!(
        first_chunk_id, second_chunk_id,
        "Same content in different chunk contexts should create different memories"
    );

    let first_chunk = storage
        .get(first_chunk_id)
        .await?
        .expect("Chunk 1 should exist");
    let second_chunk = storage
        .get(second_chunk_id)
        .await?
        .expect("Chunk 2 should exist");

    assert_ne!(
        first_chunk.context_fingerprint, second_chunk.context_fingerprint,
        "Different chunk contexts should produce different fingerprints"
    );

    assert_eq!(first_chunk.chunk_index, Some(1));
    assert_eq!(second_chunk.chunk_index, Some(2));
    assert_eq!(first_chunk.context, "First section context");
    assert_eq!(second_chunk.context, "Second section context");

    // Only reached when every step above succeeded.
    manager.cleanup().await?;
    Ok(())
}
/// Checks the figures reported by `Storage::get_context_stats`.
///
/// The same content is stored under five numbered contexts, plus one
/// unrelated memory that has a single context. The first entry returned by
/// `get_context_stats` must then report 5 total variations and 5 unique
/// contexts.
#[tokio::test]
async fn test_context_statistics() -> Result<(), Box<dyn std::error::Error>> {
    let mut manager = TestDatabaseManager::new()?;
    let pool = manager.setup_test_database().await?;
    let storage = Storage::new(pool.clone());

    let repeated_content = "Statistical analysis content";
    for variation in 1..=5 {
        storage
            .store(
                repeated_content,
                format!("Context variation {}", variation),
                format!("Summary for variation {}", variation),
                Some(vec![format!("tag-{}", variation)]),
            )
            .await?;
    }

    // A memory whose content appears under only one context.
    storage
        .store(
            "Unique content without variations",
            "Single context".to_string(),
            "Content with no variations".to_string(),
            Some(vec!["unique".to_string()]),
        )
        .await?;

    let stats = storage.get_context_stats().await?;
    assert!(
        !stats.is_empty(),
        "Should find content with multiple context variations"
    );

    // NOTE(review): this assumes the first entry describes the repeated
    // content; the ordering/filtering of `get_context_stats` is not visible
    // from this test - confirm against its implementation.
    let leading_entry = &stats[0];
    assert_eq!(leading_entry.1, 5, "Should have 5 total variations");
    assert_eq!(leading_entry.2, 5, "Should have 5 unique contexts");

    // Only reached when every step above succeeded.
    manager.cleanup().await?;
    Ok(())
}
/// Exercises `Storage::exists_with_context` before and after a store.
///
/// Using the hash and fingerprint of an in-memory `Memory`, the check must
/// report `false` before anything is stored and `true` after storing the
/// same content/context/summary/tags. A second in-memory `Memory` with the
/// same content but a different context must still report `false`.
#[tokio::test]
async fn test_exists_with_context() -> Result<(), Box<dyn std::error::Error>> {
    let mut manager = TestDatabaseManager::new()?;
    let pool = manager.setup_test_database().await?;
    let storage = Storage::new(pool.clone());

    let content = "Context existence test";
    let context = "Specific test context";
    let summary = "Testing context-aware existence checks";

    // Never persisted directly; used only for its hash and fingerprint.
    let probe = Memory::new(
        content.to_string(),
        context.to_string(),
        summary.to_string(),
        Some(vec!["test".to_string()]),
    );

    let present_before_store = storage
        .exists_with_context(&probe.content_hash, &probe.context_fingerprint)
        .await?;
    assert!(!present_before_store, "Memory should not exist initially");

    storage
        .store(
            content,
            context.to_string(),
            summary.to_string(),
            Some(vec!["test".to_string()]),
        )
        .await?;

    let present_after_store = storage
        .exists_with_context(&probe.content_hash, &probe.context_fingerprint)
        .await?;
    assert!(present_after_store, "Memory should exist after storage");

    // Same content and summary, but a context that was never stored.
    let other_context_probe = Memory::new(
        content.to_string(),
        "Different context".to_string(),
        summary.to_string(),
        Some(vec!["test".to_string()]),
    );
    let other_context_present = storage
        .exists_with_context(
            &other_context_probe.content_hash,
            &other_context_probe.context_fingerprint,
        )
        .await?;
    assert!(
        !other_context_present,
        "Memory with different context should not exist"
    );

    // Only reached when every step above succeeded.
    manager.cleanup().await?;
    Ok(())
}
/// Checks that context and tags survive storage and can be read back from
/// the records returned by a content-hash lookup.
///
/// The same content is stored under three contexts, each with its own tag
/// list. A content-hash lookup (limit 10) must return three records; for
/// every expected context a record must be found, and that record's `tags`
/// must contain each tag it was stored with.
#[tokio::test]
async fn test_retrieval_cue_effectiveness() -> Result<(), Box<dyn std::error::Error>> {
    let mut manager = TestDatabaseManager::new()?;
    let pool = manager.setup_test_database().await?;
    let storage = Storage::new(pool.clone());

    let content = "Python programming best practices";
    // (context, tags) pairs that all share `content`.
    let contexts = [
        ("Web development project", vec!["python", "web", "django"]),
        ("Data science analysis", vec!["python", "data", "pandas"]),
        ("Machine learning model", vec!["python", "ml", "tensorflow"]),
    ];

    for (context, tags) in &contexts {
        let owned_tags = tags.iter().map(|tag| tag.to_string()).collect();
        storage
            .store(
                content,
                context.to_string(),
                format!("Best practices for {}", context),
                Some(owned_tags),
            )
            .await?;
    }

    // An in-memory record built from the first pair supplies the content hash.
    let first_context = contexts[0].0;
    let first_tags = &contexts[0].1;
    let probe = Memory::new(
        content.to_string(),
        first_context.to_string(),
        format!("Best practices for {}", first_context),
        Some(first_tags.iter().map(|tag| tag.to_string()).collect()),
    );
    let all_variations = storage
        .find_similar_content(&probe.content_hash, 10)
        .await?;
    assert_eq!(
        all_variations.len(),
        3,
        "Should retrieve all context variations"
    );

    for (expected_context, expected_tags) in &contexts {
        let matched = all_variations
            .iter()
            .find(|candidate| candidate.context == *expected_context);
        assert!(
            matched.is_some(),
            "Should find memory with context '{}'",
            expected_context
        );

        if let Some(memory) = matched {
            for expected_tag in expected_tags {
                let tag_present = memory.tags.contains(&expected_tag.to_string());
                assert!(
                    tag_present,
                    "Memory should contain tag '{}' as retrieval cue",
                    expected_tag
                );
            }
        }
    }

    // Only reached when every step above succeeded.
    manager.cleanup().await?;
    Ok(())
}