#[cfg_attr(coverage_nightly, coverage(off))]
#[cfg(test)]
mod tests {
use super::*;
use std::fs;
use tempfile::TempDir;
/// Populate a fresh temporary directory with three small Rust sources
/// (`main.rs`, `lib.rs`, `utils.rs`) that the indexing tests operate on.
///
/// The returned `TempDir` guard owns the directory; it is deleted when
/// the guard drops, so callers must keep it alive while using the path.
fn create_test_project() -> TempDir {
    let dir = TempDir::new().expect("create temp dir");
    // Small local helper so each fixture file is written the same way.
    let write_source = |name: &str, body: &str, err: &str| {
        fs::write(dir.path().join(name), body).expect(err);
    };
    write_source(
        "main.rs",
        r#"
fn main() {
println!("Hello, world!");
let x = calculate_sum(1, 2);
println!("Sum: {}", x);
}
fn calculate_sum(a: i32, b: i32) -> i32 {
a + b
}
"#,
        "write main.rs",
    );
    write_source(
        "lib.rs",
        r#"
pub mod utils;
/// Process data.
pub fn process_data(data: &[u8]) -> Vec<u8> {
data.iter().map(|&b| b * 2).collect()
}
/// Validate input.
pub fn validate_input(input: &str) -> bool {
!input.is_empty() && input.len() < 1000
}
"#,
        "write lib.rs",
    );
    write_source(
        "utils.rs",
        r#"
use std::collections::HashMap;
/// Create cache.
pub fn create_cache() -> HashMap<String, String> {
HashMap::new()
}
/// Parse config.
pub fn parse_config(config: &str) -> Option<Config> {
if config.is_empty() {
return None;
}
Some(Config { name: config.to_string() })
}
/// Configuration for config.
pub struct Config {
pub name: String,
}
"#,
        "write utils.rs",
    );
    dir
}
/// Indexing the fixture project should pick up all three Rust files.
#[test]
fn test_index_directory() {
    let project = create_test_project();
    let mut engine = LocalSemanticEngine::new();
    let indexed = engine.index_directory(project.path(), None).unwrap();
    assert_eq!(indexed, 3, "Should index 3 Rust files");
    assert_eq!(engine.documents.len(), 3);
}
/// A language filter should exclude non-Rust sources from the index.
#[test]
fn test_index_with_language_filter() {
    let project = create_test_project();
    // Drop a Python file next to the Rust fixtures; it must be skipped.
    fs::write(project.path().join("script.py"), "print('hello')\n# comment")
        .expect("write script.py");
    let mut engine = LocalSemanticEngine::new();
    let indexed = engine
        .index_directory(project.path(), Some("rust"))
        .unwrap();
    assert_eq!(indexed, 3, "Should only index Rust files");
}
/// Topic extraction should return exactly the requested number of
/// topics, each carrying at least one top term.
#[test]
fn test_extract_topics() {
    let project = create_test_project();
    let mut engine = LocalSemanticEngine::new();
    engine.index_directory(project.path(), None).unwrap();
    let report = engine.extract_topics(2, None).unwrap();
    assert_eq!(report.topics.len(), 2);
    assert_eq!(report.num_documents, 3);
    assert!(report.topics.iter().all(|t| !t.top_terms.is_empty()));
}
/// K-means over the fixture corpus reports its method, at most `k`
/// clusters, and the full document count.
#[test]
fn test_cluster_kmeans() {
    let project = create_test_project();
    let mut engine = LocalSemanticEngine::new();
    engine.index_directory(project.path(), None).unwrap();
    let outcome = engine.cluster("kmeans", Some(2)).unwrap();
    assert_eq!(outcome.method, "kmeans");
    // No more than k clusters can come back; fewer is acceptable.
    assert!(outcome.clusters.len() <= 2);
    assert_eq!(outcome.num_documents, 3);
}
/// Hierarchical clustering should report its method and yield at least
/// one cluster for a non-empty corpus.
#[test]
fn test_cluster_hierarchical() {
    let project = create_test_project();
    let mut engine = LocalSemanticEngine::new();
    engine.index_directory(project.path(), None).unwrap();
    let outcome = engine.cluster("hierarchical", Some(2)).unwrap();
    assert_eq!(outcome.method, "hierarchical");
    assert!(!outcome.clusters.is_empty());
}
/// DBSCAN derives the cluster count itself, so no `k` is supplied.
#[test]
fn test_cluster_dbscan() {
    let project = create_test_project();
    let mut engine = LocalSemanticEngine::new();
    engine.index_directory(project.path(), None).unwrap();
    let outcome = engine.cluster("dbscan", None).unwrap();
    assert_eq!(outcome.method, "dbscan");
}
/// Out-of-range topic counts must be rejected.
///
/// NOTE(review): the probes imply a supported range of roughly 1..=20 —
/// confirm against `extract_topics`'s validation.
#[test]
fn test_invalid_num_topics() {
    let project = create_test_project();
    let mut engine = LocalSemanticEngine::new();
    engine.index_directory(project.path(), None).unwrap();
    // Below the minimum and above the maximum, respectively.
    assert!(engine.extract_topics(0, None).is_err());
    assert!(engine.extract_topics(21, None).is_err());
}
/// Indexing a directory that contains no source files is an error,
/// not a silently empty index.
#[test]
fn test_empty_directory() {
    let empty = TempDir::new().expect("create temp dir");
    let mut engine = LocalSemanticEngine::new();
    assert!(engine.index_directory(empty.path(), None).is_err());
}
/// A fitted TF-IDF embedder must emit vectors of the configured width
/// with at least one non-zero component for in-vocabulary text.
#[test]
fn test_trueno_rag_tfidf_embedder_basic() {
    use trueno_rag::embed::{Embedder, TfIdfEmbedder};
    let corpus = [
        "fn main() { println!(\"hello world\"); }",
        "fn add(a: i32, b: i32) -> i32 { a + b }",
        "fn subtract(a: i32, b: i32) -> i32 { a - b }",
    ];
    let mut embedder = TfIdfEmbedder::new(50);
    embedder.fit(corpus.as_ref());
    let vector = embedder.embed(corpus[0]).unwrap();
    assert_eq!(
        vector.len(),
        50,
        "Should produce embeddings of specified dimension"
    );
    assert!(
        vector.iter().any(|&v| v != 0.0),
        "Embedding should have non-zero values"
    );
}
/// Every non-zero TF-IDF embedding should be L2-normalized (unit length).
///
/// Fix: the original assertion contained an `|| all components == 0.0`
/// escape hatch, which is unreachable inside the `norm > 0.0` guard
/// (an all-zero vector has norm exactly 0.0). The dead clause only
/// obscured what is actually being checked, so it has been removed.
#[test]
fn test_trueno_rag_tfidf_normalization() {
    use trueno_rag::embed::{Embedder, TfIdfEmbedder};
    let documents = vec![
        "rust code function implementation",
        "python script module import",
        "javascript nodejs express api",
    ];
    let mut embedder = TfIdfEmbedder::new(100);
    embedder.fit(documents.as_ref());
    for doc in &documents {
        let embedding = embedder.embed(doc).unwrap();
        let norm: f32 = embedding.iter().map(|x| x * x).sum::<f32>().sqrt();
        // An all-zero embedding (norm == 0.0) is legitimately skipped;
        // anything with mass must be unit length within tolerance.
        if norm > 0.0 {
            assert!(
                (norm - 1.0).abs() < 0.01,
                "Embedding should be L2 normalized: norm = {}",
                norm
            );
        }
    }
}
/// Two near-identical Rust snippets should score closer to each other
/// than either does to an unrelated Python-style snippet.
#[test]
fn test_trueno_rag_tfidf_similarity() {
    use trueno_rag::embed::{Embedder, TfIdfEmbedder};
    let corpus = [
        "fn main() { println!(\"hello\"); }",
        "fn main() { println!(\"world\"); }",
        "class Animal { def speak(self): pass }",
    ];
    let mut embedder = TfIdfEmbedder::new(100);
    embedder.fit(corpus.as_ref());
    let rust_a = embedder.embed(corpus[0]).unwrap();
    let rust_b = embedder.embed(corpus[1]).unwrap();
    let python = embedder.embed(corpus[2]).unwrap();
    let sim_12 = cosine_similarity_f32(&rust_a, &rust_b);
    let sim_13 = cosine_similarity_f32(&rust_a, &python);
    assert!(
        sim_12 > sim_13,
        "Similar Rust functions should have higher similarity than Rust vs Python: sim_12={}, sim_13={}",
        sim_12,
        sim_13
    );
}
/// Batch embedding should yield one fixed-width vector per input.
#[test]
fn test_trueno_rag_tfidf_batch() {
    use trueno_rag::embed::{Embedder, TfIdfEmbedder};
    let corpus = [
        "function first() { return 1; }",
        "function second() { return 2; }",
        "function third() { return 3; }",
    ];
    let mut embedder = TfIdfEmbedder::new(50);
    embedder.fit(corpus.as_ref());
    let vectors = embedder.embed_batch(corpus.as_ref()).unwrap();
    assert_eq!(vectors.len(), 3);
    assert!(vectors.iter().all(|v| v.len() == 50));
}
/// Sanity-check the embedder's configured dimension and the rationale
/// for storing embeddings as `f32` rather than `f64`.
///
/// Fix: the original second assertion compared two hand-typed constants
/// (`4 * 1000` vs `8 * 1000`) — a tautology about the literals that
/// could never catch a regression. The byte counts are now derived from
/// `std::mem::size_of`, so the assertion states a fact about the types.
#[test]
fn test_trueno_rag_tfidf_memory_efficiency() {
    use trueno_rag::embed::{Embedder, TfIdfEmbedder};
    let embedder = TfIdfEmbedder::new(1000);
    assert_eq!(embedder.dimension(), 1000);
    // Per-document embedding footprint at 1000 dimensions.
    let memory_per_doc_f32 = std::mem::size_of::<f32>() * 1000;
    let memory_per_doc_f64 = std::mem::size_of::<f64>() * 1000;
    assert_eq!(
        memory_per_doc_f32 * 2,
        memory_per_doc_f64,
        "f32 should use half the memory of f64"
    );
}
/// Embedding a tiny document into a 100-dim space should leave most
/// components at zero.
#[test]
fn test_trueno_rag_tfidf_sparsity() {
    use trueno_rag::embed::{Embedder, TfIdfEmbedder};
    let corpus = ["fn main() {}", "def test(): pass", "function x() {}"];
    let mut embedder = TfIdfEmbedder::new(100);
    embedder.fit(corpus.as_ref());
    let embedding = embedder.embed("fn main()").unwrap();
    let populated = embedding.iter().filter(|&&v| v != 0.0).count();
    let sparsity = 1.0 - (populated as f64 / embedding.len() as f64);
    assert!(
        sparsity > 0.5,
        "Short documents should have sparse embeddings: sparsity = {}",
        sparsity
    );
}
/// Smoke test for IDF weighting: single-term probes should still embed
/// after fitting on a corpus with both ubiquitous and rare terms.
///
/// NOTE(review): the final assertion is deliberately loose. Depending
/// on the embedder's IDF smoothing, a term present in every document
/// ("common") may legitimately embed to all zeros, so only "at least
/// one of the two probes is non-zero" is checked. Consider tightening
/// once the smoothing behavior is pinned down.
#[test]
fn test_trueno_rag_tfidf_idf_correctness() {
    use trueno_rag::embed::{Embedder, TfIdfEmbedder};
    let corpus = [
        "common word common word",
        "common another common word",
        "rare unique common word",
    ];
    let mut embedder = TfIdfEmbedder::new(50);
    embedder.fit(corpus.as_ref());
    let common = embedder.embed("common").unwrap();
    let rare = embedder.embed("rare").unwrap();
    let nonzero = |v: &[f32]| v.iter().filter(|&&x| x != 0.0).count();
    assert!(nonzero(&common) >= 1 || nonzero(&rare) >= 1);
}
/// Cosine similarity between two `f32` vectors.
///
/// Returns 0.0 when either vector has zero magnitude, avoiding a
/// division by zero; otherwise `dot / (|v1| * |v2|)`.
fn cosine_similarity_f32(v1: &[f32], v2: &[f32]) -> f32 {
    let magnitude = |v: &[f32]| v.iter().fold(0.0f32, |acc, x| acc + x * x).sqrt();
    let (m1, m2) = (magnitude(v1), magnitude(v2));
    if m1 == 0.0 || m2 == 0.0 {
        return 0.0;
    }
    let dot = v1.iter().zip(v2).fold(0.0f32, |acc, (a, b)| acc + a * b);
    dot / (m1 * m2)
}
}