use pmat::services::semantic::topic_modeling::*;
use pmat::services::semantic::TursoVectorDB;
use std::sync::Arc;
use tempfile::TempDir;
/// Builds a `TopicEngine` backed by a fresh local Turso vector database.
///
/// Returns the engine together with the `TempDir` guard; callers must keep
/// the guard alive for the test's duration so the DB file is not removed.
async fn setup_engine() -> (TopicEngine, TempDir) {
    let dir = TempDir::new().unwrap();
    let db = TursoVectorDB::new_local(dir.path().join("topic_test.db"))
        .await
        .unwrap();
    (TopicEngine::new(Arc::new(db)), dir)
}
/// Smoke test: requesting 3 topics reports `num_topics == 3`, returns at
/// most 3 topics, and yields a coherence score inside [0, 1].
#[tokio::test]
async fn test_extract_topics_basic() {
    let (engine, _temp) = setup_engine().await;

    let outcome = engine
        .extract_topics(3, TopicFilters::default())
        .await
        .unwrap();

    assert_eq!(outcome.num_topics, 3);
    assert!(outcome.topics.len() <= 3);
    assert!((0.0..=1.0).contains(&outcome.coherence_score));
}
/// Structural invariants of a topic result: topic ids are in range,
/// keywords are present, and all strengths/probabilities lie in [0, 1]
/// with non-empty chunk identifiers.
#[tokio::test]
async fn test_topic_result_structure() {
    let (engine, _temp) = setup_engine().await;

    let outcome = engine
        .extract_topics(2, TopicFilters::default())
        .await
        .unwrap();

    assert_eq!(outcome.num_topics, 2);
    for t in &outcome.topics {
        assert!(t.id < outcome.num_topics);
        assert!(!t.keywords.is_empty());
        assert!((0.0..=1.0).contains(&t.strength));
        for c in &t.top_chunks {
            assert!(!c.file_path.is_empty());
            assert!(!c.chunk_name.is_empty());
            assert!((0.0..=1.0).contains(&c.topic_probability));
        }
    }
}
/// Out-of-range topic counts are rejected: 0 (below minimum) and 25
/// (above maximum) must both produce an error.
#[tokio::test]
async fn test_extract_topics_invalid_count() {
    let (engine, _temp) = setup_engine().await;

    for bad_count in [0, 25] {
        let outcome = engine
            .extract_topics(bad_count, TopicFilters::default())
            .await;
        assert!(outcome.is_err());
    }
}
/// Extracting topics from a freshly created (empty) database succeeds
/// and reports zero processed chunks.
#[tokio::test]
async fn test_extract_topics_empty_data() {
    let (engine, _temp) = setup_engine().await;

    let outcome = engine.extract_topics(3, TopicFilters::default()).await;

    assert!(outcome.is_ok());
    assert_eq!(outcome.unwrap().total_chunks, 0);
}
/// Keywords extracted from chunk names are non-empty, bounded by the
/// requested count, and free of duplicates.
///
/// The original duplicate check cloned the whole keyword vector, sorted
/// it, and dedup'd just to compare lengths; a single-pass `HashSet`
/// insertion check is both cheaper and reports the offending data.
#[tokio::test]
async fn test_topic_keywords_extraction() {
    use std::collections::HashSet;

    let (engine, _temp) = setup_engine().await;
    let chunk_names = vec![
        "handle_error".to_string(),
        "error_handler".to_string(),
        "process_data".to_string(),
        "data_processor".to_string(),
    ];

    let keywords = engine.extract_keywords(&chunk_names, 3);
    assert!(!keywords.is_empty());
    assert!(keywords.len() <= 3);

    // Every keyword must insert successfully into the set exactly once.
    let mut seen = HashSet::new();
    assert!(
        keywords.iter().all(|k| seen.insert(k)),
        "duplicate keywords returned: {:?}",
        keywords
    );
}
/// Every computed topic strength is a probability-like value in [0, 1].
#[tokio::test]
async fn test_topic_strength_computation() {
    let (engine, _temp) = setup_engine().await;

    let outcome = engine
        .extract_topics(2, TopicFilters::default())
        .await
        .unwrap();

    assert!(outcome
        .topics
        .iter()
        .all(|t| (0.0..=1.0).contains(&t.strength)));
}
/// Coherence scores stay within [0, 1]; when more than one topic was
/// produced the score is additionally expected to be strictly positive.
#[tokio::test]
async fn test_coherence_score_computation() {
    let (engine, _temp) = setup_engine().await;

    let outcome = engine
        .extract_topics(3, TopicFilters::default())
        .await
        .unwrap();

    assert!((0.0..=1.0).contains(&outcome.coherence_score));
    if outcome.topics.len() > 1 {
        // A multi-topic model should never report zero coherence.
        assert!(outcome.coherence_score > 0.0);
    }
}
/// A language filter restricts results: every chunk surfaced under a
/// "rust" filter must report language "rust".
#[tokio::test]
async fn test_extract_topics_with_language_filter() {
    let (engine, _temp) = setup_engine().await;

    let rust_only = TopicFilters {
        language: Some("rust".to_string()),
        chunk_type: None,
        file_pattern: None,
    };
    let outcome = engine.extract_topics(2, rust_only).await.unwrap();

    for chunk in outcome.topics.iter().flat_map(|t| t.top_chunks.iter()) {
        assert_eq!(chunk.language, "rust");
    }
}
/// Every chunk assigned to a topic carries a strictly positive
/// topic probability.
#[tokio::test]
async fn test_chunk_topic_assignment() {
    let (engine, _temp) = setup_engine().await;

    let outcome = engine
        .extract_topics(3, TopicFilters::default())
        .await
        .unwrap();

    assert!(outcome
        .topics
        .iter()
        .flat_map(|t| t.top_chunks.iter())
        .all(|c| c.topic_probability > 0.0));
}
/// Chunk-level topic probabilities all fall within the valid range [0, 1].
#[tokio::test]
async fn test_topic_probability_distribution() {
    let (engine, _temp) = setup_engine().await;

    let outcome = engine
        .extract_topics(3, TopicFilters::default())
        .await
        .unwrap();

    for chunk in outcome.topics.iter().flat_map(|t| t.top_chunks.iter()) {
        assert!((0.0..=1.0).contains(&chunk.topic_probability));
    }
}