#![cfg_attr(coverage_nightly, coverage(off))]
use crate::services::semantic::{
ClusterFilters, ClusteringEngine, ClusteringMethod, HybridSearchEngine, HybridSearchMode,
HybridSearchQuery, Linkage, SemanticSearchEngine, TopicEngine, TopicFilters, TursoVectorDB,
};
use std::path::PathBuf;
use std::sync::Arc;
/// Command-line facade that bundles the semantic engines behind the CLI subcommands.
///
/// Every engine is stored behind an `Arc`; the instances are built in `SemanticCli::new`.
pub struct SemanticCli {
/// Used by `embed_sync` (directory indexing) and `semantic_similar`.
search_engine: Arc<SemanticSearchEngine>,
/// Used by `semantic_search`.
hybrid_engine: Arc<HybridSearchEngine>,
/// Used by `analyze_cluster`.
clustering_engine: Arc<ClusteringEngine>,
/// Used by `analyze_topics`.
topic_engine: Arc<TopicEngine>,
}
impl SemanticCli {
/// Builds a `SemanticCli` with all four engines wired up.
///
/// `db_path` is handed to `TursoVectorDB::new_local`, `SemanticSearchEngine::new` and
/// `HybridSearchEngine::new`; `workspace_path` is forwarded only to the hybrid engine.
/// The clustering and topic engines each receive a clone of the same
/// `Arc<TursoVectorDB>` handle.
///
/// # Errors
/// Propagates (via `?`) the failure of whichever constructor fails first.
#[provable_contracts_macros::contract("pmat-core.yaml", equation = "path_exists")]
pub async fn new(db_path: &str, workspace_path: &std::path::Path) -> Result<Self, String> {
    // Awaited in the same order as before: vector DB, search engine, hybrid engine.
    let shared_db = Arc::new(TursoVectorDB::new_local(db_path).await?);
    let search = SemanticSearchEngine::new(db_path).await?;
    let hybrid = HybridSearchEngine::new(db_path, workspace_path).await?;

    Ok(Self {
        search_engine: Arc::new(search),
        hybrid_engine: Arc::new(hybrid),
        clustering_engine: Arc::new(ClusteringEngine::new(Arc::clone(&shared_db))),
        topic_engine: Arc::new(TopicEngine::new(Arc::clone(&shared_db))),
    })
}
/// Indexes `directory` through the search engine and returns a one-line summary.
///
/// The summary reads `Synced N chunks (C created, U updated)`. When `language` is
/// `Some`, the suffix ` [filtered by: <language>]` is appended.
///
/// NOTE(review): `language` is only echoed into the message. It is never passed to
/// `index_directory`, so no filtering happens in this method — confirm that is intended.
///
/// # Errors
/// Propagates any failure reported by `index_directory`.
#[provable_contracts_macros::contract("pmat-core.yaml", equation = "path_exists")]
pub async fn embed_sync(
    &self,
    directory: &PathBuf,
    language: Option<String>,
) -> Result<String, String> {
    let stats = self.search_engine.index_directory(directory).await?;
    let summary = format!(
        "Synced {} chunks ({} created, {} updated)",
        stats.total_chunks, stats.created, stats.updated
    );
    Ok(match language {
        Some(lang) => format!("{} [filtered by: {}]", summary, lang),
        None => summary,
    })
}
/// Reports the embedding database status.
///
/// Currently returns a fixed message ("0 chunks indexed") and consults no engine.
#[provable_contracts_macros::contract("pmat-core.yaml", equation = "check_compliance")]
pub async fn embed_status(&self) -> Result<String, String> {
    let report = String::from("Embedding database status: 0 chunks indexed");
    Ok(report)
}
/// Acknowledges a request to clear all embeddings.
///
/// This method only produces a message; it does not touch any engine itself.
///
/// # Errors
/// Returns an error when `confirm` is `false`.
#[provable_contracts_macros::contract("pmat-core.yaml", equation = "check_compliance")]
pub async fn embed_clear(&self, confirm: bool) -> Result<String, String> {
    if confirm {
        Ok(String::from("All embeddings cleared"))
    } else {
        Err(String::from("Clear operation requires --confirm flag"))
    }
}
/// Runs a search through the hybrid engine and reports how many results came back.
///
/// * `query` - search text; rejected when empty or whitespace-only.
/// * `mode` - exactly one of `"keyword"`, `"vector"` or `"hybrid"` (case-sensitive).
/// * `limit` - forwarded unchanged as the query's `limit`.
/// * `language` - forwarded unchanged as the query's `language_filter`.
///
/// Keyword and vector weights are both fixed at `0.5` and no file pattern is set.
/// Only the result count is returned, not the results themselves.
///
/// # Errors
/// Fails on an empty query, an unknown `mode`, or when the engine's `search` fails.
#[provable_contracts_macros::contract("pmat-core.yaml", equation = "check_compliance")]
pub async fn semantic_search(
    &self,
    query: &str,
    mode: &str,
    limit: usize,
    language: Option<String>,
) -> Result<String, String> {
    // The query is validated before the mode, so an empty query wins when both are bad.
    if query.trim().is_empty() {
        return Err("Query cannot be empty".to_string());
    }

    let parsed_mode = match mode {
        "hybrid" => HybridSearchMode::Hybrid,
        "vector" => HybridSearchMode::VectorOnly,
        "keyword" => HybridSearchMode::KeywordOnly,
        other => return Err(format!("Invalid mode: {}", other)),
    };

    let request = HybridSearchQuery {
        query: query.to_string(),
        mode: parsed_mode,
        keyword_weight: 0.5,
        vector_weight: 0.5,
        language_filter: language,
        file_pattern: None,
        limit,
    };

    let hits = self.hybrid_engine.search(&request).await?;
    let hit_count = hits.len();
    Ok(format!("Found {} results for query: {}", hit_count, query))
}
/// Lists code chunks that the search engine considers similar to `file`.
///
/// The path is converted with `to_string_lossy` before being passed to `find_similar`
/// together with `limit`. The output is a header line followed by one numbered entry
/// per result (file path, chunk name, line range, similarity to two decimals, snippet).
/// When nothing matches, a single "No similar code found" line is returned.
///
/// # Errors
/// Fails when `file` does not exist or when `find_similar` fails.
#[provable_contracts_macros::contract("pmat-core.yaml", equation = "path_exists")]
pub async fn semantic_similar(&self, file: &PathBuf, limit: usize) -> Result<String, String> {
    if !file.exists() {
        return Err(format!("File not found: {}", file.display()));
    }

    let lookup_key = file.to_string_lossy();
    let matches = self.search_engine.find_similar(&lookup_key, limit).await?;
    if matches.is_empty() {
        return Ok(format!("No similar code found for: {}", file.display()));
    }

    let header = format!(
        "Found {} similar code chunks to: {}\n\n",
        matches.len(),
        file.display()
    );
    // Entries are numbered from 1 in the order the engine returned them.
    let listing: String = matches
        .iter()
        .enumerate()
        .map(|(idx, hit)| {
            format!(
                "{}. {} ({}:{}-{}) - similarity: {:.2}\n {}\n\n",
                idx + 1,
                hit.file_path,
                hit.chunk_name,
                hit.start_line,
                hit.end_line,
                hit.similarity_score,
                hit.snippet
            )
        })
        .collect();

    Ok(header + &listing)
}
/// Clusters the indexed chunks with the requested algorithm and reports the cluster count.
///
/// * `method` - exactly one of `"kmeans"`, `"hierarchical"` or `"dbscan"` (case-sensitive).
/// * `k` - number of clusters; required and must be at least 1 for `"kmeans"`, ignored
///   by the other methods.
///
/// Hierarchical clustering always uses average linkage; DBSCAN always runs with
/// `epsilon = 1.0` and `min_samples = 2`. Default `ClusterFilters` are applied.
///
/// # Errors
/// Fails on an unknown `method`, on `"kmeans"` without a usable `k`, or when the
/// clustering engine itself fails.
#[provable_contracts_macros::contract("pmat-core.yaml", equation = "check_compliance")]
pub async fn analyze_cluster(&self, method: &str, k: Option<usize>) -> Result<String, String> {
    let clustering_method = match method {
        "kmeans" => match k {
            None => return Err("K-means requires --k parameter".to_string()),
            // Zero clusters is never meaningful; reject it here instead of handing it to
            // the engine, mirroring the range check done for `analyze_topics`.
            Some(0) => return Err("K-means requires --k to be at least 1".to_string()),
            Some(k_val) => ClusteringMethod::KMeans { k: k_val },
        },
        "hierarchical" => ClusteringMethod::Hierarchical {
            linkage: Linkage::Average,
        },
        "dbscan" => ClusteringMethod::DBSCAN {
            epsilon: 1.0,
            min_samples: 2,
        },
        _ => return Err(format!("Invalid method: {}", method)),
    };

    let result = self
        .clustering_engine
        .cluster(clustering_method, ClusterFilters::default())
        .await?;
    Ok(format!("Clustered into {} clusters", result.clusters.len()))
}
/// Extracts topics through the topic engine and reports how many were produced.
///
/// * `num_topics` - requested topic count; must lie in `1..=20`.
/// * `language` - forwarded as the `language` field of the `TopicFilters`; the
///   chunk-type and file-pattern filters are left unset.
///
/// # Errors
/// Fails when `num_topics` is out of range or when `extract_topics` fails.
#[provable_contracts_macros::contract("pmat-core.yaml", equation = "check_compliance")]
pub async fn analyze_topics(
    &self,
    num_topics: usize,
    language: Option<String>,
) -> Result<String, String> {
    if !(1..=20).contains(&num_topics) {
        return Err("num_topics must be between 1 and 20".to_string());
    }

    let extraction = self
        .topic_engine
        .extract_topics(
            num_topics,
            TopicFilters {
                language,
                chunk_type: None,
                file_pattern: None,
            },
        )
        .await?;

    let topic_count = extraction.topics.len();
    Ok(format!("Extracted {} topics", topic_count))
}
}
#[cfg_attr(coverage_nightly, coverage(off))]
#[cfg(test)]
mod tests {
use super::*;
use tempfile::TempDir;
/// Builds a `SemanticCli` whose database file (`test.db`) lives in a fresh temporary
/// directory, which also serves as the workspace path.
///
/// The `TempDir` is handed back to the caller — presumably so the directory stays
/// alive for the duration of the test.
async fn setup_cli() -> (SemanticCli, TempDir) {
    let workspace = TempDir::new().unwrap();
    let db_file = workspace.path().join("test.db");
    let db_path = db_file.to_str().unwrap();
    let cli = SemanticCli::new(db_path, workspace.path()).await.unwrap();
    (cli, workspace)
}
/// Syncing the (empty) temporary workspace succeeds.
#[tokio::test]
async fn test_embed_sync_basic() {
    let (cli, workspace) = setup_cli().await;
    let target = workspace.path().to_path_buf();
    let outcome = cli.embed_sync(&target, None).await;
    assert!(outcome.is_ok());
}
#[tokio::test]
async fn test_embed_status() {
let (cli, _temp) = setup_cli().await;
let result = cli.embed_status().await;
assert!(result.is_ok());
assert!(result.unwrap().contains("chunks indexed"));
}
#[tokio::test]
async fn test_embed_clear_requires_confirm() {
let (cli, _temp) = setup_cli().await;
let result = cli.embed_clear(false).await;
assert!(result.is_err());
assert!(result.unwrap_err().contains("confirm"));
}
/// Clearing with confirmation succeeds.
#[tokio::test]
async fn test_embed_clear_with_confirm() {
    let (cli, _guard) = setup_cli().await;
    let outcome = cli.embed_clear(true).await;
    assert!(outcome.is_ok());
}
/// A well-formed hybrid search must pass input validation.
///
/// Searching a database with nothing indexed may legitimately fail inside the engine,
/// so both outcomes are accepted — but each is checked, unlike the previous
/// `is_ok() || is_err()` assertion, which could never fail.
#[tokio::test]
async fn test_semantic_search_basic() {
    let (cli, _temp) = setup_cli().await;
    let result = cli
        .semantic_search("error handling", "hybrid", 10, None)
        .await;
    match result {
        Ok(summary) => {
            assert!(summary.starts_with("Found "));
            assert!(summary.ends_with("results for query: error handling"));
        }
        Err(err) => {
            // A failure must come from the engine, not from query/mode validation.
            assert!(!err.contains("Query cannot be empty"));
            assert!(!err.contains("Invalid mode"));
        }
    }
}
#[tokio::test]
async fn test_semantic_search_empty_query() {
let (cli, _temp) = setup_cli().await;
let result = cli.semantic_search("", "hybrid", 10, None).await;
assert!(result.is_err());
assert!(result.unwrap_err().contains("empty"));
}
#[tokio::test]
async fn test_semantic_search_invalid_mode() {
let (cli, _temp) = setup_cli().await;
let result = cli.semantic_search("test", "invalid", 10, None).await;
assert!(result.is_err());
assert!(result.unwrap_err().contains("Invalid mode"));
}
#[tokio::test]
async fn test_semantic_similar_invalid_file() {
let (cli, _temp) = setup_cli().await;
let file = PathBuf::from("/nonexistent/file.rs");
let result = cli.semantic_similar(&file, 5).await;
assert!(result.is_err());
assert!(result.unwrap_err().contains("not found"));
}
/// K-means clustering with an explicit `k` succeeds.
#[tokio::test]
async fn test_analyze_cluster_kmeans() {
    let (cli, _guard) = setup_cli().await;
    let outcome = cli.analyze_cluster("kmeans", Some(3)).await;
    assert!(outcome.is_ok());
}
#[tokio::test]
async fn test_analyze_cluster_requires_k() {
let (cli, _temp) = setup_cli().await;
let result = cli.analyze_cluster("kmeans", None).await;
assert!(result.is_err());
assert!(result.unwrap_err().contains("requires --k"));
}
/// Hierarchical clustering needs no `k` and succeeds.
#[tokio::test]
async fn test_analyze_cluster_hierarchical() {
    let (cli, _guard) = setup_cli().await;
    let outcome = cli.analyze_cluster("hierarchical", None).await;
    assert!(outcome.is_ok());
}
/// Topic extraction with an in-range topic count succeeds.
#[tokio::test]
async fn test_analyze_topics_basic() {
    let (cli, _guard) = setup_cli().await;
    let outcome = cli.analyze_topics(5, None).await;
    assert!(outcome.is_ok());
}
/// Topic counts outside `1..=20` (here 0 and 25) are rejected.
#[tokio::test]
async fn test_analyze_topics_invalid_count() {
    let (cli, _guard) = setup_cli().await;
    for bad_count in [0usize, 25] {
        let rejection = cli.analyze_topics(bad_count, None).await;
        assert!(rejection.is_err());
    }
}
/// Topic extraction with a language filter succeeds.
#[tokio::test]
async fn test_analyze_topics_with_language() {
    let (cli, _guard) = setup_cli().await;
    let language = Some(String::from("rust"));
    let outcome = cli.analyze_topics(3, language).await;
    assert!(outcome.is_ok());
}
}