use anyhow::Result;
use async_trait::async_trait;
use serde::{Deserialize, Serialize};
use std::path::Path;
use std::sync::Arc;
pub mod duplicates;
pub mod integration_tests;
pub mod polyglot;
pub mod satd;
/// Common interface for the detectors managed by this module.
///
/// Each implementor picks its own input, output, and configuration types
/// through the associated types below. Implementors must be `Send + Sync`.
#[async_trait]
pub trait Detector: Send + Sync {
    /// Value handed to [`Detector::detect`] as the thing to analyse.
    type Input;
    /// Value produced by a successful [`Detector::detect`] call.
    type Output;
    /// Settings passed alongside the input on every [`Detector::detect`] call.
    type Config;
    /// Runs the detector over `input` using `config`.
    ///
    /// # Errors
    ///
    /// Returns an error when the implementation fails; the concrete failure
    /// conditions are defined by each implementor.
    async fn detect(&self, input: Self::Input, config: Self::Config) -> Result<Self::Output>;
    /// Returns a static name for this detector.
    fn name(&self) -> &'static str;
    /// Reports this detector's capability flags; see [`DetectorCapabilities`].
    fn capabilities(&self) -> DetectorCapabilities;
}
/// Capability flags a detector reports through [`Detector::capabilities`].
///
/// NOTE(review): nothing in this file reads these flags, so the exact meaning
/// of each one is defined by the detectors and their callers — confirm there.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct DetectorCapabilities {
    /// Presumably: the detector can handle several inputs in one call.
    pub supports_batch: bool,
    /// Presumably: the detector can work on input incrementally.
    pub supports_streaming: bool,
    /// Presumably: the detector is not tied to one programming language.
    pub language_agnostic: bool,
    /// Presumably: the detector needs a parsed syntax tree to operate.
    pub requires_ast: bool,
}
/// Name-keyed collection of detectors.
///
/// Every stored detector is a shared trait object whose associated types are
/// fixed to [`DetectionInput`], [`DetectionOutput`], and [`DetectionConfig`].
pub struct DetectionRegistry {
    /// Registered detectors, keyed by the name they were registered under.
    detectors: std::collections::HashMap<
        String,
        Arc<
            dyn Detector<
                Input = DetectionInput,
                Output = DetectionOutput,
                Config = DetectionConfig,
            >,
        >,
    >,
}
/// Input accepted by every detector stored in a [`DetectionRegistry`].
///
/// How each variant is interpreted is up to the individual detector.
#[derive(Debug, Clone)]
pub enum DetectionInput {
    /// A single path.
    SingleFile(std::path::PathBuf),
    /// A list of paths.
    MultipleFiles(Vec<std::path::PathBuf>),
    /// A path naming a project directory.
    ProjectDirectory(std::path::PathBuf),
    /// An owned string; presumably source text supplied without a file on disk.
    Content(String),
}
/// Result returned by a detector stored in a [`DetectionRegistry`].
///
/// Each variant wraps the result type of one detector submodule.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum DetectionOutput {
    /// Result type from the `duplicates` module.
    Duplicates(duplicates::DuplicateDetectionResult),
    /// Result type from the `satd` module.
    SATD(satd::SATDAnalysisResult),
    /// Result type from the `polyglot` module.
    Polyglot(polyglot::PolyglotAnalysis),
}
/// Configuration passed to a detector on every run.
///
/// The `Default` implementation uses no file limit, parallel processing
/// enabled, JSON output, and default duplicate-detection settings.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DetectionConfig {
    /// Optional file-count limit (`None` by default).
    /// NOTE(review): presumably an upper bound on files processed — this file
    /// never reads it, so confirm against the detectors.
    pub max_files: Option<usize>,
    /// Parallel-processing switch (`true` by default); honoured, if at all, by
    /// the individual detectors.
    pub parallel_processing: bool,
    /// Requested output format (`OutputFormat::Json` by default).
    pub output_format: OutputFormat,
    /// Settings specific to one detector.
    pub detector_specific: DetectorSpecificConfig,
}
/// Output format selector carried in [`DetectionConfig`].
///
/// This file only stores the choice; rendering happens elsewhere.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum OutputFormat {
    /// JSON output (the default used by `DetectionConfig::default()`).
    Json,
    /// YAML output.
    Yaml,
    /// Summary output; presumably a condensed human-readable form.
    Summary,
}
/// Detector-specific settings carried in [`DetectionConfig`].
///
/// Each variant wraps the configuration type of one detector submodule.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum DetectorSpecificConfig {
    /// Configuration type from the `duplicates` module.
    Duplicates(duplicates::DuplicateConfig),
    /// Configuration type from the `satd` module.
    SATD(satd::SATDConfig),
    /// Configuration type from the `polyglot` module.
    Polyglot(polyglot::PolyglotConfig),
}
impl DetectionRegistry {
    /// Creates a registry that already contains the built-in detectors
    /// (`"duplicates"`, `"satd"`, and `"polyglot"`).
    #[must_use]
    pub fn new() -> Self {
        let mut registry = Self {
            detectors: std::collections::HashMap::new(),
        };
        registry.register_defaults();
        registry
    }

    /// Registers one instance of each built-in detector under its fixed name.
    fn register_defaults(&mut self) {
        self.register("duplicates", Arc::new(duplicates::DuplicateDetector::new()));
        self.register("satd", Arc::new(satd::SATDDetector::new()));
        self.register("polyglot", Arc::new(polyglot::PolyglotDetector::new()));
    }

    /// Stores `detector` under `name`.
    ///
    /// A detector previously registered under the same name is replaced.
    pub fn register(
        &mut self,
        name: &str,
        detector: Arc<
            dyn Detector<
                Input = DetectionInput,
                Output = DetectionOutput,
                Config = DetectionConfig,
            >,
        >,
    ) {
        self.detectors.insert(name.to_string(), detector);
    }

    /// Returns a shared handle to the detector registered under `name`, or
    /// `None` when no detector has that name.
    #[must_use]
    pub fn get_detector(
        &self,
        name: &str,
    ) -> Option<
        Arc<
            dyn Detector<
                Input = DetectionInput,
                Output = DetectionOutput,
                Config = DetectionConfig,
            >,
        >,
    > {
        self.detectors.get(name).cloned()
    }

    /// Returns the names of all registered detectors in ascending order.
    #[must_use]
    pub fn list_detectors(&self) -> Vec<&str> {
        let mut names: Vec<&str> = self.detectors.keys().map(String::as_str).collect();
        // `HashMap` iteration order is unspecified; sort so that listings are
        // reproducible from run to run.
        names.sort_unstable();
        names
    }

    /// Runs the detector registered under `detector_name` on `input`.
    ///
    /// # Errors
    ///
    /// Returns an error when no detector is registered under `detector_name`,
    /// or when the selected detector itself fails.
    pub async fn detect(
        &self,
        detector_name: &str,
        input: DetectionInput,
        config: DetectionConfig,
    ) -> Result<DetectionOutput> {
        // Borrow the stored handle directly; no extra `Arc` clone is needed
        // because `self` outlives the call.
        let detector = self
            .detectors
            .get(detector_name)
            .ok_or_else(|| anyhow::anyhow!("Unknown detector: {detector_name}"))?;
        detector.detect(input, config).await
    }
}
impl Default for DetectionRegistry {
fn default() -> Self {
Self::new()
}
}
impl Default for DetectionConfig {
fn default() -> Self {
Self {
max_files: None,
parallel_processing: true,
output_format: OutputFormat::Json,
detector_specific: DetectorSpecificConfig::Duplicates(
duplicates::DuplicateConfig::default(),
),
}
}
}
/// Convenience front end that owns a [`DetectionRegistry`] and exposes one
/// typed method per built-in detector.
pub struct UnifiedDetectionProcessor {
    /// Registry every detection call is dispatched through.
    registry: DetectionRegistry,
}
impl UnifiedDetectionProcessor {
#[must_use]
pub fn new() -> Self {
Self {
registry: DetectionRegistry::new(),
}
}
pub async fn detect_duplicates(
&self,
files: Vec<std::path::PathBuf>,
) -> Result<duplicates::DuplicateDetectionResult> {
let input = DetectionInput::MultipleFiles(files);
let config = DetectionConfig {
detector_specific: DetectorSpecificConfig::Duplicates(
duplicates::DuplicateConfig::default(),
),
..Default::default()
};
match self.registry.detect("duplicates", input, config).await? {
DetectionOutput::Duplicates(result) => Ok(result),
_ => Err(anyhow::anyhow!(
"Invalid output type for duplicates detector"
)),
}
}
pub async fn detect_satd(&self, project_path: &Path) -> Result<satd::SATDAnalysisResult> {
let input = DetectionInput::ProjectDirectory(project_path.to_path_buf());
let config = DetectionConfig {
detector_specific: DetectorSpecificConfig::SATD(satd::SATDConfig::default()),
..Default::default()
};
match self.registry.detect("satd", input, config).await? {
DetectionOutput::SATD(result) => Ok(result),
_ => Err(anyhow::anyhow!("Invalid output type for SATD detector")),
}
}
pub async fn analyze_polyglot(
&self,
project_path: &Path,
) -> Result<polyglot::PolyglotAnalysis> {
let input = DetectionInput::ProjectDirectory(project_path.to_path_buf());
let config = DetectionConfig {
detector_specific: DetectorSpecificConfig::Polyglot(polyglot::PolyglotConfig::default()),
..Default::default()
};
match self.registry.detect("polyglot", input, config).await? {
DetectionOutput::Polyglot(result) => Ok(result),
_ => Err(anyhow::anyhow!("Invalid output type for polyglot detector")),
}
}
#[must_use]
pub fn available_detectors(&self) -> Vec<&str> {
self.registry.list_detectors()
}
}
impl Default for UnifiedDetectionProcessor {
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Detector names a freshly constructed registry is expected to contain.
    const EXPECTED_NAMES: [&str; 3] = ["duplicates", "satd", "polyglot"];

    /// A new registry exposes exactly the three built-in detectors.
    #[tokio::test]
    async fn test_detection_registry_creation() {
        let registry = DetectionRegistry::new();
        let names = registry.list_detectors();
        for expected in EXPECTED_NAMES.iter() {
            assert!(names.contains(expected));
        }
        assert_eq!(names.len(), 3);
    }

    /// A new processor reports the same three built-in detectors.
    #[tokio::test]
    async fn test_unified_processor() {
        let processor = UnifiedDetectionProcessor::new();
        let names = processor.available_detectors();
        for expected in EXPECTED_NAMES.iter() {
            assert!(names.contains(expected));
        }
        assert_eq!(names.len(), 3);
    }

    /// `DetectionConfig::default()` yields the documented default values.
    #[test]
    fn test_detection_config_default() {
        let DetectionConfig {
            max_files,
            parallel_processing,
            output_format,
            detector_specific,
        } = DetectionConfig::default();
        assert!(parallel_processing);
        assert!(max_files.is_none());
        assert_eq!(output_format, OutputFormat::Json);
        assert!(matches!(
            detector_specific,
            DetectorSpecificConfig::Duplicates(_)
        ));
    }

    /// Capability flags are stored exactly as supplied.
    #[test]
    fn test_detector_capabilities() {
        let DetectorCapabilities {
            supports_batch,
            supports_streaming,
            language_agnostic,
            requires_ast,
        } = DetectorCapabilities {
            supports_batch: true,
            supports_streaming: false,
            language_agnostic: true,
            requires_ast: false,
        };
        assert!(supports_batch);
        assert!(!supports_streaming);
        assert!(language_agnostic);
        assert!(!requires_ast);
    }
}
#[cfg(test)]
mod additional_tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Capability flags are stored as given, and the derived `Clone` and
    /// `PartialEq` implementations agree with each other.
    #[test]
    fn test_detector_capabilities() {
        let caps = DetectorCapabilities {
            supports_batch: true,
            supports_streaming: false,
            language_agnostic: true,
            requires_ast: false,
        };
        assert!(caps.supports_batch);
        assert!(!caps.supports_streaming);
        assert!(caps.language_agnostic);
        assert!(!caps.requires_ast);

        // A clone compares equal; flipping one flag makes it compare unequal.
        assert_eq!(caps.clone(), caps);
        let flipped = DetectorCapabilities {
            supports_streaming: true,
            ..caps.clone()
        };
        assert_ne!(flipped, caps);
    }

    /// Explicitly supplied configuration values are stored unchanged.
    #[test]
    fn test_detection_config_custom() {
        let config = DetectionConfig {
            max_files: Some(100),
            parallel_processing: false,
            output_format: OutputFormat::Yaml,
            detector_specific: DetectorSpecificConfig::SATD(satd::SATDConfig::default()),
        };
        assert_eq!(config.max_files, Some(100));
        assert!(!config.parallel_processing);
        assert_eq!(config.output_format, OutputFormat::Yaml);
        assert!(matches!(
            config.detector_specific,
            DetectorSpecificConfig::SATD(_)
        ));
    }

    /// Output format variants compare equal to themselves and differ from
    /// each other.
    #[test]
    fn test_output_format_enum() {
        let json_format = OutputFormat::Json;
        let yaml_format = OutputFormat::Yaml;
        let summary_format = OutputFormat::Summary;
        assert_eq!(json_format, OutputFormat::Json);
        assert_eq!(yaml_format, OutputFormat::Yaml);
        assert_eq!(summary_format, OutputFormat::Summary);
        assert_ne!(json_format, yaml_format);
    }

    /// Every input variant can be constructed and matched.
    #[test]
    fn test_detection_input_variants() {
        let single = DetectionInput::SingleFile(std::path::PathBuf::from("/test.rs"));
        let multiple = DetectionInput::MultipleFiles(vec![
            std::path::PathBuf::from("/file1.rs"),
            std::path::PathBuf::from("/file2.rs"),
        ]);
        let project = DetectionInput::ProjectDirectory(std::path::PathBuf::from("/project"));
        let content = DetectionInput::Content("test content".to_string());
        assert!(matches!(single, DetectionInput::SingleFile(_)));
        assert!(matches!(multiple, DetectionInput::MultipleFiles(_)));
        assert!(matches!(project, DetectionInput::ProjectDirectory(_)));
        assert!(matches!(content, DetectionInput::Content(_)));
    }

    /// A new processor exposes the three built-in detectors.
    #[tokio::test]
    async fn test_unified_detection_processor_creation() {
        let processor = UnifiedDetectionProcessor::new();
        let detectors = processor.available_detectors();
        assert_eq!(detectors.len(), 3);
        assert!(detectors.contains(&"duplicates"));
        assert!(detectors.contains(&"satd"));
        assert!(detectors.contains(&"polyglot"));
    }

    /// Smoke test: duplicate detection on an empty file list must complete
    /// without panicking. Whether it returns `Ok` or `Err` is deliberately
    /// not pinned here.
    #[tokio::test]
    async fn test_detect_duplicates_empty_list() {
        let processor = UnifiedDetectionProcessor::new();
        let _outcome = processor.detect_duplicates(Vec::new()).await;
    }

    /// Smoke test: SATD detection on a small temporary project must complete
    /// without panicking. Whether it returns `Ok` or `Err` is deliberately
    /// not pinned here.
    #[tokio::test]
    async fn test_detect_satd_with_temp_project() {
        let temp_dir = TempDir::new().expect("failed to create temporary directory");
        let test_file = temp_dir.path().join("test.rs");
        fs::write(
            &test_file,
            r#"
// TODO: Fix this later
fn bad_function() {
// FIXME: This is broken
panic!("Not implemented");
}
"#,
        )
        .expect("failed to write fixture file");
        let processor = UnifiedDetectionProcessor::new();
        let _outcome = processor.detect_satd(temp_dir.path()).await;
    }

    /// When polyglot analysis succeeds on a three-language fixture it must
    /// report at least one language; a failure is tolerated.
    #[tokio::test]
    async fn test_analyze_polyglot_project() {
        let temp_dir = TempDir::new().expect("failed to create temporary directory");
        fs::write(temp_dir.path().join("main.rs"), "fn main() {}")
            .expect("failed to write main.rs");
        fs::write(temp_dir.path().join("script.py"), "def main(): pass")
            .expect("failed to write script.py");
        fs::write(temp_dir.path().join("app.js"), "function main() {}")
            .expect("failed to write app.js");
        let processor = UnifiedDetectionProcessor::new();
        if let Ok(polyglot_analysis) = processor.analyze_polyglot(temp_dir.path()).await {
            assert!(!polyglot_analysis.languages.is_empty());
        }
    }

    /// The registry lists exactly the three built-in names, in any order.
    #[test]
    fn test_registry_list_detectors() {
        let registry = DetectionRegistry::new();
        let detectors = registry.list_detectors();
        assert_eq!(detectors.len(), 3);
        let detector_set: std::collections::HashSet<_> = detectors.into_iter().collect();
        assert!(detector_set.contains("duplicates"));
        assert!(detector_set.contains("satd"));
        assert!(detector_set.contains("polyglot"));
    }

    /// Asking for an unregistered detector yields an error naming it.
    #[tokio::test]
    async fn test_registry_detect_with_unknown_detector() {
        let registry = DetectionRegistry::new();
        let input = DetectionInput::Content("test".to_string());
        let config = DetectionConfig::default();
        let message = registry
            .detect("unknown_detector", input, config)
            .await
            .expect_err("an unregistered detector name must be rejected")
            .to_string();
        assert!(message.contains("Unknown detector"));
        assert!(message.contains("unknown_detector"));
    }
}
#[cfg(test)]
mod property_tests {
    use super::{DetectionConfig, DetectionRegistry, DetectorSpecificConfig, OutputFormat};
    use proptest::prelude::*;

    /// Names registered by `DetectionRegistry::new()`.
    const BUILT_IN: [&str; 3] = ["duplicates", "satd", "polyglot"];

    proptest! {
        // Each case builds a fresh registry, so keep the case count modest.
        #![proptest_config(ProptestConfig::with_cases(64))]

        /// A lookup in a fresh registry succeeds exactly when the requested
        /// name is one of the built-in detector names.
        #[test]
        fn basic_property_stability(input in ".*") {
            let registry = DetectionRegistry::new();
            prop_assert_eq!(
                registry.get_detector(&input).is_some(),
                BUILT_IN.contains(&input.as_str())
            );
        }

        /// Overriding `max_files` through struct-update syntax keeps every
        /// other field at its default, and cloning preserves the override.
        #[test]
        fn module_consistency_check(x in 0usize..1000) {
            let config = DetectionConfig {
                max_files: Some(x),
                ..DetectionConfig::default()
            };
            prop_assert_eq!(config.max_files, Some(x));
            prop_assert!(config.parallel_processing);
            prop_assert_eq!(&config.output_format, &OutputFormat::Json);
            prop_assert!(matches!(
                config.detector_specific,
                DetectorSpecificConfig::Duplicates(_)
            ));
            prop_assert_eq!(config.clone().max_files, Some(x));
        }
    }
}