#![allow(unused)]
#![cfg_attr(coverage_nightly, coverage(off))]
use serde::{Deserialize, Serialize};
use std::collections::{HashMap, HashSet};
use std::path::PathBuf;
/// Tunable settings for similarity detection.
///
/// An instance is stored in the `config` field of [`SimilarityDetector`]. The
/// code that consumes these values is not part of this file.
/// NOTE(review): the field meanings below are inferred from the field names;
/// confirm against the detector implementation.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SimilarityConfig {
/// Line-count threshold (`Default` uses 6). Presumably the smallest block
/// size, in lines, that is considered at all — TODO confirm.
pub min_lines: usize,
/// Token-count threshold (`Default` uses 50). Presumably the smallest block
/// size, in tokens, that is considered at all — TODO confirm.
pub min_tokens: usize,
/// Similarity cut-off (`Default` uses 0.7). Presumably a score on a 0.0–1.0
/// scale at or above which two blocks are reported — TODO confirm.
pub similarity_threshold: f64,
/// Switch for the entropy analysis (`Default` enables it).
pub enable_entropy: bool,
/// Switch for the AST-based analysis (`Default` enables it).
pub enable_ast: bool,
/// Switch for the semantic analysis (`Default` enables it).
pub enable_semantic: bool,
/// Window size (`Default` uses 40). Shares its name with the
/// `window_size` field of [`Winnowing`]; presumably forwarded there.
pub window_size: usize,
/// K-gram size (`Default` uses 15). Shares its name with the
/// `k_gram_size` field of [`Winnowing`]; presumably forwarded there.
pub k_gram_size: usize,
}
impl Default for SimilarityConfig {
fn default() -> Self {
Self {
min_lines: 6,
min_tokens: 50,
similarity_threshold: 0.7,
enable_entropy: true,
enable_ast: true,
enable_semantic: true,
window_size: 40,
k_gram_size: 15,
}
}
}
/// Category attached to a [`SimilarBlock`] through its `clone_type` field.
///
/// The variant names match the conventional code-clone taxonomy.
/// NOTE(review): the per-variant descriptions below follow that convention
/// and are not established by this file; confirm against the code that
/// assigns them.
///
/// `Eq` is derived alongside `PartialEq`: the enum has no payload, so
/// equality is total, and deriving `Eq` lets values be used wherever full
/// equivalence is required.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum CloneType {
    /// Presumably exact copies (differences limited to whitespace/comments).
    Type1,
    /// Presumably copies with renamed identifiers or changed literals.
    Type2,
    /// Presumably near-miss copies with added, removed or changed statements.
    Type3,
    /// Presumably semantically equivalent code with different syntax.
    Type4,
}
/// One reported group of similar code, as listed in the three clone lists of
/// [`ComprehensiveReport`].
///
/// NOTE(review): how these fields are populated is not visible in this file;
/// the per-field notes are inferred from names and types.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SimilarBlock {
/// Identifier of this group (format not defined in this file).
pub id: String,
/// Source positions that belong to this group.
pub locations: Vec<Location>,
/// Similarity score for the group; presumably on the same scale as
/// `SimilarityConfig::similarity_threshold` — TODO confirm.
pub similarity: f64,
/// Clone category assigned to the group.
pub clone_type: CloneType,
/// Line count associated with the block.
pub lines: usize,
/// Token count associated with the block.
pub tokens: usize,
/// Text excerpt of the block's content (length/truncation not defined here).
pub content_preview: String,
}
/// A span inside a source file, identified by path and line range with an
/// optional column range.
///
/// NOTE(review): this file does not say whether lines/columns are 0- or
/// 1-based, or whether `end_line` is inclusive — confirm with the producers.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Location {
/// Path of the file containing the span.
pub file: PathBuf,
/// First line of the span.
pub start_line: usize,
/// Last line of the span.
pub end_line: usize,
/// Starting column, when known.
pub start_column: Option<usize>,
/// Ending column, when known.
pub end_column: Option<usize>,
}
/// Result of the entropy analysis; carried by
/// `ComprehensiveReport::entropy_analysis` when present.
///
/// NOTE(review): the entropy measure and the high/low cut-offs are defined
/// outside this file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EntropyReport {
/// Mean entropy value for the analyzed input.
pub average_entropy: f64,
/// Blocks classified as high-entropy.
pub high_entropy_blocks: Vec<EntropyBlock>,
/// Blocks classified as low-entropy patterns.
pub low_entropy_patterns: Vec<EntropyBlock>,
/// Free-text recommendations derived from the analysis.
pub recommendations: Vec<String>,
}
/// A single entry in the high- or low-entropy lists of [`EntropyReport`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EntropyBlock {
/// Where the block sits in the source.
pub location: Location,
/// Entropy value computed for the block.
pub entropy: f64,
/// Category label for the block (the set of labels is not defined here).
pub category: String,
/// Suggested action for the block, as free text.
pub suggestion: String,
}
/// A refactoring suggestion, as listed in
/// `ComprehensiveReport::refactoring_opportunities`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RefactoringHint {
/// Source positions the hint refers to.
pub locations: Vec<Location>,
/// Description of the detected pattern, as free text.
pub pattern: String,
/// Suggested change, as free text.
pub suggestion: String,
/// How urgent the hint is.
pub priority: Priority,
}
/// Urgency level attached to a [`RefactoringHint`].
///
/// NOTE(review): the criteria for choosing a level are defined outside this
/// file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum Priority {
/// Highest urgency level.
High,
/// Middle urgency level.
Medium,
/// Lowest urgency level.
Low,
}
/// Aggregate output combining clone lists, the optional entropy report,
/// refactoring hints and summary metrics.
///
/// NOTE(review): the mapping from [`CloneType`] variants to the three clone
/// lists is not established in this file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ComprehensiveReport {
/// Blocks reported as exact duplicates.
pub exact_duplicates: Vec<SimilarBlock>,
/// Blocks reported as structurally similar.
pub structural_similarities: Vec<SimilarBlock>,
/// Blocks reported as semantically similar.
pub semantic_similarities: Vec<SimilarBlock>,
/// Entropy report, or `None` when absent (presumably when
/// `SimilarityConfig::enable_entropy` is off — TODO confirm).
pub entropy_analysis: Option<EntropyReport>,
/// Refactoring suggestions.
pub refactoring_opportunities: Vec<RefactoringHint>,
/// Summary figures for the whole report.
pub metrics: Metrics,
}
/// Summary figures stored in `ComprehensiveReport::metrics`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Metrics {
/// Share of duplicated code. NOTE(review): whether this is 0–100 or 0–1,
/// and what it is measured over, is not defined in this file.
pub duplication_percentage: f64,
/// Mean entropy value.
pub average_entropy: f64,
/// Number of clones found.
pub total_clones: usize,
}
/// Detector state: a configuration plus one instance of each helper
/// component.
///
/// All fields are private and no methods are defined in this file.
/// NOTE(review): the implementation presumably lives in one of the files
/// pulled in by the `include!` lines of this module — confirm.
pub struct SimilarityDetector {
/// Settings for this detector.
config: SimilarityConfig,
/// Winnowing component.
winnower: Winnowing,
/// Token analysis component.
token_analyzer: TokenAnalyzer,
/// Entropy computation component.
entropy_calculator: EntropyCalculator,
}
/// Module-private record pairing a line range with its text.
///
/// NOTE(review): line-number base and inclusivity of `end_line` are not
/// established in this file.
struct CodeBlock {
/// First line of the block.
start_line: usize,
/// Last line of the block.
end_line: usize,
/// Text of the block.
content: String,
}
/// Parameters for the winnowing component held by `SimilarityDetector::winnower`.
///
/// NOTE(review): the name suggests the winnowing document-fingerprinting
/// scheme (k-gram hashes selected per sliding window); the algorithm itself
/// is not in this file — confirm against its implementation.
pub struct Winnowing {
/// Window size; same name as `SimilarityConfig::window_size`.
window_size: usize,
/// K-gram size; same name as `SimilarityConfig::k_gram_size`.
k_gram_size: usize,
}
/// Field-less, module-private marker type held by
/// `SimilarityDetector::token_analyzer`; its methods are not defined in this
/// file.
struct TokenAnalyzer;
/// Map from a string key to an `f64` value.
/// NOTE(review): presumably token text → weight/frequency; confirm at the
/// use sites, which are not in this file.
type TokenVector = HashMap<String, f64>;
/// Field-less, module-private marker type held by
/// `SimilarityDetector::entropy_calculator`; its methods are not defined in
/// this file.
struct EntropyCalculator;
// The three files below are spliced into this module textually by
// `include!`, so their items share this module's scope and can use the
// private types declared above. Paths are resolved relative to this file.
// NOTE(review): judging by the file names they hold the detector, winnowing
// and analyzer implementations respectively — confirm.
include!("similarity_detection.rs");
include!("similarity_winnowing.rs");
include!("similarity_analyzers.rs");
// Unit tests: compiled only for test builds, with the module body loaded
// from `similarity_tests.rs` via the `#[path]` attribute.
#[cfg(test)]
#[path = "similarity_tests.rs"]
mod tests;