use anyhow::Result;
use std::collections::HashMap;
use tree_sitter::{Node, Tree};
use crate::tdg::{Language, MetricCategory, PenaltyTracker, TdgConfig};
use super::{Scorer, walk_tree, get_node_text};
/// Holds the tuning parameters used for duplication detection.
///
/// Both fields are private; the logic that reads them is not defined in this
/// part of the file (presumably it lives in the `include!`d sources — confirm).
pub struct DuplicationDetector {
    /// Token-count setting. Presumably the shortest token run that is
    /// considered as a duplication candidate — TODO confirm against the
    /// analysis code.
    min_token_sequence: usize,
    /// Similarity setting. Presumably a ratio in `0.0..=1.0` above which two
    /// sequences are treated as clones — TODO confirm against the analysis code.
    similarity_threshold: f32,
}

impl DuplicationDetector {
    /// Builds a detector with the built-in defaults: a `min_token_sequence`
    /// of 50 and a `similarity_threshold` of 0.85.
    pub fn new() -> Self {
        let min_token_sequence: usize = 50;
        let similarity_threshold: f32 = 0.85;

        DuplicationDetector {
            min_token_sequence,
            similarity_threshold,
        }
    }
}
/// A single token, stored as three owned strings.
#[derive(Debug, Clone)]
struct Token {
    /// Kind label of the token (presumably the tree-sitter node kind —
    /// TODO confirm where tokens are built).
    kind: String,
    /// Text of the token (presumably the raw source text — TODO confirm).
    text: String,
    /// Normalized form of the token (presumably used so that renamed
    /// identifiers or literals compare equal — TODO confirm).
    normalized: String,
}
/// A run of [`Token`]s together with a pair of byte positions.
#[derive(Debug, Clone)]
struct TokenSequence {
    /// The tokens that make up this sequence, in order.
    tokens: Vec<Token>,
    /// Start position in bytes (presumably an offset into the analysed
    /// source — TODO confirm).
    start_byte: usize,
    /// End position in bytes (whether this bound is inclusive or exclusive
    /// is not visible here — TODO confirm).
    end_byte: usize,
}
/// Classification attached to a group of duplicated sequences.
///
/// NOTE(review): the variant names look like the usual Type-1/2/3 clone
/// taxonomy (identical / identifiers renamed / statements changed) — confirm
/// against the code that assigns them.
#[derive(Debug)]
enum CloneType {
    /// Clone classified as exact.
    Exact,
    /// Clone classified as renamed.
    Renamed,
    /// Clone classified as modified.
    Modified,
}
/// Collection of clone groups.
#[derive(Debug)]
struct CloneSet {
    /// One entry per group: the group's [`CloneType`] paired with the
    /// [`TokenSequence`]s that belong to it.
    clones: Vec<(CloneType, Vec<TokenSequence>)>,
}
// The rest of this module is split across sibling files that are textually
// spliced in here with `include!`. Because the contents are pasted into this
// module, they share the `use` imports and private items declared in this file.
// Paths are resolved relative to this file.
include!("duplication_analysis.rs");
include!("duplication_scoring.rs");
// NOTE(review): the test files below are included unconditionally from this
// file; presumably each one gates its contents with `#[cfg(test)]` — confirm.
include!("duplication_tests.rs");
include!("duplication_tests_scoring.rs");
include!("duplication_property_tests.rs");