Skip to main content

jscpd_rs/detector/
statistics.rs

1use super::model::{CloneMatch, StatisticRow, Statistics};
2
3#[derive(Clone, Debug, Default)]
4pub struct Statistic {
5    statistics: Statistics,
6}
7
8impl Statistic {
9    pub fn new() -> Self {
10        Self::default()
11    }
12
13    pub fn get_statistic(&self) -> &Statistics {
14        &self.statistics
15    }
16
17    pub fn into_statistics(self) -> Statistics {
18        self.statistics
19    }
20
21    pub fn match_source(
22        &mut self,
23        source_id: impl AsRef<str>,
24        format_name: impl AsRef<str>,
25        lines: usize,
26        tokens: usize,
27    ) {
28        update_source_statistics(
29            &mut self.statistics,
30            source_id.as_ref(),
31            format_name.as_ref(),
32            lines,
33            tokens,
34        );
35        finalize_percentages(&mut self.statistics);
36    }
37
38    pub fn clone_found(&mut self, clone: &CloneMatch) {
39        update_clone_statistics(&mut self.statistics, clone);
40        finalize_percentages(&mut self.statistics);
41    }
42}
43
44pub fn clone_lines(clone: &CloneMatch) -> usize {
45    clone
46        .duplication_a
47        .end
48        .line
49        .saturating_sub(clone.duplication_a.start.line)
50        + 1
51}
52
53pub(super) fn clone_stat_lines(clone: &CloneMatch) -> usize {
54    clone
55        .duplication_a
56        .end
57        .line
58        .saturating_sub(clone.duplication_a.start.line)
59}
60
61fn clone_stat_tokens(clone: &CloneMatch) -> usize {
62    clone
63        .duplication_a
64        .end
65        .position
66        .saturating_sub(clone.duplication_a.start.position)
67}
68
69pub(super) fn update_source_statistics(
70    statistics: &mut Statistics,
71    source_id: &str,
72    format_name: &str,
73    lines: usize,
74    tokens: usize,
75) {
76    statistics.total.sources += 1;
77    statistics.total.lines += lines;
78    statistics.total.tokens += tokens;
79
80    let format = statistics
81        .formats
82        .entry(format_name.to_string())
83        .or_default();
84    format.total.sources += 1;
85    format.total.lines += lines;
86    format.total.tokens += tokens;
87
88    let source = format.sources.entry(source_id.to_string()).or_default();
89    source.sources = 1;
90    source.lines += lines;
91    source.tokens += tokens;
92}
93
94pub(super) fn update_clone_statistics(statistics: &mut Statistics, clone: &CloneMatch) {
95    let lines = clone_stat_lines(clone);
96    let tokens = clone_stat_tokens(clone);
97    statistics.total.clones += 1;
98    statistics.total.duplicated_lines += lines;
99    statistics.total.duplicated_tokens += tokens;
100
101    let format = statistics.formats.entry(clone.format.clone()).or_default();
102    format.total.clones += 1;
103    format.total.duplicated_lines += lines;
104    format.total.duplicated_tokens += tokens;
105
106    for source_id in [
107        &clone.duplication_a.source_id,
108        &clone.duplication_b.source_id,
109    ] {
110        let source = format.sources.entry(source_id.clone()).or_default();
111        source.clones += 1;
112        source.duplicated_lines += lines;
113        source.duplicated_tokens += tokens;
114    }
115}
116
117pub(super) fn finalize_percentages(statistics: &mut Statistics) {
118    update_row_percentages(&mut statistics.total);
119    for format in statistics.formats.values_mut() {
120        update_row_percentages(&mut format.total);
121        for source in format.sources.values_mut() {
122            update_row_percentages(source);
123        }
124    }
125}
126
127fn update_row_percentages(row: &mut StatisticRow) {
128    row.percentage = percentage(row.lines, row.duplicated_lines);
129    row.percentage_tokens = percentage(row.tokens, row.duplicated_tokens);
130}
131
/// Share of `duplicated` in `total`, expressed in percent and rounded to two
/// decimal places.
///
/// Returns `0.0` when `total` is zero so that empty rows never divide by zero.
fn percentage(total: usize, duplicated: usize) -> f64 {
    match total {
        0 => 0.0,
        nonzero => {
            // Scale to hundredths of a percent first so that `round` keeps
            // exactly two decimals once divided back down.
            let hundredths = (duplicated as f64 * 10000.0) / nonzero as f64;
            hundredths.round() / 100.0
        }
    }
}