1use std::collections::{BTreeMap, HashMap};
2use std::sync::Arc;
3
4use serde::Serialize;
5
6use crate::tokenizer::Location;
7
8pub type BlamedLines = BTreeMap<String, BlamedLine>;
10
11#[derive(Clone, Debug, Serialize)]
13pub struct BlamedLine {
14 pub rev: String,
16 pub author: String,
18 pub date: String,
20 pub line: String,
22}
23
24#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
25pub(super) struct SourceId(pub(super) usize);
26
27#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
28pub(super) struct FormatId(pub(super) usize);
29
30#[derive(Clone, Debug, Serialize)]
32pub struct Fragment {
33 #[serde(rename = "sourceId")]
34 pub source_id: String,
36 pub start: Location,
38 pub end: Location,
40 pub range: [usize; 2],
42 #[serde(skip_serializing_if = "Option::is_none")]
43 pub blame: Option<BlamedLines>,
45}
46
47#[derive(Clone, Debug, Serialize)]
49pub struct CloneMatch {
50 pub format: String,
52 #[serde(rename = "duplicationA")]
53 pub duplication_a: Fragment,
55 #[serde(rename = "duplicationB")]
56 pub duplication_b: Fragment,
58 pub tokens: usize,
60}
61
62#[derive(Clone, Debug)]
64pub struct SkippedClone {
65 pub clone: CloneMatch,
67 pub message: Vec<String>,
69}
70
71#[derive(Clone, Debug, Default, Serialize)]
73pub struct StatisticRow {
74 pub lines: usize,
76 pub tokens: usize,
78 pub sources: usize,
80 pub clones: usize,
82 #[serde(rename = "duplicatedLines")]
83 pub duplicated_lines: usize,
85 #[serde(rename = "duplicatedTokens")]
86 pub duplicated_tokens: usize,
88 pub percentage: f64,
90 #[serde(rename = "percentageTokens")]
91 pub percentage_tokens: f64,
93 #[serde(rename = "newDuplicatedLines")]
94 pub new_duplicated_lines: usize,
96 #[serde(rename = "newClones")]
97 pub new_clones: usize,
99}
100
101#[derive(Clone, Debug, Default, Serialize)]
103pub struct FormatStatistic {
104 pub sources: HashMap<String, StatisticRow>,
106 pub total: StatisticRow,
108}
109
110#[derive(Clone, Debug, Default, Serialize)]
112pub struct Statistics {
113 pub total: StatisticRow,
115 pub formats: HashMap<String, FormatStatistic>,
117}
118
119#[derive(Clone, Debug, Serialize)]
121pub struct SourceSummary {
122 pub path: String,
124 pub format: String,
126 pub lines: usize,
128 pub tokens: usize,
130}
131
132#[derive(Clone, Debug, Serialize)]
134pub struct DetectionResult {
135 pub clones: Vec<CloneMatch>,
137 #[serde(skip)]
138 pub skipped_clones: Vec<SkippedClone>,
140 pub statistics: Statistics,
142 pub sources: Vec<SourceSummary>,
144 #[serde(skip)]
145 pub source_contents: HashMap<String, String>,
148}
149
150#[derive(Clone, Debug)]
151pub(super) struct TokenSpan {
152 pub(super) start: Location,
153 pub(super) end: Location,
154 pub(super) range: [usize; 2],
155}
156
157#[derive(Clone, Debug)]
158pub(super) struct SourceMeta {
159 pub(super) source_id: String,
160 pub(super) format: String,
161 pub(super) lines: usize,
162 pub(super) tokens: usize,
163}
164
165#[derive(Clone, Debug)]
166pub(super) struct TokenStream {
167 pub(super) source_id: SourceId,
168 pub(super) format_id: FormatId,
169 pub(super) hashes: Vec<u64>,
170 pub(super) spans: Vec<TokenSpan>,
171}
172
173#[derive(Clone, Copy, Debug)]
174pub(super) struct Occurrence {
175 pub(super) source_id: SourceId,
176 pub(super) token_start: usize,
177}
178
179#[derive(Clone, Debug)]
180pub(super) struct PreparedSource {
181 pub(super) meta: SourceMeta,
182 pub(super) stream: TokenStream,
183}
184
185#[derive(Clone, Debug)]
186pub(crate) struct PreparedSourceDraft {
187 pub(super) meta: SourceMeta,
188 pub(super) content: Arc<str>,
189 pub(super) hashes: Arc<Vec<u64>>,
190 pub(super) spans: Arc<Vec<TokenSpan>>,
191}