1use serde::{Deserialize, Serialize};
2use std::collections::HashMap;
3
4#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
5#[serde(rename_all = "snake_case")]
6pub enum TokenKind {
7 Keyword,
8 Identifier,
9 Literal,
10 Operator,
11 Punctuation,
12 Comment,
13 BlockComment,
14 Whitespace,
15 Ignore,
16 Other,
17}
18
19impl TokenKind {
20 pub fn discriminant(&self) -> u8 {
22 match self {
23 Self::Keyword => 1,
24 Self::Identifier => 2,
25 Self::Literal => 3,
26 Self::Operator => 4,
27 Self::Punctuation => 5,
28 Self::Comment => 6,
29 Self::BlockComment => 7,
30 Self::Whitespace => 8,
31 Self::Ignore => 9,
32 Self::Other => 10,
33 }
34 }
35}
36
37#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
38pub struct Location {
39 pub line: u32,
40 pub column: u32,
41 pub offset: u32,
42}
43
44#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
45pub struct Token {
46 pub kind: TokenKind,
47 pub value: String,
48 pub start: Location,
49 pub end: Location,
50}
51
52#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
53pub struct BlameEntry {
54 pub commit_sha: String,
55 pub author: String,
56 pub timestamp: i64,
57}
58
59#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
60pub struct Fragment {
61 pub source_id: String,
62 pub start: Location,
63 pub end: Location,
64 pub range: [u32; 2],
65 pub blame: Option<BlameEntry>,
66}
67
68#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
69pub struct CpdClone {
70 pub format: String,
71 pub fragment_a: Fragment,
72 pub fragment_b: Fragment,
73 pub token_count: u32,
74}
75
76#[derive(Debug, Clone, PartialEq, Eq)]
83pub struct DetectionToken {
84 pub hash: u64,
86 pub start: Location,
87 pub end: Location,
88 pub range: [usize; 2],
90}
91
92#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
94pub struct SourceFile {
95 pub id: String,
96 pub format: String,
97 pub tokens: Vec<Token>,
98}
99
100#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
102#[serde(rename_all = "camelCase")]
103pub struct StatRow {
104 pub lines: u64,
105 pub tokens: u64,
106 pub sources: u64,
107 pub clones: u64,
108 pub duplicated_lines: u64,
109 pub duplicated_tokens: u64,
110 pub percentage: f64,
111 pub percentage_tokens: f64,
112 #[serde(default)]
113 pub new_duplicated_lines: u64,
114 #[serde(default)]
115 pub new_clones: u64,
116}
117
118#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
120#[serde(rename_all = "camelCase")]
121pub struct Statistics {
122 pub total: StatRow,
123 pub formats: HashMap<String, StatRow>,
124 pub detection_date: String,
125}
126
#[cfg(test)]
mod tests {
    //! Serialization tests for the data-model types of the parent module.
    //!
    //! `serde_json` is referenced by path; a `use serde_json;` line is
    //! redundant because the crate name is already in scope.

    use super::*;

    /// Builds a `Location` so each test does not repeat the struct literal
    /// (and does not need to clone a shared value).
    fn loc(line: u32, column: u32, offset: u32) -> Location {
        Location {
            line,
            column,
            offset,
        }
    }

    /// Builds a `StatRow` whose counters and percentages are all zero.
    fn zeroed_row() -> StatRow {
        StatRow {
            lines: 0,
            tokens: 0,
            sources: 0,
            clones: 0,
            duplicated_lines: 0,
            duplicated_tokens: 0,
            percentage: 0.0,
            percentage_tokens: 0.0,
            new_duplicated_lines: 0,
            new_clones: 0,
        }
    }

    #[test]
    fn statistics_default_total_is_zero() {
        let stats = Statistics {
            total: zeroed_row(),
            formats: HashMap::new(),
            detection_date: "2026-01-01T00:00:00Z".to_string(),
        };
        assert_eq!(stats.total.clones, 0);

        // The container attribute must rename fields to camelCase on the wire.
        let json = serde_json::to_string(&stats).expect("Statistics should serialize");
        assert!(json.contains("\"detectionDate\""));
        assert!(json.contains("\"duplicatedLines\""));

        let back: Statistics =
            serde_json::from_str(&json).expect("serialized Statistics should deserialize");
        assert_eq!(stats, back);
    }

    #[test]
    fn stat_row_missing_new_fields_default_to_zero() {
        // `newDuplicatedLines` and `newClones` are intentionally absent: both
        // fields are marked `#[serde(default)]`.
        let json = r#"{"lines":10,"tokens":20,"sources":1,"clones":2,"duplicatedLines":4,"duplicatedTokens":8,"percentage":40.0,"percentageTokens":40.0}"#;
        let row: StatRow =
            serde_json::from_str(json).expect("StatRow without the new* keys should deserialize");
        assert_eq!(row.lines, 10);
        assert_eq!(row.duplicated_lines, 4);
        assert_eq!(row.new_duplicated_lines, 0);
        assert_eq!(row.new_clones, 0);
    }

    #[test]
    fn token_serializes_and_deserializes() {
        let token = Token {
            kind: TokenKind::Keyword,
            value: "function".to_string(),
            start: loc(1, 0, 0),
            end: loc(1, 8, 8),
        };
        let json = serde_json::to_string(&token).expect("Token should serialize");
        // Variant names are renamed to snake_case.
        assert!(json.contains("\"keyword\""));

        let back: Token = serde_json::from_str(&json).expect("serialized Token should deserialize");
        assert_eq!(token, back);
    }

    #[test]
    fn cpd_clone_serializes_with_blame() {
        let blame = BlameEntry {
            commit_sha: "abc123".to_string(),
            author: "Alice".to_string(),
            timestamp: 1700000000,
        };
        let frag = Fragment {
            source_id: "a.js".to_string(),
            start: loc(1, 0, 0),
            end: loc(1, 0, 0),
            range: [0, 10],
            blame: Some(blame),
        };
        let clone = CpdClone {
            format: "javascript".to_string(),
            fragment_a: frag.clone(),
            fragment_b: frag,
            token_count: 50,
        };
        let json = serde_json::to_string(&clone).expect("CpdClone should serialize");
        assert!(json.contains("abc123"));
        assert!(json.contains("fragment_a"));

        let back: CpdClone =
            serde_json::from_str(&json).expect("serialized CpdClone should deserialize");
        assert_eq!(clone, back);
    }

    #[test]
    fn fragment_blame_none_serializes_as_null() {
        let frag = Fragment {
            source_id: "b.js".to_string(),
            start: loc(1, 0, 0),
            end: loc(1, 0, 0),
            range: [0, 5],
            blame: None,
        };
        let json = serde_json::to_string(&frag).expect("Fragment should serialize");
        assert!(json.contains("\"blame\":null"));

        // `null` must come back as `None`.
        let back: Fragment =
            serde_json::from_str(&json).expect("serialized Fragment should deserialize");
        assert_eq!(frag, back);
    }
}