1use serde::{Deserialize, Serialize};
2use std::collections::HashMap;
3
4#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
5#[serde(rename_all = "snake_case")]
6pub enum TokenKind {
7 Keyword,
8 Identifier,
9 Literal,
10 Operator,
11 Punctuation,
12 Comment,
13 BlockComment,
14 Whitespace,
15 Ignore,
16 Other,
17}
18
19impl TokenKind {
20 pub fn discriminant(&self) -> u8 {
22 match self {
23 Self::Keyword => 1,
24 Self::Identifier => 2,
25 Self::Literal => 3,
26 Self::Operator => 4,
27 Self::Punctuation => 5,
28 Self::Comment => 6,
29 Self::BlockComment => 7,
30 Self::Whitespace => 8,
31 Self::Ignore => 9,
32 Self::Other => 10,
33 }
34 }
35}
36
37#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
38pub struct Location {
39 pub line: u32,
40 pub column: u32,
41 pub offset: u32,
42}
43
44#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
45pub struct Token {
46 pub kind: TokenKind,
47 pub value: String,
48 pub start: Location,
49 pub end: Location,
50}
51
52#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
53pub struct BlameEntry {
54 pub commit_sha: String,
55 pub author: String,
56 pub timestamp: i64,
57}
58
59#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
60pub struct Fragment {
61 pub source_id: String,
62 pub start: Location,
63 pub end: Location,
64 pub range: [u32; 2],
65 pub blame: Option<BlameEntry>,
66}
67
68#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
69pub struct CpdClone {
70 pub format: String,
71 pub fragment_a: Fragment,
72 pub fragment_b: Fragment,
73 pub token_count: u32,
74}
75
76#[derive(Debug, Clone, PartialEq, Eq)]
83pub struct DetectionToken {
84 pub hash: u64,
86 pub start: Location,
87 pub end: Location,
88 pub range: [usize; 2],
90}
91
92#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
94pub struct SourceFile {
95 pub id: String,
96 pub format: String,
97 pub tokens: Vec<Token>,
98}
99
100#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
102pub struct StatRow {
103 pub lines: u64,
104 pub tokens: u64,
105 pub sources: u64,
106 pub clones: u64,
107 pub duplicated_lines: u64,
108 pub duplicated_tokens: u64,
109 pub percentage: f64,
110 pub percentage_tokens: f64,
111}
112
113#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
115pub struct Statistics {
116 pub total: StatRow,
117 pub formats: HashMap<String, StatRow>,
118 pub detection_date: String,
119}
120
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json;

    /// Builds a `Location` from its three coordinates.
    fn at(line: u32, column: u32, offset: u32) -> Location {
        Location {
            line,
            column,
            offset,
        }
    }

    /// Builds a `Fragment` whose start and end both sit at line 1, column 0, offset 0.
    fn fragment(source_id: &str, range: [u32; 2], blame: Option<BlameEntry>) -> Fragment {
        let origin = at(1, 0, 0);
        Fragment {
            source_id: String::from(source_id),
            start: origin.clone(),
            end: origin.clone(),
            range,
            blame,
        }
    }

    /// A hand-built all-zero `Statistics` reports zero clones in its total row.
    #[test]
    fn statistics_default_total_is_zero() {
        let zero_row = StatRow {
            lines: 0,
            tokens: 0,
            sources: 0,
            clones: 0,
            duplicated_lines: 0,
            duplicated_tokens: 0,
            percentage: 0.0,
            percentage_tokens: 0.0,
        };
        let stats = Statistics {
            total: zero_row,
            formats: HashMap::new(),
            detection_date: String::from("2026-01-01T00:00:00Z"),
        };

        assert_eq!(stats.total.clones, 0);
    }

    /// A `Token` survives a JSON round trip unchanged.
    #[test]
    fn token_serializes_and_deserializes() {
        let original = Token {
            kind: TokenKind::Keyword,
            value: String::from("function"),
            start: at(1, 0, 0),
            end: at(1, 8, 8),
        };

        let encoded = serde_json::to_string(&original).unwrap();
        let decoded: Token = serde_json::from_str(&encoded).unwrap();

        assert_eq!(original, decoded);
    }

    /// Serializing a `CpdClone` emits both the blame SHA and the fragment key.
    #[test]
    fn cpd_clone_serializes_with_blame() {
        let blame = BlameEntry {
            commit_sha: String::from("abc123"),
            author: String::from("Alice"),
            timestamp: 1700000000,
        };
        let frag = fragment("a.js", [0, 10], Some(blame));
        let clone = CpdClone {
            format: String::from("javascript"),
            fragment_a: frag.clone(),
            fragment_b: frag,
            token_count: 50,
        };

        let encoded = serde_json::to_string(&clone).unwrap();

        assert!(encoded.contains("abc123"));
        assert!(encoded.contains("fragment_a"));
    }

    /// A missing blame entry is written as an explicit JSON `null`.
    #[test]
    fn fragment_blame_none_serializes_as_null() {
        let frag = fragment("b.js", [0, 5], None);

        let encoded = serde_json::to_string(&frag).unwrap();

        assert!(encoded.contains("\"blame\":null"));
    }
}