1use crate::citl::{ErrorCodeClass, SuggestionApplicability};
9use crate::perf::{BatchProcessor, PerfStats};
10use anyhow::Result;
11use chrono::{Datelike, Timelike};
12use serde::{Deserialize, Serialize};
13use std::time::Instant;
14
15#[derive(Debug, Clone, Serialize, Deserialize)]
19pub struct CommitFeatures {
20 pub defect_category: u8, pub files_changed: f32,
25 pub lines_added: f32,
26 pub lines_deleted: f32,
27 pub complexity_delta: f32, pub timestamp: f64, pub hour_of_day: u8, pub day_of_week: u8, #[serde(default)]
37 pub error_code_class: u8,
38 #[serde(default)]
40 pub has_suggestion: u8,
41 #[serde(default)]
43 pub suggestion_applicability: u8,
44 #[serde(default)]
46 pub clippy_lint_count: u8,
47 #[serde(default)]
49 pub span_line_delta: f32,
50 #[serde(default)]
52 pub diagnostic_confidence: f32,
53}
54
55impl CommitFeatures {
56 pub fn to_vector(&self) -> Vec<f32> {
61 vec![
62 self.defect_category as f32,
63 self.files_changed,
64 self.lines_added,
65 self.lines_deleted,
66 self.complexity_delta,
67 self.timestamp as f32,
68 self.hour_of_day as f32,
69 self.day_of_week as f32,
70 self.error_code_class as f32,
72 self.has_suggestion as f32,
73 self.suggestion_applicability as f32,
74 self.clippy_lint_count as f32,
75 self.span_line_delta,
76 self.diagnostic_confidence,
77 ]
78 }
79
80 pub const DIMENSION: usize = 14;
83}
84
85impl Default for CommitFeatures {
86 fn default() -> Self {
87 Self {
88 defect_category: 0,
89 files_changed: 0.0,
90 lines_added: 0.0,
91 lines_deleted: 0.0,
92 complexity_delta: 0.0,
93 timestamp: 0.0,
94 hour_of_day: 0,
95 day_of_week: 0,
96 error_code_class: ErrorCodeClass::Other.as_u8(),
97 has_suggestion: 0,
98 suggestion_applicability: SuggestionApplicability::None.as_u8(),
99 clippy_lint_count: 0,
100 span_line_delta: 0.0,
101 diagnostic_confidence: 0.0,
102 }
103 }
104}
105
/// Stateless extractor that turns raw commit data into `CommitFeatures`.
///
/// The type is a field-less unit struct, so copies are free; it derives
/// `Debug`, `Clone` and `Copy` so it can be logged and embedded in other
/// derived types.
#[derive(Debug, Clone, Copy)]
pub struct FeatureExtractor;
108
109impl FeatureExtractor {
110 pub fn new() -> Self {
111 Self
112 }
113
114 pub fn extract(
118 &self,
119 category: u8,
120 files_changed: usize,
121 lines_added: usize,
122 lines_deleted: usize,
123 timestamp: i64,
124 ) -> Result<CommitFeatures> {
125 self.extract_with_citl(
126 category,
127 files_changed,
128 lines_added,
129 lines_deleted,
130 timestamp,
131 ErrorCodeClass::Other,
132 false,
133 SuggestionApplicability::None,
134 0,
135 0.0,
136 0.0,
137 )
138 }
139
140 #[allow(clippy::too_many_arguments)]
142 pub fn extract_with_citl(
143 &self,
144 category: u8,
145 files_changed: usize,
146 lines_added: usize,
147 lines_deleted: usize,
148 timestamp: i64,
149 error_code_class: ErrorCodeClass,
150 has_suggestion: bool,
151 suggestion_applicability: SuggestionApplicability,
152 clippy_lint_count: u8,
153 span_line_delta: f32,
154 diagnostic_confidence: f32,
155 ) -> Result<CommitFeatures> {
156 let datetime = chrono::DateTime::from_timestamp(timestamp, 0)
158 .ok_or_else(|| anyhow::anyhow!("Invalid timestamp"))?;
159
160 let hour_of_day = datetime.hour() as u8;
161 let day_of_week = datetime.weekday().num_days_from_monday() as u8;
162
163 Ok(CommitFeatures {
164 defect_category: category,
165 files_changed: files_changed as f32,
166 lines_added: lines_added as f32,
167 lines_deleted: lines_deleted as f32,
168 complexity_delta: 0.0, timestamp: timestamp as f64,
170 hour_of_day,
171 day_of_week,
172 error_code_class: error_code_class.as_u8(),
174 has_suggestion: u8::from(has_suggestion),
175 suggestion_applicability: suggestion_applicability.as_u8(),
176 clippy_lint_count,
177 span_line_delta,
178 diagnostic_confidence,
179 })
180 }
181}
182
183impl Default for FeatureExtractor {
184 fn default() -> Self {
185 Self::new()
186 }
187}
188
/// Raw per-commit input queued for batch feature extraction.
///
/// The fields correspond one-to-one to the arguments of
/// `FeatureExtractor::extract`.
#[derive(Debug, Clone)]
pub struct FeatureInput {
    /// Defect category code.
    pub category: u8,
    /// Number of files changed.
    pub files_changed: usize,
    /// Number of lines added.
    pub lines_added: usize,
    /// Number of lines deleted.
    pub lines_deleted: usize,
    /// Commit timestamp, passed on to the extractor as Unix seconds.
    pub timestamp: i64,
}
198
199pub struct BatchFeatureExtractor {
203 extractor: FeatureExtractor,
204 batch_processor: BatchProcessor<FeatureInput>,
205 stats: PerfStats,
206}
207
208impl BatchFeatureExtractor {
209 pub fn new() -> Self {
211 Self::with_batch_size(1000)
212 }
213
214 pub fn with_batch_size(batch_size: usize) -> Self {
216 Self {
217 extractor: FeatureExtractor::new(),
218 batch_processor: BatchProcessor::new(batch_size),
219 stats: PerfStats::new(),
220 }
221 }
222
223 pub fn add(&mut self, input: FeatureInput) -> Option<Vec<CommitFeatures>> {
225 self.batch_processor
226 .add(input)
227 .map(|batch| self.extract_batch(batch))
228 }
229
230 pub fn flush(&mut self) -> Vec<CommitFeatures> {
232 let batch = self.batch_processor.flush();
233 if batch.is_empty() {
234 Vec::new()
235 } else {
236 self.extract_batch(batch)
237 }
238 }
239
240 fn extract_batch(&mut self, inputs: Vec<FeatureInput>) -> Vec<CommitFeatures> {
242 let start = Instant::now();
243
244 let features: Vec<CommitFeatures> = inputs
245 .into_iter()
246 .filter_map(|input| {
247 self.extractor
248 .extract(
249 input.category,
250 input.files_changed,
251 input.lines_added,
252 input.lines_deleted,
253 input.timestamp,
254 )
255 .ok()
256 })
257 .collect();
258
259 let duration_ns = start.elapsed().as_nanos() as u64;
260 self.stats.record(duration_ns);
261
262 features
263 }
264
265 pub fn extract_all(&mut self, inputs: Vec<FeatureInput>) -> Vec<CommitFeatures> {
267 let start = Instant::now();
268
269 let features: Vec<CommitFeatures> = inputs
270 .into_iter()
271 .filter_map(|input| {
272 self.extractor
273 .extract(
274 input.category,
275 input.files_changed,
276 input.lines_added,
277 input.lines_deleted,
278 input.timestamp,
279 )
280 .ok()
281 })
282 .collect();
283
284 let duration_ns = start.elapsed().as_nanos() as u64;
285 self.stats.record(duration_ns);
286
287 features
288 }
289
290 pub fn stats(&self) -> &PerfStats {
292 &self.stats
293 }
294
295 pub fn pending(&self) -> usize {
297 self.batch_processor.len()
298 }
299}
300
301impl Default for BatchFeatureExtractor {
302 fn default() -> Self {
303 Self::new()
304 }
305}
306
#[cfg(test)]
mod tests {
    use super::*;

    /// Timestamp (Unix seconds) used as the baseline by most tests.
    const BASE_TIMESTAMP: i64 = 1_700_000_000;

    /// Builds an input with `files` changed files, `files * 10` added lines
    /// and `files * 5` deleted lines.
    fn scaled_input(category: u8, files: usize, timestamp: i64) -> FeatureInput {
        FeatureInput {
            category,
            files_changed: files,
            lines_added: files * 10,
            lines_deleted: files * 5,
            timestamp,
        }
    }

    /// Builds `count` inputs with consecutive timestamps; `category_of` maps
    /// the zero-based index to the input's category.
    fn input_series(count: usize, category_of: impl Fn(usize) -> u8) -> Vec<FeatureInput> {
        (0..count)
            .map(|i| scaled_input(category_of(i), i + 1, BASE_TIMESTAMP + i as i64))
            .collect()
    }

    #[test]
    fn test_feature_extractor_creation() {
        // Construction alone must succeed.
        let _ = FeatureExtractor::new();
    }

    #[test]
    fn test_extract_basic_features() {
        let features = FeatureExtractor::new()
            .extract(2, 3, 100, 50, BASE_TIMESTAMP)
            .unwrap();

        assert_eq!(features.defect_category, 2);
        assert_eq!(features.files_changed, 3.0);
        assert_eq!(features.lines_added, 100.0);
        assert_eq!(features.lines_deleted, 50.0);
    }

    #[test]
    fn test_to_vector_dimension() {
        let features = CommitFeatures {
            defect_category: 1,
            files_changed: 2.0,
            lines_added: 10.0,
            lines_deleted: 5.0,
            complexity_delta: 0.0,
            timestamp: 1700000000.0,
            hour_of_day: 14,
            day_of_week: 2,
            error_code_class: 0,
            has_suggestion: 1,
            suggestion_applicability: 2,
            clippy_lint_count: 3,
            span_line_delta: 4.5,
            diagnostic_confidence: 0.95,
        };

        let vec = features.to_vector();
        assert_eq!(vec.len(), CommitFeatures::DIMENSION);
        assert_eq!(vec.len(), 14);

        // Entries that must match exactly.
        let exact = [
            (0, 1.0),
            (1, 2.0),
            (2, 10.0),
            (8, 0.0),
            (9, 1.0),
            (10, 2.0),
            (11, 3.0),
        ];
        for (index, expected) in exact {
            assert_eq!(vec[index], expected, "unexpected value at index {}", index);
        }

        // Entries compared with a tolerance.
        assert!((vec[12] - 4.5).abs() < 0.001);
        assert!((vec[13] - 0.95).abs() < 0.001);
    }

    #[test]
    fn test_temporal_features() {
        let features = FeatureExtractor::new()
            .extract(0, 1, 1, 1, 1699971000)
            .unwrap();

        assert_eq!(features.hour_of_day, 14);
        assert_eq!(features.day_of_week, 1);
    }

    #[test]
    fn test_invalid_timestamp() {
        let result = FeatureExtractor::new().extract(0, 1, 1, 1, i64::MAX);
        assert!(result.is_err());
    }

    #[test]
    fn test_batch_extractor_creation() {
        assert_eq!(BatchFeatureExtractor::new().pending(), 0);
    }

    #[test]
    fn test_batch_extractor_add() {
        let mut extractor = BatchFeatureExtractor::with_batch_size(3);
        let input = scaled_input(0, 1, BASE_TIMESTAMP);

        // The first two inputs are only buffered.
        assert!(extractor.add(input.clone()).is_none());
        assert!(extractor.add(input.clone()).is_none());
        assert_eq!(extractor.pending(), 2);

        // The third input completes the batch.
        let batch_len = extractor.add(input).map(|batch| batch.len());
        assert_eq!(batch_len, Some(3));
        assert_eq!(extractor.pending(), 0);
    }

    #[test]
    fn test_batch_extractor_flush() {
        let mut extractor = BatchFeatureExtractor::with_batch_size(10);
        let input = scaled_input(1, 2, BASE_TIMESTAMP);

        for _ in 0..3 {
            let _ = extractor.add(input.clone());
        }

        let remaining = extractor.flush();
        assert_eq!(remaining.len(), 3);
        assert_eq!(extractor.pending(), 0);
    }

    #[test]
    fn test_batch_extractor_extract_all() {
        let mut extractor = BatchFeatureExtractor::new();

        let features = extractor.extract_all(input_series(5, |i| i as u8));

        assert_eq!(features.len(), 5);
        assert_eq!(features[0].defect_category, 0);
        assert_eq!(features[4].defect_category, 4);
    }

    #[test]
    fn test_batch_extractor_stats() {
        let mut extractor = BatchFeatureExtractor::new();

        let _ = extractor.extract_all(input_series(100, |i| (i % 10) as u8));

        let stats = extractor.stats();
        assert_eq!(stats.operation_count, 1);
        assert!(stats.avg_ns() > 0);
    }
}