1use serde::Deserialize;
2use anyhow::Result;
3use std::{fs::File, path::Path, io::BufReader};
4
5#[derive(Debug, Clone, Deserialize)]
6pub struct ConfigRecord {
7 pub file_name: String,
8 pub mime_label: String,
9 pub config_type: String,
10}
11
/// Outcome of matching a file against the reference table.
#[derive(Debug, Clone)]
pub struct ClassifiedResult {
    /// File name of the *matched reference record* (not necessarily the name
    /// of the file that was classified).
    pub file_name: String,
    /// Path of the classified file as rendered by `normalize_path`.
    pub file_path: String,
    /// `true` unless the matched record's `config_type` is `"non_config"`.
    pub is_ci_cd: bool,
}
18
19pub struct ConfigClassifier {
20 records: Vec<ConfigRecord>,
21}
22
23impl ConfigClassifier {
24 pub fn from_csv<P: AsRef<Path>>(path: P) -> Result<Self> {
25 let file = File::open(path)?;
26 let mut rdr = csv::Reader::from_reader(BufReader::new(file));
27 let mut records = Vec::new();
28
29 for result in rdr.deserialize() {
30 let record: ConfigRecord = result?;
31 records.push(record);
32 }
33
34 Ok(Self { records })
35 }
36
37 pub fn classify<P: AsRef<Path>>(
38 &self,
39 file_path: P,
40 mime_label: &str,
41 ) -> Option<ClassifiedResult> {
42 let file_path = file_path.as_ref();
43 let file_name = file_path.file_name()?.to_string_lossy().to_string();
44
45 self.records
46 .iter()
47 .filter(|r| r.mime_label == mime_label)
48 .min_by_key(|r| levenshtein(&r.file_name, &file_name))
49 .map(|r| {
50 let normalized_path = normalize_path(file_path);
51 let is_ci_cd = r.config_type != "non_config";
52 ClassifiedResult {
53 file_name: r.file_name.clone(),
54 file_path: normalized_path,
55 is_ci_cd,
56 }
57 })
58 }
59}
60
/// Renders `path` relative to the current working directory when possible.
///
/// If `path` lies under the current working directory, the result is the
/// remainder of the path prefixed with `./`. Otherwise — including when the
/// working directory cannot be determined — `path` is returned unchanged
/// (lossily converted to UTF-8).
fn normalize_path(path: &Path) -> String {
    // A failed cwd lookup must not fall back to an empty prefix: stripping an
    // empty prefix succeeds for every path and would turn `/a/b` into `.//a/b`.
    let relative = std::env::current_dir()
        .ok()
        .and_then(|cwd| path.strip_prefix(&cwd).ok());

    match relative {
        Some(rel) => format!("./{}", rel.to_string_lossy()),
        None => path.to_string_lossy().into_owned(),
    }
}
75
/// Computes the Levenshtein edit distance between `a` and `b`.
///
/// The distance is measured in Unicode scalar values (`char`s): the minimum
/// number of single-`char` insertions, deletions, and substitutions needed
/// to turn `a` into `b`. Only one row of the dynamic-programming table is
/// kept in memory.
fn levenshtein(a: &str, b: &str) -> usize {
    // The row must be sized by the number of chars in `b`, not its byte
    // length, otherwise multi-byte input reads a cell the loop never updates.
    let b_chars: Vec<char> = b.chars().collect();
    let mut costs: Vec<usize> = (0..=b_chars.len()).collect();

    for (i, ca) in a.chars().enumerate() {
        // `diagonal` holds the previous row's value at column `j`.
        let mut diagonal = i;
        costs[0] = i + 1;
        for (j, &cb) in b_chars.iter().enumerate() {
            let new_val = if ca == cb {
                diagonal
            } else {
                // substitution, insertion, deletion — whichever is cheapest
                1 + diagonal.min(costs[j]).min(costs[j + 1])
            };
            diagonal = costs[j + 1];
            costs[j + 1] = new_val;
        }
    }

    costs[b_chars.len()]
}