1use serde::Deserialize;
2use anyhow::Result;
3use std::{fs::File, path::Path, io::BufReader};
4
5#[derive(Debug, Clone, Deserialize)]
6pub struct ConfigRecord {
7 pub file_name: String,
8 pub mime_label: String,
9 pub config_type: String,
10}
11
/// Outcome of classifying a single file against the record table.
///
/// Implements `PartialEq`/`Eq` so results can be compared directly, e.g. in
/// `assert_eq!`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ClassifiedResult {
    /// File name of the table record that matched best. This is the record's
    /// name, which may differ from the name of the file that was classified.
    pub file_name: String,
    /// Path of the classified file, rewritten relative to the current working
    /// directory (`./…`) when it lies underneath it.
    pub file_path: String,
    /// `true` unless the matched record's `config_type` is `"non_config"`.
    pub is_ci_cd: bool,
}
18
19pub struct ConfigClassifier {
20 records: Vec<ConfigRecord>,
21}
22
23impl ConfigClassifier {
24 pub fn from_csv<P: AsRef<Path>>(path: P) -> Result<Self> {
25 let file = File::open(path)?;
26 let mut rdr = csv::Reader::from_reader(BufReader::new(file));
27 let mut records = Vec::new();
28
29 for result in rdr.deserialize() {
30 let record: ConfigRecord = result?;
31 records.push(record);
32 }
33
34 Ok(Self { records })
35 }
36
37 pub fn from_csv_str(csv_data: &str) -> Result<Self> {
38 let mut rdr = csv::Reader::from_reader(csv_data.as_bytes());
39 let mut records = Vec::new();
40
41 for result in rdr.deserialize() {
42 let record: ConfigRecord = result?;
43 records.push(record);
44 }
45
46 Ok(Self { records })
47 }
48
49 pub fn classify<P: AsRef<Path>>(
50 &self,
51 file_path: P,
52 mime_label: &str,
53 ) -> Option<ClassifiedResult> {
54 let file_path = file_path.as_ref();
55 let file_name = file_path.file_name()?.to_string_lossy().to_string();
56
57 self.records
58 .iter()
59 .filter(|r| r.mime_label == mime_label)
60 .min_by_key(|r| levenshtein(&r.file_name, &file_name))
61 .map(|r| {
62 let normalized_path = normalize_path(file_path);
63 let is_ci_cd = r.config_type != "non_config";
64 ClassifiedResult {
65 file_name: r.file_name.clone(),
66 file_path: normalized_path,
67 is_ci_cd,
68 }
69 })
70 }
71}
72
/// Renders `path` as a string, relative to the current working directory
/// when possible.
///
/// A path that lies under the working directory is returned as `./<rest>`.
/// Any other path is returned unchanged (lossily converted to UTF-8). The
/// same applies when the working directory cannot be determined: treating
/// that failure as an empty prefix would match every path and turn an
/// absolute `/a/b` into `.//a/b`.
fn normalize_path(path: &Path) -> String {
    let relative = std::env::current_dir()
        .ok()
        .and_then(|cwd| path.strip_prefix(&cwd).ok());

    match relative {
        Some(rest) => format!("./{}", rest.to_string_lossy()),
        None => path.to_string_lossy().into_owned(),
    }
}
87
/// Computes the Levenshtein edit distance between `a` and `b`, counted in
/// Unicode scalar values (`char`s).
///
/// Uses a single rolling row of costs, so memory is proportional to the
/// number of characters in `b`.
fn levenshtein(a: &str, b: &str) -> usize {
    // Size the row by character count, not byte length: the loops below walk
    // characters, and a byte-sized row would leave the final cell untouched
    // for non-ASCII input.
    let b_chars: Vec<char> = b.chars().collect();
    let mut costs: Vec<usize> = (0..=b_chars.len()).collect();

    for (i, ca) in a.chars().enumerate() {
        // `diagonal` is the previous row's value one column to the left.
        let mut diagonal = i;
        costs[0] = i + 1;
        for (j, &cb) in b_chars.iter().enumerate() {
            // Still the previous row's value at this column.
            let above = costs[j + 1];
            costs[j + 1] = if ca == cb {
                diagonal
            } else {
                1 + diagonal.min(costs[j]).min(above)
            };
            diagonal = above;
        }
    }

    costs[b_chars.len()]
}