confignet/
lib.rs

1use serde::Deserialize;
2use anyhow::Result;
3use std::{fs::File, path::Path, io::BufReader};
4
5#[derive(Debug, Clone, Deserialize)]
6pub struct ConfigRecord {
7    pub file_name: String,
8    pub mime_label: String,
9    pub config_type: String,
10}
11
/// Outcome of classifying a single file against the reference records.
///
/// Implements `PartialEq`/`Eq` so results can be compared directly, for
/// example in tests or when de-duplicating.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ClassifiedResult {
    /// `file_name` of the reference record that matched best (not necessarily
    /// the name of the file that was classified).
    pub file_name: String,
    /// Path of the classified file after normalization.
    pub file_path: String,
    /// `true` when the matched record's `config_type` is anything other than
    /// `"non_config"`.
    pub is_ci_cd: bool,
}
18
19pub struct ConfigClassifier {
20    records: Vec<ConfigRecord>,
21}
22
23impl ConfigClassifier {
24    pub fn from_csv<P: AsRef<Path>>(path: P) -> Result<Self> {
25        let file = File::open(path)?;
26        let mut rdr = csv::Reader::from_reader(BufReader::new(file));
27        let mut records = Vec::new();
28
29        for result in rdr.deserialize() {
30            let record: ConfigRecord = result?;
31            records.push(record);
32        }
33
34        Ok(Self { records })
35    }
36
37    pub fn from_csv_str(csv_data: &str) -> Result<Self> {
38        let mut rdr = csv::Reader::from_reader(csv_data.as_bytes());
39        let mut records = Vec::new();
40
41        for result in rdr.deserialize() {
42            let record: ConfigRecord = result?;
43            records.push(record);
44        }
45
46        Ok(Self { records })
47    }
48
49    pub fn classify<P: AsRef<Path>>(
50        &self,
51        file_path: P,
52        mime_label: &str,
53    ) -> Option<ClassifiedResult> {
54        let file_path = file_path.as_ref();
55        let file_name = file_path.file_name()?.to_string_lossy().to_string();
56
57        self.records
58            .iter()
59            .filter(|r| r.mime_label == mime_label)
60            .min_by_key(|r| levenshtein(&r.file_name, &file_name))
61            .map(|r| {
62                let normalized_path = normalize_path(file_path);
63                let is_ci_cd = r.config_type != "non_config";
64                ClassifiedResult {
65                    file_name: r.file_name.clone(),
66                    file_path: normalized_path,
67                    is_ci_cd,
68                }
69            })
70    }
71}
72
/// Renders `path` relative to the current working directory when possible.
///
/// If `path` starts with the current working directory, that prefix is removed
/// and the remainder is returned with a leading `./`. Otherwise the path is
/// returned unchanged. Non-UTF-8 sequences are replaced lossily in both cases.
///
/// If the current directory cannot be determined, an empty path is used as the
/// prefix instead.
fn normalize_path(path: &Path) -> String {
    let cwd = std::env::current_dir().unwrap_or_default();
    match path.strip_prefix(&cwd) {
        // The result is the same whether or not the remainder contains a
        // separator, so no further branching is needed.
        Ok(relative) => format!("./{}", relative.to_string_lossy()),
        Err(_) => path.to_string_lossy().into_owned(),
    }
}
87
/// Computes the Levenshtein edit distance between `a` and `b`.
///
/// The distance is measured in Unicode scalar values (`char`s), not bytes, so
/// multi-byte characters count as a single edit. Only one row of the distance
/// table is kept in memory at a time.
fn levenshtein(a: &str, b: &str) -> usize {
    // Size the row by the number of chars in `b`; using the byte length would
    // leave trailing cells untouched for non-ASCII input and return a wrong
    // distance.
    let b_chars: Vec<char> = b.chars().collect();
    let mut costs: Vec<usize> = (0..=b_chars.len()).collect();

    for (i, ca) in a.chars().enumerate() {
        // `diagonal` holds the previous row's value one column to the left.
        let mut diagonal = i;
        costs[0] = i + 1;
        for (j, &cb) in b_chars.iter().enumerate() {
            let above = costs[j + 1];
            costs[j + 1] = if ca == cb {
                diagonal
            } else {
                // Cheapest of substitution, insertion and deletion.
                1 + diagonal.min(costs[j]).min(above)
            };
            diagonal = above;
        }
    }

    costs[b_chars.len()]
}