confignet/
lib.rs

1use serde::Deserialize;
2use anyhow::Result;
3use std::{fs::File, path::Path, io::BufReader};
4
5#[derive(Debug, Clone, Deserialize)]
6pub struct ConfigRecord {
7    pub file_name: String,
8    pub mime_label: String,
9    pub config_type: String,
10}
11
/// Outcome of a successful `ConfigClassifier::classify` call.
///
/// Derives `PartialEq`/`Eq` so results can be compared directly (for example
/// with `assert_eq!` in tests or when de-duplicating classifications).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ClassifiedResult {
    /// File name of the reference record that matched best (not necessarily
    /// the name of the input file).
    pub file_name: String,
    /// Path of the classified file as rendered by the classifier: prefixed
    /// with `./` when it lies under the current working directory, otherwise
    /// the path as it was given.
    pub file_path: String,
    /// `true` when the matched record's `config_type` is anything other than
    /// `"non_config"`.
    pub is_ci_cd: bool,
}
18
19pub struct ConfigClassifier {
20    records: Vec<ConfigRecord>,
21}
22
23impl ConfigClassifier {
24    pub fn from_csv<P: AsRef<Path>>(path: P) -> Result<Self> {
25        let file = File::open(path)?;
26        let mut rdr = csv::Reader::from_reader(BufReader::new(file));
27        let mut records = Vec::new();
28
29        for result in rdr.deserialize() {
30            let record: ConfigRecord = result?;
31            records.push(record);
32        }
33
34        Ok(Self { records })
35    }
36
37    pub fn classify<P: AsRef<Path>>(
38        &self,
39        file_path: P,
40        mime_label: &str,
41    ) -> Option<ClassifiedResult> {
42        let file_path = file_path.as_ref();
43        let file_name = file_path.file_name()?.to_string_lossy().to_string();
44
45        self.records
46            .iter()
47            .filter(|r| r.mime_label == mime_label)
48            .min_by_key(|r| levenshtein(&r.file_name, &file_name))
49            .map(|r| {
50                let normalized_path = normalize_path(file_path);
51                let is_ci_cd = r.config_type != "non_config";
52                ClassifiedResult {
53                    file_name: r.file_name.clone(),
54                    file_path: normalized_path,
55                    is_ci_cd,
56                }
57            })
58    }
59}
60
/// Renders `path` for display relative to the current working directory.
///
/// * If `path` lies under the current working directory, the result is the
///   remainder of the path prefixed with `./` (the working directory itself
///   becomes `"./"`).
/// * Otherwise, including when the working directory cannot be determined,
///   the path is returned unchanged.
///
/// Non-UTF-8 path components are converted lossily.
fn normalize_path(path: &Path) -> String {
    std::env::current_dir()
        // Do not fall back to an empty prefix when the cwd is unavailable:
        // stripping an empty prefix succeeds for every path and would turn
        // `/abs/file` into `.//abs/file`.
        .ok()
        .and_then(|cwd| path.strip_prefix(&cwd).ok())
        .map(|relative| format!("./{}", relative.to_string_lossy()))
        .unwrap_or_else(|| path.to_string_lossy().into_owned())
}
75
/// Computes the Levenshtein edit distance between `a` and `b`.
///
/// The distance is the minimum number of single-character insertions,
/// deletions and substitutions needed to turn `a` into `b`, where a
/// "character" is a Unicode scalar value (`char`), not a byte.
///
/// Uses a single rolling row of `b.chars().count() + 1` cells.
fn levenshtein(a: &str, b: &str) -> usize {
    // Work on characters throughout: sizing the row by `b.len()` (bytes)
    // would leave the final cell untouched for non-ASCII input and return a
    // wrong distance.
    let b_chars: Vec<char> = b.chars().collect();

    // costs[j] = distance between the prefix of `a` processed so far and the
    // first `j` characters of `b`; initially the empty prefix of `a`.
    let mut costs: Vec<usize> = (0..=b_chars.len()).collect();

    for (i, ca) in a.chars().enumerate() {
        // `diagonal` holds the previous row's value at column `j`.
        let mut diagonal = i;
        costs[0] = i + 1;
        for (j, &cb) in b_chars.iter().enumerate() {
            let above = costs[j + 1];
            costs[j + 1] = if ca == cb {
                diagonal
            } else {
                // substitution, insertion, deletion
                1 + diagonal.min(costs[j]).min(above)
            };
            diagonal = above;
        }
    }

    costs[b_chars.len()]
}