Skip to main content

cmdhub_cli/
robustness.rs

/// Words that `correct_typo` may correct a misspelled token to.
///
/// `correct_typo` measures the Levenshtein distance from a token to every
/// entry and keeps the closest one that is within its threshold. Order
/// matters on ties: the entry listed first wins.
const FAMOUS_COMMANDS: &[&str] = &[
    "mkdir",
    "rmdir",
    "chmod",
    "chown",
    "grep",
    "unzip",
    "curl",
    "ping",
    "clear",
    "terminal",
    "history",
    "systemctl",
    "journalctl",
    "uptime",
    "whoami",
    "touch",
    "head",
    "tail",
    "kill",
    "pkill",
    "alias",
];
24
/// Computes the Levenshtein (edit) distance between `s1` and `s2`.
///
/// The distance is the minimum number of single-character insertions,
/// deletions and substitutions needed to turn one string into the other.
/// Characters are compared as Unicode scalar values (`char`), not bytes, and
/// swapping two adjacent characters counts as two edits.
///
/// Only one row of the dynamic-programming table is kept at a time, so the
/// working memory is proportional to the length of `s2` alone.
fn levenshtein_distance(s1: &str, s2: &str) -> usize {
    let target: Vec<char> = s2.chars().collect();
    if target.is_empty() {
        return s1.chars().count();
    }

    // `row[j]` is the distance between the prefix of `s1` consumed so far and
    // the first `j` characters of `s2`. Before any character of `s1` is
    // consumed that is simply `j` insertions.
    let mut row: Vec<usize> = (0..=target.len()).collect();

    for (i, c1) in s1.chars().enumerate() {
        // Value of the previous row one column to the left of the cell being
        // filled; it is overwritten in `row` as we go, so carry it by hand.
        let mut diagonal = row[0];
        row[0] = i + 1;

        for (j, &c2) in target.iter().enumerate() {
            let above = row[j + 1]; // previous row, same column (deletion)
            let left = row[j]; // current row, previous column (insertion)
            let substitution = diagonal + usize::from(c1 != c2);

            row[j + 1] = substitution.min(above + 1).min(left + 1);
            diagonal = above;
        }
    }

    row[target.len()]
}
55
56fn correct_typo(token: &str) -> String {
57    // 1. Precise mapping for high-frequency short command typos
58    let exact_mapping = [
59        ("makdir", "mkdir"),
60        ("rmdier", "rmdir"),
61        ("chomd", "chmod"),
62        ("chownn", "chown"),
63        ("gerp", "grep"),
64        ("unzipp", "unzip"),
65        ("curll", "curl"),
66        ("pign", "ping"),
67        ("rerminal", "terminal"),
68        ("psx", "ps"),
69        ("hstry", "history"),
70        ("lss", "ls"),
71        ("pwdd", "pwd"),
72        ("cdd", "cd"),
73        ("tars", "tar"),
74        ("systmctl", "systemctl"),
75        ("journlctl", "journalctl"),
76        ("upime", "uptime"),
77        ("whoamii", "whoami"),
78        ("copiy", "cp"),
79        ("mve", "mv"),
80        ("toutch", "touch"),
81        ("headd", "head"),
82        ("taill", "tail"),
83        ("dff", "df"),
84        ("duu", "du"),
85        ("kll", "kill"),
86        ("pkll", "pkill"),
87        ("aliass", "alias"),
88    ];
89
90    for &(typo, correction) in &exact_mapping {
91        if token == typo {
92            return correction.to_string();
93        }
94    }
95
96    // 2. Generic Levenshtein correction for longer commands (length > 3)
97    if token.len() <= 3 {
98        return token.to_string();
99    }
100
101    let mut best_match = None;
102    let mut min_dist = 2; // Threshold is 1 for most words
103
104    let limit = if token.len() >= 8 { 2 } else { 1 };
105
106    for &cmd in FAMOUS_COMMANDS {
107        let dist = levenshtein_distance(token, cmd);
108        if dist <= limit && dist < min_dist {
109            min_dist = dist;
110            best_match = Some(cmd);
111        }
112    }
113
114    if let Some(cmd) = best_match {
115        cmd.to_string()
116    } else {
117        token.to_string()
118    }
119}
120
/// Translates a pinyin keyword into its English counterpart.
///
/// The lookup is exact and case-sensitive. A token that is not in the table
/// is returned unchanged.
fn translate_pinyin_token(token: &str) -> String {
    const PINYIN_TO_ENGLISH: &[(&str, &str)] = &[
        ("shanchu", "delete"),
        ("wenjian", "file"),
        ("chaxun", "search"),
        ("jincheng", "process"),
        ("jiazai", "mount"),
        ("cipan", "disk"),
        ("kaishi", "start"),
        ("jiancha", "check"),
        ("neicun", "memory"),
        ("qingchu", "clear"),
        ("pingmu", "screen"),
        ("liechu", "list"),
        ("mulu", "directory"),
        ("duibi", "diff"),
        ("xiazai", "download"),
        ("chuangjian", "create"),
        ("fuzhi", "copy"),
        ("yidong", "move"),
        ("kong", "empty"),
        ("xiugai", "modify"),
        ("quanxian", "permission"),
        ("suoyouzhi", "owner"),
        ("sousuo", "search"),
        ("ping", "ping"),
        ("wangluo", "network"),
        ("dakai", "open"),
        ("yuancheng", "remote"),
        ("yasuo", "compress"),
        ("jieya", "decompress"),
    ];

    PINYIN_TO_ENGLISH
        .iter()
        .find(|(pinyin, _)| *pinyin == token)
        .map_or(token, |&(_, english)| english)
        .to_string()
}
156
157pub fn preprocess_robustness(query: &str) -> String {
158    let mut current_word = String::new();
159    let mut result = String::new();
160
161    for c in query.chars() {
162        if c.is_alphanumeric() {
163            current_word.push(c);
164        } else {
165            if !current_word.is_empty() {
166                let lower = current_word.to_lowercase();
167                let pinyin_translation = translate_pinyin_token(&lower);
168                let corrected = if pinyin_translation != lower {
169                    pinyin_translation
170                } else {
171                    correct_typo(&lower)
172                };
173
174                if corrected == lower {
175                    result.push_str(&current_word);
176                } else {
177                    result.push_str(&corrected);
178                }
179                current_word.clear();
180            }
181            result.push(c);
182        }
183    }
184
185    if !current_word.is_empty() {
186        let lower = current_word.to_lowercase();
187        let pinyin_translation = translate_pinyin_token(&lower);
188        let corrected = if pinyin_translation != lower {
189            pinyin_translation
190        } else {
191            correct_typo(&lower)
192        };
193
194        if corrected == lower {
195            result.push_str(&current_word);
196        } else {
197            result.push_str(&corrected);
198        }
199    }
200
201    result
202}
203
// Unit tests for the edit-distance helper and for the public preprocessing
// entry point.
#[cfg(test)]
mod tests {
    use super::*;

    // Pins the distance for one inserted character, one swapped pair and one
    // dropped character.
    #[test]
    fn test_levenshtein_distance() {
        assert_eq!(levenshtein_distance("mkdir", "makdir"), 1);
        assert_eq!(levenshtein_distance("chmod", "chomd"), 2); // Transposition = 2 in standard Levenshtein
        assert_eq!(levenshtein_distance("journalctl", "journlctl"), 1);
    }

    // End-to-end checks through `preprocess_robustness`: pinyin keywords are
    // translated, known typos are corrected, and unrelated words and
    // separators pass through untouched.
    #[test]
    fn test_robustness_preprocessing() {
        // Pinyin mapping
        assert_eq!(preprocess_robustness("shanchu wenjian"), "delete file");
        // "systemd" is neither pinyin nor close enough to a known command, so
        // it must come back unchanged.
        assert_eq!(
            preprocess_robustness("kaishi systemd jincheng"),
            "start systemd process"
        );

        // Typo correction
        // The underscore splits "test_folder" into two tokens; both are left
        // as they are.
        assert_eq!(
            preprocess_robustness("makdir test_folder"),
            "mkdir test_folder"
        );
        assert_eq!(preprocess_robustness("cdd .."), "cd ..");
        assert_eq!(
            preprocess_robustness("systmctl status nginx"),
            "systemctl status nginx"
        );
    }
}
235}