/// Command names that the fuzzy typo matcher may correct a token to.
///
/// Every entry is at least four characters long. The order is significant:
/// entries are scanned front to back, so keep existing entries in place and
/// append new ones at the end unless a different priority is intended.
const FAMOUS_COMMANDS: &[&str] = &[
    "mkdir", "rmdir", "chmod", "chown",
    "grep", "unzip", "curl", "ping",
    "clear", "terminal", "history",
    "systemctl", "journalctl", "uptime", "whoami",
    "touch", "head", "tail",
    "kill", "pkill", "alias",
];
24
/// Computes the Levenshtein (edit) distance between `s1` and `s2`.
///
/// The distance is the minimum number of single-character insertions,
/// deletions and substitutions that turn `s1` into `s2`. Strings are compared
/// per `char` (Unicode scalar value), not per byte. Swapping two adjacent
/// characters counts as two edits.
///
/// Returns the other string's character count when either input is empty.
fn levenshtein_distance(s1: &str, s2: &str) -> usize {
    let target: Vec<char> = s2.chars().collect();
    if target.is_empty() {
        return s1.chars().count();
    }

    // Only two rows of the DP table are ever needed: `prev[j]` is the distance
    // between the already-consumed prefix of `s1` and the first `j` chars of
    // `s2`; `curr` is the row being filled for the next char of `s1`.
    let mut prev: Vec<usize> = (0..=target.len()).collect();
    let mut curr: Vec<usize> = vec![0; target.len() + 1];

    for (i, c1) in s1.chars().enumerate() {
        // Turning `i + 1` chars into the empty prefix takes `i + 1` deletions.
        curr[0] = i + 1;
        for (j, &c2) in target.iter().enumerate() {
            let substitution = prev[j] + usize::from(c1 != c2);
            let deletion = prev[j + 1] + 1;
            let insertion = curr[j] + 1;
            curr[j + 1] = substitution.min(deletion).min(insertion);
        }
        std::mem::swap(&mut prev, &mut curr);
    }

    // After the final swap `prev` holds the last completed row. If `s1` was
    // empty the loop never ran and `prev` is still the initial 0..=len row.
    prev[target.len()]
}
55
56fn correct_typo(token: &str) -> String {
57 let exact_mapping = [
59 ("makdir", "mkdir"),
60 ("rmdier", "rmdir"),
61 ("chomd", "chmod"),
62 ("chownn", "chown"),
63 ("gerp", "grep"),
64 ("unzipp", "unzip"),
65 ("curll", "curl"),
66 ("pign", "ping"),
67 ("rerminal", "terminal"),
68 ("psx", "ps"),
69 ("hstry", "history"),
70 ("lss", "ls"),
71 ("pwdd", "pwd"),
72 ("cdd", "cd"),
73 ("tars", "tar"),
74 ("systmctl", "systemctl"),
75 ("journlctl", "journalctl"),
76 ("upime", "uptime"),
77 ("whoamii", "whoami"),
78 ("copiy", "cp"),
79 ("mve", "mv"),
80 ("toutch", "touch"),
81 ("headd", "head"),
82 ("taill", "tail"),
83 ("dff", "df"),
84 ("duu", "du"),
85 ("kll", "kill"),
86 ("pkll", "pkill"),
87 ("aliass", "alias"),
88 ];
89
90 for &(typo, correction) in &exact_mapping {
91 if token == typo {
92 return correction.to_string();
93 }
94 }
95
96 if token.len() <= 3 {
98 return token.to_string();
99 }
100
101 let mut best_match = None;
102 let mut min_dist = 2; let limit = if token.len() >= 8 { 2 } else { 1 };
105
106 for &cmd in FAMOUS_COMMANDS {
107 let dist = levenshtein_distance(token, cmd);
108 if dist <= limit && dist < min_dist {
109 min_dist = dist;
110 best_match = Some(cmd);
111 }
112 }
113
114 if let Some(cmd) = best_match {
115 cmd.to_string()
116 } else {
117 token.to_string()
118 }
119}
120
/// Translates a single pinyin word into its English keyword.
///
/// The lookup is an exact, case-sensitive match against a fixed vocabulary.
/// Tokens that are not in the vocabulary are returned unchanged.
fn translate_pinyin_token(token: &str) -> String {
    // (pinyin, English) pairs; the pinyin keys are unique.
    const PINYIN_TO_ENGLISH: &[(&str, &str)] = &[
        ("shanchu", "delete"),
        ("wenjian", "file"),
        ("chaxun", "search"),
        ("jincheng", "process"),
        ("jiazai", "mount"),
        ("cipan", "disk"),
        ("kaishi", "start"),
        ("jiancha", "check"),
        ("neicun", "memory"),
        ("qingchu", "clear"),
        ("pingmu", "screen"),
        ("liechu", "list"),
        ("mulu", "directory"),
        ("duibi", "diff"),
        ("xiazai", "download"),
        ("chuangjian", "create"),
        ("fuzhi", "copy"),
        ("yidong", "move"),
        ("kong", "empty"),
        ("xiugai", "modify"),
        ("quanxian", "permission"),
        ("suoyouzhi", "owner"),
        ("sousuo", "search"),
        // Identity entry: maps to itself, same as the fallback.
        ("ping", "ping"),
        ("wangluo", "network"),
        ("dakai", "open"),
        ("yuancheng", "remote"),
        ("yasuo", "compress"),
        ("jieya", "decompress"),
    ];

    let english = PINYIN_TO_ENGLISH
        .iter()
        .find(|(pinyin, _)| *pinyin == token)
        .map(|(_, english)| *english);

    english.unwrap_or(token).to_string()
}
156
157pub fn preprocess_robustness(query: &str) -> String {
158 let mut current_word = String::new();
159 let mut result = String::new();
160
161 for c in query.chars() {
162 if c.is_alphanumeric() {
163 current_word.push(c);
164 } else {
165 if !current_word.is_empty() {
166 let lower = current_word.to_lowercase();
167 let pinyin_translation = translate_pinyin_token(&lower);
168 let corrected = if pinyin_translation != lower {
169 pinyin_translation
170 } else {
171 correct_typo(&lower)
172 };
173
174 if corrected == lower {
175 result.push_str(¤t_word);
176 } else {
177 result.push_str(&corrected);
178 }
179 current_word.clear();
180 }
181 result.push(c);
182 }
183 }
184
185 if !current_word.is_empty() {
186 let lower = current_word.to_lowercase();
187 let pinyin_translation = translate_pinyin_token(&lower);
188 let corrected = if pinyin_translation != lower {
189 pinyin_translation
190 } else {
191 correct_typo(&lower)
192 };
193
194 if corrected == lower {
195 result.push_str(¤t_word);
196 } else {
197 result.push_str(&corrected);
198 }
199 }
200
201 result
202}
203
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks a few known edit distances between commands and their typos.
    #[test]
    fn test_levenshtein_distance() {
        // (left, right, expected distance)
        let cases: [(&str, &str, usize); 3] = [
            ("mkdir", "makdir", 1),
            // An adjacent swap costs two edits, not one.
            ("chmod", "chomd", 2),
            ("journalctl", "journlctl", 1),
        ];

        for &(left, right, expected) in cases.iter() {
            assert_eq!(
                levenshtein_distance(left, right),
                expected,
                "distance between {:?} and {:?}",
                left,
                right
            );
        }
    }

    /// End-to-end checks: pinyin translation, typo correction, and
    /// pass-through of unknown words and separators.
    #[test]
    fn test_robustness_preprocessing() {
        // (query, expected output)
        let cases: [(&str, &str); 5] = [
            ("shanchu wenjian", "delete file"),
            ("kaishi systemd jincheng", "start systemd process"),
            ("makdir test_folder", "mkdir test_folder"),
            ("cdd ..", "cd .."),
            ("systmctl status nginx", "systemctl status nginx"),
        ];

        for &(query, expected) in cases.iter() {
            assert_eq!(preprocess_robustness(query), expected, "query: {:?}", query);
        }
    }
}