1use super::{calculate_dir_size, get_mtime, CleanableItem, SafetyLevel};
12use crate::error::Result;
13use std::path::PathBuf;
14
/// Detects cleanable ML/AI caches (Hugging Face, Ollama, PyTorch, Keras,
/// TensorFlow, Jupyter, LM Studio, GPT4All) under the user's home directory.
pub struct MlCleaner {
    // Resolved home directory; every detection path is joined onto this.
    home: PathBuf,
}
19
20impl MlCleaner {
21 pub fn new() -> Option<Self> {
23 let home = dirs::home_dir()?;
24 Some(Self { home })
25 }
26
27 pub fn detect(&self) -> Result<Vec<CleanableItem>> {
29 let mut items = Vec::new();
30
31 items.extend(self.detect_huggingface()?);
33
34 items.extend(self.detect_ollama()?);
36
37 items.extend(self.detect_pytorch()?);
39
40 items.extend(self.detect_keras()?);
42
43 items.extend(self.detect_tensorflow()?);
45
46 items.extend(self.detect_jupyter()?);
48
49 items.extend(self.detect_lmstudio()?);
51
52 items.extend(self.detect_gpt4all()?);
54
55 Ok(items)
56 }
57
58 fn detect_huggingface(&self) -> Result<Vec<CleanableItem>> {
60 let hf_paths = [
61 (".cache/huggingface/hub", "HF Models"),
62 (".cache/huggingface/datasets", "HF Datasets"),
63 (".cache/huggingface/transformers", "HF Transformers"),
64 ];
65
66 let mut items = Vec::new();
67
68 for (rel_path, name) in hf_paths {
69 let path = self.home.join(rel_path);
70 if !path.exists() {
71 continue;
72 }
73
74 if let Ok(entries) = std::fs::read_dir(&path) {
76 for entry in entries.filter_map(|e| e.ok()) {
77 let entry_path = entry.path();
78 if !entry_path.is_dir() {
79 continue;
80 }
81
82 let entry_name = entry_path.file_name()
83 .map(|n| n.to_string_lossy().to_string())
84 .unwrap_or_default();
85
86 if entry_name.starts_with('.') || entry_name == "version.txt" {
88 continue;
89 }
90
91 let (size, file_count) = calculate_dir_size(&entry_path)?;
92 if size < 10_000_000 {
93 continue;
95 }
96
97 let display_name = entry_name
99 .replace("models--", "")
100 .replace("datasets--", "")
101 .replace("--", "/");
102
103 items.push(CleanableItem {
104 name: format!("{}: {}", name, display_name),
105 category: "ML/AI".to_string(),
106 subcategory: "Hugging Face".to_string(),
107 icon: "🤗",
108 path: entry_path,
109 size,
110 file_count: Some(file_count),
111 last_modified: get_mtime(&entry.path()),
112 description: "Downloaded ML model or dataset. Can be re-downloaded.",
113 safe_to_delete: SafetyLevel::SafeWithCost,
114 clean_command: None,
115 });
116 }
117 }
118 }
119
120 Ok(items)
121 }
122
123 fn detect_ollama(&self) -> Result<Vec<CleanableItem>> {
125 let ollama_path = self.home.join(".ollama/models");
126
127 if !ollama_path.exists() {
128 return Ok(vec![]);
129 }
130
131 let mut items = Vec::new();
132
133 let blobs_path = ollama_path.join("blobs");
135 let manifests_path = ollama_path.join("manifests");
136
137 if manifests_path.exists() {
139 self.scan_ollama_manifests(&manifests_path, &blobs_path, &mut items)?;
140 }
141
142 if items.is_empty() && blobs_path.exists() {
144 let (size, file_count) = calculate_dir_size(&blobs_path)?;
145 if size > 0 {
146 items.push(CleanableItem {
147 name: "Ollama Models (all)".to_string(),
148 category: "ML/AI".to_string(),
149 subcategory: "Ollama".to_string(),
150 icon: "🦙",
151 path: ollama_path,
152 size,
153 file_count: Some(file_count),
154 last_modified: None,
155 description: "Local LLM models. Can be re-downloaded with 'ollama pull'.",
156 safe_to_delete: SafetyLevel::SafeWithCost,
157 clean_command: Some("ollama rm <model>".to_string()),
158 });
159 }
160 }
161
162 Ok(items)
163 }
164
165 fn scan_ollama_manifests(&self, manifests_path: &PathBuf, _blobs_path: &PathBuf, items: &mut Vec<CleanableItem>) -> Result<()> {
167 let registry_path = manifests_path.join("registry.ollama.ai/library");
169 if !registry_path.exists() {
170 return Ok(());
171 }
172
173 if let Ok(models) = std::fs::read_dir(®istry_path) {
174 for model in models.filter_map(|e| e.ok()) {
175 let model_path = model.path();
176 if !model_path.is_dir() {
177 continue;
178 }
179
180 let model_name = model_path.file_name()
181 .map(|n| n.to_string_lossy().to_string())
182 .unwrap_or_default();
183
184 let (size, file_count) = calculate_dir_size(&model_path)?;
186
187 let estimated_size = size * 1000; if estimated_size > 100_000_000 {
192 items.push(CleanableItem {
193 name: format!("Ollama: {}", model_name),
194 category: "ML/AI".to_string(),
195 subcategory: "Ollama".to_string(),
196 icon: "🦙",
197 path: model_path,
198 size: estimated_size,
199 file_count: Some(file_count),
200 last_modified: get_mtime(&model.path()),
201 description: "Local LLM model. Use 'ollama rm' to remove.",
202 safe_to_delete: SafetyLevel::SafeWithCost,
203 clean_command: Some(format!("ollama rm {}", model_name)),
204 });
205 }
206 }
207 }
208
209 Ok(())
210 }
211
212 fn detect_pytorch(&self) -> Result<Vec<CleanableItem>> {
214 let torch_paths = [
215 (".cache/torch", "PyTorch Cache"),
216 (".cache/torch/hub", "PyTorch Hub Models"),
217 ];
218
219 let mut items = Vec::new();
220
221 for (rel_path, name) in torch_paths {
222 let path = self.home.join(rel_path);
223 if !path.exists() {
224 continue;
225 }
226
227 let (size, file_count) = calculate_dir_size(&path)?;
228 if size < 10_000_000 {
229 continue;
230 }
231
232 items.push(CleanableItem {
233 name: name.to_string(),
234 category: "ML/AI".to_string(),
235 subcategory: "PyTorch".to_string(),
236 icon: "🔥",
237 path,
238 size,
239 file_count: Some(file_count),
240 last_modified: None,
241 description: "PyTorch model cache. Can be re-downloaded.",
242 safe_to_delete: SafetyLevel::SafeWithCost,
243 clean_command: None,
244 });
245 }
246
247 Ok(items)
248 }
249
250 fn detect_keras(&self) -> Result<Vec<CleanableItem>> {
252 let keras_path = self.home.join(".keras");
253
254 if !keras_path.exists() {
255 return Ok(vec![]);
256 }
257
258 let models_path = keras_path.join("models");
259 if models_path.exists() {
260 let (size, file_count) = calculate_dir_size(&models_path)?;
261 if size > 10_000_000 {
262 return Ok(vec![CleanableItem {
263 name: "Keras Models".to_string(),
264 category: "ML/AI".to_string(),
265 subcategory: "Keras".to_string(),
266 icon: "🧠",
267 path: models_path,
268 size,
269 file_count: Some(file_count),
270 last_modified: None,
271 description: "Keras pre-trained models. Can be re-downloaded.",
272 safe_to_delete: SafetyLevel::SafeWithCost,
273 clean_command: None,
274 }]);
275 }
276 }
277
278 Ok(vec![])
279 }
280
281 fn detect_tensorflow(&self) -> Result<Vec<CleanableItem>> {
283 let tf_paths = [
284 (".tensorflow", "TensorFlow Cache"),
285 (".cache/tensorflow", "TensorFlow Hub Cache"),
286 ];
287
288 let mut items = Vec::new();
289
290 for (rel_path, name) in tf_paths {
291 let path = self.home.join(rel_path);
292 if !path.exists() {
293 continue;
294 }
295
296 let (size, file_count) = calculate_dir_size(&path)?;
297 if size < 10_000_000 {
298 continue;
299 }
300
301 items.push(CleanableItem {
302 name: name.to_string(),
303 category: "ML/AI".to_string(),
304 subcategory: "TensorFlow".to_string(),
305 icon: "📊",
306 path,
307 size,
308 file_count: Some(file_count),
309 last_modified: None,
310 description: "TensorFlow model cache. Can be re-downloaded.",
311 safe_to_delete: SafetyLevel::SafeWithCost,
312 clean_command: None,
313 });
314 }
315
316 Ok(items)
317 }
318
319 fn detect_jupyter(&self) -> Result<Vec<CleanableItem>> {
321 let jupyter_paths = [
322 (".cache/jupyter", "Jupyter Cache"),
323 (".jupyter", "Jupyter Config & Data"),
324 (".local/share/jupyter", "Jupyter Data"),
325 ];
326
327 let mut items = Vec::new();
328
329 for (rel_path, name) in jupyter_paths {
330 let path = self.home.join(rel_path);
331 if !path.exists() {
332 continue;
333 }
334
335 let (size, file_count) = calculate_dir_size(&path)?;
336 if size < 10_000_000 {
337 continue;
338 }
339
340 items.push(CleanableItem {
341 name: name.to_string(),
342 category: "ML/AI".to_string(),
343 subcategory: "Jupyter".to_string(),
344 icon: "📓",
345 path,
346 size,
347 file_count: Some(file_count),
348 last_modified: None,
349 description: "Jupyter notebook cache and runtime data.",
350 safe_to_delete: SafetyLevel::Safe,
351 clean_command: None,
352 });
353 }
354
355 Ok(items)
356 }
357
358 fn detect_lmstudio(&self) -> Result<Vec<CleanableItem>> {
360 let lmstudio_path = self.home.join(".lmstudio/models");
361
362 if !lmstudio_path.exists() {
363 let alt_path = self.home.join(".cache/lm-studio");
365 if !alt_path.exists() {
366 return Ok(vec![]);
367 }
368 }
369
370 let (size, file_count) = calculate_dir_size(&lmstudio_path)?;
371 if size == 0 {
372 return Ok(vec![]);
373 }
374
375 Ok(vec![CleanableItem {
376 name: "LM Studio Models".to_string(),
377 category: "ML/AI".to_string(),
378 subcategory: "LM Studio".to_string(),
379 icon: "🎯",
380 path: lmstudio_path,
381 size,
382 file_count: Some(file_count),
383 last_modified: None,
384 description: "LM Studio downloaded models. Can be re-downloaded.",
385 safe_to_delete: SafetyLevel::SafeWithCost,
386 clean_command: None,
387 }])
388 }
389
390 fn detect_gpt4all(&self) -> Result<Vec<CleanableItem>> {
392 let gpt4all_paths = [
393 ".cache/gpt4all",
394 "Library/Application Support/nomic.ai/GPT4All",
395 ];
396
397 for rel_path in gpt4all_paths {
398 let path = self.home.join(rel_path);
399 if !path.exists() {
400 continue;
401 }
402
403 let (size, file_count) = calculate_dir_size(&path)?;
404 if size > 100_000_000 {
405 return Ok(vec![CleanableItem {
406 name: "GPT4All Models".to_string(),
407 category: "ML/AI".to_string(),
408 subcategory: "GPT4All".to_string(),
409 icon: "🤖",
410 path,
411 size,
412 file_count: Some(file_count),
413 last_modified: None,
414 description: "GPT4All downloaded models. Can be re-downloaded.",
415 safe_to_delete: SafetyLevel::SafeWithCost,
416 clean_command: None,
417 }]);
418 }
419 }
420
421 Ok(vec![])
422 }
423}
424
425impl Default for MlCleaner {
426 fn default() -> Self {
427 Self::new().expect("MlCleaner requires home directory")
428 }
429}
430
#[cfg(test)]
mod tests {
    use super::*;

    /// The cleaner should be constructible on any machine with a home dir.
    #[test]
    fn test_ml_cleaner_creation() {
        assert!(MlCleaner::new().is_some());
    }

    /// Smoke test: detection against the real home dir must not error;
    /// findings are printed for manual inspection.
    #[test]
    fn test_ml_detection() {
        if let Some(cleaner) = MlCleaner::new() {
            let items = cleaner.detect().unwrap();
            println!("Found {} ML items", items.len());
            for item in items.iter() {
                println!(" {} {} ({} bytes)", item.icon, item.name, item.size);
            }
        }
    }
}