1use crate::error::{DatasetsError, Result};
4use scirs2_core::cache::{CacheBuilder, TTLSizedCache};
5use std::cell::RefCell;
6use std::fs::{self, File};
7use std::hash::{Hash, Hasher};
8use std::io::{Read, Write};
9use std::path::{Path, PathBuf};
10
/// Name of the cache directory used by this crate.
const CACHE_DIR_NAME: &str = "scirs2-datasets";

/// Maximum number of entries held in the in-memory cache.
const DEFAULT_CACHE_SIZE: usize = 100;

/// Time-to-live for in-memory cache entries, in seconds (one hour).
const DEFAULT_CACHE_TTL: u64 = 3600;

/// Default on-disk cache size budget in bytes (500 MB).
const DEFAULT_MAX_CACHE_SIZE: u64 = 500 * 1024 * 1024;

/// Environment variable that overrides the cache directory location.
const CACHE_DIR_ENV: &str = "SCIRS2_CACHE_DIR";
26#[allow(dead_code)]
28pub fn sha256_hash_file(path: &Path) -> std::result::Result<String, String> {
29 use sha2::{Digest, Sha256};
30
31 let mut file = File::open(path).map_err(|e| format!("Failed to open file: {e}"))?;
32 let mut hasher = Sha256::new();
33 let mut buffer = [0; 8192];
34
35 loop {
36 let bytes_read = file
37 .read(&mut buffer)
38 .map_err(|e| format!("Failed to read file: {e}"))?;
39 if bytes_read == 0 {
40 break;
41 }
42 hasher.update(&buffer[..bytes_read]);
43 }
44
45 Ok(format!("{:x}", hasher.finalize()))
46}
47
/// Metadata describing a downloadable dataset file.
pub struct RegistryEntry {
    /// Expected SHA-256 hex digest of the file; an empty string means
    /// hash verification is skipped (see `fetch_data`).
    pub sha256: &'static str,
    /// URL the file is downloaded from.
    pub url: &'static str,
}
55
56#[allow(dead_code)]
66pub fn get_cachedir() -> Result<PathBuf> {
67 if let Ok(cachedir) = std::env::var(CACHE_DIR_ENV) {
69 let cachepath = PathBuf::from(cachedir);
70 ensuredirectory_exists(&cachepath)?;
71 return Ok(cachepath);
72 }
73
74 if let Some(cachedir) = get_platform_cachedir() {
76 ensuredirectory_exists(&cachedir)?;
77 return Ok(cachedir);
78 }
79
80 let homedir = crate::platform_dirs::home_dir()
82 .ok_or_else(|| DatasetsError::CacheError("Could not find home directory".to_string()))?;
83 let cachedir = homedir.join(format!(".{CACHE_DIR_NAME}"));
84 ensuredirectory_exists(&cachedir)?;
85
86 Ok(cachedir)
87}
88
89#[allow(dead_code)]
91fn get_platform_cachedir() -> Option<PathBuf> {
92 #[cfg(target_os = "windows")]
93 {
94 crate::platform_dirs::data_local_dir().map(|dir| dir.join(CACHE_DIR_NAME))
95 }
96 #[cfg(target_os = "macos")]
97 {
98 crate::platform_dirs::home_dir()
99 .map(|dir| dir.join("Library").join("Caches").join(CACHE_DIR_NAME))
100 }
101 #[cfg(not(any(target_os = "windows", target_os = "macos")))]
102 {
103 if let Ok(xdg_cache) = std::env::var("XDG_CACHE_HOME") {
105 Some(PathBuf::from(xdg_cache).join(CACHE_DIR_NAME))
106 } else {
107 crate::platform_dirs::home_dir().map(|home| home.join(".cache").join(CACHE_DIR_NAME))
108 }
109 }
110}
111
112#[allow(dead_code)]
114fn ensuredirectory_exists(dir: &Path) -> Result<()> {
115 if !dir.exists() {
116 fs::create_dir_all(dir).map_err(|e| {
117 DatasetsError::CacheError(format!("Failed to create cache directory: {e}"))
118 })?;
119 }
120 Ok(())
121}
122
/// Fetches `filename` into the on-disk cache, downloading it when absent.
///
/// Returns the cached path immediately when the file already exists.
/// Otherwise the file is downloaded from the registry entry's URL into a
/// temporary directory, its SHA-256 digest is verified (when the registry
/// provides a non-empty hash), and it is then copied into the cache.
///
/// # Errors
/// Returns a descriptive message when the cache directory cannot be
/// resolved, no registry entry is supplied, the download fails, the hash
/// does not match, or any filesystem step fails.
#[cfg(feature = "download-sync")]
#[allow(dead_code)]
pub fn fetch_data(
    filename: &str,
    registry_entry: Option<&RegistryEntry>,
) -> std::result::Result<PathBuf, String> {
    let cachedir = get_cachedir().map_err(|e| format!("Failed to get cache directory: {e}"))?;

    let cachepath = cachedir.join(filename);
    if cachepath.exists() {
        return Ok(cachepath);
    }

    // Include the filename in every error so callers can tell which
    // dataset failed (previously these messages said "(unknown)").
    let entry = registry_entry.ok_or_else(|| format!("No registry entry found for {filename}"))?;

    // Download into a temp dir first so a failed or partial download
    // never lands in the cache.
    let tempdir = tempfile::tempdir().map_err(|e| format!("Failed to create temp dir: {e}"))?;
    let temp_file = tempdir.path().join(filename);

    let response = ureq::get(entry.url)
        .call()
        .map_err(|e| format!("Failed to download {filename}: {e}"))?;

    let mut body = response.into_body();
    let bytes = body
        .read_to_vec()
        .map_err(|e| format!("Failed to read response body: {e}"))?;
    let mut file = std::fs::File::create(&temp_file)
        .map_err(|e| format!("Failed to create temp file: {e}"))?;
    file.write_all(&bytes)
        .map_err(|e| format!("Failed to write downloaded file: {e}"))?;

    // An empty hash in the registry means "skip verification".
    if !entry.sha256.is_empty() {
        let computed_hash = sha256_hash_file(&temp_file)?;
        if computed_hash != entry.sha256 {
            return Err(format!(
                "SHA256 hash mismatch for {filename}: expected {}, got {computed_hash}",
                entry.sha256
            ));
        }
    }

    // Creating the parent of the target path also creates `cachedir`
    // itself when needed (the previous separate create was redundant).
    if let Some(parent) = cachepath.parent() {
        fs::create_dir_all(parent).map_err(|e| format!("Failed to create cache dir: {e}"))?;
    }

    fs::copy(&temp_file, &cachepath).map_err(|e| format!("Failed to copy to cache: {e}"))?;

    Ok(cachepath)
}
204
205#[cfg(not(feature = "download-sync"))]
207#[allow(dead_code)]
208pub fn fetch_data(
209 _filename: &str,
210 _registry_entry: Option<&RegistryEntry>,
211) -> std::result::Result<PathBuf, String> {
212 Err("Synchronous download feature is disabled. Enable 'download-sync' feature.".to_string())
213}
214
/// Key identifying a cached dataset: the dataset name plus a hash of the
/// configuration used to load it, so different configurations of the same
/// dataset are cached separately.
#[derive(Clone, Debug, Eq, PartialEq, Hash)]
pub struct CacheKey {
    /// Dataset name.
    name: String,
    /// Hex-encoded hash of the relevant configuration fields.
    config_hash: String,
}
221
222impl CacheKey {
223 pub fn new(name: &str, config: &crate::real_world::RealWorldConfig) -> Self {
225 use std::collections::hash_map::DefaultHasher;
226 use std::hash::{Hash, Hasher};
227
228 let mut hasher = DefaultHasher::new();
229 config.use_cache.hash(&mut hasher);
230 config.download_if_missing.hash(&mut hasher);
231 config.return_preprocessed.hash(&mut hasher);
232 config.subset.hash(&mut hasher);
233 config.random_state.hash(&mut hasher);
234
235 Self {
236 name: name.to_string(),
237 config_hash: format!("{:x}", hasher.finish()),
238 }
239 }
240
241 pub fn as_string(&self) -> String {
243 format!("{}_{}", self.name, self.config_hash)
244 }
245}
246
/// Key for the in-memory byte cache, wrapping the cached file's name.
///
/// `Hash` is derived: for a single-field tuple struct the derived impl
/// hashes the inner `String`, exactly as the previous hand-written impl
/// did, so hashing behavior is unchanged.
#[derive(Clone, Debug, Eq, PartialEq, Hash)]
struct FileCacheKey(String);
256
/// Two-level dataset cache: an in-memory TTL/size-bounded cache backed by
/// files stored under `cachedir`.
pub struct DatasetCache {
    /// Directory holding the on-disk cached files.
    cachedir: PathBuf,
    /// In-memory cache of file contents; `RefCell` provides interior
    /// mutability so reads through `&self` can update the cache.
    mem_cache: RefCell<TTLSizedCache<FileCacheKey, Vec<u8>>>,
    /// On-disk size budget in bytes; 0 means unlimited.
    max_cache_size: u64,
    /// When true, callers should avoid network downloads (see `is_offline`).
    offline_mode: bool,
}
271
272impl Default for DatasetCache {
273 fn default() -> Self {
274 let cachedir = get_cachedir().expect("Could not get cache directory");
275
276 let mem_cache = RefCell::new(
277 CacheBuilder::new()
278 .with_size(DEFAULT_CACHE_SIZE)
279 .with_ttl(DEFAULT_CACHE_TTL)
280 .build_sized_cache(),
281 );
282
283 let offline_mode = std::env::var("SCIRS2_OFFLINE")
285 .map(|v| v.to_lowercase() == "true" || v == "1")
286 .unwrap_or(false);
287
288 DatasetCache {
289 cachedir,
290 mem_cache,
291 max_cache_size: DEFAULT_MAX_CACHE_SIZE,
292 offline_mode,
293 }
294 }
295}
296
297impl DatasetCache {
298 pub fn new(cachedir: PathBuf) -> Self {
300 let mem_cache = RefCell::new(
301 CacheBuilder::new()
302 .with_size(DEFAULT_CACHE_SIZE)
303 .with_ttl(DEFAULT_CACHE_TTL)
304 .build_sized_cache(),
305 );
306
307 let offline_mode = std::env::var("SCIRS2_OFFLINE")
308 .map(|v| v.to_lowercase() == "true" || v == "1")
309 .unwrap_or(false);
310
311 DatasetCache {
312 cachedir,
313 mem_cache,
314 max_cache_size: DEFAULT_MAX_CACHE_SIZE,
315 offline_mode,
316 }
317 }
318
319 pub fn with_config(cachedir: PathBuf, cache_size: usize, ttl_seconds: u64) -> Self {
321 let mem_cache = RefCell::new(
322 CacheBuilder::new()
323 .with_size(cache_size)
324 .with_ttl(ttl_seconds)
325 .build_sized_cache(),
326 );
327
328 let offline_mode = std::env::var("SCIRS2_OFFLINE")
329 .map(|v| v.to_lowercase() == "true" || v == "1")
330 .unwrap_or(false);
331
332 DatasetCache {
333 cachedir,
334 mem_cache,
335 max_cache_size: DEFAULT_MAX_CACHE_SIZE,
336 offline_mode,
337 }
338 }
339
340 pub fn with_full_config(
342 cachedir: PathBuf,
343 cache_size: usize,
344 ttl_seconds: u64,
345 max_cache_size: u64,
346 offline_mode: bool,
347 ) -> Self {
348 let mem_cache = RefCell::new(
349 CacheBuilder::new()
350 .with_size(cache_size)
351 .with_ttl(ttl_seconds)
352 .build_sized_cache(),
353 );
354
355 DatasetCache {
356 cachedir,
357 mem_cache,
358 max_cache_size,
359 offline_mode,
360 }
361 }
362
363 pub fn ensure_cachedir(&self) -> Result<()> {
365 if !self.cachedir.exists() {
366 fs::create_dir_all(&self.cachedir).map_err(|e| {
367 DatasetsError::CacheError(format!("Failed to create cache directory: {e}"))
368 })?;
369 }
370 Ok(())
371 }
372
373 pub fn get_cachedpath(&self, name: &str) -> PathBuf {
375 self.cachedir.join(name)
376 }
377
378 pub fn is_cached(&self, name: &str) -> bool {
380 let key = FileCacheKey(name.to_string());
382 if self.mem_cache.borrow_mut().get(&key).is_some() {
383 return true;
384 }
385
386 self.get_cachedpath(name).exists()
388 }
389
390 pub fn read_cached(&self, name: &str) -> Result<Vec<u8>> {
395 let key = FileCacheKey(name.to_string());
397 if let Some(data) = self.mem_cache.borrow_mut().get(&key) {
398 return Ok(data);
399 }
400
401 let path = self.get_cachedpath(name);
403 if !path.exists() {
404 return Err(DatasetsError::CacheError(format!(
405 "Cached file does not exist: {name}"
406 )));
407 }
408
409 let mut file = File::open(path)
410 .map_err(|e| DatasetsError::CacheError(format!("Failed to open cached file: {e}")))?;
411
412 let mut buffer = Vec::new();
413 file.read_to_end(&mut buffer)
414 .map_err(|e| DatasetsError::CacheError(format!("Failed to read cached file: {e}")))?;
415
416 self.mem_cache.borrow_mut().insert(key, buffer.clone());
418
419 Ok(buffer)
420 }
421
422 pub fn write_cached(&self, name: &str, data: &[u8]) -> Result<()> {
424 self.ensure_cachedir()?;
425
426 if self.max_cache_size > 0 {
428 let current_size = self.get_cache_size_bytes()?;
429 let new_file_size = data.len() as u64;
430
431 if current_size + new_file_size > self.max_cache_size {
432 self.cleanup_cache_to_fit(new_file_size)?;
433 }
434 }
435
436 let path = self.get_cachedpath(name);
438 let mut file = File::create(path)
439 .map_err(|e| DatasetsError::CacheError(format!("Failed to create cache file: {e}")))?;
440
441 file.write_all(data).map_err(|e| {
442 DatasetsError::CacheError(format!("Failed to write to cache file: {e}"))
443 })?;
444
445 let key = FileCacheKey(name.to_string());
447 self.mem_cache.borrow_mut().insert(key, data.to_vec());
448
449 Ok(())
450 }
451
452 pub fn clear_cache(&self) -> Result<()> {
454 if self.cachedir.exists() {
456 fs::remove_dir_all(&self.cachedir)
457 .map_err(|e| DatasetsError::CacheError(format!("Failed to clear cache: {e}")))?;
458 }
459
460 self.mem_cache.borrow_mut().clear();
462
463 Ok(())
464 }
465
466 pub fn remove_cached(&self, name: &str) -> Result<()> {
468 let path = self.get_cachedpath(name);
470 if path.exists() {
471 fs::remove_file(path).map_err(|e| {
472 DatasetsError::CacheError(format!("Failed to remove cached file: {e}"))
473 })?;
474 }
475
476 let key = FileCacheKey(name.to_string());
478 self.mem_cache.borrow_mut().remove(&key);
479
480 Ok(())
481 }
482
483 pub fn hash_filename(name: &str) -> String {
485 let hash = blake3::hash(name.as_bytes());
486 hash.to_hex().to_string()
487 }
488
489 pub fn get_cache_size_bytes(&self) -> Result<u64> {
491 let mut total_size = 0u64;
492
493 if self.cachedir.exists() {
494 let entries = fs::read_dir(&self.cachedir).map_err(|e| {
495 DatasetsError::CacheError(format!("Failed to read cache directory: {e}"))
496 })?;
497
498 for entry in entries {
499 let entry = entry.map_err(|e| {
500 DatasetsError::CacheError(format!("Failed to read directory entry: {e}"))
501 })?;
502
503 if let Ok(metadata) = entry.metadata() {
504 if metadata.is_file() {
505 total_size += metadata.len();
506 }
507 }
508 }
509 }
510
511 Ok(total_size)
512 }
513
514 fn cleanup_cache_to_fit(&self, needed_size: u64) -> Result<()> {
519 if self.max_cache_size == 0 {
520 return Ok(()); }
522
523 let current_size = self.get_cache_size_bytes()?;
524 let target_size = (self.max_cache_size as f64 * 0.8) as u64; let total_needed = current_size + needed_size;
526
527 if total_needed <= target_size {
528 return Ok(()); }
530
531 let size_to_free = total_needed - target_size;
532
533 let mut files_with_times = Vec::new();
535
536 if self.cachedir.exists() {
537 let entries = fs::read_dir(&self.cachedir).map_err(|e| {
538 DatasetsError::CacheError(format!("Failed to read cache directory: {e}"))
539 })?;
540
541 for entry in entries {
542 let entry = entry.map_err(|e| {
543 DatasetsError::CacheError(format!("Failed to read directory entry: {e}"))
544 })?;
545
546 if let Ok(metadata) = entry.metadata() {
547 if metadata.is_file() {
548 if let Ok(modified) = metadata.modified() {
549 files_with_times.push((entry.path(), metadata.len(), modified));
550 }
551 }
552 }
553 }
554 }
555
556 files_with_times.sort_by_key(|(_path, _size, modified)| *modified);
558
559 let mut freed_size = 0u64;
561 for (path, size, _modified) in files_with_times {
562 if freed_size >= size_to_free {
563 break;
564 }
565
566 if let Some(filename) = path.file_name().and_then(|n| n.to_str()) {
568 let key = FileCacheKey(filename.to_string());
569 self.mem_cache.borrow_mut().remove(&key);
570 }
571
572 if let Err(e) = fs::remove_file(&path) {
574 eprintln!("Warning: Failed to remove cache file {path:?}: {e}");
575 } else {
576 freed_size += size;
577 }
578 }
579
580 Ok(())
581 }
582
583 pub fn set_offline_mode(&mut self, offline: bool) {
585 self.offline_mode = offline;
586 }
587
588 pub fn is_offline(&self) -> bool {
590 self.offline_mode
591 }
592
593 pub fn set_max_cache_size(&mut self, max_size: u64) {
595 self.max_cache_size = max_size;
596 }
597
598 pub fn max_cache_size(&self) -> u64 {
600 self.max_cache_size
601 }
602
603 pub fn put(&self, name: &str, data: &[u8]) -> Result<()> {
605 self.write_cached(name, data)
606 }
607
608 pub fn get_detailed_stats(&self) -> Result<DetailedCacheStats> {
610 let mut total_size = 0u64;
611 let mut file_count = 0usize;
612 let mut files = Vec::new();
613
614 if self.cachedir.exists() {
615 let entries = fs::read_dir(&self.cachedir).map_err(|e| {
616 DatasetsError::CacheError(format!("Failed to read cache directory: {e}"))
617 })?;
618
619 for entry in entries {
620 let entry = entry.map_err(|e| {
621 DatasetsError::CacheError(format!("Failed to read directory entry: {e}"))
622 })?;
623
624 if let Ok(metadata) = entry.metadata() {
625 if metadata.is_file() {
626 let size = metadata.len();
627 total_size += size;
628 file_count += 1;
629
630 if let Some(filename) = entry.file_name().to_str() {
631 files.push(CacheFileInfo {
632 name: filename.to_string(),
633 size_bytes: size,
634 modified: metadata.modified().ok(),
635 });
636 }
637 }
638 }
639 }
640 }
641
642 files.sort_by_key(|f| std::cmp::Reverse(f.size_bytes));
644
645 Ok(DetailedCacheStats {
646 total_size_bytes: total_size,
647 file_count,
648 cachedir: self.cachedir.clone(),
649 max_cache_size: self.max_cache_size,
650 offline_mode: self.offline_mode,
651 files,
652 })
653 }
654}
655
/// Downloads `url` into the default cache (keyed by a BLAKE3 hash of the
/// URL), returning cached bytes unless `force_download` is set.
///
/// # Errors
/// Returns a `DownloadError` on network failure or a non-success HTTP
/// status, and a `CacheError` when the cache cannot be written.
#[cfg(feature = "download")]
#[allow(dead_code)]
pub fn download_data(url: &str, force_download: bool) -> Result<Vec<u8>> {
    // Note: parameter renamed from `_url` — the leading underscore marked
    // it as unused even though it is used throughout the function.
    let cache = DatasetCache::default();
    let cache_key = DatasetCache::hash_filename(url);

    // Serve from cache unless the caller explicitly forces a re-download.
    if !force_download && cache.is_cached(&cache_key) {
        return cache.read_cached(&cache_key);
    }

    let response = reqwest::blocking::get(url)
        .map_err(|e| DatasetsError::DownloadError(format!("Failed to download from {url}: {e}")))?;

    if !response.status().is_success() {
        return Err(DatasetsError::DownloadError(format!(
            "Failed to download from {url}: HTTP status {}",
            response.status()
        )));
    }

    let data = response
        .bytes()
        .map_err(|e| DatasetsError::DownloadError(format!("Failed to read response data: {e}")))?;

    let data_vec = data.to_vec();

    cache.write_cached(&cache_key, &data_vec)?;

    Ok(data_vec)
}
691
692#[cfg(not(feature = "download"))]
694#[allow(dead_code)]
708pub fn download_data(_url: &str, _force_download: bool) -> Result<Vec<u8>> {
709 Err(DatasetsError::Other(
710 "Download feature is not enabled. Recompile with --features download".to_string(),
711 ))
712}
713
/// High-level wrapper around [`DatasetCache`] that (de)serializes
/// `Dataset` values as JSON and exposes maintenance/reporting helpers.
pub struct CacheManager {
    /// Underlying two-level (memory + disk) byte cache.
    cache: DatasetCache,
}
718
impl CacheManager {
    /// Creates a manager over the default platform cache directory.
    ///
    /// # Errors
    /// Fails when the cache directory cannot be determined or created.
    pub fn new() -> Result<Self> {
        let cachedir = get_cachedir()?;
        Ok(Self {
            cache: DatasetCache::with_config(cachedir, DEFAULT_CACHE_SIZE, DEFAULT_CACHE_TTL),
        })
    }

    /// Creates a manager with an explicit cache directory, in-memory
    /// capacity, and TTL (seconds).
    pub fn with_config(cachedir: PathBuf, cache_size: usize, ttl_seconds: u64) -> Self {
        Self {
            cache: DatasetCache::with_config(cachedir, cache_size, ttl_seconds),
        }
    }

    /// Looks up a dataset by key, deserializing it from cached JSON.
    ///
    /// Returns `Ok(None)` when the key is not cached. On a
    /// deserialization failure the stale in-memory entry is dropped
    /// (the on-disk file is left in place) and an error is returned.
    pub fn get(&self, key: &CacheKey) -> Result<Option<crate::utils::Dataset>> {
        let name = key.as_string();
        if self.cache.is_cached(&name) {
            match self.cache.read_cached(&name) {
                Ok(cached_data) => {
                    match serde_json::from_slice::<crate::utils::Dataset>(&cached_data) {
                        Ok(dataset) => Ok(Some(dataset)),
                        Err(e) => {
                            // Evict the corrupt bytes from memory so they
                            // are not served again on the next lookup.
                            self.cache
                                .mem_cache
                                .borrow_mut()
                                .remove(&FileCacheKey(name.clone()));
                            Err(DatasetsError::CacheError(format!(
                                "Failed to deserialize cached dataset: {e}"
                            )))
                        }
                    }
                }
                Err(e) => Err(DatasetsError::CacheError(format!(
                    "Failed to read cached data: {e}"
                ))),
            }
        } else {
            Ok(None)
        }
    }

    /// Serializes `dataset` as JSON and stores it under `key`.
    ///
    /// # Errors
    /// Fails when serialization or the cache write fails.
    pub fn put(&self, key: &CacheKey, dataset: &crate::utils::Dataset) -> Result<()> {
        let name = key.as_string();

        let serialized = serde_json::to_vec(dataset)
            .map_err(|e| DatasetsError::CacheError(format!("Failed to serialize dataset: {e}")))?;

        self.cache
            .write_cached(&name, &serialized)
            .map_err(|e| DatasetsError::CacheError(format!("Failed to write to cache: {e}")))
    }

    /// Creates a manager with every cache knob specified explicitly.
    pub fn with_full_config(
        cachedir: PathBuf,
        cache_size: usize,
        ttl_seconds: u64,
        max_cache_size: u64,
        offline_mode: bool,
    ) -> Self {
        Self {
            cache: DatasetCache::with_full_config(
                cachedir,
                cache_size,
                ttl_seconds,
                max_cache_size,
                offline_mode,
            ),
        }
    }

    /// Cheap summary statistics (total size and file count).
    ///
    /// Directory-read errors are silently treated as an empty or partial
    /// listing rather than propagated.
    pub fn get_stats(&self) -> CacheStats {
        let cachedir = &self.cache.cachedir;
        let mut total_size = 0u64;
        let mut file_count = 0usize;

        if cachedir.exists() {
            if let Ok(entries) = fs::read_dir(cachedir) {
                for entry in entries.flatten() {
                    if let Ok(metadata) = entry.metadata() {
                        if metadata.is_file() {
                            total_size += metadata.len();
                            file_count += 1;
                        }
                    }
                }
            }
        }

        CacheStats {
            total_size_bytes: total_size,
            file_count,
            cachedir: cachedir.clone(),
        }
    }

    /// Detailed statistics including per-file information.
    pub fn get_detailed_stats(&self) -> Result<DetailedCacheStats> {
        self.cache.get_detailed_stats()
    }

    /// Enables or disables offline mode.
    pub fn set_offline_mode(&mut self, offline: bool) {
        self.cache.set_offline_mode(offline);
    }

    /// Returns true when offline mode is enabled.
    pub fn is_offline(&self) -> bool {
        self.cache.is_offline()
    }

    /// Sets the on-disk size budget in bytes (0 = unlimited).
    pub fn set_max_cache_size(&mut self, max_size: u64) {
        self.cache.set_max_cache_size(max_size);
    }

    /// Returns the on-disk size budget in bytes.
    pub fn max_cache_size(&self) -> u64 {
        self.cache.max_cache_size()
    }

    /// Deletes the entire cache (disk and memory).
    pub fn clear_all(&self) -> Result<()> {
        self.cache.clear_cache()
    }

    /// Removes a single cached entry by name.
    pub fn remove(&self, name: &str) -> Result<()> {
        self.cache.remove_cached(name)
    }

    /// Evicts least-recently-modified files until `target_size` extra
    /// bytes fit within the budget.
    pub fn cleanup_old_files(&self, target_size: u64) -> Result<()> {
        self.cache.cleanup_cache_to_fit(target_size)
    }

    /// Lists the names of all entries in the cache directory, sorted.
    ///
    /// # Errors
    /// Fails when the cache directory cannot be read.
    pub fn list_cached_files(&self) -> Result<Vec<String>> {
        let cachedir = &self.cache.cachedir;
        let mut files = Vec::new();

        if cachedir.exists() {
            let entries = fs::read_dir(cachedir).map_err(|e| {
                DatasetsError::CacheError(format!("Failed to read cache directory: {e}"))
            })?;

            for entry in entries {
                let entry = entry.map_err(|e| {
                    DatasetsError::CacheError(format!("Failed to read directory entry: {e}"))
                })?;

                if let Some(filename) = entry.file_name().to_str() {
                    files.push(filename.to_string());
                }
            }
        }

        files.sort();
        Ok(files)
    }

    /// Returns the cache directory path.
    pub fn cachedir(&self) -> &PathBuf {
        &self.cache.cachedir
    }

    /// Returns true when `name` is cached (in memory or on disk).
    pub fn is_cached(&self, name: &str) -> bool {
        self.cache.is_cached(name)
    }

    /// Prints a human-readable cache report to stdout.
    ///
    /// # Errors
    /// Fails when the detailed statistics cannot be collected.
    pub fn print_cache_report(&self) -> Result<()> {
        let stats = self.get_detailed_stats()?;

        println!("=== Cache Report ===");
        println!("Cache Directory: {}", stats.cachedir.display());
        println!(
            "Total Size: {} ({} files)",
            stats.formatted_size(),
            stats.file_count
        );
        println!("Max Size: {}", stats.formatted_max_size());

        // Usage is only meaningful with a finite budget.
        if stats.max_cache_size > 0 {
            println!("Usage: {:.1}%", stats.usage_percentage() * 100.0);
        }

        println!(
            "Offline Mode: {}",
            if stats.offline_mode {
                "Enabled"
            } else {
                "Disabled"
            }
        );

        if !stats.files.is_empty() {
            println!("\nCached Files:");
            for file in &stats.files {
                println!(
                    " {} - {} ({})",
                    file.name,
                    file.formatted_size(),
                    file.formatted_modified()
                );
            }
        }

        Ok(())
    }
}
939
/// Basic cache statistics: total size, file count, and location.
pub struct CacheStats {
    /// Combined size of all cached files in bytes.
    pub total_size_bytes: u64,
    /// Number of files in the cache directory.
    pub file_count: usize,
    /// Directory the statistics were collected from.
    pub cachedir: PathBuf,
}
949
/// Extended cache statistics including configuration and per-file details.
pub struct DetailedCacheStats {
    /// Combined size of all cached files in bytes.
    pub total_size_bytes: u64,
    /// Number of files in the cache directory.
    pub file_count: usize,
    /// Directory the statistics were collected from.
    pub cachedir: PathBuf,
    /// Configured size budget in bytes (0 = unlimited).
    pub max_cache_size: u64,
    /// Whether offline mode was enabled when the stats were taken.
    pub offline_mode: bool,
    /// Per-file details, sorted largest-first.
    pub files: Vec<CacheFileInfo>,
}
965
/// Size and modification metadata for a single cached file.
#[derive(Debug, Clone)]
pub struct CacheFileInfo {
    /// File name within the cache directory.
    pub name: String,
    /// File size in bytes.
    pub size_bytes: u64,
    /// Last modification time, when the filesystem provides one.
    pub modified: Option<std::time::SystemTime>,
}
976
impl CacheStats {
    /// Total cache size rendered as a human-readable string (B/KB/MB/GB).
    pub fn formatted_size(&self) -> String {
        format_bytes(self.total_size_bytes)
    }
}
983
984impl DetailedCacheStats {
985 pub fn formatted_size(&self) -> String {
987 format_bytes(self.total_size_bytes)
988 }
989
990 pub fn formatted_max_size(&self) -> String {
992 if self.max_cache_size == 0 {
993 "Unlimited".to_string()
994 } else {
995 format_bytes(self.max_cache_size)
996 }
997 }
998
999 pub fn usage_percentage(&self) -> f64 {
1001 if self.max_cache_size == 0 {
1002 0.0
1003 } else {
1004 self.total_size_bytes as f64 / self.max_cache_size as f64
1005 }
1006 }
1007}
1008
1009impl CacheFileInfo {
1010 pub fn formatted_size(&self) -> String {
1012 format_bytes(self.size_bytes)
1013 }
1014
1015 pub fn formatted_modified(&self) -> String {
1017 match &self.modified {
1018 Some(time) => {
1019 if let Ok(now) = std::time::SystemTime::now().duration_since(std::time::UNIX_EPOCH)
1020 {
1021 if let Ok(modified) = time.duration_since(std::time::UNIX_EPOCH) {
1022 let diff_secs = now.as_secs().saturating_sub(modified.as_secs());
1023 let days = diff_secs / 86400;
1024 let hours = (diff_secs % 86400) / 3600;
1025 let mins = (diff_secs % 3600) / 60;
1026
1027 if days > 0 {
1028 format!("{days} days ago")
1029 } else if hours > 0 {
1030 format!("{hours} hours ago")
1031 } else if mins > 0 {
1032 format!("{mins} minutes ago")
1033 } else {
1034 "Just now".to_string()
1035 }
1036 } else {
1037 "Unknown".to_string()
1038 }
1039 } else {
1040 "Unknown".to_string()
1041 }
1042 }
1043 None => "Unknown".to_string(),
1044 }
1045 }
1046}
1047
/// Renders a byte count as a human-readable string using binary-scaled
/// units (B, KB, MB, GB) with one decimal place above the byte range.
#[allow(dead_code)]
fn format_bytes(bytes: u64) -> String {
    const KIB: f64 = 1024.0;
    let value = bytes as f64;

    if value >= KIB * KIB * KIB {
        format!("{:.1} GB", value / (KIB * KIB * KIB))
    } else if value >= KIB * KIB {
        format!("{:.1} MB", value / (KIB * KIB))
    } else if value >= KIB {
        format!("{:.1} KB", value / KIB)
    } else {
        format!("{value} B")
    }
}
1062
/// Outcome summary of a batch operation (download, verify, or cleanup).
#[derive(Debug, Clone)]
pub struct BatchResult {
    /// Number of items that completed successfully.
    pub success_count: usize,
    /// Number of items that failed.
    pub failure_count: usize,
    /// `(item name, error message)` for each failed item.
    pub failures: Vec<(String, String)>,
    /// Total bytes processed across successful items.
    pub total_bytes: u64,
    /// Wall-clock time the whole batch took.
    pub elapsed_time: std::time::Duration,
}
1077
1078impl BatchResult {
1079 pub fn new() -> Self {
1081 Self {
1082 success_count: 0,
1083 failure_count: 0,
1084 failures: Vec::new(),
1085 total_bytes: 0,
1086 elapsed_time: std::time::Duration::ZERO,
1087 }
1088 }
1089
1090 pub fn is_all_success(&self) -> bool {
1092 self.failure_count == 0
1093 }
1094
1095 pub fn success_rate(&self) -> f64 {
1097 let total = self.success_count + self.failure_count;
1098 if total == 0 {
1099 0.0
1100 } else {
1101 (self.success_count as f64 / total as f64) * 100.0
1102 }
1103 }
1104
1105 pub fn summary(&self) -> String {
1107 format!(
1108 "Batch completed: {}/{} successful ({:.1}%), {} bytes processed in {:.2}s",
1109 self.success_count,
1110 self.success_count + self.failure_count,
1111 self.success_rate(),
1112 format_bytes(self.total_bytes),
1113 self.elapsed_time.as_secs_f64()
1114 )
1115 }
1116}
1117
impl Default for BatchResult {
    // The default value is an empty result, identical to `BatchResult::new()`.
    fn default() -> Self {
        Self::new()
    }
}
1123
/// Batch download/verify/cleanup operations built on a [`CacheManager`].
pub struct BatchOperations {
    /// Cache the batch operations act on.
    cache: CacheManager,
    /// Run items on worker threads when true; sequentially otherwise.
    parallel: bool,
    /// Number of retries after the first failed attempt.
    max_retries: usize,
    /// Delay between retry attempts.
    retry_delay: std::time::Duration,
}
1131
1132impl BatchOperations {
    /// Creates batch operations with defaults: parallel execution,
    /// 3 retries, and 1 second between retry attempts.
    pub fn new(cache: CacheManager) -> Self {
        Self {
            cache,
            parallel: true,
            max_retries: 3,
            retry_delay: std::time::Duration::from_millis(1000),
        }
    }
1142
    /// Sets whether batch items run on worker threads (builder-style).
    pub fn with_parallel(mut self, parallel: bool) -> Self {
        self.parallel = parallel;
        self
    }
1148
    /// Sets the retry count and the delay between attempts (builder-style).
    pub fn with_retry_config(
        mut self,
        max_retries: usize,
        retry_delay: std::time::Duration,
    ) -> Self {
        self.max_retries = max_retries;
        self.retry_delay = retry_delay;
        self
    }
1159
    /// Downloads each `(url, name)` pair into the cache — in parallel or
    /// sequentially depending on configuration — and reports the outcome
    /// including total elapsed time.
    #[cfg(feature = "download")]
    pub fn batch_download(&self, urls_andnames: &[(&str, &str)]) -> BatchResult {
        let start_time = std::time::Instant::now();
        let mut result = BatchResult::new();

        if self.parallel {
            self.batch_download_parallel(urls_andnames, &mut result)
        } else {
            self.batch_download_sequential(urls_andnames, &mut result)
        }

        result.elapsed_time = start_time.elapsed();
        result
    }
1175
    /// Parallel download path: one thread per URL. Each thread retries up
    /// to `max_retries` times, writes the downloaded bytes into the cache
    /// directory, and records its outcome in a shared tally that is merged
    /// into `result` after all threads join.
    #[cfg(feature = "download")]
    fn batch_download_parallel(&self, urls_andnames: &[(&str, &str)], result: &mut BatchResult) {
        use std::fs::File;
        use std::io::Write;
        use std::sync::{Arc, Mutex};
        use std::thread;

        // Without a cache directory nothing can be stored: fail the whole batch.
        if let Err(e) = self.cache.cache.ensure_cachedir() {
            result.failure_count += urls_andnames.len();
            for &(_, name) in urls_andnames {
                result
                    .failures
                    .push((name.to_string(), format!("Cache setup failed: {e}")));
            }
            return;
        }

        // Shared accumulator the worker threads update under a mutex.
        let result_arc = Arc::new(Mutex::new(BatchResult::new()));
        let cachedir = self.cache.cache.cachedir.clone();
        let max_retries = self.max_retries;
        let retry_delay = self.retry_delay;

        let handles: Vec<_> = urls_andnames
            .iter()
            .map(|&(url, name)| {
                let result_clone = Arc::clone(&result_arc);
                let url = url.to_string();
                let name = name.to_string();
                let cachedir = cachedir.clone();

                thread::spawn(move || {
                    let mut success = false;
                    let mut last_error = String::new();
                    let mut downloaded_data = Vec::new();

                    // First attempt plus `max_retries` retries.
                    for attempt in 0..=max_retries {
                        match download_data(&url, false) {
                            Ok(data) => {
                                // Persist under the caller-provided name.
                                let path = cachedir.join(&name);
                                match File::create(&path) {
                                    Ok(mut file) => match file.write_all(&data) {
                                        Ok(_) => {
                                            let mut r =
                                                result_clone.lock().expect("Operation failed");
                                            r.success_count += 1;
                                            r.total_bytes += data.len() as u64;
                                            downloaded_data = data;
                                            success = true;
                                            break;
                                        }
                                        Err(e) => {
                                            last_error = format!("Failed to write cache file: {e}");
                                        }
                                    },
                                    Err(e) => {
                                        last_error = format!("Failed to create cache file: {e}");
                                    }
                                }
                            }
                            Err(e) => {
                                last_error = format!("Download failed: {e}");
                                // Back off only between download attempts.
                                if attempt < max_retries {
                                    thread::sleep(retry_delay);
                                }
                            }
                        }
                    }

                    if !success {
                        let mut r = result_clone.lock().expect("Operation failed");
                        r.failure_count += 1;
                        r.failures.push((name.clone(), last_error));
                    }

                    // Hand the bytes back so the main thread can warm the
                    // in-memory cache (`RefCell` is not `Sync`, so worker
                    // threads cannot insert into it directly).
                    (name, success, downloaded_data)
                })
            })
            .collect();

        let mut successful_downloads = Vec::new();
        for handle in handles {
            if let Ok((name, success, data)) = handle.join() {
                if success && !data.is_empty() {
                    successful_downloads.push((name, data));
                }
            }
        }

        // Merge the workers' tallies into the caller-visible result.
        if let Ok(arc_result) = result_arc.lock() {
            result.success_count += arc_result.success_count;
            result.failure_count += arc_result.failure_count;
            result.failures.extend(arc_result.failures.clone());
        }

        // Populate the in-memory cache from the main thread.
        for (name, data) in successful_downloads {
            let key = FileCacheKey(name);
            self.cache.cache.mem_cache.borrow_mut().insert(key, data);
        }
    }
1280
    /// Sequential download path: items are processed one at a time, each
    /// with up to `max_retries` retries and `retry_delay` between attempts.
    #[cfg(feature = "download")]
    fn batch_download_sequential(&self, urls_andnames: &[(&str, &str)], result: &mut BatchResult) {
        for &(url, name) in urls_andnames {
            let mut success = false;
            let mut last_error = String::new();

            // First attempt plus `max_retries` retries.
            for attempt in 0..=self.max_retries {
                match download_data(url, false) {
                    Ok(data) => match self.cache.cache.write_cached(name, &data) {
                        Ok(_) => {
                            result.success_count += 1;
                            result.total_bytes += data.len() as u64;
                            success = true;
                            break;
                        }
                        Err(e) => {
                            last_error = format!("Cache write failed: {e}");
                        }
                    },
                    Err(e) => {
                        last_error = format!("Download failed: {e}");
                        // Back off only between download attempts.
                        if attempt < self.max_retries {
                            std::thread::sleep(self.retry_delay);
                        }
                    }
                }
            }

            if !success {
                result.failure_count += 1;
                result.failures.push((name.to_string(), last_error));
            }
        }
    }
1315
1316 pub fn batch_verify_integrity(&self, files_andhashes: &[(&str, &str)]) -> BatchResult {
1318 let start_time = std::time::Instant::now();
1319 let mut result = BatchResult::new();
1320
1321 for &(filename, expected_hash) in files_andhashes {
1322 match self.cache.cache.get_cachedpath(filename).exists() {
1323 true => match sha256_hash_file(&self.cache.cache.get_cachedpath(filename)) {
1324 Ok(actual_hash) => {
1325 if actual_hash == expected_hash {
1326 result.success_count += 1;
1327 if let Ok(metadata) =
1328 std::fs::metadata(self.cache.cache.get_cachedpath(filename))
1329 {
1330 result.total_bytes += metadata.len();
1331 }
1332 } else {
1333 result.failure_count += 1;
1334 result.failures.push((
1335 filename.to_string(),
1336 format!(
1337 "Hash mismatch: expected {expected_hash}, got {actual_hash}"
1338 ),
1339 ));
1340 }
1341 }
1342 Err(e) => {
1343 result.failure_count += 1;
1344 result.failures.push((
1345 filename.to_string(),
1346 format!("Hash computation failed: {e}"),
1347 ));
1348 }
1349 },
1350 false => {
1351 result.failure_count += 1;
1352 result
1353 .failures
1354 .push((filename.to_string(), "File not found in cache".to_string()));
1355 }
1356 }
1357 }
1358
1359 result.elapsed_time = start_time.elapsed();
1360 result
1361 }
1362
1363 pub fn selective_cleanup(
1365 &self,
1366 patterns: &[&str],
1367 max_age_days: Option<u32>,
1368 ) -> Result<BatchResult> {
1369 let start_time = std::time::Instant::now();
1370 let mut result = BatchResult::new();
1371
1372 let cached_files = self.cache.list_cached_files()?;
1373 let now = std::time::SystemTime::now();
1374
1375 for filename in cached_files {
1376 let should_remove = patterns.iter().any(|pattern| {
1377 filename.contains(pattern) || matches_glob_pattern(&filename, pattern)
1378 });
1379
1380 if should_remove {
1381 let filepath = self.cache.cache.get_cachedpath(&filename);
1382
1383 let remove_due_to_age = if let Some(max_age) = max_age_days {
1385 if let Ok(metadata) = std::fs::metadata(&filepath) {
1386 if let Ok(modified) = metadata.modified() {
1387 if let Ok(age) = now.duration_since(modified) {
1388 age.as_secs() > (max_age as u64 * 24 * 3600)
1389 } else {
1390 false
1391 }
1392 } else {
1393 false
1394 }
1395 } else {
1396 false
1397 }
1398 } else {
1399 true };
1401
1402 if remove_due_to_age {
1403 match self.cache.remove(&filename) {
1404 Ok(_) => {
1405 result.success_count += 1;
1406 if let Ok(metadata) = std::fs::metadata(&filepath) {
1407 result.total_bytes += metadata.len();
1408 }
1409 }
1410 Err(e) => {
1411 result.failure_count += 1;
1412 result
1413 .failures
1414 .push((filename, format!("Removal failed: {e}")));
1415 }
1416 }
1417 }
1418 }
1419 }
1420
1421 result.elapsed_time = start_time.elapsed();
1422 Ok(result)
1423 }
1424
1425 pub fn batch_process<F, T, E>(&self, names: &[String], processor: F) -> BatchResult
1427 where
1428 F: Fn(&str, &[u8]) -> std::result::Result<T, E> + Sync + Send + 'static,
1429 E: std::fmt::Display,
1430 T: Send,
1431 {
1432 let start_time = std::time::Instant::now();
1433 let mut result = BatchResult::new();
1434
1435 if self.parallel {
1436 self.batch_process_parallel(names, processor, &mut result)
1437 } else {
1438 self.batch_process_sequential(names, processor, &mut result)
1439 }
1440
1441 result.elapsed_time = start_time.elapsed();
1442 result
1443 }
1444
1445 fn batch_process_parallel<F, T, E>(
1446 &self,
1447 names: &[String],
1448 processor: F,
1449 result: &mut BatchResult,
1450 ) where
1451 F: Fn(&str, &[u8]) -> std::result::Result<T, E> + Sync + Send + 'static,
1452 E: std::fmt::Display,
1453 T: Send,
1454 {
1455 let mut data_pairs = Vec::new();
1458
1459 for name in names {
1461 match self.cache.cache.read_cached(name) {
1462 Ok(data) => data_pairs.push((name.clone(), data)),
1463 Err(e) => {
1464 result.failure_count += 1;
1465 result
1466 .failures
1467 .push((name.clone(), format!("Cache read failed: {e}")));
1468 }
1469 }
1470 }
1471
1472 if !data_pairs.is_empty() {
1474 use std::sync::{Arc, Mutex};
1475 use std::thread;
1476
1477 let parallel_result = Arc::new(Mutex::new(BatchResult::new()));
1478 let processor = Arc::new(processor);
1479
1480 let handles: Vec<_> = data_pairs
1481 .into_iter()
1482 .map(|(name, data)| {
1483 let result_clone = Arc::clone(¶llel_result);
1484 let processor_clone = Arc::clone(&processor);
1485
1486 thread::spawn(move || match processor_clone(&name, &data) {
1487 Ok(_) => {
1488 let mut r = result_clone.lock().expect("Operation failed");
1489 r.success_count += 1;
1490 r.total_bytes += data.len() as u64;
1491 }
1492 Err(e) => {
1493 let mut r = result_clone.lock().expect("Operation failed");
1494 r.failure_count += 1;
1495 r.failures.push((name, format!("Processing failed: {e}")));
1496 }
1497 })
1498 })
1499 .collect();
1500
1501 for handle in handles {
1502 let _ = handle.join();
1503 }
1504
1505 let parallel_result = parallel_result.lock().expect("Operation failed");
1507 result.success_count += parallel_result.success_count;
1508 result.failure_count += parallel_result.failure_count;
1509 result.total_bytes += parallel_result.total_bytes;
1510 result.failures.extend(parallel_result.failures.clone());
1511 }
1512 }
1513
1514 fn batch_process_sequential<F, T, E>(
1515 &self,
1516 names: &[String],
1517 processor: F,
1518 result: &mut BatchResult,
1519 ) where
1520 F: Fn(&str, &[u8]) -> std::result::Result<T, E>,
1521 E: std::fmt::Display,
1522 {
1523 for name in names {
1524 match self.cache.cache.read_cached(name) {
1525 Ok(data) => match processor(name, &data) {
1526 Ok(_) => {
1527 result.success_count += 1;
1528 result.total_bytes += data.len() as u64;
1529 }
1530 Err(e) => {
1531 result.failure_count += 1;
1532 result
1533 .failures
1534 .push((name.clone(), format!("Processing failed: {e}")));
1535 }
1536 },
1537 Err(e) => {
1538 result.failure_count += 1;
1539 result
1540 .failures
1541 .push((name.clone(), format!("Cache read failed: {e}")));
1542 }
1543 }
1544 }
1545 }
1546
    /// Returns a shared reference to the underlying `CacheManager`.
    pub fn cache_manager(&self) -> &CacheManager {
        &self.cache
    }
1551
    /// Writes `data` to the cache under `name`, delegating to the inner cache.
    ///
    /// # Errors
    ///
    /// Propagates any error from the underlying cache write.
    pub fn write_cached(&self, name: &str, data: &[u8]) -> Result<()> {
        self.cache.cache.write_cached(name, data)
    }
1556
    /// Reads the cached bytes stored under `name`, delegating to the inner
    /// cache.
    ///
    /// # Errors
    ///
    /// Propagates any error from the underlying cache read (e.g. a missing
    /// entry).
    pub fn read_cached(&self, name: &str) -> Result<Vec<u8>> {
        self.cache.cache.read_cached(name)
    }
1561
    /// Lists the names of all files currently in the cache, delegating to the
    /// cache manager.
    ///
    /// # Errors
    ///
    /// Propagates any error from enumerating the cache directory.
    pub fn list_cached_files(&self) -> Result<Vec<String>> {
        self.cache.list_cached_files()
    }
1566
    /// Prints a human-readable cache report, delegating to the cache manager.
    ///
    /// # Errors
    ///
    /// Propagates any error from gathering the report data.
    pub fn print_cache_report(&self) -> Result<()> {
        self.cache.print_cache_report()
    }
1571
1572 pub fn get_cache_statistics(&self) -> Result<BatchResult> {
1574 let start_time = std::time::Instant::now();
1575 let mut result = BatchResult::new();
1576
1577 let cached_files = self.cache.list_cached_files()?;
1578
1579 for filename in cached_files {
1580 let filepath = self.cache.cache.get_cachedpath(&filename);
1581 match std::fs::metadata(&filepath) {
1582 Ok(metadata) => {
1583 result.success_count += 1;
1584 result.total_bytes += metadata.len();
1585 }
1586 Err(e) => {
1587 result.failure_count += 1;
1588 result
1589 .failures
1590 .push((filename, format!("Metadata read failed: {e}")));
1591 }
1592 }
1593 }
1594
1595 result.elapsed_time = start_time.elapsed();
1596 Ok(result)
1597 }
1598}
1599
/// Returns `true` when `filename` matches `pattern`.
///
/// Supports a minimal glob dialect: `*` matches any (possibly empty) run of
/// characters and may appear any number of times; every other character must
/// match literally. A pattern without `*` must equal the filename exactly.
///
/// This generalizes the previous implementation, which only handled patterns
/// with a single `*` and incorrectly matched overlapping prefix/suffix pairs
/// (e.g. `"b"` against `"b*b"`).
#[allow(dead_code)]
fn matches_glob_pattern(filename: &str, pattern: &str) -> bool {
    // No wildcard: plain equality.
    if !pattern.contains('*') {
        return filename == pattern;
    }

    let parts: Vec<&str> = pattern.split('*').collect();
    let prefix = parts[0];
    let suffix = parts[parts.len() - 1];

    // The literal text before the first `*` anchors at the start and the text
    // after the last `*` anchors at the end; they must not overlap.
    if !filename.starts_with(prefix) || !filename.ends_with(suffix) {
        return false;
    }
    if prefix.len() + suffix.len() > filename.len() {
        return false;
    }

    // Literal segments between wildcards must occur, in order, within the
    // span left between the prefix and the suffix. Slicing by byte length is
    // safe: `starts_with`/`ends_with` guarantee char-boundary offsets.
    let mut remaining = &filename[prefix.len()..filename.len() - suffix.len()];
    for segment in &parts[1..parts.len() - 1] {
        if segment.is_empty() {
            continue;
        }
        match remaining.find(segment) {
            Some(pos) => remaining = &remaining[pos + segment.len()..],
            None => return false,
        }
    }
    true
}
1618
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// `BatchResult` counters, success-rate math, and summary formatting.
    #[test]
    fn test_batch_result() {
        let mut result = BatchResult::new();
        // A brand-new result is empty and (vacuously) all-successful.
        assert_eq!(result.success_count, 0);
        assert_eq!(result.failure_count, 0);
        assert!(result.is_all_success());
        assert_eq!(result.success_rate(), 0.0);

        result.success_count = 8;
        result.failure_count = 2;
        result.total_bytes = 1024;

        // 8 of 10 operations succeeded -> 80% and a matching summary string.
        assert!(!result.is_all_success());
        assert_eq!(result.success_rate(), 80.0);
        assert!(result.summary().contains("8/10 successful"));
        assert!(result.summary().contains("80.0%"));
    }

    /// Builder-style configuration of `BatchOperations` is preserved.
    #[test]
    fn test_batch_operations_creation() {
        let tempdir = TempDir::new().expect("Operation failed");
        let cache_manager = CacheManager::with_config(tempdir.path().to_path_buf(), 10, 3600);
        let batch_ops = BatchOperations::new(cache_manager)
            .with_parallel(false)
            .with_retry_config(2, std::time::Duration::from_millis(500));

        assert!(!batch_ops.parallel);
        assert_eq!(batch_ops.max_retries, 2);
    }

    /// Glob-based cleanup removes only files matching the pattern.
    #[test]
    fn test_selective_cleanup() {
        let tempdir = TempDir::new().expect("Operation failed");
        let cache_manager = CacheManager::with_config(tempdir.path().to_path_buf(), 10, 3600);
        let batch_ops = BatchOperations::new(cache_manager);

        // Seed the cache with two .csv files and one .json file.
        let test_data = vec![0u8; 100];
        batch_ops
            .cache
            .cache
            .write_cached("test1.csv", &test_data)
            .expect("Test: cache operation failed");
        batch_ops
            .cache
            .cache
            .write_cached("test2.csv", &test_data)
            .expect("Test: cache operation failed");
        batch_ops
            .cache
            .cache
            .write_cached("data.json", &test_data)
            .expect("Test: cache operation failed");

        // No age restriction: every "*.csv" match is removed.
        let result = batch_ops
            .selective_cleanup(&["*.csv"], None)
            .expect("Operation failed");

        // Both CSVs removed; the JSON file is untouched.
        assert_eq!(result.success_count, 2);
        assert!(!batch_ops.cache.is_cached("test1.csv"));
        assert!(!batch_ops.cache.is_cached("test2.csv"));
        assert!(batch_ops.cache.is_cached("data.json"));
    }

    /// Sequential batch processing tallies successes and processed bytes.
    #[test]
    fn test_batch_process() {
        let tempdir = TempDir::new().expect("Operation failed");
        let cache_manager = CacheManager::with_config(tempdir.path().to_path_buf(), 10, 3600);
        let batch_ops = BatchOperations::new(cache_manager).with_parallel(false);

        // Two files of known sizes (100 + 200 bytes).
        let test_data1 = vec![1u8; 100];
        let test_data2 = vec![2u8; 200];
        batch_ops
            .cache
            .cache
            .write_cached("file1.dat", &test_data1)
            .expect("Test: cache operation failed");
        batch_ops
            .cache
            .cache
            .write_cached("file2.dat", &test_data2)
            .expect("Test: cache operation failed");

        let files = vec!["file1.dat".to_string(), "file2.dat".to_string()];

        // Processor fails on empty input, otherwise returns the byte count.
        let result = batch_ops.batch_process(&files, |_name, data| {
            if data.is_empty() {
                Err("Empty file")
            } else {
                Ok(data.len())
            }
        });

        assert_eq!(result.success_count, 2);
        assert_eq!(result.failure_count, 0);
        // total_bytes is the sum of both file sizes.
        assert_eq!(result.total_bytes, 300);
    }

    /// Statistics reflect the number of cached files and their total size.
    #[test]
    fn test_get_cache_statistics() {
        let tempdir = TempDir::new().expect("Operation failed");
        let cache_manager = CacheManager::with_config(tempdir.path().to_path_buf(), 10, 3600);
        let batch_ops = BatchOperations::new(cache_manager);

        // Empty cache: nothing counted.
        let result = batch_ops.get_cache_statistics().expect("Operation failed");
        assert_eq!(result.success_count, 0);

        // Two 500-byte files.
        let test_data = vec![0u8; 500];
        batch_ops
            .cache
            .cache
            .write_cached("test1.dat", &test_data)
            .expect("Test: cache operation failed");
        batch_ops
            .cache
            .cache
            .write_cached("test2.dat", &test_data)
            .expect("Test: cache operation failed");

        let result = batch_ops.get_cache_statistics().expect("Operation failed");
        assert_eq!(result.success_count, 2);
        assert_eq!(result.total_bytes, 1000);
    }

    /// Single-`*` glob matching: wildcard, prefix, suffix, and exact cases.
    #[test]
    fn test_matches_glob_pattern() {
        assert!(matches_glob_pattern("test.csv", "*"));
        assert!(matches_glob_pattern("test.csv", "*.csv"));
        assert!(matches_glob_pattern("test.csv", "test.*"));
        assert!(matches_glob_pattern("test.csv", "test.csv"));

        assert!(!matches_glob_pattern("test.json", "*.csv"));
        assert!(!matches_glob_pattern("other.csv", "test.*"));
    }

    /// A freshly configured manager starts with an empty cache.
    #[test]
    fn test_cache_manager_creation() {
        let tempdir = TempDir::new().expect("Operation failed");
        let manager = CacheManager::with_config(tempdir.path().to_path_buf(), 10, 3600);
        let stats = manager.get_stats();
        assert_eq!(stats.file_count, 0);
    }

    /// Human-readable size formatting at the KB and GB boundaries.
    #[test]
    fn test_cache_stats_formatting() {
        let tempdir = TempDir::new().expect("Operation failed");
        let stats = CacheStats {
            total_size_bytes: 1024,
            file_count: 1,
            cachedir: tempdir.path().to_path_buf(),
        };

        assert_eq!(stats.formatted_size(), "1.0 KB");

        let stats_large = CacheStats {
            total_size_bytes: 1024 * 1024 * 1024,
            file_count: 1,
            cachedir: tempdir.path().to_path_buf(),
        };

        assert_eq!(stats_large.formatted_size(), "1.0 GB");
    }

    /// Filename hashing is deterministic, collision-spread, and SHA-256-sized.
    #[test]
    fn test_hash_file_name() {
        let hash1 = DatasetCache::hash_filename("test.csv");
        let hash2 = DatasetCache::hash_filename("test.csv");
        let hash3 = DatasetCache::hash_filename("different.csv");

        assert_eq!(hash1, hash2);
        assert_ne!(hash1, hash3);
        // 64 hex chars == 256 bits.
        assert_eq!(hash1.len(), 64);
    }

    /// The platform cache dir resolves on supported OSes and embeds our name.
    #[test]
    fn test_platform_cachedir() {
        let cachedir = get_platform_cachedir();
        // Every supported target should produce Some; the cfg! guard keeps
        // this assertion from failing on an unrecognized OS.
        assert!(cachedir.is_some() || cfg!(target_os = "unknown"));

        if let Some(dir) = cachedir {
            assert!(dir.to_string_lossy().contains("scirs2-datasets"));
        }
    }

    /// Writes beyond the size limit trigger eviction; the newest entry stays.
    #[test]
    fn test_cache_size_management() {
        let tempdir = TempDir::new().expect("Operation failed");
        // 2048-byte cap so the writes below overflow it.
        let cache = DatasetCache::with_full_config(
            tempdir.path().to_path_buf(),
            10,
            3600,
            2048,
            false,
        );

        // Three 400-byte files fit comfortably (1200 bytes total).
        let small_data1 = vec![0u8; 400];
        cache
            .write_cached("small1.dat", &small_data1)
            .expect("Operation failed");

        let small_data2 = vec![0u8; 400];
        cache
            .write_cached("small2.dat", &small_data2)
            .expect("Operation failed");

        let small_data3 = vec![0u8; 400];
        cache
            .write_cached("small3.dat", &small_data3)
            .expect("Operation failed");

        // Adding 800 more bytes pushes past the cap and forces eviction.
        let medium_data = vec![0u8; 800];
        cache
            .write_cached("medium.dat", &medium_data)
            .expect("Operation failed");

        // The cache must have shrunk itself back under the limit...
        let stats = cache.get_detailed_stats().expect("Operation failed");
        assert!(stats.total_size_bytes <= cache.max_cache_size());

        // ...while keeping the most recently written file.
        assert!(cache.is_cached("medium.dat"));
    }

    /// The offline-mode flag round-trips through its setter.
    #[test]
    fn test_offline_mode() {
        let tempdir = TempDir::new().expect("Operation failed");
        let mut cache = DatasetCache::new(tempdir.path().to_path_buf());

        assert!(!cache.is_offline());
        cache.set_offline_mode(true);
        assert!(cache.is_offline());
    }

    /// Detailed stats report per-file name and size for cached entries.
    #[test]
    fn test_detailed_stats() {
        let tempdir = TempDir::new().expect("Operation failed");
        let cache = DatasetCache::new(tempdir.path().to_path_buf());

        let test_data = vec![1, 2, 3, 4, 5];
        cache
            .write_cached("test.dat", &test_data)
            .expect("Operation failed");

        let stats = cache.get_detailed_stats().expect("Operation failed");
        assert_eq!(stats.file_count, 1);
        assert_eq!(stats.total_size_bytes, test_data.len() as u64);
        assert_eq!(stats.files.len(), 1);
        assert_eq!(stats.files[0].name, "test.dat");
        assert_eq!(stats.files[0].size_bytes, test_data.len() as u64);
    }

    /// A new manager has empty stats and reports its configured directory.
    #[test]
    fn test_cache_manager() {
        let tempdir = TempDir::new().expect("Operation failed");
        let manager = CacheManager::with_config(tempdir.path().to_path_buf(), 10, 3600);

        let stats = manager.get_stats();
        assert_eq!(stats.file_count, 0);
        assert_eq!(stats.total_size_bytes, 0);

        assert_eq!(manager.cachedir(), &tempdir.path().to_path_buf());
    }

    /// Byte formatting at each unit boundary (B, KB, MB, GB).
    #[test]
    fn test_format_bytes() {
        assert_eq!(format_bytes(512), "512 B");
        assert_eq!(format_bytes(1024), "1.0 KB");
        assert_eq!(format_bytes(1024 * 1024), "1.0 MB");
        assert_eq!(format_bytes(1024 * 1024 * 1024), "1.0 GB");
    }
}