1use crate::error::{DatasetsError, Result};
4use scirs2_core::cache::{CacheBuilder, TTLSizedCache};
5use std::cell::RefCell;
6use std::fs::{self, File};
7use std::hash::{Hash, Hasher};
8use std::io::{Read, Write};
9use std::path::{Path, PathBuf};
10
/// Name of the cache directory created under the platform cache root.
const CACHE_DIR_NAME: &str = "scirs2-datasets";

/// Default number of entries held by the in-memory TTL cache.
const DEFAULT_CACHE_SIZE: usize = 100;

/// Default time-to-live for in-memory cache entries, in seconds (1 hour).
const DEFAULT_CACHE_TTL: u64 = 3600;

/// Default upper bound on total on-disk cache size, in bytes (500 MiB).
const DEFAULT_MAX_CACHE_SIZE: u64 = 500 * 1024 * 1024;

/// Environment variable that overrides the cache directory location.
const CACHE_DIR_ENV: &str = "SCIRS2_CACHE_DIR";
25
26#[allow(dead_code)]
28pub fn sha256_hash_file(path: &Path) -> std::result::Result<String, String> {
29 use sha2::{Digest, Sha256};
30
31 let mut file = File::open(path).map_err(|e| format!("Failed to open file: {e}"))?;
32 let mut hasher = Sha256::new();
33 let mut buffer = [0; 8192];
34
35 loop {
36 let bytes_read = file
37 .read(&mut buffer)
38 .map_err(|e| format!("Failed to read file: {e}"))?;
39 if bytes_read == 0 {
40 break;
41 }
42 hasher.update(&buffer[..bytes_read]);
43 }
44
45 Ok(format!("{:x}", hasher.finalize()))
46}
47
/// Registry metadata for a downloadable dataset file.
pub struct RegistryEntry {
    /// Expected SHA256 checksum of the file; an empty string skips verification.
    pub sha256: &'static str,
    /// URL the file is downloaded from.
    pub url: &'static str,
}
55
56#[allow(dead_code)]
66pub fn get_cachedir() -> Result<PathBuf> {
67 if let Ok(cachedir) = std::env::var(CACHE_DIR_ENV) {
69 let cachepath = PathBuf::from(cachedir);
70 ensuredirectory_exists(&cachepath)?;
71 return Ok(cachepath);
72 }
73
74 if let Some(cachedir) = get_platform_cachedir() {
76 ensuredirectory_exists(&cachedir)?;
77 return Ok(cachedir);
78 }
79
80 let homedir = dirs::home_dir()
82 .ok_or_else(|| DatasetsError::CacheError("Could not find home directory".to_string()))?;
83 let cachedir = homedir.join(format!(".{CACHE_DIR_NAME}"));
84 ensuredirectory_exists(&cachedir)?;
85
86 Ok(cachedir)
87}
88
89#[allow(dead_code)]
91fn get_platform_cachedir() -> Option<PathBuf> {
92 #[cfg(target_os = "windows")]
93 {
94 dirs::data_local_dir().map(|dir| dir.join(CACHE_DIR_NAME))
95 }
96 #[cfg(target_os = "macos")]
97 {
98 dirs::home_dir().map(|dir| dir.join("Library").join("Caches").join(CACHE_DIR_NAME))
99 }
100 #[cfg(not(any(target_os = "windows", target_os = "macos")))]
101 {
102 if let Ok(xdg_cache) = std::env::var("XDG_CACHE_HOME") {
104 Some(PathBuf::from(xdg_cache).join(CACHE_DIR_NAME))
105 } else {
106 dirs::home_dir().map(|home| home.join(".cache").join(CACHE_DIR_NAME))
107 }
108 }
109}
110
111#[allow(dead_code)]
113fn ensuredirectory_exists(dir: &Path) -> Result<()> {
114 if !dir.exists() {
115 fs::create_dir_all(dir).map_err(|e| {
116 DatasetsError::CacheError(format!("Failed to create cache directory: {e}"))
117 })?;
118 }
119 Ok(())
120}
121
/// Returns the local path of `filename`, downloading it into the cache first
/// if it is not already present.
///
/// On a cache miss the file is downloaded from the registry entry's URL into
/// a temporary directory, its SHA256 checksum is verified when the registry
/// provides one, and only then is it copied into the cache — so a partial or
/// corrupt download never lands in the cache directory.
///
/// # Errors
/// Returns a descriptive message when the cache directory cannot be resolved,
/// no registry entry is supplied, the download fails, the checksum mismatches,
/// or the file cannot be written into the cache.
#[cfg(feature = "download-sync")]
#[allow(dead_code)]
pub fn fetch_data(
    filename: &str,
    registry_entry: Option<&RegistryEntry>,
) -> std::result::Result<PathBuf, String> {
    let cachedir = get_cachedir().map_err(|e| format!("Failed to get cache directory: {e}"))?;

    // Fast path: already cached on disk.
    let cachepath = cachedir.join(filename);
    if cachepath.exists() {
        return Ok(cachepath);
    }

    // Previously these messages said the literal "(unknown)"; report the
    // actual filename/URL so failures are diagnosable.
    let entry =
        registry_entry.ok_or_else(|| format!("No registry entry found for {filename}"))?;

    // Download into a temporary directory first.
    let tempdir = tempfile::tempdir().map_err(|e| format!("Failed to create temp dir: {e}"))?;
    let temp_file = tempdir.path().join(filename);

    let response = ureq::get(entry.url)
        .call()
        .map_err(|e| format!("Failed to download {}: {e}", entry.url))?;

    let mut body = response.into_body();
    let bytes = body
        .read_to_vec()
        .map_err(|e| format!("Failed to read response body: {e}"))?;
    let mut file = std::fs::File::create(&temp_file)
        .map_err(|e| format!("Failed to create temp file: {e}"))?;
    file.write_all(&bytes)
        .map_err(|e| format!("Failed to write downloaded file: {e}"))?;

    // Verify integrity when the registry provides a checksum.
    if !entry.sha256.is_empty() {
        let computed_hash = sha256_hash_file(&temp_file)?;
        if computed_hash != entry.sha256 {
            return Err(format!(
                "SHA256 hash mismatch for {filename}: expected {}, got {computed_hash}",
                entry.sha256
            ));
        }
    }

    // cachepath is always under cachedir, so creating its parent also covers
    // the cache root; the previous separate create_dir_all(&cachedir) call
    // was redundant.
    if let Some(parent) = cachepath.parent() {
        fs::create_dir_all(parent).map_err(|e| format!("Failed to create cache dir: {e}"))?;
    }

    fs::copy(&temp_file, &cachepath).map_err(|e| format!("Failed to copy to cache: {e}"))?;

    Ok(cachepath)
}
203
204#[cfg(not(feature = "download-sync"))]
206#[allow(dead_code)]
207pub fn fetch_data(
208 _filename: &str,
209 _registry_entry: Option<&RegistryEntry>,
210) -> std::result::Result<PathBuf, String> {
211 Err("Synchronous download feature is disabled. Enable 'download-sync' feature.".to_string())
212}
213
/// Key identifying a cached dataset: the dataset name combined with a hash of
/// the loading configuration, so the same dataset loaded with different
/// configurations caches separately.
#[derive(Clone, Debug, Eq, PartialEq, Hash)]
pub struct CacheKey {
    // Dataset name as passed to `CacheKey::new`.
    name: String,
    // Lowercase-hex hash of the configuration fields (built in `CacheKey::new`).
    config_hash: String,
}
220
221impl CacheKey {
222 pub fn new(name: &str, config: &crate::real_world::RealWorldConfig) -> Self {
224 use std::collections::hash_map::DefaultHasher;
225 use std::hash::{Hash, Hasher};
226
227 let mut hasher = DefaultHasher::new();
228 config.use_cache.hash(&mut hasher);
229 config.download_if_missing.hash(&mut hasher);
230 config.return_preprocessed.hash(&mut hasher);
231 config.subset.hash(&mut hasher);
232 config.random_state.hash(&mut hasher);
233
234 Self {
235 name: name.to_string(),
236 config_hash: format!("{:x}", hasher.finish()),
237 }
238 }
239
240 pub fn as_string(&self) -> String {
242 format!("{}_{}", self.name, self.config_hash)
243 }
244}
245
/// Newtype over a cached file's name, used as the key of the in-memory cache.
///
/// `Hash` is now derived: for a single-field tuple struct the derive expands
/// to exactly the hand-written `self.0.hash(state)` impl it replaces, so the
/// hashing behavior is unchanged.
#[derive(Clone, Debug, Eq, PartialEq, Hash)]
struct FileCacheKey(String);
255
/// Two-level dataset cache: a TTL- and size-bounded in-memory cache in front
/// of files stored in a cache directory on disk.
pub struct DatasetCache {
    // Directory holding the on-disk cache files.
    cachedir: PathBuf,
    // In-memory cache of file contents keyed by file name. RefCell provides
    // interior mutability so read paths can work through &self.
    mem_cache: RefCell<TTLSizedCache<FileCacheKey, Vec<u8>>>,
    // Maximum total on-disk cache size in bytes; 0 means unlimited.
    max_cache_size: u64,
    // When true, callers should avoid network downloads (set via SCIRS2_OFFLINE).
    offline_mode: bool,
}
270
271impl Default for DatasetCache {
272 fn default() -> Self {
273 let cachedir = get_cachedir().expect("Could not get cache directory");
274
275 let mem_cache = RefCell::new(
276 CacheBuilder::new()
277 .with_size(DEFAULT_CACHE_SIZE)
278 .with_ttl(DEFAULT_CACHE_TTL)
279 .build_sized_cache(),
280 );
281
282 let offline_mode = std::env::var("SCIRS2_OFFLINE")
284 .map(|v| v.to_lowercase() == "true" || v == "1")
285 .unwrap_or(false);
286
287 DatasetCache {
288 cachedir,
289 mem_cache,
290 max_cache_size: DEFAULT_MAX_CACHE_SIZE,
291 offline_mode,
292 }
293 }
294}
295
296impl DatasetCache {
297 pub fn new(cachedir: PathBuf) -> Self {
299 let mem_cache = RefCell::new(
300 CacheBuilder::new()
301 .with_size(DEFAULT_CACHE_SIZE)
302 .with_ttl(DEFAULT_CACHE_TTL)
303 .build_sized_cache(),
304 );
305
306 let offline_mode = std::env::var("SCIRS2_OFFLINE")
307 .map(|v| v.to_lowercase() == "true" || v == "1")
308 .unwrap_or(false);
309
310 DatasetCache {
311 cachedir,
312 mem_cache,
313 max_cache_size: DEFAULT_MAX_CACHE_SIZE,
314 offline_mode,
315 }
316 }
317
318 pub fn with_config(cachedir: PathBuf, cache_size: usize, ttl_seconds: u64) -> Self {
320 let mem_cache = RefCell::new(
321 CacheBuilder::new()
322 .with_size(cache_size)
323 .with_ttl(ttl_seconds)
324 .build_sized_cache(),
325 );
326
327 let offline_mode = std::env::var("SCIRS2_OFFLINE")
328 .map(|v| v.to_lowercase() == "true" || v == "1")
329 .unwrap_or(false);
330
331 DatasetCache {
332 cachedir,
333 mem_cache,
334 max_cache_size: DEFAULT_MAX_CACHE_SIZE,
335 offline_mode,
336 }
337 }
338
339 pub fn with_full_config(
341 cachedir: PathBuf,
342 cache_size: usize,
343 ttl_seconds: u64,
344 max_cache_size: u64,
345 offline_mode: bool,
346 ) -> Self {
347 let mem_cache = RefCell::new(
348 CacheBuilder::new()
349 .with_size(cache_size)
350 .with_ttl(ttl_seconds)
351 .build_sized_cache(),
352 );
353
354 DatasetCache {
355 cachedir,
356 mem_cache,
357 max_cache_size,
358 offline_mode,
359 }
360 }
361
362 pub fn ensure_cachedir(&self) -> Result<()> {
364 if !self.cachedir.exists() {
365 fs::create_dir_all(&self.cachedir).map_err(|e| {
366 DatasetsError::CacheError(format!("Failed to create cache directory: {e}"))
367 })?;
368 }
369 Ok(())
370 }
371
372 pub fn get_cachedpath(&self, name: &str) -> PathBuf {
374 self.cachedir.join(name)
375 }
376
377 pub fn is_cached(&self, name: &str) -> bool {
379 let key = FileCacheKey(name.to_string());
381 if self.mem_cache.borrow_mut().get(&key).is_some() {
382 return true;
383 }
384
385 self.get_cachedpath(name).exists()
387 }
388
389 pub fn read_cached(&self, name: &str) -> Result<Vec<u8>> {
394 let key = FileCacheKey(name.to_string());
396 if let Some(data) = self.mem_cache.borrow_mut().get(&key) {
397 return Ok(data);
398 }
399
400 let path = self.get_cachedpath(name);
402 if !path.exists() {
403 return Err(DatasetsError::CacheError(format!(
404 "Cached file does not exist: {name}"
405 )));
406 }
407
408 let mut file = File::open(path)
409 .map_err(|e| DatasetsError::CacheError(format!("Failed to open cached file: {e}")))?;
410
411 let mut buffer = Vec::new();
412 file.read_to_end(&mut buffer)
413 .map_err(|e| DatasetsError::CacheError(format!("Failed to read cached file: {e}")))?;
414
415 self.mem_cache.borrow_mut().insert(key, buffer.clone());
417
418 Ok(buffer)
419 }
420
421 pub fn write_cached(&self, name: &str, data: &[u8]) -> Result<()> {
423 self.ensure_cachedir()?;
424
425 if self.max_cache_size > 0 {
427 let current_size = self.get_cache_size_bytes()?;
428 let new_file_size = data.len() as u64;
429
430 if current_size + new_file_size > self.max_cache_size {
431 self.cleanup_cache_to_fit(new_file_size)?;
432 }
433 }
434
435 let path = self.get_cachedpath(name);
437 let mut file = File::create(path)
438 .map_err(|e| DatasetsError::CacheError(format!("Failed to create cache file: {e}")))?;
439
440 file.write_all(data).map_err(|e| {
441 DatasetsError::CacheError(format!("Failed to write to cache file: {e}"))
442 })?;
443
444 let key = FileCacheKey(name.to_string());
446 self.mem_cache.borrow_mut().insert(key, data.to_vec());
447
448 Ok(())
449 }
450
451 pub fn clear_cache(&self) -> Result<()> {
453 if self.cachedir.exists() {
455 fs::remove_dir_all(&self.cachedir)
456 .map_err(|e| DatasetsError::CacheError(format!("Failed to clear cache: {e}")))?;
457 }
458
459 self.mem_cache.borrow_mut().clear();
461
462 Ok(())
463 }
464
465 pub fn remove_cached(&self, name: &str) -> Result<()> {
467 let path = self.get_cachedpath(name);
469 if path.exists() {
470 fs::remove_file(path).map_err(|e| {
471 DatasetsError::CacheError(format!("Failed to remove cached file: {e}"))
472 })?;
473 }
474
475 let key = FileCacheKey(name.to_string());
477 self.mem_cache.borrow_mut().remove(&key);
478
479 Ok(())
480 }
481
482 pub fn hash_filename(name: &str) -> String {
484 let hash = blake3::hash(name.as_bytes());
485 hash.to_hex().to_string()
486 }
487
488 pub fn get_cache_size_bytes(&self) -> Result<u64> {
490 let mut total_size = 0u64;
491
492 if self.cachedir.exists() {
493 let entries = fs::read_dir(&self.cachedir).map_err(|e| {
494 DatasetsError::CacheError(format!("Failed to read cache directory: {e}"))
495 })?;
496
497 for entry in entries {
498 let entry = entry.map_err(|e| {
499 DatasetsError::CacheError(format!("Failed to read directory entry: {e}"))
500 })?;
501
502 if let Ok(metadata) = entry.metadata() {
503 if metadata.is_file() {
504 total_size += metadata.len();
505 }
506 }
507 }
508 }
509
510 Ok(total_size)
511 }
512
513 fn cleanup_cache_to_fit(&self, needed_size: u64) -> Result<()> {
518 if self.max_cache_size == 0 {
519 return Ok(()); }
521
522 let current_size = self.get_cache_size_bytes()?;
523 let target_size = (self.max_cache_size as f64 * 0.8) as u64; let total_needed = current_size + needed_size;
525
526 if total_needed <= target_size {
527 return Ok(()); }
529
530 let size_to_free = total_needed - target_size;
531
532 let mut files_with_times = Vec::new();
534
535 if self.cachedir.exists() {
536 let entries = fs::read_dir(&self.cachedir).map_err(|e| {
537 DatasetsError::CacheError(format!("Failed to read cache directory: {e}"))
538 })?;
539
540 for entry in entries {
541 let entry = entry.map_err(|e| {
542 DatasetsError::CacheError(format!("Failed to read directory entry: {e}"))
543 })?;
544
545 if let Ok(metadata) = entry.metadata() {
546 if metadata.is_file() {
547 if let Ok(modified) = metadata.modified() {
548 files_with_times.push((entry.path(), metadata.len(), modified));
549 }
550 }
551 }
552 }
553 }
554
555 files_with_times.sort_by_key(|(_path, _size, modified)| *modified);
557
558 let mut freed_size = 0u64;
560 for (path, size, _modified) in files_with_times {
561 if freed_size >= size_to_free {
562 break;
563 }
564
565 if let Some(filename) = path.file_name().and_then(|n| n.to_str()) {
567 let key = FileCacheKey(filename.to_string());
568 self.mem_cache.borrow_mut().remove(&key);
569 }
570
571 if let Err(e) = fs::remove_file(&path) {
573 eprintln!("Warning: Failed to remove cache file {path:?}: {e}");
574 } else {
575 freed_size += size;
576 }
577 }
578
579 Ok(())
580 }
581
582 pub fn set_offline_mode(&mut self, offline: bool) {
584 self.offline_mode = offline;
585 }
586
587 pub fn is_offline(&self) -> bool {
589 self.offline_mode
590 }
591
592 pub fn set_max_cache_size(&mut self, max_size: u64) {
594 self.max_cache_size = max_size;
595 }
596
597 pub fn max_cache_size(&self) -> u64 {
599 self.max_cache_size
600 }
601
602 pub fn put(&self, name: &str, data: &[u8]) -> Result<()> {
604 self.write_cached(name, data)
605 }
606
607 pub fn get_detailed_stats(&self) -> Result<DetailedCacheStats> {
609 let mut total_size = 0u64;
610 let mut file_count = 0usize;
611 let mut files = Vec::new();
612
613 if self.cachedir.exists() {
614 let entries = fs::read_dir(&self.cachedir).map_err(|e| {
615 DatasetsError::CacheError(format!("Failed to read cache directory: {e}"))
616 })?;
617
618 for entry in entries {
619 let entry = entry.map_err(|e| {
620 DatasetsError::CacheError(format!("Failed to read directory entry: {e}"))
621 })?;
622
623 if let Ok(metadata) = entry.metadata() {
624 if metadata.is_file() {
625 let size = metadata.len();
626 total_size += size;
627 file_count += 1;
628
629 if let Some(filename) = entry.file_name().to_str() {
630 files.push(CacheFileInfo {
631 name: filename.to_string(),
632 size_bytes: size,
633 modified: metadata.modified().ok(),
634 });
635 }
636 }
637 }
638 }
639 }
640
641 files.sort_by(|a, b| b.size_bytes.cmp(&a.size_bytes));
643
644 Ok(DetailedCacheStats {
645 total_size_bytes: total_size,
646 file_count,
647 cachedir: self.cachedir.clone(),
648 max_cache_size: self.max_cache_size,
649 offline_mode: self.offline_mode,
650 files,
651 })
652 }
653}
654
/// Downloads raw bytes from `url`, caching them on disk under a hash of the
/// URL. When `force_download` is false, a previously cached copy is returned
/// without touching the network.
///
/// # Errors
/// Returns `DownloadError` on network failure or a non-success HTTP status,
/// and `CacheError` if the downloaded bytes cannot be cached.
#[cfg(feature = "download")]
#[allow(dead_code)]
pub fn download_data(url: &str, force_download: bool) -> Result<Vec<u8>> {
    // Fix: the parameter was named `_url` although it is used throughout —
    // the underscore prefix wrongly signals "intentionally unused".
    let cache = DatasetCache::default();
    let cache_key = DatasetCache::hash_filename(url);

    // Serve from cache unless the caller explicitly forces a re-download.
    if !force_download && cache.is_cached(&cache_key) {
        return cache.read_cached(&cache_key);
    }

    let response = reqwest::blocking::get(url).map_err(|e| {
        DatasetsError::DownloadError(format!("Failed to download from {url}: {e}"))
    })?;

    if !response.status().is_success() {
        return Err(DatasetsError::DownloadError(format!(
            "Failed to download from {url}: HTTP status {}",
            response.status()
        )));
    }

    let data = response
        .bytes()
        .map_err(|e| DatasetsError::DownloadError(format!("Failed to read response data: {e}")))?;

    let data_vec = data.to_vec();

    // Persist for future calls before returning.
    cache.write_cached(&cache_key, &data_vec)?;

    Ok(data_vec)
}
690
691#[cfg(not(feature = "download"))]
693#[allow(dead_code)]
707pub fn download_data(_url: &str, _force_download: bool) -> Result<Vec<u8>> {
708 Err(DatasetsError::Other(
709 "Download feature is not enabled. Recompile with --features download".to_string(),
710 ))
711}
712
/// High-level facade over [`DatasetCache`] that (de)serializes `Dataset`
/// values as JSON and adds cache management and reporting helpers.
pub struct CacheManager {
    // Underlying two-level (memory + disk) cache.
    cache: DatasetCache,
}
717
718impl CacheManager {
719 pub fn new() -> Result<Self> {
721 let cachedir = get_cachedir()?;
722 Ok(Self {
723 cache: DatasetCache::with_config(cachedir, DEFAULT_CACHE_SIZE, DEFAULT_CACHE_TTL),
724 })
725 }
726
727 pub fn with_config(cachedir: PathBuf, cache_size: usize, ttl_seconds: u64) -> Self {
729 Self {
730 cache: DatasetCache::with_config(cachedir, cache_size, ttl_seconds),
731 }
732 }
733
734 pub fn get(&self, key: &CacheKey) -> Result<Option<crate::utils::Dataset>> {
736 let name = key.as_string();
737 if self.cache.is_cached(&name) {
738 match self.cache.read_cached(&name) {
739 Ok(cached_data) => {
740 match serde_json::from_slice::<crate::utils::Dataset>(&cached_data) {
741 Ok(dataset) => Ok(Some(dataset)),
742 Err(e) => {
743 self.cache
745 .mem_cache
746 .borrow_mut()
747 .remove(&FileCacheKey(name.clone()));
748 Err(DatasetsError::CacheError(format!(
749 "Failed to deserialize cached dataset: {e}"
750 )))
751 }
752 }
753 }
754 Err(e) => Err(DatasetsError::CacheError(format!(
755 "Failed to read cached data: {e}"
756 ))),
757 }
758 } else {
759 Ok(None)
760 }
761 }
762
763 pub fn put(&self, key: &CacheKey, dataset: &crate::utils::Dataset) -> Result<()> {
765 let name = key.as_string();
766
767 let serialized = serde_json::to_vec(dataset)
769 .map_err(|e| DatasetsError::CacheError(format!("Failed to serialize dataset: {e}")))?;
770
771 self.cache
773 .write_cached(&name, &serialized)
774 .map_err(|e| DatasetsError::CacheError(format!("Failed to write to cache: {e}")))
775 }
776
777 pub fn with_full_config(
779 cachedir: PathBuf,
780 cache_size: usize,
781 ttl_seconds: u64,
782 max_cache_size: u64,
783 offline_mode: bool,
784 ) -> Self {
785 Self {
786 cache: DatasetCache::with_full_config(
787 cachedir,
788 cache_size,
789 ttl_seconds,
790 max_cache_size,
791 offline_mode,
792 ),
793 }
794 }
795
796 pub fn get_stats(&self) -> CacheStats {
798 let cachedir = &self.cache.cachedir;
799 let mut total_size = 0u64;
800 let mut file_count = 0usize;
801
802 if cachedir.exists() {
803 if let Ok(entries) = fs::read_dir(cachedir) {
804 for entry in entries.flatten() {
805 if let Ok(metadata) = entry.metadata() {
806 if metadata.is_file() {
807 total_size += metadata.len();
808 file_count += 1;
809 }
810 }
811 }
812 }
813 }
814
815 CacheStats {
816 total_size_bytes: total_size,
817 file_count,
818 cachedir: cachedir.clone(),
819 }
820 }
821
822 pub fn get_detailed_stats(&self) -> Result<DetailedCacheStats> {
824 self.cache.get_detailed_stats()
825 }
826
827 pub fn set_offline_mode(&mut self, offline: bool) {
829 self.cache.set_offline_mode(offline);
830 }
831
832 pub fn is_offline(&self) -> bool {
834 self.cache.is_offline()
835 }
836
837 pub fn set_max_cache_size(&mut self, max_size: u64) {
839 self.cache.set_max_cache_size(max_size);
840 }
841
842 pub fn max_cache_size(&self) -> u64 {
844 self.cache.max_cache_size()
845 }
846
847 pub fn clear_all(&self) -> Result<()> {
849 self.cache.clear_cache()
850 }
851
852 pub fn remove(&self, name: &str) -> Result<()> {
854 self.cache.remove_cached(name)
855 }
856
857 pub fn cleanup_old_files(&self, target_size: u64) -> Result<()> {
859 self.cache.cleanup_cache_to_fit(target_size)
860 }
861
862 pub fn list_cached_files(&self) -> Result<Vec<String>> {
864 let cachedir = &self.cache.cachedir;
865 let mut files = Vec::new();
866
867 if cachedir.exists() {
868 let entries = fs::read_dir(cachedir).map_err(|e| {
869 DatasetsError::CacheError(format!("Failed to read cache directory: {e}"))
870 })?;
871
872 for entry in entries {
873 let entry = entry.map_err(|e| {
874 DatasetsError::CacheError(format!("Failed to read directory entry: {e}"))
875 })?;
876
877 if let Some(filename) = entry.file_name().to_str() {
878 files.push(filename.to_string());
879 }
880 }
881 }
882
883 files.sort();
884 Ok(files)
885 }
886
887 pub fn cachedir(&self) -> &PathBuf {
889 &self.cache.cachedir
890 }
891
892 pub fn is_cached(&self, name: &str) -> bool {
894 self.cache.is_cached(name)
895 }
896
897 pub fn print_cache_report(&self) -> Result<()> {
899 let stats = self.get_detailed_stats()?;
900
901 println!("=== Cache Report ===");
902 println!("Cache Directory: {}", stats.cachedir.display());
903 println!(
904 "Total Size: {} ({} files)",
905 stats.formatted_size(),
906 stats.file_count
907 );
908 println!("Max Size: {}", stats.formatted_max_size());
909
910 if stats.max_cache_size > 0 {
911 println!("Usage: {:.1}%", stats.usage_percentage() * 100.0);
912 }
913
914 println!(
915 "Offline Mode: {}",
916 if stats.offline_mode {
917 "Enabled"
918 } else {
919 "Disabled"
920 }
921 );
922
923 if !stats.files.is_empty() {
924 println!("\nCached Files:");
925 for file in &stats.files {
926 println!(
927 " {} - {} ({})",
928 file.name,
929 file.formatted_size(),
930 file.formatted_modified()
931 );
932 }
933 }
934
935 Ok(())
936 }
937}
938
/// Summary statistics for the on-disk cache.
pub struct CacheStats {
    /// Total size of all cached files, in bytes.
    pub total_size_bytes: u64,
    /// Number of cached files.
    pub file_count: usize,
    /// Directory the statistics were gathered from.
    pub cachedir: PathBuf,
}
948
/// Detailed cache statistics, including a per-file breakdown.
pub struct DetailedCacheStats {
    /// Total size of all cached files, in bytes.
    pub total_size_bytes: u64,
    /// Number of cached files.
    pub file_count: usize,
    /// Directory the statistics were gathered from.
    pub cachedir: PathBuf,
    /// Configured maximum cache size in bytes; 0 means unlimited.
    pub max_cache_size: u64,
    /// Whether offline mode was active when the snapshot was taken.
    pub offline_mode: bool,
    /// Per-file details, sorted largest-first by the producer.
    pub files: Vec<CacheFileInfo>,
}
964
/// Per-file information included in [`DetailedCacheStats`].
#[derive(Debug, Clone)]
pub struct CacheFileInfo {
    /// File name within the cache directory.
    pub name: String,
    /// File size in bytes.
    pub size_bytes: u64,
    /// Last-modified time, if the filesystem reports one.
    pub modified: Option<std::time::SystemTime>,
}
975
impl CacheStats {
    /// Returns the total cache size as a human-readable string (B/KB/MB/GB).
    pub fn formatted_size(&self) -> String {
        format_bytes(self.total_size_bytes)
    }
}
982
983impl DetailedCacheStats {
984 pub fn formatted_size(&self) -> String {
986 format_bytes(self.total_size_bytes)
987 }
988
989 pub fn formatted_max_size(&self) -> String {
991 if self.max_cache_size == 0 {
992 "Unlimited".to_string()
993 } else {
994 format_bytes(self.max_cache_size)
995 }
996 }
997
998 pub fn usage_percentage(&self) -> f64 {
1000 if self.max_cache_size == 0 {
1001 0.0
1002 } else {
1003 self.total_size_bytes as f64 / self.max_cache_size as f64
1004 }
1005 }
1006}
1007
1008impl CacheFileInfo {
1009 pub fn formatted_size(&self) -> String {
1011 format_bytes(self.size_bytes)
1012 }
1013
1014 pub fn formatted_modified(&self) -> String {
1016 match &self.modified {
1017 Some(time) => {
1018 if let Ok(now) = std::time::SystemTime::now().duration_since(std::time::UNIX_EPOCH)
1019 {
1020 if let Ok(modified) = time.duration_since(std::time::UNIX_EPOCH) {
1021 let diff_secs = now.as_secs().saturating_sub(modified.as_secs());
1022 let days = diff_secs / 86400;
1023 let hours = (diff_secs % 86400) / 3600;
1024 let mins = (diff_secs % 3600) / 60;
1025
1026 if days > 0 {
1027 format!("{days} days ago")
1028 } else if hours > 0 {
1029 format!("{hours} hours ago")
1030 } else if mins > 0 {
1031 format!("{mins} minutes ago")
1032 } else {
1033 "Just now".to_string()
1034 }
1035 } else {
1036 "Unknown".to_string()
1037 }
1038 } else {
1039 "Unknown".to_string()
1040 }
1041 }
1042 None => "Unknown".to_string(),
1043 }
1044 }
1045}
1046
/// Formats a byte count as a human-readable string, using one decimal place
/// for KB/MB/GB and a plain integer for values below 1 KB.
#[allow(dead_code)]
fn format_bytes(bytes: u64) -> String {
    const KB: f64 = 1024.0;
    const MB: f64 = KB * 1024.0;
    const GB: f64 = MB * 1024.0;

    let size = bytes as f64;
    if size >= GB {
        format!("{:.1} GB", size / GB)
    } else if size >= MB {
        format!("{:.1} MB", size / MB)
    } else if size >= KB {
        format!("{:.1} KB", size / KB)
    } else {
        format!("{size} B")
    }
}
1061
/// Aggregate outcome of a batch operation (downloads, verification, cleanup…).
#[derive(Debug, Clone)]
pub struct BatchResult {
    /// Number of items that completed successfully.
    pub success_count: usize,
    /// Number of items that failed.
    pub failure_count: usize,
    /// `(item name, error message)` for each failure.
    pub failures: Vec<(String, String)>,
    /// Total bytes processed across successful items.
    pub total_bytes: u64,
    /// Wall-clock time the batch took.
    pub elapsed_time: std::time::Duration,
}
1076
1077impl BatchResult {
1078 pub fn new() -> Self {
1080 Self {
1081 success_count: 0,
1082 failure_count: 0,
1083 failures: Vec::new(),
1084 total_bytes: 0,
1085 elapsed_time: std::time::Duration::ZERO,
1086 }
1087 }
1088
1089 pub fn is_all_success(&self) -> bool {
1091 self.failure_count == 0
1092 }
1093
1094 pub fn success_rate(&self) -> f64 {
1096 let total = self.success_count + self.failure_count;
1097 if total == 0 {
1098 0.0
1099 } else {
1100 (self.success_count as f64 / total as f64) * 100.0
1101 }
1102 }
1103
1104 pub fn summary(&self) -> String {
1106 format!(
1107 "Batch completed: {}/{} successful ({:.1}%), {} bytes processed in {:.2}s",
1108 self.success_count,
1109 self.success_count + self.failure_count,
1110 self.success_rate(),
1111 format_bytes(self.total_bytes),
1112 self.elapsed_time.as_secs_f64()
1113 )
1114 }
1115}
1116
impl Default for BatchResult {
    // Equivalent to `BatchResult::new()`: all counters zeroed.
    fn default() -> Self {
        Self::new()
    }
}
1122
/// Runs batched cache operations (downloads, verification, processing) with
/// configurable parallelism and retry behavior.
pub struct BatchOperations {
    // Cache manager all operations read from / write to.
    cache: CacheManager,
    // When true, batch operations run on one thread per item.
    parallel: bool,
    // Number of additional attempts after a failed download.
    max_retries: usize,
    // Delay between download retry attempts.
    retry_delay: std::time::Duration,
}
1130
1131impl BatchOperations {
    /// Creates batch operations over `cache` with defaults: parallel mode
    /// enabled, 3 retries, 1 second between retries.
    pub fn new(cache: CacheManager) -> Self {
        Self {
            cache,
            parallel: true,
            max_retries: 3,
            retry_delay: std::time::Duration::from_millis(1000),
        }
    }
1141
    /// Builder-style toggle for parallel execution of batch operations.
    pub fn with_parallel(mut self, parallel: bool) -> Self {
        self.parallel = parallel;
        self
    }
1147
    /// Builder-style configuration of retry behavior: how many extra attempts
    /// to make after a failed download, and how long to wait between them.
    pub fn with_retry_config(
        mut self,
        max_retries: usize,
        retry_delay: std::time::Duration,
    ) -> Self {
        self.max_retries = max_retries;
        self.retry_delay = retry_delay;
        self
    }
1158
1159 #[cfg(feature = "download")]
1161 pub fn batch_download(&self, urls_andnames: &[(&str, &str)]) -> BatchResult {
1162 let start_time = std::time::Instant::now();
1163 let mut result = BatchResult::new();
1164
1165 if self.parallel {
1166 self.batch_download_parallel(urls_andnames, &mut result)
1167 } else {
1168 self.batch_download_sequential(urls_andnames, &mut result)
1169 }
1170
1171 result.elapsed_time = start_time.elapsed();
1172 result
1173 }
1174
    /// Downloads all `(url, name)` pairs concurrently, one spawned thread per
    /// pair, accumulating counters into `result`.
    ///
    /// Each worker retries up to `max_retries` times (sleeping `retry_delay`
    /// between download failures), writes the bytes to `<cachedir>/<name>`,
    /// and records success/failure in a shared, mutex-guarded accumulator.
    /// Successful payloads are inserted into the in-memory cache afterwards
    /// on the calling thread, because the RefCell-based mem_cache is not
    /// shareable across threads.
    #[cfg(feature = "download")]
    fn batch_download_parallel(&self, urls_andnames: &[(&str, &str)], result: &mut BatchResult) {
        use std::fs::File;
        use std::io::Write;
        use std::sync::{Arc, Mutex};
        use std::thread;

        // If the cache directory cannot be created, every item fails up front.
        if let Err(e) = self.cache.cache.ensure_cachedir() {
            result.failure_count += urls_andnames.len();
            for &(_, name) in urls_andnames {
                result
                    .failures
                    .push((name.to_string(), format!("Cache setup failed: {e}")));
            }
            return;
        }

        // Shared accumulator the worker threads update under a mutex.
        let result_arc = Arc::new(Mutex::new(BatchResult::new()));
        let cachedir = self.cache.cache.cachedir.clone();
        let max_retries = self.max_retries;
        let retry_delay = self.retry_delay;

        let handles: Vec<_> = urls_andnames
            .iter()
            .map(|&(url, name)| {
                let result_clone = Arc::clone(&result_arc);
                let url = url.to_string();
                let name = name.to_string();
                let cachedir = cachedir.clone();

                thread::spawn(move || {
                    let mut success = false;
                    let mut last_error = String::new();
                    let mut downloaded_data = Vec::new();

                    // Retry loop: max_retries + 1 total attempts.
                    for attempt in 0..=max_retries {
                        match download_data(&url, false) {
                            Ok(data) => {
                                // Write the payload under the caller-supplied
                                // name (download_data itself caches under a
                                // URL hash, which is a different key).
                                let path = cachedir.join(&name);
                                match File::create(&path) {
                                    Ok(mut file) => match file.write_all(&data) {
                                        Ok(_) => {
                                            let mut r =
                                                result_clone.lock().expect("Operation failed");
                                            r.success_count += 1;
                                            r.total_bytes += data.len() as u64;
                                            downloaded_data = data;
                                            success = true;
                                            break;
                                        }
                                        Err(e) => {
                                            last_error = format!("Failed to write cache file: {e}");
                                        }
                                    },
                                    Err(e) => {
                                        last_error = format!("Failed to create cache file: {e}");
                                    }
                                }
                            }
                            Err(e) => {
                                // Only download failures are followed by a
                                // backoff sleep; file errors retry immediately.
                                last_error = format!("Download failed: {e}");
                                if attempt < max_retries {
                                    thread::sleep(retry_delay);
                                }
                            }
                        }
                    }

                    if !success {
                        let mut r = result_clone.lock().expect("Operation failed");
                        r.failure_count += 1;
                        r.failures.push((name.clone(), last_error));
                    }

                    // Returned to the spawning thread via join().
                    (name, success, downloaded_data)
                })
            })
            .collect();

        // Join all workers, collecting the payloads of successful downloads.
        let mut successful_downloads = Vec::new();
        for handle in handles {
            if let Ok((name, success, data)) = handle.join() {
                if success && !data.is_empty() {
                    successful_downloads.push((name, data));
                }
            }
        }

        // Merge the shared accumulator into the caller's result.
        if let Ok(arc_result) = result_arc.lock() {
            result.success_count += arc_result.success_count;
            result.failure_count += arc_result.failure_count;
            result.failures.extend(arc_result.failures.clone());
        }

        // Populate the (single-threaded) in-memory cache on this thread.
        for (name, data) in successful_downloads {
            let key = FileCacheKey(name);
            self.cache.cache.mem_cache.borrow_mut().insert(key, data);
        }
    }
1279
    /// Downloads the `(url, name)` pairs one at a time, accumulating counters
    /// into `result`. Each item gets up to `max_retries + 1` attempts; the
    /// retry delay applies only after a failed download (a failed cache write
    /// retries immediately).
    #[cfg(feature = "download")]
    fn batch_download_sequential(&self, urls_andnames: &[(&str, &str)], result: &mut BatchResult) {
        for &(url, name) in urls_andnames {
            let mut success = false;
            let mut last_error = String::new();

            for attempt in 0..=self.max_retries {
                match download_data(url, false) {
                    // Unlike the parallel path, this writes through
                    // write_cached, which also populates the in-memory cache.
                    Ok(data) => match self.cache.cache.write_cached(name, &data) {
                        Ok(_) => {
                            result.success_count += 1;
                            result.total_bytes += data.len() as u64;
                            success = true;
                            break;
                        }
                        Err(e) => {
                            last_error = format!("Cache write failed: {e}");
                        }
                    },
                    Err(e) => {
                        last_error = format!("Download failed: {e}");
                        if attempt < self.max_retries {
                            std::thread::sleep(self.retry_delay);
                        }
                    }
                }
            }

            if !success {
                result.failure_count += 1;
                // Only the final attempt's error is reported.
                result.failures.push((name.to_string(), last_error));
            }
        }
    }
1314
1315 pub fn batch_verify_integrity(&self, files_andhashes: &[(&str, &str)]) -> BatchResult {
1317 let start_time = std::time::Instant::now();
1318 let mut result = BatchResult::new();
1319
1320 for &(filename, expected_hash) in files_andhashes {
1321 match self.cache.cache.get_cachedpath(filename).exists() {
1322 true => match sha256_hash_file(&self.cache.cache.get_cachedpath(filename)) {
1323 Ok(actual_hash) => {
1324 if actual_hash == expected_hash {
1325 result.success_count += 1;
1326 if let Ok(metadata) =
1327 std::fs::metadata(self.cache.cache.get_cachedpath(filename))
1328 {
1329 result.total_bytes += metadata.len();
1330 }
1331 } else {
1332 result.failure_count += 1;
1333 result.failures.push((
1334 filename.to_string(),
1335 format!(
1336 "Hash mismatch: expected {expected_hash}, got {actual_hash}"
1337 ),
1338 ));
1339 }
1340 }
1341 Err(e) => {
1342 result.failure_count += 1;
1343 result.failures.push((
1344 filename.to_string(),
1345 format!("Hash computation failed: {e}"),
1346 ));
1347 }
1348 },
1349 false => {
1350 result.failure_count += 1;
1351 result
1352 .failures
1353 .push((filename.to_string(), "File not found in cache".to_string()));
1354 }
1355 }
1356 }
1357
1358 result.elapsed_time = start_time.elapsed();
1359 result
1360 }
1361
1362 pub fn selective_cleanup(
1364 &self,
1365 patterns: &[&str],
1366 max_age_days: Option<u32>,
1367 ) -> Result<BatchResult> {
1368 let start_time = std::time::Instant::now();
1369 let mut result = BatchResult::new();
1370
1371 let cached_files = self.cache.list_cached_files()?;
1372 let now = std::time::SystemTime::now();
1373
1374 for filename in cached_files {
1375 let should_remove = patterns.iter().any(|pattern| {
1376 filename.contains(pattern) || matches_glob_pattern(&filename, pattern)
1377 });
1378
1379 if should_remove {
1380 let filepath = self.cache.cache.get_cachedpath(&filename);
1381
1382 let remove_due_to_age = if let Some(max_age) = max_age_days {
1384 if let Ok(metadata) = std::fs::metadata(&filepath) {
1385 if let Ok(modified) = metadata.modified() {
1386 if let Ok(age) = now.duration_since(modified) {
1387 age.as_secs() > (max_age as u64 * 24 * 3600)
1388 } else {
1389 false
1390 }
1391 } else {
1392 false
1393 }
1394 } else {
1395 false
1396 }
1397 } else {
1398 true };
1400
1401 if remove_due_to_age {
1402 match self.cache.remove(&filename) {
1403 Ok(_) => {
1404 result.success_count += 1;
1405 if let Ok(metadata) = std::fs::metadata(&filepath) {
1406 result.total_bytes += metadata.len();
1407 }
1408 }
1409 Err(e) => {
1410 result.failure_count += 1;
1411 result
1412 .failures
1413 .push((filename, format!("Removal failed: {e}")));
1414 }
1415 }
1416 }
1417 }
1418 }
1419
1420 result.elapsed_time = start_time.elapsed();
1421 Ok(result)
1422 }
1423
1424 pub fn batch_process<F, T, E>(&self, names: &[String], processor: F) -> BatchResult
1426 where
1427 F: Fn(&str, &[u8]) -> std::result::Result<T, E> + Sync + Send + 'static,
1428 E: std::fmt::Display,
1429 T: Send,
1430 {
1431 let start_time = std::time::Instant::now();
1432 let mut result = BatchResult::new();
1433
1434 if self.parallel {
1435 self.batch_process_parallel(names, processor, &mut result)
1436 } else {
1437 self.batch_process_sequential(names, processor, &mut result)
1438 }
1439
1440 result.elapsed_time = start_time.elapsed();
1441 result
1442 }
1443
1444 fn batch_process_parallel<F, T, E>(
1445 &self,
1446 names: &[String],
1447 processor: F,
1448 result: &mut BatchResult,
1449 ) where
1450 F: Fn(&str, &[u8]) -> std::result::Result<T, E> + Sync + Send + 'static,
1451 E: std::fmt::Display,
1452 T: Send,
1453 {
1454 let mut data_pairs = Vec::new();
1457
1458 for name in names {
1460 match self.cache.cache.read_cached(name) {
1461 Ok(data) => data_pairs.push((name.clone(), data)),
1462 Err(e) => {
1463 result.failure_count += 1;
1464 result
1465 .failures
1466 .push((name.clone(), format!("Cache read failed: {e}")));
1467 }
1468 }
1469 }
1470
1471 if !data_pairs.is_empty() {
1473 use std::sync::{Arc, Mutex};
1474 use std::thread;
1475
1476 let parallel_result = Arc::new(Mutex::new(BatchResult::new()));
1477 let processor = Arc::new(processor);
1478
1479 let handles: Vec<_> = data_pairs
1480 .into_iter()
1481 .map(|(name, data)| {
1482 let result_clone = Arc::clone(¶llel_result);
1483 let processor_clone = Arc::clone(&processor);
1484
1485 thread::spawn(move || match processor_clone(&name, &data) {
1486 Ok(_) => {
1487 let mut r = result_clone.lock().expect("Operation failed");
1488 r.success_count += 1;
1489 r.total_bytes += data.len() as u64;
1490 }
1491 Err(e) => {
1492 let mut r = result_clone.lock().expect("Operation failed");
1493 r.failure_count += 1;
1494 r.failures.push((name, format!("Processing failed: {e}")));
1495 }
1496 })
1497 })
1498 .collect();
1499
1500 for handle in handles {
1501 let _ = handle.join();
1502 }
1503
1504 let parallel_result = parallel_result.lock().expect("Operation failed");
1506 result.success_count += parallel_result.success_count;
1507 result.failure_count += parallel_result.failure_count;
1508 result.total_bytes += parallel_result.total_bytes;
1509 result.failures.extend(parallel_result.failures.clone());
1510 }
1511 }
1512
1513 fn batch_process_sequential<F, T, E>(
1514 &self,
1515 names: &[String],
1516 processor: F,
1517 result: &mut BatchResult,
1518 ) where
1519 F: Fn(&str, &[u8]) -> std::result::Result<T, E>,
1520 E: std::fmt::Display,
1521 {
1522 for name in names {
1523 match self.cache.cache.read_cached(name) {
1524 Ok(data) => match processor(name, &data) {
1525 Ok(_) => {
1526 result.success_count += 1;
1527 result.total_bytes += data.len() as u64;
1528 }
1529 Err(e) => {
1530 result.failure_count += 1;
1531 result
1532 .failures
1533 .push((name.clone(), format!("Processing failed: {e}")));
1534 }
1535 },
1536 Err(e) => {
1537 result.failure_count += 1;
1538 result
1539 .failures
1540 .push((name.clone(), format!("Cache read failed: {e}")));
1541 }
1542 }
1543 }
1544 }
1545
    /// Borrow the underlying [`CacheManager`].
    pub fn cache_manager(&self) -> &CacheManager {
        &self.cache
    }
1550
    /// Write `data` into the cache under `name`.
    ///
    /// Thin delegate to the inner dataset cache.
    pub fn write_cached(&self, name: &str, data: &[u8]) -> Result<()> {
        self.cache.cache.write_cached(name, data)
    }
1555
    /// Read the cached bytes stored under `name`.
    ///
    /// Thin delegate to the inner dataset cache.
    pub fn read_cached(&self, name: &str) -> Result<Vec<u8>> {
        self.cache.cache.read_cached(name)
    }
1560
    /// List the names of all currently cached files.
    ///
    /// Thin delegate to the cache manager.
    pub fn list_cached_files(&self) -> Result<Vec<String>> {
        self.cache.list_cached_files()
    }
1565
    /// Print the cache manager's report (delegates to the manager, which
    /// owns the formatting).
    pub fn print_cache_report(&self) -> Result<()> {
        self.cache.print_cache_report()
    }
1570
1571 pub fn get_cache_statistics(&self) -> Result<BatchResult> {
1573 let start_time = std::time::Instant::now();
1574 let mut result = BatchResult::new();
1575
1576 let cached_files = self.cache.list_cached_files()?;
1577
1578 for filename in cached_files {
1579 let filepath = self.cache.cache.get_cachedpath(&filename);
1580 match std::fs::metadata(&filepath) {
1581 Ok(metadata) => {
1582 result.success_count += 1;
1583 result.total_bytes += metadata.len();
1584 }
1585 Err(e) => {
1586 result.failure_count += 1;
1587 result
1588 .failures
1589 .push((filename, format!("Metadata read failed: {e}")));
1590 }
1591 }
1592 }
1593
1594 result.elapsed_time = start_time.elapsed();
1595 Ok(result)
1596 }
1597}
1598
#[allow(dead_code)]
/// Match `filename` against a simple glob `pattern` in which `*` matches any
/// (possibly empty) run of characters. No other metacharacters are
/// supported; a pattern with no `*` must match exactly.
///
/// Generalized from the previous version, which only handled zero or one
/// `*` (multi-star patterns silently degraded to exact comparison) and
/// allowed the prefix and suffix of a single-star pattern to overlap
/// (e.g. `"ab*ba"` wrongly matched `"aba"`).
fn matches_glob_pattern(filename: &str, pattern: &str) -> bool {
    // No wildcard: exact comparison.
    if !pattern.contains('*') {
        return filename == pattern;
    }

    // With at least one '*', split yields >= 2 literal segments.
    let parts: Vec<&str> = pattern.split('*').collect();
    let (first, rest) = parts.split_first().expect("split yields at least one part");
    let (last, middle) = rest.split_last().expect("wildcard implies >= 2 parts");

    // The literal before the first '*' must be a prefix.
    let mut remaining = match filename.strip_prefix(first) {
        Some(r) => r,
        None => return false,
    };

    // Reserve the literal after the last '*' as a suffix now, so that the
    // middle literals cannot overlap it.
    remaining = match remaining.strip_suffix(last) {
        Some(r) => r,
        None => return false,
    };

    // Each literal between wildcards must appear, in order, without overlap.
    for part in middle {
        if part.is_empty() {
            continue; // consecutive '*'s contribute nothing
        }
        match remaining.find(part) {
            Some(idx) => remaining = &remaining[idx + part.len()..],
            None => return false,
        }
    }
    true
}
1617
#[cfg(test)]
mod tests {
    //! Unit tests for the cache / batch-operations module. Each test builds
    //! its fixtures in a fresh `TempDir` so no state leaks between tests.
    use super::*;
    use tempfile::TempDir;

    // A fresh BatchResult is empty and counts as "all success"; the derived
    // metrics (rate, summary string) track the raw counters.
    #[test]
    fn test_batch_result() {
        let mut result = BatchResult::new();
        assert_eq!(result.success_count, 0);
        assert_eq!(result.failure_count, 0);
        assert!(result.is_all_success());
        assert_eq!(result.success_rate(), 0.0);

        result.success_count = 8;
        result.failure_count = 2;
        result.total_bytes = 1024;

        assert!(!result.is_all_success());
        assert_eq!(result.success_rate(), 80.0);
        assert!(result.summary().contains("8/10 successful"));
        assert!(result.summary().contains("80.0%"));
    }

    // Builder-style configuration of BatchOperations is applied as given.
    #[test]
    fn test_batch_operations_creation() {
        let tempdir = TempDir::new().expect("Operation failed");
        let cache_manager = CacheManager::with_config(tempdir.path().to_path_buf(), 10, 3600);
        let batch_ops = BatchOperations::new(cache_manager)
            .with_parallel(false)
            .with_retry_config(2, std::time::Duration::from_millis(500));

        assert!(!batch_ops.parallel);
        assert_eq!(batch_ops.max_retries, 2);
    }

    // Glob-based cleanup removes only the matching files and leaves the
    // rest of the cache intact.
    #[test]
    fn test_selective_cleanup() {
        let tempdir = TempDir::new().expect("Operation failed");
        let cache_manager = CacheManager::with_config(tempdir.path().to_path_buf(), 10, 3600);
        let batch_ops = BatchOperations::new(cache_manager);

        // Two .csv files (should be removed) and one .json (should survive).
        let test_data = vec![0u8; 100];
        batch_ops
            .cache
            .cache
            .write_cached("test1.csv", &test_data)
            .expect("Test: cache operation failed");
        batch_ops
            .cache
            .cache
            .write_cached("test2.csv", &test_data)
            .expect("Test: cache operation failed");
        batch_ops
            .cache
            .cache
            .write_cached("data.json", &test_data)
            .expect("Test: cache operation failed");

        // No age limit: every pattern match is removed.
        let result = batch_ops
            .selective_cleanup(&["*.csv"], None)
            .expect("Operation failed");

        assert_eq!(result.success_count, 2);
        assert!(!batch_ops.cache.is_cached("test1.csv"));
        assert!(!batch_ops.cache.is_cached("test2.csv"));
        assert!(batch_ops.cache.is_cached("data.json"));
    }

    // Sequential batch processing visits every named entry and accumulates
    // the total payload size.
    #[test]
    fn test_batch_process() {
        let tempdir = TempDir::new().expect("Operation failed");
        let cache_manager = CacheManager::with_config(tempdir.path().to_path_buf(), 10, 3600);
        let batch_ops = BatchOperations::new(cache_manager).with_parallel(false);

        let test_data1 = vec![1u8; 100];
        let test_data2 = vec![2u8; 200];
        batch_ops
            .cache
            .cache
            .write_cached("file1.dat", &test_data1)
            .expect("Test: cache operation failed");
        batch_ops
            .cache
            .cache
            .write_cached("file2.dat", &test_data2)
            .expect("Test: cache operation failed");

        let files = vec!["file1.dat".to_string(), "file2.dat".to_string()];

        // Processor succeeds for any non-empty payload.
        let result = batch_ops.batch_process(&files, |_name, data| {
            if data.is_empty() {
                Err("Empty file")
            } else {
                Ok(data.len())
            }
        });

        assert_eq!(result.success_count, 2);
        assert_eq!(result.failure_count, 0);
        assert_eq!(result.total_bytes, 300);
    }

    // Statistics reflect the number and total size of cached files, and an
    // empty cache reports zero of each.
    #[test]
    fn test_get_cache_statistics() {
        let tempdir = TempDir::new().expect("Operation failed");
        let cache_manager = CacheManager::with_config(tempdir.path().to_path_buf(), 10, 3600);
        let batch_ops = BatchOperations::new(cache_manager);

        // Empty cache first.
        let result = batch_ops.get_cache_statistics().expect("Operation failed");
        assert_eq!(result.success_count, 0);

        let test_data = vec![0u8; 500];
        batch_ops
            .cache
            .cache
            .write_cached("test1.dat", &test_data)
            .expect("Test: cache operation failed");
        batch_ops
            .cache
            .cache
            .write_cached("test2.dat", &test_data)
            .expect("Test: cache operation failed");

        let result = batch_ops.get_cache_statistics().expect("Operation failed");
        assert_eq!(result.success_count, 2);
        assert_eq!(result.total_bytes, 1000);
    }

    // Basic glob behavior: universal "*", prefix/suffix wildcards, exact
    // match, and non-matches.
    #[test]
    fn test_matches_glob_pattern() {
        assert!(matches_glob_pattern("test.csv", "*"));
        assert!(matches_glob_pattern("test.csv", "*.csv"));
        assert!(matches_glob_pattern("test.csv", "test.*"));
        assert!(matches_glob_pattern("test.csv", "test.csv"));

        assert!(!matches_glob_pattern("test.json", "*.csv"));
        assert!(!matches_glob_pattern("other.csv", "test.*"));
    }

    // A freshly created manager over an empty directory reports no files.
    #[test]
    fn test_cache_manager_creation() {
        let tempdir = TempDir::new().expect("Operation failed");
        let manager = CacheManager::with_config(tempdir.path().to_path_buf(), 10, 3600);
        let stats = manager.get_stats();
        assert_eq!(stats.file_count, 0);
    }

    // Human-readable size formatting at the KB and GB boundaries.
    #[test]
    fn test_cache_stats_formatting() {
        let tempdir = TempDir::new().expect("Operation failed");
        let stats = CacheStats {
            total_size_bytes: 1024,
            file_count: 1,
            cachedir: tempdir.path().to_path_buf(),
        };

        assert_eq!(stats.formatted_size(), "1.0 KB");

        let stats_large = CacheStats {
            total_size_bytes: 1024 * 1024 * 1024,
            file_count: 1,
            cachedir: tempdir.path().to_path_buf(),
        };

        assert_eq!(stats_large.formatted_size(), "1.0 GB");
    }

    // Filename hashing is deterministic, collision-distinguishing for
    // different names, and 64 hex chars long (consistent with SHA-256).
    #[test]
    fn test_hash_file_name() {
        let hash1 = DatasetCache::hash_filename("test.csv");
        let hash2 = DatasetCache::hash_filename("test.csv");
        let hash3 = DatasetCache::hash_filename("different.csv");

        assert_eq!(hash1, hash2);
        assert_ne!(hash1, hash3);
        assert_eq!(hash1.len(), 64);
    }

    // On supported platforms the cache dir resolves and embeds the
    // application directory name.
    #[test]
    fn test_platform_cachedir() {
        let cachedir = get_platform_cachedir();
        assert!(cachedir.is_some() || cfg!(target_os = "unknown"));

        if let Some(dir) = cachedir {
            assert!(dir.to_string_lossy().contains("scirs2-datasets"));
        }
    }

    // With a small max cache size, evictions keep the total within bounds
    // while the most recently written entry stays cached.
    #[test]
    fn test_cache_size_management() {
        let tempdir = TempDir::new().expect("Operation failed");
        let cache = DatasetCache::with_full_config(
            tempdir.path().to_path_buf(),
            10,
            3600,
            2048,
            false,
        );

        let small_data1 = vec![0u8; 400];
        cache
            .write_cached("small1.dat", &small_data1)
            .expect("Operation failed");

        let small_data2 = vec![0u8; 400];
        cache
            .write_cached("small2.dat", &small_data2)
            .expect("Operation failed");

        let small_data3 = vec![0u8; 400];
        cache
            .write_cached("small3.dat", &small_data3)
            .expect("Operation failed");

        // This write may force evictions to stay under the 2048-byte cap.
        let medium_data = vec![0u8; 800];
        cache
            .write_cached("medium.dat", &medium_data)
            .expect("Operation failed");

        let stats = cache.get_detailed_stats().expect("Operation failed");
        assert!(stats.total_size_bytes <= cache.max_cache_size());

        assert!(cache.is_cached("medium.dat"));
    }

    // Offline mode is off by default and toggles via the setter.
    #[test]
    fn test_offline_mode() {
        let tempdir = TempDir::new().expect("Operation failed");
        let mut cache = DatasetCache::new(tempdir.path().to_path_buf());

        assert!(!cache.is_offline());
        cache.set_offline_mode(true);
        assert!(cache.is_offline());
    }

    // Detailed stats include per-file name and size entries.
    #[test]
    fn test_detailed_stats() {
        let tempdir = TempDir::new().expect("Operation failed");
        let cache = DatasetCache::new(tempdir.path().to_path_buf());

        let test_data = vec![1, 2, 3, 4, 5];
        cache
            .write_cached("test.dat", &test_data)
            .expect("Operation failed");

        let stats = cache.get_detailed_stats().expect("Operation failed");
        assert_eq!(stats.file_count, 1);
        assert_eq!(stats.total_size_bytes, test_data.len() as u64);
        assert_eq!(stats.files.len(), 1);
        assert_eq!(stats.files[0].name, "test.dat");
        assert_eq!(stats.files[0].size_bytes, test_data.len() as u64);
    }

    // A fresh manager exposes zeroed stats and the configured cache dir.
    #[test]
    fn test_cache_manager() {
        let tempdir = TempDir::new().expect("Operation failed");
        let manager = CacheManager::with_config(tempdir.path().to_path_buf(), 10, 3600);

        let stats = manager.get_stats();
        assert_eq!(stats.file_count, 0);
        assert_eq!(stats.total_size_bytes, 0);

        assert_eq!(manager.cachedir(), &tempdir.path().to_path_buf());
    }

    // format_bytes picks the right unit at each power-of-1024 boundary.
    #[test]
    fn test_format_bytes() {
        assert_eq!(format_bytes(512), "512 B");
        assert_eq!(format_bytes(1024), "1.0 KB");
        assert_eq!(format_bytes(1024 * 1024), "1.0 MB");
        assert_eq!(format_bytes(1024 * 1024 * 1024), "1.0 GB");
    }
}