use crate::error::{DatasetsError, Result};
use scirs2_core::cache::{CacheBuilder, TTLSizedCache};
use std::cell::RefCell;
use std::fs::{self, File};
use std::hash::{Hash, Hasher};
use std::io::{Read, Write};
use std::path::{Path, PathBuf};

/// Directory name used for the on-disk dataset cache.
const CACHE_DIR_NAME: &str = "scirs2-datasets";

/// Default number of entries held in the in-memory cache.
const DEFAULT_CACHE_SIZE: usize = 100;

/// Default time-to-live for in-memory cache entries, in seconds.
const DEFAULT_CACHE_TTL: u64 = 3600;

/// Default upper bound on the on-disk cache size (500 MB).
const DEFAULT_MAX_CACHE_SIZE: u64 = 500 * 1024 * 1024;

/// Environment variable that overrides the cache directory location.
const CACHE_DIR_ENV: &str = "SCIRS2_CACHE_DIR";

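/// Computes the SHA-256 digest of the file at `path`, streaming it in 8 KiB
/// chunks so that large datasets are hashed without loading them fully into
/// memory. Returns the digest as a lowercase hex string.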
#[allow(dead_code)]
pub fn sha256_hash_file(path: &Path) -> std::result::Result<String, String> {
    use sha2::{Digest, Sha256};

    let mut file = File::open(path).map_err(|e| format!("Failed to open file: {e}"))?;
    let mut hasher = Sha256::new();
    let mut buffer = [0; 8192];

    loop {
        let bytes_read = file
            .read(&mut buffer)
            .map_err(|e| format!("Failed to read file: {e}"))?;
        if bytes_read == 0 {
            break;
        }
        hasher.update(&buffer[..bytes_read]);
    }

    Ok(format!("{:x}", hasher.finalize()))
}

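/// Registry metadata for a downloadable dataset file: where to fetch it from
/// and the hash used to verify the download.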
pub struct RegistryEntry {
    /// Expected SHA-256 hash of the downloaded file (empty to skip verification).
    pub sha256: &'static str,
    /// URL the file is downloaded from.
    pub url: &'static str,
}

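/// Resolves the dataset cache directory, creating it if necessary.
///
/// Resolution order:
/// 1. the `SCIRS2_CACHE_DIR` environment variable,
/// 2. the platform-specific cache directory (see `get_platform_cachedir`),
/// 3. `~/.scirs2-datasets` as a last resort.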
#[allow(dead_code)]
pub fn get_cachedir() -> Result<PathBuf> {
    if let Ok(cachedir) = std::env::var(CACHE_DIR_ENV) {
        let cachepath = PathBuf::from(cachedir);
        ensure_directory_exists(&cachepath)?;
        return Ok(cachepath);
    }

    if let Some(cachedir) = get_platform_cachedir() {
        ensure_directory_exists(&cachedir)?;
        return Ok(cachedir);
    }

    let homedir = dirs::home_dir()
        .ok_or_else(|| DatasetsError::CacheError("Could not find home directory".to_string()))?;
    let cachedir = homedir.join(format!(".{CACHE_DIR_NAME}"));
    ensure_directory_exists(&cachedir)?;

    Ok(cachedir)
}

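/// Returns the conventional cache directory for the current platform:
/// `%LOCALAPPDATA%` on Windows, `~/Library/Caches` on macOS, and
/// `$XDG_CACHE_HOME` (falling back to `~/.cache`) on other systems.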
#[allow(dead_code)]
fn get_platform_cachedir() -> Option<PathBuf> {
    #[cfg(target_os = "windows")]
    {
        dirs::data_local_dir().map(|dir| dir.join(CACHE_DIR_NAME))
    }
    #[cfg(target_os = "macos")]
    {
        dirs::home_dir().map(|dir| dir.join("Library").join("Caches").join(CACHE_DIR_NAME))
    }
    #[cfg(not(any(target_os = "windows", target_os = "macos")))]
    {
        if let Ok(xdg_cache) = std::env::var("XDG_CACHE_HOME") {
            Some(PathBuf::from(xdg_cache).join(CACHE_DIR_NAME))
        } else {
            dirs::home_dir().map(|home| home.join(".cache").join(CACHE_DIR_NAME))
        }
    }
}

#[allow(dead_code)]
fn ensure_directory_exists(dir: &Path) -> Result<()> {
    if !dir.exists() {
        fs::create_dir_all(dir).map_err(|e| {
            DatasetsError::CacheError(format!("Failed to create cache directory: {e}"))
        })?;
    }
    Ok(())
}

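/// Fetches a dataset file, downloading it only when it is not already cached.
///
/// The flow is: return the cached path if the file exists; otherwise download
/// it from the registry entry's URL into a temporary directory, verify its
/// SHA-256 hash when one is provided, and copy the verified file into the
/// cache before returning its path.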
#[allow(dead_code)]
pub fn fetch_data(
    filename: &str,
    registry_entry: Option<&RegistryEntry>,
) -> std::result::Result<PathBuf, String> {
    let cachedir = match get_cachedir() {
        Ok(dir) => dir,
        Err(e) => return Err(format!("Failed to get cache directory: {e}")),
    };

    let cachepath = cachedir.join(filename);
    if cachepath.exists() {
        return Ok(cachepath);
    }

    let entry = match registry_entry {
        Some(entry) => entry,
        None => return Err(format!("No registry entry found for {filename}")),
    };

    let tempdir = tempfile::tempdir().map_err(|e| format!("Failed to create temp dir: {e}"))?;
    let temp_file = tempdir.path().join(filename);

    let response = ureq::get(entry.url)
        .call()
        .map_err(|e| format!("Failed to download {filename}: {e}"))?;

    let mut body = response.into_body();
    let bytes = body
        .read_to_vec()
        .map_err(|e| format!("Failed to read response body: {e}"))?;
    let mut file = std::fs::File::create(&temp_file)
        .map_err(|e| format!("Failed to create temp file: {e}"))?;
    file.write_all(&bytes)
        .map_err(|e| format!("Failed to write downloaded file: {e}"))?;

    if !entry.sha256.is_empty() {
        let computed_hash = sha256_hash_file(&temp_file)?;
        if computed_hash != entry.sha256 {
            return Err(format!(
                "SHA256 hash mismatch for {filename}: expected {}, got {computed_hash}",
                entry.sha256
            ));
        }
    }

    fs::create_dir_all(&cachedir).map_err(|e| format!("Failed to create cache dir: {e}"))?;
    if let Some(parent) = cachepath.parent() {
        fs::create_dir_all(parent).map_err(|e| format!("Failed to create cache dir: {e}"))?;
    }

    fs::copy(&temp_file, &cachepath).map_err(|e| format!("Failed to copy to cache: {e}"))?;

    Ok(cachepath)
}

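/// Cache key for a processed dataset: the dataset name combined with a hash
/// of the loading configuration, so differently configured loads are cached
/// separately.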
#[derive(Clone, Debug, Eq, PartialEq, Hash)]
pub struct CacheKey {
    name: String,
    config_hash: String,
}

impl CacheKey {
    pub fn new(name: &str, config: &crate::real_world::RealWorldConfig) -> Self {
        use std::collections::hash_map::DefaultHasher;
        use std::hash::{Hash, Hasher};

        let mut hasher = DefaultHasher::new();
        config.use_cache.hash(&mut hasher);
        config.download_if_missing.hash(&mut hasher);
        config.return_preprocessed.hash(&mut hasher);
        config.subset.hash(&mut hasher);
        config.random_state.hash(&mut hasher);

        Self {
            name: name.to_string(),
            config_hash: format!("{:x}", hasher.finish()),
        }
    }

    pub fn as_string(&self) -> String {
        format!("{}_{}", self.name, self.config_hash)
    }
}

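/// Key type for the in-memory file cache; wraps the cached file's name.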
#[derive(Clone, Debug, Eq, PartialEq)]
struct FileCacheKey(String);

impl Hash for FileCacheKey {
    fn hash<H: Hasher>(&self, state: &mut H) {
        self.0.hash(state);
    }
}

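/// Two-tier dataset cache: a TTL-bounded in-memory cache backed by files on
/// disk, with an optional size limit and an offline mode that can be enabled
/// via the `SCIRS2_OFFLINE` environment variable.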
pub struct DatasetCache {
    /// Root directory of the on-disk cache.
    cachedir: PathBuf,
    /// In-memory cache of recently used files.
    mem_cache: RefCell<TTLSizedCache<FileCacheKey, Vec<u8>>>,
    /// Maximum on-disk cache size in bytes (0 = unlimited).
    max_cache_size: u64,
    /// When true, network downloads are disallowed.
    offline_mode: bool,
}

impl Default for DatasetCache {
    fn default() -> Self {
        let cachedir = get_cachedir().expect("Could not get cache directory");

        let mem_cache = RefCell::new(
            CacheBuilder::new()
                .with_size(DEFAULT_CACHE_SIZE)
                .with_ttl(DEFAULT_CACHE_TTL)
                .build_sized_cache(),
        );

        let offline_mode = std::env::var("SCIRS2_OFFLINE")
            .map(|v| v.to_lowercase() == "true" || v == "1")
            .unwrap_or(false);

        DatasetCache {
            cachedir,
            mem_cache,
            max_cache_size: DEFAULT_MAX_CACHE_SIZE,
            offline_mode,
        }
    }
}

impl DatasetCache {
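    /// Creates a cache rooted at `cachedir` with the default in-memory
    /// capacity and TTL. Offline mode is read from `SCIRS2_OFFLINE`.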
    pub fn new(cachedir: PathBuf) -> Self {
        let mem_cache = RefCell::new(
            CacheBuilder::new()
                .with_size(DEFAULT_CACHE_SIZE)
                .with_ttl(DEFAULT_CACHE_TTL)
                .build_sized_cache(),
        );

        let offline_mode = std::env::var("SCIRS2_OFFLINE")
            .map(|v| v.to_lowercase() == "true" || v == "1")
            .unwrap_or(false);

        DatasetCache {
            cachedir,
            mem_cache,
            max_cache_size: DEFAULT_MAX_CACHE_SIZE,
            offline_mode,
        }
    }

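    /// Creates a cache with a custom in-memory capacity and entry TTL.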
    pub fn with_config(cachedir: PathBuf, cache_size: usize, ttl_seconds: u64) -> Self {
        let mem_cache = RefCell::new(
            CacheBuilder::new()
                .with_size(cache_size)
                .with_ttl(ttl_seconds)
                .build_sized_cache(),
        );

        let offline_mode = std::env::var("SCIRS2_OFFLINE")
            .map(|v| v.to_lowercase() == "true" || v == "1")
            .unwrap_or(false);

        DatasetCache {
            cachedir,
            mem_cache,
            max_cache_size: DEFAULT_MAX_CACHE_SIZE,
            offline_mode,
        }
    }

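    /// Creates a cache with full control over capacity, TTL, the on-disk
    /// size limit (0 = unlimited), and offline mode.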
    pub fn with_full_config(
        cachedir: PathBuf,
        cache_size: usize,
        ttl_seconds: u64,
        max_cache_size: u64,
        offline_mode: bool,
    ) -> Self {
        let mem_cache = RefCell::new(
            CacheBuilder::new()
                .with_size(cache_size)
                .with_ttl(ttl_seconds)
                .build_sized_cache(),
        );

        DatasetCache {
            cachedir,
            mem_cache,
            max_cache_size,
            offline_mode,
        }
    }

    pub fn ensure_cachedir(&self) -> Result<()> {
        if !self.cachedir.exists() {
            fs::create_dir_all(&self.cachedir).map_err(|e| {
                DatasetsError::CacheError(format!("Failed to create cache directory: {e}"))
            })?;
        }
        Ok(())
    }

    pub fn get_cachedpath(&self, name: &str) -> PathBuf {
        self.cachedir.join(name)
    }

    pub fn is_cached(&self, name: &str) -> bool {
        let key = FileCacheKey(name.to_string());
        if self.mem_cache.borrow_mut().get(&key).is_some() {
            return true;
        }

        self.get_cachedpath(name).exists()
    }

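    /// Reads a cached file, checking the in-memory cache first and falling
    /// back to disk. A disk hit is promoted into the in-memory cache.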
    pub fn read_cached(&self, name: &str) -> Result<Vec<u8>> {
        let key = FileCacheKey(name.to_string());
        if let Some(data) = self.mem_cache.borrow_mut().get(&key) {
            return Ok(data);
        }

        let path = self.get_cachedpath(name);
        if !path.exists() {
            return Err(DatasetsError::CacheError(format!(
                "Cached file does not exist: {name}"
            )));
        }

        let mut file = File::open(path)
            .map_err(|e| DatasetsError::CacheError(format!("Failed to open cached file: {e}")))?;

        let mut buffer = Vec::new();
        file.read_to_end(&mut buffer)
            .map_err(|e| DatasetsError::CacheError(format!("Failed to read cached file: {e}")))?;

        self.mem_cache.borrow_mut().insert(key, buffer.clone());

        Ok(buffer)
    }

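    /// Writes data to both cache tiers. If a size limit is configured and
    /// the write would exceed it, older files are evicted first (see
    /// `cleanup_cache_to_fit`).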
    pub fn write_cached(&self, name: &str, data: &[u8]) -> Result<()> {
        self.ensure_cachedir()?;

        if self.max_cache_size > 0 {
            let current_size = self.get_cache_size_bytes()?;
            let new_file_size = data.len() as u64;

            if current_size + new_file_size > self.max_cache_size {
                self.cleanup_cache_to_fit(new_file_size)?;
            }
        }

        let path = self.get_cachedpath(name);
        let mut file = File::create(path)
            .map_err(|e| DatasetsError::CacheError(format!("Failed to create cache file: {e}")))?;

        file.write_all(data).map_err(|e| {
            DatasetsError::CacheError(format!("Failed to write to cache file: {e}"))
        })?;

        let key = FileCacheKey(name.to_string());
        self.mem_cache.borrow_mut().insert(key, data.to_vec());

        Ok(())
    }

    pub fn clear_cache(&self) -> Result<()> {
        if self.cachedir.exists() {
            fs::remove_dir_all(&self.cachedir)
                .map_err(|e| DatasetsError::CacheError(format!("Failed to clear cache: {e}")))?;
        }

        self.mem_cache.borrow_mut().clear();

        Ok(())
    }

    pub fn remove_cached(&self, name: &str) -> Result<()> {
        let path = self.get_cachedpath(name);
        if path.exists() {
            fs::remove_file(path).map_err(|e| {
                DatasetsError::CacheError(format!("Failed to remove cached file: {e}"))
            })?;
        }

        let key = FileCacheKey(name.to_string());
        self.mem_cache.borrow_mut().remove(&key);

        Ok(())
    }

    pub fn hash_filename(name: &str) -> String {
        let hash = blake3::hash(name.as_bytes());
        hash.to_hex().to_string()
    }

    pub fn get_cache_size_bytes(&self) -> Result<u64> {
        let mut total_size = 0u64;

        if self.cachedir.exists() {
            let entries = fs::read_dir(&self.cachedir).map_err(|e| {
                DatasetsError::CacheError(format!("Failed to read cache directory: {e}"))
            })?;

            for entry in entries {
                let entry = entry.map_err(|e| {
                    DatasetsError::CacheError(format!("Failed to read directory entry: {e}"))
                })?;

                if let Ok(metadata) = entry.metadata() {
                    if metadata.is_file() {
                        total_size += metadata.len();
                    }
                }
            }
        }

        Ok(total_size)
    }

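    /// Evicts the least recently modified files until the current contents
    /// plus `needed_size` bytes fit within 80% of `max_cache_size`. A limit
    /// of 0 disables eviction entirely.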
    fn cleanup_cache_to_fit(&self, needed_size: u64) -> Result<()> {
        if self.max_cache_size == 0 {
            // A limit of zero means "unlimited": nothing to clean up.
            return Ok(());
        }

        let current_size = self.get_cache_size_bytes()?;
        // Aim for 80% of the limit so consecutive writes do not immediately
        // trigger another cleanup.
        let target_size = (self.max_cache_size as f64 * 0.8) as u64;
        let total_needed = current_size + needed_size;

        if total_needed <= target_size {
            return Ok(());
        }

        let size_to_free = total_needed - target_size;

        let mut files_with_times = Vec::new();

        if self.cachedir.exists() {
            let entries = fs::read_dir(&self.cachedir).map_err(|e| {
                DatasetsError::CacheError(format!("Failed to read cache directory: {e}"))
            })?;

            for entry in entries {
                let entry = entry.map_err(|e| {
                    DatasetsError::CacheError(format!("Failed to read directory entry: {e}"))
                })?;

                if let Ok(metadata) = entry.metadata() {
                    if metadata.is_file() {
                        if let Ok(modified) = metadata.modified() {
                            files_with_times.push((entry.path(), metadata.len(), modified));
                        }
                    }
                }
            }
        }

        // Oldest files first, so eviction approximates least-recently-used.
        files_with_times.sort_by_key(|(_path, _size, modified)| *modified);

        let mut freed_size = 0u64;
        for (path, size, _modified) in files_with_times {
            if freed_size >= size_to_free {
                break;
            }

            if let Some(filename) = path.file_name().and_then(|n| n.to_str()) {
                let key = FileCacheKey(filename.to_string());
                self.mem_cache.borrow_mut().remove(&key);
            }

            if let Err(e) = fs::remove_file(&path) {
                eprintln!("Warning: Failed to remove cache file {path:?}: {e}");
            } else {
                freed_size += size;
            }
        }

        Ok(())
    }

    pub fn set_offline_mode(&mut self, offline: bool) {
        self.offline_mode = offline;
    }

    pub fn is_offline(&self) -> bool {
        self.offline_mode
    }

    pub fn set_max_cache_size(&mut self, max_size: u64) {
        self.max_cache_size = max_size;
    }

    pub fn max_cache_size(&self) -> u64 {
        self.max_cache_size
    }

    pub fn put(&self, name: &str, data: &[u8]) -> Result<()> {
        self.write_cached(name, data)
    }

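    /// Collects per-file statistics for the on-disk cache, with files sorted
    /// by size in descending order.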
    pub fn get_detailed_stats(&self) -> Result<DetailedCacheStats> {
        let mut total_size = 0u64;
        let mut file_count = 0usize;
        let mut files = Vec::new();

        if self.cachedir.exists() {
            let entries = fs::read_dir(&self.cachedir).map_err(|e| {
                DatasetsError::CacheError(format!("Failed to read cache directory: {e}"))
            })?;

            for entry in entries {
                let entry = entry.map_err(|e| {
                    DatasetsError::CacheError(format!("Failed to read directory entry: {e}"))
                })?;

                if let Ok(metadata) = entry.metadata() {
                    if metadata.is_file() {
                        let size = metadata.len();
                        total_size += size;
                        file_count += 1;

                        if let Some(filename) = entry.file_name().to_str() {
                            files.push(CacheFileInfo {
                                name: filename.to_string(),
                                size_bytes: size,
                                modified: metadata.modified().ok(),
                            });
                        }
                    }
                }
            }
        }

        files.sort_by(|a, b| b.size_bytes.cmp(&a.size_bytes));

        Ok(DetailedCacheStats {
            total_size_bytes: total_size,
            file_count,
            cachedir: self.cachedir.clone(),
            max_cache_size: self.max_cache_size,
            offline_mode: self.offline_mode,
            files,
        })
    }
}

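/// Downloads raw bytes from a URL, caching them under a hash of the URL.
/// Unless `force_download` is set, a cached copy is returned without touching
/// the network. Only available with the `download` feature.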
#[cfg(feature = "download")]
#[allow(dead_code)]
pub fn download_data(url: &str, force_download: bool) -> Result<Vec<u8>> {
    let cache = DatasetCache::default();
    let cache_key = DatasetCache::hash_filename(url);

    if !force_download && cache.is_cached(&cache_key) {
        return cache.read_cached(&cache_key);
    }

    let response = reqwest::blocking::get(url).map_err(|e| {
        DatasetsError::DownloadError(format!("Failed to download from {url}: {e}"))
    })?;

    if !response.status().is_success() {
        return Err(DatasetsError::DownloadError(format!(
            "Failed to download from {url}: HTTP status {}",
            response.status()
        )));
    }

    let data = response
        .bytes()
        .map_err(|e| DatasetsError::DownloadError(format!("Failed to read response data: {e}")))?;

    let data_vec = data.to_vec();

    cache.write_cached(&cache_key, &data_vec)?;

    Ok(data_vec)
}

#[cfg(not(feature = "download"))]
#[allow(dead_code)]
pub fn download_data(_url: &str, _force_download: bool) -> Result<Vec<u8>> {
    Err(DatasetsError::Other(
        "Download feature is not enabled. Recompile with --features download".to_string(),
    ))
}

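/// High-level facade over [`DatasetCache`] that (de)serializes whole datasets
/// as JSON and exposes statistics, cleanup, and reporting helpers.
///
/// # Example
///
/// A minimal usage sketch (not compiled here; it assumes this module is
/// exported as `scirs2_datasets::cache` — adjust the path to your crate
/// layout):
///
/// ```no_run
/// use scirs2_datasets::cache::CacheManager;
///
/// let manager = CacheManager::new().expect("cache directory unavailable");
/// println!("cache dir: {}", manager.cachedir().display());
///
/// let stats = manager.get_stats();
/// println!("{} files, {} bytes cached", stats.file_count, stats.total_size_bytes);
/// ```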
pub struct CacheManager {
    cache: DatasetCache,
}

impl CacheManager {
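    /// Creates a manager over the default cache directory with default
    /// in-memory settings.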
    pub fn new() -> Result<Self> {
        let cachedir = get_cachedir()?;
        Ok(Self {
            cache: DatasetCache::with_config(cachedir, DEFAULT_CACHE_SIZE, DEFAULT_CACHE_TTL),
        })
    }

    pub fn with_config(cachedir: PathBuf, cache_size: usize, ttl_seconds: u64) -> Self {
        Self {
            cache: DatasetCache::with_config(cachedir, cache_size, ttl_seconds),
        }
    }

    pub fn get(&self, key: &CacheKey) -> Result<Option<crate::utils::Dataset>> {
        let name = key.as_string();
        if self.cache.is_cached(&name) {
            match self.cache.read_cached(&name) {
                Ok(cached_data) => {
                    match serde_json::from_slice::<crate::utils::Dataset>(&cached_data) {
                        Ok(dataset) => Ok(Some(dataset)),
                        Err(e) => {
                            // Drop the stale in-memory entry so a corrupt blob
                            // is not served again.
                            self.cache
                                .mem_cache
                                .borrow_mut()
                                .remove(&FileCacheKey(name.clone()));
                            Err(DatasetsError::CacheError(format!(
                                "Failed to deserialize cached dataset: {e}"
                            )))
                        }
                    }
                }
                Err(e) => Err(DatasetsError::CacheError(format!(
                    "Failed to read cached data: {e}"
                ))),
            }
        } else {
            Ok(None)
        }
    }

    pub fn put(&self, key: &CacheKey, dataset: &crate::utils::Dataset) -> Result<()> {
        let name = key.as_string();

        let serialized = serde_json::to_vec(dataset)
            .map_err(|e| DatasetsError::CacheError(format!("Failed to serialize dataset: {e}")))?;

        self.cache
            .write_cached(&name, &serialized)
            .map_err(|e| DatasetsError::CacheError(format!("Failed to write to cache: {e}")))
    }

    pub fn with_full_config(
        cachedir: PathBuf,
        cache_size: usize,
        ttl_seconds: u64,
        max_cache_size: u64,
        offline_mode: bool,
    ) -> Self {
        Self {
            cache: DatasetCache::with_full_config(
                cachedir,
                cache_size,
                ttl_seconds,
                max_cache_size,
                offline_mode,
            ),
        }
    }

    pub fn get_stats(&self) -> CacheStats {
        let cachedir = &self.cache.cachedir;
        let mut total_size = 0u64;
        let mut file_count = 0usize;

        if cachedir.exists() {
            if let Ok(entries) = fs::read_dir(cachedir) {
                for entry in entries.flatten() {
                    if let Ok(metadata) = entry.metadata() {
                        if metadata.is_file() {
                            total_size += metadata.len();
                            file_count += 1;
                        }
                    }
                }
            }
        }

        CacheStats {
            total_size_bytes: total_size,
            file_count,
            cachedir: cachedir.clone(),
        }
    }

    pub fn get_detailed_stats(&self) -> Result<DetailedCacheStats> {
        self.cache.get_detailed_stats()
    }

    pub fn set_offline_mode(&mut self, offline: bool) {
        self.cache.set_offline_mode(offline);
    }

    pub fn is_offline(&self) -> bool {
        self.cache.is_offline()
    }

    pub fn set_max_cache_size(&mut self, max_size: u64) {
        self.cache.set_max_cache_size(max_size);
    }

    pub fn max_cache_size(&self) -> u64 {
        self.cache.max_cache_size()
    }

    pub fn clear_all(&self) -> Result<()> {
        self.cache.clear_cache()
    }

    pub fn remove(&self, name: &str) -> Result<()> {
        self.cache.remove_cached(name)
    }

    pub fn cleanup_old_files(&self, target_size: u64) -> Result<()> {
        self.cache.cleanup_cache_to_fit(target_size)
    }

    pub fn list_cached_files(&self) -> Result<Vec<String>> {
        let cachedir = &self.cache.cachedir;
        let mut files = Vec::new();

        if cachedir.exists() {
            let entries = fs::read_dir(cachedir).map_err(|e| {
                DatasetsError::CacheError(format!("Failed to read cache directory: {e}"))
            })?;

            for entry in entries {
                let entry = entry.map_err(|e| {
                    DatasetsError::CacheError(format!("Failed to read directory entry: {e}"))
                })?;

                if let Some(filename) = entry.file_name().to_str() {
                    files.push(filename.to_string());
                }
            }
        }

        files.sort();
        Ok(files)
    }

    pub fn cachedir(&self) -> &PathBuf {
        &self.cache.cachedir
    }

    pub fn is_cached(&self, name: &str) -> bool {
        self.cache.is_cached(name)
    }

    pub fn print_cache_report(&self) -> Result<()> {
        let stats = self.get_detailed_stats()?;

        println!("=== Cache Report ===");
        println!("Cache Directory: {}", stats.cachedir.display());
        println!(
            "Total Size: {} ({} files)",
            stats.formatted_size(),
            stats.file_count
        );
        println!("Max Size: {}", stats.formatted_max_size());

        if stats.max_cache_size > 0 {
            println!("Usage: {:.1}%", stats.usage_percentage() * 100.0);
        }

        println!(
            "Offline Mode: {}",
            if stats.offline_mode {
                "Enabled"
            } else {
                "Disabled"
            }
        );

        if !stats.files.is_empty() {
            println!("\nCached Files:");
            for file in &stats.files {
                println!(
                    "  {} - {} ({})",
                    file.name,
                    file.formatted_size(),
                    file.formatted_modified()
                );
            }
        }

        Ok(())
    }
}

/// Basic cache statistics: total size, file count, and location.
pub struct CacheStats {
    pub total_size_bytes: u64,
    pub file_count: usize,
    pub cachedir: PathBuf,
}

/// Extended cache statistics including the size limit, offline flag, and
/// per-file details.
pub struct DetailedCacheStats {
    pub total_size_bytes: u64,
    pub file_count: usize,
    pub cachedir: PathBuf,
    pub max_cache_size: u64,
    pub offline_mode: bool,
    pub files: Vec<CacheFileInfo>,
}

/// Name, size, and last-modified time of a single cached file.
#[derive(Debug, Clone)]
pub struct CacheFileInfo {
    pub name: String,
    pub size_bytes: u64,
    pub modified: Option<std::time::SystemTime>,
}

impl CacheStats {
    pub fn formatted_size(&self) -> String {
        format_bytes(self.total_size_bytes)
    }
}

impl DetailedCacheStats {
    pub fn formatted_size(&self) -> String {
        format_bytes(self.total_size_bytes)
    }

    pub fn formatted_max_size(&self) -> String {
        if self.max_cache_size == 0 {
            "Unlimited".to_string()
        } else {
            format_bytes(self.max_cache_size)
        }
    }

    pub fn usage_percentage(&self) -> f64 {
        if self.max_cache_size == 0 {
            0.0
        } else {
            self.total_size_bytes as f64 / self.max_cache_size as f64
        }
    }
}

impl CacheFileInfo {
    pub fn formatted_size(&self) -> String {
        format_bytes(self.size_bytes)
    }

    pub fn formatted_modified(&self) -> String {
        match &self.modified {
            Some(time) => {
                if let Ok(now) = std::time::SystemTime::now().duration_since(std::time::UNIX_EPOCH)
                {
                    if let Ok(modified) = time.duration_since(std::time::UNIX_EPOCH) {
                        let diff_secs = now.as_secs().saturating_sub(modified.as_secs());
                        let days = diff_secs / 86400;
                        let hours = (diff_secs % 86400) / 3600;
                        let mins = (diff_secs % 3600) / 60;

                        if days > 0 {
                            format!("{days} days ago")
                        } else if hours > 0 {
                            format!("{hours} hours ago")
                        } else if mins > 0 {
                            format!("{mins} minutes ago")
                        } else {
                            "Just now".to_string()
                        }
                    } else {
                        "Unknown".to_string()
                    }
                } else {
                    "Unknown".to_string()
                }
            }
            None => "Unknown".to_string(),
        }
    }
}

#[allow(dead_code)]
fn format_bytes(bytes: u64) -> String {
    let size = bytes as f64;
    if size < 1024.0 {
        format!("{size} B")
    } else if size < 1024.0 * 1024.0 {
        format!("{:.1} KB", size / 1024.0)
    } else if size < 1024.0 * 1024.0 * 1024.0 {
        format!("{:.1} MB", size / (1024.0 * 1024.0))
    } else {
        format!("{:.1} GB", size / (1024.0 * 1024.0 * 1024.0))
    }
}

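/// Outcome of a batch operation: how many items succeeded or failed, the
/// per-item failure messages, the number of bytes processed, and the elapsed
/// wall-clock time.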
#[derive(Debug, Clone)]
pub struct BatchResult {
    /// Number of items that completed successfully.
    pub success_count: usize,
    /// Number of items that failed.
    pub failure_count: usize,
    /// `(item name, error message)` pairs for each failure.
    pub failures: Vec<(String, String)>,
    /// Total bytes processed across all successful items.
    pub total_bytes: u64,
    /// Wall-clock time the batch took.
    pub elapsed_time: std::time::Duration,
}

impl BatchResult {
    pub fn new() -> Self {
        Self {
            success_count: 0,
            failure_count: 0,
            failures: Vec::new(),
            total_bytes: 0,
            elapsed_time: std::time::Duration::ZERO,
        }
    }

    pub fn is_all_success(&self) -> bool {
        self.failure_count == 0
    }

    pub fn success_rate(&self) -> f64 {
        let total = self.success_count + self.failure_count;
        if total == 0 {
            0.0
        } else {
            (self.success_count as f64 / total as f64) * 100.0
        }
    }

    pub fn summary(&self) -> String {
        // `format_bytes` already appends a unit, so the format string must
        // not repeat the word "bytes".
        format!(
            "Batch completed: {}/{} successful ({:.1}%), {} processed in {:.2}s",
            self.success_count,
            self.success_count + self.failure_count,
            self.success_rate(),
            format_bytes(self.total_bytes),
            self.elapsed_time.as_secs_f64()
        )
    }
}

impl Default for BatchResult {
    fn default() -> Self {
        Self::new()
    }
}

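/// Runs downloads, integrity checks, cleanup, and arbitrary processing over
/// many cached files at once, with optional parallelism and retries.
///
/// # Example
///
/// A minimal sketch (not compiled here; the `scirs2_datasets::cache` path is
/// an assumption — adjust it to your crate layout):
///
/// ```no_run
/// use scirs2_datasets::cache::{BatchOperations, CacheManager};
///
/// let manager = CacheManager::with_config("/tmp/scirs2-cache".into(), 10, 3600);
/// let ops = BatchOperations::new(manager).with_parallel(false);
///
/// ops.write_cached("a.dat", b"hello").expect("write failed");
/// ops.write_cached("b.dat", b"world").expect("write failed");
///
/// // Process every cached file; the closure's Err marks an item as failed.
/// let result = ops.batch_process(
///     &["a.dat".to_string(), "b.dat".to_string()],
///     |_name, data| if data.is_empty() { Err("empty") } else { Ok(data.len()) },
/// );
/// assert!(result.is_all_success());
/// println!("{}", result.summary());
/// ```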
pub struct BatchOperations {
    cache: CacheManager,
    parallel: bool,
    max_retries: usize,
    retry_delay: std::time::Duration,
}

impl BatchOperations {
    pub fn new(cache: CacheManager) -> Self {
        Self {
            cache,
            parallel: true,
            max_retries: 3,
            retry_delay: std::time::Duration::from_millis(1000),
        }
    }

    pub fn with_parallel(mut self, parallel: bool) -> Self {
        self.parallel = parallel;
        self
    }

    pub fn with_retry_config(
        mut self,
        max_retries: usize,
        retry_delay: std::time::Duration,
    ) -> Self {
        self.max_retries = max_retries;
        self.retry_delay = retry_delay;
        self
    }

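    /// Downloads each `(url, name)` pair into the cache, in parallel or
    /// sequentially depending on configuration, retrying failed downloads up
    /// to `max_retries` times.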
    #[cfg(feature = "download")]
    pub fn batch_download(&self, urls_and_names: &[(&str, &str)]) -> BatchResult {
        let start_time = std::time::Instant::now();
        let mut result = BatchResult::new();

        if self.parallel {
            self.batch_download_parallel(urls_and_names, &mut result)
        } else {
            self.batch_download_sequential(urls_and_names, &mut result)
        }

        result.elapsed_time = start_time.elapsed();
        result
    }

    #[cfg(feature = "download")]
    fn batch_download_parallel(&self, urls_and_names: &[(&str, &str)], result: &mut BatchResult) {
        use std::fs::File;
        use std::io::Write;
        use std::sync::{Arc, Mutex};
        use std::thread;

        if let Err(e) = self.cache.cache.ensure_cachedir() {
            result.failure_count += urls_and_names.len();
            for &(_, name) in urls_and_names {
                result
                    .failures
                    .push((name.to_string(), format!("Cache setup failed: {e}")));
            }
            return;
        }

        let result_arc = Arc::new(Mutex::new(BatchResult::new()));
        let cachedir = self.cache.cache.cachedir.clone();
        let max_retries = self.max_retries;
        let retry_delay = self.retry_delay;

        let handles: Vec<_> = urls_and_names
            .iter()
            .map(|&(url, name)| {
                let result_clone = Arc::clone(&result_arc);
                let url = url.to_string();
                let name = name.to_string();
                let cachedir = cachedir.clone();

                thread::spawn(move || {
                    let mut success = false;
                    let mut last_error = String::new();
                    let mut downloaded_data = Vec::new();

                    for attempt in 0..=max_retries {
                        match download_data(&url, false) {
                            Ok(data) => {
                                let path = cachedir.join(&name);
                                match File::create(&path) {
                                    Ok(mut file) => match file.write_all(&data) {
                                        Ok(_) => {
                                            let mut r =
                                                result_clone.lock().expect("Operation failed");
                                            r.success_count += 1;
                                            r.total_bytes += data.len() as u64;
                                            downloaded_data = data;
                                            success = true;
                                            break;
                                        }
                                        Err(e) => {
                                            last_error = format!("Failed to write cache file: {e}");
                                        }
                                    },
                                    Err(e) => {
                                        last_error = format!("Failed to create cache file: {e}");
                                    }
                                }
                            }
                            Err(e) => {
                                last_error = format!("Download failed: {e}");
                                if attempt < max_retries {
                                    thread::sleep(retry_delay);
                                }
                            }
                        }
                    }

                    if !success {
                        let mut r = result_clone.lock().expect("Operation failed");
                        r.failure_count += 1;
                        r.failures.push((name.clone(), last_error));
                    }

                    (name, success, downloaded_data)
                })
            })
            .collect();

        let mut successful_downloads = Vec::new();
        for handle in handles {
            if let Ok((name, success, data)) = handle.join() {
                if success && !data.is_empty() {
                    successful_downloads.push((name, data));
                }
            }
        }

        if let Ok(arc_result) = result_arc.lock() {
            result.success_count += arc_result.success_count;
            result.failure_count += arc_result.failure_count;
            result.failures.extend(arc_result.failures.clone());
        }

        // Promote the freshly downloaded bytes into the in-memory cache.
        for (name, data) in successful_downloads {
            let key = FileCacheKey(name);
            self.cache.cache.mem_cache.borrow_mut().insert(key, data);
        }
    }

    #[cfg(feature = "download")]
    fn batch_download_sequential(&self, urls_and_names: &[(&str, &str)], result: &mut BatchResult) {
        for &(url, name) in urls_and_names {
            let mut success = false;
            let mut last_error = String::new();

            for attempt in 0..=self.max_retries {
                match download_data(url, false) {
                    Ok(data) => match self.cache.cache.write_cached(name, &data) {
                        Ok(_) => {
                            result.success_count += 1;
                            result.total_bytes += data.len() as u64;
                            success = true;
                            break;
                        }
                        Err(e) => {
                            last_error = format!("Cache write failed: {e}");
                        }
                    },
                    Err(e) => {
                        last_error = format!("Download failed: {e}");
                        if attempt < self.max_retries {
                            std::thread::sleep(self.retry_delay);
                        }
                    }
                }
            }

            if !success {
                result.failure_count += 1;
                result.failures.push((name.to_string(), last_error));
            }
        }
    }

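    /// Verifies each cached file in `files_and_hashes` against its expected
    /// SHA-256 hash. Missing files, hash mismatches, and hashing errors are
    /// all recorded as failures.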
    pub fn batch_verify_integrity(&self, files_and_hashes: &[(&str, &str)]) -> BatchResult {
        let start_time = std::time::Instant::now();
        let mut result = BatchResult::new();

        for &(filename, expected_hash) in files_and_hashes {
            let path = self.cache.cache.get_cachedpath(filename);
            if !path.exists() {
                result.failure_count += 1;
                result
                    .failures
                    .push((filename.to_string(), "File not found in cache".to_string()));
                continue;
            }

            match sha256_hash_file(&path) {
                Ok(actual_hash) => {
                    if actual_hash == expected_hash {
                        result.success_count += 1;
                        if let Ok(metadata) = std::fs::metadata(&path) {
                            result.total_bytes += metadata.len();
                        }
                    } else {
                        result.failure_count += 1;
                        result.failures.push((
                            filename.to_string(),
                            format!("Hash mismatch: expected {expected_hash}, got {actual_hash}"),
                        ));
                    }
                }
                Err(e) => {
                    result.failure_count += 1;
                    result
                        .failures
                        .push((filename.to_string(), format!("Hash computation failed: {e}")));
                }
            }
        }

        result.elapsed_time = start_time.elapsed();
        result
    }

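    /// Removes cached files whose names match any of the given patterns
    /// (substring or simple `*` glob). If `max_age_days` is given, only
    /// matching files older than that are removed.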
    pub fn selective_cleanup(
        &self,
        patterns: &[&str],
        max_age_days: Option<u32>,
    ) -> Result<BatchResult> {
        let start_time = std::time::Instant::now();
        let mut result = BatchResult::new();

        let cached_files = self.cache.list_cached_files()?;
        let now = std::time::SystemTime::now();

        for filename in cached_files {
            let should_remove = patterns.iter().any(|pattern| {
                filename.contains(pattern) || matches_glob_pattern(&filename, pattern)
            });

            if should_remove {
                let filepath = self.cache.cache.get_cachedpath(&filename);

                // With no age limit, every matching file is eligible;
                // otherwise only files older than `max_age_days`.
                let remove_due_to_age = if let Some(max_age) = max_age_days {
                    if let Ok(metadata) = std::fs::metadata(&filepath) {
                        if let Ok(modified) = metadata.modified() {
                            if let Ok(age) = now.duration_since(modified) {
                                age.as_secs() > (max_age as u64 * 24 * 3600)
                            } else {
                                false
                            }
                        } else {
                            false
                        }
                    } else {
                        false
                    }
                } else {
                    true
                };

                if remove_due_to_age {
                    // Record the size before removal; afterwards the file's
                    // metadata is no longer readable.
                    let freed_bytes = std::fs::metadata(&filepath).map(|m| m.len()).unwrap_or(0);
                    match self.cache.remove(&filename) {
                        Ok(_) => {
                            result.success_count += 1;
                            result.total_bytes += freed_bytes;
                        }
                        Err(e) => {
                            result.failure_count += 1;
                            result
                                .failures
                                .push((filename, format!("Removal failed: {e}")));
                        }
                    }
                }
            }
        }

        result.elapsed_time = start_time.elapsed();
        Ok(result)
    }

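    /// Runs `processor` over the cached bytes of every name in `names`, in
    /// parallel or sequentially depending on configuration, and reports
    /// per-item successes and failures.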
    pub fn batch_process<F, T, E>(&self, names: &[String], processor: F) -> BatchResult
    where
        F: Fn(&str, &[u8]) -> std::result::Result<T, E> + Sync + Send + 'static,
        E: std::fmt::Display,
        T: Send,
    {
        let start_time = std::time::Instant::now();
        let mut result = BatchResult::new();

        if self.parallel {
            self.batch_process_parallel(names, processor, &mut result)
        } else {
            self.batch_process_sequential(names, processor, &mut result)
        }

        result.elapsed_time = start_time.elapsed();
        result
    }

    fn batch_process_parallel<F, T, E>(
        &self,
        names: &[String],
        processor: F,
        result: &mut BatchResult,
    ) where
        F: Fn(&str, &[u8]) -> std::result::Result<T, E> + Sync + Send + 'static,
        E: std::fmt::Display,
        T: Send,
    {
        let mut data_pairs = Vec::new();

        for name in names {
            match self.cache.cache.read_cached(name) {
                Ok(data) => data_pairs.push((name.clone(), data)),
                Err(e) => {
                    result.failure_count += 1;
                    result
                        .failures
                        .push((name.clone(), format!("Cache read failed: {e}")));
                }
            }
        }

        if !data_pairs.is_empty() {
            use std::sync::{Arc, Mutex};
            use std::thread;

            let parallel_result = Arc::new(Mutex::new(BatchResult::new()));
            let processor = Arc::new(processor);

            let handles: Vec<_> = data_pairs
                .into_iter()
                .map(|(name, data)| {
                    let result_clone = Arc::clone(&parallel_result);
                    let processor_clone = Arc::clone(&processor);

                    thread::spawn(move || match processor_clone(&name, &data) {
                        Ok(_) => {
                            let mut r = result_clone.lock().expect("Operation failed");
                            r.success_count += 1;
                            r.total_bytes += data.len() as u64;
                        }
                        Err(e) => {
                            let mut r = result_clone.lock().expect("Operation failed");
                            r.failure_count += 1;
                            r.failures.push((name, format!("Processing failed: {e}")));
                        }
                    })
                })
                .collect();

            for handle in handles {
                let _ = handle.join();
            }

            let parallel_result = parallel_result.lock().expect("Operation failed");
            result.success_count += parallel_result.success_count;
            result.failure_count += parallel_result.failure_count;
            result.total_bytes += parallel_result.total_bytes;
            result.failures.extend(parallel_result.failures.clone());
        }
    }

    fn batch_process_sequential<F, T, E>(
        &self,
        names: &[String],
        processor: F,
        result: &mut BatchResult,
    ) where
        F: Fn(&str, &[u8]) -> std::result::Result<T, E>,
        E: std::fmt::Display,
    {
        for name in names {
            match self.cache.cache.read_cached(name) {
                Ok(data) => match processor(name, &data) {
                    Ok(_) => {
                        result.success_count += 1;
                        result.total_bytes += data.len() as u64;
                    }
                    Err(e) => {
                        result.failure_count += 1;
                        result
                            .failures
                            .push((name.clone(), format!("Processing failed: {e}")));
                    }
                },
                Err(e) => {
                    result.failure_count += 1;
                    result
                        .failures
                        .push((name.clone(), format!("Cache read failed: {e}")));
                }
            }
        }
    }

    pub fn cache_manager(&self) -> &CacheManager {
        &self.cache
    }

    pub fn write_cached(&self, name: &str, data: &[u8]) -> Result<()> {
        self.cache.cache.write_cached(name, data)
    }

    pub fn read_cached(&self, name: &str) -> Result<Vec<u8>> {
        self.cache.cache.read_cached(name)
    }

    pub fn list_cached_files(&self) -> Result<Vec<String>> {
        self.cache.list_cached_files()
    }

    pub fn print_cache_report(&self) -> Result<()> {
        self.cache.print_cache_report()
    }

    pub fn get_cache_statistics(&self) -> Result<BatchResult> {
        let start_time = std::time::Instant::now();
        let mut result = BatchResult::new();

        let cached_files = self.cache.list_cached_files()?;

        for filename in cached_files {
            let filepath = self.cache.cache.get_cachedpath(&filename);
            match std::fs::metadata(&filepath) {
                Ok(metadata) => {
                    result.success_count += 1;
                    result.total_bytes += metadata.len();
                }
                Err(e) => {
                    result.failure_count += 1;
                    result
                        .failures
                        .push((filename, format!("Metadata read failed: {e}")));
                }
            }
        }

        result.elapsed_time = start_time.elapsed();
        Ok(result)
    }
}

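/// Very small glob matcher: supports `*` (match everything) and patterns
/// with a single `*` such as `*.csv` or `test.*`; anything else is compared
/// for exact equality.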
#[allow(dead_code)]
fn matches_glob_pattern(filename: &str, pattern: &str) -> bool {
    if pattern == "*" {
        return true;
    }

    if pattern.contains('*') {
        let parts: Vec<&str> = pattern.split('*').collect();
        if parts.len() == 2 {
            let prefix = parts[0];
            let suffix = parts[1];
            return filename.starts_with(prefix) && filename.ends_with(suffix);
        }
    }

    filename == pattern
}

#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    #[test]
    fn test_batch_result() {
        let mut result = BatchResult::new();
        assert_eq!(result.success_count, 0);
        assert_eq!(result.failure_count, 0);
        assert!(result.is_all_success());
        assert_eq!(result.success_rate(), 0.0);

        result.success_count = 8;
        result.failure_count = 2;
        result.total_bytes = 1024;

        assert!(!result.is_all_success());
        assert_eq!(result.success_rate(), 80.0);
        assert!(result.summary().contains("8/10 successful"));
        assert!(result.summary().contains("80.0%"));
    }

    #[test]
    fn test_batch_operations_creation() {
        let tempdir = TempDir::new().expect("Operation failed");
        let cache_manager = CacheManager::with_config(tempdir.path().to_path_buf(), 10, 3600);
        let batch_ops = BatchOperations::new(cache_manager)
            .with_parallel(false)
            .with_retry_config(2, std::time::Duration::from_millis(500));

        assert!(!batch_ops.parallel);
        assert_eq!(batch_ops.max_retries, 2);
    }

    #[test]
    fn test_selective_cleanup() {
        let tempdir = TempDir::new().expect("Operation failed");
        let cache_manager = CacheManager::with_config(tempdir.path().to_path_buf(), 10, 3600);
        let batch_ops = BatchOperations::new(cache_manager);

        let test_data = vec![0u8; 100];
        batch_ops
            .cache
            .cache
            .write_cached("test1.csv", &test_data)
            .expect("Test: cache operation failed");
        batch_ops
            .cache
            .cache
            .write_cached("test2.csv", &test_data)
            .expect("Test: cache operation failed");
        batch_ops
            .cache
            .cache
            .write_cached("data.json", &test_data)
            .expect("Test: cache operation failed");

        let result = batch_ops
            .selective_cleanup(&["*.csv"], None)
            .expect("Operation failed");

        // Only the two *.csv files should have been removed.
        assert_eq!(result.success_count, 2);
        assert!(!batch_ops.cache.is_cached("test1.csv"));
        assert!(!batch_ops.cache.is_cached("test2.csv"));
        assert!(batch_ops.cache.is_cached("data.json"));
    }

    #[test]
    fn test_batch_process() {
        let tempdir = TempDir::new().expect("Operation failed");
        let cache_manager = CacheManager::with_config(tempdir.path().to_path_buf(), 10, 3600);
        let batch_ops = BatchOperations::new(cache_manager).with_parallel(false);

        let test_data1 = vec![1u8; 100];
        let test_data2 = vec![2u8; 200];
        batch_ops
            .cache
            .cache
            .write_cached("file1.dat", &test_data1)
            .expect("Test: cache operation failed");
        batch_ops
            .cache
            .cache
            .write_cached("file2.dat", &test_data2)
            .expect("Test: cache operation failed");

        let files = vec!["file1.dat".to_string(), "file2.dat".to_string()];

        let result = batch_ops.batch_process(&files, |_name, data| {
            if data.is_empty() {
                Err("Empty file")
            } else {
                Ok(data.len())
            }
        });

        assert_eq!(result.success_count, 2);
        assert_eq!(result.failure_count, 0);
        assert_eq!(result.total_bytes, 300); // 100 + 200 bytes
    }

    #[test]
    fn test_get_cache_statistics() {
        let tempdir = TempDir::new().expect("Operation failed");
        let cache_manager = CacheManager::with_config(tempdir.path().to_path_buf(), 10, 3600);
        let batch_ops = BatchOperations::new(cache_manager);

        let result = batch_ops.get_cache_statistics().expect("Operation failed");
        assert_eq!(result.success_count, 0);

        let test_data = vec![0u8; 500];
        batch_ops
            .cache
            .cache
            .write_cached("test1.dat", &test_data)
            .expect("Test: cache operation failed");
        batch_ops
            .cache
            .cache
            .write_cached("test2.dat", &test_data)
            .expect("Test: cache operation failed");

        let result = batch_ops.get_cache_statistics().expect("Operation failed");
        assert_eq!(result.success_count, 2);
        assert_eq!(result.total_bytes, 1000);
    }

    #[test]
    fn test_matches_glob_pattern() {
        assert!(matches_glob_pattern("test.csv", "*"));
        assert!(matches_glob_pattern("test.csv", "*.csv"));
        assert!(matches_glob_pattern("test.csv", "test.*"));
        assert!(matches_glob_pattern("test.csv", "test.csv"));

        assert!(!matches_glob_pattern("test.json", "*.csv"));
        assert!(!matches_glob_pattern("other.csv", "test.*"));
    }

    #[test]
    fn test_cache_manager_creation() {
        let tempdir = TempDir::new().expect("Operation failed");
        let manager = CacheManager::with_config(tempdir.path().to_path_buf(), 10, 3600);
        let stats = manager.get_stats();
        assert_eq!(stats.file_count, 0);
    }

    #[test]
    fn test_cache_stats_formatting() {
        let tempdir = TempDir::new().expect("Operation failed");
        let stats = CacheStats {
            total_size_bytes: 1024,
            file_count: 1,
            cachedir: tempdir.path().to_path_buf(),
        };

        assert_eq!(stats.formatted_size(), "1.0 KB");

        let stats_large = CacheStats {
            total_size_bytes: 1024 * 1024 * 1024,
            file_count: 1,
            cachedir: tempdir.path().to_path_buf(),
        };

        assert_eq!(stats_large.formatted_size(), "1.0 GB");
    }

    #[test]
    fn test_hash_file_name() {
        let hash1 = DatasetCache::hash_filename("test.csv");
        let hash2 = DatasetCache::hash_filename("test.csv");
        let hash3 = DatasetCache::hash_filename("different.csv");

        assert_eq!(hash1, hash2);
        assert_ne!(hash1, hash3);
        assert_eq!(hash1.len(), 64); // blake3 digests render as 64 hex characters
    }

    #[test]
    fn test_platform_cachedir() {
        let cachedir = get_platform_cachedir();
        assert!(cachedir.is_some() || cfg!(target_os = "unknown"));

        if let Some(dir) = cachedir {
            assert!(dir.to_string_lossy().contains("scirs2-datasets"));
        }
    }

    #[test]
    fn test_cache_size_management() {
        let tempdir = TempDir::new().expect("Operation failed");
        let cache = DatasetCache::with_full_config(
            tempdir.path().to_path_buf(),
            10,
            3600,
            2048, // 2 KiB size limit
            false,
        );

        let small_data1 = vec![0u8; 400];
        cache
            .write_cached("small1.dat", &small_data1)
            .expect("Operation failed");

        let small_data2 = vec![0u8; 400];
        cache
            .write_cached("small2.dat", &small_data2)
            .expect("Operation failed");

        let small_data3 = vec![0u8; 400];
        cache
            .write_cached("small3.dat", &small_data3)
            .expect("Operation failed");

        let medium_data = vec![0u8; 800];
        cache
            .write_cached("medium.dat", &medium_data)
            .expect("Operation failed");

        let stats = cache.get_detailed_stats().expect("Operation failed");
        assert!(stats.total_size_bytes <= cache.max_cache_size());

        assert!(cache.is_cached("medium.dat"));
    }

    #[test]
    fn test_offline_mode() {
        let tempdir = TempDir::new().expect("Operation failed");
        let mut cache = DatasetCache::new(tempdir.path().to_path_buf());

        assert!(!cache.is_offline());
        cache.set_offline_mode(true);
        assert!(cache.is_offline());
    }

    #[test]
    fn test_detailed_stats() {
        let tempdir = TempDir::new().expect("Operation failed");
        let cache = DatasetCache::new(tempdir.path().to_path_buf());

        let test_data = vec![1, 2, 3, 4, 5];
        cache
            .write_cached("test.dat", &test_data)
            .expect("Operation failed");

        let stats = cache.get_detailed_stats().expect("Operation failed");
        assert_eq!(stats.file_count, 1);
        assert_eq!(stats.total_size_bytes, test_data.len() as u64);
        assert_eq!(stats.files.len(), 1);
        assert_eq!(stats.files[0].name, "test.dat");
        assert_eq!(stats.files[0].size_bytes, test_data.len() as u64);
    }

    #[test]
    fn test_cache_manager() {
        let tempdir = TempDir::new().expect("Operation failed");
        let manager = CacheManager::with_config(tempdir.path().to_path_buf(), 10, 3600);

        let stats = manager.get_stats();
        assert_eq!(stats.file_count, 0);
        assert_eq!(stats.total_size_bytes, 0);

        assert_eq!(manager.cachedir(), &tempdir.path().to_path_buf());
    }

    #[test]
    fn test_format_bytes() {
        assert_eq!(format_bytes(512), "512 B");
        assert_eq!(format_bytes(1024), "1.0 KB");
        assert_eq!(format_bytes(1024 * 1024), "1.0 MB");
        assert_eq!(format_bytes(1024 * 1024 * 1024), "1.0 GB");
    }
}