use crate::error::{DatasetsError, Result};
use scirs2_core::cache::{CacheBuilder, TTLSizedCache};
use std::cell::RefCell;
use std::fs::{self, File};
use std::hash::{Hash, Hasher};
use std::io::{Read, Write};
use std::path::{Path, PathBuf};

/// Name of the cache directory used by this crate.
const CACHE_DIR_NAME: &str = "scirs2-datasets";

/// Default number of entries held in the in-memory cache.
const DEFAULT_CACHE_SIZE: usize = 100;

/// Default time-to-live for in-memory cache entries, in seconds.
const DEFAULT_CACHE_TTL: u64 = 3600;

/// Default maximum size of the on-disk cache (500 MB).
const DEFAULT_MAX_CACHE_SIZE: u64 = 500 * 1024 * 1024;

/// Environment variable that overrides the cache directory location.
const CACHE_DIR_ENV: &str = "SCIRS2_CACHE_DIR";

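/// Streams a file through SHA-256 in 8 KiB chunks and returns the digest as a
/// lowercase hex string, so large downloads can be verified without loading
/// them fully into memory.
///
/// A minimal usage sketch (the path is hypothetical, and the example is
/// marked `ignore` since the public import path may differ):
///
/// ```ignore
/// use std::path::Path;
///
/// let digest = sha256_hash_file(Path::new("/tmp/dataset.csv"))?;
/// assert_eq!(digest.len(), 64); // 32 bytes, hex-encoded
/// ```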
#[allow(dead_code)]
pub fn sha256_hash_file(path: &Path) -> std::result::Result<String, String> {
    use sha2::{Digest, Sha256};

    let mut file = File::open(path).map_err(|e| format!("Failed to open file: {e}"))?;
    let mut hasher = Sha256::new();
    let mut buffer = [0; 8192];

    loop {
        let bytes_read = file
            .read(&mut buffer)
            .map_err(|e| format!("Failed to read file: {e}"))?;
        if bytes_read == 0 {
            break;
        }
        hasher.update(&buffer[..bytes_read]);
    }

    Ok(format!("{:x}", hasher.finalize()))
}

/// A registry entry describing a downloadable dataset file.
pub struct RegistryEntry {
    /// Expected SHA-256 hash of the file; an empty string skips verification.
    pub sha256: &'static str,
    /// URL from which the file can be downloaded.
    pub url: &'static str,
}

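/// Returns the dataset cache directory, creating it if it does not exist.
///
/// Resolution order:
/// 1. The `SCIRS2_CACHE_DIR` environment variable, if set.
/// 2. The platform-specific cache directory (see `get_platform_cachedir`).
/// 3. `~/.scirs2-datasets` in the user's home directory as a last resort.
///
/// A sketch of intended use (error handling elided):
///
/// ```ignore
/// let dir = get_cachedir()?;
/// println!("datasets are cached under {}", dir.display());
/// ```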
#[allow(dead_code)]
pub fn get_cachedir() -> Result<PathBuf> {
    // 1. Explicit override via environment variable.
    if let Ok(cachedir) = std::env::var(CACHE_DIR_ENV) {
        let cachepath = PathBuf::from(cachedir);
        ensure_directory_exists(&cachepath)?;
        return Ok(cachepath);
    }

    // 2. Platform-specific cache directory.
    if let Some(cachedir) = get_platform_cachedir() {
        ensure_directory_exists(&cachedir)?;
        return Ok(cachedir);
    }

    // 3. Fall back to a hidden directory in the user's home.
    let homedir = dirs::home_dir()
        .ok_or_else(|| DatasetsError::CacheError("Could not find home directory".to_string()))?;
    let cachedir = homedir.join(format!(".{CACHE_DIR_NAME}"));
    ensure_directory_exists(&cachedir)?;

    Ok(cachedir)
}

/// Returns the platform-specific cache directory, if one can be determined.
#[allow(dead_code)]
fn get_platform_cachedir() -> Option<PathBuf> {
    #[cfg(target_os = "windows")]
    {
        dirs::data_local_dir().map(|dir| dir.join(CACHE_DIR_NAME))
    }
    #[cfg(target_os = "macos")]
    {
        dirs::home_dir().map(|dir| dir.join("Library").join("Caches").join(CACHE_DIR_NAME))
    }
    #[cfg(not(any(target_os = "windows", target_os = "macos")))]
    {
        // Respect XDG_CACHE_HOME on Linux and other Unix-like systems.
        if let Ok(xdg_cache) = std::env::var("XDG_CACHE_HOME") {
            Some(PathBuf::from(xdg_cache).join(CACHE_DIR_NAME))
        } else {
            dirs::home_dir().map(|home| home.join(".cache").join(CACHE_DIR_NAME))
        }
    }
}

/// Creates `dir` (and any missing parents) if it does not already exist.
#[allow(dead_code)]
fn ensure_directory_exists(dir: &Path) -> Result<()> {
    if !dir.exists() {
        fs::create_dir_all(dir).map_err(|e| {
            DatasetsError::CacheError(format!("Failed to create cache directory: {e}"))
        })?;
    }
    Ok(())
}

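/// Fetches `filename` into the cache, downloading it if necessary.
///
/// If the file is already cached its path is returned immediately; otherwise
/// the file is downloaded from `registry_entry.url` into a temporary
/// directory, verified against `registry_entry.sha256` (when non-empty), and
/// only then copied into the cache.
///
/// A hedged usage sketch; the URL below is a placeholder, not a real
/// registry entry:
///
/// ```ignore
/// static ENTRY: RegistryEntry = RegistryEntry {
///     sha256: "", // empty hash skips verification in this sketch
///     url: "https://example.com/data/iris.csv",
/// };
///
/// let path = fetch_data("iris.csv", Some(&ENTRY))?;
/// println!("cached at {}", path.display());
/// ```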
#[allow(dead_code)]
pub fn fetch_data(
    filename: &str,
    registry_entry: Option<&RegistryEntry>,
) -> std::result::Result<PathBuf, String> {
    let cachedir = match get_cachedir() {
        Ok(dir) => dir,
        Err(e) => return Err(format!("Failed to get cache directory: {e}")),
    };

    // Return the cached copy if it already exists.
    let cachepath = cachedir.join(filename);
    if cachepath.exists() {
        return Ok(cachepath);
    }

    let entry = match registry_entry {
        Some(entry) => entry,
        None => return Err(format!("No registry entry found for {filename}")),
    };

    // Download into a temporary directory first, so a failed or corrupt
    // download never ends up in the cache.
    let tempdir = tempfile::tempdir().map_err(|e| format!("Failed to create temp dir: {e}"))?;
    let temp_file = tempdir.path().join(filename);

    let response = ureq::get(entry.url)
        .call()
        .map_err(|e| format!("Failed to download {filename}: {e}"))?;

    let mut reader = response.into_reader();
    let mut file = std::fs::File::create(&temp_file)
        .map_err(|e| format!("Failed to create temp file: {e}"))?;

    std::io::copy(&mut reader, &mut file).map_err(|e| format!("Failed to download file: {e}"))?;

    // Verify the download against the registry hash, if one is provided.
    if !entry.sha256.is_empty() {
        let computed_hash = sha256_hash_file(&temp_file)?;
        if computed_hash != entry.sha256 {
            return Err(format!(
                "SHA256 hash mismatch for {filename}: expected {}, got {computed_hash}",
                entry.sha256
            ));
        }
    }

    // Make sure the destination directory exists before copying.
    if let Some(parent) = cachepath.parent() {
        fs::create_dir_all(parent).map_err(|e| format!("Failed to create cache dir: {e}"))?;
    }

    fs::copy(&temp_file, &cachepath).map_err(|e| format!("Failed to copy to cache: {e}"))?;

    Ok(cachepath)
}

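/// A cache key combining a dataset name with a hash of its loading
/// configuration, so that the same dataset loaded with different options is
/// cached separately.
///
/// A sketch of how a key is built (obtaining the `RealWorldConfig` value is
/// elided here):
///
/// ```ignore
/// let key = CacheKey::new("california_housing", &config);
/// // Yields something like "california_housing_9f86d081884c7d65".
/// let file_name = key.as_string();
/// ```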
#[derive(Clone, Debug, Eq, PartialEq, Hash)]
pub struct CacheKey {
    name: String,
    config_hash: String,
}

impl CacheKey {
    /// Builds a key from a dataset name and the parts of the configuration
    /// that affect what ends up in the cache.
    pub fn new(name: &str, config: &crate::real_world::RealWorldConfig) -> Self {
        use std::collections::hash_map::DefaultHasher;

        let mut hasher = DefaultHasher::new();
        config.use_cache.hash(&mut hasher);
        config.download_if_missing.hash(&mut hasher);
        config.return_preprocessed.hash(&mut hasher);
        config.subset.hash(&mut hasher);
        config.random_state.hash(&mut hasher);

        Self {
            name: name.to_string(),
            config_hash: format!("{:x}", hasher.finish()),
        }
    }

    /// Returns the key as a single `name_confighash` string suitable for use
    /// as a cache filename.
    pub fn as_string(&self) -> String {
        format!("{}_{}", self.name, self.config_hash)
    }
}

/// Internal key type for the in-memory file cache.
#[derive(Clone, Debug, Eq, PartialEq, Hash)]
struct FileCacheKey(String);

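/// A two-level dataset cache: a TTL-bounded in-memory cache backed by files
/// on disk, with optional size-based eviction and an offline mode.
///
/// A minimal round-trip sketch (the directory comes from `tempfile`, as in
/// this module's tests):
///
/// ```ignore
/// let dir = tempfile::TempDir::new().unwrap();
/// let cache = DatasetCache::new(dir.path().to_path_buf());
///
/// cache.write_cached("example.bin", &[1, 2, 3])?;
/// assert!(cache.is_cached("example.bin"));
/// assert_eq!(cache.read_cached("example.bin")?, vec![1, 2, 3]);
/// ```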
pub struct DatasetCache {
    /// Directory where cached files live on disk.
    cachedir: PathBuf,
    /// In-memory cache of recently read files.
    mem_cache: RefCell<TTLSizedCache<FileCacheKey, Vec<u8>>>,
    /// Maximum on-disk cache size in bytes; 0 means unlimited.
    max_cache_size: u64,
    /// When true, no network access is attempted.
    offline_mode: bool,
}

impl Default for DatasetCache {
    fn default() -> Self {
        let cachedir = get_cachedir().expect("Could not get cache directory");
        Self::new(cachedir)
    }
}

impl DatasetCache {
    /// Creates a cache rooted at `cachedir` with default settings.
    pub fn new(cachedir: PathBuf) -> Self {
        Self::with_config(cachedir, DEFAULT_CACHE_SIZE, DEFAULT_CACHE_TTL)
    }

    /// Creates a cache with a custom in-memory entry count and TTL. Offline
    /// mode is read from the `SCIRS2_OFFLINE` environment variable.
    pub fn with_config(cachedir: PathBuf, cache_size: usize, ttl_seconds: u64) -> Self {
        let offline_mode = std::env::var("SCIRS2_OFFLINE")
            .map(|v| v.to_lowercase() == "true" || v == "1")
            .unwrap_or(false);

        Self::with_full_config(
            cachedir,
            cache_size,
            ttl_seconds,
            DEFAULT_MAX_CACHE_SIZE,
            offline_mode,
        )
    }

    /// Creates a cache with every parameter given explicitly.
    pub fn with_full_config(
        cachedir: PathBuf,
        cache_size: usize,
        ttl_seconds: u64,
        max_cache_size: u64,
        offline_mode: bool,
    ) -> Self {
        let mem_cache = RefCell::new(
            CacheBuilder::new()
                .with_size(cache_size)
                .with_ttl(ttl_seconds)
                .build_sized_cache(),
        );

        DatasetCache {
            cachedir,
            mem_cache,
            max_cache_size,
            offline_mode,
        }
    }

    /// Creates the on-disk cache directory if it does not exist.
    pub fn ensure_cachedir(&self) -> Result<()> {
        if !self.cachedir.exists() {
            fs::create_dir_all(&self.cachedir).map_err(|e| {
                DatasetsError::CacheError(format!("Failed to create cache directory: {e}"))
            })?;
        }
        Ok(())
    }

    /// Returns the on-disk path a cached file of this name would have.
    pub fn get_cachedpath(&self, name: &str) -> PathBuf {
        self.cachedir.join(name)
    }

    /// Returns true if the file is present in the in-memory or on-disk cache.
    pub fn is_cached(&self, name: &str) -> bool {
        let key = FileCacheKey(name.to_string());
        if self.mem_cache.borrow_mut().get(&key).is_some() {
            return true;
        }

        self.get_cachedpath(name).exists()
    }

    /// Reads a cached file, preferring the in-memory cache and falling back
    /// to disk (populating the in-memory cache on the way out).
    pub fn read_cached(&self, name: &str) -> Result<Vec<u8>> {
        let key = FileCacheKey(name.to_string());
        if let Some(data) = self.mem_cache.borrow_mut().get(&key) {
            return Ok(data);
        }

        let path = self.get_cachedpath(name);
        if !path.exists() {
            return Err(DatasetsError::CacheError(format!(
                "Cached file does not exist: {name}"
            )));
        }

        let mut file = File::open(path)
            .map_err(|e| DatasetsError::CacheError(format!("Failed to open cached file: {e}")))?;

        let mut buffer = Vec::new();
        file.read_to_end(&mut buffer)
            .map_err(|e| DatasetsError::CacheError(format!("Failed to read cached file: {e}")))?;

        self.mem_cache.borrow_mut().insert(key, buffer.clone());

        Ok(buffer)
    }

    /// Writes data to both the on-disk and in-memory caches, evicting old
    /// files first if the write would exceed the size limit.
    pub fn write_cached(&self, name: &str, data: &[u8]) -> Result<()> {
        self.ensure_cachedir()?;

        // Check whether the new file would push the cache over its limit.
        if self.max_cache_size > 0 {
            let current_size = self.get_cache_size_bytes()?;
            let new_file_size = data.len() as u64;

            if current_size + new_file_size > self.max_cache_size {
                self.cleanup_cache_to_fit(new_file_size)?;
            }
        }

        let path = self.get_cachedpath(name);
        let mut file = File::create(path)
            .map_err(|e| DatasetsError::CacheError(format!("Failed to create cache file: {e}")))?;

        file.write_all(data).map_err(|e| {
            DatasetsError::CacheError(format!("Failed to write to cache file: {e}"))
        })?;

        let key = FileCacheKey(name.to_string());
        self.mem_cache.borrow_mut().insert(key, data.to_vec());

        Ok(())
    }

    /// Removes the entire cache directory and clears the in-memory cache.
    pub fn clear_cache(&self) -> Result<()> {
        if self.cachedir.exists() {
            fs::remove_dir_all(&self.cachedir)
                .map_err(|e| DatasetsError::CacheError(format!("Failed to clear cache: {e}")))?;
        }

        self.mem_cache.borrow_mut().clear();

        Ok(())
    }

    /// Removes a single file from both the on-disk and in-memory caches.
    pub fn remove_cached(&self, name: &str) -> Result<()> {
        let path = self.get_cachedpath(name);
        if path.exists() {
            fs::remove_file(path).map_err(|e| {
                DatasetsError::CacheError(format!("Failed to remove cached file: {e}"))
            })?;
        }

        let key = FileCacheKey(name.to_string());
        self.mem_cache.borrow_mut().remove(&key);

        Ok(())
    }

    /// Hashes a filename with BLAKE3 and returns the 64-character hex digest,
    /// suitable as a collision-resistant cache key.
    pub fn hash_filename(name: &str) -> String {
        let hash = blake3::hash(name.as_bytes());
        hash.to_hex().to_string()
    }

    /// Returns the total size in bytes of all files in the cache directory.
    pub fn get_cache_size_bytes(&self) -> Result<u64> {
        let mut total_size = 0u64;

        if self.cachedir.exists() {
            let entries = fs::read_dir(&self.cachedir).map_err(|e| {
                DatasetsError::CacheError(format!("Failed to read cache directory: {e}"))
            })?;

            for entry in entries {
                let entry = entry.map_err(|e| {
                    DatasetsError::CacheError(format!("Failed to read directory entry: {e}"))
                })?;

                if let Ok(metadata) = entry.metadata() {
                    if metadata.is_file() {
                        total_size += metadata.len();
                    }
                }
            }
        }

        Ok(total_size)
    }

    /// Evicts the least recently modified files until the cache can hold
    /// `needed_size` more bytes while staying under 80% of the size limit.
    fn cleanup_cache_to_fit(&self, needed_size: u64) -> Result<()> {
        // A limit of 0 means the cache size is unbounded.
        if self.max_cache_size == 0 {
            return Ok(());
        }

        let current_size = self.get_cache_size_bytes()?;
        // Aim for 80% of the limit to leave headroom for future writes.
        let target_size = (self.max_cache_size as f64 * 0.8) as u64;
        let total_needed = current_size + needed_size;

        if total_needed <= target_size {
            return Ok(());
        }

        let size_to_free = total_needed - target_size;

        // Collect (path, size, mtime) for every file in the cache.
        let mut files_with_times = Vec::new();

        if self.cachedir.exists() {
            let entries = fs::read_dir(&self.cachedir).map_err(|e| {
                DatasetsError::CacheError(format!("Failed to read cache directory: {e}"))
            })?;

            for entry in entries {
                let entry = entry.map_err(|e| {
                    DatasetsError::CacheError(format!("Failed to read directory entry: {e}"))
                })?;

                if let Ok(metadata) = entry.metadata() {
                    if metadata.is_file() {
                        if let Ok(modified) = metadata.modified() {
                            files_with_times.push((entry.path(), metadata.len(), modified));
                        }
                    }
                }
            }
        }

        // Oldest files first, so eviction is roughly least-recently-written.
        files_with_times.sort_by_key(|(_path, _size, modified)| *modified);

        let mut freed_size = 0u64;
        for (path, size, _modified) in files_with_times {
            if freed_size >= size_to_free {
                break;
            }

            // Drop the in-memory entry alongside the file.
            if let Some(filename) = path.file_name().and_then(|n| n.to_str()) {
                let key = FileCacheKey(filename.to_string());
                self.mem_cache.borrow_mut().remove(&key);
            }

            if let Err(e) = fs::remove_file(&path) {
                eprintln!("Warning: Failed to remove cache file {path:?}: {e}");
            } else {
                freed_size += size;
            }
        }

        Ok(())
    }

    /// Enables or disables offline mode.
    pub fn set_offline_mode(&mut self, offline: bool) {
        self.offline_mode = offline;
    }

    /// Returns true if offline mode is enabled.
    pub fn is_offline(&self) -> bool {
        self.offline_mode
    }

    /// Sets the maximum on-disk cache size in bytes (0 = unlimited).
    pub fn set_max_cache_size(&mut self, max_size: u64) {
        self.max_cache_size = max_size;
    }

    /// Returns the maximum on-disk cache size in bytes.
    pub fn max_cache_size(&self) -> u64 {
        self.max_cache_size
    }

    /// Convenience alias for [`Self::write_cached`].
    pub fn put(&self, name: &str, data: &[u8]) -> Result<()> {
        self.write_cached(name, data)
    }

    /// Collects per-file statistics for everything in the cache directory.
    pub fn get_detailed_stats(&self) -> Result<DetailedCacheStats> {
        let mut total_size = 0u64;
        let mut file_count = 0usize;
        let mut files = Vec::new();

        if self.cachedir.exists() {
            let entries = fs::read_dir(&self.cachedir).map_err(|e| {
                DatasetsError::CacheError(format!("Failed to read cache directory: {e}"))
            })?;

            for entry in entries {
                let entry = entry.map_err(|e| {
                    DatasetsError::CacheError(format!("Failed to read directory entry: {e}"))
                })?;

                if let Ok(metadata) = entry.metadata() {
                    if metadata.is_file() {
                        let size = metadata.len();
                        total_size += size;
                        file_count += 1;

                        if let Some(filename) = entry.file_name().to_str() {
                            files.push(CacheFileInfo {
                                name: filename.to_string(),
                                size_bytes: size,
                                modified: metadata.modified().ok(),
                            });
                        }
                    }
                }
            }
        }

        // Largest files first, the most useful order for reports.
        files.sort_by(|a, b| b.size_bytes.cmp(&a.size_bytes));

        Ok(DetailedCacheStats {
            total_size_bytes: total_size,
            file_count,
            cachedir: self.cachedir.clone(),
            max_cache_size: self.max_cache_size,
            offline_mode: self.offline_mode,
            files,
        })
    }
}

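/// Downloads raw bytes from `url`, caching them on disk under a BLAKE3 hash
/// of the URL. When `force_download` is false and the URL is already cached,
/// the cached bytes are returned without touching the network.
///
/// A hedged sketch (the URL is a placeholder):
///
/// ```ignore
/// let bytes = download_data("https://example.com/data.csv", false)?;
/// println!("fetched {} bytes", bytes.len());
/// ```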
#[cfg(feature = "download")]
#[allow(dead_code)]
pub fn download_data(url: &str, force_download: bool) -> Result<Vec<u8>> {
    let cache = DatasetCache::default();
    let cache_key = DatasetCache::hash_filename(url);

    // Serve from cache unless the caller explicitly forces a re-download.
    if !force_download && cache.is_cached(&cache_key) {
        return cache.read_cached(&cache_key);
    }

    let response = reqwest::blocking::get(url).map_err(|e| {
        DatasetsError::DownloadError(format!("Failed to download from {url}: {e}"))
    })?;

    if !response.status().is_success() {
        return Err(DatasetsError::DownloadError(format!(
            "Failed to download from {url}: HTTP status {}",
            response.status()
        )));
    }

    let data = response
        .bytes()
        .map_err(|e| DatasetsError::DownloadError(format!("Failed to read response data: {e}")))?;

    let data_vec = data.to_vec();

    cache.write_cached(&cache_key, &data_vec)?;

    Ok(data_vec)
}

/// Stub used when the `download` feature is disabled; always returns an error.
#[cfg(not(feature = "download"))]
#[allow(dead_code)]
pub fn download_data(_url: &str, _force_download: bool) -> Result<Vec<u8>> {
    Err(DatasetsError::Other(
        "Download feature is not enabled. Recompile with --features download".to_string(),
    ))
}

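/// A high-level wrapper around [`DatasetCache`] that adds typed get/put of
/// serialized datasets, statistics, and maintenance operations.
///
/// A usage sketch; obtaining the `config` and `dataset` values is elided:
///
/// ```ignore
/// let manager = CacheManager::new()?;
/// let key = CacheKey::new("california_housing", &config);
///
/// if let Some(dataset) = manager.get(&key)? {
///     // Already cached; use it directly.
/// } else {
///     manager.put(&key, &dataset)?;
/// }
/// ```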
pub struct CacheManager {
    /// The underlying dataset cache.
    cache: DatasetCache,
}

impl CacheManager {
    /// Creates a manager over the default cache directory.
    pub fn new() -> Result<Self> {
        let cachedir = get_cachedir()?;
        Ok(Self {
            cache: DatasetCache::with_config(cachedir, DEFAULT_CACHE_SIZE, DEFAULT_CACHE_TTL),
        })
    }

    /// Creates a manager with a custom directory, entry count, and TTL.
    pub fn with_config(cachedir: PathBuf, cache_size: usize, ttl_seconds: u64) -> Self {
        Self {
            cache: DatasetCache::with_config(cachedir, cache_size, ttl_seconds),
        }
    }

    /// Looks up a dataset by key, returning `Ok(None)` on a cache miss.
    pub fn get(&self, key: &CacheKey) -> Result<Option<crate::utils::Dataset>> {
        let name = key.as_string();
        if self.cache.is_cached(&name) {
            match self.cache.read_cached(&name) {
                Ok(cached_data) => {
                    match serde_json::from_slice::<crate::utils::Dataset>(&cached_data) {
                        Ok(dataset) => Ok(Some(dataset)),
                        Err(e) => {
                            // Evict the corrupted entry from the in-memory
                            // cache so it is not served again.
                            self.cache
                                .mem_cache
                                .borrow_mut()
                                .remove(&FileCacheKey(name.clone()));
                            Err(DatasetsError::CacheError(format!(
                                "Failed to deserialize cached dataset: {e}"
                            )))
                        }
                    }
                }
                Err(e) => Err(DatasetsError::CacheError(format!(
                    "Failed to read cached data: {e}"
                ))),
            }
        } else {
            Ok(None)
        }
    }

    /// Serializes a dataset as JSON and stores it under the given key.
    pub fn put(&self, key: &CacheKey, dataset: &crate::utils::Dataset) -> Result<()> {
        let name = key.as_string();

        let serialized = serde_json::to_vec(dataset)
            .map_err(|e| DatasetsError::CacheError(format!("Failed to serialize dataset: {e}")))?;

        self.cache
            .write_cached(&name, &serialized)
            .map_err(|e| DatasetsError::CacheError(format!("Failed to write to cache: {e}")))
    }

    /// Creates a manager with every cache parameter given explicitly.
    pub fn with_full_config(
        cachedir: PathBuf,
        cache_size: usize,
        ttl_seconds: u64,
        max_cache_size: u64,
        offline_mode: bool,
    ) -> Self {
        Self {
            cache: DatasetCache::with_full_config(
                cachedir,
                cache_size,
                ttl_seconds,
                max_cache_size,
                offline_mode,
            ),
        }
    }

    /// Returns summary statistics for the cache directory.
    pub fn get_stats(&self) -> CacheStats {
        let cachedir = &self.cache.cachedir;
        let mut total_size = 0u64;
        let mut file_count = 0usize;

        if cachedir.exists() {
            if let Ok(entries) = fs::read_dir(cachedir) {
                for entry in entries.flatten() {
                    if let Ok(metadata) = entry.metadata() {
                        if metadata.is_file() {
                            total_size += metadata.len();
                            file_count += 1;
                        }
                    }
                }
            }
        }

        CacheStats {
            total_size_bytes: total_size,
            file_count,
            cachedir: cachedir.clone(),
        }
    }

    /// Returns per-file statistics for the cache directory.
    pub fn get_detailed_stats(&self) -> Result<DetailedCacheStats> {
        self.cache.get_detailed_stats()
    }

    /// Enables or disables offline mode.
    pub fn set_offline_mode(&mut self, offline: bool) {
        self.cache.set_offline_mode(offline);
    }

    /// Returns true if offline mode is enabled.
    pub fn is_offline(&self) -> bool {
        self.cache.is_offline()
    }

    /// Sets the maximum on-disk cache size in bytes (0 = unlimited).
    pub fn set_max_cache_size(&mut self, max_size: u64) {
        self.cache.set_max_cache_size(max_size);
    }

    /// Returns the maximum on-disk cache size in bytes.
    pub fn max_cache_size(&self) -> u64 {
        self.cache.max_cache_size()
    }

    /// Removes everything from the cache.
    pub fn clear_all(&self) -> Result<()> {
        self.cache.clear_cache()
    }

    /// Removes a single cached file by name.
    pub fn remove(&self, name: &str) -> Result<()> {
        self.cache.remove_cached(name)
    }

    /// Evicts old files until `target_size` additional bytes would fit.
    pub fn cleanup_old_files(&self, target_size: u64) -> Result<()> {
        self.cache.cleanup_cache_to_fit(target_size)
    }

    /// Lists the names of all files in the cache directory, sorted.
    pub fn list_cached_files(&self) -> Result<Vec<String>> {
        let cachedir = &self.cache.cachedir;
        let mut files = Vec::new();

        if cachedir.exists() {
            let entries = fs::read_dir(cachedir).map_err(|e| {
                DatasetsError::CacheError(format!("Failed to read cache directory: {e}"))
            })?;

            for entry in entries {
                let entry = entry.map_err(|e| {
                    DatasetsError::CacheError(format!("Failed to read directory entry: {e}"))
                })?;

                if let Some(filename) = entry.file_name().to_str() {
                    files.push(filename.to_string());
                }
            }
        }

        files.sort();
        Ok(files)
    }

    /// Returns the cache directory path.
    pub fn cachedir(&self) -> &PathBuf {
        &self.cache.cachedir
    }

    /// Returns true if a file of this name is cached.
    pub fn is_cached(&self, name: &str) -> bool {
        self.cache.is_cached(name)
    }

    /// Prints a human-readable report of the cache contents to stdout.
    pub fn print_cache_report(&self) -> Result<()> {
        let stats = self.get_detailed_stats()?;

        println!("=== Cache Report ===");
        println!("Cache Directory: {}", stats.cachedir.display());
        println!(
            "Total Size: {} ({} files)",
            stats.formatted_size(),
            stats.file_count
        );
        println!("Max Size: {}", stats.formatted_max_size());

        if stats.max_cache_size > 0 {
            println!("Usage: {:.1}%", stats.usage_percentage() * 100.0);
        }

        println!(
            "Offline Mode: {}",
            if stats.offline_mode {
                "Enabled"
            } else {
                "Disabled"
            }
        );

        if !stats.files.is_empty() {
            println!("\nCached Files:");
            for file in &stats.files {
                println!(
                    "  {} - {} ({})",
                    file.name,
                    file.formatted_size(),
                    file.formatted_modified()
                );
            }
        }

        Ok(())
    }
}

/// Summary statistics about the cache.
pub struct CacheStats {
    /// Total size of all cached files in bytes.
    pub total_size_bytes: u64,
    /// Number of files in the cache.
    pub file_count: usize,
    /// Path of the cache directory.
    pub cachedir: PathBuf,
}

/// Detailed, per-file statistics about the cache.
pub struct DetailedCacheStats {
    /// Total size of all cached files in bytes.
    pub total_size_bytes: u64,
    /// Number of files in the cache.
    pub file_count: usize,
    /// Path of the cache directory.
    pub cachedir: PathBuf,
    /// Configured maximum cache size in bytes (0 = unlimited).
    pub max_cache_size: u64,
    /// Whether offline mode is enabled.
    pub offline_mode: bool,
    /// Per-file information, sorted by size (largest first).
    pub files: Vec<CacheFileInfo>,
}

/// Information about a single cached file.
#[derive(Debug, Clone)]
pub struct CacheFileInfo {
    /// File name within the cache directory.
    pub name: String,
    /// File size in bytes.
    pub size_bytes: u64,
    /// Last modification time, if available.
    pub modified: Option<std::time::SystemTime>,
}

impl CacheStats {
    /// Returns the total size as a human-readable string.
    pub fn formatted_size(&self) -> String {
        format_bytes(self.total_size_bytes)
    }
}

impl DetailedCacheStats {
    /// Returns the total size as a human-readable string.
    pub fn formatted_size(&self) -> String {
        format_bytes(self.total_size_bytes)
    }

    /// Returns the size limit as a human-readable string.
    pub fn formatted_max_size(&self) -> String {
        if self.max_cache_size == 0 {
            "Unlimited".to_string()
        } else {
            format_bytes(self.max_cache_size)
        }
    }

    /// Returns cache usage as a fraction of the limit (0.0 when unlimited).
    pub fn usage_percentage(&self) -> f64 {
        if self.max_cache_size == 0 {
            0.0
        } else {
            self.total_size_bytes as f64 / self.max_cache_size as f64
        }
    }
}

impl CacheFileInfo {
    /// Returns the file size as a human-readable string.
    pub fn formatted_size(&self) -> String {
        format_bytes(self.size_bytes)
    }

    /// Returns the modification time as a rough "N days/hours/minutes ago"
    /// string, or "Unknown" if it cannot be determined.
    pub fn formatted_modified(&self) -> String {
        match &self.modified {
            Some(time) => {
                if let Ok(now) = std::time::SystemTime::now().duration_since(std::time::UNIX_EPOCH)
                {
                    if let Ok(modified) = time.duration_since(std::time::UNIX_EPOCH) {
                        let diff_secs = now.as_secs().saturating_sub(modified.as_secs());
                        let days = diff_secs / 86400;
                        let hours = (diff_secs % 86400) / 3600;
                        let mins = (diff_secs % 3600) / 60;

                        if days > 0 {
                            format!("{days} days ago")
                        } else if hours > 0 {
                            format!("{hours} hours ago")
                        } else if mins > 0 {
                            format!("{mins} minutes ago")
                        } else {
                            "Just now".to_string()
                        }
                    } else {
                        "Unknown".to_string()
                    }
                } else {
                    "Unknown".to_string()
                }
            }
            None => "Unknown".to_string(),
        }
    }
}

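/// Formats a byte count as a human-readable string using binary units
/// (B, KB, MB, GB).
///
/// ```ignore
/// assert_eq!(format_bytes(512), "512 B");
/// assert_eq!(format_bytes(1536), "1.5 KB");
/// ```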
#[allow(dead_code)]
fn format_bytes(bytes: u64) -> String {
    let size = bytes as f64;
    if size < 1024.0 {
        format!("{bytes} B")
    } else if size < 1024.0 * 1024.0 {
        format!("{:.1} KB", size / 1024.0)
    } else if size < 1024.0 * 1024.0 * 1024.0 {
        format!("{:.1} MB", size / (1024.0 * 1024.0))
    } else {
        format!("{:.1} GB", size / (1024.0 * 1024.0 * 1024.0))
    }
}

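/// The outcome of a batch operation: success/failure counts, per-file error
/// messages, total bytes processed, and elapsed wall-clock time.
///
/// A small sketch of how results are typically inspected (the `batch_ops`
/// value is assumed to be a configured [`BatchOperations`]):
///
/// ```ignore
/// let result = batch_ops.batch_verify_integrity(&files_and_hashes);
/// if !result.is_all_success() {
///     for (name, reason) in &result.failures {
///         eprintln!("{name}: {reason}");
///     }
/// }
/// println!("{}", result.summary());
/// ```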
#[derive(Debug, Clone)]
pub struct BatchResult {
    /// Number of items that succeeded.
    pub success_count: usize,
    /// Number of items that failed.
    pub failure_count: usize,
    /// `(name, reason)` pairs for each failure.
    pub failures: Vec<(String, String)>,
    /// Total bytes processed across all successful items.
    pub total_bytes: u64,
    /// Wall-clock time the batch took.
    pub elapsed_time: std::time::Duration,
}

impl BatchResult {
    /// Creates an empty result.
    pub fn new() -> Self {
        Self {
            success_count: 0,
            failure_count: 0,
            failures: Vec::new(),
            total_bytes: 0,
            elapsed_time: std::time::Duration::ZERO,
        }
    }

    /// Returns true if no item failed.
    pub fn is_all_success(&self) -> bool {
        self.failure_count == 0
    }

    /// Returns the success rate as a percentage (0.0 for an empty batch).
    pub fn success_rate(&self) -> f64 {
        let total = self.success_count + self.failure_count;
        if total == 0 {
            0.0
        } else {
            (self.success_count as f64 / total as f64) * 100.0
        }
    }

    /// Returns a one-line human-readable summary.
    pub fn summary(&self) -> String {
        format!(
            "Batch completed: {}/{} successful ({:.1}%), {} processed in {:.2}s",
            self.success_count,
            self.success_count + self.failure_count,
            self.success_rate(),
            format_bytes(self.total_bytes),
            self.elapsed_time.as_secs_f64()
        )
    }
}

impl Default for BatchResult {
    fn default() -> Self {
        Self::new()
    }
}

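/// Batch download, verification, cleanup, and processing on top of a
/// [`CacheManager`], with configurable parallelism and retry behavior.
///
/// A construction sketch using the builder-style configuration methods:
///
/// ```ignore
/// let ops = BatchOperations::new(CacheManager::new()?)
///     .with_parallel(true)
///     .with_retry_config(3, std::time::Duration::from_secs(1));
/// ```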
pub struct BatchOperations {
    /// Cache manager the operations run against.
    cache: CacheManager,
    /// Whether operations run in parallel where supported.
    parallel: bool,
    /// Maximum number of retries per download.
    max_retries: usize,
    /// Delay between retries.
    retry_delay: std::time::Duration,
}

impl BatchOperations {
    /// Creates batch operations with parallelism enabled and a default retry
    /// policy (3 retries, 1 s apart).
    pub fn new(cache: CacheManager) -> Self {
        Self {
            cache,
            parallel: true,
            max_retries: 3,
            retry_delay: std::time::Duration::from_millis(1000),
        }
    }

    /// Enables or disables parallel execution.
    pub fn with_parallel(mut self, parallel: bool) -> Self {
        self.parallel = parallel;
        self
    }

    /// Sets the retry count and the delay between retries.
    pub fn with_retry_config(
        mut self,
        max_retries: usize,
        retry_delay: std::time::Duration,
    ) -> Self {
        self.max_retries = max_retries;
        self.retry_delay = retry_delay;
        self
    }

    /// Downloads a batch of `(url, filename)` pairs into the cache.
    #[cfg(feature = "download")]
    pub fn batch_download(&self, urls_and_names: &[(&str, &str)]) -> BatchResult {
        let start_time = std::time::Instant::now();
        let mut result = BatchResult::new();

        if self.parallel {
            self.batch_download_parallel(urls_and_names, &mut result)
        } else {
            self.batch_download_sequential(urls_and_names, &mut result)
        }

        result.elapsed_time = start_time.elapsed();
        result
    }

    #[cfg(feature = "download")]
    fn batch_download_parallel(&self, urls_and_names: &[(&str, &str)], result: &mut BatchResult) {
        use std::fs::File;
        use std::io::Write;
        use std::sync::{Arc, Mutex};
        use std::thread;

        if let Err(e) = self.cache.cache.ensure_cachedir() {
            result.failure_count += urls_and_names.len();
            for &(_, name) in urls_and_names {
                result
                    .failures
                    .push((name.to_string(), format!("Cache setup failed: {e}")));
            }
            return;
        }

        let result_arc = Arc::new(Mutex::new(BatchResult::new()));
        let cachedir = self.cache.cache.cachedir.clone();
        let max_retries = self.max_retries;
        let retry_delay = self.retry_delay;

        let handles: Vec<_> = urls_and_names
            .iter()
            .map(|&(url, name)| {
                let result_clone = Arc::clone(&result_arc);
                let url = url.to_string();
                let name = name.to_string();
                let cachedir = cachedir.clone();

                thread::spawn(move || {
                    let mut success = false;
                    let mut last_error = String::new();
                    let mut downloaded_data = Vec::new();

                    for attempt in 0..=max_retries {
                        match download_data(&url, false) {
                            Ok(data) => {
                                // Write straight to disk: the in-memory cache
                                // is not thread-safe, so it is updated on the
                                // main thread after the workers are joined.
                                let path = cachedir.join(&name);
                                match File::create(&path) {
                                    Ok(mut file) => match file.write_all(&data) {
                                        Ok(_) => {
                                            let mut r = result_clone.lock().unwrap();
                                            r.success_count += 1;
                                            r.total_bytes += data.len() as u64;
                                            downloaded_data = data;
                                            success = true;
                                            break;
                                        }
                                        Err(e) => {
                                            last_error =
                                                format!("Failed to write cache file: {e}");
                                        }
                                    },
                                    Err(e) => {
                                        last_error = format!("Failed to create cache file: {e}");
                                    }
                                }
                            }
                            Err(e) => {
                                last_error = format!("Download failed: {e}");
                                if attempt < max_retries {
                                    thread::sleep(retry_delay);
                                }
                            }
                        }
                    }

                    if !success {
                        let mut r = result_clone.lock().unwrap();
                        r.failure_count += 1;
                        r.failures.push((name.clone(), last_error));
                    }

                    (name, success, downloaded_data)
                })
            })
            .collect();

        // Join the workers and keep their data for the in-memory cache.
        let mut successful_downloads = Vec::new();
        for handle in handles {
            if let Ok((name, success, data)) = handle.join() {
                if success && !data.is_empty() {
                    successful_downloads.push((name, data));
                }
            }
        }

        // Merge the shared tallies into the caller's result.
        if let Ok(arc_result) = result_arc.lock() {
            result.success_count += arc_result.success_count;
            result.failure_count += arc_result.failure_count;
            result.total_bytes += arc_result.total_bytes;
            result.failures.extend(arc_result.failures.clone());
        }

        for (name, data) in successful_downloads {
            let key = FileCacheKey(name);
            self.cache.cache.mem_cache.borrow_mut().insert(key, data);
        }
    }

    #[cfg(feature = "download")]
    fn batch_download_sequential(&self, urls_and_names: &[(&str, &str)], result: &mut BatchResult) {
        for &(url, name) in urls_and_names {
            let mut success = false;
            let mut last_error = String::new();

            for attempt in 0..=self.max_retries {
                match download_data(url, false) {
                    Ok(data) => match self.cache.cache.write_cached(name, &data) {
                        Ok(_) => {
                            result.success_count += 1;
                            result.total_bytes += data.len() as u64;
                            success = true;
                            break;
                        }
                        Err(e) => {
                            last_error = format!("Cache write failed: {e}");
                        }
                    },
                    Err(e) => {
                        last_error = format!("Download failed: {e}");
                        if attempt < self.max_retries {
                            std::thread::sleep(self.retry_delay);
                        }
                    }
                }
            }

            if !success {
                result.failure_count += 1;
                result.failures.push((name.to_string(), last_error));
            }
        }
    }

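    /// Verifies the SHA-256 hashes of cached files against expected values.
    /// Each entry pairs a cached filename with its expected lowercase hex
    /// digest; missing files and mismatches are reported as failures.
    ///
    /// A sketch (the digest is a truncated placeholder):
    ///
    /// ```ignore
    /// let result = ops.batch_verify_integrity(&[("iris.csv", "3b2a...")]);
    /// assert!(result.is_all_success());
    /// ```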
    pub fn batch_verify_integrity(&self, files_and_hashes: &[(&str, &str)]) -> BatchResult {
        let start_time = std::time::Instant::now();
        let mut result = BatchResult::new();

        for &(filename, expected_hash) in files_and_hashes {
            let path = self.cache.cache.get_cachedpath(filename);
            if path.exists() {
                match sha256_hash_file(&path) {
                    Ok(actual_hash) => {
                        if actual_hash == expected_hash {
                            result.success_count += 1;
                            if let Ok(metadata) = std::fs::metadata(&path) {
                                result.total_bytes += metadata.len();
                            }
                        } else {
                            result.failure_count += 1;
                            result.failures.push((
                                filename.to_string(),
                                format!(
                                    "Hash mismatch: expected {expected_hash}, got {actual_hash}"
                                ),
                            ));
                        }
                    }
                    Err(e) => {
                        result.failure_count += 1;
                        result.failures.push((
                            filename.to_string(),
                            format!("Hash computation failed: {e}"),
                        ));
                    }
                }
            } else {
                result.failure_count += 1;
                result
                    .failures
                    .push((filename.to_string(), "File not found in cache".to_string()));
            }
        }

        result.elapsed_time = start_time.elapsed();
        result
    }

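    /// Removes cached files whose names match any of the given patterns
    /// (substring or simple single-`*` glob). When `max_age_days` is given,
    /// matching files are only removed if they are older than that age;
    /// otherwise every match is removed.
    ///
    /// ```ignore
    /// // Remove all cached CSV files older than 30 days.
    /// let result = ops.selective_cleanup(&["*.csv"], Some(30))?;
    /// println!("{}", result.summary());
    /// ```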
    pub fn selective_cleanup(
        &self,
        patterns: &[&str],
        max_age_days: Option<u32>,
    ) -> Result<BatchResult> {
        let start_time = std::time::Instant::now();
        let mut result = BatchResult::new();

        let cached_files = self.cache.list_cached_files()?;
        let now = std::time::SystemTime::now();

        for filename in cached_files {
            let should_remove = patterns.iter().any(|pattern| {
                filename.contains(pattern) || matches_glob_pattern(&filename, pattern)
            });

            if should_remove {
                let filepath = self.cache.cache.get_cachedpath(&filename);

                // With an age limit, only remove files older than the limit;
                // without one, every pattern match is removed.
                let remove_due_to_age = if let Some(max_age) = max_age_days {
                    if let Ok(metadata) = std::fs::metadata(&filepath) {
                        if let Ok(modified) = metadata.modified() {
                            if let Ok(age) = now.duration_since(modified) {
                                age.as_secs() > (max_age as u64 * 24 * 3600)
                            } else {
                                false
                            }
                        } else {
                            false
                        }
                    } else {
                        false
                    }
                } else {
                    true
                };

                if remove_due_to_age {
                    // Record the size before removal; the metadata is gone
                    // once the file has been deleted.
                    let file_size = std::fs::metadata(&filepath).map(|m| m.len()).unwrap_or(0);

                    match self.cache.remove(&filename) {
                        Ok(_) => {
                            result.success_count += 1;
                            result.total_bytes += file_size;
                        }
                        Err(e) => {
                            result.failure_count += 1;
                            result
                                .failures
                                .push((filename, format!("Removal failed: {e}")));
                        }
                    }
                }
            }
        }

        result.elapsed_time = start_time.elapsed();
        Ok(result)
    }

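    /// Runs `processor` over the cached bytes of each named file, in parallel
    /// or sequentially depending on configuration, and aggregates the
    /// outcomes into a [`BatchResult`].
    ///
    /// A sketch with a trivial processor that just measures each file:
    ///
    /// ```ignore
    /// let names = vec!["file1.dat".to_string(), "file2.dat".to_string()];
    /// let result = ops.batch_process(&names, |_name, data| {
    ///     if data.is_empty() { Err("empty file") } else { Ok(data.len()) }
    /// });
    /// ```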
    pub fn batch_process<F, T, E>(&self, names: &[String], processor: F) -> BatchResult
    where
        F: Fn(&str, &[u8]) -> std::result::Result<T, E> + Sync + Send + 'static,
        E: std::fmt::Display,
        T: Send,
    {
        let start_time = std::time::Instant::now();
        let mut result = BatchResult::new();

        if self.parallel {
            self.batch_process_parallel(names, processor, &mut result)
        } else {
            self.batch_process_sequential(names, processor, &mut result)
        }

        result.elapsed_time = start_time.elapsed();
        result
    }

    fn batch_process_parallel<F, T, E>(
        &self,
        names: &[String],
        processor: F,
        result: &mut BatchResult,
    ) where
        F: Fn(&str, &[u8]) -> std::result::Result<T, E> + Sync + Send + 'static,
        E: std::fmt::Display,
        T: Send,
    {
        // Read all cached data up front; the cache itself is not thread-safe.
        let mut data_pairs = Vec::new();

        for name in names {
            match self.cache.cache.read_cached(name) {
                Ok(data) => data_pairs.push((name.clone(), data)),
                Err(e) => {
                    result.failure_count += 1;
                    result
                        .failures
                        .push((name.clone(), format!("Cache read failed: {e}")));
                }
            }
        }

        if !data_pairs.is_empty() {
            use std::sync::{Arc, Mutex};
            use std::thread;

            let parallel_result = Arc::new(Mutex::new(BatchResult::new()));
            let processor = Arc::new(processor);

            let handles: Vec<_> = data_pairs
                .into_iter()
                .map(|(name, data)| {
                    let result_clone = Arc::clone(&parallel_result);
                    let processor_clone = Arc::clone(&processor);

                    thread::spawn(move || match processor_clone(&name, &data) {
                        Ok(_) => {
                            let mut r = result_clone.lock().unwrap();
                            r.success_count += 1;
                            r.total_bytes += data.len() as u64;
                        }
                        Err(e) => {
                            let mut r = result_clone.lock().unwrap();
                            r.failure_count += 1;
                            r.failures.push((name, format!("Processing failed: {e}")));
                        }
                    })
                })
                .collect();

            for handle in handles {
                let _ = handle.join();
            }

            // Merge the per-thread tallies into the caller's result.
            let parallel_result = parallel_result.lock().unwrap();
            result.success_count += parallel_result.success_count;
            result.failure_count += parallel_result.failure_count;
            result.total_bytes += parallel_result.total_bytes;
            result.failures.extend(parallel_result.failures.clone());
        }
    }

    fn batch_process_sequential<F, T, E>(
        &self,
        names: &[String],
        processor: F,
        result: &mut BatchResult,
    ) where
        F: Fn(&str, &[u8]) -> std::result::Result<T, E>,
        E: std::fmt::Display,
    {
        for name in names {
            match self.cache.cache.read_cached(name) {
                Ok(data) => match processor(name, &data) {
                    Ok(_) => {
                        result.success_count += 1;
                        result.total_bytes += data.len() as u64;
                    }
                    Err(e) => {
                        result.failure_count += 1;
                        result
                            .failures
                            .push((name.clone(), format!("Processing failed: {e}")));
                    }
                },
                Err(e) => {
                    result.failure_count += 1;
                    result
                        .failures
                        .push((name.clone(), format!("Cache read failed: {e}")));
                }
            }
        }
    }

    /// Returns the underlying cache manager.
    pub fn cache_manager(&self) -> &CacheManager {
        &self.cache
    }

    /// Writes raw bytes into the cache under the given name.
    pub fn write_cached(&self, name: &str, data: &[u8]) -> Result<()> {
        self.cache.cache.write_cached(name, data)
    }

    /// Reads raw bytes from the cache.
    pub fn read_cached(&self, name: &str) -> Result<Vec<u8>> {
        self.cache.cache.read_cached(name)
    }

    /// Lists the names of all cached files.
    pub fn list_cached_files(&self) -> Result<Vec<String>> {
        self.cache.list_cached_files()
    }

    /// Prints a human-readable cache report to stdout.
    pub fn print_cache_report(&self) -> Result<()> {
        self.cache.print_cache_report()
    }

    /// Walks the cache and tallies file counts and sizes as a [`BatchResult`].
    pub fn get_cache_statistics(&self) -> Result<BatchResult> {
        let start_time = std::time::Instant::now();
        let mut result = BatchResult::new();

        let cached_files = self.cache.list_cached_files()?;

        for filename in cached_files {
            let filepath = self.cache.cache.get_cachedpath(&filename);
            match std::fs::metadata(&filepath) {
                Ok(metadata) => {
                    result.success_count += 1;
                    result.total_bytes += metadata.len();
                }
                Err(e) => {
                    result.failure_count += 1;
                    result
                        .failures
                        .push((filename, format!("Metadata read failed: {e}")));
                }
            }
        }

        result.elapsed_time = start_time.elapsed();
        Ok(result)
    }
}

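/// Minimal glob matching: `*` alone matches everything, and a single `*`
/// inside a pattern splits it into a required prefix and suffix. Anything
/// else is compared literally.
///
/// ```ignore
/// assert!(matches_glob_pattern("test.csv", "*.csv"));
/// assert!(!matches_glob_pattern("test.json", "*.csv"));
/// ```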
#[allow(dead_code)]
fn matches_glob_pattern(filename: &str, pattern: &str) -> bool {
    if pattern == "*" {
        return true;
    }

    if pattern.contains('*') {
        let parts: Vec<&str> = pattern.split('*').collect();
        if parts.len() == 2 {
            let prefix = parts[0];
            let suffix = parts[1];
            return filename.starts_with(prefix) && filename.ends_with(suffix);
        }
    }

    filename == pattern
}

#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    #[test]
    fn test_batch_result() {
        let mut result = BatchResult::new();
        assert_eq!(result.success_count, 0);
        assert_eq!(result.failure_count, 0);
        assert!(result.is_all_success());
        assert_eq!(result.success_rate(), 0.0);

        result.success_count = 8;
        result.failure_count = 2;
        result.total_bytes = 1024;

        assert!(!result.is_all_success());
        assert_eq!(result.success_rate(), 80.0);
        assert!(result.summary().contains("8/10 successful"));
        assert!(result.summary().contains("80.0%"));
    }

    #[test]
    fn test_batch_operations_creation() {
        let tempdir = TempDir::new().unwrap();
        let cache_manager = CacheManager::with_config(tempdir.path().to_path_buf(), 10, 3600);
        let batch_ops = BatchOperations::new(cache_manager)
            .with_parallel(false)
            .with_retry_config(2, std::time::Duration::from_millis(500));

        assert!(!batch_ops.parallel);
        assert_eq!(batch_ops.max_retries, 2);
    }

    #[test]
    fn test_selective_cleanup() {
        let tempdir = TempDir::new().unwrap();
        let cache_manager = CacheManager::with_config(tempdir.path().to_path_buf(), 10, 3600);
        let batch_ops = BatchOperations::new(cache_manager);

        let test_data = vec![0u8; 100];
        batch_ops
            .cache
            .cache
            .write_cached("test1.csv", &test_data)
            .unwrap();
        batch_ops
            .cache
            .cache
            .write_cached("test2.csv", &test_data)
            .unwrap();
        batch_ops
            .cache
            .cache
            .write_cached("data.json", &test_data)
            .unwrap();

        // Remove all CSV files regardless of age.
        let result = batch_ops.selective_cleanup(&["*.csv"], None).unwrap();

        assert_eq!(result.success_count, 2);
        assert!(!batch_ops.cache.is_cached("test1.csv"));
        assert!(!batch_ops.cache.is_cached("test2.csv"));
        assert!(batch_ops.cache.is_cached("data.json"));
    }

    #[test]
    fn test_batch_process() {
        let tempdir = TempDir::new().unwrap();
        let cache_manager = CacheManager::with_config(tempdir.path().to_path_buf(), 10, 3600);
        let batch_ops = BatchOperations::new(cache_manager).with_parallel(false);

        let test_data1 = vec![1u8; 100];
        let test_data2 = vec![2u8; 200];
        batch_ops
            .cache
            .cache
            .write_cached("file1.dat", &test_data1)
            .unwrap();
        batch_ops
            .cache
            .cache
            .write_cached("file2.dat", &test_data2)
            .unwrap();

        let files = vec!["file1.dat".to_string(), "file2.dat".to_string()];

        let result = batch_ops.batch_process(&files, |_name, data| {
            if data.is_empty() {
                Err("Empty file")
            } else {
                Ok(data.len())
            }
        });

        assert_eq!(result.success_count, 2);
        assert_eq!(result.failure_count, 0);
        assert_eq!(result.total_bytes, 300);
    }

    #[test]
    fn test_get_cache_statistics() {
        let tempdir = TempDir::new().unwrap();
        let cache_manager = CacheManager::with_config(tempdir.path().to_path_buf(), 10, 3600);
        let batch_ops = BatchOperations::new(cache_manager);

        let result = batch_ops.get_cache_statistics().unwrap();
        assert_eq!(result.success_count, 0);

        let test_data = vec![0u8; 500];
        batch_ops
            .cache
            .cache
            .write_cached("test1.dat", &test_data)
            .unwrap();
        batch_ops
            .cache
            .cache
            .write_cached("test2.dat", &test_data)
            .unwrap();

        let result = batch_ops.get_cache_statistics().unwrap();
        assert_eq!(result.success_count, 2);
        assert_eq!(result.total_bytes, 1000);
    }

    #[test]
    fn test_matches_glob_pattern() {
        assert!(matches_glob_pattern("test.csv", "*"));
        assert!(matches_glob_pattern("test.csv", "*.csv"));
        assert!(matches_glob_pattern("test.csv", "test.*"));
        assert!(matches_glob_pattern("test.csv", "test.csv"));

        assert!(!matches_glob_pattern("test.json", "*.csv"));
        assert!(!matches_glob_pattern("other.csv", "test.*"));
    }

    #[test]
    fn test_cache_manager_creation() {
        let tempdir = TempDir::new().unwrap();
        let manager = CacheManager::with_config(tempdir.path().to_path_buf(), 10, 3600);
        let stats = manager.get_stats();
        assert_eq!(stats.file_count, 0);
    }

    #[test]
    fn test_cache_stats_formatting() {
        let tempdir = TempDir::new().unwrap();
        let stats = CacheStats {
            total_size_bytes: 1024,
            file_count: 1,
            cachedir: tempdir.path().to_path_buf(),
        };

        assert_eq!(stats.formatted_size(), "1.0 KB");

        let stats_large = CacheStats {
            total_size_bytes: 1024 * 1024 * 1024,
            file_count: 1,
            cachedir: tempdir.path().to_path_buf(),
        };

        assert_eq!(stats_large.formatted_size(), "1.0 GB");
    }

    #[test]
    fn test_hash_file_name() {
        let hash1 = DatasetCache::hash_filename("test.csv");
        let hash2 = DatasetCache::hash_filename("test.csv");
        let hash3 = DatasetCache::hash_filename("different.csv");

        assert_eq!(hash1, hash2);
        assert_ne!(hash1, hash3);
        assert_eq!(hash1.len(), 64); // BLAKE3 digest, hex-encoded
    }

    #[test]
    fn test_platform_cachedir() {
        let cachedir = get_platform_cachedir();
        assert!(cachedir.is_some() || cfg!(target_os = "unknown"));

        if let Some(dir) = cachedir {
            assert!(dir.to_string_lossy().contains("scirs2-datasets"));
        }
    }

    #[test]
    fn test_cache_size_management() {
        let tempdir = TempDir::new().unwrap();
        let cache = DatasetCache::with_full_config(
            tempdir.path().to_path_buf(),
            10,
            3600,
            2048, // max cache size in bytes
            false,
        );

        let small_data1 = vec![0u8; 400];
        cache.write_cached("small1.dat", &small_data1).unwrap();

        let small_data2 = vec![0u8; 400];
        cache.write_cached("small2.dat", &small_data2).unwrap();

        let small_data3 = vec![0u8; 400];
        cache.write_cached("small3.dat", &small_data3).unwrap();

        let medium_data = vec![0u8; 800];
        cache.write_cached("medium.dat", &medium_data).unwrap();

        // The cache must stay within its configured size limit...
        let stats = cache.get_detailed_stats().unwrap();
        assert!(stats.total_size_bytes <= cache.max_cache_size());

        // ...and the most recently written file must survive.
        assert!(cache.is_cached("medium.dat"));
    }

    #[test]
    fn test_offline_mode() {
        let tempdir = TempDir::new().unwrap();
        let mut cache = DatasetCache::new(tempdir.path().to_path_buf());

        assert!(!cache.is_offline());
        cache.set_offline_mode(true);
        assert!(cache.is_offline());
    }

    #[test]
    fn test_detailed_stats() {
        let tempdir = TempDir::new().unwrap();
        let cache = DatasetCache::new(tempdir.path().to_path_buf());

        let test_data = vec![1, 2, 3, 4, 5];
        cache.write_cached("test.dat", &test_data).unwrap();

        let stats = cache.get_detailed_stats().unwrap();
        assert_eq!(stats.file_count, 1);
        assert_eq!(stats.total_size_bytes, test_data.len() as u64);
        assert_eq!(stats.files.len(), 1);
        assert_eq!(stats.files[0].name, "test.dat");
        assert_eq!(stats.files[0].size_bytes, test_data.len() as u64);
    }

    #[test]
    fn test_cache_manager() {
        let tempdir = TempDir::new().unwrap();
        let manager = CacheManager::with_config(tempdir.path().to_path_buf(), 10, 3600);

        let stats = manager.get_stats();
        assert_eq!(stats.file_count, 0);
        assert_eq!(stats.total_size_bytes, 0);

        assert_eq!(manager.cachedir(), &tempdir.path().to_path_buf());
    }

    #[test]
    fn test_format_bytes() {
        assert_eq!(format_bytes(512), "512 B");
        assert_eq!(format_bytes(1024), "1.0 KB");
        assert_eq!(format_bytes(1024 * 1024), "1.0 MB");
        assert_eq!(format_bytes(1024 * 1024 * 1024), "1.0 GB");
    }
}