use anyhow::{Context, Result};
use flate2::read::GzDecoder;
use flate2::write::GzEncoder;
use flate2::Compression;
use glob::glob;
use std::fs::{self, File};
use std::io;
use std::path::{Path, PathBuf};
use uuid::Uuid;

use crate::config::Config;
use crate::index::{FileEntry, IndexStore};
use crate::dedup::ContentDeduplicator;
use crate::delta::DeltaStorage;

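/// Coordinates compressed storage, content deduplication, and delta
/// compression behind a single `IndexStore`-backed catalog of files.
///
/// A minimal usage sketch (hypothetical: assumes `Config` implements
/// `Default` and that a `SqliteIndex` type implementing `IndexStore`
/// exists elsewhere in the crate):
///
/// ```ignore
/// let config = Config::default();
/// let index = Box::new(SqliteIndex::open(&config.index_path)?);
/// let mut manager = StorageManager::new(config, index);
/// manager.store_file(Path::new("notes.txt"), false)?;
/// ```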
pub struct StorageManager {
    config: Config,
    index: Box<dyn IndexStore>,
    deduplicator: ContentDeduplicator,
    delta_storage: DeltaStorage,
}

impl StorageManager {
    pub fn new(config: Config, index: Box<dyn IndexStore>) -> Self {
        let deduplicator = ContentDeduplicator::new();
        let delta_storage = DeltaStorage::new(
            config.similarity_threshold,
            config.delta_algorithm.clone(),
        );

        let mut manager = Self {
            config,
            index,
            deduplicator,
            delta_storage,
        };

        if let Err(e) = manager.rebuild_dedup_state() {
            eprintln!("Warning: Failed to rebuild deduplication state: {}", e);
        }

        manager
    }

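    /// Compresses `file_path` into storage. Exact-duplicate content becomes a
    /// reference entry, sufficiently similar content becomes a delta against
    /// an existing base file, and everything else is stored as a new base file.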
    pub fn store_file(&mut self, file_path: &Path, delete_source: bool) -> Result<()> {
        if !file_path.exists() {
            return Err(anyhow::anyhow!("File does not exist: {}", file_path.display()));
        }

        if !file_path.is_file() {
            return Err(anyhow::anyhow!("Path is not a file: {}", file_path.display()));
        }

        if self.index.get_file(file_path)?.is_some() {
            println!("File already stored: {}", file_path.display());
            if delete_source {
                fs::remove_file(file_path)
                    .context("Failed to delete source file")?;
                println!("Source file deleted: {}", file_path.display());
            }
            return Ok(());
        }

        let file_content = fs::read(file_path)
            .context("Failed to read file for hashing")?;
        let file_hash = ContentDeduplicator::calculate_hash(&file_content);

        if self.config.enable_deduplication {
            if let Some(existing_entry) = self.find_file_by_hash(&file_hash)? {
                let entry = self.create_reference_entry(file_path, &existing_entry)?;
                self.index.add_file(entry)?;

                self.deduplicator.add_hash_reference(&file_hash, &existing_entry.id);

                if delete_source {
                    fs::remove_file(file_path)
                        .context("Failed to delete source file")?;
                    println!("Source file deleted: {}", file_path.display());
                }

                println!("File deduplicated (reference created): {}", file_path.display());
                println!("References existing file with hash: {}", file_hash);
                return Ok(());
            }
        }

        if self.config.enable_delta_compression {
            if let Some((base_entry, similarity)) = self.find_similar_file(&file_content)? {
                if similarity >= self.config.similarity_threshold {
                    return self.store_as_delta(file_path, &file_content, &base_entry, similarity, delete_source);
                }
            }
        }

        self.store_as_base_file(file_path, &file_content, file_hash, delete_source)
    }

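    /// Extracts a stored file back to its original path ("owe" is the inverse
    /// of `store_file`) and removes it from the index, deleting the stored
    /// artifact when no other entries still reference it.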
    pub fn owe_file(&mut self, file_path: &Path) -> Result<()> {
        let entry = self.index.get_file(file_path)?
            .ok_or_else(|| anyhow::anyhow!("File not found in storage: {}", file_path.display()))?;

        if entry.is_reference.unwrap_or(false) {
            self.extract_reference_file(&entry)?;
        } else if entry.is_delta.unwrap_or(false) {
            self.extract_delta_file(&entry)?;
        } else {
            self.decompress_file(&entry.stored_path, &entry.original_path)
                .context("Failed to decompress file")?;

            let should_delete_from_dedup = if let Some(hash) = &entry.hash {
                self.deduplicator.remove_hash_reference(hash)
            } else {
                true
            };

            let has_references = self.has_references_to_storage(&entry.id)?;

            if should_delete_from_dedup && !has_references && entry.stored_path.exists() {
                fs::remove_file(&entry.stored_path)
                    .context("Failed to remove stored file")?;
            }
        }

        self.index.remove_file(file_path)?;

        println!("File extracted successfully: {}", file_path.display());
        Ok(())
    }

    pub fn list_files(&self) -> Result<Vec<FileEntry>> {
        self.index.list_files()
    }

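    /// Returns stored entries whose original path matches `pattern`, treating
    /// it as a glob when it parses and as a plain substring otherwise.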
    pub fn search_files(&self, pattern: &str) -> Result<Vec<FileEntry>> {
        let all_files = self.index.list_files()?;
        let mut matching_files = Vec::new();

        for file_entry in all_files {
            let path_str = file_entry.original_path.to_string_lossy();

            if let Ok(matcher) = glob::Pattern::new(pattern) {
                if matcher.matches(&path_str) {
                    matching_files.push(file_entry);
                }
            } else if path_str.contains(pattern) {
                matching_files.push(file_entry);
            }
        }

        Ok(matching_files)
    }

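    /// Renames a stored file's original path in the index; the stored
    /// artifact on disk is untouched.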
    pub fn rename_file(&mut self, old_path: &Path, new_path: &Path) -> Result<()> {
        if self.index.get_file(old_path)?.is_none() {
            return Err(anyhow::anyhow!("File not found in storage: {}", old_path.display()));
        }

        if self.index.get_file(new_path)?.is_some() {
            return Err(anyhow::anyhow!("Target file already exists: {}", new_path.display()));
        }

        self.index.rename_file(old_path, new_path)
            .context("Failed to rename file in index")?;

        println!("File renamed: {} -> {}", old_path.display(), new_path.display());
        Ok(())
    }

    pub fn move_file(&mut self, file_path: &Path, new_location: &Path) -> Result<()> {
        if self.index.get_file(file_path)?.is_none() {
            return Err(anyhow::anyhow!("File not found in storage: {}", file_path.display()));
        }

        let filename = file_path.file_name()
            .ok_or_else(|| anyhow::anyhow!("Invalid file path"))?;
        let new_path = new_location.join(filename);

        if self.index.get_file(&new_path)?.is_some() {
            return Err(anyhow::anyhow!("Target file already exists: {}", new_path.display()));
        }

        self.index.move_file(file_path, &new_path)
            .context("Failed to move file in index")?;

        println!("File moved: {} -> {}", file_path.display(), new_path.display());
        Ok(())
    }

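    /// Removes a file from the index and deletes its stored artifact without
    /// extracting it first.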
    pub fn delete_file(&mut self, file_path: &Path) -> Result<()> {
        let entry = self.index.remove_file(file_path)?
            .ok_or_else(|| anyhow::anyhow!("File not found in storage: {}", file_path.display()))?;

        if entry.stored_path.exists() {
            fs::remove_file(&entry.stored_path)
                .context("Failed to remove stored file")?;
        }

        println!("File deleted from storage: {}", file_path.display());
        Ok(())
    }

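    /// Decompresses a stored artifact to `output_path`, picking the algorithm
    /// from the artifact's file extension (`gz`, `zst`, or `lz4`).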
    fn decompress_file(&self, input_path: &Path, output_path: &Path) -> Result<()> {
        let algorithm = if let Some(ext) = input_path.extension() {
            match ext.to_str() {
                Some("gz") => crate::config::CompressionAlgorithm::Gzip,
                Some("zst") => crate::config::CompressionAlgorithm::Zstd,
                Some("lz4") => crate::config::CompressionAlgorithm::Lz4,
                _ => return Err(anyhow::anyhow!("Unsupported file extension: {:?}", ext)),
            }
        } else {
            return Err(anyhow::anyhow!("No file extension found"));
        };

        match algorithm {
            crate::config::CompressionAlgorithm::Gzip => {
                self.decompress_file_gzip(input_path, output_path)
            }
            crate::config::CompressionAlgorithm::Zstd => {
                self.decompress_file_zstd(input_path, output_path)
            }
            crate::config::CompressionAlgorithm::Lz4 => {
                self.decompress_file_lz4(input_path, output_path)
            }
        }
    }

    fn decompress_file_gzip(&self, input_path: &Path, output_path: &Path) -> Result<()> {
        let input_file = File::open(input_path)
            .context("Failed to open compressed file")?;
        let mut decoder = GzDecoder::new(input_file);

        if let Some(parent) = output_path.parent() {
            fs::create_dir_all(parent)
                .context("Failed to create output directory")?;
        }

        let mut output_file = File::create(output_path)
            .context("Failed to create output file")?;

        io::copy(&mut decoder, &mut output_file)
            .context("Failed to decompress file")?;

        Ok(())
    }

    fn decompress_file_zstd(&self, input_path: &Path, output_path: &Path) -> Result<()> {
        let compressed_data = fs::read(input_path)
            .context("Failed to read compressed file")?;

        let decompressed_data = zstd::decode_all(compressed_data.as_slice())
            .context("Failed to decompress with zstd")?;

        if let Some(parent) = output_path.parent() {
            fs::create_dir_all(parent)
                .context("Failed to create output directory")?;
        }

        fs::write(output_path, decompressed_data)
            .context("Failed to write decompressed file")?;

        Ok(())
    }

    fn decompress_file_lz4(&self, input_path: &Path, output_path: &Path) -> Result<()> {
        let compressed_data = fs::read(input_path)
            .context("Failed to read compressed file")?;

        let decompressed_data = lz4_flex::decompress_size_prepended(&compressed_data)
            .context("Failed to decompress with lz4")?;

        if let Some(parent) = output_path.parent() {
            fs::create_dir_all(parent)
                .context("Failed to create output directory")?;
        }

        fs::write(output_path, decompressed_data)
            .context("Failed to write decompressed file")?;

        Ok(())
    }

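    /// Stores every file named by a list file. Each non-empty line is an
    /// include path or glob pattern; lines starting with `#` are comments and
    /// lines starting with `!` are exclude patterns.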
    pub fn store_files_from_list(&mut self, list_file: &Path, delete_source: bool) -> Result<()> {
        let content = fs::read_to_string(list_file)
            .context("Failed to read file list")?;

        let mut include_patterns = Vec::new();
        let mut exclude_patterns = Vec::new();

        for line in content.lines() {
            let line = line.trim();
            if !line.is_empty() && !line.starts_with('#') {
                if line.starts_with('!') {
                    exclude_patterns.push(&line[1..]);
                } else {
                    include_patterns.push(line);
                }
            }
        }

        let mut all_files = Vec::new();

        for pattern in include_patterns {
            if pattern.contains('*') || pattern.contains('?') || pattern.contains('[') {
                match self.process_glob_pattern(pattern) {
                    Ok(files) => {
                        all_files.extend(files);
                    }
                    Err(e) => {
                        eprintln!("Failed to process glob pattern '{}': {}", pattern, e);
                    }
                }
            } else {
                let file_path = PathBuf::from(pattern);
                if file_path.exists() {
                    all_files.push(file_path);
                }
            }
        }

        let filtered_files = self.apply_exclude_patterns(all_files, &exclude_patterns)?;

        if self.config.multithread > 1 && filtered_files.len() > 1 {
            self.store_files_parallel(filtered_files, delete_source)?;
        } else {
            for file_path in filtered_files {
                if let Err(e) = self.store_file(&file_path, delete_source) {
                    eprintln!("Failed to store {}: {}", file_path.display(), e);
                }
            }
        }

        Ok(())
    }

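    /// Extracts every stored file named by a list file, using the same line
    /// format as `store_files_from_list` but matching patterns against the
    /// index instead of the filesystem.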
    pub fn owe_files_from_list(&mut self, list_file: &Path) -> Result<()> {
        let content = fs::read_to_string(list_file)
            .context("Failed to read file list")?;

        let mut include_patterns = Vec::new();
        let mut exclude_patterns = Vec::new();

        for line in content.lines() {
            let line = line.trim();
            if !line.is_empty() && !line.starts_with('#') {
                if line.starts_with('!') {
                    exclude_patterns.push(&line[1..]);
                } else {
                    include_patterns.push(line);
                }
            }
        }

        let mut all_files = Vec::new();

        for pattern in include_patterns {
            if pattern.contains('*') || pattern.contains('?') || pattern.contains('[') {
                match self.find_stored_files_by_pattern(pattern) {
                    Ok(files) => {
                        all_files.extend(files);
                    }
                    Err(e) => {
                        eprintln!("Failed to process pattern '{}': {}", pattern, e);
                    }
                }
            } else {
                let file_path = PathBuf::from(pattern);
                if self.index.get_file(&file_path)?.is_some() {
                    all_files.push(file_path);
                }
            }
        }

        let filtered_files = self.apply_exclude_patterns_to_stored(all_files, &exclude_patterns)?;

        if self.config.multithread > 1 && filtered_files.len() > 1 {
            self.owe_files_parallel(filtered_files)?;
        } else {
            for file_path in filtered_files {
                if let Err(e) = self.owe_file(&file_path) {
                    eprintln!("Failed to owe {}: {}", file_path.display(), e);
                }
            }
        }

        Ok(())
    }

    fn process_glob_pattern(&self, pattern: &str) -> Result<Vec<PathBuf>> {
        let mut files = Vec::new();

        for entry in glob(pattern).context("Failed to parse glob pattern")? {
            match entry {
                Ok(path) => {
                    if path.is_file() {
                        files.push(path);
                    }
                }
                Err(e) => {
                    eprintln!("Error reading path: {}", e);
                }
            }
        }

        if files.is_empty() {
            println!("No files matched pattern: {}", pattern);
        } else {
            println!("Found {} files matching pattern: {}", files.len(), pattern);
        }

        Ok(files)
    }

    fn find_stored_files_by_pattern(&self, pattern: &str) -> Result<Vec<PathBuf>> {
        let stored_files = self.index.list_files()?;
        let mut matching_files = Vec::new();

        let regex_pattern = self.glob_to_regex(pattern)?;
        let regex = regex::Regex::new(&regex_pattern)
            .context("Failed to compile regex pattern")?;

        for entry in stored_files {
            let path_str = entry.original_path.to_string_lossy();
            if regex.is_match(&path_str) {
                matching_files.push(entry.original_path);
            }
        }

        if matching_files.is_empty() {
            println!("No stored files matched pattern: {}", pattern);
        } else {
            println!("Found {} stored files matching pattern: {}", matching_files.len(), pattern);
        }

        Ok(matching_files)
    }

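    /// Translates a glob pattern into an anchored regex string: `**` matches
    /// across path separators (`.*`), while `*` and `?` stop at `/` or `\`,
    /// so e.g. `src/**/*.rs` becomes `^src[/\\].*[/\\][^/\\]*\.rs$`.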
    pub fn glob_to_regex(&self, pattern: &str) -> Result<String> {
        let mut regex = String::new();
        let chars: Vec<char> = pattern.chars().collect();
        let mut i = 0;

        regex.push('^');

        while i < chars.len() {
            match chars[i] {
                '*' => {
                    if i + 1 < chars.len() && chars[i + 1] == '*' {
                        regex.push_str(".*");
                        i += 1;
                    } else {
                        regex.push_str(r"[^/\\]*");
                    }
                }
                '?' => {
                    regex.push_str(r"[^/\\]");
                }
                '[' => {
                    regex.push('[');
                }
                ']' => {
                    regex.push(']');
                }
                '\\' | '/' => {
                    regex.push_str(r"[/\\]");
                }
                c if "^$(){}|+.".contains(c) => {
                    regex.push('\\');
                    regex.push(c);
                }
                c => {
                    regex.push(c);
                }
            }
            i += 1;
        }

        regex.push('$');
        Ok(regex)
    }

    fn apply_exclude_patterns(&self, files: Vec<PathBuf>, exclude_patterns: &[&str]) -> Result<Vec<PathBuf>> {
        if exclude_patterns.is_empty() {
            return Ok(files);
        }

        let original_count = files.len();
        let mut filtered_files = Vec::new();

        for file_path in files {
            let mut should_exclude = false;

            for pattern in exclude_patterns {
                if self.matches_pattern(&file_path, pattern)? {
                    should_exclude = true;
                    break;
                }
            }

            if !should_exclude {
                filtered_files.push(file_path);
            }
        }

        if original_count != filtered_files.len() {
            println!("Excluded {} files based on exclude patterns", original_count - filtered_files.len());
        }

        Ok(filtered_files)
    }

    fn apply_exclude_patterns_to_stored(&self, files: Vec<PathBuf>, exclude_patterns: &[&str]) -> Result<Vec<PathBuf>> {
        if exclude_patterns.is_empty() {
            return Ok(files);
        }

        let original_count = files.len();
        let mut filtered_files = Vec::new();

        for file_path in files {
            let mut should_exclude = false;

            for pattern in exclude_patterns {
                let regex_pattern = self.glob_to_regex(pattern)?;
                let regex = regex::Regex::new(&regex_pattern)
                    .context("Failed to compile exclude regex pattern")?;

                let path_str = file_path.to_string_lossy();
                if regex.is_match(&path_str) {
                    should_exclude = true;
                    break;
                }
            }

            if !should_exclude {
                filtered_files.push(file_path);
            }
        }

        if original_count != filtered_files.len() {
            println!("Excluded {} stored files based on exclude patterns", original_count - filtered_files.len());
        }

        Ok(filtered_files)
    }

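    /// Checks whether `file_path` is excluded by `pattern` by expanding the
    /// glob against the filesystem and testing `file_path` for membership.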
    fn matches_pattern(&self, file_path: &Path, pattern: &str) -> Result<bool> {
        for entry in glob(pattern).context("Failed to parse glob pattern")? {
            match entry {
                Ok(path) => {
                    if path == file_path {
                        return Ok(true);
                    }
                }
                Err(_) => continue,
            }
        }
        Ok(false)
    }

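    /// Extracts every stored file back to its original location, reporting
    /// per-file successes and failures as it goes.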
    pub fn owe_all_files(&mut self) -> Result<()> {
        let files = self.index.list_files()?;

        if files.is_empty() {
            println!("No files stored.");
            return Ok(());
        }

        println!("Extracting {} stored files...", files.len());

        for entry in files {
            match self.owe_file(&entry.original_path) {
                Ok(()) => {
                    println!("✓ Extracted: {}", entry.original_path.display());
                }
                Err(e) => {
                    eprintln!("✗ Failed to extract {}: {}", entry.original_path.display(), e);
                }
            }
        }

        println!("Extraction complete.");
        Ok(())
    }

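    /// Despite its name, this processes files sequentially: deduplication and
    /// delta compression depend on earlier files already being indexed, so
    /// storing cannot safely run in parallel.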
    fn store_files_parallel(&mut self, files: Vec<PathBuf>, delete_source: bool) -> Result<()> {
        println!("Processing {} files sequentially to enable deduplication and delta compression...", files.len());

        let mut success_count = 0;
        for file_path in files {
            match self.store_file(&file_path, delete_source) {
                Ok(()) => {
                    success_count += 1;
                }
                Err(e) => {
                    eprintln!("Failed to store {}: {}", file_path.display(), e);
                }
            }
        }

        println!("Stored {} files with deduplication and delta compression enabled", success_count);
        Ok(())
    }

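    /// Extracts files concurrently on a rayon thread pool. Decompression runs
    /// in parallel via the static helpers, which handle plain compressed
    /// entries only; index updates happen afterwards on the calling thread.
    /// The global pool build result is ignored because it fails if a pool has
    /// already been initialized.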
    fn owe_files_parallel(&mut self, files: Vec<PathBuf>) -> Result<()> {
        use rayon::prelude::*;

        let _ = rayon::ThreadPoolBuilder::new()
            .num_threads(self.config.multithread)
            .build_global();

        let mut entries = Vec::new();
        for file_path in &files {
            if let Some(entry) = self.index.get_file(file_path)? {
                entries.push(entry);
            }
        }

        let results: Vec<Result<PathBuf>> = entries
            .par_iter()
            .map(|entry| {
                Self::decompress_file_static(&entry.stored_path, &entry.original_path)
                    .map(|_| entry.original_path.clone())
            })
            .collect();

        let mut success_count = 0;
        for (i, result) in results.into_iter().enumerate() {
            match result {
                Ok(file_path) => {
                    if let Err(e) = fs::remove_file(&entries[i].stored_path) {
                        eprintln!("Failed to remove stored file {}: {}", entries[i].stored_path.display(), e);
                    }

                    if let Err(e) = self.index.remove_file(&file_path) {
                        eprintln!("Failed to remove from index {}: {}", file_path.display(), e);
                    } else {
                        success_count += 1;
                        println!("File extracted successfully: {}", file_path.display());
                    }
                }
                Err(e) => {
                    eprintln!("Failed to extract file: {}", e);
                }
            }
        }

        println!("Extracted {} files using {} threads", success_count, self.config.multithread);
        Ok(())
    }

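    /// Static counterpart of `decompress_file` for use inside rayon workers,
    /// where no `&self` is available; dispatches on the artifact's extension.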
    fn decompress_file_static(input_path: &Path, output_path: &Path) -> Result<()> {
        let algorithm = if let Some(ext) = input_path.extension() {
            match ext.to_str() {
                Some("gz") => crate::config::CompressionAlgorithm::Gzip,
                Some("zst") => crate::config::CompressionAlgorithm::Zstd,
                Some("lz4") => crate::config::CompressionAlgorithm::Lz4,
                _ => return Err(anyhow::anyhow!("Unsupported file extension: {:?}", ext)),
            }
        } else {
            return Err(anyhow::anyhow!("No file extension found"));
        };

        match algorithm {
            crate::config::CompressionAlgorithm::Gzip => {
                Self::decompress_file_gzip_static(input_path, output_path)
            }
            crate::config::CompressionAlgorithm::Zstd => {
                Self::decompress_file_zstd_static(input_path, output_path)
            }
            crate::config::CompressionAlgorithm::Lz4 => {
                Self::decompress_file_lz4_static(input_path, output_path)
            }
        }
    }

    fn decompress_file_gzip_static(input_path: &Path, output_path: &Path) -> Result<()> {
        let input_file = File::open(input_path)
            .context("Failed to open compressed file")?;
        let mut decoder = GzDecoder::new(input_file);

        if let Some(parent) = output_path.parent() {
            fs::create_dir_all(parent)
                .context("Failed to create output directory")?;
        }

        let mut output_file = File::create(output_path)
            .context("Failed to create output file")?;

        io::copy(&mut decoder, &mut output_file)
            .context("Failed to decompress file")?;

        Ok(())
    }

    fn decompress_file_zstd_static(input_path: &Path, output_path: &Path) -> Result<()> {
        let compressed_data = fs::read(input_path)
            .context("Failed to read compressed file")?;

        let decompressed_data = zstd::decode_all(compressed_data.as_slice())
            .context("Failed to decompress with zstd")?;

        if let Some(parent) = output_path.parent() {
            fs::create_dir_all(parent)
                .context("Failed to create output directory")?;
        }

        fs::write(output_path, decompressed_data)
            .context("Failed to write decompressed file")?;

        Ok(())
    }

    fn decompress_file_lz4_static(input_path: &Path, output_path: &Path) -> Result<()> {
        let compressed_data = fs::read(input_path)
            .context("Failed to read compressed file")?;

        let decompressed_data = lz4_flex::decompress_size_prepended(&compressed_data)
            .context("Failed to decompress with lz4")?;

        if let Some(parent) = output_path.parent() {
            fs::create_dir_all(parent)
                .context("Failed to create output directory")?;
        }

        fs::write(output_path, decompressed_data)
            .context("Failed to write decompressed file")?;

        Ok(())
    }

    pub fn get_dedup_stats(&self) -> crate::dedup::DedupStats {
        self.deduplicator.get_stats()
    }

    pub fn get_delta_stats(&self) -> crate::delta::DeltaStats {
        self.delta_storage.get_stats()
    }

    pub fn is_dedup_enabled(&self) -> bool {
        self.config.enable_deduplication
    }

    pub fn is_delta_enabled(&self) -> bool {
        self.config.enable_delta_compression
    }

    pub fn get_similarity_threshold(&self) -> f32 {
        self.config.similarity_threshold
    }

    fn find_file_by_hash(&self, hash: &str) -> Result<Option<FileEntry>> {
        let all_files = self.index.list_files()?;
        for file in all_files {
            if let Some(file_hash) = &file.hash {
                if file_hash == hash {
                    if !file.is_reference.unwrap_or(false) && !file.is_delta.unwrap_or(false) {
                        return Ok(Some(file));
                    }
                }
            }
        }
        Ok(None)
    }

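    /// Scans all stored base files (skipping references and deltas) and
    /// returns the one most similar to `content`, together with its
    /// similarity score; the caller checks the score against the threshold.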
    fn find_similar_file(&self, content: &[u8]) -> Result<Option<(FileEntry, f32)>> {
        let all_files = self.index.list_files()?;
        let mut best_match: Option<(FileEntry, f32)> = None;

        for file in all_files {
            if file.is_reference.unwrap_or(false) || file.is_delta.unwrap_or(false) {
                continue;
            }

            if let Ok(stored_content) = self.read_stored_file_content(&file) {
                let similarity = self.delta_storage.calculate_similarity(content, &stored_content);

                if let Some((_, current_best)) = &best_match {
                    if similarity > *current_best {
                        best_match = Some((file, similarity));
                    }
                } else {
                    best_match = Some((file, similarity));
                }
            }
        }

        Ok(best_match)
    }

    fn read_stored_file_content(&self, entry: &FileEntry) -> Result<Vec<u8>> {
        let compressed_data = fs::read(&entry.stored_path)
            .context("Failed to read stored file")?;

        match entry.compression_algorithm {
            crate::config::CompressionAlgorithm::Gzip => {
                let mut decoder = GzDecoder::new(compressed_data.as_slice());
                let mut content = Vec::new();
                std::io::Read::read_to_end(&mut decoder, &mut content)
                    .context("Failed to decompress gzip file")?;
                Ok(content)
            }
            crate::config::CompressionAlgorithm::Zstd => {
                zstd::decode_all(compressed_data.as_slice())
                    .context("Failed to decompress zstd file")
            }
            crate::config::CompressionAlgorithm::Lz4 => {
                lz4_flex::decompress_size_prepended(&compressed_data)
                    .context("Failed to decompress lz4 file")
            }
        }
    }

    fn create_reference_entry(&self, file_path: &Path, existing_entry: &FileEntry) -> Result<FileEntry> {
        let id = Uuid::new_v4().to_string();
        let mut entry = FileEntry::new(
            id,
            file_path.to_path_buf(),
            existing_entry.stored_path.clone(),
            existing_entry.file_size,
            0,
            existing_entry.compression_algorithm.clone(),
        );

        entry.is_reference = Some(true);
        entry.base_storage_id = Some(existing_entry.id.clone());
        entry.hash = existing_entry.hash.clone();

        Ok(entry)
    }

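    /// Stores `content` as a compressed binary delta against `base_entry`
    /// rather than as a full copy, recording the base's storage ID and the
    /// similarity score so the file can be reconstructed on extraction.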
    fn store_as_delta(
        &mut self,
        file_path: &Path,
        content: &[u8],
        base_entry: &FileEntry,
        similarity: f32,
        delete_source: bool,
    ) -> Result<()> {
        let base_content = self.read_stored_file_content(base_entry)?;

        let delta_data = self.delta_storage.create_delta(&base_content, content)?;

        let id = Uuid::new_v4().to_string();
        let extension = self.config.compression_algorithm.file_extension();
        let stored_filename = format!("{}.{}", id, extension);
        let stored_path = self.config.storage_path.join(&stored_filename);

        fs::create_dir_all(&self.config.storage_path)
            .context("Failed to create storage directory")?;

        let compressed_size = self.compress_data(&delta_data, &stored_path)
            .context("Failed to compress delta data")?;

        let mut entry = FileEntry::new(
            id,
            file_path.to_path_buf(),
            stored_path,
            content.len() as u64,
            compressed_size,
            self.config.compression_algorithm.clone(),
        );

        entry.is_delta = Some(true);
        entry.base_storage_id = Some(base_entry.id.clone());
        entry.similarity_score = Some(similarity);
        entry.hash = Some(ContentDeduplicator::calculate_hash(content));

        self.index.add_file(entry)
            .context("Failed to add delta file to index")?;

        if delete_source {
            fs::remove_file(file_path)
                .context("Failed to delete source file")?;
            println!("Source file deleted: {}", file_path.display());
        }

        println!("File stored as delta: {}", file_path.display());
        println!("Similarity: {:.1}%, Delta size: {:.1}%",
            similarity * 100.0,
            (compressed_size as f64 / content.len() as f64) * 100.0);

        Ok(())
    }

    fn store_as_base_file(
        &mut self,
        file_path: &Path,
        content: &[u8],
        hash: String,
        delete_source: bool,
    ) -> Result<()> {
        let id = Uuid::new_v4().to_string();
        let extension = self.config.compression_algorithm.file_extension();
        let stored_filename = format!("{}.{}", id, extension);
        let stored_path = self.config.storage_path.join(&stored_filename);

        fs::create_dir_all(&self.config.storage_path)
            .context("Failed to create storage directory")?;

        let compressed_size = self.compress_data(content, &stored_path)
            .context("Failed to compress file")?;

        let mut entry = FileEntry::new(
            id.clone(),
            file_path.to_path_buf(),
            stored_path,
            content.len() as u64,
            compressed_size,
            self.config.compression_algorithm.clone(),
        );

        entry.hash = Some(hash.clone());

        if self.config.enable_deduplication {
            self.deduplicator.register_file(hash, id);
        }

        self.index.add_file(entry)
            .context("Failed to add file to index")?;

        if delete_source {
            fs::remove_file(file_path)
                .context("Failed to delete source file")?;
            println!("Source file deleted: {}", file_path.display());
        }

        println!("File stored successfully: {}", file_path.display());
        println!("Compression ratio: {:.1}%",
            (compressed_size as f64 / content.len() as f64) * 100.0);

        Ok(())
    }

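    /// Compresses `data` to `output_path` with the configured algorithm and
    /// returns the compressed size in bytes.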
    fn compress_data(&self, data: &[u8], output_path: &Path) -> Result<u64> {
        match self.config.compression_algorithm {
            crate::config::CompressionAlgorithm::Gzip => {
                let output_file = File::create(output_path)
                    .context("Failed to create output file")?;
                let mut encoder = GzEncoder::new(output_file, Compression::new(self.config.compression_level as u32));
                std::io::Write::write_all(&mut encoder, data)
                    .context("Failed to write compressed data")?;
                encoder.finish()
                    .context("Failed to finish compression")?;

                Ok(fs::metadata(output_path)?.len())
            }
            crate::config::CompressionAlgorithm::Zstd => {
                let compressed_data = zstd::encode_all(data, self.config.compression_level as i32)
                    .context("Failed to compress with zstd")?;
                fs::write(output_path, &compressed_data)
                    .context("Failed to write compressed file")?;

                Ok(compressed_data.len() as u64)
            }
            crate::config::CompressionAlgorithm::Lz4 => {
                let compressed_data = lz4_flex::compress_prepend_size(data);
                fs::write(output_path, &compressed_data)
                    .context("Failed to write compressed file")?;

                Ok(compressed_data.len() as u64)
            }
        }
    }

    fn extract_reference_file(&mut self, entry: &FileEntry) -> Result<()> {
        self.decompress_file(&entry.stored_path, &entry.original_path)
            .context("Failed to decompress reference file")?;

        if let Some(base_storage_id) = &entry.base_storage_id {
            let has_other_references = self.has_other_references_to_storage(base_storage_id, &entry.original_path)?;

            let should_delete_from_dedup = if let Some(hash) = &entry.hash {
                self.deduplicator.remove_hash_reference(hash)
            } else {
                false
            };

            if !has_other_references && should_delete_from_dedup && entry.stored_path.exists() {
                fs::remove_file(&entry.stored_path)
                    .context("Failed to remove stored file")?;
            }
        }

        Ok(())
    }

    fn extract_delta_file(&mut self, entry: &FileEntry) -> Result<()> {
        let base_storage_id = entry.base_storage_id.as_ref()
            .ok_or_else(|| anyhow::anyhow!("Delta file missing base storage ID"))?;

        let base_entry = self.find_file_by_storage_id(base_storage_id)?
            .ok_or_else(|| anyhow::anyhow!("Base file not found for delta: {}", base_storage_id))?;

        let base_content = self.read_stored_file_content(&base_entry)?;

        let delta_data = self.read_stored_file_content(entry)?;

        let reconstructed_content = self.delta_storage.apply_delta(&base_content, &delta_data)?;

        if let Some(parent) = entry.original_path.parent() {
            fs::create_dir_all(parent)
                .context("Failed to create output directory")?;
        }

        fs::write(&entry.original_path, reconstructed_content)
            .context("Failed to write reconstructed file")?;

        if entry.stored_path.exists() {
            fs::remove_file(&entry.stored_path)
                .context("Failed to remove delta file")?;
        }

        Ok(())
    }

    fn find_file_by_storage_id(&self, storage_id: &str) -> Result<Option<FileEntry>> {
        let all_files = self.index.list_files()?;
        for file in all_files {
            if file.id == storage_id {
                return Ok(Some(file));
            }
        }
        Ok(None)
    }

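    /// Rebuilds the in-memory deduplication table from the index at startup,
    /// counting how many entries reference each base file's content hash.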
    fn rebuild_dedup_state(&mut self) -> Result<()> {
        let all_files = self.index.list_files()?;
        let mut dedup_entries = Vec::new();

        for file in all_files {
            if let Some(hash) = &file.hash {
                if !file.is_reference.unwrap_or(false) && !file.is_delta.unwrap_or(false) {
                    let ref_count = self.count_references_for_hash(hash)?;
                    dedup_entries.push((file.id.clone(), hash.clone(), ref_count));
                }
            }
        }

        self.deduplicator.rebuild_from_index(dedup_entries)?;
        Ok(())
    }

    fn count_references_for_hash(&self, target_hash: &str) -> Result<u32> {
        let all_files = self.index.list_files()?;
        let mut count = 0;

        for file in all_files {
            if let Some(hash) = &file.hash {
                if hash == target_hash {
                    count += 1;
                }
            }
        }

        Ok(count)
    }

    fn has_references_to_storage(&self, storage_id: &str) -> Result<bool> {
        let all_files = self.index.list_files()?;

        for file in all_files {
            if file.is_reference.unwrap_or(false) || file.is_delta.unwrap_or(false) {
                if let Some(base_id) = &file.base_storage_id {
                    if base_id == storage_id {
                        return Ok(true);
                    }
                }
            }
        }

        Ok(false)
    }

    fn has_other_references_to_storage(&self, storage_id: &str, exclude_path: &Path) -> Result<bool> {
        let all_files = self.index.list_files()?;

        for file in all_files {
            if file.original_path == exclude_path {
                continue;
            }

            if file.is_reference.unwrap_or(false) || file.is_delta.unwrap_or(false) {
                if let Some(base_id) = &file.base_storage_id {
                    if base_id == storage_id {
                        return Ok(true);
                    }
                }
            }
        }

        Ok(false)
    }
}