1use crate::error::{Result, SyncError};
3use crate::utils::format_bytes;
4use serde::{Deserialize, Serialize};
5use sha2::{Digest, Sha256};
6use std::path::{Path, PathBuf};
7use std::time::{Duration, SystemTime};
8
9#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Hash)]
11pub enum DiffAction {
12 Upload,
14 Download,
16 Delete,
18 Conflict,
20 Move,
22 Update,
24 Unchanged,
26 CreateDir,
28}
29
30impl DiffAction {
31 pub fn as_str(&self) -> &'static str {
32 match self {
33 Self::Upload => "upload",
34 Self::Download => "download",
35 Self::Delete => "delete",
36 Self::Conflict => "conflict",
37 Self::Move => "move",
38 Self::Update => "update",
39 Self::Unchanged => "unchanged",
40 Self::CreateDir => "create_dir",
41 }
42 }
43
44 pub fn emoji(&self) -> &'static str {
45 match self {
46 Self::Upload => "📤",
47 Self::Download => "📥",
48 Self::Delete => "🗑️",
49 Self::Conflict => "⚠️",
50 Self::Move => "📦",
51 Self::Update => "🔄",
52 Self::Unchanged => "✅",
53 Self::CreateDir => "📁",
54 }
55 }
56
57 pub fn is_transfer(&self) -> bool {
58 matches!(self, Self::Upload | Self::Download)
59 }
60
61 pub fn is_destructive(&self) -> bool {
62 matches!(self, Self::Delete)
63 }
64
65 pub fn requires_user_action(&self) -> bool {
66 matches!(self, Self::Conflict)
67 }
68}
69
70#[derive(Debug, Clone, Serialize, Deserialize)]
72pub struct FileDiff {
73 pub path: String,
75 pub action: DiffAction,
77 pub source_info: Option<FileMetadata>,
79 pub target_info: Option<FileMetadata>,
81 pub change_details: ChangeDetails,
83 pub size_diff: i64,
85 pub is_large_file: bool,
87 pub requires_chunking: bool,
89 pub requires_encryption: bool,
91 pub priority: u8,
93 pub estimated_duration_ms: u64,
95 pub last_processed: Option<SystemTime>,
97 pub retry_count: u32,
99 pub error_message: Option<String>,
101 pub tags: Vec<String>,
103 pub checksum_type: ChecksumType,
105 pub source_checksum: Option<String>,
107 pub target_checksum: Option<String>,
109 pub diff_id: String,
111 pub created_at: SystemTime,
113}
114
115impl FileDiff {
116 pub fn new(
117 path: String,
118 action: DiffAction,
119 source_info: Option<FileMetadata>,
120 target_info: Option<FileMetadata>,
121 ) -> Self {
122 let size_diff = Self::calculate_size_diff(&source_info, &target_info);
123 let is_large_file = Self::is_large_file(size_diff);
124
125 Self {
126 path,
127 action,
128 source_info,
129 target_info,
130 change_details: ChangeDetails::default(),
131 size_diff,
132 is_large_file,
133 requires_chunking: is_large_file,
134 requires_encryption: false,
135 priority: Self::calculate_priority(action, size_diff),
136 estimated_duration_ms: Self::estimate_duration(size_diff, is_large_file),
137 last_processed: None,
138 retry_count: 0,
139 error_message: None,
140 tags: Vec::new(),
141 checksum_type: ChecksumType::Sha256,
142 source_checksum: None,
143 target_checksum: None,
144 diff_id: Self::generate_diff_id(),
145 created_at: SystemTime::now(),
146 }
147 }
148
149 pub fn upload(
150 path: String,
151 source_info: FileMetadata,
152 target_info: Option<FileMetadata>,
153 ) -> Self {
154 Self::new(path, DiffAction::Upload, Some(source_info), target_info)
155 }
156
157 pub fn download(
158 path: String,
159 target_info: FileMetadata,
160 source_info: Option<FileMetadata>,
161 ) -> Self {
162 Self::new(path, DiffAction::Download, source_info, Some(target_info))
163 }
164
165 pub fn delete(path: String, target_info: FileMetadata) -> Self {
166 Self::new(path, DiffAction::Delete, None, Some(target_info))
167 }
168
169 pub fn conflict(path: String, source_info: FileMetadata, target_info: FileMetadata) -> Self {
170 let mut diff = Self::new(
171 path,
172 DiffAction::Conflict,
173 Some(source_info),
174 Some(target_info),
175 );
176 diff.priority = 100; diff
178 }
179
180 pub fn update(path: String, source_info: FileMetadata, target_info: FileMetadata) -> Self {
181 Self::new(
182 path,
183 DiffAction::Update,
184 Some(source_info),
185 Some(target_info),
186 )
187 }
188
189 pub fn unchanged(path: String, source_info: FileMetadata, target_info: FileMetadata) -> Self {
190 Self::new(
191 path,
192 DiffAction::Unchanged,
193 Some(source_info),
194 Some(target_info),
195 )
196 }
197
198 pub fn create_dir(path: String, source_info: FileMetadata) -> Self {
199 Self::new(path, DiffAction::CreateDir, Some(source_info), None)
200 }
201
202 pub fn move_file(
203 from: String,
204 to: String,
205 source_info: FileMetadata,
206 target_info: FileMetadata,
207 ) -> Self {
208 let mut diff = Self::new(to, DiffAction::Move, Some(source_info), Some(target_info));
209 diff.change_details.old_path = Some(from);
210 diff
211 }
212
213 fn calculate_size_diff(
214 source_info: &Option<FileMetadata>,
215 target_info: &Option<FileMetadata>,
216 ) -> i64 {
217 match (source_info, target_info) {
218 (Some(src), Some(dst)) => src.size as i64 - dst.size as i64,
219 (Some(src), None) => src.size as i64,
220 (None, Some(dst)) => -(dst.size as i64),
221 (None, None) => 0,
222 }
223 }
224
225 fn is_large_file(size_diff: i64) -> bool {
226 size_diff.abs() > 1024 * 1024 * 100 }
228
229 fn calculate_priority(action: DiffAction, size_diff: i64) -> u8 {
230 match action {
231 DiffAction::Conflict => 100,
232 DiffAction::Delete => 90,
233 DiffAction::Update if size_diff.abs() < 1024 * 1024 => 80, DiffAction::Upload | DiffAction::Download => {
235 if size_diff.abs() < 1024 * 1024 {
237 70 } else if size_diff.abs() < 1024 * 1024 * 10 {
239 60 } else {
241 50 }
243 }
244 DiffAction::Move => 40,
245 DiffAction::CreateDir => 75, DiffAction::Unchanged => 10,
247 _ => 30,
248 }
249 }
250
251 fn estimate_duration(size_diff: i64, is_large_file: bool) -> u64 {
252 let bytes_per_second = 1024 * 1024;
254 let duration_secs = (size_diff.abs() as f64 / bytes_per_second as f64).ceil() as u64;
255
256 if is_large_file {
257 duration_secs * 1000 + 5000
259 } else {
260 duration_secs * 1000
261 }
262 }
263
264 fn generate_diff_id() -> String {
265 use uuid::Uuid;
266 format!("diff_{}", Uuid::new_v4().simple())
267 }
268
269 pub fn calculate_similarity(&self) -> f64 {
270 match (&self.source_info, &self.target_info) {
272 (Some(src), Some(dst)) => {
273 if src.size == dst.size {
274 let time_diff = (src.modified - dst.modified).abs();
276 if time_diff < 2 {
277 0.95 } else {
279 0.5 }
281 } else {
282 0.1 }
284 }
285 _ => 0.0, }
287 }
288
289 pub fn is_similar(&self, threshold: f64) -> bool {
290 self.calculate_similarity() >= threshold
291 }
292
293 pub fn should_retry(&self, max_retries: u32) -> bool {
294 self.retry_count < max_retries
295 }
296
297 pub fn mark_retry(&mut self, error: Option<String>) {
298 self.retry_count += 1;
299 self.error_message = error;
300 self.last_processed = Some(SystemTime::now());
301 }
302
303 pub fn mark_success(&mut self) {
304 self.last_processed = Some(SystemTime::now());
305 self.retry_count = 0;
306 self.error_message = None;
307 }
308
309 pub fn is_expired(&self, timeout: Duration) -> bool {
310 if let Some(last_processed) = self.last_processed {
311 last_processed.elapsed().unwrap_or_default() > timeout
312 } else {
313 false
314 }
315 }
316
317 pub fn total_size(&self) -> u64 {
318 match &self.source_info {
319 Some(info) => info.size,
320 None => 0,
321 }
322 }
323
324 pub fn transfer_size(&self) -> u64 {
325 if self.action.is_transfer() {
326 match &self.source_info {
327 Some(info) => info.size,
328 None => 0,
329 }
330 } else {
331 0
332 }
333 }
334
335 pub fn human_readable_size(&self) -> String {
336 format_bytes(self.total_size())
337 }
338
339 pub fn summary(&self) -> String {
340 let action_emoji = self.action.emoji();
341 let size_str = self.human_readable_size();
342
343 match self.action {
344 DiffAction::Upload => format!("{} 上传: {} ({})", action_emoji, self.path, size_str),
345 DiffAction::Download => format!("{} 下载: {} ({})", action_emoji, self.path, size_str),
346 DiffAction::Delete => format!("{} 删除: {}", action_emoji, self.path),
347 DiffAction::Conflict => format!("{} 冲突: {}", action_emoji, self.path),
348 DiffAction::Move => {
349 if let Some(old_path) = &self.change_details.old_path {
350 format!("{} 移动: {} -> {}", action_emoji, old_path, self.path)
351 } else {
352 format!("{} 移动: {}", action_emoji, self.path)
353 }
354 }
355 DiffAction::Update => format!("{} 更新: {} ({})", action_emoji, self.path, size_str),
356 DiffAction::CreateDir => format!("{} 创建目录: {}", action_emoji, self.path),
357 DiffAction::Unchanged => format!("{} 未变: {}", action_emoji, self.path),
358 }
359 }
360
361 pub fn to_json(&self) -> Result<String> {
362 serde_json::to_string_pretty(self)
363 .map_err(|e| crate::error::SyncError::Serialization(e.into()))
364 }
365
366 pub fn from_json(json: &str) -> Result<Self> {
367 serde_json::from_str(json).map_err(|e| crate::error::SyncError::Serialization(e.into()))
368 }
369
370 pub fn is_encrypted(&self) -> bool {
371 self.source_info
372 .as_ref()
373 .map_or(false, |info| info.is_encrypted)
374 || self
375 .target_info
376 .as_ref()
377 .map_or(false, |info| info.is_encrypted)
378 }
379
380 pub fn requires_decryption(&self) -> bool {
381 self.requires_encryption || self.is_encrypted()
382 }
383}
384
385#[derive(Debug, Clone, Serialize, Deserialize)]
387pub struct FileMetadata {
388 pub path: PathBuf,
389 pub size: u64,
390 pub modified: i64,
391 pub created: i64,
392 pub accessed: i64,
393 pub permissions: u32,
394 pub is_dir: bool,
395 pub is_symlink: bool,
396 pub is_hidden: bool,
397 pub is_encrypted: bool,
398 pub mime_type: Option<String>,
399 pub file_hash: Option<String>,
400 pub chunk_hashes: Vec<String>,
401 pub metadata_hash: String,
402 pub storage_class: Option<String>,
403 pub encryption_key_id: Option<String>,
404 pub version: Option<String>,
405 pub tags: Vec<String>,
406 pub custom_metadata: std::collections::HashMap<String, String>,
407}
408
409impl FileMetadata {
410 pub fn new(path: PathBuf) -> Self {
411 let now = SystemTime::now()
412 .duration_since(SystemTime::UNIX_EPOCH)
413 .unwrap_or_default()
414 .as_secs() as i64;
415
416 Self {
417 path,
418 size: 0,
419 modified: now,
420 created: now,
421 accessed: now,
422 permissions: 0o644,
423 is_dir: false,
424 is_symlink: false,
425 is_hidden: false,
426 is_encrypted: false,
427 mime_type: None,
428 file_hash: None,
429 chunk_hashes: Vec::new(),
430 metadata_hash: String::new(),
431 storage_class: None,
432 encryption_key_id: None,
433 version: None,
434 tags: Vec::new(),
435 custom_metadata: std::collections::HashMap::new(),
436 }
437 }
438
439 pub fn from_path(path: &Path) -> Result<Self> {
440 let metadata = std::fs::metadata(path)?;
441
442 let mut file_metadata = Self::new(path.to_path_buf());
443
444 file_metadata.size = metadata.len();
445 file_metadata.is_dir = metadata.is_dir();
446 file_metadata.is_symlink = metadata.file_type().is_symlink();
447
448 if let Ok(modified) = metadata.modified() {
449 file_metadata.modified = modified
450 .duration_since(SystemTime::UNIX_EPOCH)
451 .unwrap_or_default()
452 .as_secs() as i64;
453 }
454
455 if let Ok(created) = metadata.created() {
456 file_metadata.created = created
457 .duration_since(SystemTime::UNIX_EPOCH)
458 .unwrap_or_default()
459 .as_secs() as i64;
460 }
461
462 if let Ok(accessed) = metadata.accessed() {
463 file_metadata.accessed = accessed
464 .duration_since(SystemTime::UNIX_EPOCH)
465 .unwrap_or_default()
466 .as_secs() as i64;
467 }
468
469 if let Some(file_name) = path.file_name() {
471 if file_name.to_string_lossy().starts_with('.') {
472 file_metadata.is_hidden = true;
473 }
474 }
475
476 if let Some(extension) = path.extension() {
478 file_metadata.mime_type = Some(detect_mime_type(extension));
479 }
480
481 Ok(file_metadata)
482 }
483
484 pub fn calculate_hash(&mut self, algorithm: ChecksumType) -> Result<()> {
485 use sha2::{Digest, Sha256};
486 use std::fs::File;
487 use std::io::Read;
488
489 if self.is_dir {
490 self.file_hash = Some(String::new());
491 return Ok(());
492 }
493
494 let mut file = File::open(&self.path)?;
495 let mut hasher = Sha256::new();
496 let mut buffer = [0; 8192];
497
498 loop {
499 let bytes_read = file.read(&mut buffer)?;
500 if bytes_read == 0 {
501 break;
502 }
503 hasher.update(&buffer[..bytes_read]);
504 }
505
506 let hash = format!("{:x}", hasher.finalize());
507 self.file_hash = Some(hash);
508
509 Ok(())
510 }
511
512 pub fn update_metadata_hash(&mut self) {
513 let mut hasher = Sha256::new();
514 hasher.update(self.path.to_string_lossy().as_bytes());
515 hasher.update(&self.size.to_be_bytes());
516 hasher.update(&self.modified.to_be_bytes());
517 hasher.update(&self.permissions.to_be_bytes());
518
519 if let Some(hash) = &self.file_hash {
520 hasher.update(hash.as_bytes());
521 }
522
523 self.metadata_hash = format!("{:x}", hasher.finalize());
524 }
525}
526
527#[derive(Debug, Clone, Serialize, Deserialize, Default)]
529pub struct ChangeDetails {
530 pub old_path: Option<String>,
532 pub content_change: ContentChangeType,
534 pub metadata_changed: bool,
536 pub permissions_changed: bool,
538 pub timestamps_changed: bool,
540 pub rename_confidence: u8,
542 pub change_percentage: u8,
544 pub changed_ranges: Vec<(u64, u64)>,
546 pub lines_added: Option<usize>,
548 pub lines_removed: Option<usize>,
550 pub binary_changes: Option<BinaryChanges>,
552}
553
554#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default)]
556pub enum ContentChangeType {
557 #[default]
558 Unknown,
559 Added,
561 Removed,
563 Rewritten,
565 Partial,
567 MetadataOnly,
569 Moved,
571 Unchanged,
573}
574
575#[derive(Debug, Clone, Serialize, Deserialize)]
577pub struct BinaryChanges {
578 pub different_bytes: u64,
580 pub same_bytes: u64,
582 pub change_patterns: Vec<ChangePattern>,
584}
585
586#[derive(Debug, Clone, Serialize, Deserialize)]
587pub struct ChangePattern {
588 pub start: u64,
589 pub end: u64,
590 pub pattern_type: PatternType,
591}
592
593#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
594pub enum PatternType {
595 Inserted,
596 Deleted,
597 Modified,
598 Moved,
599}
600
601#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
603pub enum ChecksumType {
604 Md5,
605 Sha1,
606 Sha256,
607 Sha512,
608 Blake3,
609 Crc32,
610 Crc64,
611}
612
613impl ChecksumType {
614 pub fn hash_size(&self) -> usize {
615 match self {
616 Self::Md5 => 16,
617 Self::Sha1 => 20,
618 Self::Sha256 => 32,
619 Self::Sha512 => 64,
620 Self::Blake3 => 32,
621 Self::Crc32 => 4,
622 Self::Crc64 => 8,
623 }
624 }
625
626 pub fn recommended() -> Self {
627 Self::Sha256
628 }
629}
630
631#[derive(Debug, Clone, Serialize, Deserialize)]
633pub struct DiffResult {
634 pub files: Vec<FileDiff>,
636 pub total_files: usize,
638 pub files_to_transfer: usize,
640 pub files_to_delete: usize,
642 pub conflicts: usize,
644 pub total_transfer_size: u64,
646 pub total_delete_size: u64,
648 pub estimated_duration_ms: u64,
650 pub calculation_time_ms: u64,
652 pub source_stats: DiffStats,
654 pub target_stats: DiffStats,
656 pub action_stats: std::collections::HashMap<DiffAction, usize>,
658}
659
660impl DiffResult {
661 pub fn new() -> Self {
662 Self {
663 files: Vec::new(),
664 total_files: 0,
665 files_to_transfer: 0,
666 files_to_delete: 0,
667 conflicts: 0,
668 total_transfer_size: 0,
669 total_delete_size: 0,
670 estimated_duration_ms: 0,
671 calculation_time_ms: 0,
672 source_stats: DiffStats::new(),
673 target_stats: DiffStats::new(),
674 action_stats: std::collections::HashMap::new(),
675 }
676 }
677
678 pub fn add_file(&mut self, diff: FileDiff) {
679 *self.action_stats.entry(diff.action).or_insert(0) += 1;
681
682 match diff.action {
684 DiffAction::Upload | DiffAction::Download | DiffAction::Update => {
685 self.files_to_transfer += 1;
686 self.total_transfer_size += diff.transfer_size();
687 }
688 DiffAction::Delete => {
689 self.files_to_delete += 1;
690 self.total_delete_size += diff.total_size();
691 }
692 DiffAction::Conflict => {
693 self.conflicts += 1;
694 }
695 _ => {}
696 }
697
698 if let Some(source) = &diff.source_info {
700 self.source_stats.add_file(source);
701 }
702 if let Some(target) = &diff.target_info {
703 self.target_stats.add_file(target);
704 }
705
706 self.files.push(diff);
707 self.total_files += 1;
708 }
709
710 pub fn sort_by_priority(&mut self) {
711 self.files.sort_by(|a, b| b.priority.cmp(&a.priority));
712 }
713
714 pub fn filter_by_action(&self, action: DiffAction) -> Vec<&FileDiff> {
715 self.files
716 .iter()
717 .filter(|diff| diff.action == action)
718 .collect()
719 }
720
721 pub fn filter_by_tag(&self, tag: &str) -> Vec<&FileDiff> {
722 self.files
723 .iter()
724 .filter(|diff| diff.tags.contains(&tag.to_string()))
725 .collect()
726 }
727
728 pub fn find_by_path(&self, path: &str) -> Option<&FileDiff> {
729 self.files.iter().find(|diff| diff.path == path)
730 }
731
732 pub fn has_conflicts(&self) -> bool {
733 self.conflicts > 0
734 }
735
736 pub fn is_empty(&self) -> bool {
737 self.files.is_empty()
738 }
739
740 pub fn summary(&self) -> String {
741 format!(
742 "文件总数: {}, 需要传输: {} ({}),需要删除: {},冲突: {}",
743 self.total_files,
744 self.files_to_transfer,
745 format_bytes(self.total_transfer_size),
746 self.files_to_delete,
747 self.conflicts
748 )
749 }
750
751 pub fn to_json(&self) -> Result<String> {
752 serde_json::to_string_pretty(self)
753 .map_err(|e| crate::error::SyncError::Serialization(e.into()))
754 }
755
756 pub fn to_csv(&self) -> Result<String> {
757 let mut wtr = csv::Writer::from_writer(Vec::new());
758
759 for diff in &self.files {
760 wtr.serialize(CsvDiff {
761 path: &diff.path,
762 action: diff.action.as_str(),
763 size: diff.total_size(),
764 priority: diff.priority,
765 estimated_duration_ms: diff.estimated_duration_ms,
766 retry_count: diff.retry_count,
767 requires_encryption: diff.requires_encryption,
768 requires_chunking: diff.requires_chunking,
769 tags: diff.tags.join(","),
770 })
771 .map_err(|e| SyncError::Unsupported("转换异常".into()))?;
772 }
773
774 let data = String::from_utf8(
775 wtr.into_inner()
776 .map_err(|e| SyncError::Unsupported("转换异常".into()))?,
777 )
778 .map_err(|e| SyncError::Validation(e.to_string()))?;
779
780 Ok(data)
781 }
782}
783
784#[derive(Debug, Clone, Serialize, Deserialize)]
786pub struct DiffStats {
787 pub total_files: usize,
788 pub total_dirs: usize,
789 pub total_size: u64,
790 pub largest_file: u64,
791 pub smallest_file: u64,
792 pub average_file_size: f64,
793 pub file_types: std::collections::HashMap<String, usize>,
794 pub oldest_file: Option<String>,
795 pub newest_file: Option<String>,
796}
797
798impl DiffStats {
799 pub fn new() -> Self {
800 Self {
801 total_files: 0,
802 total_dirs: 0,
803 total_size: 0,
804 largest_file: 0,
805 smallest_file: u64::MAX,
806 average_file_size: 0.0,
807 file_types: std::collections::HashMap::new(),
808 oldest_file: None,
809 newest_file: None,
810 }
811 }
812
813 pub fn add_file(&mut self, metadata: &FileMetadata) {
814 if metadata.is_dir {
815 self.total_dirs += 1;
816 } else {
817 self.total_files += 1;
818 self.total_size += metadata.size;
819
820 if metadata.size > self.largest_file {
822 self.largest_file = metadata.size;
823 }
824 if metadata.size < self.smallest_file {
825 self.smallest_file = metadata.size;
826 }
827
828 if let Some(mime_type) = &metadata.mime_type {
830 *self.file_types.entry(mime_type.clone()).or_insert(0) += 1;
831 }
832 }
833 }
834
835 pub fn finalize(&mut self) {
836 if self.total_files > 0 {
837 self.average_file_size = self.total_size as f64 / self.total_files as f64;
838 } else {
839 self.smallest_file = 0;
840 }
841 }
842
843 pub fn human_readable(&self) -> String {
844 format!(
845 "文件: {}, 目录: {}, 大小: {}",
846 self.total_files,
847 self.total_dirs,
848 format_bytes(self.total_size)
849 )
850 }
851}
852
853#[derive(Debug, Serialize)]
855struct CsvDiff<'a> {
856 path: &'a str,
857 action: &'static str,
858 size: u64,
859 priority: u8,
860 estimated_duration_ms: u64,
861 retry_count: u32,
862 requires_encryption: bool,
863 requires_chunking: bool,
864 tags: String,
865}
866
867pub struct DiffDetector {
869 options: DiffOptions,
870 cache: std::collections::HashMap<String, FileMetadata>,
871}
872
873impl DiffDetector {
874 pub fn new(options: DiffOptions) -> Self {
875 Self {
876 options,
877 cache: std::collections::HashMap::new(),
878 }
879 }
880
881 pub async fn detect_changes(
882 &mut self,
883 source_files: &[FileMetadata],
884 target_files: &[FileMetadata],
885 ) -> Result<DiffResult> {
886 let start_time = std::time::Instant::now();
887 let mut result = DiffResult::new();
888
889 let mut target_map = std::collections::HashMap::new();
891 for file in target_files {
892 target_map.insert(file.path.to_string_lossy().to_string(), file.clone());
893 }
894
895 for source_file in source_files {
897 let path = source_file.path.to_string_lossy().to_string();
898
899 if let Some(target_file) = target_map.remove(&path) {
900 if self.is_file_changed(&source_file, &target_file) {
902 let diff = self.create_file_diff(&source_file, Some(&target_file));
903 result.add_file(diff);
904 } else {
905 let diff = FileDiff::unchanged(path, source_file.clone(), target_file);
906 result.add_file(diff);
907 }
908 } else {
909 let diff = FileDiff::upload(path, source_file.clone(), None);
911 result.add_file(diff);
912 }
913 }
914
915 for (path, target_file) in target_map {
917 let diff = FileDiff::delete(path, target_file);
918 result.add_file(diff);
919 }
920
921 self.detect_moves(&mut result);
923
924 self.detect_conflicts(&mut result);
926
927 self.update_cache(source_files);
929
930 result.source_stats.finalize();
932 result.target_stats.finalize();
933 result.calculation_time_ms = start_time.elapsed().as_millis() as u64;
934 result.estimated_duration_ms = result
935 .files
936 .iter()
937 .filter(|diff| diff.action.is_transfer())
938 .map(|diff| diff.estimated_duration_ms)
939 .sum();
940
941 result.sort_by_priority();
942 Ok(result)
943 }
944
945 fn is_file_changed(&self, source: &FileMetadata, target: &FileMetadata) -> bool {
946 if self.options.compare_size && source.size != target.size {
947 return true;
948 }
949
950 if self.options.compare_mtime && source.modified != target.modified {
951 return true;
952 }
953
954 if self.options.compare_checksum {
955 match (&source.file_hash, &target.file_hash) {
956 (Some(src_hash), Some(dst_hash)) if src_hash != dst_hash => return true,
957 _ => {}
958 }
959 }
960
961 if source.permissions != target.permissions {
962 return true;
963 }
964
965 false
966 }
967
968 fn create_file_diff(&self, source: &FileMetadata, target: Option<&FileMetadata>) -> FileDiff {
969 let path = source.path.to_string_lossy().to_string();
970
971 match target {
972 Some(target) => {
973 let mut diff = FileDiff::update(path, source.clone(), target.clone());
974
975 self.analyze_changes(&mut diff);
977 diff
978 }
979 None => FileDiff::upload(path, source.clone(), None),
980 }
981 }
982
983 fn analyze_changes(&self, diff: &mut FileDiff) {
984 if let (Some(source), Some(target)) = (&diff.source_info, &diff.target_info) {
985 let mut details = ChangeDetails::default();
986
987 if source.size != target.size {
989 details.content_change = ContentChangeType::Partial;
990 details.change_percentage = if source.size > 0 {
991 ((source.size.abs_diff(target.size) * 100) / source.size) as u8
992 } else {
993 100
994 };
995 }
996
997 if source.modified != target.modified {
999 details.timestamps_changed = true;
1000 }
1001
1002 if source.permissions != target.permissions {
1004 details.permissions_changed = true;
1005 }
1006
1007 diff.change_details = details;
1008 }
1009 }
1010
1011 fn detect_moves(&self, result: &mut DiffResult) {
1012 let mut potential_moves = Vec::new();
1015
1016 for (i, diff_i) in result.files.iter().enumerate() {
1017 if diff_i.action == DiffAction::Delete {
1018 for (j, diff_j) in result.files.iter().enumerate() {
1019 if diff_j.action == DiffAction::Upload {
1020 if let (Some(src), Some(dst)) = (&diff_i.target_info, &diff_j.source_info) {
1021 let similarity = self.calculate_file_similarity(src, dst);
1022 if similarity > 0.8 {
1023 potential_moves.push((i, j, similarity));
1024 }
1025 }
1026 }
1027 }
1028 }
1029 }
1030
1031 for (delete_idx, upload_idx, similarity) in potential_moves {
1033 let delete_path = result.files[delete_idx].path.clone();
1035 let upload_path = result.files[upload_idx].path.clone();
1036
1037 if let (Some(source), Some(target)) = (
1038 result.files[upload_idx].source_info.clone(),
1039 result.files[delete_idx].target_info.clone(),
1040 ) {
1041 let move_diff = FileDiff::move_file(delete_path, upload_path, source, target);
1042
1043 result.files[delete_idx] = move_diff.clone();
1045 result.files[upload_idx] = move_diff;
1046 }
1047 }
1048 }
1049
1050 fn detect_conflicts(&self, result: &mut DiffResult) {
1051 let mut path_map: std::collections::HashMap<String, Vec<usize>> =
1052 std::collections::HashMap::new();
1053 for (idx, diff) in result.files.iter().enumerate() {
1054 path_map.entry(diff.path.clone()).or_default().push(idx);
1055 }
1056 for indices in path_map.values() {
1057 if indices.len() > 1 {
1058 let has_upload = indices
1059 .iter()
1060 .any(|&i| result.files[i].action == DiffAction::Upload);
1061 let has_delete = indices
1062 .iter()
1063 .any(|&i| result.files[i].action == DiffAction::Delete);
1064 let has_update = indices
1065 .iter()
1066 .any(|&i| result.files[i].action == DiffAction::Update);
1067 if (has_upload && has_delete) || (has_upload && has_update) {
1068 for &i in indices {
1069 if let (Some(source), Some(target)) =
1070 (&result.files[i].source_info, &result.files[i].target_info)
1071 {
1072 result.files[i] = FileDiff::conflict(
1073 result.files[i].path.clone(),
1074 source.clone(),
1075 target.clone(),
1076 );
1077 }
1078 }
1079 }
1080 }
1081 }
1082 }
1083
1084 fn calculate_file_similarity(&self, file1: &FileMetadata, file2: &FileMetadata) -> f64 {
1085 let mut similarity = 0.0;
1086
1087 if file1.size == file2.size {
1089 similarity += 0.4;
1090 } else if file1.size > 0 && file2.size > 0 {
1091 let min_size = file1.size.min(file2.size) as f64;
1092 let max_size = file1.size.max(file2.size) as f64;
1093 similarity += 0.4 * (min_size / max_size);
1094 }
1095
1096 let time_diff = (file1.modified - file2.modified).abs();
1098 if time_diff < 60 {
1099 similarity += 0.3; } else if time_diff < 3600 {
1101 similarity += 0.2; } else if time_diff < 86400 {
1103 similarity += 0.1; }
1105
1106 if let (Some(mime1), Some(mime2)) = (&file1.mime_type, &file2.mime_type) {
1108 if mime1 == mime2 {
1109 similarity += 0.3;
1110 } else if mime1.split('/').next() == mime2.split('/').next() {
1111 similarity += 0.15; }
1113 }
1114
1115 similarity
1116 }
1117
1118 fn update_cache(&mut self, files: &[FileMetadata]) {
1119 for file in files {
1120 self.cache
1121 .insert(file.path.to_string_lossy().to_string(), file.clone());
1122 }
1123 }
1124}
1125
/// Settings that control how listings are compared.
#[derive(Debug, Clone)]
pub struct DiffOptions {
    /// Treat differing sizes as a change.
    pub compare_size: bool,
    /// Treat differing modification times as a change.
    pub compare_mtime: bool,
    /// Treat differing content hashes as a change (when both are known).
    pub compare_checksum: bool,
    /// Glob-like patterns of paths to ignore.
    pub ignore_patterns: Vec<String>,
    /// Maximum directory depth, or `None` for unlimited.
    pub max_depth: Option<usize>,
    /// Whether symbolic links are followed.
    pub follow_symlinks: bool,
    /// Whether move detection is enabled.
    pub detect_moves: bool,
    /// Similarity score threshold (default `0.7`).
    pub similarity_threshold: f64,
    /// Whether conflict detection is enabled.
    pub detect_conflicts: bool,
    /// Whether hidden files are included.
    pub include_hidden: bool,
    /// Size in bytes above which a file counts as large (default 100 MiB).
    pub large_file_threshold: u64,
}
1152
1153impl Default for DiffOptions {
1154 fn default() -> Self {
1155 Self {
1156 compare_size: true,
1157 compare_mtime: true,
1158 compare_checksum: false, ignore_patterns: vec![
1160 ".*".to_string(),
1161 "*/.*".to_string(),
1162 "*.tmp".to_string(),
1163 "*.temp".to_string(),
1164 ],
1165 max_depth: None,
1166 follow_symlinks: false,
1167 detect_moves: true,
1168 similarity_threshold: 0.7,
1169 detect_conflicts: true,
1170 include_hidden: false,
1171 large_file_threshold: 1024 * 1024 * 100, }
1173 }
1174}
1175
/// Guesses a MIME type from a file extension (given without the leading dot).
///
/// The lookup is case-insensitive; unknown extensions map to
/// `application/octet-stream`.
fn detect_mime_type(extension: &std::ffi::OsStr) -> String {
    let normalized = extension.to_string_lossy().to_lowercase();

    let mime: &'static str = match normalized.as_str() {
        // Plain text, markup and configuration formats.
        "txt" => "text/plain",
        "md" => "text/markdown",
        "html" | "htm" => "text/html",
        "css" => "text/css",
        "yml" | "yaml" => "text/yaml",
        "json" => "application/json",
        "xml" => "application/xml",
        "toml" => "application/toml",
        "js" => "application/javascript",
        // Source code.
        "rs" => "text/x-rust",
        "go" => "text/x-go",
        "py" => "text/x-python",
        "java" => "text/x-java",
        "c" => "text/x-c",
        "cpp" | "cc" | "h" | "hpp" => "text/x-c++",
        // Images, audio and video.
        "jpg" | "jpeg" => "image/jpeg",
        "png" => "image/png",
        "gif" => "image/gif",
        "mp3" => "audio/mpeg",
        "mp4" => "video/mp4",
        "avi" => "video/x-msvideo",
        // Archives.
        "zip" => "application/zip",
        "tar" => "application/x-tar",
        "gz" => "application/gzip",
        // Documents.
        "pdf" => "application/pdf",
        "doc" => "application/msword",
        "docx" => "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
        "xls" => "application/vnd.ms-excel",
        "xlsx" => "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        "ppt" => "application/vnd.ms-powerpoint",
        "pptx" => "application/vnd.openxmlformats-officedocument.presentationml.presentation",
        _ => "application/octet-stream",
    };

    String::from(mime)
}
1216
#[cfg(test)]
mod tests {
    use super::*;

    /// Adding one upload entry bumps the file counter and the summary
    /// contains the total-files label.
    #[test]
    fn test_diff_result_add_file_and_summary() {
        let metadata = FileMetadata::new(PathBuf::from("a.txt"));
        let upload = FileDiff::new(
            "a.txt".to_string(),
            DiffAction::Upload,
            Some(metadata),
            None,
        );

        let mut result = DiffResult::new();
        result.add_file(upload);

        assert_eq!(result.total_files, 1);
        let summary = result.summary();
        assert!(summary.contains("文件总数"));
    }
}