cloud_disk_sync/sync/
diff.rs

1// src/sync/diff.rs
2use crate::error::{Result, SyncError};
3use crate::utils::format_bytes;
4use serde::{Deserialize, Serialize};
5use sha2::{Digest, Sha256};
6use std::path::{Path, PathBuf};
7use std::time::{Duration, SystemTime};
8
/// The kind of operation a file difference calls for.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Hash)]
pub enum DiffAction {
    /// The file needs to be uploaded to the target.
    Upload,
    /// The file needs to be downloaded from the target.
    Download,
    /// The file needs to be deleted on the target.
    Delete,
    /// A conflict that has to be resolved.
    Conflict,
    /// The file was moved or renamed.
    Move,
    /// The file was updated (content or metadata changed).
    Update,
    /// The file is unchanged.
    Unchanged,
    /// A directory has to be created.
    CreateDir,
}
29
30impl DiffAction {
31    pub fn as_str(&self) -> &'static str {
32        match self {
33            Self::Upload => "upload",
34            Self::Download => "download",
35            Self::Delete => "delete",
36            Self::Conflict => "conflict",
37            Self::Move => "move",
38            Self::Update => "update",
39            Self::Unchanged => "unchanged",
40            Self::CreateDir => "create_dir",
41        }
42    }
43
44    pub fn emoji(&self) -> &'static str {
45        match self {
46            Self::Upload => "📤",
47            Self::Download => "📥",
48            Self::Delete => "🗑️",
49            Self::Conflict => "⚠️",
50            Self::Move => "📦",
51            Self::Update => "🔄",
52            Self::Unchanged => "✅",
53            Self::CreateDir => "📁",
54        }
55    }
56
57    pub fn is_transfer(&self) -> bool {
58        matches!(self, Self::Upload | Self::Download)
59    }
60
61    pub fn is_destructive(&self) -> bool {
62        matches!(self, Self::Delete)
63    }
64
65    pub fn requires_user_action(&self) -> bool {
66        matches!(self, Self::Conflict)
67    }
68}
69
/// Details of a single file difference.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileDiff {
    /// Path relative to the sync root directory.
    pub path: String,
    /// The operation this difference calls for.
    pub action: DiffAction,
    /// Metadata of the source file, if it exists.
    pub source_info: Option<FileMetadata>,
    /// Metadata of the target file, if it exists.
    pub target_info: Option<FileMetadata>,
    /// Details about what changed.
    pub change_details: ChangeDetails,
    /// File size difference in bytes (source minus target).
    pub size_diff: i64,
    /// Whether this counts as a large file (above the threshold).
    pub is_large_file: bool,
    /// Whether the transfer needs to be chunked.
    pub requires_chunking: bool,
    /// Whether encryption is required.
    pub requires_encryption: bool,
    /// Priority (0-100; higher values are processed first).
    pub priority: u8,
    /// Estimated transfer time in milliseconds.
    pub estimated_duration_ms: u64,
    /// Time this diff was last processed.
    pub last_processed: Option<SystemTime>,
    /// Number of retries so far.
    pub retry_count: u32,
    /// Error message, if a previous processing attempt failed.
    pub error_message: Option<String>,
    /// Custom tags.
    pub tags: Vec<String>,
    /// Checksum algorithm.
    pub checksum_type: ChecksumType,
    /// Checksum of the source file.
    pub source_checksum: Option<String>,
    /// Checksum of the target file.
    pub target_checksum: Option<String>,
    /// Diff ID (used for de-duplication and tracking).
    pub diff_id: String,
    /// Creation time of this diff.
    pub created_at: SystemTime,
}
114
115impl FileDiff {
116    pub fn new(
117        path: String,
118        action: DiffAction,
119        source_info: Option<FileMetadata>,
120        target_info: Option<FileMetadata>,
121    ) -> Self {
122        let size_diff = Self::calculate_size_diff(&source_info, &target_info);
123        let is_large_file = Self::is_large_file(size_diff);
124
125        Self {
126            path,
127            action,
128            source_info,
129            target_info,
130            change_details: ChangeDetails::default(),
131            size_diff,
132            is_large_file,
133            requires_chunking: is_large_file,
134            requires_encryption: false,
135            priority: Self::calculate_priority(action, size_diff),
136            estimated_duration_ms: Self::estimate_duration(size_diff, is_large_file),
137            last_processed: None,
138            retry_count: 0,
139            error_message: None,
140            tags: Vec::new(),
141            checksum_type: ChecksumType::Sha256,
142            source_checksum: None,
143            target_checksum: None,
144            diff_id: Self::generate_diff_id(),
145            created_at: SystemTime::now(),
146        }
147    }
148
149    pub fn upload(
150        path: String,
151        source_info: FileMetadata,
152        target_info: Option<FileMetadata>,
153    ) -> Self {
154        Self::new(path, DiffAction::Upload, Some(source_info), target_info)
155    }
156
157    pub fn download(
158        path: String,
159        target_info: FileMetadata,
160        source_info: Option<FileMetadata>,
161    ) -> Self {
162        Self::new(path, DiffAction::Download, source_info, Some(target_info))
163    }
164
165    pub fn delete(path: String, target_info: FileMetadata) -> Self {
166        Self::new(path, DiffAction::Delete, None, Some(target_info))
167    }
168
169    pub fn conflict(path: String, source_info: FileMetadata, target_info: FileMetadata) -> Self {
170        let mut diff = Self::new(
171            path,
172            DiffAction::Conflict,
173            Some(source_info),
174            Some(target_info),
175        );
176        diff.priority = 100; // 冲突文件最高优先级
177        diff
178    }
179
180    pub fn update(path: String, source_info: FileMetadata, target_info: FileMetadata) -> Self {
181        Self::new(
182            path,
183            DiffAction::Update,
184            Some(source_info),
185            Some(target_info),
186        )
187    }
188
189    pub fn unchanged(path: String, source_info: FileMetadata, target_info: FileMetadata) -> Self {
190        Self::new(
191            path,
192            DiffAction::Unchanged,
193            Some(source_info),
194            Some(target_info),
195        )
196    }
197
198    pub fn create_dir(path: String, source_info: FileMetadata) -> Self {
199        Self::new(path, DiffAction::CreateDir, Some(source_info), None)
200    }
201
202    pub fn move_file(
203        from: String,
204        to: String,
205        source_info: FileMetadata,
206        target_info: FileMetadata,
207    ) -> Self {
208        let mut diff = Self::new(to, DiffAction::Move, Some(source_info), Some(target_info));
209        diff.change_details.old_path = Some(from);
210        diff
211    }
212
213    fn calculate_size_diff(
214        source_info: &Option<FileMetadata>,
215        target_info: &Option<FileMetadata>,
216    ) -> i64 {
217        match (source_info, target_info) {
218            (Some(src), Some(dst)) => src.size as i64 - dst.size as i64,
219            (Some(src), None) => src.size as i64,
220            (None, Some(dst)) => -(dst.size as i64),
221            (None, None) => 0,
222        }
223    }
224
225    fn is_large_file(size_diff: i64) -> bool {
226        size_diff.abs() > 1024 * 1024 * 100 // 100MB 以上为大文件
227    }
228
229    fn calculate_priority(action: DiffAction, size_diff: i64) -> u8 {
230        match action {
231            DiffAction::Conflict => 100,
232            DiffAction::Delete => 90,
233            DiffAction::Update if size_diff.abs() < 1024 * 1024 => 80, // 小文件更新
234            DiffAction::Upload | DiffAction::Download => {
235                // 小文件优先,大文件靠后
236                if size_diff.abs() < 1024 * 1024 {
237                    70 // 小文件
238                } else if size_diff.abs() < 1024 * 1024 * 10 {
239                    60 // 中等文件
240                } else {
241                    50 // 大文件
242                }
243            }
244            DiffAction::Move => 40,
245            DiffAction::CreateDir => 75, // 在上传文件之前创建目录
246            DiffAction::Unchanged => 10,
247            _ => 30,
248        }
249    }
250
251    fn estimate_duration(size_diff: i64, is_large_file: bool) -> u64 {
252        // 假设平均速度 1MB/s
253        let bytes_per_second = 1024 * 1024;
254        let duration_secs = (size_diff.abs() as f64 / bytes_per_second as f64).ceil() as u64;
255
256        if is_large_file {
257            // 大文件增加额外处理时间
258            duration_secs * 1000 + 5000
259        } else {
260            duration_secs * 1000
261        }
262    }
263
264    fn generate_diff_id() -> String {
265        use uuid::Uuid;
266        format!("diff_{}", Uuid::new_v4().simple())
267    }
268
269    pub fn calculate_similarity(&self) -> f64 {
270        // 计算源文件和目标文件的相似度(0.0-1.0)
271        match (&self.source_info, &self.target_info) {
272            (Some(src), Some(dst)) => {
273                if src.size == dst.size {
274                    // 大小相同,检查修改时间等其他因素
275                    let time_diff = (src.modified - dst.modified).abs();
276                    if time_diff < 2 {
277                        0.95 // 时间差小于2秒,高度相似
278                    } else {
279                        0.5 // 时间差较大,中等相似
280                    }
281                } else {
282                    0.1 // 大小不同,低相似度
283                }
284            }
285            _ => 0.0, // 只有一端存在文件,不相似
286        }
287    }
288
289    pub fn is_similar(&self, threshold: f64) -> bool {
290        self.calculate_similarity() >= threshold
291    }
292
293    pub fn should_retry(&self, max_retries: u32) -> bool {
294        self.retry_count < max_retries
295    }
296
297    pub fn mark_retry(&mut self, error: Option<String>) {
298        self.retry_count += 1;
299        self.error_message = error;
300        self.last_processed = Some(SystemTime::now());
301    }
302
303    pub fn mark_success(&mut self) {
304        self.last_processed = Some(SystemTime::now());
305        self.retry_count = 0;
306        self.error_message = None;
307    }
308
309    pub fn is_expired(&self, timeout: Duration) -> bool {
310        if let Some(last_processed) = self.last_processed {
311            last_processed.elapsed().unwrap_or_default() > timeout
312        } else {
313            false
314        }
315    }
316
317    pub fn total_size(&self) -> u64 {
318        match &self.source_info {
319            Some(info) => info.size,
320            None => 0,
321        }
322    }
323
324    pub fn transfer_size(&self) -> u64 {
325        if self.action.is_transfer() {
326            match &self.source_info {
327                Some(info) => info.size,
328                None => 0,
329            }
330        } else {
331            0
332        }
333    }
334
335    pub fn human_readable_size(&self) -> String {
336        format_bytes(self.total_size())
337    }
338
339    pub fn summary(&self) -> String {
340        let action_emoji = self.action.emoji();
341        let size_str = self.human_readable_size();
342
343        match self.action {
344            DiffAction::Upload => format!("{} 上传: {} ({})", action_emoji, self.path, size_str),
345            DiffAction::Download => format!("{} 下载: {} ({})", action_emoji, self.path, size_str),
346            DiffAction::Delete => format!("{} 删除: {}", action_emoji, self.path),
347            DiffAction::Conflict => format!("{} 冲突: {}", action_emoji, self.path),
348            DiffAction::Move => {
349                if let Some(old_path) = &self.change_details.old_path {
350                    format!("{} 移动: {} -> {}", action_emoji, old_path, self.path)
351                } else {
352                    format!("{} 移动: {}", action_emoji, self.path)
353                }
354            }
355            DiffAction::Update => format!("{} 更新: {} ({})", action_emoji, self.path, size_str),
356            DiffAction::CreateDir => format!("{} 创建目录: {}", action_emoji, self.path),
357            DiffAction::Unchanged => format!("{} 未变: {}", action_emoji, self.path),
358        }
359    }
360
361    pub fn to_json(&self) -> Result<String> {
362        serde_json::to_string_pretty(self)
363            .map_err(|e| crate::error::SyncError::Serialization(e.into()))
364    }
365
366    pub fn from_json(json: &str) -> Result<Self> {
367        serde_json::from_str(json).map_err(|e| crate::error::SyncError::Serialization(e.into()))
368    }
369
370    pub fn is_encrypted(&self) -> bool {
371        self.source_info
372            .as_ref()
373            .map_or(false, |info| info.is_encrypted)
374            || self
375                .target_info
376                .as_ref()
377                .map_or(false, |info| info.is_encrypted)
378    }
379
380    pub fn requires_decryption(&self) -> bool {
381        self.requires_encryption || self.is_encrypted()
382    }
383}
384
/// File metadata.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileMetadata {
    /// Path of the file.
    pub path: PathBuf,
    /// Size in bytes.
    pub size: u64,
    /// Modification time, in seconds since the Unix epoch.
    pub modified: i64,
    /// Creation time, in seconds since the Unix epoch.
    pub created: i64,
    /// Last access time, in seconds since the Unix epoch.
    pub accessed: i64,
    /// Permission bits (defaults to `0o644`).
    pub permissions: u32,
    /// Whether the entry is a directory.
    pub is_dir: bool,
    /// Whether the entry is a symbolic link.
    pub is_symlink: bool,
    /// Whether the file name starts with a dot.
    pub is_hidden: bool,
    /// Whether the file is encrypted.
    pub is_encrypted: bool,
    /// MIME type derived from the file extension, if any.
    pub mime_type: Option<String>,
    /// Lowercase hex content hash, once it has been calculated.
    pub file_hash: Option<String>,
    /// Per-chunk hashes (presumably for chunked transfers — TODO confirm).
    pub chunk_hashes: Vec<String>,
    /// Hash over path, size, modification time, permissions and content hash.
    pub metadata_hash: String,
    pub storage_class: Option<String>,
    pub encryption_key_id: Option<String>,
    pub version: Option<String>,
    /// Custom tags.
    pub tags: Vec<String>,
    /// Arbitrary key/value metadata.
    pub custom_metadata: std::collections::HashMap<String, String>,
}
408
409impl FileMetadata {
410    pub fn new(path: PathBuf) -> Self {
411        let now = SystemTime::now()
412            .duration_since(SystemTime::UNIX_EPOCH)
413            .unwrap_or_default()
414            .as_secs() as i64;
415
416        Self {
417            path,
418            size: 0,
419            modified: now,
420            created: now,
421            accessed: now,
422            permissions: 0o644,
423            is_dir: false,
424            is_symlink: false,
425            is_hidden: false,
426            is_encrypted: false,
427            mime_type: None,
428            file_hash: None,
429            chunk_hashes: Vec::new(),
430            metadata_hash: String::new(),
431            storage_class: None,
432            encryption_key_id: None,
433            version: None,
434            tags: Vec::new(),
435            custom_metadata: std::collections::HashMap::new(),
436        }
437    }
438
439    pub fn from_path(path: &Path) -> Result<Self> {
440        let metadata = std::fs::metadata(path)?;
441
442        let mut file_metadata = Self::new(path.to_path_buf());
443
444        file_metadata.size = metadata.len();
445        file_metadata.is_dir = metadata.is_dir();
446        file_metadata.is_symlink = metadata.file_type().is_symlink();
447
448        if let Ok(modified) = metadata.modified() {
449            file_metadata.modified = modified
450                .duration_since(SystemTime::UNIX_EPOCH)
451                .unwrap_or_default()
452                .as_secs() as i64;
453        }
454
455        if let Ok(created) = metadata.created() {
456            file_metadata.created = created
457                .duration_since(SystemTime::UNIX_EPOCH)
458                .unwrap_or_default()
459                .as_secs() as i64;
460        }
461
462        if let Ok(accessed) = metadata.accessed() {
463            file_metadata.accessed = accessed
464                .duration_since(SystemTime::UNIX_EPOCH)
465                .unwrap_or_default()
466                .as_secs() as i64;
467        }
468
469        // 检测隐藏文件(Unix 系统以 . 开头)
470        if let Some(file_name) = path.file_name() {
471            if file_name.to_string_lossy().starts_with('.') {
472                file_metadata.is_hidden = true;
473            }
474        }
475
476        // 检测 MIME 类型
477        if let Some(extension) = path.extension() {
478            file_metadata.mime_type = Some(detect_mime_type(extension));
479        }
480
481        Ok(file_metadata)
482    }
483
484    pub fn calculate_hash(&mut self, algorithm: ChecksumType) -> Result<()> {
485        use sha2::{Digest, Sha256};
486        use std::fs::File;
487        use std::io::Read;
488
489        if self.is_dir {
490            self.file_hash = Some(String::new());
491            return Ok(());
492        }
493
494        let mut file = File::open(&self.path)?;
495        let mut hasher = Sha256::new();
496        let mut buffer = [0; 8192];
497
498        loop {
499            let bytes_read = file.read(&mut buffer)?;
500            if bytes_read == 0 {
501                break;
502            }
503            hasher.update(&buffer[..bytes_read]);
504        }
505
506        let hash = format!("{:x}", hasher.finalize());
507        self.file_hash = Some(hash);
508
509        Ok(())
510    }
511
512    pub fn update_metadata_hash(&mut self) {
513        let mut hasher = Sha256::new();
514        hasher.update(self.path.to_string_lossy().as_bytes());
515        hasher.update(&self.size.to_be_bytes());
516        hasher.update(&self.modified.to_be_bytes());
517        hasher.update(&self.permissions.to_be_bytes());
518
519        if let Some(hash) = &self.file_hash {
520            hasher.update(hash.as_bytes());
521        }
522
523        self.metadata_hash = format!("{:x}", hasher.finalize());
524    }
525}
526
/// Details about what changed.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct ChangeDetails {
    /// Previous path (for renames/moves).
    pub old_path: Option<String>,
    /// Kind of content change.
    pub content_change: ContentChangeType,
    /// Whether metadata changed.
    pub metadata_changed: bool,
    /// Whether permissions changed.
    pub permissions_changed: bool,
    /// Whether timestamps changed.
    pub timestamps_changed: bool,
    /// Rename detection confidence (0-100).
    pub rename_confidence: u8,
    /// Percentage of change (0-100).
    pub change_percentage: u8,
    /// Byte ranges that changed.
    pub changed_ranges: Vec<(u64, u64)>,
    /// Number of lines added (text files).
    pub lines_added: Option<usize>,
    /// Number of lines removed (text files).
    pub lines_removed: Option<usize>,
    /// Binary change detection result.
    pub binary_changes: Option<BinaryChanges>,
}
553
/// Kind of content change.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default)]
pub enum ContentChangeType {
    /// Not determined (the default).
    #[default]
    Unknown,
    /// File was added.
    Added,
    /// File was removed.
    Removed,
    /// File was completely rewritten.
    Rewritten,
    /// File was partially modified.
    Partial,
    /// Only metadata changed.
    MetadataOnly,
    /// File was moved/renamed.
    Moved,
    /// Content is unchanged.
    Unchanged,
}
574
/// Details about changes in a binary file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BinaryChanges {
    /// Number of differing bytes.
    pub different_bytes: u64,
    /// Number of identical bytes.
    pub same_bytes: u64,
    /// Change patterns (contiguous changed regions, etc.).
    pub change_patterns: Vec<ChangePattern>,
}
585
/// A single changed region, described by its `start`/`end` positions and the kind of
/// change.
// NOTE(review): whether `end` is inclusive is not visible in this file — confirm.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ChangePattern {
    pub start: u64,
    pub end: u64,
    pub pattern_type: PatternType,
}
592
/// Kind of change a [`ChangePattern`] describes.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
pub enum PatternType {
    Inserted,
    Deleted,
    Modified,
    Moved,
}
600
/// Checksum algorithm.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
pub enum ChecksumType {
    Md5,
    Sha1,
    Sha256,
    Sha512,
    Blake3,
    Crc32,
    Crc64,
}
612
613impl ChecksumType {
614    pub fn hash_size(&self) -> usize {
615        match self {
616            Self::Md5 => 16,
617            Self::Sha1 => 20,
618            Self::Sha256 => 32,
619            Self::Sha512 => 64,
620            Self::Blake3 => 32,
621            Self::Crc32 => 4,
622            Self::Crc64 => 8,
623        }
624    }
625
626    pub fn recommended() -> Self {
627        Self::Sha256
628    }
629}
630
/// Collection of diff results.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DiffResult {
    /// All file differences.
    pub files: Vec<FileDiff>,
    /// Total number of files.
    pub total_files: usize,
    /// Number of files that need to be transferred.
    pub files_to_transfer: usize,
    /// Number of files that need to be deleted.
    pub files_to_delete: usize,
    /// Number of conflicting files.
    pub conflicts: usize,
    /// Total transfer size in bytes.
    pub total_transfer_size: u64,
    /// Total size of deletions in bytes.
    pub total_delete_size: u64,
    /// Estimated transfer time in milliseconds.
    pub estimated_duration_ms: u64,
    /// Time spent calculating the diff, in milliseconds.
    pub calculation_time_ms: u64,
    /// Statistics for the source side.
    pub source_stats: DiffStats,
    /// Statistics for the target side.
    pub target_stats: DiffStats,
    /// Number of diffs per action.
    pub action_stats: std::collections::HashMap<DiffAction, usize>,
}
659
660impl DiffResult {
661    pub fn new() -> Self {
662        Self {
663            files: Vec::new(),
664            total_files: 0,
665            files_to_transfer: 0,
666            files_to_delete: 0,
667            conflicts: 0,
668            total_transfer_size: 0,
669            total_delete_size: 0,
670            estimated_duration_ms: 0,
671            calculation_time_ms: 0,
672            source_stats: DiffStats::new(),
673            target_stats: DiffStats::new(),
674            action_stats: std::collections::HashMap::new(),
675        }
676    }
677
678    pub fn add_file(&mut self, diff: FileDiff) {
679        // 更新操作统计
680        *self.action_stats.entry(diff.action).or_insert(0) += 1;
681
682        // 更新大小统计
683        match diff.action {
684            DiffAction::Upload | DiffAction::Download | DiffAction::Update => {
685                self.files_to_transfer += 1;
686                self.total_transfer_size += diff.transfer_size();
687            }
688            DiffAction::Delete => {
689                self.files_to_delete += 1;
690                self.total_delete_size += diff.total_size();
691            }
692            DiffAction::Conflict => {
693                self.conflicts += 1;
694            }
695            _ => {}
696        }
697
698        // 更新源和目标统计
699        if let Some(source) = &diff.source_info {
700            self.source_stats.add_file(source);
701        }
702        if let Some(target) = &diff.target_info {
703            self.target_stats.add_file(target);
704        }
705
706        self.files.push(diff);
707        self.total_files += 1;
708    }
709
710    pub fn sort_by_priority(&mut self) {
711        self.files.sort_by(|a, b| b.priority.cmp(&a.priority));
712    }
713
714    pub fn filter_by_action(&self, action: DiffAction) -> Vec<&FileDiff> {
715        self.files
716            .iter()
717            .filter(|diff| diff.action == action)
718            .collect()
719    }
720
721    pub fn filter_by_tag(&self, tag: &str) -> Vec<&FileDiff> {
722        self.files
723            .iter()
724            .filter(|diff| diff.tags.contains(&tag.to_string()))
725            .collect()
726    }
727
728    pub fn find_by_path(&self, path: &str) -> Option<&FileDiff> {
729        self.files.iter().find(|diff| diff.path == path)
730    }
731
732    pub fn has_conflicts(&self) -> bool {
733        self.conflicts > 0
734    }
735
736    pub fn is_empty(&self) -> bool {
737        self.files.is_empty()
738    }
739
740    pub fn summary(&self) -> String {
741        format!(
742            "文件总数: {}, 需要传输: {} ({}),需要删除: {},冲突: {}",
743            self.total_files,
744            self.files_to_transfer,
745            format_bytes(self.total_transfer_size),
746            self.files_to_delete,
747            self.conflicts
748        )
749    }
750
751    pub fn to_json(&self) -> Result<String> {
752        serde_json::to_string_pretty(self)
753            .map_err(|e| crate::error::SyncError::Serialization(e.into()))
754    }
755
756    pub fn to_csv(&self) -> Result<String> {
757        let mut wtr = csv::Writer::from_writer(Vec::new());
758
759        for diff in &self.files {
760            wtr.serialize(CsvDiff {
761                path: &diff.path,
762                action: diff.action.as_str(),
763                size: diff.total_size(),
764                priority: diff.priority,
765                estimated_duration_ms: diff.estimated_duration_ms,
766                retry_count: diff.retry_count,
767                requires_encryption: diff.requires_encryption,
768                requires_chunking: diff.requires_chunking,
769                tags: diff.tags.join(","),
770            })
771            .map_err(|e| SyncError::Unsupported("转换异常".into()))?;
772        }
773
774        let data = String::from_utf8(
775            wtr.into_inner()
776                .map_err(|e| SyncError::Unsupported("转换异常".into()))?,
777        )
778        .map_err(|e| SyncError::Validation(e.to_string()))?;
779
780        Ok(data)
781    }
782}
783
/// Statistics for one side of a diff.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DiffStats {
    /// Number of regular (non-directory) entries.
    pub total_files: usize,
    /// Number of directories.
    pub total_dirs: usize,
    /// Sum of all file sizes in bytes.
    pub total_size: u64,
    /// Size of the largest file in bytes.
    pub largest_file: u64,
    /// Size of the smallest file in bytes (`u64::MAX` until a file is added).
    pub smallest_file: u64,
    /// Average file size in bytes; filled in by `finalize`.
    pub average_file_size: f64,
    /// Number of files per MIME type.
    pub file_types: std::collections::HashMap<String, usize>,
    // NOTE(review): `oldest_file` / `newest_file` are never written by the code visible
    // here — confirm what they are meant to hold.
    pub oldest_file: Option<String>,
    pub newest_file: Option<String>,
}
797
798impl DiffStats {
799    pub fn new() -> Self {
800        Self {
801            total_files: 0,
802            total_dirs: 0,
803            total_size: 0,
804            largest_file: 0,
805            smallest_file: u64::MAX,
806            average_file_size: 0.0,
807            file_types: std::collections::HashMap::new(),
808            oldest_file: None,
809            newest_file: None,
810        }
811    }
812
813    pub fn add_file(&mut self, metadata: &FileMetadata) {
814        if metadata.is_dir {
815            self.total_dirs += 1;
816        } else {
817            self.total_files += 1;
818            self.total_size += metadata.size;
819
820            // 更新最大/最小文件
821            if metadata.size > self.largest_file {
822                self.largest_file = metadata.size;
823            }
824            if metadata.size < self.smallest_file {
825                self.smallest_file = metadata.size;
826            }
827
828            // 更新文件类型统计
829            if let Some(mime_type) = &metadata.mime_type {
830                *self.file_types.entry(mime_type.clone()).or_insert(0) += 1;
831            }
832        }
833    }
834
835    pub fn finalize(&mut self) {
836        if self.total_files > 0 {
837            self.average_file_size = self.total_size as f64 / self.total_files as f64;
838        } else {
839            self.smallest_file = 0;
840        }
841    }
842
843    pub fn human_readable(&self) -> String {
844        format!(
845            "文件: {}, 目录: {}, 大小: {}",
846            self.total_files,
847            self.total_dirs,
848            format_bytes(self.total_size)
849        )
850    }
851}
852
/// A diff record in CSV form; one instance is serialized per row by
/// `DiffResult::to_csv`.
#[derive(Debug, Serialize)]
struct CsvDiff<'a> {
    path: &'a str,
    action: &'static str,
    size: u64,
    priority: u8,
    estimated_duration_ms: u64,
    retry_count: u32,
    requires_encryption: bool,
    requires_chunking: bool,
    /// Tags joined with `,`.
    tags: String,
}
866
/// Diff detector: compares source and target file listings and produces a
/// [`DiffResult`].
pub struct DiffDetector {
    /// Comparison options.
    options: DiffOptions,
    /// Cached file metadata keyed by a string
    /// (presumably the file path — TODO confirm against `update_cache`).
    cache: std::collections::HashMap<String, FileMetadata>,
}
872
873impl DiffDetector {
874    pub fn new(options: DiffOptions) -> Self {
875        Self {
876            options,
877            cache: std::collections::HashMap::new(),
878        }
879    }
880
881    pub async fn detect_changes(
882        &mut self,
883        source_files: &[FileMetadata],
884        target_files: &[FileMetadata],
885    ) -> Result<DiffResult> {
886        let start_time = std::time::Instant::now();
887        let mut result = DiffResult::new();
888
889        // 将目标文件转换为哈希映射以便快速查找
890        let mut target_map = std::collections::HashMap::new();
891        for file in target_files {
892            target_map.insert(file.path.to_string_lossy().to_string(), file.clone());
893        }
894
895        // 检查源文件的差异
896        for source_file in source_files {
897            let path = source_file.path.to_string_lossy().to_string();
898
899            if let Some(target_file) = target_map.remove(&path) {
900                // 文件在两端都存在
901                if self.is_file_changed(&source_file, &target_file) {
902                    let diff = self.create_file_diff(&source_file, Some(&target_file));
903                    result.add_file(diff);
904                } else {
905                    let diff = FileDiff::unchanged(path, source_file.clone(), target_file);
906                    result.add_file(diff);
907                }
908            } else {
909                // 文件只存在于源端(需要上传)
910                let diff = FileDiff::upload(path, source_file.clone(), None);
911                result.add_file(diff);
912            }
913        }
914
915        // 剩余的目标文件只存在于目标端(需要删除或下载)
916        for (path, target_file) in target_map {
917            let diff = FileDiff::delete(path, target_file);
918            result.add_file(diff);
919        }
920
921        // 检测文件移动/重命名
922        self.detect_moves(&mut result);
923
924        // 检测冲突
925        self.detect_conflicts(&mut result);
926
927        // 更新缓存
928        self.update_cache(source_files);
929
930        // 计算统计信息
931        result.source_stats.finalize();
932        result.target_stats.finalize();
933        result.calculation_time_ms = start_time.elapsed().as_millis() as u64;
934        result.estimated_duration_ms = result
935            .files
936            .iter()
937            .filter(|diff| diff.action.is_transfer())
938            .map(|diff| diff.estimated_duration_ms)
939            .sum();
940
941        result.sort_by_priority();
942        Ok(result)
943    }
944
945    fn is_file_changed(&self, source: &FileMetadata, target: &FileMetadata) -> bool {
946        if self.options.compare_size && source.size != target.size {
947            return true;
948        }
949
950        if self.options.compare_mtime && source.modified != target.modified {
951            return true;
952        }
953
954        if self.options.compare_checksum {
955            match (&source.file_hash, &target.file_hash) {
956                (Some(src_hash), Some(dst_hash)) if src_hash != dst_hash => return true,
957                _ => {}
958            }
959        }
960
961        if source.permissions != target.permissions {
962            return true;
963        }
964
965        false
966    }
967
968    fn create_file_diff(&self, source: &FileMetadata, target: Option<&FileMetadata>) -> FileDiff {
969        let path = source.path.to_string_lossy().to_string();
970
971        match target {
972            Some(target) => {
973                let mut diff = FileDiff::update(path, source.clone(), target.clone());
974
975                // 分析变化详情
976                self.analyze_changes(&mut diff);
977                diff
978            }
979            None => FileDiff::upload(path, source.clone(), None),
980        }
981    }
982
983    fn analyze_changes(&self, diff: &mut FileDiff) {
984        if let (Some(source), Some(target)) = (&diff.source_info, &diff.target_info) {
985            let mut details = ChangeDetails::default();
986
987            // 检查大小变化
988            if source.size != target.size {
989                details.content_change = ContentChangeType::Partial;
990                details.change_percentage = if source.size > 0 {
991                    ((source.size.abs_diff(target.size) * 100) / source.size) as u8
992                } else {
993                    100
994                };
995            }
996
997            // 检查时间戳变化
998            if source.modified != target.modified {
999                details.timestamps_changed = true;
1000            }
1001
1002            // 检查权限变化
1003            if source.permissions != target.permissions {
1004                details.permissions_changed = true;
1005            }
1006
1007            diff.change_details = details;
1008        }
1009    }
1010
1011    fn detect_moves(&self, result: &mut DiffResult) {
1012        // 实现文件移动检测算法
1013        // 基于文件大小、修改时间和内容相似度
1014        let mut potential_moves = Vec::new();
1015
1016        for (i, diff_i) in result.files.iter().enumerate() {
1017            if diff_i.action == DiffAction::Delete {
1018                for (j, diff_j) in result.files.iter().enumerate() {
1019                    if diff_j.action == DiffAction::Upload {
1020                        if let (Some(src), Some(dst)) = (&diff_i.target_info, &diff_j.source_info) {
1021                            let similarity = self.calculate_file_similarity(src, dst);
1022                            if similarity > 0.8 {
1023                                potential_moves.push((i, j, similarity));
1024                            }
1025                        }
1026                    }
1027                }
1028            }
1029        }
1030
1031        // 处理检测到的移动
1032        for (delete_idx, upload_idx, similarity) in potential_moves {
1033            // 更新文件差异为移动操作
1034            let delete_path = result.files[delete_idx].path.clone();
1035            let upload_path = result.files[upload_idx].path.clone();
1036
1037            if let (Some(source), Some(target)) = (
1038                result.files[upload_idx].source_info.clone(),
1039                result.files[delete_idx].target_info.clone(),
1040            ) {
1041                let move_diff = FileDiff::move_file(delete_path, upload_path, source, target);
1042
1043                // 替换原来的差异
1044                result.files[delete_idx] = move_diff.clone();
1045                result.files[upload_idx] = move_diff;
1046            }
1047        }
1048    }
1049
1050    fn detect_conflicts(&self, result: &mut DiffResult) {
1051        let mut path_map: std::collections::HashMap<String, Vec<usize>> =
1052            std::collections::HashMap::new();
1053        for (idx, diff) in result.files.iter().enumerate() {
1054            path_map.entry(diff.path.clone()).or_default().push(idx);
1055        }
1056        for indices in path_map.values() {
1057            if indices.len() > 1 {
1058                let has_upload = indices
1059                    .iter()
1060                    .any(|&i| result.files[i].action == DiffAction::Upload);
1061                let has_delete = indices
1062                    .iter()
1063                    .any(|&i| result.files[i].action == DiffAction::Delete);
1064                let has_update = indices
1065                    .iter()
1066                    .any(|&i| result.files[i].action == DiffAction::Update);
1067                if (has_upload && has_delete) || (has_upload && has_update) {
1068                    for &i in indices {
1069                        if let (Some(source), Some(target)) =
1070                            (&result.files[i].source_info, &result.files[i].target_info)
1071                        {
1072                            result.files[i] = FileDiff::conflict(
1073                                result.files[i].path.clone(),
1074                                source.clone(),
1075                                target.clone(),
1076                            );
1077                        }
1078                    }
1079                }
1080            }
1081        }
1082    }
1083
1084    fn calculate_file_similarity(&self, file1: &FileMetadata, file2: &FileMetadata) -> f64 {
1085        let mut similarity = 0.0;
1086
1087        // 大小相似度(权重40%)
1088        if file1.size == file2.size {
1089            similarity += 0.4;
1090        } else if file1.size > 0 && file2.size > 0 {
1091            let min_size = file1.size.min(file2.size) as f64;
1092            let max_size = file1.size.max(file2.size) as f64;
1093            similarity += 0.4 * (min_size / max_size);
1094        }
1095
1096        // 修改时间相似度(权重30%)
1097        let time_diff = (file1.modified - file2.modified).abs();
1098        if time_diff < 60 {
1099            similarity += 0.3; // 时间差小于1分钟
1100        } else if time_diff < 3600 {
1101            similarity += 0.2; // 时间差小于1小时
1102        } else if time_diff < 86400 {
1103            similarity += 0.1; // 时间差小于1天
1104        }
1105
1106        // 文件类型相似度(权重30%)
1107        if let (Some(mime1), Some(mime2)) = (&file1.mime_type, &file2.mime_type) {
1108            if mime1 == mime2 {
1109                similarity += 0.3;
1110            } else if mime1.split('/').next() == mime2.split('/').next() {
1111                similarity += 0.15; // 相同主类型
1112            }
1113        }
1114
1115        similarity
1116    }
1117
1118    fn update_cache(&mut self, files: &[FileMetadata]) {
1119        for file in files {
1120            self.cache
1121                .insert(file.path.to_string_lossy().to_string(), file.clone());
1122        }
1123    }
1124}
1125
/// Options controlling difference detection.
#[derive(Debug, Clone)]
pub struct DiffOptions {
    /// Compare file sizes.
    pub compare_size: bool,
    /// Compare modification times.
    pub compare_mtime: bool,
    /// Compare file checksums.
    pub compare_checksum: bool,
    /// List of ignore patterns.
    pub ignore_patterns: Vec<String>,
    /// Maximum detection depth; `None` leaves it unset.
    pub max_depth: Option<usize>,
    /// Whether to follow symbolic links.
    pub follow_symlinks: bool,
    /// Whether to detect file moves.
    pub detect_moves: bool,
    /// Similarity threshold (used for move detection).
    pub similarity_threshold: f64,
    /// Whether to detect conflicts.
    pub detect_conflicts: bool,
    /// Whether to include hidden files.
    pub include_hidden: bool,
    /// File size threshold (for large-file handling).
    pub large_file_threshold: u64,
}
1152
1153impl Default for DiffOptions {
1154    fn default() -> Self {
1155        Self {
1156            compare_size: true,
1157            compare_mtime: true,
1158            compare_checksum: false, // 默认关闭,因为计算哈希较慢
1159            ignore_patterns: vec![
1160                ".*".to_string(),
1161                "*/.*".to_string(),
1162                "*.tmp".to_string(),
1163                "*.temp".to_string(),
1164            ],
1165            max_depth: None,
1166            follow_symlinks: false,
1167            detect_moves: true,
1168            similarity_threshold: 0.7,
1169            detect_conflicts: true,
1170            include_hidden: false,
1171            large_file_threshold: 1024 * 1024 * 100, // 100MB
1172        }
1173    }
1174}
1175
/// Maps a file extension to a MIME type string.
///
/// The extension is converted lossily to UTF-8 and lowercased, then looked up
/// among dot-less keys such as `txt` or `jpeg`. Extensions that are not in the
/// table yield `application/octet-stream`.
fn detect_mime_type(extension: &std::ffi::OsStr) -> String {
    const FALLBACK: &str = "application/octet-stream";
    const KNOWN_TYPES: &[(&str, &str)] = &[
        ("txt", "text/plain"),
        ("json", "application/json"),
        ("xml", "application/xml"),
        ("html", "text/html"),
        ("htm", "text/html"),
        ("css", "text/css"),
        ("js", "application/javascript"),
        ("jpg", "image/jpeg"),
        ("jpeg", "image/jpeg"),
        ("png", "image/png"),
        ("gif", "image/gif"),
        ("pdf", "application/pdf"),
        ("zip", "application/zip"),
        ("tar", "application/x-tar"),
        ("gz", "application/gzip"),
        ("mp3", "audio/mpeg"),
        ("mp4", "video/mp4"),
        ("avi", "video/x-msvideo"),
        ("doc", "application/msword"),
        (
            "docx",
            "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
        ),
        ("xls", "application/vnd.ms-excel"),
        (
            "xlsx",
            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        ),
        ("ppt", "application/vnd.ms-powerpoint"),
        (
            "pptx",
            "application/vnd.openxmlformats-officedocument.presentationml.presentation",
        ),
        ("md", "text/markdown"),
        ("yml", "text/yaml"),
        ("yaml", "text/yaml"),
        ("toml", "application/toml"),
        ("rs", "text/x-rust"),
        ("go", "text/x-go"),
        ("py", "text/x-python"),
        ("java", "text/x-java"),
        ("c", "text/x-c"),
        ("cpp", "text/x-c++"),
        ("cc", "text/x-c++"),
        ("h", "text/x-c++"),
        ("hpp", "text/x-c++"),
    ];

    let normalized = extension.to_string_lossy().to_lowercase();

    KNOWN_TYPES
        .iter()
        .find(|(known, _)| *known == normalized.as_str())
        .map_or(FALLBACK, |(_, mime)| *mime)
        .to_string()
}
1216
#[cfg(test)]
mod tests {
    use super::*;

    /// Adding a single upload diff must bump the file counter, and the
    /// summary text must contain the total-files label.
    #[test]
    fn test_diff_result_add_file_and_summary() {
        let metadata = FileMetadata::new(PathBuf::from("a.txt"));
        let upload = FileDiff::new(
            "a.txt".to_string(),
            DiffAction::Upload,
            Some(metadata),
            None,
        );

        let mut result = DiffResult::new();
        result.add_file(upload);

        assert_eq!(result.total_files, 1);
        assert!(result.summary().contains("文件总数"));
    }
}