1use chie_crypto::{ChunkHasher, Hash, IncrementalHasher, hash};
9use std::io::{self, Read};
10use thiserror::Error;
11
/// Default chunk size in bytes (262,144 = 256 * 1024), used by
/// `ManifestBuilder::default()`.
pub const DEFAULT_CHUNK_SIZE: usize = 262_144;
14
/// Errors reported while checking content against a manifest.
#[derive(Debug, Error)]
pub enum IntegrityError {
    /// The hash computed for a chunk differs from the hash stored for that index.
    #[error("Chunk hash mismatch at index {index}: expected {expected}, got {actual}")]
    ChunkHashMismatch {
        /// Index of the chunk that failed verification.
        index: usize,
        /// Hex encoding of the hash recorded in the manifest.
        expected: String,
        /// Hex encoding of the computed hash. The repairer stores the literal
        /// text `repair_failed` here when it gives up on a chunk.
        actual: String,
    },

    /// The recomputed root hash differs from the one stored in the manifest.
    /// Both fields are hex encoded.
    #[error("Root hash mismatch: expected {expected}, got {actual}")]
    RootHashMismatch { expected: String, actual: String },

    /// A chunk index or a total chunk count does not fit the manifest.
    /// `expected` is the manifest's chunk count; `actual` is either the
    /// offending index plus one or the number of chunks actually read.
    #[error("Invalid chunk count: expected {expected}, got {actual}")]
    InvalidChunkCount { expected: usize, actual: usize },

    /// Content size exceeds a maximum. No code visible in this part of the
    /// file constructs this variant.
    #[error("Content too large: {size} bytes exceeds maximum {max} bytes")]
    ContentTooLarge { size: u64, max: u64 },

    /// An I/O error from the underlying reader; `#[from]` lets `?` convert it.
    #[error("IO error: {0}")]
    IoError(#[from] io::Error),
}
37
/// Description of a piece of content as a sequence of hashed chunks.
#[derive(Debug, Clone)]
pub struct ContentManifest {
    /// Content identifier as supplied to the builder (empty when none was set).
    pub cid: String,
    /// Total content size in bytes.
    pub size_bytes: u64,
    /// Nominal size of every chunk in bytes; the final chunk may be shorter.
    pub chunk_size: usize,
    /// Number of chunks the content is split into.
    pub chunk_count: usize,
    /// Hash of each chunk, stored at the position of the chunk's index.
    pub chunk_hashes: Vec<Hash>,
    /// Hash derived from the chunk hashes fed in order into a hasher;
    /// see `verify_root_hash`.
    pub root_hash: Hash,
}
54
55impl ContentManifest {
56 pub fn verify_chunk(&self, index: usize, chunk_data: &[u8]) -> Result<(), IntegrityError> {
58 if index >= self.chunk_count {
59 return Err(IntegrityError::InvalidChunkCount {
60 expected: self.chunk_count,
61 actual: index + 1,
62 });
63 }
64
65 let expected = &self.chunk_hashes[index];
66 let actual = hash(chunk_data);
67
68 if expected != &actual {
69 return Err(IntegrityError::ChunkHashMismatch {
70 index,
71 expected: hex::encode(expected),
72 actual: hex::encode(actual),
73 });
74 }
75
76 Ok(())
77 }
78
79 pub fn verify_root_hash(&self) -> Result<(), IntegrityError> {
81 let mut hasher = IncrementalHasher::new();
82 for chunk_hash in &self.chunk_hashes {
83 hasher.update(chunk_hash);
84 }
85 let actual = hasher.finalize();
86
87 if actual != self.root_hash {
88 return Err(IntegrityError::RootHashMismatch {
89 expected: hex::encode(self.root_hash),
90 actual: hex::encode(actual),
91 });
92 }
93
94 Ok(())
95 }
96
97 #[must_use]
99 pub fn expected_chunk_size(&self, index: usize) -> usize {
100 if index >= self.chunk_count {
101 return 0;
102 }
103
104 if index == self.chunk_count - 1 {
105 let remaining = self.size_bytes as usize % self.chunk_size;
107 if remaining == 0 {
108 self.chunk_size
109 } else {
110 remaining
111 }
112 } else {
113 self.chunk_size
114 }
115 }
116}
117
/// Incrementally builds a `ContentManifest` from chunks or from a reader.
pub struct ManifestBuilder {
    /// Optional content identifier; `build` substitutes an empty string when unset.
    cid: Option<String>,
    /// Chunk size in bytes used when reading from a reader.
    chunk_size: usize,
    /// Accumulates the per-chunk hashes and produces the final result in `build`.
    chunk_hasher: ChunkHasher,
    /// Running total of the bytes passed to `add_chunk`.
    total_bytes: u64,
}
125
126impl Default for ManifestBuilder {
127 fn default() -> Self {
128 Self::new(DEFAULT_CHUNK_SIZE)
129 }
130}
131
132impl ManifestBuilder {
133 pub fn new(chunk_size: usize) -> Self {
135 Self {
136 cid: None,
137 chunk_size,
138 chunk_hasher: ChunkHasher::new(),
139 total_bytes: 0,
140 }
141 }
142
143 pub fn cid(mut self, cid: impl Into<String>) -> Self {
145 self.cid = Some(cid.into());
146 self
147 }
148
149 pub fn add_chunk(&mut self, chunk_data: &[u8]) -> Hash {
151 self.total_bytes += chunk_data.len() as u64;
152 self.chunk_hasher.add_chunk(chunk_data)
153 }
154
155 pub fn from_reader<R: Read>(mut self, reader: &mut R) -> io::Result<Self> {
157 let mut buffer = vec![0u8; self.chunk_size];
158
159 loop {
160 let mut total_read = 0;
161
162 while total_read < self.chunk_size {
163 let bytes_read = reader.read(&mut buffer[total_read..])?;
164 if bytes_read == 0 {
165 break;
166 }
167 total_read += bytes_read;
168 }
169
170 if total_read == 0 {
171 break;
172 }
173
174 self.add_chunk(&buffer[..total_read]);
175
176 if total_read < self.chunk_size {
177 break;
178 }
179 }
180
181 Ok(self)
182 }
183
184 pub fn build(self) -> ContentManifest {
186 let result = self.chunk_hasher.finalize();
187
188 ContentManifest {
189 cid: self.cid.unwrap_or_default(),
190 size_bytes: self.total_bytes,
191 chunk_size: self.chunk_size,
192 chunk_count: result.chunk_count(),
193 chunk_hashes: result.chunk_hashes,
194 root_hash: result.root_hash,
195 }
196 }
197}
198
/// Stateful verifier that checks chunks against a manifest and tracks progress.
pub struct ContentVerifier {
    /// Manifest the chunks are verified against.
    manifest: ContentManifest,
    /// Index of the chunk the next `verify_next` call will check.
    current_chunk: usize,
    /// Sum of the lengths of all chunks that verified successfully.
    verified_bytes: u64,
    /// Indices of chunks whose verification failed.
    failed_chunks: Vec<usize>,
}
206
207impl ContentVerifier {
208 pub fn new(manifest: ContentManifest) -> Self {
210 Self {
211 manifest,
212 current_chunk: 0,
213 verified_bytes: 0,
214 failed_chunks: Vec::new(),
215 }
216 }
217
218 pub fn verify_next(&mut self, chunk_data: &[u8]) -> Result<(), IntegrityError> {
220 if self.current_chunk >= self.manifest.chunk_count {
221 return Err(IntegrityError::InvalidChunkCount {
222 expected: self.manifest.chunk_count,
223 actual: self.current_chunk + 1,
224 });
225 }
226
227 let result = self.manifest.verify_chunk(self.current_chunk, chunk_data);
228
229 if result.is_err() {
230 self.failed_chunks.push(self.current_chunk);
231 } else {
232 self.verified_bytes += chunk_data.len() as u64;
233 }
234
235 self.current_chunk += 1;
236 result
237 }
238
239 pub fn verify_chunk(&mut self, index: usize, chunk_data: &[u8]) -> Result<(), IntegrityError> {
241 let result = self.manifest.verify_chunk(index, chunk_data);
242
243 if result.is_err() {
244 if !self.failed_chunks.contains(&index) {
245 self.failed_chunks.push(index);
246 }
247 } else {
248 self.verified_bytes += chunk_data.len() as u64;
249 }
250
251 result
252 }
253
254 #[must_use]
256 #[inline]
257 pub fn is_complete(&self) -> bool {
258 self.current_chunk >= self.manifest.chunk_count && self.failed_chunks.is_empty()
259 }
260
261 #[must_use]
263 #[inline]
264 pub const fn chunks_verified(&self) -> usize {
265 self.current_chunk
266 }
267
268 #[must_use]
270 #[inline]
271 pub const fn bytes_verified(&self) -> u64 {
272 self.verified_bytes
273 }
274
275 #[must_use]
277 #[inline]
278 pub fn failed_chunks(&self) -> &[usize] {
279 &self.failed_chunks
280 }
281
282 #[must_use]
284 #[inline]
285 pub fn manifest(&self) -> &ContentManifest {
286 &self.manifest
287 }
288}
289
290pub fn verify_content<R: Read>(
292 manifest: &ContentManifest,
293 reader: &mut R,
294) -> Result<(), IntegrityError> {
295 let mut buffer = vec![0u8; manifest.chunk_size];
296 let mut chunk_index = 0;
297
298 loop {
299 let mut total_read = 0;
300
301 while total_read < manifest.chunk_size {
302 let bytes_read = reader.read(&mut buffer[total_read..])?;
303 if bytes_read == 0 {
304 break;
305 }
306 total_read += bytes_read;
307 }
308
309 if total_read == 0 {
310 break;
311 }
312
313 manifest.verify_chunk(chunk_index, &buffer[..total_read])?;
314 chunk_index += 1;
315 }
316
317 if chunk_index != manifest.chunk_count {
318 return Err(IntegrityError::InvalidChunkCount {
319 expected: manifest.chunk_count,
320 actual: chunk_index,
321 });
322 }
323
324 Ok(())
325}
326
327#[inline]
329pub fn verify_single_chunk(chunk_data: &[u8], expected_hash: &Hash) -> bool {
330 &hash(chunk_data) == expected_hash
331}
332
/// Policy the repairer applies to a chunk that fails verification.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RepairStrategy {
    /// Do not attempt a repair; the verification error is returned as is.
    Skip,
    /// Fetch a replacement, stopping after the second attempt at the latest
    /// (and never exceeding the configured maximum number of attempts).
    SingleSource,
    /// Fetch a replacement, using up to the configured maximum number of attempts.
    MultiSource,
}
343
/// Summary of one batch verify-and-repair run.
#[derive(Debug, Clone)]
pub struct IntegrityRepairResult {
    /// Number of chunks that failed their initial verification.
    pub corrupted_count: usize,
    /// Number of corrupted chunks for which a repair succeeded.
    pub repaired_count: usize,
    /// Indices of the chunks that could not be repaired.
    pub failed_repairs: Vec<usize>,
    /// Wall-clock duration of the whole run in milliseconds.
    pub repair_duration_ms: u64,
}

impl IntegrityRepairResult {
    /// Returns `true` when no repair failed.
    #[must_use]
    #[inline]
    pub fn is_complete(&self) -> bool {
        matches!(self.failed_repairs.as_slice(), [])
    }

    /// Returns the fraction of corrupted chunks that were repaired, or `1.0`
    /// when nothing was corrupted.
    #[must_use]
    #[inline]
    pub fn success_rate(&self) -> f64 {
        match self.corrupted_count {
            0 => 1.0,
            corrupted => self.repaired_count as f64 / corrupted as f64,
        }
    }
}
375
/// Verifies chunks against a manifest and tries to replace corrupted ones
/// through a caller-supplied fetch function.
pub struct IntegrityRepairer {
    /// Manifest the chunks are verified against.
    manifest: ContentManifest,
    /// Policy applied to chunks that fail verification.
    strategy: RepairStrategy,
    /// Upper bound on fetch attempts per repair (the constructor sets it to 3).
    max_attempts: usize,
    /// Counters accumulated across all repair operations.
    repair_stats: RepairStats,
}
390
/// Counters accumulated by a repairer across repair operations.
#[derive(Debug, Clone, Default)]
pub struct RepairStats {
    /// Number of repair operations recorded.
    pub total_operations: usize,
    /// Number of repair operations that produced a valid chunk.
    pub total_repaired: usize,
    /// Number of repair operations that gave up.
    pub total_failed: usize,
    /// Running average repair time in milliseconds.
    pub avg_repair_time_ms: f64,
}

impl RepairStats {
    /// Returns the share of successful repairs among all finished repairs,
    /// or `1.0` when none has been recorded yet.
    #[must_use]
    #[inline]
    pub fn success_rate(&self) -> f64 {
        let finished = self.total_repaired + self.total_failed;
        if finished > 0 {
            self.total_repaired as f64 / finished as f64
        } else {
            1.0
        }
    }
}
416
417impl IntegrityRepairer {
418 #[must_use]
420 pub fn new(manifest: ContentManifest, strategy: RepairStrategy) -> Self {
421 Self {
422 manifest,
423 strategy,
424 max_attempts: 3,
425 repair_stats: RepairStats::default(),
426 }
427 }
428
429 pub fn set_max_attempts(&mut self, max_attempts: usize) {
431 self.max_attempts = max_attempts;
432 }
433
434 pub fn verify_and_repair<F>(
439 &mut self,
440 index: usize,
441 chunk_data: &[u8],
442 fetch_fn: F,
443 ) -> Result<Vec<u8>, IntegrityError>
444 where
445 F: FnMut(usize) -> Option<Vec<u8>>,
446 {
447 if self.manifest.verify_chunk(index, chunk_data).is_ok() {
449 return Ok(chunk_data.to_vec());
450 }
451
452 match self.strategy {
454 RepairStrategy::Skip => Err(IntegrityError::ChunkHashMismatch {
455 index,
456 expected: hex::encode(self.manifest.chunk_hashes[index]),
457 actual: hex::encode(hash(chunk_data)),
458 }),
459 RepairStrategy::SingleSource | RepairStrategy::MultiSource => {
460 self.attempt_repair(index, fetch_fn)
461 }
462 }
463 }
464
465 fn attempt_repair<F>(
467 &mut self,
468 index: usize,
469 mut fetch_fn: F,
470 ) -> Result<Vec<u8>, IntegrityError>
471 where
472 F: FnMut(usize) -> Option<Vec<u8>>,
473 {
474 let start_time = std::time::Instant::now();
475
476 for attempt in 0..self.max_attempts {
477 if let Some(candidate_data) = fetch_fn(index) {
478 if self.manifest.verify_chunk(index, &candidate_data).is_ok() {
480 let duration_ms = start_time.elapsed().as_millis() as u64;
482 self.repair_stats.total_operations += 1;
483 self.repair_stats.total_repaired += 1;
484 self.update_avg_repair_time(duration_ms);
485
486 return Ok(candidate_data);
487 }
488 }
489
490 if self.strategy == RepairStrategy::SingleSource && attempt > 0 {
492 break; }
494 }
495
496 self.repair_stats.total_operations += 1;
498 self.repair_stats.total_failed += 1;
499
500 Err(IntegrityError::ChunkHashMismatch {
501 index,
502 expected: hex::encode(self.manifest.chunk_hashes[index]),
503 actual: "repair_failed".to_string(),
504 })
505 }
506
507 fn update_avg_repair_time(&mut self, new_time_ms: u64) {
509 let total_ops = self.repair_stats.total_operations;
510 let old_avg = self.repair_stats.avg_repair_time_ms;
511 self.repair_stats.avg_repair_time_ms =
512 (old_avg * (total_ops - 1) as f64 + new_time_ms as f64) / total_ops as f64;
513 }
514
515 pub fn batch_verify_and_repair<F>(
519 &mut self,
520 chunks: &[(usize, Vec<u8>)],
521 fetch_fn: F,
522 ) -> IntegrityRepairResult
523 where
524 F: Fn(usize) -> Option<Vec<u8>>,
525 {
526 let start_time = std::time::Instant::now();
527 let mut corrupted_count = 0;
528 let mut repaired_count = 0;
529 let mut failed_repairs = Vec::new();
530
531 for (index, chunk_data) in chunks {
532 if self.manifest.verify_chunk(*index, chunk_data).is_err() {
534 corrupted_count += 1;
535
536 match self.verify_and_repair(*index, chunk_data, &fetch_fn) {
538 Ok(_) => repaired_count += 1,
539 Err(_) => failed_repairs.push(*index),
540 }
541 }
542 }
543
544 let repair_duration_ms = start_time.elapsed().as_millis() as u64;
545
546 IntegrityRepairResult {
547 corrupted_count,
548 repaired_count,
549 failed_repairs,
550 repair_duration_ms,
551 }
552 }
553
554 #[must_use]
556 #[inline]
557 pub fn stats(&self) -> &RepairStats {
558 &self.repair_stats
559 }
560
561 #[must_use]
563 #[inline]
564 pub fn manifest(&self) -> &ContentManifest {
565 &self.manifest
566 }
567
568 #[must_use]
570 #[inline]
571 pub const fn strategy(&self) -> RepairStrategy {
572 self.strategy
573 }
574}
575
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Cursor;

    /// Building from a reader records the CID, the size and the chunk layout.
    #[test]
    fn test_manifest_builder() {
        let data = b"Hello, CHIE Protocol! This is test data for integrity verification.";
        let mut cursor = Cursor::new(data);

        let manifest = ManifestBuilder::new(20)
            .cid("QmTest123")
            .from_reader(&mut cursor)
            .unwrap()
            .build();

        assert_eq!(manifest.cid, "QmTest123");
        assert_eq!(manifest.size_bytes, data.len() as u64);
        assert_eq!(manifest.chunk_size, 20);
        // 67 bytes at 20 bytes per chunk: three full chunks plus a partial one.
        assert_eq!(manifest.chunk_count, 4);
    }

    /// A chunk verifies only against the index it was added at.
    #[test]
    fn test_chunk_verification() {
        let chunk1 = b"Chunk 1 data here..";
        let chunk2 = b"Chunk 2 data here..";

        let mut builder = ManifestBuilder::new(20);
        builder.add_chunk(chunk1);
        builder.add_chunk(chunk2);
        let manifest = builder.build();

        assert!(manifest.verify_chunk(0, chunk1).is_ok());
        assert!(manifest.verify_chunk(1, chunk2).is_ok());
        // Data of chunk 1 presented at index 0 must be rejected.
        assert!(manifest.verify_chunk(0, chunk2).is_err());
    }

    /// Sequential verification of all chunks completes without failures.
    #[test]
    fn test_content_verifier() {
        let chunk1 = b"Chunk 1";
        let chunk2 = b"Chunk 2";
        let chunk3 = b"Chunk 3";

        let mut builder = ManifestBuilder::new(10);
        builder.add_chunk(chunk1);
        builder.add_chunk(chunk2);
        builder.add_chunk(chunk3);
        let manifest = builder.build();

        let mut verifier = ContentVerifier::new(manifest);

        assert!(verifier.verify_next(chunk1).is_ok());
        assert!(verifier.verify_next(chunk2).is_ok());
        assert!(verifier.verify_next(chunk3).is_ok());
        assert!(verifier.is_complete());
        assert_eq!(verifier.failed_chunks().len(), 0);
    }

    /// Streaming verification accepts the original bytes and rejects altered ones.
    #[test]
    fn test_verify_content() {
        let data = b"ABCDEFGHIJKLMNOPQRSTUVWXYZ";
        let mut cursor = Cursor::new(data);

        let manifest = ManifestBuilder::new(10)
            .from_reader(&mut cursor)
            .unwrap()
            .build();

        let mut cursor2 = Cursor::new(data);
        assert!(verify_content(&manifest, &mut cursor2).is_ok());

        // Same content with the final byte changed from `Z` to `z`.
        let wrong_data = b"ABCDEFGHIJKLMNOPQRSTUVWXYz";
        let mut cursor3 = Cursor::new(wrong_data);
        assert!(verify_content(&manifest, &mut cursor3).is_err());
    }

    /// The root hash of a freshly built manifest verifies.
    #[test]
    fn test_root_hash_verification() {
        let mut builder = ManifestBuilder::new(10);
        builder.add_chunk(b"Chunk 1");
        builder.add_chunk(b"Chunk 2");
        let manifest = builder.build();

        assert!(manifest.verify_root_hash().is_ok());
    }

    /// A valid chunk is returned without recording a repair operation.
    #[test]
    fn test_integrity_repairer_no_corruption() {
        let chunk = b"Valid chunk data";
        let mut builder = ManifestBuilder::new(16);
        builder.add_chunk(chunk);
        let manifest = builder.build();

        let mut repairer = IntegrityRepairer::new(manifest, RepairStrategy::SingleSource);

        // The fetch function would fail, but it must not be needed.
        let result = repairer.verify_and_repair(0, chunk, |_| None);
        assert!(result.is_ok());
        assert_eq!(repairer.stats().total_operations, 0);
    }

    /// A corrupted chunk is replaced when the fetch function supplies valid data.
    #[test]
    fn test_integrity_repairer_successful_repair() {
        let valid_chunk = b"Valid chunk data";
        let corrupted_chunk = b"Corrupted chunk!";

        let mut builder = ManifestBuilder::new(16);
        builder.add_chunk(valid_chunk);
        let manifest = builder.build();

        let mut repairer = IntegrityRepairer::new(manifest, RepairStrategy::SingleSource);

        let result = repairer.verify_and_repair(0, corrupted_chunk, |_| Some(valid_chunk.to_vec()));
        assert!(result.is_ok());
        assert_eq!(repairer.stats().total_repaired, 1);
        assert_eq!(repairer.stats().success_rate(), 1.0);
    }

    /// A repair fails, and is counted as failed, when nothing can be fetched.
    #[test]
    fn test_integrity_repairer_failed_repair() {
        let valid_chunk = b"Valid chunk data";
        let corrupted_chunk = b"Corrupted chunk!";

        let mut builder = ManifestBuilder::new(16);
        builder.add_chunk(valid_chunk);
        let manifest = builder.build();

        let mut repairer = IntegrityRepairer::new(manifest, RepairStrategy::SingleSource);

        let result = repairer.verify_and_repair(0, corrupted_chunk, |_| None);
        assert!(result.is_err());
        assert_eq!(repairer.stats().total_failed, 1);
        assert_eq!(repairer.stats().success_rate(), 0.0);
    }

    /// Batch repair counts only the corrupted chunks and repairs both of them.
    #[test]
    fn test_integrity_repairer_batch_repair() {
        let chunk1 = b"Chunk 1 valid!!!";
        let chunk2 = b"Chunk 2 valid!!!";
        let chunk3 = b"Chunk 3 valid!!!";

        let mut builder = ManifestBuilder::new(16);
        builder.add_chunk(chunk1);
        builder.add_chunk(chunk2);
        builder.add_chunk(chunk3);
        let manifest = builder.build();

        let mut repairer = IntegrityRepairer::new(manifest, RepairStrategy::MultiSource);

        // Index 0 holds valid data; indices 1 and 2 hold corrupted data.
        let chunks = vec![
            (0, chunk1.to_vec()),
            (1, b"Corrupted!!!!!!!".to_vec()),
            (2, b"Also corrupted!".to_vec()),
        ];

        // Supplies the valid data for the two corrupted indices only.
        let fetch_fn = |index: usize| match index {
            1 => Some(chunk2.to_vec()),
            2 => Some(chunk3.to_vec()),
            _ => None,
        };

        let result = repairer.batch_verify_and_repair(&chunks, fetch_fn);
        assert_eq!(result.corrupted_count, 2);
        assert_eq!(result.repaired_count, 2);
        assert!(result.is_complete());
        assert_eq!(result.success_rate(), 1.0);
    }

    /// The `Skip` strategy reports the corruption without attempting a repair.
    #[test]
    fn test_repair_strategy_skip() {
        let valid_chunk = b"Valid chunk data";
        let corrupted_chunk = b"Corrupted chunk!";

        let mut builder = ManifestBuilder::new(16);
        builder.add_chunk(valid_chunk);
        let manifest = builder.build();

        let mut repairer = IntegrityRepairer::new(manifest, RepairStrategy::Skip);

        let result = repairer.verify_and_repair(0, corrupted_chunk, |_| Some(valid_chunk.to_vec()));
        assert!(result.is_err());
        // No repair operation may be recorded even though a fetch would succeed.
        assert_eq!(repairer.stats().total_operations, 0);
    }

    /// `success_rate` and `is_complete` reflect the counts stored in the result.
    #[test]
    fn test_repair_result_success_rate() {
        let result = IntegrityRepairResult {
            corrupted_count: 10,
            repaired_count: 7,
            failed_repairs: vec![2, 5, 8],
            repair_duration_ms: 100,
        };

        assert_eq!(result.success_rate(), 0.7);
        assert!(!result.is_complete());

        let perfect_result = IntegrityRepairResult {
            corrupted_count: 5,
            repaired_count: 5,
            failed_repairs: vec![],
            repair_duration_ms: 50,
        };

        assert_eq!(perfect_result.success_rate(), 1.0);
        assert!(perfect_result.is_complete());
    }

    /// Statistics accumulate over one successful and one failed repair.
    #[test]
    fn test_repair_stats() {
        let valid_chunk = b"Valid chunk data";
        let corrupted_chunk = b"Corrupted chunk!";

        let mut builder = ManifestBuilder::new(16);
        builder.add_chunk(valid_chunk);
        let manifest = builder.build();

        let mut repairer = IntegrityRepairer::new(manifest, RepairStrategy::MultiSource);
        repairer.set_max_attempts(2);

        // First operation: the fetch supplies valid data, so the repair succeeds.
        let _ = repairer.verify_and_repair(0, corrupted_chunk, |_| Some(valid_chunk.to_vec()));

        // Second operation: nothing can be fetched, so the repair fails.
        let _ = repairer.verify_and_repair(0, corrupted_chunk, |_| None);

        let stats = repairer.stats();
        assert_eq!(stats.total_operations, 2);
        assert_eq!(stats.total_repaired, 1);
        assert_eq!(stats.total_failed, 1);
        assert_eq!(stats.success_rate(), 0.5);
        assert!(stats.avg_repair_time_ms >= 0.0);
    }
}