1use crate::{Config, PhasedScheme, ValidatingScheme};
118use bytes::BufMut;
119use commonware_codec::{Encode, EncodeSize, FixedSize, RangeCfg, Read, ReadExt, Write};
120use commonware_cryptography::{
121 transcript::{Summary, Transcript},
122 Digest, Hasher,
123};
124use commonware_math::{
125 fields::goldilocks::F,
126 ntt::{EvaluationVector, Matrix},
127};
128use commonware_parallel::Strategy;
129use commonware_storage::bmt::{Builder as BmtBuilder, Error as BmtError, Proof};
130use rand::seq::SliceRandom as _;
131use std::{marker::PhantomData, sync::Arc};
132use thiserror::Error;
133
134fn iter_u64_le(data: impl bytes::Buf) -> impl Iterator<Item = u64> {
136 struct Iter<B> {
137 remaining_u64s: usize,
138 tail: usize,
139 inner: B,
140 }
141
142 impl<B: bytes::Buf> Iter<B> {
143 fn new(inner: B) -> Self {
144 let remaining_u64s = inner.remaining() / 8;
145 let tail = inner.remaining() % 8;
146 Self {
147 remaining_u64s,
148 tail,
149 inner,
150 }
151 }
152 }
153
154 impl<B: bytes::Buf> Iterator for Iter<B> {
155 type Item = u64;
156
157 fn next(&mut self) -> Option<Self::Item> {
158 if self.remaining_u64s > 0 {
159 self.remaining_u64s -= 1;
160 return Some(self.inner.get_u64_le());
161 }
162 if self.tail > 0 {
163 let mut chunk = [0u8; 8];
164 self.inner.copy_to_slice(&mut chunk[..self.tail]);
165 self.tail = 0;
166 return Some(u64::from_le_bytes(chunk));
167 }
168 None
169 }
170 }
171 Iter::new(data)
172}
173
/// Collects a stream of `u64` words into a little-endian byte vector,
/// truncated to exactly `max_length` bytes.
fn collect_u64_le(max_length: usize, data: impl Iterator<Item = u64>) -> Vec<u8> {
    let mut out: Vec<u8> = data.flat_map(|word| word.to_le_bytes()).collect();
    out.truncate(max_length);
    out
}
182
183fn row_digest<H: Hasher>(row: &[F]) -> H::Digest {
184 let mut h = H::new();
185 for x in row {
186 h.update(&x.to_le_bytes());
187 }
188 h.finalize()
189}
190
191mod topology;
192use topology::Topology;
193
/// A fully self-describing shard produced by [`Zoda`] encoding.
///
/// A strong shard carries everything needed to reconstruct the verifier's
/// checking data (see `CheckingData::reckon`) in addition to the shard's own
/// rows and inclusion proof.
#[derive(Clone, Debug)]
pub struct StrongShard<D: Digest> {
    // Length in bytes of the original (pre-encoding) data.
    data_bytes: usize,
    // BMT root over the digests of all encoded rows.
    root: D,
    // Multi-inclusion proof for this shard's rows against `root`.
    inclusion_proof: Proof<D>,
    // The encoded-data rows assigned to this shard.
    rows: Matrix<F>,
    // Checksum matrix (data x checking matrix); identical across all shards,
    // so it is shared via `Arc`.
    checksum: Arc<Matrix<F>>,
}
203
204impl<D: Digest> PartialEq for StrongShard<D> {
205 fn eq(&self, other: &Self) -> bool {
206 self.data_bytes == other.data_bytes
207 && self.root == other.root
208 && self.inclusion_proof == other.inclusion_proof
209 && self.rows == other.rows
210 && self.checksum == other.checksum
211 }
212}
213
214impl<D: Digest> Eq for StrongShard<D> {}
215
216impl<D: Digest> EncodeSize for StrongShard<D> {
217 fn encode_size(&self) -> usize {
218 self.data_bytes.encode_size()
219 + self.root.encode_size()
220 + self.inclusion_proof.encode_size()
221 + self.rows.encode_size()
222 + self.checksum.encode_size()
223 }
224}
225
226impl<D: Digest> Write for StrongShard<D> {
227 fn write(&self, buf: &mut impl BufMut) {
228 self.data_bytes.write(buf);
229 self.root.write(buf);
230 self.inclusion_proof.write(buf);
231 self.rows.write(buf);
232 self.checksum.write(buf);
233 }
234}
235
236impl<D: Digest> Read for StrongShard<D> {
237 type Cfg = crate::CodecConfig;
238
239 fn read_cfg(
240 buf: &mut impl bytes::Buf,
241 cfg: &Self::Cfg,
242 ) -> Result<Self, commonware_codec::Error> {
243 let data_bytes = usize::read_cfg(buf, &RangeCfg::from(..=cfg.maximum_shard_size))?;
244 let max_els = cfg.maximum_shard_size / F::SIZE;
245 Ok(Self {
246 data_bytes,
247 root: ReadExt::read(buf)?,
248 inclusion_proof: Read::read_cfg(buf, &max_els)?,
249 rows: Read::read_cfg(buf, &(max_els, ()))?,
250 checksum: Arc::new(Read::read_cfg(buf, &(max_els, ()))?),
251 })
252 }
253}
254
#[cfg(feature = "arbitrary")]
impl<D: Digest> arbitrary::Arbitrary<'_> for StrongShard<D>
where
    D: for<'a> arbitrary::Arbitrary<'a>,
{
    /// Generates an arbitrary (not necessarily internally consistent) strong
    /// shard for fuzzing; `data_bytes` is drawn as a `u32` to keep it bounded.
    fn arbitrary(u: &mut arbitrary::Unstructured<'_>) -> arbitrary::Result<Self> {
        let data_bytes = u.arbitrary::<u32>()? as usize;
        let root = u.arbitrary()?;
        let inclusion_proof = u.arbitrary()?;
        let rows = u.arbitrary()?;
        let checksum = Arc::new(u.arbitrary()?);
        Ok(Self {
            data_bytes,
            root,
            inclusion_proof,
            rows,
            checksum,
        })
    }
}
270
/// A minimized shard carrying only what `CheckingData::check` needs:
/// the shard's rows and their inclusion proof. The remaining context
/// (commitment, topology, checksum) must come from separately-held
/// [`CheckingData`].
#[derive(Clone, Debug)]
pub struct WeakShard<D: Digest> {
    // Multi-inclusion proof for `shard`'s rows against the BMT root.
    inclusion_proof: Proof<D>,
    // The encoded-data rows assigned to this shard.
    shard: Matrix<F>,
}
276
277impl<D: Digest> PartialEq for WeakShard<D> {
278 fn eq(&self, other: &Self) -> bool {
279 self.inclusion_proof == other.inclusion_proof && self.shard == other.shard
280 }
281}
282
283impl<D: Digest> Eq for WeakShard<D> {}
284
285impl<D: Digest> EncodeSize for WeakShard<D> {
286 fn encode_size(&self) -> usize {
287 self.inclusion_proof.encode_size() + self.shard.encode_size()
288 }
289}
290
291impl<D: Digest> Write for WeakShard<D> {
292 fn write(&self, buf: &mut impl BufMut) {
293 self.inclusion_proof.write(buf);
294 self.shard.write(buf);
295 }
296}
297
298impl<D: Digest> Read for WeakShard<D> {
299 type Cfg = crate::CodecConfig;
300
301 fn read_cfg(
302 buf: &mut impl bytes::Buf,
303 cfg: &Self::Cfg,
304 ) -> Result<Self, commonware_codec::Error> {
305 let max_data_bits = cfg.maximum_shard_size.saturating_mul(8);
306 let max_data_els = F::bits_to_elements(max_data_bits).max(1);
307 Ok(Self {
308 inclusion_proof: Read::read_cfg(buf, &max_data_els)?,
310 shard: Read::read_cfg(buf, &(max_data_els, ()))?,
311 })
312 }
313}
314
#[cfg(feature = "arbitrary")]
impl<D: Digest> arbitrary::Arbitrary<'_> for WeakShard<D>
where
    D: for<'a> arbitrary::Arbitrary<'a>,
{
    /// Generates an arbitrary (not necessarily valid) weak shard for fuzzing.
    fn arbitrary(u: &mut arbitrary::Unstructured<'_>) -> arbitrary::Result<Self> {
        let inclusion_proof = u.arbitrary()?;
        let shard = u.arbitrary()?;
        Ok(Self {
            inclusion_proof,
            shard,
        })
    }
}
327
/// A weak shard that has passed `CheckingData::check`: its inclusion proof
/// and checksum were verified against a specific commitment.
#[derive(Clone)]
pub struct CheckedShard {
    // Shard index (already validated against the topology).
    index: usize,
    // The verified encoded-data rows for this shard.
    shard: Matrix<F>,
    // The transcript commitment this shard was checked against.
    commitment: Summary,
}
335
336fn shuffle_indices(transcript: &Transcript, total: usize) -> Vec<u32> {
344 let total: u32 = total
345 .try_into()
346 .expect("encoded_rows exceeds u32::MAX; data too large for ZODA");
347 let mut out = (0..total).collect::<Vec<_>>();
348 out.shuffle(&mut transcript.noise(b"shuffle"));
349 out
350}
351
352fn checking_matrix(transcript: &Transcript, topology: &Topology) -> Matrix<F> {
356 Matrix::rand(
357 &mut transcript.noise(b"checking matrix"),
358 topology.data_cols,
359 topology.column_samples,
360 )
361}
362
/// Verifier-side state derived (deterministically) from a commitment and a
/// strong shard's metadata; used to check weak shards and to decode.
#[derive(Clone, PartialEq)]
pub struct CheckingData<D: Digest> {
    // The transcript commitment binding data length and BMT root.
    commitment: Summary,
    // Derived matrix/shard dimensions for this data size and config.
    topology: Topology,
    // BMT root over encoded-row digests.
    root: D,
    // Transcript-derived random checking matrix.
    checking_matrix: Matrix<F>,
    // Checksum extended to all `encoded_rows` via polynomial evaluation.
    encoded_checksum: Matrix<F>,
    // Transcript-derived permutation assigning encoded rows to shards.
    shuffled_indices: Vec<u32>,
}

impl<D: Digest> Eq for CheckingData<D> {}
375
impl<D: Digest> CheckingData<D> {
    /// Rebuilds verifier-side checking data from a strong shard's fields.
    ///
    /// Re-derives the transcript from the data length and BMT root, verifies
    /// it matches `commitment`, re-samples the checking matrix and the index
    /// shuffle, and extends `checksum` to the full encoded domain.
    ///
    /// Returns [`Error::InvalidShard`] if the commitment does not match or
    /// the checksum has unexpected dimensions.
    fn reckon(
        config: &Config,
        commitment: &Summary,
        data_bytes: usize,
        root: D,
        checksum: &Matrix<F>,
    ) -> Result<Self, Error> {
        let topology = Topology::reckon(config, data_bytes);
        // The commitment binds the data length and the BMT root; recompute
        // it exactly as `encode` does and compare.
        let mut transcript = Transcript::new(NAMESPACE);
        transcript.commit((topology.data_bytes as u64).encode());
        transcript.commit(root.encode());
        let expected_commitment = transcript.summarize();
        if *commitment != expected_commitment {
            return Err(Error::InvalidShard);
        }
        // Resume from the commitment so the derived randomness (checking
        // matrix, shuffle) matches the encoder's.
        let mut transcript = Transcript::resume(expected_commitment);
        let checking_matrix = checking_matrix(&transcript, &topology);
        if checksum.rows() != topology.data_rows || checksum.cols() != topology.column_samples {
            return Err(Error::InvalidShard);
        }
        // Commit the checksum before deriving the shuffle, mirroring the
        // encoder's transcript order.
        transcript.commit(checksum.encode());
        // Extend the checksum to the encoded domain so each received row can
        // be checked at its (shuffled) encoded-row index.
        let encoded_checksum = checksum
            .as_polynomials(topology.encoded_rows)
            .expect("checksum has too many rows")
            .evaluate()
            .data();
        let shuffled_indices = shuffle_indices(&transcript, topology.encoded_rows);

        Ok(Self {
            commitment: expected_commitment,
            topology,
            root,
            checking_matrix,
            encoded_checksum,
            shuffled_indices,
        })
    }

    /// Verifies a weak shard at `index` against this checking data.
    ///
    /// Checks: commitment match, index validity, shard dimensions, the BMT
    /// multi-inclusion proof for the shard's rows, and that each row times
    /// the checking matrix equals the extended checksum at that row's
    /// shuffled index. On success, wraps the rows as a [`CheckedShard`].
    fn check<H: Hasher<Digest = D>>(
        &self,
        commitment: &Summary,
        index: u16,
        weak_shard: &WeakShard<D>,
    ) -> Result<CheckedShard, Error> {
        if self.commitment != *commitment {
            return Err(Error::InvalidShard);
        }
        self.topology.check_index(index)?;
        if weak_shard.shard.rows() != self.topology.samples
            || weak_shard.shard.cols() != self.topology.data_cols
        {
            return Err(Error::InvalidWeakShard);
        }
        let index = index as usize;
        // Rows assigned to this shard are a contiguous slice of the
        // transcript-derived permutation.
        let these_shuffled_indices = &self.shuffled_indices
            [index * self.topology.samples..(index + 1) * self.topology.samples];

        // Pair each row's digest with its encoded-row index for the
        // multi-inclusion proof.
        let proof_elements: Vec<(H::Digest, u32)> = these_shuffled_indices
            .iter()
            .zip(weak_shard.shard.iter())
            .map(|(&i, row)| (row_digest::<H>(row), i))
            .collect();

        let mut hasher = H::new();
        if weak_shard
            .inclusion_proof
            .verify_multi_inclusion(&mut hasher, &proof_elements, &self.root)
            .is_err()
        {
            return Err(Error::InvalidWeakShard);
        }

        // Each received row, multiplied by the checking matrix, must match
        // the extended checksum at the row's encoded index.
        let shard_checksum = weak_shard.shard.mul(&self.checking_matrix);
        for (row, &i) in shard_checksum.iter().zip(these_shuffled_indices) {
            if row != &self.encoded_checksum[i as usize] {
                return Err(Error::InvalidWeakShard);
            }
        }
        Ok(CheckedShard {
            index,
            shard: weak_shard.shard.clone(),
            commitment: *commitment,
        })
    }
}
481
/// Errors produced by the ZODA scheme.
#[derive(Debug, Error)]
pub enum Error {
    /// A strong shard's metadata did not match the commitment (or checksum
    /// dimensions were wrong).
    #[error("invalid shard")]
    InvalidShard,
    /// A weak shard failed dimension, inclusion-proof, or checksum checks.
    #[error("invalid weak shard")]
    InvalidWeakShard,
    /// The shard index is out of range for the topology.
    #[error("invalid index {0}")]
    InvalidIndex(u16),
    /// Fewer shards supplied to `decode` than the minimum required.
    #[error("insufficient shards {0} < {1}")]
    InsufficientShards(usize, usize),
    /// Supplied shards did not cover enough distinct encoded rows to recover
    /// the data (e.g. duplicate shard indices).
    #[error("insufficient unique rows {0} < {1}")]
    InsufficientUniqueRows(usize, usize),
    /// The BMT could not produce a multi-inclusion proof during encoding.
    #[error("failed to create inclusion proof: {0}")]
    FailedToCreateInclusionProof(BmtError),
}
497
/// Domain-separation namespace for all ZODA transcripts.
const NAMESPACE: &[u8] = b"_COMMONWARE_CODING_ZODA";
499
/// ZODA coding scheme, parameterized by the hasher used for row digests and
/// the binary Merkle tree. Stateless: all methods are associated functions.
#[derive(Clone, Copy)]
pub struct Zoda<H> {
    // Carries the hasher type without storing a value.
    _marker: PhantomData<H>,
}
504
505impl<H> std::fmt::Debug for Zoda<H> {
506 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
507 write!(f, "Zoda")
508 }
509}
510
impl<H: Hasher> PhasedScheme for Zoda<H> {
    type Commitment = Summary;
    type StrongShard = StrongShard<H::Digest>;
    type WeakShard = WeakShard<H::Digest>;
    type CheckingData = CheckingData<H::Digest>;
    type CheckedShard = CheckedShard;
    type Error = Error;

    /// Encodes `data` into a commitment and one strong shard per participant.
    ///
    /// Pipeline: pack bytes into a field-element matrix, extend rows via
    /// polynomial evaluation, build a BMT over row digests, derive the
    /// commitment (data length + root) and transcript randomness (checking
    /// matrix + shuffle), then assemble each shard from its slice of the
    /// shuffled rows.
    fn encode(
        config: &Config,
        data: impl bytes::Buf,
        strategy: &impl Strategy,
    ) -> Result<(Self::Commitment, Vec<Self::StrongShard>), Self::Error> {
        let data_bytes = data.remaining();
        let topology = Topology::reckon(config, data_bytes);
        // Pack the input bytes into a data_rows x data_cols matrix of field
        // elements (little-endian u64 words).
        let data = Matrix::init(
            topology.data_rows,
            topology.data_cols,
            F::stream_from_u64s(iter_u64_le(data)),
        );

        // Reed-Solomon-style extension: interpolate columns and evaluate on
        // the larger `encoded_rows` domain.
        let encoded_data = data
            .as_polynomials(topology.encoded_rows)
            .expect("data has too many rows")
            .evaluate()
            .data();

        // Digest every encoded row (parallelizable) and commit them in a BMT.
        let row_hashes: Vec<H::Digest> = strategy.map_collect_vec(0..encoded_data.rows(), |i| {
            row_digest::<H>(&encoded_data[i])
        });
        let mut bmt_builder = BmtBuilder::<H>::new(row_hashes.len());
        for hash in &row_hashes {
            bmt_builder.add(hash);
        }
        let bmt = bmt_builder.build();
        let root = bmt.root();

        // Commitment binds the data length and the BMT root.
        let mut transcript = Transcript::new(NAMESPACE);
        transcript.commit((topology.data_bytes as u64).encode());
        transcript.commit(root.encode());
        let commitment = transcript.summarize();

        // Derive randomness from the commitment: checking matrix first, then
        // (after committing the checksum) the row shuffle. Verifiers replay
        // this exact order in `CheckingData::reckon`.
        let mut transcript = Transcript::resume(commitment);
        let checking_matrix = checking_matrix(&transcript, &topology);
        let checksum = Arc::new(data.mul(&checking_matrix));
        transcript.commit(checksum.encode());
        let shuffled_indices = shuffle_indices(&transcript, encoded_data.rows());

        // Each shard gets a contiguous slice of `samples` shuffled row
        // indices, the corresponding rows, and a multi-inclusion proof.
        let shard_results: Vec<Result<StrongShard<H::Digest>, Error>> =
            strategy.map_collect_vec(0..topology.total_shards, |shard_idx| {
                let indices = &shuffled_indices
                    [shard_idx * topology.samples..(shard_idx + 1) * topology.samples];
                let rows = Matrix::init(
                    indices.len(),
                    topology.data_cols,
                    indices
                        .iter()
                        .flat_map(|&i| encoded_data[i as usize].iter().copied()),
                );
                let inclusion_proof = bmt
                    .multi_proof(indices)
                    .map_err(Error::FailedToCreateInclusionProof)?;
                Ok(StrongShard {
                    data_bytes,
                    root,
                    inclusion_proof,
                    rows,
                    checksum: checksum.clone(),
                })
            });
        let shards = shard_results
            .into_iter()
            .collect::<Result<Vec<_>, Error>>()?;
        Ok((commitment, shards))
    }

    /// Converts a strong shard into checking data plus a (checked) weak
    /// shard, verifying the shard against `commitment` in the process.
    fn weaken(
        config: &Config,
        commitment: &Self::Commitment,
        index: u16,
        shard: Self::StrongShard,
    ) -> Result<(Self::CheckingData, Self::CheckedShard, Self::WeakShard), Self::Error> {
        let weak_shard = WeakShard {
            inclusion_proof: shard.inclusion_proof,
            shard: shard.rows,
        };
        // Rebuild and validate the verifier state from the shard's metadata.
        let checking_data = CheckingData::reckon(
            config,
            commitment,
            shard.data_bytes,
            shard.root,
            shard.checksum.as_ref(),
        )?;
        let checked_shard = checking_data.check::<H>(commitment, index, &weak_shard)?;
        Ok((checking_data, checked_shard, weak_shard))
    }

    /// Verifies a weak shard received from another participant using
    /// previously derived checking data.
    fn check(
        _config: &Config,
        commitment: &Self::Commitment,
        checking_data: &Self::CheckingData,
        index: u16,
        weak_shard: Self::WeakShard,
    ) -> Result<Self::CheckedShard, Self::Error> {
        checking_data.check::<H>(commitment, index, &weak_shard)
    }

    /// Reconstructs the original bytes from at least `min_shards` checked
    /// shards.
    ///
    /// Fills an evaluation vector with each shard's rows (at their shuffled
    /// encoded indices), then interpolates and truncates back to the
    /// original byte length.
    fn decode<'a>(
        _config: &Config,
        commitment: &Self::Commitment,
        checking_data: Self::CheckingData,
        shards: impl Iterator<Item = &'a Self::CheckedShard>,
        _strategy: &impl Strategy,
    ) -> Result<Vec<u8>, Self::Error> {
        if checking_data.commitment != *commitment {
            return Err(Error::InvalidShard);
        }

        let Topology {
            encoded_rows,
            data_cols,
            samples,
            data_rows,
            data_bytes,
            min_shards,
            ..
        } = checking_data.topology;
        let mut evaluation = EvaluationVector::<F>::empty(encoded_rows.ilog2() as usize, data_cols);
        let mut shard_count = 0usize;
        for shard in shards {
            shard_count += 1;
            // Every shard must have been checked against the same commitment.
            if shard.commitment != *commitment {
                return Err(Error::InvalidShard);
            }
            let indices =
                &checking_data.shuffled_indices[shard.index * samples..(shard.index + 1) * samples];
            for (&i, row) in indices.iter().zip(shard.shard.iter()) {
                evaluation.fill_row(u64::from(i) as usize, row);
            }
        }
        if shard_count < min_shards {
            return Err(Error::InsufficientShards(shard_count, min_shards));
        }
        // Duplicate shard indices fill the same rows, so count distinct
        // filled rows rather than shards.
        let filled_rows = evaluation.filled_rows();
        if filled_rows < data_rows {
            return Err(Error::InsufficientUniqueRows(filled_rows, data_rows));
        }
        // Interpolate back to coefficients and unpack to bytes, trimming the
        // zero padding introduced during encoding.
        Ok(collect_u64_le(
            data_bytes,
            F::stream_to_u64s(
                evaluation
                    .recover()
                    .coefficients_up_to(data_rows)
                    .flatten()
                    .copied(),
            ),
        ))
    }
}
682
// Empty marker impl: Zoda opts into the crate's `ValidatingScheme` trait
// (shards are individually verifiable against the commitment).
impl<H: Hasher> ValidatingScheme for Zoda<H> {}
684
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{Config, PhasedScheme};
    use commonware_cryptography::Sha256;
    use commonware_math::{
        algebra::{FieldNTT as _, Ring as _},
        ntt::PolynomialVector,
    };
    use commonware_parallel::Sequential;
    use commonware_utils::NZU16;

    const STRATEGY: Sequential = Sequential;

    // Supplying the same checked shard twice satisfies the shard count but
    // fills no new rows, so decode must fail with InsufficientUniqueRows.
    #[test]
    fn decode_rejects_duplicate_indices() {
        let config = Config {
            minimum_shards: NZU16!(2),
            extra_shards: NZU16!(1),
        };
        let data = b"duplicate shard coverage";
        let (commitment, shards) = Zoda::<Sha256>::encode(&config, &data[..], &STRATEGY).unwrap();
        let shard0 = shards[0].clone();
        let (checking_data, checked_shard0, _weak_shard0) =
            Zoda::<Sha256>::weaken(&config, &commitment, 0, shard0).unwrap();
        // A clone at the same index: counts toward min_shards but covers the
        // exact same encoded rows.
        let duplicate = CheckedShard {
            index: checked_shard0.index,
            shard: checked_shard0.shard.clone(),
            commitment: checked_shard0.commitment,
        };
        let shards = [checked_shard0, duplicate];
        let result = Zoda::<Sha256>::decode(
            &config,
            &commitment,
            checking_data,
            shards.iter(),
            &STRATEGY,
        );
        match result {
            Err(Error::InsufficientUniqueRows(actual, expected)) => {
                assert!(actual < expected);
            }
            other => panic!("expected insufficient unique rows error, got {other:?}"),
        }
    }

    // A tampered checksum shifted by a polynomial that vanishes on one
    // shard's row indices must still be rejected during `weaken` — both for
    // a shard whose rows the shift does NOT vanish on, and for the shard
    // whose rows it does.
    #[test]
    fn checksum_malleability() {
        // Build a single-column polynomial vector that evaluates to zero
        // exactly at the domain points selected by `vanish_indices`.
        fn vanishing(lg_domain: u8, vanish_indices: &[u32]) -> PolynomialVector<F> {
            let w = F::root_of_unity(lg_domain).expect("domain too large for Goldilocks");
            // Enumerate the multiplicative domain: powers of the root of unity.
            let mut domain = Vec::with_capacity(1usize << lg_domain);
            let mut x = F::one();
            for _ in 0..(1usize << lg_domain) {
                domain.push(x);
                x *= &w;
            }
            let roots: Vec<F> = vanish_indices.iter().map(|&i| domain[i as usize]).collect();
            // Evaluate prod(x - root) over the whole domain, then interpolate.
            let mut out = EvaluationVector::empty(lg_domain as usize, 1);
            domain.into_iter().enumerate().for_each(|(i, x)| {
                let mut acc = F::one();
                for root in &roots {
                    acc *= &(x - root);
                }
                out.fill_row(i, &[acc]);
            });
            out.recover()
        }

        let config = Config {
            minimum_shards: NZU16!(2),
            extra_shards: NZU16!(1),
        };
        let data = vec![0x5Au8; 256 * 1024];
        let (commitment, mut shards) =
            Zoda::<Sha256>::encode(&config, &data[..], &STRATEGY).unwrap();

        let leader_i = 0usize;
        let a_i = 1usize;
        let b_i = 2usize;

        {
            // Use the leader's shard only to learn the topology and shuffle.
            let (checking_data, _, _) = Zoda::<Sha256>::weaken(
                &config,
                &commitment,
                leader_i as u16,
                shards[leader_i].clone(),
            )
            .unwrap();

            let samples = checking_data.topology.samples;
            let a_indices =
                checking_data.shuffled_indices[a_i * samples..(a_i + 1) * samples].to_vec();
            let lg_rows = checking_data.topology.encoded_rows.ilog2() as usize;
            // Shift the checksum by a polynomial vanishing on shard A's rows,
            // so shard A's per-row checks would still pass if the checksum
            // commitment were not re-verified.
            let shift = vanishing(lg_rows as u8, &a_indices);
            let mut checksum = (*shards[1].checksum).clone();
            for (i, shift_i) in shift.coefficients_up_to(checksum.rows()).enumerate() {
                for j in 0..checksum.cols() {
                    checksum[(i, j)] += &shift_i[0];
                }
            }
            shards[1].checksum = Arc::new(checksum);
            shards[2].checksum = shards[1].checksum.clone();
        }

        // Shard B's rows are not in the vanishing set, so its checks fail.
        assert!(matches!(
            Zoda::<Sha256>::weaken(&config, &commitment, b_i as u16, shards[b_i].clone()),
            Err(Error::InvalidWeakShard)
        ));

        // Shard A must also fail: the tampered checksum no longer matches
        // the transcript-committed one.
        assert!(matches!(
            Zoda::<Sha256>::weaken(&config, &commitment, a_i as u16, shards[a_i].clone()),
            Err(Error::InvalidWeakShard)
        ));
    }

    // Round-trip codec conformance for shard encodings (fuzz-driven).
    #[cfg(feature = "arbitrary")]
    mod conformance {
        use super::*;
        use commonware_codec::conformance::CodecConformance;
        use commonware_cryptography::sha256::Digest as Sha256Digest;

        commonware_conformance::conformance_tests! {
            CodecConformance<StrongShard<Sha256Digest>>,
            CodecConformance<WeakShard<Sha256Digest>>,
        }
    }
}