1use crate::{Config, PhasedScheme, ValidatingScheme};
118use bytes::BufMut;
119use commonware_codec::{Encode, EncodeSize, FixedSize, RangeCfg, Read, ReadExt, Write};
120use commonware_cryptography::{
121 transcript::{Summary, Transcript},
122 Digest, Hasher,
123};
124use commonware_math::{
125 fields::goldilocks::F,
126 ntt::{EvaluationVector, Matrix},
127};
128use commonware_parallel::Strategy;
129use commonware_storage::bmt::{Builder as BmtBuilder, Error as BmtError, Proof};
130use rand::seq::SliceRandom as _;
131use std::{marker::PhantomData, sync::Arc};
132use thiserror::Error;
133
134fn iter_u64_le(data: impl bytes::Buf) -> impl Iterator<Item = u64> {
136 struct Iter<B> {
137 remaining_u64s: usize,
138 tail: usize,
139 inner: B,
140 }
141
142 impl<B: bytes::Buf> Iter<B> {
143 fn new(inner: B) -> Self {
144 let remaining_u64s = inner.remaining() / 8;
145 let tail = inner.remaining() % 8;
146 Self {
147 remaining_u64s,
148 tail,
149 inner,
150 }
151 }
152 }
153
154 impl<B: bytes::Buf> Iterator for Iter<B> {
155 type Item = u64;
156
157 fn next(&mut self) -> Option<Self::Item> {
158 if self.remaining_u64s > 0 {
159 self.remaining_u64s -= 1;
160 return Some(self.inner.get_u64_le());
161 }
162 if self.tail > 0 {
163 let mut chunk = [0u8; 8];
164 self.inner.copy_to_slice(&mut chunk[..self.tail]);
165 self.tail = 0;
166 return Some(u64::from_le_bytes(chunk));
167 }
168 None
169 }
170 }
171 Iter::new(data)
172}
173
174fn collect_u64_le(max_length: usize, data: impl Iterator<Item = u64>) -> Vec<u8> {
175 let mut out = Vec::with_capacity(max_length);
176 for d in data {
177 out.extend_from_slice(&d.to_le_bytes());
178 }
179 out.truncate(max_length);
180 out
181}
182
183fn row_digest<H: Hasher>(row: &[F]) -> H::Digest {
184 let mut h = H::new();
185 for x in row {
186 h.update(&x.to_le_bytes());
187 }
188 h.finalize()
189}
190
191mod topology;
192use topology::Topology;
193
194#[derive(Clone, Debug)]
196pub struct StrongShard<D: Digest> {
197 data_bytes: usize,
198 root: D,
199 inclusion_proof: Proof<D>,
200 rows: Matrix<F>,
201 checksum: Arc<Matrix<F>>,
202}
203
204impl<D: Digest> PartialEq for StrongShard<D> {
205 fn eq(&self, other: &Self) -> bool {
206 self.data_bytes == other.data_bytes
207 && self.root == other.root
208 && self.inclusion_proof == other.inclusion_proof
209 && self.rows == other.rows
210 && self.checksum == other.checksum
211 }
212}
213
214impl<D: Digest> Eq for StrongShard<D> {}
215
216impl<D: Digest> EncodeSize for StrongShard<D> {
217 fn encode_size(&self) -> usize {
218 self.data_bytes.encode_size()
219 + self.root.encode_size()
220 + self.inclusion_proof.encode_size()
221 + self.rows.encode_size()
222 + self.checksum.encode_size()
223 }
224}
225
226impl<D: Digest> Write for StrongShard<D> {
227 fn write(&self, buf: &mut impl BufMut) {
228 self.data_bytes.write(buf);
229 self.root.write(buf);
230 self.inclusion_proof.write(buf);
231 self.rows.write(buf);
232 self.checksum.write(buf);
233 }
234}
235
236impl<D: Digest> Read for StrongShard<D> {
237 type Cfg = crate::CodecConfig;
238
239 fn read_cfg(
240 buf: &mut impl bytes::Buf,
241 cfg: &Self::Cfg,
242 ) -> Result<Self, commonware_codec::Error> {
243 let data_bytes = usize::read_cfg(buf, &RangeCfg::from(..=cfg.maximum_shard_size))?;
244 let max_els = cfg.maximum_shard_size / F::SIZE;
245 Ok(Self {
246 data_bytes,
247 root: ReadExt::read(buf)?,
248 inclusion_proof: Read::read_cfg(buf, &max_els)?,
249 rows: Read::read_cfg(buf, &(max_els, ()))?,
250 checksum: Arc::new(Read::read_cfg(buf, &(max_els, ()))?),
251 })
252 }
253}
254
255#[cfg(feature = "arbitrary")]
256impl<D: Digest> arbitrary::Arbitrary<'_> for StrongShard<D>
257where
258 D: for<'a> arbitrary::Arbitrary<'a>,
259{
260 fn arbitrary(u: &mut arbitrary::Unstructured<'_>) -> arbitrary::Result<Self> {
261 Ok(Self {
262 data_bytes: u.arbitrary::<u32>()? as usize,
263 root: u.arbitrary()?,
264 inclusion_proof: u.arbitrary()?,
265 rows: u.arbitrary()?,
266 checksum: Arc::new(u.arbitrary()?),
267 })
268 }
269}
270
271#[derive(Clone, Debug)]
272pub struct WeakShard<D: Digest> {
273 inclusion_proof: Proof<D>,
274 shard: Matrix<F>,
275}
276
277impl<D: Digest> PartialEq for WeakShard<D> {
278 fn eq(&self, other: &Self) -> bool {
279 self.inclusion_proof == other.inclusion_proof && self.shard == other.shard
280 }
281}
282
283impl<D: Digest> Eq for WeakShard<D> {}
284
285impl<D: Digest> EncodeSize for WeakShard<D> {
286 fn encode_size(&self) -> usize {
287 self.inclusion_proof.encode_size() + self.shard.encode_size()
288 }
289}
290
291impl<D: Digest> Write for WeakShard<D> {
292 fn write(&self, buf: &mut impl BufMut) {
293 self.inclusion_proof.write(buf);
294 self.shard.write(buf);
295 }
296}
297
298impl<D: Digest> Read for WeakShard<D> {
299 type Cfg = crate::CodecConfig;
300
301 fn read_cfg(
302 buf: &mut impl bytes::Buf,
303 cfg: &Self::Cfg,
304 ) -> Result<Self, commonware_codec::Error> {
305 let max_data_bits = cfg.maximum_shard_size.saturating_mul(8);
306 let max_data_els = F::bits_to_elements(max_data_bits).max(1);
307 Ok(Self {
308 inclusion_proof: Read::read_cfg(buf, &max_data_els)?,
310 shard: Read::read_cfg(buf, &(max_data_els, ()))?,
311 })
312 }
313}
314
315#[cfg(feature = "arbitrary")]
316impl<D: Digest> arbitrary::Arbitrary<'_> for WeakShard<D>
317where
318 D: for<'a> arbitrary::Arbitrary<'a>,
319{
320 fn arbitrary(u: &mut arbitrary::Unstructured<'_>) -> arbitrary::Result<Self> {
321 Ok(Self {
322 inclusion_proof: u.arbitrary()?,
323 shard: u.arbitrary()?,
324 })
325 }
326}
327
328#[derive(Clone)]
330pub struct CheckedShard {
331 index: usize,
332 shard: Matrix<F>,
333 commitment: Summary,
334}
335
336fn shuffle_indices(transcript: &Transcript, total: usize) -> Vec<u32> {
344 let total: u32 = total
345 .try_into()
346 .expect("encoded_rows exceeds u32::MAX; data too large for ZODA");
347 let mut out = (0..total).collect::<Vec<_>>();
348 out.shuffle(&mut transcript.noise(b"shuffle"));
349 out
350}
351
352fn checking_matrix(transcript: &Transcript, topology: &Topology) -> Matrix<F> {
356 Matrix::rand(
357 &mut transcript.noise(b"checking matrix"),
358 topology.data_cols,
359 topology.column_samples,
360 )
361}
362
363#[derive(Clone, PartialEq)]
365pub struct CheckingData<D: Digest> {
366 commitment: Summary,
367 topology: Topology,
368 root: D,
369 checking_matrix: Matrix<F>,
370 encoded_checksum: Matrix<F>,
371 shuffled_indices: Vec<u32>,
372}
373
374impl<D: Digest> Eq for CheckingData<D> {}
375
376impl<D: Digest> CheckingData<D> {
377 fn reckon(
386 namespace: &[u8],
387 config: &Config,
388 commitment: &Summary,
389 data_bytes: usize,
390 root: D,
391 checksum: &Matrix<F>,
392 ) -> Result<Self, Error> {
393 let topology = Topology::reckon(config, data_bytes);
394 let mut transcript = Transcript::new(NAMESPACE);
395 transcript.commit(namespace);
396 transcript.commit((topology.data_bytes as u64).encode());
397 transcript.commit(root.encode());
398 let expected_commitment = transcript.summarize();
399 if *commitment != expected_commitment {
400 return Err(Error::InvalidShard);
401 }
402 let mut transcript = Transcript::resume(expected_commitment);
403 let checking_matrix = checking_matrix(&transcript, &topology);
404 if checksum.rows() != topology.data_rows || checksum.cols() != topology.column_samples {
405 return Err(Error::InvalidShard);
406 }
407 transcript.commit(checksum.encode());
415 let encoded_checksum = checksum
416 .as_polynomials(topology.encoded_rows)
417 .expect("checksum has too many rows")
418 .evaluate()
419 .data();
420 let shuffled_indices = shuffle_indices(&transcript, topology.encoded_rows);
421
422 Ok(Self {
423 commitment: expected_commitment,
424 topology,
425 root,
426 checking_matrix,
427 encoded_checksum,
428 shuffled_indices,
429 })
430 }
431
432 fn check<H: Hasher<Digest = D>>(
433 &self,
434 commitment: &Summary,
435 index: u16,
436 weak_shard: &WeakShard<D>,
437 ) -> Result<CheckedShard, Error> {
438 if self.commitment != *commitment {
439 return Err(Error::InvalidShard);
440 }
441 self.topology.check_index(index)?;
442 if weak_shard.shard.rows() != self.topology.samples
443 || weak_shard.shard.cols() != self.topology.data_cols
444 {
445 return Err(Error::InvalidWeakShard);
446 }
447 let index = index as usize;
448 let these_shuffled_indices = &self.shuffled_indices
449 [index * self.topology.samples..(index + 1) * self.topology.samples];
450
451 let proof_elements: Vec<(H::Digest, u32)> = these_shuffled_indices
454 .iter()
455 .zip(weak_shard.shard.iter())
456 .map(|(&i, row)| (row_digest::<H>(row), i))
457 .collect();
458
459 let mut hasher = H::new();
461 if weak_shard
462 .inclusion_proof
463 .verify_multi_inclusion(&mut hasher, &proof_elements, &self.root)
464 .is_err()
465 {
466 return Err(Error::InvalidWeakShard);
467 }
468
469 let shard_checksum = weak_shard.shard.mul(&self.checking_matrix);
470 for (row, &i) in shard_checksum.iter().zip(these_shuffled_indices) {
472 if row != &self.encoded_checksum[i as usize] {
473 return Err(Error::InvalidWeakShard);
474 }
475 }
476 Ok(CheckedShard {
477 index,
478 shard: weak_shard.shard.clone(),
479 commitment: *commitment,
480 })
481 }
482}
483
484#[derive(Debug, Error)]
485pub enum Error {
486 #[error("invalid shard")]
487 InvalidShard,
488 #[error("invalid weak shard")]
489 InvalidWeakShard,
490 #[error("invalid index {0}")]
491 InvalidIndex(u16),
492 #[error("insufficient shards {0} < {1}")]
493 InsufficientShards(usize, usize),
494 #[error("insufficient unique rows {0} < {1}")]
495 InsufficientUniqueRows(usize, usize),
496 #[error("failed to create inclusion proof: {0}")]
497 FailedToCreateInclusionProof(BmtError),
498}
499
500const NAMESPACE: &[u8] = b"_COMMONWARE_CODING_ZODA";
501
502#[derive(Clone, Copy)]
503pub struct Zoda<H> {
504 _marker: PhantomData<H>,
505}
506
507impl<H> std::fmt::Debug for Zoda<H> {
508 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
509 write!(f, "Zoda")
510 }
511}
512
513impl<H: Hasher> PhasedScheme for Zoda<H> {
514 type Commitment = Summary;
515 type StrongShard = StrongShard<H::Digest>;
516 type WeakShard = WeakShard<H::Digest>;
517 type CheckingData = CheckingData<H::Digest>;
518 type CheckedShard = CheckedShard;
519 type Error = Error;
520
521 fn encode(
522 namespace: &[u8],
523 config: &Config,
524 data: impl bytes::Buf,
525 strategy: &impl Strategy,
526 ) -> Result<(Self::Commitment, Vec<Self::StrongShard>), Self::Error> {
527 let data_bytes = data.remaining();
529 let topology = Topology::reckon(config, data_bytes);
530 let data = Matrix::init(
531 topology.data_rows,
532 topology.data_cols,
533 F::stream_from_u64s(iter_u64_le(data)),
534 );
535
536 let encoded_data = data
538 .as_polynomials(topology.encoded_rows)
539 .expect("data has too many rows")
540 .evaluate()
541 .data();
542
543 let row_hashes: Vec<H::Digest> = strategy.map_collect_vec(0..encoded_data.rows(), |i| {
545 row_digest::<H>(&encoded_data[i])
546 });
547 let mut bmt_builder = BmtBuilder::<H>::new(row_hashes.len());
548 for hash in &row_hashes {
549 bmt_builder.add(hash);
550 }
551 let bmt = bmt_builder.build();
552 let root = bmt.root();
553
554 let mut transcript = Transcript::new(NAMESPACE);
556 transcript.commit(namespace);
557 transcript.commit((topology.data_bytes as u64).encode());
558 transcript.commit(root.encode());
559 let commitment = transcript.summarize();
560
561 let mut transcript = Transcript::resume(commitment);
563 let checking_matrix = checking_matrix(&transcript, &topology);
564 let checksum = Arc::new(data.mul(&checking_matrix));
565 transcript.commit(checksum.encode());
570 let shuffled_indices = shuffle_indices(&transcript, encoded_data.rows());
571
572 let shard_results: Vec<Result<StrongShard<H::Digest>, Error>> =
574 strategy.map_collect_vec(0..topology.total_shards, |shard_idx| {
575 let indices = &shuffled_indices
576 [shard_idx * topology.samples..(shard_idx + 1) * topology.samples];
577 let rows = Matrix::init(
578 indices.len(),
579 topology.data_cols,
580 indices
581 .iter()
582 .flat_map(|&i| encoded_data[i as usize].iter().copied()),
583 );
584 let inclusion_proof = bmt
585 .multi_proof(indices)
586 .map_err(Error::FailedToCreateInclusionProof)?;
587 Ok(StrongShard {
588 data_bytes,
589 root,
590 inclusion_proof,
591 rows,
592 checksum: checksum.clone(),
593 })
594 });
595 let shards = shard_results
596 .into_iter()
597 .collect::<Result<Vec<_>, Error>>()?;
598 Ok((commitment, shards))
599 }
600
601 fn weaken(
602 namespace: &[u8],
603 config: &Config,
604 commitment: &Self::Commitment,
605 index: u16,
606 shard: Self::StrongShard,
607 ) -> Result<(Self::CheckingData, Self::CheckedShard, Self::WeakShard), Self::Error> {
608 let weak_shard = WeakShard {
609 inclusion_proof: shard.inclusion_proof,
610 shard: shard.rows,
611 };
612 let checking_data = CheckingData::reckon(
613 namespace,
614 config,
615 commitment,
616 shard.data_bytes,
617 shard.root,
618 shard.checksum.as_ref(),
619 )?;
620 let checked_shard = checking_data.check::<H>(commitment, index, &weak_shard)?;
621 Ok((checking_data, checked_shard, weak_shard))
622 }
623
624 fn check(
625 _config: &Config,
626 commitment: &Self::Commitment,
627 checking_data: &Self::CheckingData,
628 index: u16,
629 weak_shard: Self::WeakShard,
630 ) -> Result<Self::CheckedShard, Self::Error> {
631 checking_data.check::<H>(commitment, index, &weak_shard)
632 }
633
634 fn decode<'a>(
635 _config: &Config,
636 commitment: &Self::Commitment,
637 checking_data: Self::CheckingData,
638 shards: impl Iterator<Item = &'a Self::CheckedShard>,
639 _strategy: &impl Strategy,
640 ) -> Result<Vec<u8>, Self::Error> {
641 if checking_data.commitment != *commitment {
642 return Err(Error::InvalidShard);
643 }
644
645 let Topology {
646 encoded_rows,
647 data_cols,
648 samples,
649 data_rows,
650 data_bytes,
651 min_shards,
652 ..
653 } = checking_data.topology;
654 let mut evaluation = EvaluationVector::<F>::empty(encoded_rows.ilog2() as usize, data_cols);
655 let mut shard_count = 0usize;
656 for shard in shards {
657 shard_count += 1;
658 if shard.commitment != *commitment {
659 return Err(Error::InvalidShard);
660 }
661 let indices =
662 &checking_data.shuffled_indices[shard.index * samples..(shard.index + 1) * samples];
663 for (&i, row) in indices.iter().zip(shard.shard.iter()) {
664 evaluation.fill_row(u64::from(i) as usize, row);
665 }
666 }
667 if shard_count < min_shards {
668 return Err(Error::InsufficientShards(shard_count, min_shards));
669 }
670 let filled_rows = evaluation.filled_rows();
673 if filled_rows < data_rows {
674 return Err(Error::InsufficientUniqueRows(filled_rows, data_rows));
675 }
676 Ok(collect_u64_le(
677 data_bytes,
678 F::stream_to_u64s(
679 evaluation
680 .recover()
681 .coefficients_up_to(data_rows)
682 .flatten()
683 .copied(),
684 ),
685 ))
686 }
687}
688
689impl<H: Hasher> ValidatingScheme for Zoda<H> {}
690
691#[cfg(test)]
692mod tests {
693 use super::*;
694 use crate::{Config, PhasedScheme};
695 use commonware_cryptography::Sha256;
696 use commonware_math::{
697 algebra::{FieldNTT as _, Ring as _},
698 ntt::PolynomialVector,
699 };
700 use commonware_parallel::Sequential;
701 use commonware_utils::NZU16;
702
703 const STRATEGY: Sequential = Sequential;
704
705 #[test]
706 fn decode_rejects_duplicate_indices() {
707 let config = Config {
708 minimum_shards: NZU16!(2),
709 extra_shards: NZU16!(1),
710 };
711 let data = b"duplicate shard coverage";
712 let (commitment, shards) =
713 Zoda::<Sha256>::encode(b"", &config, &data[..], &STRATEGY).unwrap();
714 let shard0 = shards[0].clone();
715 let (checking_data, checked_shard0, _weak_shard0) =
716 Zoda::<Sha256>::weaken(b"", &config, &commitment, 0, shard0).unwrap();
717 let duplicate = CheckedShard {
718 index: checked_shard0.index,
719 shard: checked_shard0.shard.clone(),
720 commitment: checked_shard0.commitment,
721 };
722 let shards = [checked_shard0, duplicate];
723 let result = Zoda::<Sha256>::decode(
724 &config,
725 &commitment,
726 checking_data,
727 shards.iter(),
728 &STRATEGY,
729 );
730 match result {
731 Err(Error::InsufficientUniqueRows(actual, expected)) => {
732 assert!(actual < expected);
733 }
734 other => panic!("expected insufficient unique rows error, got {other:?}"),
735 }
736 }
737
738 #[test]
739 fn checksum_malleability() {
740 fn vanishing(lg_domain: u8, vanish_indices: &[u32]) -> PolynomialVector<F> {
744 let w = F::root_of_unity(lg_domain).expect("domain too large for Goldilocks");
745 let mut domain = Vec::with_capacity(1usize << lg_domain);
746 let mut x = F::one();
747 for _ in 0..(1usize << lg_domain) {
748 domain.push(x);
749 x *= &w;
750 }
751 let roots: Vec<F> = vanish_indices.iter().map(|&i| domain[i as usize]).collect();
752 let mut out = EvaluationVector::empty(lg_domain as usize, 1);
753 domain.into_iter().enumerate().for_each(|(i, x)| {
754 let mut acc = F::one();
755 for root in &roots {
756 acc *= &(x - root);
757 }
758 out.fill_row(i, &[acc]);
759 });
760 out.recover()
761 }
762
763 let config = Config {
764 minimum_shards: NZU16!(2),
765 extra_shards: NZU16!(1),
766 };
767 let data = vec![0x5Au8; 256 * 1024];
768 let (commitment, mut shards) =
769 Zoda::<Sha256>::encode(b"", &config, &data[..], &STRATEGY).unwrap();
770
771 let leader_i = 0usize;
772 let a_i = 1usize;
773 let b_i = 2usize;
774
775 {
777 let (checking_data, _, _) = Zoda::<Sha256>::weaken(
778 b"",
779 &config,
780 &commitment,
781 leader_i as u16,
782 shards[leader_i].clone(),
783 )
784 .unwrap();
785
786 let samples = checking_data.topology.samples;
787 let a_indices =
788 checking_data.shuffled_indices[a_i * samples..(a_i + 1) * samples].to_vec();
789 let lg_rows = checking_data.topology.encoded_rows.ilog2() as usize;
790 let shift = vanishing(lg_rows as u8, &a_indices);
791 let mut checksum = (*shards[1].checksum).clone();
792 for (i, shift_i) in shift.coefficients_up_to(checksum.rows()).enumerate() {
793 for j in 0..checksum.cols() {
794 checksum[(i, j)] += &shift_i[0];
795 }
796 }
797 shards[1].checksum = Arc::new(checksum);
798 shards[2].checksum = shards[1].checksum.clone();
799 }
800
801 assert!(matches!(
802 Zoda::<Sha256>::weaken(b"", &config, &commitment, b_i as u16, shards[b_i].clone()),
803 Err(Error::InvalidWeakShard)
804 ));
805
806 assert!(matches!(
809 Zoda::<Sha256>::weaken(b"", &config, &commitment, a_i as u16, shards[a_i].clone()),
810 Err(Error::InvalidWeakShard)
811 ));
812 }
813
814 #[cfg(feature = "arbitrary")]
815 mod conformance {
816 use super::*;
817 use commonware_codec::conformance::CodecConformance;
818 use commonware_cryptography::sha256::Digest as Sha256Digest;
819
820 commonware_conformance::conformance_tests! {
821 CodecConformance<StrongShard<Sha256Digest>>,
822 CodecConformance<WeakShard<Sha256Digest>>,
823 }
824 }
825}