use crate::{Config, Scheme, ValidatingScheme};
use bytes::BufMut;
use commonware_codec::{Encode, EncodeSize, FixedSize, RangeCfg, Read, ReadExt, Write};
use commonware_cryptography::{
    transcript::{Summary, Transcript},
    Hasher,
};
use commonware_math::{
    fields::goldilocks::F,
    ntt::{EvaluationVector, Matrix},
};
use commonware_parallel::Strategy;
use commonware_storage::bmt::{Builder as BmtBuilder, Error as BmtError, Proof};
use rand::seq::SliceRandom as _;
use std::{marker::PhantomData, sync::Arc};
use thiserror::Error;

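/// Reads a byte buffer as a sequence of little-endian `u64`s, zero-padding the
/// final value when fewer than eight bytes remain.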
fn iter_u64_le(data: impl bytes::Buf) -> impl Iterator<Item = u64> {
    struct Iter<B> {
        remaining_u64s: usize,
        tail: usize,
        inner: B,
    }

    impl<B: bytes::Buf> Iter<B> {
        fn new(inner: B) -> Self {
            let remaining_u64s = inner.remaining() / 8;
            let tail = inner.remaining() % 8;
            Self {
                remaining_u64s,
                tail,
                inner,
            }
        }
    }

    impl<B: bytes::Buf> Iterator for Iter<B> {
        type Item = u64;

        fn next(&mut self) -> Option<Self::Item> {
            if self.remaining_u64s > 0 {
                self.remaining_u64s -= 1;
                return Some(self.inner.get_u64_le());
            }
            if self.tail > 0 {
                let mut chunk = [0u8; 8];
                self.inner.copy_to_slice(&mut chunk[..self.tail]);
                self.tail = 0;
                return Some(u64::from_le_bytes(chunk));
            }
            None
        }
    }

    Iter::new(data)
}

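/// Inverse of [`iter_u64_le`]: writes each `u64` as little-endian bytes and
/// truncates the result to `max_length` bytes.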
fn collect_u64_le(max_length: usize, data: impl Iterator<Item = u64>) -> Vec<u8> {
    let mut out = Vec::with_capacity(max_length);
    for d in data {
        out.extend_from_slice(&d.to_le_bytes());
    }
    out.truncate(max_length);
    out
}

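/// Hashes a row of field elements by absorbing each element's little-endian bytes.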
fn row_digest<H: Hasher>(row: &[F]) -> H::Digest {
    let mut h = H::new();
    for x in row {
        h.update(&x.to_le_bytes());
    }
    h.finalize()
}

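/// Sizing logic: how data is arranged into a grid and how much sampling is
/// needed for the target security level.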
mod topology {
    use super::Error;
    use crate::Config;
    use commonware_math::fields::goldilocks::F;
    use commonware_utils::BigRationalExt as _;
    use num_rational::BigRational;

    const SECURITY_BITS: usize = 126;
    const LOG2_PRECISION: usize = SECURITY_BITS.next_power_of_two().trailing_zeros() as usize;

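    /// The dimensions of the data grid and its encoding, plus how many rows
    /// (`samples`) and checksum columns (`column_samples`) each shard carries.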
    #[derive(Debug, Clone, Copy, PartialEq)]
    pub struct Topology {
        pub data_bytes: usize,
        pub data_cols: usize,
        pub data_rows: usize,
        pub encoded_rows: usize,
        pub samples: usize,
        pub column_samples: usize,
        pub min_shards: usize,
        pub total_shards: usize,
    }

    impl Topology {
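        /// Builds a candidate topology for a fixed column count; `column_samples`
        /// starts at 0 and is filled in later by `correct_column_samples`.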
        const fn with_cols(data_bytes: usize, n: usize, k: usize, cols: usize) -> Self {
            let data_els = F::bits_to_elements(8 * data_bytes);
            let data_rows = data_els.div_ceil(cols);
            let samples = data_rows.div_ceil(n);
            Self {
                data_bytes,
                data_cols: cols,
                data_rows,
                encoded_rows: ((n + k) * samples).next_power_of_two(),
                samples,
                column_samples: 0,
                min_shards: n,
                total_shards: n + k,
            }
        }

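        /// The number of distinct row samples needed for [`SECURITY_BITS`] bits
        /// of security. With `k = encoded_rows - data_rows` redundant rows out
        /// of `m = encoded_rows`, the formula treats each sample as catching a
        /// bad encoding with probability at least `p = (k + 1) / (2m)` (this
        /// probabilistic reading is inferred from the arithmetic, not stated
        /// here) and returns `ceil(SECURITY_BITS / -log2(1 - p))`, or
        /// `usize::MAX` when the log term is not negative at the working
        /// precision.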
        pub(crate) fn required_samples(&self) -> usize {
            let k = BigRational::from_usize(self.encoded_rows - self.data_rows);
            let m = BigRational::from_usize(self.encoded_rows);
            let fraction = (&k + BigRational::from_u64(1)) / (BigRational::from_usize(2) * &m);

            let one_minus = BigRational::from_usize(1) - &fraction;
            let log_term = one_minus.log2_ceil(LOG2_PRECISION);
            if log_term >= BigRational::from_u64(0) {
                return usize::MAX;
            }

            let required = BigRational::from_usize(SECURITY_BITS) / -log_term;
            required.ceil_to_u128().unwrap_or(u128::MAX) as usize
        }

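        /// Sets `column_samples` to enough field elements to carry
        /// [`SECURITY_BITS`] bits, scaled up whenever a shard's `samples` rows
        /// fall short of `required_samples()`.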
        fn correct_column_samples(&mut self) {
            self.column_samples = F::bits_to_elements(SECURITY_BITS)
                * self.required_samples().div_ceil(self.samples);
        }

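        /// Chooses a topology for `data_bytes` of data: starting from one
        /// column, keep widening the grid while all `n + k` shards can still
        /// hold `required_samples()` rows each, then fix up the column-sample
        /// count. `data_bytes` is clamped to at least 1 for the search but
        /// restored verbatim in the result.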
        pub fn reckon(config: &Config, data_bytes: usize) -> Self {
            let n = config.minimum_shards as usize;
            let k = config.extra_shards as usize;
            let corrected_data_bytes = data_bytes.max(1);
            let mut out = Self::with_cols(corrected_data_bytes, n, k, 1);
            loop {
                let attempt = Self::with_cols(corrected_data_bytes, n, k, out.data_cols + 1);
                let required_samples = attempt.required_samples();
                if required_samples.saturating_mul(n + k) <= attempt.encoded_rows {
                    out = Self {
                        samples: required_samples.max(attempt.samples),
                        ..attempt
                    };
                } else {
                    break;
                }
            }
            out.correct_column_samples();
            out.data_bytes = data_bytes;
            out
        }

        pub fn check_index(&self, i: u16) -> Result<(), Error> {
            if (0..self.total_shards).contains(&(i as usize)) {
                return Ok(());
            }
            Err(Error::InvalidIndex(i))
        }
    }
}
use topology::Topology;

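/// One participant's piece of the encoded data: its sampled rows, a
/// multi-inclusion proof binding those rows to `root`, and the shared checksum
/// matrix used to verify encoding consistency.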
#[derive(Clone)]
pub struct Shard<H: Hasher> {
    data_bytes: usize,
    root: H::Digest,
    inclusion_proof: Proof<H::Digest>,
    rows: Matrix,
    checksum: Arc<Matrix>,
}

impl<H: Hasher> PartialEq for Shard<H> {
    fn eq(&self, other: &Self) -> bool {
        self.data_bytes == other.data_bytes
            && self.root == other.root
            && self.inclusion_proof == other.inclusion_proof
            && self.rows == other.rows
            && self.checksum == other.checksum
    }
}

impl<H: Hasher> Eq for Shard<H> {}

impl<H: Hasher> EncodeSize for Shard<H> {
    fn encode_size(&self) -> usize {
        self.data_bytes.encode_size()
            + self.root.encode_size()
            + self.inclusion_proof.encode_size()
            + self.rows.encode_size()
            + self.checksum.encode_size()
    }
}

impl<H: Hasher> Write for Shard<H> {
    fn write(&self, buf: &mut impl BufMut) {
        self.data_bytes.write(buf);
        self.root.write(buf);
        self.inclusion_proof.write(buf);
        self.rows.write(buf);
        self.checksum.write(buf);
    }
}

impl<H: Hasher> Read for Shard<H> {
    type Cfg = crate::CodecConfig;

    fn read_cfg(
        buf: &mut impl bytes::Buf,
        cfg: &Self::Cfg,
    ) -> Result<Self, commonware_codec::Error> {
        let data_bytes = usize::read_cfg(buf, &RangeCfg::from(..=cfg.maximum_shard_size))?;
        let max_els = cfg.maximum_shard_size / F::SIZE;
        Ok(Self {
            data_bytes,
            root: ReadExt::read(buf)?,
            inclusion_proof: Read::read_cfg(buf, &max_els)?,
            rows: Read::read_cfg(buf, &max_els)?,
            checksum: Arc::new(Read::read_cfg(buf, &max_els)?),
        })
    }
}

#[cfg(feature = "arbitrary")]
impl<H: Hasher> arbitrary::Arbitrary<'_> for Shard<H>
where
    H::Digest: for<'a> arbitrary::Arbitrary<'a>,
{
    fn arbitrary(u: &mut arbitrary::Unstructured<'_>) -> arbitrary::Result<Self> {
        Ok(Self {
            data_bytes: u.arbitrary::<u32>()? as usize,
            root: u.arbitrary()?,
            inclusion_proof: u.arbitrary()?,
            rows: u.arbitrary()?,
            checksum: Arc::new(u.arbitrary()?),
        })
    }
}

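/// The part of a [`Shard`] that gets forwarded to other participants: just the
/// rows and their inclusion proof, since a checker already holds the root and
/// checksum inside its [`CheckingData`].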
#[derive(Clone, Debug)]
pub struct ReShard<H: Hasher> {
    inclusion_proof: Proof<H::Digest>,
    shard: Matrix,
}

impl<H: Hasher> PartialEq for ReShard<H> {
    fn eq(&self, other: &Self) -> bool {
        self.inclusion_proof == other.inclusion_proof && self.shard == other.shard
    }
}

impl<H: Hasher> Eq for ReShard<H> {}

impl<H: Hasher> EncodeSize for ReShard<H> {
    fn encode_size(&self) -> usize {
        self.inclusion_proof.encode_size() + self.shard.encode_size()
    }
}

impl<H: Hasher> Write for ReShard<H> {
    fn write(&self, buf: &mut impl BufMut) {
        self.inclusion_proof.write(buf);
        self.shard.write(buf);
    }
}

impl<H: Hasher> Read for ReShard<H> {
    type Cfg = crate::CodecConfig;

    fn read_cfg(
        buf: &mut impl bytes::Buf,
        cfg: &Self::Cfg,
    ) -> Result<Self, commonware_codec::Error> {
        let max_data_bits = cfg.maximum_shard_size.saturating_mul(8);
        let max_data_els = F::bits_to_elements(max_data_bits).max(1);
        Ok(Self {
            inclusion_proof: Read::read_cfg(buf, &max_data_els)?,
            shard: Read::read_cfg(buf, &max_data_els)?,
        })
    }
}

#[cfg(feature = "arbitrary")]
impl<H: Hasher> arbitrary::Arbitrary<'_> for ReShard<H>
where
    H::Digest: for<'a> arbitrary::Arbitrary<'a>,
{
    fn arbitrary(u: &mut arbitrary::Unstructured<'_>) -> arbitrary::Result<Self> {
        Ok(Self {
            inclusion_proof: u.arbitrary()?,
            shard: u.arbitrary()?,
        })
    }
}

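/// A shard that has passed [`CheckingData::check`], keeping only the index and
/// rows needed for decoding.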
pub struct CheckedShard {
    index: usize,
    shard: Matrix,
}

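/// Derives a pseudorandom permutation of the encoded row indices from the
/// transcript; consecutive `samples`-sized windows of it assign rows to shards.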
fn shuffle_indices(transcript: &Transcript, total: usize) -> Vec<u32> {
    let total: u32 = total
        .try_into()
        .expect("encoded_rows exceeds u32::MAX; data too large for ZODA");
    let mut out = (0..total).collect::<Vec<_>>();
    out.shuffle(&mut transcript.noise(b"shuffle"));
    out
}

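/// Derives the random `data_cols x column_samples` checking matrix from the
/// transcript.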
fn checking_matrix(transcript: &Transcript, topology: &Topology) -> Matrix {
    Matrix::rand(
        &mut transcript.noise(b"checking matrix"),
        topology.data_cols,
        topology.column_samples,
    )
}

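/// Everything needed to verify reshards against a commitment: the derived
/// topology, the row-commitment root, the checking matrix, the low-degree
/// extension of the checksum, and the shared index permutation.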
#[derive(Clone)]
pub struct CheckingData<H: Hasher> {
    topology: Topology,
    root: H::Digest,
    checking_matrix: Matrix,
    encoded_checksum: Matrix,
    shuffled_indices: Vec<u32>,
}

impl<H: Hasher> CheckingData<H> {
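    /// Rebuilds checking data from a shard's claims, failing if the commitment
    /// does not match the transcript over `(data_bytes, root)` or if the
    /// checksum has the wrong shape. The checksum is extended to `encoded_rows`
    /// rows so individual encoded rows can be compared against it later.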
    fn reckon(
        config: &Config,
        commitment: &Summary,
        data_bytes: usize,
        root: H::Digest,
        checksum: &Matrix,
    ) -> Result<Self, Error> {
        let topology = Topology::reckon(config, data_bytes);
        let mut transcript = Transcript::new(NAMESPACE);
        transcript.commit((topology.data_bytes as u64).encode());
        transcript.commit(root.encode());
        let expected_commitment = transcript.summarize();
        if *commitment != expected_commitment {
            return Err(Error::InvalidShard);
        }
        let transcript = Transcript::resume(expected_commitment);
        let checking_matrix = checking_matrix(&transcript, &topology);
        if checksum.rows() != topology.data_rows || checksum.cols() != topology.column_samples {
            return Err(Error::InvalidShard);
        }
        let encoded_checksum = checksum
            .as_polynomials(topology.encoded_rows)
            .expect("checksum has too many rows")
            .evaluate()
            .data();
        let shuffled_indices = shuffle_indices(&transcript, topology.encoded_rows);

        Ok(Self {
            topology,
            root,
            checking_matrix,
            encoded_checksum,
            shuffled_indices,
        })
    }

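    /// Verifies a reshard claimed for `index`: shape check, multi-inclusion
    /// proof against the root, then a consistency check that
    /// `rows * checking_matrix` matches the corresponding rows of the encoded
    /// checksum.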
    fn check(&self, index: u16, reshard: &ReShard<H>) -> Result<CheckedShard, Error> {
        self.topology.check_index(index)?;
        if reshard.shard.rows() != self.topology.samples
            || reshard.shard.cols() != self.topology.data_cols
        {
            return Err(Error::InvalidReShard);
        }
        let index = index as usize;
        let these_shuffled_indices = &self.shuffled_indices
            [index * self.topology.samples..(index + 1) * self.topology.samples];

        let proof_elements: Vec<(H::Digest, u32)> = these_shuffled_indices
            .iter()
            .zip(reshard.shard.iter())
            .map(|(&i, row)| (row_digest::<H>(row), i))
            .collect();

        let mut hasher = H::new();
        if reshard
            .inclusion_proof
            .verify_multi_inclusion(&mut hasher, &proof_elements, &self.root)
            .is_err()
        {
            return Err(Error::InvalidReShard);
        }

        let shard_checksum = reshard.shard.mul(&self.checking_matrix);
        for (row, &i) in shard_checksum.iter().zip(these_shuffled_indices) {
            if row != &self.encoded_checksum[i as usize] {
                return Err(Error::InvalidReShard);
            }
        }
        Ok(CheckedShard {
            index,
            shard: reshard.shard.clone(),
        })
    }
}

#[derive(Debug, Error)]
pub enum Error {
    #[error("invalid shard")]
    InvalidShard,
    #[error("invalid reshard")]
    InvalidReShard,
    #[error("invalid index {0}")]
    InvalidIndex(u16),
    #[error("insufficient shards {0} < {1}")]
    InsufficientShards(usize, usize),
    #[error("insufficient unique rows {0} < {1}")]
    InsufficientUniqueRows(usize, usize),
    #[error("failed to create inclusion proof: {0}")]
    FailedToCreateInclusionProof(BmtError),
}

const NAMESPACE: &[u8] = b"commonware-zoda";

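/// The ZODA coding scheme, generic over the hasher used for row commitments.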
#[derive(Clone, Copy)]
pub struct Zoda<H> {
    _marker: PhantomData<H>,
}

impl<H> std::fmt::Debug for Zoda<H> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "Zoda")
    }
}

impl<H: Hasher> Scheme for Zoda<H> {
    type Commitment = Summary;

    type Shard = Shard<H>;

    type ReShard = ReShard<H>;

    type CheckingData = CheckingData<H>;

    type CheckedShard = CheckedShard;

    type Error = Error;

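    // Encoding pipeline: pack bytes into a field-element grid, low-degree
    // extend it to `encoded_rows`, commit to every encoded row in a Merkle
    // tree, derive the checking matrix and row shuffle from the transcript
    // commitment, then hand each shard its window of shuffled rows along with
    // the shared checksum `data * checking_matrix`.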
    fn encode(
        config: &Config,
        data: impl bytes::Buf,
        strategy: &impl Strategy,
    ) -> Result<(Self::Commitment, Vec<Self::Shard>), Self::Error> {
        let data_bytes = data.remaining();
        let topology = Topology::reckon(config, data_bytes);
        let data = Matrix::init(
            topology.data_rows,
            topology.data_cols,
            F::stream_from_u64s(iter_u64_le(data)),
        );

        let encoded_data = data
            .as_polynomials(topology.encoded_rows)
            .expect("data has too many rows")
            .evaluate()
            .data();

        let row_hashes: Vec<H::Digest> = strategy
            .map_collect_vec(0..encoded_data.rows(), |i| row_digest::<H>(&encoded_data[i]));
        let mut bmt_builder = BmtBuilder::<H>::new(row_hashes.len());
        for hash in &row_hashes {
            bmt_builder.add(hash);
        }
        let bmt = bmt_builder.build();
        let root = bmt.root();

        let mut transcript = Transcript::new(NAMESPACE);
        transcript.commit((topology.data_bytes as u64).encode());
        transcript.commit(root.encode());
        let commitment = transcript.summarize();

        let transcript = Transcript::resume(commitment);
        let checking_matrix = checking_matrix(&transcript, &topology);
        let shuffled_indices = shuffle_indices(&transcript, encoded_data.rows());

        let checksum = Arc::new(data.mul(&checking_matrix));

        let shard_results: Vec<Result<Shard<H>, Error>> =
            strategy.map_collect_vec(0..topology.total_shards, |shard_idx| {
                let indices = &shuffled_indices
                    [shard_idx * topology.samples..(shard_idx + 1) * topology.samples];
                let rows = Matrix::init(
                    indices.len(),
                    topology.data_cols,
                    indices
                        .iter()
                        .flat_map(|&i| encoded_data[i as usize].iter().copied()),
                );
                let inclusion_proof = bmt
                    .multi_proof(indices)
                    .map_err(Error::FailedToCreateInclusionProof)?;
                Ok(Shard {
                    data_bytes,
                    root,
                    inclusion_proof,
                    rows,
                    checksum: checksum.clone(),
                })
            });
        let shards = shard_results
            .into_iter()
            .collect::<Result<Vec<_>, Error>>()?;
        Ok((commitment, shards))
    }

    fn reshard(
        config: &Config,
        commitment: &Self::Commitment,
        index: u16,
        shard: Self::Shard,
    ) -> Result<(Self::CheckingData, Self::CheckedShard, Self::ReShard), Self::Error> {
        let reshard = ReShard {
            inclusion_proof: shard.inclusion_proof,
            shard: shard.rows,
        };
        let checking_data = CheckingData::reckon(
            config,
            commitment,
            shard.data_bytes,
            shard.root,
            shard.checksum.as_ref(),
        )?;
        let checked_shard = checking_data.check(index, &reshard)?;
        Ok((checking_data, checked_shard, reshard))
    }

    fn check(
        _config: &Config,
        _commitment: &Self::Commitment,
        checking_data: &Self::CheckingData,
        index: u16,
        reshard: Self::ReShard,
    ) -> Result<Self::CheckedShard, Self::Error> {
        checking_data.check(index, &reshard)
    }

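    // Decoding: write each checked shard's rows into an evaluation vector at
    // their shuffled positions, confirm enough distinct rows were filled, then
    // interpolate back to coefficients and strip the byte padding.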
    fn decode(
        _config: &Config,
        _commitment: &Self::Commitment,
        checking_data: Self::CheckingData,
        shards: &[Self::CheckedShard],
        _strategy: &impl Strategy,
    ) -> Result<Vec<u8>, Self::Error> {
        let Topology {
            encoded_rows,
            data_cols,
            samples,
            data_rows,
            data_bytes,
            min_shards,
            ..
        } = checking_data.topology;
        if shards.len() < min_shards {
            return Err(Error::InsufficientShards(shards.len(), min_shards));
        }
        let mut evaluation = EvaluationVector::empty(encoded_rows.ilog2() as usize, data_cols);
        for shard in shards {
            let indices =
                &checking_data.shuffled_indices[shard.index * samples..(shard.index + 1) * samples];
            for (&i, row) in indices.iter().zip(shard.shard.iter()) {
                evaluation.fill_row(i as usize, row);
            }
        }
        let filled_rows = evaluation.filled_rows();
        if filled_rows < data_rows {
            return Err(Error::InsufficientUniqueRows(filled_rows, data_rows));
        }
        Ok(collect_u64_le(
            data_bytes,
            F::stream_to_u64s(
                evaluation
                    .recover()
                    .coefficients_up_to(data_rows)
                    .flatten()
                    .copied(),
            ),
        ))
    }
}

impl<H: Hasher> ValidatingScheme for Zoda<H> {}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::{CodecConfig, Config};
    use commonware_cryptography::Sha256;
    use commonware_parallel::Sequential;

    const STRATEGY: Sequential = Sequential;

    #[test]
    fn topology_reckon_handles_small_extra_shards() {
        let config = Config {
            minimum_shards: 3,
            extra_shards: 1,
        };
        let topology = Topology::reckon(&config, 16);
        assert_eq!(topology.min_shards, 3);
        assert_eq!(topology.total_shards, 4);

        assert_eq!(topology.data_cols, 1);
        let required = topology.required_samples();
        let provided = topology.samples * (topology.column_samples / 2);
        assert!(
            provided >= required,
            "security invariant violated: provided {provided} < required {required}"
        );
    }

    #[test]
    fn reshard_roundtrip_handles_field_packing() {
        use bytes::BytesMut;

        let config = Config {
            minimum_shards: 3,
            extra_shards: 2,
        };
        let data = vec![0xAA; 64];

        let (commitment, shards) =
            Zoda::<Sha256>::encode(&config, data.as_slice(), &STRATEGY).unwrap();
        let shard = shards.into_iter().next().unwrap();

        let (_, _, reshard) = Zoda::<Sha256>::reshard(&config, &commitment, 0, shard).unwrap();

        let mut buf = BytesMut::new();
        reshard.write(&mut buf);
        let mut bytes = buf.freeze();
        let decoded = ReShard::<Sha256>::read_cfg(
            &mut bytes,
            &CodecConfig {
                maximum_shard_size: data.len(),
            },
        )
        .unwrap();

        assert_eq!(decoded, reshard);
    }

    #[test]
    fn decode_rejects_duplicate_indices() {
        let config = Config {
            minimum_shards: 2,
            extra_shards: 0,
        };
        let data = b"duplicate shard coverage";
        let (commitment, shards) = Zoda::<Sha256>::encode(&config, &data[..], &STRATEGY).unwrap();
        let shard0 = shards[0].clone();
        let (checking_data, checked_shard0, _reshard0) =
            Zoda::<Sha256>::reshard(&config, &commitment, 0, shard0).unwrap();
        let duplicate = CheckedShard {
            index: checked_shard0.index,
            shard: checked_shard0.shard.clone(),
        };
        let shards = vec![checked_shard0, duplicate];
        let result =
            Zoda::<Sha256>::decode(&config, &commitment, checking_data, &shards, &STRATEGY);
        match result {
            Err(Error::InsufficientUniqueRows(actual, expected)) => {
                assert!(actual < expected);
            }
            other => panic!("expected insufficient unique rows error, got {other:?}"),
        }
    }

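    // A minimal end-to-end sketch: encode, reshard the first `minimum_shards`
    // shards, and decode from the resulting checked shards. The shard counts
    // and payload below are illustrative choices, not values taken from
    // elsewhere in this crate.
    #[test]
    fn encode_decode_roundtrip_recovers_data() {
        let config = Config {
            minimum_shards: 3,
            extra_shards: 2,
        };
        let data = b"zoda roundtrip coverage".to_vec();
        let (commitment, shards) =
            Zoda::<Sha256>::encode(&config, data.as_slice(), &STRATEGY).unwrap();

        // Validate each shard as its holder would, collecting the checked rows.
        let mut checking_data = None;
        let mut checked = Vec::new();
        for (i, shard) in shards.into_iter().take(3).enumerate() {
            let (cd, cs, _reshard) =
                Zoda::<Sha256>::reshard(&config, &commitment, i as u16, shard).unwrap();
            checking_data = Some(cd);
            checked.push(cs);
        }

        let decoded = Zoda::<Sha256>::decode(
            &config,
            &commitment,
            checking_data.unwrap(),
            &checked,
            &STRATEGY,
        )
        .unwrap();
        assert_eq!(decoded, data);
    }
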
    #[cfg(feature = "arbitrary")]
    mod conformance {
        use super::*;
        use commonware_codec::conformance::CodecConformance;

        commonware_conformance::conformance_tests! {
            CodecConformance<Shard<Sha256>>,
            CodecConformance<ReShard<Sha256>>,
        }
    }
}