1use crate::{Config, Scheme, ValidatingScheme};
118use bytes::BufMut;
119use commonware_codec::{Encode, EncodeSize, FixedSize, RangeCfg, Read, ReadExt, Write};
120use commonware_cryptography::{
121 transcript::{Summary, Transcript},
122 Digest, Hasher,
123};
124use commonware_math::{
125 fields::goldilocks::F,
126 ntt::{EvaluationVector, Matrix},
127};
128use commonware_parallel::Strategy;
129use commonware_storage::bmt::{Builder as BmtBuilder, Error as BmtError, Proof};
130use rand::seq::SliceRandom as _;
131use std::{marker::PhantomData, sync::Arc};
132use thiserror::Error;
133
134fn iter_u64_le(data: impl bytes::Buf) -> impl Iterator<Item = u64> {
136 struct Iter<B> {
137 remaining_u64s: usize,
138 tail: usize,
139 inner: B,
140 }
141
142 impl<B: bytes::Buf> Iter<B> {
143 fn new(inner: B) -> Self {
144 let remaining_u64s = inner.remaining() / 8;
145 let tail = inner.remaining() % 8;
146 Self {
147 remaining_u64s,
148 tail,
149 inner,
150 }
151 }
152 }
153
154 impl<B: bytes::Buf> Iterator for Iter<B> {
155 type Item = u64;
156
157 fn next(&mut self) -> Option<Self::Item> {
158 if self.remaining_u64s > 0 {
159 self.remaining_u64s -= 1;
160 return Some(self.inner.get_u64_le());
161 }
162 if self.tail > 0 {
163 let mut chunk = [0u8; 8];
164 self.inner.copy_to_slice(&mut chunk[..self.tail]);
165 self.tail = 0;
166 return Some(u64::from_le_bytes(chunk));
167 }
168 None
169 }
170 }
171 Iter::new(data)
172}
173
174fn collect_u64_le(max_length: usize, data: impl Iterator<Item = u64>) -> Vec<u8> {
175 let mut out = Vec::with_capacity(max_length);
176 for d in data {
177 out.extend_from_slice(&d.to_le_bytes());
178 }
179 out.truncate(max_length);
180 out
181}
182
183fn row_digest<H: Hasher>(row: &[F]) -> H::Digest {
184 let mut h = H::new();
185 for x in row {
186 h.update(&x.to_le_bytes());
187 }
188 h.finalize()
189}
190
191mod topology {
192 use super::Error;
193 use crate::Config;
194 use commonware_math::fields::goldilocks::F;
195 use commonware_utils::BigRationalExt as _;
196 use num_rational::BigRational;
197
198 const SECURITY_BITS: usize = 126;
199 const LOG2_PRECISION: usize = SECURITY_BITS.next_power_of_two().trailing_zeros() as usize;
203
204 #[derive(Debug, Clone, Copy, PartialEq)]
206 pub struct Topology {
207 pub data_bytes: usize,
209 pub data_cols: usize,
211 pub data_rows: usize,
213 pub encoded_rows: usize,
215 pub samples: usize,
217 pub column_samples: usize,
219 pub min_shards: usize,
221 pub total_shards: usize,
223 }
224
225 impl Topology {
226 const fn with_cols(data_bytes: usize, n: usize, k: usize, cols: usize) -> Self {
227 let data_els = F::bits_to_elements(8 * data_bytes);
228 let data_rows = data_els.div_ceil(cols);
229 let samples = data_rows.div_ceil(n);
230 Self {
231 data_bytes,
232 data_cols: cols,
233 data_rows,
234 encoded_rows: ((n + k) * samples).next_power_of_two(),
235 samples,
236 column_samples: 0,
237 min_shards: n,
238 total_shards: n + k,
239 }
240 }
241
242 pub(crate) fn required_samples(&self) -> usize {
243 let k = BigRational::from_usize(self.encoded_rows - self.data_rows);
244 let m = BigRational::from_usize(self.encoded_rows);
245 let fraction = (&k + BigRational::from_u64(1)) / (BigRational::from_usize(2) * &m);
246
247 let one_minus = BigRational::from_usize(1) - &fraction;
250 let log_term = one_minus.log2_ceil(LOG2_PRECISION);
251 if log_term >= BigRational::from_u64(0) {
252 return usize::MAX;
253 }
254
255 let required = BigRational::from_usize(SECURITY_BITS) / -log_term;
256 required.ceil_to_u128().unwrap_or(u128::MAX) as usize
257 }
258
259 fn correct_column_samples(&mut self) {
260 self.column_samples =
267 F::bits_to_elements(SECURITY_BITS) * self.required_samples().div_ceil(self.samples);
268 }
269
270 pub fn reckon(config: &Config, data_bytes: usize) -> Self {
272 let n = config.minimum_shards as usize;
273 let k = config.extra_shards as usize;
274 let corrected_data_bytes = data_bytes.max(1);
278 let mut out = Self::with_cols(corrected_data_bytes, n, k, 1);
297 loop {
298 let attempt = Self::with_cols(corrected_data_bytes, n, k, out.data_cols + 1);
299 let required_samples = attempt.required_samples();
300 if required_samples.saturating_mul(n + k) <= attempt.encoded_rows {
301 out = Self {
302 samples: required_samples.max(attempt.samples),
303 ..attempt
304 };
305 } else {
306 break;
307 }
308 }
309 out.correct_column_samples();
310 out.data_bytes = data_bytes;
311 out
312 }
313
314 pub fn check_index(&self, i: u16) -> Result<(), Error> {
315 if (0..self.total_shards).contains(&(i as usize)) {
316 return Ok(());
317 }
318 Err(Error::InvalidIndex(i))
319 }
320 }
321}
322use topology::Topology;
323
324#[derive(Clone)]
326pub struct Shard<D: Digest> {
327 data_bytes: usize,
328 root: D,
329 inclusion_proof: Proof<D>,
330 rows: Matrix,
331 checksum: Arc<Matrix>,
332}
333
334impl<D: Digest> PartialEq for Shard<D> {
335 fn eq(&self, other: &Self) -> bool {
336 self.data_bytes == other.data_bytes
337 && self.root == other.root
338 && self.inclusion_proof == other.inclusion_proof
339 && self.rows == other.rows
340 && self.checksum == other.checksum
341 }
342}
343
344impl<D: Digest> Eq for Shard<D> {}
345
346impl<D: Digest> EncodeSize for Shard<D> {
347 fn encode_size(&self) -> usize {
348 self.data_bytes.encode_size()
349 + self.root.encode_size()
350 + self.inclusion_proof.encode_size()
351 + self.rows.encode_size()
352 + self.checksum.encode_size()
353 }
354}
355
356impl<D: Digest> Write for Shard<D> {
357 fn write(&self, buf: &mut impl BufMut) {
358 self.data_bytes.write(buf);
359 self.root.write(buf);
360 self.inclusion_proof.write(buf);
361 self.rows.write(buf);
362 self.checksum.write(buf);
363 }
364}
365
366impl<D: Digest> Read for Shard<D> {
367 type Cfg = crate::CodecConfig;
368
369 fn read_cfg(
370 buf: &mut impl bytes::Buf,
371 cfg: &Self::Cfg,
372 ) -> Result<Self, commonware_codec::Error> {
373 let data_bytes = usize::read_cfg(buf, &RangeCfg::from(..=cfg.maximum_shard_size))?;
374 let max_els = cfg.maximum_shard_size / F::SIZE;
375 Ok(Self {
376 data_bytes,
377 root: ReadExt::read(buf)?,
378 inclusion_proof: Read::read_cfg(buf, &max_els)?,
379 rows: Read::read_cfg(buf, &max_els)?,
380 checksum: Arc::new(Read::read_cfg(buf, &max_els)?),
381 })
382 }
383}
384
385#[cfg(feature = "arbitrary")]
386impl<D: Digest> arbitrary::Arbitrary<'_> for Shard<D>
387where
388 D: for<'a> arbitrary::Arbitrary<'a>,
389{
390 fn arbitrary(u: &mut arbitrary::Unstructured<'_>) -> arbitrary::Result<Self> {
391 Ok(Self {
392 data_bytes: u.arbitrary::<u32>()? as usize,
393 root: u.arbitrary()?,
394 inclusion_proof: u.arbitrary()?,
395 rows: u.arbitrary()?,
396 checksum: Arc::new(u.arbitrary()?),
397 })
398 }
399}
400
401#[derive(Clone, Debug)]
402pub struct ReShard<D: Digest> {
403 inclusion_proof: Proof<D>,
404 shard: Matrix,
405}
406
407impl<D: Digest> PartialEq for ReShard<D> {
408 fn eq(&self, other: &Self) -> bool {
409 self.inclusion_proof == other.inclusion_proof && self.shard == other.shard
410 }
411}
412
413impl<D: Digest> Eq for ReShard<D> {}
414
415impl<D: Digest> EncodeSize for ReShard<D> {
416 fn encode_size(&self) -> usize {
417 self.inclusion_proof.encode_size() + self.shard.encode_size()
418 }
419}
420
421impl<D: Digest> Write for ReShard<D> {
422 fn write(&self, buf: &mut impl BufMut) {
423 self.inclusion_proof.write(buf);
424 self.shard.write(buf);
425 }
426}
427
428impl<D: Digest> Read for ReShard<D> {
429 type Cfg = crate::CodecConfig;
430
431 fn read_cfg(
432 buf: &mut impl bytes::Buf,
433 cfg: &Self::Cfg,
434 ) -> Result<Self, commonware_codec::Error> {
435 let max_data_bits = cfg.maximum_shard_size.saturating_mul(8);
436 let max_data_els = F::bits_to_elements(max_data_bits).max(1);
437 Ok(Self {
438 inclusion_proof: Read::read_cfg(buf, &max_data_els)?,
440 shard: Read::read_cfg(buf, &max_data_els)?,
441 })
442 }
443}
444
445#[cfg(feature = "arbitrary")]
446impl<D: Digest> arbitrary::Arbitrary<'_> for ReShard<D>
447where
448 D: for<'a> arbitrary::Arbitrary<'a>,
449{
450 fn arbitrary(u: &mut arbitrary::Unstructured<'_>) -> arbitrary::Result<Self> {
451 Ok(Self {
452 inclusion_proof: u.arbitrary()?,
453 shard: u.arbitrary()?,
454 })
455 }
456}
457
458pub struct CheckedShard {
460 index: usize,
461 shard: Matrix,
462}
463
464fn shuffle_indices(transcript: &Transcript, total: usize) -> Vec<u32> {
472 let total: u32 = total
473 .try_into()
474 .expect("encoded_rows exceeds u32::MAX; data too large for ZODA");
475 let mut out = (0..total).collect::<Vec<_>>();
476 out.shuffle(&mut transcript.noise(b"shuffle"));
477 out
478}
479
480fn checking_matrix(transcript: &Transcript, topology: &Topology) -> Matrix {
484 Matrix::rand(
485 &mut transcript.noise(b"checking matrix"),
486 topology.data_cols,
487 topology.column_samples,
488 )
489}
490
491#[derive(Clone)]
493pub struct CheckingData<D: Digest> {
494 topology: Topology,
495 root: D,
496 checking_matrix: Matrix,
497 encoded_checksum: Matrix,
498 shuffled_indices: Vec<u32>,
499}
500
501impl<D: Digest> CheckingData<D> {
502 fn reckon(
511 config: &Config,
512 commitment: &Summary,
513 data_bytes: usize,
514 root: D,
515 checksum: &Matrix,
516 ) -> Result<Self, Error> {
517 let topology = Topology::reckon(config, data_bytes);
518 let mut transcript = Transcript::new(NAMESPACE);
519 transcript.commit((topology.data_bytes as u64).encode());
520 transcript.commit(root.encode());
521 let expected_commitment = transcript.summarize();
522 if *commitment != expected_commitment {
523 return Err(Error::InvalidShard);
524 }
525 let transcript = Transcript::resume(expected_commitment);
526 let checking_matrix = checking_matrix(&transcript, &topology);
527 if checksum.rows() != topology.data_rows || checksum.cols() != topology.column_samples {
528 return Err(Error::InvalidShard);
529 }
530 let encoded_checksum = checksum
531 .as_polynomials(topology.encoded_rows)
532 .expect("checksum has too many rows")
533 .evaluate()
534 .data();
535 let shuffled_indices = shuffle_indices(&transcript, topology.encoded_rows);
536
537 Ok(Self {
538 topology,
539 root,
540 checking_matrix,
541 encoded_checksum,
542 shuffled_indices,
543 })
544 }
545
546 fn check<H: Hasher<Digest = D>>(
547 &self,
548 index: u16,
549 reshard: &ReShard<D>,
550 ) -> Result<CheckedShard, Error> {
551 self.topology.check_index(index)?;
552 if reshard.shard.rows() != self.topology.samples
553 || reshard.shard.cols() != self.topology.data_cols
554 {
555 return Err(Error::InvalidReShard);
556 }
557 let index = index as usize;
558 let these_shuffled_indices = &self.shuffled_indices
559 [index * self.topology.samples..(index + 1) * self.topology.samples];
560
561 let proof_elements: Vec<(H::Digest, u32)> = these_shuffled_indices
564 .iter()
565 .zip(reshard.shard.iter())
566 .map(|(&i, row)| (row_digest::<H>(row), i))
567 .collect();
568
569 let mut hasher = H::new();
571 if reshard
572 .inclusion_proof
573 .verify_multi_inclusion(&mut hasher, &proof_elements, &self.root)
574 .is_err()
575 {
576 return Err(Error::InvalidReShard);
577 }
578
579 let shard_checksum = reshard.shard.mul(&self.checking_matrix);
580 for (row, &i) in shard_checksum.iter().zip(these_shuffled_indices) {
582 if row != &self.encoded_checksum[i as usize] {
583 return Err(Error::InvalidReShard);
584 }
585 }
586 Ok(CheckedShard {
587 index,
588 shard: reshard.shard.clone(),
589 })
590 }
591}
592
593#[derive(Debug, Error)]
594pub enum Error {
595 #[error("invalid shard")]
596 InvalidShard,
597 #[error("invalid reshard")]
598 InvalidReShard,
599 #[error("invalid index {0}")]
600 InvalidIndex(u16),
601 #[error("insufficient shards {0} < {1}")]
602 InsufficientShards(usize, usize),
603 #[error("insufficient unique rows {0} < {1}")]
604 InsufficientUniqueRows(usize, usize),
605 #[error("failed to create inclusion proof: {0}")]
606 FailedToCreateInclusionProof(BmtError),
607}
608
609const NAMESPACE: &[u8] = b"commonware-zoda";
611
612#[derive(Clone, Copy)]
613pub struct Zoda<H> {
614 _marker: PhantomData<H>,
615}
616
617impl<H> std::fmt::Debug for Zoda<H> {
618 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
619 write!(f, "Zoda")
620 }
621}
622
623impl<H: Hasher> Scheme for Zoda<H> {
624 type Commitment = Summary;
625
626 type Shard = Shard<H::Digest>;
627
628 type ReShard = ReShard<H::Digest>;
629
630 type CheckingData = CheckingData<H::Digest>;
631
632 type CheckedShard = CheckedShard;
633
634 type Error = Error;
635
636 fn encode(
637 config: &Config,
638 data: impl bytes::Buf,
639 strategy: &impl Strategy,
640 ) -> Result<(Self::Commitment, Vec<Self::Shard>), Self::Error> {
641 let data_bytes = data.remaining();
643 let topology = Topology::reckon(config, data_bytes);
644 let data = Matrix::init(
645 topology.data_rows,
646 topology.data_cols,
647 F::stream_from_u64s(iter_u64_le(data)),
648 );
649
650 let encoded_data = data
652 .as_polynomials(topology.encoded_rows)
653 .expect("data has too many rows")
654 .evaluate()
655 .data();
656
657 let row_hashes: Vec<H::Digest> = strategy.map_collect_vec(0..encoded_data.rows(), |i| {
659 row_digest::<H>(&encoded_data[i])
660 });
661 let mut bmt_builder = BmtBuilder::<H>::new(row_hashes.len());
662 for hash in &row_hashes {
663 bmt_builder.add(hash);
664 }
665 let bmt = bmt_builder.build();
666 let root = bmt.root();
667
668 let mut transcript = Transcript::new(NAMESPACE);
670 transcript.commit((topology.data_bytes as u64).encode());
671 transcript.commit(root.encode());
672 let commitment = transcript.summarize();
673
674 let transcript = Transcript::resume(commitment);
676 let checking_matrix = checking_matrix(&transcript, &topology);
677 let shuffled_indices = shuffle_indices(&transcript, encoded_data.rows());
678
679 let checksum = Arc::new(data.mul(&checking_matrix));
681
682 let shard_results: Vec<Result<Shard<H::Digest>, Error>> =
684 strategy.map_collect_vec(0..topology.total_shards, |shard_idx| {
685 let indices = &shuffled_indices
686 [shard_idx * topology.samples..(shard_idx + 1) * topology.samples];
687 let rows = Matrix::init(
688 indices.len(),
689 topology.data_cols,
690 indices
691 .iter()
692 .flat_map(|&i| encoded_data[i as usize].iter().copied()),
693 );
694 let inclusion_proof = bmt
695 .multi_proof(indices)
696 .map_err(Error::FailedToCreateInclusionProof)?;
697 Ok(Shard {
698 data_bytes,
699 root,
700 inclusion_proof,
701 rows,
702 checksum: checksum.clone(),
703 })
704 });
705 let shards = shard_results
706 .into_iter()
707 .collect::<Result<Vec<_>, Error>>()?;
708 Ok((commitment, shards))
709 }
710
711 fn reshard(
712 config: &Config,
713 commitment: &Self::Commitment,
714 index: u16,
715 shard: Self::Shard,
716 ) -> Result<(Self::CheckingData, Self::CheckedShard, Self::ReShard), Self::Error> {
717 let reshard = ReShard {
718 inclusion_proof: shard.inclusion_proof,
719 shard: shard.rows,
720 };
721 let checking_data = CheckingData::reckon(
722 config,
723 commitment,
724 shard.data_bytes,
725 shard.root,
726 shard.checksum.as_ref(),
727 )?;
728 let checked_shard = checking_data.check::<H>(index, &reshard)?;
729 Ok((checking_data, checked_shard, reshard))
730 }
731
732 fn check(
733 _config: &Config,
734 _commitment: &Self::Commitment,
735 checking_data: &Self::CheckingData,
736 index: u16,
737 reshard: Self::ReShard,
738 ) -> Result<Self::CheckedShard, Self::Error> {
739 checking_data.check::<H>(index, &reshard)
740 }
741
742 fn decode(
743 _config: &Config,
744 _commitment: &Self::Commitment,
745 checking_data: Self::CheckingData,
746 shards: &[Self::CheckedShard],
747 _strategy: &impl Strategy,
748 ) -> Result<Vec<u8>, Self::Error> {
749 let Topology {
750 encoded_rows,
751 data_cols,
752 samples,
753 data_rows,
754 data_bytes,
755 min_shards,
756 ..
757 } = checking_data.topology;
758 if shards.len() < min_shards {
759 return Err(Error::InsufficientShards(shards.len(), min_shards));
760 }
761 let mut evaluation = EvaluationVector::empty(encoded_rows.ilog2() as usize, data_cols);
762 for shard in shards {
763 let indices =
764 &checking_data.shuffled_indices[shard.index * samples..(shard.index + 1) * samples];
765 for (&i, row) in indices.iter().zip(shard.shard.iter()) {
766 evaluation.fill_row(u64::from(i) as usize, row);
767 }
768 }
769 let filled_rows = evaluation.filled_rows();
772 if filled_rows < data_rows {
773 return Err(Error::InsufficientUniqueRows(filled_rows, data_rows));
774 }
775 Ok(collect_u64_le(
776 data_bytes,
777 F::stream_to_u64s(
778 evaluation
779 .recover()
780 .coefficients_up_to(data_rows)
781 .flatten()
782 .copied(),
783 ),
784 ))
785 }
786}
787
788impl<H: Hasher> ValidatingScheme for Zoda<H> {}
789
790#[cfg(test)]
791mod tests {
792 use super::*;
793 use crate::{CodecConfig, Config};
794 use bytes::BytesMut;
795 use commonware_cryptography::{sha256::Digest as Sha256Digest, Sha256};
796 use commonware_parallel::Sequential;
797
798 const STRATEGY: Sequential = Sequential;
799
800 #[test]
801 fn topology_reckon_handles_small_extra_shards() {
802 let config = Config {
803 minimum_shards: 3,
804 extra_shards: 1,
805 };
806 let topology = Topology::reckon(&config, 16);
807 assert_eq!(topology.min_shards, 3);
808 assert_eq!(topology.total_shards, 4);
809
810 assert_eq!(topology.data_cols, 1);
814 let required = topology.required_samples();
815 let provided = topology.samples * (topology.column_samples / 2);
816 assert!(
817 provided >= required,
818 "security invariant violated: provided {provided} < required {required}"
819 );
820 }
821
822 #[test]
823 fn reshard_roundtrip_handles_field_packing() {
824 let config = Config {
825 minimum_shards: 3,
826 extra_shards: 2,
827 };
828 let data = vec![0xAA; 64];
829
830 let (commitment, shards) =
831 Zoda::<Sha256>::encode(&config, data.as_slice(), &STRATEGY).unwrap();
832 let shard = shards.into_iter().next().unwrap();
833
834 let (_, _, reshard) = Zoda::<Sha256>::reshard(&config, &commitment, 0, shard).unwrap();
835
836 let mut buf = BytesMut::new();
837 reshard.write(&mut buf);
838 let mut bytes = buf.freeze();
839 let decoded = ReShard::<Sha256Digest>::read_cfg(
840 &mut bytes,
841 &CodecConfig {
842 maximum_shard_size: data.len(),
843 },
844 )
845 .unwrap();
846
847 assert_eq!(decoded, reshard);
848 }
849
850 #[test]
851 fn decode_rejects_duplicate_indices() {
852 let config = Config {
853 minimum_shards: 2,
854 extra_shards: 0,
855 };
856 let data = b"duplicate shard coverage";
857 let (commitment, shards) = Zoda::<Sha256>::encode(&config, &data[..], &STRATEGY).unwrap();
858 let shard0 = shards[0].clone();
859 let (checking_data, checked_shard0, _reshard0) =
860 Zoda::<Sha256>::reshard(&config, &commitment, 0, shard0).unwrap();
861 let duplicate = CheckedShard {
862 index: checked_shard0.index,
863 shard: checked_shard0.shard.clone(),
864 };
865 let shards = vec![checked_shard0, duplicate];
866 let result =
867 Zoda::<Sha256>::decode(&config, &commitment, checking_data, &shards, &STRATEGY);
868 match result {
869 Err(Error::InsufficientUniqueRows(actual, expected)) => {
870 assert!(actual < expected);
871 }
872 other => panic!("expected insufficient unique rows error, got {other:?}"),
873 }
874 }
875
876 #[cfg(feature = "arbitrary")]
877 mod conformance {
878 use super::*;
879 use commonware_codec::conformance::CodecConformance;
880
881 commonware_conformance::conformance_tests! {
882 CodecConformance<Shard<Sha256Digest>>,
883 CodecConformance<ReShard<Sha256Digest>>,
884 }
885 }
886}