ark_transcript/
lib.rs

1// -*- mode: rust; -*-
2//
3// Copyright (c) 2019 Web 3 Foundation
4//
5// Authors:
6// - Jeffrey Burdges <jeff@web3.foundation>
7
8#![cfg_attr(not(feature = "std"), no_std)]
9#![deny(unsafe_code)]
10#![doc = include_str!("../README.md")]
11
12
13use ark_std::{
14    UniformRand,
15    borrow::{Borrow,BorrowMut},
16    io::{self, Read, Write}, // Result
17    vec::Vec,
18};
19use ark_serialize::{CanonicalSerialize};
20use ark_ff::{Field,PrimeField};
21
22use rand_core::{RngCore,CryptoRng};
23
24pub use sha3::{Shake128};
25pub use digest;
26use digest::{Update,XofReader,ExtendableOutput};
27
28#[cfg(test)]
29mod tests;
30
31#[cfg(any(test, debug_assertions))]
32pub mod debug;
33
34/// Trascript labels.
35/// 
36/// We prefer if labels are `&'static [u8]` but of course
37/// users might require labels provided by another langauge.
38pub trait AsLabel {
39    fn as_label(&self) -> &[u8];
40}
41impl AsLabel for &'static [u8] {
42    fn as_label(&self) -> &[u8] { &self[..] }
43}
44impl<const N: usize> AsLabel for &'static [u8; N] {
45    fn as_label(&self) -> &[u8] { &self[..] }
46}
47
48/// Identify a byte slice as a label, which requires this not be
49/// user controlled data.
50/// 
51/// We use `Borrow<[u8]>` so that `IsLabel<[u8; N]>`, `IsLabel<&[u8]>`,
52/// `IsLabel<[u8]>`, etc. all work correctly.  `AsRef` would permit the
53/// `IsLabel<str>`, which maybe non-cannonical and cause breakage.
54#[derive(Clone,Debug)]
55pub struct IsLabel<T>(pub T);
56impl<T: Borrow<[u8]>> AsLabel for IsLabel<T> {
57    fn as_label(&self) -> &[u8] { self.0.borrow() }
58}
59
60
61/// All types interpretable as `Transcript`s, including primarily
62/// `impl BorrowMut<Traanscript>` types like `Transcript` and
63/// `&mut Transcript`.
64/// 
65/// We permit `&[u8]` and `AsLabel<T>` here too, but caution that
66/// `&[u8]` needs internal applicaiton domain seperation. 
67pub trait IntoTranscript {
68    type Taken: BorrowMut<Transcript>;
69    fn into_transcript(self) -> Self::Taken;
70}
71impl<B: BorrowMut<Transcript>> IntoTranscript for B {
72    type Taken = B;
73    fn into_transcript(self) -> B { self }
74}
75impl<T: Borrow<[u8]>> IntoTranscript for IsLabel<T> {
76    type Taken = Transcript;
77    fn into_transcript(self) -> Transcript {
78        Transcript::new_labeled(self)
79    }
80}
81impl<'a> IntoTranscript for &'a [u8] {
82    type Taken = Transcript;
83    fn into_transcript(self) -> Transcript {
84        Transcript::from_accumulation(self)
85    }
86}
87impl<'a, const N: usize> IntoTranscript for &'a [u8; N] {
88    type Taken = Transcript;
89    fn into_transcript(self) -> Transcript {
90        Transcript::from_accumulation(self)
91    }
92}
93
94/// Inner hasher or accumulator object.
95/// 
96/// We make this distinction at runtime instead of at compile-time
97/// for simplicity elsewhere.
98#[derive(Clone)]
99enum Mode {
100    /// Actual Shake128 hasher being written to.
101    Hash(Shake128),
102    /// Accumulate bytes instead of hashing them.
103    Accumulate(Vec<u8>),
104}
105
106impl Mode {
107    /// Abstracts over the writing modes
108    fn raw_write(&mut self, bytes: &[u8]) {
109        match self {
110            Mode::Hash(hasher) => hasher.update(bytes),
111            Mode::Accumulate(acc) => acc.extend_from_slice(bytes),
112        }
113    }
114
115    /// Switch from writing to reading
116    /// 
117    /// Panics if called in accumulation mode
118    fn raw_reader(self) -> Reader {
119        #[cfg(feature = "debug-transcript")]
120        println!("Shake128 {}transcript XoF reader",self.debug_name);
121        match self {
122            Mode::Hash(hasher) => Reader(hasher.clone().finalize_xof()),
123            Mode::Accumulate(acc) => {
124                let mut t = Transcript::from_accumulation(acc);
125                t.seperate();
126                t.mode.raw_reader()
127            }
128        }
129    }
130}
131
132/// Shake128 transcript style hasher.
133#[derive(Clone)]
134pub struct Transcript {
135    /// Length writen between `seperate()` calls.  Always less than 2^31.
136    /// `None` means `write` was not yet invoked, so seperate() does nothing.
137    /// We need this to distinguish zero length write calls.
138    length: Option<u32>,
139    /// Actual Shake128 hasher being written to, or maybe an accumulator
140    mode: Mode,
141    /// Is this a witness transcript?
142    #[cfg(feature = "debug-transcript")]
143    debug_name: &'static str,
144}
145
146impl Default for Transcript {
147    /// Create a fresh empty `Transcript`.
148    fn default() -> Transcript {
149        Transcript::new_blank()
150    }
151}
152
153impl Update for Transcript {
154    fn update(&mut self, bytes: &[u8]) {
155        self.write_bytes(bytes);
156    }
157}
158
159impl Write for Transcript {
160    // Always succeed fully
161    fn write(&mut self, bytes: &[u8]) -> io::Result<usize> {
162        self.write_bytes(bytes);
163        Ok(bytes.len())
164    }
165
166    // Always succeed immediately
167    fn flush(&mut self) -> io::Result<()> {
168        Ok(())
169    }    
170}
171
172
173impl Transcript {
174    /// Create a `Transcript` from `Shake128`.
175    pub fn from_shake128(hasher: Shake128) -> Transcript {
176        Transcript {
177            length: None,
178            mode: Mode::Hash(hasher),
179            #[cfg(feature = "debug-transcript")]
180            debug_name: "",
181        } 
182    }
183
184    /// Create a `Transcript` from previously accumulated bytes.
185    /// 
186    /// We do not domain seperate these initial bytes, but we domain
187    /// seperate everything after this, making this safe.
188    pub fn from_accumulation(acc: impl AsRef<[u8]>) -> Transcript {
189        let mut hasher = Shake128::default();
190        hasher.update(acc.as_ref());
191        Transcript::from_shake128(hasher)
192    }
193
194    /// Create an empty `Transcript`.
195    pub fn new_blank() -> Transcript {
196        #[cfg(feature = "debug-transcript")]
197        println!("Initial Shake128 transcript..");
198        Transcript::from_accumulation(&[])
199    }
200
201    /// Create a fresh `Transcript` with an initial domain label.
202    /// 
203    /// We implicitly have an initial zero length user data write
204    /// preceeding this first label.
205    pub fn new_labeled(label: impl AsLabel) -> Transcript {
206        let mut t = Transcript::new_blank();
207        t.label(label);
208        t
209    }
210    
211    /// Create an empty `Transcript` in bytes accumulation mode.
212    /// 
213    /// You cannot create `Reader`s in accumulation mode, but 
214    /// `accumulator_finalize` exports the accumulated `Vec<u8>`.
215    /// You could then transport this elsewhere and start a
216    /// real hasher using `from_accumulation`.
217    pub fn new_blank_accumulator() -> Transcript {
218        #[cfg(feature = "debug-transcript")]
219        println!("Initial Shake128 transcript..");
220        Transcript {
221            length: None,
222            mode: Mode::Accumulate(Vec::new()),
223            #[cfg(feature = "debug-transcript")]
224            debug_name: "",
225        }
226    }
227
228    /// Avoid repeated allocations by reserving additional space when in accumulation mode.
229    pub fn accumulator_reserve(&mut self, additional: usize) {
230        match &mut self.mode {
231            Mode::Accumulate(acc) => acc.reserve(additional),
232            _ => {},
233        }
234    }
235
236    /// Invokes `seperate` and exports the accumulated transcript bytes,
237    /// which you later pass into `Transcript::from_accumulation`.
238    pub fn accumulator_finalize(mut self) -> Vec<u8> {
239        self.seperate();
240        match self.mode {
241            Mode::Hash(_) => panic!("Attempte to accumulator_finalize a hashing Transcript"),
242            Mode::Accumulate(acc) => acc,
243        }
244    }
245
246    /// Write basic unlabeled domain seperator into the hasher.
247    /// 
248    /// Implemented by writing in big endian the number of bytes
249    /// written since the previous `seperate` call, aka I2OSP(len,4)
250    /// from [rfc8017](https://www.rfc-editor.org/rfc/rfc8017.txt).
251    /// 
252    /// We do nothing unless `write_bytes` was called previously, aka
253    /// after the previous `seperate` call.  Invoking `write_bytes(b"")`
254    /// before `seperate` forces seperation, aka aligns multiple code
255    /// paths with convergent hashing, but in which users supply zero
256    /// length inputs.
257    pub fn seperate(&mut self) {
258        #[cfg(feature = "debug-transcript")]
259        println!("Shake128 {}transcript seperator: {}",self.debug_name, self.length);
260        if let Some(l) = self.length {
261            self.mode.raw_write( & l.to_be_bytes() ); 
262        }
263        self.length = None;
264    }
265
266    /// Write bytes into the hasher, increasing doain separator counter.
267    /// 
268    /// We wrap each 2^31 bytes into a seperate domain, instead of
269    /// producing an error.
270    pub fn write_bytes(&mut self, mut bytes: &[u8]) {
271        const HIGH: u32 = 0x80000000;
272        loop {
273            let length = self.length.get_or_insert(0);
274            let l = ark_std::cmp::min( (HIGH - 1 - *length) as usize, bytes.len() );
275            #[cfg(feature = "debug-transcript")]
276            match ark_std::str::from_utf8(bytes) {
277                Ok(s) => {
278                    println!("Shake128 {}transcript write of {} bytes: b\"{}\"", self.debug_name, l, s);
279                }
280                Err(_) => {
281                    println!("Shake128 {}transcript write of {} bytes out of {}", self.debug_name, l, bytes.len());
282                }
283            }
284            self.mode.raw_write( &bytes[0..l] );
285            bytes = &bytes[l..];
286            if bytes.len() == 0 {
287                *length += u32::try_from(l).unwrap();
288                return;
289            }
290            *length |= HIGH;
291            self.seperate();
292        }
293    }
294
295    /*
296    /// I2OSP(len,4) from [rfc8017](https://www.rfc-editor.org/rfc/rfc8017.txt)
297    /// with our own domain seperation 
298    fn append_u32(&mut self, v: u32) {
299        self.seperate();
300        self.write_bytes(&v.to_be_bytes());
301        self.seperate();
302    }
303    */
304
305    /// I2OSP(len,8) from [rfc8017](https://www.rfc-editor.org/rfc/rfc8017.txt)
306    /// with our own domain seperation 
307    pub fn append_u64(&mut self, v: u64) {
308        self.seperate();
309        self.write_bytes(&v.to_be_bytes());
310        self.seperate();
311    }
312
313    /// Write into the hasher items seralizable by Arkworks.
314    /// 
315    /// We `ensure_seperated` from any previously supplied user data,
316    /// so we therfore suggest `label` be called in between `append`
317    /// and `write`s of possibly empty user data.
318    /// See concerns on `ensure_seperated`.
319    /// 
320    /// We use uncompressed serialization here for performance. 
321    pub fn append<O: CanonicalSerialize+?Sized>(&mut self, itm: &O) {
322        self.seperate();
323        itm.serialize_uncompressed(&mut *self)
324        .expect("ArkTranscript should infaillibly flushed"); 
325        self.seperate();
326    }
327    // In concrete terms, `t.append(itm);` yields `t.ensure_seperated(); itm.serialize_uncompressed(&t);`,
328    // while `t.seperate(); t.append(itm);` yields `t.seperate(); itm.serialize_uncompressed(&t);`,
329    // which differ if preceeded by a `t.write(user_data);` with empty `user_data`.
330
331    /// Write into the hasher a slice of items seralizable by Arkworks,
332    /// exactly like invoking `append` repeatedly.
333    pub fn append_slice<O,B>(&mut self, itms: &[B])
334    where O: CanonicalSerialize+?Sized, B: Borrow<O>, 
335    {
336        self.seperate();
337        for itm in itms.iter() {
338            itm.borrow()
339            .serialize_uncompressed(&mut *self)
340            .expect("ArkTranscript should infaillibly flushed");
341            self.seperate();
342        }
343    }
344
345    /// Write domain separation label into the hasher,
346    /// after first ending the previous write phase.
347    pub fn label(&mut self, label: impl AsLabel) {
348        self.seperate();
349        self.write_bytes(label.as_label());
350        self.seperate();
351    }
352
353    /// Create a challenge reader.
354    /// 
355    /// Invoking `self.label(label)` has the same effect upon `self`,
356    /// but the reader returnned cannot be obtained by any combinataion of other methods.
357    pub fn challenge(&mut self, label: impl AsLabel) -> Reader {
358        #[cfg(feature = "debug-transcript")]
359        println!("Shake128 {}transcript challenge",self.debug_name);
360        self.label(label);
361        self.write_bytes(b"challenge");
362        let reader = self.mode.clone().raw_reader();
363        self.seperate();
364        reader
365    }
366
367    /// Forks transcript to prepare a witness reader.
368    /// 
369    /// We `clone` the transcript and `label` this clone, but do not
370    /// touch the original.  After forking, you should write any
371    /// secret seeds into the transcript, and then invoke `witness`
372    /// with system randomness.
373    pub fn fork(&self, label: impl AsLabel) -> Transcript {
374        let mut fork = self.clone();
375        #[cfg(feature = "debug-transcript")]
376        {
377            fork.debug_name = "witness ";
378            println!("Shake128 {}transcript forked", self.debug_name);
379        }
380        // Invoking label forces an extra `seperate` vs `challenge`
381        fork.label(label);
382        fork
383    }
384    // In fact, `clone` alone works fine instead here, assuming you
385    // correctly supply secret seeds and system randomness.
386 
387    /// Set the `debug_name` if you're doing anything complex, using clone, etc.
388    #[cfg(feature = "debug-transcript")]
389    pub fn set_debug_name(&mut self, debug_name: &'static str) {
390        self.debug_name = debug_name;
391    }
392
393    // #[cfg(not(feature = "debug-transcript"))]
394    // pub fn set_debug_name(&mut self, debug_name: &'static str) {
395    // }
396
397    /// Create a witness reader from a forked transcript.
398    /// 
399    /// We expect `rng` to be system randomness when used in production,
400    /// ala `&mut rng_core::OsRng` or maybe `&mut rand::thread_rng()`,
401    /// as otherwise you incur risks from any weaknesses elsewhere.
402    /// 
403    /// You'll need a deterministic randomness for test vectors of ourse, 
404    /// ala `&mut ark_transcript::debug::TestVectorFakeRng`.
405    /// We suggest implementing this choice inside your secret key type,
406    /// along side whatever supplies secret seeds.
407    pub fn witness(mut self, rng: &mut (impl RngCore+CryptoRng)) -> Reader {
408        self.seperate();
409        let mut rand = [0u8; 32];
410        rng.fill_bytes(&mut rand);
411        self.write_bytes(&rand);
412        self.mode.raw_reader()
413    }
414}
415
416
417/// Shake128 transcript style XoF reader, used for both 
418/// Fiat-Shamir challenges and witnesses.
419#[repr(transparent)]
420pub struct Reader(sha3::Shake128Reader);
421
422impl Reader {
423    /// Read bytes from the transcript into the buffer.
424    pub fn read_bytes(&mut self, buf: &mut [u8]) {
425        XofReader::read(&mut self.0, buf);
426    }
427
428    /// Read bytes from the transcript. Always succeed fully.
429    pub fn read_byte_array<const N: usize>(&mut self) -> [u8; N] {
430        let mut buf = [0u8; N];
431        self.read_bytes(&mut buf);
432        buf
433    }
434
435    /// Sample a `T` using `ark_std:::UniformRand`
436    /// 
437    /// Arkworks always does rejection sampling so far, so
438    /// constant-time-ness depends the object being sampled.
439    pub fn read_uniform<T: UniformRand>(&mut self) -> T {
440        <T as UniformRand>::rand(self)
441    }
442
443    /// Sample a prime field element using reduction mod the order from
444    /// a 128 bit larger array of random bytes.
445    ///
446    /// Identical to the [IETF hash-to-curve draft](https://datatracker.ietf.org/doc/draft-irtf-cfrg-hash-to-curve/14/)
447    /// except we only supports prime fields here, making this 
448    /// compatable with constant-time implementation.
449    pub fn read_reduce<F: PrimeField>(&mut self) -> F {
450        xof_read_reduced::<F,Self>(self)
451    }
452}
453
454pub fn xof_read_reduced<F: PrimeField,R: XofReader>(xof: &mut R) -> F {
455    // The final output of `hash_to_field` will be an array of field
456    // elements from F::BaseField, each of size `len_per_elem`.
457    let len_per_base_elem = get_len_per_elem::<F, 128>();
458    if len_per_base_elem > 256 {
459        panic!("PrimeField larger than 1913 bits!");
460    }
461    // Rust *still* lacks alloca, hence this ugly hack.
462    let mut alloca = [0u8; 256];
463    let alloca = &mut alloca[0..len_per_base_elem];
464    xof.read(alloca);
465    alloca.reverse();  // Need BE for IRTF draft but Arkworks' LE is faster
466    F::from_le_bytes_mod_order(&alloca)
467}
468
469/// This function computes the length in bytes that a hash function should output
470/// for hashing an element of type `Field`.
471/// See section 5.1 and 5.3 of the
472/// [IETF hash-to-curve standardization draft](https://datatracker.ietf.org/doc/draft-irtf-cfrg-hash-to-curve/14/)
473const fn get_len_per_elem<F: Field, const SEC_PARAM: usize>() -> usize {
474    // ceil(log(p))
475    let base_field_size_in_bits = F::BasePrimeField::MODULUS_BIT_SIZE as usize;
476    // ceil(log(p)) + security_parameter
477    let base_field_size_with_security_padding_in_bits = base_field_size_in_bits + SEC_PARAM;
478    // ceil( (ceil(log(p)) + security_parameter) / 8)
479    let bytes_per_base_field_elem =
480        ((base_field_size_with_security_padding_in_bits + 7) / 8) as u64;
481    bytes_per_base_field_elem as usize
482}
483
484impl XofReader for Reader {
485    fn read(&mut self, dest: &mut [u8]) {
486        self.read_bytes(dest);
487    }
488}
489
490/// Read bytes from the transcript. Always succeed fully.
491impl Read for Reader {
492    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
493        self.read_bytes(buf);
494        Ok(buf.len())
495    }
496
497    fn read_exact(&mut self, buf: &mut [u8]) -> io::Result<()> {
498        self.read_bytes(buf);
499        Ok(())
500    }
501}
502
503/// Read bytes from the transcript. Always succeed fully
504impl RngCore for Reader {
505    fn next_u32(&mut self) -> u32 {
506        let mut b = [0u8; 4];
507        self.read_bytes(&mut b);
508        u32::from_le_bytes(b)
509    }
510    fn next_u64(&mut self) -> u64 {
511        let mut b = [0u8; 8];
512        self.read_bytes(&mut b);
513        u64::from_le_bytes(b)
514    }
515    fn fill_bytes(&mut self, dest: &mut [u8]) {
516        self.read_bytes(dest);
517    }
518    fn try_fill_bytes(&mut self, dest: &mut [u8]) -> Result<(), rand_core::Error> {
519        self.fill_bytes(dest);
520        Ok(())
521    }
522}
523// impl<T: BorrowMut<Transcript>> CryptoRng for TranscriptIO<T> { }
524