Skip to main content

midnight_base_crypto/
data_provider.rs

1// This file is part of midnight-ledger.
2// Copyright (C) 2025 Midnight Foundation
3// SPDX-License-Identifier: Apache-2.0
4// Licensed under the Apache License, Version 2.0 (the "License");
5// You may not use this file except in compliance with the License.
6// You may obtain a copy of the License at
7// http://www.apache.org/licenses/LICENSE-2.0
8// Unless required by applicable law or agreed to in writing, software
9// distributed under the License is distributed on an "AS IS" BASIS,
10// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11// See the License for the specific language governing permissions and
12// limitations under the License.
13
14//! Provides mechanisms to fetch Midnight proof-related parameters and keys.
15
16use futures::StreamExt;
17#[cfg(feature = "cli")]
18use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
19use lazy_static::lazy_static;
20use reqwest::Url;
21use sha2::Digest;
22use sha2::Sha256;
23use std::env;
24use std::fs::File;
25use std::io;
26use std::io::BufReader;
27use std::io::Read;
28use std::io::Seek;
29use std::io::Write;
30use std::path::PathBuf;
31use std::time::Duration;
32use std::time::Instant;
33use tracing::{info, warn};
34
/// Retrieves various static cryptographic artifacts from a data server.
/// This keeps a local file system cache of the parameters, prover keys, verifier keys, and IR, that
/// can also be fetched remotely.
///
/// The local cache is located at: `$MIDNIGHT_PP` / `$XDG_CACHE_HOME/midnight/zk-params` /
/// `$HOME/.cache/midnight/zk-params` in that order of fall-backs.
///
/// The provider knows which data is in scope, and the SHA-256 hash of each of these. When
/// reading one of the values from cache, or fetching it, the SHA-256 hash is verified for
/// integrity.
///
/// The data provider can operate in an on-demand, or synchronous mode. In the former, if a datum
/// is *not* locally available, it is fetched when requested. In the latter, the datum is only
/// fetched when explicitly requested via a [`MidnightDataProvider::fetch`] call. Key material is
/// *always* synchronous.
#[derive(Clone)]
pub struct MidnightDataProvider {
    /// How to handle requests to fetch data
    pub fetch_mode: FetchMode,
    /// The base URL of the data store to use.
    ///
    /// Fetching an item with `name` will request it from `{base_url}/{name}`.
    pub base_url: Url,
    /// How to report status of fetching to the user
    pub output_mode: OutputMode,
    /// Additional (non-parameter) files allowed to be fetched.
    /// Triple of `file_path`, SHA-256 `hash`, and `description`.
    ///
    /// These entries are searched after the built-in public parameter table, so a built-in
    /// entry with the same `file_path` takes precedence.
    pub expected_data: Vec<(&'static str, [u8; 32], &'static str)>,
    /// The path to the directory where Midnight key material is stored.
    ///
    /// Artifacts are read from and written to `{dir}/{file_path}`.
    pub dir: PathBuf,
}
66
67lazy_static! {
68    /// The default base URL to use for the Midnight data provider.
69    pub static ref BASE_URL: Url = Url::parse(&std::env::var("MIDNIGHT_PARAM_SOURCE").unwrap_or("https://midnight-s3-fileshare-dev-eu-west-1.s3.eu-west-1.amazonaws.com/".to_owned())).expect("$MIDNIGHT_PARAM_SOURCE should be a valid URL");
70}
71
72/// Parse a 256-bit hex hash at const time.
73pub const fn hexhash(hex: &[u8]) -> [u8; 32] {
74    match const_hex::const_decode_to_array(hex) {
75        Ok(hash) => hash,
76        Err(_) => panic!("hash should be correct format"),
77    }
78}
79
// Built-in table of the public parameter files the provider always knows about.
//
// Each entry is `(file name, expected SHA-256 hash, human-readable description)`, with one
// entry per `k` from 0 to 25. File names follow the `bls_midnight_2p{k}` pattern produced by
// `MidnightDataProvider::name_k`.
const EXPECTED_DATA: &[(&str, [u8; 32], &str)] = &[
    (
        "bls_midnight_2p0",
        hexhash(b"59b30b3114a34ccbbfb599376e178fb8d9b3366cae2174c2f1da20e75847f823"),
        "public parameters for k=0",
    ),
    (
        "bls_midnight_2p1",
        hexhash(b"bbe04fe3c70d0c138447cb086b4baddc30cb8bb2a004114bc02e6f739516280e"),
        "public parameters for k=1",
    ),
    (
        "bls_midnight_2p2",
        hexhash(b"80e15568fa1a0117db893239be7fa5e34a6bcc3a8c3bfa7709534b9cb88eb6c1"),
        "public parameters for k=2",
    ),
    (
        "bls_midnight_2p3",
        hexhash(b"4be827a6472193df80d8f08b4b25a85baef436fdd1965d89b6af89f4ec4e99e2"),
        "public parameters for k=3",
    ),
    (
        "bls_midnight_2p4",
        hexhash(b"232f401fad10c7ddf8828d2aa4c85c6506c5da09795998cecaeb9f75fc8f6ada"),
        "public parameters for k=4",
    ),
    (
        "bls_midnight_2p5",
        hexhash(b"0a1c9229f315fc1868ff25f668fb83aec4d09f4f23a706b5197c692c619d72c6"),
        "public parameters for k=5",
    ),
    (
        "bls_midnight_2p6",
        hexhash(b"cf2ad6be7d0fedf5bec2aaa35f6be4aca33053d74268fdf5aa54fcb2891ea6df"),
        "public parameters for k=6",
    ),
    (
        "bls_midnight_2p7",
        hexhash(b"e82ae890c080188355f37feaffe91372584cd810615082d9143d4dec0453fd9d"),
        "public parameters for k=7",
    ),
    (
        "bls_midnight_2p8",
        hexhash(b"909b707551eaaea79828e883cde6fc46ab15986c3b1d791bed462c9e2805c933"),
        "public parameters for k=8",
    ),
    (
        "bls_midnight_2p9",
        hexhash(b"b9009f1098bcefffec3c461ab3a5e3a17f7e5599f0f08c70fcdc55a89227bcbd"),
        "public parameters for k=9",
    ),
    (
        "bls_midnight_2p10",
        hexhash(b"46b2290933cbed4c378889e4ba971f1a92888331ffb09466acd4ff61a1e2cb42"),
        "public parameters for k=10",
    ),
    (
        "bls_midnight_2p11",
        hexhash(b"9901589d7956ff58be0d85569b2f455b77b58c3758026ffb5bbe4807000b96d1"),
        "public parameters for k=11",
    ),
    (
        "bls_midnight_2p12",
        hexhash(b"ef08eb3fcf62df8f72c515cffa027e681808b530cb016eea104115545ef6d5c8"),
        "public parameters for k=12",
    ),
    (
        "bls_midnight_2p13",
        hexhash(b"d3324910969c4cc54143b8045b649e5c3a4bd5fb7b8f85fe1b770f640ce1c803"),
        "public parameters for k=13",
    ),
    (
        "bls_midnight_2p14",
        hexhash(b"fc253016885ec830e97808c9ec920bb5cab5c21af590380a6cb5eb0538e2b244"),
        "public parameters for k=14",
    ),
    (
        "bls_midnight_2p15",
        hexhash(b"724c7c3d779148bb113c7ee9c034b2f27db16e6bdf315fde90105a9bad00b1de"),
        "public parameters for k=15",
    ),
    (
        "bls_midnight_2p16",
        hexhash(b"09c877216d6589b370263e18af40a030a901b41a7a7c37ef58c9901db41f05c6"),
        "public parameters for k=16",
    ),
    (
        "bls_midnight_2p17",
        hexhash(b"4a9ef6c7c0619aab74eede44b13e753e3ba54508a02dd3b7106a949aabb73b74"),
        "public parameters for k=17",
    ),
    (
        "bls_midnight_2p18",
        hexhash(b"e8436dc5d8b598f169c127c745135d889744007e6d384ff126df8d1332522f86"),
        "public parameters for k=18",
    ),
    (
        "bls_midnight_2p19",
        hexhash(b"8e8dc15c4362f05c912f1e770559a3945db3e58a374def416ed5d3e65ad5b10e"),
        "public parameters for k=19",
    ),
    (
        "bls_midnight_2p20",
        hexhash(b"1cc62978558fdc1e445cd70cfd9a86ec3c2e2151b6d74811232d37faf9133ff1"),
        "public parameters for k=20",
    ),
    (
        "bls_midnight_2p21",
        hexhash(b"9cf1644a87f0f027ae5fc6278f91d823a6334ff3e338a29e2f2ef57d071ed64d"),
        "public parameters for k=21",
    ),
    (
        "bls_midnight_2p22",
        hexhash(b"e8ad5eed936d657a0fb59d2a55ba19f81a3083bb3554ef88f464f5377e9b2c2f"),
        "public parameters for k=22",
    ),
    (
        "bls_midnight_2p23",
        hexhash(b"09399d05f9f50875dfdd87dc9903d40c897eaafa9ec8cbb08bace853ecc36c0c"),
        "public parameters for k=23",
    ),
    (
        "bls_midnight_2p24",
        hexhash(b"b0e6fa7a4ab4a79a1e6560966f267556409db44bab6d5fab3711ad6c6b623207"),
        "public parameters for k=24",
    ),
    (
        "bls_midnight_2p25",
        hexhash(b"3289a751c938988cd2f54154d8722d1eda2cd11593064afdde82099b24ff4a58"),
        "public parameters for k=25",
    ),
];
212
213impl MidnightDataProvider {
214    /// Creates a new data provider with the default base URL.
215    pub fn new(
216        fetch_mode: FetchMode,
217        output_mode: OutputMode,
218        expected_data: Vec<(&'static str, [u8; 32], &'static str)>,
219    ) -> io::Result<Self> {
220        Ok(Self {
221            fetch_mode,
222            base_url: BASE_URL.clone(),
223            output_mode,
224            expected_data,
225            dir: env::var_os("MIDNIGHT_PP")
226                .map(PathBuf::from)
227                .or_else(|| {
228                    env::var_os("XDG_CACHE_HOME")
229                        .map(|p| PathBuf::from(p).join("midnight").join("zk-params"))
230                })
231                .or_else(|| {
232                    env::var_os("HOME").map(|p| {
233                        PathBuf::from(p)
234                            .join(".cache")
235                            .join("midnight")
236                            .join("zk-params")
237                    })
238                })
239                .ok_or_else(|| {
240                    io::Error::new(
241                        io::ErrorKind::NotFound,
242                        "Could not determine $HOME, $XDG_CACHE_HOME, or $MIDNIGHT_PP",
243                    )
244                })?,
245        })
246    }
247
248    fn expected_hash(&self, name: &str) -> io::Result<[u8; 32]> {
249        Ok(EXPECTED_DATA
250            .iter()
251            .chain(self.expected_data.iter())
252            .find(|(n, ..)| *n == name)
253            .ok_or_else(|| {
254                io::Error::new(
255                    io::ErrorKind::InvalidInput,
256                    format!(
257                        "artifact '{name}' is not a known managed artifact by the proof data cache."
258                    ),
259                )
260            })?
261            .1)
262    }
263
264    fn description(&self, name: &str) -> io::Result<&'static str> {
265        Ok(EXPECTED_DATA
266            .iter()
267            .chain(self.expected_data.iter())
268            .find(|(n, ..)| *n == name)
269            .ok_or_else(|| {
270                io::Error::new(
271                    io::ErrorKind::InvalidInput,
272                    format!(
273                        "artifact '{name}' is not a known managed artifact by the proof data cache."
274                    ),
275                )
276            })?
277            .2)
278    }
279
280    fn get_local(&self, name: &str) -> io::Result<Option<BufReader<File>>> {
281        let path = self.dir.join(name);
282        let expected_hash = self.expected_hash(name)?;
283        if !std::fs::exists(&path)? {
284            return Ok(None);
285        }
286        let mut file = BufReader::new(File::open(&path)?);
287        let mut hasher = Sha256::new();
288        let mut buf = [0u8; 1 << 20];
289        loop {
290            let read = file.read(&mut buf)?;
291            if read == 0 {
292                break;
293            }
294            hasher.update(&buf[..read]);
295        }
296        let actual_hash = <[u8; 32]>::from(hasher.finalize());
297        if actual_hash != expected_hash {
298            return Err(io::Error::new(
299                io::ErrorKind::InvalidData,
300                format!(
301                    "Hash mismatch in data stored at {}. Found hash {}, but expected {}. Please try removing this file to force a re-fetch. If that does not work, you may be subject to an attack.",
302                    path.display(),
303                    const_hex::encode(actual_hash),
304                    const_hex::encode(expected_hash)
305                ),
306            ));
307        }
308        file.seek(io::SeekFrom::Start(0))?;
309        Ok(Some(file))
310    }
311
312    async fn get_or_fetch(&self, name: &str) -> io::Result<BufReader<File>> {
313        if let Some(data) = self.get_local(name)? {
314            return Ok(data);
315        };
316        let expected_hash = self.expected_hash(name)?;
317        let path = self.dir.join(name);
318        let parent = path.parent().ok_or_else(|| {
319            io::Error::new(
320                io::ErrorKind::NotFound,
321                format!("parent of path file {name} should exist."),
322            )
323        })?;
324        std::fs::create_dir_all(parent)?;
325        let mut file = atomic_write_file::OpenOptions::new()
326            .read(true)
327            .open(&path)?;
328        self.fetch_data_to(name, expected_hash, &mut file).await?;
329        let mut rfile = file.as_file().try_clone()?;
330        file.commit()?;
331        rfile.seek(io::SeekFrom::Start(0))?;
332        Ok(BufReader::new(rfile))
333    }
334
335    /// Fetches a given item.
336    pub async fn fetch(&self, name: &str) -> io::Result<()> {
337        self.get_or_fetch(name).await?;
338        Ok(())
339    }
340
341    /// The name of the public parameters for the given `k` value.
342    pub fn name_k(k: u8) -> String {
343        format!("bls_midnight_2p{k}")
344    }
345
346    /// Fetches the public parameters for a give `k`.
347    pub async fn fetch_k(&self, k: u8) -> io::Result<()> {
348        self.fetch(&Self::name_k(k)).await
349    }
350
351    // Only arise due to feature gates.
352    #[allow(irrefutable_let_patterns)]
353    async fn fetch_data_to(
354        &self,
355        name: &str,
356        expected_hash: [u8; 32],
357        f: &mut File,
358    ) -> io::Result<()> {
359        const RETRIES: usize = 3;
360        let desc = self.description(name)?;
361        if let OutputMode::Log = &self.output_mode {
362            info!(
363                "Missing {desc}. Attempting to download from the host {} - this is not a trusted service, the data will be verified.",
364                self.base_url
365            );
366        }
367        #[cfg(feature = "cli")]
368        if let OutputMode::Cli(pb) = &self.output_mode {
369            pb.println(format!("Missing {desc}. Attempting to download from the host {} - this is not a trusted service, the data will be verified.", self.base_url))?;
370        }
371        let mut url = self.base_url.clone();
372        url.path_segments_mut()
373            .map_err(|()| {
374                io::Error::new(
375                    io::ErrorKind::InvalidInput,
376                    format!(
377                        "Base URL '{}' for proving data provider invalid",
378                        &self.base_url
379                    ),
380                )
381            })?
382            .push(name);
383        for i in 0..RETRIES {
384            let retry_msg = if i == RETRIES - 1 {
385                "Giving up."
386            } else {
387                "Retrying..."
388            };
389            f.seek(io::SeekFrom::Start(0))?;
390            f.set_len(0)?;
391            let mut hasher = Sha256::new();
392            let res = match reqwest::Client::new().get(url.clone()).send().await {
393                Ok(res) => res,
394                Err(e) => {
395                    #[cfg(feature = "cli")]
396                    if let OutputMode::Cli(pb) = &self.output_mode {
397                        pb.println(format!("{e}. {retry_msg}"))?;
398                    }
399                    warn!("{e}. {retry_msg}");
400                    continue;
401                }
402            };
403            let total_size = res.content_length();
404            #[cfg(feature = "cli")]
405            let pb = if let OutputMode::Cli(multi) = &self.output_mode {
406                let pb = match total_size {
407                    Some(size) => ProgressBar::new(size).with_style(
408                        ProgressStyle::with_template(
409                            "{msg} [{bar:.green.bold}] {bytes:.bold} / {total_bytes:.bold}",
410                        )
411                        .expect("Static style should parse")
412                        .progress_chars("=> "),
413                    ),
414                    None => ProgressBar::no_length().with_style(
415                        ProgressStyle::with_template("{msg} {spinner:.green.bold} {bytes:.bold}")
416                            .expect("Static style should parse"),
417                    ),
418                };
419                let pb = multi.insert(0, pb);
420                pb.set_message(format!("Fetching {desc}"));
421                Some(pb)
422            } else {
423                None
424            };
425            let mut downloaded: u64 = 0;
426            let mut t_last = Instant::now();
427            const LOG_UPDATE_FREQ: Duration = Duration::from_secs(5);
428            let mut stream = res.bytes_stream();
429
430            while let Some(resp) = stream.next().await {
431                let data = match resp {
432                    Ok(res) => res,
433                    Err(e) => {
434                        #[cfg(feature = "cli")]
435                        if let OutputMode::Cli(pb) = &self.output_mode {
436                            pb.println(format!("{e}. {retry_msg}"))?;
437                        }
438                        warn!("{e}. {retry_msg}");
439                        continue;
440                    }
441                };
442                f.write_all(&data)?;
443                hasher.update(&data);
444                downloaded += data.len() as u64;
445                #[cfg(feature = "cli")]
446                if let Some(pb) = &pb {
447                    pb.set_position(downloaded);
448                }
449                let t = Instant::now();
450                if matches!(self.output_mode, OutputMode::Log) && t - t_last > LOG_UPDATE_FREQ {
451                    t_last = t;
452                    match total_size {
453                        Some(size) => {
454                            info!("Fetching '{name}' - {downloaded} / {size} bytes downloaded")
455                        }
456                        None => info!("Fetching '{name}' - {downloaded} bytes downloaded"),
457                    }
458                }
459            }
460            info!("Fetching {desc} - finished.");
461            #[cfg(feature = "cli")]
462            if let Some(pb) = pb {
463                pb.finish();
464            }
465            let hash = <[u8; 32]>::from(hasher.finalize());
466            if hash == expected_hash {
467                if let OutputMode::Log = self.output_mode {
468                    info!("Fetching {desc} - verified correct.");
469                }
470                return Ok(());
471            }
472            warn!(
473                ?hash,
474                ?expected_hash,
475                "Fetching {desc} - hash mismatch. {retry_msg}"
476            );
477        }
478        Err(io::Error::new(
479            io::ErrorKind::InvalidData,
480            format!("Failed to fetch data from {url} after {RETRIES} attempts. Giving up."),
481        ))
482    }
483
484    /// Retrieves a file from the data provider according to the fetch mode, giving a specific
485    /// error message on failure.
486    pub async fn get_file(&self, name: &str, desc: &str) -> io::Result<BufReader<File>> {
487        Ok(match self.fetch_mode {
488            FetchMode::OnDemand => self.get_or_fetch(name).await?,
489            FetchMode::Synchronous => self
490                .get_local(name)?
491                .ok_or_else(|| io::Error::new(io::ErrorKind::NotFound, desc))?,
492        })
493    }
494}
495
/// How to behave when fetching data.
///
/// Consulted by [`MidnightDataProvider::get_file`] to decide whether a file missing from the
/// local cache may be downloaded.
#[derive(Debug, Copy, Clone)]
pub enum FetchMode {
    /// Fetch on demand, whenever it gets accessed: a file missing from the local cache is
    /// downloaded as part of the request for it.
    OnDemand,
    /// Fetch data only when explicitly requested: a request for a file missing from the local
    /// cache fails with a `NotFound` error instead of downloading it.
    Synchronous,
}
504
#[derive(Debug, Clone)]
/// How to output updates to the user while data is being fetched.
pub enum OutputMode {
    #[cfg(feature = "cli")]
    /// Assume an interactive CLI: messages are printed through the contained `MultiProgress`
    /// and each download gets a progress bar inserted into it.
    /// Only available with the `cli` feature.
    Cli(MultiProgress),
    /// Assume logging output only: status and periodic progress are emitted via `tracing`.
    Log,
}
513}