Skip to main content

kontor_crypto_core/
prepare.rs

//! File preparation and reconstruction: `validate_and_encode`, `prepare_file`,
//! and `reconstruct_file`, built on the crate's core primitives.
2
3use crate::config;
4use crate::erasure::{decode_file_symbols, encode_file_symbols};
5use crate::error::{CoreError, Result};
6use crate::merkle;
7use crate::types::{FileMetadata, PreparedFile};
8use sha2::{Digest, Sha256};
9
10/// Content-addressed object identifier: `obj_<SHA256(data)>`.
11pub fn compute_object_id(data: &[u8]) -> String {
12    let mut h = Sha256::new();
13    h.update(data);
14    format!("obj_{:x}", h.finalize())
15}
16
17/// Unique file identifier: `file_<SHA256(domain || len(data) || data || len(nonce) || nonce)>`.
18pub fn compute_file_id(data: &[u8], nonce: &[u8]) -> String {
19    let mut h = Sha256::new();
20    h.update(b"kontor.file_id.v1");
21    h.update((data.len() as u64).to_le_bytes());
22    h.update(data);
23    h.update((nonce.len() as u64).to_le_bytes());
24    h.update(nonce);
25    format!("file_{:x}", h.finalize())
26}
27
/// Intermediate result after input validation, erasure encoding, and
/// power-of-two padding — the shared pipeline used by both `prepare_file`
/// and the WASM `prepareLeaves` entry point.
///
/// Derives `Debug` and `Clone` so callers can log, assert on, and duplicate
/// the value; every field is a plain standard-library type.
#[derive(Debug, Clone)]
pub struct EncodedFile {
    /// Content-addressed identifier as produced by `compute_object_id`.
    pub object_id: String,
    /// Nonce-dependent identifier as produced by `compute_file_id`.
    pub file_id: String,
    /// Erasure-coded symbols, extended with zero-filled symbols up to
    /// `padded_len` entries.
    pub padded_symbols: Vec<Vec<u8>>,
    /// Number of entries in `padded_symbols`: the encoded symbol count
    /// rounded up to the next power of two.
    pub padded_len: usize,
    /// Length in bytes of the original, un-encoded input.
    pub original_size: usize,
}
38
39/// Validates inputs, computes content/file IDs, erasure-encodes data,
40/// and pads to a power-of-two number of symbols.
41pub fn validate_and_encode(data: &[u8], filename: &str, nonce: &[u8]) -> Result<EncodedFile> {
42    if data.is_empty() {
43        return Err(CoreError::EmptyData {
44            operation: "prepare_file".to_string(),
45        });
46    }
47    if data.len() > config::MAX_FILE_SIZE_BYTES {
48        return Err(CoreError::InvalidInput(format!(
49            "prepare_file input size {} exceeds maximum {}",
50            data.len(),
51            config::MAX_FILE_SIZE_BYTES
52        )));
53    }
54    if filename.is_empty() {
55        return Err(CoreError::InvalidInput(
56            "prepare_file filename must be non-empty".to_string(),
57        ));
58    }
59    if filename.len() > config::MAX_FILENAME_LEN_BYTES {
60        return Err(CoreError::InvalidInput(format!(
61            "prepare_file filename length {} exceeds maximum {}",
62            filename.len(),
63            config::MAX_FILENAME_LEN_BYTES
64        )));
65    }
66    if nonce.len() > config::MAX_NONCE_LEN_BYTES {
67        return Err(CoreError::InvalidInput(format!(
68            "prepare_file nonce length {} exceeds maximum {}",
69            nonce.len(),
70            config::MAX_NONCE_LEN_BYTES
71        )));
72    }
73
74    let object_id = compute_object_id(data);
75    let file_id = compute_file_id(data, nonce);
76
77    let all_symbols = encode_file_symbols(data)?;
78    let padded_len = all_symbols.len().next_power_of_two();
79    let mut padded_symbols = all_symbols;
80    padded_symbols.resize(padded_len, vec![0; config::CHUNK_SIZE_BYTES]);
81
82    Ok(EncodedFile {
83        object_id,
84        file_id,
85        padded_symbols,
86        padded_len,
87        original_size: data.len(),
88    })
89}
90
91/// Prepares raw data into PreparedFile and FileMetadata.
92/// Algorithm: object_id = SHA256(data), file_id = SHA256(domain || len(data) || data || len(nonce) || nonce),
93/// encode_file_symbols, pad to power of two, build_tree, build metadata and prepared file.
94pub fn prepare_file(
95    data: &[u8],
96    filename: &str,
97    nonce: &[u8],
98) -> Result<(PreparedFile, FileMetadata)> {
99    let encoded = validate_and_encode(data, filename, nonce)?;
100
101    let (tree, root) = merkle::build_tree(&encoded.padded_symbols)?;
102
103    let metadata = FileMetadata {
104        root,
105        object_id: encoded.object_id,
106        file_id: encoded.file_id.clone(),
107        nonce: nonce.to_vec(),
108        padded_len: encoded.padded_len,
109        original_size: encoded.original_size,
110        filename: filename.to_string(),
111    };
112    metadata.validate()?;
113
114    let prepared_file = PreparedFile {
115        tree,
116        file_id: encoded.file_id,
117        root,
118    };
119
120    Ok((prepared_file, metadata))
121}
122
123/// Reconstructs the original file from erasure-coded symbols and metadata.
124pub fn reconstruct_file(symbols: &[Option<Vec<u8>>], metadata: &FileMetadata) -> Result<Vec<u8>> {
125    metadata.validate()?;
126    let mut mutable = symbols.to_vec();
127    decode_file_symbols(
128        &mut mutable,
129        metadata.num_codewords(),
130        metadata.original_size,
131    )
132}