ant_core/data/client/data.rs

//! In-memory data operations using self-encryption.
//!
//! Upload and download raw byte data. Content is encrypted via
//! convergent encryption and stored as content-addressed chunks.
//! Use this when you already have data in memory (e.g., `Bytes`).
//! For file-based streaming uploads that avoid loading the entire
//! file into memory, see the `file` module.
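//!
//! # Example
//!
//! A sketch of a full round trip; `client` is an already-constructed
//! [`Client`] (construction is outside this module):
//!
//! ```ignore
//! use bytes::Bytes;
//!
//! let upload = client.data_upload(Bytes::from("hello world")).await?;
//! let restored = client.data_download(&upload.data_map).await?;
//! assert_eq!(&restored[..], b"hello world");
//! ```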

use crate::data::client::adaptive::{observe_op, rebucketed_ordered};
use crate::data::client::batch::{PaymentIntent, PreparedChunk};
use crate::data::client::classify_error;
use crate::data::client::file::{ExternalPaymentInfo, PreparedUpload};
use crate::data::client::merkle::PaymentMode;
use crate::data::client::Client;
use crate::data::error::{Error, Result};
use ant_protocol::{compute_address, DATA_TYPE_CHUNK};
use bytes::Bytes;
use futures::stream::StreamExt;
use self_encryption::{decrypt, encrypt, DataMap, EncryptedChunk};
use tracing::{debug, info};

/// Result of an in-memory data upload, including the `DataMap`
/// needed to retrieve the data.
#[derive(Debug, Clone)]
pub struct DataUploadResult {
    /// The data map containing chunk metadata for reconstruction.
    pub data_map: DataMap,
    /// Number of chunks stored on the network.
    pub chunks_stored: usize,
    /// Which payment mode was actually used (not just requested).
    pub payment_mode_used: PaymentMode,
}

impl Client {
    /// Upload in-memory data to the network using self-encryption.
    ///
    /// The content is encrypted and split into chunks, each stored
    /// as a content-addressed chunk on the network. Returns a `DataMap`
    /// that can be used to retrieve and decrypt the data.
    ///
    /// # Errors
    ///
    /// Returns an error if encryption fails or any chunk cannot be stored.
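    ///
    /// # Example
    ///
    /// A minimal sketch; `client` is an already-constructed [`Client`]:
    ///
    /// ```ignore
    /// let result = client.data_upload(Bytes::from("hello")).await?;
    /// println!("stored {} chunks", result.chunks_stored);
    /// // Keep `result.data_map` -- it is required to download the data later.
    /// ```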
    pub async fn data_upload(&self, content: Bytes) -> Result<DataUploadResult> {
        let content_len = content.len();
        debug!("Encrypting data ({content_len} bytes)");

        let (data_map, encrypted_chunks) = encrypt(content)
            .map_err(|e| Error::Encryption(format!("Failed to encrypt data: {e}")))?;

        info!("Data encrypted into {} chunks", encrypted_chunks.len());

        let chunk_contents: Vec<Bytes> = encrypted_chunks
            .into_iter()
            .map(|chunk| chunk.content)
            .collect();

        let (addresses, _storage_cost, _gas_cost) =
            self.batch_upload_chunks(chunk_contents).await?;
        let chunks_stored = addresses.len();

        info!("Data uploaded: {chunks_stored} chunks stored ({content_len} bytes original)");

        Ok(DataUploadResult {
            data_map,
            chunks_stored,
            payment_mode_used: PaymentMode::Single,
        })
    }

    /// Upload in-memory data with a specific payment mode.
    ///
    /// When `mode` is `Merkle`, or when `mode` is `Auto` and the chunk count
    /// reaches the merkle threshold, this buffers all chunks and pays via a
    /// single merkle batch transaction. Otherwise it falls back to the
    /// wave-based batch payment path (one EVM transaction per wave).
    ///
    /// # Errors
    ///
    /// Returns an error if encryption fails or any chunk cannot be stored.
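    ///
    /// # Example
    ///
    /// A sketch forcing the merkle path; `client` is a constructed [`Client`]
    /// and `large_payload` an in-memory `Bytes` value (both assumed):
    ///
    /// ```ignore
    /// let result = client
    ///     .data_upload_with_mode(large_payload, PaymentMode::Merkle)
    ///     .await?;
    /// // In `Auto` mode, inspect which path was actually taken:
    /// assert_eq!(result.payment_mode_used, PaymentMode::Merkle);
    /// ```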
    pub async fn data_upload_with_mode(
        &self,
        content: Bytes,
        mode: PaymentMode,
    ) -> Result<DataUploadResult> {
        let content_len = content.len();
        debug!("Encrypting data ({content_len} bytes) with mode {mode:?}");

        let (data_map, encrypted_chunks) = encrypt(content)
            .map_err(|e| Error::Encryption(format!("Failed to encrypt data: {e}")))?;

        let chunk_count = encrypted_chunks.len();
        info!("Data encrypted into {chunk_count} chunks");

        let chunk_contents: Vec<Bytes> = encrypted_chunks
            .into_iter()
            .map(|chunk| chunk.content)
            .collect();

        if self.should_use_merkle(chunk_count, mode) {
            // Merkle batch payment path
            info!("Using merkle batch payment for {chunk_count} chunks");

            let addresses: Vec<[u8; 32]> =
                chunk_contents.iter().map(|c| compute_address(c)).collect();

            // Compute average chunk size for quoting
            let avg_size =
                chunk_contents.iter().map(bytes::Bytes::len).sum::<usize>() / chunk_count.max(1);
            let avg_size_u64 = u64::try_from(avg_size).unwrap_or(0);

            // Try merkle batch; in Auto mode, fall back to wave-batch on network issues
            let batch_result = match self
                .pay_for_merkle_batch(&addresses, DATA_TYPE_CHUNK, avg_size_u64)
                .await
            {
                Ok(result) => result,
                Err(Error::InsufficientPeers(ref msg)) if mode == PaymentMode::Auto => {
                    info!("Merkle needs more peers ({msg}), falling back to wave-batch");
                    let (addresses, _sc, _gc) = self.batch_upload_chunks(chunk_contents).await?;
                    return Ok(DataUploadResult {
                        data_map,
                        chunks_stored: addresses.len(),
                        payment_mode_used: PaymentMode::Single,
                    });
                }
                Err(e) => return Err(e),
            };

            let chunks_stored = self
                .merkle_upload_chunks(chunk_contents, addresses, &batch_result, None)
                .await?;

            info!("Data uploaded via merkle: {chunks_stored} chunks stored ({content_len} bytes)");
            Ok(DataUploadResult {
                data_map,
                chunks_stored,
                payment_mode_used: PaymentMode::Merkle,
            })
        } else {
            // Wave-based batch payment path (single EVM tx per wave).
            let (addresses, _sc, _gc) = self.batch_upload_chunks(chunk_contents).await?;

            info!(
                "Data uploaded: {} chunks stored ({content_len} bytes original)",
                addresses.len()
            );
            Ok(DataUploadResult {
                data_map,
                chunks_stored: addresses.len(),
                payment_mode_used: PaymentMode::Single,
            })
        }
    }

    /// Phase 1 of external-signer data upload: encrypt and collect quotes.
    ///
    /// Encrypts in-memory data via self-encryption, then collects storage
    /// quotes for each chunk without making any on-chain payment. Returns
    /// a [`PreparedUpload`] containing the data map and a [`PaymentIntent`]
    /// with the payment details for external signing.
    ///
    /// After the caller signs and submits the payment transaction, call
    /// [`Client::finalize_upload`] with the tx hashes to complete storage.
    ///
    /// # Errors
    ///
    /// Returns an error if encryption fails or quote collection fails.
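    ///
    /// # Example
    ///
    /// A sketch of phase 1; the external signing step and the follow-up call
    /// to [`Client::finalize_upload`] are elided:
    ///
    /// ```ignore
    /// let prepared = client.data_prepare_upload(content).await?;
    /// if let ExternalPaymentInfo::WaveBatch { payment_intent, .. } = &prepared.payment_info {
    ///     // Hand `payment_intent` to the external signer, then finalize.
    ///     println!("total to pay: {} atto", payment_intent.total_amount);
    /// }
    /// ```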
    pub async fn data_prepare_upload(&self, content: Bytes) -> Result<PreparedUpload> {
        let content_len = content.len();
        debug!("Preparing data upload for external signing ({content_len} bytes)");

        let (data_map, encrypted_chunks) = encrypt(content)
            .map_err(|e| Error::Encryption(format!("Failed to encrypt data: {e}")))?;

        let chunk_count = encrypted_chunks.len();
        info!("Data encrypted into {chunk_count} chunks");

        let chunk_contents: Vec<Bytes> = encrypted_chunks
            .into_iter()
            .map(|chunk| chunk.content)
            .collect();

        let quote_limiter = self.controller().quote.clone();
        let quote_concurrency = quote_limiter.current().min(chunk_count.max(1));
        let results: Vec<Result<Option<PreparedChunk>>> = futures::stream::iter(chunk_contents)
            .map(|content| {
                let limiter = quote_limiter.clone();
                async move {
                    observe_op(
                        &limiter,
                        || async move { self.prepare_chunk_payment(content).await },
                        classify_error,
                    )
                    .await
                }
            })
            .buffer_unordered(quote_concurrency)
            .collect()
            .await;

        let mut prepared_chunks = Vec::with_capacity(results.len());
        for result in results {
            if let Some(prepared) = result? {
                prepared_chunks.push(prepared);
            }
        }

        let payment_intent = PaymentIntent::from_prepared_chunks(&prepared_chunks);

        info!(
            "Data prepared for external signing: {} chunks, total {} atto ({content_len} bytes)",
            prepared_chunks.len(),
            payment_intent.total_amount,
        );

        Ok(PreparedUpload {
            data_map,
            payment_info: ExternalPaymentInfo::WaveBatch {
                prepared_chunks,
                payment_intent,
            },
            data_map_address: None,
        })
    }

    /// Store a `DataMap` on the network as a public chunk.
    ///
    /// The serialized `DataMap` is stored as a regular content-addressed chunk.
    /// Anyone who knows the returned address can retrieve and use the `DataMap`
    /// to download the original data.
    ///
    /// # Errors
    ///
    /// Returns an error if serialization fails or the chunk cannot be stored.
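    ///
    /// # Example
    ///
    /// A sketch publishing the map from a prior upload (setup assumed):
    ///
    /// ```ignore
    /// let upload = client.data_upload(content).await?;
    /// let address = client.data_map_store(&upload.data_map).await?;
    /// // Anyone holding `address` can now fetch the DataMap and download.
    /// ```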
    pub async fn data_map_store(&self, data_map: &DataMap) -> Result<[u8; 32]> {
        let serialized = rmp_serde::to_vec(data_map)
            .map_err(|e| Error::Serialization(format!("Failed to serialize DataMap: {e}")))?;

        info!(
            "Storing DataMap as public chunk ({} bytes serialized)",
            serialized.len()
        );

        self.chunk_put(Bytes::from(serialized)).await
    }

    /// Fetch a `DataMap` from the network by its chunk address.
    ///
    /// Retrieves the chunk at `address` and deserializes it as a `DataMap`.
    ///
    /// # Errors
    ///
    /// Returns an error if the chunk is not found or deserialization fails.
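    ///
    /// # Example
    ///
    /// A sketch of the retrieval side, assuming `address` came from
    /// [`Client::data_map_store`]:
    ///
    /// ```ignore
    /// let data_map = client.data_map_fetch(&address).await?;
    /// let content = client.data_download(&data_map).await?;
    /// ```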
    pub async fn data_map_fetch(&self, address: &[u8; 32]) -> Result<DataMap> {
        let chunk = self.chunk_get(address).await?.ok_or_else(|| {
            Error::InvalidData(format!(
                "DataMap chunk not found at {}",
                hex::encode(address)
            ))
        })?;

        rmp_serde::from_slice(&chunk.content)
            .map_err(|e| Error::Serialization(format!("Failed to deserialize DataMap: {e}")))
    }

    /// Download and decrypt data from the network using its `DataMap`.
    ///
    /// Retrieves all chunks referenced by the data map, then decrypts
    /// and reassembles the original content. Fetches chunks concurrently;
    /// the fan-out is sized by the adaptive controller's `fetch` channel
    /// and ramps up under healthy conditions.
    ///
    /// # Errors
    ///
    /// Returns an error if any chunk cannot be retrieved or decryption fails.
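    ///
    /// # Example
    ///
    /// A sketch, assuming `data_map` was returned by [`Client::data_upload`]
    /// or fetched via [`Client::data_map_fetch`]:
    ///
    /// ```ignore
    /// let content = client.data_download(&data_map).await?;
    /// println!("recovered {} bytes", content.len());
    /// ```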
    pub async fn data_download(&self, data_map: &DataMap) -> Result<Bytes> {
        let chunk_infos = data_map.infos();
        debug!("Downloading data ({} chunks)", chunk_infos.len());

        // Extract owned addresses to avoid HRTB lifetime issue with
        // stream::iter over references combined with async closures.
        let addresses: Vec<[u8; 32]> = chunk_infos.iter().map(|info| info.dst_hash.0).collect();

        // Build ONE peer pool from a single DHT lookup, then reuse it
        // for every chunk's GET. On networks where find_closest_peers
        // dominates per-chunk wall-clock, this turns a download from
        // O(N * lookup_time) into O(lookup_time + N * RPC_time).
        let pool = self.build_peer_pool_for(&addresses).await?;
        debug!(
            "Built peer pool of {} for {} chunks",
            pool.len(),
            addresses.len()
        );
        let pool_ref = &pool;

        // Rolling rebucketing: re-reads the controller's fetch cap as
        // each slot frees, so a long download (e.g. 10 GB = ~2500
        // chunks) sees adaptive growth/decay mid-flight without batch
        // fences. Output is index-sorted so self_encryption decrypt
        // sees DataMap-ordered chunks.
        let fetch_limiter = self.controller().fetch.clone();
        let encrypted_chunks: Vec<EncryptedChunk> = rebucketed_ordered(
            &fetch_limiter,
            addresses.into_iter().enumerate(),
            |(idx, address)| {
                let limiter = fetch_limiter.clone();
                async move {
                    let chunk = observe_op(
                        &limiter,
                        || async move { self.chunk_get_with_pool(&address, pool_ref).await },
                        classify_error,
                    )
                    .await?
                    .ok_or_else(|| {
                        Error::InvalidData(format!(
                            "Missing chunk {} required for data reconstruction",
                            hex::encode(address)
                        ))
                    })?;
                    Ok::<_, Error>((
                        idx,
                        EncryptedChunk {
                            content: chunk.content,
                        },
                    ))
                }
            },
        )
        .await?;

        debug!(
            "All {} chunks retrieved, decrypting",
            encrypted_chunks.len()
        );

        let content = decrypt(data_map, &encrypted_chunks)
            .map_err(|e| Error::Encryption(format!("Failed to decrypt data: {e}")))?;

        info!("Data downloaded and decrypted ({} bytes)", content.len());

        Ok(content)
    }
}

/// Compile-time assertions that Client method futures are Send.
///
/// These methods are called from axum handlers and tokio::spawn contexts
/// that require Send + 'static. The async closures inside stream
/// combinators must not capture references with concrete lifetimes
/// (HRTB issue). If any of these checks fail, the stream closures
/// need restructuring to use owned values instead of references.
#[cfg(test)]
mod send_assertions {
    use super::*;

    fn _assert_send<T: Send>(_: &T) {}

    #[allow(
        dead_code,
        unreachable_code,
        unused_variables,
        clippy::diverging_sub_expression
    )]
    async fn _data_download_is_send(client: &Client) {
        let dm: DataMap = todo!();
        let fut = client.data_download(&dm);
        _assert_send(&fut);
    }

    #[allow(dead_code, unreachable_code, clippy::diverging_sub_expression)]
    async fn _data_upload_is_send(client: &Client) {
        let fut = client.data_upload(Bytes::new());
        _assert_send(&fut);
    }

    #[allow(dead_code, unreachable_code, clippy::diverging_sub_expression)]
    async fn _data_upload_with_mode_is_send(client: &Client) {
        let fut = client.data_upload_with_mode(Bytes::new(), PaymentMode::Auto);
        _assert_send(&fut);
    }

    #[allow(dead_code, unreachable_code, clippy::diverging_sub_expression)]
    async fn _data_prepare_upload_is_send(client: &Client) {
        let fut = client.data_prepare_upload(Bytes::new());
        _assert_send(&fut);
    }
}