// sci_form/transport/chunked.rs
1//! Chunked streaming for large datasets to avoid JSON bottlenecks.
2//!
3//! Instead of serializing entire grids or coordinate arrays as one JSON blob,
4//! this module provides chunk-based iteration that can be streamed over
5//! WebSocket, Server-Sent Events, or consumed incrementally.
6
7use serde::{Deserialize, Serialize};
8
9/// A chunk of data from a streamed computation.
10#[derive(Debug, Clone, Serialize, Deserialize)]
11pub struct DataChunk {
12    /// Chunk index (0-based).
13    pub index: usize,
14    /// Total number of chunks (if known).
15    pub total: Option<usize>,
16    /// Data type identifier.
17    pub kind: ChunkKind,
18    /// Float values in this chunk.
19    pub values: Vec<f64>,
20    /// Number of logical items in this chunk.
21    pub count: usize,
22}
23
24/// Type of data in a chunk.
25#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq)]
26pub enum ChunkKind {
27    /// 3D coordinates (flat xyz triples).
28    Coordinates,
29    /// ESP grid values.
30    EspValues,
31    /// DOS curve data.
32    DosValues,
33    /// Generic numeric data.
34    Generic,
35}
36
/// Iterator that produces chunks from a large float buffer.
///
/// Owns its data and yields `DataChunk`s of at most `chunk_size` values
/// each until the buffer is exhausted.
pub struct ChunkedIterator {
    // Owned source buffer; read via `position`, never mutated.
    data: Vec<f64>,
    // Values per chunk; `new` clamps this to at least 1.
    chunk_size: usize,
    // Index of the first not-yet-emitted value in `data`.
    position: usize,
    // Kind tag copied into every emitted chunk.
    kind: ChunkKind,
    // Precomputed chunk count, reported in each chunk's `total` field.
    total_chunks: usize,
    // 0-based index assigned to the next emitted chunk.
    current_index: usize,
}
46
47impl ChunkedIterator {
48    /// Create a new chunked iterator over data.
49    ///
50    /// `chunk_size`: number of f64 values per chunk.
51    pub fn new(data: Vec<f64>, chunk_size: usize, kind: ChunkKind) -> Self {
52        let total = (data.len() + chunk_size - 1) / chunk_size.max(1);
53        Self {
54            data,
55            chunk_size: chunk_size.max(1),
56            position: 0,
57            kind,
58            total_chunks: total,
59            current_index: 0,
60        }
61    }
62
63    /// Total number of chunks.
64    pub fn total_chunks(&self) -> usize {
65        self.total_chunks
66    }
67
68    /// Whether all data has been consumed.
69    pub fn is_done(&self) -> bool {
70        self.position >= self.data.len()
71    }
72}
73
74impl Iterator for ChunkedIterator {
75    type Item = DataChunk;
76
77    fn next(&mut self) -> Option<DataChunk> {
78        if self.position >= self.data.len() {
79            return None;
80        }
81
82        let end = (self.position + self.chunk_size).min(self.data.len());
83        let values = self.data[self.position..end].to_vec();
84        let count = values.len();
85        let chunk = DataChunk {
86            index: self.current_index,
87            total: Some(self.total_chunks),
88            kind: self.kind,
89            values,
90            count,
91        };
92
93        self.position = end;
94        self.current_index += 1;
95        Some(chunk)
96    }
97}
98
99/// Split an ESP grid into chunks for streaming.
100pub fn chunk_esp_grid(grid: &crate::esp::EspGrid, chunk_size: usize) -> ChunkedIterator {
101    ChunkedIterator::new(grid.values.clone(), chunk_size, ChunkKind::EspValues)
102}
103
104/// Split coordinate data into chunks (aligned to xyz triples).
105pub fn chunk_coordinates(coords: &[f64], atoms_per_chunk: usize) -> ChunkedIterator {
106    ChunkedIterator::new(coords.to_vec(), atoms_per_chunk * 3, ChunkKind::Coordinates)
107}
108
109/// Split DOS data into chunks.
110pub fn chunk_dos(dos: &crate::dos::DosResult, points_per_chunk: usize) -> Vec<DataChunk> {
111    let mut chunks = Vec::new();
112    let n = dos.energies.len();
113    let total = (n + points_per_chunk - 1) / points_per_chunk.max(1);
114
115    for i in 0..total {
116        let start = i * points_per_chunk;
117        let end = (start + points_per_chunk).min(n);
118
119        // Interleave energy and DOS values: [e0, d0, e1, d1, ...]
120        let mut values = Vec::with_capacity((end - start) * 2);
121        for j in start..end {
122            values.push(dos.energies[j]);
123            values.push(dos.total_dos[j]);
124        }
125
126        chunks.push(DataChunk {
127            index: i,
128            total: Some(total),
129            kind: ChunkKind::DosValues,
130            values,
131            count: end - start,
132        });
133    }
134    chunks
135}
136
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_chunked_iterator_basic() {
        // 5 values in chunks of 2 -> sizes 2, 2, 1.
        let mut stream =
            ChunkedIterator::new(vec![1.0, 2.0, 3.0, 4.0, 5.0], 2, ChunkKind::Generic);

        let first = stream.next().unwrap();
        assert_eq!(first.index, 0);
        assert_eq!(first.values, vec![1.0, 2.0]);
        assert_eq!(first.total, Some(3));

        let second = stream.next().unwrap();
        assert_eq!(second.index, 1);
        assert_eq!(second.values, vec![3.0, 4.0]);

        let last = stream.next().unwrap();
        assert_eq!(last.index, 2);
        assert_eq!(last.values, vec![5.0]);

        assert!(stream.next().is_none());
        assert!(stream.is_done());
    }

    #[test]
    fn test_chunked_iterator_exact_division() {
        // 6 values split evenly into two chunks of 3 — no short tail.
        let produced: Vec<_> =
            ChunkedIterator::new(vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0], 3, ChunkKind::Generic)
                .collect();
        assert_eq!(produced.len(), 2);
        assert!(produced.iter().all(|c| c.count == 3));
    }

    #[test]
    fn test_chunk_coordinates_alignment() {
        // 4 atoms = 12 coords; 2 atoms per chunk = 6 values per chunk.
        let flat_xyz = vec![0.0; 12];
        let produced: Vec<_> = chunk_coordinates(&flat_xyz, 2).collect();
        assert_eq!(produced.len(), 2);
        assert_eq!(produced[0].count, 6);
        assert_eq!(produced[0].kind, ChunkKind::Coordinates);
    }

    #[test]
    fn test_chunk_esp() {
        let grid = crate::esp::EspGrid {
            origin: [0.0; 3],
            spacing: 0.5,
            dims: [3, 3, 3],
            values: vec![0.1; 27],
        };
        // 27 values in chunks of 10 -> sizes 10, 10, 7.
        let produced: Vec<_> = chunk_esp_grid(&grid, 10).collect();
        assert_eq!(produced.len(), 3);
        let sizes: Vec<usize> = produced.iter().map(|c| c.count).collect();
        assert_eq!(sizes, vec![10, 10, 7]);
    }

    #[test]
    fn test_chunk_dos() {
        let dos = crate::dos::DosResult {
            energies: vec![1.0, 2.0, 3.0, 4.0, 5.0],
            total_dos: vec![0.1, 0.2, 0.3, 0.4, 0.5],
            pdos: vec![],
            sigma: 0.3,
        };
        // 5 points at 2 per chunk -> 3 chunks; first interleaves [e, d] pairs.
        let produced = chunk_dos(&dos, 2);
        assert_eq!(produced.len(), 3);
        assert_eq!(produced[0].values, vec![1.0, 0.1, 2.0, 0.2]);
        assert_eq!(produced[0].count, 2);
    }

    #[test]
    fn test_total_chunks() {
        // ceil(100 / 30) == 4.
        let stream = ChunkedIterator::new(vec![0.0; 100], 30, ChunkKind::Generic);
        assert_eq!(stream.total_chunks(), 4);
    }
}