nodedb_codec/
spherical.rs1use crate::error::CodecError;
23
24pub fn encode(data: &[f32], dims: usize, vectors: usize) -> Result<Vec<u8>, CodecError> {
29 if data.is_empty() || dims == 0 {
30 return Ok(build_header(dims as u32, 0, 0, &[]));
31 }
32
33 if data.len() != vectors * dims {
34 return Err(CodecError::Corrupt {
35 detail: format!(
36 "expected {} elements ({vectors} vectors × {dims} dims), got {}",
37 vectors * dims,
38 data.len()
39 ),
40 });
41 }
42
43 let mut transformed = Vec::with_capacity(data.len());
45 for v in 0..vectors {
46 let start = v * dims;
47 let vec_data = &data[start..start + dims];
48 let spherical = cartesian_to_spherical(vec_data);
49 transformed.extend_from_slice(&spherical);
50 }
51
52 let raw_bytes: Vec<u8> = transformed.iter().flat_map(|f| f.to_le_bytes()).collect();
54 let compressed = crate::lz4::encode(&raw_bytes);
55
56 Ok(build_header(
57 dims as u32,
58 vectors as u32,
59 1, &compressed,
61 ))
62}
63
64pub fn encode_raw(data: &[f32], dims: usize, vectors: usize) -> Result<Vec<u8>, CodecError> {
69 let raw_bytes: Vec<u8> = data.iter().flat_map(|f| f.to_le_bytes()).collect();
70 let compressed = crate::lz4::encode(&raw_bytes);
71
72 Ok(build_header(dims as u32, vectors as u32, 0, &compressed))
73}
74
75pub fn decode(data: &[u8]) -> Result<(Vec<f32>, usize, usize), CodecError> {
77 if data.len() < 9 {
78 return Err(CodecError::Truncated {
79 expected: 9,
80 actual: data.len(),
81 });
82 }
83
84 let dims = u32::from_le_bytes([data[0], data[1], data[2], data[3]]) as usize;
85 let vectors = u32::from_le_bytes([data[4], data[5], data[6], data[7]]) as usize;
86 let transform = data[8];
87
88 if vectors == 0 || dims == 0 {
89 return Ok((Vec::new(), dims, 0));
90 }
91
92 let compressed = &data[9..];
93 let raw_bytes = crate::lz4::decode(compressed).map_err(|e| CodecError::DecompressFailed {
94 detail: format!("spherical lz4: {e}"),
95 })?;
96
97 let expected_bytes = vectors * dims * 4;
98 if raw_bytes.len() != expected_bytes {
99 return Err(CodecError::Corrupt {
100 detail: format!("expected {expected_bytes} bytes, got {}", raw_bytes.len()),
101 });
102 }
103
104 let floats: Vec<f32> = raw_bytes
105 .chunks_exact(4)
106 .map(|c| f32::from_le_bytes([c[0], c[1], c[2], c[3]]))
107 .collect();
108
109 if transform == 0 {
110 return Ok((floats, dims, vectors));
112 }
113
114 let mut result = Vec::with_capacity(floats.len());
116 for v in 0..vectors {
117 let start = v * dims;
118 let spherical = &floats[start..start + dims];
119 let cartesian = spherical_to_cartesian(spherical);
120 result.extend_from_slice(&cartesian);
121 }
122
123 Ok((result, dims, vectors))
124}
125
126pub fn normalization_ratio(data: &[f32], dims: usize) -> f64 {
130 if data.is_empty() || dims == 0 {
131 return 0.0;
132 }
133 let vectors = data.len() / dims;
134 let mut normalized = 0usize;
135
136 for v in 0..vectors {
137 let start = v * dims;
138 let norm: f32 = data[start..start + dims]
139 .iter()
140 .map(|x| x * x)
141 .sum::<f32>()
142 .sqrt();
143 if (0.99..=1.01).contains(&norm) {
144 normalized += 1;
145 }
146 }
147
148 normalized as f64 / vectors as f64
149}
150
151fn cartesian_to_spherical(cart: &[f32]) -> Vec<f32> {
163 let n = cart.len();
164 if n == 0 {
165 return Vec::new();
166 }
167 if n == 1 {
168 return vec![cart[0]];
169 }
170
171 let mut spherical = Vec::with_capacity(n);
172
173 let r: f32 = cart.iter().map(|x| x * x).sum::<f32>().sqrt();
175 spherical.push(r);
176
177 for i in 0..n - 1 {
179 let sum_sq: f32 = cart[i..].iter().map(|x| x * x).sum::<f32>();
180 let denom = sum_sq.sqrt();
181 if denom < 1e-30 {
182 spherical.push(0.0);
183 } else if i < n - 2 {
184 spherical.push((cart[i] / denom).acos());
185 } else {
186 spherical.push(cart[n - 1].atan2(cart[n - 2]));
188 }
189 }
190
191 spherical
192}
193
194fn spherical_to_cartesian(sph: &[f32]) -> Vec<f32> {
196 let n = sph.len();
197 if n == 0 {
198 return Vec::new();
199 }
200 if n == 1 {
201 return vec![sph[0]];
202 }
203
204 let r = sph[0];
205 let angles = &sph[1..];
206 let dims = n; let mut cart = Vec::with_capacity(dims);
209
210 for i in 0..dims - 1 {
211 let mut val = r;
212 for a in &angles[..i] {
213 val *= a.sin();
214 }
215 if i < dims - 2 {
216 val *= angles[i].cos();
217 } else {
218 val *= angles[dims - 2].sin();
220 }
221 cart.push(val);
222 }
223
224 let mut val = r;
226 for angle in &angles[..dims - 2] {
227 val *= angle.sin();
228 }
229 val *= angles[dims - 2].cos();
230 cart.push(val);
231
232 cart
233}
234
235fn build_header(dims: u32, vectors: u32, transform: u8, compressed: &[u8]) -> Vec<u8> {
236 let mut out = Vec::with_capacity(9 + compressed.len());
237 out.extend_from_slice(&dims.to_le_bytes());
238 out.extend_from_slice(&vectors.to_le_bytes());
239 out.push(transform);
240 out.extend_from_slice(compressed);
241 out
242}
243
244#[cfg(test)]
245mod tests {
246 use super::*;
247
248 fn make_normalized_vectors(n: usize, dims: usize) -> Vec<f32> {
249 let mut data = Vec::with_capacity(n * dims);
250 for i in 0..n {
251 let mut vec: Vec<f32> = (0..dims)
252 .map(|d| ((i * dims + d) as f32 * 0.1).sin())
253 .collect();
254 let norm: f32 = vec.iter().map(|x| x * x).sum::<f32>().sqrt();
255 if norm > 0.0 {
256 for v in &mut vec {
257 *v /= norm;
258 }
259 }
260 data.extend_from_slice(&vec);
261 }
262 data
263 }
264
265 #[test]
266 fn empty_roundtrip() {
267 let encoded = encode(&[], 32, 0).unwrap();
268 let (decoded, dims, vectors) = decode(&encoded).unwrap();
269 assert!(decoded.is_empty());
270 assert_eq!(vectors, 0);
271 assert_eq!(dims, 32);
272 }
273
274 #[test]
275 fn normalized_roundtrip() {
276 let data = make_normalized_vectors(100, 32);
277 let encoded = encode(&data, 32, 100).unwrap();
278 let (decoded, dims, vectors) = decode(&encoded).unwrap();
279
280 assert_eq!(dims, 32);
281 assert_eq!(vectors, 100);
282 assert_eq!(decoded.len(), data.len());
283
284 let max_error: f32 = data
288 .iter()
289 .zip(decoded.iter())
290 .map(|(a, b)| (a - b).abs())
291 .fold(0.0f32, f32::max);
292 assert!(
293 max_error < 0.1,
294 "max reconstruction error {max_error} exceeds threshold"
295 );
296 }
297
298 #[test]
299 fn raw_roundtrip() {
300 let data: Vec<f32> = (0..320).map(|i| i as f32 * 0.01).collect();
301 let encoded = encode_raw(&data, 32, 10).unwrap();
302 let (decoded, dims, vectors) = decode(&encoded).unwrap();
303 assert_eq!(dims, 32);
304 assert_eq!(vectors, 10);
305 for (a, b) in data.iter().zip(decoded.iter()) {
306 assert_eq!(a.to_bits(), b.to_bits());
307 }
308 }
309
310 #[test]
311 fn compression_ratio() {
312 let data = make_normalized_vectors(1000, 128);
313 let encoded = encode(&data, 128, 1000).unwrap();
314 let raw_size = data.len() * 4;
315 let ratio = raw_size as f64 / encoded.len() as f64;
316 assert!(ratio > 0.9, "should not expand >10%, got {ratio:.2}x");
319 }
320
321 #[test]
322 fn normalization_check() {
323 let normalized = make_normalized_vectors(100, 32);
324 assert!(normalization_ratio(&normalized, 32) > 0.95);
325
326 let raw: Vec<f32> = (0..3200).map(|i| i as f32).collect();
327 assert!(normalization_ratio(&raw, 32) < 0.1);
328 }
329
330 #[test]
331 fn truncated_error() {
332 assert!(decode(&[]).is_err());
333 assert!(decode(&[0; 5]).is_err());
334 }
335}