velesdb_migrate/
transform.rs

1//! Data transformation utilities.
2
3use std::collections::HashMap;
4
5use crate::connectors::ExtractedPoint;
6
/// Transforms extracted data before loading.
///
/// A `Transformer` owns a table of payload-key renames and applies it to
/// extracted points. It also groups a few stateless vector helpers
/// (normalization and quantization) as associated functions.
#[derive(Debug, Clone)]
pub struct Transformer {
    /// Field mappings (source payload key -> destination payload key).
    field_mappings: HashMap<String, String>,
}
12
13impl Transformer {
14    /// Create a new transformer.
15    #[must_use]
16    pub fn new(field_mappings: HashMap<String, String>) -> Self {
17        Self { field_mappings }
18    }
19
20    /// Transform a batch of points.
21    #[must_use]
22    pub fn transform_batch(&self, points: Vec<ExtractedPoint>) -> Vec<ExtractedPoint> {
23        points
24            .into_iter()
25            .map(|p| self.transform_point(p))
26            .collect()
27    }
28
29    /// Transform a single point.
30    #[must_use]
31    pub fn transform_point(&self, mut point: ExtractedPoint) -> ExtractedPoint {
32        if !self.field_mappings.is_empty() {
33            let mut new_payload = HashMap::new();
34
35            for (key, value) in point.payload.drain() {
36                let new_key = self.field_mappings.get(&key).cloned().unwrap_or(key);
37                new_payload.insert(new_key, value);
38            }
39
40            point.payload = new_payload;
41        }
42
43        point
44    }
45
/// Scales a vector to unit Euclidean length.
///
/// Returns a new vector with every component divided by the L2 norm of the
/// input. When the norm is not strictly positive (an all-zero or empty
/// input, or a norm that is NaN) the input is returned as an unchanged copy.
#[must_use]
pub fn normalize_vector(vector: &[f32]) -> Vec<f32> {
    let squared_sum: f32 = vector.iter().map(|component| component * component).sum();
    let length = squared_sum.sqrt();

    let mut unit = vector.to_vec();
    // The comparison is false for both zero and NaN, which leaves the copy as-is.
    if length > 0.0 {
        for component in &mut unit {
            *component /= length;
        }
    }
    unit
}
56
/// Quantizes a vector to SQ8 (8-bit scalar quantization).
///
/// Components are mapped linearly so that the smallest value becomes `0`
/// and the largest becomes `255`; the scaled value is converted with an
/// `as u8` cast, i.e. truncated rather than rounded. When the difference
/// between the largest and smallest component is exactly zero, every
/// component is encoded as `128`. An empty input yields an empty output.
#[must_use]
pub fn quantize_sq8(vector: &[f32]) -> Vec<u8> {
    // Find both extremes in a single pass over the data.
    let (lowest, highest) = vector.iter().fold(
        (f32::INFINITY, f32::NEG_INFINITY),
        |(low, high), &value| (low.min(value), high.max(value)),
    );
    let span = highest - lowest;

    if span == 0.0 {
        // No spread to scale by: use the midpoint code for every component.
        vec![128u8; vector.len()]
    } else {
        let mut codes = Vec::with_capacity(vector.len());
        for &value in vector {
            codes.push(((value - lowest) / span * 255.0) as u8);
        }
        codes
    }
}
73
/// Quantizes a vector to a packed 1-bit-per-component representation.
///
/// Each component contributes one bit that is set when the component is
/// strictly greater than `0.0`. Bits are packed most-significant-bit first,
/// eight components per byte; unused bits of a trailing partial byte stay
/// zero. The output holds `ceil(len / 8)` bytes.
#[must_use]
pub fn quantize_binary(vector: &[f32]) -> Vec<u8> {
    vector
        .chunks(8)
        .map(|group| {
            group
                .iter()
                .enumerate()
                .fold(0u8, |byte, (offset, &value)| {
                    if value > 0.0 {
                        // Offset 0 lands in the top bit, offset 7 in the lowest.
                        byte | (0x80u8 >> offset)
                    } else {
                        byte
                    }
                })
        })
        .collect()
}
88}
89
90impl Default for Transformer {
91    fn default() -> Self {
92        Self::new(HashMap::new())
93    }
94}
95
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a point with the given id and payload and a small fixed vector.
    fn make_point(id: &str, payload: HashMap<String, serde_json::Value>) -> ExtractedPoint {
        ExtractedPoint {
            id: id.to_string(),
            vector: vec![0.1, 0.2],
            payload,
        }
    }

    #[test]
    fn test_transform_point_no_mapping() {
        let transformer = Transformer::default();
        let payload = HashMap::from([("title".to_string(), serde_json::json!("Test"))]);

        let result = transformer.transform_point(make_point("1", payload));

        // Without mappings the payload must come back exactly as it went in.
        assert_eq!(result.id, "1");
        assert_eq!(result.payload.len(), 1);
        assert_eq!(result.payload.get("title"), Some(&serde_json::json!("Test")));
    }

    #[test]
    fn test_transform_point_with_mapping() {
        let mappings = HashMap::from([("old_name".to_string(), "new_name".to_string())]);
        let transformer = Transformer::new(mappings);
        let payload = HashMap::from([
            ("old_name".to_string(), serde_json::json!("Test")),
            ("untouched".to_string(), serde_json::json!(42)),
        ]);

        let result = transformer.transform_point(make_point("1", payload));

        // The mapped key is renamed and keeps its value.
        assert_eq!(
            result.payload.get("new_name"),
            Some(&serde_json::json!("Test"))
        );
        assert!(!result.payload.contains_key("old_name"));
        // Keys without a mapping are carried over unchanged.
        assert_eq!(
            result.payload.get("untouched"),
            Some(&serde_json::json!(42))
        );
        assert_eq!(result.payload.len(), 2);
    }

    #[test]
    fn test_normalize_vector() {
        let vec = vec![3.0, 4.0];
        let normalized = Transformer::normalize_vector(&vec);

        assert!((normalized[0] - 0.6).abs() < 0.001);
        assert!((normalized[1] - 0.8).abs() < 0.001);

        // Check unit length
        let norm: f32 = normalized.iter().map(|x| x * x).sum::<f32>().sqrt();
        assert!((norm - 1.0).abs() < 0.001);
    }

    #[test]
    fn test_normalize_zero_vector() {
        let vec = vec![0.0, 0.0, 0.0];
        let normalized = Transformer::normalize_vector(&vec);
        assert_eq!(normalized, vec![0.0, 0.0, 0.0]);
    }

    #[test]
    fn test_normalize_empty_vector() {
        assert!(Transformer::normalize_vector(&[]).is_empty());
    }

    #[test]
    fn test_quantize_sq8() {
        let vec = vec![0.0, 0.5, 1.0];
        let quantized = Transformer::quantize_sq8(&vec);

        assert_eq!(quantized[0], 0);
        // 0.5 scales to 127.5 and the `as u8` cast truncates, hence 127 not 128.
        assert_eq!(quantized[1], 127);
        assert_eq!(quantized[2], 255);
    }

    #[test]
    fn test_quantize_sq8_constant_vector() {
        // A zero value range cannot be scaled; every component gets the midpoint code.
        let quantized = Transformer::quantize_sq8(&[2.0, 2.0, 2.0]);
        assert_eq!(quantized, vec![128u8; 3]);
    }

    #[test]
    fn test_quantize_sq8_empty_vector() {
        assert!(Transformer::quantize_sq8(&[]).is_empty());
    }

    #[test]
    fn test_quantize_binary() {
        let vec = vec![1.0, -1.0, 0.5, -0.5, 1.0, -1.0, 0.1, -0.1];
        let binary = Transformer::quantize_binary(&vec);

        // First byte: 1 0 1 0 1 0 1 0 = 0xAA = 170
        assert_eq!(binary.len(), 1);
        assert_eq!(binary[0], 0b10101010);
    }

    #[test]
    fn test_quantize_binary_partial_byte() {
        // Nine positive components: one full byte plus the top bit of a second byte.
        let binary = Transformer::quantize_binary(&[1.0; 9]);
        assert_eq!(binary, vec![0xFFu8, 0x80]);
    }

    #[test]
    fn test_transform_batch() {
        let transformer = Transformer::default();

        let points = vec![
            make_point("1", HashMap::new()),
            make_point("2", HashMap::new()),
        ];

        let result = transformer.transform_batch(points);

        // Every point is returned, in input order.
        assert_eq!(result.len(), 2);
        let ids: Vec<&str> = result.iter().map(|p| p.id.as_str()).collect();
        assert_eq!(ids, vec!["1", "2"]);
    }
}