velesdb_migrate/
transform.rs1use std::collections::HashMap;
4
5use crate::connectors::ExtractedPoint;
6
/// Renames payload fields on extracted points according to a fixed mapping.
///
/// `Debug` and `Clone` are derived so the transformer can be logged and
/// shared between pipeline stages without hand-written boilerplate.
#[derive(Debug, Clone)]
pub struct Transformer {
    /// Payload key renames: source field name -> destination field name.
    field_mappings: HashMap<String, String>,
}
12
13impl Transformer {
14 #[must_use]
16 pub fn new(field_mappings: HashMap<String, String>) -> Self {
17 Self { field_mappings }
18 }
19
20 #[must_use]
22 pub fn transform_batch(&self, points: Vec<ExtractedPoint>) -> Vec<ExtractedPoint> {
23 points
24 .into_iter()
25 .map(|p| self.transform_point(p))
26 .collect()
27 }
28
29 #[must_use]
31 pub fn transform_point(&self, mut point: ExtractedPoint) -> ExtractedPoint {
32 if !self.field_mappings.is_empty() {
33 let mut new_payload = HashMap::new();
34
35 for (key, value) in point.payload.drain() {
36 let new_key = self.field_mappings.get(&key).cloned().unwrap_or(key);
37 new_payload.insert(new_key, value);
38 }
39
40 point.payload = new_payload;
41 }
42
43 point
44 }
45
/// Scales `vector` to unit L2 (Euclidean) length.
///
/// The norm is accumulated in `f64`: squaring `f32` components directly
/// overflows to infinity for large magnitudes (turning a non-zero vector
/// into all zeros) and underflows to zero for tiny ones (leaving the vector
/// unnormalized). Widening first keeps every finite `f32` input in range.
///
/// A vector whose norm is not greater than zero (all zeros, empty, or
/// containing NaN) is returned as an unchanged copy. Infinite components
/// are not sanitized.
#[must_use]
// Narrowing back to `f32` is intentional: for finite input every quotient
// has magnitude <= 1, so the cast only rounds.
#[allow(clippy::cast_possible_truncation)]
pub fn normalize_vector(vector: &[f32]) -> Vec<f32> {
    let norm = vector
        .iter()
        .map(|&x| {
            let wide = f64::from(x);
            wide * wide
        })
        .sum::<f64>()
        .sqrt();

    if norm > 0.0 {
        vector
            .iter()
            .map(|&x| (f64::from(x) / norm) as f32)
            .collect()
    } else {
        vector.to_vec()
    }
}
56
/// Quantizes `vector` to 8-bit codes using min-max scaling.
///
/// Each component is mapped linearly from the input's `[min, max]` onto
/// `0..=255` and truncated toward zero. When all components are equal there
/// is no range to scale by, so every code is the midpoint `128`. Empty input
/// yields empty output. NaN components do not influence the range and are
/// encoded as `0`.
///
/// If `max - min` is not representable in `f32` (for example `-3e38` and
/// `3e38` in the same vector) the scaling is carried out in `f64`; otherwise
/// the infinite range would collapse every code to `0`.
#[must_use]
pub fn quantize_sq8(vector: &[f32]) -> Vec<u8> {
    let min = vector.iter().copied().fold(f32::INFINITY, f32::min);
    let max = vector.iter().copied().fold(f32::NEG_INFINITY, f32::max);
    let range = max - min;

    if range == 0.0 {
        return vec![128u8; vector.len()];
    }

    if range.is_finite() {
        // Float-to-int `as` saturates and maps NaN to 0, so this never panics.
        return vector
            .iter()
            .map(|&x| ((x - min) / range * 255.0) as u8)
            .collect();
    }

    // The f32 range overflowed (or the input is empty / non-finite, where the
    // result is the same as before). Redo the arithmetic in f64, which can
    // represent the difference of any two finite f32 values.
    let lo = f64::from(min);
    let span = f64::from(max) - lo;
    vector
        .iter()
        .map(|&x| ((f64::from(x) - lo) / span * 255.0) as u8)
        .collect()
}
73
/// Packs the sign pattern of `vector` into bytes, one bit per component.
///
/// A bit is set when its component is strictly greater than zero; zero,
/// negative and NaN components produce a cleared bit. Bits are stored
/// most-significant first, so component 0 lands in bit 7 of byte 0. The
/// output has `ceil(len / 8)` bytes and unused trailing bits stay zero.
#[must_use]
pub fn quantize_binary(vector: &[f32]) -> Vec<u8> {
    vector
        .chunks(8)
        .map(|group| {
            group
                .iter()
                .enumerate()
                .fold(0u8, |byte, (bit, &component)| {
                    if component > 0.0 {
                        byte | (0x80u8 >> bit)
                    } else {
                        byte
                    }
                })
        })
        .collect()
}
88}
89
90impl Default for Transformer {
91 fn default() -> Self {
92 Self::new(HashMap::new())
93 }
94}
95
96#[cfg(test)]
97mod tests {
98 use super::*;
99
/// Without any mappings, payload keys come out exactly as they went in.
#[test]
fn test_transform_point_no_mapping() {
    let payload = HashMap::from([("title".to_string(), serde_json::json!("Test"))]);
    let input = ExtractedPoint {
        id: "1".to_string(),
        vector: vec![0.1, 0.2],
        payload,
    };

    let output = Transformer::default().transform_point(input);

    assert!(output.payload.contains_key("title"));
}
113
/// A mapped key is stored under its new name and the old name disappears.
#[test]
fn test_transform_point_with_mapping() {
    let mut renames = HashMap::new();
    renames.insert("old_name".to_string(), "new_name".to_string());
    let transformer = Transformer::new(renames);

    let mut payload = HashMap::new();
    payload.insert("old_name".to_string(), serde_json::json!("Test"));
    let input = ExtractedPoint {
        id: "1".to_string(),
        vector: vec![0.1, 0.2],
        payload,
    };

    let output = transformer.transform_point(input);

    assert!(output.payload.contains_key("new_name"));
    assert!(!output.payload.contains_key("old_name"));
}
129
/// A 3-4-5 triangle normalizes to (0.6, 0.8) with unit length.
#[test]
fn test_normalize_vector() {
    let unit = Transformer::normalize_vector(&[3.0, 4.0]);

    assert!((unit[0] - 0.6).abs() < 0.001);
    assert!((unit[1] - 0.8).abs() < 0.001);

    // The result must itself have length 1.
    let squared_sum: f32 = unit.iter().map(|component| component * component).sum();
    assert!((squared_sum.sqrt() - 1.0).abs() < 0.001);
}
142
/// The zero vector has no direction, so it is returned unchanged.
#[test]
fn test_normalize_zero_vector() {
    let zeros = [0.0_f32; 3];

    let result = Transformer::normalize_vector(&zeros);

    assert_eq!(result, vec![0.0, 0.0, 0.0]);
}
149
/// Min maps to 0, max to 255, and the midpoint truncates from 127.5 to 127.
#[test]
fn test_quantize_sq8() {
    let codes = Transformer::quantize_sq8(&[0.0, 0.5, 1.0]);

    for (index, expected) in vec![0u8, 127, 255].into_iter().enumerate() {
        assert_eq!(codes[index], expected);
    }
}
159
/// Eight alternating-sign components pack into one byte, first component in the top bit.
#[test]
fn test_quantize_binary() {
    let signs: [f32; 8] = [1.0, -1.0, 0.5, -0.5, 1.0, -1.0, 0.1, -0.1];

    let packed = Transformer::quantize_binary(&signs);

    assert_eq!(packed.len(), 1);
    assert_eq!(packed[0], 0b1010_1010);
}
169
/// Batch transformation returns one output point per input point.
#[test]
fn test_transform_batch() {
    let points: Vec<ExtractedPoint> = vec![("1", 0.1), ("2", 0.2)]
        .into_iter()
        .map(|(id, component)| ExtractedPoint {
            id: id.to_string(),
            vector: vec![component],
            payload: HashMap::new(),
        })
        .collect();

    let result = Transformer::default().transform_batch(points);

    assert_eq!(result.len(), 2);
}
190}