git_perf/
serialization.rs

1use std::{
2    borrow::Borrow,
3    collections::{
4        hash_map::Entry::{Occupied, Vacant},
5        HashMap,
6    },
7};
8
9use itertools::Itertools;
10use log::warn;
11
12use crate::data::MeasurementData;
13
14pub const DELIMITER: &str = "";
15
16pub fn serialize_single<M>(measurement_data: &M, custom_delimiter: &str) -> String
17where
18    M: Borrow<MeasurementData>,
19{
20    let md: &MeasurementData = measurement_data.borrow();
21
22    let mut m = vec![
23        format!("{:?}", md.epoch),
24        md.name.clone(),
25        format!("{:?}", md.timestamp),
26        format!("{:?}", md.val),
27    ];
28
29    m.extend(md.key_values.iter().map(|(k, v)| format!("{k}={v}")));
30
31    m.join(custom_delimiter) + "\n"
32}
33
34pub fn serialize_multiple<M: Borrow<MeasurementData>>(measurement_data: &[M]) -> String {
35    measurement_data
36        .iter()
37        .map(|md| serialize_single(md, DELIMITER))
38        .join("")
39}
40
41fn deserialize_single(line: &str) -> Option<MeasurementData> {
42    let components = line
43        .split(DELIMITER)
44        .filter(|item| !item.is_empty())
45        .collect_vec();
46
47    let num_components = components.len();
48    if num_components < 4 {
49        warn!("Too few items with {num_components}, skipping record");
50        return None;
51    }
52
53    let epoch = components[0];
54    let epoch = match epoch.parse::<u32>() {
55        Ok(e) => e,
56        Err(err) => {
57            warn!("Cannot parse epoch '{epoch}': {err}, skipping record");
58            return None;
59        }
60    };
61
62    let name = components[1].to_string();
63
64    let timestamp = components[2];
65    let timestamp = match timestamp.parse::<f64>() {
66        Ok(ts) => ts,
67        Err(err) => {
68            warn!("Cannot parse timestamp '{timestamp}': {err}, skipping record");
69            return None;
70        }
71    };
72
73    let val = components[3];
74    let val = match val.parse::<f64>() {
75        Ok(val) => val,
76        Err(err) => {
77            warn!("Cannot parse value '{val}': {err}, skipping record");
78            return None;
79        }
80    };
81
82    let mut key_values = HashMap::new();
83
84    if components.len() > 4 {
85        for kv in components.iter().skip(4) {
86            if let Some((key, value)) = kv.split_once('=') {
87                let entry = key_values.entry(key.to_string());
88                let value = value.to_string();
89                match entry {
90                    Occupied(mut e) => {
91                        if e.get() == &value {
92                            static DUPLICATE_KEY_SAME_VALUE: std::sync::Once =
93                                std::sync::Once::new();
94                            DUPLICATE_KEY_SAME_VALUE.call_once(|| {
95                                warn!("Duplicate entries for key {key} with same value");
96                            });
97                        } else {
98                            static DUPLICATE_KEY_CONFLICT: std::sync::Once = std::sync::Once::new();
99                            DUPLICATE_KEY_CONFLICT.call_once(|| {
100                                warn!(
101                                    "Conflicting values for key {key}: '{}' vs '{}'",
102                                    e.get(),
103                                    value
104                                );
105                            });
106                        }
107                        e.insert(value);
108                    }
109                    Vacant(e) => {
110                        e.insert(value);
111                    }
112                }
113            } else {
114                warn!("No equals sign in key value pair, skipping record");
115                return None;
116            }
117        }
118    }
119
120    Some(MeasurementData {
121        epoch,
122        name,
123        timestamp,
124        val,
125        key_values,
126    })
127}
128
129pub fn deserialize(lines: &str) -> Vec<MeasurementData> {
130    lines
131        .lines()
132        .filter(|l| !l.trim().is_empty())
133        .filter_map(deserialize_single)
134        .collect_vec()
135}
136
#[cfg(test)]
mod test {
    use super::*;

    /// Joins `fields` with [`DELIMITER`] so that no test input or expected
    /// string has to embed the separator character as a raw literal.
    fn record(fields: &[&str]) -> String {
        fields.join(DELIMITER)
    }

    #[test]
    fn key_value_deserialization() {
        let lines = record(&["0", "test", "1234", "123", "key1=value1", "key2=value2"]);
        let actual = deserialize(&lines);
        let expected = MeasurementData {
            epoch: 0,
            name: "test".to_string(),
            timestamp: 1234.0,
            val: 123.0,
            key_values: [
                ("key1".to_string(), "value1".to_string()),
                ("key2".to_string(), "value2".to_string()),
            ]
            .into(),
        };
        assert_eq!(actual.len(), 1);
        assert_eq!(actual[0], expected);
    }

    #[test]
    fn key_value_invalid_pair() {
        // Missing equals sign in first line, should be skipped
        let lines = format!(
            "{}\n{}",
            record(&["0", "test", "1234", "123", "key1value1"]),
            record(&["0", "test2", "4567", "890", "key2=value2"]),
        );

        let expected = [MeasurementData {
            epoch: 0,
            name: "test2".to_string(),
            timestamp: 4567.0,
            val: 890.0,
            key_values: [("key2".to_string(), "value2".to_string())].into(),
        }];
        let actual = deserialize(&lines);
        assert_eq!(actual, expected);
    }

    #[test]
    fn additional_whitespace_deserialization() {
        // Repeated and trailing separators produce empty items, which the
        // parser filters out before counting fields.
        let lines = format!("0{d}{d}test{d}1234{d}{d}{d}123{d}", d = DELIMITER);
        let actual = deserialize(&lines);
        assert_eq!(1, actual.len());
    }

    #[test]
    fn test_serialize_single() {
        let md = MeasurementData {
            epoch: 3,
            name: "Mymeasurement".into(),
            timestamp: 1234567.0,
            val: 42.0,
            key_values: [("mykey".to_string(), "myvalue".to_string())].into(),
        };
        let serialized = serialize_single(&md, DELIMITER);
        let expected = record(&["3", "Mymeasurement", "1234567.0", "42.0", "mykey=myvalue"]) + "\n";
        assert_eq!(serialized, expected);
    }

    #[test]
    fn test_epoch_parsing() {
        // Test valid epoch
        let valid_line = record(&["42", "test", "1234", "123"]);
        let result = deserialize_single(&valid_line);
        assert!(result.is_some());
        assert_eq!(result.unwrap().epoch, 42);

        // Test invalid epoch (non-numeric)
        let invalid_line = record(&["not_a_number", "test", "1234", "123"]);
        let result = deserialize_single(&invalid_line);
        assert!(result.is_none());

        // Test invalid epoch (out of range): u32::MAX + 1
        let out_of_range_line = record(&["4294967296", "test", "1234", "123"]);
        let result = deserialize_single(&out_of_range_line);
        assert!(result.is_none());
    }

    #[test]
    fn test_serialize_multiple_empty() {
        let measurements: Vec<MeasurementData> = vec![];
        let serialized = serialize_multiple(&measurements);
        assert_eq!(serialized, "");
    }

    #[test]
    fn test_serialize_multiple_single() {
        let md = MeasurementData {
            epoch: 1,
            name: "test".into(),
            timestamp: 1000.0,
            val: 5.0,
            key_values: HashMap::new(),
        };
        let serialized = serialize_multiple(&[md]);
        let expected = record(&["1", "test", "1000.0", "5.0"]) + "\n";
        assert_eq!(serialized, expected);
    }

    #[test]
    fn test_serialize_multiple_multiple() {
        let md1 = MeasurementData {
            epoch: 1,
            name: "test1".into(),
            timestamp: 1000.0,
            val: 5.0,
            key_values: HashMap::new(),
        };
        let md2 = MeasurementData {
            epoch: 2,
            name: "test2".into(),
            timestamp: 2000.0,
            val: 10.0,
            key_values: HashMap::new(),
        };
        let serialized = serialize_multiple(&[md1, md2]);
        let expected = format!(
            "{}\n{}\n",
            record(&["1", "test1", "1000.0", "5.0"]),
            record(&["2", "test2", "2000.0", "10.0"]),
        );
        assert_eq!(serialized, expected);
    }

    #[test]
    fn test_deserialize_single_exactly_four_components() {
        // Test boundary case: exactly 4 components (no key-value pairs)
        let line = record(&["5", "measurement", "1234.5", "67.8"]);
        let result = deserialize_single(&line);
        assert!(result.is_some());
        let md = result.unwrap();
        assert_eq!(md.epoch, 5);
        assert_eq!(md.name, "measurement");
        assert_eq!(md.timestamp, 1234.5);
        assert_eq!(md.val, 67.8);
        assert!(md.key_values.is_empty());
    }

    #[test]
    fn test_deserialize_single_more_than_four_components() {
        // Test with more than 4 components (includes key-value pairs)
        let line = record(&["0", "test", "1234", "123", "foo=bar"]);
        let result = deserialize_single(&line);
        assert!(result.is_some());
        let md = result.unwrap();
        assert_eq!(md.key_values.len(), 1);
        assert_eq!(md.key_values.get("foo"), Some(&"bar".to_string()));
    }

    #[test]
    fn test_deserialize_serialize_roundtrip() {
        let original = MeasurementData {
            epoch: 10,
            name: "roundtrip_test".into(),
            timestamp: 9999.5,
            val: 42.42,
            key_values: [
                ("key1".to_string(), "value1".to_string()),
                ("key2".to_string(), "value2".to_string()),
            ]
            .into(),
        };

        let serialized = serialize_single(&original, DELIMITER);
        let deserialized_vec = deserialize(&serialized);

        assert_eq!(deserialized_vec.len(), 1);
        let deserialized = &deserialized_vec[0];

        assert_eq!(deserialized.epoch, original.epoch);
        assert_eq!(deserialized.name, original.name);
        assert_eq!(deserialized.timestamp, original.timestamp);
        assert_eq!(deserialized.val, original.val);
        assert_eq!(deserialized.key_values, original.key_values);
    }

    #[test]
    fn test_deserialize_multiple_lines() {
        let lines = format!(
            "{}\n{}\n",
            record(&["1", "test1", "1000.0", "5.0", "key1=val1"]),
            record(&["2", "test2", "2000.0", "10.0", "key2=val2"]),
        );
        let results = deserialize(&lines);
        assert_eq!(results.len(), 2);
        assert_eq!(results[0].name, "test1");
        assert_eq!(results[1].name, "test2");
    }

    #[test]
    fn test_serialize_single_with_custom_delimiter() {
        let md = MeasurementData {
            epoch: 0,
            name: "test".into(),
            timestamp: 100.0,
            val: 50.0,
            key_values: [("k".to_string(), "v".to_string())].into(),
        };
        let serialized = serialize_single(&md, ",");
        assert_eq!(serialized, "0,test,100.0,50.0,k=v\n");
    }
}