git_perf/
serialization.rs

1use std::{
2    borrow::Borrow,
3    collections::{
4        hash_map::Entry::{Occupied, Vacant},
5        HashMap,
6    },
7};
8
9use itertools::Itertools;
10use log::warn;
11
12use crate::data::MeasurementData;
13
/// Separator placed between the fields of one serialized measurement record.
///
/// [`serialize_multiple`] passes it to [`serialize_single`] when writing, and
/// `deserialize_single` splits each line on it when reading.
// NOTE(review): the literal renders as an empty string in this view. If it really
// holds a non-printable separator character, spell it as an escape sequence so it
// survives copy/paste; if it is truly empty, serialized fields run together and
// cannot be split back apart. Confirm against the stored data format.
pub const DELIMITER: &str = "";
15
16pub fn serialize_single<M>(measurement_data: &M, custom_delimiter: &str) -> String
17where
18    M: Borrow<MeasurementData>,
19{
20    let md: &MeasurementData = measurement_data.borrow();
21
22    let mut m = vec![
23        format!("{:?}", md.epoch),
24        md.name.clone(),
25        format!("{:?}", md.timestamp),
26        format!("{:?}", md.val),
27    ];
28
29    m.extend(md.key_values.iter().map(|(k, v)| format!("{k}={v}")));
30
31    m.join(custom_delimiter) + "\n"
32}
33
34pub fn serialize_multiple<M: Borrow<MeasurementData>>(measurement_data: &[M]) -> String {
35    measurement_data
36        .iter()
37        .map(|md| serialize_single(md, DELIMITER))
38        .join("")
39}
40
41fn deserialize_single(line: &str) -> Option<MeasurementData> {
42    let components = line
43        .split(DELIMITER)
44        .filter(|item| !item.is_empty())
45        .collect_vec();
46
47    let num_components = components.len();
48    if num_components < 4 {
49        warn!("Too few items with {num_components}, skipping record");
50        return None;
51    }
52
53    // TODO(kaihowl) test this
54    let epoch = components[0];
55    let epoch = match epoch.parse::<u32>() {
56        Ok(e) => e,
57        Err(err) => {
58            warn!("Cannot parse epoch '{epoch}': {err}, skipping record");
59            return None;
60        }
61    };
62
63    let name = components[1].to_string();
64
65    let timestamp = components[2];
66    let timestamp = match timestamp.parse::<f64>() {
67        Ok(ts) => ts,
68        Err(err) => {
69            warn!("Cannot parse timestamp '{timestamp}': {err}, skipping record");
70            return None;
71        }
72    };
73
74    let val = components[3];
75    let val = match val.parse::<f64>() {
76        Ok(val) => val,
77        Err(err) => {
78            warn!("Cannot parse value '{val}': {err}, skipping record");
79            return None;
80        }
81    };
82
83    let mut key_values = HashMap::new();
84
85    if components.len() > 4 {
86        for kv in components.iter().skip(4) {
87            // TODO(kaihowl) different delimiter?
88            if let Some((key, value)) = kv.split_once('=') {
89                let entry = key_values.entry(key.to_string());
90                let value = value.to_string();
91                match entry {
92                    Occupied(mut e) => {
93                        // TODO(kaihowl) reinstate + only emit this (and other) errors once
94                        // eprintln!("Duplicate entries for key {key}");
95                        e.insert(value);
96                    }
97                    Vacant(e) => {
98                        e.insert(value);
99                    }
100                }
101            } else {
102                warn!("No equals sign in key value pair, skipping record");
103                return None;
104            }
105        }
106    }
107
108    Some(MeasurementData {
109        epoch,
110        name,
111        timestamp,
112        val,
113        key_values,
114    })
115}
116
117pub fn deserialize(lines: &str) -> Vec<MeasurementData> {
118    lines
119        .lines()
120        .filter(|l| !l.trim().is_empty())
121        .filter_map(deserialize_single)
122        .collect_vec()
123}
124
#[cfg(test)]
mod test {
    use super::*;

    /// Joins `fields` with [`DELIMITER`] so that no fixture has to embed the raw
    /// separator character, which is easily lost when the source is copied around.
    fn record(fields: &[&str]) -> String {
        fields.join(DELIMITER)
    }

    #[test]
    fn key_value_deserialization() {
        let lines = record(&["0", "test", "1234", "123", "key1=value1", "key2=value2"]);
        let actual = deserialize(&lines);
        let expected = MeasurementData {
            epoch: 0,
            name: "test".to_string(),
            timestamp: 1234.0,
            val: 123.0,
            key_values: [
                ("key1".to_string(), "value1".to_string()),
                ("key2".to_string(), "value2".to_string()),
            ]
            .into(),
        };
        assert_eq!(actual.len(), 1);
        assert_eq!(actual[0], expected);
    }

    #[test]
    fn key_value_invalid_pair() {
        // Missing equals sign in first line, should be skipped
        let lines = format!(
            "{}\n{}",
            record(&["0", "test", "1234", "123", "key1value1"]),
            record(&["0", "test2", "4567", "890", "key2=value2"]),
        );

        let expected = [MeasurementData {
            epoch: 0,
            name: "test2".to_string(),
            timestamp: 4567.0,
            val: 890.0,
            key_values: [("key2".to_string(), "value2".to_string())].into(),
        }];
        let actual = deserialize(&lines);
        assert_eq!(actual, expected);
    }

    #[test]
    fn additional_whitespace_deserialization() {
        // The empty entries produce repeated separators; the parser discards the
        // resulting empty fields, so the record must still be accepted.
        // NOTE(review): presumably the original fixture carried repeated separators;
        // confirm against the upstream test.
        let lines = record(&["0", "", "test", "", "1234", "", "123"]);
        let actual = deserialize(&lines);
        assert_eq!(1, actual.len());
    }

    #[test]
    fn test_serialize_single() {
        let md = MeasurementData {
            epoch: 3,
            name: "Mymeasurement".into(),
            timestamp: 1234567.0,
            val: 42.0,
            key_values: [("mykey".to_string(), "myvalue".to_string())].into(),
        };
        let serialized = serialize_single(&md, DELIMITER);
        let expected =
            record(&["3", "Mymeasurement", "1234567.0", "42.0", "mykey=myvalue"]) + "\n";
        assert_eq!(serialized, expected);
    }
}