git_perf/
serialization.rs

1use std::{
2    borrow::Borrow,
3    collections::{
4        hash_map::Entry::{Occupied, Vacant},
5        HashMap,
6    },
7};
8
9use itertools::Itertools;
10
11use crate::data::MeasurementData;
12
// TODO(kaihowl) serialization with flatten and custom function does not work
/// Borrowed, field-by-field view of one measurement record.
///
/// The fields carry the same names that `serialize_single` reads from
/// `MeasurementData`; `name` and `key_values` are borrowed for `'a` instead of owned.
///
/// NOTE(review): nothing in the visible part of this file constructs this type —
/// presumably a leftover of the approach mentioned in the TODO above; confirm before
/// removing.
#[derive(Debug, PartialEq)]
struct SerializeMeasurementData<'a> {
    epoch: u32,
    /// Borrowed measurement name.
    name: &'a str,
    timestamp: f64,
    val: f64,
    /// Borrowed map of string keys to string values.
    key_values: &'a HashMap<String, String>,
}
22
/// Separator placed between the items of a serialized record.
///
/// `serialize_single` joins the items of a record with it and `deserialize_single`
/// splits each line on it.
///
/// NOTE(review): the literal appears empty in this copy of the file. With an empty
/// separator `str::split` yields single characters, so records could not be parsed
/// back. Presumably a non-printable separator character belongs here — confirm.
pub const DELIMITER: &str = "";
24
25pub fn serialize_single<M>(measurement_data: &M) -> String
26where
27    M: Borrow<MeasurementData>,
28{
29    let md: &MeasurementData = measurement_data.borrow();
30
31    let mut m = vec![
32        format!("{:?}", md.epoch),
33        md.name.clone(),
34        format!("{:?}", md.timestamp),
35        format!("{:?}", md.val),
36    ];
37
38    m.extend(md.key_values.iter().map(|(k, v)| format!("{k}={v}")));
39
40    m.join(DELIMITER) + "\n"
41}
42
43pub fn serialize_multiple<M: Borrow<MeasurementData>>(measurement_data: &[M]) -> String {
44    measurement_data
45        .iter()
46        .map(|md| serialize_single(md))
47        .join("")
48}
49
50fn deserialize_single(line: &str) -> Option<MeasurementData> {
51    let components = line
52        .split(DELIMITER)
53        .filter(|item| !item.is_empty())
54        .collect_vec();
55
56    let num_components = components.len();
57    if num_components < 4 {
58        eprintln!("Too few items with {num_components}, skipping record");
59        return None;
60    }
61
62    // TODO(kaihowl) test this
63    let epoch = components[0];
64    let epoch = match epoch.parse::<u32>() {
65        Ok(e) => e,
66        Err(err) => {
67            eprintln!("Cannot parse epoch '{epoch}': {err}, skipping record");
68            return None;
69        }
70    };
71
72    let name = components[1].to_string();
73
74    let timestamp = components[2];
75    let timestamp = match timestamp.parse::<f64>() {
76        Ok(ts) => ts,
77        Err(err) => {
78            eprintln!("Cannot parse timestamp '{timestamp}': {err}, skipping record");
79            return None;
80        }
81    };
82
83    let val = components[3];
84    let val = match val.parse::<f64>() {
85        Ok(val) => val,
86        Err(err) => {
87            eprintln!("Cannot parse value '{val}': {err}, skipping record");
88            return None;
89        }
90    };
91
92    let mut key_values = HashMap::new();
93
94    if components.len() > 4 {
95        for kv in components.iter().skip(4) {
96            // TODO(kaihowl) different delimiter?
97            if let Some((key, value)) = kv.split_once('=') {
98                let entry = key_values.entry(key.to_string());
99                let value = value.to_string();
100                match entry {
101                    Occupied(mut e) => {
102                        // TODO(kaihowl) reinstate + only emit this (and other) errors once
103                        // eprintln!("Duplicate entries for key {key}");
104                        e.insert(value);
105                    }
106                    Vacant(e) => {
107                        e.insert(value);
108                    }
109                }
110            } else {
111                eprintln!("No equals sign in key value pair, skipping record");
112                return None;
113            }
114        }
115    }
116
117    Some(MeasurementData {
118        epoch,
119        name,
120        timestamp,
121        val,
122        key_values,
123    })
124}
125
126pub fn deserialize(lines: &str) -> Vec<MeasurementData> {
127    lines
128        .lines()
129        .filter(|l| !l.trim().is_empty())
130        .filter_map(deserialize_single)
131        .collect_vec()
132}
133
#[cfg(test)]
mod test {
    use super::*;

    /// Builds one record line by joining `items` with [`DELIMITER`], so the fixtures
    /// always use the same separator as the code under test.
    fn record(items: &[&str]) -> String {
        items.join(DELIMITER)
    }

    #[test]
    fn key_value_deserialization() {
        let lines = record(&["0", "test", "1234", "123", "key1=value1", "key2=value2"]);
        let actual = deserialize(&lines);
        let expected = MeasurementData {
            epoch: 0,
            name: "test".to_string(),
            timestamp: 1234.0,
            val: 123.0,
            key_values: [
                ("key1".to_string(), "value1".to_string()),
                ("key2".to_string(), "value2".to_string()),
            ]
            .into(),
        };
        assert_eq!(actual.len(), 1);
        assert_eq!(actual[0], expected);
    }

    #[test]
    fn key_value_invalid_pair() {
        // Missing equals sign in first line, should be skipped
        let lines = format!(
            "{}\n{}",
            record(&["0", "test", "1234", "123", "key1value1"]),
            record(&["0", "test2", "4567", "890", "key2=value2"]),
        );

        let expected = [MeasurementData {
            epoch: 0,
            name: "test2".to_string(),
            timestamp: 4567.0,
            val: 890.0,
            key_values: [("key2".to_string(), "value2".to_string())].into(),
        }];
        let actual = deserialize(&lines);
        assert_eq!(actual, expected);
    }

    #[test]
    fn additional_whitespace_deserialization() {
        // A run of delimiters yields empty items, which the parser drops.
        let wide_gap = DELIMITER.repeat(3);
        let lines = ["0", "test", "1234", "123"].join(wide_gap.as_str());
        let actual = deserialize(&lines);
        assert_eq!(1, actual.len());
    }

    #[test]
    fn test_serialize_single() {
        let md = MeasurementData {
            epoch: 3,
            name: "Mymeasurement".into(),
            timestamp: 1234567.0,
            val: 42.0,
            key_values: [("mykey".to_string(), "myvalue".to_string())].into(),
        };
        let serialized = serialize_single(&md);
        let expected =
            record(&["3", "Mymeasurement", "1234567.0", "42.0", "mykey=myvalue"]) + "\n";
        assert_eq!(serialized, expected);
    }
}
193}