json_sift/
lib.rs

1#![doc = include_str!("../docs.md")]
2
3mod metar;
4pub use metar::{Rule, SiftParser};
5
6use csv::WriterBuilder;
7use serde_json::Value;
8use std::collections::{BTreeSet, HashMap};
9use thiserror::Error;
10
11// errors: json + structure + optional detector/pattern
12#[derive(Error, Debug)]
13pub enum ParseError {
14    #[error("JSON: {0}")]
15    Json(String),
16    #[error("Structure: {0}")]
17    Structure(String),
18}
19
20//parse raw json string into serde value
21pub fn parse_json(s: &str) -> Result<Value, ParseError> {
22    serde_json::from_str(s).map_err(|e| ParseError::Json(e.to_string()))
23    // if json parse fails, i wrap error into the custom ParseError
24}
25
26// main logic == flatten json =>rows=>csv
27pub fn convert_to_csv(v: &Value) -> Result<String, ParseError> {
28    //hashing all rows
29    let mut rows = Vec::<HashMap<String, String>>::new();
30    //using tree set to keep keys sorted + uniqe
31    let mut keys = BTreeSet::new();
32
33    match v {
34        // if array of obj
35        Value::Array(a) => {
36            for it in a {
37                // new flat map for each element
38                let mut m = HashMap::new();
39                flatten(it, "".into(), &mut m)?;
40                // remember all col names from row
41                for k in m.keys() {
42                    keys.insert(k.clone()); // clone because set owns the Str
43                }
44                rows.push(m);
45            }
46        }
47        // if it's a single object
48        Value::Object(_) => {
49            //same as above but only once
50            let mut m = HashMap::new();
51            flatten(v, "".into(), &mut m)?;
52            //col names
53            for k in m.keys() {
54                keys.insert(k.clone());
55            }
56            rows.push(m);
57        }
58        // everything else == err
59        _ => return Err(ParseError::Structure("expect object or array".into())),
60    }
61
62    // starting a header row from all keys
63    let hdr: Vec<String> = keys.into_iter().collect();
64
65    // use csv writer so it handles quoting/escaping
66    let mut wtr = WriterBuilder::new()
67        .has_headers(true)
68        .from_writer(Vec::new());
69
70    // header creation (for csv
71    wtr.write_record(&hdr)
72        .map_err(|e| ParseError::Structure(e.to_string()))?;
73
74    // rows
75    for row in rows {
76        // for each column in fixed order
77        let record = hdr.iter().map(|col| {
78            // get cell value or empty string if missing
79            row.get(col).map_or("", |v| v.as_str())
80        });
81        wtr.write_record(record)
82            .map_err(|e| ParseError::Structure(e.to_string()))?;
83    }
84
85    // get underlying vec and convert to Str
86    let buf = wtr
87        .into_inner()
88        .map_err(|e| ParseError::Structure(e.to_string()))?;
89    let out = String::from_utf8(buf).map_err(|e| ParseError::Structure(e.to_string()))?;
90    Ok(out)
91}
92
// Detectors (METAR etc.) look only at the string content, never at the field name.
// When true, the detector name ("metar") is included in the generated column names.
95const PREFIX_WITH_DETECTOR_NAME: bool = true;
96
97// flatten json recursively
98//v is curr val
99//prefix is col name prefix
100fn flatten(v: &Value, prefix: String, out: &mut HashMap<String, String>) -> Result<(), ParseError> {
101    match v {
102        Value::Object(m) => {
103            for (k, vv) in m {
104                //if prefix empty, key is k, else prefix.k
105                let key = if prefix.is_empty() {
106                    k.clone()
107                } else {
108                    format!("{prefix}.{k}")
109                };
110                flatten(vv, key, out)?;
111            }
112        }
113        Value::Array(a) => {
114            //if arr = [v0,v1] then cols [pref[0], pref[1]]
115            for (i, vv) in a.iter().enumerate() {
116                flatten(vv, format!("{}[{}]", prefix, i), out)?;
117            }
118        }
119        Value::String(s) => parse_scalar(prefix, s, out)?,
120        Value::Number(n) => {
121            out.insert(prefix, n.to_string());
122        }
123        Value::Bool(b) => {
124            out.insert(prefix, b.to_string());
125        }
126        Value::Null => {
127            out.insert(prefix, "".into());
128        }
129    }
130    Ok(())
131}
132
133//scalar pipeline for str val
134fn parse_scalar(
135    prefix: String,
136    s: &str,
137    out: &mut HashMap<String, String>,
138) -> Result<(), ParseError> {
139    let text = s.trim();
140    if text.is_empty() {
141        out.insert(prefix, String::new());
142        return Ok(());
143    }
144
145    //full-string detectors (only metar for now)
146    if let Some(mut decoded) = metar::decode_metar(text) {
147        //if metar parse ok
148        let det_name = "metar";
149        for (dk, dv) in decoded.drain() {
150            //drain() puts out decoded key-values
151            // build column name with/without detector prefix
152            let col = if prefix.is_empty() {
153                if PREFIX_WITH_DETECTOR_NAME {
154                    format!("{det_name}.{dk}")
155                } else {
156                    dk
157                }
158            } else if PREFIX_WITH_DETECTOR_NAME {
159                format!("{prefix}.{det_name}.{dk}")
160            } else {
161                format!("{prefix}.{dk}")
162            };
163            out.insert(col, dv); //insert in out map
164        }
165        return Ok(());
166    }
167
168    // cuts into toeksn
169    let tokens = metar::complex_key_value(text);
170
171    // if nothing or 1 token then keep string or try single-pattern
172    if tokens.is_empty() {
173        out.insert(prefix, text.to_string());
174        return Ok(());
175    }
176    if tokens.len() == 1 {
177        let t = tokens[0].trim();
178        if let Some(pat) = metar::holds_pattern_value(t) {
179            metar::apply_pattern(&prefix, t, pat, out);
180        } else {
181            out.insert(prefix, text.to_string());
182        }
183        return Ok(());
184    }
185
186    //if looks like normal human phrase
187    if !metar::all_tokens_code_like(&tokens) {
188        out.insert(prefix, text.to_string());
189        return Ok(());
190    }
191
192    //for code alike  tokens apply patterns
193    let mut i = 0; //counter
194    for t in tokens {
195        let t = t.trim();
196        if t.is_empty() {
197            continue;
198        }
199        //check for simple patterns
200        if let Some(pat) = metar::holds_pattern_value(t) {
201            metar::apply_pattern(&prefix, t, pat, out);
202            continue;
203        }
204        // if not write as token_n under
205        let col = if prefix.is_empty() {
206            format!("token_{i}")
207        } else {
208            format!("{prefix}.token_{i}")
209        };
210        out.insert(col, t.to_string());
211        i += 1;
212    }
213
214    Ok(())
215}