Skip to main content

logdive_core/
parser.rs

1//! Line-by-line parser for structured JSON log input.
2//!
3//! The single entry point [`parse_line`] takes one line of text and returns
4//! `Some(LogEntry)` if the line is a JSON object, or `None` otherwise. It
5//! never panics and never returns an error: malformed input is silently
6//! skipped, as mandated by the project doc's parser task ("graceful skip on
7//! malformed lines") and reinforced by the v1 scope note that logdive
8//! accepts structured JSON only — non-JSON lines are simply not its concern.
9//!
10//! Known keys (`timestamp`, `level`, `message`, `tag`) are lifted into the
11//! corresponding `LogEntry` fields. All other top-level keys are preserved
12//! in `LogEntry::fields` for `json_extract()`-based querying downstream.
13
14use serde_json::Value;
15
16use crate::entry::LogEntry;
17
18/// Parse a single line of JSON log input.
19///
20/// Returns `Some(LogEntry)` if `line` is a non-empty JSON object, otherwise
21/// `None`. The caller is expected to iterate over an input source and
22/// discard `None` results (optionally incrementing a "lines skipped"
23/// counter — the CLI does exactly this in milestone 6).
24///
25/// # Behaviour
26///
27/// - Empty or whitespace-only lines return `None`.
28/// - Lines that are not valid JSON return `None`.
29/// - Lines that are valid JSON but not objects (e.g. `42`, `"hi"`, `[1,2]`)
30///   return `None`, because logdive's v1 scope restricts ingestion to
31///   structured JSON logs.
32/// - Within an object, keys matching [`LogEntry::KNOWN_KEYS`] populate the
33///   corresponding struct fields; all other keys go into `LogEntry::fields`.
34/// - For the known string-typed fields, non-string scalar values (numbers,
35///   booleans, null) are stringified so information is preserved. Object
36///   and array values for known fields are *not* coerced — instead they
37///   remain in `fields` under their original key, leaving the known field
38///   as `None`.
39pub fn parse_line(line: &str) -> Option<LogEntry> {
40    if line.trim().is_empty() {
41        return None;
42    }
43
44    let value: Value = serde_json::from_str(line).ok()?;
45    let obj = match value {
46        Value::Object(map) => map,
47        _ => return None,
48    };
49
50    let mut entry = LogEntry::new(line);
51
52    for (key, value) in obj {
53        match key.as_str() {
54            "timestamp" => match coerce_scalar_to_string(&value) {
55                Some(s) => entry.timestamp = Some(s),
56                None => {
57                    entry.fields.insert(key, value);
58                }
59            },
60            "level" => match coerce_scalar_to_string(&value) {
61                Some(s) => entry.level = Some(s),
62                None => {
63                    entry.fields.insert(key, value);
64                }
65            },
66            "message" => match coerce_scalar_to_string(&value) {
67                Some(s) => entry.message = Some(s),
68                None => {
69                    entry.fields.insert(key, value);
70                }
71            },
72            "tag" => match coerce_scalar_to_string(&value) {
73                Some(s) => entry.tag = Some(s),
74                None => {
75                    entry.fields.insert(key, value);
76                }
77            },
78            _ => {
79                entry.fields.insert(key, value);
80            }
81        }
82    }
83
84    Some(entry)
85}
86
87/// Convert a JSON scalar to its string form for storage in a known
88/// `Option<String>` field.
89///
90/// Returns `None` for objects and arrays — the caller preserves those under
91/// their original key in `LogEntry::fields` instead of losing structure via
92/// stringification.
93fn coerce_scalar_to_string(v: &Value) -> Option<String> {
94    match v {
95        Value::String(s) => Some(s.clone()),
96        Value::Number(n) => Some(n.to_string()),
97        Value::Bool(b) => Some(b.to_string()),
98        Value::Null => Some("null".to_string()),
99        Value::Object(_) | Value::Array(_) => None,
100    }
101}
102
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Parse `line`, failing the calling test if the parser skips it.
    #[track_caller]
    fn parsed(line: &str) -> LogEntry {
        parse_line(line).expect("should parse")
    }

    /// Assert that the parser skips every one of `inputs`.
    #[track_caller]
    fn assert_all_skipped(inputs: &[&str]) {
        for &input in inputs {
            assert!(
                parse_line(input).is_none(),
                "{:?} should be skipped",
                input
            );
        }
    }

    #[test]
    fn parses_a_fully_populated_line() {
        let line = r#"{"timestamp":"2026-04-19T10:00:00Z","level":"error","message":"boom","service":"payments","req_id":42}"#;
        let entry = parsed(line);

        // Known keys are lifted into their struct fields.
        assert_eq!(entry.timestamp.as_deref(), Some("2026-04-19T10:00:00Z"));
        assert_eq!(entry.level.as_deref(), Some("error"));
        assert_eq!(entry.message.as_deref(), Some("boom"));
        assert!(entry.tag.is_none());

        // Everything else is kept, with its JSON type, in the fields map.
        for (key, expected) in [("service", json!("payments")), ("req_id", json!(42))] {
            assert_eq!(entry.fields.get(key), Some(&expected));
        }

        assert_eq!(entry.raw, line);
    }

    #[test]
    fn missing_known_fields_become_none_without_panic() {
        // Only one known key present; three missing.
        let entry = parsed(r#"{"level":"info"}"#);

        assert_eq!(entry.level.as_deref(), Some("info"));
        for absent in [&entry.timestamp, &entry.message, &entry.tag] {
            assert!(absent.is_none());
        }
        assert!(entry.fields.is_empty());
    }

    #[test]
    fn malformed_json_returns_none() {
        assert_all_skipped(&[
            r#"{"level": "error""#, // truncated
            "not json at all",
            "{this is broken}",
        ]);
    }

    #[test]
    fn empty_and_whitespace_lines_return_none() {
        assert_all_skipped(&["", "   ", "\t\n"]);
    }

    #[test]
    fn valid_json_but_not_an_object_returns_none() {
        // v1 scope: structured JSON *objects* only.
        assert_all_skipped(&["42", r#""hello""#, "[1,2,3]", "true", "null"]);
    }

    #[test]
    fn unknown_keys_land_in_fields_map() {
        let entry = parsed(r#"{"user_id":"u-1","duration_ms":123,"ok":true}"#);

        assert_eq!(entry.fields.len(), 3);
        let expected = [
            ("user_id", json!("u-1")),
            ("duration_ms", json!(123)),
            ("ok", json!(true)),
        ];
        for (key, value) in expected {
            assert_eq!(entry.fields.get(key), Some(&value));
        }
    }

    #[test]
    fn numeric_level_is_stringified() {
        // Syslog-style numeric severities are common. Preserve the info.
        let entry = parsed(r#"{"level":3}"#);

        assert_eq!(entry.level.as_deref(), Some("3"));
        // The numeric value was consumed into `level`, not duplicated into fields.
        assert!(entry.fields.is_empty());
    }

    #[test]
    fn boolean_and_null_known_fields_are_stringified() {
        let entry = parsed(r#"{"tag":true,"message":null}"#);

        assert_eq!(entry.tag.as_deref(), Some("true"));
        assert_eq!(entry.message.as_deref(), Some("null"));
    }

    #[test]
    fn object_valued_known_field_is_preserved_in_fields_map() {
        // `message` is an object, so it is not stringified: the original
        // key/value stays in `fields` and the known field stays None.
        let entry = parsed(r#"{"message":{"code":500,"text":"err"}}"#);
        let nested = json!({"code": 500, "text": "err"});

        assert!(entry.message.is_none());
        assert_eq!(entry.fields.get("message"), Some(&nested));
    }

    #[test]
    fn array_valued_known_field_is_preserved_in_fields_map() {
        let entry = parsed(r#"{"tag":["a","b"]}"#);
        let list = json!(["a", "b"]);

        assert!(entry.tag.is_none());
        assert_eq!(entry.fields.get("tag"), Some(&list));
    }

    #[test]
    fn raw_is_preserved_verbatim_including_whitespace() {
        // Dedup hashing in milestone 2 depends on byte-exact preservation.
        let padded = "  {\"level\":\"info\"}  ";
        let entry = parsed(padded);

        assert_eq!(entry.raw, padded);
    }

    #[test]
    fn empty_json_object_is_a_valid_entry() {
        // `{}` parses, produces an entry with everything None and no fields.
        // Whether the indexer accepts such a row is milestone 2's decision.
        let entry = parsed("{}");

        for known in [
            &entry.timestamp,
            &entry.level,
            &entry.message,
            &entry.tag,
        ] {
            assert!(known.is_none());
        }
        assert!(entry.fields.is_empty());
        assert_eq!(entry.raw, "{}");
    }
}