logdive_core/parsers/mod.rs
1//! Format-aware line parsing for log ingestion.
2//!
3//! v0.2.0 introduced multiple input formats. This module contains:
4//! - the per-format parsers as submodules (`json`, `logfmt`, `plain`),
5//! - the [`LogFormat`] enum used to select among them,
6//! - and the [`parse_line`] dispatcher that routes each line to the
7//! right submodule based on the chosen format.
8//!
9//! All three submodules expose a `parse_line(line: &str) -> Option<LogEntry>`
10//! function with the same graceful-skip contract: returns `None` on empty,
11//! whitespace-only, or unparseable input. The dispatcher is a thin
12//! `match` over the format selector — power users who already know the
13//! format ahead of time can call the submodule directly.
14//!
15//! Submodules are intentionally `pub mod`: third-party consumers (a
16//! plugin doing a custom ingestion pipeline, say) sometimes want to bypass
17//! the dispatcher. The dispatcher remains the canonical entry point and
18//! is what the CLI ingest path uses.
19
20pub mod json;
21pub mod logfmt;
22pub mod plain;
23
24use crate::entry::LogEntry;
25
/// The input format the dispatcher should assume for each line.
///
/// The `Default` variant is [`LogFormat::Json`]. That was the v0.1.0
/// default, and it is carried forward so that callers which never pick a
/// format explicitly keep the behavior they had before.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub enum LogFormat {
    /// Structured JSON with one object per line. The v0.1.0 default.
    #[default]
    Json,
    /// logfmt: `key=value` pairs (see `parsers::logfmt`).
    Logfmt,
    /// Unstructured plaintext; the whole line becomes `LogEntry::message`.
    Plain,
}
41
42impl LogFormat {
43 /// Every [`LogFormat`] variant, in declaration order.
44 ///
45 /// Use this when you need to enumerate all supported formats without
46 /// hard-coding the list at a call site — for example, the API
47 /// `/version` endpoint reports this slice so clients know what the
48 /// running binary accepts. Adding a new variant here automatically
49 /// propagates to every such consumer.
50 pub const ALL: &'static [Self] = &[Self::Json, Self::Logfmt, Self::Plain];
51
52 /// Parse a CLI-style format name. Case-insensitive.
53 ///
54 /// Returns `None` for unrecognized names. The CLI wraps this in a
55 /// `clap` value parser that surfaces the unknown name as a usage
56 /// error; library consumers can call it directly.
57 pub fn from_name(s: &str) -> Option<Self> {
58 match s.to_ascii_lowercase().as_str() {
59 "json" => Some(Self::Json),
60 "logfmt" => Some(Self::Logfmt),
61 "plain" => Some(Self::Plain),
62 _ => None,
63 }
64 }
65
66 /// Canonical short name used in CLI flags, configuration, and any
67 /// future `Display`-based contexts. Always one lowercase word that
68 /// round-trips through [`Self::from_name`].
69 pub fn name(self) -> &'static str {
70 match self {
71 Self::Json => "json",
72 Self::Logfmt => "logfmt",
73 Self::Plain => "plain",
74 }
75 }
76}
77
78impl std::fmt::Display for LogFormat {
79 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
80 f.write_str(self.name())
81 }
82}
83
84/// Parse a single log line according to the chosen format.
85///
86/// Routes to the format-specific parser. Returns `None` under the standard
87/// graceful-skip contract — empty input, whitespace-only input, or
88/// format-specific malformed input (unterminated quote in logfmt,
89/// non-object JSON, etc.).
90pub fn parse_line(format: LogFormat, line: &str) -> Option<LogEntry> {
91 match format {
92 LogFormat::Json => json::parse_line(line),
93 LogFormat::Logfmt => logfmt::parse_line(line),
94 LogFormat::Plain => plain::parse_line(line),
95 }
96}
97
98// ---------------------------------------------------------------------------
99// Tests
100// ---------------------------------------------------------------------------
101
#[cfg(test)]
mod tests {
    use super::*;

    // ----- LogFormat name parsing ---------------------------------------

    // Canonical lowercase names map to their variants.
    #[test]
    fn from_name_recognizes_known_formats() {
        let cases = [
            ("json", LogFormat::Json),
            ("logfmt", LogFormat::Logfmt),
            ("plain", LogFormat::Plain),
        ];
        for (input, expected) in cases {
            assert_eq!(LogFormat::from_name(input), Some(expected));
        }
    }

    // Upper- and mixed-case spellings are accepted as well.
    #[test]
    fn from_name_is_case_insensitive() {
        let cases = [
            ("JSON", LogFormat::Json),
            ("Logfmt", LogFormat::Logfmt),
            ("PLAIN", LogFormat::Plain),
        ];
        for (input, expected) in cases {
            assert_eq!(LogFormat::from_name(input), Some(expected));
        }
    }

    // Unknown names, would-be aliases ("plaintext"), and the empty string
    // are all rejected.
    #[test]
    fn from_name_returns_none_for_unknown() {
        for input in ["yaml", "plaintext", ""] {
            assert!(LogFormat::from_name(input).is_none());
        }
    }

    #[test]
    fn name_round_trips_through_from_name() {
        let variants = [LogFormat::Json, LogFormat::Logfmt, LogFormat::Plain];
        variants.iter().for_each(|&variant| {
            assert_eq!(LogFormat::from_name(variant.name()), Some(variant));
        });
    }

    #[test]
    fn default_is_json() {
        let fallback = LogFormat::default();
        assert_eq!(fallback, LogFormat::Json);
    }

    #[test]
    fn display_uses_canonical_name() {
        let cases = [
            (LogFormat::Json, "json"),
            (LogFormat::Logfmt, "logfmt"),
            (LogFormat::Plain, "plain"),
        ];
        for (format, rendered) in cases {
            assert_eq!(format!("{}", format), rendered);
        }
    }

    // ----- ALL const ---------------------------------------------------

    #[test]
    fn all_contains_every_variant() {
        let all = LogFormat::ALL;
        assert_eq!(all.len(), 3);
        for expected in [LogFormat::Json, LogFormat::Logfmt, LogFormat::Plain] {
            assert!(all.contains(&expected));
        }
    }

    #[test]
    fn all_names_round_trip_through_from_name() {
        // Destructure the reference in the pattern; `LogFormat` is `Copy`.
        for &format in LogFormat::ALL {
            assert_eq!(LogFormat::from_name(format.name()), Some(format));
        }
    }

    // ----- Dispatcher routing -------------------------------------------
    //
    // The dispatcher is a single three-arm match, so the realistic way to
    // break it is to send a format to the wrong parser. Each routing test
    // therefore parses the same fixture twice — once through the
    // dispatcher and once by calling the submodule directly — and requires
    // the two results to be equal. Swapped match arms make these fail.

    #[test]
    fn dispatcher_json_matches_direct_json_call() {
        let line = r#"{"timestamp":"2026-04-15T09:00:00Z","level":"info","message":"hi"}"#;
        let (direct, routed) = (json::parse_line(line), parse_line(LogFormat::Json, line));
        assert_eq!(direct, routed);
        assert!(direct.is_some(), "fixture line should parse");
    }

    #[test]
    fn dispatcher_logfmt_matches_direct_logfmt_call() {
        let line = "level=info service=payments req_id=42";
        let (direct, routed) = (
            logfmt::parse_line(line),
            parse_line(LogFormat::Logfmt, line),
        );
        assert_eq!(direct, routed);
        assert!(direct.is_some());
    }

    #[test]
    fn dispatcher_plain_matches_direct_plain_call() {
        let line = "starting service version 2.4.1";
        let (direct, routed) = (plain::parse_line(line), parse_line(LogFormat::Plain, line));
        assert_eq!(direct, routed);
        assert!(direct.is_some());
    }

    #[test]
    fn dispatcher_returns_none_for_empty_line_in_every_format() {
        // Every parser applies its own "nothing to parse" rule; going
        // through the dispatcher must not change the outcome for any of
        // them.
        for format in [LogFormat::Json, LogFormat::Logfmt, LogFormat::Plain] {
            let on_empty = parse_line(format, "");
            assert!(on_empty.is_none(), "format {format} on empty");

            let on_whitespace = parse_line(format, " \t ");
            assert!(on_whitespace.is_none(), "format {format} on whitespace");
        }
    }
}