Skip to main content

binocular/preview/structured_log/
parse.rs

1use crate::preview::structured_log::types::{LogEntry, LogFormat};
2use std::io::{BufRead, BufReader};
3use std::path::Path;
4
5pub fn parse_initial(
6    path: &Path,
7    format: &LogFormat,
8    max_entries: usize,
9) -> (Vec<LogEntry>, usize, Vec<String>) {
10    let Ok(file) = std::fs::File::open(path) else {
11        return (Vec::new(), 0, Vec::new());
12    };
13    let reader = BufReader::new(file);
14
15    let mut entries: Vec<LogEntry> = Vec::with_capacity(1024);
16    let mut total_lines = 0usize;
17    let mut field_order: Vec<String> = Vec::new();
18    let mut seen_fields: std::collections::HashSet<String> = std::collections::HashSet::new();
19
20    for line in reader.lines() {
21        let Ok(line) = line else { continue };
22        total_lines += 1;
23
24        if entries.len() >= max_entries {
25            continue;
26        }
27
28        let trimmed = line.trim();
29        if trimmed.is_empty() {
30            continue;
31        }
32
33        if let Some(entry) = parse_line(trimmed, format) {
34            for (k, _) in &entry.fields {
35                if seen_fields.insert(k.clone()) {
36                    field_order.push(k.clone());
37                }
38            }
39            entries.push(entry);
40        }
41    }
42
43    let all_fields = prioritised_fields(field_order);
44    (entries, total_lines, all_fields)
45}
46
47pub fn parse_line(line: &str, format: &LogFormat) -> Option<LogEntry> {
48    let trimmed = line.trim();
49    if trimmed.is_empty() {
50        return None;
51    }
52    match format {
53        LogFormat::Jsonl => parse_jsonl(trimmed),
54        LogFormat::Logfmt => parse_logfmt(trimmed),
55    }
56}
57
58fn parse_jsonl(line: &str) -> Option<LogEntry> {
59    let val: serde_json::Value = serde_json::from_str(line).ok()?;
60    let obj = val.as_object()?;
61    let fields: Vec<(String, String)> = obj
62        .iter()
63        .map(|(k, v)| (k.clone(), json_value_to_string(v)))
64        .collect();
65    Some(LogEntry {
66        fields,
67        raw: line.to_string(),
68    })
69}
70
71fn parse_logfmt(line: &str) -> Option<LogEntry> {
72    let mut fields = Vec::new();
73    let mut rest = line;
74
75    while !rest.is_empty() {
76        rest = rest.trim_start();
77        if rest.is_empty() {
78            break;
79        }
80
81        let eq = rest.find('=')?;
82        let key = rest[..eq].trim().to_string();
83        rest = &rest[eq + 1..];
84
85        let value = if rest.starts_with('"') {
86            let mut chars = rest[1..].char_indices();
87            let mut end = rest.len() - 1;
88            let mut prev_backslash = false;
89            for (i, c) in chars.by_ref() {
90                if c == '"' && !prev_backslash {
91                    end = i;
92                    break;
93                }
94                prev_backslash = c == '\\';
95            }
96            let v = rest[1..end].replace("\\\"", "\"");
97            rest = rest.get(end + 2..).unwrap_or("").trim_start_matches(' ');
98            v
99        } else {
100            let end = rest.find(' ').unwrap_or(rest.len());
101            let v = rest[..end].to_string();
102            rest = rest.get(end..).unwrap_or("").trim_start_matches(' ');
103            v
104        };
105
106        if !key.is_empty() {
107            fields.push((key, value));
108        }
109    }
110
111    if fields.is_empty() {
112        return None;
113    }
114    Some(LogEntry {
115        fields,
116        raw: line.to_string(),
117    })
118}
119
120fn json_value_to_string(v: &serde_json::Value) -> String {
121    match v {
122        serde_json::Value::String(s) => s.clone(),
123        serde_json::Value::Null => String::new(),
124        serde_json::Value::Bool(b) => b.to_string(),
125        serde_json::Value::Number(n) => n.to_string(),
126        _ => v.to_string(),
127    }
128}
129
/// Orders field names so that well-known log fields come first.
///
/// Names matching an entry of `PRIORITY` (ASCII case-insensitively) are placed
/// in `PRIORITY` order; all remaining names follow. Names with the same rank
/// are ordered by plain string comparison.
fn prioritised_fields(fields: Vec<String>) -> Vec<String> {
    const PRIORITY: &[&str] = &[
        "time",
        "timestamp",
        "ts",
        "datetime",
        "date",
        "@timestamp",
        "level",
        "severity",
        "lvl",
        "log_level",
        "loglevel",
        "msg",
        "message",
        "text",
        "body",
        "service",
        "app",
        "application",
        "component",
        "error",
        "err",
        "caller",
        "file",
        "line",
    ];

    // Compute each name's rank exactly once and sort the (rank, name) pairs,
    // instead of re-deriving an allocated key on every comparison.
    let mut ranked: Vec<(usize, String)> = fields
        .into_iter()
        .map(|name| {
            let rank = PRIORITY
                .iter()
                .position(|known| known.eq_ignore_ascii_case(&name))
                .unwrap_or(usize::MAX);
            (rank, name)
        })
        .collect();

    // Pairs that compare equal are identical, so an unstable sort is
    // indistinguishable from a stable one here.
    ranked.sort_unstable();
    ranked.into_iter().map(|(_, name)| name).collect()
}