Skip to main content

binocular/preview/structured_log/
detect.rs

1//! Structured-log file detection.
2
3use crate::preview::structured_log::types::LogFormat;
4use std::io::{BufRead, BufReader};
5use std::path::Path;
6
/// Upper bound on how many leading lines of a file are read when sniffing its format.
const DETECT_SAMPLE: usize = 20;
/// Minimum fraction of the sampled non-empty lines that must match a format
/// for that format to be reported.
const DETECT_THRESHOLD: f32 = 0.75;
9
10pub fn detect_structured_log(path: &Path) -> Option<LogFormat> {
11    let ext = path
12        .extension()
13        .and_then(|e| e.to_str())
14        .unwrap_or("")
15        .to_ascii_lowercase();
16
17    match ext.as_str() {
18        "jsonl" | "ndjson" => return Some(LogFormat::Jsonl),
19        "log" | "logs" => {}
20        _ => return None,
21    }
22
23    let file = std::fs::File::open(path).ok()?;
24    let reader = BufReader::new(file);
25    let lines: Vec<String> = reader
26        .lines()
27        .take(DETECT_SAMPLE)
28        .filter_map(|l| l.ok())
29        .map(|l| l.trim().to_string())
30        .filter(|l| !l.is_empty())
31        .collect();
32
33    if lines.is_empty() {
34        return None;
35    }
36
37    let json_hits = lines
38        .iter()
39        .filter(|l| {
40            serde_json::from_str::<serde_json::Value>(l)
41                .map(|v| v.is_object())
42                .unwrap_or(false)
43        })
44        .count();
45    if json_hits as f32 / lines.len() as f32 >= DETECT_THRESHOLD {
46        return Some(LogFormat::Jsonl);
47    }
48
49    let logfmt_hits = lines.iter().filter(|l| looks_like_logfmt(l)).count();
50    if logfmt_hits as f32 / lines.len() as f32 >= DETECT_THRESHOLD {
51        return Some(LogFormat::Logfmt);
52    }
53
54    None
55}
56
/// Heuristic check for a logfmt-style line.
///
/// Returns `true` when at least one whitespace-separated token contains `=`
/// and neither begins nor ends with `=` (so something sits on both sides of
/// the sign, as in `key=value`). Returns `false` for empty lines and for lines
/// with no such token.
fn looks_like_logfmt(line: &str) -> bool {
    for token in line.split_whitespace() {
        let has_equals = token.contains('=');
        let edge_equals = token.starts_with('=') || token.ends_with('=');
        if has_equals && !edge_equals {
            return true;
        }
    }
    false
}