use crate::config::InputFormat as ConfigInputFormat;
use anyhow::Result;
/// Guesses the input format of a log stream from a single sample line.
///
/// The sample is trimmed and then offered to each detector in a fixed order:
/// JSON, CEF, syslog, combined access log, logfmt, and finally the CSV/TSV
/// variants. The first detector that accepts the line decides the result.
///
/// An empty (or whitespace-only) sample, and any sample no detector accepts,
/// yields [`ConfigInputFormat::Line`]. None of the paths visible here produce
/// an `Err`.
#[allow(dead_code)]
pub fn detect_format(sample_line: &str) -> Result<ConfigInputFormat> {
    let candidate = sample_line.trim();

    // Order matters: earlier detectors win when several would accept the line.
    let format = if candidate.is_empty() {
        ConfigInputFormat::Line
    } else if detect_json(candidate) {
        ConfigInputFormat::Json
    } else if detect_cef(candidate) {
        ConfigInputFormat::Cef
    } else if detect_syslog(candidate) {
        ConfigInputFormat::Syslog
    } else if detect_combined_logs(candidate) {
        ConfigInputFormat::Combined
    } else if detect_logfmt(candidate) {
        ConfigInputFormat::Logfmt
    } else {
        detect_csv_variants(candidate).unwrap_or(ConfigInputFormat::Line)
    };

    Ok(format)
}
/// Returns `true` when `line` starts with `{` and parses as a JSON value.
///
/// Lines that do not begin with `{` are rejected without invoking the parser.
#[allow(dead_code)]
fn detect_json(line: &str) -> bool {
    line.starts_with('{') && serde_json::from_str::<serde_json::Value>(line).is_ok()
}
/// Returns `true` when `line` begins with the literal, case-sensitive prefix `CEF:`.
#[allow(dead_code)]
fn detect_cef(line: &str) -> bool {
    line.strip_prefix("CEF:").is_some()
}
/// Heuristically decides whether `line` looks like a syslog record.
///
/// Two shapes are accepted:
///
/// * a leading `<...>` priority tag followed by either a `1` version digit and
///   a space/tab, or a three-letter English month abbreviation
///   (e.g. `<34>1 2023-...` or `<13>Apr 15 ...`);
/// * a line longer than 15 bytes that starts with a month abbreviation and
///   continues like `Mon DD HH:MM:SS host tag: message`.
///
/// Every slice taken here is char-boundary safe, so arbitrary UTF-8 input is
/// classified (as `false`) instead of panicking on a multi-byte character.
#[allow(dead_code)]
fn detect_syslog(line: &str) -> bool {
    if has_syslog_priority_header(line) {
        return true;
    }
    line.len() > 15
        && starts_with_syslog_month(line)
        && bsd_syslog_message_part(line).map_or(false, |message| message.contains(':'))
}

/// Returns `true` when `text` begins with a three-letter English month abbreviation.
#[allow(dead_code)]
fn starts_with_syslog_month(text: &str) -> bool {
    const MONTHS: [&str; 12] = [
        "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
    ];
    // `starts_with` compares bytes without slicing, so it cannot split a
    // multi-byte character the way `&text[..3]` could.
    MONTHS.iter().any(|month| text.starts_with(*month))
}

/// Checks for a `<...>` tag followed by a version digit or a month name.
#[allow(dead_code)]
fn has_syslog_priority_header(line: &str) -> bool {
    if !line.starts_with('<') {
        return false;
    }
    let after_priority = match line.find('>') {
        // `>` is a single ASCII byte, so `close + 1` is always a char boundary.
        Some(close) if close < 10 => &line[close + 1..],
        _ => return false,
    };

    // Version digit `1` immediately followed by a space or tab.
    let mut leading = after_priority.chars();
    let versioned = after_priority.len() > 2
        && leading.next() == Some('1')
        && matches!(leading.next(), Some(' ') | Some('\t'));

    // Month abbreviation directly after the tag, with something following it.
    let timestamped = after_priority.len() > 3 && starts_with_syslog_month(after_priority);

    versioned || timestamped
}

/// Walks a `Mon DD HH:MM:SS host rest` line and returns `rest`.
///
/// Returns `None` as soon as a field is missing or malformed. An empty day
/// field passes the digit check (as it would for a space-padded day).
#[allow(dead_code)]
fn bsd_syslog_message_part(line: &str) -> Option<&str> {
    // Skip whatever follows the month abbreviation up to the first space.
    let (_, after_month) = line.get(3..)?.split_once(' ')?;

    let (day, after_day) = after_month.split_once(' ')?;
    if day.len() > 2 || !day.chars().all(|c| c.is_ascii_digit()) {
        return None;
    }

    // `get` yields `None` both when fewer than 8 bytes remain and when byte 8
    // falls inside a multi-byte character.
    let time = after_day.get(..8)?;
    if time.matches(':').count() != 2 {
        return None;
    }

    let (_, message) = after_day.get(8..)?.split_once(' ')?;
    Some(message)
}
/// Heuristically decides whether `line` looks like a combined/common access log entry.
///
/// The text before the first space must pass `is_likely_ip_address`, the line
/// must contain `[`, `]` and `:` somewhere, and it must contain either an
/// opening quote followed by `GET `, `POST `, `PUT ` or `DELETE `, or a quote
/// followed by a space.
#[allow(dead_code)]
fn detect_combined_logs(line: &str) -> bool {
    const REQUEST_HINTS: [&str; 5] = ["\"GET ", "\"POST ", "\"PUT ", "\"DELETE ", "\" "];

    // A line without any space has no separate client field to inspect.
    let client = match line.find(' ') {
        Some(end) => &line[..end],
        None => return false,
    };
    if !is_likely_ip_address(client) {
        return false;
    }

    let has_all_markers = ['[', ']', ':'].iter().all(|&marker| line.contains(marker));
    has_all_markers && REQUEST_HINTS.iter().any(|hint| line.contains(*hint))
}
/// Loose check for whether `s` could be the client field of an access log.
///
/// Accepts any of three shapes:
/// * only ASCII digits and dots, with at least one dot;
/// * only ASCII hex digits and colons, with at least one colon;
/// * only ASCII alphanumerics, dots and hyphens, with at least one letter.
///
/// No numeric ranges or group counts are validated.
#[allow(dead_code)]
fn is_likely_ip_address(s: &str) -> bool {
    let dotted_digits = s.contains('.') && s.chars().all(|ch| ch == '.' || ch.is_ascii_digit());

    let colon_hex = s.contains(':') && s.chars().all(|ch| ch == ':' || ch.is_ascii_hexdigit());

    let name_like = s.chars().any(|ch| ch.is_ascii_alphabetic())
        && s.chars()
            .all(|ch| matches!(ch, '.' | '-') || ch.is_ascii_alphanumeric());

    dotted_digits || colon_hex || name_like
}
/// Returns `true` when at least one whitespace-separated token is a `key=value` pair.
///
/// A token qualifies when the text before its first `=` is non-empty and made
/// only of ASCII alphanumerics or `_`, and the text after it is non-empty.
#[allow(dead_code)]
fn detect_logfmt(line: &str) -> bool {
    line.split_whitespace()
        .filter_map(|token| token.split_once('='))
        .any(|(key, value)| {
            !key.is_empty()
                && !value.is_empty()
                && key.chars().all(|ch| ch == '_' || ch.is_ascii_alphanumeric())
        })
}
/// Classifies `line` as one of the delimited formats, or `None` if it has
/// fewer than two tabs and fewer than two commas.
///
/// Tabs are checked before commas. The first field decides between the two
/// variants of each format: if it contains an ASCII letter the line is
/// reported as `Tsv(None)` / `Csv(None)`, otherwise as `Tsvnh` / `Csvnh`. For
/// commas, the first field is stripped of surrounding `"` and whitespace
/// before that check.
#[allow(dead_code)]
fn detect_csv_variants(line: &str) -> Option<ConfigInputFormat> {
    let has_letter = |field: &str| field.chars().any(|ch| ch.is_ascii_alphabetic());

    if line.matches('\t').count() >= 2 {
        if let Some(head) = line.split('\t').next() {
            return Some(if has_letter(head) {
                ConfigInputFormat::Tsv(None)
            } else {
                ConfigInputFormat::Tsvnh
            });
        }
    }

    if line.matches(',').count() >= 2 {
        if let Some(head) = line.split(',').next() {
            let head = head.trim_matches('"').trim();
            return Some(if has_letter(head) {
                ConfigInputFormat::Csv(None)
            } else {
                ConfigInputFormat::Csvnh
            });
        }
    }

    None
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs detection on `line`, panicking if it reports an error.
    fn detected(line: &str) -> ConfigInputFormat {
        detect_format(line).unwrap()
    }

    #[test]
    fn test_detect_json() {
        let samples = [
            r#"{"key": "value", "num": 42}"#,
            r#"{"timestamp": "2023-04-15T10:00:00Z"}"#,
        ];
        for sample in samples.iter() {
            assert_eq!(detected(sample), ConfigInputFormat::Json);
        }
    }

    #[test]
    fn test_detect_cef() {
        let sample = "CEF:0|Vendor|Product|Version|EventID|Name|Severity|Extension";
        assert_eq!(detected(sample), ConfigInputFormat::Cef);
    }

    #[test]
    fn test_detect_syslog() {
        let samples = [
            // Priority tag followed by a version digit.
            "<34>1 2023-04-15T10:00:00.000Z hostname app - - - message",
            // Priority tag followed by a month-based timestamp.
            "<13>Apr 15 10:00:00 hostname program: message",
            // No priority tag, month-based timestamp only.
            "Jan 15 10:30:45 server1 sshd[1234]: Accepted publickey for user",
            "Dec 25 23:59:59 hostname kernel: USB disconnect",
        ];
        for sample in samples.iter() {
            assert_eq!(detected(sample), ConfigInputFormat::Syslog);
        }
    }

    #[test]
    fn test_detect_combined() {
        let sample =
            r#"192.168.1.1 - - [15/Apr/2023:10:00:00 +0000] "GET /path HTTP/1.1" 200 1234"#;
        assert_eq!(detected(sample), ConfigInputFormat::Combined);
    }

    #[test]
    fn test_detect_logfmt() {
        let samples = [
            "time=2023-04-15T10:00:00Z level=info msg=test",
            "key1=value1 key2=value2 key3=value3",
        ];
        for sample in samples.iter() {
            assert_eq!(detected(sample), ConfigInputFormat::Logfmt);
        }
    }

    #[test]
    fn test_detect_csv() {
        // Comma-separated: a first field with letters vs. a numeric first field.
        assert!(matches!(detected("name,age,city"), ConfigInputFormat::Csv(_)));
        assert!(matches!(detected("1,2,3"), ConfigInputFormat::Csvnh));

        // Tab-separated: same distinction.
        assert!(matches!(
            detected("john\t25\tnyc"),
            ConfigInputFormat::Tsv(_)
        ));
        assert!(matches!(
            detected("name\tage\tcity"),
            ConfigInputFormat::Tsv(_)
        ));
        assert!(matches!(detected("1\t2\t3"), ConfigInputFormat::Tsvnh));
    }

    #[test]
    fn test_detect_line_fallback() {
        let samples = ["just some random text", "", "a single word"];
        for sample in samples.iter() {
            assert_eq!(detected(sample), ConfigInputFormat::Line);
        }
    }
}