use crate::error::{RlgError, RlgResult};
use crate::utils::sanitize_log_message;
use once_cell::sync::Lazy;
use regex::Regex;
use serde::{Deserialize, Serialize};
use std::fmt;
use std::str::FromStr;
/// Lazily compiled pattern describing one Common Log Format (CLF) line.
///
/// The pattern is anchored at both ends and exposes the named groups `host`,
/// `ident`, `user`, `time` (the text between square brackets), `method`,
/// `path`, `protocol`, `status` (exactly three digits) and `size` (digits or
/// a single `-`).
///
/// # Panics
///
/// The first access compiles the pattern and unwraps the result; the pattern
/// is a fixed literal, so a failure would indicate a programming error.
static CLF_REGEX: Lazy<Regex> = Lazy::new(|| {
    const PATTERN: &str = r#"^(?P<host>\S+) (?P<ident>\S+) (?P<user>\S+) \[(?P<time>[^\]]+)\] "(?P<method>\S+) (?P<path>\S+) (?P<protocol>\S+)" (?P<status>\d{3}) (?P<size>\d+|-)$"#;
    Regex::new(PATTERN).unwrap()
});
/// Lazily compiled pattern describing the header of a CEF record.
///
/// A match requires the literal prefix `CEF:` followed by one or more digits,
/// then six non-empty fields that contain no `|`, each terminated by `|`.
/// Whatever follows the last `|` (possibly nothing) is accepted as long as it
/// stays on the same line.
///
/// # Panics
///
/// The first access compiles the pattern and unwraps the result; the pattern
/// is a fixed literal, so a failure would indicate a programming error.
static CEF_REGEX: Lazy<Regex> = Lazy::new(|| {
    const PATTERN: &str =
        r#"^CEF:\d+\|[^|]+\|[^|]+\|[^|]+\|[^|]+\|[^|]+\|[^|]+\|.*$"#;
    Regex::new(PATTERN).unwrap()
});
/// Lazily compiled pattern used to recognise W3C/ELF style input.
///
/// A match requires a first line starting with `#Fields:`, a single newline,
/// and then one further non-empty line that ends the input. Because `.` does
/// not cross newlines and `$` is not in multi-line mode, input with additional
/// lines (before or after) or a trailing newline does not match.
///
/// # Panics
///
/// The first access compiles the pattern and unwraps the result; the pattern
/// is a fixed literal, so a failure would indicate a programming error.
static W3C_REGEX: Lazy<Regex> = Lazy::new(|| {
    // The line break is written as an explicit `\n` escape so the pattern
    // text does not depend on how this source file is indented.
    const PATTERN: &str = "^#Fields:.*\n.+$";
    Regex::new(PATTERN).unwrap()
});
/// The log formats this module can name, validate and format.
///
/// The enum is `#[non_exhaustive]`, so code outside this crate must include a
/// wildcard arm when matching on it.
///
/// The variant order is significant: the derived `Ord`/`PartialOrd`
/// implementations compare variants by declaration order.
#[derive(
    Debug,
    Clone,
    Copy,
    PartialEq,
    Eq,
    PartialOrd,
    Ord,
    Hash,
    Serialize,
    Deserialize,
)]
#[non_exhaustive]
pub enum LogFormat {
    /// Common Log Format.
    CLF,
    /// A JSON document.
    JSON,
    /// Common Event Format.
    CEF,
    /// Extended Log Format; validated with the same pattern as [`LogFormat::W3C`].
    ELF,
    /// W3C extended log format.
    W3C,
    /// Graylog Extended Log Format; handled as JSON by this module.
    GELF,
    /// Apache access log; validated with the same pattern as [`LogFormat::CLF`].
    ApacheAccessLog,
    /// Logstash events; handled as JSON by this module.
    Logstash,
    /// Log4j XML events.
    Log4jXML,
    /// Newline-delimited JSON; handled as JSON by this module.
    NDJSON,
}
impl FromStr for LogFormat {
    type Err = RlgError;

    /// Parses a log format name, ignoring letter case.
    ///
    /// Accepted names are the compact identifiers (`clf`, `json`, `cef`,
    /// `elf`, `w3c`, `gelf`, `apacheaccesslog`, `logstash`, `log4jxml`,
    /// `ndjson`) and, in addition, the spaced names that the `Display`
    /// implementation produces for two variants (`Apache Access Log` and
    /// `Log4j XML`). Accepting both spellings means
    /// `format.to_string().parse::<LogFormat>()` succeeds for every variant.
    ///
    /// # Errors
    ///
    /// Returns `RlgError::FormatParseError` with a message containing the
    /// original, unmodified input when the name is not recognised.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        match s.to_lowercase().as_str() {
            "clf" => Ok(Self::CLF),
            "json" => Ok(Self::JSON),
            "cef" => Ok(Self::CEF),
            "elf" => Ok(Self::ELF),
            "w3c" => Ok(Self::W3C),
            "gelf" => Ok(Self::GELF),
            // The second spelling is the lowercased `Display` output.
            "apacheaccesslog" | "apache access log" => {
                Ok(Self::ApacheAccessLog)
            }
            "logstash" => Ok(Self::Logstash),
            // The second spelling is the lowercased `Display` output.
            "log4jxml" | "log4j xml" => Ok(Self::Log4jXML),
            "ndjson" => Ok(Self::NDJSON),
            _ => Err(RlgError::FormatParseError(format!(
                "Unknown log format: {}",
                s
            ))),
        }
    }
}
impl LogFormat {
    /// Reports whether `input` looks like a record of this format.
    ///
    /// The check performed depends on the variant:
    ///
    /// * `CLF`, `ApacheAccessLog` — `input` must match `CLF_REGEX`.
    /// * `CEF` — `input` must match `CEF_REGEX`.
    /// * `ELF`, `W3C` — `input` must match `W3C_REGEX`.
    /// * `JSON`, `Logstash`, `NDJSON`, `GELF` — `input` must parse as a
    ///   single JSON value; no format-specific fields are checked.
    /// * `Log4jXML` — after leading whitespace is skipped, `input` must
    ///   start with `<log4j:event`; the rest is not inspected.
    pub fn validate(&self, input: &str) -> bool {
        match *self {
            Self::CLF | Self::ApacheAccessLog => CLF_REGEX.is_match(input),
            Self::CEF => CEF_REGEX.is_match(input),
            Self::ELF | Self::W3C => W3C_REGEX.is_match(input),
            Self::JSON | Self::Logstash | Self::NDJSON | Self::GELF => {
                serde_json::from_str::<serde_json::Value>(input).is_ok()
            }
            Self::Log4jXML => {
                let body = input.trim_start();
                body.starts_with("<log4j:event")
            }
        }
    }

    /// Produces the output text for `entry` in this format.
    ///
    /// `entry` is first passed through `sanitize_log_message` (defined in
    /// `crate::utils`; its exact behaviour is not visible from this file).
    /// For the text-based formats the sanitized text is returned as is. For
    /// the JSON-based formats (`JSON`, `Logstash`, `NDJSON`, `GELF`) the
    /// sanitized text is parsed and re-serialized as pretty-printed JSON.
    ///
    /// # Errors
    ///
    /// For the JSON-based formats, returns `RlgError::FormattingError` when
    /// the sanitized text is not valid JSON (`"Invalid JSON: ..."`) or when
    /// serialization fails (`"JSON formatting error: ..."`). The text-based
    /// formats never return an error from this method.
    pub fn format_log(&self, entry: &str) -> RlgResult<String> {
        let sanitized_entry = sanitize_log_message(entry);

        match *self {
            Self::JSON | Self::Logstash | Self::NDJSON | Self::GELF => {
                // Step 1: parse, so that malformed JSON is reported instead
                // of being echoed back.
                let parsed: serde_json::Value =
                    serde_json::from_str(&sanitized_entry).map_err(|e| {
                        RlgError::FormattingError(format!(
                            "Invalid JSON: {}",
                            e
                        ))
                    })?;

                // Step 2: re-serialize in pretty-printed form.
                serde_json::to_string_pretty(&parsed).map_err(|e| {
                    RlgError::FormattingError(format!(
                        "JSON formatting error: {}",
                        e
                    ))
                })
            }
            Self::CLF
            | Self::ApacheAccessLog
            | Self::CEF
            | Self::ELF
            | Self::W3C
            | Self::Log4jXML => Ok(sanitized_entry),
        }
    }
}
impl fmt::Display for LogFormat {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let s = match self {
LogFormat::CLF => "CLF",
LogFormat::JSON => "JSON",
LogFormat::CEF => "CEF",
LogFormat::ELF => "ELF",
LogFormat::W3C => "W3C",
LogFormat::GELF => "GELF",
LogFormat::ApacheAccessLog => "Apache Access Log",
LogFormat::Logstash => "Logstash",
LogFormat::Log4jXML => "Log4j XML",
LogFormat::NDJSON => "NDJSON",
};
write!(f, "{}", s)
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A single Common Log Format line shared by the tests below.
    const CLF_SAMPLE: &str = r#"127.0.0.1 - - [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326"#;

    /// A compact, single-line JSON object shared by the tests below.
    const JSON_SAMPLE: &str = r#"{"level":"info","message":"Test log","timestamp":"2023-05-17T12:34:56Z"}"#;

    /// Names are parsed case-insensitively and unknown names are rejected.
    #[test]
    fn test_log_format_from_str() {
        let lower = "clf".parse::<LogFormat>();
        assert_eq!(lower.unwrap(), LogFormat::CLF);

        let upper = "JSON".parse::<LogFormat>();
        assert_eq!(upper.unwrap(), LogFormat::JSON);

        let unknown = "invalid".parse::<LogFormat>();
        assert!(unknown.is_err());
    }

    /// Well-formed CLF and JSON samples pass validation for their formats.
    #[test]
    fn test_log_format_validate() {
        assert!(LogFormat::CLF.validate(CLF_SAMPLE));
        assert!(LogFormat::JSON.validate(JSON_SAMPLE));
    }

    /// JSON is pretty-printed; a CLF line comes back unchanged.
    #[test]
    fn test_log_format_format_log() {
        let pretty = LogFormat::JSON.format_log(JSON_SAMPLE).unwrap();
        assert!(pretty.contains("{\n"));

        let echoed = LogFormat::CLF.format_log(CLF_SAMPLE).unwrap();
        assert_eq!(echoed, CLF_SAMPLE);
    }
}