use crate::error::{RlgError, RlgResult};
use crate::utils::sanitize_log_message;
use regex::Regex;
use serde::{Deserialize, Serialize};
use std::fmt;
use std::str::FromStr;
use std::sync::LazyLock;
/// Lazily compiled matcher for a single Common Log Format access line.
///
/// The whole input must match (anchored with `^`/`$`). Named groups, in order:
/// `host`, `ident`, `user`, a bracketed `time`, a double-quoted request made
/// of `method`, `path` and `protocol`, a three-digit `status`, and a `size`
/// that is either a run of digits or `-`.
///
/// # Panics
///
/// The first access panics if the pattern fails to compile.
static CLF_REGEX: LazyLock<Regex> = LazyLock::new(|| {
    let pattern = r#"^(?P<host>\S+) (?P<ident>\S+) (?P<user>\S+) \[(?P<time>[^\]]+)\] "(?P<method>\S+) (?P<path>\S+) (?P<protocol>\S+)" (?P<status>\d{3}) (?P<size>\d+|-)$"#;
    Regex::new(pattern).expect("Failed to compile CLF regex")
});
/// Lazily compiled matcher for a CEF-style record.
///
/// Accepts input that starts with `CEF:` and a numeric version, followed by
/// six non-empty `|`-separated fields and a final `|` after which anything
/// (including nothing) may follow up to the end of the input.
///
/// # Panics
///
/// The first access panics if the pattern fails to compile.
static CEF_REGEX: LazyLock<Regex> = LazyLock::new(|| {
    let pattern = r"^CEF:\d+\|[^|]+\|[^|]+\|[^|]+\|[^|]+\|[^|]+\|[^|]+\|.*$";
    Regex::new(pattern).expect("Failed to compile CEF regex")
});
/// Lazily compiled matcher for a two-line W3C-style entry.
///
/// The input must consist of a line beginning with `#Fields:`, a newline, and
/// then one further non-empty line that runs to the end of the input.
///
/// # Panics
///
/// The first access panics if the pattern fails to compile.
static W3C_REGEX: LazyLock<Regex> = LazyLock::new(|| {
    // The raw string intentionally spans two source lines: the line break is
    // part of the pattern, so the continuation must stay at column 0 (any
    // leading whitespace there would become part of the regex).
    let pattern = r"^#Fields:.*
.+$";
    Regex::new(pattern).expect("Failed to compile W3C regex")
});
/// Log formats this module can name, validate and format.
///
/// Textual names are handled by the `FromStr`/`Display` impls generated by
/// `define_log_format_strings!`; structural checks live in
/// `LogFormat::validate` and output shaping in `LogFormat::format_log`.
#[derive(
Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize,
)]
pub enum LogFormat {
/// Common Log Format; `validate` matches entries against `CLF_REGEX`.
CLF,
/// Plain JSON; entries must parse as JSON and are pretty-printed by
/// `format_log`.
JSON,
/// Common Event Format; `validate` matches entries against `CEF_REGEX`.
CEF,
/// ELF (presumably Extended Log Format — confirm); `validate` accepts any
/// non-empty entry.
ELF,
/// W3C log; `validate` matches entries against `W3C_REGEX`.
W3C,
/// GELF (presumably Graylog Extended Log Format — confirm); handled as JSON.
GELF,
/// Apache access log; `validate` accepts any non-empty entry.
ApacheAccessLog,
/// Logstash; handled as JSON.
Logstash,
/// Log4j XML; `validate` looks for `<log4j:event` and a `>` in the entry.
Log4jXML,
/// NDJSON; each entry is handled as a single JSON document.
NDJSON,
/// MCP (presumably Model Context Protocol — confirm); handled as JSON.
MCP,
/// OTLP (presumably OpenTelemetry Protocol — confirm); handled as JSON.
OTLP,
/// logfmt; `validate` requires an `=` that is not the first character.
Logfmt,
/// ECS (presumably Elastic Common Schema — confirm); handled as JSON.
ECS,
}
/// Generates the `FromStr` and `Display` impls for `LogFormat` from one table.
///
/// Each row has the shape `Variant => "display text", ["key", ...];`, where
/// the keys are the lowercase spellings that `from_str` accepts for that
/// variant and the display text is what `Display` writes.
///
/// Parsing lowercases the input and matches it against the keys. When no key
/// matches, the input is compared (ASCII case-insensitively) with every
/// display text, so the output of `to_string()` always parses back to the same
/// variant even if the display text is not itself listed as a key (for
/// instance a display text that contains spaces).
///
/// Unrecognised input yields `RlgError::FormatParseError` carrying the
/// original, un-lowercased string.
macro_rules! define_log_format_strings {
    ( $( $variant:ident => $display:expr, [ $( $key:expr ),+ ] );+ $(;)? ) => {
        impl FromStr for LogFormat {
            type Err = RlgError;

            fn from_str(s: &str) -> Result<Self, Self::Err> {
                match s.to_lowercase().as_str() {
                    $(
                        $( $key )|+ => Ok(Self::$variant),
                    )+
                    _ => {
                        // No key matched: fall back to the display texts so
                        // that `Display` output round-trips through `FromStr`.
                        $(
                            if s.eq_ignore_ascii_case($display) {
                                return Ok(Self::$variant);
                            }
                        )+
                        Err(RlgError::FormatParseError(format!(
                            "Unknown log format: {s}"
                        )))
                    }
                }
            }
        }

        impl fmt::Display for LogFormat {
            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                let s = match self {
                    $( Self::$variant => $display, )+
                };
                write!(f, "{s}")
            }
        }
    };
}
// Name table for `LogFormat`, expanded by `define_log_format_strings!` into
// the `FromStr` and `Display` impls.
//
// Row shape: `Variant => "text written by Display", ["keys accepted by from_str"];`
// `from_str` lowercases its input before matching, so every key here must be
// lowercase to be reachable.
define_log_format_strings! {
CLF => "CLF", ["clf"];
JSON => "JSON", ["json"];
CEF => "CEF", ["cef"];
ELF => "ELF", ["elf"];
W3C => "W3C", ["w3c"];
GELF => "GELF", ["gelf"];
// Two accepted spellings map to the same variant.
ApacheAccessLog => "Apache Access Log", ["apache", "apacheaccesslog"];
Logstash => "Logstash", ["logstash"];
Log4jXML => "Log4j XML", ["log4jxml"];
NDJSON => "NDJSON", ["ndjson"];
MCP => "MCP", ["mcp"];
OTLP => "OTLP", ["otlp"];
Logfmt => "logfmt", ["logfmt"];
ECS => "ECS", ["ecs"];
}
impl LogFormat {
    /// Reports whether `entry` passes this format's structural check.
    ///
    /// An empty `entry` is rejected for every format. Otherwise:
    ///
    /// - `CLF`, `CEF`, `W3C`: the entry must match the corresponding regex.
    /// - `JSON`, `GELF`, `Logstash`, `NDJSON`, `MCP`, `OTLP`, `ECS`: the
    ///   entry must parse as a single JSON value.
    /// - `Logfmt`: the entry must contain `=`, and not as its first character.
    /// - `Log4jXML`: the entry must contain both `<log4j:event` and `>`.
    /// - `ELF`, `ApacheAccessLog`: any non-empty entry is accepted.
    ///
    /// The check runs on `entry` exactly as given; no sanitising is applied.
    #[must_use]
    pub fn validate(&self, entry: &str) -> bool {
        if entry.is_empty() {
            return false;
        }

        match self {
            // No structural check exists for these two beyond non-emptiness.
            Self::ELF | Self::ApacheAccessLog => true,

            // Regex-backed formats.
            Self::CLF => CLF_REGEX.is_match(entry),
            Self::CEF => CEF_REGEX.is_match(entry),
            Self::W3C => W3C_REGEX.is_match(entry),

            // The first `=` must exist and must not sit at byte offset 0.
            Self::Logfmt => entry.find('=').is_some_and(|first_eq| first_eq > 0),

            Self::Log4jXML => {
                let has_event_tag = entry.contains("<log4j:event");
                has_event_tag && entry.contains('>')
            }

            // JSON-based formats: valid iff the entry deserialises.
            Self::JSON
            | Self::GELF
            | Self::Logstash
            | Self::NDJSON
            | Self::MCP
            | Self::OTLP
            | Self::ECS => {
                let parsed: Result<serde_json::Value, _> = serde_json::from_str(entry);
                parsed.is_ok()
            }
        }
    }

    /// Produces the output text for `entry` in this format.
    ///
    /// `entry` is always passed through `sanitize_log_message` first. For the
    /// JSON-based formats (`JSON`, `Logstash`, `NDJSON`, `GELF`, `MCP`, `OTLP`,
    /// `ECS`) the sanitised text is parsed and re-serialised pretty-printed;
    /// for every other format the sanitised text is returned unchanged.
    ///
    /// # Errors
    ///
    /// Returns `RlgError::FormattingError` when a JSON-based format is
    /// selected and the sanitised entry is not valid JSON.
    ///
    /// # Panics
    ///
    /// Panics if `serde_json::to_string_pretty` fails on the value that was
    /// just parsed, which this code treats as impossible.
    pub fn format_log(&self, entry: &str) -> RlgResult<String> {
        let cleaned = sanitize_log_message(entry);

        // Exhaustive on purpose: a new variant must pick a side explicitly.
        let wants_pretty_json = match self {
            Self::JSON
            | Self::Logstash
            | Self::NDJSON
            | Self::GELF
            | Self::MCP
            | Self::OTLP
            | Self::ECS => true,
            Self::CLF
            | Self::ApacheAccessLog
            | Self::CEF
            | Self::ELF
            | Self::W3C
            | Self::Log4jXML
            | Self::Logfmt => false,
        };

        if !wants_pretty_json {
            return Ok(cleaned);
        }

        // NOTE(review): pretty-printing yields multi-line output, including
        // for NDJSON — confirm that is the intended shape for that format.
        let document: serde_json::Value = serde_json::from_str(&cleaned)
            .map_err(|err| RlgError::FormattingError(format!("Invalid JSON: {err}")))?;
        let pretty = serde_json::to_string_pretty(&document)
            .expect("serde_json::to_string_pretty cannot fail on a valid Value");
        Ok(pretty)
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A well-formed Common Log Format line shared by the tests below.
    const CLF_SAMPLE: &str = r#"127.0.0.1 - - [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326"#;

    /// Names parse case-insensitively and unknown names are rejected.
    #[test]
    fn test_log_format_from_str() {
        let from_lower: LogFormat = "json".parse().unwrap();
        assert_eq!(from_lower, LogFormat::JSON);

        let from_upper: LogFormat = "CLF".parse().unwrap();
        assert_eq!(from_upper, LogFormat::CLF);

        assert!("invalid".parse::<LogFormat>().is_err());
    }

    /// A valid CLF line and a valid JSON object both pass validation.
    #[test]
    fn test_log_format_validate() {
        assert!(LogFormat::CLF.validate(CLF_SAMPLE));
        assert!(LogFormat::JSON.validate(r#"{"key": "value"}"#));
    }

    /// JSON is re-serialised, while a CLF line comes back unchanged.
    #[test]
    fn test_log_format_format_log() {
        let pretty_json = LogFormat::JSON
            .format_log(r#"{"key":"value"}"#)
            .unwrap();
        assert!(pretty_json.contains('"'));

        let clf_out = LogFormat::CLF.format_log(CLF_SAMPLE).unwrap();
        assert_eq!(clf_out, CLF_SAMPLE);
    }
}