use crate::category::Category;
use glob::Pattern;
use regex::Regex;
use serde::{Deserialize, Serialize};
/// Default field-signal threshold, as the name indicates.
///
/// NOTE(review): presumably the fallback for `FieldNameSignal::threshold`; the
/// scale it is measured on and where it is applied are not visible in this
/// file — confirm at the call sites.
pub const DEFAULT_FIELD_SIGNAL_THRESHOLD: f64 = 3.5;
/// A signal keyed on field names: a regex over keys together with the
/// category, label, and threshold attached to it.
#[derive(Debug, Clone)]
pub struct FieldNameSignal {
/// Regex source text for the key matcher.
pub key_pattern: String,
/// Compiled key matcher used by `matches_key`. When the value is built through
/// `FieldNameSignal::new` it is `key_pattern` compiled case-insensitively.
pub(crate) key_regex: Regex,
/// Category attached to this signal.
pub category: Category,
/// Label for this signal. `FieldNameSignal::new` defaults it to
/// `field-signal:<key_pattern>` when no label is supplied.
pub label: String,
/// Threshold attached to this signal.
/// NOTE(review): what it is compared against is not visible in this file.
pub threshold: f64,
}
impl FieldNameSignal {
    /// Builds a signal from a regex source string.
    ///
    /// The pattern is compiled case-insensitively. When `label` is `None`, the
    /// label falls back to `field-signal:<key_pattern>`.
    ///
    /// # Errors
    ///
    /// Returns a message naming the offending pattern when it is not a valid
    /// regular expression.
    pub fn new(
        key_pattern: impl Into<String>,
        category: Category,
        label: Option<String>,
        threshold: f64,
    ) -> Result<Self, String> {
        let key_pattern: String = key_pattern.into();

        let mut builder = regex::RegexBuilder::new(&key_pattern);
        builder.case_insensitive(true);
        let key_regex = match builder.build() {
            Ok(compiled) => compiled,
            Err(e) => {
                return Err(format!("field-name signal pattern {key_pattern:?}: {e}"));
            }
        };

        // An explicit label wins; otherwise derive one from the pattern text.
        let label = match label {
            Some(explicit) => explicit,
            None => format!("field-signal:{key_pattern}"),
        };

        Ok(Self {
            key_pattern,
            key_regex,
            category,
            label,
            threshold,
        })
    }

    /// Returns `true` when the compiled key regex finds a match in `key`.
    #[inline]
    #[must_use]
    pub fn matches_key(&self, key: &str) -> bool {
        let matcher = &self.key_regex;
        matcher.is_match(key)
    }
}
/// A single field rule; rules can nest through `sub_fields`.
///
/// Only `pattern` is required when deserializing. Every other field falls
/// back to `None` / an empty list when absent, and is omitted from serialized
/// output when it holds that empty value.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FieldRule {
/// Pattern identifying the field(s) this rule covers.
/// NOTE(review): the matching syntax (glob, path, regex, ...) is not visible
/// in this file — confirm with the code that consumes it.
pub pattern: String,
/// Optional category for the rule.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub category: Option<Category>,
/// Optional label for the rule.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub label: Option<String>,
/// Optional minimum length.
/// NOTE(review): presumably a lower bound on the field value's length — confirm.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub min_length: Option<usize>,
/// Optional name of a sub-processor.
/// NOTE(review): presumably applied to the field's content — confirm.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub sub_processor: Option<String>,
/// Nested rules; empty by default.
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub sub_fields: Vec<FieldRule>,
}
impl FieldRule {
    /// Creates a rule for `pattern` with every optional setting left unset
    /// and no nested rules.
    #[must_use]
    pub fn new(pattern: impl Into<String>) -> Self {
        let pattern: String = pattern.into();
        Self {
            pattern,
            category: None,
            label: None,
            min_length: None,
            sub_processor: None,
            sub_fields: Vec::new(),
        }
    }

    /// Returns the rule with `min_length` set to `min`.
    #[must_use]
    pub fn with_min_length(self, min: usize) -> Self {
        Self {
            min_length: Some(min),
            ..self
        }
    }

    /// Returns the rule with `category` set.
    #[must_use]
    pub fn with_category(self, category: Category) -> Self {
        Self {
            category: Some(category),
            ..self
        }
    }

    /// Returns the rule with `label` set.
    #[must_use]
    pub fn with_label(self, label: impl Into<String>) -> Self {
        Self {
            label: Some(label.into()),
            ..self
        }
    }

    /// Returns the rule with `sub_processor` set to `name`.
    #[must_use]
    pub fn with_sub_processor(self, name: impl Into<String>) -> Self {
        Self {
            sub_processor: Some(name.into()),
            ..self
        }
    }

    /// Returns the rule with its nested rules replaced by `fields`.
    #[must_use]
    pub fn with_sub_fields(self, fields: Vec<FieldRule>) -> Self {
        Self {
            sub_fields: fields,
            ..self
        }
    }
}
/// Associates a processor and its field rules with the filenames it applies
/// to (see `matches_filename`).
///
/// When deserializing, `processor` and `fields` are required; the other
/// serialized fields default to empty when absent.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileTypeProfile {
/// Name of the processor for this profile.
pub processor: String,
/// Filename suffixes accepted by `matches_filename`. With none configured
/// the profile matches no filename.
#[serde(default)]
pub extensions: Vec<String>,
/// Glob patterns; when non-empty, `matches_filename` requires at least one
/// of them to match the filename or its final path component.
#[serde(default)]
pub include: Vec<String>,
/// Glob patterns; `matches_filename` rejects a filename that any of them
/// matches (full filename or final path component).
#[serde(default)]
pub exclude: Vec<String>,
/// Field rules for this profile.
pub fields: Vec<FieldRule>,
/// Free-form string options.
/// NOTE(review): the recognised keys are not visible in this file.
#[serde(default)]
pub options: std::collections::HashMap<String, String>,
/// Field-name signals. Never serialized or deserialized (`serde(skip)`), so
/// a deserialized profile always starts with an empty list.
#[serde(skip)]
pub field_name_signals: Vec<FieldNameSignal>,
}
impl FileTypeProfile {
    /// Creates a profile for `processor` with the given field rules.
    ///
    /// Extensions, include/exclude patterns, options, and field-name signals
    /// all start empty.
    #[must_use]
    pub fn new(processor: impl Into<String>, fields: Vec<FieldRule>) -> Self {
        Self {
            processor: processor.into(),
            extensions: Vec::new(),
            include: Vec::new(),
            exclude: Vec::new(),
            fields,
            options: std::collections::HashMap::new(),
            field_name_signals: Vec::new(),
        }
    }

    /// Appends `ext` to the filename suffixes this profile accepts.
    #[must_use]
    pub fn with_extension(mut self, ext: impl Into<String>) -> Self {
        self.extensions.push(ext.into());
        self
    }

    /// Sets option `key` to `value`, replacing any earlier value for that key.
    #[must_use]
    pub fn with_option(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
        self.options.insert(key.into(), value.into());
        self
    }

    /// Reports whether this profile applies to `filename`.
    ///
    /// All of the following must hold:
    ///
    /// 1. At least one extension is configured and `filename` ends with one
    ///    of them (plain, case-sensitive suffix comparison).
    /// 2. If any include patterns are configured, at least one matches.
    /// 3. No exclude pattern matches.
    ///
    /// Include and exclude entries are glob patterns tested against both the
    /// full `filename` and its final path component. An entry that is not a
    /// valid glob never matches: as an include it admits nothing, as an
    /// exclude it rejects nothing.
    #[must_use]
    pub fn matches_filename(&self, filename: &str) -> bool {
        if self.extensions.is_empty() {
            return false;
        }
        if !self
            .extensions
            .iter()
            .any(|ext| filename.ends_with(ext.as_str()))
        {
            return false;
        }

        // Fall back to the whole string when there is no final component
        // (or it is not valid UTF-8 as seen through `Path`).
        let basename: &str = std::path::Path::new(filename)
            .file_name()
            .and_then(|n| n.to_str())
            .unwrap_or(filename);

        // A pattern that fails to compile is treated as "does not match".
        let glob_matches =
            |pat: &str| Pattern::new(pat).is_ok_and(|p| p.matches(filename) || p.matches(basename));

        if !self.include.is_empty() && !self.include.iter().any(|pat| glob_matches(pat)) {
            return false;
        }
        if self.exclude.iter().any(|pat| glob_matches(pat)) {
            return false;
        }
        true
    }

    /// Appends `pat` to the include glob patterns.
    #[must_use]
    pub fn with_include(mut self, pat: impl Into<String>) -> Self {
        self.include.push(pat.into());
        self
    }

    /// Appends `pat` to the exclude glob patterns.
    #[must_use]
    pub fn with_exclude(mut self, pat: impl Into<String>) -> Self {
        self.exclude.push(pat.into());
        self
    }
}
impl Serialize for Category {
    /// Serializes the category as the string produced by its `to_string()`.
    fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        let rendered: String = self.to_string();
        serializer.serialize_str(rendered.as_str())
    }
}
impl<'de> Deserialize<'de> for Category {
    /// Deserializes a category from its string form.
    ///
    /// The built-in names listed below map to their variants. Any other
    /// string becomes `Category::Custom`, with a leading `custom:` prefix
    /// removed when present; unknown names are therefore never an error.
    fn deserialize<D>(deserializer: D) -> std::result::Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        let raw = String::deserialize(deserializer)?;
        let category = match raw.as_str() {
            "email" => Self::Email,
            "name" => Self::Name,
            "phone" => Self::Phone,
            "ipv4" => Self::IpV4,
            "ipv6" => Self::IpV6,
            "credit_card" => Self::CreditCard,
            "ssn" => Self::Ssn,
            "hostname" => Self::Hostname,
            "mac_address" => Self::MacAddress,
            "container_id" => Self::ContainerId,
            "uuid" => Self::Uuid,
            "jwt" => Self::Jwt,
            "auth_token" => Self::AuthToken,
            "file_path" => Self::FilePath,
            "windows_sid" => Self::WindowsSid,
            "url" => Self::Url,
            "aws_arn" => Self::AwsArn,
            "azure_resource_id" => Self::AzureResourceId,
            unrecognized => {
                // Accept both `custom:<tag>` and a bare `<tag>`.
                let tag = match unrecognized.strip_prefix("custom:") {
                    Some(stripped) => stripped,
                    None => unrecognized,
                };
                Self::Custom(tag.into())
            }
        };
        Ok(category)
    }
}