use std::collections::HashMap;
use std::ffi::CString;
use std::ptr;
use std::sync::Arc;
use crate::Policy;
use crate::error::PolicyError;
use crate::field::{LogFieldSelector, MetricFieldSelector, TraceFieldSelector};
use crate::proto::tero::policy::v1::{
AggregationTemporality, LogField, LogMatcher, LogSampleKey, MetricField, MetricMatcher,
MetricType, SamplingMode, SpanKind, SpanStatusCode, TraceField, TraceMatcher,
TraceSamplingConfig, log_matcher, log_sample_key, metric_matcher, trace_matcher,
};
use crate::registry::PolicyStats;
use super::keep::CompiledKeep;
use super::match_key::MatchKey;
use super::signal::{LogSignal, MetricSignal, Signal, TraceSignal};
use super::transform::CompiledTransform;
/// Links one compiled pattern back to the policy that contributed it.
///
/// `PatternGroups::compile` stores one of these per pattern, at the position
/// equal to the pattern's Vectorscan id.
#[derive(Debug, Clone)]
pub struct PolicyMatchRef {
/// Policy index copied from the originating `PatternInfo::policy_index`.
pub policy_index: usize,
}
/// Sampling mode resolved from the proto `SamplingMode` of a trace policy.
///
/// `compile_trace_sampling` falls back to `HashSeed` when the mode is unset
/// or does not decode to `Proportional` / `Equalizing`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CompiledSamplingMode {
HashSeed,
Proportional,
Equalizing,
}
/// Pre-computed trace sampling parameters derived from a `TraceSamplingConfig`.
#[derive(Debug, Clone)]
pub struct CompiledTraceSampling {
/// Value of `super::rejection_threshold(probability)`; `0` when no config was given.
pub threshold: u64,
/// Configured percentage divided by 100 and clamped to `[0.0, 1.0]`.
pub probability: f64,
/// Configured sampling precision clamped to `1..=14`; defaults to `4`.
pub precision: u32,
/// Copied from the config; defaults to `true` when unset.
pub fail_closed: bool,
/// Resolved sampling mode.
pub mode: CompiledSamplingMode,
/// Copied from the config; defaults to `0` when unset.
pub hash_seed: u32,
}
/// A policy reduced to the data kept alongside the compiled matchers.
#[derive(Debug)]
pub struct CompiledPolicy<S: Signal> {
/// Policy identifier, copied from `Policy::id()`.
pub id: String,
/// Number of the policy's matchers that are not negated.
pub required_match_count: usize,
/// Keep decision compiled from the target's `keep` setting.
pub keep: CompiledKeep,
/// Compiled transform; only log policies set it, and an empty transform becomes `None`.
pub transform: Option<CompiledTransform<S>>,
/// Stats handle supplied together with the policy.
pub stats: Arc<PolicyStats>,
/// Copied from `Policy::enabled()`.
pub enabled: bool,
/// Field selected by the log target's sample key; `None` for metric and trace policies.
pub sample_key: Option<S::FieldSelector>,
/// Sampling parameters; `Some` only for trace policies.
pub trace_sampling: Option<CompiledTraceSampling>,
}
/// An `Exists` matcher, recorded separately because it needs no regex pattern.
#[derive(Debug, Clone)]
pub struct ExistenceCheck<S: Signal> {
/// Index of the policy that declared the matcher.
pub policy_index: usize,
/// Field whose presence is tested.
pub field: S::FieldSelector,
/// The boolean carried by the `Exists(..)` match variant.
pub should_exist: bool,
/// The matcher's `negate` flag.
pub is_negated: bool,
}
/// A single regex pattern awaiting compilation, plus the policy it belongs to.
#[derive(Debug)]
pub struct PatternInfo {
/// Regex source handed to Vectorscan (literal matchers are escaped and anchored beforehand).
pub pattern: String,
/// Index of the policy that declared the matcher.
pub policy_index: usize,
/// When true the pattern is compiled with `HS_FLAG_CASELESS`.
pub case_insensitive: bool,
}
/// Owns a compiled Vectorscan database and a scratch space allocated for it.
///
/// Both pointers are created in `compile` and released in the `Drop` impl.
pub struct VectorscanDatabase {
/// Database produced by `hs_compile_multi`.
db: *mut vectorscan_rs_sys::hs_database_t,
/// Scratch allocated for `db`; `scan` clones it per call rather than using it directly.
scratch: *mut vectorscan_rs_sys::hs_scratch_t,
}
// NOTE(review): these impls rely on `scan` only reading `db` and only cloning
// `scratch` (never scanning with it directly). Confirm against the Vectorscan
// documentation that concurrent `hs_clone_scratch` calls on one source
// scratch are permitted.
unsafe impl Send for VectorscanDatabase {}
unsafe impl Sync for VectorscanDatabase {}
impl VectorscanDatabase {
    /// Compiles `patterns` into one block-mode Vectorscan database and
    /// allocates a scratch space for it.
    ///
    /// `ids[i]` is the id reported when `patterns[i]` matches and `flags[i]`
    /// holds its `HS_FLAG_*` bits.
    ///
    /// # Errors
    /// Returns `PolicyError::CompileError` when `patterns` is empty, when the
    /// pattern count does not fit the C API's `unsigned int`, when a pattern
    /// contains an interior NUL byte, when Vectorscan rejects a pattern, or
    /// when scratch allocation fails.
    ///
    /// # Panics
    /// Panics if the three slices do not have the same length.
    fn compile(patterns: &[String], ids: &[u32], flags: &[u32]) -> Result<Self, PolicyError> {
        assert_eq!(patterns.len(), ids.len());
        assert_eq!(patterns.len(), flags.len());
        if patterns.is_empty() {
            return Err(PolicyError::CompileError {
                reason: "no patterns to compile".to_string(),
            });
        }
        // `hs_compile_multi` takes the element count as a 32-bit unsigned
        // integer; refuse counts that do not fit instead of truncating them.
        let pattern_count =
            u32::try_from(patterns.len()).map_err(|_| PolicyError::CompileError {
                reason: format!("too many patterns to compile: {}", patterns.len()),
            })?;
        // The CStrings must outlive `pattern_ptrs`, which borrows their buffers.
        let c_patterns: Vec<CString> = patterns
            .iter()
            .map(|p| {
                CString::new(p.as_str()).map_err(|e| PolicyError::CompileError {
                    reason: format!("invalid pattern string: {}", e),
                })
            })
            .collect::<Result<Vec<_>, _>>()?;
        let pattern_ptrs: Vec<*const std::ffi::c_char> =
            c_patterns.iter().map(|s| s.as_ptr()).collect();
        let mut db: *mut vectorscan_rs_sys::hs_database_t = ptr::null_mut();
        let mut compile_error: *mut vectorscan_rs_sys::hs_compile_error_t = ptr::null_mut();
        // SAFETY: the three arrays each hold `pattern_count` elements (asserted
        // above), every pattern pointer refers to a live NUL-terminated
        // CString, and both out-pointers reference valid locals.
        let result = unsafe {
            vectorscan_rs_sys::hs_compile_multi(
                pattern_ptrs.as_ptr(),
                flags.as_ptr(),
                ids.as_ptr(),
                pattern_count,
                vectorscan_rs_sys::HS_MODE_BLOCK,
                ptr::null(),
                &mut db,
                &mut compile_error,
            )
        };
        if result != vectorscan_rs_sys::HS_SUCCESS as i32 {
            let error_msg = if !compile_error.is_null() {
                // SAFETY: `compile_error` is non-null and was filled in by
                // `hs_compile_multi`; its message is copied before it is freed.
                let msg = unsafe {
                    let msg_ptr = (*compile_error).message;
                    if msg_ptr.is_null() {
                        "unknown error".to_string()
                    } else {
                        std::ffi::CStr::from_ptr(msg_ptr)
                            .to_string_lossy()
                            .into_owned()
                    }
                };
                // SAFETY: the error object is freed exactly once and not used afterwards.
                unsafe {
                    vectorscan_rs_sys::hs_free_compile_error(compile_error);
                }
                msg
            } else {
                format!("compile failed with code {}", result)
            };
            return Err(PolicyError::CompileError {
                reason: format!("failed to compile Vectorscan database: {}", error_msg),
            });
        }
        let mut scratch: *mut vectorscan_rs_sys::hs_scratch_t = ptr::null_mut();
        // SAFETY: `db` was just produced by a successful compile.
        let result = unsafe { vectorscan_rs_sys::hs_alloc_scratch(db, &mut scratch) };
        if result != vectorscan_rs_sys::HS_SUCCESS as i32 {
            // SAFETY: `db` is valid and no wrapper owns it yet, so release it
            // here to avoid leaking it on the error path.
            unsafe {
                vectorscan_rs_sys::hs_free_database(db);
            }
            return Err(PolicyError::CompileError {
                reason: format!("failed to allocate scratch space: code {}", result),
            });
        }
        Ok(VectorscanDatabase { db, scratch })
    }

    /// Scans `data` and returns the ids of the patterns that reported a match,
    /// in the order the match callback received them.
    ///
    /// A private clone of the stored scratch space is created for the call and
    /// freed before returning.
    ///
    /// # Errors
    /// Returns `PolicyError::CompileError` when `data` is longer than
    /// `u32::MAX` bytes (the C API takes a 32-bit length), when the scratch
    /// cannot be cloned, or when the scan itself reports a failure other than
    /// `HS_SCAN_TERMINATED`.
    pub fn scan(&self, data: &[u8]) -> Result<Vec<u32>, PolicyError> {
        // `hs_scan` takes the length as a 32-bit unsigned integer. A plain `as`
        // cast would wrap for larger inputs and scan the wrong number of
        // bytes, so reject such inputs explicitly.
        let data_len = u32::try_from(data.len()).map_err(|_| PolicyError::CompileError {
            reason: format!(
                "scan input of {} bytes exceeds the maximum scannable length",
                data.len()
            ),
        })?;
        let matches = std::cell::RefCell::new(Vec::new());
        let mut scan_scratch: *mut vectorscan_rs_sys::hs_scratch_t = ptr::null_mut();
        // SAFETY: `self.scratch` was allocated in `compile` and stays valid
        // until `drop`; the out-pointer references a valid local.
        let result =
            unsafe { vectorscan_rs_sys::hs_clone_scratch(self.scratch, &mut scan_scratch) };
        if result != vectorscan_rs_sys::HS_SUCCESS as i32 {
            return Err(PolicyError::CompileError {
                reason: format!("failed to clone scratch space: code {}", result),
            });
        }
        /// Match callback: appends the pattern id to the `RefCell<Vec<u32>>`
        /// passed as `context` and returns 0.
        unsafe extern "C" fn on_match(
            id: std::ffi::c_uint,
            _from: std::ffi::c_ulonglong,
            _to: std::ffi::c_ulonglong,
            _flags: std::ffi::c_uint,
            context: *mut std::ffi::c_void,
        ) -> std::ffi::c_int {
            // SAFETY: `context` is the address of the `matches` RefCell owned
            // by the enclosing `scan` call, which outlives `hs_scan`.
            unsafe {
                let matches = &*(context as *const std::cell::RefCell<Vec<u32>>);
                matches.borrow_mut().push(id);
            }
            0
        }
        // SAFETY: `self.db` is a valid database, `data` is readable for
        // `data_len` bytes, `scan_scratch` was cloned above, and the context
        // pointer refers to `matches`, which lives until after the call.
        let result = unsafe {
            vectorscan_rs_sys::hs_scan(
                self.db,
                data.as_ptr() as *const std::ffi::c_char,
                data_len,
                0,
                scan_scratch,
                Some(on_match),
                &matches as *const _ as *mut std::ffi::c_void,
            )
        };
        // SAFETY: `scan_scratch` came from `hs_clone_scratch` and is not used again.
        unsafe {
            vectorscan_rs_sys::hs_free_scratch(scan_scratch);
        }
        if result != vectorscan_rs_sys::HS_SUCCESS as i32
            && result != vectorscan_rs_sys::HS_SCAN_TERMINATED
        {
            return Err(PolicyError::CompileError {
                reason: format!("scan failed with code {}", result),
            });
        }
        Ok(matches.into_inner())
    }
}
impl Drop for VectorscanDatabase {
    /// Frees the scratch space first and the database second, skipping any
    /// pointer that is null.
    fn drop(&mut self) {
        let (scratch, db) = (self.scratch, self.db);
        if !scratch.is_null() {
            // SAFETY: `scratch` was allocated in `compile` and is freed only here.
            unsafe {
                vectorscan_rs_sys::hs_free_scratch(scratch);
            }
        }
        if !db.is_null() {
            // SAFETY: `db` was created in `compile` and is freed only here.
            unsafe {
                vectorscan_rs_sys::hs_free_database(db);
            }
        }
    }
}
/// A compiled Vectorscan database plus the table mapping its pattern ids back to policies.
pub struct CompiledDatabase {
/// The compiled database used for scanning.
pub database: VectorscanDatabase,
/// Entry `i` describes the policy behind pattern id `i` (ids are assigned by enumeration order).
pub pattern_index: Vec<PolicyMatchRef>,
}
impl std::fmt::Debug for CompiledDatabase {
    /// Prints only the number of patterns; the raw database handle is omitted.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let pattern_count = self.pattern_index.len();
        let mut builder = f.debug_struct("CompiledDatabase");
        builder.field("pattern_count", &pattern_count);
        builder.finish()
    }
}
/// Final output of policy compilation for one signal type.
#[derive(Debug)]
pub struct CompiledMatchers<S: Signal> {
/// One compiled database per match key (field selector plus negation flag).
pub databases: HashMap<MatchKey<S>, CompiledDatabase>,
/// `Exists` matchers, which are evaluated without a regex database.
pub existence_checks: Vec<ExistenceCheck<S>>,
/// The compiled policies.
pub policies: Vec<CompiledPolicy<S>>,
}
impl CompiledMatchers<LogSignal> {
    /// Groups the log matchers of `policies` and compiles them.
    ///
    /// # Errors
    /// Forwards any `PolicyError` raised while grouping or compiling.
    pub fn build(
        policies: impl Iterator<Item = (Policy, Arc<PolicyStats>)>,
    ) -> Result<Self, PolicyError> {
        PatternGroups::<LogSignal>::build_from_log_policies(policies)
            .and_then(|grouped| grouped.compile())
    }
}
impl CompiledMatchers<MetricSignal> {
    /// Groups the metric matchers of `policies` and compiles them.
    ///
    /// # Errors
    /// Forwards any `PolicyError` raised while grouping or compiling.
    pub fn build(
        policies: impl Iterator<Item = (Policy, Arc<PolicyStats>)>,
    ) -> Result<Self, PolicyError> {
        PatternGroups::<MetricSignal>::build_from_metric_policies(policies)
            .and_then(|grouped| grouped.compile())
    }
}
impl CompiledMatchers<TraceSignal> {
    /// Groups the trace matchers of `policies` and compiles them.
    ///
    /// # Errors
    /// Forwards any `PolicyError` raised while grouping or compiling.
    pub fn build(
        policies: impl Iterator<Item = (Policy, Arc<PolicyStats>)>,
    ) -> Result<Self, PolicyError> {
        PatternGroups::<TraceSignal>::build_from_trace_policies(policies)
            .and_then(|grouped| grouped.compile())
    }
}
/// Intermediate form between raw policies and compiled databases.
///
/// Patterns are bucketed by match key so that `compile` can build one
/// Vectorscan database per bucket.
#[derive(Debug)]
pub struct PatternGroups<S: Signal> {
/// Regex patterns grouped by match key.
pub groups: HashMap<MatchKey<S>, Vec<PatternInfo>>,
/// `Exists` matchers collected while grouping.
pub existence_checks: Vec<ExistenceCheck<S>>,
/// Compiled per-policy data.
pub policies: Vec<CompiledPolicy<S>>,
}
impl<S: Signal> Default for PatternGroups<S> {
    /// Creates an empty set of groups with no policies or existence checks.
    fn default() -> Self {
        let groups = HashMap::new();
        let existence_checks = Vec::new();
        let policies = Vec::new();
        Self {
            groups,
            existence_checks,
            policies,
        }
    }
}
impl PatternGroups<LogSignal> {
    /// Collects the matchers of every policy that has a log target.
    ///
    /// Policies without a log target are skipped. For each remaining policy a
    /// `CompiledPolicy` is appended to `policies`; regex-style matchers go
    /// into `groups` and `Exists` matchers into `existence_checks`.
    ///
    /// # Errors
    /// Returns `PolicyError::InvalidPolicy` when a log target has no matchers,
    /// and forwards errors from transform compilation, `CompiledKeep::parse`
    /// and field extraction.
    pub fn build_from_log_policies(
        policies: impl Iterator<Item = (Policy, Arc<PolicyStats>)>,
    ) -> Result<Self, PolicyError> {
        let mut result = PatternGroups::default();
        for (policy, stats) in policies {
            let Some(log_target) = policy.log_target() else {
                continue;
            };
            if log_target.r#match.is_empty() {
                return Err(PolicyError::InvalidPolicy {
                    policy_id: policy.id().to_string(),
                    reason: "log target must have at least one matcher".to_string(),
                });
            }
            let required_match_count = log_target.r#match.iter().filter(|m| !m.negate).count();
            let transform = log_target
                .transform
                .as_ref()
                .map(|t| CompiledTransform::from_proto(t, policy.id()))
                .transpose()?
                // A transform with no operations is treated as absent.
                .filter(|t| !t.is_empty());
            let sample_key = log_target
                .sample_key
                .as_ref()
                .and_then(extract_log_sample_key);
            // The index must be this policy's position in `result.policies`.
            // Taking it from the input position would drift as soon as a
            // policy without a log target is skipped above.
            let policy_index = result.policies.len();
            result.policies.push(CompiledPolicy {
                id: policy.id().to_string(),
                required_match_count,
                keep: CompiledKeep::parse(&log_target.keep)?,
                transform,
                stats,
                enabled: policy.enabled(),
                sample_key,
                trace_sampling: None,
            });
            for matcher in &log_target.r#match {
                let field = extract_log_field(matcher)?;
                let is_negated = matcher.negate;
                let case_insensitive = matcher.case_insensitive;
                process_match_type(
                    matcher.r#match.as_ref(),
                    field,
                    is_negated,
                    case_insensitive,
                    policy_index,
                    &mut result.groups,
                    &mut result.existence_checks,
                );
            }
        }
        Ok(result)
    }
}
impl PatternGroups<MetricSignal> {
    /// Collects the matchers of every policy that has a metric target.
    ///
    /// Policies without a metric target are skipped. The boolean `keep` of
    /// the target maps to `CompiledKeep::All` / `CompiledKeep::None`.
    ///
    /// # Errors
    /// Returns `PolicyError::InvalidPolicy` when a metric target has no
    /// matchers, and forwards errors from field extraction.
    pub fn build_from_metric_policies(
        policies: impl Iterator<Item = (Policy, Arc<PolicyStats>)>,
    ) -> Result<Self, PolicyError> {
        let mut result = PatternGroups::default();
        for (policy, stats) in policies {
            let Some(metric_target) = policy.metric_target() else {
                continue;
            };
            if metric_target.r#match.is_empty() {
                return Err(PolicyError::InvalidPolicy {
                    policy_id: policy.id().to_string(),
                    reason: "metric target must have at least one matcher".to_string(),
                });
            }
            let required_match_count =
                metric_target.r#match.iter().filter(|m| !m.negate).count();
            let keep = if metric_target.keep {
                CompiledKeep::All
            } else {
                CompiledKeep::None
            };
            // The index must be this policy's position in `result.policies`.
            // Taking it from the input position would drift as soon as a
            // policy without a metric target is skipped above.
            let policy_index = result.policies.len();
            result.policies.push(CompiledPolicy {
                id: policy.id().to_string(),
                required_match_count,
                keep,
                transform: None,
                stats,
                enabled: policy.enabled(),
                sample_key: None,
                trace_sampling: None,
            });
            for matcher in &metric_target.r#match {
                let is_negated = matcher.negate;
                let case_insensitive = matcher.case_insensitive;
                let extraction = extract_metric_field(matcher)?;
                // Enum-valued fields carry their own exact match; it takes
                // precedence over the matcher's explicit match type.
                let match_type = extraction
                    .synthesized_match
                    .as_ref()
                    .or(matcher.r#match.as_ref());
                process_match_type(
                    match_type,
                    extraction.field,
                    is_negated,
                    case_insensitive,
                    policy_index,
                    &mut result.groups,
                    &mut result.existence_checks,
                );
            }
        }
        Ok(result)
    }
}
impl PatternGroups<TraceSignal> {
    /// Collects the matchers of every policy that has a trace target.
    ///
    /// Policies without a trace target are skipped. The target's sampling
    /// config is compiled into both a `CompiledKeep` and a
    /// `CompiledTraceSampling`.
    ///
    /// # Errors
    /// Returns `PolicyError::InvalidPolicy` when a trace target has no
    /// matchers, and forwards errors from field extraction.
    pub fn build_from_trace_policies(
        policies: impl Iterator<Item = (Policy, Arc<PolicyStats>)>,
    ) -> Result<Self, PolicyError> {
        let mut result = PatternGroups::default();
        for (policy, stats) in policies {
            let Some(trace_target) = policy.trace_target() else {
                continue;
            };
            if trace_target.r#match.is_empty() {
                return Err(PolicyError::InvalidPolicy {
                    policy_id: policy.id().to_string(),
                    reason: "trace target must have at least one matcher".to_string(),
                });
            }
            let required_match_count = trace_target.r#match.iter().filter(|m| !m.negate).count();
            let keep = compile_trace_keep(trace_target.keep.as_ref());
            let trace_sampling = compile_trace_sampling(trace_target.keep.as_ref());
            // The index must be this policy's position in `result.policies`.
            // Taking it from the input position would drift as soon as a
            // policy without a trace target is skipped above.
            let policy_index = result.policies.len();
            result.policies.push(CompiledPolicy {
                id: policy.id().to_string(),
                required_match_count,
                keep,
                transform: None,
                stats,
                enabled: policy.enabled(),
                sample_key: None,
                trace_sampling: Some(trace_sampling),
            });
            for matcher in &trace_target.r#match {
                let is_negated = matcher.negate;
                let case_insensitive = matcher.case_insensitive;
                let extraction = extract_trace_field(matcher)?;
                // Fields that embed their expected value carry their own exact
                // match; it takes precedence over the explicit match type.
                let match_type = extraction
                    .synthesized_match
                    .as_ref()
                    .or(matcher.r#match.as_ref());
                process_match_type(
                    match_type,
                    extraction.field,
                    is_negated,
                    case_insensitive,
                    policy_index,
                    &mut result.groups,
                    &mut result.existence_checks,
                );
            }
        }
        Ok(result)
    }
}
impl<S: Signal> PatternGroups<S> {
pub fn compile(self) -> Result<CompiledMatchers<S>, PolicyError> {
let mut databases = HashMap::new();
for (key, patterns) in self.groups {
if patterns.is_empty() {
continue;
}
let mut pattern_strings = Vec::with_capacity(patterns.len());
let mut pattern_ids = Vec::with_capacity(patterns.len());
let mut pattern_flags = Vec::with_capacity(patterns.len());
let mut pattern_index = Vec::with_capacity(patterns.len());
for (pattern_id, info) in patterns.into_iter().enumerate() {
pattern_strings.push(info.pattern);
pattern_ids.push(pattern_id as u32);
let mut flags = vectorscan_rs_sys::HS_FLAG_SINGLEMATCH;
if info.case_insensitive {
flags |= vectorscan_rs_sys::HS_FLAG_CASELESS;
}
pattern_flags.push(flags);
pattern_index.push(PolicyMatchRef {
policy_index: info.policy_index,
});
}
let database =
VectorscanDatabase::compile(&pattern_strings, &pattern_ids, &pattern_flags)?;
databases.insert(
key,
CompiledDatabase {
database,
pattern_index,
},
);
}
Ok(CompiledMatchers {
databases,
existence_checks: self.existence_checks,
policies: self.policies,
})
}
}
/// Records one matcher either as a regex pattern or as an existence check.
///
/// Does nothing when `match_type` is `None`. `Exists` matchers are appended to
/// `existence_checks`; every other variant becomes a regex (literals are
/// escaped and anchored as needed) stored under the key built from `field`
/// and `is_negated`.
fn process_match_type<S: Signal, M>(
    match_type: Option<&M>,
    field: S::FieldSelector,
    is_negated: bool,
    case_insensitive: bool,
    policy_index: usize,
    groups: &mut HashMap<MatchKey<S>, Vec<PatternInfo>>,
    existence_checks: &mut Vec<ExistenceCheck<S>>,
) where
    M: MatchTypeAccessor,
{
    let variant = match match_type {
        Some(m) => m.as_match_variant(),
        None => return,
    };
    let pattern = match variant {
        MatchVariant::Exists(should_exist) => {
            // Presence tests need no pattern; record them and stop.
            existence_checks.push(ExistenceCheck {
                policy_index,
                field,
                should_exist,
                is_negated,
            });
            return;
        }
        MatchVariant::Exact(literal) => format!("^{}$", regex_escape(literal)),
        MatchVariant::Regex(source) => source.to_string(),
        MatchVariant::StartsWith(prefix) => format!("^{}", regex_escape(prefix)),
        MatchVariant::EndsWith(suffix) => format!("{}$", regex_escape(suffix)),
        MatchVariant::Contains(needle) => regex_escape(needle),
    };
    let bucket = groups.entry(MatchKey::new(field, is_negated)).or_default();
    bucket.push(PatternInfo {
        pattern,
        policy_index,
        case_insensitive,
    });
}
/// Signal-independent view of a matcher's match type, borrowing its string payload.
enum MatchVariant<'a> {
/// Whole-value literal match (compiled as `^…$` with the literal escaped).
Exact(&'a str),
/// Caller-supplied regex, used verbatim.
Regex(&'a str),
/// Presence test carrying the boolean from the proto matcher.
Exists(bool),
/// Literal prefix match (compiled as `^…`).
StartsWith(&'a str),
/// Literal suffix match (compiled as `…$`).
EndsWith(&'a str),
/// Literal substring match (compiled as the escaped literal).
Contains(&'a str),
}
/// Lets `process_match_type` treat the log, metric and trace match enums uniformly.
trait MatchTypeAccessor {
/// Returns the signal-independent view of this match type.
fn as_match_variant(&self) -> MatchVariant<'_>;
}
impl MatchTypeAccessor for log_matcher::Match {
    /// Maps each log match variant onto the `MatchVariant` of the same name.
    fn as_match_variant(&self) -> MatchVariant<'_> {
        match self {
            Self::Exact(text) => MatchVariant::Exact(text.as_str()),
            Self::Regex(text) => MatchVariant::Regex(text.as_str()),
            Self::Exists(flag) => MatchVariant::Exists(*flag),
            Self::StartsWith(text) => MatchVariant::StartsWith(text.as_str()),
            Self::EndsWith(text) => MatchVariant::EndsWith(text.as_str()),
            Self::Contains(text) => MatchVariant::Contains(text.as_str()),
        }
    }
}
impl MatchTypeAccessor for metric_matcher::Match {
    /// Maps each metric match variant onto the `MatchVariant` of the same name.
    fn as_match_variant(&self) -> MatchVariant<'_> {
        match self {
            Self::Exact(text) => MatchVariant::Exact(text.as_str()),
            Self::Regex(text) => MatchVariant::Regex(text.as_str()),
            Self::Exists(flag) => MatchVariant::Exists(*flag),
            Self::StartsWith(text) => MatchVariant::StartsWith(text.as_str()),
            Self::EndsWith(text) => MatchVariant::EndsWith(text.as_str()),
            Self::Contains(text) => MatchVariant::Contains(text.as_str()),
        }
    }
}
impl MatchTypeAccessor for trace_matcher::Match {
    /// Maps each trace match variant onto the `MatchVariant` of the same name.
    fn as_match_variant(&self) -> MatchVariant<'_> {
        match self {
            Self::Exact(text) => MatchVariant::Exact(text.as_str()),
            Self::Regex(text) => MatchVariant::Regex(text.as_str()),
            Self::Exists(flag) => MatchVariant::Exists(*flag),
            Self::StartsWith(text) => MatchVariant::StartsWith(text.as_str()),
            Self::EndsWith(text) => MatchVariant::EndsWith(text.as_str()),
            Self::Contains(text) => MatchVariant::Contains(text.as_str()),
        }
    }
}
/// Resolves the field selector targeted by a log matcher.
///
/// An enum value that does not decode to a `LogField` becomes
/// `LogField::Unspecified`.
///
/// # Errors
/// Returns `PolicyError::FieldError` when the matcher has no field set.
fn extract_log_field(matcher: &LogMatcher) -> Result<LogFieldSelector, PolicyError> {
    let Some(target) = matcher.field.as_ref() else {
        return Err(PolicyError::FieldError {
            reason: "matcher has no field specified".to_string(),
        });
    };
    let selector = match target {
        log_matcher::Field::LogField(raw) => {
            LogFieldSelector::Simple(LogField::try_from(*raw).unwrap_or(LogField::Unspecified))
        }
        log_matcher::Field::LogAttribute(path) => LogFieldSelector::from_log_attribute(path),
        log_matcher::Field::ResourceAttribute(path) => {
            LogFieldSelector::from_resource_attribute(path)
        }
        log_matcher::Field::ScopeAttribute(path) => LogFieldSelector::from_scope_attribute(path),
    };
    Ok(selector)
}
/// Resolves the field selector named by a log sample key.
///
/// Returns `None` when the key has no field set. An enum value that does not
/// decode to a `LogField` becomes `LogField::Unspecified`.
fn extract_log_sample_key(sample_key: &LogSampleKey) -> Option<LogFieldSelector> {
    sample_key.field.as_ref().map(|target| match target {
        log_sample_key::Field::LogField(raw) => {
            LogFieldSelector::Simple(LogField::try_from(*raw).unwrap_or(LogField::Unspecified))
        }
        log_sample_key::Field::LogAttribute(path) => LogFieldSelector::from_log_attribute(path),
        log_sample_key::Field::ResourceAttribute(path) => {
            LogFieldSelector::from_resource_attribute(path)
        }
        log_sample_key::Field::ScopeAttribute(path) => {
            LogFieldSelector::from_scope_attribute(path)
        }
    })
}
/// Result of resolving a metric matcher's field.
struct MetricFieldExtraction {
/// Selector for the targeted field.
field: MetricFieldSelector,
/// Exact match synthesized for enum-valued fields (metric type, temporality); `None` otherwise.
synthesized_match: Option<metric_matcher::Match>,
}
fn extract_metric_field(matcher: &MetricMatcher) -> Result<MetricFieldExtraction, PolicyError> {
match &matcher.field {
Some(metric_matcher::Field::MetricField(f)) => {
let field = MetricField::try_from(*f).unwrap_or(MetricField::Unspecified);
Ok(MetricFieldExtraction {
field: MetricFieldSelector::Simple(field),
synthesized_match: None,
})
}
Some(metric_matcher::Field::DatapointAttribute(path)) => Ok(MetricFieldExtraction {
field: MetricFieldSelector::from_datapoint_attribute(path),
synthesized_match: None,
}),
Some(metric_matcher::Field::ResourceAttribute(path)) => Ok(MetricFieldExtraction {
field: MetricFieldSelector::from_resource_attribute(path),
synthesized_match: None,
}),
Some(metric_matcher::Field::ScopeAttribute(path)) => Ok(MetricFieldExtraction {
field: MetricFieldSelector::from_scope_attribute(path),
synthesized_match: None,
}),
Some(metric_matcher::Field::MetricType(t)) => {
let metric_type = MetricType::try_from(*t).unwrap_or(MetricType::Unspecified);
Ok(MetricFieldExtraction {
field: MetricFieldSelector::Type,
synthesized_match: Some(metric_matcher::Match::Exact(
metric_type.as_str_name().to_string(),
)),
})
}
Some(metric_matcher::Field::AggregationTemporality(t)) => {
let temporality =
AggregationTemporality::try_from(*t).unwrap_or(AggregationTemporality::Unspecified);
Ok(MetricFieldExtraction {
field: MetricFieldSelector::Temporality,
synthesized_match: Some(metric_matcher::Match::Exact(
temporality.as_str_name().to_string(),
)),
})
}
None => Err(PolicyError::FieldError {
reason: "matcher has no field specified".to_string(),
}),
}
}
/// Result of resolving a trace matcher's field.
struct TraceFieldExtraction {
/// Selector for the targeted field.
field: TraceFieldSelector,
/// Exact match synthesized for fields that embed their expected value; `None` otherwise.
synthesized_match: Option<trace_matcher::Match>,
}
fn extract_trace_field(matcher: &TraceMatcher) -> Result<TraceFieldExtraction, PolicyError> {
match &matcher.field {
Some(trace_matcher::Field::TraceField(f)) => {
let field = TraceField::try_from(*f).unwrap_or(TraceField::Unspecified);
Ok(TraceFieldExtraction {
field: TraceFieldSelector::Simple(field),
synthesized_match: None,
})
}
Some(trace_matcher::Field::SpanAttribute(path)) => Ok(TraceFieldExtraction {
field: TraceFieldSelector::from_span_attribute(path),
synthesized_match: None,
}),
Some(trace_matcher::Field::ResourceAttribute(path)) => Ok(TraceFieldExtraction {
field: TraceFieldSelector::from_resource_attribute(path),
synthesized_match: None,
}),
Some(trace_matcher::Field::ScopeAttribute(path)) => Ok(TraceFieldExtraction {
field: TraceFieldSelector::from_scope_attribute(path),
synthesized_match: None,
}),
Some(trace_matcher::Field::SpanKind(k)) => {
let kind = SpanKind::try_from(*k).unwrap_or(SpanKind::Unspecified);
Ok(TraceFieldExtraction {
field: TraceFieldSelector::SpanKind,
synthesized_match: Some(trace_matcher::Match::Exact(
kind.as_str_name().to_string(),
)),
})
}
Some(trace_matcher::Field::SpanStatus(s)) => {
let status = SpanStatusCode::try_from(*s).unwrap_or(SpanStatusCode::Unspecified);
Ok(TraceFieldExtraction {
field: TraceFieldSelector::SpanStatus,
synthesized_match: Some(trace_matcher::Match::Exact(
status.as_str_name().to_string(),
)),
})
}
Some(trace_matcher::Field::EventName(name)) => Ok(TraceFieldExtraction {
field: TraceFieldSelector::EventName,
synthesized_match: Some(trace_matcher::Match::Exact(name.clone())),
}),
Some(trace_matcher::Field::EventAttribute(path)) => Ok(TraceFieldExtraction {
field: TraceFieldSelector::from_event_attribute(path),
synthesized_match: None,
}),
Some(trace_matcher::Field::LinkTraceId(id)) => Ok(TraceFieldExtraction {
field: TraceFieldSelector::LinkTraceId,
synthesized_match: Some(trace_matcher::Match::Exact(id.clone())),
}),
None => Err(PolicyError::FieldError {
reason: "matcher has no field specified".to_string(),
}),
}
}
/// Converts an optional trace sampling config into a keep decision.
///
/// No config, or a percentage of at least 100, keeps everything; a percentage
/// of 0 or less keeps nothing; anything else becomes a fractional
/// `CompiledKeep::Percentage`.
fn compile_trace_keep(config: Option<&TraceSamplingConfig>) -> CompiledKeep {
    let Some(cfg) = config else {
        return CompiledKeep::All;
    };
    let percentage = cfg.percentage;
    if percentage >= 100.0 {
        CompiledKeep::All
    } else if percentage <= 0.0 {
        CompiledKeep::None
    } else {
        CompiledKeep::Percentage(percentage as f64 / 100.0)
    }
}
/// Pre-computes the sampling parameters for a trace policy.
///
/// Without a config the result keeps everything: threshold 0, probability
/// 1.0, precision 4, fail-closed, hash-seed mode with seed 0. With a config,
/// the percentage is scaled to a probability clamped to `[0.0, 1.0]`, the
/// precision is clamped to `1..=14` (default 4), and an unset or unrecognized
/// mode falls back to `HashSeed`.
fn compile_trace_sampling(config: Option<&TraceSamplingConfig>) -> CompiledTraceSampling {
    let Some(cfg) = config else {
        return CompiledTraceSampling {
            threshold: 0,
            probability: 1.0,
            precision: 4,
            fail_closed: true,
            mode: CompiledSamplingMode::HashSeed,
            hash_seed: 0,
        };
    };
    let probability = (cfg.percentage as f64 / 100.0).clamp(0.0, 1.0);
    let precision = cfg.sampling_precision.unwrap_or(4).clamp(1, 14);
    let requested_mode = cfg.mode.and_then(|raw| SamplingMode::try_from(raw).ok());
    let mode = match requested_mode {
        Some(SamplingMode::Proportional) => CompiledSamplingMode::Proportional,
        Some(SamplingMode::Equalizing) => CompiledSamplingMode::Equalizing,
        _ => CompiledSamplingMode::HashSeed,
    };
    let threshold = super::rejection_threshold(probability);
    let fail_closed = cfg.fail_closed.unwrap_or(true);
    let hash_seed = cfg.hash_seed.unwrap_or(0);
    CompiledTraceSampling {
        threshold,
        probability,
        precision,
        fail_closed,
        mode,
        hash_seed,
    }
}
/// Escapes regex metacharacters in `s` so it matches as a literal.
///
/// A backslash is inserted before each of `\ . + * ? ( ) [ ] { } ^ $ |`;
/// every other character is copied unchanged.
fn regex_escape(s: &str) -> String {
    const METACHARACTERS: &str = r"\.+*?()[]{}^$|";
    s.chars()
        .fold(String::with_capacity(s.len() * 2), |mut escaped, ch| {
            if METACHARACTERS.contains(ch) {
                escaped.push('\\');
            }
            escaped.push(ch);
            escaped
        })
}
#[cfg(test)]
mod tests {
use super::*;
use crate::proto::tero::policy::v1::{
LogAdd, LogRedact, LogTarget, LogTransform, Policy as ProtoPolicy, log_add, log_redact,
};
/// Builds an enabled log policy with a single case-sensitive matcher.
fn make_policy_with_matcher(
    id: &str,
    field: log_matcher::Field,
    match_type: log_matcher::Match,
    negate: bool,
    keep: &str,
) -> Policy {
    let target = LogTarget {
        r#match: vec![LogMatcher {
            field: Some(field),
            r#match: Some(match_type),
            negate,
            case_insensitive: false,
        }],
        keep: keep.to_owned(),
        transform: None,
        sample_key: None,
    };
    Policy::new(ProtoPolicy {
        id: id.to_owned(),
        name: id.to_owned(),
        enabled: true,
        target: Some(crate::proto::tero::policy::v1::policy::Target::Log(target)),
        ..Default::default()
    })
}
/// Builds a single-segment attribute path for `key`.
fn attr_path(key: &str) -> crate::proto::tero::policy::v1::AttributePath {
    let path = vec![key.to_owned()];
    crate::proto::tero::policy::v1::AttributePath { path }
}
/// A regex matcher is stored verbatim under its field's non-negated key.
#[test]
fn build_pattern_groups_regex() {
    let body_regex = make_policy_with_matcher(
        "test",
        log_matcher::Field::LogField(LogField::Body.into()),
        log_matcher::Match::Regex("error.*".to_string()),
        false,
        "none",
    );
    let input = std::iter::once((body_regex, Arc::new(PolicyStats::default())));
    let built = PatternGroups::build_from_log_policies(input).unwrap();
    assert_eq!(built.policies.len(), 1);
    assert_eq!(built.policies[0].id, "test");
    assert_eq!(built.groups.len(), 1);
    let body_key = MatchKey::new(LogFieldSelector::Simple(LogField::Body), false);
    let body_patterns = built.groups.get(&body_key).unwrap();
    assert_eq!(body_patterns.len(), 1);
    assert_eq!(body_patterns[0].pattern, "error.*");
}
/// An exact matcher is anchored at both ends.
#[test]
fn build_pattern_groups_exact() {
    let severity_exact = make_policy_with_matcher(
        "test",
        log_matcher::Field::LogField(LogField::SeverityText.into()),
        log_matcher::Match::Exact("ERROR".to_string()),
        false,
        "all",
    );
    let input = std::iter::once((severity_exact, Arc::new(PolicyStats::default())));
    let built = PatternGroups::build_from_log_policies(input).unwrap();
    let severity_key = MatchKey::new(LogFieldSelector::Simple(LogField::SeverityText), false);
    let severity_patterns = built.groups.get(&severity_key).unwrap();
    assert_eq!(severity_patterns[0].pattern, "^ERROR$");
}
/// A negated matcher is grouped under the negated key for its field.
#[test]
fn build_pattern_groups_negated() {
    let negated_body = make_policy_with_matcher(
        "test",
        log_matcher::Field::LogField(LogField::Body.into()),
        log_matcher::Match::Regex("debug".to_string()),
        true,
        "none",
    );
    let input = std::iter::once((negated_body, Arc::new(PolicyStats::default())));
    let built = PatternGroups::build_from_log_policies(input).unwrap();
    let negated_key = MatchKey::new(LogFieldSelector::Simple(LogField::Body), true);
    assert!(built.groups.contains_key(&negated_key));
}
/// An `Exists` matcher yields an existence check and no regex group.
#[test]
fn build_pattern_groups_existence() {
    let exists_policy = make_policy_with_matcher(
        "test",
        log_matcher::Field::LogAttribute(attr_path("trace_id")),
        log_matcher::Match::Exists(true),
        false,
        "all",
    );
    let input = std::iter::once((exists_policy, Arc::new(PolicyStats::default())));
    let built = PatternGroups::build_from_log_policies(input).unwrap();
    assert!(built.groups.is_empty());
    assert_eq!(built.existence_checks.len(), 1);
    assert!(built.existence_checks[0].should_exist);
}
/// Metacharacters gain a backslash; plain text is unchanged.
#[test]
fn regex_escape_special_chars() {
    let cases = [
        ("hello.world", "hello\\.world"),
        ("test*", "test\\*"),
        ("a+b", "a\\+b"),
        ("(test)", "\\(test\\)"),
        ("plain", "plain"),
    ];
    for (raw, escaped) in cases {
        assert_eq!(regex_escape(raw), escaped);
    }
}
/// Compiling one regex policy yields one database whose only pattern maps to policy 0.
#[test]
fn compile_pattern_groups() {
    let body_regex = make_policy_with_matcher(
        "test",
        log_matcher::Field::LogField(LogField::Body.into()),
        log_matcher::Match::Regex("error".to_string()),
        false,
        "none",
    );
    let input = std::iter::once((body_regex, Arc::new(PolicyStats::default())));
    let matchers = CompiledMatchers::<LogSignal>::build(input).unwrap();
    assert_eq!(matchers.policies.len(), 1);
    assert_eq!(matchers.databases.len(), 1);
    let body_key = MatchKey::new(LogFieldSelector::Simple(LogField::Body), false);
    let body_db = matchers.databases.get(&body_key).unwrap();
    assert_eq!(body_db.pattern_index.len(), 1);
    assert_eq!(body_db.pattern_index[0].policy_index, 0);
}
/// A policy without a transform compiles with `transform == None`.
#[test]
fn compile_policy_without_transform() {
    let plain_policy = make_policy_with_matcher(
        "test",
        log_matcher::Field::LogField(LogField::Body.into()),
        log_matcher::Match::Regex("error".to_string()),
        false,
        "none",
    );
    let input = std::iter::once((plain_policy, Arc::new(PolicyStats::default())));
    let matchers = CompiledMatchers::<LogSignal>::build(input).unwrap();
    assert!(matchers.policies[0].transform.is_none());
}
/// A transform with one redact and one add compiles to two operations.
#[test]
fn compile_policy_with_transform() {
    let redact_password = LogRedact {
        field: Some(log_redact::Field::LogAttribute(attr_path("password"))),
        replacement: "[REDACTED]".to_string(),
        regex: None,
    };
    let add_processed = LogAdd {
        field: Some(log_add::Field::LogAttribute(attr_path("processed"))),
        value: "true".to_string(),
        upsert: false,
    };
    let target = LogTarget {
        r#match: vec![LogMatcher {
            field: Some(log_matcher::Field::LogField(LogField::Body.into())),
            r#match: Some(log_matcher::Match::Regex("error".to_string())),
            negate: false,
            case_insensitive: false,
        }],
        keep: "all".to_string(),
        transform: Some(LogTransform {
            redact: vec![redact_password],
            add: vec![add_processed],
            ..Default::default()
        }),
        sample_key: None,
    };
    let policy = Policy::new(ProtoPolicy {
        id: "test".to_string(),
        name: "test".to_string(),
        enabled: true,
        target: Some(crate::proto::tero::policy::v1::policy::Target::Log(target)),
        ..Default::default()
    });
    let input = std::iter::once((policy, Arc::new(PolicyStats::default())));
    let matchers = CompiledMatchers::<LogSignal>::build(input).unwrap();
    let compiled_transform = matchers.policies[0].transform.as_ref().unwrap();
    assert_eq!(compiled_transform.ops.len(), 2);
}
/// A transform with no operations is dropped during compilation.
#[test]
fn compile_policy_with_empty_transform() {
    let target = LogTarget {
        r#match: vec![LogMatcher {
            field: Some(log_matcher::Field::LogField(LogField::Body.into())),
            r#match: Some(log_matcher::Match::Regex("error".to_string())),
            negate: false,
            case_insensitive: false,
        }],
        keep: "all".to_string(),
        transform: Some(LogTransform::default()),
        sample_key: None,
    };
    let policy = Policy::new(ProtoPolicy {
        id: "test".to_string(),
        name: "test".to_string(),
        enabled: true,
        target: Some(crate::proto::tero::policy::v1::policy::Target::Log(target)),
        ..Default::default()
    });
    let input = std::iter::once((policy, Arc::new(PolicyStats::default())));
    let matchers = CompiledMatchers::<LogSignal>::build(input).unwrap();
    assert!(matchers.policies[0].transform.is_none());
}
/// A prefix matcher is anchored at the start only.
#[test]
fn build_pattern_groups_starts_with() {
    let prefix_policy = make_policy_with_matcher(
        "test",
        log_matcher::Field::LogField(LogField::Body.into()),
        log_matcher::Match::StartsWith("ERROR:".to_string()),
        false,
        "none",
    );
    let input = std::iter::once((prefix_policy, Arc::new(PolicyStats::default())));
    let built = PatternGroups::build_from_log_policies(input).unwrap();
    let body_key = MatchKey::new(LogFieldSelector::Simple(LogField::Body), false);
    let body_patterns = built.groups.get(&body_key).unwrap();
    assert_eq!(body_patterns[0].pattern, "^ERROR:");
}
/// A suffix matcher is escaped and anchored at the end only.
#[test]
fn build_pattern_groups_ends_with() {
    let suffix_policy = make_policy_with_matcher(
        "test",
        log_matcher::Field::LogField(LogField::Body.into()),
        log_matcher::Match::EndsWith(".json".to_string()),
        false,
        "none",
    );
    let input = std::iter::once((suffix_policy, Arc::new(PolicyStats::default())));
    let built = PatternGroups::build_from_log_policies(input).unwrap();
    let body_key = MatchKey::new(LogFieldSelector::Simple(LogField::Body), false);
    let body_patterns = built.groups.get(&body_key).unwrap();
    assert_eq!(body_patterns[0].pattern, "\\.json$");
}
/// A substring matcher with no metacharacters is stored unchanged and unanchored.
#[test]
fn build_pattern_groups_contains() {
    let contains_policy = make_policy_with_matcher(
        "test",
        log_matcher::Field::LogField(LogField::Body.into()),
        log_matcher::Match::Contains("error".to_string()),
        false,
        "none",
    );
    let input = std::iter::once((contains_policy, Arc::new(PolicyStats::default())));
    let built = PatternGroups::build_from_log_policies(input).unwrap();
    let body_key = MatchKey::new(LogFieldSelector::Simple(LogField::Body), false);
    let body_patterns = built.groups.get(&body_key).unwrap();
    assert_eq!(body_patterns[0].pattern, "error");
}
/// Metacharacters inside a substring matcher are escaped.
#[test]
fn build_pattern_groups_contains_special_chars() {
    let contains_policy = make_policy_with_matcher(
        "test",
        log_matcher::Field::LogField(LogField::Body.into()),
        log_matcher::Match::Contains("file.txt".to_string()),
        false,
        "none",
    );
    let input = std::iter::once((contains_policy, Arc::new(PolicyStats::default())));
    let built = PatternGroups::build_from_log_policies(input).unwrap();
    let body_key = MatchKey::new(LogFieldSelector::Simple(LogField::Body), false);
    let body_patterns = built.groups.get(&body_key).unwrap();
    assert_eq!(body_patterns[0].pattern, "file\\.txt");
}
/// Builds an enabled, non-negated log policy on the body field with the given
/// match type and case sensitivity; `keep` is `"none"`.
fn make_policy_with_case_insensitive(
    id: &str,
    match_type: log_matcher::Match,
    case_insensitive: bool,
) -> Policy {
    let target = LogTarget {
        r#match: vec![LogMatcher {
            field: Some(log_matcher::Field::LogField(LogField::Body.into())),
            r#match: Some(match_type),
            negate: false,
            case_insensitive,
        }],
        keep: "none".to_string(),
        transform: None,
        sample_key: None,
    };
    Policy::new(ProtoPolicy {
        id: id.to_owned(),
        name: id.to_owned(),
        enabled: true,
        target: Some(crate::proto::tero::policy::v1::policy::Target::Log(target)),
        ..Default::default()
    })
}
/// The matcher's case-insensitive flag is carried onto the pattern.
#[test]
fn build_pattern_groups_case_insensitive_flag() {
    let caseless_policy = make_policy_with_case_insensitive(
        "test",
        log_matcher::Match::Exact("ERROR".to_string()),
        true,
    );
    let input = std::iter::once((caseless_policy, Arc::new(PolicyStats::default())));
    let built = PatternGroups::build_from_log_policies(input).unwrap();
    let body_key = MatchKey::new(LogFieldSelector::Simple(LogField::Body), false);
    let body_patterns = built.groups.get(&body_key).unwrap();
    assert!(body_patterns[0].case_insensitive);
}
/// A case-sensitive matcher leaves the pattern's flag unset.
#[test]
fn build_pattern_groups_case_sensitive_flag() {
    let sensitive_policy = make_policy_with_case_insensitive(
        "test",
        log_matcher::Match::Exact("ERROR".to_string()),
        false,
    );
    let input = std::iter::once((sensitive_policy, Arc::new(PolicyStats::default())));
    let built = PatternGroups::build_from_log_policies(input).unwrap();
    let body_key = MatchKey::new(LogFieldSelector::Simple(LogField::Body), false);
    let body_patterns = built.groups.get(&body_key).unwrap();
    assert!(!body_patterns[0].case_insensitive);
}
/// A case-insensitive regex policy compiles into one policy and one database.
#[test]
fn compile_case_insensitive_patterns() {
    let caseless_policy = make_policy_with_case_insensitive(
        "test",
        log_matcher::Match::Regex("error".to_string()),
        true,
    );
    let input = std::iter::once((caseless_policy, Arc::new(PolicyStats::default())));
    let matchers = CompiledMatchers::<LogSignal>::build(input).unwrap();
    assert_eq!(matchers.policies.len(), 1);
    assert_eq!(matchers.databases.len(), 1);
}
/// Compiles a case-insensitive exact matcher for "Error" and scans the resulting body
/// database: every casing of "error" must produce a match, while "warning" must not.
#[test]
fn case_insensitive_exact_match_compiles() {
    let exact_policy = make_policy_with_case_insensitive(
        "test",
        log_matcher::Match::Exact("Error".to_string()),
        true,
    );
    let input = [(exact_policy, Arc::new(PolicyStats::default()))];
    let matchers = CompiledMatchers::<LogSignal>::build(input.into_iter()).unwrap();
    let body_key = MatchKey::new(LogFieldSelector::Simple(LogField::Body), false);
    let body_db = matchers.databases.get(&body_key).unwrap();

    // (scanned input, whether a match is expected, failure message)
    let cases = [
        ("error", true, "Should match 'error' (lowercase)"),
        ("ERROR", true, "Should match 'ERROR' (uppercase)"),
        ("Error", true, "Should match 'Error' (mixed case)"),
        ("warning", false, "Should not match 'warning'"),
    ];
    for (text, expect_match, message) in cases {
        let found = !body_db.database.scan(text.as_bytes()).unwrap().is_empty();
        assert!(found == expect_match, "{}", message);
    }
}
/// Compiles a case-sensitive exact matcher for "Error" and scans the resulting body
/// database: only the identically-cased input may match.
#[test]
fn case_sensitive_exact_match_compiles() {
    let exact_policy = make_policy_with_case_insensitive(
        "test",
        log_matcher::Match::Exact("Error".to_string()),
        false,
    );
    let input = [(exact_policy, Arc::new(PolicyStats::default()))];
    let matchers = CompiledMatchers::<LogSignal>::build(input.into_iter()).unwrap();
    let body_key = MatchKey::new(LogFieldSelector::Simple(LogField::Body), false);
    let body_db = matchers.databases.get(&body_key).unwrap();

    // True when scanning `input` reports at least one match.
    let hits = |input: &[u8]| !body_db.database.scan(input).unwrap().is_empty();

    assert!(hits(b"Error"), "Should match 'Error' (exact case)");
    assert!(!hits(b"error"), "Should NOT match 'error' (wrong case)");
    assert!(!hits(b"ERROR"), "Should NOT match 'ERROR' (wrong case)");
}
/// Compiles a case-insensitive `Contains("error")` matcher and scans the resulting body
/// database.
///
/// Besides the differently-cased positive inputs, the test also scans the pattern's own
/// casing and an input that lacks the substring entirely. Without the negative case the test
/// would still pass if the compiled database matched every input.
#[test]
fn case_insensitive_contains_match() {
    let policy = make_policy_with_case_insensitive(
        "test",
        log_matcher::Match::Contains("error".to_string()),
        true,
    );
    let stats = Arc::new(PolicyStats::default());
    let compiled = CompiledMatchers::<LogSignal>::build([(policy, stats)].into_iter()).unwrap();
    let key = MatchKey::new(LogFieldSelector::Simple(LogField::Body), false);
    let db = compiled.databases.get(&key).unwrap();

    // True when scanning `input` reports at least one match.
    let hits = |input: &[u8]| !db.database.scan(input).unwrap().is_empty();

    assert!(hits(b"This is an ERROR message"), "Should match ERROR in message");
    assert!(hits(b"This is an Error message"), "Should match Error in message");
    assert!(hits(b"This is an error message"), "Should match error in message");
    assert!(
        !hits(b"This is a warning message"),
        "Should NOT match when 'error' is absent"
    );
}
/// Compiles a case-insensitive `StartsWith("error")` matcher and scans the resulting body
/// database: the prefix must match in any casing, but not when it occurs mid-input.
#[test]
fn case_insensitive_starts_with_match() {
    let prefix_policy = make_policy_with_case_insensitive(
        "test",
        log_matcher::Match::StartsWith("error".to_string()),
        true,
    );
    let input = [(prefix_policy, Arc::new(PolicyStats::default()))];
    let matchers = CompiledMatchers::<LogSignal>::build(input.into_iter()).unwrap();
    let body_key = MatchKey::new(LogFieldSelector::Simple(LogField::Body), false);
    let body_db = matchers.databases.get(&body_key).unwrap();

    // True when scanning `input` reports at least one match.
    let hits = |input: &[u8]| !body_db.database.scan(input).unwrap().is_empty();

    assert!(hits(b"ERROR: something went wrong"), "Should match ERROR at start");
    assert!(hits(b"Error: something went wrong"), "Should match Error at start");
    assert!(!hits(b"Something ERROR happened"), "Should NOT match ERROR in middle");
}
/// Compiles a case-insensitive `EndsWith(".json")` matcher and scans the resulting body
/// database: the suffix must match in any casing, but not when followed by more text.
#[test]
fn case_insensitive_ends_with_match() {
    let suffix_policy = make_policy_with_case_insensitive(
        "test",
        log_matcher::Match::EndsWith(".json".to_string()),
        true,
    );
    let input = [(suffix_policy, Arc::new(PolicyStats::default()))];
    let matchers = CompiledMatchers::<LogSignal>::build(input.into_iter()).unwrap();
    let body_key = MatchKey::new(LogFieldSelector::Simple(LogField::Body), false);
    let body_db = matchers.databases.get(&body_key).unwrap();

    // (scanned input, whether a match is expected, failure message)
    let cases = [
        ("config.JSON", true, "Should match .JSON at end"),
        ("config.Json", true, "Should match .Json at end"),
        ("config.json.bak", false, "Should NOT match .json in middle"),
    ];
    for (text, expect_match, message) in cases {
        let found = !body_db.database.scan(text.as_bytes()).unwrap().is_empty();
        assert!(found == expect_match, "{}", message);
    }
}
/// Asserts that building pattern groups from a log policy with an empty matcher list fails
/// with `PolicyError::InvalidPolicy` carrying that policy's id.
#[test]
fn build_from_log_policies_empty_match_list_rejected() {
    const POLICY_ID: &str = "no-matchers";

    let target = crate::proto::tero::policy::v1::policy::Target::Log(LogTarget {
        r#match: Vec::new(),
        keep: "all".to_string(),
        transform: None,
        sample_key: None,
    });
    let policy = Policy::new(ProtoPolicy {
        id: POLICY_ID.to_string(),
        name: POLICY_ID.to_string(),
        enabled: true,
        target: Some(target),
        ..Default::default()
    });
    let input = [(policy, Arc::new(PolicyStats::default()))];

    let rejection = PatternGroups::build_from_log_policies(input.into_iter()).unwrap_err();
    assert!(matches!(
        rejection,
        PolicyError::InvalidPolicy { ref policy_id, .. } if policy_id == POLICY_ID
    ));
}
/// Asserts that building pattern groups from a metric policy with an empty matcher list
/// fails with `PolicyError::InvalidPolicy` carrying that policy's id.
#[test]
fn build_from_metric_policies_empty_match_list_rejected() {
    use crate::proto::tero::policy::v1::MetricTarget;

    const POLICY_ID: &str = "metric-no-matchers";

    let target = crate::proto::tero::policy::v1::policy::Target::Metric(MetricTarget {
        r#match: Vec::new(),
        keep: true,
    });
    let policy = Policy::new(ProtoPolicy {
        id: POLICY_ID.to_string(),
        name: POLICY_ID.to_string(),
        enabled: true,
        target: Some(target),
        ..Default::default()
    });
    let input = [(policy, Arc::new(PolicyStats::default()))];

    let rejection = PatternGroups::build_from_metric_policies(input.into_iter()).unwrap_err();
    assert!(matches!(
        rejection,
        PolicyError::InvalidPolicy { ref policy_id, .. } if policy_id == POLICY_ID
    ));
}
/// Asserts that building pattern groups from a trace policy with an empty matcher list
/// fails with `PolicyError::InvalidPolicy` carrying that policy's id.
#[test]
fn build_from_trace_policies_empty_match_list_rejected() {
    use crate::proto::tero::policy::v1::TraceTarget;

    const POLICY_ID: &str = "trace-no-matchers";

    let target = crate::proto::tero::policy::v1::policy::Target::Trace(TraceTarget {
        r#match: Vec::new(),
        keep: None,
    });
    let policy = Policy::new(ProtoPolicy {
        id: POLICY_ID.to_string(),
        name: POLICY_ID.to_string(),
        enabled: true,
        target: Some(target),
        ..Default::default()
    });
    let input = [(policy, Arc::new(PolicyStats::default()))];

    let rejection = PatternGroups::build_from_trace_policies(input.into_iter()).unwrap_err();
    assert!(matches!(
        rejection,
        PolicyError::InvalidPolicy { ref policy_id, .. } if policy_id == POLICY_ID
    ));
}
}