use crate::input::RedactableRange;
use crate::types::{Finding, RedactionRules, RedactionPolicy};
use super::contextual::{detect_contextual_assignments, propagate_repeated_secrets};
use super::custom::{detect_custom_strings, detect_custom_files};
use super::regexes::{cidr_regex, ip_regex, secret_regex, url_regex};
use super::scanners::{detect_domains, detect_emails, detect_pattern, detect_phones};
use super::validators::{is_valid_cidr, is_valid_ip, looks_like_secret};
use super::select_non_overlapping;
/// Outcome of a policy-driven detection pass, as produced by `detect_with_policy`.
pub(crate) struct PolicyDetectionResult {
/// Findings that remain after `select_non_overlapping` has been applied.
pub findings: Vec<Finding>,
/// Second value returned by `select_non_overlapping`; presumably the number of
/// findings discarded because they overlapped a kept one — TODO confirm.
pub dropped_findings: usize,
}
/// Runs the detectors selected by `policy` over the redactable `ranges` of `text`.
///
/// The pass has three stages:
///
/// 1. If the policy lists any custom files, `detect_custom_files` is run once over
///    the whole input.
/// 2. Every range whose `file_path` does not equal the `path` of a custom-file entry
///    is scanned with the built-in detectors and the custom-string detector. Those
///    detectors see only the fragment, so their offsets are shifted by the range
///    start to make them relative to `text`.
/// 3. The combined findings are passed through `select_non_overlapping`.
///
/// # Panics
///
/// Slicing `text` with a range panics if the range is out of bounds or does not fall
/// on UTF-8 character boundaries.
pub(crate) fn detect_with_policy(
    text: &str,
    policy: &RedactionPolicy,
    ranges: &[RedactableRange],
) -> PolicyDetectionResult {
    let mut collected = Vec::new();

    if !policy.custom_files.is_empty() {
        collected.extend(detect_custom_files(text, ranges, &policy.custom_files));
    }

    for entry in ranges {
        // Ranges belonging to a configured custom file are left out of the
        // per-fragment scan (presumably already covered by `detect_custom_files`).
        let is_custom_file = match entry.file_path.as_ref() {
            Some(path) => policy
                .custom_files
                .iter()
                .any(|file| file.path == path.as_str()),
            None => false,
        };
        if is_custom_file {
            continue;
        }

        let slice = &text[entry.range.clone()];
        let base = entry.range.start;

        let mut local = detect_builtins(slice, &policy.rules);
        local.extend(detect_custom_strings(slice, &policy.custom_strings));

        // Rebase fragment-relative offsets onto the full text while appending.
        collected.extend(local.into_iter().map(|mut hit| {
            hit.start += base;
            hit.end += base;
            hit
        }));
    }

    let (findings, dropped_findings) = select_non_overlapping(collected);
    PolicyDetectionResult {
        findings,
        dropped_findings,
    }
}
/// Applies the built-in detectors to `text` and returns everything they reported.
///
/// Contextual assignment detection always runs and receives a copy of `rules`.
/// Each remaining detector runs only when its flag in `rules` is set, in this fixed
/// order: secret, email, URL, CIDR, IP, phone, domain. Offsets in the returned
/// findings are whatever the detectors produce for the `text` passed in.
///
/// The integer literal handed to each `detect_pattern` call is forwarded unchanged;
/// it looks like a per-kind confidence or priority score — TODO confirm against
/// `detect_pattern`.
fn detect_builtins(text: &str, rules: &RedactionRules) -> Vec<Finding> {
    let mut hits = Vec::new();

    detect_contextual_assignments(text, &mut hits, *rules);

    if rules.secret {
        // Propagation runs before the generic secret pattern, on the findings
        // collected so far.
        propagate_repeated_secrets(text, &mut hits);
        let kind = crate::types::FindingKind::Secret;
        detect_pattern(text, secret_regex(), kind, 92, &mut hits, looks_like_secret);
    }

    if rules.email {
        detect_emails(text, &mut hits);
    }

    if rules.url {
        // URL matches get no extra validation: the predicate accepts everything.
        let kind = crate::types::FindingKind::Url;
        detect_pattern(text, url_regex(), kind, 96, &mut hits, |_| true);
    }

    if rules.cidr {
        let kind = crate::types::FindingKind::Cidr;
        detect_pattern(text, cidr_regex(), kind, 94, &mut hits, is_valid_cidr);
    }

    if rules.ip {
        let kind = crate::types::FindingKind::Ip;
        detect_pattern(text, ip_regex(), kind, 90, &mut hits, is_valid_ip);
    }

    if rules.phone {
        detect_phones(text, &mut hits);
    }

    if rules.domain {
        detect_domains(text, &mut hits);
    }

    hits
}