use super::inference::surrounding_line_window;
use crate::ascii_ci::ci_find;
/// Reports whether the match starting at byte offset `match_start` of `text`
/// sits in a context this module treats as a false positive.
///
/// Convenience entry point for callers that have no separately prepared path:
/// `file_path` is forwarded to
/// [`is_false_positive_match_context_with_path`] both as the raw path and as
/// the path consulted by the path-sensitive check. That check compares the
/// suffix ASCII-case-insensitively, so no lowercasing is required here.
pub fn is_false_positive_match_context(
    text: &str,
    match_start: usize,
    file_path: Option<&str>,
) -> bool {
    // Forward the path rather than `None`; otherwise the caller-supplied
    // `file_path` could never influence the result.
    is_false_positive_match_context_with_path(text, match_start, file_path, file_path)
}
pub fn is_false_positive_match_context_with_path(
text: &str,
match_start: usize,
_file_path: Option<&str>,
path_lower: Option<&str>,
) -> bool {
let window = surrounding_line_window(text, match_start, 1);
let bytes = window.as_bytes();
is_go_sum_checksum_bytes(bytes, path_lower)
|| is_integrity_hash_bytes(bytes)
|| is_configmap_binary_data_bytes(bytes)
|| is_git_lfs_pointer_context_bytes(bytes)
|| is_renovate_digest_context_bytes(bytes)
|| is_cors_header_bytes(bytes)
|| is_http_cache_header_bytes(bytes)
|| has_disclaimer_comment_bytes(bytes)
}
/// Disclaimer phrases embedded from `data/disclaimer-phrases.toml`,
/// ASCII-lowercased once on first access.
///
/// If the embedded TOML does not parse, a warning is logged and the list is
/// empty, so no disclaimer phrase can match for the rest of the run.
static DISCLAIMER_PHRASES: std::sync::LazyLock<Vec<String>> = std::sync::LazyLock::new(|| {
    #[derive(serde::Deserialize)]
    struct DisclaimerFile {
        phrases: Vec<String>,
    }

    let embedded = include_str!("../../data/disclaimer-phrases.toml");
    let file: DisclaimerFile = match toml::from_str(embedded) {
        Ok(file) => file,
        Err(e) => {
            tracing::warn!(
                error = %e,
                "disclaimer-phrases.toml failed to parse; falling back to empty phrase list \
                 (test-file disclaimers will not be suppressed this run)",
            );
            return Vec::new();
        }
    };

    // Lowercase in place; the strings are already owned.
    let mut phrases = file.phrases;
    for phrase in &mut phrases {
        phrase.make_ascii_lowercase();
    }
    phrases
});
/// Returns `true` when `bytes` contains a comment marker that is followed,
/// anywhere later in `bytes`, by one of the [`DISCLAIMER_PHRASES`].
///
/// Markers are matched ASCII-case-insensitively (relevant for `rem `). The
/// phrase lookup is delegated to `ci_find` on the bytes after the marker.
fn has_disclaimer_comment_bytes(bytes: &[u8]) -> bool {
    const COMMENT_MARKERS: &[&[u8]] = &[b"//", b"#", b"--", b"/*", b"<!--", b"rem "];
    let phrases: &[String] = &DISCLAIMER_PHRASES;

    COMMENT_MARKERS.iter().any(|marker| {
        let lead = marker[0];
        // Jump to candidate positions by the marker's first byte (either
        // case), then confirm the complete marker at each position.
        memchr::memchr2_iter(lead, lead.to_ascii_uppercase(), bytes)
            .filter_map(|at| {
                let after = at + marker.len();
                // `get` yields `None` when the marker would run past the end.
                let candidate = bytes.get(at..after)?;
                candidate
                    .eq_ignore_ascii_case(marker)
                    .then(|| &bytes[after..])
            })
            .any(|tail| phrases.iter().any(|phrase| ci_find(tail, phrase.as_bytes())))
    })
}
/// Line-oriented false-positive check for `lines[line_idx]`.
///
/// Delegates to [`is_false_positive_context_with_path`], handing `file_path`
/// over unchanged as the path argument.
pub fn is_false_positive_context(lines: &[&str], line_idx: usize, file_path: Option<&str>) -> bool {
    let path = file_path;
    is_false_positive_context_with_path(lines, line_idx, path)
}
/// Decides whether `lines[line_idx]` sits in a false-positive context, using
/// the line itself and, for several heuristics, its neighbouring lines.
///
/// Returns `false` when `line_idx` is out of range. `path_lower` is consulted
/// only by the go.sum check.
pub fn is_false_positive_context_with_path(
    lines: &[&str],
    line_idx: usize,
    path_lower: Option<&str>,
) -> bool {
    let Some(line) = lines.get(line_idx) else {
        return false;
    };
    let current = line.as_bytes();

    if is_go_sum_checksum_bytes(current, path_lower) {
        return true;
    }
    if is_integrity_hash_context(lines, line_idx, current) {
        return true;
    }
    if is_configmap_binary_data_context(lines, line_idx, current) {
        return true;
    }
    if is_git_lfs_pointer_context_with_lines(lines, line_idx, current) {
        return true;
    }
    if is_renovate_digest_context_with_lines(lines, line_idx, current) {
        return true;
    }
    if is_cors_header_bytes(current) {
        return true;
    }
    is_http_cache_header_context(lines, line_idx, current)
}
/// Returns `true` when `bytes` looks like a checksum line of a `go.sum` file.
///
/// Both conditions must hold: `path` ends with `go.sum`
/// (ASCII-case-insensitive) and `bytes` contains the `h1:` hash prefix.
/// Requiring both keeps an unrelated line that merely contains `h1:` from
/// being suppressed, and keeps non-checksum content inside a go.sum file
/// from being suppressed wholesale. A missing `path` never matches.
fn is_go_sum_checksum_bytes(bytes: &[u8], path: Option<&str>) -> bool {
    let in_go_sum = path
        .is_some_and(|p| crate::ascii_ci::ends_with_ignore_ascii_case(p.as_bytes(), b"go.sum"));
    in_go_sum && ci_find(bytes, b"h1:")
}
/// Returns `true` when the current line, or any line within two lines on
/// either side of `line_idx`, satisfies [`is_integrity_hash_bytes`].
fn is_integrity_hash_context(lines: &[&str], line_idx: usize, line_bytes: &[u8]) -> bool {
    if is_integrity_hash_bytes(line_bytes) {
        return true;
    }
    let looks_like_integrity = |neighbor: &str| is_integrity_hash_bytes(neighbor.as_bytes());
    surrounding_lines_contain(lines, line_idx, 2, looks_like_integrity)
}
/// Returns `true` when `bytes` contains `integrity` together with a
/// `sha256-` or `sha512-` prefix (all looked up through `ci_find`).
fn is_integrity_hash_bytes(bytes: &[u8]) -> bool {
    if !ci_find(bytes, b"integrity") {
        return false;
    }
    ci_find(bytes, b"sha256-") || ci_find(bytes, b"sha512-")
}
/// Returns `true` when the current line, or any of the up to eight lines
/// before it, contains a `binarydata:` key. Preceding lines are trimmed
/// before being tested.
fn is_configmap_binary_data_context(lines: &[&str], line_idx: usize, line_bytes: &[u8]) -> bool {
    if is_configmap_binary_data_bytes(line_bytes) {
        return true;
    }
    nearby_lines_contain(lines, line_idx, 8, |earlier| {
        let stripped = earlier.trim();
        is_configmap_binary_data_bytes(stripped.as_bytes())
    })
}
/// Returns `true` when `ci_find` locates `binarydata:` in `bytes`.
fn is_configmap_binary_data_bytes(bytes: &[u8]) -> bool {
    const KEY: &[u8] = b"binarydata:";
    ci_find(bytes, KEY)
}
/// Returns `true` when the current line, or any of the up to three lines
/// before it, satisfies [`is_git_lfs_pointer_context_bytes`].
fn is_git_lfs_pointer_context_with_lines(
    lines: &[&str],
    line_idx: usize,
    line_bytes: &[u8],
) -> bool {
    if is_git_lfs_pointer_context_bytes(line_bytes) {
        return true;
    }
    let is_lfs_line = |earlier: &str| is_git_lfs_pointer_context_bytes(earlier.as_bytes());
    nearby_lines_contain(lines, line_idx, 3, is_lfs_line)
}
/// Returns `true` when `bytes` contains either `oid sha256:` or `git-lfs`
/// (both looked up through `ci_find`).
fn is_git_lfs_pointer_context_bytes(bytes: &[u8]) -> bool {
    if ci_find(bytes, b"oid sha256:") {
        return true;
    }
    ci_find(bytes, b"git-lfs")
}
/// Returns `true` when the current line, or any line within two lines on
/// either side of `line_idx`, satisfies [`is_renovate_digest_context_bytes`].
fn is_renovate_digest_context_with_lines(
    lines: &[&str],
    line_idx: usize,
    line_bytes: &[u8],
) -> bool {
    if is_renovate_digest_context_bytes(line_bytes) {
        return true;
    }
    let is_digest_line = |neighbor: &str| is_renovate_digest_context_bytes(neighbor.as_bytes());
    surrounding_lines_contain(lines, line_idx, 2, is_digest_line)
}
/// Returns `true` when `bytes` contains `renovate/` and also a run of at
/// least eight hexadecimal digits (see [`contains_hex_sequence_bytes`]).
fn is_renovate_digest_context_bytes(bytes: &[u8]) -> bool {
    if !ci_find(bytes, b"renovate/") {
        return false;
    }
    contains_hex_sequence_bytes(bytes)
}
/// Returns `true` when `ci_find` locates the `access-control-` header prefix
/// in `bytes`.
fn is_cors_header_bytes(bytes: &[u8]) -> bool {
    const HEADER_PREFIX: &[u8] = b"access-control-";
    ci_find(bytes, HEADER_PREFIX)
}
/// Returns `true` when the current line, or a line directly adjacent to
/// `line_idx`, satisfies [`is_http_cache_header_bytes`].
fn is_http_cache_header_context(lines: &[&str], line_idx: usize, line_bytes: &[u8]) -> bool {
    if is_http_cache_header_bytes(line_bytes) {
        return true;
    }
    let is_cache_header = |neighbor: &str| is_http_cache_header_bytes(neighbor.as_bytes());
    surrounding_lines_contain(lines, line_idx, 1, is_cache_header)
}
/// Returns `true` when `bytes` mentions an `etag` header.
///
/// Two forms are accepted, both ASCII-case-insensitive: the line begins with
/// `etag` once leading ASCII whitespace is skipped, or `etag` appears
/// anywhere as a whole alphanumeric token (see [`has_token_bytes`]).
fn is_http_cache_header_bytes(bytes: &[u8]) -> bool {
    let leading_ws = bytes
        .iter()
        .take_while(|b| b.is_ascii_whitespace())
        .count();
    let rest = &bytes[leading_ws..];

    let starts_with_etag = rest.len() >= 4 && rest[..4].eq_ignore_ascii_case(b"etag");
    starts_with_etag || has_token_bytes(bytes, b"etag")
}
/// Returns `true` when `token` occurs in `text` as a complete token, compared
/// ASCII-case-insensitively.
///
/// Tokens are the maximal runs of ASCII alphanumeric bytes in `text`; every
/// other byte acts as a separator. An empty `token` always matches.
fn has_token_bytes(text: &[u8], token: &[u8]) -> bool {
    if token.is_empty() {
        return true;
    }
    // Splitting on separators yields exactly the alphanumeric runs, plus
    // empty pieces that can never equal a non-empty token.
    text.split(|b| !b.is_ascii_alphanumeric())
        .any(|word| word.eq_ignore_ascii_case(token))
}
/// Returns `true` when `bytes` contains at least eight consecutive ASCII
/// hexadecimal digits.
fn contains_hex_sequence_bytes(bytes: &[u8]) -> bool {
    const MIN_RUN: usize = 8;
    // Each piece between non-hex bytes is one maximal run of hex digits.
    bytes
        .split(|b| !b.is_ascii_hexdigit())
        .any(|run| run.len() >= MIN_RUN)
}
/// Returns `true` when `predicate` accepts the line at `line_idx` or any of
/// the up to `lookback_lines` lines immediately before it.
///
/// Lines after `line_idx` are never inspected. Indices past the end of
/// `lines` are skipped rather than treated as an error.
fn nearby_lines_contain(
    lines: &[&str],
    line_idx: usize,
    lookback_lines: usize,
    predicate: impl Fn(&str) -> bool,
) -> bool {
    let first = line_idx.saturating_sub(lookback_lines);
    for (idx, line) in lines.iter().enumerate().skip(first) {
        if idx > line_idx {
            break;
        }
        if predicate(line) {
            return true;
        }
    }
    false
}
/// Returns `true` when `predicate` accepts any line in the window
/// `line_idx - radius ..= line_idx + radius`, clipped to the bounds of
/// `lines`.
///
/// If the clipped window is empty, including when `line_idx` lies so far
/// past the end that the window's start is beyond its end, the result is
/// `false` rather than a panic.
fn surrounding_lines_contain(
    lines: &[&str],
    line_idx: usize,
    radius: usize,
    predicate: impl Fn(&str) -> bool,
) -> bool {
    let start = line_idx.saturating_sub(radius);
    // Saturating arithmetic keeps extreme indices from overflowing before
    // the clamp to `lines.len()`.
    let end = line_idx
        .saturating_add(radius)
        .saturating_add(1)
        .min(lines.len());
    // Checked slicing: `get` yields `None` when `start > end`, where plain
    // indexing would panic.
    lines
        .get(start..end)
        .is_some_and(|window| window.iter().copied().any(predicate))
}