use super::{documentation::documentation_line_flags, CodeContext};
/// Byte length of the `test_` filename prefix that `is_test_file` looks for.
const TEST_PREFIX_LEN: usize = 5;
/// How many lines above the current one `is_in_encrypted_block` scans for an encryption marker.
const ENCRYPTED_BLOCK_LOOKBACK_LINES: usize = 10;
/// How many lines above the current one `is_in_test_function` scans for a test marker.
const TEST_FUNCTION_LOOKBACK_LINES: usize = 100;
/// Classifies the line at `line_idx` of `lines`.
///
/// Convenience entry point: computes the per-line documentation flags with
/// `documentation_line_flags` and forwards everything to
/// `infer_context_with_documentation`.
pub fn infer_context(lines: &[&str], line_idx: usize, file_path: Option<&str>) -> CodeContext {
    infer_context_with_documentation(
        lines,
        line_idx,
        file_path,
        &documentation_line_flags(lines),
    )
}
/// Returns `true` when `credential` looks like a documentation placeholder
/// rather than a real secret.
///
/// A credential is treated as a placeholder when any of these hold:
/// * its upper-cased form ends with `EXAMPLE` or `EXAMPLEKEY`;
/// * it is at least 16 bytes long and more than three quarters of its bytes
///   are `x`/`X`;
/// * `is_hex_sequential_placeholder`, `is_empty_input_hash` or
///   `is_sequential_placeholder` accepts it.
pub fn is_known_example_credential(credential: &str) -> bool {
    let uppercased = credential.to_uppercase();
    let has_example_suffix = ["EXAMPLE", "EXAMPLEKEY"]
        .iter()
        .any(|&suffix| uppercased.ends_with(suffix));
    if has_example_suffix {
        return true;
    }

    let total = credential.len();
    let mostly_x = total >= 16
        && credential
            .bytes()
            .filter(|byte| matches!(byte, b'x' | b'X'))
            .count()
            > total * 3 / 4;

    mostly_x
        || is_hex_sequential_placeholder(credential)
        || is_empty_input_hash(credential)
        || is_sequential_placeholder(credential)
}
/// Returns `true` when `credential` equals, ignoring ASCII case, one of three
/// fixed hex digests of 32, 40 and 64 characters (by the function's name, the
/// hashes of empty input).
fn is_empty_input_hash(credential: &str) -> bool {
    const EMPTY_INPUT_DIGESTS: [&str; 3] = [
        "d41d8cd98f00b204e9800998ecf8427e",
        "da39a3ee5e6b4b0d3255bfef95601890afd80709",
        "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855",
    ];
    EMPTY_INPUT_DIGESTS
        .iter()
        .any(|&digest| credential.eq_ignore_ascii_case(digest))
}
/// Returns `true` when the credential body is a trivially repeated filler.
///
/// The body is `credential` with the first matching entry of
/// `crate::confidence::KNOWN_PREFIXES` stripped (or the whole string when no
/// prefix matches). Bodies shorter than 8 bytes are never placeholders.
/// Otherwise the body qualifies when it is either one byte repeated
/// throughout, or its first two bytes repeated throughout (an odd trailing
/// byte must equal the first byte of the pattern).
pub fn is_sequential_placeholder(credential: &str) -> bool {
    let body = crate::confidence::KNOWN_PREFIXES
        .iter()
        .find_map(|prefix| credential.strip_prefix(prefix))
        .unwrap_or(credential);
    let bytes = body.as_bytes();
    if bytes.len() < 8 {
        return false;
    }

    let (lead, follow) = (bytes[0], bytes[1]);
    if bytes.iter().all(|&byte| byte == lead) {
        return true;
    }

    // Every complete two-byte chunk must repeat the opening pair; a leftover
    // single byte only has to continue the pattern.
    let mut pairs = bytes.chunks_exact(2);
    let full_pairs_repeat = pairs.all(|pair| pair == [lead, follow]);
    full_pairs_repeat && pairs.remainder().iter().all(|&byte| byte == lead)
}
/// Returns `true` when the credential body is an obviously sequential run of
/// hexadecimal digits (for example `0123456789abcdef…` or `00112233…`).
///
/// The body is `credential` with the first matching entry of
/// `crate::confidence::KNOWN_PREFIXES` stripped (or the whole string when no
/// prefix matches). It must be at least 16 bytes long and consist solely of
/// ASCII hex digits; anything else returns `false`.
///
/// Two passes are made over the case-folded digits:
/// 1. digit-by-digit: more than 90% of adjacent steps ascend (wrapping
///    `9→a`, `f→0`) or more than 90% descend (wrapping `a→9`, `0→f`);
/// 2. pair-by-pair: the first characters of consecutive two-digit pairs and
///    the second characters both ascend past the threshold.
fn is_hex_sequential_placeholder(credential: &str) -> bool {
    let body = crate::confidence::KNOWN_PREFIXES
        .iter()
        .find_map(|prefix| credential.strip_prefix(prefix))
        .unwrap_or(credential);
    if body.len() < 16 || !body.bytes().all(|b| b.is_ascii_hexdigit()) {
        return false;
    }

    // Fold case once so both passes treat `A-F` like `a-f`. The wrap-around
    // comparisons below are written against lowercase literals, so comparing
    // raw bytes would miss upper- and mixed-case runs.
    let digits: Vec<u8> = body.bytes().map(|b| b.to_ascii_lowercase()).collect();

    // Pass 1: adjacent digits. `+ 1` cannot overflow because every byte is an
    // ASCII hex digit.
    let ascending_steps = digits
        .windows(2)
        .filter(|w| {
            w[1] == w[0] + 1 || (w[0] == b'9' && w[1] == b'a') || (w[0] == b'f' && w[1] == b'0')
        })
        .count();
    let descending_steps = digits
        .windows(2)
        .filter(|w| {
            w[1] + 1 == w[0] || (w[0] == b'a' && w[1] == b'9') || (w[0] == b'0' && w[1] == b'f')
        })
        .count();
    let step_threshold = (digits.len() - 1) * 9 / 10;
    if ascending_steps > step_threshold || descending_steps > step_threshold {
        return true;
    }

    // Pass 2: compare consecutive two-digit pairs position by position. A
    // trailing unpaired digit is ignored. The body has at least 16 digits, so
    // there are always at least 8 complete pairs here.
    let (first_of_pair, second_of_pair): (Vec<u8>, Vec<u8>) = digits
        .chunks_exact(2)
        .map(|pair| (pair[0], pair[1]))
        .unzip();
    let first_ascending = first_of_pair
        .windows(2)
        .filter(|w| {
            w[1] == w[0] + 1
                || (w[0] == b'f' && w[1] == b'a')
                || (w[0] == b'9' && w[1] == b'a')
                || (w[0] == b'9' && w[1] == b'0')
        })
        .count();
    let second_ascending = second_of_pair
        .windows(2)
        .filter(|w| {
            w[1] == w[0] + 1
                || (w[0] == b'f' && w[1] == b'0')
                || (w[0] == b'9' && w[1] == b'0')
                || (w[0] == b'9' && w[1] == b'a')
        })
        .count();
    // NOTE(review): this threshold is derived from the number of pairs, while
    // the counts above can be at most `pairs - 1`. For 8..=10 pairs the strict
    // `>` can therefore never be satisfied — confirm whether
    // `(pairs - 1) * 9 / 10` was intended, as in pass 1.
    let pair_threshold = first_of_pair.len() * 9 / 10;
    first_ascending > pair_threshold && second_ascending > pair_threshold
}
/// Classifies the line at `line_idx`, using precomputed documentation flags.
///
/// Returns `CodeContext::Unknown` when `line_idx` is out of range. Otherwise
/// the first matching rule wins, in this order:
/// 1. `file_path` is a test file → `TestCode`
/// 2. the line sits in an encrypted block → `Encrypted`
/// 3. the trimmed line starts like a comment → `Comment`
/// 4. `documentation_lines[line_idx]` is `true` → `Documentation`
/// 5. the line sits inside a test function → `TestCode`
/// 6. the trimmed line looks like an assignment → `Assignment`
/// 7. fall back to `infer_default_context`
pub fn infer_context_with_documentation(
    lines: &[&str],
    line_idx: usize,
    file_path: Option<&str>,
    documentation_lines: &[bool],
) -> CodeContext {
    let Some(line) = lines.get(line_idx) else {
        return CodeContext::Unknown;
    };
    let trimmed = line.trim();

    if file_path.is_some_and(is_test_file) {
        CodeContext::TestCode
    } else if is_in_encrypted_block(lines, line_idx) {
        CodeContext::Encrypted
    } else if is_comment_line(trimmed) {
        CodeContext::Comment
    } else if matches!(documentation_lines.get(line_idx), Some(true)) {
        CodeContext::Documentation
    } else if is_in_test_function(lines, line_idx) {
        CodeContext::TestCode
    } else if is_assignment_line(trimmed) {
        CodeContext::Assignment
    } else {
        infer_default_context(trimmed)
    }
}
/// Returns `true` when `path` names a test file or lives in a test directory.
///
/// Both `/` and `\` are treated as path separators. A path matches when:
/// * the file stem (text before the first `.`) starts with `test_`
///   (ASCII case-insensitive) and has at least one more character;
/// * the file name ends with one of the known test suffixes
///   (case-sensitive); or
/// * any path component equals a known test directory name
///   (ASCII case-insensitive).
fn is_test_file(path: &str) -> bool {
    const TEST_FILE_SUFFIXES: [&str; 11] = [
        "_test.go",
        "_test.rs",
        "_test.py",
        "_test.rb",
        "_test.java",
        "Test.java",
        "Tests.java",
        ".test.js",
        ".test.ts",
        ".spec.js",
        ".spec.ts",
    ];
    const TEST_DIRECTORY_NAMES: [&str; 6] = [
        "test",
        "tests",
        "__tests__",
        "fixtures",
        "testdata",
        "spec",
    ];

    let filename = path.rsplit(['/', '\\']).next().unwrap_or(path);
    let stem = filename.split('.').next().unwrap_or(filename);

    // The length guard makes the slice below safe and rejects a bare `test_`.
    let has_test_prefix = stem.len() > TEST_PREFIX_LEN
        && stem.as_bytes()[..TEST_PREFIX_LEN].eq_ignore_ascii_case(b"test_");
    if has_test_prefix {
        return true;
    }

    if TEST_FILE_SUFFIXES
        .iter()
        .any(|&suffix| filename.ends_with(suffix))
    {
        return true;
    }

    path.split(['/', '\\']).any(|component| {
        TEST_DIRECTORY_NAMES
            .iter()
            .any(|&name| component.eq_ignore_ascii_case(name))
    })
}
/// Fallback classification: a line containing a double or single quote is a
/// `StringLiteral`; anything else is `Unknown`.
fn infer_default_context(trimmed: &str) -> CodeContext {
    match memchr::memchr2(b'"', b'\'', trimmed.as_bytes()) {
        Some(_) => CodeContext::StringLiteral,
        None => CodeContext::Unknown,
    }
}
/// Returns `true` when the already-trimmed line begins with a recognised
/// comment opener or continuation.
///
/// `--` counts as a comment opener only when it is not part of `---`.
fn is_comment_line(trimmed: &str) -> bool {
    const COMMENT_PREFIXES: [&str; 9] = [
        "//", "#", "/*", "<!--", "<#", "* ", "*/", "rem ", "REM ",
    ];

    if COMMENT_PREFIXES
        .iter()
        .any(|&prefix| trimmed.starts_with(prefix))
    {
        return true;
    }
    trimmed.starts_with("--") && !trimmed.starts_with("---")
}
/// Returns `true` when the trimmed line contains an assignment operator or
/// looks like a YAML `key: value` mapping.
fn is_assignment_line(trimmed: &str) -> bool {
    if has_assignment_operator(trimmed) {
        return true;
    }
    has_yaml_mapping(trimmed)
}
/// Returns `true` when `trimmed` contains `:=`, `->`, or an `=` that
/// `is_comparison_operator` does not classify as part of a comparison.
///
/// Every occurrence of each operator is examined, so a comparison earlier on
/// the line (`if a == b { c = d }`) does not hide a later assignment.
pub(crate) fn has_assignment_operator(trimmed: &str) -> bool {
    [":=", "->", "="].iter().any(|&operator| {
        trimmed
            .match_indices(operator)
            .any(|(pos, _)| !is_comparison_operator(trimmed, pos, operator))
    })
}
/// Returns `true` when the trimmed line contains `": "` and does not start
/// with the `"- "` list-item marker.
fn has_yaml_mapping(trimmed: &str) -> bool {
    if trimmed.starts_with("- ") {
        return false;
    }
    memchr::memmem::find(trimmed.as_bytes(), b": ").is_some()
}
/// Decides whether the `=` found at byte offset `pos` in `trimmed` belongs to
/// a comparison rather than an assignment.
///
/// Only a bare `=` operator can be a comparison; any other `operator` returns
/// `false`. The `=` is a comparison when the character before it is one of
/// `=`, `!`, `>`, `<`, or the character after it is `=`.
fn is_comparison_operator(trimmed: &str, pos: usize, operator: &str) -> bool {
    if operator != "=" {
        return false;
    }
    let (before, from_operator) = trimmed.split_at(pos);
    let after = &from_operator[operator.len()..];
    before.ends_with(['=', '!', '>', '<']) || after.starts_with('=')
}
/// Returns `true` when the line at `line_idx`, or any of the
/// `ENCRYPTED_BLOCK_LOOKBACK_LINES` lines before it, carries an encryption
/// marker.
///
/// A trimmed line is a marker when it starts with one of the listed prefixes
/// or contains `sops:` or `sealed-secrets` anywhere.
fn is_in_encrypted_block(lines: &[&str], line_idx: usize) -> bool {
    const LINE_PREFIXES: [&str; 4] = [
        "$ANSIBLE_VAULT",
        "ENC[",
        "-----BEGIN PGP MESSAGE-----",
        "-----BEGIN AGE ENCRYPTED",
    ];
    const EMBEDDED_MARKERS: [&[u8]; 2] = [b"sops:", b"sealed-secrets"];

    let window_start = line_idx.saturating_sub(ENCRYPTED_BLOCK_LOOKBACK_LINES);
    lines
        .iter()
        .take(line_idx + 1)
        .skip(window_start)
        .any(|line| {
            let candidate = line.trim();
            LINE_PREFIXES
                .iter()
                .any(|&prefix| candidate.starts_with(prefix))
                || EMBEDDED_MARKERS
                    .iter()
                    .any(|&marker| memchr::memmem::find(candidate.as_bytes(), marker).is_some())
        })
}
/// Reports whether the line at `line_idx` appears to sit inside a test
/// function or test scope.
///
/// Walks upward from the line just above `line_idx`, covering at most
/// `TEST_FUNCTION_LOOKBACK_LINES` lines, and decides on the first line that is
/// either a recognised test marker (`true`) or the start of a non-test
/// class/function (`false`). A Rust `fn` line without `fn test_` in it counts
/// as a test only when one of the three lines directly above it is a test
/// attribute. Returns `false` when the window holds no deciding line.
///
/// `line_idx` may lie past the end of `lines`; only lines that exist are
/// inspected, so the call never panics.
fn is_in_test_function(lines: &[&str], line_idx: usize) -> bool {
    let window_start = line_idx.saturating_sub(TEST_FUNCTION_LOOKBACK_LINES);
    // Clamp so an out-of-range `line_idx` cannot index past the slice.
    let window_end = line_idx.min(lines.len());

    for idx in (window_start..window_end).rev() {
        let trimmed = lines[idx].trim();

        if line_opens_test_scope(trimmed) {
            return true;
        }
        if trimmed.starts_with("class ") {
            return false;
        }

        let is_python_def = trimmed.starts_with("def ") || trimmed.starts_with("async def ");
        if is_python_def && !trimmed.contains("def test_") {
            return false;
        }

        if trimmed.starts_with("func ") && !trimmed.contains("func Test") {
            return false;
        }

        let is_rust_fn = ["fn ", "pub fn ", "async fn ", "pub async fn "]
            .iter()
            .any(|&keyword| trimmed.starts_with(keyword));
        if is_rust_fn && !trimmed.contains("fn test_") {
            // The attribute may be separated from the `fn` line by a couple
            // of other lines, so look at up to three lines above it.
            let attrs_start = idx.saturating_sub(3);
            return lines[attrs_start..idx]
                .iter()
                .any(|line| line_is_test_attribute(line.trim()));
        }

        if trimmed.starts_with("function ") && !trimmed.contains("function test") {
            return false;
        }
    }
    false
}

/// True when a trimmed line by itself marks the start of a test scope.
fn line_opens_test_scope(trimmed: &str) -> bool {
    trimmed.starts_with("def test_")
        // Async test functions are tests just like their sync counterparts.
        || trimmed.starts_with("async def test_")
        || trimmed.starts_with("class Test")
        || trimmed.starts_with("it(")
        || trimmed.starts_with("describe(")
        || trimmed.starts_with("test(")
        || trimmed == "#[test]"
        // Kept as `concat!` like the original, presumably so the attribute
        // text never appears verbatim in this file — TODO confirm.
        || trimmed == concat!("#[cfg(", "test)]")
        || trimmed.starts_with("#[tokio::test")
        || trimmed.starts_with("func Test")
        || trimmed == "@Test"
}

/// True when a trimmed line found just above a function is a test attribute.
fn line_is_test_attribute(trimmed: &str) -> bool {
    trimmed.starts_with("#[test")
        || trimmed == concat!("#[cfg(", "test)]")
        || trimmed.starts_with("#[tokio::test")
        || trimmed == "@Test"
}
/// Returns the slice of `text` made of the line containing byte `offset` plus
/// up to `radius` lines on either side.
///
/// * `offset` is clamped to `text.len()`.
/// * The window starts right after the `(radius + 1)`-th newline found
///   scanning backward from `offset`, or at the start of `text` when there
///   are fewer newlines than that.
/// * The window ends right after the `(radius + 1)`-th newline found scanning
///   forward from `offset` (that newline is included), or at the end of
///   `text`.
///
/// Returns `""` for empty input.
pub(crate) fn surrounding_line_window(text: &str, offset: usize, radius: usize) -> &str {
    if text.is_empty() {
        return "";
    }
    let bytes = text.as_bytes();
    let anchor = offset.min(bytes.len());

    // Only step past a newline when the backward scan actually stopped on
    // one. If the scan merely ran out of text, the window begins at 0 even
    // when the text itself starts with a newline.
    let start = bytes[..anchor]
        .iter()
        .enumerate()
        .rev()
        .filter(|&(_, &byte)| byte == b'\n')
        .nth(radius)
        .map_or(0, |(idx, _)| idx + 1);

    let end = bytes[anchor..]
        .iter()
        .enumerate()
        .filter(|&(_, &byte)| byte == b'\n')
        .nth(radius)
        .map_or(bytes.len(), |(idx, _)| anchor + idx + 1);

    // `start` and `end` are each 0, `text.len()`, or the byte right after an
    // ASCII `\n`, so both are guaranteed to be UTF-8 character boundaries.
    &text[start..end]
}