/// Line-level stub detectors for rule CB-050.
///
/// Each entry is `(compiled regex, rule id, human-readable description)`. The regexes are
/// compiled once, on first access. Rule id `CB-050-D` is absent here because empty function
/// bodies are detected separately via a whole-file pattern.
static CB050_PATTERNS: LazyLock<Vec<(Regex, &'static str, &'static str)>> = LazyLock::new(|| {
    // (regex source, rule id, description)
    const RULES: [(&str, &str, &str); 5] = [
        (
            r"todo\s*!\s*\(",
            "CB-050-A",
            "todo!() macro - will panic at runtime",
        ),
        (
            r"unimplemented\s*!\s*\(",
            "CB-050-B",
            "unimplemented!() macro - will panic at runtime",
        ),
        (
            r#"panic\s*!\s*\(\s*"[^"]*not\s+implemented[^"]*""#,
            "CB-050-C",
            "panic!() with 'not implemented' message",
        ),
        (
            r"raise\s+NotImplementedError",
            "CB-050-E",
            "Python NotImplementedError - will raise at runtime",
        ),
        (
            r"pass\s*#\s*(?i:stub|todo|fixme)",
            "CB-050-F",
            "Python pass with stub comment",
        ),
    ];

    RULES
        .iter()
        .map(|&(source, id, description)| {
            (Regex::new(source).expect("valid regex"), id, description)
        })
        .collect()
});
/// Matches a Rust function whose body contains nothing but whitespace.
///
/// Shape: `fn NAME ( params-without-')' ) [-> return type] { }`. Capture group 1 is the
/// function name. `\s` also matches newlines, so the braces may sit on separate lines.
static EMPTY_BODY_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    const SOURCE: &str = r"fn\s+(\w+)\s*\([^)]*\)\s*(?:->\s*[^{]+)?\s*\{\s*\}";
    Regex::new(SOURCE).expect("valid regex")
});
/// Matches the opening of a trait block: the text `trait`, a name, then anything up to and
/// including the first `{`.
static TRAIT_BLOCK_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    const SOURCE: &str = r"trait\s+\w+[^{]*\{";
    Regex::new(SOURCE).expect("valid regex")
});
/// Matches a line that either opens a `"` string which is not closed on the same line
/// (optionally after `let NAME =`), or begins with a raw-string opener (`r`, any number of
/// `#`, then `"`).
// Nothing in the visible code reads this pattern, hence the dead_code allowance.
#[allow(dead_code)]
static STRING_LITERAL_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    const SOURCE: &str = r#"^\s*(?:let\s+\w+\s*=\s*)?"[^"]*$|^\s*r#*""#;
    Regex::new(SOURCE).expect("valid regex")
});
/// Matches a line whose first non-whitespace text is `//`, `/*` or `*`.
///
/// The `///` and `//!` alternatives are already covered by `//`.
static COMMENT_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    const SOURCE: &str = r"^\s*(?://|/\*|\*|///|//!)";
    Regex::new(SOURCE).expect("valid regex")
});
/// Matches a line whose first non-whitespace character is `#` followed by any character
/// other than `[`, so `#[...]` attribute lines are not treated as `#` comments.
static PYTHON_COMMENT_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    const SOURCE: &str = r"^\s*#[^\[]";
    Regex::new(SOURCE).expect("valid regex")
});
/// Matches a `///` doc-comment line that contains a triple-backtick code fence.
static DOC_TEST_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    const SOURCE: &str = r"^\s*///.*```";
    Regex::new(SOURCE).expect("valid regex")
});
/// Detects CB-050 code stubs in `code` when no file path is available.
///
/// Delegates to `detect_cb050_code_stubs_in_str_with_path` with an empty path; an empty
/// path is never classified as a test file, so the source is always scanned.
///
/// Returns one `(1-based line number, rule id, description)` tuple per finding.
pub fn detect_cb050_code_stubs_in_str(code: &str) -> Vec<(u32, &'static str, String)> {
detect_cb050_code_stubs_in_str_with_path(code, "")
}
/// Detects CB-050 code stubs in `code`, using `path` to exempt test files.
///
/// Two passes are made over the source:
///
/// 1. Every line that is not masked out (comments, doc tests, multi-line raw strings) is
///    checked against each entry of `CB050_PATTERNS`; a hit is reported unless the match
///    appears to start inside a string literal.
/// 2. The whole text is searched for empty function bodies (`CB-050-D`). A match is
///    ignored when its first line is masked out, lies inside a trait block, or the
///    function name looks like a deliberate marker/no-op.
///
/// # Parameters
/// * `code` - source text to scan.
/// * `path` - path of the file the text came from; may be empty. When it looks like a
///   test file nothing is reported.
///
/// # Returns
/// `(1-based line number, rule id, description)` tuples: all line-rule findings in line
/// order, followed by the empty-body findings in source order.
pub fn detect_cb050_code_stubs_in_str_with_path(
    code: &str,
    path: &str,
) -> Vec<(u32, &'static str, String)> {
    // Stubs are acceptable in test code, so test files are exempt altogether.
    if is_test_path(path) {
        return Vec::new();
    }

    let lines: Vec<&str> = code.lines().collect();
    let skip_mask = compute_skip_mask(&lines);
    let trait_lines = compute_trait_block_lines(&lines);
    let mut violations = Vec::new();

    // Pass 1: single-line stub markers.
    for (line_idx, line) in lines.iter().enumerate() {
        if skip_mask[line_idx] {
            continue;
        }
        let line_num = (line_idx + 1) as u32;
        for (pattern, id, desc) in CB050_PATTERNS.iter() {
            if pattern.is_match(line) && !is_in_string_literal(line, pattern) {
                violations.push((line_num, *id, desc.to_string()));
            }
        }
    }

    // Pass 2: empty function bodies, which may span several lines.
    //
    // Matches are yielded in increasing, non-overlapping order, so the line index is
    // maintained incrementally by counting only the newlines between consecutive match
    // starts instead of rescanning from the beginning of the file each time.
    let mut scanned_to = 0usize;
    let mut line_idx = 0usize;
    for cap in EMPTY_BODY_PATTERN.captures_iter(code) {
        let match_start = cap.get(0).expect("capture group 0 always exists").start();
        let fn_name = cap.get(1).map_or("", |m| m.as_str());

        line_idx += code
            .get(scanned_to..match_start)
            .unwrap_or_default()
            .matches('\n')
            .count();
        scanned_to = match_start;

        // Apply the same mask as pass 1 so that example code inside comments, doc tests
        // or raw strings is not reported as a real empty function.
        if skip_mask.get(line_idx).copied().unwrap_or(false) {
            continue;
        }

        let line_num = line_idx + 1;
        // Empty bodies inside a trait block (default methods) and intentionally empty
        // marker functions are not stubs.
        if trait_lines.contains(&line_num) || is_marker_function(fn_name) {
            continue;
        }

        violations.push((
            line_num as u32,
            "CB-050-D",
            format!("Empty function body: {}()", fn_name),
        ));
    }

    violations
}
/// Reports whether `path` looks like it belongs to test code.
///
/// An empty path is never a test path. Otherwise the path qualifies when it contains a
/// `tests`/`test` directory component, starts with such a directory, contains a
/// `_test.rs`/`_tests.rs` fragment, or ends in `/tests.rs` or `/test.rs`. Only `/` is
/// recognised as a separator.
fn is_test_path(path: &str) -> bool {
    const ANYWHERE: [&str; 5] = ["/tests/", "/test/", "_test.rs", "_tests.rs", "src/tests/"];
    const LEADING: [&str; 2] = ["tests/", "test/"];
    const TRAILING: [&str; 2] = ["/tests.rs", "/test.rs"];

    !path.is_empty()
        && (ANYWHERE.iter().any(|&fragment| path.contains(fragment))
            || LEADING.iter().any(|&prefix| path.starts_with(prefix))
            || TRAILING.iter().any(|&suffix| path.ends_with(suffix)))
}
/// Builds a per-line mask of lines that must not be scanned for stub patterns.
///
/// The result has one entry per element of `lines`; `true` means "skip". A line is
/// skipped when it is:
/// * a doc-test fence line (which also toggles the "inside doc test" state),
/// * inside a doc test,
/// * a comment line (Rust-style or `#`-style), or
/// * part of a multi-line raw string that is still open after processing the line.
///
/// Raw strings are tracked by comparing the number of `r#"` openers with the number of
/// `"#` closers on each line, so only single-`#` delimiters are recognised. The line on
/// which closers outnumber openers is not skipped. Comment and doc-test lines do not
/// take part in the raw-string counting.
fn compute_skip_mask(lines: &[&str]) -> Vec<bool> {
    const RAW_OPEN: &str = "r#\"";
    const RAW_CLOSE: &str = "\"#";

    let mut inside_doc_test = false;
    let mut inside_raw_string = false;
    let mut mask = Vec::with_capacity(lines.len());

    for &text in lines {
        // A fence line flips the doc-test state and is itself always skipped.
        if DOC_TEST_PATTERN.is_match(text) {
            inside_doc_test = !inside_doc_test;
            mask.push(true);
            continue;
        }

        if inside_doc_test
            || COMMENT_PATTERN.is_match(text)
            || PYTHON_COMMENT_PATTERN.is_match(text)
        {
            mask.push(true);
            continue;
        }

        // An imbalance between openers and closers changes the raw-string state; a
        // balanced line leaves it untouched.
        let opens = text.matches(RAW_OPEN).count();
        let closes = text.matches(RAW_CLOSE).count();
        if opens != closes {
            inside_raw_string = opens > closes;
        }

        mask.push(inside_raw_string);
    }

    mask
}
/// Collects the 1-based numbers of all lines that belong to a trait block.
///
/// Tracking starts on a line matching `TRAIT_BLOCK_PATTERN` and continues, counting `{`
/// and `}` characters per line, until a line containing `}` brings the brace depth back
/// to zero. Braces are counted textually, including any that appear inside strings or
/// comments.
fn compute_trait_block_lines(lines: &[&str]) -> std::collections::HashSet<usize> {
    let mut members = std::collections::HashSet::new();
    let mut depth: usize = 0;
    let mut tracking = false;

    for (idx, text) in lines.iter().enumerate() {
        tracking = tracking || TRAIT_BLOCK_PATTERN.is_match(text);
        if !tracking {
            continue;
        }

        members.insert(idx + 1);

        let opened = text.matches('{').count();
        let closed = text.matches('}').count();
        depth = (depth + opened).saturating_sub(closed);

        // The block ends on the line whose closing brace returns the depth to zero.
        if depth == 0 && closed > 0 {
            tracking = false;
        }
    }

    members
}
/// Heuristically decides whether the first match of `pattern` in `line` starts inside a
/// double-quoted string literal.
///
/// Counts the `"` characters before the match, discounts each `\"` escape sequence, and
/// treats an odd remainder as "inside a string". Returns `false` when the pattern does
/// not match at all. Only the first match on the line is considered.
fn is_in_string_literal(line: &str, pattern: &Regex) -> bool {
    let Some(hit) = pattern.find(line) else {
        return false;
    };

    let prefix = line.get(..hit.start()).unwrap_or_default();
    let all_quotes = prefix.matches('"').count();
    let escaped_quotes = prefix.matches(r#"\""#).count();

    // An odd number of unescaped quotes means a string is still open at the match.
    all_quotes.saturating_sub(escaped_quotes) % 2 == 1
}
/// Reports whether `name` identifies a function that is intentionally empty.
///
/// The comparison is case-insensitive. A name qualifies when it equals one of the marker
/// words, ends with `_<marker>`, or starts with `<marker>_`.
fn is_marker_function(name: &str) -> bool {
    const WHOLE_NAMES: [&str; 7] = [
        "marker", "sentinel", "phantom", "noop", "no_op", "dummy", "_",
    ];
    const SUFFIXES: [&str; 5] = ["_marker", "_sentinel", "_phantom", "_noop", "_dummy"];
    const PREFIXES: [&str; 5] = ["marker_", "sentinel_", "phantom_", "noop_", "dummy_"];

    let folded = name.to_lowercase();
    let folded = folded.as_str();

    WHOLE_NAMES.iter().any(|&word| word == folded)
        || SUFFIXES.iter().any(|&suffix| folded.ends_with(suffix))
        || PREFIXES.iter().any(|&prefix| folded.starts_with(prefix))
}