use std::collections::{HashMap, HashSet};
use std::fs;
use std::path::Path;
use ignore::WalkBuilder;
use regex::Regex;
use serde_json::{json, Value};
/// File extensions treated as source code worth scanning.
const SOURCE_EXTENSIONS: &[&str] = &[
    "rs", "py", "js", "ts", "tsx", "jsx", "go", "java", "rb", "c", "cpp", "h", "hpp", "cs",
    "swift", "kt", "scala", "php", "lua", "sh", "bash", "zsh",
];

/// Returns true when `path` has a recognized source-code extension.
///
/// The comparison is ASCII case-insensitive, so `MAIN.PY` or `Foo.RS` are
/// also recognized (relevant on case-insensitive filesystems). Paths without
/// an extension, or with a non-UTF-8 extension, return false.
fn is_source_file(path: &Path) -> bool {
    path.extension()
        .and_then(|e| e.to_str())
        .map(|ext| {
            let ext = ext.to_ascii_lowercase();
            SOURCE_EXTENSIONS.contains(&ext.as_str())
        })
        .unwrap_or(false)
}
// --- Identifier / comment heuristics ---------------------------------------
// Names like "phase1", "step_2", "part-3" left over from a phased work plan.
const PHASED_PATTERN: &str = r"(?i)\b(phase|step|part)[_\-]?\d+\b";
// Looser variant for comment text: allows spaces and a trailing letter ("Phase 2b").
const PHASED_COMMENT_PATTERN: &str = r"(?i)\b(phase|step|part)\s?[_\-]?\s?\d+[a-z]?\b";
// Schedule-flavored names: "week1", "day_3", "sprint-2".
const TEMPORAL_PATTERN: &str = r"(?i)\b(week|day|sprint)[_\-]?\d+\b";
// Generic scaffolding vocabulary appearing inside identifiers.
const SCAFFOLD_IDENT_PATTERN: &str =
    r"(?i)\b(scaffold|boilerplate|placeholder|stub|sample|example)\b";
// Versioned/draft suffixes in identifiers: "_v2", "_final", "_draft".
const NUMBERED_SUFFIX_PATTERN: &str = r"(?i)_(v\d+|final|draft)\b";
// --- Filename heuristics -----------------------------------------------------
// Throwaway filename prefixes.
const TEMP_PREFIX_PATTERN: &str = r"(?i)^(temp_|tmp_|backup_|old_)";
// Throwaway filename suffixes.
const TEMP_SUFFIX_PATTERN: &str = r"(?i)(_bak|\.bak|_old|_backup|_copy|\.orig)$";
// Files that look like generated scaffolding or templates.
const SCAFFOLD_FILE_PATTERN: &str =
    r"(?i)^(scaffold[_\-]|boilerplate[_\-]|template_|stub_|placeholder[_\-]|sample_|example_)";
// Versioned copies of files: "foo_v2.rs", "report_final.md".
const NUMBERED_FILE_SUFFIX_PATTERN: &str = r"(?i)_(v\d+|new|final|draft)\.";
// --- In-line markers ---------------------------------------------------------
// Classic unfinished-work markers (case-sensitive, as conventionally written).
const TODO_PATTERN: &str = r"\b(TODO|FIXME|HACK|XXX|TEMP|TEMPORARY)\b";
// Function definitions across languages: capture group 1 holds the name for
// keyword-style definitions (def/function/fn/func/fun), group 2 the name for
// modifier/type-prefixed definitions like "public static foo(".
const FUNC_DEF_PATTERN: &str = r"(?:(?:def|function|fn|func|fun)\s+(\w+)|(?:(?:public|private|protected|static|async|void|int|string|bool|float|double|var|let|const)\s+)+(\w+)\s*\()";
/// Line-level patterns indicating an unimplemented ("placeholder") body.
/// Each pattern is matched against a single line.
const PLACEHOLDER_PATTERNS: &[&str] = &[
    r"^\s*pass\s*$",                      // Python: bare `pass`
    r"^\s*\.\.\.\s*$",                    // Python: bare Ellipsis
    r"^\s*unimplemented!\(\)\s*;?\s*$",   // Rust
    r"^\s*todo!\(.*\)\s*;?\s*$",          // Rust
    r#"^\s*throw\s+new\s+Error\s*\(\s*"not implemented"\s*\)\s*;?\s*$"#, // JS/TS
    r"^\s*raise\s+NotImplementedError\b", // Python
    // Comment-only stubs ("// implementation goes here", "# add X here", ...).
    r"(?i)^\s*(?://|#)\s*implementation goes here",
    r"(?i)^\s*(?://|#)\s*add\s+\w+\s+here",
    r"(?i)^\s*(?://|#)\s*TODO:\s*implement\b",
    r"(?i)^\s*(?://|#)\s*(?:stub|placeholder)\b",
    r"(?i)^\s*(?://|#)\s*not\s+(?:yet\s+)?implemented",
    // "not implemented" returned as an error value or plain return.
    r#"(?i)^\s*(?:return\s+)?Err\s*\(\s*["'](?:not |not yet )?implemented"#,
    r#"(?i)^\s*return\s+.*["'](?:not |not yet )?implemented"#,
];
/// Debug-print statements whose first string argument carries an obvious
/// debug marker ("DEBUG", ">>>", "***").
///
/// NOTE: every pattern here must compile with the `regex` crate, which does
/// not support look-around. The call site builds these with
/// `filter_map(|p| Regex::new(p).ok())`, so an invalid pattern is silently
/// dropped rather than reported. The Python `print` pattern previously used
/// the unsupported look-behind `(?<!\w)` and was therefore never active;
/// `\b` is the supported equivalent here, since `print` starts with a word
/// character ("not preceded by a word character" == word boundary).
const DEBUG_PRINT_PATTERNS: &[&str] = &[
    r#"console\.log\(\s*["'](?:DEBUG|>>>|\*\*\*)"#, // JavaScript / TypeScript
    r#"\bprint\(\s*["'](?:DEBUG|>>>|\*\*\*)"#,      // Python
    r#"println!\(\s*["'](?:DEBUG|>>>|\*\*\*)"#,     // Rust
    r#"fmt\.Println\(\s*["'](?:DEBUG|>>>|\*\*\*)"#, // Go
];
/// Phrases left behind by project generators and starter templates
/// (AWS CDK init, create-react-app, create-next-app, etc.).
const FRAMEWORK_DEFAULT_PATTERNS: &[&str] = &[
    r"(?i)a sample cdk",
    r"(?i)welcome to your cdk",
    r"(?i)this project was bootstrapped", // create-react-app README boilerplate
    r"(?i)this is a blank",
    r"(?i)replace this",
    r"(?i)create next app",
    r"(?i)generated by create[ -]",
    r"(?i)my application",
];
// Comments that narrate the obvious ("This function handles ..."),
// a hallmark of generated documentation.
const VERBOSE_DOC_PATTERN: &str = r"(?i)\b(this|the)\s+(function|method|class|module|constructor)\s+(does|performs|handles|is responsible|provides|implements|creates|returns|takes|accepts)\b";
// Buzzwords over-represented in AI-generated prose.
const AI_VOCAB_PATTERN: &str = r"(?i)\b(comprehensive|robust|elegant|leverage|streamline|utilize|facilitate|orchestrate|encapsulate|paradigm)\b";
// Captures the message text (group 1) of `throw new ...Error("...")`,
// `raise ...Error("...")`/`raise ...Exception("...")`, and `panic!("...")`
// so that repeated identical messages can be grouped.
const ERROR_STRING_PATTERN: &str = r#"(?:throw\s+new\s+\w*Error\s*\(\s*["']|raise\s+\w*(?:Error|Exception)\s*\(\s*["']|panic!\s*\(\s*["'])([^"']+)["']"#;
// Markdown artifacts commonly produced by AI coding sessions.
const DELIVERY_FILE_PATTERN: &str =
    r"(?i)^(DELIVERY|IMPLEMENTATION_PLAN|PROGRESS|SESSION_|HANDOFF|OPTIMIZATION_PLAN)";
const DELIVERY_FILE_SUFFIX_PATTERN: &str = r"(?i)_SUMMARY\.md$";
// Assistant-specific instruction files (e.g. CLAUDE.md, CLAUDE_NOTES.md).
const CLAUDE_FILE_PATTERN: &str = r"(?i)^CLAUDE.*\.md$";
// Sign-off / checklist style filenames anywhere in the name.
const DELIVERY_SIGNOFF_PATTERN: &str = r"(?i)(DELIVERABLES|SIGN_OFF|VALIDATION_REPORT|CHECKLIST)";
/// Credential-like token patterns as (regex, label, confidence) triples.
/// "high" = the token format itself identifies a specific secret type;
/// "medium" = a key/value assignment that merely looks secret-bearing.
const SECRET_PATTERNS: &[(&str, &str, &str)] = &[
    (r"sk-[A-Za-z0-9]{32,}", "openai_api_key", "high"),
    (r"sk-ant-[A-Za-z0-9_\-]{20,}", "anthropic_api_key", "high"),
    (r"AKIA[0-9A-Z]{16}", "aws_access_key", "high"),
    (r"ghp_[A-Za-z0-9]{36}", "github_token", "high"),
    (r"ghs_[A-Za-z0-9]{36}", "github_token", "high"),
    (r"github_pat_[A-Za-z0-9_]{82}", "github_token", "high"),
    (r"AIza[0-9A-Za-z_\-]{35}", "google_api_key", "high"),
    (r"sk_live_[A-Za-z0-9]{24,}", "stripe_secret_key", "high"),
    (
        r"pk_live_[A-Za-z0-9]{24,}",
        "stripe_publishable_key",
        "high",
    ),
    (
        r"-----BEGIN (?:RSA |EC |DSA |OPENSSH )?PRIVATE KEY-----",
        "private_key",
        "high",
    ),
    (
        r"https?://hooks\.slack\.com/services/[A-Za-z0-9/]+",
        "slack_webhook",
        "high",
    ),
    (
        r"https?://discord(?:app)?\.com/api/webhooks/[0-9]+/[A-Za-z0-9_\-]+",
        "discord_webhook",
        "high",
    ),
    // Connection URLs that embed a user:password pair.
    (
        r"(?i)(?:mongodb|postgres(?:ql)?|mysql|redis|amqp|mssql)://[^:\s/]{1,64}:[^@\s]{4,}@",
        "db_connection_string",
        "high",
    ),
    // Generic `key = "value"` style assignments — weaker signals.
    (
        r#"(?i)\b(?:api[_\-]?key|apikey|api[_\-]?token)\s*[:=]\s*["'][A-Za-z0-9_\-\.]{16,}["']"#,
        "api_key",
        "medium",
    ),
    (
        r#"(?i)\b(?:password|passwd|pwd)\s*[:=]\s*["'][^"']{4,}["']"#,
        "password",
        "medium",
    ),
    (
        r#"(?i)\b(?:secret|client[_\-]?secret|secret[_\-]?key|secret[_\-]?token)\s*[:=]\s*["'][^"']{8,}["']"#,
        "secret",
        "medium",
    ),
    // Three base64url segments starting with "eyJ" ('{"' encoded) — a JWT.
    (
        r"eyJ[A-Za-z0-9_\-]{10,}\.[A-Za-z0-9_\-]{10,}\.[A-Za-z0-9_\-]{10,}",
        "jwt_token",
        "medium",
    ),
];
/// Obvious placeholder values that must not be reported as real secrets.
/// Matched case-insensitively via substring containment.
const SECRET_PLACEHOLDERS: &[&str] = &[
    "your_api_key",
    "your-api-key",
    "your_key_here",
    "insert_key_here",
    "insert_key",
    "replace_me",
    "replaceme",
    "changeme",
    "change_me",
    "placeholder",
    "xxxxxxxxxxxxxxxxxxxx",
    "********************",
    "password123",
    "your_secret",
    "my_secret",
];
/// Substrings indicating a line reads configuration from the environment
/// (checked with plain `contains`, not regex). Lines matching any of these
/// are exempted from hardcoded-secret detection.
const ENV_VAR_PATTERNS: &[&str] = &[
    "process.env.", // Node.js
    "os.environ",   // Python
    "std::env::var", // Rust
    "env::var(",    // Rust (shorter path)
    "getenv(",      // C / PHP / Python os.getenv
    "ENV[",         // Ruby
    "ENV.fetch",    // Ruby
    "$ENV{",        // Perl
    "from_env",     // builder-style config loaders
    "env!(\"",      // Rust compile-time env macro
    "dotenv",       // .env loaders
];
/// Redacts a matched secret for reporting: keeps at most the first eight
/// characters and appends `***`.
fn redact_secret(matched: &str) -> String {
    let prefix: String = matched.chars().take(8).collect();
    format!("{prefix}***")
}
/// Returns true when the line appears to read a value from the environment,
/// in which case secret-looking text on it is not reported.
fn is_env_var_line(line: &str) -> bool {
    for pattern in ENV_VAR_PATTERNS {
        if line.contains(pattern) {
            return true;
        }
    }
    false
}
/// Heuristically decides whether a matched "secret" is actually a
/// placeholder/example value rather than a real credential.
fn is_placeholder_secret(matched: &str) -> bool {
    let lower = matched.to_lowercase();
    // Known placeholder substrings ("changeme", "your_api_key", ...).
    for placeholder in SECRET_PLACEHOLDERS {
        if lower.contains(placeholder) {
            return true;
        }
    }
    // A run of one repeated character ("xxxxxxxx", "........") is filler.
    if let Some(first) = lower.chars().next() {
        if lower.chars().count() > 4 && lower.chars().all(|c| c == first) {
            return true;
        }
    }
    // Template-ish shapes: "your_*", "my_*", "<...>", "${...}", "%(...)",
    // or text that itself references environment lookups.
    lower.starts_with("your_")
        || lower.starts_with("my_")
        || lower.starts_with('<')
        || lower.ends_with('>')
        || lower.starts_with("${")
        || lower.starts_with("%(")
        || lower.contains("getenv")
        || lower.contains("environ")
}
/// Returns true when `c` falls in one of the common emoji-related Unicode
/// ranges (emoticons, pictographs, transport, regional indicators, the
/// variation selector U+FE0F, the keycap combiner U+20E3, and circled M).
fn is_emoji(c: char) -> bool {
    const EMOJI_RANGES: &[(char, char)] = &[
        ('\u{1F600}', '\u{1F64F}'), // emoticons
        ('\u{1F300}', '\u{1F5FF}'), // misc symbols & pictographs
        ('\u{1F680}', '\u{1F6FF}'), // transport & map symbols
        ('\u{1F900}', '\u{1F9FF}'), // supplemental symbols & pictographs
        ('\u{1FA70}', '\u{1FAFF}'), // symbols & pictographs extended-A
        ('\u{2600}', '\u{26FF}'),   // miscellaneous symbols
        ('\u{2700}', '\u{27BF}'),   // dingbats
        ('\u{1F1E6}', '\u{1F1FF}'), // regional indicator symbols
        ('\u{1F170}', '\u{1F251}'), // enclosed alphanumeric/ideographic
        ('\u{24C2}', '\u{24C2}'),   // circled M
        ('\u{FE0F}', '\u{FE0F}'),   // variation selector-16
        ('\u{20E3}', '\u{20E3}'),   // combining enclosing keycap
    ];
    EMOJI_RANGES.iter().any(|&(lo, hi)| (lo..=hi).contains(&c))
}
/// Collects every emoji on `line` together with its byte offset.
fn find_emojis_in_line(line: &str) -> Vec<(char, usize)> {
    line.char_indices()
        .filter(|&(_, ch)| is_emoji(ch))
        .map(|(byte_pos, ch)| (ch, byte_pos))
        .collect()
}
/// Expectation that a function whose name mentions `term` (e.g. "sha256")
/// actually contains implementation keywords for that algorithm; used to
/// flag misleadingly named functions.
struct AlgorithmExpectation {
    // Lowercase substring looked for in function names.
    term: &'static str,
    // Per-language keyword sets: (file extensions, expected body keywords).
    lang_keywords: &'static [(&'static [&'static str], &'static [&'static str])],
    // Fallback keywords when the file's extension has no dedicated entry.
    default_keywords: &'static [&'static str],
    // Import markers that legitimize the name (a library supplies the algorithm).
    valid_imports: &'static [&'static str],
}
// Extension groups used as keys in `AlgorithmExpectation::lang_keywords`.
const RUST_EXT: &[&str] = &["rs"];
const PYTHON_EXT: &[&str] = &["py"];
const JS_TS_EXT: &[&str] = &["js", "ts", "tsx", "jsx"];
const GO_EXT: &[&str] = &["go"];
/// Name-vs-implementation expectations: when a function name contains `term`
/// but its body has none of the expected keywords — and the file does not
/// import a library providing the algorithm — the function is flagged as
/// potentially misleadingly named.
const ALGORITHM_EXPECTATIONS: &[AlgorithmExpectation] = &[
    // UUID generation.
    AlgorithmExpectation {
        term: "uuid",
        lang_keywords: &[
            (RUST_EXT, &["Uuid", "new_v4", "new_v7"]),
            (PYTHON_EXT, &["uuid4", "uuid1", "uuid.uuid"]),
            (JS_TS_EXT, &["randomUUID", "uuidv4", "crypto"]),
            (GO_EXT, &["uuid.New", "uuid.Must"]),
        ],
        default_keywords: &["uuid", "random", "crypto", "generate"],
        valid_imports: &[
            "use uuid",
            "import uuid",
            "from uuid",
            "\"uuid\"",
            "'uuid'",
            "google/uuid",
        ],
    },
    // SHA-256 hashing.
    AlgorithmExpectation {
        term: "sha256",
        lang_keywords: &[
            (RUST_EXT, &["Sha256", "sha2", "digest"]),
            (PYTHON_EXT, &["hashlib", "sha256"]),
            (JS_TS_EXT, &["createHash", "crypto", "subtle"]),
            (GO_EXT, &["sha256.New", "crypto/sha"]),
        ],
        default_keywords: &["sha", "hash", "digest", "crypto"],
        valid_imports: &["use sha", "import hashlib", "crypto", "digest"],
    },
    // SHA-512 hashing.
    AlgorithmExpectation {
        term: "sha512",
        lang_keywords: &[
            (RUST_EXT, &["Sha512", "sha2", "digest"]),
            (PYTHON_EXT, &["hashlib", "sha512"]),
            (JS_TS_EXT, &["createHash", "crypto", "subtle"]),
            (GO_EXT, &["sha512.New", "crypto/sha"]),
        ],
        default_keywords: &["sha", "hash", "digest", "crypto"],
        valid_imports: &["use sha", "import hashlib", "crypto", "digest"],
    },
    // SHA-1 hashing.
    AlgorithmExpectation {
        term: "sha1",
        lang_keywords: &[
            (RUST_EXT, &["Sha1", "sha1", "digest"]),
            (PYTHON_EXT, &["hashlib", "sha1"]),
            (JS_TS_EXT, &["createHash", "crypto", "subtle"]),
            (GO_EXT, &["sha1.New", "crypto/sha"]),
        ],
        default_keywords: &["sha", "hash", "digest", "crypto"],
        valid_imports: &["use sha", "import hashlib", "crypto", "digest"],
    },
    // Base64 encoding/decoding.
    AlgorithmExpectation {
        term: "base64",
        lang_keywords: &[
            (RUST_EXT, &["Base64", "STANDARD", "URL_SAFE"]),
            (PYTHON_EXT, &["b64encode", "b64decode", "base64"]),
            (JS_TS_EXT, &["btoa", "atob", "Buffer.from"]),
            (GO_EXT, &["base64.Std", "encoding/base64"]),
        ],
        default_keywords: &["base64", "encode", "decode", "btoa", "atob"],
        valid_imports: &["use base64", "import base64", "encoding/base64"],
    },
    // MD5 hashing.
    AlgorithmExpectation {
        term: "md5",
        lang_keywords: &[
            (RUST_EXT, &["Md5", "md5", "digest"]),
            (PYTHON_EXT, &["hashlib", "md5"]),
            (JS_TS_EXT, &["createHash", "crypto"]),
            (GO_EXT, &["md5.New", "crypto/md5"]),
        ],
        default_keywords: &["md5", "hash", "digest"],
        valid_imports: &["use md5", "import hashlib", "crypto/md5"],
    },
    // JSON Web Tokens.
    AlgorithmExpectation {
        term: "jwt",
        lang_keywords: &[
            (RUST_EXT, &["jsonwebtoken", "Header", "encode", "decode"]),
            (PYTHON_EXT, &["jwt", "PyJWT"]),
            (JS_TS_EXT, &["jsonwebtoken", "jwt", "sign", "verify"]),
            (GO_EXT, &["jwt.Parse", "golang-jwt"]),
        ],
        default_keywords: &["jwt", "token", "sign", "verify"],
        valid_imports: &["jsonwebtoken", "import jwt", "PyJWT", "golang-jwt"],
    },
    // AES — the next three entries share the same keyword sets so that
    // "aes", "encrypt", and "decrypt" in a name all demand cipher machinery.
    AlgorithmExpectation {
        term: "aes",
        lang_keywords: &[
            (RUST_EXT, &["Aes", "Cipher", "aes"]),
            (PYTHON_EXT, &["AES", "Cipher", "Fernet", "cryptography"]),
            (JS_TS_EXT, &["createCipheriv", "crypto", "subtle"]),
            (GO_EXT, &["aes.NewCipher", "crypto/aes"]),
        ],
        default_keywords: &["cipher", "aes", "encrypt", "decrypt", "crypto"],
        valid_imports: &["use aes", "cryptography", "Fernet", "crypto/aes"],
    },
    AlgorithmExpectation {
        term: "encrypt",
        lang_keywords: &[
            (RUST_EXT, &["Aes", "Cipher", "aes"]),
            (PYTHON_EXT, &["AES", "Cipher", "Fernet", "cryptography"]),
            (JS_TS_EXT, &["createCipheriv", "crypto", "subtle"]),
            (GO_EXT, &["aes.NewCipher", "crypto/aes"]),
        ],
        default_keywords: &["cipher", "aes", "encrypt", "decrypt", "crypto"],
        valid_imports: &["use aes", "cryptography", "Fernet", "crypto/aes"],
    },
    AlgorithmExpectation {
        term: "decrypt",
        lang_keywords: &[
            (RUST_EXT, &["Aes", "Cipher", "aes"]),
            (PYTHON_EXT, &["AES", "Cipher", "Fernet", "cryptography"]),
            (JS_TS_EXT, &["createCipheriv", "crypto", "subtle"]),
            (GO_EXT, &["aes.NewCipher", "crypto/aes"]),
        ],
        default_keywords: &["cipher", "aes", "encrypt", "decrypt", "crypto"],
        valid_imports: &["use aes", "cryptography", "Fernet", "crypto/aes"],
    },
    // Regular expressions ("regex" and the Go-style "regexp" spelling).
    AlgorithmExpectation {
        term: "regex",
        lang_keywords: &[
            (RUST_EXT, &["Regex", "captures", "is_match"]),
            (PYTHON_EXT, &["re.compile", "re.match", "re.search"]),
            (JS_TS_EXT, &["RegExp", "new RegExp", ".test(", ".match("]),
            (GO_EXT, &["regexp.Compile", "regexp.Must"]),
        ],
        default_keywords: &["regex", "Regex", "match", "pattern", "compile"],
        valid_imports: &["use regex", "import re", "regexp"],
    },
    AlgorithmExpectation {
        term: "regexp",
        lang_keywords: &[
            (RUST_EXT, &["Regex", "captures", "is_match"]),
            (PYTHON_EXT, &["re.compile", "re.match", "re.search"]),
            (JS_TS_EXT, &["RegExp", "new RegExp", ".test(", ".match("]),
            (GO_EXT, &["regexp.Compile", "regexp.Must"]),
        ],
        default_keywords: &["regex", "Regex", "match", "pattern", "compile"],
        valid_imports: &["use regex", "import re", "regexp"],
    },
];
/// Picks the keyword set matching the file extension `ext`, falling back to
/// the expectation's language-agnostic defaults.
fn get_lang_keywords<'a>(exp: &'a AlgorithmExpectation, ext: &str) -> &'a [&'static str] {
    exp.lang_keywords
        .iter()
        .find(|&&(extensions, _)| extensions.contains(&ext))
        .map(|&(_, keywords)| keywords)
        .unwrap_or(exp.default_keywords)
}
/// True when the file's full text contains any of the given import markers.
fn file_has_valid_import(source: &str, patterns: &[&str]) -> bool {
    for pattern in patterns {
        if source.contains(pattern) {
            return true;
        }
    }
    false
}
/// Extensions of languages whose blocks are delimited by `{ }`; function
/// bodies in these are walked by brace depth (Python is handled separately
/// via indentation).
const BRACE_LANGUAGES: &[&str] = &[
    "rs", "js", "ts", "tsx", "jsx", "go", "java", "c", "cpp", "h", "hpp", "cs", "swift", "kt",
    "scala", "php",
];
/// Builds a per-line mask for `source`: `mask[i]` is true when line `i` is a
/// continuation (or the closing line) of a multi-line string literal, so
/// scanners can skip content that is really string data.
///
/// Tracked openers by extension:
/// - `rs`: raw strings `r#"…"#` and `r##"…"##` (plain `"…"` is not tracked);
/// - `py`: triple-quoted `"""…"""` and `'''…'''` (odd token count on a line
///   means the string stays open);
/// - js/ts/tsx/jsx: backtick template literals (odd backtick count).
///
/// The opening line itself is NOT masked; every following line up to and
/// including the closer is. This is a line-based heuristic: it does not
/// consider whether an opener appears inside a comment or another string.
fn compute_multiline_string_mask(source: &str, ext: &str) -> Vec<bool> {
    let lines: Vec<&str> = source.lines().collect();
    let mut mask = vec![false; lines.len()];
    let mut in_string = false;
    // Token that terminates the currently open multi-line string.
    let mut closer: &str = "";
    for (i, line) in lines.iter().enumerate() {
        if in_string {
            mask[i] = true;
            if line.contains(closer) {
                in_string = false;
            }
        } else {
            match ext {
                "rs" => {
                    // Check the two-hash form first so `r##"` is not misread as `r#"`.
                    if let Some(pos) = line.find("r##\"") {
                        if !line[pos + 4..].contains("\"##") {
                            in_string = true;
                            closer = "\"##";
                        }
                    } else if let Some(pos) = line.find("r#\"") {
                        if !line[pos + 3..].contains("\"#") {
                            in_string = true;
                            closer = "\"#";
                        }
                    }
                }
                "py" => {
                    // An odd number of triple-quote tokens leaves the string open.
                    if line.contains("\"\"\"") && line.matches("\"\"\"").count() % 2 != 0 {
                        in_string = true;
                        closer = "\"\"\"";
                    } else if line.contains("'''") && line.matches("'''").count() % 2 != 0 {
                        in_string = true;
                        closer = "'''";
                    }
                }
                "js" | "ts" | "tsx" | "jsx" => {
                    // An odd number of backticks leaves a template literal open.
                    if line.matches('`').count() % 2 != 0 {
                        in_string = true;
                        closer = "`";
                    }
                }
                _ => {}
            }
        }
    }
    mask
}
/// Heuristic: is byte offset `pos` inside a double-quoted string on `line`?
///
/// Counts unescaped `"` bytes before `pos`; an odd count means `pos` falls
/// inside a string. Single quotes and raw strings are deliberately ignored.
fn is_inside_string_literal(line: &str, pos: usize) -> bool {
    let bytes = line.as_bytes();
    let limit = pos.min(bytes.len());
    let quotes_before = bytes[..limit]
        .iter()
        .enumerate()
        .filter(|&(i, &b)| b == b'"' && (i == 0 || bytes[i - 1] != b'\\'))
        .count();
    quotes_before % 2 == 1
}
/// Extracts the comment text from `line` for the given file extension,
/// returning the slice after the comment marker, or `None` when no comment
/// is found.
///
/// Recognized markers: `//` (C-family), `#` (py/rb/sh/bash/zsh), `--` (lua),
/// and block-comment continuation lines beginning with `*`. Quote characters
/// toggle an in-string flag (with backslash-escape handling) so markers
/// inside string literals are ignored.
/// NOTE(review): a lone apostrophe (e.g. a Rust lifetime or a contraction)
/// flips the in-string flag and can hide a later `//` — heuristic behavior,
/// presumably accepted; confirm if exactness matters.
fn extract_comment<'a>(line: &'a str, ext: &str) -> Option<&'a str> {
    let bytes = line.as_bytes();
    let len = bytes.len();
    let mut in_str = false;
    let mut str_char: u8 = 0;
    // Block-comment continuation ("* foo") is a comment in its entirety.
    let trimmed = line.trim_start();
    if trimmed.starts_with("* ") || trimmed.starts_with("*\t") || trimmed == "*" {
        return Some(trimmed.get(1..).unwrap_or("").trim_start());
    }
    let mut i = 0;
    while i < len {
        let b = bytes[i];
        // Opening quote: remember which character must close it.
        if !in_str && (b == b'"' || b == b'\'') {
            in_str = true;
            str_char = b;
            i += 1;
            continue;
        }
        // Inside a string: only an unescaped matching quote ends it.
        if in_str {
            if b == str_char && (i == 0 || bytes[i - 1] != b'\\') {
                in_str = false;
            }
            i += 1;
            continue;
        }
        // `//` line comment (C-family).
        if b == b'/' && i + 1 < len && bytes[i + 1] == b'/' {
            return Some(&line[i + 2..]);
        }
        // `#` comment only where `#` is a comment marker.
        if b == b'#' && matches!(ext, "py" | "rb" | "sh" | "bash" | "zsh") {
            return Some(&line[i + 1..]);
        }
        // `--` comment in Lua.
        if b == b'-' && i + 1 < len && bytes[i + 1] == b'-' && ext == "lua" {
            return Some(&line[i + 2..]);
        }
        i += 1;
    }
    None
}
/// Heuristic for files where phased/temporal comments are expected and thus
/// not reported: all shell scripts, plus anything that looks like a test
/// file by path or name convention.
fn is_test_or_script_file(rel_path: &str, ext: &str) -> bool {
    // Shell scripts always count as scripts.
    if matches!(ext, "sh" | "bash" | "zsh") {
        return true;
    }
    let lower = rel_path.to_lowercase();
    let markers = [
        "/test/",
        "/tests/",
        "/__tests__/",
        ".test.",
        ".spec.",
        "_test.",
    ];
    markers.iter().any(|m| lower.contains(m)) || lower.starts_with("test_")
}
/// Per-file accumulator for the comment/documentation heuristics gathered
/// while scanning a single source file line by line.
#[derive(Default)]
struct FileScaffoldingState {
    // Line numbers whose comment text matched the verbose-doc pattern.
    verbose_doc_lines: Vec<usize>,
    // (line, message) pairs captured from throw/raise/panic error strings.
    error_strings: Vec<(usize, String)>,
    // Total AI-vocabulary word hits across all comment lines.
    ai_vocab_count: usize,
    // Total number of comment lines seen in the file.
    comment_line_count: usize,
    // Completed comment blocks as (start line, lines); only blocks of 3+
    // lines are kept (see `flush_block`).
    comment_blocks: Vec<(usize, Vec<String>)>,
    // Start line of the comment block currently being accumulated.
    current_block_start: usize,
    // Lines of the comment block currently being accumulated.
    current_block_lines: Vec<String>,
    // Doc-comment lines immediately preceding the current line.
    recent_doc_lines: Vec<(usize, String)>,
    // (function line, doc line count, body line count) for functions whose
    // documentation dwarfs their body.
    over_doc_findings: Vec<(usize, usize, usize)>,
}

impl FileScaffoldingState {
    /// Creates an empty state: all counters zero, all collections empty.
    /// Delegates to the derived `Default` instead of hand-initializing every
    /// field, so new fields cannot be forgotten here.
    fn new() -> Self {
        Self::default()
    }

    /// Finalizes the comment block currently being accumulated: blocks with
    /// at least 3 lines are recorded in `comment_blocks`; shorter runs are
    /// discarded. Either way the working buffer is left empty.
    fn flush_block(&mut self) {
        if self.current_block_lines.len() >= 3 {
            self.comment_blocks.push((
                self.current_block_start,
                std::mem::take(&mut self.current_block_lines),
            ));
        } else {
            self.current_block_lines.clear();
        }
    }
}
/// Jaccard similarity of the whitespace-separated word sets of `a` and `b`,
/// in `[0, 1]`. Two inputs with no words at all yield 0.0.
fn word_jaccard(a: &str, b: &str) -> f64 {
    let words_a: HashSet<&str> = a.split_whitespace().collect();
    let words_b: HashSet<&str> = b.split_whitespace().collect();
    let union = words_a.union(&words_b).count();
    if union == 0 {
        return 0.0;
    }
    let shared = words_a.intersection(&words_b).count();
    shared as f64 / union as f64
}
/// True when an already-trimmed line consists solely of a comment for the
/// given extension. Empty lines are NOT treated as comments.
fn is_comment_only(trimmed: &str, ext: &str) -> bool {
    if trimmed.is_empty() {
        return false;
    }
    // C-style comments and block-comment continuation lines.
    let c_style =
        ["//", "/*", "*/", "* "].iter().any(|p| trimmed.starts_with(p)) || trimmed == "*";
    if c_style {
        return true;
    }
    // Language-specific line-comment markers.
    match ext {
        "py" | "rb" | "sh" | "bash" | "zsh" => trimmed.starts_with('#'),
        "lua" => trimmed.starts_with("--"),
        _ => false,
    }
}
/// Collects up to `max_lines` trimmed lines belonging to the body of the
/// function whose definition starts at `source_lines[function_line]`.
///
/// Brace languages: tracks `{`/`}` depth — ignoring braces inside single-,
/// double-, or backtick-quoted strings and skipping comment-only lines —
/// and stops once depth returns to zero after the first `{`. Python:
/// collects lines indented deeper than the `def` line. Other extensions
/// yield an empty Vec. Scanning is capped at 200 lines past the definition.
fn walk_function_body<'a>(
    source_lines: &'a [&'a str],
    function_line: usize,
    ext: &str,
    max_lines: usize,
) -> Vec<&'a str> {
    let max_scan = (function_line + 200).min(source_lines.len());
    let mut body_lines = Vec::new();
    if BRACE_LANGUAGES.contains(&ext) {
        let mut found_brace = false;
        let mut depth: i32 = 0;
        for (i, &src_line) in source_lines
            .iter()
            .enumerate()
            .take(max_scan)
            .skip(function_line)
        {
            let trimmed = src_line.trim();
            // Comment-only lines do not affect brace depth.
            if !is_comment_only(trimmed, ext) {
                // Per-line string state so braces inside literals are ignored.
                let mut in_single = false;
                let mut in_double = false;
                let mut in_backtick = false;
                let mut escaped = false;
                for ch in trimmed.chars() {
                    if escaped {
                        escaped = false;
                        continue;
                    }
                    if ch == '\\' && (in_single || in_double) {
                        escaped = true;
                        continue;
                    }
                    match ch {
                        '\'' if !in_double && !in_backtick => in_single = !in_single,
                        '"' if !in_single && !in_backtick => in_double = !in_double,
                        '`' if !in_single && !in_double => in_backtick = !in_backtick,
                        '{' if !in_single && !in_double && !in_backtick => {
                            if !found_brace {
                                found_brace = true;
                            }
                            depth += 1;
                        }
                        '}' if !in_single && !in_double && !in_backtick => {
                            depth -= 1;
                        }
                        _ => {}
                    }
                }
            }
            if found_brace {
                // Push body lines after the definition line; the definition
                // line itself is only pushed for one-line bodies (depth
                // already back to zero).
                if i > function_line || depth == 0 {
                    body_lines.push(trimmed);
                    if body_lines.len() >= max_lines {
                        break;
                    }
                }
                // Depth zero after the opening brace means the body closed.
                if depth == 0 {
                    break;
                }
            }
        }
    } else if ext == "py" {
        // Python: the body is everything indented deeper than the `def` line.
        let def_indent =
            source_lines[function_line].len() - source_lines[function_line].trim_start().len();
        for &line in source_lines.iter().take(max_scan).skip(function_line + 1) {
            if line.trim().is_empty() {
                continue;
            }
            let indent = line.len() - line.trim_start().len();
            if indent <= def_indent {
                break;
            }
            body_lines.push(line.trim());
            if body_lines.len() >= max_lines {
                break;
            }
        }
    }
    body_lines
}
/// Number of "real" body lines of the function at `function_line`: blank
/// lines, comment-only lines, and lone braces are not counted.
fn count_function_body_lines(source_lines: &[&str], function_line: usize, ext: &str) -> usize {
    let body = walk_function_body(source_lines, function_line, ext, 200);
    let mut count = 0;
    for line in body {
        if line.is_empty() || is_comment_only(line, ext) || line == "{" || line == "}" {
            continue;
        }
        count += 1;
    }
    count
}
/// Extracts parameter names from a function-definition line, best effort.
///
/// Takes the text between the first `(` and its matching `)`, splits on
/// commas, and for each segment drops a `: type` annotation and `= default`
/// value, keeps the last whitespace-separated word, and strips leading `&`
/// and `*`. Commas inside generics or tuples are not handled — this is a
/// heuristic for common definition shapes.
fn extract_param_names(line: &str) -> Vec<String> {
    let open = match line.find('(') {
        Some(p) => p,
        None => return Vec::new(),
    };
    // Locate the parenthesis matching the opener.
    let mut close = line.len();
    let mut depth = 0;
    for (offset, ch) in line[open..].char_indices() {
        match ch {
            '(' => depth += 1,
            ')' => {
                depth -= 1;
                if depth == 0 {
                    close = open + offset;
                    break;
                }
            }
            _ => {}
        }
    }
    line[open + 1..close]
        .split(',')
        .filter_map(|segment| {
            let segment = segment.trim();
            if segment.is_empty() {
                return None;
            }
            // Drop `: type` annotations, then `= default` values.
            let head = segment.split(':').next().unwrap_or(segment);
            let head = head.split('=').next().unwrap_or(head).trim();
            // "int count" style: the name is the last word.
            let word = head.split_whitespace().last().unwrap_or("");
            let cleaned = word.trim_start_matches('&').trim_start_matches('*');
            if cleaned.is_empty() {
                None
            } else {
                Some(cleaned.to_string())
            }
        })
        .collect()
}
/// Returns the body of the function at `function_line` as a single
/// newline-joined string of trimmed lines (at most `max_lines` lines).
fn get_function_body_text(
    source_lines: &[&str],
    function_line: usize,
    ext: &str,
    max_lines: usize,
) -> String {
    let body_lines = walk_function_body(source_lines, function_line, ext, max_lines);
    body_lines.join("\n")
}
/// True when an already-trimmed line looks like a documentation comment for
/// the given extension (as opposed to an ordinary comment).
fn is_doc_comment_line(trimmed: &str, ext: &str) -> bool {
    match ext {
        // Rust: outer `///` and inner `//!` doc comments.
        "rs" => trimmed.starts_with("///") || trimmed.starts_with("//!"),
        // C-family / JSDoc style: `///`, `/**`, or a `* ` continuation line.
        "java" | "ts" | "tsx" | "js" | "jsx" | "go" | "c" | "cpp" | "h" | "hpp" | "cs"
        | "swift" | "kt" | "scala" | "php" => ["///", "/**", "* "]
            .iter()
            .any(|prefix| trimmed.starts_with(prefix)),
        // Python / Ruby: `#` comments double as docs here.
        "py" | "rb" => trimmed.starts_with('#'),
        "lua" => trimmed.starts_with("--"),
        _ => false,
    }
}
pub fn execute_detect_scaffolding(args: &HashMap<String, Value>) -> Result<Value, String> {
let path = args
.get("path")
.and_then(|v| v.as_str())
.ok_or("Missing required argument 'path'")?;
let root = Path::new(path);
if !root.exists() {
return Err(format!("Path does not exist: {path}"));
}
let include_todos = args
.get("include_todos")
.and_then(|v| v.as_bool())
.unwrap_or(false);
let include_placeholders = args
.get("include_placeholders")
.and_then(|v| v.as_bool())
.unwrap_or(true);
let include_phased_comments = args
.get("include_phased_comments")
.and_then(|v| v.as_bool())
.unwrap_or(true);
let include_temp_files = args
.get("include_temp_files")
.and_then(|v| v.as_bool())
.unwrap_or(true);
let include_emojis = args
.get("include_emojis")
.and_then(|v| v.as_bool())
.unwrap_or(false);
let include_secrets = args
.get("include_secrets")
.and_then(|v| v.as_bool())
.unwrap_or(false);
let re_phased = Regex::new(PHASED_PATTERN).map_err(|e| format!("Regex error: {e}"))?;
let re_temporal = Regex::new(TEMPORAL_PATTERN).map_err(|e| format!("Regex error: {e}"))?;
let re_scaffold_ident =
Regex::new(SCAFFOLD_IDENT_PATTERN).map_err(|e| format!("Regex error: {e}"))?;
let re_numbered =
Regex::new(NUMBERED_SUFFIX_PATTERN).map_err(|e| format!("Regex error: {e}"))?;
let re_todo = Regex::new(TODO_PATTERN).map_err(|e| format!("Regex error: {e}"))?;
let re_func_def = Regex::new(FUNC_DEF_PATTERN).map_err(|e| format!("Regex error: {e}"))?;
let re_phased_comment =
Regex::new(PHASED_COMMENT_PATTERN).map_err(|e| format!("Regex error: {e}"))?;
let re_impl_phase = Regex::new(r"(?i)\bimplementation:\s*(phase|step|part)\s*\d+")
.map_err(|e| format!("Regex error: {e}"))?;
let re_step_progress =
Regex::new(r"(?i)\bstep\s+\d+\s*/\s*\d+\s*:").map_err(|e| format!("Regex error: {e}"))?;
let placeholder_regexes: Vec<Regex> = PLACEHOLDER_PATTERNS
.iter()
.filter_map(|p| Regex::new(p).ok())
.collect();
let debug_print_regexes: Vec<Regex> = DEBUG_PRINT_PATTERNS
.iter()
.filter_map(|p| Regex::new(p).ok())
.collect();
let secret_regexes: Vec<(Regex, &'static str, &'static str)> = if include_secrets {
SECRET_PATTERNS
.iter()
.filter_map(|&(pat, label, confidence)| {
Regex::new(pat).ok().map(|re| (re, label, confidence))
})
.collect()
} else {
Vec::new()
};
let framework_default_regexes: Vec<Regex> = FRAMEWORK_DEFAULT_PATTERNS
.iter()
.filter_map(|p| Regex::new(p).ok())
.collect();
let re_verbose_doc =
Regex::new(VERBOSE_DOC_PATTERN).map_err(|e| format!("Regex error: {e}"))?;
let re_ai_vocab = Regex::new(AI_VOCAB_PATTERN).map_err(|e| format!("Regex error: {e}"))?;
let re_error_string =
Regex::new(ERROR_STRING_PATTERN).map_err(|e| format!("Regex error: {e}"))?;
let name_regexes: Vec<(&str, &Regex)> = vec![
("phased", &re_phased),
("temporal", &re_temporal),
("scaffold", &re_scaffold_ident),
("numbered", &re_numbered),
];
let mut findings: Vec<Value> = Vec::new();
let mut phased_name_lines: HashSet<(String, usize)> = HashSet::new();
let mut all_comment_blocks: Vec<(String, usize, Vec<String>)> = Vec::new();
let walker = WalkBuilder::new(root)
.git_ignore(true)
.git_global(true)
.git_exclude(true)
.hidden(true)
.build();
for entry in walker.flatten() {
let entry_path = entry.path();
if entry_path.is_dir() || !is_source_file(entry_path) {
continue;
}
let source = match fs::read_to_string(entry_path) {
Ok(s) => s,
Err(_) => continue, };
let rel_path = entry_path
.strip_prefix(root)
.unwrap_or(entry_path)
.to_string_lossy()
.to_string();
let ext = entry_path
.extension()
.and_then(|e| e.to_str())
.unwrap_or("");
let string_mask = compute_multiline_string_mask(&source, ext);
let source_lines: Vec<&str> = source.lines().collect();
let mut file_state = FileScaffoldingState::new();
for (line_num_0, line) in source_lines.iter().enumerate() {
let line_num = line_num_0 + 1;
if line_num_0 < string_mask.len() && string_mask[line_num_0] {
continue;
}
fn has_placeholder_body(
source_lines: &[&str],
function_line: usize,
placeholder_res: &[Regex],
) -> bool {
let end = (function_line + 10).min(source_lines.len());
for line in &source_lines[function_line..end] {
let trimmed = line.trim();
if placeholder_res.iter().any(|re| re.is_match(trimmed)) {
return true;
}
if trimmed.starts_with('}')
|| (trimmed.starts_with(';') && !trimmed.contains('('))
{
break;
}
}
false
}
for caps in re_func_def.captures_iter(line) {
let cap_match = caps.get(1).or_else(|| caps.get(2));
let name = cap_match.map(|m| m.as_str()).unwrap_or("");
if let Some(ref m) = cap_match {
if is_inside_string_literal(line, m.start()) {
continue;
}
}
for &(category, regex) in &name_regexes {
if regex.is_match(name) {
if category == "phased" {
phased_name_lines.insert((rel_path.clone(), line_num));
}
let confidence = if category == "scaffold"
&& re_scaffold_ident.is_match(name)
{
if has_placeholder_body(&source_lines, line_num_0, &placeholder_regexes)
{
"high"
} else {
"low" }
} else if category == "phased" {
"low"
} else {
"high"
};
findings.push(json!({
"file": rel_path,
"line": line_num,
"category": "scaffolding_name",
"match_text": name,
"pattern": category,
"confidence": confidence,
}));
}
}
if file_state.recent_doc_lines.len() >= 6 {
let doc_count = file_state.recent_doc_lines.len();
let body_count = count_function_body_lines(&source_lines, line_num_0, ext);
if body_count > 0 && doc_count > 3 * body_count {
file_state
.over_doc_findings
.push((line_num, doc_count, body_count));
}
}
if !file_state.recent_doc_lines.is_empty() {
let doc_text: String = file_state
.recent_doc_lines
.iter()
.map(|(_, text)| text.as_str())
.collect::<Vec<_>>()
.join(" ")
.to_lowercase();
let params = extract_param_names(line);
for param in ¶ms {
if !param.starts_with('_') {
continue;
}
let stripped = ¶m[1..];
if stripped.is_empty() || stripped == "self" || stripped == "cls" {
continue;
}
if doc_text.contains(&stripped.to_lowercase()) {
findings.push(json!({
"file": rel_path,
"line": line_num,
"category": "ignored_param_documented",
"match_text": format!("param '{}' documented but ignored", param),
"pattern": "ignored_param_documented",
"confidence": "medium",
}));
}
}
}
if !has_placeholder_body(&source_lines, line_num_0, &placeholder_regexes) {
let name_lower = name.to_lowercase();
let mut body_cache: Option<String> = None;
for expectation in ALGORITHM_EXPECTATIONS {
if name_lower.contains(expectation.term) {
if file_has_valid_import(&source, expectation.valid_imports) {
continue;
}
let body = body_cache.get_or_insert_with(|| {
get_function_body_text(&source_lines, line_num_0, ext, 50)
});
if !body.is_empty() {
let keywords = get_lang_keywords(expectation, ext);
let has_keyword = keywords.iter().any(|kw| body.contains(kw));
if !has_keyword {
findings.push(json!({
"file": rel_path,
"line": line_num,
"category": "misleading_name",
"match_text": format!("'{}' claims '{}' but body lacks expected keywords", name, expectation.term),
"pattern": "misleading_name",
"confidence": "low",
}));
}
}
}
}
}
}
if include_todos {
for m in re_todo.find_iter(line) {
if is_inside_string_literal(line, m.start()) {
continue;
}
let bytes = line.as_bytes();
let before_slash = m.start() > 0 && bytes[m.start() - 1] == b'/';
let after_slash = m.end() < bytes.len() && bytes[m.end()] == b'/';
if before_slash || after_slash {
continue;
}
if m.as_str() == "XXX" {
let before_char = if m.start() > 0 {
line.chars().nth(m.start() - 1)
} else {
None
};
let after_char = if m.end() < line.len() {
line.chars().nth(m.end())
} else {
None
};
if let Some(c) = before_char {
if c.is_ascii_digit() || c == '-' || c == 'X' {
continue;
}
}
if let Some(c) = after_char {
if c.is_ascii_digit() || c == '-' || c == 'X' {
continue;
}
}
}
findings.push(json!({
"file": rel_path,
"line": line_num,
"category": "todo",
"match_text": m.as_str(),
"pattern": "todo_marker",
"confidence": "low",
}));
}
}
if include_placeholders {
for re in &placeholder_regexes {
if re.is_match(line) {
if ext == "py" && line.trim() == "pass" {
let in_control_block = (0..line_num_0)
.rev()
.find(|&i| !source_lines[i].trim().is_empty())
.map(|i| {
let prev = source_lines[i].trim();
prev.ends_with(':') && {
let lc = prev.to_lowercase();
lc.starts_with("except")
|| lc.starts_with("finally")
|| lc.starts_with("else")
|| lc.starts_with("elif")
|| lc.starts_with("if ")
|| lc.starts_with("for ")
|| lc.starts_with("while ")
|| lc.starts_with("with ")
|| lc.contains("def __init__")
|| lc.contains("def __new__")
|| (lc.starts_with("def ")
&& (lc.contains("(self") || lc.contains("(cls")))
}
})
.unwrap_or(false);
if in_control_block {
break;
}
}
findings.push(json!({
"file": rel_path,
"line": line_num,
"category": "placeholder",
"match_text": line.trim(),
"pattern": "placeholder_body",
"confidence": "high",
}));
break; }
}
}
for re in &debug_print_regexes {
if let Some(m) = re.find(line) {
if is_inside_string_literal(line, m.start()) {
break;
}
findings.push(json!({
"file": rel_path,
"line": line_num,
"category": "debug_print",
"match_text": line.trim(),
"pattern": "debug_print",
"confidence": "high",
}));
break;
}
}
if include_emojis {
let emojis = find_emojis_in_line(line);
for (emoji_char, _byte_pos) in emojis {
findings.push(json!({
"file": rel_path,
"line": line_num,
"category": "emoji",
"match_text": emoji_char.to_string(),
"pattern": "emoji",
"confidence": "medium",
}));
}
}
if include_secrets && !is_env_var_line(line) {
for (re, label, confidence) in &secret_regexes {
if let Some(m) = re.find(line) {
let matched = m.as_str();
if !is_placeholder_secret(matched) {
findings.push(json!({
"file": rel_path,
"line": line_num,
"category": "hardcoded_secret",
"match_text": redact_secret(matched),
"pattern": label,
"confidence": confidence,
}));
break; }
}
}
}
let comment = extract_comment(line, ext);
if include_phased_comments && !is_test_or_script_file(&rel_path, ext) {
if let Some(comment) = comment {
if let Some(m) = re_phased_comment.find(comment) {
if !phased_name_lines.contains(&(rel_path.clone(), line_num)) {
let lower = comment.to_lowercase();
let looks_like_scaffolding = lower.contains("implement")
|| lower.contains("build")
|| lower.contains("create")
|| lower.contains("setup")
|| lower.contains("todo");
let has_domain_context = lower.contains("ms)")
|| lower.contains("sec)")
|| lower.contains("ops)")
|| lower.contains("latency")
|| lower.contains("throughput")
|| lower.contains("handshake")
|| lower.contains("lexing")
|| lower.contains("parsing")
|| lower.contains("compilation")
|| lower.contains("detection")
|| lower.contains("optimization")
|| lower.contains("rendering")
|| lower.contains("pipeline")
|| lower.contains("migration")
|| lower.contains("deployment")
|| lower.contains("authentication")
|| lower.contains("authorization")
|| lower.contains("serialization");
if looks_like_scaffolding && !has_domain_context {
findings.push(json!({
"file": rel_path,
"line": line_num,
"category": "phased_comment",
"match_text": m.as_str(),
"pattern": "phased_comment",
"confidence": "low", }));
}
}
}
}
}
if include_phased_comments && !is_test_or_script_file(&rel_path, ext) {
if let Some(comment) = comment {
if let Some(m) = re_impl_phase.find(comment) {
if !phased_name_lines.contains(&(rel_path.clone(), line_num)) {
findings.push(json!({
"file": rel_path,
"line": line_num,
"category": "phased_comment",
"match_text": m.as_str(),
"pattern": "implementation_phase",
"confidence": "low",
}));
}
}
}
}
if include_phased_comments && !is_test_or_script_file(&rel_path, ext) {
if let Some(comment) = comment {
if let Some(m) = re_step_progress.find(comment) {
if !phased_name_lines.contains(&(rel_path.clone(), line_num)) {
findings.push(json!({
"file": rel_path,
"line": line_num,
"category": "phased_comment",
"match_text": m.as_str(),
"pattern": "step_progress",
"confidence": "low",
}));
}
}
}
}
for re in &framework_default_regexes {
if re.is_match(line) {
findings.push(json!({
"file": rel_path,
"line": line_num,
"category": "framework_default",
"match_text": line.trim(),
"pattern": "framework_default",
"confidence": "medium",
}));
break;
}
}
if let Some(comment) = comment {
file_state.comment_line_count += 1;
if re_verbose_doc.is_match(comment) {
file_state.verbose_doc_lines.push(line_num);
}
file_state.ai_vocab_count += re_ai_vocab.find_iter(comment).count();
if file_state.current_block_lines.is_empty() {
file_state.current_block_start = line_num;
}
file_state
.current_block_lines
.push(comment.trim().to_string());
let trimmed = line.trim();
if is_doc_comment_line(trimmed, ext) {
file_state
.recent_doc_lines
.push((line_num, trimmed.to_string()));
}
} else {
file_state.flush_block();
if !line.trim().is_empty() {
file_state.recent_doc_lines.clear();
}
}
if let Some(caps) = re_error_string.captures(line) {
if let Some(msg) = caps.get(1) {
file_state
.error_strings
.push((line_num, msg.as_str().to_string()));
}
}
}
file_state.flush_block();
for (fn_line, doc_count, body_count) in &file_state.over_doc_findings {
findings.push(json!({
"file": rel_path,
"line": fn_line,
"category": "over_documented",
"match_text": format!("{} doc lines for {}-line body ({:.1}:1 ratio)", doc_count, body_count, *doc_count as f64 / (*body_count).max(1) as f64),
"pattern": "over_documented",
"confidence": "low",
}));
}
if file_state.verbose_doc_lines.len() >= 3 {
findings.push(json!({
"file": rel_path,
"line": file_state.verbose_doc_lines[0],
"category": "verbose_doc",
"match_text": format!("{} verbose doc comments in file", file_state.verbose_doc_lines.len()),
"pattern": "verbose_doc",
"confidence": "low",
}));
}
let mut error_groups: HashMap<&str, Vec<usize>> = HashMap::new();
for (line_nr, msg) in &file_state.error_strings {
error_groups.entry(msg.as_str()).or_default().push(*line_nr);
}
for (msg, lines) in &error_groups {
if lines.len() >= 3 {
findings.push(json!({
"file": rel_path,
"line": lines[0],
"category": "identical_error",
"match_text": format!("\"{}\" repeated {} times", msg, lines.len()),
"pattern": "identical_error",
"confidence": "medium",
}));
}
}
if file_state.comment_line_count > 0 {
let density =
(file_state.ai_vocab_count as f64 / file_state.comment_line_count as f64) * 100.0;
if file_state.ai_vocab_count >= 5 && density >= 5.0 {
findings.push(json!({
"file": rel_path,
"line": 1,
"category": "ai_vocabulary",
"match_text": format!(
"{} AI vocabulary words in {} comment lines ({:.0}%)",
file_state.ai_vocab_count, file_state.comment_line_count, density
),
"pattern": "ai_vocabulary",
"confidence": "low",
}));
}
}
let blocks = &file_state.comment_blocks;
for i in 0..blocks.len() {
for j in (i + 1)..blocks.len() {
let text_a = blocks[i].1.join(" ");
let text_b = blocks[j].1.join(" ");
if word_jaccard(&text_a, &text_b) >= 0.85 {
findings.push(json!({
"file": rel_path,
"line": blocks[i].0,
"category": "comment_clone",
"match_text": format!(
"Near-identical comment blocks at lines {} and {}",
blocks[i].0, blocks[j].0
),
"pattern": "comment_clone",
"confidence": "medium",
}));
}
}
}
for (start_line, lines) in file_state.comment_blocks {
all_comment_blocks.push((rel_path.clone(), start_line, lines));
}
}
if all_comment_blocks.len() <= 2000 {
let mut cross_file_count = 0usize;
let mut reported_pairs: HashSet<(String, usize, String, usize)> = HashSet::new();
for i in 0..all_comment_blocks.len() {
if cross_file_count >= 20 {
break;
}
for j in (i + 1)..all_comment_blocks.len() {
if cross_file_count >= 20 {
break;
}
let (ref file_a, line_a, ref lines_a) = all_comment_blocks[i];
let (ref file_b, line_b, ref lines_b) = all_comment_blocks[j];
if file_a == file_b {
continue;
}
let pair = if (file_a, line_a) < (file_b, line_b) {
(file_a.clone(), line_a, file_b.clone(), line_b)
} else {
(file_b.clone(), line_b, file_a.clone(), line_a)
};
if reported_pairs.contains(&pair) {
continue;
}
let text_a = lines_a.join(" ");
let text_b = lines_b.join(" ");
if word_jaccard(&text_a, &text_b) >= 0.85 {
findings.push(json!({
"file": file_a,
"line": line_a,
"category": "comment_clone",
"match_text": format!(
"Near-identical comment blocks: {}:{} and {}:{}",
file_a, line_a, file_b, line_b
),
"pattern": "cross_file_comment_clone",
"confidence": "medium",
}));
reported_pairs.insert(pair);
cross_file_count += 1;
}
}
}
}
if include_temp_files {
findings.extend(detect_temp_file_findings(root));
}
let limit = args.get("limit").and_then(|v| v.as_u64()).unwrap_or(200) as usize;
let offset = args.get("offset").and_then(|v| v.as_u64()).unwrap_or(0) as usize;
let total_count = findings.len();
let page: Vec<Value> = findings.into_iter().skip(offset).take(limit).collect();
let has_more = offset + page.len() < total_count;
let result = json!({
"path": path,
"total_findings": total_count,
"offset": offset,
"limit": limit,
"has_more": has_more,
"findings": page,
});
Ok(json!({
"content": [{
"type": "text",
"text": serde_json::to_string_pretty(&result).unwrap_or_default()
}]
}))
}
/// Walk `root` (respecting gitignore rules) and flag file or directory names
/// that match suspicious naming patterns (phased, temporal, temp, scaffold,
/// numbered, delivery). Returns one JSON finding per flagged entry; the first
/// matching pattern in the table claims the entry.
fn detect_temp_file_findings(root: &Path) -> Vec<Value> {
    // (category, pattern source, confidence) — compiled once per call, in
    // priority order so the earliest match wins.
    let specs: [(&str, &str, &str); 11] = [
        ("phased", PHASED_PATTERN, "medium"),
        ("temporal", TEMPORAL_PATTERN, "medium"),
        ("temp", TEMP_PREFIX_PATTERN, "medium"),
        ("temp", TEMP_SUFFIX_PATTERN, "medium"),
        ("scaffold", SCAFFOLD_FILE_PATTERN, "medium"),
        ("numbered", NUMBERED_FILE_SUFFIX_PATTERN, "medium"),
        ("numbered", NUMBERED_SUFFIX_PATTERN, "medium"),
        ("delivery", DELIVERY_FILE_PATTERN, "high"),
        ("delivery", DELIVERY_FILE_SUFFIX_PATTERN, "high"),
        ("delivery", CLAUDE_FILE_PATTERN, "high"),
        ("delivery", DELIVERY_SIGNOFF_PATTERN, "high"),
    ];
    let checks: Vec<(&str, Regex, &str)> = specs
        .iter()
        .map(|&(category, pattern, confidence)| {
            (category, Regex::new(pattern).unwrap(), confidence)
        })
        .collect();
    let mut findings: Vec<Value> = Vec::new();
    let walker = WalkBuilder::new(root)
        .git_ignore(true)
        .git_global(true)
        .git_exclude(true)
        .hidden(true)
        .build();
    for entry in walker.flatten() {
        let entry_path = entry.path();
        // Skip entries whose file name is not valid UTF-8.
        let name = match entry_path.file_name().and_then(|n| n.to_str()) {
            Some(n) => n,
            None => continue,
        };
        // Patterns are tried against both the full name and the stem so that
        // suffix-style patterns still fire when an extension is present.
        let stem = entry_path
            .file_stem()
            .and_then(|s| s.to_str())
            .unwrap_or(name);
        let rel_path = entry_path
            .strip_prefix(root)
            .unwrap_or(entry_path)
            .to_string_lossy()
            .to_string();
        // The walk root itself yields an empty/"." relative path — ignore it.
        if rel_path.is_empty() || rel_path == "." {
            continue;
        }
        let hit = checks
            .iter()
            .find(|(_, regex, _)| regex.is_match(name) || regex.is_match(stem));
        if let Some((category, regex, confidence)) = hit {
            findings.push(json!({
                "path": rel_path,
                "name": name,
                "category": format!("temp_file_{}", category),
                "pattern": regex.as_str(),
                "confidence": confidence,
            }));
        }
    }
    findings
}
#[cfg(test)]
mod tests {
use super::*;
use std::io::Write;
use tempfile::TempDir;
#[test]
fn extract_comment_rust_double_slash() {
    // A trailing `//` comment on a Rust line is returned without the marker.
    let comment = extract_comment(" let x = 1; // Phase 1 init", "rs");
    assert_eq!(comment, Some(" Phase 1 init"));
}
#[test]
fn extract_comment_python_hash() {
    // A trailing `#` comment on a Python line is returned without the marker.
    let comment = extract_comment("x = 1 # Step 2 setup", "py");
    assert_eq!(comment, Some(" Step 2 setup"));
}
#[test]
fn extract_comment_lua_double_dash() {
    // A trailing `--` comment on a Lua line is returned without the marker.
    let comment = extract_comment("local x = 1 -- Part 3", "lua");
    assert_eq!(comment, Some(" Part 3"));
}
#[test]
fn extract_comment_inside_string_rejected() {
    // `//` inside a string literal must not count as a comment, while a real
    // trailing comment after a string literal still must.
    let after_string = extract_comment(r#"let s = "http://example.com"; // real comment"#, "rs");
    assert_eq!(after_string, Some(" real comment"));
    let inside_string = extract_comment(r#"let s = "// not a comment";"#, "rs");
    assert_eq!(inside_string, None);
}
#[test]
fn extract_comment_block_star_continuation() {
    // A `*`-prefixed continuation line of a block comment yields its text.
    let comment = extract_comment(" * Phase 2: continue block", "rs");
    assert_eq!(comment, Some("Phase 2: continue block"));
}
#[test]
fn extract_comment_no_comment() {
    // A plain code line with no comment marker yields None.
    let comment = extract_comment("let phase = 1;", "rs");
    assert_eq!(comment, None);
}
#[test]
fn phased_comment_pattern_matches() {
    // All common phase/step/part spellings should match the pattern.
    let re = Regex::new(PHASED_COMMENT_PATTERN).unwrap();
    let positives = [
        "Phase 1", "phase 2", "Phase 2B", "step-3", "Step 3", "Part 1", "part_4", "PHASE 10",
        "Phase2", "step3a",
    ];
    for text in positives {
        assert!(re.is_match(text), "expected a match for {:?}", text);
    }
}
#[test]
fn phased_comment_pattern_non_matches() {
    // Words that merely contain "phase"/"step"/"part", or lack a trailing
    // number, must not match.
    let re = Regex::new(PHASED_COMMENT_PATTERN).unwrap();
    let negatives = [
        "phaser",
        "stepping",
        "partial",
        "the phase of the moon",
        "a step ahead",
    ];
    for text in negatives {
        assert!(!re.is_match(text), "expected no match for {:?}", text);
    }
}
#[test]
fn integration_phased_comment_detection() {
    // Of the two "Phase 1" comments written below, exactly one finding (the
    // one on line 4) is expected, reported at low confidence.
    let dir = TempDir::new().unwrap();
    fs::write(
        dir.path().join("main.rs"),
        "fn main() {\n // Phase 1: initialization\n let x = 1;\n // Phase 1: Implement basic structure\n let y = x + 1;\n}\n",
    )
    .unwrap();
    let mut args = HashMap::new();
    args.insert(
        "path".to_string(),
        Value::String(dir.path().to_string_lossy().to_string()),
    );
    for key in ["include_todos", "include_placeholders"] {
        args.insert(key.to_string(), Value::Bool(false));
    }
    let response = execute_detect_scaffolding(&args).unwrap();
    let parsed: Value =
        serde_json::from_str(response["content"][0]["text"].as_str().unwrap()).unwrap();
    let findings = parsed["findings"].as_array().unwrap();
    let phased = findings_by_category(findings, "phased_comment");
    assert_eq!(phased.len(), 1);
    assert_eq!(phased[0]["line"], 4);
    assert!(phased[0]["match_text"].as_str().unwrap().contains("Phase 1"));
    assert_eq!(phased[0]["confidence"], "low");
}
#[test]
fn integration_pass_in_except_not_flagged() {
    // `pass` inside except/finally is legitimate control flow, not a stub.
    let dir = TempDir::new().unwrap();
    fs::write(
        dir.path().join("handler.py"),
        "def handle():\n try:\n do_something()\n except:\n pass\n finally:\n pass\n",
    )
    .unwrap();
    let mut args = HashMap::new();
    args.insert(
        "path".to_string(),
        Value::String(dir.path().to_string_lossy().to_string()),
    );
    for key in ["include_todos", "include_phased_comments"] {
        args.insert(key.to_string(), Value::Bool(false));
    }
    let response = execute_detect_scaffolding(&args).unwrap();
    let parsed: Value =
        serde_json::from_str(response["content"][0]["text"].as_str().unwrap()).unwrap();
    let findings = parsed["findings"].as_array().unwrap();
    let placeholders = findings_by_category(findings, "placeholder");
    assert!(
        placeholders.is_empty(),
        "pass in except/finally blocks should not be flagged as placeholder, got: {:?}",
        placeholders
    );
}
#[test]
fn test_pass_in_init_not_flagged() {
    // `pass` bodies inside class methods are not treated as placeholders.
    let dir = TempDir::new().unwrap();
    fs::write(
        dir.path().join("models.py"),
        "class Foo:\n def __init__(self):\n pass\n\nclass Bar:\n def process(self):\n pass\n",
    )
    .unwrap();
    let mut args = HashMap::new();
    args.insert(
        "path".to_string(),
        Value::String(dir.path().to_string_lossy().to_string()),
    );
    for key in ["include_todos", "include_phased_comments"] {
        args.insert(key.to_string(), Value::Bool(false));
    }
    let response = execute_detect_scaffolding(&args).unwrap();
    let parsed: Value =
        serde_json::from_str(response["content"][0]["text"].as_str().unwrap()).unwrap();
    let findings = parsed["findings"].as_array().unwrap();
    let placeholders = findings_by_category(findings, "placeholder");
    assert!(
        placeholders.is_empty(),
        "pass in __init__ and class methods should not be flagged as placeholder, got: {:?}",
        placeholders
    );
}
#[test]
fn integration_pass_in_bare_function_still_flagged() {
    // A top-level function whose whole body is `pass` is still a stub.
    let dir = TempDir::new().unwrap();
    fs::write(dir.path().join("stub.py"), "def stub():\n pass\n").unwrap();
    let mut args = HashMap::new();
    args.insert(
        "path".to_string(),
        Value::String(dir.path().to_string_lossy().to_string()),
    );
    for key in ["include_todos", "include_phased_comments"] {
        args.insert(key.to_string(), Value::Bool(false));
    }
    let response = execute_detect_scaffolding(&args).unwrap();
    let parsed: Value =
        serde_json::from_str(response["content"][0]["text"].as_str().unwrap()).unwrap();
    let findings = parsed["findings"].as_array().unwrap();
    let placeholders = findings_by_category(findings, "placeholder");
    assert_eq!(
        placeholders.len(),
        1,
        "pass in bare function body should still be flagged"
    );
}
#[test]
fn integration_phased_comments_in_test_file_skipped() {
    // Step comments in `*.test.ts` files are test narration, not scaffolding.
    let dir = TempDir::new().unwrap();
    fs::write(
        dir.path().join("foo.test.ts"),
        "// Step 1: setup\nconst x = 1;\n// Step 2: verify\nexpect(x).toBe(1);\n",
    )
    .unwrap();
    let mut args = HashMap::new();
    args.insert(
        "path".to_string(),
        Value::String(dir.path().to_string_lossy().to_string()),
    );
    for key in ["include_todos", "include_placeholders"] {
        args.insert(key.to_string(), Value::Bool(false));
    }
    let response = execute_detect_scaffolding(&args).unwrap();
    let parsed: Value =
        serde_json::from_str(response["content"][0]["text"].as_str().unwrap()).unwrap();
    let findings = parsed["findings"].as_array().unwrap();
    let phased = findings_by_category(findings, "phased_comment");
    assert!(
        phased.is_empty(),
        "phased comments in test files should be skipped, got: {:?}",
        phased
    );
}
#[test]
fn integration_phased_comments_in_shell_script_skipped() {
    // Step comments in shell scripts describe procedure and are not flagged.
    let dir = TempDir::new().unwrap();
    fs::write(
        dir.path().join("deploy.sh"),
        "#!/bin/bash\n# Step 1: build\nmake build\n# Step 2: deploy\nmake deploy\n",
    )
    .unwrap();
    let mut args = HashMap::new();
    args.insert(
        "path".to_string(),
        Value::String(dir.path().to_string_lossy().to_string()),
    );
    for key in ["include_todos", "include_placeholders"] {
        args.insert(key.to_string(), Value::Bool(false));
    }
    let response = execute_detect_scaffolding(&args).unwrap();
    let parsed: Value =
        serde_json::from_str(response["content"][0]["text"].as_str().unwrap()).unwrap();
    let findings = parsed["findings"].as_array().unwrap();
    let phased = findings_by_category(findings, "phased_comment");
    assert!(
        phased.is_empty(),
        "phased comments in shell scripts should be skipped, got: {:?}",
        phased
    );
}
#[test]
fn test_scaffolding_findings_have_confidence() {
    // Every finding carries a confidence field, and each category is pinned
    // to its expected level.
    let dir = TempDir::new().unwrap();
    fs::write(
        dir.path().join("main.rs"),
        "fn phase_1_init() {}\n// Phase 2: processing\n// TODO: fix later\n",
    )
    .unwrap();
    let mut args = HashMap::new();
    args.insert(
        "path".to_string(),
        Value::String(dir.path().to_string_lossy().to_string()),
    );
    for key in [
        "include_todos",
        "include_placeholders",
        "include_phased_comments",
    ] {
        args.insert(key.to_string(), Value::Bool(true));
    }
    let response = execute_detect_scaffolding(&args).unwrap();
    let parsed: Value =
        serde_json::from_str(response["content"][0]["text"].as_str().unwrap()).unwrap();
    let findings = parsed["findings"].as_array().unwrap();
    for finding in findings {
        assert!(
            finding.get("confidence").is_some(),
            "Finding should have confidence field: {:?}",
            finding
        );
        let confidence = finding["confidence"].as_str().unwrap();
        assert!(
            matches!(confidence, "high" | "medium" | "low"),
            "Confidence should be high/medium/low, got: {}",
            confidence
        );
    }
    for f in findings_by_category(findings, "scaffolding_name") {
        assert_eq!(f["confidence"], "high");
    }
    for f in findings_by_category(findings, "phased_comment") {
        assert_eq!(f["confidence"], "medium");
    }
    for f in findings_by_category(findings, "todo") {
        assert_eq!(f["confidence"], "low");
    }
}
#[test]
fn test_todos_disabled_by_default() {
    // `include_todos` is deliberately left unset here: the default must be
    // false, so no todo findings should be produced.
    let dir = TempDir::new().unwrap();
    fs::write(
        dir.path().join("main.rs"),
        "fn main() {}\n// TODO: fix later\n",
    )
    .unwrap();
    let mut args = HashMap::new();
    args.insert(
        "path".to_string(),
        Value::String(dir.path().to_string_lossy().to_string()),
    );
    for key in ["include_placeholders", "include_phased_comments"] {
        args.insert(key.to_string(), Value::Bool(false));
    }
    let response = execute_detect_scaffolding(&args).unwrap();
    let parsed: Value =
        serde_json::from_str(response["content"][0]["text"].as_str().unwrap()).unwrap();
    let findings = parsed["findings"].as_array().unwrap();
    let todos = findings_by_category(findings, "todo");
    assert!(
        todos.is_empty(),
        "TODO markers should NOT appear when include_todos defaults to false, got: {:?}",
        todos
    );
}
#[test]
fn integration_phased_comments_disabled() {
    // With include_phased_comments=false, phase comments must be suppressed.
    let dir = TempDir::new().unwrap();
    fs::write(
        dir.path().join("main.rs"),
        "// Phase 1: init\nfn main() {}\n",
    )
    .unwrap();
    let mut args = HashMap::new();
    args.insert(
        "path".to_string(),
        Value::String(dir.path().to_string_lossy().to_string()),
    );
    for key in [
        "include_todos",
        "include_placeholders",
        "include_phased_comments",
    ] {
        args.insert(key.to_string(), Value::Bool(false));
    }
    let response = execute_detect_scaffolding(&args).unwrap();
    let parsed: Value =
        serde_json::from_str(response["content"][0]["text"].as_str().unwrap()).unwrap();
    let findings = parsed["findings"].as_array().unwrap();
    let phased = findings_by_category(findings, "phased_comment");
    assert!(phased.is_empty(), "phased comments should be suppressed");
}
/// Run the scaffolding detector over `dir` with every optional signal enabled
/// except TODO markers, and return its parsed JSON payload.
fn run_scaffolding(dir: &TempDir) -> Value {
    let mut args = HashMap::new();
    args.insert(
        "path".to_string(),
        Value::String(dir.path().to_string_lossy().to_string()),
    );
    for (key, enabled) in [
        ("include_todos", false),
        ("include_placeholders", true),
        ("include_phased_comments", true),
        ("include_temp_files", true),
    ] {
        args.insert(key.to_string(), Value::Bool(enabled));
    }
    let response = execute_detect_scaffolding(&args).unwrap();
    let payload = response["content"][0]["text"].as_str().unwrap();
    serde_json::from_str(payload).unwrap()
}
/// Return references to the findings whose "category" field equals `category`.
fn findings_by_category<'a>(findings: &'a [Value], category: &str) -> Vec<&'a Value> {
    let mut matched = Vec::new();
    for finding in findings {
        if finding["category"] == category {
            matched.push(finding);
        }
    }
    matched
}
#[test]
fn test_delivery_file_detection() {
    // DELIVERY.md and IMPLEMENTATION_PLAN.md are delivery artifacts;
    // README.md is not.
    let dir = TempDir::new().unwrap();
    for (name, body) in [
        ("DELIVERY.md", "# Delivery notes"),
        ("IMPLEMENTATION_PLAN.md", "# Plan"),
        ("README.md", "# Project"),
    ] {
        fs::write(dir.path().join(name), body).unwrap();
    }
    let parsed = run_scaffolding(&dir);
    let findings = parsed["findings"].as_array().unwrap();
    let delivery: Vec<&Value> = findings
        .iter()
        .filter(|f| {
            f.get("category")
                .and_then(|c| c.as_str())
                .map_or(false, |c| c.contains("delivery"))
        })
        .collect();
    assert!(
        delivery.len() >= 2,
        "Should detect DELIVERY.md and IMPLEMENTATION_PLAN.md, got: {:?}",
        delivery
    );
    for d in &delivery {
        assert_eq!(d["confidence"], "high");
    }
}
#[test]
fn test_expanded_placeholder_patterns() {
    // Comment-only function bodies of various placeholder phrasings are all
    // reported.
    let dir = TempDir::new().unwrap();
    fs::write(
        dir.path().join("stub.ts"),
        "function doSomething() {\n // Implementation goes here\n}\nfunction doOther() {\n // Add logic here\n}\nfunction doThird() {\n // TODO: implement\n}\n",
    )
    .unwrap();
    let parsed = run_scaffolding(&dir);
    let findings = parsed["findings"].as_array().unwrap();
    let placeholders = findings_by_category(findings, "placeholder");
    assert!(
        placeholders.len() >= 3,
        "Should detect all 3 comment-based placeholders, got {} findings: {:?}",
        placeholders.len(),
        placeholders
    );
}
#[test]
fn test_framework_default_detection() {
    // A stock CDK welcome comment is reported as a framework default.
    let dir = TempDir::new().unwrap();
    fs::write(
        dir.path().join("app.ts"),
        "// Welcome to your CDK TypeScript app\nconst app = new App();\n",
    )
    .unwrap();
    let parsed = run_scaffolding(&dir);
    let findings = parsed["findings"].as_array().unwrap();
    let defaults = findings_by_category(findings, "framework_default");
    assert_eq!(
        defaults.len(),
        1,
        "Should detect framework default comment: {:?}",
        defaults
    );
    assert_eq!(defaults[0]["confidence"], "medium");
}
#[test]
fn test_domain_keyword_fp_reduction() {
    // A phase comment mentioning a domain keyword ("authentication") is
    // treated as real project language, not scaffolding.
    let dir = TempDir::new().unwrap();
    fs::write(
        dir.path().join("auth.rs"),
        "// Phase 3: Build authentication pipeline\nfn build_auth() {}\n",
    )
    .unwrap();
    let parsed = run_scaffolding(&dir);
    let findings = parsed["findings"].as_array().unwrap();
    let phased = findings_by_category(findings, "phased_comment");
    assert!(
        phased.is_empty(),
        "Phase N with domain keyword 'authentication' should NOT be flagged, got: {:?}",
        phased
    );
}
#[test]
fn test_verbose_doc_single_not_flagged() {
    // One verbose doc comment is below the per-file reporting threshold.
    let dir = TempDir::new().unwrap();
    fs::write(
        dir.path().join("service.ts"),
        "// This function handles user authentication\nfunction authenticate() {}\n",
    )
    .unwrap();
    let parsed = run_scaffolding(&dir);
    let findings = parsed["findings"].as_array().unwrap();
    let verbose = findings_by_category(findings, "verbose_doc");
    assert!(
        verbose.is_empty(),
        "Single verbose doc comment should NOT trigger finding, got: {:?}",
        verbose
    );
}
#[test]
fn test_verbose_doc_threshold_flagged() {
    // Three verbose doc comments hit the threshold and collapse into a single
    // per-file finding.
    let dir = TempDir::new().unwrap();
    fs::write(
        dir.path().join("service.ts"),
        "// This function handles user authentication\nfunction authenticate() {}\n// This method performs data validation\nfunction validate() {}\n// This function creates a new session\nfunction createSession() {}\n",
    )
    .unwrap();
    let parsed = run_scaffolding(&dir);
    let findings = parsed["findings"].as_array().unwrap();
    let verbose = findings_by_category(findings, "verbose_doc");
    assert_eq!(
        verbose.len(),
        1,
        "3+ verbose doc comments should produce exactly 1 finding per file: {:?}",
        verbose
    );
    assert_eq!(verbose[0]["confidence"], "low");
}
#[test]
fn test_identical_error_strings() {
    // Three copies of the same error message collapse into one finding.
    let dir = TempDir::new().unwrap();
    fs::write(
        dir.path().join("handler.ts"),
        "function a() { throw new Error(\"Invalid input\"); }\nfunction b() { throw new Error(\"Invalid input\"); }\nfunction c() { throw new Error(\"Invalid input\"); }\n",
    )
    .unwrap();
    let parsed = run_scaffolding(&dir);
    let findings = parsed["findings"].as_array().unwrap();
    let errors = findings_by_category(findings, "identical_error");
    assert_eq!(
        errors.len(),
        1,
        "3 identical error strings should produce 1 finding: {:?}",
        errors
    );
    assert_eq!(errors[0]["confidence"], "medium");
}
#[test]
fn test_identical_error_unique_ok() {
    // Distinct error messages never reach the repetition threshold.
    let dir = TempDir::new().unwrap();
    fs::write(
        dir.path().join("handler.ts"),
        "function a() { throw new Error(\"Name required\"); }\nfunction b() { throw new Error(\"Email invalid\"); }\nfunction c() { throw new Error(\"Age out of range\"); }\n",
    )
    .unwrap();
    let parsed = run_scaffolding(&dir);
    let findings = parsed["findings"].as_array().unwrap();
    let errors = findings_by_category(findings, "identical_error");
    assert!(
        errors.is_empty(),
        "3 different error messages should NOT trigger finding, got: {:?}",
        errors
    );
}
#[test]
fn test_ai_vocabulary_density_high() {
    // Comments saturated with AI-flavored vocabulary produce one finding.
    let dir = TempDir::new().unwrap();
    fs::write(
        dir.path().join("service.ts"),
        "// This comprehensive module provides robust functionality\n// It leverages elegant patterns to streamline operations\n// We utilize this to facilitate orchestration\n// The paradigm encapsulates the comprehensive approach\nfunction doWork() {}\n",
    )
    .unwrap();
    let parsed = run_scaffolding(&dir);
    let findings = parsed["findings"].as_array().unwrap();
    let ai_vocab = findings_by_category(findings, "ai_vocabulary");
    assert_eq!(
        ai_vocab.len(),
        1,
        "High AI vocabulary density should produce finding: {:?}",
        ai_vocab
    );
    assert_eq!(ai_vocab[0]["confidence"], "low");
}
#[test]
fn test_ai_vocabulary_density_low() {
    // A light sprinkling of buzzwords stays below the density threshold.
    let dir = TempDir::new().unwrap();
    fs::write(
        dir.path().join("service.ts"),
        "// This robust module handles authentication\n// It provides comprehensive user management\n// Normal technical comment about caching\n// Another normal comment about database queries\nfunction doWork() {}\n",
    )
    .unwrap();
    let parsed = run_scaffolding(&dir);
    let findings = parsed["findings"].as_array().unwrap();
    let ai_vocab = findings_by_category(findings, "ai_vocabulary");
    assert!(
        ai_vocab.is_empty(),
        "Low AI vocabulary density should NOT trigger finding, got: {:?}",
        ai_vocab
    );
}
#[test]
fn test_comment_clone_detection() {
    // Two identical 4-line comment blocks in one file yield one clone finding.
    let dir = TempDir::new().unwrap();
    let block = "// Initialize the database connection pool\n// Set up retry logic and timeout handling\n// Configure logging for all queries\n// Return the configured connection object\n";
    let content = format!("{}function initDb() {{}}\n\n{}function initDb2() {{}}\n", block, block);
    fs::write(dir.path().join("service.ts"), content).unwrap();
    let parsed = run_scaffolding(&dir);
    let findings = parsed["findings"].as_array().unwrap();
    let clones = findings_by_category(findings, "comment_clone");
    assert_eq!(
        clones.len(),
        1,
        "Two near-identical comment blocks should produce 1 finding: {:?}",
        clones
    );
    assert_eq!(clones[0]["confidence"], "medium");
}
#[test]
fn test_comment_clone_different_ok() {
    // Two unrelated comment blocks fall below the similarity threshold.
    let dir = TempDir::new().unwrap();
    fs::write(
        dir.path().join("service.ts"),
        "// Initialize the database connection pool\n// Set up retry logic and timeout handling\n// Configure logging for all queries\nfunction initDb() {}\n\n// Parse user authentication tokens\n// Validate JWT signatures and expiry\n// Extract role-based permissions from claims\nfunction parseAuth() {}\n",
    )
    .unwrap();
    let parsed = run_scaffolding(&dir);
    let findings = parsed["findings"].as_array().unwrap();
    let clones = findings_by_category(findings, "comment_clone");
    assert!(
        clones.is_empty(),
        "Two different comment blocks should NOT trigger finding, got: {:?}",
        clones
    );
}
#[test]
fn test_multi_signal_file() {
    // A project can trip several independent signals at once: a delivery
    // file, verbose doc comments, and dense AI vocabulary.
    let dir = TempDir::new().unwrap();
    fs::write(dir.path().join("DELIVERY.md"), "# Delivery").unwrap();
    fs::write(
        dir.path().join("app.ts"),
        "// This function handles comprehensive robust elegant work\n// This method performs leverage and streamline tasks\n// This class provides utilize facilitate orchestrate things\nfunction main() {}\n",
    )
    .unwrap();
    let parsed = run_scaffolding(&dir);
    let findings = parsed["findings"].as_array().unwrap();
    let delivery: Vec<&Value> = findings
        .iter()
        .filter(|f| {
            f.get("category")
                .and_then(|c| c.as_str())
                .map_or(false, |c| c.contains("delivery"))
        })
        .collect();
    let verbose = findings_by_category(findings, "verbose_doc");
    let ai_vocab = findings_by_category(findings, "ai_vocabulary");
    assert!(!delivery.is_empty(), "Should detect delivery file");
    assert!(!verbose.is_empty(), "Should detect verbose doc comments");
    assert!(!ai_vocab.is_empty(), "Should detect AI vocabulary density");
}
#[test]
fn test_phased_comment_with_domain_context() {
    // Phase/step/part comments naming real domains (deployment, optimization,
    // serialization) are all suppressed.
    let dir = TempDir::new().unwrap();
    fs::write(
        dir.path().join("deploy.rs"),
        "// Phase 2: Build deployment pipeline\n// Step 1: Create optimization pass\n// Part 3: Implement serialization layer\nfn main() {}\n",
    )
    .unwrap();
    let parsed = run_scaffolding(&dir);
    let findings = parsed["findings"].as_array().unwrap();
    let phased = findings_by_category(findings, "phased_comment");
    assert!(
        phased.is_empty(),
        "Phased comments with domain context (deployment, optimization, serialization) should NOT be flagged, got: {:?}",
        phased
    );
}
#[test]
fn test_not_implemented_error_python() {
    // A bare `raise NotImplementedError` body is a placeholder.
    let dir = TempDir::new().unwrap();
    fs::write(
        dir.path().join("service.py"),
        "def process():\n raise NotImplementedError\n",
    )
    .unwrap();
    let parsed = run_scaffolding(&dir);
    let findings = parsed["findings"].as_array().unwrap();
    let placeholders = findings_by_category(findings, "placeholder");
    assert_eq!(
        placeholders.len(),
        1,
        "raise NotImplementedError should be flagged as placeholder: {:?}",
        placeholders
    );
}
#[test]
fn test_framework_default_in_readme() {
    // The stock Create React App bootstrap comment is a framework default.
    let dir = TempDir::new().unwrap();
    fs::write(
        dir.path().join("index.tsx"),
        "// This project was bootstrapped with Create React App\nimport React from 'react';\n",
    )
    .unwrap();
    let parsed = run_scaffolding(&dir);
    let findings = parsed["findings"].as_array().unwrap();
    let defaults = findings_by_category(findings, "framework_default");
    assert_eq!(
        defaults.len(),
        1,
        "Framework bootstrapped message should be detected: {:?}",
        defaults
    );
}
#[test]
fn test_deliverables_sign_off_patterns() {
    // Deliverables, sign-off, validation-report and checklist file names are
    // all delivery artifacts; README.md is not.
    let dir = TempDir::new().unwrap();
    for (name, body) in [
        ("DELIVERABLES.md", "# Deliverables"),
        ("PHASE4_SIGN_OFF_EXECUTIVE_SUMMARY.txt", "Sign off"),
        ("PHASE4_VALIDATION_REPORT.md", "Report"),
        ("PHASE4_INTEGRATION_CHECKLIST.md", "Checklist"),
        ("README.md", "# Project"),
    ] {
        fs::write(dir.path().join(name), body).unwrap();
    }
    let parsed = run_scaffolding(&dir);
    let findings = parsed["findings"].as_array().unwrap();
    let delivery: Vec<&Value> = findings
        .iter()
        .filter(|f| {
            f.get("category")
                .and_then(|c| c.as_str())
                .map_or(false, |c| c.contains("delivery"))
        })
        .collect();
    assert!(
        delivery.len() >= 4,
        "Should detect DELIVERABLES, SIGN_OFF, VALIDATION_REPORT, CHECKLIST files, got {}: {:?}",
        delivery.len(),
        delivery
    );
    for d in &delivery {
        assert_eq!(d["confidence"], "high");
    }
}
#[test]
fn test_versioned_draft_already_detected() {
    // A `_v2` file name is caught by the numbered-suffix file check.
    let dir = TempDir::new().unwrap();
    fs::write(dir.path().join("product_gaps_v2.md"), "# Gaps").unwrap();
    let parsed = run_scaffolding(&dir);
    let findings = parsed["findings"].as_array().unwrap();
    let numbered: Vec<&Value> = findings
        .iter()
        .filter(|f| {
            f.get("category")
                .and_then(|c| c.as_str())
                .map_or(false, |c| c.contains("numbered"))
        })
        .collect();
    assert!(
        !numbered.is_empty(),
        "product_gaps_v2.md should be flagged by existing numbered pattern: {:?}",
        findings
    );
}
#[test]
fn test_stub_comment_placeholder() {
    // Bare `// stub` and `// placeholder ...` comments inside function bodies
    // should each produce a placeholder finding.
    let dir = TempDir::new().unwrap();
    let code = "function getUser() {\n // stub\n}\nfunction getOrder() {\n // placeholder for order logic\n}\n";
    fs::write(dir.path().join("api.ts"), code).unwrap();
    let parsed = run_scaffolding(&dir);
    let findings = parsed["findings"].as_array().unwrap();
    let placeholders = findings_by_category(findings, "placeholder");
    assert!(
        placeholders.len() >= 2,
        "Should detect '// stub' and '// placeholder' as placeholders, got: {:?}",
        placeholders
    );
}
#[test]
fn test_not_implemented_err_return() {
    // Returning Err("Not implemented") from a Rust function is a placeholder.
    let dir = TempDir::new().unwrap();
    let code = concat!(
        "fn process() -> Result<(), String> {\n",
        " Err(\"Not implemented\".to_string())\n",
        "}\n",
    );
    fs::write(dir.path().join("service.rs"), code).unwrap();
    let parsed = run_scaffolding(&dir);
    let findings = parsed["findings"].as_array().unwrap();
    let placeholders = findings_by_category(findings, "placeholder");
    assert!(
        !placeholders.is_empty(),
        "Err(\"Not implemented\") should be flagged as placeholder: {:?}",
        placeholders
    );
}
#[test]
fn test_not_yet_implemented_comment() {
    // A "// not yet implemented" comment in a function body is a placeholder.
    let dir = TempDir::new().unwrap();
    let code = "function handle() {\n // not yet implemented\n}\n";
    fs::write(dir.path().join("handler.ts"), code).unwrap();
    let parsed = run_scaffolding(&dir);
    let findings = parsed["findings"].as_array().unwrap();
    let placeholders = findings_by_category(findings, "placeholder");
    assert!(
        !placeholders.is_empty(),
        "'// not yet implemented' should be flagged as placeholder: {:?}",
        placeholders
    );
}
#[test]
fn test_create_next_app_default() {
    // A default "Create Next App" page title is a framework_default finding.
    let dir = TempDir::new().unwrap();
    let code = "<title>Create Next App</title>\nexport default function Layout() {}\n";
    fs::write(dir.path().join("layout.tsx"), code).unwrap();
    let parsed = run_scaffolding(&dir);
    let findings = parsed["findings"].as_array().unwrap();
    let defaults = findings_by_category(findings, "framework_default");
    assert!(
        !defaults.is_empty(),
        "'Create Next App' should be flagged as framework_default: {:?}",
        defaults
    );
}
#[test]
fn test_generated_by_create_app() {
    // A "Generated by create-next-app" banner is a framework_default finding.
    let dir = TempDir::new().unwrap();
    let code = "// Generated by create-next-app\nexport default function Home() {}\n";
    fs::write(dir.path().join("index.tsx"), code).unwrap();
    let parsed = run_scaffolding(&dir);
    let findings = parsed["findings"].as_array().unwrap();
    let defaults = findings_by_category(findings, "framework_default");
    assert!(
        !defaults.is_empty(),
        "'Generated by create-next-app' should be flagged as framework_default: {:?}",
        defaults
    );
}
#[test]
fn test_implementation_phase_doc_comment() {
    // A module doc comment naming an implementation phase should produce a
    // low-confidence phased_comment finding with the implementation_phase pattern.
    let dir = TempDir::new().unwrap();
    let code = "//! Implementation: Phase 2\npub fn run() {}\n";
    fs::write(dir.path().join("lib.rs"), code).unwrap();
    let parsed = run_scaffolding(&dir);
    let findings = parsed["findings"].as_array().unwrap();
    let phased = findings_by_category(findings, "phased_comment");
    let impl_phase = phased
        .iter()
        .find(|f| f["pattern"] == "implementation_phase");
    assert!(
        impl_phase.is_some(),
        "'//! Implementation: Phase 2' should be flagged as phased_comment/implementation_phase: {:?}",
        findings
    );
    assert_eq!(impl_phase.unwrap()["confidence"], "low");
}
#[test]
fn test_step_progress_format() {
    // "Step N/M:" progress comments in regular source should be flagged as
    // phased_comment/step_progress, once per occurrence.
    let dir = TempDir::new().unwrap();
    let code = concat!(
        "fn main() {\n",
        " // Step 1/5: Building project...\n",
        " build();\n",
        " // Step 2/5: Running tests...\n",
        " test();\n",
        "}\n",
    );
    fs::write(dir.path().join("build.rs"), code).unwrap();
    let parsed = run_scaffolding(&dir);
    let findings = parsed["findings"].as_array().unwrap();
    let phased = findings_by_category(findings, "phased_comment");
    let step_progress: Vec<_> = phased
        .iter()
        .filter(|f| f["pattern"] == "step_progress")
        .collect();
    assert!(
        step_progress.len() >= 2,
        "'Step 1/5:' and 'Step 2/5:' should be flagged, got: {:?}",
        step_progress
    );
}
#[test]
fn test_step_progress_in_test_skipped() {
    // "Step N/M:" comments inside test files are legitimate narration and
    // must not be reported.
    let dir = TempDir::new().unwrap();
    let code = concat!(
        "// Step 1/3: Setup test environment\n",
        "const db = setupDb();\n",
        "// Step 2/3: Run query\n",
        "const result = query(db);\n",
    );
    fs::write(dir.path().join("integration.test.ts"), code).unwrap();
    let parsed = run_scaffolding(&dir);
    let findings = parsed["findings"].as_array().unwrap();
    let phased = findings_by_category(findings, "phased_comment");
    let step_progress: Vec<_> = phased
        .iter()
        .filter(|f| f["pattern"] == "step_progress")
        .collect();
    assert!(
        step_progress.is_empty(),
        "Step N/M in test files should NOT be flagged, got: {:?}",
        step_progress
    );
}
#[test]
fn test_cross_file_comment_clone() {
    // The same three-line comment block copied into two files should raise a
    // medium-confidence cross_file_comment_clone finding.
    let dir = TempDir::new().unwrap();
    let shared = concat!(
        "// Initialize the database connection pool\n",
        "// Set up retry logic and timeout handling\n",
        "// Configure logging for all database queries\n",
    );
    fs::write(
        dir.path().join("service_a.ts"),
        format!("{shared}function initDbA() {{}}\n"),
    )
    .unwrap();
    fs::write(
        dir.path().join("service_b.ts"),
        format!("{shared}function initDbB() {{}}\n"),
    )
    .unwrap();
    let parsed = run_scaffolding(&dir);
    let findings = parsed["findings"].as_array().unwrap();
    let clones: Vec<&Value> = findings
        .iter()
        .filter(|f| {
            f["category"] == "comment_clone" && f["pattern"] == "cross_file_comment_clone"
        })
        .collect();
    assert!(
        !clones.is_empty(),
        "Near-identical comment blocks across files should be detected: {:?}",
        findings
    );
    assert_eq!(clones[0]["confidence"], "medium");
}
#[test]
fn test_cross_file_different_comments_ok() {
    // Unrelated comment blocks in two files must not trigger the
    // cross_file_comment_clone pattern.
    let dir = TempDir::new().unwrap();
    let auth = concat!(
        "// Parse user authentication tokens\n",
        "// Validate JWT signatures and expiry dates\n",
        "// Extract role-based permissions from claims\n",
        "function parseAuth() {}\n",
    );
    let db = concat!(
        "// Initialize the database connection pool\n",
        "// Set up retry logic and timeout handling\n",
        "// Configure logging for all database queries\n",
        "function initDb() {}\n",
    );
    fs::write(dir.path().join("auth.ts"), auth).unwrap();
    fs::write(dir.path().join("db.ts"), db).unwrap();
    let parsed = run_scaffolding(&dir);
    let findings = parsed["findings"].as_array().unwrap();
    let cross_clones: Vec<&Value> = findings
        .iter()
        .filter(|f| {
            f["category"] == "comment_clone" && f["pattern"] == "cross_file_comment_clone"
        })
        .collect();
    assert!(
        cross_clones.is_empty(),
        "Different comment blocks across files should NOT be flagged, got: {:?}",
        cross_clones
    );
}
#[test]
fn test_over_doc_flagged() {
    // Ten doc-comment lines above a two-statement function body should be
    // reported once as low-confidence over_documented.
    let dir = TempDir::new().unwrap();
    let mut code = String::new();
    for i in 1..=10 {
        code.push_str(&format!("/// Doc comment line {}\n", i));
    }
    code.push_str("fn tiny() {\n let x = 1;\n let y = 2;\n}\n");
    fs::write(dir.path().join("service.rs"), code).unwrap();
    let parsed = run_scaffolding(&dir);
    let findings = parsed["findings"].as_array().unwrap();
    let over_doc = findings_by_category(findings, "over_documented");
    assert_eq!(
        over_doc.len(),
        1,
        "10 doc lines / 2-line body should be flagged as over_documented: {:?}",
        over_doc
    );
    assert_eq!(over_doc[0]["confidence"], "low");
}
#[test]
fn test_over_doc_ok() {
    // A modest doc-to-body ratio (3 doc lines, 5 statements) is acceptable.
    let dir = TempDir::new().unwrap();
    let code = concat!(
        "/// First doc line\n",
        "/// Second doc line\n",
        "/// Third doc line\n",
        "fn reasonable() {\n",
        " let a = 1;\n",
        " let b = 2;\n",
        " let c = 3;\n",
        " let d = 4;\n",
        " let e = 5;\n",
        "}\n",
    );
    fs::write(dir.path().join("service.rs"), code).unwrap();
    let parsed = run_scaffolding(&dir);
    let findings = parsed["findings"].as_array().unwrap();
    let over_doc = findings_by_category(findings, "over_documented");
    assert!(
        over_doc.is_empty(),
        "3 doc lines / 5-line body should NOT be flagged, got: {:?}",
        over_doc
    );
}
#[test]
fn test_over_doc_short_doc_ok() {
    // Four doc lines are below the detector's minimum block size, so even a
    // tiny getter body must not be flagged.
    let dir = TempDir::new().unwrap();
    let code = concat!(
        "/// Line 1\n",
        "/// Line 2\n",
        "/// Line 3\n",
        "/// Line 4\n",
        "fn getter() {\n",
        " return self.x;\n",
        "}\n",
    );
    fs::write(dir.path().join("service.rs"), code).unwrap();
    let parsed = run_scaffolding(&dir);
    let findings = parsed["findings"].as_array().unwrap();
    let over_doc = findings_by_category(findings, "over_documented");
    assert!(
        over_doc.is_empty(),
        "4 doc lines (below min 6) should NOT be flagged, got: {:?}",
        over_doc
    );
}
#[test]
fn test_over_doc_python() {
    // The over-documentation check also applies to Python `#` comment blocks.
    let dir = TempDir::new().unwrap();
    let mut code = String::new();
    for i in 1..=8 {
        code.push_str(&format!("# Doc comment line {}\n", i));
    }
    code.push_str("def tiny():\n x = 1\n y = 2\n");
    fs::write(dir.path().join("service.py"), code).unwrap();
    let parsed = run_scaffolding(&dir);
    let findings = parsed["findings"].as_array().unwrap();
    let over_doc = findings_by_category(findings, "over_documented");
    assert_eq!(
        over_doc.len(),
        1,
        "8 # lines / 2-line Python body should be flagged: {:?}",
        over_doc
    );
}
#[test]
fn test_ignored_param_documented() {
    // An `_`-prefixed (ignored) parameter whose name appears in the doc
    // comment should be a medium-confidence ignored_param_documented finding.
    let dir = TempDir::new().unwrap();
    let code = concat!(
        "/// Process the config to set up the service.\n",
        "fn process(_config: Config) {\n",
        " do_stuff();\n",
        "}\n",
    );
    fs::write(dir.path().join("service.rs"), code).unwrap();
    let parsed = run_scaffolding(&dir);
    let findings = parsed["findings"].as_array().unwrap();
    let ignored = findings_by_category(findings, "ignored_param_documented");
    assert_eq!(
        ignored.len(),
        1,
        "_config with doc mentioning 'config' should be flagged: {:?}",
        ignored
    );
    assert_eq!(ignored[0]["confidence"], "medium");
}
#[test]
fn test_ignored_param_no_doc() {
    // An ignored parameter with no accompanying doc comment is fine.
    let dir = TempDir::new().unwrap();
    let code = "fn process(_config: Config) {\n do_stuff();\n}\n";
    fs::write(dir.path().join("service.rs"), code).unwrap();
    let parsed = run_scaffolding(&dir);
    let findings = parsed["findings"].as_array().unwrap();
    let ignored = findings_by_category(findings, "ignored_param_documented");
    assert!(
        ignored.is_empty(),
        "_config without doc should NOT be flagged: {:?}",
        ignored
    );
}
#[test]
fn test_ignored_param_python() {
    // The ignored-param check also applies to Python `_name` parameters.
    let dir = TempDir::new().unwrap();
    let code = "# Handle the incoming request.\ndef handle(_request):\n pass\n";
    fs::write(dir.path().join("service.py"), code).unwrap();
    let parsed = run_scaffolding(&dir);
    let findings = parsed["findings"].as_array().unwrap();
    let ignored = findings_by_category(findings, "ignored_param_documented");
    assert_eq!(
        ignored.len(),
        1,
        "_request with doc mentioning 'request' should be flagged: {:?}",
        ignored
    );
}
#[test]
fn test_underscore_alone_not_flagged() {
    // A bare `_` parameter carries no name to match against the docs.
    let dir = TempDir::new().unwrap();
    let code = concat!(
        "/// Process the input value.\n",
        "fn process(_: i32) {\n",
        " do_stuff();\n",
        "}\n",
    );
    fs::write(dir.path().join("service.rs"), code).unwrap();
    let parsed = run_scaffolding(&dir);
    let findings = parsed["findings"].as_array().unwrap();
    let ignored = findings_by_category(findings, "ignored_param_documented");
    assert!(
        ignored.is_empty(),
        "Bare _ param should NOT be flagged: {:?}",
        ignored
    );
}
#[test]
fn test_misleading_name_uuid() {
    // A generate_uuid whose body has no uuid/crypto vocabulary should be a
    // low-confidence misleading_name finding.
    let dir = TempDir::new().unwrap();
    let code = concat!(
        "function generate_uuid() {\n",
        " const ts = Date.now().toString(16);\n",
        " return ts + '-' + ts;\n",
        "}\n",
    );
    fs::write(dir.path().join("util.ts"), code).unwrap();
    let parsed = run_scaffolding(&dir);
    let findings = parsed["findings"].as_array().unwrap();
    let misleading = findings_by_category(findings, "misleading_name");
    assert_eq!(
        misleading.len(),
        1,
        "generate_uuid without uuid/crypto keywords should be flagged: {:?}",
        misleading
    );
    assert_eq!(misleading[0]["confidence"], "low");
}
#[test]
fn test_misleading_name_ok() {
    // A hashing function whose body actually uses crypto APIs is legitimate.
    let dir = TempDir::new().unwrap();
    let code = concat!(
        "function hash_sha256(data: string) {\n",
        " const hash = crypto.createHash('sha256');\n",
        " hash.update(data);\n",
        " return hash.digest('hex');\n",
        "}\n",
    );
    fs::write(dir.path().join("util.ts"), code).unwrap();
    let parsed = run_scaffolding(&dir);
    let findings = parsed["findings"].as_array().unwrap();
    let misleading = findings_by_category(findings, "misleading_name");
    assert!(
        misleading.is_empty(),
        "hash_sha256 with crypto keywords should NOT be flagged: {:?}",
        misleading
    );
}
#[test]
fn test_misleading_name_placeholder_skip() {
    // A todo!() body is already covered by the placeholder category, so the
    // misleading_name check should skip it.
    let dir = TempDir::new().unwrap();
    let code = "fn compute_md5(data: &[u8]) -> String {\n todo!()\n}\n";
    fs::write(dir.path().join("util.rs"), code).unwrap();
    let parsed = run_scaffolding(&dir);
    let findings = parsed["findings"].as_array().unwrap();
    let misleading = findings_by_category(findings, "misleading_name");
    assert!(
        misleading.is_empty(),
        "compute_md5 with todo!() should NOT be flagged as misleading (caught by placeholder): {:?}",
        misleading
    );
}
#[test]
fn test_misleading_name_base64() {
    // to_base64 returning a hard-coded literal is a misleading_name finding.
    let dir = TempDir::new().unwrap();
    let code = "function to_base64(input: string) {\n return 'aGVsbG8=';\n}\n";
    fs::write(dir.path().join("util.ts"), code).unwrap();
    let parsed = run_scaffolding(&dir);
    let findings = parsed["findings"].as_array().unwrap();
    let misleading = findings_by_category(findings, "misleading_name");
    assert_eq!(
        misleading.len(),
        1,
        "to_base64 returning string literal should be flagged: {:?}",
        misleading
    );
}
#[test]
fn test_misleading_name_rust_uuid_with_import() {
    // A file-level `use uuid::Uuid;` import vouches for make_uuid even though
    // its body has no uuid vocabulary.
    let dir = TempDir::new().unwrap();
    let code = concat!(
        "use uuid::Uuid;\n",
        "\n",
        "fn make_uuid() -> String {\n",
        " let id = generate_id();\n",
        " format!(\"prefix-{}\", id)\n",
        "}\n",
    );
    fs::write(dir.path().join("ids.rs"), code).unwrap();
    let parsed = run_scaffolding(&dir);
    let findings = parsed["findings"].as_array().unwrap();
    let misleading = findings_by_category(findings, "misleading_name");
    assert!(
        misleading.is_empty(),
        "make_uuid with 'use uuid' import should NOT be flagged: {:?}",
        misleading
    );
}
#[test]
fn test_misleading_name_python_hashlib_import() {
    // A top-level `import hashlib` vouches for compute_sha256 in Python.
    let dir = TempDir::new().unwrap();
    let code = "import hashlib\n\ndef compute_sha256(data):\n return do_hash(data)\n";
    fs::write(dir.path().join("hashing.py"), code).unwrap();
    let parsed = run_scaffolding(&dir);
    let findings = parsed["findings"].as_array().unwrap();
    let misleading = findings_by_category(findings, "misleading_name");
    assert!(
        misleading.is_empty(),
        "compute_sha256 with 'import hashlib' should NOT be flagged: {:?}",
        misleading
    );
}
#[test]
fn test_misleading_name_rust_specific_keywords() {
    // A Sha256::digest call in the body is enough to trust compute_sha256.
    let dir = TempDir::new().unwrap();
    let code = concat!(
        "fn compute_sha256(data: &[u8]) -> Vec<u8> {\n",
        " let result = Sha256::digest(data);\n",
        " result.to_vec()\n",
        "}\n",
    );
    fs::write(dir.path().join("hash.rs"), code).unwrap();
    let parsed = run_scaffolding(&dir);
    let findings = parsed["findings"].as_array().unwrap();
    let misleading = findings_by_category(findings, "misleading_name");
    assert!(
        misleading.is_empty(),
        "compute_sha256 with Sha256::digest in body should NOT be flagged: {:?}",
        misleading
    );
}
#[test]
fn test_misleading_name_python_no_import() {
    // generate_uuid backed only by a timestamp (no uuid import anywhere)
    // should be a low-confidence misleading_name finding.
    let dir = TempDir::new().unwrap();
    let code = "def generate_uuid():\n import time\n return hex(int(time.time()))\n";
    fs::write(dir.path().join("ids.py"), code).unwrap();
    let parsed = run_scaffolding(&dir);
    let findings = parsed["findings"].as_array().unwrap();
    let misleading = findings_by_category(findings, "misleading_name");
    assert_eq!(
        misleading.len(),
        1,
        "generate_uuid without uuid import should be flagged: {:?}",
        misleading
    );
    assert_eq!(misleading[0]["confidence"], "low");
}
#[test]
fn test_is_emoji() {
    // Common pictographic emoji are recognized; ASCII, accented Latin, and
    // CJK characters are not.
    for &c in ['😀', '😃', '😊', '🤔', '🚀', '✨', '🔥', '💡', '🎉'].iter() {
        assert!(is_emoji(c));
    }
    for &c in ['a', 'A', '1', '!', ' ', 'ñ', '中'].iter() {
        assert!(!is_emoji(c));
    }
}
#[test]
fn test_find_emojis_in_line() {
    // One emoji, several emojis in left-to-right order, and none at all.
    let single = find_emojis_in_line("// This is a comment 🚀 with emoji");
    assert_eq!(single.len(), 1);
    assert_eq!(single[0].0, '🚀');
    let multiple = find_emojis_in_line("// TODO: Fix 🔥 this 💡 ASAP 🎉");
    let chars: Vec<char> = multiple.iter().map(|e| e.0).collect();
    assert_eq!(chars, vec!['🔥', '💡', '🎉']);
    let none = find_emojis_in_line("// Regular comment without emoji");
    assert!(none.is_empty());
}
#[test]
fn integration_emoji_detection_in_comments() {
    // With include_emojis opted in (and the other detectors off), each emoji
    // in a comment becomes a medium-confidence finding with line + match_text.
    let dir = TempDir::new().unwrap();
    let code = concat!(
        "fn main() {\n",
        " // This is awesome! 🚀\n",
        " let x = 1;\n",
        " // TODO: Fix this bug 🐛\n",
        "}\n",
    );
    fs::write(dir.path().join("main.rs"), code).unwrap();
    let mut args = HashMap::new();
    args.insert(
        "path".to_string(),
        Value::String(dir.path().to_string_lossy().to_string()),
    );
    args.insert("include_emojis".to_string(), Value::Bool(true));
    for flag in [
        "include_todos",
        "include_placeholders",
        "include_phased_comments",
        "include_temp_files",
    ]
    .iter()
    {
        args.insert(flag.to_string(), Value::Bool(false));
    }
    let result = execute_detect_scaffolding(&args).unwrap();
    let text = result["content"][0]["text"].as_str().unwrap();
    let parsed: Value = serde_json::from_str(text).unwrap();
    let findings = parsed["findings"].as_array().unwrap();
    let emojis: Vec<&Value> = findings
        .iter()
        .filter(|f| f["category"] == "emoji")
        .collect();
    assert_eq!(emojis.len(), 2, "Should find 2 emojis");
    assert_eq!(emojis[0]["line"], 2);
    assert_eq!(emojis[0]["match_text"], "🚀");
    assert_eq!(emojis[0]["confidence"], "medium");
    assert_eq!(emojis[1]["line"], 4);
    assert_eq!(emojis[1]["match_text"], "🐛");
}
#[test]
fn integration_emoji_detection_disabled_by_default() {
    // Without include_emojis in the arguments, emoji findings must not appear.
    let dir = TempDir::new().unwrap();
    let code = "// Comment with emoji 🚀\nfn main() {}\n";
    fs::write(dir.path().join("main.rs"), code).unwrap();
    let mut args = HashMap::new();
    args.insert(
        "path".to_string(),
        Value::String(dir.path().to_string_lossy().to_string()),
    );
    for flag in [
        "include_todos",
        "include_placeholders",
        "include_phased_comments",
        "include_temp_files",
    ]
    .iter()
    {
        args.insert(flag.to_string(), Value::Bool(false));
    }
    let result = execute_detect_scaffolding(&args).unwrap();
    let text = result["content"][0]["text"].as_str().unwrap();
    let parsed: Value = serde_json::from_str(text).unwrap();
    let findings = parsed["findings"].as_array().unwrap();
    let emojis: Vec<&Value> = findings
        .iter()
        .filter(|f| f["category"] == "emoji")
        .collect();
    assert!(
        emojis.is_empty(),
        "Emoji detection should be disabled by default"
    );
}
#[test]
fn integration_emoji_multiple_on_same_line() {
    // Each emoji on a single line is reported separately, in order, with the
    // same line number.
    let dir = TempDir::new().unwrap();
    let code = "// 🚀 Launch feature 🎉 Celebrate 🔥 Hot\n";
    fs::write(dir.path().join("app.ts"), code).unwrap();
    let mut args = HashMap::new();
    args.insert(
        "path".to_string(),
        Value::String(dir.path().to_string_lossy().to_string()),
    );
    args.insert("include_emojis".to_string(), Value::Bool(true));
    for flag in [
        "include_todos",
        "include_placeholders",
        "include_phased_comments",
        "include_temp_files",
    ]
    .iter()
    {
        args.insert(flag.to_string(), Value::Bool(false));
    }
    let result = execute_detect_scaffolding(&args).unwrap();
    let text = result["content"][0]["text"].as_str().unwrap();
    let parsed: Value = serde_json::from_str(text).unwrap();
    let findings = parsed["findings"].as_array().unwrap();
    let emojis: Vec<&Value> = findings
        .iter()
        .filter(|f| f["category"] == "emoji")
        .collect();
    assert_eq!(emojis.len(), 3, "Should detect all 3 emojis on same line");
    for (i, expected) in ["🚀", "🎉", "🔥"].iter().enumerate() {
        assert_eq!(emojis[i]["match_text"], *expected);
        assert_eq!(emojis[i]["line"], 1);
    }
}
/// Run the scaffolding detector over `dir` with only secret scanning enabled
/// (all other detector families switched off) and parse its JSON payload.
fn run_secrets(dir: &TempDir) -> Value {
    let mut args = HashMap::new();
    args.insert(
        "path".to_string(),
        Value::String(dir.path().to_string_lossy().to_string()),
    );
    args.insert("include_secrets".to_string(), Value::Bool(true));
    for flag in [
        "include_todos",
        "include_placeholders",
        "include_phased_comments",
        "include_temp_files",
    ]
    .iter()
    {
        args.insert(flag.to_string(), Value::Bool(false));
    }
    let result = execute_detect_scaffolding(&args).unwrap();
    let text = result["content"][0]["text"].as_str().unwrap();
    serde_json::from_str(text).unwrap()
}
/// Keep only the hardcoded_secret findings that matched the named pattern.
fn secrets_by_pattern<'a>(findings: &'a [Value], pattern: &str) -> Vec<&'a Value> {
    let mut hits = Vec::new();
    for f in findings {
        if f["category"] == "hardcoded_secret" && f["pattern"] == pattern {
            hits.push(f);
        }
    }
    hits
}
#[test]
fn test_redact_secret_short() {
    // Short inputs are kept whole and suffixed with the redaction marker.
    assert_eq!(redact_secret("abc"), "abc***");
}
#[test]
fn test_redact_secret_long() {
    // Long secrets are truncated to a short prefix before the marker.
    assert_eq!(redact_secret("sk-abcdefghijklmn"), "sk-abcde***");
}
#[test]
fn test_is_placeholder_secret_known() {
    // Well-known placeholder values must all be classified as placeholders.
    for &value in [
        "your_api_key_here",
        "replaceme",
        "changeme",
        "xxxxxxxxxxxx",
        "placeholder",
        "password123",
        "<api_key>",
        "${API_KEY}",
        "your_token",
    ]
    .iter()
    {
        assert!(is_placeholder_secret(value));
    }
}
#[test]
fn test_is_placeholder_secret_real() {
    // Realistic-looking key material must NOT be dismissed as a placeholder.
    for &value in [
        "sk-FakeKeyAbcXYZwxyzABCDEFGHIJKLMNOP",
        "AKIAIOSFODNN7FAKETST1",
        "ghp_16C7e42F292c6912E7710c838347Ae178B4a",
    ]
    .iter()
    {
        assert!(!is_placeholder_secret(value));
    }
}
#[test]
fn test_is_env_var_line() {
    // Lines reading credentials from the environment are recognized; a
    // hard-coded literal assignment is not.
    for &line in [
        r#"let key = process.env.API_KEY;"#,
        r#"key = os.environ.get("API_KEY")"#,
        r#"let key = std::env::var("KEY").unwrap();"#,
        r#"password = getenv("PASSWORD")"#,
    ]
    .iter()
    {
        assert!(is_env_var_line(line));
    }
    assert!(!is_env_var_line(
        r#"let key = "sk-hardcodedkey12345678901234";"#
    ));
}
#[test]
fn integration_secrets_disabled_by_default() {
    // run_scaffolding does not set include_secrets, so a blatant key in the
    // tree must produce no hardcoded_secret findings.
    let dir = TempDir::new().unwrap();
    fs::write(
        dir.path().join("config.py"),
        "API_KEY = \"sk-abcdefghijklmnopqrstuvwxyz1234567890abcd\"\n",
    )
    .unwrap();
    let parsed = run_scaffolding(&dir);
    let findings = parsed["findings"].as_array().unwrap();
    let secrets: Vec<&Value> = findings
        .iter()
        .filter(|f| f["category"] == "hardcoded_secret")
        .collect();
    assert!(
        secrets.is_empty(),
        "Secrets detection should be opt-in (disabled by default)"
    );
}
#[test]
fn integration_openai_key_detected() {
    // An sk-... assignment matches the openai_api_key pattern with high
    // confidence, and the reported match_text is redacted.
    let dir = TempDir::new().unwrap();
    fs::write(
        dir.path().join("client.py"),
        "openai.api_key = \"sk-FakeKeyAbcXYZwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ\"\n",
    )
    .unwrap();
    let parsed = run_secrets(&dir);
    let findings = parsed["findings"].as_array().unwrap();
    let hits = secrets_by_pattern(findings, "openai_api_key");
    assert_eq!(hits.len(), 1, "Should detect OpenAI API key");
    assert_eq!(hits[0]["confidence"], "high");
    let match_text = hits[0]["match_text"].as_str().unwrap();
    assert!(
        match_text.ends_with("***"),
        "match_text should be redacted: {match_text}"
    );
}
#[test]
fn integration_aws_key_detected() {
    // An AKIA-prefixed access key id matches the aws_access_key pattern.
    let dir = TempDir::new().unwrap();
    fs::write(
        dir.path().join("aws.js"),
        "const accessKeyId = \"AKIAIOSFODNN7FAKETST1\";\n",
    )
    .unwrap();
    let parsed = run_secrets(&dir);
    let findings = parsed["findings"].as_array().unwrap();
    let hits = secrets_by_pattern(findings, "aws_access_key");
    assert_eq!(hits.len(), 1, "Should detect AWS access key");
    assert_eq!(hits[0]["confidence"], "high");
}
#[test]
fn integration_github_token_detected() {
    // A ghp_ personal-access token matches the github_token pattern.
    let dir = TempDir::new().unwrap();
    fs::write(
        dir.path().join("deploy.sh"),
        "GH_TOKEN=ghp_16C7e42F292c6912E7710c838347Ae178B4a\n",
    )
    .unwrap();
    let parsed = run_secrets(&dir);
    let findings = parsed["findings"].as_array().unwrap();
    let hits = secrets_by_pattern(findings, "github_token");
    assert_eq!(hits.len(), 1, "Should detect GitHub token");
    assert_eq!(hits[0]["confidence"], "high");
}
#[test]
fn integration_private_key_header_detected() {
    // A PEM private-key header embedded in source matches the private_key pattern.
    let dir = TempDir::new().unwrap();
    fs::write(
        dir.path().join("keys.rs"),
        "let pem = \"-----BEGIN RSA PRIVATE KEY-----\";\n",
    )
    .unwrap();
    let parsed = run_secrets(&dir);
    let findings = parsed["findings"].as_array().unwrap();
    let hits = secrets_by_pattern(findings, "private_key");
    assert_eq!(hits.len(), 1, "Should detect private key header");
    assert_eq!(hits[0]["confidence"], "high");
}
#[test]
fn integration_password_assignment_detected() {
    // A literal password assignment is a medium-confidence password finding.
    let dir = TempDir::new().unwrap();
    fs::write(
        dir.path().join("db.py"),
        "password = \"s3cur3P@ssw0rd!\"\n",
    )
    .unwrap();
    let parsed = run_secrets(&dir);
    let findings = parsed["findings"].as_array().unwrap();
    let hits = secrets_by_pattern(findings, "password");
    assert_eq!(hits.len(), 1, "Should detect hardcoded password");
    assert_eq!(hits[0]["confidence"], "medium");
}
#[test]
fn integration_stripe_live_key_detected() {
    // The key is assembled from pieces so this test file never contains a
    // contiguous sk_live_ token for other scanners to trip over.
    let dir = TempDir::new().unwrap();
    let key = format!("{}{}{}", "sk", "_live_", "abcdefghijklmnopqrstuvwx");
    fs::write(
        dir.path().join("payments.ts"),
        format!("const stripe = Stripe(\"{key}\");\n"),
    )
    .unwrap();
    let parsed = run_secrets(&dir);
    let findings = parsed["findings"].as_array().unwrap();
    let hits = secrets_by_pattern(findings, "stripe_secret_key");
    assert_eq!(hits.len(), 1, "Should detect Stripe live key");
    assert_eq!(hits[0]["confidence"], "high");
}
#[test]
fn integration_db_connection_string_detected() {
    // A connection URL carrying user:password credentials matches the
    // db_connection_string pattern with high confidence.
    let dir = TempDir::new().unwrap();
    fs::write(
        dir.path().join("app.go"),
        "db, _ := sql.Open(\"postgres\", \"postgres://admin:s3cr3t@localhost:5432/mydb\")\n",
    )
    .unwrap();
    let parsed = run_secrets(&dir);
    let findings = parsed["findings"].as_array().unwrap();
    let hits = secrets_by_pattern(findings, "db_connection_string");
    assert_eq!(
        hits.len(),
        1,
        "Should detect DB connection string with credentials"
    );
    assert_eq!(hits[0]["confidence"], "high");
}
#[test]
fn integration_env_var_not_flagged() {
    // Reading credentials from the environment is the correct practice and
    // must not produce hardcoded_secret findings.
    let dir = TempDir::new().unwrap();
    let code = concat!(
        "const apiKey = process.env.OPENAI_API_KEY;\n",
        "const secret = process.env.CLIENT_SECRET;\n",
    );
    fs::write(dir.path().join("config.js"), code).unwrap();
    let parsed = run_secrets(&dir);
    let findings = parsed["findings"].as_array().unwrap();
    let secrets: Vec<&Value> = findings
        .iter()
        .filter(|f| f["category"] == "hardcoded_secret")
        .collect();
    assert!(
        secrets.is_empty(),
        "Env-var references should not be flagged: {secrets:?}"
    );
}
#[test]
fn integration_placeholder_not_flagged() {
    // Obvious placeholder values are not real secrets and must be skipped.
    let dir = TempDir::new().unwrap();
    let code = concat!(
        "api_key = \"your_api_key_here\"\n",
        "password = \"changeme\"\n",
        "secret = \"replaceme\"\n",
    );
    fs::write(dir.path().join("example.py"), code).unwrap();
    let parsed = run_secrets(&dir);
    let findings = parsed["findings"].as_array().unwrap();
    let secrets: Vec<&Value> = findings
        .iter()
        .filter(|f| f["category"] == "hardcoded_secret")
        .collect();
    assert!(
        secrets.is_empty(),
        "Placeholder secrets should not be flagged: {secrets:?}"
    );
}
#[test]
fn integration_slack_webhook_detected() {
    // The URL is assembled from pieces so this test file never contains a
    // contiguous webhook URL for other scanners to trip over.
    let dir = TempDir::new().unwrap();
    let url = format!(
        "{}{}",
        "https://hooks.slack",
        ".com/services/T0AAAAAA1/B0BBBBBBB/AbCdEfGhIjKlMnOpQrStUvWx"
    );
    fs::write(
        dir.path().join("notify.py"),
        format!("WEBHOOK = \"{url}\"\n"),
    )
    .unwrap();
    let parsed = run_secrets(&dir);
    let findings = parsed["findings"].as_array().unwrap();
    let hits = secrets_by_pattern(findings, "slack_webhook");
    assert_eq!(hits.len(), 1, "Should detect Slack webhook URL");
    assert_eq!(hits[0]["confidence"], "high");
}
#[test]
fn integration_secret_match_text_is_redacted() {
    // Whatever pattern matches, the reported match_text must never echo the
    // full secret: it ends with *** and stays short.
    let dir = TempDir::new().unwrap();
    fs::write(
        dir.path().join("auth.rs"),
        "const API_KEY: &str = \"sk-FakeKeyAbcXYZwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ\";\n",
    )
    .unwrap();
    let parsed = run_secrets(&dir);
    let findings = parsed["findings"].as_array().unwrap();
    let hits: Vec<&Value> = findings
        .iter()
        .filter(|f| f["category"] == "hardcoded_secret")
        .collect();
    assert_eq!(hits.len(), 1);
    let match_text = hits[0]["match_text"].as_str().unwrap();
    assert!(
        match_text.ends_with("***"),
        "match_text should end with *** (redacted): {match_text}"
    );
    assert!(
        match_text.len() < 30,
        "match_text should be short (redacted): {match_text}"
    );
}
#[test]
fn integration_google_api_key_detected() {
    // An AIza-prefixed key matches the google_api_key pattern.
    let dir = TempDir::new().unwrap();
    fs::write(
        dir.path().join("maps.js"),
        "const MAPS_KEY = \"AIzaSyD3aKfGH9XbTpQr2VwZyN1J4eL8moCuW7s\";\n",
    )
    .unwrap();
    let parsed = run_secrets(&dir);
    let findings = parsed["findings"].as_array().unwrap();
    let hits = secrets_by_pattern(findings, "google_api_key");
    assert_eq!(hits.len(), 1, "Should detect Google API key");
    assert_eq!(hits[0]["confidence"], "high");
}
#[test]
fn integration_jwt_token_detected() {
    // A hard-coded eyJ... three-part token matches the jwt_token pattern at
    // medium confidence.
    let dir = TempDir::new().unwrap();
    fs::write(
        dir.path().join("test_auth.py"),
        "TOKEN = \"eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0.SflKxwRJSMeKKF2QT4fwpMeJf36POk6yJV_adQssw5c\"\n",
    )
    .unwrap();
    let parsed = run_secrets(&dir);
    let findings = parsed["findings"].as_array().unwrap();
    let hits = secrets_by_pattern(findings, "jwt_token");
    assert_eq!(hits.len(), 1, "Should detect hardcoded JWT token");
    assert_eq!(hits[0]["confidence"], "medium");
}
}