use regex::Regex;
use std::path::Path;
use crate::parsers::markdown::MAX_REGEX_INPUT_SIZE;
use crate::regex_util::static_regex;
// Claude Code hook declarations, e.g. `- type: PreToolExecution` or `event: Stop`.
static_regex!(fn claude_hooks_pattern, r"(?im)^\s*-?\s*(?:type|event):\s*(?:PreToolExecution|PostToolExecution|Notification|Stop|SubagentStop)\b");
// `context: fork` frontmatter (Claude-specific context forking).
static_regex!(fn context_fork_pattern, r"(?im)^\s*context:\s*fork\b");
// `agent: <name>` frontmatter field.
static_regex!(fn agent_field_pattern, r"(?im)^\s*agent:\s*\S+");
// `allowed-tools:` frontmatter field.
static_regex!(fn allowed_tools_pattern, r"(?im)^\s*allowed-tools:\s*.+");
// `@path/to/file.ext` imports; requires start-of-line or whitespace before `@`
// so that e-mail addresses (user@host.tld) do not match.
static_regex!(fn at_import_pattern, r"(?m)(?:^|\s)@[\w./*-]+\.\w+");
// Markdown header or HTML comment opening a "Claude [Code] specific/only" section.
static_regex!(fn claude_section_guard_pattern, r"(?im)^(?:#+\s*|<!--\s*)claude(?:\s+code)?(?:\s+specific|\s+only)?(?:\s*-->)?");
// An ATX markdown header (`#`..`######` followed by whitespace and text).
static_regex!(fn markdown_header_pattern, r"^#+\s+.+");
// npm/pnpm/yarn/bun invocations; group 1 captures the command up to a newline or backtick.
static_regex!(fn build_command_pattern, r"(?m)(?:^|\s|`)((?:npm|pnpm|yarn|bun)\s+(?:install|i|add|build|test|run|exec|ci)\b[^\n`]*)");
// Tool allow-lists. BUGFIX: the pattern previously read `allways?`, which
// matched the misspellings "allway"/"allways" but NOT the intended
// "always allow <Tool>"; `al+ways?` matches both the correct and the
// misspelled forms (strict superset of the old behavior).
static_regex!(fn tool_allow_pattern, r"(?im)(?:allowed[-_]?tools\s*:|tools\s*:\s*\[|\bal+ways?\s+allow\s+(\w+)\b|\bcan\s+use\s+(\w+)\b|\bmay\s+use\s+(\w+)\b)");
// Tool deny-lists and prose prohibitions ("never use X", "no X tool", ...).
static_regex!(fn tool_disallow_pattern, r"(?im)(?:disallowed[-_]?tools\s*:|\bnever\s+use\s+(\w+)\b|\bdon'?t\s+use\s+(\w+)\b|\bdo\s+not\s+use\s+(\w+)\b|\bforbidden\s*:\s*(\w+)\b|\bprohibited\s*:\s*(\w+)\b|\bno\s+(\w+)\s+tool\b)");
// Keywords indicating a file documents instruction-layer precedence.
static_regex!(fn layer_precedence_pattern, r"(?im)(?:precedence|priority|override|hierarchy|takes?\s+precedence|supersede|primary\s+source|authoritative)");
/// A Claude-Code-only construct found in instruction-file content,
/// as reported by `find_claude_specific_features`.
#[derive(Debug, Clone)]
pub struct ClaudeSpecificFeature {
/// 1-based line number of the finding.
pub line: usize,
/// 1-based column of the match start within the line.
pub column: usize,
/// Short feature identifier, e.g. "hooks", "context:fork", "agent",
/// "allowed-tools", "@import".
pub feature: String,
/// Human-readable explanation of why the feature is Claude-specific.
pub description: String,
}
/// Scans `content` for Claude-Code-only constructs (hooks, `context: fork`,
/// `agent:` fields, `allowed-tools:`, `@file` imports) that generic AGENTS.md
/// readers will not understand.
///
/// Lines inside a guarded "Claude specific" section — opened by a markdown
/// header or HTML comment matching `claude_section_guard_pattern` — are
/// skipped. The section ends at the next header/comment of the same or a
/// shallower level (HTML comments are treated as level 2). Oversized inputs
/// return an empty list to bound regex cost.
pub fn find_claude_specific_features(content: &str) -> Vec<ClaudeSpecificFeature> {
    // Records one finding; shared by every per-pattern check below
    // (previously five near-identical inline push blocks).
    fn report(
        results: &mut Vec<ClaudeSpecificFeature>,
        line_num: usize,
        column: usize,
        feature: &str,
        description: &str,
    ) {
        results.push(ClaudeSpecificFeature {
            line: line_num + 1,
            column,
            feature: feature.to_string(),
            description: description.to_string(),
        });
    }

    if content.len() > MAX_REGEX_INPUT_SIZE {
        return Vec::new();
    }
    let mut results = Vec::new();
    let guard_pattern = claude_section_guard_pattern();
    let mut in_claude_section = false;
    let mut claude_section_level = 0;
    for (line_num, line) in content.lines().enumerate() {
        // A guard line opens a Claude-specific section; remember its level so
        // we know where the section ends.
        if guard_pattern.is_match(line) {
            in_claude_section = true;
            let trimmed = line.trim_start();
            if trimmed.starts_with('#') {
                claude_section_level = trimmed.chars().take_while(|c| *c == '#').count();
            } else if trimmed.starts_with("<!--") {
                // HTML comment guards act like an h2 header.
                claude_section_level = 2;
            }
            continue;
        }
        // A header (or HTML comment) at the same or shallower level closes
        // the guarded section; deeper subheaders stay inside it.
        if in_claude_section {
            let trimmed = line.trim_start();
            if trimmed.starts_with('#') || trimmed.starts_with("<!--") {
                let current_level = if trimmed.starts_with('#') {
                    trimmed.chars().take_while(|c| *c == '#').count()
                } else {
                    2
                };
                if current_level <= claude_section_level {
                    in_claude_section = false;
                }
            }
        }
        if in_claude_section {
            continue;
        }
        if let Some(mat) = claude_hooks_pattern().find(line) {
            report(
                &mut results,
                line_num,
                mat.start() + 1,
                "hooks",
                "Claude Code hooks are not supported by other AGENTS.md readers",
            );
        }
        if let Some(mat) = context_fork_pattern().find(line) {
            report(
                &mut results,
                line_num,
                mat.start() + 1,
                "context:fork",
                "Context forking is Claude Code specific",
            );
        }
        if let Some(mat) = agent_field_pattern().find(line) {
            report(
                &mut results,
                line_num,
                mat.start() + 1,
                "agent",
                "Agent field is Claude Code specific",
            );
        }
        if let Some(mat) = allowed_tools_pattern().find(line) {
            report(
                &mut results,
                line_num,
                mat.start() + 1,
                "allowed-tools",
                "Tool restrictions are Claude Code specific",
            );
        }
        if let Some(mat) = at_import_pattern().find(line) {
            // Require a leading `@` (after trimming the whitespace the regex
            // may include) and exactly one `@` so e-mail addresses don't match.
            let matched = mat.as_str().trim_start();
            if matched.starts_with('@') && matched.matches('@').count() == 1 {
                report(
                    &mut results,
                    line_num,
                    mat.start() + 1,
                    "@import",
                    "The @file import syntax is Claude Code specific",
                );
            }
        }
    }
    results
}
/// A structural problem in markdown content, as reported by
/// `check_markdown_structure`.
#[derive(Debug, Clone)]
pub struct MarkdownStructureIssue {
/// 1-based line number of the issue (1 for the whole-document
/// "no headers" issue).
pub line: usize,
/// Column of the issue; the current checks always report 0 here.
pub column: usize,
/// Short description of what is wrong.
pub issue: String,
/// Suggested remediation.
pub suggestion: String,
}
/// Checks the markdown structure of `content`: reports a missing-headers
/// issue for non-empty documents with no ATX headers, and a skipped-level
/// issue whenever a header jumps more than one level deeper (e.g. `#`
/// followed by `###`). Content inside fenced code blocks (``` ... ```) is
/// ignored for both checks.
///
/// The original implementation scanned the content twice with duplicated
/// fence-tracking logic; this version does one pass. The two issue kinds are
/// mutually exclusive, so output ordering is unchanged.
pub fn check_markdown_structure(content: &str) -> Vec<MarkdownStructureIssue> {
    let pattern = markdown_header_pattern();
    let mut skip_issues = Vec::new();
    let mut has_headers = false;
    let mut last_level = 0;
    let mut in_code_block = false;
    for (line_num, line) in content.lines().enumerate() {
        // A fence line toggles code-block state and is never itself a header.
        if line.trim_start().starts_with("```") {
            in_code_block = !in_code_block;
            continue;
        }
        if in_code_block || !pattern.is_match(line) {
            continue;
        }
        has_headers = true;
        let current_level = line.chars().take_while(|&c| c == '#').count();
        // Only flag jumps *deeper* than one level; going shallower is fine.
        if last_level > 0 && current_level > last_level + 1 {
            skip_issues.push(MarkdownStructureIssue {
                line: line_num + 1,
                column: 0,
                issue: format!(
                    "Header level skipped from {} to {}",
                    last_level, current_level
                ),
                suggestion: format!(
                    "Use h{} instead of h{} for proper hierarchy",
                    last_level + 1,
                    current_level
                ),
            });
        }
        last_level = current_level;
    }
    let mut results = Vec::new();
    if !has_headers && !content.trim().is_empty() {
        results.push(MarkdownStructureIssue {
            line: 1,
            column: 0,
            issue: "No markdown headers found".to_string(),
            suggestion: "Add headers (# Section) to structure the document for better readability"
                .to_string(),
        });
    }
    results.extend(skip_issues);
    results
}
/// A platform- or user-specific filesystem path found in content, as
/// reported by `find_hard_coded_paths`.
#[derive(Debug, Clone)]
pub struct HardCodedPath {
/// 1-based line number of the match.
pub line: usize,
/// 1-based column of the match start within the line.
pub column: usize,
/// The matched path text, verbatim (original casing preserved).
pub path: String,
/// Label of the platform/tool the path implies (e.g. "Claude Code",
/// "macOS", "Windows absolute path").
pub platform: String,
}
// Platform- or user-specific path fragments: tool config dirs (.claude/,
// .opencode/, .cursor/, .cline/, .github/copilot/), macOS ~/Library, hidden
// home directories, and absolute /Users, /home, or C:\Users style paths.
static_regex!(fn hard_coded_path_pattern, r"(?i)(?:\.claude/|\.opencode/|\.cursor/|\.cline/|\.github/copilot/|~/Library/|~/\.[a-z][\w-]*/|/Users/[a-zA-Z][\w.-]*/|/home/[a-zA-Z][\w.-]*/|[A-Z]:\\Users\\[a-zA-Z][\w.-]*\\)");
/// Finds platform- or user-specific filesystem paths mentioned in `content`
/// and labels each with the platform it implies. Oversized inputs return an
/// empty list to bound regex cost.
pub fn find_hard_coded_paths(content: &str) -> Vec<HardCodedPath> {
    // Maps a lowercased matched path to the platform label it implies.
    // Checks run in order of specificity; the first hit wins.
    fn classify(path: &str) -> &'static str {
        if path.contains(".claude") {
            "Claude Code"
        } else if path.contains(".opencode") {
            "OpenCode"
        } else if path.contains(".cursor") {
            "Cursor"
        } else if path.contains(".cline") {
            "Cline"
        } else if path.contains(".github/copilot") {
            "GitHub Copilot"
        } else if path.contains("/library/") || path.starts_with("~/library/") {
            "macOS"
        } else if path.starts_with("/users/") || path.starts_with("/home/") {
            "OS-specific absolute path"
        } else if path.contains(":\\users\\") {
            "Windows absolute path"
        } else if path.starts_with("~/.") {
            "User-specific hidden directory"
        } else {
            "OS-specific"
        }
    }

    if content.len() > MAX_REGEX_INPUT_SIZE {
        return Vec::new();
    }
    let pattern = hard_coded_path_pattern();
    let mut findings = Vec::new();
    for (idx, line) in content.lines().enumerate() {
        for mat in pattern.find_iter(line) {
            let lowered = mat.as_str().to_lowercase();
            findings.push(HardCodedPath {
                line: idx + 1,
                column: mat.start() + 1,
                path: mat.as_str().to_string(),
                platform: classify(&lowered).to_string(),
            });
        }
    }
    findings
}
/// The JavaScript package managers recognized in build commands.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum PackageManager {
    Npm,
    Pnpm,
    Yarn,
    Bun,
}

impl PackageManager {
    /// Returns the canonical lowercase command name for this manager.
    pub fn as_str(&self) -> &'static str {
        match self {
            Self::Npm => "npm",
            Self::Pnpm => "pnpm",
            Self::Yarn => "yarn",
            Self::Bun => "bun",
        }
    }
}
/// Coarse classification of a package-manager invocation.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum CommandType {
/// install / i / add / ci
Install,
/// build
Build,
/// test
Test,
/// run / exec
Run,
/// No recognized subcommand keyword.
Other,
}
/// A package-manager command extracted from instruction content by
/// `extract_build_commands`.
#[derive(Debug, Clone)]
pub struct BuildCommand {
/// 1-based line number where the command appears.
pub line: usize,
/// 0-based byte offset of the command within its line (note: unlike other
/// findings in this module, this is not 1-based).
#[allow(dead_code)] pub column: usize,
/// Which package manager the command invokes.
pub package_manager: PackageManager,
/// Coarse classification of what the command does.
pub command_type: CommandType,
/// The matched command text, trimmed.
pub raw_command: String,
}
pub fn extract_build_commands(content: &str) -> Vec<BuildCommand> {
if content.len() > MAX_REGEX_INPUT_SIZE {
return Vec::new();
}
let mut results = Vec::new();
let pattern = build_command_pattern();
for (line_num, line) in content.lines().enumerate() {
for caps in pattern.captures_iter(line) {
let raw = match caps.get(1) {
Some(m) => m.as_str().trim(),
None => continue,
};
let column = caps.get(1).map(|m| m.start()).unwrap_or(0);
let package_manager = if raw.starts_with("npm") {
PackageManager::Npm
} else if raw.starts_with("pnpm") {
PackageManager::Pnpm
} else if raw.starts_with("yarn") {
PackageManager::Yarn
} else if raw.starts_with("bun") {
PackageManager::Bun
} else {
continue;
};
let command_type = if raw.contains(" install")
|| raw.contains(" i ")
|| raw.ends_with(" i")
|| raw.contains(" add")
|| raw.contains(" ci")
{
CommandType::Install
} else if raw.contains(" build") {
CommandType::Build
} else if raw.contains(" test") {
CommandType::Test
} else if raw.contains(" run") || raw.contains(" exec") {
CommandType::Run
} else {
CommandType::Other
};
results.push(BuildCommand {
line: line_num + 1,
column,
package_manager,
command_type,
raw_command: raw.to_string(),
});
}
}
results
}
/// A cross-file disagreement: two files prescribe different package managers
/// for the same command type (see `detect_build_conflicts`).
#[derive(Debug, Clone)]
pub struct BuildConflict {
/// First conflicting file.
pub file1: std::path::PathBuf,
/// Line in `file1` where its command appears.
pub file1_line: usize,
/// Package manager used in `file1`.
pub file1_manager: PackageManager,
/// Raw command text from `file1`.
#[allow(dead_code)] pub file1_command: String,
/// Second conflicting file.
pub file2: std::path::PathBuf,
/// Line in `file2` where its command appears.
#[allow(dead_code)] pub file2_line: usize,
/// Package manager used in `file2`.
pub file2_manager: PackageManager,
/// Raw command text from `file2`.
#[allow(dead_code)] pub file2_command: String,
/// The command type both files address (install, build, ...).
pub command_type: CommandType,
}
/// Detects cross-file package-manager conflicts: for each command type, if
/// two different files use different package managers, one conflict per
/// manager pair is reported (using the first command of each group).
///
/// Fix: groups are sorted before pairing — `HashMap` iteration order is
/// unspecified, so the previous version produced conflicts in a
/// nondeterministic order and with nondeterministic file1/file2 roles.
pub fn detect_build_conflicts(
    files: &[(std::path::PathBuf, Vec<BuildCommand>)],
) -> Vec<BuildConflict> {
    use std::collections::HashMap;
    // Bucket every command by its command type, keeping source order.
    let mut by_type: HashMap<CommandType, Vec<(std::path::PathBuf, &BuildCommand)>> =
        HashMap::new();
    for (path, commands) in files {
        for cmd in commands {
            by_type
                .entry(cmd.command_type)
                .or_default()
                .push((path.clone(), cmd));
        }
    }
    // Sort the command-type groups for a deterministic report.
    let mut groups: Vec<_> = by_type.into_iter().collect();
    groups.sort_by_key(|(t, _)| format!("{:?}", t));
    let mut conflicts = Vec::new();
    for (cmd_type, entries) in groups {
        // Within one command type, bucket by package manager.
        let mut by_manager: HashMap<PackageManager, Vec<(std::path::PathBuf, &BuildCommand)>> =
            HashMap::new();
        for (path, cmd) in entries {
            by_manager
                .entry(cmd.package_manager)
                .or_default()
                .push((path, cmd));
        }
        if by_manager.len() <= 1 {
            continue;
        }
        // Sort managers so pairings (and file1/file2 roles) are stable.
        let mut managers: Vec<PackageManager> = by_manager.keys().copied().collect();
        managers.sort_by_key(|m| m.as_str());
        for i in 0..managers.len() {
            for j in (i + 1)..managers.len() {
                let manager1 = managers[i];
                let manager2 = managers[j];
                let entries1 = &by_manager[&manager1];
                let entries2 = &by_manager[&manager2];
                if let (Some((path1, cmd1)), Some((path2, cmd2))) =
                    (entries1.first(), entries2.first())
                {
                    // Disagreements within a single file are not cross-file conflicts.
                    if path1 != path2 {
                        conflicts.push(BuildConflict {
                            file1: path1.clone(),
                            file1_line: cmd1.line,
                            file1_manager: manager1,
                            file1_command: cmd1.raw_command.clone(),
                            file2: path2.clone(),
                            file2_line: cmd2.line,
                            file2_manager: manager2,
                            file2_command: cmd2.raw_command.clone(),
                            command_type: cmd_type,
                        });
                    }
                }
            }
        }
    }
    conflicts
}
/// Whether a tool constraint permits or forbids a tool.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ConstraintType {
/// The tool is explicitly permitted.
Allow,
/// The tool is explicitly forbidden.
Disallow,
}
/// One tool allow/disallow constraint found in instruction content by
/// `extract_tool_constraints`.
#[derive(Debug, Clone)]
pub struct ToolConstraint {
/// 1-based line number of the constraint.
pub line: usize,
/// 1-based column of the pattern match start within the line.
#[allow(dead_code)] pub column: usize,
/// Canonical tool name (spelling taken from `KNOWN_TOOLS`).
pub tool_name: String,
/// Whether the tool is allowed or disallowed.
pub constraint_type: ConstraintType,
/// The text matched by the allow/disallow pattern, for context.
pub source_context: String,
}
pub fn extract_tool_constraints(content: &str) -> Vec<ToolConstraint> {
if content.len() > MAX_REGEX_INPUT_SIZE {
return Vec::new();
}
let mut results = Vec::new();
let allow_pattern = tool_allow_pattern();
let disallow_pattern = tool_disallow_pattern();
for (line_num, line) in content.lines().enumerate() {
if let Some(mat) = allow_pattern.find(line) {
let matched = mat.as_str();
if let Some(caps) = allow_pattern.captures(line) {
for i in 1..=6 {
if let Some(tool_cap) = caps.get(i) {
if let Some(canonical) = normalize_tool_name(tool_cap.as_str()) {
results.push(ToolConstraint {
line: line_num + 1,
column: mat.start() + 1,
tool_name: canonical,
constraint_type: ConstraintType::Allow,
source_context: matched.to_string(),
});
}
}
}
}
let tools = extract_tool_names_from_line(line, mat.end());
for tool in tools {
results.push(ToolConstraint {
line: line_num + 1,
column: mat.start() + 1,
tool_name: tool,
constraint_type: ConstraintType::Allow,
source_context: matched.to_string(),
});
}
}
if let Some(mat) = disallow_pattern.find(line) {
let matched = mat.as_str();
if let Some(caps) = disallow_pattern.captures(line) {
for i in 1..=6 {
if let Some(tool_cap) = caps.get(i) {
if let Some(canonical) = normalize_tool_name(tool_cap.as_str()) {
results.push(ToolConstraint {
line: line_num + 1,
column: mat.start() + 1,
tool_name: canonical,
constraint_type: ConstraintType::Disallow,
source_context: matched.to_string(),
});
}
}
}
}
let tools = extract_tool_names_from_line(line, mat.end());
for tool in tools {
results.push(ToolConstraint {
line: line_num + 1,
column: mat.start() + 1,
tool_name: tool,
constraint_type: ConstraintType::Disallow,
source_context: matched.to_string(),
});
}
}
}
results
}
/// Scans the tail of `line`, starting at byte offset `start_pos`, for
/// mentions of known tool names. Matching is case-insensitive and requires
/// ASCII word boundaries on both sides. Each tool is reported at most once,
/// in `KNOWN_TOOLS` order.
///
/// `start_pos` must lie on a char boundary (regex match offsets, as passed
/// by the caller, always do) or the slice below panics — unchanged from the
/// original behavior.
fn extract_tool_names_from_line(line: &str, start_pos: usize) -> Vec<String> {
    let mut tools = Vec::new();
    if start_pos >= line.len() {
        return tools;
    }
    let remainder_lower = line[start_pos..].to_lowercase();
    let bytes = remainder_lower.as_bytes();
    for tool in KNOWN_TOOLS {
        let tool_lower = tool.to_lowercase();
        // BUGFIX: check every occurrence, not just the first. Previously
        // "Bashful uses Bash" missed "Bash" because only the first hit
        // (inside "Bashful") was boundary-checked.
        for (pos, _) in remainder_lower.match_indices(&tool_lower) {
            let before_ok = pos == 0 || !is_word_char(bytes[pos - 1]);
            let end = pos + tool_lower.len();
            let after_ok = end >= bytes.len() || !is_word_char(bytes[end]);
            if before_ok && after_ok {
                tools.push(tool.to_string());
                break;
            }
        }
    }
    tools
}

/// True for ASCII identifier characters (letters, digits, underscore).
#[inline]
fn is_word_char(b: u8) -> bool {
    b.is_ascii_alphanumeric() || b == b'_'
}

/// Tool names recognized in allow/deny prose and frontmatter lists; the
/// spellings here are the canonical forms reported in findings.
const KNOWN_TOOLS: &[&str] = &[
    "Bash",
    "Read",
    "Write",
    "Edit",
    "Grep",
    "Glob",
    "Task",
    "WebFetch",
    "WebSearch",
    "AskUserQuestion",
    "TodoRead",
    "TodoWrite",
    "MultiTool",
    "NotebookEdit",
    "EnterPlanMode",
    "ExitPlanMode",
    "Skill",
    "StatusBarMessageTool",
    "TaskOutput",
    "mcp",
    "computer",
    "execute",
];

/// Maps a case-insensitive tool name to its canonical spelling from
/// `KNOWN_TOOLS`, or `None` when the name is not recognized.
fn normalize_tool_name(name: &str) -> Option<String> {
    let wanted = name.to_lowercase();
    KNOWN_TOOLS
        .iter()
        .find(|tool| tool.to_lowercase() == wanted)
        .map(|tool| (*tool).to_string())
}
/// A cross-file tool conflict: one file allows a tool that another file
/// disallows (see `detect_tool_conflicts`).
#[derive(Debug, Clone)]
pub struct ToolConflict {
/// Canonical tool name (taken from the allowing constraint).
pub tool_name: String,
/// File that allows the tool.
pub allow_file: std::path::PathBuf,
/// Line of the allowing constraint.
pub allow_line: usize,
/// Matched text of the allowing constraint.
#[allow(dead_code)] pub allow_context: String,
/// File that disallows the tool.
pub disallow_file: std::path::PathBuf,
/// Line of the disallowing constraint.
#[allow(dead_code)] pub disallow_line: usize,
/// Matched text of the disallowing constraint.
#[allow(dead_code)] pub disallow_context: String,
}
/// Detects cross-file tool conflicts: a tool allowed in one file and
/// disallowed in a different file. Each (tool, file-pair) is reported once
/// regardless of direction.
///
/// Fix: tools are processed in sorted order — `HashMap` iteration order is
/// unspecified, so the previous version emitted conflicts in a
/// nondeterministic order across runs.
#[allow(clippy::type_complexity)]
pub fn detect_tool_conflicts(
    files: &[(std::path::PathBuf, Vec<ToolConstraint>)],
) -> Vec<ToolConflict> {
    use std::collections::{HashMap, HashSet};
    // (allow entries, disallow entries) per lowercased tool name.
    type ConstraintGroup<'a> = (
        Vec<(std::path::PathBuf, &'a ToolConstraint)>,
        Vec<(std::path::PathBuf, &'a ToolConstraint)>,
    );
    let mut by_tool: HashMap<String, ConstraintGroup<'_>> = HashMap::new();
    for (path, constraints) in files {
        for constraint in constraints {
            let tool_key = constraint.tool_name.to_lowercase();
            let entry = by_tool
                .entry(tool_key)
                .or_insert_with(|| (Vec::new(), Vec::new()));
            match constraint.constraint_type {
                ConstraintType::Allow => entry.0.push((path.clone(), constraint)),
                ConstraintType::Disallow => entry.1.push((path.clone(), constraint)),
            }
        }
    }
    // Sort by tool key so the report order is stable.
    let mut groups: Vec<_> = by_tool.into_iter().collect();
    groups.sort_by(|a, b| a.0.cmp(&b.0));
    let mut conflicts = Vec::new();
    let mut reported: HashSet<(String, std::path::PathBuf, std::path::PathBuf)> = HashSet::new();
    for (tool_key, (allowed, disallowed)) in groups {
        if allowed.is_empty() || disallowed.is_empty() {
            continue;
        }
        for (allow_path, allow_constraint) in &allowed {
            for (disallow_path, disallow_constraint) in &disallowed {
                // A file contradicting itself is not a cross-file conflict.
                if allow_path == disallow_path {
                    continue;
                }
                // Normalize the pair ordering so A-vs-B and B-vs-A dedupe together.
                let key = if allow_path < disallow_path {
                    (tool_key.clone(), allow_path.clone(), disallow_path.clone())
                } else {
                    (tool_key.clone(), disallow_path.clone(), allow_path.clone())
                };
                if reported.insert(key) {
                    conflicts.push(ToolConflict {
                        tool_name: allow_constraint.tool_name.clone(),
                        allow_file: allow_path.clone(),
                        allow_line: allow_constraint.line,
                        allow_context: allow_constraint.source_context.clone(),
                        disallow_file: disallow_path.clone(),
                        disallow_line: disallow_constraint.line,
                        disallow_context: disallow_constraint.source_context.clone(),
                    });
                }
            }
        }
    }
    conflicts
}
/// The kind of instruction layer a file belongs to, by originating tool.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LayerType {
    ClaudeMd,
    AgentsMd,
    GeminiMd,
    CursorRules,
    CopilotInstructions,
    ClineRules,
    OpenCodeRules,
    Other,
}

impl LayerType {
    /// Human-readable label for this layer, as used in reports.
    pub fn as_str(&self) -> &'static str {
        match self {
            Self::ClaudeMd => "CLAUDE.md",
            Self::AgentsMd => "AGENTS.md",
            Self::GeminiMd => "GEMINI[.local].md",
            Self::CursorRules => "Cursor Rules",
            Self::CopilotInstructions => "Copilot Instructions",
            Self::ClineRules => "Cline Rules",
            Self::OpenCodeRules => "OpenCode Rules",
            Self::Other => "Other",
        }
    }
}
/// An instruction file categorized by origin tool, plus whether its content
/// documents precedence between layers (see `categorize_layer`).
#[derive(Debug, Clone)]
pub struct InstructionLayer {
/// Path of the instruction file.
pub path: std::path::PathBuf,
/// Which tool's layer this file belongs to.
pub layer_type: LayerType,
/// True when the content (within the size limit) matches precedence
/// keywords like "precedence", "override", "hierarchy".
pub has_precedence_doc: bool,
}
/// Categorizes an instruction file by its path/file name and records whether
/// its content documents layer precedence. Oversized content is treated as
/// having no precedence documentation (to bound regex cost).
pub fn categorize_layer(path: &Path, content: &str) -> InstructionLayer {
    let path_str = path.to_string_lossy().to_lowercase();
    let file_name = path
        .file_name()
        .and_then(|n| n.to_str())
        .unwrap_or("")
        .to_lowercase();
    // Exact file names first, then path-fragment heuristics; arms are
    // evaluated top to bottom, so the most specific checks win.
    let layer_type = match file_name.as_str() {
        "claude.md" => LayerType::ClaudeMd,
        "agents.md" => LayerType::AgentsMd,
        "gemini.md" | "gemini.local.md" => LayerType::GeminiMd,
        _ if path_str.contains(".cursor") && path_str.contains("rules") => LayerType::CursorRules,
        _ if path_str.contains(".github") && path_str.contains("copilot") => {
            LayerType::CopilotInstructions
        }
        _ if file_name == ".clinerules" || path_str.contains(".clinerules") => {
            LayerType::ClineRules
        }
        _ if path_str.contains(".opencode") => LayerType::OpenCodeRules,
        _ => LayerType::Other,
    };
    let has_precedence_doc =
        content.len() <= MAX_REGEX_INPUT_SIZE && layer_precedence_pattern().is_match(content);
    InstructionLayer {
        path: path.to_path_buf(),
        layer_type,
        has_precedence_doc,
    }
}
/// Reported when multiple instruction layers coexist without any of them
/// documenting precedence (see `detect_precedence_issues`).
#[derive(Debug, Clone)]
pub struct LayerPrecedenceIssue {
/// The instruction layers involved (all non-`Other` layers found).
pub layers: Vec<InstructionLayer>,
/// Human-readable summary listing the layers.
pub description: String,
}
/// Returns an issue when two or more recognized instruction layers coexist
/// and none of them documents precedence; `None` otherwise. Layers of type
/// `Other` are ignored.
pub fn detect_precedence_issues(layers: &[InstructionLayer]) -> Option<LayerPrecedenceIssue> {
    let meaningful: Vec<&InstructionLayer> = layers
        .iter()
        .filter(|l| l.layer_type != LayerType::Other)
        .collect();
    // A single recognized layer cannot conflict with anything.
    if meaningful.len() < 2 {
        return None;
    }
    // One documented precedence statement anywhere is enough.
    if meaningful.iter().any(|l| l.has_precedence_doc) {
        return None;
    }
    let layer_names: Vec<String> = meaningful
        .iter()
        .map(|l| format!("{} ({})", l.layer_type.as_str(), l.path.display()))
        .collect();
    Some(LayerPrecedenceIssue {
        layers: meaningful.into_iter().cloned().collect(),
        description: format!(
            "Multiple instruction layers detected without documented precedence: {}",
            layer_names.join(", ")
        ),
    })
}
/// Returns true when `path` looks like an agent instruction file: well-known
/// file names (CLAUDE.md, AGENTS.md, GEMINI[.local].md, .clinerules), Cursor
/// rule files, Copilot instruction files under `.github`, or anything under a
/// `.opencode` directory. Editor backup/temp files are always excluded.
pub fn is_instruction_file(path: &Path) -> bool {
    use std::path::Component;
    let Some(file_name) = path.file_name().and_then(|n| n.to_str()) else {
        return false;
    };
    // Editor backups and temp files never count, even with known names.
    const JUNK_SUFFIXES: [&str; 5] = [".bak", ".old", ".tmp", ".swp", "~"];
    if JUNK_SUFFIXES.iter().any(|s| file_name.ends_with(s)) {
        return false;
    }
    // Well-known instruction file names match anywhere in the tree.
    const WELL_KNOWN: [&str; 5] = [
        "claude.md",
        "agents.md",
        "gemini.md",
        "gemini.local.md",
        ".clinerules",
    ];
    if WELL_KNOWN.iter().any(|n| file_name.eq_ignore_ascii_case(n)) {
        return true;
    }
    // Scan the path components for tool-specific directory markers.
    // NOTE(review): "rules" is tracked independently of ".cursor", so a
    // "rules" directory anywhere on the path satisfies the Cursor check —
    // confirm this looseness is intended.
    let mut saw_cursor = false;
    let mut saw_github = false;
    let mut saw_copilot_after_github = false;
    let mut saw_rules = false;
    let mut saw_opencode = false;
    for component in path.components() {
        let Component::Normal(os) = component else {
            continue;
        };
        let Some(part) = os.to_str() else { continue };
        if part.eq_ignore_ascii_case(".cursor") {
            saw_cursor = true;
        } else if part.eq_ignore_ascii_case("rules") {
            saw_rules = true;
        } else if part.eq_ignore_ascii_case(".github") {
            saw_github = true;
        } else if saw_github && ascii_contains_ignore_case(part, "copilot") {
            // Only counts once a .github component has already been seen.
            saw_copilot_after_github = true;
        } else if part.eq_ignore_ascii_case(".opencode") {
            saw_opencode = true;
        }
    }
    if saw_cursor {
        // Under .cursor, either an .mdc extension or a rules directory qualifies.
        let has_mdc_ext = Path::new(file_name)
            .extension()
            .and_then(|ext| ext.to_str())
            .is_some_and(|ext| ext.eq_ignore_ascii_case("mdc"));
        if has_mdc_ext || saw_rules {
            return true;
        }
    }
    (saw_github && saw_copilot_after_github) || saw_opencode
}

/// ASCII-case-insensitive substring test; an empty needle always matches.
fn ascii_contains_ignore_case(haystack: &str, needle: &str) -> bool {
    if needle.is_empty() {
        return true;
    }
    haystack
        .as_bytes()
        .windows(needle.len())
        .any(|window| window.eq_ignore_ascii_case(needle.as_bytes()))
}
/// Byte budget (32 KiB) used for the Codex size check.
pub const CODEX_BYTE_LIMIT: usize = 32_768;

/// Reported when content is larger than a byte budget.
#[derive(Debug, Clone)]
pub struct ByteLimitExceeded {
/// Actual size of the content, in bytes.
pub byte_count: usize,
/// The budget that was exceeded.
pub limit: usize,
}

/// Returns `Some` when `content`'s byte length exceeds `limit`, `None`
/// otherwise (a length exactly equal to the limit is within budget).
pub fn check_byte_limit(content: &str, limit: usize) -> Option<ByteLimitExceeded> {
    let byte_count = content.len();
    (byte_count > limit).then(|| ByteLimitExceeded { byte_count, limit })
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_regex_patterns_compile() {
let _ = claude_hooks_pattern();
let _ = context_fork_pattern();
let _ = agent_field_pattern();
let _ = allowed_tools_pattern();
let _ = claude_section_guard_pattern();
let _ = markdown_header_pattern();
let _ = hard_coded_path_pattern();
let _ = build_command_pattern();
let _ = tool_allow_pattern();
let _ = tool_disallow_pattern();
let _ = layer_precedence_pattern();
}
#[test]
fn test_detect_hooks_in_content() {
let content = r#"# Agent Config
- type: PreToolExecution
command: echo "test"
"#;
let results = find_claude_specific_features(content);
assert_eq!(results.len(), 1);
assert_eq!(results[0].feature, "hooks");
}
#[test]
fn test_detect_context_fork() {
let content = r#"---
name: test
context: fork
agent: Explore
---
Body"#;
let results = find_claude_specific_features(content);
assert!(results.iter().any(|r| r.feature == "context:fork"));
}
#[test]
fn test_detect_agent_field() {
let content = r#"---
name: test
agent: general-purpose
---
Body"#;
let results = find_claude_specific_features(content);
assert!(results.iter().any(|r| r.feature == "agent"));
}
#[test]
fn test_detect_allowed_tools() {
let content = r#"---
name: test
allowed-tools: Read Write Bash
---
Body"#;
let results = find_claude_specific_features(content);
assert!(results.iter().any(|r| r.feature == "allowed-tools"));
}
#[test]
fn test_detect_at_import_syntax() {
let content = "Include rules from @path/to/rules.md in your config.";
let results = find_claude_specific_features(content);
assert!(
results.iter().any(|r| r.feature == "@import"),
"Should detect @path/to/rules.md as @import syntax"
);
}
#[test]
fn test_detect_at_import_with_wildcard() {
let content = "Load all rules with @.config/rules/*.md";
let results = find_claude_specific_features(content);
assert!(
results.iter().any(|r| r.feature == "@import"),
"Should detect @.config/rules/*.md (wildcard @import)"
);
}
#[test]
fn test_no_false_positive_email_in_at_import() {
let content = "Contact user@example.com for questions.";
let results = find_claude_specific_features(content);
assert!(
!results.iter().any(|r| r.feature == "@import"),
"Email addresses should not be flagged as @imports"
);
}
#[test]
fn test_no_false_positive_email_standalone() {
let content = "Email admin@domain.org for access.";
let results = find_claude_specific_features(content);
assert!(
!results.iter().any(|r| r.feature == "@import"),
"Standalone email should not trigger @import detection"
);
}
#[test]
fn test_no_claude_features_in_clean_content() {
let content = r#"# Project Guidelines
Follow the coding style guide.
## Commands
- npm run build
- npm run test
"#;
let results = find_claude_specific_features(content);
assert!(results.is_empty());
}
#[test]
fn test_multiple_claude_features() {
let content = r#"---
name: test
context: fork
agent: Plan
allowed-tools: Read Write
---
Body"#;
let results = find_claude_specific_features(content);
assert!(results.len() >= 3);
}
#[test]
fn test_detect_custom_agent_name() {
let content = r#"---
name: test
agent: security-reviewer
---
Body"#;
let results = find_claude_specific_features(content);
assert!(results.iter().any(|r| r.feature == "agent"));
}
#[test]
fn test_guarded_hooks_in_claude_section() {
let content = r#"# Project Guidelines
## Claude Code Specific
- type: PreToolExecution
command: echo "test"
"#;
let results = find_claude_specific_features(content);
assert!(
results.is_empty(),
"Hooks in Claude-specific section should not trigger XP-001"
);
}
#[test]
fn test_guarded_context_fork() {
let content = r#"# Config
## Claude Only
context: fork
agent: Explore
"#;
let results = find_claude_specific_features(content);
assert!(
results.is_empty(),
"Features in Claude-only section should not trigger XP-001"
);
}
#[test]
fn test_guarded_agent_field() {
let content = r#"# Settings
## Claude Specific
agent: security-reviewer
allowed-tools: Read Write
"#;
let results = find_claude_specific_features(content);
assert!(
results.is_empty(),
"Agent field in Claude-specific section should not trigger XP-001"
);
}
#[test]
fn test_guard_section_ends_at_new_header() {
let content = r#"# Main
## Claude Code Specific
- type: Stop
command: cleanup
## Other Settings
agent: something
"#;
let results = find_claude_specific_features(content);
assert_eq!(results.len(), 1, "Expected exactly 1 result");
assert!(
!results.iter().any(|r| r.feature == "hooks"),
"Hooks in Claude section should be guarded"
);
assert!(
results.iter().any(|r| r.feature == "agent"),
"Agent field outside Claude section should be reported"
);
}
#[test]
fn test_multiple_claude_sections() {
let content = r#"# Config
## Claude Code Specific
- type: PreToolExecution
command: test1
## General Settings
Some general content.
## Claude Only
context: fork
agent: Plan
"#;
let results = find_claude_specific_features(content);
assert!(
results.is_empty(),
"Features in any Claude section should be guarded"
);
}
#[test]
fn test_html_comment_guard() {
let content = r#"# Config
<!-- Claude Code Specific -->
- type: Notification
command: notify-send
"#;
let results = find_claude_specific_features(content);
assert!(
results.is_empty(),
"HTML comment guard should protect Claude features"
);
}
#[test]
fn test_case_insensitive_guard() {
let content = r#"# Config
## CLAUDE CODE SPECIFIC
- type: SubagentStop
command: cleanup
## claude specific
allowed-tools: Bash
"#;
let results = find_claude_specific_features(content);
assert!(
results.is_empty(),
"Case-insensitive guard should protect Claude features"
);
}
#[test]
fn test_unguarded_features_still_detected() {
let content = r#"# Project Config
## Hooks Setup
- type: PreToolExecution
command: echo "test"
agent: reviewer
"#;
let results = find_claude_specific_features(content);
assert_eq!(results.len(), 2, "Unguarded features should be detected");
assert!(results.iter().any(|r| r.feature == "hooks"));
assert!(results.iter().any(|r| r.feature == "agent"));
}
#[test]
fn test_html_comment_header_resets_guard() {
let content = r#"# Config
<!-- Claude Specific -->
- type: PreToolExecution
command: test
<!-- General Settings -->
agent: reviewer
"#;
let results = find_claude_specific_features(content);
assert_eq!(
results.len(),
1,
"Agent field after non-Claude HTML header should be detected"
);
assert!(
results[0].feature == "agent",
"Should detect agent field outside Claude section"
);
}
#[test]
fn test_whitespace_before_markdown_header() {
let content = r#"# Config
## Claude Specific
- type: PreToolExecution
command: test
## Other Section
agent: reviewer
"#;
let results = find_claude_specific_features(content);
assert_eq!(
results.len(),
1,
"Agent field after indented header should be detected"
);
assert!(
results[0].feature == "agent",
"Indented header should reset guard protection"
);
}
#[test]
fn test_subheaders_within_claude_section() {
let content = r#"# Config
## Claude Specific
### Hooks Setup
- type: PreToolExecution
command: test
### Context Configuration
context: fork
agent: reviewer
"#;
let results = find_claude_specific_features(content);
assert!(
results.is_empty(),
"Features under subheaders within Claude section should still be protected"
);
}
#[test]
fn test_reset_on_same_level_header() {
let content = r#"## Claude Specific
- type: PreToolExecution
command: test
## Other Settings
agent: reviewer
"#;
let results = find_claude_specific_features(content);
assert_eq!(
results.len(),
1,
"Agent field after same-level header should be detected"
);
assert!(results[0].feature == "agent");
}
#[test]
fn test_detect_no_headers() {
let content = "Just some text without any headers.\nMore text here.";
let results = check_markdown_structure(content);
assert_eq!(results.len(), 1);
assert!(results[0].issue.contains("No markdown headers"));
}
#[test]
fn test_valid_markdown_structure() {
let content = r#"# Main Title
Some content here.
## Section One
More content.
### Subsection
Details.
"#;
let results = check_markdown_structure(content);
assert!(results.is_empty());
}
#[test]
fn test_detect_skipped_header_level() {
let content = r#"# Title
#### Skipped to h4
"#;
let results = check_markdown_structure(content);
assert_eq!(results.len(), 1);
assert!(results[0].issue.contains("skipped"));
}
#[test]
fn test_headers_inside_code_block_ignored() {
let content = r#"# Title
## Commands
```bash
# Testing
make java-test # Run Java integration tests
# Linting
make java-lint # Run Java spotlessApply
### Raw Equivalents Per Stack
```
## Next Section
"#;
let results = check_markdown_structure(content);
assert!(
results.is_empty(),
"Headers inside code blocks should be ignored, got: {:?}",
results
);
}
#[test]
fn test_only_headers_in_code_block_means_no_headers() {
let content = r#"Some content without headers.
```markdown
# This is inside a code block
## Also inside
```
More content.
"#;
let results = check_markdown_structure(content);
assert_eq!(results.len(), 1);
assert!(results[0].issue.contains("No markdown headers found"));
}
#[test]
fn test_empty_content_no_issue() {
let content = "";
let results = check_markdown_structure(content);
assert!(results.is_empty());
}
#[test]
fn test_whitespace_only_no_issue() {
let content = " \n\n ";
let results = check_markdown_structure(content);
assert!(results.is_empty());
}
#[test]
fn test_detect_claude_path() {
let content = "Check the config at .claude/settings.json";
let results = find_hard_coded_paths(content);
assert_eq!(results.len(), 1);
assert_eq!(results[0].platform, "Claude Code");
}
#[test]
fn test_detect_opencode_path() {
let content = "OpenCode stores settings in .opencode/config.yaml";
let results = find_hard_coded_paths(content);
assert_eq!(results.len(), 1);
assert_eq!(results[0].platform, "OpenCode");
}
#[test]
fn test_detect_cursor_path() {
let content = "Cursor rules are in .cursor/rules/";
let results = find_hard_coded_paths(content);
assert_eq!(results.len(), 1);
assert_eq!(results[0].platform, "Cursor");
}
#[test]
fn test_detect_multiple_platform_paths() {
let content = r#"
Platform configs:
- Claude: .claude/settings.json
- Cursor: .cursor/rules/
- OpenCode: .opencode/config.yaml
"#;
let results = find_hard_coded_paths(content);
assert_eq!(results.len(), 3);
}
#[test]
fn test_no_hard_coded_paths() {
let content = r#"# Project Config
Use environment variables for configuration.
Check the project root for settings.
"#;
let results = find_hard_coded_paths(content);
assert!(results.is_empty());
}
#[test]
fn test_case_insensitive_path_detection() {
let content = "Config at .CLAUDE/Settings.json";
let results = find_hard_coded_paths(content);
assert_eq!(results.len(), 1);
}
#[test]
fn test_detect_hooks_event_variant() {
let content = r#"hooks:
- event: Notification
command: notify-send
- event: SubagentStop
command: cleanup
"#;
let results = find_claude_specific_features(content);
assert_eq!(results.len(), 2);
assert!(results.iter().all(|r| r.feature == "hooks"));
}
#[test]
fn test_detect_cline_path() {
let content = "Cline config is in .cline/settings.json";
let results = find_hard_coded_paths(content);
assert_eq!(results.len(), 1);
assert_eq!(results[0].platform, "Cline");
}
#[test]
fn test_detect_github_copilot_path() {
    // A `.github/copilot/` path should be attributed to GitHub Copilot.
    let results = find_hard_coded_paths("GitHub Copilot config at .github/copilot/config.json");
    assert_eq!(results.len(), 1);
    assert_eq!(results[0].platform, "GitHub Copilot");
}

#[test]
fn test_extreme_header_skip_h1_to_h6() {
    // Jumping from an h1 straight to an h6 is reported as one structure issue.
    let content = r#"# Title
###### Deep header
"#;
    let issues = check_markdown_structure(content);
    assert_eq!(issues.len(), 1);
    assert!(issues[0].issue.contains("skipped from 1 to 6"));
}

#[test]
fn test_no_false_positive_relative_paths() {
    // Ordinary relative source paths must not be flagged as platform paths.
    let content = r#"# Project
Files are at:
- ./src/config.js
- ../parent/file.ts
- src/helpers/utils.rs
"#;
    assert!(find_hard_coded_paths(content).is_empty());
}
#[test]
fn test_extract_npm_commands() {
    // Backtick-quoted npm commands inside prose are both extracted.
    let content = r#"# Build
Run `npm install` to install dependencies.
Then `npm run build` to build the project.
"#;
    let commands = extract_build_commands(content);
    assert_eq!(commands.len(), 2);
    for cmd in &commands {
        assert_eq!(cmd.package_manager, PackageManager::Npm);
    }
}

#[test]
fn test_extract_pnpm_commands() {
    // A bare (unquoted) pnpm command embedded in a sentence is extracted.
    let content = r#"# Install
Use pnpm install for dependencies.
"#;
    let commands = extract_build_commands(content);
    assert_eq!(commands.len(), 1);
    assert_eq!(commands[0].package_manager, PackageManager::Pnpm);
    assert_eq!(commands[0].command_type, CommandType::Install);
}

#[test]
fn test_extract_yarn_commands() {
    let commands = extract_build_commands("yarn add express\nyarn test");
    assert_eq!(commands.len(), 2);
    for cmd in &commands {
        assert_eq!(cmd.package_manager, PackageManager::Yarn);
    }
}

#[test]
fn test_extract_bun_commands() {
    let commands = extract_build_commands("bun install\nbun run build");
    assert_eq!(commands.len(), 2);
    for cmd in &commands {
        assert_eq!(cmd.package_manager, PackageManager::Bun);
    }
}
#[test]
fn test_detect_build_conflicts() {
    use std::path::PathBuf;
    // npm in one file vs pnpm in another -> exactly one conflict pair,
    // reported with both managers (in either order).
    let files = vec![
        (
            PathBuf::from("CLAUDE.md"),
            vec![BuildCommand {
                raw_command: "npm install".to_string(),
                package_manager: PackageManager::Npm,
                command_type: CommandType::Install,
                line: 1,
                column: 0,
            }],
        ),
        (
            PathBuf::from("AGENTS.md"),
            vec![BuildCommand {
                raw_command: "pnpm install".to_string(),
                package_manager: PackageManager::Pnpm,
                command_type: CommandType::Install,
                line: 1,
                column: 0,
            }],
        ),
    ];
    let conflicts = detect_build_conflicts(&files);
    assert_eq!(conflicts.len(), 1);
    let managers: std::collections::HashSet<_> =
        [conflicts[0].file1_manager, conflicts[0].file2_manager]
            .into_iter()
            .collect();
    assert!(managers.contains(&PackageManager::Npm));
    assert!(managers.contains(&PackageManager::Pnpm));
}

#[test]
fn test_no_conflict_same_package_manager() {
    use std::path::PathBuf;
    // The same manager in both files is consistent, even with
    // different subcommands (install vs build).
    let files = vec![
        (
            PathBuf::from("CLAUDE.md"),
            vec![BuildCommand {
                raw_command: "npm install".to_string(),
                package_manager: PackageManager::Npm,
                command_type: CommandType::Install,
                line: 1,
                column: 0,
            }],
        ),
        (
            PathBuf::from("AGENTS.md"),
            vec![BuildCommand {
                raw_command: "npm run build".to_string(),
                package_manager: PackageManager::Npm,
                command_type: CommandType::Build,
                line: 1,
                column: 0,
            }],
        ),
    ];
    assert!(detect_build_conflicts(&files).is_empty());
}
#[test]
fn test_extract_tool_allow_constraint() {
    // An `allowed-tools:` line yields Allow constraints for listed tools.
    let constraints = extract_tool_constraints("allowed-tools: Read Write Bash");
    assert!(!constraints.is_empty());
    assert!(constraints.iter().any(|c| c.tool_name == "Read"));
    assert!(
        constraints
            .iter()
            .all(|c| c.constraint_type == ConstraintType::Allow)
    );
}

#[test]
fn test_extract_tool_disallow_constraint() {
    // Natural-language "Never use X" yields a Disallow constraint on X.
    let constraints = extract_tool_constraints("Never use Bash for this task.");
    assert!(!constraints.is_empty());
    assert!(constraints.iter().any(|c| c.tool_name == "Bash"));
    assert!(
        constraints
            .iter()
            .any(|c| c.constraint_type == ConstraintType::Disallow)
    );
}
#[test]
fn test_detect_tool_conflicts() {
    use std::path::PathBuf;
    // Allow in one file vs Disallow in another -> one conflict on that tool.
    let files = vec![
        (
            PathBuf::from("CLAUDE.md"),
            vec![ToolConstraint {
                tool_name: "Bash".to_string(),
                constraint_type: ConstraintType::Allow,
                source_context: "allowed-tools:".to_string(),
                line: 1,
                column: 0,
            }],
        ),
        (
            PathBuf::from("AGENTS.md"),
            vec![ToolConstraint {
                tool_name: "Bash".to_string(),
                constraint_type: ConstraintType::Disallow,
                source_context: "never use".to_string(),
                line: 1,
                column: 0,
            }],
        ),
    ];
    let conflicts = detect_tool_conflicts(&files);
    assert_eq!(conflicts.len(), 1);
    assert_eq!(conflicts[0].tool_name, "Bash");
}

#[test]
fn test_no_tool_conflict_same_constraint_type() {
    use std::path::PathBuf;
    // Two Allow constraints on the same tool never conflict.
    let files = vec![
        (
            PathBuf::from("CLAUDE.md"),
            vec![ToolConstraint {
                tool_name: "Bash".to_string(),
                constraint_type: ConstraintType::Allow,
                source_context: "allowed-tools:".to_string(),
                line: 1,
                column: 0,
            }],
        ),
        (
            PathBuf::from("AGENTS.md"),
            vec![ToolConstraint {
                tool_name: "Bash".to_string(),
                constraint_type: ConstraintType::Allow,
                source_context: "allowed-tools:".to_string(),
                line: 1,
                column: 0,
            }],
        ),
    ];
    assert!(detect_tool_conflicts(&files).is_empty());
}
#[test]
fn test_categorize_claude_md() {
    use std::path::PathBuf;
    assert_eq!(
        categorize_layer(&PathBuf::from("project/CLAUDE.md"), "# Project").layer_type,
        LayerType::ClaudeMd
    );
}

#[test]
fn test_categorize_agents_md() {
    use std::path::PathBuf;
    assert_eq!(
        categorize_layer(&PathBuf::from("project/AGENTS.md"), "# Project").layer_type,
        LayerType::AgentsMd
    );
}

#[test]
fn test_categorize_cursor_rules() {
    use std::path::PathBuf;
    assert_eq!(
        categorize_layer(&PathBuf::from("project/.cursor/rules/test.mdc"), "# Rules").layer_type,
        LayerType::CursorRules
    );
}

#[test]
fn test_precedence_detected() {
    use std::path::PathBuf;
    // Content that spells out precedence sets the has_precedence_doc flag.
    let layer = categorize_layer(
        &PathBuf::from("CLAUDE.md"),
        "CLAUDE.md takes precedence over AGENTS.md",
    );
    assert!(layer.has_precedence_doc);
}

#[test]
fn test_precedence_not_detected() {
    use std::path::PathBuf;
    assert!(!categorize_layer(&PathBuf::from("CLAUDE.md"), "# Simple rules").has_precedence_doc);
}
#[test]
fn test_detect_precedence_issues_multiple_layers() {
    use std::path::PathBuf;
    // Two layers, neither documenting precedence -> an issue is reported.
    let layers = vec![
        InstructionLayer {
            layer_type: LayerType::ClaudeMd,
            path: PathBuf::from("CLAUDE.md"),
            has_precedence_doc: false,
        },
        InstructionLayer {
            layer_type: LayerType::AgentsMd,
            path: PathBuf::from("AGENTS.md"),
            has_precedence_doc: false,
        },
    ];
    let issue = detect_precedence_issues(&layers);
    assert!(issue.is_some());
    let description = issue.unwrap().description;
    assert!(description.contains("without documented precedence"));
}

#[test]
fn test_no_precedence_issue_with_docs() {
    use std::path::PathBuf;
    // A single layer documenting precedence is enough to silence the issue.
    let layers = vec![
        InstructionLayer {
            layer_type: LayerType::ClaudeMd,
            path: PathBuf::from("CLAUDE.md"),
            has_precedence_doc: true,
        },
        InstructionLayer {
            layer_type: LayerType::AgentsMd,
            path: PathBuf::from("AGENTS.md"),
            has_precedence_doc: false,
        },
    ];
    assert!(detect_precedence_issues(&layers).is_none());
}

#[test]
fn test_no_precedence_issue_single_layer() {
    use std::path::PathBuf;
    // With only one layer there is nothing to conflict with.
    let layers = vec![InstructionLayer {
        layer_type: LayerType::ClaudeMd,
        path: PathBuf::from("CLAUDE.md"),
        has_precedence_doc: false,
    }];
    assert!(detect_precedence_issues(&layers).is_none());
}
#[test]
fn test_is_instruction_file() {
    use std::path::PathBuf;
    let matches = |p: &str| is_instruction_file(&PathBuf::from(p));
    // Recognized instruction files across platforms.
    assert!(matches("CLAUDE.md"));
    assert!(matches("AGENTS.md"));
    assert!(matches(".cursor/rules/test.mdc"));
    assert!(matches(".github/copilot-instructions.md"));
    assert!(matches(".clinerules"));
    // Ordinary project files are not instruction files.
    assert!(!matches("README.md"));
    assert!(!matches("src/main.rs"));
}

#[test]
fn test_tool_extraction_case_insensitive() {
    let constraints = extract_tool_constraints("Never use BASH for this task.");
    assert!(
        constraints.iter().any(|c| c.tool_name == "Bash"),
        "Should detect 'Bash' from 'BASH' (case-insensitive)"
    );
}

#[test]
fn test_tool_extraction_word_boundaries() {
    let constraints = extract_tool_constraints("Never use subash command.");
    assert!(
        !constraints.iter().any(|c| c.tool_name == "Bash"),
        "Should NOT detect 'Bash' from 'subash' (word boundary)"
    );
}

#[test]
fn test_tool_extraction_no_false_positive_bashful() {
    let constraints = extract_tool_constraints("allowed-tools: Bashful developer Read");
    assert!(
        !constraints.iter().any(|c| c.tool_name == "Bash"),
        "Should NOT detect 'Bash' from 'Bashful'"
    );
    assert!(
        constraints.iter().any(|c| c.tool_name == "Read"),
        "Should detect 'Read'"
    );
}

#[test]
fn test_tool_extraction_no_false_positive_reader() {
    let constraints = extract_tool_constraints("allowed-tools: Reader mode");
    assert!(
        !constraints.iter().any(|c| c.tool_name == "Read"),
        "Should NOT detect 'Read' from 'Reader'"
    );
}

#[test]
fn test_tool_extraction_valid_word_boundary() {
    // Comma-separated tool lists yield each tool individually.
    let names: Vec<_> = extract_tool_constraints("allowed-tools: Read, Write, Bash")
        .into_iter()
        .map(|c| c.tool_name)
        .collect();
    for tool in ["Read", "Write", "Bash"] {
        assert!(names.iter().any(|n| n == tool));
    }
}
#[test]
fn test_detect_build_conflicts_three_files() {
    use std::path::PathBuf;
    // Three distinct package managers -> a conflict for every pair.
    let entries = [
        ("CLAUDE.md", PackageManager::Npm, "npm install"),
        ("AGENTS.md", PackageManager::Pnpm, "pnpm install"),
        (".cursor/rules/dev.mdc", PackageManager::Yarn, "yarn install"),
    ];
    let files: Vec<_> = entries
        .into_iter()
        .map(|(path, manager, raw)| {
            (
                PathBuf::from(path),
                vec![BuildCommand {
                    line: 1,
                    column: 0,
                    package_manager: manager,
                    command_type: CommandType::Install,
                    raw_command: raw.to_string(),
                }],
            )
        })
        .collect();
    let conflicts = detect_build_conflicts(&files);
    assert_eq!(
        conflicts.len(),
        3,
        "Should detect 3 conflicts between 3 different package managers"
    );
}

#[test]
fn test_detect_tool_conflicts_three_files() {
    use std::path::PathBuf;
    // One Allow against two Disallows -> two allow/disallow conflict pairs
    // (the two Disallows agree with each other).
    let entries = [
        ("CLAUDE.md", ConstraintType::Allow, "allowed-tools:"),
        ("AGENTS.md", ConstraintType::Disallow, "never use"),
        (".cursor/rules/dev.mdc", ConstraintType::Disallow, "don't use"),
    ];
    let files: Vec<_> = entries
        .into_iter()
        .map(|(path, constraint, context)| {
            (
                PathBuf::from(path),
                vec![ToolConstraint {
                    line: 1,
                    column: 0,
                    tool_name: "Bash".to_string(),
                    constraint_type: constraint,
                    source_context: context.to_string(),
                }],
            )
        })
        .collect();
    let conflicts = detect_tool_conflicts(&files);
    assert_eq!(
        conflicts.len(),
        2,
        "Should detect 2 conflicts (allow vs disallow pairs)"
    );
}
#[test]
fn test_extract_build_commands_empty_file() {
    assert!(
        extract_build_commands("").is_empty(),
        "Empty file should have no build commands"
    );
}

#[test]
fn test_extract_tool_constraints_empty_file() {
    assert!(
        extract_tool_constraints("").is_empty(),
        "Empty file should have no tool constraints"
    );
}

#[test]
fn test_detect_build_conflicts_empty_commands() {
    use std::path::PathBuf;
    // Files present but no commands extracted -> nothing to conflict.
    let files: Vec<(PathBuf, Vec<BuildCommand>)> = vec![
        (PathBuf::from("CLAUDE.md"), Vec::new()),
        (PathBuf::from("AGENTS.md"), Vec::new()),
    ];
    assert!(
        detect_build_conflicts(&files).is_empty(),
        "Files with no commands should have no conflicts"
    );
}

#[test]
fn test_detect_tool_conflicts_empty_constraints() {
    use std::path::PathBuf;
    // Files present but no constraints extracted -> nothing to conflict.
    let files: Vec<(PathBuf, Vec<ToolConstraint>)> = vec![
        (PathBuf::from("CLAUDE.md"), Vec::new()),
        (PathBuf::from("AGENTS.md"), Vec::new()),
    ];
    assert!(
        detect_tool_conflicts(&files).is_empty(),
        "Files with no constraints should have no conflicts"
    );
}
#[test]
fn test_npm_i_without_trailing_space() {
    // `npm i` followed by a backtick (no trailing space) still parses.
    let results = extract_build_commands("Run `npm i` to install");
    assert_eq!(results.len(), 1);
    assert_eq!(results[0].package_manager, PackageManager::Npm);
    assert_eq!(
        results[0].command_type,
        CommandType::Install,
        "npm i without trailing space should be Install"
    );
}

#[test]
fn test_yarn_i_at_end_of_content() {
    // `yarn i` terminated only by the newline still parses as Install.
    let results = extract_build_commands("Install with yarn i\n");
    assert_eq!(results.len(), 1);
    assert_eq!(results[0].package_manager, PackageManager::Yarn);
    assert_eq!(
        results[0].command_type,
        CommandType::Install,
        "yarn i at end of line should be Install"
    );
}

#[test]
fn test_pnpm_i_standalone() {
    // The whole content is just the command, with no trailing newline.
    let results = extract_build_commands("pnpm i");
    assert_eq!(results.len(), 1);
    assert_eq!(results[0].package_manager, PackageManager::Pnpm);
    assert_eq!(
        results[0].command_type,
        CommandType::Install,
        "pnpm i should be Install"
    );
}
#[test]
fn test_bun_i_end_of_line() {
    // Two commands are extracted: `bun i` (install) and `bun run build`.
    let content = "First run bun i\nThen run bun run build";
    let results = extract_build_commands(content);
    assert_eq!(results.len(), 2);
    // Select the install command by its exact text. The previous probe,
    // `raw_command.contains(" i")`, was fragile: it would match any command
    // containing a word starting with "i" (e.g. "npm run index").
    let install_cmd = results.iter().find(|r| r.raw_command.trim() == "bun i");
    assert!(install_cmd.is_some());
    assert_eq!(install_cmd.unwrap().command_type, CommandType::Install);
}
#[test]
fn test_backup_file_claude_md_bak() {
    use std::path::PathBuf;
    let backup = PathBuf::from("CLAUDE.md.bak");
    assert!(
        !is_instruction_file(&backup),
        "CLAUDE.md.bak should NOT be considered an instruction file"
    );
}

#[test]
fn test_backup_file_agents_md_old() {
    use std::path::PathBuf;
    let backup = PathBuf::from("AGENTS.md.old");
    assert!(
        !is_instruction_file(&backup),
        "AGENTS.md.old should NOT be considered an instruction file"
    );
}

#[test]
fn test_backup_file_cursor_rules_tmp() {
    use std::path::PathBuf;
    let backup = PathBuf::from(".cursor/rules/test.mdc.tmp");
    assert!(
        !is_instruction_file(&backup),
        ".cursor/rules/test.mdc.tmp should NOT be considered an instruction file"
    );
}

#[test]
fn test_backup_file_swp() {
    use std::path::PathBuf;
    // Vim swap file.
    let backup = PathBuf::from("CLAUDE.md.swp");
    assert!(
        !is_instruction_file(&backup),
        "CLAUDE.md.swp should NOT be considered an instruction file"
    );
}

#[test]
fn test_backup_file_tilde() {
    use std::path::PathBuf;
    // Editor tilde backup.
    let backup = PathBuf::from("AGENTS.md~");
    assert!(
        !is_instruction_file(&backup),
        "AGENTS.md~ should NOT be considered an instruction file"
    );
}
#[test]
fn test_valid_instruction_files_still_work() {
    use std::path::PathBuf;
    // Backup-file filtering must not reject the canonical names.
    for path in [
        "CLAUDE.md",
        "AGENTS.md",
        ".cursor/rules/test.mdc",
        ".github/copilot-instructions.md",
    ] {
        assert!(is_instruction_file(&PathBuf::from(path)));
    }
}
#[test]
fn test_instruction_file_case_variations() {
    use std::path::PathBuf;
    // File-name matching is case-insensitive for every supported name.
    let cases = [
        ("Claude.MD", "Claude.MD should match (case-insensitive)"),
        ("agents.MD", "agents.MD should match (case-insensitive)"),
        ("GEMINI.md", "GEMINI.md should match (case-insensitive)"),
        ("Gemini.Local.Md", "Gemini.Local.Md should match (case-insensitive)"),
        (".CLINERULES", ".CLINERULES should match (case-insensitive)"),
    ];
    for (path, message) in cases {
        assert!(is_instruction_file(&PathBuf::from(path)), "{}", message);
    }
}
#[test]
fn test_instruction_file_no_false_positive_cursor_substring() {
    use std::path::PathBuf;
    // ".cursor" must be a directory component, not a filename substring.
    let matches = |p: &str| is_instruction_file(&PathBuf::from(p));
    assert!(
        !matches("my.cursor-notes.txt"),
        "my.cursor-notes.txt should NOT match - .cursor is not a directory component"
    );
    assert!(
        !matches("my.cursor-notes.mdc"),
        "my.cursor-notes.mdc should NOT match - .cursor is not a directory component"
    );
}

#[test]
fn test_instruction_file_deeply_nested_cursor() {
    use std::path::PathBuf;
    // `.cursor/rules/` matches at any depth; other `.cursor/` subdirs don't.
    let matches = |p: &str| is_instruction_file(&PathBuf::from(p));
    assert!(
        matches("a/b/.cursor/rules/deep/file.mdc"),
        "Deeply nested .cursor/rules path should match"
    );
    assert!(
        matches("project/.cursor/rules/api.mdc"),
        ".cursor/rules/*.mdc should match"
    );
    assert!(
        !matches("a/b/.cursor/config/settings.json"),
        ".cursor/config/settings.json should NOT match"
    );
}

#[test]
fn test_instruction_file_opencode_directory() {
    use std::path::PathBuf;
    let matches = |p: &str| is_instruction_file(&PathBuf::from(p));
    assert!(
        matches(".opencode/config.md"),
        ".opencode directory should match"
    );
    assert!(
        matches("project/.opencode/something.yaml"),
        "nested .opencode directory should match"
    );
}
#[test]
fn test_instruction_file_github_copilot_variants() {
    use std::path::PathBuf;
    // Only Copilot-related paths under .github/ count; workflows do not.
    let matches = |p: &str| is_instruction_file(&PathBuf::from(p));
    assert!(
        matches(".github/copilot-instructions.md"),
        ".github/copilot-instructions.md should match"
    );
    assert!(
        matches(".github/copilot/settings.json"),
        ".github/copilot/settings.json should match"
    );
    assert!(
        !matches(".github/workflows/ci.yml"),
        ".github/workflows/ci.yml should NOT match"
    );
}

#[test]
fn test_instruction_file_bare_filename_no_path() {
    use std::path::PathBuf;
    // A .mdc extension alone is not enough without a .cursor parent.
    let matches = |p: &str| is_instruction_file(&PathBuf::from(p));
    assert!(
        !matches("random.mdc"),
        "random.mdc without .cursor parent should NOT match"
    );
    assert!(
        !matches("rules.mdc"),
        "rules.mdc without .cursor parent should NOT match"
    );
}

#[test]
fn test_instruction_file_empty_and_special_paths() {
    use std::path::PathBuf;
    let matches = |p: &str| is_instruction_file(&PathBuf::from(p));
    assert!(!matches(""), "Empty path should not match");
    assert!(matches("/CLAUDE.md"), "Absolute path /CLAUDE.md should match");
    assert!(
        matches("../../CLAUDE.md"),
        "Relative path with .. should match"
    );
    assert!(
        !matches(".cursor/config.md"),
        ".cursor/config.md (no .mdc, no rules) should NOT match"
    );
}
#[test]
fn test_find_claude_specific_features_oversized_input() {
    // Inputs beyond MAX_REGEX_INPUT_SIZE are skipped entirely (ReDoS guard).
    let content = "a".repeat(MAX_REGEX_INPUT_SIZE + 1000);
    assert!(
        find_claude_specific_features(&content).is_empty(),
        "Oversized content should be skipped for ReDoS protection"
    );
}

#[test]
fn test_find_hard_coded_paths_oversized_input() {
    let content = "a".repeat(MAX_REGEX_INPUT_SIZE + 1000);
    assert!(
        find_hard_coded_paths(&content).is_empty(),
        "Oversized content should be skipped for ReDoS protection"
    );
}

#[test]
fn test_extract_build_commands_oversized_input() {
    let content = "a".repeat(MAX_REGEX_INPUT_SIZE + 1000);
    assert!(
        extract_build_commands(&content).is_empty(),
        "Oversized content should be skipped for ReDoS protection"
    );
}

#[test]
fn test_extract_tool_constraints_oversized_input() {
    let content = "a".repeat(MAX_REGEX_INPUT_SIZE + 1000);
    assert!(
        extract_tool_constraints(&content).is_empty(),
        "Oversized content should be skipped for ReDoS protection"
    );
}

#[test]
fn test_categorize_layer_oversized_input_precedence_doc() {
    use std::path::PathBuf;
    // "precedence " is 11 bytes, so this repetition exceeds the limit;
    // the precedence keyword must then be ignored.
    let content = "precedence ".repeat((MAX_REGEX_INPUT_SIZE / 11) + 100);
    let layer = categorize_layer(&PathBuf::from("CLAUDE.md"), &content);
    assert!(
        !layer.has_precedence_doc,
        "Oversized content should not detect precedence for ReDoS protection"
    );
}
/// Returns `base` padded with `'a'` filler so the result is exactly `total`
/// bytes long. Shared by the size-limit boundary tests below.
fn padded_to(base: &str, total: usize) -> String {
    format!("{}{}", base, "a".repeat(total - base.len()))
}

#[test]
fn test_find_claude_specific_features_exactly_at_64kb_limit() {
    // The guard is strictly greater-than: content AT the limit is processed.
    let content = padded_to("type: PreToolExecution\n", MAX_REGEX_INPUT_SIZE);
    assert_eq!(
        content.len(),
        MAX_REGEX_INPUT_SIZE,
        "Content must be exactly at the limit"
    );
    assert!(
        !find_claude_specific_features(&content).is_empty(),
        "Content at exactly the limit should be processed"
    );
}

#[test]
fn test_find_claude_specific_features_one_byte_over_limit() {
    let content = padded_to("type: PreToolExecution\n", MAX_REGEX_INPUT_SIZE + 1);
    assert_eq!(
        content.len(),
        MAX_REGEX_INPUT_SIZE + 1,
        "Content must be one byte over the limit"
    );
    assert!(
        find_claude_specific_features(&content).is_empty(),
        "Content one byte over the limit should be skipped"
    );
}

#[test]
fn test_find_hard_coded_paths_exactly_at_64kb_limit() {
    let content = padded_to("/Users/name/project\n", MAX_REGEX_INPUT_SIZE);
    assert_eq!(
        content.len(),
        MAX_REGEX_INPUT_SIZE,
        "Content must be exactly at the limit"
    );
    assert!(
        !find_hard_coded_paths(&content).is_empty(),
        "Content at exactly the limit should be processed"
    );
}

#[test]
fn test_find_hard_coded_paths_one_byte_over_limit() {
    let content = padded_to("/Users/name/project\n", MAX_REGEX_INPUT_SIZE + 1);
    assert_eq!(
        content.len(),
        MAX_REGEX_INPUT_SIZE + 1,
        "Content must be one byte over the limit"
    );
    assert!(
        find_hard_coded_paths(&content).is_empty(),
        "Content one byte over the limit should be skipped"
    );
}

#[test]
fn test_extract_build_commands_exactly_at_64kb_limit() {
    let content = padded_to("npm install\n", MAX_REGEX_INPUT_SIZE);
    assert_eq!(
        content.len(),
        MAX_REGEX_INPUT_SIZE,
        "Content must be exactly at the limit"
    );
    assert!(
        !extract_build_commands(&content).is_empty(),
        "Content at exactly the limit should be processed"
    );
}

#[test]
fn test_extract_build_commands_one_byte_over_limit() {
    let content = padded_to("npm install\n", MAX_REGEX_INPUT_SIZE + 1);
    assert_eq!(
        content.len(),
        MAX_REGEX_INPUT_SIZE + 1,
        "Content must be one byte over the limit"
    );
    assert!(
        extract_build_commands(&content).is_empty(),
        "Content one byte over the limit should be skipped"
    );
}

#[test]
fn test_extract_tool_constraints_exactly_at_64kb_limit() {
    let content = padded_to("allowed-tools: bash\n", MAX_REGEX_INPUT_SIZE);
    assert_eq!(
        content.len(),
        MAX_REGEX_INPUT_SIZE,
        "Content must be exactly at the limit"
    );
    assert!(
        !extract_tool_constraints(&content).is_empty(),
        "Content at exactly the limit should be processed"
    );
}

#[test]
fn test_extract_tool_constraints_one_byte_over_limit() {
    let content = padded_to("allowed-tools: bash\n", MAX_REGEX_INPUT_SIZE + 1);
    assert_eq!(
        content.len(),
        MAX_REGEX_INPUT_SIZE + 1,
        "Content must be one byte over the limit"
    );
    assert!(
        extract_tool_constraints(&content).is_empty(),
        "Content one byte over the limit should be skipped"
    );
}
#[test]
fn test_categorize_layer_exactly_at_64kb_limit() {
    use std::path::PathBuf;
    // The size guard is strictly greater-than: content AT the limit is scanned.
    let mut content = String::from("precedence\n");
    content.push_str(&"a".repeat(MAX_REGEX_INPUT_SIZE - content.len()));
    assert_eq!(
        content.len(),
        MAX_REGEX_INPUT_SIZE,
        "Content must be exactly at the limit"
    );
    let layer = categorize_layer(&PathBuf::from("CLAUDE.md"), &content);
    assert!(
        layer.has_precedence_doc,
        "has_precedence_doc should be true when content.len() == MAX_REGEX_INPUT_SIZE"
    );
}

#[test]
fn test_categorize_layer_one_byte_over_limit() {
    use std::path::PathBuf;
    let mut content = String::from("precedence\n");
    content.push_str(&"a".repeat(MAX_REGEX_INPUT_SIZE - content.len() + 1));
    assert_eq!(
        content.len(),
        MAX_REGEX_INPUT_SIZE + 1,
        "Content must be one byte over the limit"
    );
    let layer = categorize_layer(&PathBuf::from("CLAUDE.md"), &content);
    assert!(
        !layer.has_precedence_doc,
        "has_precedence_doc should be false when content.len() == MAX_REGEX_INPUT_SIZE + 1"
    );
}

#[test]
fn test_check_markdown_structure_processes_above_64kb_limit() {
    // Regression test: check_markdown_structure intentionally has no 64KB
    // guard and must keep processing oversized content.
    let header = "# Title\n";
    let mut content = String::from(header);
    content.push_str(&"a ".repeat((MAX_REGEX_INPUT_SIZE - header.len()) / 2 + 1));
    assert!(
        content.len() > MAX_REGEX_INPUT_SIZE,
        "Content must exceed the limit to be a meaningful regression test"
    );
    assert!(
        check_markdown_structure(&content).is_empty(),
        "check_markdown_structure should process content above 64KB (no size guard)"
    );
}
#[test]
fn test_categorize_gemini_md_variants() {
    use std::path::PathBuf;
    // Both the plain and the `.local` variant categorize as GeminiMd.
    for file in ["project/GEMINI.md", "project/GEMINI.local.md"] {
        let layer = categorize_layer(&PathBuf::from(file), "# Project");
        assert_eq!(
            layer.layer_type,
            LayerType::GeminiMd,
            "Failed for file: {}",
            file
        );
    }
}
#[test]
fn test_check_byte_limit_under() {
    assert!(check_byte_limit("Short content", CODEX_BYTE_LIMIT).is_none());
}

#[test]
fn test_check_byte_limit_exact() {
    // The limit is inclusive: exactly CODEX_BYTE_LIMIT bytes is fine.
    let content = "a".repeat(CODEX_BYTE_LIMIT);
    assert!(
        check_byte_limit(&content, CODEX_BYTE_LIMIT).is_none(),
        "Exact limit should not trigger"
    );
}

#[test]
fn test_check_byte_limit_over() {
    // One byte over reports both the observed count and the limit.
    let content = "a".repeat(CODEX_BYTE_LIMIT + 1);
    match check_byte_limit(&content, CODEX_BYTE_LIMIT) {
        Some(exceeded) => {
            assert_eq!(exceeded.byte_count, CODEX_BYTE_LIMIT + 1);
            assert_eq!(exceeded.limit, CODEX_BYTE_LIMIT);
        }
        None => panic!("content one byte over the limit should trigger"),
    }
}

#[test]
fn test_check_byte_limit_empty() {
    assert!(check_byte_limit("", CODEX_BYTE_LIMIT).is_none());
}
}