#![cfg_attr(coverage_nightly, coverage(off))]
use super::types::*;
use ignore::WalkBuilder;
use std::fs;
use std::path::{Path, PathBuf};
/// Path component names that the markdown walker filters out.
///
/// `walkdir_markdown_files_with_excludes` compares the final component of
/// every walked entry against this list inside its `filter_entry` callback.
/// The comparison is an exact, case-sensitive name match and is applied to
/// any entry, wherever it sits in the tree.
const SKIP_DIRS: &[&str] = &[
".git",
".claude",
"node_modules",
"target",
".pmat",
"vendor",
"build",
"dist",
"__pycache__",
".venv",
"site-packages",
];
/// Collects the markdown files under `dir`, applying the exclude patterns
/// that `load_markdown_excludes` finds in the project's configuration.
///
/// This is a convenience wrapper around
/// [`walkdir_markdown_files_with_excludes`].
#[provable_contracts_macros::contract("pmat-core.yaml", equation = "path_exists")]
pub fn walkdir_markdown_files(dir: &Path) -> Vec<PathBuf> {
    walkdir_markdown_files_with_excludes(dir, load_markdown_excludes(dir).as_slice())
}
/// Walks `dir` and returns every regular file whose extension is `md`, `mdx`
/// or `markdown`, minus anything matched by `excludes`.
///
/// The walker is configured with hidden-file filtering turned off, gitignore
/// and git-exclude handling turned on, and `.pmatignore` / `.paimlignore` as
/// additional ignore files. Entries whose final path component appears in
/// `SKIP_DIRS` are rejected by the entry filter. Entries the walker reports
/// as errors are silently dropped.
pub fn walkdir_markdown_files_with_excludes(dir: &Path, excludes: &[String]) -> Vec<PathBuf> {
    let mut builder = WalkBuilder::new(dir);
    builder
        .hidden(false)
        .git_ignore(true)
        .git_exclude(true)
        .add_custom_ignore_filename(".pmatignore")
        .add_custom_ignore_filename(".paimlignore")
        .filter_entry(|candidate| {
            // Entries without a UTF-8 file name can never equal a skip name,
            // so they are kept.
            candidate
                .path()
                .file_name()
                .and_then(|os_name| os_name.to_str())
                .map_or(true, |name| !SKIP_DIRS.contains(&name))
        });

    builder
        .build()
        .flatten()
        .filter(|entry| entry.path().is_file())
        .filter(|entry| {
            matches!(
                entry.path().extension().and_then(|ext| ext.to_str()),
                Some("md" | "mdx" | "markdown")
            )
        })
        .filter(|entry| !path_matches_any_exclude(entry.path(), dir, excludes))
        .map(|entry| entry.path().to_path_buf())
        .collect()
}
/// Gathers exclude patterns for the markdown walker from project config.
///
/// Sources, in order:
/// 1. `<dir>/.pmat-gates.toml`: the string arrays at `exclude.paths` and
///    `file_health.exclude`.
/// 2. The `comply.thresholds.file_health_exclude` list from `PmatYamlConfig`.
///
/// A source that cannot be read or parsed contributes nothing. Duplicate
/// patterns are dropped, keeping the first occurrence.
fn load_markdown_excludes(dir: &Path) -> Vec<String> {
    let mut patterns: Vec<String> = Vec::new();

    let gates_table = fs::read_to_string(dir.join(".pmat-gates.toml"))
        .ok()
        .and_then(|text| text.parse::<toml::Table>().ok());
    if let Some(table) = gates_table {
        let exclude_paths = table
            .get("exclude")
            .and_then(|section| section.get("paths"));
        push_str_array(&mut patterns, exclude_paths);

        let file_health_exclude = table
            .get("file_health")
            .and_then(|section| section.get("exclude"));
        push_str_array(&mut patterns, file_health_exclude);
    }

    if let Ok(cfg) = crate::models::comply_config::PmatYamlConfig::load(dir) {
        for candidate in &cfg.comply.thresholds.file_health_exclude {
            let already_present = patterns.iter().any(|existing| existing == candidate);
            if !already_present {
                patterns.push(candidate.clone());
            }
        }
    }

    patterns
}
/// Appends the string elements of a TOML array to `out`, skipping values
/// that `out` already contains.
///
/// Does nothing when `v` is `None` or is not an array. Non-string array
/// elements are ignored.
fn push_str_array(out: &mut Vec<String>, v: Option<&toml::Value>) {
    let items = match v.and_then(|value| value.as_array()) {
        Some(items) => items,
        None => return,
    };

    for text in items.iter().filter_map(|item| item.as_str()) {
        if out.iter().all(|existing| existing.as_str() != text) {
            out.push(text.to_owned());
        }
    }
}
/// Returns `true` when `path` is matched by at least one exclude pattern.
///
/// Patterns are tested against the path relative to `root` (or the path
/// unchanged when it is not under `root`) and against the bare file name.
/// An empty pattern list never matches.
fn path_matches_any_exclude(path: &Path, root: &Path, patterns: &[String]) -> bool {
    let relative = match path.strip_prefix(root) {
        Ok(stripped) => stripped,
        Err(_) => path,
    };
    let relative_text = relative.to_string_lossy();
    let base_name = path
        .file_name()
        .and_then(|os_name| os_name.to_str())
        .unwrap_or_default();

    // `any` over an empty slice is `false`, which covers the no-pattern case.
    patterns
        .iter()
        .any(|pattern| glob_like_match(&relative_text, base_name, pattern))
}
fn glob_like_match(path_str: &str, file_name: &str, pattern: &str) -> bool {
if let Some(suffix) = pattern.strip_prefix("**/") {
if suffix.ends_with("/**") {
let segment = suffix.trim_end_matches("/**");
return path_str.contains(segment);
}
if suffix.contains('*') {
return glob::Pattern::new(suffix)
.map(|p| p.matches(file_name))
.unwrap_or(false);
}
return file_name == suffix || path_str.contains(suffix);
}
if let Some(prefix) = pattern.strip_suffix("/**") {
return path_str.starts_with(prefix) || path_str.contains(&format!("/{prefix}/"));
}
if pattern.contains('/') {
return path_str.contains(pattern);
}
if pattern.contains('*') {
return glob::Pattern::new(pattern)
.map(|p| p.matches(file_name))
.unwrap_or(false);
}
file_name == pattern
}
/// CB-900: reports inline markdown links whose local target does not exist.
///
/// Every markdown file returned by `walkdir_markdown_files` is scanned line
/// by line for `](target)` sequences. Targets that are `http://` / `https://`
/// URLs, `mailto:` links, pure `#fragment` anchors, or empty are ignored.
/// For the rest, any `#fragment` is stripped and the remainder is resolved
/// relative to the directory of the file containing the link; a
/// `Severity::Warning` violation is emitted when that path does not exist.
///
/// Lines inside ``` fenced code blocks are skipped, as are the fence lines
/// themselves. Files that cannot be read as UTF-8 text are skipped silently.
///
/// Returns one violation per broken link, with the file path made relative
/// to `project_path` where possible and a 1-based line number.
#[provable_contracts_macros::contract("pmat-core.yaml", equation = "path_exists")]
pub fn detect_cb900_broken_internal_link(project_path: &Path) -> Vec<CbPatternViolation> {
    let files = walkdir_markdown_files(project_path);
    let mut violations = Vec::new();
    for file_path in &files {
        let Ok(content) = fs::read_to_string(file_path) else {
            continue;
        };
        let rel = file_path
            .strip_prefix(project_path)
            .unwrap_or(file_path)
            .display()
            .to_string();
        let file_dir = file_path.parent().unwrap_or(project_path);

        // Track fence state so example links inside code blocks are not
        // reported; skipping only the fence line itself is not enough.
        let mut in_code_block = false;
        for (i, line) in content.lines().enumerate() {
            if line.trim().starts_with("```") {
                in_code_block = !in_code_block;
                continue;
            }
            if in_code_block {
                continue;
            }

            // `remaining` is the not-yet-scanned tail of the line. The
            // markers searched for are ASCII, so all slice offsets fall on
            // character boundaries.
            let mut remaining = line;
            while let Some(marker) = remaining.find("](") {
                let after_marker = &remaining[marker + 2..];
                let Some(close) = after_marker.find(')') else {
                    break;
                };
                let link_target = &after_marker[..close];
                remaining = &after_marker[close + 1..];

                // Require the full scheme separator so that local files such
                // as `http-notes.md` are still checked.
                let is_external = link_target.starts_with("http://")
                    || link_target.starts_with("https://")
                    || link_target.starts_with("mailto:");
                if is_external || link_target.starts_with('#') || link_target.is_empty() {
                    continue;
                }

                let file_part = link_target.split('#').next().unwrap_or(link_target);
                if file_part.is_empty() || file_dir.join(file_part).exists() {
                    continue;
                }

                violations.push(CbPatternViolation {
                    pattern_id: "CB-900".to_string(),
                    file: rel.clone(),
                    line: i + 1,
                    description: format!(
                        "Broken internal link `{}` — target does not exist",
                        link_target
                    ),
                    severity: Severity::Warning,
                });
            }
        }
    }
    violations
}
/// CB-901: reports ATX headings that jump more than one level deeper than
/// the previous heading (for example `#` followed directly by `###`).
///
/// Every markdown file returned by `walkdir_markdown_files` is scanned line
/// by line. A line counts as a heading when, after trimming, it starts with
/// one to six `#` characters followed by a space, a tab, or the end of the
/// line. Lines such as `#123` or `#hashtag` are therefore not headings and
/// do not affect the tracked level. Content inside ``` fenced code blocks is
/// ignored. The first heading of a file is never reported.
///
/// Files that cannot be read as UTF-8 text are skipped silently.
///
/// Returns one `Severity::Info` violation per skip, with the file path made
/// relative to `project_path` where possible and a 1-based line number.
#[provable_contracts_macros::contract("pmat-core.yaml", equation = "path_exists")]
pub fn detect_cb901_heading_hierarchy_skip(project_path: &Path) -> Vec<CbPatternViolation> {
    let files = walkdir_markdown_files(project_path);
    let mut violations = Vec::new();
    for file_path in &files {
        let Ok(content) = fs::read_to_string(file_path) else {
            continue;
        };
        let rel = file_path
            .strip_prefix(project_path)
            .unwrap_or(file_path)
            .display()
            .to_string();

        // 0 means "no heading seen yet in this file".
        let mut last_level: usize = 0;
        let mut in_code_block = false;
        for (i, line) in content.lines().enumerate() {
            let trimmed = line.trim();
            if trimmed.starts_with("```") {
                in_code_block = !in_code_block;
                continue;
            }
            if in_code_block {
                continue;
            }

            // `#` is a single byte, so the run length doubles as a byte offset.
            let level = trimmed.bytes().take_while(|b| *b == b'#').count();
            if !(1..=6).contains(&level) {
                continue;
            }
            // An ATX heading needs whitespace (or nothing) after the marker;
            // without this check `#tag`-style text would reset `last_level`.
            let after_marker = &trimmed[level..];
            let is_heading = after_marker.is_empty()
                || after_marker.starts_with(|c: char| c == ' ' || c == '\t');
            if !is_heading {
                continue;
            }

            if last_level > 0 && level > last_level + 1 {
                violations.push(CbPatternViolation {
                    pattern_id: "CB-901".to_string(),
                    file: rel.clone(),
                    line: i + 1,
                    description: format!(
                        "Heading hierarchy skip: h{} to h{} — missing h{}",
                        last_level,
                        level,
                        last_level + 1
                    ),
                    severity: Severity::Info,
                });
            }
            last_level = level;
        }
    }
    violations
}
/// CB-902: reports lines that contain an image with empty alt text, detected
/// as the literal sequence `![]`.
///
/// Every markdown file returned by `walkdir_markdown_files` is scanned line
/// by line; content inside ``` fenced code blocks is ignored. At most one
/// violation is produced per line. Files that cannot be read as UTF-8 text
/// are skipped silently.
///
/// Returns `Severity::Info` violations with the file path made relative to
/// `project_path` where possible and a 1-based line number.
#[provable_contracts_macros::contract("pmat-core.yaml", equation = "path_exists")]
pub fn detect_cb902_missing_alt_text(project_path: &Path) -> Vec<CbPatternViolation> {
    let mut violations = Vec::new();
    for file_path in &walkdir_markdown_files(project_path) {
        let Ok(text) = fs::read_to_string(file_path) else {
            continue;
        };
        let display_path = match file_path.strip_prefix(project_path) {
            Ok(relative) => relative.display().to_string(),
            Err(_) => file_path.display().to_string(),
        };

        let mut inside_fence = false;
        let flagged_line_numbers = text.lines().enumerate().filter_map(|(index, raw_line)| {
            // A fence line flips the state and is never itself inspected.
            if raw_line.trim().starts_with("```") {
                inside_fence = !inside_fence;
                return None;
            }
            (!inside_fence && raw_line.contains("![]")).then_some(index + 1)
        });

        violations.extend(flagged_line_numbers.map(|line_number| CbPatternViolation {
            pattern_id: "CB-902".to_string(),
            file: display_path.clone(),
            line: line_number,
            description: "Image missing alt text — add descriptive text in ``".to_string(),
            severity: Severity::Info,
        }));
    }
    violations
}
/// Scans the markdown files under `project_path` for URLs that appear
/// outside of link or angle-bracket syntax (presumably pattern CB-903; the
/// pattern id is not visible in this copy of the function).
///
/// NOTE(review): the body below is truncated or garbled partway through. The
/// condition that begins with `trimmed.starts_with('[')` runs straight into
/// the tail of a description string, so the rest of that condition and the
/// start of the `violations.push(...)` call are missing. Restore the block
/// from version control before relying on anything documented here.
#[provable_contracts_macros::contract("pmat-core.yaml", equation = "path_exists")]
pub fn detect_cb903_bare_url(project_path: &Path) -> Vec<CbPatternViolation> {
let files = walkdir_markdown_files(project_path);
let mut violations = Vec::new();
for file_path in &files {
// Files that cannot be read as UTF-8 text are skipped silently.
let content = match fs::read_to_string(file_path) {
Ok(c) => c,
Err(_) => continue,
};
// Path reported in violations: relative to the project root when possible.
let rel = file_path
.strip_prefix(project_path)
.unwrap_or(file_path)
.display()
.to_string();
let mut in_code_block = false;
for (i, line) in content.lines().enumerate() {
let trimmed = line.trim();
// A fence line toggles the code-block state and is never inspected itself.
if trimmed.starts_with("```") {
in_code_block = !in_code_block;
continue;
}
if in_code_block {
continue;
}
// Only one URL per line is considered: the first `http://`, or, when the
// line has none, the first `https://`.
if let Some(http_pos) = line.find("http://").or_else(|| line.find("https://")) {
if http_pos > 0 {
// A URL directly preceded by `(`, `<`, `"` or `'` is treated as already
// wrapped or quoted, and the whole line is skipped.
let before = line.as_bytes()[http_pos - 1];
if before == b'(' || before == b'<' || before == b'"' || before == b'\'' {
continue;
}
}
// NOTE(review): the next line is where the source is truncated (see the
// function-level note above).
if trimmed.starts_with('[') || trimmed.starts_with("` or angle brackets `<url>`"
.to_string(),
severity: Severity::Info,
});
}
}
}
}
violations
}
/// Maximum markdown line length, in characters, before CB-904 reports it.
const MD_LINE_LENGTH_THRESHOLD: usize = 120;

/// CB-904: reports markdown lines longer than `MD_LINE_LENGTH_THRESHOLD`
/// characters.
///
/// Every markdown file returned by `walkdir_markdown_files` is scanned line
/// by line. These lines are exempt:
/// - anything inside a ``` fenced code block, and the fence lines themselves;
/// - lines that start with `|` after trimming;
/// - lines containing `http://` or `https://`.
///
/// Length is measured in Unicode scalar values (`char`s) on the untrimmed
/// line, not in UTF-8 bytes, so non-ASCII text is not over-counted and the
/// figure in the message matches the word "characters".
///
/// Files that cannot be read as UTF-8 text are skipped silently.
///
/// Returns one `Severity::Info` violation per offending line, with the file
/// path made relative to `project_path` where possible and a 1-based line
/// number.
#[provable_contracts_macros::contract("pmat-core.yaml", equation = "path_exists")]
pub fn detect_cb904_long_line(project_path: &Path) -> Vec<CbPatternViolation> {
    let files = walkdir_markdown_files(project_path);
    let mut violations = Vec::new();
    for file_path in &files {
        let Ok(content) = fs::read_to_string(file_path) else {
            continue;
        };
        let rel = file_path
            .strip_prefix(project_path)
            .unwrap_or(file_path)
            .display()
            .to_string();
        let mut in_code_block = false;
        for (i, line) in content.lines().enumerate() {
            let trimmed = line.trim();
            if trimmed.starts_with("```") {
                in_code_block = !in_code_block;
                continue;
            }
            if in_code_block {
                continue;
            }
            if trimmed.starts_with('|') {
                continue;
            }
            if trimmed.contains("http://") || trimmed.contains("https://") {
                continue;
            }
            // The byte length is an upper bound on the character count, so
            // short lines are rejected without walking their characters.
            if line.len() <= MD_LINE_LENGTH_THRESHOLD {
                continue;
            }
            let char_count = line.chars().count();
            if char_count > MD_LINE_LENGTH_THRESHOLD {
                violations.push(CbPatternViolation {
                    pattern_id: "CB-904".to_string(),
                    file: rel.clone(),
                    line: i + 1,
                    description: format!(
                        "Line length {} exceeds {} characters",
                        char_count, MD_LINE_LENGTH_THRESHOLD
                    ),
                    severity: Severity::Info,
                });
            }
        }
    }
    violations
}