use once_cell::sync::Lazy;
use regex::Regex;
use crate::model::{Kind, Project, Requirement, Status};
/// A single validation result attached to one field of a requirement.
#[derive(Debug, Clone, serde::Serialize)]
pub struct Finding {
/// `true` for an error, `false` for a warning.
pub error: bool,
/// Name of the requirement field the finding refers to (for example `"title"` or `"links"`).
pub field: &'static str,
/// Rule identifier of the form `REQ-V-nnnn`.
pub rule_code: &'static str,
/// Human-readable explanation of the finding.
pub message: String,
}
impl Finding {
fn err(code: &'static str, field: &'static str, message: impl Into<String>) -> Self {
Self {
error: true,
field,
rule_code: code,
message: message.into(),
}
}
fn warn(code: &'static str, field: &'static str, message: impl Into<String>) -> Self {
Self {
error: false,
field,
rule_code: code,
message: message.into(),
}
}
}
/// Catalogue of validation rules: each entry pairs a rule code (`REQ-V-nnnn`) with a short
/// human-readable description. Descriptions ending in "(warn)" denote warning-level rules.
///
/// NOTE(review): the REQ-V-0022 description lists five hedge words while `HEDGE_WORDS` holds
/// seven — confirm which is intended.
/// NOTE(review): REQ-V-0019, REQ-V-0020 and REQ-V-0023 are emitted as warnings by
/// `validate_project` but their descriptions carry no "(warn)" marker — confirm.
// Nothing in this module reads the table; presumably it is consumed elsewhere (help text or
// documentation output), hence the lint allowance — TODO confirm.
#[allow(dead_code)]
pub const RULES: &[(&str, &str)] = &[
("REQ-V-0001", "title is required"),
("REQ-V-0002", "title is too short (min 5 characters)"),
("REQ-V-0003", "title is too long (max 120 characters)"),
("REQ-V-0004", "title ends with a period (warn)"),
("REQ-V-0005", "statement is required"),
(
"REQ-V-0006",
"statement must be a complete sentence (>=5 words)",
),
("REQ-V-0007", "statement is too long (>80 words, warn)"),
(
"REQ-V-0008",
"statement must contain a normative modal verb",
),
("REQ-V-0009", "statement contains a weasel word (warn)"),
("REQ-V-0010", "statement looks compound (warn)"),
("REQ-V-0011", "statement must not be a question"),
("REQ-V-0012", "rationale is required"),
("REQ-V-0013", "rationale is very short (warn)"),
(
"REQ-V-0014",
"functional requirement is missing acceptance criteria",
),
("REQ-V-0015", "acceptance criterion is too vague (warn)"),
("REQ-V-0016", "link target does not exist"),
("REQ-V-0017", "self-link not allowed"),
(
"REQ-V-0018",
"status requires acceptance for functional requirement",
),
(
"REQ-V-0019",
"verifies-link source has no test record (verification claim without evidence)",
),
(
"REQ-V-0020",
"duplicate-intent: another non-obsolete requirement is semantically very similar",
),
(
"REQ-V-0021",
"link cycle detected (graph-level; one finding per cycle)",
),
(
"REQ-V-0022",
"statement stacks uncertainty hedges (perhaps, probably, maybe, possibly, might) (warn)",
),
(
"REQ-V-0023",
"external statement-quality hook flagged this requirement (opt-in via REQ_VALIDATE_LLM_CMD)",
),
];
/// Uncertainty hedges. A statement containing two or more distinct entries receives a
/// REQ-V-0022 warning. Entries are lowercase because they are compared against the lowercased
/// statement text.
static HEDGE_WORDS: &[&str] = &[
"perhaps",
"probably",
"maybe",
"possibly",
"might",
"roughly",
"potentially",
];
/// Vague terms that each trigger a REQ-V-0009 warning when found in a statement. Entries are
/// lowercase because they are compared against the lowercased statement text; some are
/// multi-word phrases.
static WEASEL_WORDS: &[&str] = &[
"etc",
"and/or",
"user-friendly",
"easy to use",
"robust",
"fast",
"efficient",
"flexible",
"approximately",
"as appropriate",
"if possible",
"tbd",
"to be determined",
"various",
"some",
"many",
"few",
"minimal",
"maximal",
"state-of-the-art",
"seamless",
];
/// Case-insensitive match for the whole words "shall", "must", "should" or "will".
/// Compiled once on first use.
static MODAL_RE: Lazy<Regex> =
Lazy::new(|| Regex::new(r"(?i)\b(shall|must|should|will)\b").unwrap());
/// Matches a URL-like token: a scheme (a letter followed by letters, digits, `+`, `.` or `-`),
/// then `://`, then every following non-whitespace character. Used by `strip_non_prose`.
static URL_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?i)\b[a-z][a-z0-9+.-]*://\S+").unwrap());
/// Matches an inline code span: a backtick, any run of non-backtick characters, and the closing
/// backtick. Used by `strip_non_prose`.
static BACKTICK_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"`[^`]*`").unwrap());
/// Returns `s` with inline code spans and URLs each replaced by a single space, so that the
/// prose-level checks do not react to words inside code or links.
///
/// Code spans are removed first. `URL_RE` consumes every non-whitespace character after the
/// scheme, so a URL written inside backticks would otherwise swallow the closing backtick; the
/// leftover opening backtick could then pair with a later code span and delete the prose
/// between them.
fn strip_non_prose(s: &str) -> String {
    let without_code = BACKTICK_RE.replace_all(s, " ");
    let without_urls = URL_RE.replace_all(&without_code, " ");
    without_urls.into_owned()
}
/// Reports whether `term` occurs in `haystack` at the start of a word.
///
/// An occurrence counts only when the character immediately before it is absent or is not
/// alphanumeric, so `"etc"` is found in `"logs, etc."` but not in `"fetch"`. Nothing is required
/// of the character after the occurrence, so inflected forms such as `"efficiently"` still
/// count for `"efficient"`.
fn contains_term_at_word_start(haystack: &str, term: &str) -> bool {
    haystack.match_indices(term).any(|(idx, _)| {
        haystack[..idx]
            .chars()
            .next_back()
            .map_or(true, |prev| !prev.is_alphanumeric())
    })
}

/// Runs every single-requirement rule against `r` and returns the findings in rule order.
///
/// Covered rules: title (REQ-V-0001..0004), statement (REQ-V-0005..0011 and REQ-V-0022),
/// rationale (REQ-V-0012..0013) and acceptance criteria (REQ-V-0014..0015). Rules that need the
/// whole project (links, duplicates, cycles) are not evaluated here.
///
/// The modal-verb, weasel-word, compound, hedge and question checks look at the statement with
/// code spans and URLs stripped; the word-count checks use the raw trimmed statement.
pub fn validate_requirement(r: &Requirement) -> Vec<Finding> {
    let mut out = Vec::new();

    // --- title ---------------------------------------------------------------------------
    let title = r.title.trim();
    // Length limits are expressed in characters, not bytes.
    let title_chars = title.chars().count();
    if title.is_empty() {
        out.push(Finding::err("REQ-V-0001", "title", "title is required"));
    } else if title_chars < 5 {
        out.push(Finding::err(
            "REQ-V-0002",
            "title",
            "title is too short (min 5 characters)",
        ));
    } else if title_chars > 120 {
        out.push(Finding::err(
            "REQ-V-0003",
            "title",
            "title is too long (max 120 characters)",
        ));
    }
    if title.ends_with('.') {
        out.push(Finding::warn(
            "REQ-V-0004",
            "title",
            "drop the trailing period — titles are not sentences",
        ));
    }

    // --- statement -----------------------------------------------------------------------
    let stmt = r.statement.trim();
    if stmt.is_empty() {
        out.push(Finding::err(
            "REQ-V-0005",
            "statement",
            "statement is required",
        ));
    } else {
        let words = stmt.split_whitespace().count();
        if words < 5 {
            out.push(Finding::err(
                "REQ-V-0006",
                "statement",
                "statement must be a complete sentence (>=5 words)",
            ));
        }
        if words > 80 {
            out.push(Finding::warn(
                "REQ-V-0007",
                "statement",
                format!(
                    "statement is {} words long — split into atomic requirements",
                    words
                ),
            ));
        }

        let prose = strip_non_prose(stmt);
        let prose_lower = prose.to_lowercase();

        if !MODAL_RE.is_match(&prose) {
            out.push(Finding::err(
                "REQ-V-0008",
                "statement",
                "statement must contain a normative modal verb (shall / must / should / will)",
            ));
        }

        // One warning per vague term. Terms must begin a word so that, for example, "fetch"
        // does not trigger "etc" and "breakfast" does not trigger "fast".
        for term in WEASEL_WORDS.iter().copied() {
            if contains_term_at_word_start(&prose_lower, term) {
                out.push(Finding::warn(
                    "REQ-V-0009",
                    "statement",
                    format!(
                        "avoid the vague term '{}': prefer a measurable criterion",
                        term
                    ),
                ));
            }
        }

        // Compound heuristic: a semicolon, more than one modal verb, or two or more " and "
        // joins. A single " and " (as in the tail of an "a, b, and c" list) is tolerated.
        let modal_hits = MODAL_RE.find_iter(&prose).count();
        let and_joins = prose_lower.matches(" and ").count();
        let looks_compound = prose.contains(';') || modal_hits > 1 || and_joins >= 2;
        if looks_compound {
            out.push(Finding::warn(
                "REQ-V-0010",
                "statement",
                "statement looks compound — split into atomic requirements",
            ));
        }

        // Counts distinct hedge words, not total occurrences.
        let hedge_hits = HEDGE_WORDS
            .iter()
            .filter(|&&hedge| contains_term_at_word_start(&prose_lower, hedge))
            .count();
        if hedge_hits >= 2 {
            out.push(Finding::warn(
                "REQ-V-0022",
                "statement",
                "statement stacks uncertainty hedges — commit to a concrete behaviour",
            ));
        }

        // Checked on the stripped prose so a `?` inside a URL query string or a code span is
        // not mistaken for a question.
        if prose.contains('?') {
            out.push(Finding::err(
                "REQ-V-0011",
                "statement",
                "statement must not be a question",
            ));
        }
    }

    // --- rationale -----------------------------------------------------------------------
    if r.rationale.trim().is_empty() {
        out.push(Finding::err(
            "REQ-V-0012",
            "rationale",
            "rationale is required — explain WHY",
        ));
    } else if r.rationale.split_whitespace().count() < 3 {
        out.push(Finding::warn(
            "REQ-V-0013",
            "rationale",
            "rationale is very short",
        ));
    }

    // --- acceptance criteria -------------------------------------------------------------
    if matches!(r.kind, Kind::Functional) && r.acceptance.is_empty() {
        out.push(Finding::err(
            "REQ-V-0014",
            "acceptance",
            "functional requirements need at least one acceptance criterion",
        ));
    }
    for (i, ac) in r.acceptance.iter().enumerate() {
        if ac.split_whitespace().count() < 3 {
            out.push(Finding::warn(
                "REQ-V-0015",
                "acceptance",
                // Criteria are numbered from 1 in the message.
                format!("acceptance #{} is too vague to verify", i + 1),
            ));
        }
    }

    out
}
/// Minimum Jaccard similarity (inclusive) between the token sets of two non-obsolete
/// requirements at which `validate_project` emits a REQ-V-0020 duplicate-intent warning.
pub const DUP_INTENT_THRESHOLD: f64 = 0.65;
/// Splits `s` into the set of significant lowercase tokens used for duplicate detection.
///
/// The text is lowercased and cut into maximal runs of ASCII letters and digits; every other
/// character (including non-ASCII letters) acts as a separator. Tokens of one or two
/// characters and the stop words listed below are discarded.
fn token_set(s: &str) -> std::collections::HashSet<String> {
    // Articles, prepositions, modal verbs and project boilerplate that say nothing about intent.
    const STOP_WORDS: &[&str] = &[
        "the", "a", "an", "and", "or", "of", "to", "for", "on", "in", "is", "be", "by", "with",
        "as", "that", "this", "shall", "must", "should", "will", "system", "cli",
    ];

    let lower = s.to_lowercase();
    lower
        .split(|c: char| !(c.is_ascii_lowercase() || c.is_ascii_digit()))
        // Tokens are pure ASCII here, so byte length equals character count. Filtering before
        // `to_owned` avoids allocating tokens that are thrown away.
        .filter(|word| word.len() > 2 && !STOP_WORDS.contains(word))
        .map(str::to_owned)
        .collect()
}
/// Jaccard similarity of two token sets: the number of shared tokens divided by the number of
/// distinct tokens across both sets. Returns `0.0` when both sets are empty.
fn jaccard(a: &std::collections::HashSet<String>, b: &std::collections::HashSet<String>) -> f64 {
    let shared = a.iter().filter(|token| b.contains(*token)).count();
    // |A ∪ B| = |A| + |B| - |A ∩ B|; this is zero only when both sets are empty.
    let combined = a.len() + b.len() - shared;
    match combined {
        0 => 0.0,
        total => shared as f64 / total as f64,
    }
}
/// Validates every requirement in `p` and returns `(requirement id, findings)` pairs for the
/// requirements that have at least one finding.
///
/// Three passes run in order:
/// 1. Per requirement: the `validate_requirement` rules, duplicate-intent detection
///    (REQ-V-0020), link checks (REQ-V-0016, REQ-V-0017, REQ-V-0019) and the status check
///    (REQ-V-0018).
/// 2. The optional external hook (REQ-V-0023), run only when the `REQ_VALIDATE_LLM_CMD`
///    environment variable is set to a non-blank command.
/// 3. Link-cycle detection per link kind (REQ-V-0021); each cycle is attached to its smallest id.
///
/// Obsolete requirements keep only the error-level findings of `validate_requirement` and are
/// skipped by duplicate detection and by the hook; their link and status checks still run.
pub fn validate_project(p: &Project) -> Vec<(String, Vec<Finding>)> {
    // Adds `finding` to the report entry for `id`, creating that entry if there is none yet.
    fn attach(report: &mut Vec<(String, Vec<Finding>)>, id: &str, finding: Finding) {
        match report.iter_mut().find(|(rid, _)| rid.as_str() == id) {
            Some((_, bucket)) => bucket.push(finding),
            None => report.push((id.to_string(), vec![finding])),
        }
    }

    let mut report: Vec<(String, Vec<Finding>)> = Vec::new();

    // Token sets (title + statement) of all non-obsolete requirements, for duplicate detection.
    let active: Vec<(&String, std::collections::HashSet<String>)> = p
        .requirements
        .iter()
        .filter(|(_, req)| !matches!(req.status, Status::Obsolete))
        .map(|(id, req)| (id, token_set(&format!("{} {}", req.title, req.statement))))
        .collect();

    for (id, req) in &p.requirements {
        let mut findings = validate_requirement(req);

        if matches!(req.status, Status::Obsolete) {
            // Style warnings on retired requirements are noise; keep hard errors only.
            findings.retain(|f| f.error);
        } else if let Some((_, mine)) = active.iter().find(|(aid, _)| *aid == id) {
            for (other_id, theirs) in &active {
                // Skip self, and report each pair once: only from the smaller id's side.
                if id.as_str() >= other_id.as_str() {
                    continue;
                }
                let sim = jaccard(mine, theirs);
                if sim >= DUP_INTENT_THRESHOLD {
                    findings.push(Finding::warn(
                        "REQ-V-0020",
                        "statement",
                        format!(
                            "duplicate-intent: {} overlaps {} at {:.0}% similarity",
                            id,
                            other_id,
                            sim * 100.0
                        ),
                    ));
                }
            }
        }

        for link in &req.links {
            if !p.requirements.contains_key(&link.target) {
                findings.push(Finding::err(
                    "REQ-V-0016",
                    "links",
                    format!("link target {} does not exist", link.target),
                ));
            } else if link.target == req.id {
                findings.push(Finding::err(
                    "REQ-V-0017",
                    "links",
                    "self-link is not allowed",
                ));
            }
            // A "verifies" claim needs at least one test record on the claiming requirement.
            if req.tests.is_empty() && matches!(link.kind, crate::model::LinkKind::Verifies) {
                findings.push(Finding::warn(
                    "REQ-V-0019",
                    "links",
                    format!(
                        "verifies → {} but {} has no test records",
                        link.target, req.id
                    ),
                ));
            }
        }

        let needs_acceptance = matches!(req.kind, Kind::Functional)
            && matches!(
                req.status,
                Status::Approved | Status::Implemented | Status::Verified
            );
        if needs_acceptance && req.acceptance.is_empty() {
            findings.push(Finding::err(
                "REQ-V-0018",
                "status",
                "cannot be approved/implemented/verified without acceptance criteria",
            ));
        }

        if !findings.is_empty() {
            report.push((id.clone(), findings));
        }
    }

    // Optional external hook: an unset, non-Unicode or blank variable disables it.
    let hook_cmd = std::env::var("REQ_VALIDATE_LLM_CMD").unwrap_or_default();
    let hook_cmd = hook_cmd.trim();
    if !hook_cmd.is_empty() {
        for (id, req) in &p.requirements {
            if matches!(req.status, Status::Obsolete) {
                continue;
            }
            let payload = serde_json::json!({
                "id": id,
                "title": req.title,
                "statement": req.statement,
                "rationale": req.rationale,
            });
            // A hook failure is surfaced as a warning on the requirement rather than aborting.
            let message = match run_llm_hook(hook_cmd, &payload.to_string()) {
                Ok((true, _)) => continue,
                Ok((false, reason)) => format!("LLM hook flagged: {}", reason),
                Err(e) => format!("LLM hook unavailable: {}", e),
            };
            attach(
                &mut report,
                id,
                Finding::warn("REQ-V-0023", "statement", message),
            );
        }
    }

    for kind in [
        crate::model::LinkKind::Parent,
        crate::model::LinkKind::DependsOn,
        crate::model::LinkKind::Refines,
        crate::model::LinkKind::Verifies,
    ] {
        for cycle in find_cycles(p, kind) {
            // The finding is filed under the smallest id in the cycle.
            let owner = cycle
                .iter()
                .min()
                .cloned()
                .unwrap_or_else(|| cycle[0].clone());
            // The first node is repeated at the end to show the loop closing.
            let finding = Finding::err(
                "REQ-V-0021",
                "links",
                format!(
                    "{} cycle: {} -> {}",
                    kind.as_str(),
                    cycle.join(" -> "),
                    cycle[0]
                ),
            );
            attach(&mut report, &owner, finding);
        }
    }

    report
}
/// Runs the external statement-quality hook `cmd` through the platform shell (`sh -c`, or
/// `cmd /C` on Windows) and returns its verdict as `(ok, message)`.
///
/// `payload` is written to the hook's stdin, which is then closed. The hook must exit with
/// status zero within 10 seconds and print a JSON object on stdout containing a boolean `"ok"`;
/// a string `"message"` is optional and defaults to the empty string.
///
/// # Errors
///
/// Returns a description when the hook cannot be spawned, written to or waited on, when it
/// exceeds the deadline, when it exits non-zero (the message carries its trimmed stderr), or
/// when stdout is not JSON with a boolean `"ok"`.
fn run_llm_hook(cmd: &str, payload: &str) -> Result<(bool, String), String> {
    use std::io::{Read, Write};
    use std::process::{Child, Command, Stdio};
    use std::thread::{self, JoinHandle};
    use std::time::{Duration, Instant};

    type Reader = JoinHandle<std::io::Result<Vec<u8>>>;

    // Kills the child (best effort) and reaps it so that no zombie process is left behind.
    fn abort(child: &mut Child) {
        let _ = child.kill();
        let _ = child.wait();
    }

    // Reads `pipe` to end-of-file on a background thread.
    fn drain<R: Read + Send + 'static>(pipe: Option<R>) -> Option<Reader> {
        pipe.map(|mut source| {
            thread::spawn(move || {
                let mut buf = Vec::new();
                source.read_to_end(&mut buf).map(|_| buf)
            })
        })
    }

    // Waits for a reader thread and returns the bytes it collected.
    fn collect(reader: Option<Reader>) -> Result<Vec<u8>, String> {
        match reader {
            None => Ok(Vec::new()),
            Some(handle) => match handle.join() {
                Ok(read) => read.map_err(|e| format!("wait: {}", e)),
                Err(_) => Err("wait: output reader panicked".to_string()),
            },
        }
    }

    let (shell, flag) = if cfg!(windows) {
        ("cmd", "/C")
    } else {
        ("sh", "-c")
    };
    let mut child = Command::new(shell)
        .args([flag, cmd])
        .stdin(Stdio::piped())
        .stdout(Stdio::piped())
        .stderr(Stdio::piped())
        .spawn()
        .map_err(|e| format!("spawn: {}", e))?;

    // Drain stdout and stderr while the hook runs. If they were only read after exit, a hook
    // that writes more than the OS pipe buffer would block forever and be reported as a timeout.
    let stdout_reader = drain(child.stdout.take());
    let stderr_reader = drain(child.stderr.take());

    let written = match child.stdin.take() {
        Some(mut stdin) => {
            let result = stdin
                .write_all(payload.as_bytes())
                .map_err(|e| format!("write: {}", e));
            // Closing stdin signals end-of-input to the hook.
            drop(stdin);
            result
        }
        None => Err("stdin unavailable".to_string()),
    };
    if let Err(e) = written {
        abort(&mut child);
        return Err(e);
    }

    // Poll rather than block so the deadline can be enforced.
    let deadline = Instant::now() + Duration::from_secs(10);
    let status = loop {
        match child.try_wait() {
            Ok(Some(status)) => break status,
            Ok(None) if Instant::now() >= deadline => {
                // The reader threads are left detached; they end once the pipes close.
                abort(&mut child);
                return Err("timed out after 10s".to_string());
            }
            Ok(None) => thread::sleep(Duration::from_millis(50)),
            Err(e) => {
                abort(&mut child);
                return Err(format!("wait: {}", e));
            }
        }
    };

    let stdout = collect(stdout_reader)?;
    let stderr = collect(stderr_reader)?;

    if !status.success() {
        return Err(format!(
            "hook exited non-zero: {}",
            String::from_utf8_lossy(&stderr).trim()
        ));
    }
    let body = String::from_utf8_lossy(&stdout);
    let v: serde_json::Value =
        serde_json::from_str(body.trim()).map_err(|e| format!("parse json: {}", e))?;
    let ok = v["ok"].as_bool().ok_or("missing 'ok' boolean")?;
    let message = v["message"].as_str().unwrap_or("").to_string();
    Ok((ok, message))
}
/// Finds cycles in the graph formed by the links of the given `kind`.
///
/// Performs an iterative depth-first search from every requirement and follows every link of
/// `kind`, not just the first one on each requirement. Links whose target is not a known
/// requirement are ignored. One cycle is recorded for each edge that leads back to a node on
/// the current search path, so every cyclic part of the graph is reported at least once, but
/// not necessarily every distinct simple cycle.
///
/// Each cycle is returned as the list of ids in link order, rotated so that the smallest id
/// comes first; the result is de-duplicated and sorted.
fn find_cycles(p: &Project, kind: crate::model::LinkKind) -> Vec<Vec<String>> {
    use std::collections::{BTreeSet, HashSet};

    let mut cycles: BTreeSet<Vec<String>> = BTreeSet::new();
    // Nodes whose outgoing links have all been explored; revisiting them finds nothing new.
    let mut finished: HashSet<&String> = HashSet::new();

    for start in p.requirements.keys() {
        if finished.contains(start) {
            continue;
        }
        // `path` is the current search path; `cursors[i]` is the index (among the links of
        // `kind`) of the next link to follow from `path[i]`.
        let mut path: Vec<&String> = vec![start];
        let mut cursors: Vec<usize> = vec![0];

        while let Some(&node) = path.last() {
            let cursor = match cursors.last_mut() {
                Some(next) => {
                    let current = *next;
                    *next += 1;
                    current
                }
                None => break,
            };
            let link = p
                .requirements
                .get(node)
                .and_then(|req| req.links.iter().filter(|l| l.kind == kind).nth(cursor));

            match link {
                None => {
                    // Every link of this node has been handled: backtrack.
                    finished.insert(node);
                    path.pop();
                    cursors.pop();
                }
                Some(link) => {
                    let target = &link.target;
                    if let Some(pos) = path.iter().position(|n| *n == target) {
                        // The link points back into the current path: that suffix is a cycle.
                        let mut cycle: Vec<String> =
                            path[pos..].iter().map(|id| (*id).clone()).collect();
                        // Rotate the smallest id to the front so the same cycle reached from
                        // different starting points compares equal.
                        if let Some(min_pos) = cycle
                            .iter()
                            .enumerate()
                            .min_by(|a, b| a.1.cmp(b.1))
                            .map(|(i, _)| i)
                        {
                            cycle.rotate_left(min_pos);
                        }
                        cycles.insert(cycle);
                    } else if !finished.contains(target) && p.requirements.contains_key(target) {
                        path.push(target);
                        cursors.push(0);
                    }
                }
            }
        }
    }

    cycles.into_iter().collect()
}
/// Returns references to the error-level entries of `findings`, in their original order.
pub fn errors_only(findings: &[Finding]) -> Vec<&Finding> {
    let mut errors = Vec::new();
    for finding in findings {
        if finding.error {
            errors.push(finding);
        }
    }
    errors
}