use std::collections::BTreeMap;
use std::path::{Path, PathBuf};
use std::str::FromStr;
use std::time::{Duration, Instant};
use clap::{CommandFactory, Parser};
use crate::rules::ProbeOutcome;
use crate::walk::{self, EntryType};
/// File names that are treated as bundle bookkeeping rather than concepts.
pub const RESERVED: &[&str] = &["index.md", "log.md"];

/// Returns `true` when `file_name` is exactly one of the [`RESERVED`] names.
///
/// The comparison is case-sensitive and expects a bare file name, not a path.
pub fn is_reserved(file_name: &str) -> bool {
    RESERVED.iter().any(|&reserved| reserved == file_name)
}
/// Top-level frontmatter fields recognised by the line-based extractor in this file.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct Frontmatter {
/// Value of the `type` key, if the key is present (may be an empty string).
pub type_: Option<String>,
/// Value of the `title` key, if present.
pub title: Option<String>,
/// Value of the `description` key, if present.
pub description: Option<String>,
/// Value of the `resource` key, if present.
pub resource: Option<String>,
/// Value of the `timestamp` key, kept as the raw string.
pub timestamp: Option<String>,
/// Entries of the `tags` key, from either the `[a, b]` or the indented `- a` form.
pub tags: Vec<String>,
/// Every other top-level key that has a non-empty value, stored as raw strings.
pub extra: BTreeMap<String, String>,
}
/// Result of locating and reading a leading `---` frontmatter block.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Parsed {
/// Fields pulled out of the block by the line-based extractor.
pub fm: Frontmatter,
/// Raw text between the opening and closing fences (fence lines excluded).
pub fm_block: String,
/// Zero-based, half-open line-index range: it starts at the first line after the
/// opening fence and ends just past the closing fence.
pub fm_span: (usize, usize),
/// One-based line number of the first line after the closing fence.
pub body_start_line: usize,
/// Whether `yaml_edit` accepted the block text as a YAML document.
pub parseable: bool,
}
/// Locates the frontmatter block at the very top of `text` and reads it.
///
/// Returns `None` when the first line is not a `---` fence or when no closing
/// `---` fence follows. A fence line must be exactly `---` apart from its line
/// terminator. A block that is not valid YAML still yields `Some`, with
/// `parseable` set to `false`.
pub fn parse(text: &str) -> Option<Parsed> {
    fn is_fence(line: &str) -> bool {
        line.trim_end_matches(['\n', '\r']) == "---"
    }

    let lines: Vec<&str> = text.split_inclusive('\n').collect();
    let (opening, rest) = lines.split_first()?;
    if !is_fence(opening) {
        return None;
    }
    // `position` is relative to `rest`, which starts at line index 1.
    let close_idx = rest.iter().position(|line| is_fence(line))? + 1;
    let inner = lines[1..close_idx].concat();

    Some(Parsed {
        parseable: yaml_edit::Document::from_str(&inner).is_ok(),
        fm: extract_fields(&inner),
        fm_span: (1, close_idx + 1),
        body_start_line: close_idx + 2,
        fm_block: inner,
    })
}
/// Trims `v` and removes one pair of matching surrounding quotes (`"…"` or `'…'`).
///
/// Escape sequences inside the quotes are left untouched. Input that is not wrapped
/// in a matching pair is returned trimmed but otherwise unchanged.
fn unquote(v: &str) -> String {
    let trimmed = v.trim();
    let inner = ['"', '\'']
        .into_iter()
        .find_map(|quote| trimmed.strip_prefix(quote)?.strip_suffix(quote));
    inner.unwrap_or(trimmed).to_string()
}
/// Splits a comma-separated list into unquoted items, dropping empty ones.
///
/// The split is purely on `,`; commas inside quotes are not treated specially.
fn flow_items(body: &str) -> Vec<String> {
    let mut items = Vec::new();
    for piece in body.split(',') {
        let item = unquote(piece.trim());
        if !item.is_empty() {
            items.push(item);
        }
    }
    items
}
/// Reads the top-level `key: value` lines of a frontmatter block into a [`Frontmatter`].
///
/// This is a line-based reader, not a YAML parser: blank lines, indented lines and
/// comment lines are skipped, and each remaining line is split at its first `:`.
/// `tags` accepts an inline list (with or without brackets) or, when its value is
/// empty, the indented `- item` lines that follow.
fn extract_fields(inner: &str) -> Frontmatter {
    fn is_indented(line: &str) -> bool {
        line.starts_with([' ', '\t'])
    }

    let rows: Vec<&str> = inner.lines().collect();
    let mut fm = Frontmatter::default();
    let mut cursor = 0;

    while let Some(&row) = rows.get(cursor) {
        cursor += 1;
        let skippable =
            row.is_empty() || is_indented(row) || row.trim_start().starts_with('#');
        if skippable {
            continue;
        }
        let Some((raw_key, raw_val)) = row.split_once(':') else {
            continue;
        };
        let (key, val) = (raw_key.trim(), raw_val.trim());

        if key != "tags" {
            let value = unquote(val);
            let slot = match key {
                "type" => Some(&mut fm.type_),
                "title" => Some(&mut fm.title),
                "description" => Some(&mut fm.description),
                "resource" => Some(&mut fm.resource),
                "timestamp" => Some(&mut fm.timestamp),
                _ => None,
            };
            match slot {
                Some(field) => *field = Some(value),
                // Unknown keys are only recorded when they carry a value.
                None if !value.is_empty() => {
                    fm.extra.insert(key.to_string(), value);
                }
                None => {}
            }
            continue;
        }

        if !val.is_empty() {
            // Inline form: brackets are optional.
            let list = val
                .strip_prefix('[')
                .and_then(|s| s.strip_suffix(']'))
                .unwrap_or(val);
            fm.tags = flow_items(list);
            continue;
        }

        // Block form: consume the indented `- item` lines (and blank lines) that follow.
        while let Some(&item) = rows.get(cursor) {
            let trimmed = item.trim_start();
            if is_indented(item) && trimmed.starts_with('-') {
                let tag = unquote(trimmed[1..].trim());
                if !tag.is_empty() {
                    fm.tags.push(tag);
                }
            } else if !trimmed.is_empty() {
                break;
            }
            cursor += 1;
        }
    }
    fm
}
/// Converts a [`Frontmatter`] into a JSON object.
///
/// Absent scalar fields and an empty tag list are omitted. Entries from `extra`
/// are inserted last, so an `extra` key that collides with a known field name
/// replaces it.
pub fn fm_to_json(fm: &Frontmatter) -> serde_json::Value {
    let scalars = [
        ("type", &fm.type_),
        ("title", &fm.title),
        ("description", &fm.description),
        ("resource", &fm.resource),
        ("timestamp", &fm.timestamp),
    ];

    let mut object = serde_json::Map::new();
    for (key, slot) in scalars {
        if let Some(text) = slot {
            object.insert(key.to_string(), serde_json::Value::String(text.clone()));
        }
    }
    if !fm.tags.is_empty() {
        let tags: Vec<serde_json::Value> = fm
            .tags
            .iter()
            .cloned()
            .map(serde_json::Value::String)
            .collect();
        object.insert("tags".to_string(), serde_json::Value::Array(tags));
    }
    object.extend(
        fm.extra
            .iter()
            .map(|(k, v)| (k.clone(), serde_json::Value::String(v.clone()))),
    );
    serde_json::Value::Object(object)
}
/// A markdown inline link whose destination is not an external URL or a pure fragment.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Link {
/// Link destination exactly as written between the parentheses (fragment included).
pub target: String,
/// `true` when the destination starts with `/`.
pub absolute: bool,
/// One-based line number; `links` counts from the first line of the text it is given.
pub line: usize,
}
/// Collects the local inline links (`[text](target)`) found in `body`.
///
/// Targets starting with `http://`, `https://` or `mailto:` (compared
/// case-insensitively) and targets starting with `#` are skipped. Targets that
/// contain whitespace (for example links with a title) do not match the pattern
/// and are therefore not reported. `Link::line` is one-based and relative to the
/// first line of `body`.
pub fn links(body: &str) -> Vec<Link> {
    // Compiled once per process; this function is called once per scanned file.
    static LINK_RE: std::sync::LazyLock<regex::Regex> = std::sync::LazyLock::new(|| {
        regex::Regex::new(r"\[[^\]]*\]\(([^)\s]+)\)").expect("static regex compiles")
    });

    let mut out = Vec::new();
    for (n, line) in body.lines().enumerate() {
        for cap in LINK_RE.captures_iter(line) {
            let target = cap[1].to_string();
            let lower = target.to_ascii_lowercase();
            let external = lower.starts_with("http://")
                || lower.starts_with("https://")
                || lower.starts_with("mailto:");
            if external || target.starts_with('#') {
                continue;
            }
            out.push(Link {
                absolute: target.starts_with('/'),
                target,
                line: n + 1,
            });
        }
    }
    out
}
/// Conformance verdict for a single markdown file, as produced by `conformance`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Finding {
/// File path, relative to the selector base when the base is a prefix of it.
pub path: PathBuf,
/// Whether the file name is one of the reserved names.
pub reserved: bool,
/// Whether a leading `---` … `---` block was found.
pub has_frontmatter: bool,
/// Whether that block was accepted as YAML (`false` when there is no block).
pub parseable: bool,
/// Whether the block has a `type` whose value is non-empty after trimming.
pub has_type: bool,
/// `true` exactly when `issues` is empty.
pub conformant: bool,
/// Human-readable descriptions of what is wrong with the file.
pub issues: Vec<String>,
/// Extracted frontmatter fields, when a block was found.
pub fm: Option<Frontmatter>,
}
pub fn conformance(selector: &walk::Selector) -> Result<Vec<Finding>, String> {
let base = &selector.base;
let mut findings = Vec::new();
for entry in selector.walk() {
let entry = entry.map_err(|e| e.to_string())?;
if !entry.file_type().is_some_and(|t| t.is_file()) {
continue;
}
let path = entry.path();
if path.extension().and_then(|e| e.to_str()) != Some("md") {
continue;
}
let name = path
.file_name()
.and_then(|n| n.to_str())
.unwrap_or_default()
.to_string();
let rel = path.strip_prefix(base).unwrap_or(path).to_path_buf();
let text = std::fs::read_to_string(path).map_err(|e| format!("{}: {e}", rel.display()))?;
let reserved = is_reserved(&name);
let parsed = parse(&text);
let mut issues = Vec::new();
let (has_frontmatter, parseable, has_type, fm) = match &parsed {
Some(p) => (
true,
p.parseable,
p.fm.type_.as_deref().is_some_and(|t| !t.trim().is_empty()),
Some(p.fm.clone()),
),
None => (false, false, false, None),
};
if reserved {
if has_frontmatter && !parseable {
issues.push("frontmatter is not parseable YAML".to_string());
}
} else if !has_frontmatter {
issues.push("missing frontmatter (no leading --- fence)".to_string());
} else if !parseable {
issues.push("frontmatter is not parseable YAML".to_string());
} else if !has_type {
issues.push("frontmatter missing a non-empty `type`".to_string());
}
findings.push(Finding {
path: rel,
reserved,
has_frontmatter,
parseable,
has_type,
conformant: issues.is_empty(),
issues,
fm,
});
}
Ok(findings)
}
/// Finds local markdown links whose target does not exist on disk.
///
/// Only the body of each `.md` file is scanned (frontmatter is skipped). Absolute
/// targets (`/x.md`) are resolved against the selector base, all others against the
/// directory of the linking file; a `#fragment` suffix is ignored. Files that cannot
/// be read are skipped.
///
/// The returned `Link::line` is the one-based line number **within the file**, so it
/// can be printed as `path:line` even when the file starts with frontmatter.
///
/// # Errors
///
/// Returns the stringified error when walking the selector fails.
pub fn broken_links(selector: &walk::Selector) -> Result<Vec<(PathBuf, Link)>, String> {
    let base = &selector.base;
    let mut broken = Vec::new();
    for entry in selector.walk() {
        let entry = entry.map_err(|e| e.to_string())?;
        if !entry.file_type().is_some_and(|t| t.is_file()) {
            continue;
        }
        let path = entry.path();
        if path.extension().and_then(|e| e.to_str()) != Some("md") {
            continue;
        }
        let Ok(text) = std::fs::read_to_string(path) else {
            continue;
        };
        let rel = path.strip_prefix(base).unwrap_or(path).to_path_buf();
        let dir = path.parent().unwrap_or(base);

        // Number of leading lines (frontmatter plus fences) that are not part of the body.
        let skipped = parse(&text).map_or(0, |p| p.body_start_line.saturating_sub(1));
        // Slice the body out of the original text instead of re-joining its lines.
        let offset: usize = text
            .split_inclusive('\n')
            .take(skipped)
            .map(str::len)
            .sum();
        let body = &text[offset..];

        for mut link in links(body) {
            // `links` numbers lines from the start of `body`; shift to file lines.
            link.line += skipped;
            let target = link.target.split('#').next().unwrap_or("");
            if target.is_empty() {
                continue;
            }
            let resolved = if link.absolute {
                base.join(target.trim_start_matches('/'))
            } else {
                dir.join(target)
            };
            if !resolved.exists() {
                broken.push((rel.clone(), link));
            }
        }
    }
    Ok(broken)
}
/// Returns the current UTC date formatted as `YYYY-MM-DD`.
///
/// The date is derived from the system clock without any calendar library. A clock
/// set before the Unix epoch is treated as the epoch itself.
pub fn today_utc() -> String {
    /// Converts a day count relative to 1970-01-01 into `(year, month, day)` in the
    /// proleptic Gregorian calendar, working in 400-year eras of 146 097 days.
    fn civil_from_days(days: i64) -> (i64, i64, i64) {
        let shifted = days + 719_468;
        let era = shifted.div_euclid(146_097);
        let day_of_era = shifted.rem_euclid(146_097);
        let year_of_era = (day_of_era - day_of_era / 1460 + day_of_era / 36_524
            - day_of_era / 146_096)
            / 365;
        let day_of_year =
            day_of_era - (365 * year_of_era + year_of_era / 4 - year_of_era / 100);
        // Months are counted from March here, so January and February come last.
        let month_index = (5 * day_of_year + 2) / 153;
        let day = day_of_year - (153 * month_index + 2) / 5 + 1;
        let month = if month_index < 10 {
            month_index + 3
        } else {
            month_index - 9
        };
        let year = year_of_era + era * 400 + i64::from(month <= 2);
        (year, month, day)
    }

    let since_epoch = std::time::SystemTime::now().duration_since(std::time::UNIX_EPOCH);
    let secs = match since_epoch {
        Ok(elapsed) => elapsed.as_secs(),
        Err(_) => 0,
    };
    let (y, m, d) = civil_from_days((secs / 86_400) as i64);
    format!("{y:04}-{m:02}-{d:02}")
}
/// Renders `v` as a single-line YAML scalar, double-quoting it when a plain scalar
/// would be ambiguous or would break the line-oriented frontmatter.
///
/// A value is quoted when it is empty, has leading or trailing whitespace, starts
/// with a YAML indicator character, looks like a sequence or complex-key entry
/// (`-`, `- x`, `?`, `? x`), contains `: `, contains a comment start (` #`), ends
/// with `:`, or contains a control character other than tab. Inside the quotes,
/// `\`, `"`, line breaks and other control characters are escaped, so the result
/// never spans more than one line.
pub fn yaml_scalar(v: &str) -> String {
    // Characters that cannot start a plain scalar.
    const LEADING_INDICATORS: [char; 16] = [
        '[', ']', '{', '}', ',', '#', '*', '&', '!', '|', '>', '\'', '"', '%', '@', '`',
    ];

    // `-` and `?` are only indicators when alone or followed by a space.
    let entry_like = matches!(v, "-" | "?") || v.starts_with("- ") || v.starts_with("? ");
    // A raw line break would end the `key: value` line early; tab is harmless.
    let has_control = v.chars().any(|c| c.is_control() && c != '\t');
    let needs_quote = v.is_empty()
        || v != v.trim()
        || v.starts_with(LEADING_INDICATORS)
        || entry_like
        || has_control
        || v.contains(": ")
        || v.contains(" #")
        || v.contains("\t#")
        || v.ends_with(':');
    if !needs_quote {
        return v.to_string();
    }

    let mut quoted = String::with_capacity(v.len() + 2);
    quoted.push('"');
    for c in v.chars() {
        match c {
            '\\' => quoted.push_str("\\\\"),
            '"' => quoted.push_str("\\\""),
            '\n' => quoted.push_str("\\n"),
            '\r' => quoted.push_str("\\r"),
            // Remaining control characters are all below U+0100, so `\xNN` suffices.
            c if c.is_control() && c != '\t' => {
                quoted.push_str(&format!("\\x{:02x}", u32::from(c)));
            }
            c => quoted.push(c),
        }
    }
    quoted.push('"');
    quoted
}
/// Renders a new concept document: a frontmatter block followed by a body.
///
/// `type`, `title`, the optional `description` and each tag are passed through
/// `yaml_scalar`; `timestamp` is written verbatim. The `description` and `tags`
/// lines are omitted when absent or empty. When `body` is `None` or blank, a
/// `# <title>` heading is used instead; otherwise the body is written as given,
/// with a trailing newline added if it lacks one.
pub fn build_concept(
    type_: &str,
    title: &str,
    description: Option<&str>,
    tags: &[String],
    timestamp: &str,
    body: Option<&str>,
) -> String {
    let mut header = vec![
        format!("type: {}", yaml_scalar(type_)),
        format!("title: {}", yaml_scalar(title)),
    ];
    if let Some(d) = description {
        header.push(format!("description: {}", yaml_scalar(d)));
    }
    if !tags.is_empty() {
        let items = tags
            .iter()
            .map(|t| yaml_scalar(t))
            .collect::<Vec<_>>()
            .join(", ");
        header.push(format!("tags: [{items}]"));
    }
    header.push(format!("timestamp: {timestamp}"));

    let mut doc = format!("---\n{}\n---\n\n", header.join("\n"));
    match body.filter(|b| !b.trim().is_empty()) {
        Some(b) => {
            doc.push_str(b);
            if !b.ends_with('\n') {
                doc.push('\n');
            }
        }
        None => {
            doc.push_str("# ");
            doc.push_str(title);
            doc.push('\n');
        }
    }
    doc
}
/// Sets the top-level frontmatter key `field` to `value` and returns the new text.
///
/// The first unindented `field:` line is replaced; if there is none, a new line is
/// appended just before the closing fence. The boolean is `true` when an existing
/// line was replaced and `false` when the line was added.
///
/// When the replaced key had a multi-line value (an indented block list, block
/// mapping or block scalar), those indented continuation lines are removed as well,
/// so they do not end up attached to the new single-line value. Blank lines are
/// only removed when more continuation lines follow them.
///
/// # Errors
///
/// Returns `"no frontmatter to edit"` when `text` has no leading frontmatter block.
pub fn set_field(text: &str, field: &str, value: &str) -> Result<(String, bool), String> {
    let parsed = parse(text).ok_or("no frontmatter to edit")?;
    let (start, end) = parsed.fm_span;
    let all: Vec<&str> = text.split_inclusive('\n').collect();
    // `end` points just past the closing fence, so the block itself stops at `end - 1`.
    let inner = &all[start..end - 1];
    let new_line = format!("{field}: {}\n", yaml_scalar(value));
    let is_indented = |line: &str| line.starts_with([' ', '\t']);

    let mut out = String::with_capacity(text.len() + new_line.len());
    out.extend(all[..start].iter().copied());

    let mut replaced = false;
    let mut idx = 0;
    while idx < inner.len() {
        let line = inner[idx];
        idx += 1;
        let is_target = !replaced
            && !is_indented(line)
            && line.split_once(':').is_some_and(|(k, _)| k.trim() == field);
        if !is_target {
            out.push_str(line);
            continue;
        }
        out.push_str(&new_line);
        replaced = true;

        // Drop the old value's continuation lines. `probe` looks ahead across blank
        // lines; `idx` only advances once another indented line confirms that the
        // blanks were inside the old value.
        let mut probe = idx;
        while let Some(next) = inner.get(probe) {
            probe += 1;
            if next.trim().is_empty() {
                continue;
            }
            if !is_indented(next) {
                break;
            }
            idx = probe;
        }
    }
    if !replaced {
        out.push_str(&new_line);
    }
    out.extend(all[end - 1..].iter().copied());
    Ok((out, replaced))
}
/// Prepends a `* **kind**: message` bullet to a log under today's `## <date>` heading.
///
/// If `existing` already starts with today's heading, the bullet is inserted directly
/// beneath it. If `existing` is blank, the result is just the heading and the bullet.
/// Otherwise a new heading and bullet are placed above the existing content,
/// separated from it by an empty line.
pub fn log_entry(existing: &str, today: &str, kind: &str, message: &str) -> String {
    let heading = format!("## {today}\n");
    let bullet = format!("* **{kind}**: {message}\n");

    let mut out = String::with_capacity(heading.len() + bullet.len() + existing.len() + 1);
    out.push_str(&heading);
    out.push_str(&bullet);
    match existing.strip_prefix(heading.as_str()) {
        Some(rest) => out.push_str(rest),
        None if existing.trim().is_empty() => {}
        None => {
            out.push('\n');
            out.push_str(existing);
        }
    }
    out
}
/// Renders an `# Index` document with one bullet per `(file, title, description)` entry.
///
/// Each bullet is `* [title](file)`, followed by ` - description` when the
/// description is not empty. Entries are emitted in the order given.
pub fn render_index(entries: &[(String, String, String)]) -> String {
    let rows = entries.iter().map(|(file, title, desc)| {
        let suffix = if desc.is_empty() {
            String::new()
        } else {
            format!(" - {desc}")
        };
        format!("* [{title}]({file}){suffix}\n")
    });
    std::iter::once(String::from("# Index\n\n"))
        .chain(rows)
        .collect()
}
/// Builds a walk selector for regular files under `base`.
///
/// When `names` is `None`, the name filter falls back to the compiled `*.md`
/// pattern (or to no filter at all if that pattern fails to compile). `hidden` and
/// `follow` are passed through unchanged; no size filter is set and `no_ignore`
/// is always `false`.
pub fn md_selector(
    base: PathBuf,
    names: Option<Vec<regex::Regex>>,
    hidden: bool,
    follow: bool,
) -> walk::Selector {
    let names = match names {
        Some(explicit) => Some(explicit),
        None => crate::pattern::compile_name_set("*.md").ok(),
    };
    walk::Selector {
        base,
        names,
        types: vec![EntryType::F],
        size: None,
        hidden,
        follow,
        no_ignore: false,
    }
}
// Command-line arguments accepted by the `okf` probe; parsed in `check`.
// Plain `//` comments are used here on purpose: clap's derive macro turns `///`
// doc comments into help text, which would change the generated command.
#[derive(Parser, Debug)]
#[command(
name = "okf",
about = "Assert that a directory is a conformant OKF bundle."
)]
struct OkfCheck {
// Bundle directory; `check` joins it onto its `root` argument. Defaults to ".".
#[arg(long, default_value = ".")]
base: PathBuf,
// Optional file-name pattern spec; compiled with `crate::pattern::compile_name_set`.
#[arg(long)]
name: Option<String>,
// Forwarded to the selector's `hidden` setting.
#[arg(long)]
hidden: bool,
// Forwarded to the selector's `follow` setting.
#[arg(long)]
follow: bool,
// When set, broken local links are also counted as violations.
#[arg(long)]
strict: bool,
}
/// Returns the argument grammar of the `okf` probe, derived from the clap
/// command definition of `OkfCheck`.
pub fn check_grammar() -> crate::deps::Grammar {
    let command = OkfCheck::command();
    crate::deps::grammar(command)
}
/// Runs the `okf` probe: checks that the bundle under `root` is conformant.
///
/// Returns `(outcome, summary, report)`:
/// * `Broken` with an `okf: …` message and an empty report when the arguments are
///   invalid, the base directory does not exist, scanning fails, or the conformance
///   scan finished after `timeout` had already elapsed;
/// * `Holds` with `"<n> concept(s) conform"` and an empty report when there are no
///   violations (`n` counts the non-reserved files scanned);
/// * `Violated` with `"<n> OKF violation(s)"` and one report line per violation.
///
/// With `--strict`, every broken local link is an additional violation. The timeout
/// is only checked once, right after the conformance scan.
pub fn check(
    args: &[String],
    root: &Path,
    timeout: Option<Duration>,
) -> (ProbeOutcome, String, String) {
    fn broken(msg: String) -> (ProbeOutcome, String, String) {
        (ProbeOutcome::Broken, msg, String::new())
    }

    let clock = Instant::now();

    let argv = std::iter::once("okf").chain(args.iter().map(String::as_str));
    let cli = match OkfCheck::try_parse_from(argv) {
        Ok(cli) => cli,
        Err(err) => {
            let valid = check_grammar()
                .flags
                .iter()
                .map(|flag| format!("--{}", flag.name))
                .collect::<Vec<_>>()
                .join(" ");
            // Only the first line of clap's message is kept.
            let rendered = err.to_string();
            let headline = rendered.lines().next().unwrap_or("bad arguments");
            return broken(format!("okf: {headline} (valid flags: {valid})"));
        }
    };

    let compiled = cli
        .name
        .as_ref()
        .map(|spec| crate::pattern::compile_name_set(spec))
        .transpose();
    let names = match compiled {
        Ok(names) => names,
        Err(e) => return broken(format!("okf: invalid --name: {e}")),
    };

    let base = root.join(&cli.base);
    if !base.exists() {
        return broken(format!(
            "okf: bundle base does not exist: {}",
            base.display()
        ));
    }

    let selector = md_selector(base, names, cli.hidden, cli.follow);
    let findings = match conformance(&selector) {
        Ok(found) => found,
        Err(e) => return broken(format!("okf: {e}")),
    };
    match timeout {
        Some(limit) if clock.elapsed() >= limit => {
            return broken(format!("okf: timed out after {:.1}s", limit.as_secs_f64()));
        }
        _ => {}
    }

    // One report line per violation; the count is derived from the lines.
    let mut lines: Vec<String> = findings
        .iter()
        .filter(|f| !f.conformant)
        .map(|f| format!("{}: {}", f.path.display(), f.issues.join("; ")))
        .collect();
    let concepts = findings.iter().filter(|f| !f.reserved).count();

    if cli.strict {
        let dangling = match broken_links(&selector) {
            Ok(found) => found,
            Err(e) => return broken(format!("okf: {e}")),
        };
        lines.extend(dangling.iter().map(|(path, link)| {
            format!(
                "{}:{}: broken link {}",
                path.display(),
                link.line,
                link.target
            )
        }));
    }

    let violations = lines.len();
    if violations == 0 {
        return (
            ProbeOutcome::Holds,
            format!("{concepts} concept(s) conform"),
            String::new(),
        );
    }
    (
        ProbeOutcome::Violated,
        format!("{violations} OKF violation(s)"),
        lines.join("\n").trim_end().to_string(),
    )
}
// Unit tests for frontmatter parsing, reserved-name detection and link extraction.
#[cfg(test)]
mod tests {
use super::*;
// Every known scalar field, inline tags and an extra key are extracted, and the
// span / body line numbers match the fence positions.
#[test]
fn parse_detects_and_extracts_frontmatter() {
let doc = "---\ntype: Playbook\ntitle: Onboarding\ndescription: How to onboard\nresource: bq://x\ntimestamp: 2026-01-02\ntags: [ops, hr]\nowner: jane\n---\n# Steps\nbody\n";
let p = parse(doc).unwrap();
assert_eq!(p.fm.type_.as_deref(), Some("Playbook"));
assert_eq!(p.fm.title.as_deref(), Some("Onboarding"));
assert_eq!(p.fm.description.as_deref(), Some("How to onboard"));
assert_eq!(p.fm.resource.as_deref(), Some("bq://x"));
assert_eq!(p.fm.timestamp.as_deref(), Some("2026-01-02"));
assert_eq!(p.fm.tags, ["ops", "hr"]);
assert_eq!(p.fm.extra.get("owner").map(String::as_str), Some("jane"));
assert!(p.parseable);
assert_eq!(p.fm_span, (1, 9));
assert_eq!(p.body_start_line, 10);
}
// Block-style tag lists and quoted values are unwrapped.
#[test]
fn parse_handles_block_tags_and_quotes() {
let doc = "---\ntype: \"BigQuery Table\"\ntags:\n - core\n - 'pii'\n---\nbody\n";
let p = parse(doc).unwrap();
assert_eq!(p.fm.type_.as_deref(), Some("BigQuery Table"));
assert_eq!(p.fm.tags, ["core", "pii"]);
}
// The fence must be the very first line; empty input has no frontmatter.
#[test]
fn parse_returns_none_without_a_fence() {
assert!(parse("# title\nno frontmatter\n").is_none());
assert!(parse("").is_none());
assert!(parse("\n---\ntype: x\n---\n").is_none());
}
// An opening fence without a closing one is not treated as frontmatter.
#[test]
fn unclosed_fence_is_not_frontmatter() {
assert!(parse("---\ntype: x\nno closing fence\n").is_none());
}
// Only the exact reserved file names are recognised.
#[test]
fn reserved_files_recognised() {
assert!(is_reserved("index.md"));
assert!(is_reserved("log.md"));
assert!(!is_reserved("concept.md"));
}
// External URLs, fragments and mailto links are filtered out; the remaining
// links are classified as absolute or relative.
#[test]
fn links_classifies_and_filters() {
let body = "[a](/tables/x.md) [b](../sibling.md) [c](https://e.test) [d](#frag) [e](mailto:x@y.z)\n";
let ls = links(body);
assert_eq!(ls.len(), 2);
assert_eq!(ls[0].target, "/tables/x.md");
assert!(ls[0].absolute);
assert_eq!(ls[1].target, "../sibling.md");
assert!(!ls[1].absolute);
}
}