1use std::collections::BTreeMap;
26use std::path::{Path, PathBuf};
27use std::str::FromStr;
28use std::time::{Duration, Instant};
29
30use clap::{CommandFactory, Parser};
31
32use crate::rules::ProbeOutcome;
33use crate::walk::{self, EntryType};
34
/// File names that are reserved inside a bundle; `conformance` exempts them
/// from the frontmatter and `type` requirements that apply to other files.
pub const RESERVED: &[&str] = &["index.md", "log.md"];

/// Returns `true` when `file_name` is exactly one of the [`RESERVED`] names.
///
/// The comparison is case-sensitive and looks at the bare file name only.
pub fn is_reserved(file_name: &str) -> bool {
    RESERVED.iter().any(|reserved| *reserved == file_name)
}
52
/// The frontmatter fields recognised by the line-based extractor.
///
/// Every scalar is stored as the raw string found after the key, with one
/// layer of surrounding quotes removed; no YAML typing is applied.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct Frontmatter {
    /// Value of the top-level `type` key, if present (may be empty).
    pub type_: Option<String>,
    /// Value of the top-level `title` key, if present.
    pub title: Option<String>,
    /// Value of the top-level `description` key, if present.
    pub description: Option<String>,
    /// Value of the top-level `resource` key, if present.
    pub resource: Option<String>,
    /// Value of the top-level `timestamp` key, if present.
    pub timestamp: Option<String>,
    /// Entries of the `tags` key, read from either a flow list or a block list.
    pub tags: Vec<String>,
    /// Any other top-level `key: value` pair whose value is non-empty.
    pub extra: BTreeMap<String, String>,
}
73
/// Result of locating a `---` fenced frontmatter block at the top of a document.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Parsed {
    /// Fields extracted from the block by the line-based scanner.
    pub fm: Frontmatter,
    /// Raw text between the two fences, line endings included.
    pub fm_block: String,
    /// 1-based line numbers of the opening and the closing fence.
    pub fm_span: (usize, usize),
    /// 1-based number of the first line after the closing fence.
    pub body_start_line: usize,
    /// Whether `yaml_edit` accepted the block as a YAML document.
    pub parseable: bool,
}
88
89pub fn parse(text: &str) -> Option<Parsed> {
114 let lines: Vec<&str> = text.split_inclusive('\n').collect();
117 let is_fence = |l: &str| l.trim_end_matches(['\n', '\r']) == "---";
118 if lines.is_empty() || !is_fence(lines[0]) {
119 return None;
120 }
121 let close = lines.iter().enumerate().skip(1).find(|(_, l)| is_fence(l));
123 let (close_idx, _) = close?;
124 let inner: String = lines[1..close_idx].concat();
125 let parseable = yaml_edit::Document::from_str(&inner).is_ok();
126 let fm = extract_fields(&inner);
127 Some(Parsed {
128 fm,
129 fm_block: inner,
130 fm_span: (1, close_idx + 1),
131 body_start_line: close_idx + 2,
132 parseable,
133 })
134}
135
/// Trims `v` and removes one layer of matching surrounding quotes.
///
/// Both `"…"` and `'…'` are recognised. Escape sequences inside the quotes
/// are left untouched. A lone quote character is returned as-is.
fn unquote(v: &str) -> String {
    let v = v.trim();
    let inner = v
        .strip_prefix('"')
        .and_then(|rest| rest.strip_suffix('"'))
        .or_else(|| v.strip_prefix('\'').and_then(|rest| rest.strip_suffix('\'')));
    inner.unwrap_or(v).to_string()
}

/// Splits the inside of a flow sequence (`a, "b, c", 'd'`) into its items.
///
/// Commas inside a quoted item do not split it, so `"a, b", c` yields
/// `["a, b", "c"]`. A quote only opens a quoted item when it is the first
/// non-blank character of that item, which keeps apostrophes in plain words
/// (`don't`) harmless. Each item is passed through [`unquote`]; empty items
/// are dropped.
///
/// If a quote is never closed the input is malformed; in that case the body
/// is split on every comma, as a plain comma split would do.
fn flow_items(body: &str) -> Vec<String> {
    let mut pieces: Vec<&str> = Vec::new();
    let mut piece_start = 0;
    let mut open_quote: Option<char> = None;
    let mut at_item_start = true;

    let mut chars = body.char_indices().peekable();
    while let Some((idx, ch)) = chars.next() {
        match open_quote {
            // Inside double quotes a backslash escapes the next character.
            Some('"') if ch == '\\' => {
                chars.next();
            }
            // Inside single quotes `''` stands for a literal apostrophe.
            Some('\'') if ch == '\'' && chars.peek().is_some_and(|&(_, next)| next == '\'') => {
                chars.next();
            }
            Some(quote) if ch == quote => open_quote = None,
            Some(_) => {}
            None => match ch {
                '"' | '\'' if at_item_start => {
                    open_quote = Some(ch);
                    at_item_start = false;
                }
                ',' => {
                    pieces.push(&body[piece_start..idx]);
                    piece_start = idx + 1;
                    at_item_start = true;
                }
                c if c.is_whitespace() => {}
                _ => at_item_start = false,
            },
        }
    }

    if open_quote.is_some() {
        // Unterminated quote: fall back to splitting on every comma.
        pieces = body.split(',').collect();
    } else {
        pieces.push(&body[piece_start..]);
    }

    pieces
        .into_iter()
        .map(unquote)
        .filter(|item| !item.is_empty())
        .collect()
}
157
158fn extract_fields(inner: &str) -> Frontmatter {
163 let mut fm = Frontmatter::default();
164 let raw: Vec<&str> = inner.lines().collect();
165 let mut i = 0;
166 while i < raw.len() {
167 let line = raw[i];
168 i += 1;
169 if line.is_empty() || line.starts_with([' ', '\t']) || line.trim_start().starts_with('#') {
171 continue;
172 }
173 let Some((key, val)) = line.split_once(':') else {
174 continue;
175 };
176 let key = key.trim();
177 let val = val.trim();
178 if key == "tags" {
179 if val.is_empty() {
180 while i < raw.len() {
182 let item = raw[i];
183 let t = item.trim_start();
184 if item.starts_with([' ', '\t']) && t.starts_with('-') {
185 let v = unquote(t[1..].trim());
186 if !v.is_empty() {
187 fm.tags.push(v);
188 }
189 i += 1;
190 } else if t.is_empty() {
191 i += 1;
192 } else {
193 break;
194 }
195 }
196 } else if let Some(body) = val.strip_prefix('[').and_then(|s| s.strip_suffix(']')) {
197 fm.tags = flow_items(body);
198 } else {
199 fm.tags = flow_items(val);
201 }
202 continue;
203 }
204 let value = unquote(val);
205 match key {
206 "type" => fm.type_ = Some(value),
207 "title" => fm.title = Some(value),
208 "description" => fm.description = Some(value),
209 "resource" => fm.resource = Some(value),
210 "timestamp" => fm.timestamp = Some(value),
211 _ if !value.is_empty() => {
212 fm.extra.insert(key.to_string(), value);
213 }
214 _ => {}
215 }
216 }
217 fm
218}
219
220pub fn fm_to_json(fm: &Frontmatter) -> serde_json::Value {
225 let mut m = serde_json::Map::new();
226 if let Some(t) = &fm.type_ {
227 m.insert("type".into(), serde_json::Value::String(t.clone()));
228 }
229 if let Some(t) = &fm.title {
230 m.insert("title".into(), serde_json::Value::String(t.clone()));
231 }
232 if let Some(d) = &fm.description {
233 m.insert("description".into(), serde_json::Value::String(d.clone()));
234 }
235 if let Some(r) = &fm.resource {
236 m.insert("resource".into(), serde_json::Value::String(r.clone()));
237 }
238 if let Some(t) = &fm.timestamp {
239 m.insert("timestamp".into(), serde_json::Value::String(t.clone()));
240 }
241 if !fm.tags.is_empty() {
242 m.insert(
243 "tags".into(),
244 serde_json::Value::Array(
245 fm.tags
246 .iter()
247 .map(|t| serde_json::Value::String(t.clone()))
248 .collect(),
249 ),
250 );
251 }
252 for (k, v) in &fm.extra {
253 m.insert(k.clone(), serde_json::Value::String(v.clone()));
254 }
255 serde_json::Value::Object(m)
256}
257
/// A local Markdown link found by [`links`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Link {
    /// The link target exactly as written between the parentheses.
    pub target: String,
    /// `true` when the target starts with `/`.
    pub absolute: bool,
    /// 1-based line number of the link within the text that was scanned.
    pub line: usize,
}
268
269pub fn links(body: &str) -> Vec<Link> {
285 let re = regex::Regex::new(r"\[[^\]]*\]\(([^)\s]+)\)").expect("static regex compiles");
287 let mut out = Vec::new();
288 for (n, line) in body.lines().enumerate() {
289 for cap in re.captures_iter(line) {
290 let target = cap[1].to_string();
291 let lower = target.to_ascii_lowercase();
292 if lower.starts_with("http://")
293 || lower.starts_with("https://")
294 || lower.starts_with("mailto:")
295 || target.starts_with('#')
296 {
297 continue;
298 }
299 out.push(Link {
300 absolute: target.starts_with('/'),
301 target,
302 line: n + 1,
303 });
304 }
305 }
306 out
307}
308
/// Conformance verdict for a single `.md` file, as produced by [`conformance`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Finding {
    /// File path with the selector base stripped when the file lies under it.
    pub path: PathBuf,
    /// Whether the file name is one of the reserved names.
    pub reserved: bool,
    /// Whether a fenced frontmatter block was found at the top of the file.
    pub has_frontmatter: bool,
    /// Whether that block was accepted as YAML (`false` when there is none).
    pub parseable: bool,
    /// Whether the frontmatter has a `type` that is not blank.
    pub has_type: bool,
    /// `true` exactly when `issues` is empty.
    pub conformant: bool,
    /// Human-readable descriptions of what is wrong with the file.
    pub issues: Vec<String>,
    /// The extracted frontmatter, when a block was found.
    pub fm: Option<Frontmatter>,
}
329
330pub fn conformance(selector: &walk::Selector) -> Result<Vec<Finding>, String> {
338 let base = &selector.base;
339 let mut findings = Vec::new();
340 for entry in selector.walk() {
341 let entry = entry.map_err(|e| e.to_string())?;
342 if !entry.file_type().is_some_and(|t| t.is_file()) {
343 continue;
344 }
345 let path = entry.path();
346 if path.extension().and_then(|e| e.to_str()) != Some("md") {
347 continue;
348 }
349 let name = path
350 .file_name()
351 .and_then(|n| n.to_str())
352 .unwrap_or_default()
353 .to_string();
354 let rel = path.strip_prefix(base).unwrap_or(path).to_path_buf();
355 let text = std::fs::read_to_string(path).map_err(|e| format!("{}: {e}", rel.display()))?;
356 let reserved = is_reserved(&name);
357 let parsed = parse(&text);
358 let mut issues = Vec::new();
359 let (has_frontmatter, parseable, has_type, fm) = match &parsed {
360 Some(p) => (
361 true,
362 p.parseable,
363 p.fm.type_.as_deref().is_some_and(|t| !t.trim().is_empty()),
364 Some(p.fm.clone()),
365 ),
366 None => (false, false, false, None),
367 };
368 if reserved {
369 if has_frontmatter && !parseable {
373 issues.push("frontmatter is not parseable YAML".to_string());
374 }
375 } else if !has_frontmatter {
376 issues.push("missing frontmatter (no leading --- fence)".to_string());
377 } else if !parseable {
378 issues.push("frontmatter is not parseable YAML".to_string());
379 } else if !has_type {
380 issues.push("frontmatter missing a non-empty `type`".to_string());
381 }
382 findings.push(Finding {
383 path: rel,
384 reserved,
385 has_frontmatter,
386 parseable,
387 has_type,
388 conformant: issues.is_empty(),
389 issues,
390 fm,
391 });
392 }
393 Ok(findings)
394}
395
396pub fn broken_links(selector: &walk::Selector) -> Result<Vec<(PathBuf, Link)>, String> {
401 let base = &selector.base;
402 let mut broken = Vec::new();
403 for entry in selector.walk() {
404 let entry = entry.map_err(|e| e.to_string())?;
405 if !entry.file_type().is_some_and(|t| t.is_file()) {
406 continue;
407 }
408 let path = entry.path();
409 if path.extension().and_then(|e| e.to_str()) != Some("md") {
410 continue;
411 }
412 let Ok(text) = std::fs::read_to_string(path) else {
413 continue;
414 };
415 let rel = path.strip_prefix(base).unwrap_or(path).to_path_buf();
416 let dir = path.parent().unwrap_or(base);
417 let body = match parse(&text) {
419 Some(p) => {
420 let start = p.body_start_line.saturating_sub(1);
421 text.lines().skip(start).collect::<Vec<_>>().join("\n")
422 }
423 None => text.clone(),
424 };
425 for link in links(&body) {
426 let target = link.target.split('#').next().unwrap_or("");
427 if target.is_empty() {
428 continue;
429 }
430 let resolved = if link.absolute {
431 base.join(target.trim_start_matches('/'))
432 } else {
433 dir.join(target)
434 };
435 if !resolved.exists() {
436 broken.push((rel.clone(), link));
437 }
438 }
439 }
440 Ok(broken)
441}
442
/// Returns the current UTC date formatted as `YYYY-MM-DD`.
///
/// The date is derived from the system clock without any calendar crate:
/// whole days since the Unix epoch are converted to year/month/day with
/// integer arithmetic. A clock set before the epoch is treated as the epoch.
pub fn today_utc() -> String {
    let now = std::time::SystemTime::now();
    let secs = match now.duration_since(std::time::UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_secs(),
        Err(_) => 0,
    };

    // Days are regrouped into 400-year eras of 146_097 days each.
    let shifted = (secs / 86_400) as i64 + 719_468;
    let era = shifted.div_euclid(146_097);
    let day_of_era = shifted.rem_euclid(146_097);
    let year_of_era =
        (day_of_era - day_of_era / 1460 + day_of_era / 36_524 - day_of_era / 146_096) / 365;
    let day_of_year = day_of_era - (365 * year_of_era + year_of_era / 4 - year_of_era / 100);

    // Months are counted from March here (index 0), so January and February
    // (indices 10 and 11) belong to the following calendar year.
    let month_index = (5 * day_of_year + 2) / 153;
    let day = day_of_year - (153 * month_index + 2) / 5 + 1;
    let month = if month_index < 10 {
        month_index + 3
    } else {
        month_index - 9
    };
    let year = year_of_era + era * 400 + i64::from(month <= 2);

    format!("{year:04}-{month:02}-{day:02}")
}
463
/// Renders `v` as a single-line YAML scalar, quoting it only when needed.
///
/// The value is returned unchanged when it is safe as a plain scalar.
/// Otherwise it is wrapped in double quotes, with `\` and `"` escaped and
/// control characters written as escapes (`\n`, `\r`, `\t`, `\xNN`), so the
/// result never spans more than one line.
///
/// Quoting is triggered by any of:
/// - an empty value, or leading/trailing whitespace;
/// - a leading YAML indicator character, or a leading `- ` / `? `;
/// - `: ` anywhere, or a trailing `:` (would read as a mapping key);
/// - ` #` anywhere (would read as the start of a comment);
/// - any control character, line breaks included.
pub fn yaml_scalar(v: &str) -> String {
    let needs_quote = v.is_empty()
        || v != v.trim()
        || v.starts_with([
            '[', ']', '{', '}', ',', '#', '*', '&', '!', '|', '>', '\'', '"', '%', '@', '`',
        ])
        || v == "-"
        || v == "?"
        || v.starts_with("- ")
        || v.starts_with("? ")
        || v.contains(": ")
        || v.contains(" #")
        || v.ends_with(':')
        || v.chars().any(char::is_control);
    if !needs_quote {
        return v.to_string();
    }

    let mut quoted = String::with_capacity(v.len() + 2);
    quoted.push('"');
    for ch in v.chars() {
        match ch {
            '\\' => quoted.push_str("\\\\"),
            '"' => quoted.push_str("\\\""),
            '\n' => quoted.push_str("\\n"),
            '\r' => quoted.push_str("\\r"),
            '\t' => quoted.push_str("\\t"),
            // Remaining control characters all fit in two hex digits.
            c if c.is_control() => quoted.push_str(&format!("\\x{:02X}", c as u32)),
            c => quoted.push(c),
        }
    }
    quoted.push('"');
    quoted
}
488
489pub fn build_concept(
493 type_: &str,
494 title: &str,
495 description: Option<&str>,
496 tags: &[String],
497 timestamp: &str,
498 body: Option<&str>,
499) -> String {
500 let mut s = format!("---\ntype: {}\n", yaml_scalar(type_));
501 s.push_str(&format!("title: {}\n", yaml_scalar(title)));
502 if let Some(d) = description {
503 s.push_str(&format!("description: {}\n", yaml_scalar(d)));
504 }
505 if !tags.is_empty() {
506 let items: Vec<String> = tags.iter().map(|t| yaml_scalar(t)).collect();
507 s.push_str(&format!("tags: [{}]\n", items.join(", ")));
508 }
509 s.push_str(&format!("timestamp: {timestamp}\n---\n\n"));
510 match body {
511 Some(b) if !b.trim().is_empty() => {
512 s.push_str(b);
513 if !b.ends_with('\n') {
514 s.push('\n');
515 }
516 }
517 _ => s.push_str(&format!("# {title}\n")),
518 }
519 s
520}
521
522pub fn set_field(text: &str, field: &str, value: &str) -> Result<(String, bool), String> {
527 let parsed = parse(text).ok_or("no frontmatter to edit")?;
528 let (start, end) = parsed.fm_span; let all: Vec<&str> = text.split_inclusive('\n').collect();
530 let inner = &all[start..end - 1];
531 let new_line = format!("{field}: {}\n", yaml_scalar(value));
532 let mut replaced = false;
533 let mut new_inner: Vec<String> = Vec::with_capacity(inner.len() + 1);
534 for line in inner {
535 let is_target = line
536 .split_once(':')
537 .is_some_and(|(k, _)| k.trim() == field && !line.starts_with([' ', '\t']));
538 if is_target && !replaced {
539 new_inner.push(new_line.clone());
540 replaced = true;
541 } else {
542 new_inner.push((*line).to_string());
543 }
544 }
545 if !replaced {
546 new_inner.push(new_line);
547 }
548 let mut out = String::with_capacity(text.len() + field.len() + value.len() + 4);
549 out.push_str(&all[..start].concat());
550 out.push_str(&new_inner.concat());
551 out.push_str(&all[end - 1..].concat());
552 Ok((out, replaced))
553}
554
/// Returns `existing` with a new log bullet added under today's heading.
///
/// The bullet is `* **<kind>**: <message>` and the heading is `## <today>`.
/// - If `existing` already starts with today's heading, the bullet goes
///   directly under it, above the older bullets.
/// - If `existing` is blank, the result is just the heading and the bullet.
/// - Otherwise a new heading and bullet are put first, then a blank line,
///   then the previous content.
pub fn log_entry(existing: &str, today: &str, kind: &str, message: &str) -> String {
    let heading = format!("## {today}\n");
    let bullet = format!("* **{kind}**: {message}\n");

    match existing.strip_prefix(heading.as_str()) {
        Some(rest) => [heading.as_str(), bullet.as_str(), rest].concat(),
        None if existing.trim().is_empty() => [heading.as_str(), bullet.as_str()].concat(),
        None => [heading.as_str(), bullet.as_str(), "\n", existing].concat(),
    }
}
568
/// Renders an index page from `(file, title, description)` triples.
///
/// The output starts with a `# Index` heading and a blank line, then one
/// bullet per entry linking the title to the file. A non-empty description
/// is appended after ` - `.
pub fn render_index(entries: &[(String, String, String)]) -> String {
    let mut out = String::from("# Index\n\n");
    out.extend(entries.iter().map(|(file, title, desc)| {
        if desc.is_empty() {
            format!("* [{title}]({file})\n")
        } else {
            format!("* [{title}]({file}) - {desc}\n")
        }
    }));
    out
}
581
582pub fn md_selector(
586 base: PathBuf,
587 names: Option<Vec<regex::Regex>>,
588 hidden: bool,
589 follow: bool,
590) -> walk::Selector {
591 let names = names.or_else(|| crate::pattern::compile_name_set("*.md").ok());
592 walk::Selector {
593 base,
594 names,
595 types: vec![EntryType::F],
596 size: None,
597 hidden,
598 follow,
599 no_ignore: false,
600 }
601}
602
// Command-line arguments accepted by the `okf` probe (see `check`).
//
// Plain `//` comments are used here on purpose: clap's derive turns `///`
// doc comments into help text, which would change the generated command.
#[derive(Parser, Debug)]
#[command(
    name = "okf",
    about = "Assert that a directory is a conformant OKF bundle."
)]
struct OkfCheck {
    // Bundle directory; `check` joins it onto the probe root.
    #[arg(long, default_value = ".")]
    base: PathBuf,
    // Optional name pattern, compiled with `crate::pattern::compile_name_set`.
    #[arg(long)]
    name: Option<String>,
    // Forwarded to `md_selector` as its `hidden` flag.
    #[arg(long)]
    hidden: bool,
    // Forwarded to `md_selector` as its `follow` flag.
    #[arg(long)]
    follow: bool,
    // Also report broken local links as violations.
    #[arg(long)]
    strict: bool,
}
629
630pub fn check_grammar() -> crate::deps::Grammar {
632 crate::deps::grammar(OkfCheck::command())
633}
634
635pub fn check(
641 args: &[String],
642 root: &Path,
643 timeout: Option<Duration>,
644) -> (ProbeOutcome, String, String) {
645 let started = Instant::now();
646 let broken = |msg: String| (ProbeOutcome::Broken, msg, String::new());
647 let cli = match OkfCheck::try_parse_from(
648 std::iter::once("okf").chain(args.iter().map(String::as_str)),
649 ) {
650 Ok(c) => c,
651 Err(e) => {
652 let valid = check_grammar()
653 .flags
654 .iter()
655 .map(|s| format!("--{}", s.name))
656 .collect::<Vec<_>>()
657 .join(" ");
658 return broken(format!(
659 "okf: {} (valid flags: {valid})",
660 e.to_string().lines().next().unwrap_or("bad arguments")
661 ));
662 }
663 };
664
665 let names = match &cli.name {
666 Some(spec) => match crate::pattern::compile_name_set(spec) {
667 Ok(n) => Some(n),
668 Err(e) => return broken(format!("okf: invalid --name: {e}")),
669 },
670 None => None,
671 };
672 let base = root.join(&cli.base);
673 if !base.exists() {
674 return broken(format!(
675 "okf: bundle base does not exist: {}",
676 base.display()
677 ));
678 }
679 let selector = md_selector(base.clone(), names, cli.hidden, cli.follow);
680
681 let findings = match conformance(&selector) {
682 Ok(f) => f,
683 Err(e) => return broken(format!("okf: {e}")),
684 };
685 if let Some(limit) = timeout
686 && started.elapsed() >= limit
687 {
688 return broken(format!("okf: timed out after {:.1}s", limit.as_secs_f64()));
689 }
690
691 let mut report = String::new();
692 let mut violations = 0usize;
693 for f in &findings {
694 if !f.conformant {
695 violations += 1;
696 report.push_str(&format!("{}: {}\n", f.path.display(), f.issues.join("; ")));
697 }
698 }
699 let concepts = findings.iter().filter(|f| !f.reserved).count();
700
701 if cli.strict {
702 match broken_links(&selector) {
703 Ok(bl) => {
704 for (path, link) in &bl {
705 violations += 1;
706 report.push_str(&format!(
707 "{}:{}: broken link {}\n",
708 path.display(),
709 link.line,
710 link.target
711 ));
712 }
713 }
714 Err(e) => return broken(format!("okf: {e}")),
715 }
716 }
717
718 if violations == 0 {
719 (
720 ProbeOutcome::Holds,
721 format!("{concepts} concept(s) conform"),
722 report,
723 )
724 } else {
725 (
726 ProbeOutcome::Violated,
727 format!("{violations} OKF violation(s)"),
728 report.trim_end().to_string(),
729 )
730 }
731}
732
#[cfg(test)]
mod tests {
    use super::*;

    // A complete frontmatter block: every known field, flow-style tags, one
    // extra key, plus the fence positions and the body start line.
    #[test]
    fn parse_detects_and_extracts_frontmatter() {
        let doc = "---\ntype: Playbook\ntitle: Onboarding\ndescription: How to onboard\nresource: bq://x\ntimestamp: 2026-01-02\ntags: [ops, hr]\nowner: jane\n---\n# Steps\nbody\n";
        let p = parse(doc).unwrap();
        assert_eq!(p.fm.type_.as_deref(), Some("Playbook"));
        assert_eq!(p.fm.title.as_deref(), Some("Onboarding"));
        assert_eq!(p.fm.description.as_deref(), Some("How to onboard"));
        assert_eq!(p.fm.resource.as_deref(), Some("bq://x"));
        assert_eq!(p.fm.timestamp.as_deref(), Some("2026-01-02"));
        assert_eq!(p.fm.tags, ["ops", "hr"]);
        assert_eq!(p.fm.extra.get("owner").map(String::as_str), Some("jane"));
        assert!(p.parseable);
        assert_eq!(p.fm_span, (1, 9));
        assert_eq!(p.body_start_line, 10);
    }

    // Block-style tags and quoted scalars are unquoted on extraction.
    #[test]
    fn parse_handles_block_tags_and_quotes() {
        let doc = "---\ntype: \"BigQuery Table\"\ntags:\n - core\n - 'pii'\n---\nbody\n";
        let p = parse(doc).unwrap();
        assert_eq!(p.fm.type_.as_deref(), Some("BigQuery Table"));
        assert_eq!(p.fm.tags, ["core", "pii"]);
    }

    // The fence must be the very first line; a leading blank line disqualifies it.
    #[test]
    fn parse_returns_none_without_a_fence() {
        assert!(parse("# title\nno frontmatter\n").is_none());
        assert!(parse("").is_none());
        assert!(parse("\n---\ntype: x\n---\n").is_none());
    }

    // An opening fence with no closing fence is not frontmatter.
    #[test]
    fn unclosed_fence_is_not_frontmatter() {
        assert!(parse("---\ntype: x\nno closing fence\n").is_none());
    }

    // Only the exact reserved names are recognised.
    #[test]
    fn reserved_files_recognised() {
        assert!(is_reserved("index.md"));
        assert!(is_reserved("log.md"));
        assert!(!is_reserved("concept.md"));
    }

    // External URLs, mailto links and pure fragments are filtered out;
    // a leading `/` marks a link as absolute.
    #[test]
    fn links_classifies_and_filters() {
        let body = "[a](/tables/x.md) [b](../sibling.md) [c](https://e.test) [d](#frag) [e](mailto:x@y.z)\n";
        let ls = links(body);
        assert_eq!(ls.len(), 2);
        assert_eq!(ls[0].target, "/tables/x.md");
        assert!(ls[0].absolute);
        assert_eq!(ls[1].target, "../sibling.md");
        assert!(!ls[1].absolute);
    }
}