Skip to main content

coding_tools/
okf.rs

1// SPDX-License-Identifier: Apache-2.0
2// Copyright 2026 Jonathan Shook
3
4//! Open Knowledge Format (OKF) support shared across the suite.
5//!
6//! [OKF v0.1](https://github.com/GoogleCloudPlatform/knowledge-catalog/blob/main/okf/SPEC.md)
7//! is a minimal standard for knowledge corpora: a directory tree of Markdown
8//! *concept* files, each carrying a leading YAML **frontmatter** block (one
9//! required field, `type`; recommended `title`/`description`/`resource`/`tags`/
10//! `timestamp`), plus reserved `index.md` (a directory listing) and `log.md` (a
11//! change history). Cross-links are ordinary Markdown links, either
12//! bundle-relative (`/tables/customers.md`) or document-relative.
13//!
14//! This module is the single home for the format: frontmatter detection and
15//! field extraction ([`parse`]), reserved-file recognition ([`is_reserved`]),
16//! cross-link extraction ([`links`]), bundle conformance ([`conformance`]) and
17//! broken-link detection ([`broken_links`]). It is reused by `ct-okf`, by the
18//! OKF-awareness added to `ct-search`/`ct-tree`/`ct-view`/`ct-outline`, and by
19//! the `okf` built-in check ([`check`]).
20//!
21//! Conformance is deliberately permissive (per the spec): a non-reserved `.md`
22//! conforms when it has parseable frontmatter carrying a non-empty `type`;
23//! unknown keys, unknown types, and broken links are tolerated, never fatal.
24
25use std::collections::BTreeMap;
26use std::path::{Path, PathBuf};
27use std::str::FromStr;
28use std::time::{Duration, Instant};
29
30use clap::{CommandFactory, Parser};
31
32use crate::rules::ProbeOutcome;
33use crate::walk::{self, EntryType};
34
/// File names that OKF reserves for structural roles; a file with one of these
/// names is never treated as a concept document.
pub const RESERVED: &[&str] = &["index.md", "log.md"];

/// Report whether `file_name` names an OKF reserved file (`index.md` or
/// `log.md`).
///
/// The comparison is exact and case-sensitive, and is made against the string
/// as given — pass the bare file name, not a path.
///
/// # Examples
///
/// ```
/// use coding_tools::okf::is_reserved;
///
/// assert!(is_reserved("index.md"));
/// assert!(is_reserved("log.md"));
/// assert!(!is_reserved("customers.md"));
/// ```
pub fn is_reserved(file_name: &str) -> bool {
    RESERVED.iter().any(|reserved| *reserved == file_name)
}
52
/// The recognised frontmatter fields, extracted from a concept's YAML block.
/// Unknown keys are preserved in [`extra`](Frontmatter::extra), as the spec
/// requires consumers to tolerate and round-trip them.
///
/// Every scalar is kept as a plain `String`; no field is interpreted beyond
/// quote stripping. A field is `None` when its key does not appear at the top
/// level of the block.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct Frontmatter {
    /// The required `type` — the concept kind (e.g. `BigQuery Table`).
    /// Named `type_` because `type` is a Rust keyword.
    pub type_: Option<String>,
    /// Human-readable name.
    pub title: Option<String>,
    /// One-sentence summary.
    pub description: Option<String>,
    /// URI of the underlying asset.
    pub resource: Option<String>,
    /// ISO 8601 last-modified datetime (stored as written, not validated here).
    pub timestamp: Option<String>,
    /// Cross-cutting tags, in document order; empty when `tags` is absent.
    pub tags: Vec<String>,
    /// Any other scalar keys, preserved verbatim. Sorted by key because the
    /// container is a `BTreeMap`.
    pub extra: BTreeMap<String, String>,
}
73
/// A concept's parsed frontmatter and where it sits in the file.
///
/// All line numbers are 1-based and count lines of the text that was parsed.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Parsed {
    /// The extracted, recognised fields.
    pub fm: Frontmatter,
    /// The inner YAML text between the `---` fences (fences excluded).
    pub fm_block: String,
    /// 1-based inclusive line span of the whole block, fences included.
    /// The first element is always `1`, since frontmatter must open the file.
    pub fm_span: (usize, usize),
    /// 1-based line where the Markdown body begins (after the closing fence).
    pub body_start_line: usize,
    /// Whether the inner block parses as YAML (per `yaml-edit`). Field
    /// extraction is attempted regardless of this flag.
    pub parseable: bool,
}
88
89/// Parse a concept's leading frontmatter, or `None` when the text does not open
90/// with a `---` fence (the only frontmatter form OKF defines).
91///
92/// Recognised scalar fields are typed; `tags` accepts both the flow form
93/// (`tags: [a, b]`) and the block form (`tags:` then `- a`). [`parseable`] is
94/// set by handing the inner block to `yaml-edit`, which is what conformance
95/// checks for; field extraction itself is lenient.
96///
97/// [`parseable`]: Parsed::parseable
98///
99/// # Examples
100///
101/// ```
102/// use coding_tools::okf::parse;
103///
104/// let doc = "---\ntype: Playbook\ntitle: Onboarding\ntags: [ops, hr]\n---\n# Steps\n";
105/// let p = parse(doc).unwrap();
106/// assert_eq!(p.fm.type_.as_deref(), Some("Playbook"));
107/// assert_eq!(p.fm.tags, ["ops", "hr"]);
108/// assert!(p.parseable);
109/// assert_eq!(p.body_start_line, 6);
110///
111/// assert!(parse("# no frontmatter here\n").is_none());
112/// ```
113pub fn parse(text: &str) -> Option<Parsed> {
114    // The file must open with a fence line that is exactly `---` (allowing a
115    // trailing CR). A leading BOM or blank line means "no frontmatter".
116    let lines: Vec<&str> = text.split_inclusive('\n').collect();
117    let is_fence = |l: &str| l.trim_end_matches(['\n', '\r']) == "---";
118    if lines.is_empty() || !is_fence(lines[0]) {
119        return None;
120    }
121    // Find the closing fence.
122    let close = lines.iter().enumerate().skip(1).find(|(_, l)| is_fence(l));
123    let (close_idx, _) = close?;
124    let inner: String = lines[1..close_idx].concat();
125    let parseable = yaml_edit::Document::from_str(&inner).is_ok();
126    let fm = extract_fields(&inner);
127    Some(Parsed {
128        fm,
129        fm_block: inner,
130        fm_span: (1, close_idx + 1),
131        body_start_line: close_idx + 2,
132        parseable,
133    })
134}
135
/// Strip one layer of matching single/double quotes from a YAML scalar and
/// undo the escapes that belong to that quoting style.
///
/// * Double-quoted: `\"`, `\\`, `\n`, `\r` and `\t` are decoded, so values
///   written by the quoting side of this module read back unchanged. Any other
///   backslash sequence is kept as written.
/// * Single-quoted: a doubled `''` stands for one `'`.
/// * Anything else (including mismatched or lone quotes) is returned trimmed
///   but otherwise untouched.
fn unquote(v: &str) -> String {
    let v = v.trim();

    if let Some(inner) = v.strip_prefix('"').and_then(|s| s.strip_suffix('"')) {
        let mut out = String::with_capacity(inner.len());
        let mut chars = inner.chars();
        while let Some(ch) = chars.next() {
            if ch != '\\' {
                out.push(ch);
                continue;
            }
            match chars.next() {
                Some('"') => out.push('"'),
                Some('\\') => out.push('\\'),
                Some('n') => out.push('\n'),
                Some('r') => out.push('\r'),
                Some('t') => out.push('\t'),
                // Unknown escape: keep it verbatim rather than guess.
                Some(other) => {
                    out.push('\\');
                    out.push(other);
                }
                // A trailing lone backslash is kept as-is.
                None => out.push('\\'),
            }
        }
        return out;
    }

    if let Some(inner) = v.strip_prefix('\'').and_then(|s| s.strip_suffix('\'')) {
        return inner.replace("''", "'");
    }

    v.to_string()
}

/// Parse a flow sequence body (`a, b, "c"`) — the inside of `[...]`.
///
/// Items are separated by commas, except that a comma inside a quoted item
/// (`"a, b"` or `'a, b'`) belongs to that item. A quote only opens a quoted
/// item when it is the first non-blank character of the item, so an apostrophe
/// inside a bare word (`don't`) is ordinary text. If a quote is never closed
/// the input is malformed and the body is split on every comma instead.
/// Each item is passed through [`unquote`]; empty items are dropped.
fn flow_items(body: &str) -> Vec<String> {
    let mut pieces: Vec<&str> = Vec::new();
    let mut start = 0;
    // The quote character of the item currently being scanned, if any.
    let mut quote: Option<char> = None;
    // Inside double quotes: the previous character was a backslash.
    let mut escaped = false;
    // No non-blank character has been seen yet in the current item.
    let mut blank = true;
    // The previous character closed a single-quoted run; an immediately
    // following `'` is the second half of a `''` escape and reopens it.
    let mut reopen = false;

    for (idx, ch) in body.char_indices() {
        let may_reopen = std::mem::take(&mut reopen);
        match quote {
            Some(q) => {
                if escaped {
                    escaped = false;
                } else if q == '"' && ch == '\\' {
                    escaped = true;
                } else if ch == q {
                    quote = None;
                    reopen = q == '\'';
                }
            }
            None => match ch {
                '"' if blank => {
                    quote = Some('"');
                    blank = false;
                }
                '\'' if blank || may_reopen => {
                    quote = Some('\'');
                    blank = false;
                }
                ',' => {
                    pieces.push(&body[start..idx]);
                    start = idx + 1;
                    blank = true;
                }
                c if !c.is_whitespace() => blank = false,
                _ => {}
            },
        }
    }

    if quote.is_some() {
        // Unterminated quote: fall back to the plain comma split.
        pieces = body.split(',').collect();
    } else {
        pieces.push(&body[start..]);
    }

    pieces
        .into_iter()
        .map(unquote)
        .filter(|item| !item.is_empty())
        .collect()
}
157
158/// The lenient field reader behind [`parse`]: top-level `key: value` scalars and
159/// a `tags` sequence (flow or block). Indented continuation lines that are not
160/// block sequence items are ignored, so nested mappings never corrupt the
161/// recognised fields.
162fn extract_fields(inner: &str) -> Frontmatter {
163    let mut fm = Frontmatter::default();
164    let raw: Vec<&str> = inner.lines().collect();
165    let mut i = 0;
166    while i < raw.len() {
167        let line = raw[i];
168        i += 1;
169        // Only top-level keys (no leading whitespace) define fields.
170        if line.is_empty() || line.starts_with([' ', '\t']) || line.trim_start().starts_with('#') {
171            continue;
172        }
173        let Some((key, val)) = line.split_once(':') else {
174            continue;
175        };
176        let key = key.trim();
177        let val = val.trim();
178        if key == "tags" {
179            if val.is_empty() {
180                // Block form: collect following `- item` lines.
181                while i < raw.len() {
182                    let item = raw[i];
183                    let t = item.trim_start();
184                    if item.starts_with([' ', '\t']) && t.starts_with('-') {
185                        let v = unquote(t[1..].trim());
186                        if !v.is_empty() {
187                            fm.tags.push(v);
188                        }
189                        i += 1;
190                    } else if t.is_empty() {
191                        i += 1;
192                    } else {
193                        break;
194                    }
195                }
196            } else if let Some(body) = val.strip_prefix('[').and_then(|s| s.strip_suffix(']')) {
197                fm.tags = flow_items(body);
198            } else {
199                // A single bare value.
200                fm.tags = flow_items(val);
201            }
202            continue;
203        }
204        let value = unquote(val);
205        match key {
206            "type" => fm.type_ = Some(value),
207            "title" => fm.title = Some(value),
208            "description" => fm.description = Some(value),
209            "resource" => fm.resource = Some(value),
210            "timestamp" => fm.timestamp = Some(value),
211            _ if !value.is_empty() => {
212                fm.extra.insert(key.to_string(), value);
213            }
214            _ => {}
215        }
216    }
217    fm
218}
219
220/// Render a [`Frontmatter`] as a JSON object — only fields that are present
221/// appear; unknown keys from [`extra`](Frontmatter::extra) are included
222/// verbatim. Shared so `ct-okf` and the OKF-aware tools emit metadata the same
223/// way.
224pub fn fm_to_json(fm: &Frontmatter) -> serde_json::Value {
225    let mut m = serde_json::Map::new();
226    if let Some(t) = &fm.type_ {
227        m.insert("type".into(), serde_json::Value::String(t.clone()));
228    }
229    if let Some(t) = &fm.title {
230        m.insert("title".into(), serde_json::Value::String(t.clone()));
231    }
232    if let Some(d) = &fm.description {
233        m.insert("description".into(), serde_json::Value::String(d.clone()));
234    }
235    if let Some(r) = &fm.resource {
236        m.insert("resource".into(), serde_json::Value::String(r.clone()));
237    }
238    if let Some(t) = &fm.timestamp {
239        m.insert("timestamp".into(), serde_json::Value::String(t.clone()));
240    }
241    if !fm.tags.is_empty() {
242        m.insert(
243            "tags".into(),
244            serde_json::Value::Array(
245                fm.tags
246                    .iter()
247                    .map(|t| serde_json::Value::String(t.clone()))
248                    .collect(),
249            ),
250        );
251    }
252    for (k, v) in &fm.extra {
253        m.insert(k.clone(), serde_json::Value::String(v.clone()));
254    }
255    serde_json::Value::Object(m)
256}
257
/// A Markdown cross-link found in a concept body.
///
/// Links are untyped edges: only the target and its position are recorded,
/// not the link text.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Link {
    /// The raw link target (the `(...)` of `[text](target)`), including any
    /// `#fragment` it carries.
    pub target: String,
    /// Whether the target is bundle-relative (begins with `/`).
    pub absolute: bool,
    /// 1-based line the link occurs on, counted within the text that was
    /// scanned.
    pub line: usize,
}
268
269/// Extract Markdown `[text](target)` cross-links from `body`, skipping external
270/// URLs (`http(s)://`, `mailto:`) and bare anchors (`#…`). The kind of
271/// relationship is conveyed by prose, not syntax, so links are untyped edges.
272///
273/// # Examples
274///
275/// ```
276/// use coding_tools::okf::links;
277///
278/// let body = "see [customers](/tables/customers.md) and [home](https://x.test)\n";
279/// let ls = links(body);
280/// assert_eq!(ls.len(), 1);
281/// assert_eq!(ls[0].target, "/tables/customers.md");
282/// assert!(ls[0].absolute);
283/// ```
284pub fn links(body: &str) -> Vec<Link> {
285    // [text](target) — target is the run up to the first ')' or whitespace.
286    let re = regex::Regex::new(r"\[[^\]]*\]\(([^)\s]+)\)").expect("static regex compiles");
287    let mut out = Vec::new();
288    for (n, line) in body.lines().enumerate() {
289        for cap in re.captures_iter(line) {
290            let target = cap[1].to_string();
291            let lower = target.to_ascii_lowercase();
292            if lower.starts_with("http://")
293                || lower.starts_with("https://")
294                || lower.starts_with("mailto:")
295                || target.starts_with('#')
296            {
297                continue;
298            }
299            out.push(Link {
300                absolute: target.starts_with('/'),
301                target,
302                line: n + 1,
303            });
304        }
305    }
306    out
307}
308
/// A per-file conformance finding for a bundle.
///
/// One `Finding` is produced for every `.md` file visited, conformant or not.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Finding {
    /// Path relative to the bundle base (or absolute if it could not be made so).
    pub path: PathBuf,
    /// Whether this is a reserved file (`index.md`/`log.md`).
    pub reserved: bool,
    /// Whether the file opens with a frontmatter fence (and the block is closed).
    pub has_frontmatter: bool,
    /// Whether the frontmatter block parses as YAML; `false` when there is no
    /// frontmatter at all.
    pub parseable: bool,
    /// Whether a non-empty `type` is present. Computed for every file, but
    /// only concepts are required to have one.
    pub has_type: bool,
    /// Whether this file satisfies OKF v0.1 conformance for its role.
    pub conformant: bool,
    /// Human-readable reasons it fails (empty when conformant).
    pub issues: Vec<String>,
    /// The parsed frontmatter, when present (for downstream listing).
    pub fm: Option<Frontmatter>,
}
329
330/// Walk a bundle and judge each `.md` file's conformance.
331///
332/// Concepts (non-reserved `.md`) must have parseable frontmatter with a
333/// non-empty `type`. Reserved files (`index.md`/`log.md`) need no `type` (and the
334/// bundle-root `index.md` may carry `okf_version` frontmatter); their only rule
335/// here is that any frontmatter present is parseable. The walk honors the
336/// [`walk::Selector`] it is handed, so callers control root, filter, and flags.
337pub fn conformance(selector: &walk::Selector) -> Result<Vec<Finding>, String> {
338    let base = &selector.base;
339    let mut findings = Vec::new();
340    for entry in selector.walk() {
341        let entry = entry.map_err(|e| e.to_string())?;
342        if !entry.file_type().is_some_and(|t| t.is_file()) {
343            continue;
344        }
345        let path = entry.path();
346        if path.extension().and_then(|e| e.to_str()) != Some("md") {
347            continue;
348        }
349        let name = path
350            .file_name()
351            .and_then(|n| n.to_str())
352            .unwrap_or_default()
353            .to_string();
354        let rel = path.strip_prefix(base).unwrap_or(path).to_path_buf();
355        let text = std::fs::read_to_string(path).map_err(|e| format!("{}: {e}", rel.display()))?;
356        let reserved = is_reserved(&name);
357        let parsed = parse(&text);
358        let mut issues = Vec::new();
359        let (has_frontmatter, parseable, has_type, fm) = match &parsed {
360            Some(p) => (
361                true,
362                p.parseable,
363                p.fm.type_.as_deref().is_some_and(|t| !t.trim().is_empty()),
364                Some(p.fm.clone()),
365            ),
366            None => (false, false, false, None),
367        };
368        if reserved {
369            // Reserved files (index.md/log.md) need no `type`; the bundle-root
370            // index.md may even carry `okf_version` frontmatter. Their only
371            // requirement here is that any frontmatter present is parseable.
372            if has_frontmatter && !parseable {
373                issues.push("frontmatter is not parseable YAML".to_string());
374            }
375        } else if !has_frontmatter {
376            issues.push("missing frontmatter (no leading --- fence)".to_string());
377        } else if !parseable {
378            issues.push("frontmatter is not parseable YAML".to_string());
379        } else if !has_type {
380            issues.push("frontmatter missing a non-empty `type`".to_string());
381        }
382        findings.push(Finding {
383            path: rel,
384            reserved,
385            has_frontmatter,
386            parseable,
387            has_type,
388            conformant: issues.is_empty(),
389            issues,
390            fm,
391        });
392    }
393    Ok(findings)
394}
395
396/// Find bundle cross-links whose target file is missing. Bundle-relative
397/// (`/…`) targets resolve against `base`; document-relative targets resolve
398/// against the linking file's directory. Any fragment (`#…`) is dropped before
399/// resolution. External URLs are ignored by [`links`] and never appear here.
400pub fn broken_links(selector: &walk::Selector) -> Result<Vec<(PathBuf, Link)>, String> {
401    let base = &selector.base;
402    let mut broken = Vec::new();
403    for entry in selector.walk() {
404        let entry = entry.map_err(|e| e.to_string())?;
405        if !entry.file_type().is_some_and(|t| t.is_file()) {
406            continue;
407        }
408        let path = entry.path();
409        if path.extension().and_then(|e| e.to_str()) != Some("md") {
410            continue;
411        }
412        let Ok(text) = std::fs::read_to_string(path) else {
413            continue;
414        };
415        let rel = path.strip_prefix(base).unwrap_or(path).to_path_buf();
416        let dir = path.parent().unwrap_or(base);
417        // Only the body's links matter; frontmatter has no Markdown links.
418        let body = match parse(&text) {
419            Some(p) => {
420                let start = p.body_start_line.saturating_sub(1);
421                text.lines().skip(start).collect::<Vec<_>>().join("\n")
422            }
423            None => text.clone(),
424        };
425        for link in links(&body) {
426            let target = link.target.split('#').next().unwrap_or("");
427            if target.is_empty() {
428                continue;
429            }
430            let resolved = if link.absolute {
431                base.join(target.trim_start_matches('/'))
432            } else {
433                dir.join(target)
434            };
435            if !resolved.exists() {
436                broken.push((rel.clone(), link));
437            }
438        }
439    }
440    Ok(broken)
441}
442
/// The current UTC date formatted as `YYYY-MM-DD`.
///
/// The system clock is converted to a civil date with Howard Hinnant's
/// civil-from-days algorithm, so no calendar dependency is needed. A clock set
/// before the Unix epoch is treated as the epoch itself. Shared so `log.md`
/// entries and timestamps are stamped consistently.
pub fn today_utc() -> String {
    let epoch_secs = match std::time::SystemTime::now().duration_since(std::time::UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_secs(),
        Err(_) => 0,
    };

    // Shift the day count so it is measured from 0000-03-01; starting the year
    // in March puts the leap day at the end of the cycle.
    let shifted = (epoch_secs / 86_400) as i64 + 719_468;
    let (era, day_of_era) = (shifted.div_euclid(146_097), shifted.rem_euclid(146_097));
    let year_of_era =
        (day_of_era - day_of_era / 1460 + day_of_era / 36_524 - day_of_era / 146_096) / 365;
    let day_of_year = day_of_era - (365 * year_of_era + year_of_era / 4 - year_of_era / 100);

    // Month index counted from March (0 = March … 11 = February).
    let month_index = (5 * day_of_year + 2) / 153;
    let day = day_of_year - (153 * month_index + 2) / 5 + 1;
    let month = if month_index < 10 {
        month_index + 3
    } else {
        month_index - 9
    };
    // January and February belong to the following civil year.
    let year = year_of_era + era * 400 + i64::from(month <= 2);

    format!("{year:04}-{month:02}-{day:02}")
}
463
/// Quote a frontmatter scalar value if it would not be a safe bare YAML scalar.
/// Shared by the authoring verbs and the `--script` engine so a value written by
/// either path round-trips identically.
///
/// A value is emitted bare only when it is safe both as a block mapping value
/// and as an item of a flow sequence (`tags: [a, b]`). It is double-quoted
/// when it:
///
/// * is empty or has leading/trailing whitespace;
/// * contains a newline, carriage return or tab (which are escaped);
/// * starts with a YAML indicator character, or looks like a sequence entry or
///   complex key (`-` / `?` alone or followed by a space);
/// * contains `: `, ends with `:`, or contains ` #` (a comment start);
/// * contains a flow indicator (`,`, `[`, `]`, `{`, `}`).
///
/// Inside the quotes, `\` and `"` are backslash-escaped.
///
/// # Examples
///
/// ```
/// use coding_tools::okf::yaml_scalar;
///
/// assert_eq!(yaml_scalar("Customers"), "Customers");
/// assert_eq!(yaml_scalar("a: b"), "\"a: b\"");
/// ```
pub fn yaml_scalar(v: &str) -> String {
    let sequence_or_key_marker =
        |marker: char| v.strip_prefix(marker).is_some_and(|rest| rest.is_empty() || rest.starts_with(' '));

    let needs_quote = v.is_empty()
        || v != v.trim()
        || v.contains(['\n', '\r', '\t'])
        || v.starts_with(['[', '{', '#', '*', '&', '!', '|', '>', '\'', '"', '@', '`', '%'])
        || sequence_or_key_marker('-')
        || sequence_or_key_marker('?')
        || v.contains(": ")
        || v.ends_with(':')
        || v.contains(" #")
        || v.contains([',', '[', ']', '{', '}']);

    if !needs_quote {
        return v.to_string();
    }

    let mut quoted = String::with_capacity(v.len() + 2);
    quoted.push('"');
    for ch in v.chars() {
        match ch {
            '\\' => quoted.push_str("\\\\"),
            '"' => quoted.push_str("\\\""),
            // Keep the value on one physical line so it cannot spill into
            // neighbouring frontmatter keys.
            '\n' => quoted.push_str("\\n"),
            '\r' => quoted.push_str("\\r"),
            '\t' => quoted.push_str("\\t"),
            other => quoted.push(other),
        }
    }
    quoted.push('"');
    quoted
}
488
489/// Build a new concept's file text: a frontmatter block (a non-empty `type` is
490/// required by OKF) followed by the body, defaulting to a single `# title`
491/// heading when no body is given.
492pub fn build_concept(
493    type_: &str,
494    title: &str,
495    description: Option<&str>,
496    tags: &[String],
497    timestamp: &str,
498    body: Option<&str>,
499) -> String {
500    let mut s = format!("---\ntype: {}\n", yaml_scalar(type_));
501    s.push_str(&format!("title: {}\n", yaml_scalar(title)));
502    if let Some(d) = description {
503        s.push_str(&format!("description: {}\n", yaml_scalar(d)));
504    }
505    if !tags.is_empty() {
506        let items: Vec<String> = tags.iter().map(|t| yaml_scalar(t)).collect();
507        s.push_str(&format!("tags: [{}]\n", items.join(", ")));
508    }
509    s.push_str(&format!("timestamp: {timestamp}\n---\n\n"));
510    match body {
511        Some(b) if !b.trim().is_empty() => {
512            s.push_str(b);
513            if !b.ends_with('\n') {
514                s.push('\n');
515            }
516        }
517        _ => s.push_str(&format!("# {title}\n")),
518    }
519    s
520}
521
522/// Set or update a top-level scalar frontmatter `field` on a concept's `text`,
523/// preserving every other byte. Returns the new text and whether an existing key
524/// was replaced (`false` means a new key was appended before the closing fence).
525/// Errors when the text has no frontmatter to edit.
526pub fn set_field(text: &str, field: &str, value: &str) -> Result<(String, bool), String> {
527    let parsed = parse(text).ok_or("no frontmatter to edit")?;
528    let (start, end) = parsed.fm_span; // 1-based, fences included
529    let all: Vec<&str> = text.split_inclusive('\n').collect();
530    let inner = &all[start..end - 1];
531    let new_line = format!("{field}: {}\n", yaml_scalar(value));
532    let mut replaced = false;
533    let mut new_inner: Vec<String> = Vec::with_capacity(inner.len() + 1);
534    for line in inner {
535        let is_target = line
536            .split_once(':')
537            .is_some_and(|(k, _)| k.trim() == field && !line.starts_with([' ', '\t']));
538        if is_target && !replaced {
539            new_inner.push(new_line.clone());
540            replaced = true;
541        } else {
542            new_inner.push((*line).to_string());
543        }
544    }
545    if !replaced {
546        new_inner.push(new_line);
547    }
548    let mut out = String::with_capacity(text.len() + field.len() + value.len() + 4);
549    out.push_str(&all[..start].concat());
550    out.push_str(&new_inner.concat());
551    out.push_str(&all[end - 1..].concat());
552    Ok((out, replaced))
553}
554
/// Add a `* **kind**: message` bullet for `today` to the top of a `log.md`.
///
/// Three cases, newest entry first:
/// * `existing` already starts with today's `## date` heading — the bullet is
///   inserted directly under that heading;
/// * `existing` is empty or whitespace — the result is just heading + bullet;
/// * otherwise — a new dated section is placed above `existing`, separated by
///   a blank line.
pub fn log_entry(existing: &str, today: &str, kind: &str, message: &str) -> String {
    let heading = format!("## {today}\n");
    let bullet = format!("* **{kind}**: {message}\n");

    match existing.strip_prefix(heading.as_str()) {
        Some(older) => [heading.as_str(), bullet.as_str(), older].concat(),
        None if existing.trim().is_empty() => format!("{heading}{bullet}"),
        None => [heading.as_str(), bullet.as_str(), "\n", existing].concat(),
    }
}
568
/// Produce an `index.md` body: a `# Index` heading, a blank line, then one
/// bullet per `(file, title, description)` entry in the order given. The
/// ` - description` suffix is left off when the description is empty.
pub fn render_index(entries: &[(String, String, String)]) -> String {
    let bullets = entries
        .iter()
        .map(|(file, title, desc)| match desc.as_str() {
            "" => format!("* [{title}]({file})\n"),
            summary => format!("* [{title}]({file}) - {summary}\n"),
        });
    std::iter::once(String::from("# Index\n\n"))
        .chain(bullets)
        .collect()
}
581
582/// Build a `.md`-restricted [`walk::Selector`] under `base`, optionally narrowed
583/// by a `name` pattern set. Shared by the tool and the built-in check so they
584/// select concepts identically.
585pub fn md_selector(
586    base: PathBuf,
587    names: Option<Vec<regex::Regex>>,
588    hidden: bool,
589    follow: bool,
590) -> walk::Selector {
591    let names = names.or_else(|| crate::pattern::compile_name_set("*.md").ok());
592    walk::Selector {
593        base,
594        names,
595        types: vec![EntryType::F],
596        size: None,
597        hidden,
598        follow,
599        no_ignore: false,
600    }
601}
602
603// ----- The `okf` built-in check -------------------------------------------------------
604
/// The `okf` built-in check's argument grammar (mirrors `deps`/`mods`): assert a
/// bundle's OKF conformance, optionally also rejecting broken cross-links.
// Maintainer notes use `//` on purpose: clap's derive turns `///` comments on
// this struct and its fields into user-visible help text, so rewording them
// changes program output.
#[derive(Parser, Debug)]
#[command(
    name = "okf",
    about = "Assert that a directory is a conformant OKF bundle."
)]
struct OkfCheck {
    /// Bundle root to check, relative to the project root.
    // Joined onto the `root` argument of `check`; defaults to the root itself.
    #[arg(long, default_value = ".")]
    base: PathBuf,
    /// Limit to files whose name matches; '|'-separated alternatives.
    // Compiled with `crate::pattern::compile_name_set`; when absent,
    // `md_selector` falls back to `*.md`.
    #[arg(long)]
    name: Option<String>,
    /// Include dot-entries (names starting with '.').
    #[arg(long)]
    hidden: bool,
    /// Follow symlinks while traversing.
    #[arg(long)]
    follow: bool,
    /// Also fail when a bundle-relative cross-link points at a missing file.
    // NOTE(review): `broken_links` resolves document-relative targets as well
    // as `/`-rooted ones, so "bundle-relative" in the help text above may be
    // narrower than what is actually checked — confirm the intended wording.
    #[arg(long)]
    strict: bool,
}
629
630/// The `okf` check's introspected grammar (see [`crate::deps::grammar`]).
631pub fn check_grammar() -> crate::deps::Grammar {
632    crate::deps::grammar(OkfCheck::command())
633}
634
635/// Run an `okf` built-in check: walk the bundle under `root`/`--base` and assert
636/// every non-reserved `.md` conforms (parseable frontmatter with a non-empty
637/// `type`); with `--strict`, also assert no broken bundle cross-links. Returns
638/// the probe outcome, a one-line reason, and a violation report. Argument and
639/// walk errors are [`ProbeOutcome::Broken`].
640pub fn check(
641    args: &[String],
642    root: &Path,
643    timeout: Option<Duration>,
644) -> (ProbeOutcome, String, String) {
645    let started = Instant::now();
646    let broken = |msg: String| (ProbeOutcome::Broken, msg, String::new());
647    let cli = match OkfCheck::try_parse_from(
648        std::iter::once("okf").chain(args.iter().map(String::as_str)),
649    ) {
650        Ok(c) => c,
651        Err(e) => {
652            let valid = check_grammar()
653                .flags
654                .iter()
655                .map(|s| format!("--{}", s.name))
656                .collect::<Vec<_>>()
657                .join(" ");
658            return broken(format!(
659                "okf: {} (valid flags: {valid})",
660                e.to_string().lines().next().unwrap_or("bad arguments")
661            ));
662        }
663    };
664
665    let names = match &cli.name {
666        Some(spec) => match crate::pattern::compile_name_set(spec) {
667            Ok(n) => Some(n),
668            Err(e) => return broken(format!("okf: invalid --name: {e}")),
669        },
670        None => None,
671    };
672    let base = root.join(&cli.base);
673    if !base.exists() {
674        return broken(format!(
675            "okf: bundle base does not exist: {}",
676            base.display()
677        ));
678    }
679    let selector = md_selector(base.clone(), names, cli.hidden, cli.follow);
680
681    let findings = match conformance(&selector) {
682        Ok(f) => f,
683        Err(e) => return broken(format!("okf: {e}")),
684    };
685    if let Some(limit) = timeout
686        && started.elapsed() >= limit
687    {
688        return broken(format!("okf: timed out after {:.1}s", limit.as_secs_f64()));
689    }
690
691    let mut report = String::new();
692    let mut violations = 0usize;
693    for f in &findings {
694        if !f.conformant {
695            violations += 1;
696            report.push_str(&format!("{}: {}\n", f.path.display(), f.issues.join("; ")));
697        }
698    }
699    let concepts = findings.iter().filter(|f| !f.reserved).count();
700
701    if cli.strict {
702        match broken_links(&selector) {
703            Ok(bl) => {
704                for (path, link) in &bl {
705                    violations += 1;
706                    report.push_str(&format!(
707                        "{}:{}: broken link {}\n",
708                        path.display(),
709                        link.line,
710                        link.target
711                    ));
712                }
713            }
714            Err(e) => return broken(format!("okf: {e}")),
715        }
716    }
717
718    if violations == 0 {
719        (
720            ProbeOutcome::Holds,
721            format!("{concepts} concept(s) conform"),
722            report,
723        )
724    } else {
725        (
726            ProbeOutcome::Violated,
727            format!("{violations} OKF violation(s)"),
728            report.trim_end().to_string(),
729        )
730    }
731}
732
// Unit tests for the pure, in-memory parts of the module: frontmatter parsing,
// reserved-name recognition and link extraction. Nothing here touches the
// filesystem.
#[cfg(test)]
mod tests {
    use super::*;

    // Every recognised field, a flow-form tag list and one unknown key, plus
    // the reported line positions of the block and the body.
    #[test]
    fn parse_detects_and_extracts_frontmatter() {
        let doc = "---\ntype: Playbook\ntitle: Onboarding\ndescription: How to onboard\nresource: bq://x\ntimestamp: 2026-01-02\ntags: [ops, hr]\nowner: jane\n---\n# Steps\nbody\n";
        let p = parse(doc).unwrap();
        assert_eq!(p.fm.type_.as_deref(), Some("Playbook"));
        assert_eq!(p.fm.title.as_deref(), Some("Onboarding"));
        assert_eq!(p.fm.description.as_deref(), Some("How to onboard"));
        assert_eq!(p.fm.resource.as_deref(), Some("bq://x"));
        assert_eq!(p.fm.timestamp.as_deref(), Some("2026-01-02"));
        assert_eq!(p.fm.tags, ["ops", "hr"]);
        assert_eq!(p.fm.extra.get("owner").map(String::as_str), Some("jane"));
        assert!(p.parseable);
        // Fences are on lines 1 and 9, so the body starts on line 10.
        assert_eq!(p.fm_span, (1, 9));
        assert_eq!(p.body_start_line, 10);
    }

    // Block-form tags and both quoting styles are unwrapped.
    #[test]
    fn parse_handles_block_tags_and_quotes() {
        let doc = "---\ntype: \"BigQuery Table\"\ntags:\n  - core\n  - 'pii'\n---\nbody\n";
        let p = parse(doc).unwrap();
        assert_eq!(p.fm.type_.as_deref(), Some("BigQuery Table"));
        assert_eq!(p.fm.tags, ["core", "pii"]);
    }

    // Frontmatter must be the very first thing in the text.
    #[test]
    fn parse_returns_none_without_a_fence() {
        assert!(parse("# title\nno frontmatter\n").is_none());
        assert!(parse("").is_none());
        // A blank first line is not a fence.
        assert!(parse("\n---\ntype: x\n---\n").is_none());
    }

    // An opening fence with no closing fence is treated as no frontmatter.
    #[test]
    fn unclosed_fence_is_not_frontmatter() {
        assert!(parse("---\ntype: x\nno closing fence\n").is_none());
    }

    #[test]
    fn reserved_files_recognised() {
        assert!(is_reserved("index.md"));
        assert!(is_reserved("log.md"));
        assert!(!is_reserved("concept.md"));
    }

    // Of the five links, only the bundle-relative and document-relative ones
    // survive; the external URL, bare anchor and mailto link are dropped.
    #[test]
    fn links_classifies_and_filters() {
        let body = "[a](/tables/x.md) [b](../sibling.md) [c](https://e.test) [d](#frag) [e](mailto:x@y.z)\n";
        let ls = links(body);
        assert_eq!(ls.len(), 2);
        assert_eq!(ls[0].target, "/tables/x.md");
        assert!(ls[0].absolute);
        assert_eq!(ls[1].target, "../sibling.md");
        assert!(!ls[1].absolute);
    }
}
791}