Skip to main content

coding_tools/
okfroots.rs

1// SPDX-License-Identifier: Apache-2.0
2// Copyright 2026 Jonathan Shook
3
4//! OKF **content roots**: which directories `ct-okf` treats as knowledge
5//! bundles, how they are discovered, and how their concept files are fed to the
6//! [`crate::okfindex`] search index.
7//!
8//! A directory is a content root if **any** of three signals holds, so a user
9//! can adopt whichever is convenient and they interoperate:
10//!
11//! 1. a `.okf` **marker file** in the directory (our convention — it may be
12//!    empty, or carry optional JSONC directives);
13//! 2. a bundle-root `index.md` declaring `okf_version` — the only root signal
14//!    the OKF standard itself defines;
15//! 3. an entry in the project config `.ct/okf.jsonc` (the explicit list managed
16//!    by `ct okf roots add/rm` and `ct okf roots scan --write`).
17//!
18//! All three converge on the same set; the config is the durable record. Paths
19//! are anchored at the **project root** — the nearest ancestor holding `.ct`
20//! (reusing [`crate::rules::discover_root`]) — and the index lives under
21//! `.ct/okf/`.
22
23use std::collections::BTreeSet;
24use std::path::{Path, PathBuf};
25use std::time::UNIX_EPOCH;
26
27use crate::okf;
28use crate::okfindex::{DocSource, FileStat};
29
/// File name of the project config, resolved under `.ct/` (see
/// [`config_path`]).
pub const CONFIG_FILE: &str = "okf.jsonc";
/// File name of the per-directory root marker. This is our own convention;
/// OKF itself defines no marker file.
pub const MARKER_FILE: &str = ".okf";
/// Directory name of the search index, resolved under `.ct/` (see
/// [`index_dir`]).
pub const INDEX_DIR: &str = "okf";
36
37/// The project root for `start`: the nearest ancestor containing `.ct`, else
38/// `start` itself (so the tools still work in a directory without a `.ct`).
39pub fn project_root(start: &Path) -> PathBuf {
40    crate::rules::discover_root(start).unwrap_or_else(|| start.to_path_buf())
41}
42
43/// Path to `.ct/okf.jsonc` under `project`.
44pub fn config_path(project: &Path) -> PathBuf {
45    project.join(".ct").join(CONFIG_FILE)
46}
47
48/// Path to the index directory `.ct/okf/` under `project`.
49pub fn index_dir(project: &Path) -> PathBuf {
50    project.join(".ct").join(INDEX_DIR)
51}
52
/// Turn `dir` into the project-relative, `/`-separated key that is stored in
/// the config and used as a root's stable identity.
///
/// * A path under `project` has that prefix removed.
/// * A path that is not under `project` is kept as given.
/// * In both cases backslashes become `/` and leading/trailing `/` are
///   trimmed, so an absolute path outside the project loses its leading `/`.
/// * An empty result (i.e. `dir` is the project itself) is spelled `"."`.
pub fn rel_key(project: &Path, dir: &Path) -> String {
    let relative = match dir.strip_prefix(project) {
        Ok(rest) => rest,
        Err(_) => dir,
    };
    let unified = relative.to_string_lossy().replace('\\', "/");
    match unified.trim_matches('/') {
        "" => String::from("."),
        key => key.to_owned(),
    }
}
66
67// ----- Config -------------------------------------------------------------------------
68
/// The `.ct/okf.jsonc` project config: the explicit list of content roots.
///
/// Read with [`Config::load`] and persisted with [`Config::save`].
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct Config {
    /// Project-relative root keys. `load` strips leading/trailing `/` from
    /// each entry; `save` writes them sorted and de-duplicated.
    pub roots: Vec<String>,
}
75
76impl Config {
77    /// Load the config, or an empty one when the file is absent. A malformed
78    /// file is an error (so a typo is surfaced, not silently ignored).
79    pub fn load(project: &Path) -> Result<Config, String> {
80        let path = config_path(project);
81        let text = match std::fs::read_to_string(&path) {
82            Ok(t) => t,
83            Err(_) => return Ok(Config::default()),
84        };
85        let value =
86            jsonc_parser::parse_to_serde_value(&text, &jsonc_parser::ParseOptions::default())
87                .map_err(|e| format!("{}: {e}", path.display()))?
88                .ok_or_else(|| format!("{}: empty config", path.display()))?;
89        let obj = value
90            .as_object()
91            .ok_or_else(|| format!("{}: config root must be an object", path.display()))?;
92        let mut roots = Vec::new();
93        if let Some(arr) = obj.get("roots").and_then(|v| v.as_array()) {
94            for r in arr {
95                if let Some(s) = r.as_str() {
96                    roots.push(s.trim_matches('/').to_string());
97                }
98            }
99        }
100        Ok(Config { roots })
101    }
102
103    /// Write the config (creating `.ct/` if needed), sorted and de-duplicated.
104    pub fn save(&self, project: &Path) -> Result<(), String> {
105        let mut roots: Vec<String> = self.roots.clone();
106        roots.sort();
107        roots.dedup();
108        let path = config_path(project);
109        if let Some(parent) = path.parent() {
110            std::fs::create_dir_all(parent).map_err(|e| format!("{}: {e}", parent.display()))?;
111        }
112        let value = serde_json::json!({ "roots": roots });
113        let text = format!(
114            "// OKF content roots for this project, managed by `ct okf roots`.\n{}\n",
115            serde_json::to_string_pretty(&value).map_err(|e| e.to_string())?
116        );
117        std::fs::write(&path, text).map_err(|e| format!("{}: {e}", path.display()))
118    }
119
120    /// Add `key` (a project-relative root). Returns whether it was newly added.
121    pub fn add(&mut self, key: &str) -> bool {
122        if self.roots.iter().any(|r| r == key) {
123            false
124        } else {
125            self.roots.push(key.to_string());
126            true
127        }
128    }
129
130    /// Remove `key`. Returns whether it was present.
131    pub fn remove(&mut self, key: &str) -> bool {
132        let before = self.roots.len();
133        self.roots.retain(|r| r != key);
134        self.roots.len() != before
135    }
136}
137
138// ----- Detection ----------------------------------------------------------------------
139
/// The signal by which a content root was recognised.
///
/// Variant order matters: the derived `Ord` follows declaration order, and
/// [`detect`] collects signals through a `BTreeSet`, so this is the order in
/// which they appear in [`Root::via`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum Detection {
    /// The root is listed in `.ct/okf.jsonc`.
    Config,
    /// The directory holds a `.okf` marker file.
    Marker,
    /// The directory holds an `index.md` that declares `okf_version`.
    OkfVersion,
}

impl Detection {
    /// Short lowercase name of the signal.
    pub fn label(self) -> &'static str {
        match self {
            Self::OkfVersion => "okf_version",
            Self::Marker => "marker",
            Self::Config => "config",
        }
    }
}
160
/// A detected content root, as produced by [`detect`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Root {
    /// The root directory: `project.join(key)` for config entries, or the
    /// parent of the marker / `index.md` file found by the walk.
    pub dir: PathBuf,
    /// The project-relative key computed by [`rel_key`].
    pub key: String,
    /// Every signal that flagged this root, without duplicates, in
    /// [`Detection`] order.
    pub via: Vec<Detection>,
}
169
170/// Whether `dir` has a bundle-root `index.md` carrying `okf_version` frontmatter.
171fn has_okf_version_index(dir: &Path) -> bool {
172    let p = dir.join("index.md");
173    std::fs::read_to_string(&p)
174        .ok()
175        .and_then(|t| okf::parse(&t))
176        .is_some_and(|parsed| parsed.fm.extra.contains_key("okf_version"))
177}
178
179/// Whether `dir` directly contains at least one OKF **concept** — a non-reserved
180/// `.md` whose frontmatter carries a non-empty `type`.
181fn has_concept(dir: &Path) -> bool {
182    let Ok(entries) = std::fs::read_dir(dir) else {
183        return false;
184    };
185    for entry in entries.flatten() {
186        let path = entry.path();
187        if path.extension().and_then(|e| e.to_str()) != Some("md") {
188            continue;
189        }
190        let name = path
191            .file_name()
192            .and_then(|n| n.to_str())
193            .unwrap_or_default();
194        if okf::is_reserved(name) {
195            continue;
196        }
197        if let Ok(text) = std::fs::read_to_string(&path)
198            && okf::parse(&text)
199                .is_some_and(|p| p.fm.type_.as_deref().is_some_and(|t| !t.trim().is_empty()))
200        {
201            return true;
202        }
203    }
204    false
205}
206
207/// Build a `.md`-restricted directory walker for `dir`, including dot-entries so
208/// `.okf` markers are visible, and honouring ignore files (the index should not
209/// cover what the VCS ignores).
210fn walk(dir: &Path) -> impl Iterator<Item = PathBuf> {
211    ignore::WalkBuilder::new(dir)
212        .hidden(false)
213        .build()
214        .filter_map(Result::ok)
215        .filter(|e| e.file_type().is_some_and(|t| t.is_file()))
216        .map(ignore::DirEntry::into_path)
217}
218
219/// Detect every content root in `project`: the union of config entries, `.okf`
220/// markers, and `okf_version` index files. Sorted by key; each carries the
221/// signals that flagged it.
222pub fn detect(project: &Path) -> Result<Vec<Root>, String> {
223    use std::collections::BTreeMap;
224    let mut found: BTreeMap<String, (PathBuf, BTreeSet<Detection>)> = BTreeMap::new();
225    let note = |dir: PathBuf,
226                via: Detection,
227                found: &mut BTreeMap<String, (PathBuf, BTreeSet<Detection>)>| {
228        let key = rel_key(project, &dir);
229        found
230            .entry(key)
231            .or_insert_with(|| (dir, BTreeSet::new()))
232            .1
233            .insert(via);
234    };
235
236    // 1) Config entries.
237    for key in Config::load(project)?.roots {
238        let dir = project.join(&key);
239        note(dir, Detection::Config, &mut found);
240    }
241    // 2) & 3) Markers and okf_version index files, found by one walk.
242    for path in walk(project) {
243        let name = path
244            .file_name()
245            .and_then(|n| n.to_str())
246            .unwrap_or_default();
247        if name == MARKER_FILE
248            && let Some(parent) = path.parent()
249        {
250            note(parent.to_path_buf(), Detection::Marker, &mut found);
251        } else if name == "index.md"
252            && let Some(parent) = path.parent()
253            && has_okf_version_index(parent)
254        {
255            note(parent.to_path_buf(), Detection::OkfVersion, &mut found);
256        }
257    }
258
259    Ok(found
260        .into_iter()
261        .map(|(key, (dir, via))| Root {
262            dir,
263            key,
264            via: via.into_iter().collect(),
265        })
266        .collect())
267}
268
269/// Heuristically derive candidate roots in `project` for onboarding: the
270/// top-most directories that either declare `okf_version` or directly contain an
271/// OKF concept. A directory is dropped when an ancestor already qualifies, so
272/// nested concept folders collapse into their bundle root.
273pub fn scan_candidates(project: &Path) -> Vec<PathBuf> {
274    let mut dirs: BTreeSet<PathBuf> = BTreeSet::new();
275    for path in walk(project) {
276        let Some(parent) = path.parent() else {
277            continue;
278        };
279        let name = path
280            .file_name()
281            .and_then(|n| n.to_str())
282            .unwrap_or_default();
283        let qualifies = (name == "index.md" && has_okf_version_index(parent))
284            || (path.extension().and_then(|e| e.to_str()) == Some("md")
285                && !okf::is_reserved(name)
286                && has_concept(parent));
287        if qualifies {
288            dirs.insert(parent.to_path_buf());
289        }
290    }
291    // Collapse to top-most: drop any dir that has an ancestor in the set.
292    let all: Vec<PathBuf> = dirs.iter().cloned().collect();
293    all.iter()
294        .filter(|d| !all.iter().any(|a| *d != a && d.starts_with(a)))
295        .cloned()
296        .collect()
297}
298
299/// Create an empty `.okf` marker in `dir` (no-op if one already exists).
300pub fn write_marker(dir: &Path) -> Result<(), String> {
301    let path = dir.join(MARKER_FILE);
302    if path.exists() {
303        return Ok(());
304    }
305    std::fs::create_dir_all(dir).map_err(|e| format!("{}: {e}", dir.display()))?;
306    std::fs::write(&path, "// OKF content root marker.\n")
307        .map_err(|e| format!("{}: {e}", path.display()))
308}
309
310// ----- Feeding the index --------------------------------------------------------------
311
/// Nanoseconds since the Unix epoch for a file's modification time.
///
/// Returns 0 when the mtime is unavailable or lies before the epoch. A value
/// too large for `u64` saturates at `u64::MAX` instead of silently wrapping.
fn mtime_ns(meta: &std::fs::Metadata) -> u64 {
    meta.modified()
        .ok()
        .and_then(|t| t.duration_since(UNIX_EPOCH).ok())
        // `as_nanos` is `u128`; convert explicitly rather than truncate.
        .map_or(0, |d| u64::try_from(d.as_nanos()).unwrap_or(u64::MAX))
}
320
321/// The live concept files across `roots`: every non-reserved `.md`, as a
322/// [`FileStat`] keyed by project-relative path. De-duplicated when roots
323/// overlap. The result drives [`crate::okfindex::Index::update`].
324pub fn concept_files(project: &Path, roots: &[PathBuf]) -> Vec<FileStat> {
325    let mut seen: BTreeSet<String> = BTreeSet::new();
326    let mut out = Vec::new();
327    for root in roots {
328        for path in walk(root) {
329            if path.extension().and_then(|e| e.to_str()) != Some("md") {
330                continue;
331            }
332            let name = path
333                .file_name()
334                .and_then(|n| n.to_str())
335                .unwrap_or_default();
336            if okf::is_reserved(name) {
337                continue;
338            }
339            let key = rel_key(project, &path);
340            if !seen.insert(key.clone()) {
341                continue;
342            }
343            let Ok(meta) = std::fs::metadata(&path) else {
344                continue;
345            };
346            out.push(FileStat {
347                key,
348                path,
349                mtime_ns: mtime_ns(&meta),
350                size: meta.len(),
351            });
352        }
353    }
354    out
355}
356
357/// Read one concept file into a [`DocSource`] for indexing: title/type/tags from
358/// frontmatter (title falling back to the file stem), and a searchable text of
359/// the description, resource, and body.
360pub fn load_doc(path: &Path) -> Result<DocSource, String> {
361    let text = std::fs::read_to_string(path).map_err(|e| format!("{}: {e}", path.display()))?;
362    let stem = path
363        .file_stem()
364        .and_then(|s| s.to_str())
365        .unwrap_or("")
366        .to_string();
367    let parsed = okf::parse(&text);
368    let (fm, body) = match &parsed {
369        Some(p) => {
370            let start = p.body_start_line.saturating_sub(1);
371            let body = text.lines().skip(start).collect::<Vec<_>>().join("\n");
372            (p.fm.clone(), body)
373        }
374        None => (okf::Frontmatter::default(), text.clone()),
375    };
376    let mut searchable = String::new();
377    for part in [
378        fm.description.as_deref(),
379        fm.resource.as_deref(),
380        Some(body.as_str()),
381    ]
382    .into_iter()
383    .flatten()
384    {
385        searchable.push_str(part);
386        searchable.push(' ');
387    }
388    Ok(DocSource {
389        title: fm.title.unwrap_or(stem),
390        type_: fm.type_.unwrap_or_default(),
391        tags: fm.tags,
392        text: searchable,
393    })
394}
395
/// Filesystem-backed tests. Each test builds its own scratch project under
/// the system temp directory; the directories are not removed afterwards.
#[cfg(test)]
mod tests {
    use super::*;
    use std::sync::atomic::{AtomicU32, Ordering};

    /// Per-process counter that keeps scratch directory names unique across
    /// tests running in parallel.
    static TAG: AtomicU32 = AtomicU32::new(0);

    /// Create a fresh scratch project directory containing an empty `.ct/`,
    /// named from the process id and a counter. Any leftover directory with
    /// the same name is removed first.
    fn scratch() -> PathBuf {
        let n = TAG.fetch_add(1, Ordering::Relaxed);
        let dir = std::env::temp_dir().join(format!("ct-okfroots-{}-{n}", std::process::id()));
        let _ = std::fs::remove_dir_all(&dir);
        std::fs::create_dir_all(dir.join(".ct")).unwrap();
        dir
    }

    /// Write `text` to `path`, creating parent directories as needed.
    fn write(path: &Path, text: &str) {
        std::fs::create_dir_all(path.parent().unwrap()).unwrap();
        std::fs::write(path, text).unwrap();
    }

    /// A minimal concept document: frontmatter with `type`, `title` and
    /// `description`, followed by a heading and one body line.
    fn concept(type_: &str, title: &str) -> String {
        format!(
            "---\ntype: {type_}\ntitle: {title}\ndescription: about {title}\n---\n# {title}\nbody text\n"
        )
    }

    /// `rel_key` strips the project prefix and spells the project itself `.`.
    #[test]
    fn rel_key_normalizes_under_project() {
        let project = Path::new("/proj");
        assert_eq!(rel_key(project, Path::new("/proj/docs/kb")), "docs/kb");
        assert_eq!(rel_key(project, Path::new("/proj")), ".");
    }

    /// Each of the three signals on its own is enough for `detect` to report
    /// a root, and the signal is recorded in `via`.
    #[test]
    fn detects_roots_via_marker_okf_version_and_config() {
        let p = scratch();
        // A marker root.
        write(&p.join("kb1/a.md"), &concept("Note", "Alpha"));
        write(&p.join("kb1/.okf"), "");
        // An okf_version index root.
        write(
            &p.join("kb2/index.md"),
            "---\nokf_version: \"0.1\"\n---\n# Index\n",
        );
        write(&p.join("kb2/b.md"), &concept("Note", "Beta"));
        // A config-only root.
        write(&p.join("kb3/c.md"), &concept("Note", "Gamma"));
        Config {
            roots: vec!["kb3".to_string()],
        }
        .save(&p)
        .unwrap();

        let roots = detect(&p).unwrap();
        let keys: Vec<&str> = roots.iter().map(|r| r.key.as_str()).collect();
        assert!(keys.contains(&"kb1"), "{keys:?}");
        assert!(keys.contains(&"kb2"), "{keys:?}");
        assert!(keys.contains(&"kb3"), "{keys:?}");
        let kb1 = roots.iter().find(|r| r.key == "kb1").unwrap();
        assert!(kb1.via.contains(&Detection::Marker));
        let kb2 = roots.iter().find(|r| r.key == "kb2").unwrap();
        assert!(kb2.via.contains(&Detection::OkfVersion));
        let kb3 = roots.iter().find(|r| r.key == "kb3").unwrap();
        assert!(kb3.via.contains(&Detection::Config));
    }

    /// A qualifying directory nested under another qualifying directory is
    /// not reported separately by `scan_candidates`.
    #[test]
    fn scan_collapses_nested_concept_dirs_to_topmost() {
        let p = scratch();
        write(
            &p.join("kb/index.md"),
            "---\nokf_version: \"0.1\"\n---\n# Index\n",
        );
        write(&p.join("kb/a.md"), &concept("Note", "A"));
        write(&p.join("kb/sub/b.md"), &concept("Note", "B"));
        let cands = scan_candidates(&p);
        // Only the top-most "kb" qualifies; "kb/sub" collapses into it.
        assert_eq!(cands.len(), 1, "{cands:?}");
        assert!(cands[0].ends_with("kb"));
    }

    /// `add` rejects duplicates, `save`/`load` round-trip the roots, and
    /// `remove` reports whether the key was present.
    #[test]
    fn config_roundtrips_and_dedups() {
        let p = scratch();
        let mut cfg = Config::default();
        assert!(cfg.add("docs/kb"));
        assert!(!cfg.add("docs/kb")); // already present
        assert!(cfg.add("notes"));
        cfg.save(&p).unwrap();
        let loaded = Config::load(&p).unwrap();
        assert_eq!(
            loaded.roots,
            vec!["docs/kb".to_string(), "notes".to_string()]
        );
        let mut loaded = loaded;
        assert!(loaded.remove("notes"));
        assert!(!loaded.remove("notes"));
    }

    /// `concept_files` reports ordinary `.md` files and leaves out the
    /// reserved `index.md` and `log.md`.
    #[test]
    fn concept_files_lists_md_excluding_reserved() {
        let p = scratch();
        write(&p.join("kb/a.md"), &concept("Note", "A"));
        write(&p.join("kb/b.md"), &concept("Note", "B"));
        write(
            &p.join("kb/index.md"),
            "---\nokf_version: \"0.1\"\n---\n# Index\n",
        );
        write(&p.join("kb/log.md"), "# Log\n");
        let files = concept_files(&p, &[p.join("kb")]);
        let mut keys: Vec<&str> = files.iter().map(|f| f.key.as_str()).collect();
        keys.sort();
        assert_eq!(keys, vec!["kb/a.md", "kb/b.md"]); // index.md/log.md excluded
    }

    /// `load_doc` takes title, type and tags from the frontmatter and makes
    /// both the description and the body searchable.
    #[test]
    fn load_doc_extracts_frontmatter_and_body() {
        let p = scratch();
        let path = p.join("kb/customers.md");
        write(
            &path,
            "---\ntype: BigQuery Table\ntitle: Customers\ndescription: the customer dimension\ntags: [core, pii]\n---\n# Customers\nrow-per-customer.\n",
        );
        let doc = load_doc(&path).unwrap();
        assert_eq!(doc.title, "Customers");
        assert_eq!(doc.type_, "BigQuery Table");
        assert_eq!(doc.tags, vec!["core".to_string(), "pii".to_string()]);
        assert!(doc.text.contains("customer dimension"));
        assert!(doc.text.contains("row-per-customer"));
    }

    /// `project_root` climbs to the directory holding `.ct` and otherwise
    /// returns its argument.
    // NOTE(review): the second half presumably relies on no ancestor of the
    // temp directory containing a `.ct` entry — confirm for CI environments.
    #[test]
    fn project_root_walks_up_to_ct() {
        let p = scratch(); // holds .ct
        let deep = p.join("a/b/c");
        std::fs::create_dir_all(&deep).unwrap();
        assert_eq!(project_root(&deep), p);

        // With no `.ct` above it, discovery falls back to the start directory.
        let n = TAG.fetch_add(1, Ordering::Relaxed);
        let lone =
            std::env::temp_dir().join(format!("ct-okfroots-lone-{}-{n}", std::process::id()));
        let _ = std::fs::remove_dir_all(&lone);
        std::fs::create_dir_all(&lone).unwrap();
        assert_eq!(project_root(&lone), lone);
    }

    /// Calling `write_marker` twice succeeds both times, and the marker it
    /// creates is picked up by `detect`.
    #[test]
    fn write_marker_is_idempotent_and_detected() {
        let p = scratch();
        let kb = p.join("kb");
        std::fs::create_dir_all(&kb).unwrap();
        write_marker(&kb).unwrap();
        assert!(kb.join(MARKER_FILE).is_file());
        write_marker(&kb).unwrap(); // second call is a no-op, not an error
        let roots = detect(&p).unwrap();
        assert!(
            roots
                .iter()
                .any(|r| r.key == "kb" && r.via.contains(&Detection::Marker)),
            "{roots:?}"
        );
    }

    /// Files hidden by a `.ignore` file are not reported by `concept_files`.
    #[test]
    fn concept_files_respects_ignore_files() {
        let p = scratch();
        write(&p.join("kb/a.md"), &concept("Note", "A"));
        write(&p.join("kb/skip/b.md"), &concept("Note", "B"));
        // A `.ignore` file (honored by the walker without requiring git) hides skip/.
        write(&p.join("kb/.ignore"), "skip/\n");
        let files = concept_files(&p, &[p.join("kb")]);
        let keys: Vec<&str> = files.iter().map(|f| f.key.as_str()).collect();
        assert!(keys.contains(&"kb/a.md"), "{keys:?}");
        assert!(
            !keys.iter().any(|k| k.contains("skip/b.md")),
            "ignored file indexed: {keys:?}"
        );
    }
}