Skip to main content

sema_docs/
lib.rs

1//! Canonical structured documentation for Sema's builtins and special forms.
2//!
3//! Each builtin/special form is one markdown file (`crates/sema-docs/entries/stdlib/<module>/<slug>.md`)
4//! with a single YAML frontmatter block (`name`, `params`, `returns`, `see_also`, ...) followed by
5//! a markdown body that may contain `sema` example code blocks.
6//!
7//! The filename is just a slug; the `name` field is canonical (so operator names like `*`, `<=`,
8//! `null?` are fine). From this source `sema-docs gen` produces a single committed JSON index
9//! ([`builtin_index`]) consumed at runtime by the LSP (hover/completion) and the REPL (apropos/doc).
10//! The website is intentionally **not** generated from this yet.
11
12use serde::{Deserialize, Serialize};
13use std::collections::HashSet;
14use std::fs;
15use std::path::Path;
16
17/// A single documented parameter.
18#[derive(Debug, Clone, Default, Serialize, Deserialize)]
19pub struct Param {
20    pub name: String,
21    #[serde(rename = "type", default, skip_serializing_if = "Option::is_none")]
22    pub ty: Option<String>,
23    #[serde(default, skip_serializing_if = "Option::is_none")]
24    pub doc: Option<String>,
25}
26
27/// Frontmatter as authored at the top of each entry file.
28#[derive(Debug, Clone, Default, Deserialize)]
29struct Frontmatter {
30    name: String,
31    #[serde(default)]
32    module: Option<String>,
33    #[serde(default)]
34    section: Option<String>,
35    #[serde(default)]
36    params: Vec<Param>,
37    #[serde(default)]
38    returns: Option<String>,
39    #[serde(default)]
40    since: Option<String>,
41    #[serde(default)]
42    deprecated: bool,
43    #[serde(default)]
44    see_also: Vec<String>,
45    #[serde(default)]
46    aliases: Vec<String>,
47    /// Explicit summary; if absent it's derived from the first body paragraph.
48    #[serde(default)]
49    summary: Option<String>,
50    /// Syntax template for special forms (e.g. `(let ((name value) ...) body ...)`).
51    /// When present, shown as a signature block in hover and used as the label in
52    /// signature help. Overrides flat parameter rendering for forms with complex syntax.
53    #[serde(default, skip_serializing_if = "Option::is_none")]
54    syntax: Option<String>,
55}
56
57/// A fully resolved documentation entry (the serialized contract shared with LSP/REPL).
58#[derive(Debug, Clone, Serialize, Deserialize)]
59pub struct DocEntry {
60    pub name: String,
61    #[serde(default, skip_serializing_if = "Vec::is_empty")]
62    pub aliases: Vec<String>,
63    pub module: String,
64    #[serde(default, skip_serializing_if = "Option::is_none")]
65    pub section: Option<String>,
66    pub summary: String,
67    #[serde(default, skip_serializing_if = "Vec::is_empty")]
68    pub params: Vec<Param>,
69    #[serde(default, skip_serializing_if = "Option::is_none")]
70    pub returns: Option<String>,
71    #[serde(default, skip_serializing_if = "Option::is_none")]
72    pub since: Option<String>,
73    #[serde(default, skip_serializing_if = "is_false")]
74    pub deprecated: bool,
75    #[serde(default, skip_serializing_if = "Vec::is_empty")]
76    pub see_also: Vec<String>,
77    /// Runnable example snippets (the contents of ```sema fenced blocks in the body).
78    #[serde(default, skip_serializing_if = "Vec::is_empty")]
79    pub examples: Vec<String>,
80    /// The full markdown body (including examples), used verbatim for hover.
81    pub body: String,
82    /// Syntax template for special forms (e.g. `(let ((name value) ...) body ...)`).
83    /// When present, shown as a signature block in hover and used as the label in
84    /// signature help. Overrides flat parameter rendering for forms with complex syntax.
85    #[serde(default, skip_serializing_if = "Option::is_none")]
86    pub syntax: Option<String>,
87    /// `true` for special forms (no params schema; syntax lives in the body).
88    #[serde(default, skip_serializing_if = "is_false")]
89    pub special_form: bool,
90}
91
/// `skip_serializing_if` predicate: `true` when the flag is unset, so serde omits the field.
/// Takes `&bool` because serde passes the field by reference.
fn is_false(flag: &bool) -> bool {
    matches!(*flag, false)
}
95
96/// The serialized doc index (committed as JSON, loaded at runtime).
97#[derive(Debug, Serialize, Deserialize)]
98pub struct DocIndex {
99    pub version: u32,
100    pub entries: Vec<DocEntry>,
101}
102
103/// Load the committed doc index that's compiled into the binary. Used by the LSP and REPL.
104pub fn builtin_index() -> DocIndex {
105    const JSON: &str = include_str!("../builtin_docs.generated.json");
106    serde_json::from_str(JSON).expect("crates/sema-docs/builtin_docs.generated.json is valid")
107}
108
/// Error produced while loading, parsing, or validating doc entries.
/// It carries nothing but a human-readable message.
#[derive(Debug)]
pub struct DocError(pub String);

impl std::fmt::Display for DocError {
    /// Writes the message as-is; width and padding flags are not applied.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let DocError(message) = self;
        f.write_str(message)
    }
}

impl std::error::Error for DocError {}

/// Crate-local result alias with [`DocError`] as the error type.
type Result<T> = std::result::Result<T, DocError>;

/// Shorthand for building an `Err(DocError(..))` from anything convertible to a `String`.
fn err<T>(msg: impl Into<String>) -> Result<T> {
    let message: String = msg.into();
    Err(DocError(message))
}
121
122// ── Parsing ───────────────────────────────────────────────────────
123
124/// Split a leading `---\n...\n---` YAML frontmatter block; returns `(yaml, body)`.
125fn split_frontmatter(text: &str) -> Result<(&str, &str)> {
126    let t = text.trim_start_matches('\u{feff}');
127    let t = t.trim_start_matches(['\n', '\r']);
128    let rest = t
129        .strip_prefix("---\n")
130        .or_else(|| t.strip_prefix("---\r\n"))
131        .ok_or_else(|| DocError("missing `---` frontmatter".into()))?;
132    let end = rest
133        .find("\n---")
134        .ok_or_else(|| DocError("unterminated frontmatter".into()))?;
135    let yaml = &rest[..end];
136    let after = &rest[end + 4..];
137    let after = after
138        .strip_prefix('\n')
139        .or_else(|| after.strip_prefix("\r\n"))
140        .unwrap_or(after);
141    Ok((yaml, after))
142}
143
144/// Parse a single entry file into a [`DocEntry`].
145pub fn parse_entry(
146    file: &Path,
147    text: &str,
148    default_module: &str,
149    special_form: bool,
150) -> Result<DocEntry> {
151    let ctx = file.display();
152    let (yaml, body_raw) = split_frontmatter(text).map_err(|e| DocError(format!("{ctx}: {e}")))?;
153    let fm: Frontmatter =
154        serde_yaml::from_str(yaml).map_err(|e| DocError(format!("{ctx}: frontmatter: {e}")))?;
155    if fm.name.trim().is_empty() {
156        return err(format!("{ctx}: missing `name`"));
157    }
158    let body = body_raw.trim().to_string();
159    let summary = fm.summary.clone().unwrap_or_else(|| first_paragraph(&body));
160    let examples = extract_sema_examples(&body);
161    Ok(DocEntry {
162        name: fm.name,
163        aliases: fm.aliases,
164        module: fm.module.unwrap_or_else(|| default_module.to_string()),
165        section: fm.section,
166        summary,
167        params: fm.params,
168        returns: fm.returns,
169        since: fm.since,
170        deprecated: fm.deprecated,
171        see_also: fm.see_also,
172        examples,
173        body,
174        syntax: fm.syntax,
175        special_form,
176    })
177}
178
/// First prose paragraph of a markdown body.
///
/// Leading blank lines, leading fenced code blocks (signature blocks like
/// ```` ```sema\n(f x) → y\n``` ```` are common at the top of an entry) and leading heading
/// lines (any line starting with `#`) are skipped. The paragraph then runs until the next
/// blank line, fence, or heading. Its lines are trimmed and joined with single spaces.
///
/// Returns an empty string when the body has no prose paragraph.
fn first_paragraph(body: &str) -> String {
    let mut lines = body.lines().peekable();

    // Skip everything that may precede the first paragraph: blank lines, headings, and
    // complete fenced blocks. An unterminated fence consumes the rest of the body.
    while let Some(&line) = lines.peek() {
        let lead = line.trim_start();
        if lead.is_empty() || lead.starts_with('#') {
            lines.next();
        } else if lead.starts_with("```") {
            lines.next(); // opening fence
            for inner in lines.by_ref() {
                if inner.trim_start().starts_with("```") {
                    break; // closing fence consumed
                }
            }
        } else {
            break;
        }
    }

    // The iterator now sits on the first prose line (if any); collect until the paragraph ends.
    let mut out = String::new();
    for line in lines {
        let l = line.trim();
        if l.is_empty() || l.starts_with("```") || l.starts_with('#') {
            break;
        }
        if !out.is_empty() {
            out.push(' ');
        }
        out.push_str(l);
    }
    out
}
219
/// Extract the contents of example code blocks from a markdown body.
///
/// A block opens on a line that is exactly ```` ```sema ````, ```` ```scheme ```` or
/// ```` ```lisp ```` and closes on a line that is exactly ```` ``` ````. Surrounding whitespace
/// on fence lines is ignored, so an indented fence or one with trailing spaces still counts.
/// Lines inside a block are kept verbatim (indentation included); trailing whitespace of the
/// whole snippet is trimmed. Empty snippets and a block left open at the end of the body are
/// dropped. Fenced blocks with any other tag are ignored.
fn extract_sema_examples(body: &str) -> Vec<String> {
    let mut examples = Vec::new();
    // `Some(buffer)` while inside an example block, `None` otherwise.
    let mut open: Option<String> = None;
    for line in body.lines() {
        let marker = line.trim();
        match open.take() {
            None => {
                if matches!(marker, "```sema" | "```scheme" | "```lisp") {
                    open = Some(String::new());
                }
            }
            Some(snippet) if marker == "```" => {
                let snippet = snippet.trim_end();
                if !snippet.is_empty() {
                    examples.push(snippet.to_string());
                }
            }
            Some(mut snippet) => {
                snippet.push_str(line);
                snippet.push('\n');
                open = Some(snippet);
            }
        }
    }
    examples
}
247
248// ── Loading + validation ──────────────────────────────────────────
249
250/// Recursively collect `*.md` files under `dir`.
251fn collect_md(dir: &Path, out: &mut Vec<std::path::PathBuf>) -> Result<()> {
252    let rd = fs::read_dir(dir).map_err(|e| DocError(format!("reading {}: {e}", dir.display())))?;
253    for entry in rd {
254        let path = entry.map_err(|e| DocError(e.to_string()))?.path();
255        if path.is_dir() {
256            collect_md(&path, out)?;
257        } else if path.extension().map(|x| x == "md").unwrap_or(false) {
258            out.push(path);
259        }
260    }
261    Ok(())
262}
263
264/// Load every entry from the stdlib doc tree and the special-forms tree.
265pub fn load(stdlib_dir: &Path, special_forms_dir: &Path) -> Result<Vec<DocEntry>> {
266    let mut entries = Vec::new();
267    let mut load_tree = |root: &Path, special: bool| -> Result<()> {
268        if !root.exists() {
269            return Ok(());
270        }
271        let mut files = Vec::new();
272        collect_md(root, &mut files)?;
273        files.sort();
274        for path in files {
275            // default module = the immediate parent directory name relative to the tree.
276            let default_module = path
277                .parent()
278                .and_then(|p| p.file_name())
279                .and_then(|s| s.to_str())
280                .filter(|d| Path::new(root).file_name().and_then(|s| s.to_str()) != Some(d))
281                .unwrap_or(if special { "special-forms" } else { "misc" });
282            let text = fs::read_to_string(&path)
283                .map_err(|e| DocError(format!("reading {}: {e}", path.display())))?;
284            entries.push(parse_entry(&path, &text, default_module, special)?);
285        }
286        Ok(())
287    };
288    load_tree(stdlib_dir, false)?;
289    load_tree(special_forms_dir, true)?;
290    entries.sort_by(|a, b| (&a.module, &a.name).cmp(&(&b.module, &b.name)));
291    Ok(entries)
292}
293
294/// Validate the loaded entries. Hard errors (`Err`): duplicate (module, name) / (module, alias)
295/// pairs, unbalanced code fences, leaked VitePress `:::` containers. Soft warnings (`Ok`): empty
296/// summaries — these become hard errors under `strict` (the coverage gate).
297pub fn validate(entries: &[DocEntry], strict: bool) -> Result<Vec<String>> {
298    let mut seen: HashSet<(String, String)> = HashSet::new();
299    let mut errors = Vec::new();
300    let mut warnings = Vec::new();
301    for e in entries {
302        for n in std::iter::once(&e.name).chain(e.aliases.iter()) {
303            let key = (e.module.clone(), n.clone());
304            if !seen.insert(key) {
305                errors.push(format!("duplicate doc name `{n}` in module `{}`", e.module));
306            }
307        }
308        if e.summary.trim().is_empty() {
309            let msg = format!("`{}` ({}) has an empty summary", e.name, e.module);
310            if strict {
311                errors.push(msg);
312            } else {
313                warnings.push(msg);
314            }
315        }
316        if e.body.matches("```").count() % 2 != 0 {
317            errors.push(format!(
318                "`{}` ({}) has unbalanced ``` fences",
319                e.name, e.module
320            ));
321        }
322        if e.body.contains(":::") {
323            errors.push(format!(
324                "`{}` ({}) leaks a `:::` container into hover",
325                e.name, e.module
326            ));
327        }
328    }
329    if errors.is_empty() {
330        Ok(warnings)
331    } else {
332        err(format!(
333            "doc validation failed:\n  - {}",
334            errors.join("\n  - ")
335        ))
336    }
337}
338
339/// Drop duplicate entries within the same module (first wins, in load order).
340/// Returns one warning per drop.
341pub fn dedupe(entries: &mut Vec<DocEntry>) -> Vec<String> {
342    let mut seen: HashSet<(String, String)> = HashSet::new();
343    let mut warnings = Vec::new();
344    entries.retain(|e| {
345        let names: Vec<&String> = std::iter::once(&e.name).chain(e.aliases.iter()).collect();
346        if names
347            .iter()
348            .any(|n| seen.contains(&(e.module.clone(), n.to_string())))
349        {
350            warnings.push(format!(
351                "dropped duplicate `{}` in module `{}`",
352                e.name, e.module
353            ));
354            false
355        } else {
356            for n in names {
357                seen.insert((e.module.clone(), n.clone()));
358            }
359            true
360        }
361    });
362    warnings
363}
364
365pub fn build_index(entries: Vec<DocEntry>) -> DocIndex {
366    DocIndex {
367        version: 1,
368        entries,
369    }
370}