Skip to main content

stoa_core/
schema.rs

1//! In-memory model of the workspace `STOA.md` schema (ARCHITECTURE §3).
2//!
3//! For M2 the schema's job is narrow: hold the allow-lists used by
4//! [`crate::validate_page`]. The default vocabulary ships with `stoa init`.
5//! Parsing pulls extra entries the user added; unknown tokens stay opt-in
6//! per workspace.
7
8use std::collections::BTreeSet;
9
10use crate::kind::{Kind, Status};
11use crate::relationship::{DEFAULT_ENTITY_TYPES, DEFAULT_RELATIONSHIP_TYPES};
12
/// The vocabulary a workspace accepts during schema validation.
///
/// Every list is a `BTreeSet`, so iteration order is always sorted. That
/// keeps `entity_types()` / `relationship_types()` output deterministic,
/// which matters when the lists are echoed back in error messages.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Schema {
    /// Allowed entity-type names.
    entity_types: BTreeSet<String>,
    /// Allowed relationship-type names.
    relationship_types: BTreeSet<String>,
    /// Allowed page-kind names.
    kinds: BTreeSet<String>,
    /// Allowed status names.
    statuses: BTreeSet<String>,
}
24
25impl Schema {
26    /// Default schema (ARCHITECTURE §5). Used when no `STOA.md` is on disk
27    /// or as the fallback floor for [`Schema::from_stoa_md`].
28    #[must_use]
29    pub fn defaults() -> Self {
30        let entity_types = DEFAULT_ENTITY_TYPES
31            .iter()
32            .map(|s| (*s).to_owned())
33            .collect();
34        let relationship_types = DEFAULT_RELATIONSHIP_TYPES
35            .iter()
36            .map(|s| (*s).to_owned())
37            .collect();
38        let kinds = Kind::defaults()
39            .iter()
40            .map(|k| k.as_str().to_owned())
41            .collect();
42        let statuses = Status::defaults()
43            .iter()
44            .map(|s| s.as_str().to_owned())
45            .collect();
46        Self {
47            entity_types,
48            relationship_types,
49            kinds,
50            statuses,
51        }
52    }
53
54    /// Build a schema from a `STOA.md` document. Starts from defaults and
55    /// extends with any extra vocabulary mentioned in the file.
56    ///
57    /// The parser is intentionally forgiving — `STOA.md` is human-edited
58    /// markdown, not strict YAML. We scan for fenced-code or bullet-list
59    /// entries under "Entity types" / "Relationship types" headings.
60    #[must_use]
61    pub fn from_stoa_md(text: &str) -> Self {
62        let mut schema = Self::defaults();
63        let mut section: Option<Section> = None;
64        for raw_line in text.lines() {
65            let line = raw_line.trim_start();
66            if let Some(next) = Section::detect(line) {
67                section = Some(next);
68                continue;
69            }
70            if line.is_empty() {
71                continue;
72            }
73            if line.starts_with('#') {
74                section = None;
75                continue;
76            }
77            if let Some(token) = parse_bullet_token(line) {
78                schema.add(section, token);
79            }
80        }
81        schema
82    }
83
84    fn add(&mut self, section: Option<Section>, token: String) {
85        match section {
86            Some(Section::EntityTypes) => {
87                let _ignored = self.entity_types.insert(token);
88            },
89            Some(Section::RelationshipTypes) => {
90                let _ignored = self.relationship_types.insert(token);
91            },
92            _ => {},
93        }
94    }
95
96    /// Sorted view of the entity-type allow-list.
97    #[must_use]
98    pub fn entity_types(&self) -> Vec<&str> {
99        self.entity_types.iter().map(String::as_str).collect()
100    }
101
102    /// Sorted view of the relationship-type allow-list.
103    #[must_use]
104    pub fn relationship_types(&self) -> Vec<&str> {
105        self.relationship_types.iter().map(String::as_str).collect()
106    }
107
108    /// True if `value` is a recognized entity-type.
109    #[must_use]
110    pub fn allows_entity_type(&self, value: &str) -> bool {
111        self.entity_types.contains(value)
112    }
113
114    /// True if `value` is a recognized relationship-type.
115    #[must_use]
116    pub fn allows_relationship_type(&self, value: &str) -> bool {
117        self.relationship_types.contains(value)
118    }
119
120    /// True if `value` is a recognized page-kind.
121    #[must_use]
122    pub fn allows_kind(&self, value: &str) -> bool {
123        self.kinds.contains(value)
124    }
125
126    /// True if `value` is a recognized status.
127    #[must_use]
128    pub fn allows_status(&self, value: &str) -> bool {
129        self.statuses.contains(value)
130    }
131}
132
/// The `STOA.md` section whose bullet list is currently being read.
#[derive(Clone, Copy, Debug)]
enum Section {
    /// Bullets extend the entity-type allow-list.
    EntityTypes,
    /// Bullets extend the relationship-type allow-list.
    RelationshipTypes,
}

impl Section {
    /// Classify a markdown heading line.
    ///
    /// Returns `None` unless `line` starts with `#`. Matching is
    /// ASCII-case-insensitive and substring-based: a heading containing
    /// both "entity" and "type" selects [`Section::EntityTypes`]; otherwise
    /// one containing "relationship" selects [`Section::RelationshipTypes`].
    ///
    /// NOTE(review): substring matching is loose — e.g. "identity" contains
    /// "entity" — confirm whether word-level matching is wanted.
    fn detect(line: &str) -> Option<Self> {
        // Only headings can open a section; bail out before allocating.
        if !line.starts_with('#') {
            return None;
        }
        let heading = line.to_ascii_lowercase();
        let mentions = |needle: &str| heading.contains(needle);
        if mentions("entity") && mentions("type") {
            return Some(Self::EntityTypes);
        }
        if mentions("relationship") {
            return Some(Self::RelationshipTypes);
        }
        None
    }
}
151
/// Extract the vocabulary token from one markdown bullet line.
///
/// Recognizes `- ` and `* ` bullets (surrounding whitespace is ignored).
/// If the bullet text begins with a backtick, the token is the text up to
/// the closing backtick, trimmed; otherwise it is the first
/// whitespace-delimited word. Only a *leading* backtick span is honoured, so
/// trailing prose such as "— a widget thing" is ignored.
///
/// Returns `None` when the line is not a bullet, when a leading backtick is
/// never closed, or when the resulting token would be empty.
fn parse_bullet_token(line: &str) -> Option<String> {
    let trimmed = line.trim();
    // Tolerate extra spaces after the marker ("-   `widget`"); without the
    // `trim_start` the backtick branch is skipped and the token keeps its
    // backticks.
    let body = trimmed
        .strip_prefix("- ")
        .or_else(|| trimmed.strip_prefix("* "))?
        .trim_start();
    let token = match body.strip_prefix('`') {
        // Quoted form: everything up to the closing backtick.
        Some(rest) => rest.split_once('`')?.0.trim(),
        // Bare form: the first word only.
        None => body.split_whitespace().next()?,
    };
    // An empty token (e.g. from "- ``") must never reach the allow-lists.
    if token.is_empty() {
        None
    } else {
        Some(token.to_owned())
    }
}
164
#[cfg(test)]
mod tests {
    use super::Schema;

    /// The built-in vocabulary contains the documented sample entries.
    #[test]
    fn defaults_cover_documented_types() {
        let schema = Schema::defaults();

        // Entity types.
        assert!(schema.allows_entity_type("library"));
        assert!(schema.allows_entity_type("decision"));

        // Relationship types.
        assert!(schema.allows_relationship_type("depends_on"));
        assert!(schema.allows_relationship_type("supersedes"));

        // Page kinds and statuses.
        assert!(schema.allows_kind("entity"));
        assert!(schema.allows_status("active"));
    }

    /// A bullet under an "Entity types" heading extends the defaults.
    #[test]
    fn parses_extra_entity_type_from_md() {
        let schema = Schema::from_stoa_md("# Entity types\n- `widget` — a widget thing\n");

        assert!(schema.allows_entity_type("widget"));
        assert!(schema.allows_entity_type("library"), "defaults preserved");
    }

    /// A bullet under a "Relationship types" heading extends the defaults.
    #[test]
    fn parses_extra_relationship_type_from_md() {
        let schema = Schema::from_stoa_md("# Relationship types\n- `blocks` — A blocks B\n");

        assert!(schema.allows_relationship_type("blocks"));
        assert!(
            schema.allows_relationship_type("depends_on"),
            "defaults preserved"
        );
    }
}