Skip to main content

drft/rules/
schema_violation.rs

1use crate::diagnostic::Diagnostic;
2use crate::rules::{Rule, RuleContext};
3
/// Rule that checks node metadata against the schema configured in the
/// rule's options (required fields and allowed values).
///
/// The type carries no state; all configuration arrives through the
/// `RuleContext` passed to `evaluate`.
#[derive(Debug, Clone, Copy, Default)]
pub struct SchemaViolationRule;
5
6impl Rule for SchemaViolationRule {
7    fn name(&self) -> &str {
8        "schema-violation"
9    }
10
11    fn evaluate(&self, ctx: &RuleContext) -> Vec<Diagnostic> {
12        let options = match ctx.options {
13            Some(opts) => opts,
14            None => return vec![],
15        };
16
17        let global_required = extract_string_array(options, "required");
18        let schemas = options
19            .get("schemas")
20            .and_then(|v| v.as_table())
21            .cloned()
22            .unwrap_or_default();
23
24        let mut diagnostics = Vec::new();
25
26        // Pre-compile glob matchers for schemas
27        let mut compiled_schemas: Vec<(globset::GlobMatcher, SchemaSpec)> = Vec::new();
28        for (pattern, value) in &schemas {
29            match globset::Glob::new(pattern) {
30                Ok(glob) => {
31                    let spec = SchemaSpec::from_toml(value);
32                    compiled_schemas.push((glob.compile_matcher(), spec));
33                }
34                Err(e) => {
35                    diagnostics.push(Diagnostic {
36                        rule: "schema-violation".into(),
37                        message: format!("invalid schema glob \"{pattern}\": {e}"),
38                        fix: Some(format!(
39                            "fix the glob pattern \"{pattern}\" in [rules.schema-violation.options.schemas]"
40                        )),
41                        ..Default::default()
42                    });
43                }
44            }
45        }
46
47        for (path, node) in &ctx.graph.graph.nodes {
48            let metadata = merge_metadata(&node.metadata);
49            let source = metadata_source(&node.metadata);
50
51            // Check global required fields
52            for field in &global_required {
53                if !has_field(&metadata, field) {
54                    diagnostics.push(Diagnostic {
55                        rule: "schema-violation".into(),
56                        message: format!("missing required field \"{field}\""),
57                        node: Some(path.clone()),
58                        fix: Some(format!("add \"{field}\" to {source} in {path}")),
59                        ..Default::default()
60                    });
61                }
62            }
63
64            // Check per-glob schemas
65            for (matcher, spec) in &compiled_schemas {
66                if !matcher.is_match(path) {
67                    continue;
68                }
69
70                for field in &spec.required {
71                    if !has_field(&metadata, field) {
72                        diagnostics.push(Diagnostic {
73                            rule: "schema-violation".into(),
74                            message: format!("missing required field \"{field}\""),
75                            node: Some(path.clone()),
76                            fix: Some(format!("add \"{field}\" to {source} in {path}")),
77                            ..Default::default()
78                        });
79                    }
80                }
81
82                for (field, allowed_values) in &spec.allowed {
83                    if let Some(value) = get_field(&metadata, field)
84                        && let Some(s) = value_as_string(value)
85                        && !allowed_values.iter().any(|av| av == &s)
86                    {
87                        diagnostics.push(Diagnostic {
88                            rule: "schema-violation".into(),
89                            message: format!(
90                                "field \"{field}\" has value \"{s}\", allowed: [{}]",
91                                allowed_values.join(", ")
92                            ),
93                            node: Some(path.clone()),
94                            fix: Some(format!(
95                                "change \"{field}\" in {path} to one of: {}",
96                                allowed_values.join(", ")
97                            )),
98                            ..Default::default()
99                        });
100                    }
101                }
102            }
103        }
104
105        diagnostics.sort_by(|a, b| a.node.cmp(&b.node));
106        diagnostics
107    }
108}
109
/// Constraints attached to one glob pattern in the `schemas` option table.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
struct SchemaSpec {
    /// Field names that must be present on every matching node.
    required: Vec<String>,
    /// Pairs of (field name, permitted values) for matching nodes.
    allowed: Vec<(String, Vec<String>)>,
}
114
115impl SchemaSpec {
116    fn from_toml(value: &toml::Value) -> Self {
117        let required = extract_string_array(value, "required");
118        let allowed = value
119            .get("allowed")
120            .and_then(|v| v.as_table())
121            .map(|table| {
122                table
123                    .iter()
124                    .map(|(k, v)| (k.clone(), extract_string_array_direct(v)))
125                    .collect()
126            })
127            .unwrap_or_default();
128        Self { required, allowed }
129    }
130}
131
132fn extract_string_array(value: &toml::Value, key: &str) -> Vec<String> {
133    value
134        .get(key)
135        .and_then(|v| v.as_array())
136        .map(|arr| {
137            arr.iter()
138                .filter_map(|v| v.as_str().map(String::from))
139                .collect()
140        })
141        .unwrap_or_default()
142}
143
144fn extract_string_array_direct(value: &toml::Value) -> Vec<String> {
145    value
146        .as_array()
147        .map(|arr| {
148            arr.iter()
149                .filter_map(|v| v.as_str().map(String::from))
150                .collect()
151        })
152        .unwrap_or_default()
153}
154
155/// Merge metadata from all parser namespaces into a single flat JSON object.
156/// Namespaces are merged in alphabetical order — later namespaces override earlier
157/// ones for conflicting keys (e.g., "markdown" overrides "frontmatter").
158fn merge_metadata(
159    metadata: &std::collections::HashMap<String, serde_json::Value>,
160) -> serde_json::Value {
161    let mut merged = serde_json::Map::new();
162    let mut keys: Vec<&String> = metadata.keys().collect();
163    keys.sort();
164    for key in keys {
165        if let serde_json::Value::Object(map) = &metadata[key] {
166            for (k, v) in map {
167                merged.insert(k.clone(), v.clone());
168            }
169        }
170    }
171    serde_json::Value::Object(merged)
172}
173
174/// Return a human-readable label for the metadata source.
175/// When a single parser contributed metadata, name it (e.g. "frontmatter").
176/// Otherwise fall back to the generic "metadata".
177fn metadata_source(metadata: &std::collections::HashMap<String, serde_json::Value>) -> String {
178    let keys: Vec<&String> = metadata.keys().collect();
179    if keys.len() == 1 {
180        keys[0].clone()
181    } else {
182        "metadata".to_string()
183    }
184}
185
186fn has_field(metadata: &serde_json::Value, field: &str) -> bool {
187    metadata.get(field).is_some_and(|v| !v.is_null())
188}
189
190fn get_field<'a>(metadata: &'a serde_json::Value, field: &str) -> Option<&'a serde_json::Value> {
191    metadata.get(field).filter(|v| !v.is_null())
192}
193
194fn value_as_string(value: &serde_json::Value) -> Option<String> {
195    match value {
196        serde_json::Value::String(s) => Some(s.clone()),
197        serde_json::Value::Number(n) => Some(n.to_string()),
198        serde_json::Value::Bool(b) => Some(b.to_string()),
199        _ => None,
200    }
201}
202
#[cfg(test)]
mod tests {
    use super::*;
    use crate::graph::test_helpers::make_enriched;
    use crate::graph::{Graph, Node};
    use crate::rules::RuleContext;
    use std::collections::HashMap;

    /// Build a file node whose only metadata namespace is "frontmatter".
    fn node_with_metadata(path: &str, metadata: serde_json::Value) -> Node {
        let meta_map = HashMap::from([("frontmatter".to_string(), metadata)]);
        Node {
            path: path.into(),
            node_type: Some(crate::graph::NodeType::File),
            included: true,
            hash: None,
            metadata: meta_map,
        }
    }

    /// Put `nodes` into a fresh graph, parse `options_toml` (if any), and run the rule.
    fn run_rule(
        nodes: Vec<Node>,
        options_toml: Option<&str>,
    ) -> Vec<crate::diagnostic::Diagnostic> {
        let mut graph = Graph::new();
        for node in nodes {
            graph.add_node(node);
        }

        let enriched = make_enriched(graph);
        let options = options_toml.map(|src| toml::from_str::<toml::Value>(src).unwrap());
        let ctx = RuleContext {
            graph: &enriched,
            options: options.as_ref(),
        };
        SchemaViolationRule.evaluate(&ctx)
    }

    #[test]
    fn detects_missing_required_field() {
        let diagnostics = run_rule(
            vec![node_with_metadata(
                "doc.md",
                serde_json::json!({"status": "draft"}),
            )],
            Some("required = [\"title\"]"),
        );

        assert_eq!(diagnostics.len(), 1);
        let only = &diagnostics[0];
        assert!(only.message.contains("title"));
        assert_eq!(only.node.as_deref(), Some("doc.md"));
        // Fix message names the parser when there's a single contributor
        let fix = only.fix.as_ref().unwrap();
        assert!(
            fix.contains("frontmatter"),
            "fix should name the parser: {fix}"
        );
    }

    #[test]
    fn passes_when_required_field_present() {
        let diagnostics = run_rule(
            vec![node_with_metadata(
                "doc.md",
                serde_json::json!({"title": "Hello"}),
            )],
            Some("required = [\"title\"]"),
        );

        assert!(diagnostics.is_empty());
    }

    #[test]
    fn detects_per_glob_required() {
        let diagnostics = run_rule(
            vec![
                node_with_metadata(
                    "observations/note.md",
                    serde_json::json!({"title": "Note"}),
                ),
                // This file matches *.md but not observations/*.md
                node_with_metadata("readme.md", serde_json::json!({"title": "README"})),
            ],
            Some(
                r#"
            [schemas."observations/*.md"]
            required = ["title", "date", "status"]
            "#,
            ),
        );

        // observations/note.md missing "date" and "status"
        assert_eq!(diagnostics.len(), 2);
        let messages: Vec<&str> = diagnostics.iter().map(|d| d.message.as_str()).collect();
        assert!(messages.iter().any(|m| m.contains("date")));
        assert!(messages.iter().any(|m| m.contains("status")));
    }

    #[test]
    fn detects_disallowed_value() {
        let diagnostics = run_rule(
            vec![node_with_metadata(
                "observations/note.md",
                serde_json::json!({"title": "Note", "status": "invalid"}),
            )],
            Some(
                r#"
            [schemas."observations/*.md"]
            required = ["title"]
            allowed.status = ["draft", "review", "final"]
            "#,
            ),
        );

        assert_eq!(diagnostics.len(), 1);
        assert!(diagnostics[0].message.contains("invalid"));
        assert!(diagnostics[0].message.contains("allowed"));
    }

    #[test]
    fn allowed_value_passes() {
        let diagnostics = run_rule(
            vec![node_with_metadata(
                "observations/note.md",
                serde_json::json!({"title": "Note", "status": "draft"}),
            )],
            Some(
                r#"
            [schemas."observations/*.md"]
            allowed.status = ["draft", "review", "final"]
            "#,
            ),
        );

        assert!(diagnostics.is_empty());
    }

    #[test]
    fn no_options_no_diagnostics() {
        let diagnostics = run_rule(
            vec![node_with_metadata(
                "doc.md",
                serde_json::json!({"title": "Hello"}),
            )],
            None,
        );

        assert!(diagnostics.is_empty());
    }

    #[test]
    fn skips_nodes_without_metadata() {
        let bare_node = Node {
            path: "no-frontmatter.md".into(),
            node_type: Some(crate::graph::NodeType::File),
            included: true,
            hash: None,
            metadata: HashMap::new(),
        };
        let diagnostics = run_rule(vec![bare_node], Some("required = [\"title\"]"));

        // No metadata means no fields — should flag the missing required field
        assert_eq!(diagnostics.len(), 1);
        assert!(diagnostics[0].message.contains("title"));
    }
}