Skip to main content

drft/rules/
schema_violation.rs

1use crate::diagnostic::Diagnostic;
2use crate::graph::NodeType;
3use crate::rules::{Rule, RuleContext};
4
5pub struct SchemaViolationRule;
6
7impl Rule for SchemaViolationRule {
8    fn name(&self) -> &str {
9        "schema-violation"
10    }
11
12    fn evaluate(&self, ctx: &RuleContext) -> Vec<Diagnostic> {
13        let options = match ctx.options {
14            Some(opts) => opts,
15            None => return vec![],
16        };
17
18        let global_required = extract_string_array(options, "required");
19        let schemas = options
20            .get("schemas")
21            .and_then(|v| v.as_table())
22            .cloned()
23            .unwrap_or_default();
24
25        let mut diagnostics = Vec::new();
26
27        // Pre-compile glob matchers for schemas
28        let mut compiled_schemas: Vec<(globset::GlobMatcher, SchemaSpec)> = Vec::new();
29        for (pattern, value) in &schemas {
30            match globset::Glob::new(pattern) {
31                Ok(glob) => {
32                    let spec = SchemaSpec::from_toml(value);
33                    compiled_schemas.push((glob.compile_matcher(), spec));
34                }
35                Err(e) => {
36                    diagnostics.push(Diagnostic {
37                        rule: "schema-violation".into(),
38                        message: format!("invalid schema glob \"{pattern}\": {e}"),
39                        fix: Some(format!(
40                            "fix the glob pattern \"{pattern}\" in [rules.schema-violation.options.schemas]"
41                        )),
42                        ..Default::default()
43                    });
44                }
45            }
46        }
47
48        for (path, node) in &ctx.graph.graph.nodes {
49            if node.node_type != NodeType::File {
50                continue;
51            }
52
53            // Collect all metadata across parser namespaces into one merged view
54            let metadata = merge_metadata(&node.metadata);
55            let source = metadata_source(&node.metadata);
56
57            // Check global required fields
58            for field in &global_required {
59                if !has_field(&metadata, field) {
60                    diagnostics.push(Diagnostic {
61                        rule: "schema-violation".into(),
62                        message: format!("missing required field \"{field}\""),
63                        node: Some(path.clone()),
64                        fix: Some(format!("add \"{field}\" to {source} in {path}")),
65                        ..Default::default()
66                    });
67                }
68            }
69
70            // Check per-glob schemas
71            for (matcher, spec) in &compiled_schemas {
72                if !matcher.is_match(path) {
73                    continue;
74                }
75
76                for field in &spec.required {
77                    if !has_field(&metadata, field) {
78                        diagnostics.push(Diagnostic {
79                            rule: "schema-violation".into(),
80                            message: format!("missing required field \"{field}\""),
81                            node: Some(path.clone()),
82                            fix: Some(format!("add \"{field}\" to {source} in {path}")),
83                            ..Default::default()
84                        });
85                    }
86                }
87
88                for (field, allowed_values) in &spec.allowed {
89                    if let Some(value) = get_field(&metadata, field)
90                        && let Some(s) = value_as_string(value)
91                        && !allowed_values.iter().any(|av| av == &s)
92                    {
93                        diagnostics.push(Diagnostic {
94                            rule: "schema-violation".into(),
95                            message: format!(
96                                "field \"{field}\" has value \"{s}\", allowed: [{}]",
97                                allowed_values.join(", ")
98                            ),
99                            node: Some(path.clone()),
100                            fix: Some(format!(
101                                "change \"{field}\" in {path} to one of: {}",
102                                allowed_values.join(", ")
103                            )),
104                            ..Default::default()
105                        });
106                    }
107                }
108            }
109        }
110
111        diagnostics.sort_by(|a, b| a.node.cmp(&b.node));
112        diagnostics
113    }
114}
115
116struct SchemaSpec {
117    required: Vec<String>,
118    allowed: Vec<(String, Vec<String>)>,
119}
120
121impl SchemaSpec {
122    fn from_toml(value: &toml::Value) -> Self {
123        let required = extract_string_array(value, "required");
124        let allowed = value
125            .get("allowed")
126            .and_then(|v| v.as_table())
127            .map(|table| {
128                table
129                    .iter()
130                    .map(|(k, v)| (k.clone(), extract_string_array_direct(v)))
131                    .collect()
132            })
133            .unwrap_or_default();
134        Self { required, allowed }
135    }
136}
137
138fn extract_string_array(value: &toml::Value, key: &str) -> Vec<String> {
139    value
140        .get(key)
141        .and_then(|v| v.as_array())
142        .map(|arr| {
143            arr.iter()
144                .filter_map(|v| v.as_str().map(String::from))
145                .collect()
146        })
147        .unwrap_or_default()
148}
149
150fn extract_string_array_direct(value: &toml::Value) -> Vec<String> {
151    value
152        .as_array()
153        .map(|arr| {
154            arr.iter()
155                .filter_map(|v| v.as_str().map(String::from))
156                .collect()
157        })
158        .unwrap_or_default()
159}
160
161/// Merge metadata from all parser namespaces into a single flat JSON object.
162/// Namespaces are merged in alphabetical order — later namespaces override earlier
163/// ones for conflicting keys (e.g., "markdown" overrides "frontmatter").
164fn merge_metadata(
165    metadata: &std::collections::HashMap<String, serde_json::Value>,
166) -> serde_json::Value {
167    let mut merged = serde_json::Map::new();
168    let mut keys: Vec<&String> = metadata.keys().collect();
169    keys.sort();
170    for key in keys {
171        if let serde_json::Value::Object(map) = &metadata[key] {
172            for (k, v) in map {
173                merged.insert(k.clone(), v.clone());
174            }
175        }
176    }
177    serde_json::Value::Object(merged)
178}
179
180/// Return a human-readable label for the metadata source.
181/// When a single parser contributed metadata, name it (e.g. "frontmatter").
182/// Otherwise fall back to the generic "metadata".
183fn metadata_source(metadata: &std::collections::HashMap<String, serde_json::Value>) -> String {
184    let keys: Vec<&String> = metadata.keys().collect();
185    if keys.len() == 1 {
186        keys[0].clone()
187    } else {
188        "metadata".to_string()
189    }
190}
191
192fn has_field(metadata: &serde_json::Value, field: &str) -> bool {
193    metadata.get(field).is_some_and(|v| !v.is_null())
194}
195
196fn get_field<'a>(metadata: &'a serde_json::Value, field: &str) -> Option<&'a serde_json::Value> {
197    metadata.get(field).filter(|v| !v.is_null())
198}
199
200fn value_as_string(value: &serde_json::Value) -> Option<String> {
201    match value {
202        serde_json::Value::String(s) => Some(s.clone()),
203        serde_json::Value::Number(n) => Some(n.to_string()),
204        serde_json::Value::Bool(b) => Some(b.to_string()),
205        _ => None,
206    }
207}
208
#[cfg(test)]
mod tests {
    use super::*;
    use crate::graph::test_helpers::make_enriched;
    use crate::graph::{Graph, Node, NodeType};
    use crate::rules::RuleContext;
    use std::collections::HashMap;

    /// Build a file node whose metadata lives in a single "frontmatter" namespace.
    fn node_with_metadata(path: &str, metadata: serde_json::Value) -> Node {
        let mut meta_map = HashMap::new();
        meta_map.insert("frontmatter".to_string(), metadata);
        Node {
            path: path.into(),
            node_type: NodeType::File,
            hash: None,
            graph: None,
            is_graph: false,
            metadata: meta_map,
        }
    }

    /// Put `nodes` into a fresh graph, run the rule, and return its diagnostics.
    ///
    /// `options_toml` is the rule's option table as TOML text; `None` runs the
    /// rule without any options.
    fn run_rule(nodes: Vec<Node>, options_toml: Option<&str>) -> Vec<Diagnostic> {
        let mut graph = Graph::new();
        for node in nodes {
            graph.add_node(node);
        }

        let enriched = make_enriched(graph);
        let options: Option<toml::Value> =
            options_toml.map(|text| toml::from_str(text).expect("test options must be valid TOML"));
        let ctx = RuleContext {
            graph: &enriched,
            options: options.as_ref(),
        };
        SchemaViolationRule.evaluate(&ctx)
    }

    #[test]
    fn detects_missing_required_field() {
        let diagnostics = run_rule(
            vec![node_with_metadata(
                "doc.md",
                serde_json::json!({"status": "draft"}),
            )],
            Some("required = [\"title\"]"),
        );

        assert_eq!(diagnostics.len(), 1);
        assert!(diagnostics[0].message.contains("title"));
        assert_eq!(diagnostics[0].node.as_deref(), Some("doc.md"));
        // Fix message names the parser when there's a single contributor
        let fix = diagnostics[0].fix.as_ref().unwrap();
        assert!(
            fix.contains("frontmatter"),
            "fix should name the parser: {fix}"
        );
    }

    #[test]
    fn passes_when_required_field_present() {
        let diagnostics = run_rule(
            vec![node_with_metadata(
                "doc.md",
                serde_json::json!({"title": "Hello"}),
            )],
            Some("required = [\"title\"]"),
        );

        assert!(diagnostics.is_empty());
    }

    #[test]
    fn detects_per_glob_required() {
        let nodes = vec![
            node_with_metadata(
                "observations/note.md",
                serde_json::json!({"title": "Note"}),
            ),
            // This file is outside observations/, so the schema below must not
            // apply to it and it must produce no diagnostics.
            node_with_metadata("readme.md", serde_json::json!({"title": "README"})),
        ];

        let diagnostics = run_rule(
            nodes,
            Some(
                r#"
            [schemas."observations/*.md"]
            required = ["title", "date", "status"]
            "#,
            ),
        );

        // observations/note.md missing "date" and "status"
        assert_eq!(diagnostics.len(), 2);
        let messages: Vec<&str> = diagnostics.iter().map(|d| d.message.as_str()).collect();
        assert!(messages.iter().any(|m| m.contains("date")));
        assert!(messages.iter().any(|m| m.contains("status")));
    }

    #[test]
    fn detects_disallowed_value() {
        let diagnostics = run_rule(
            vec![node_with_metadata(
                "observations/note.md",
                serde_json::json!({"title": "Note", "status": "invalid"}),
            )],
            Some(
                r#"
            [schemas."observations/*.md"]
            required = ["title"]
            allowed.status = ["draft", "review", "final"]
            "#,
            ),
        );

        assert_eq!(diagnostics.len(), 1);
        assert!(diagnostics[0].message.contains("invalid"));
        assert!(diagnostics[0].message.contains("allowed"));
    }

    #[test]
    fn allowed_value_passes() {
        let diagnostics = run_rule(
            vec![node_with_metadata(
                "observations/note.md",
                serde_json::json!({"title": "Note", "status": "draft"}),
            )],
            Some(
                r#"
            [schemas."observations/*.md"]
            allowed.status = ["draft", "review", "final"]
            "#,
            ),
        );

        assert!(diagnostics.is_empty());
    }

    #[test]
    fn no_options_no_diagnostics() {
        let diagnostics = run_rule(
            vec![node_with_metadata(
                "doc.md",
                serde_json::json!({"title": "Hello"}),
            )],
            None,
        );

        assert!(diagnostics.is_empty());
    }

    /// A file node with no metadata namespaces at all is still checked: it has
    /// no fields, so every required field is reported as missing.
    #[test]
    fn flags_required_field_on_node_without_metadata() {
        let bare_node = Node {
            path: "no-frontmatter.md".into(),
            node_type: NodeType::File,
            hash: None,
            graph: None,
            is_graph: false,
            metadata: HashMap::new(),
        };

        let diagnostics = run_rule(vec![bare_node], Some("required = [\"title\"]"));

        // No metadata means no fields — should flag the missing required field
        assert_eq!(diagnostics.len(), 1);
        assert!(diagnostics[0].message.contains("title"));
    }
}
405}