Skip to main content

drft/rules/
schema_violation.rs

1use crate::diagnostic::Diagnostic;
2use crate::graph::NodeType;
3use crate::rules::{Rule, RuleContext};
4
/// Rule that checks the metadata of file nodes against the configured schema:
/// globally required fields, plus per-glob required fields and allowed values.
pub struct SchemaViolationRule;
6
7impl Rule for SchemaViolationRule {
8    fn name(&self) -> &str {
9        "schema-violation"
10    }
11
12    fn evaluate(&self, ctx: &RuleContext) -> Vec<Diagnostic> {
13        let options = match ctx.options {
14            Some(opts) => opts,
15            None => return vec![],
16        };
17
18        let global_required = extract_string_array(options, "required");
19        let schemas = options
20            .get("schemas")
21            .and_then(|v| v.as_table())
22            .cloned()
23            .unwrap_or_default();
24
25        let mut diagnostics = Vec::new();
26
27        // Pre-compile glob matchers for schemas
28        let mut compiled_schemas: Vec<(globset::GlobMatcher, SchemaSpec)> = Vec::new();
29        for (pattern, value) in &schemas {
30            match globset::Glob::new(pattern) {
31                Ok(glob) => {
32                    let spec = SchemaSpec::from_toml(value);
33                    compiled_schemas.push((glob.compile_matcher(), spec));
34                }
35                Err(e) => {
36                    diagnostics.push(Diagnostic {
37                        rule: "schema-violation".into(),
38                        message: format!("invalid schema glob \"{pattern}\": {e}"),
39                        fix: Some(format!(
40                            "fix the glob pattern \"{pattern}\" in [rules.schema-violation.options.schemas]"
41                        )),
42                        ..Default::default()
43                    });
44                }
45            }
46        }
47
48        for (path, node) in &ctx.graph.graph.nodes {
49            if node.node_type != NodeType::File {
50                continue;
51            }
52
53            // Collect all metadata across parser namespaces into one merged view
54            let metadata = merge_metadata(&node.metadata);
55            let source = metadata_source(&node.metadata);
56
57            // Check global required fields
58            for field in &global_required {
59                if !has_field(&metadata, field) {
60                    diagnostics.push(Diagnostic {
61                        rule: "schema-violation".into(),
62                        message: format!("missing required field \"{field}\""),
63                        node: Some(path.clone()),
64                        fix: Some(format!("add \"{field}\" to {source} in {path}")),
65                        ..Default::default()
66                    });
67                }
68            }
69
70            // Check per-glob schemas
71            for (matcher, spec) in &compiled_schemas {
72                if !matcher.is_match(path) {
73                    continue;
74                }
75
76                for field in &spec.required {
77                    if !has_field(&metadata, field) {
78                        diagnostics.push(Diagnostic {
79                            rule: "schema-violation".into(),
80                            message: format!("missing required field \"{field}\""),
81                            node: Some(path.clone()),
82                            fix: Some(format!("add \"{field}\" to {source} in {path}")),
83                            ..Default::default()
84                        });
85                    }
86                }
87
88                for (field, allowed_values) in &spec.allowed {
89                    if let Some(value) = get_field(&metadata, field)
90                        && let Some(s) = value_as_string(value)
91                        && !allowed_values.iter().any(|av| av == &s)
92                    {
93                        diagnostics.push(Diagnostic {
94                            rule: "schema-violation".into(),
95                            message: format!(
96                                "field \"{field}\" has value \"{s}\", allowed: [{}]",
97                                allowed_values.join(", ")
98                            ),
99                            node: Some(path.clone()),
100                            fix: Some(format!(
101                                "change \"{field}\" in {path} to one of: {}",
102                                allowed_values.join(", ")
103                            )),
104                            ..Default::default()
105                        });
106                    }
107                }
108            }
109        }
110
111        diagnostics.sort_by(|a, b| a.node.cmp(&b.node));
112        diagnostics
113    }
114}
115
/// Schema constraints configured for a single glob pattern.
struct SchemaSpec {
    /// Names of metadata fields that must be present and non-null.
    required: Vec<String>,
    /// Pairs of field name and the list of values that field may take.
    allowed: Vec<(String, Vec<String>)>,
}
120
121impl SchemaSpec {
122    fn from_toml(value: &toml::Value) -> Self {
123        let required = extract_string_array(value, "required");
124        let allowed = value
125            .get("allowed")
126            .and_then(|v| v.as_table())
127            .map(|table| {
128                table
129                    .iter()
130                    .map(|(k, v)| (k.clone(), extract_string_array_direct(v)))
131                    .collect()
132            })
133            .unwrap_or_default();
134        Self { required, allowed }
135    }
136}
137
138fn extract_string_array(value: &toml::Value, key: &str) -> Vec<String> {
139    value
140        .get(key)
141        .and_then(|v| v.as_array())
142        .map(|arr| {
143            arr.iter()
144                .filter_map(|v| v.as_str().map(String::from))
145                .collect()
146        })
147        .unwrap_or_default()
148}
149
150fn extract_string_array_direct(value: &toml::Value) -> Vec<String> {
151    value
152        .as_array()
153        .map(|arr| {
154            arr.iter()
155                .filter_map(|v| v.as_str().map(String::from))
156                .collect()
157        })
158        .unwrap_or_default()
159}
160
161/// Merge metadata from all parser namespaces into a single flat JSON object.
162/// Namespaces are merged in alphabetical order — later namespaces override earlier
163/// ones for conflicting keys (e.g., "markdown" overrides "frontmatter").
164fn merge_metadata(
165    metadata: &std::collections::HashMap<String, serde_json::Value>,
166) -> serde_json::Value {
167    let mut merged = serde_json::Map::new();
168    let mut keys: Vec<&String> = metadata.keys().collect();
169    keys.sort();
170    for key in keys {
171        if let serde_json::Value::Object(map) = &metadata[key] {
172            for (k, v) in map {
173                merged.insert(k.clone(), v.clone());
174            }
175        }
176    }
177    serde_json::Value::Object(merged)
178}
179
180/// Return a human-readable label for the metadata source.
181/// When a single parser contributed metadata, name it (e.g. "frontmatter").
182/// Otherwise fall back to the generic "metadata".
183fn metadata_source(metadata: &std::collections::HashMap<String, serde_json::Value>) -> String {
184    let keys: Vec<&String> = metadata.keys().collect();
185    if keys.len() == 1 {
186        keys[0].clone()
187    } else {
188        "metadata".to_string()
189    }
190}
191
192fn has_field(metadata: &serde_json::Value, field: &str) -> bool {
193    metadata.get(field).is_some_and(|v| !v.is_null())
194}
195
196fn get_field<'a>(metadata: &'a serde_json::Value, field: &str) -> Option<&'a serde_json::Value> {
197    metadata.get(field).filter(|v| !v.is_null())
198}
199
200fn value_as_string(value: &serde_json::Value) -> Option<String> {
201    match value {
202        serde_json::Value::String(s) => Some(s.clone()),
203        serde_json::Value::Number(n) => Some(n.to_string()),
204        serde_json::Value::Bool(b) => Some(b.to_string()),
205        _ => None,
206    }
207}
208
#[cfg(test)]
mod tests {
    use super::*;
    use crate::diagnostic::Diagnostic;
    use crate::graph::test_helpers::make_enriched;
    use crate::graph::{Graph, Node, NodeType};
    use crate::rules::RuleContext;
    use std::collections::HashMap;

    /// Builds an included file node carrying the given per-parser metadata map.
    fn file_node(path: &str, metadata: HashMap<String, serde_json::Value>) -> Node {
        Node {
            path: path.into(),
            node_type: NodeType::File,
            hash: None,
            graph: None,
            is_graph: false,
            metadata,
            included: true,
        }
    }

    /// Builds a file node whose only metadata namespace is "frontmatter".
    fn node_with_metadata(path: &str, metadata: serde_json::Value) -> Node {
        file_node(
            path,
            HashMap::from([("frontmatter".to_string(), metadata)]),
        )
    }

    /// Runs the rule over a graph made of `nodes`, with `options` parsed from
    /// TOML source (or no options at all when `None`).
    fn run_rule(nodes: Vec<Node>, options: Option<&str>) -> Vec<Diagnostic> {
        let mut graph = Graph::new();
        for node in nodes {
            graph.add_node(node);
        }
        let enriched = make_enriched(graph);

        let options: Option<toml::Value> =
            options.map(|src| toml::from_str(src).expect("test options must be valid TOML"));
        let ctx = RuleContext {
            graph: &enriched,
            options: options.as_ref(),
        };
        SchemaViolationRule.evaluate(&ctx)
    }

    #[test]
    fn detects_missing_required_field() {
        let diagnostics = run_rule(
            vec![node_with_metadata(
                "doc.md",
                serde_json::json!({"status": "draft"}),
            )],
            Some("required = [\"title\"]"),
        );

        assert_eq!(diagnostics.len(), 1);
        assert!(diagnostics[0].message.contains("title"));
        assert_eq!(diagnostics[0].node.as_deref(), Some("doc.md"));
        // Fix message names the parser when there's a single contributor
        let fix = diagnostics[0].fix.as_ref().unwrap();
        assert!(
            fix.contains("frontmatter"),
            "fix should name the parser: {fix}"
        );
    }

    #[test]
    fn passes_when_required_field_present() {
        let diagnostics = run_rule(
            vec![node_with_metadata(
                "doc.md",
                serde_json::json!({"title": "Hello"}),
            )],
            Some("required = [\"title\"]"),
        );

        assert!(diagnostics.is_empty());
    }

    #[test]
    fn detects_per_glob_required() {
        let diagnostics = run_rule(
            vec![
                node_with_metadata(
                    "observations/note.md",
                    serde_json::json!({"title": "Note"}),
                ),
                // This file does not match observations/*.md, so the schema
                // must not apply to it.
                node_with_metadata("readme.md", serde_json::json!({"title": "README"})),
            ],
            Some(
                r#"
                [schemas."observations/*.md"]
                required = ["title", "date", "status"]
                "#,
            ),
        );

        // observations/note.md missing "date" and "status"
        assert_eq!(diagnostics.len(), 2);
        let messages: Vec<&str> = diagnostics.iter().map(|d| d.message.as_str()).collect();
        assert!(messages.iter().any(|m| m.contains("date")));
        assert!(messages.iter().any(|m| m.contains("status")));
    }

    #[test]
    fn detects_disallowed_value() {
        let diagnostics = run_rule(
            vec![node_with_metadata(
                "observations/note.md",
                serde_json::json!({"title": "Note", "status": "invalid"}),
            )],
            Some(
                r#"
                [schemas."observations/*.md"]
                required = ["title"]
                allowed.status = ["draft", "review", "final"]
                "#,
            ),
        );

        assert_eq!(diagnostics.len(), 1);
        assert!(diagnostics[0].message.contains("invalid"));
        assert!(diagnostics[0].message.contains("allowed"));
    }

    #[test]
    fn allowed_value_passes() {
        let diagnostics = run_rule(
            vec![node_with_metadata(
                "observations/note.md",
                serde_json::json!({"title": "Note", "status": "draft"}),
            )],
            Some(
                r#"
                [schemas."observations/*.md"]
                allowed.status = ["draft", "review", "final"]
                "#,
            ),
        );

        assert!(diagnostics.is_empty());
    }

    #[test]
    fn no_options_no_diagnostics() {
        let diagnostics = run_rule(
            vec![node_with_metadata(
                "doc.md",
                serde_json::json!({"title": "Hello"}),
            )],
            None,
        );

        assert!(diagnostics.is_empty());
    }

    #[test]
    fn flags_required_field_on_node_without_metadata() {
        let diagnostics = run_rule(
            vec![file_node("no-frontmatter.md", HashMap::new())],
            Some("required = [\"title\"]"),
        );

        // No metadata means no fields — should flag the missing required field
        assert_eq!(diagnostics.len(), 1);
        assert!(diagnostics[0].message.contains("title"));
    }

    #[test]
    fn reports_invalid_schema_glob() {
        let diagnostics = run_rule(
            vec![node_with_metadata(
                "doc.md",
                serde_json::json!({"title": "Doc"}),
            )],
            Some(
                r#"
                [schemas."[unclosed"]
                required = ["title"]
                "#,
            ),
        );

        // The broken pattern is reported once and is not tied to any node.
        assert_eq!(diagnostics.len(), 1);
        assert!(diagnostics[0].message.contains("invalid schema glob"));
        assert!(diagnostics[0].node.is_none());
    }
}