Skip to main content

drft/rules/
schema_violation.rs

1use crate::diagnostic::Diagnostic;
2use crate::graph::NodeType;
3use crate::rules::{Rule, RuleContext};
4
/// Rule that checks file-node metadata against a configured schema.
///
/// It reports required fields that are missing and fields whose value is not
/// in the configured list of allowed values. The rule carries no state.
#[derive(Debug, Clone, Copy, Default)]
pub struct SchemaViolationRule;
6
7impl Rule for SchemaViolationRule {
8    fn name(&self) -> &str {
9        "schema-violation"
10    }
11
12    fn evaluate(&self, ctx: &RuleContext) -> Vec<Diagnostic> {
13        let options = match ctx.options {
14            Some(opts) => opts,
15            None => return vec![],
16        };
17
18        let global_required = extract_string_array(options, "required");
19        let schemas = options
20            .get("schemas")
21            .and_then(|v| v.as_table())
22            .cloned()
23            .unwrap_or_default();
24
25        let mut diagnostics = Vec::new();
26
27        // Pre-compile glob matchers for schemas
28        let mut compiled_schemas: Vec<(globset::GlobMatcher, SchemaSpec)> = Vec::new();
29        for (pattern, value) in &schemas {
30            match globset::Glob::new(pattern) {
31                Ok(glob) => {
32                    let spec = SchemaSpec::from_toml(value);
33                    compiled_schemas.push((glob.compile_matcher(), spec));
34                }
35                Err(e) => {
36                    diagnostics.push(Diagnostic {
37                        rule: "schema-violation".into(),
38                        message: format!("invalid schema glob \"{pattern}\": {e}"),
39                        fix: Some(format!(
40                            "fix the glob pattern \"{pattern}\" in [rules.schema-violation.options.schemas]"
41                        )),
42                        ..Default::default()
43                    });
44                }
45            }
46        }
47
48        for (path, node) in &ctx.graph.graph.nodes {
49            if node.node_type != NodeType::File {
50                continue;
51            }
52
53            // Collect all metadata across parser namespaces into one merged view
54            let metadata = merge_metadata(&node.metadata);
55            let source = metadata_source(&node.metadata);
56
57            // Check global required fields
58            for field in &global_required {
59                if !has_field(&metadata, field) {
60                    diagnostics.push(Diagnostic {
61                        rule: "schema-violation".into(),
62                        message: format!("missing required field \"{field}\""),
63                        node: Some(path.clone()),
64                        fix: Some(format!("add \"{field}\" to {source} in {path}")),
65                        ..Default::default()
66                    });
67                }
68            }
69
70            // Check per-glob schemas
71            for (matcher, spec) in &compiled_schemas {
72                if !matcher.is_match(path) {
73                    continue;
74                }
75
76                for field in &spec.required {
77                    if !has_field(&metadata, field) {
78                        diagnostics.push(Diagnostic {
79                            rule: "schema-violation".into(),
80                            message: format!("missing required field \"{field}\""),
81                            node: Some(path.clone()),
82                            fix: Some(format!("add \"{field}\" to {source} in {path}")),
83                            ..Default::default()
84                        });
85                    }
86                }
87
88                for (field, allowed_values) in &spec.allowed {
89                    if let Some(value) = get_field(&metadata, field)
90                        && let Some(s) = value_as_string(value)
91                        && !allowed_values.iter().any(|av| av == &s)
92                    {
93                        diagnostics.push(Diagnostic {
94                            rule: "schema-violation".into(),
95                            message: format!(
96                                "field \"{field}\" has value \"{s}\", allowed: [{}]",
97                                allowed_values.join(", ")
98                            ),
99                            node: Some(path.clone()),
100                            fix: Some(format!(
101                                "change \"{field}\" in {path} to one of: {}",
102                                allowed_values.join(", ")
103                            )),
104                            ..Default::default()
105                        });
106                    }
107                }
108            }
109        }
110
111        diagnostics.sort_by(|a, b| a.node.cmp(&b.node));
112        diagnostics
113    }
114}
115
116struct SchemaSpec {
117    required: Vec<String>,
118    allowed: Vec<(String, Vec<String>)>,
119}
120
121impl SchemaSpec {
122    fn from_toml(value: &toml::Value) -> Self {
123        let required = extract_string_array(value, "required");
124        let allowed = value
125            .get("allowed")
126            .and_then(|v| v.as_table())
127            .map(|table| {
128                table
129                    .iter()
130                    .map(|(k, v)| (k.clone(), extract_string_array_direct(v)))
131                    .collect()
132            })
133            .unwrap_or_default();
134        Self { required, allowed }
135    }
136}
137
138fn extract_string_array(value: &toml::Value, key: &str) -> Vec<String> {
139    value
140        .get(key)
141        .and_then(|v| v.as_array())
142        .map(|arr| {
143            arr.iter()
144                .filter_map(|v| v.as_str().map(String::from))
145                .collect()
146        })
147        .unwrap_or_default()
148}
149
150fn extract_string_array_direct(value: &toml::Value) -> Vec<String> {
151    value
152        .as_array()
153        .map(|arr| {
154            arr.iter()
155                .filter_map(|v| v.as_str().map(String::from))
156                .collect()
157        })
158        .unwrap_or_default()
159}
160
161/// Merge metadata from all parser namespaces into a single flat JSON object.
162/// Namespaces are merged in alphabetical order — later namespaces override earlier
163/// ones for conflicting keys (e.g., "markdown" overrides "frontmatter").
164fn merge_metadata(
165    metadata: &std::collections::HashMap<String, serde_json::Value>,
166) -> serde_json::Value {
167    let mut merged = serde_json::Map::new();
168    let mut keys: Vec<&String> = metadata.keys().collect();
169    keys.sort();
170    for key in keys {
171        if let serde_json::Value::Object(map) = &metadata[key] {
172            for (k, v) in map {
173                merged.insert(k.clone(), v.clone());
174            }
175        }
176    }
177    serde_json::Value::Object(merged)
178}
179
180/// Return a human-readable label for the metadata source.
181/// When a single parser contributed metadata, name it (e.g. "frontmatter").
182/// Otherwise fall back to the generic "metadata".
183fn metadata_source(metadata: &std::collections::HashMap<String, serde_json::Value>) -> String {
184    let keys: Vec<&String> = metadata.keys().collect();
185    if keys.len() == 1 {
186        keys[0].clone()
187    } else {
188        "metadata".to_string()
189    }
190}
191
192fn has_field(metadata: &serde_json::Value, field: &str) -> bool {
193    metadata.get(field).is_some_and(|v| !v.is_null())
194}
195
196fn get_field<'a>(metadata: &'a serde_json::Value, field: &str) -> Option<&'a serde_json::Value> {
197    metadata.get(field).filter(|v| !v.is_null())
198}
199
200fn value_as_string(value: &serde_json::Value) -> Option<String> {
201    match value {
202        serde_json::Value::String(s) => Some(s.clone()),
203        serde_json::Value::Number(n) => Some(n.to_string()),
204        serde_json::Value::Bool(b) => Some(b.to_string()),
205        _ => None,
206    }
207}
208
#[cfg(test)]
mod tests {
    use super::*;
    use crate::analyses::EnrichedGraph;
    use crate::config::Config;
    use crate::diagnostic::Diagnostic;
    use crate::graph::{Graph, Node, NodeType};
    use crate::rules::RuleContext;
    use std::collections::HashMap;

    /// Options requiring a `title` on every file node.
    const REQUIRE_TITLE: &str = "required = [\"title\"]";

    fn make_enriched(graph: Graph) -> EnrichedGraph {
        crate::analyses::enrich_graph(graph, std::path::Path::new("."), &Config::defaults(), None)
    }

    /// File node whose only metadata namespace is "frontmatter".
    fn node_with_metadata(path: &str, metadata: serde_json::Value) -> Node {
        let mut meta_map = HashMap::new();
        meta_map.insert("frontmatter".to_string(), metadata);
        Node {
            path: path.into(),
            node_type: NodeType::File,
            hash: None,
            graph: None,
            metadata: meta_map,
        }
    }

    /// Builds a graph from `nodes`, runs the rule with the given TOML options
    /// (`None` = rule has no options) and returns the diagnostics.
    fn run_rule(nodes: Vec<Node>, options_toml: Option<&str>) -> Vec<Diagnostic> {
        let mut graph = Graph::new();
        for node in nodes {
            graph.add_node(node);
        }
        let enriched = make_enriched(graph);

        let options: Option<toml::Value> =
            options_toml.map(|src| toml::from_str(src).expect("test options must be valid TOML"));
        let ctx = RuleContext {
            graph: &enriched,
            options: options.as_ref(),
        };
        SchemaViolationRule.evaluate(&ctx)
    }

    #[test]
    fn detects_missing_required_field() {
        let diagnostics = run_rule(
            vec![node_with_metadata(
                "doc.md",
                serde_json::json!({"status": "draft"}),
            )],
            Some(REQUIRE_TITLE),
        );

        assert_eq!(diagnostics.len(), 1);
        assert!(diagnostics[0].message.contains("title"));
        assert_eq!(diagnostics[0].node.as_deref(), Some("doc.md"));
        // Fix message names the parser when there's a single contributor
        let fix = diagnostics[0].fix.as_ref().unwrap();
        assert!(
            fix.contains("frontmatter"),
            "fix should name the parser: {fix}"
        );
    }

    #[test]
    fn passes_when_required_field_present() {
        let diagnostics = run_rule(
            vec![node_with_metadata(
                "doc.md",
                serde_json::json!({"title": "Hello"}),
            )],
            Some(REQUIRE_TITLE),
        );

        assert!(diagnostics.is_empty());
    }

    #[test]
    fn detects_per_glob_required() {
        let diagnostics = run_rule(
            vec![
                node_with_metadata(
                    "observations/note.md",
                    serde_json::json!({"title": "Note"}),
                ),
                // This file matches *.md but not observations/*.md
                node_with_metadata("readme.md", serde_json::json!({"title": "README"})),
            ],
            Some(
                r#"
            [schemas."observations/*.md"]
            required = ["title", "date", "status"]
            "#,
            ),
        );

        // observations/note.md missing "date" and "status"
        assert_eq!(diagnostics.len(), 2);
        let messages: Vec<&str> = diagnostics.iter().map(|d| d.message.as_str()).collect();
        assert!(messages.iter().any(|m| m.contains("date")));
        assert!(messages.iter().any(|m| m.contains("status")));
    }

    #[test]
    fn detects_disallowed_value() {
        let diagnostics = run_rule(
            vec![node_with_metadata(
                "observations/note.md",
                serde_json::json!({"title": "Note", "status": "invalid"}),
            )],
            Some(
                r#"
            [schemas."observations/*.md"]
            required = ["title"]
            allowed.status = ["draft", "review", "final"]
            "#,
            ),
        );

        assert_eq!(diagnostics.len(), 1);
        assert!(diagnostics[0].message.contains("invalid"));
        assert!(diagnostics[0].message.contains("allowed"));
    }

    #[test]
    fn allowed_value_passes() {
        let diagnostics = run_rule(
            vec![node_with_metadata(
                "observations/note.md",
                serde_json::json!({"title": "Note", "status": "draft"}),
            )],
            Some(
                r#"
            [schemas."observations/*.md"]
            allowed.status = ["draft", "review", "final"]
            "#,
            ),
        );

        assert!(diagnostics.is_empty());
    }

    #[test]
    fn no_options_no_diagnostics() {
        let diagnostics = run_rule(
            vec![node_with_metadata(
                "doc.md",
                serde_json::json!({"title": "Hello"}),
            )],
            None,
        );

        assert!(diagnostics.is_empty());
    }

    // Renamed from `skips_nodes_without_metadata`: such nodes are not skipped,
    // they are flagged for every required field.
    #[test]
    fn flags_nodes_without_metadata() {
        let bare = Node {
            path: "no-frontmatter.md".into(),
            node_type: NodeType::File,
            hash: None,
            graph: None,
            metadata: HashMap::new(),
        };
        let diagnostics = run_rule(vec![bare], Some(REQUIRE_TITLE));

        // No metadata means no fields — should flag the missing required field
        assert_eq!(diagnostics.len(), 1);
        assert!(diagnostics[0].message.contains("title"));
    }
}
408}