1use crate::diagnostic::Diagnostic;
2use crate::graph::NodeType;
3use crate::rules::{Rule, RuleContext};
4
5pub struct SchemaViolationRule;
6
7impl Rule for SchemaViolationRule {
8 fn name(&self) -> &str {
9 "schema-violation"
10 }
11
12 fn evaluate(&self, ctx: &RuleContext) -> Vec<Diagnostic> {
13 let options = match ctx.options {
14 Some(opts) => opts,
15 None => return vec![],
16 };
17
18 let global_required = extract_string_array(options, "required");
19 let schemas = options
20 .get("schemas")
21 .and_then(|v| v.as_table())
22 .cloned()
23 .unwrap_or_default();
24
25 let mut diagnostics = Vec::new();
26
27 let mut compiled_schemas: Vec<(globset::GlobMatcher, SchemaSpec)> = Vec::new();
29 for (pattern, value) in &schemas {
30 match globset::Glob::new(pattern) {
31 Ok(glob) => {
32 let spec = SchemaSpec::from_toml(value);
33 compiled_schemas.push((glob.compile_matcher(), spec));
34 }
35 Err(e) => {
36 diagnostics.push(Diagnostic {
37 rule: "schema-violation".into(),
38 message: format!("invalid schema glob \"{pattern}\": {e}"),
39 fix: Some(format!(
40 "fix the glob pattern \"{pattern}\" in [rules.schema-violation.options.schemas]"
41 )),
42 ..Default::default()
43 });
44 }
45 }
46 }
47
48 for (path, node) in &ctx.graph.graph.nodes {
49 if node.node_type != NodeType::File {
50 continue;
51 }
52
53 let metadata = merge_metadata(&node.metadata);
55 let source = metadata_source(&node.metadata);
56
57 for field in &global_required {
59 if !has_field(&metadata, field) {
60 diagnostics.push(Diagnostic {
61 rule: "schema-violation".into(),
62 message: format!("missing required field \"{field}\""),
63 node: Some(path.clone()),
64 fix: Some(format!("add \"{field}\" to {source} in {path}")),
65 ..Default::default()
66 });
67 }
68 }
69
70 for (matcher, spec) in &compiled_schemas {
72 if !matcher.is_match(path) {
73 continue;
74 }
75
76 for field in &spec.required {
77 if !has_field(&metadata, field) {
78 diagnostics.push(Diagnostic {
79 rule: "schema-violation".into(),
80 message: format!("missing required field \"{field}\""),
81 node: Some(path.clone()),
82 fix: Some(format!("add \"{field}\" to {source} in {path}")),
83 ..Default::default()
84 });
85 }
86 }
87
88 for (field, allowed_values) in &spec.allowed {
89 if let Some(value) = get_field(&metadata, field)
90 && let Some(s) = value_as_string(value)
91 && !allowed_values.iter().any(|av| av == &s)
92 {
93 diagnostics.push(Diagnostic {
94 rule: "schema-violation".into(),
95 message: format!(
96 "field \"{field}\" has value \"{s}\", allowed: [{}]",
97 allowed_values.join(", ")
98 ),
99 node: Some(path.clone()),
100 fix: Some(format!(
101 "change \"{field}\" in {path} to one of: {}",
102 allowed_values.join(", ")
103 )),
104 ..Default::default()
105 });
106 }
107 }
108 }
109 }
110
111 diagnostics.sort_by(|a, b| a.node.cmp(&b.node));
112 diagnostics
113 }
114}
115
116struct SchemaSpec {
117 required: Vec<String>,
118 allowed: Vec<(String, Vec<String>)>,
119}
120
121impl SchemaSpec {
122 fn from_toml(value: &toml::Value) -> Self {
123 let required = extract_string_array(value, "required");
124 let allowed = value
125 .get("allowed")
126 .and_then(|v| v.as_table())
127 .map(|table| {
128 table
129 .iter()
130 .map(|(k, v)| (k.clone(), extract_string_array_direct(v)))
131 .collect()
132 })
133 .unwrap_or_default();
134 Self { required, allowed }
135 }
136}
137
138fn extract_string_array(value: &toml::Value, key: &str) -> Vec<String> {
139 value
140 .get(key)
141 .and_then(|v| v.as_array())
142 .map(|arr| {
143 arr.iter()
144 .filter_map(|v| v.as_str().map(String::from))
145 .collect()
146 })
147 .unwrap_or_default()
148}
149
150fn extract_string_array_direct(value: &toml::Value) -> Vec<String> {
151 value
152 .as_array()
153 .map(|arr| {
154 arr.iter()
155 .filter_map(|v| v.as_str().map(String::from))
156 .collect()
157 })
158 .unwrap_or_default()
159}
160
161fn merge_metadata(
165 metadata: &std::collections::HashMap<String, serde_json::Value>,
166) -> serde_json::Value {
167 let mut merged = serde_json::Map::new();
168 let mut keys: Vec<&String> = metadata.keys().collect();
169 keys.sort();
170 for key in keys {
171 if let serde_json::Value::Object(map) = &metadata[key] {
172 for (k, v) in map {
173 merged.insert(k.clone(), v.clone());
174 }
175 }
176 }
177 serde_json::Value::Object(merged)
178}
179
180fn metadata_source(metadata: &std::collections::HashMap<String, serde_json::Value>) -> String {
184 let keys: Vec<&String> = metadata.keys().collect();
185 if keys.len() == 1 {
186 keys[0].clone()
187 } else {
188 "metadata".to_string()
189 }
190}
191
192fn has_field(metadata: &serde_json::Value, field: &str) -> bool {
193 metadata.get(field).is_some_and(|v| !v.is_null())
194}
195
196fn get_field<'a>(metadata: &'a serde_json::Value, field: &str) -> Option<&'a serde_json::Value> {
197 metadata.get(field).filter(|v| !v.is_null())
198}
199
200fn value_as_string(value: &serde_json::Value) -> Option<String> {
201 match value {
202 serde_json::Value::String(s) => Some(s.clone()),
203 serde_json::Value::Number(n) => Some(n.to_string()),
204 serde_json::Value::Bool(b) => Some(b.to_string()),
205 _ => None,
206 }
207}
208
#[cfg(test)]
mod tests {
    use super::*;
    use crate::graph::test_helpers::make_enriched;
    use crate::graph::{Graph, Node, NodeType};
    use crate::rules::RuleContext;
    use std::collections::HashMap;

    /// Options requiring a `title` field on every file node.
    const REQUIRE_TITLE: &str = r#"required = ["title"]"#;

    /// Builds a file node at `path` carrying the given metadata sections.
    fn file_node(path: &str, metadata: HashMap<String, serde_json::Value>) -> Node {
        Node {
            path: path.into(),
            node_type: NodeType::File,
            hash: None,
            graph: None,
            is_graph: false,
            metadata,
        }
    }

    /// Builds a file node whose only metadata section is `frontmatter`.
    fn node_with_metadata(path: &str, metadata: serde_json::Value) -> Node {
        file_node(
            path,
            HashMap::from([("frontmatter".to_string(), metadata)]),
        )
    }

    /// Runs the rule over a graph made of `nodes`, with options parsed from
    /// `options_toml` (or no options at all when `None`).
    fn run_rule(
        nodes: Vec<Node>,
        options_toml: Option<&str>,
    ) -> Vec<crate::diagnostic::Diagnostic> {
        let mut graph = Graph::new();
        for node in nodes {
            graph.add_node(node);
        }

        let enriched = make_enriched(graph);
        let options: Option<toml::Value> = options_toml.map(|text| toml::from_str(text).unwrap());
        let ctx = RuleContext {
            graph: &enriched,
            options: options.as_ref(),
        };
        SchemaViolationRule.evaluate(&ctx)
    }

    #[test]
    fn detects_missing_required_field() {
        let diagnostics = run_rule(
            vec![node_with_metadata(
                "doc.md",
                serde_json::json!({"status": "draft"}),
            )],
            Some(REQUIRE_TITLE),
        );

        assert_eq!(diagnostics.len(), 1);
        let only = &diagnostics[0];
        assert!(only.message.contains("title"));
        assert_eq!(only.node.as_deref(), Some("doc.md"));
        let fix = only.fix.as_ref().unwrap();
        assert!(
            fix.contains("frontmatter"),
            "fix should name the parser: {fix}"
        );
    }

    #[test]
    fn passes_when_required_field_present() {
        let diagnostics = run_rule(
            vec![node_with_metadata(
                "doc.md",
                serde_json::json!({"title": "Hello"}),
            )],
            Some(REQUIRE_TITLE),
        );

        assert!(diagnostics.is_empty());
    }

    #[test]
    fn detects_per_glob_required() {
        // Only the first node matches the schema glob; `readme.md` must be left alone.
        let nodes = vec![
            node_with_metadata(
                "observations/note.md",
                serde_json::json!({"title": "Note"}),
            ),
            node_with_metadata("readme.md", serde_json::json!({"title": "README"})),
        ];
        let diagnostics = run_rule(
            nodes,
            Some(
                r#"
                [schemas."observations/*.md"]
                required = ["title", "date", "status"]
                "#,
            ),
        );

        assert_eq!(diagnostics.len(), 2);
        let messages: Vec<&str> = diagnostics.iter().map(|d| d.message.as_str()).collect();
        assert!(messages.iter().any(|m| m.contains("date")));
        assert!(messages.iter().any(|m| m.contains("status")));
    }

    #[test]
    fn detects_disallowed_value() {
        let diagnostics = run_rule(
            vec![node_with_metadata(
                "observations/note.md",
                serde_json::json!({"title": "Note", "status": "invalid"}),
            )],
            Some(
                r#"
                [schemas."observations/*.md"]
                required = ["title"]
                allowed.status = ["draft", "review", "final"]
                "#,
            ),
        );

        assert_eq!(diagnostics.len(), 1);
        assert!(diagnostics[0].message.contains("invalid"));
        assert!(diagnostics[0].message.contains("allowed"));
    }

    #[test]
    fn allowed_value_passes() {
        let diagnostics = run_rule(
            vec![node_with_metadata(
                "observations/note.md",
                serde_json::json!({"title": "Note", "status": "draft"}),
            )],
            Some(
                r#"
                [schemas."observations/*.md"]
                allowed.status = ["draft", "review", "final"]
                "#,
            ),
        );

        assert!(diagnostics.is_empty());
    }

    #[test]
    fn no_options_no_diagnostics() {
        let diagnostics = run_rule(
            vec![node_with_metadata(
                "doc.md",
                serde_json::json!({"title": "Hello"}),
            )],
            None,
        );

        assert!(diagnostics.is_empty());
    }

    // Despite the name, a file node with no metadata is not skipped: the
    // assertions below expect the missing `title` to be reported for it.
    #[test]
    fn skips_nodes_without_metadata() {
        let diagnostics = run_rule(
            vec![file_node("no-frontmatter.md", HashMap::new())],
            Some(REQUIRE_TITLE),
        );

        assert_eq!(diagnostics.len(), 1);
        assert!(diagnostics[0].message.contains("title"));
    }
}