1use crate::diagnostic::Diagnostic;
2use crate::graph::NodeType;
3use crate::rules::{Rule, RuleContext};
4
5pub struct SchemaViolationRule;
6
7impl Rule for SchemaViolationRule {
8 fn name(&self) -> &str {
9 "schema-violation"
10 }
11
12 fn evaluate(&self, ctx: &RuleContext) -> Vec<Diagnostic> {
13 let options = match ctx.options {
14 Some(opts) => opts,
15 None => return vec![],
16 };
17
18 let global_required = extract_string_array(options, "required");
19 let schemas = options
20 .get("schemas")
21 .and_then(|v| v.as_table())
22 .cloned()
23 .unwrap_or_default();
24
25 let mut diagnostics = Vec::new();
26
27 let mut compiled_schemas: Vec<(globset::GlobMatcher, SchemaSpec)> = Vec::new();
29 for (pattern, value) in &schemas {
30 match globset::Glob::new(pattern) {
31 Ok(glob) => {
32 let spec = SchemaSpec::from_toml(value);
33 compiled_schemas.push((glob.compile_matcher(), spec));
34 }
35 Err(e) => {
36 diagnostics.push(Diagnostic {
37 rule: "schema-violation".into(),
38 message: format!("invalid schema glob \"{pattern}\": {e}"),
39 fix: Some(format!(
40 "fix the glob pattern \"{pattern}\" in [rules.schema-violation.options.schemas]"
41 )),
42 ..Default::default()
43 });
44 }
45 }
46 }
47
48 for (path, node) in &ctx.graph.graph.nodes {
49 if node.node_type != NodeType::File {
50 continue;
51 }
52
53 let metadata = merge_metadata(&node.metadata);
55 let source = metadata_source(&node.metadata);
56
57 for field in &global_required {
59 if !has_field(&metadata, field) {
60 diagnostics.push(Diagnostic {
61 rule: "schema-violation".into(),
62 message: format!("missing required field \"{field}\""),
63 node: Some(path.clone()),
64 fix: Some(format!("add \"{field}\" to {source} in {path}")),
65 ..Default::default()
66 });
67 }
68 }
69
70 for (matcher, spec) in &compiled_schemas {
72 if !matcher.is_match(path) {
73 continue;
74 }
75
76 for field in &spec.required {
77 if !has_field(&metadata, field) {
78 diagnostics.push(Diagnostic {
79 rule: "schema-violation".into(),
80 message: format!("missing required field \"{field}\""),
81 node: Some(path.clone()),
82 fix: Some(format!("add \"{field}\" to {source} in {path}")),
83 ..Default::default()
84 });
85 }
86 }
87
88 for (field, allowed_values) in &spec.allowed {
89 if let Some(value) = get_field(&metadata, field)
90 && let Some(s) = value_as_string(value)
91 && !allowed_values.iter().any(|av| av == &s)
92 {
93 diagnostics.push(Diagnostic {
94 rule: "schema-violation".into(),
95 message: format!(
96 "field \"{field}\" has value \"{s}\", allowed: [{}]",
97 allowed_values.join(", ")
98 ),
99 node: Some(path.clone()),
100 fix: Some(format!(
101 "change \"{field}\" in {path} to one of: {}",
102 allowed_values.join(", ")
103 )),
104 ..Default::default()
105 });
106 }
107 }
108 }
109 }
110
111 diagnostics.sort_by(|a, b| a.node.cmp(&b.node));
112 diagnostics
113 }
114}
115
116struct SchemaSpec {
117 required: Vec<String>,
118 allowed: Vec<(String, Vec<String>)>,
119}
120
121impl SchemaSpec {
122 fn from_toml(value: &toml::Value) -> Self {
123 let required = extract_string_array(value, "required");
124 let allowed = value
125 .get("allowed")
126 .and_then(|v| v.as_table())
127 .map(|table| {
128 table
129 .iter()
130 .map(|(k, v)| (k.clone(), extract_string_array_direct(v)))
131 .collect()
132 })
133 .unwrap_or_default();
134 Self { required, allowed }
135 }
136}
137
138fn extract_string_array(value: &toml::Value, key: &str) -> Vec<String> {
139 value
140 .get(key)
141 .and_then(|v| v.as_array())
142 .map(|arr| {
143 arr.iter()
144 .filter_map(|v| v.as_str().map(String::from))
145 .collect()
146 })
147 .unwrap_or_default()
148}
149
150fn extract_string_array_direct(value: &toml::Value) -> Vec<String> {
151 value
152 .as_array()
153 .map(|arr| {
154 arr.iter()
155 .filter_map(|v| v.as_str().map(String::from))
156 .collect()
157 })
158 .unwrap_or_default()
159}
160
161fn merge_metadata(
165 metadata: &std::collections::HashMap<String, serde_json::Value>,
166) -> serde_json::Value {
167 let mut merged = serde_json::Map::new();
168 let mut keys: Vec<&String> = metadata.keys().collect();
169 keys.sort();
170 for key in keys {
171 if let serde_json::Value::Object(map) = &metadata[key] {
172 for (k, v) in map {
173 merged.insert(k.clone(), v.clone());
174 }
175 }
176 }
177 serde_json::Value::Object(merged)
178}
179
180fn metadata_source(metadata: &std::collections::HashMap<String, serde_json::Value>) -> String {
184 let keys: Vec<&String> = metadata.keys().collect();
185 if keys.len() == 1 {
186 keys[0].clone()
187 } else {
188 "metadata".to_string()
189 }
190}
191
192fn has_field(metadata: &serde_json::Value, field: &str) -> bool {
193 metadata.get(field).is_some_and(|v| !v.is_null())
194}
195
196fn get_field<'a>(metadata: &'a serde_json::Value, field: &str) -> Option<&'a serde_json::Value> {
197 metadata.get(field).filter(|v| !v.is_null())
198}
199
200fn value_as_string(value: &serde_json::Value) -> Option<String> {
201 match value {
202 serde_json::Value::String(s) => Some(s.clone()),
203 serde_json::Value::Number(n) => Some(n.to_string()),
204 serde_json::Value::Bool(b) => Some(b.to_string()),
205 _ => None,
206 }
207}
208
#[cfg(test)]
mod tests {
    use super::*;
    use crate::graph::test_helpers::make_enriched;
    use crate::graph::{Graph, Node, NodeType};
    use crate::rules::RuleContext;
    use std::collections::HashMap;

    /// Options that only declare `title` as globally required.
    const REQUIRE_TITLE: &str = "required = [\"title\"]";

    /// Builds a file node whose metadata sits under the `frontmatter` key.
    fn node_with_metadata(path: &str, metadata: serde_json::Value) -> Node {
        Node {
            path: path.into(),
            node_type: NodeType::File,
            hash: None,
            graph: None,
            is_graph: false,
            metadata: HashMap::from([("frontmatter".to_string(), metadata)]),
            included: true,
        }
    }

    /// Creates a graph containing exactly the given nodes.
    fn graph_of(nodes: Vec<Node>) -> Graph {
        let mut graph = Graph::new();
        for node in nodes {
            graph.add_node(node);
        }
        graph
    }

    /// Runs the rule over `graph`, parsing `options_toml` (if any) as the rule options.
    fn run_rule(graph: Graph, options_toml: Option<&str>) -> Vec<crate::diagnostic::Diagnostic> {
        let enriched = make_enriched(graph);
        let options: Option<toml::Value> =
            options_toml.map(|text| toml::from_str::<toml::Value>(text).unwrap());
        let ctx = RuleContext {
            graph: &enriched,
            options: options.as_ref(),
        };
        SchemaViolationRule.evaluate(&ctx)
    }

    #[test]
    fn detects_missing_required_field() {
        let graph = graph_of(vec![node_with_metadata(
            "doc.md",
            serde_json::json!({"status": "draft"}),
        )]);

        let diagnostics = run_rule(graph, Some(REQUIRE_TITLE));

        assert_eq!(diagnostics.len(), 1);
        let only = &diagnostics[0];
        assert!(only.message.contains("title"));
        assert_eq!(only.node.as_deref(), Some("doc.md"));
        let fix = only.fix.as_ref().unwrap();
        assert!(
            fix.contains("frontmatter"),
            "fix should name the parser: {fix}"
        );
    }

    #[test]
    fn passes_when_required_field_present() {
        let graph = graph_of(vec![node_with_metadata(
            "doc.md",
            serde_json::json!({"title": "Hello"}),
        )]);

        let diagnostics = run_rule(graph, Some(REQUIRE_TITLE));

        assert!(diagnostics.is_empty());
    }

    #[test]
    fn detects_per_glob_required() {
        // Only the node under `observations/` matches the schema glob.
        let graph = graph_of(vec![
            node_with_metadata("observations/note.md", serde_json::json!({"title": "Note"})),
            node_with_metadata("readme.md", serde_json::json!({"title": "README"})),
        ]);

        let diagnostics = run_rule(
            graph,
            Some(
                r#"
                [schemas."observations/*.md"]
                required = ["title", "date", "status"]
                "#,
            ),
        );

        assert_eq!(diagnostics.len(), 2);
        assert!(diagnostics.iter().any(|d| d.message.contains("date")));
        assert!(diagnostics.iter().any(|d| d.message.contains("status")));
    }

    #[test]
    fn detects_disallowed_value() {
        let graph = graph_of(vec![node_with_metadata(
            "observations/note.md",
            serde_json::json!({"title": "Note", "status": "invalid"}),
        )]);

        let diagnostics = run_rule(
            graph,
            Some(
                r#"
                [schemas."observations/*.md"]
                required = ["title"]
                allowed.status = ["draft", "review", "final"]
                "#,
            ),
        );

        assert_eq!(diagnostics.len(), 1);
        assert!(diagnostics[0].message.contains("invalid"));
        assert!(diagnostics[0].message.contains("allowed"));
    }

    #[test]
    fn allowed_value_passes() {
        let graph = graph_of(vec![node_with_metadata(
            "observations/note.md",
            serde_json::json!({"title": "Note", "status": "draft"}),
        )]);

        let diagnostics = run_rule(
            graph,
            Some(
                r#"
                [schemas."observations/*.md"]
                allowed.status = ["draft", "review", "final"]
                "#,
            ),
        );

        assert!(diagnostics.is_empty());
    }

    #[test]
    fn no_options_no_diagnostics() {
        let graph = graph_of(vec![node_with_metadata(
            "doc.md",
            serde_json::json!({"title": "Hello"}),
        )]);

        let diagnostics = run_rule(graph, None);

        assert!(diagnostics.is_empty());
    }

    #[test]
    fn skips_nodes_without_metadata() {
        // Despite the test name, a file node with an empty metadata map is
        // still checked: the globally required field is reported as missing.
        let bare = Node {
            metadata: HashMap::new(),
            ..node_with_metadata("no-frontmatter.md", serde_json::Value::Null)
        };

        let diagnostics = run_rule(graph_of(vec![bare]), Some(REQUIRE_TITLE));

        assert_eq!(diagnostics.len(), 1);
        assert!(diagnostics[0].message.contains("title"));
    }
}