1use crate::diagnostic::Diagnostic;
2use crate::rules::{Rule, RuleContext};
3
/// Lint rule that checks node metadata against the configured schema:
/// required fields that must be present and, per path glob, the values a
/// field is allowed to take.
///
/// The rule carries no state; all configuration is read from the
/// `RuleContext` handed to `evaluate`.
#[derive(Debug, Clone, Copy, Default)]
pub struct SchemaViolationRule;
5
6impl Rule for SchemaViolationRule {
7 fn name(&self) -> &str {
8 "schema-violation"
9 }
10
11 fn evaluate(&self, ctx: &RuleContext) -> Vec<Diagnostic> {
12 let options = match ctx.options {
13 Some(opts) => opts,
14 None => return vec![],
15 };
16
17 let global_required = extract_string_array(options, "required");
18 let schemas = options
19 .get("schemas")
20 .and_then(|v| v.as_table())
21 .cloned()
22 .unwrap_or_default();
23
24 let mut diagnostics = Vec::new();
25
26 let mut compiled_schemas: Vec<(globset::GlobMatcher, SchemaSpec)> = Vec::new();
28 for (pattern, value) in &schemas {
29 match globset::Glob::new(pattern) {
30 Ok(glob) => {
31 let spec = SchemaSpec::from_toml(value);
32 compiled_schemas.push((glob.compile_matcher(), spec));
33 }
34 Err(e) => {
35 diagnostics.push(Diagnostic {
36 rule: "schema-violation".into(),
37 message: format!("invalid schema glob \"{pattern}\": {e}"),
38 fix: Some(format!(
39 "fix the glob pattern \"{pattern}\" in [rules.schema-violation.options.schemas]"
40 )),
41 ..Default::default()
42 });
43 }
44 }
45 }
46
47 for (path, node) in &ctx.graph.graph.nodes {
48 let metadata = merge_metadata(&node.metadata);
49 let source = metadata_source(&node.metadata);
50
51 for field in &global_required {
53 if !has_field(&metadata, field) {
54 diagnostics.push(Diagnostic {
55 rule: "schema-violation".into(),
56 message: format!("missing required field \"{field}\""),
57 node: Some(path.clone()),
58 fix: Some(format!("add \"{field}\" to {source} in {path}")),
59 ..Default::default()
60 });
61 }
62 }
63
64 for (matcher, spec) in &compiled_schemas {
66 if !matcher.is_match(path) {
67 continue;
68 }
69
70 for field in &spec.required {
71 if !has_field(&metadata, field) {
72 diagnostics.push(Diagnostic {
73 rule: "schema-violation".into(),
74 message: format!("missing required field \"{field}\""),
75 node: Some(path.clone()),
76 fix: Some(format!("add \"{field}\" to {source} in {path}")),
77 ..Default::default()
78 });
79 }
80 }
81
82 for (field, allowed_values) in &spec.allowed {
83 if let Some(value) = get_field(&metadata, field)
84 && let Some(s) = value_as_string(value)
85 && !allowed_values.iter().any(|av| av == &s)
86 {
87 diagnostics.push(Diagnostic {
88 rule: "schema-violation".into(),
89 message: format!(
90 "field \"{field}\" has value \"{s}\", allowed: [{}]",
91 allowed_values.join(", ")
92 ),
93 node: Some(path.clone()),
94 fix: Some(format!(
95 "change \"{field}\" in {path} to one of: {}",
96 allowed_values.join(", ")
97 )),
98 ..Default::default()
99 });
100 }
101 }
102 }
103 }
104
105 diagnostics.sort_by(|a, b| a.node.cmp(&b.node));
106 diagnostics
107 }
108}
109
110struct SchemaSpec {
111 required: Vec<String>,
112 allowed: Vec<(String, Vec<String>)>,
113}
114
115impl SchemaSpec {
116 fn from_toml(value: &toml::Value) -> Self {
117 let required = extract_string_array(value, "required");
118 let allowed = value
119 .get("allowed")
120 .and_then(|v| v.as_table())
121 .map(|table| {
122 table
123 .iter()
124 .map(|(k, v)| (k.clone(), extract_string_array_direct(v)))
125 .collect()
126 })
127 .unwrap_or_default();
128 Self { required, allowed }
129 }
130}
131
132fn extract_string_array(value: &toml::Value, key: &str) -> Vec<String> {
133 value
134 .get(key)
135 .and_then(|v| v.as_array())
136 .map(|arr| {
137 arr.iter()
138 .filter_map(|v| v.as_str().map(String::from))
139 .collect()
140 })
141 .unwrap_or_default()
142}
143
144fn extract_string_array_direct(value: &toml::Value) -> Vec<String> {
145 value
146 .as_array()
147 .map(|arr| {
148 arr.iter()
149 .filter_map(|v| v.as_str().map(String::from))
150 .collect()
151 })
152 .unwrap_or_default()
153}
154
155fn merge_metadata(
159 metadata: &std::collections::HashMap<String, serde_json::Value>,
160) -> serde_json::Value {
161 let mut merged = serde_json::Map::new();
162 let mut keys: Vec<&String> = metadata.keys().collect();
163 keys.sort();
164 for key in keys {
165 if let serde_json::Value::Object(map) = &metadata[key] {
166 for (k, v) in map {
167 merged.insert(k.clone(), v.clone());
168 }
169 }
170 }
171 serde_json::Value::Object(merged)
172}
173
174fn metadata_source(metadata: &std::collections::HashMap<String, serde_json::Value>) -> String {
178 let keys: Vec<&String> = metadata.keys().collect();
179 if keys.len() == 1 {
180 keys[0].clone()
181 } else {
182 "metadata".to_string()
183 }
184}
185
186fn has_field(metadata: &serde_json::Value, field: &str) -> bool {
187 metadata.get(field).is_some_and(|v| !v.is_null())
188}
189
190fn get_field<'a>(metadata: &'a serde_json::Value, field: &str) -> Option<&'a serde_json::Value> {
191 metadata.get(field).filter(|v| !v.is_null())
192}
193
194fn value_as_string(value: &serde_json::Value) -> Option<String> {
195 match value {
196 serde_json::Value::String(s) => Some(s.clone()),
197 serde_json::Value::Number(n) => Some(n.to_string()),
198 serde_json::Value::Bool(b) => Some(b.to_string()),
199 _ => None,
200 }
201}
202
#[cfg(test)]
mod tests {
    use super::*;
    use crate::graph::test_helpers::make_enriched;
    use crate::graph::{Graph, Node};
    use crate::rules::RuleContext;
    use std::collections::HashMap;

    /// Builds a file node whose only metadata entry is stored under "frontmatter".
    fn node_with_metadata(path: &str, metadata: serde_json::Value) -> Node {
        Node {
            path: path.into(),
            node_type: Some(crate::graph::NodeType::File),
            included: true,
            hash: None,
            metadata: HashMap::from([("frontmatter".to_string(), metadata)]),
        }
    }

    /// Builds a graph containing the given nodes, added in order.
    fn graph_of(nodes: Vec<Node>) -> Graph {
        let mut graph = Graph::new();
        for node in nodes {
            graph.add_node(node);
        }
        graph
    }

    /// Runs the rule over `graph`, parsing `options_toml` as the rule options
    /// when one is supplied.
    fn run_rule(graph: Graph, options_toml: Option<&str>) -> Vec<Diagnostic> {
        let enriched = make_enriched(graph);
        let options: Option<toml::Value> = options_toml.map(|src| toml::from_str(src).unwrap());
        let ctx = RuleContext {
            graph: &enriched,
            options: options.as_ref(),
        };
        SchemaViolationRule.evaluate(&ctx)
    }

    #[test]
    fn detects_missing_required_field() {
        let graph = graph_of(vec![node_with_metadata(
            "doc.md",
            serde_json::json!({"status": "draft"}),
        )]);

        let diagnostics = run_rule(graph, Some("required = [\"title\"]"));

        assert_eq!(diagnostics.len(), 1);
        let only = &diagnostics[0];
        assert!(only.message.contains("title"));
        assert_eq!(only.node.as_deref(), Some("doc.md"));
        // With a single metadata entry the fix hint names that entry's key.
        let fix = only.fix.as_ref().unwrap();
        assert!(
            fix.contains("frontmatter"),
            "fix should name the parser: {fix}"
        );
    }

    #[test]
    fn passes_when_required_field_present() {
        let graph = graph_of(vec![node_with_metadata(
            "doc.md",
            serde_json::json!({"title": "Hello"}),
        )]);

        let diagnostics = run_rule(graph, Some("required = [\"title\"]"));

        assert!(diagnostics.is_empty());
    }

    #[test]
    fn detects_per_glob_required() {
        let graph = graph_of(vec![
            node_with_metadata("observations/note.md", serde_json::json!({"title": "Note"})),
            // Outside the glob, so the schema's requirements do not apply to it.
            node_with_metadata("readme.md", serde_json::json!({"title": "README"})),
        ]);

        let diagnostics = run_rule(
            graph,
            Some(
                r#"
                [schemas."observations/*.md"]
                required = ["title", "date", "status"]
                "#,
            ),
        );

        // Only the matching node is checked, and it lacks "date" and "status".
        assert_eq!(diagnostics.len(), 2);
        let messages: Vec<&str> = diagnostics.iter().map(|d| d.message.as_str()).collect();
        assert!(messages.iter().any(|m| m.contains("date")));
        assert!(messages.iter().any(|m| m.contains("status")));
    }

    #[test]
    fn detects_disallowed_value() {
        let graph = graph_of(vec![node_with_metadata(
            "observations/note.md",
            serde_json::json!({"title": "Note", "status": "invalid"}),
        )]);

        let diagnostics = run_rule(
            graph,
            Some(
                r#"
                [schemas."observations/*.md"]
                required = ["title"]
                allowed.status = ["draft", "review", "final"]
                "#,
            ),
        );

        assert_eq!(diagnostics.len(), 1);
        assert!(diagnostics[0].message.contains("invalid"));
        assert!(diagnostics[0].message.contains("allowed"));
    }

    #[test]
    fn allowed_value_passes() {
        let graph = graph_of(vec![node_with_metadata(
            "observations/note.md",
            serde_json::json!({"title": "Note", "status": "draft"}),
        )]);

        let diagnostics = run_rule(
            graph,
            Some(
                r#"
                [schemas."observations/*.md"]
                allowed.status = ["draft", "review", "final"]
                "#,
            ),
        );

        assert!(diagnostics.is_empty());
    }

    #[test]
    fn no_options_no_diagnostics() {
        let graph = graph_of(vec![node_with_metadata(
            "doc.md",
            serde_json::json!({"title": "Hello"}),
        )]);

        let diagnostics = run_rule(graph, None);

        assert!(diagnostics.is_empty());
    }

    #[test]
    fn skips_nodes_without_metadata() {
        let graph = graph_of(vec![Node {
            path: "no-frontmatter.md".into(),
            node_type: Some(crate::graph::NodeType::File),
            included: true,
            hash: None,
            metadata: HashMap::new(),
        }]);

        let diagnostics = run_rule(graph, Some("required = [\"title\"]"));

        // Despite the test name, a node with no metadata at all is still
        // reported as missing the required field.
        assert_eq!(diagnostics.len(), 1);
        assert!(diagnostics[0].message.contains("title"));
    }
}