1use crate::diagnostic::Diagnostic;
2use crate::graph::NodeType;
3use crate::rules::{Rule, RuleContext};
4
/// Rule that validates the metadata of file nodes against the required fields
/// and allowed values configured in the rule's options.
pub struct SchemaViolationRule;
6
7impl Rule for SchemaViolationRule {
8 fn name(&self) -> &str {
9 "schema-violation"
10 }
11
12 fn evaluate(&self, ctx: &RuleContext) -> Vec<Diagnostic> {
13 let options = match ctx.options {
14 Some(opts) => opts,
15 None => return vec![],
16 };
17
18 let global_required = extract_string_array(options, "required");
19 let schemas = options
20 .get("schemas")
21 .and_then(|v| v.as_table())
22 .cloned()
23 .unwrap_or_default();
24
25 let mut diagnostics = Vec::new();
26
27 let mut compiled_schemas: Vec<(globset::GlobMatcher, SchemaSpec)> = Vec::new();
29 for (pattern, value) in &schemas {
30 match globset::Glob::new(pattern) {
31 Ok(glob) => {
32 let spec = SchemaSpec::from_toml(value);
33 compiled_schemas.push((glob.compile_matcher(), spec));
34 }
35 Err(e) => {
36 diagnostics.push(Diagnostic {
37 rule: "schema-violation".into(),
38 message: format!("invalid schema glob \"{pattern}\": {e}"),
39 fix: Some(format!(
40 "fix the glob pattern \"{pattern}\" in [rules.schema-violation.options.schemas]"
41 )),
42 ..Default::default()
43 });
44 }
45 }
46 }
47
48 for (path, node) in &ctx.graph.graph.nodes {
49 if node.node_type != NodeType::File {
50 continue;
51 }
52
53 let metadata = merge_metadata(&node.metadata);
55 let source = metadata_source(&node.metadata);
56
57 for field in &global_required {
59 if !has_field(&metadata, field) {
60 diagnostics.push(Diagnostic {
61 rule: "schema-violation".into(),
62 message: format!("missing required field \"{field}\""),
63 node: Some(path.clone()),
64 fix: Some(format!("add \"{field}\" to {source} in {path}")),
65 ..Default::default()
66 });
67 }
68 }
69
70 for (matcher, spec) in &compiled_schemas {
72 if !matcher.is_match(path) {
73 continue;
74 }
75
76 for field in &spec.required {
77 if !has_field(&metadata, field) {
78 diagnostics.push(Diagnostic {
79 rule: "schema-violation".into(),
80 message: format!("missing required field \"{field}\""),
81 node: Some(path.clone()),
82 fix: Some(format!("add \"{field}\" to {source} in {path}")),
83 ..Default::default()
84 });
85 }
86 }
87
88 for (field, allowed_values) in &spec.allowed {
89 if let Some(value) = get_field(&metadata, field)
90 && let Some(s) = value_as_string(value)
91 && !allowed_values.iter().any(|av| av == &s)
92 {
93 diagnostics.push(Diagnostic {
94 rule: "schema-violation".into(),
95 message: format!(
96 "field \"{field}\" has value \"{s}\", allowed: [{}]",
97 allowed_values.join(", ")
98 ),
99 node: Some(path.clone()),
100 fix: Some(format!(
101 "change \"{field}\" in {path} to one of: {}",
102 allowed_values.join(", ")
103 )),
104 ..Default::default()
105 });
106 }
107 }
108 }
109 }
110
111 diagnostics.sort_by(|a, b| a.node.cmp(&b.node));
112 diagnostics
113 }
114}
115
/// Schema constraints parsed from one entry of the `schemas` options table.
struct SchemaSpec {
    /// Names of the fields that must be present.
    required: Vec<String>,
    /// Pairs of (field name, values that field is allowed to take).
    allowed: Vec<(String, Vec<String>)>,
}
120
121impl SchemaSpec {
122 fn from_toml(value: &toml::Value) -> Self {
123 let required = extract_string_array(value, "required");
124 let allowed = value
125 .get("allowed")
126 .and_then(|v| v.as_table())
127 .map(|table| {
128 table
129 .iter()
130 .map(|(k, v)| (k.clone(), extract_string_array_direct(v)))
131 .collect()
132 })
133 .unwrap_or_default();
134 Self { required, allowed }
135 }
136}
137
138fn extract_string_array(value: &toml::Value, key: &str) -> Vec<String> {
139 value
140 .get(key)
141 .and_then(|v| v.as_array())
142 .map(|arr| {
143 arr.iter()
144 .filter_map(|v| v.as_str().map(String::from))
145 .collect()
146 })
147 .unwrap_or_default()
148}
149
150fn extract_string_array_direct(value: &toml::Value) -> Vec<String> {
151 value
152 .as_array()
153 .map(|arr| {
154 arr.iter()
155 .filter_map(|v| v.as_str().map(String::from))
156 .collect()
157 })
158 .unwrap_or_default()
159}
160
161fn merge_metadata(
165 metadata: &std::collections::HashMap<String, serde_json::Value>,
166) -> serde_json::Value {
167 let mut merged = serde_json::Map::new();
168 let mut keys: Vec<&String> = metadata.keys().collect();
169 keys.sort();
170 for key in keys {
171 if let serde_json::Value::Object(map) = &metadata[key] {
172 for (k, v) in map {
173 merged.insert(k.clone(), v.clone());
174 }
175 }
176 }
177 serde_json::Value::Object(merged)
178}
179
180fn metadata_source(metadata: &std::collections::HashMap<String, serde_json::Value>) -> String {
184 let keys: Vec<&String> = metadata.keys().collect();
185 if keys.len() == 1 {
186 keys[0].clone()
187 } else {
188 "metadata".to_string()
189 }
190}
191
192fn has_field(metadata: &serde_json::Value, field: &str) -> bool {
193 metadata.get(field).is_some_and(|v| !v.is_null())
194}
195
196fn get_field<'a>(metadata: &'a serde_json::Value, field: &str) -> Option<&'a serde_json::Value> {
197 metadata.get(field).filter(|v| !v.is_null())
198}
199
200fn value_as_string(value: &serde_json::Value) -> Option<String> {
201 match value {
202 serde_json::Value::String(s) => Some(s.clone()),
203 serde_json::Value::Number(n) => Some(n.to_string()),
204 serde_json::Value::Bool(b) => Some(b.to_string()),
205 _ => None,
206 }
207}
208
#[cfg(test)]
mod tests {
    use super::*;
    use crate::analyses::EnrichedGraph;
    use crate::config::Config;
    use crate::graph::{Graph, Node, NodeType};
    use crate::rules::RuleContext;
    use std::collections::HashMap;

    /// Enriches `graph` with the default configuration, rooted at the current directory.
    fn make_enriched(graph: Graph) -> EnrichedGraph {
        crate::analyses::enrich_graph(graph, std::path::Path::new("."), &Config::defaults(), None)
    }

    /// Builds a file node whose metadata holds `metadata` under the `"frontmatter"` key.
    fn node_with_metadata(path: &str, metadata: serde_json::Value) -> Node {
        let mut meta_map = HashMap::new();
        meta_map.insert("frontmatter".to_string(), metadata);
        Node {
            path: path.into(),
            node_type: NodeType::File,
            hash: None,
            graph: None,
            metadata: meta_map,
        }
    }

    /// Evaluates the rule over `graph`. `options`, when given, is TOML source
    /// for the rule options; `None` runs the rule without options.
    fn run_rule(graph: Graph, options: Option<&str>) -> Vec<crate::diagnostic::Diagnostic> {
        let enriched = make_enriched(graph);
        let options: Option<toml::Value> =
            options.map(|src| toml::from_str(src).expect("test options must be valid TOML"));
        let ctx = RuleContext {
            graph: &enriched,
            options: options.as_ref(),
        };
        SchemaViolationRule.evaluate(&ctx)
    }

    #[test]
    fn detects_missing_required_field() {
        let mut graph = Graph::new();
        graph.add_node(node_with_metadata(
            "doc.md",
            serde_json::json!({"status": "draft"}),
        ));

        let diagnostics = run_rule(graph, Some(r#"required = ["title"]"#));

        assert_eq!(diagnostics.len(), 1);
        assert!(diagnostics[0].message.contains("title"));
        assert_eq!(diagnostics[0].node.as_deref(), Some("doc.md"));
        let fix = diagnostics[0].fix.as_ref().unwrap();
        assert!(
            fix.contains("frontmatter"),
            "fix should name the parser: {fix}"
        );
    }

    #[test]
    fn passes_when_required_field_present() {
        let mut graph = Graph::new();
        graph.add_node(node_with_metadata(
            "doc.md",
            serde_json::json!({"title": "Hello"}),
        ));

        let diagnostics = run_rule(graph, Some(r#"required = ["title"]"#));

        assert!(diagnostics.is_empty());
    }

    #[test]
    fn detects_per_glob_required() {
        let mut graph = Graph::new();
        graph.add_node(node_with_metadata(
            "observations/note.md",
            serde_json::json!({"title": "Note"}),
        ));
        // Does not match the glob, so the per-glob requirements must not apply to it.
        graph.add_node(node_with_metadata(
            "readme.md",
            serde_json::json!({"title": "README"}),
        ));

        let diagnostics = run_rule(
            graph,
            Some(
                r#"
                [schemas."observations/*.md"]
                required = ["title", "date", "status"]
                "#,
            ),
        );

        assert_eq!(diagnostics.len(), 2);
        let messages: Vec<&str> = diagnostics.iter().map(|d| d.message.as_str()).collect();
        assert!(messages.iter().any(|m| m.contains("date")));
        assert!(messages.iter().any(|m| m.contains("status")));
    }

    #[test]
    fn detects_disallowed_value() {
        let mut graph = Graph::new();
        graph.add_node(node_with_metadata(
            "observations/note.md",
            serde_json::json!({"title": "Note", "status": "invalid"}),
        ));

        let diagnostics = run_rule(
            graph,
            Some(
                r#"
                [schemas."observations/*.md"]
                required = ["title"]
                allowed.status = ["draft", "review", "final"]
                "#,
            ),
        );

        assert_eq!(diagnostics.len(), 1);
        assert!(diagnostics[0].message.contains("invalid"));
        assert!(diagnostics[0].message.contains("allowed"));
    }

    #[test]
    fn allowed_value_passes() {
        let mut graph = Graph::new();
        graph.add_node(node_with_metadata(
            "observations/note.md",
            serde_json::json!({"title": "Note", "status": "draft"}),
        ));

        let diagnostics = run_rule(
            graph,
            Some(
                r#"
                [schemas."observations/*.md"]
                allowed.status = ["draft", "review", "final"]
                "#,
            ),
        );

        assert!(diagnostics.is_empty());
    }

    #[test]
    fn no_options_no_diagnostics() {
        let mut graph = Graph::new();
        graph.add_node(node_with_metadata(
            "doc.md",
            serde_json::json!({"title": "Hello"}),
        ));

        let diagnostics = run_rule(graph, None);

        assert!(diagnostics.is_empty());
    }

    // A node with no metadata at all is still checked: every required field
    // is reported as missing for it.
    #[test]
    fn reports_missing_field_on_node_without_metadata() {
        let mut graph = Graph::new();
        graph.add_node(Node {
            path: "no-frontmatter.md".into(),
            node_type: NodeType::File,
            hash: None,
            graph: None,
            metadata: HashMap::new(),
        });

        let diagnostics = run_rule(graph, Some(r#"required = ["title"]"#));

        assert_eq!(diagnostics.len(), 1);
        assert!(diagnostics[0].message.contains("title"));
    }

    // An unclosed character class is not a valid glob; the rule must report
    // it instead of silently dropping the schema entry.
    #[test]
    fn reports_invalid_schema_glob() {
        let mut graph = Graph::new();
        graph.add_node(node_with_metadata(
            "doc.md",
            serde_json::json!({"title": "Hello"}),
        ));

        let diagnostics = run_rule(
            graph,
            Some(
                r#"
                [schemas."docs/["]
                required = ["title"]
                "#,
            ),
        );

        assert_eq!(diagnostics.len(), 1);
        assert!(diagnostics[0].message.contains("invalid schema glob"));
        assert!(diagnostics[0].message.contains("docs/["));
    }
}