use crate::diagnostic::Diagnostic;
use crate::rules::{Rule, RuleContext};
/// Rule that reports nodes whose metadata is missing required fields or holds
/// a value outside a configured allow-list.
///
/// Reported under the rule id `"schema-violation"`.
pub struct SchemaViolationRule;
impl Rule for SchemaViolationRule {
fn name(&self) -> &str {
"schema-violation"
}
fn evaluate(&self, ctx: &RuleContext) -> Vec<Diagnostic> {
let options = match ctx.options {
Some(opts) => opts,
None => return vec![],
};
let global_required = extract_string_array(options, "required");
let schemas = options
.get("schemas")
.and_then(|v| v.as_table())
.cloned()
.unwrap_or_default();
let mut diagnostics = Vec::new();
let mut compiled_schemas: Vec<(globset::GlobMatcher, SchemaSpec)> = Vec::new();
for (pattern, value) in &schemas {
match globset::Glob::new(pattern) {
Ok(glob) => {
let spec = SchemaSpec::from_toml(value);
compiled_schemas.push((glob.compile_matcher(), spec));
}
Err(e) => {
diagnostics.push(Diagnostic {
rule: "schema-violation".into(),
message: format!("invalid schema glob \"{pattern}\": {e}"),
fix: Some(format!(
"fix the glob pattern \"{pattern}\" in [rules.schema-violation.options.schemas]"
)),
..Default::default()
});
}
}
}
for (path, node) in &ctx.graph.graph.nodes {
let metadata = merge_metadata(&node.metadata);
let source = metadata_source(&node.metadata);
for field in &global_required {
if !has_field(&metadata, field) {
diagnostics.push(Diagnostic {
rule: "schema-violation".into(),
message: format!("missing required field \"{field}\""),
node: Some(path.clone()),
fix: Some(format!("add \"{field}\" to {source} in {path}")),
..Default::default()
});
}
}
for (matcher, spec) in &compiled_schemas {
if !matcher.is_match(path) {
continue;
}
for field in &spec.required {
if !has_field(&metadata, field) {
diagnostics.push(Diagnostic {
rule: "schema-violation".into(),
message: format!("missing required field \"{field}\""),
node: Some(path.clone()),
fix: Some(format!("add \"{field}\" to {source} in {path}")),
..Default::default()
});
}
}
for (field, allowed_values) in &spec.allowed {
if let Some(value) = get_field(&metadata, field)
&& let Some(s) = value_as_string(value)
&& !allowed_values.iter().any(|av| av == &s)
{
diagnostics.push(Diagnostic {
rule: "schema-violation".into(),
message: format!(
"field \"{field}\" has value \"{s}\", allowed: [{}]",
allowed_values.join(", ")
),
node: Some(path.clone()),
fix: Some(format!(
"change \"{field}\" in {path} to one of: {}",
allowed_values.join(", ")
)),
..Default::default()
});
}
}
}
}
diagnostics.sort_by(|a, b| a.node.cmp(&b.node));
diagnostics
}
}
/// Validation requirements configured for one schema glob.
struct SchemaSpec {
    /// Names of the fields that matching nodes must provide.
    required: Vec<String>,
    /// Pairs of a field name and the values permitted for that field.
    allowed: Vec<(String, Vec<String>)>,
}

impl SchemaSpec {
    /// Builds a spec from a TOML value.
    ///
    /// Reads the `required` array and the `allowed` table of `value`; either
    /// one defaults to empty when it is missing or has a different type.
    fn from_toml(value: &toml::Value) -> Self {
        let allowed: Vec<(String, Vec<String>)> =
            match value.get("allowed").and_then(toml::Value::as_table) {
                Some(constraints) => constraints
                    .iter()
                    .map(|(field, values)| (field.clone(), extract_string_array_direct(values)))
                    .collect(),
                None => Vec::new(),
            };

        Self {
            required: extract_string_array(value, "required"),
            allowed,
        }
    }
}
fn extract_string_array(value: &toml::Value, key: &str) -> Vec<String> {
value
.get(key)
.and_then(|v| v.as_array())
.map(|arr| {
arr.iter()
.filter_map(|v| v.as_str().map(String::from))
.collect()
})
.unwrap_or_default()
}
/// Returns the string elements of `value` when it is a TOML array.
///
/// Yields an empty vector for any other kind of value; non-string elements
/// are dropped.
fn extract_string_array_direct(value: &toml::Value) -> Vec<String> {
    let Some(items) = value.as_array() else {
        return Vec::new();
    };

    let mut strings = Vec::new();
    for item in items {
        if let Some(text) = item.as_str() {
            strings.push(text.to_owned());
        }
    }
    strings
}
/// Flattens all metadata entries of a node into a single JSON object.
///
/// Entries are visited in ascending key order, so when two entries define the
/// same field the one with the greater key wins. Entries whose value is not a
/// JSON object contribute nothing.
fn merge_metadata(
    metadata: &std::collections::HashMap<String, serde_json::Value>,
) -> serde_json::Value {
    // The ordered view makes the override order independent of hash iteration.
    let ordered: std::collections::BTreeMap<&String, &serde_json::Value> =
        metadata.iter().collect();

    let mut combined = serde_json::Map::new();
    for fields in ordered
        .into_values()
        .filter_map(serde_json::Value::as_object)
    {
        for (name, value) in fields {
            combined.insert(name.clone(), value.clone());
        }
    }
    serde_json::Value::Object(combined)
}
/// Picks the label used in fix hints for where a field should be added.
///
/// Returns the sole key when `metadata` has exactly one entry, and the
/// generic label `"metadata"` otherwise (no entries or several).
fn metadata_source(metadata: &std::collections::HashMap<String, serde_json::Value>) -> String {
    let mut names = metadata.keys();
    match (names.next(), names.next()) {
        (Some(only), None) => only.clone(),
        _ => "metadata".to_string(),
    }
}
/// Reports whether `metadata` holds `field` with a value other than JSON `null`.
fn has_field(metadata: &serde_json::Value, field: &str) -> bool {
    matches!(metadata.get(field), Some(found) if !found.is_null())
}
/// Returns the value stored under `field`, treating JSON `null` as absent.
fn get_field<'a>(metadata: &'a serde_json::Value, field: &str) -> Option<&'a serde_json::Value> {
    match metadata.get(field) {
        Some(found) if !found.is_null() => Some(found),
        _ => None,
    }
}
fn value_as_string(value: &serde_json::Value) -> Option<String> {
match value {
serde_json::Value::String(s) => Some(s.clone()),
serde_json::Value::Number(n) => Some(n.to_string()),
serde_json::Value::Bool(b) => Some(b.to_string()),
_ => None,
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::graph::test_helpers::make_enriched;
    use crate::graph::{Graph, Node};
    use crate::rules::RuleContext;
    use std::collections::HashMap;

    /// Builds an included file node at `path` carrying the given metadata map.
    fn file_node(path: &str, metadata: HashMap<String, serde_json::Value>) -> Node {
        Node {
            path: path.into(),
            node_type: Some(crate::graph::NodeType::File),
            included: true,
            hash: None,
            metadata,
        }
    }

    /// Builds a file node whose only metadata entry is `"frontmatter"`.
    fn node_with_metadata(path: &str, metadata: serde_json::Value) -> Node {
        file_node(path, HashMap::from([("frontmatter".to_string(), metadata)]))
    }

    /// Builds a graph containing `nodes`, added in the given order.
    fn graph_of(nodes: impl IntoIterator<Item = Node>) -> Graph {
        let mut graph = Graph::new();
        for node in nodes {
            graph.add_node(node);
        }
        graph
    }

    /// Runs the rule over `graph`, parsing `options_toml` as the rule options
    /// when it is provided.
    fn run_rule(graph: Graph, options_toml: Option<&str>) -> Vec<Diagnostic> {
        let enriched = make_enriched(graph);
        let options: Option<toml::Value> =
            options_toml.map(|text| toml::from_str::<toml::Value>(text).unwrap());
        let ctx = RuleContext {
            graph: &enriched,
            options: options.as_ref(),
        };
        SchemaViolationRule.evaluate(&ctx)
    }

    #[test]
    fn detects_missing_required_field() {
        let graph = graph_of([node_with_metadata(
            "doc.md",
            serde_json::json!({"status": "draft"}),
        )]);

        let diagnostics = run_rule(graph, Some("required = [\"title\"]"));

        assert_eq!(diagnostics.len(), 1);
        let only = &diagnostics[0];
        assert!(only.message.contains("title"));
        assert_eq!(only.node.as_deref(), Some("doc.md"));
        let fix = only.fix.as_ref().unwrap();
        assert!(
            fix.contains("frontmatter"),
            "fix should name the parser: {fix}"
        );
    }

    #[test]
    fn passes_when_required_field_present() {
        let graph = graph_of([node_with_metadata(
            "doc.md",
            serde_json::json!({"title": "Hello"}),
        )]);

        let diagnostics = run_rule(graph, Some("required = [\"title\"]"));

        assert!(diagnostics.is_empty());
    }

    #[test]
    fn detects_per_glob_required() {
        let graph = graph_of([
            node_with_metadata("observations/note.md", serde_json::json!({"title": "Note"})),
            node_with_metadata("readme.md", serde_json::json!({"title": "README"})),
        ]);

        // Raw TOML is kept flush-left so the literal matches the configuration text exactly.
        let diagnostics = run_rule(
            graph,
            Some(
                r#"
[schemas."observations/*.md"]
required = ["title", "date", "status"]
"#,
            ),
        );

        assert_eq!(diagnostics.len(), 2);
        for expected in ["date", "status"] {
            assert!(diagnostics.iter().any(|d| d.message.contains(expected)));
        }
    }

    #[test]
    fn detects_disallowed_value() {
        let graph = graph_of([node_with_metadata(
            "observations/note.md",
            serde_json::json!({"title": "Note", "status": "invalid"}),
        )]);

        let diagnostics = run_rule(
            graph,
            Some(
                r#"
[schemas."observations/*.md"]
required = ["title"]
allowed.status = ["draft", "review", "final"]
"#,
            ),
        );

        assert_eq!(diagnostics.len(), 1);
        let message = &diagnostics[0].message;
        assert!(message.contains("invalid"));
        assert!(message.contains("allowed"));
    }

    #[test]
    fn allowed_value_passes() {
        let graph = graph_of([node_with_metadata(
            "observations/note.md",
            serde_json::json!({"title": "Note", "status": "draft"}),
        )]);

        let diagnostics = run_rule(
            graph,
            Some(
                r#"
[schemas."observations/*.md"]
allowed.status = ["draft", "review", "final"]
"#,
            ),
        );

        assert!(diagnostics.is_empty());
    }

    #[test]
    fn no_options_no_diagnostics() {
        let graph = graph_of([node_with_metadata(
            "doc.md",
            serde_json::json!({"title": "Hello"}),
        )]);

        let diagnostics = run_rule(graph, None);

        assert!(diagnostics.is_empty());
    }

    // Despite the name, a node with an empty metadata map is still checked:
    // the globally required field is reported as missing.
    #[test]
    fn skips_nodes_without_metadata() {
        let graph = graph_of([file_node("no-frontmatter.md", HashMap::new())]);

        let diagnostics = run_rule(graph, Some("required = [\"title\"]"));

        assert_eq!(diagnostics.len(), 1);
        assert!(diagnostics[0].message.contains("title"));
    }
}