use serde::{Deserialize, Deserializer};
use std::collections::HashSet;
/// Top-level frontmatter keys that [`OutputStyleSchema`] has a field for.
///
/// `find_unknown_keys` reports every top-level key that is not in this list.
pub(crate) const KNOWN_KEYS: &[&str] = &["name", "description", "keep-coding-instructions"];
/// Typed view of the YAML frontmatter handled by this module.
///
/// The container-level `#[serde(default)]` means a key that is absent from
/// the document leaves the corresponding field at its default, `None`.
#[derive(Debug, Clone, Default, Deserialize)]
#[serde(default)]
pub struct OutputStyleSchema {
    /// Value of the `name` key, if present.
    pub name: Option<String>,

    /// Value of the `description` key, if present.
    pub description: Option<String>,

    /// Raw value of the `keep-coding-instructions` key, if present.
    ///
    /// Kept as an untyped [`serde_yaml::Value`] rather than a `bool`, so a
    /// non-boolean value (for example the string `"yes"`) is preserved as
    /// written instead of failing deserialization.
    #[serde(rename = "keep-coding-instructions", deserialize_with = "deserialize_present_value")]
    pub keep_coding_instructions: Option<serde_yaml::Value>,
}
/// Serde `deserialize_with` helper that records a key's value untyped.
///
/// Deserializes whatever value the deserializer yields into a
/// [`serde_yaml::Value`] and wraps it in `Some`. Serde only invokes this
/// helper when the key is present in the input, so `Some` means "key present".
///
/// # Errors
///
/// Propagates any error produced while deserializing the value.
fn deserialize_present_value<'de, D>(d: D) -> Result<Option<serde_yaml::Value>, D::Error>
where
    D: Deserializer<'de>,
{
    serde_yaml::Value::deserialize(d).map(Some)
}
/// Everything `parse_frontmatter` extracts from a document that opens with a
/// frontmatter delimiter.
#[derive(Debug, Clone)]
pub struct ParsedOutputStyleFrontmatter {
    /// Deserialized frontmatter. `None` when the YAML could not be parsed or
    /// the closing `---` is missing.
    pub schema: Option<OutputStyleSchema>,

    /// Frontmatter text between the delimiters, lines joined with `\n`.
    /// Without a closing delimiter this is everything after the opening line.
    // NOTE(review): the lint allowance suggests this field is not read by
    // current callers — confirm before removing it.
    #[allow(dead_code)]
    pub raw: String,

    /// Always `1`, as set by `parse_frontmatter`.
    pub start_line: usize,

    /// 1-based line number of the closing `---`, or the total number of
    /// lines when the closing delimiter is missing.
    // NOTE(review): the lint allowance suggests this field is not read by
    // current callers — confirm before removing it.
    #[allow(dead_code)]
    pub end_line: usize,

    /// Top-level keys that are not listed in `KNOWN_KEYS`. Empty when the
    /// closing delimiter is missing.
    pub unknown_keys: Vec<UnknownKey>,

    /// Message of the YAML parse error, or `"missing closing ---"` when the
    /// frontmatter is never closed. `None` on success.
    pub parse_error: Option<String>,

    /// `true` when every line after the closing delimiter is blank or
    /// whitespace-only (including when there are no such lines), and also
    /// when the closing delimiter is missing.
    pub body_is_empty: bool,
}
/// A top-level frontmatter key that is not one of the recognised keys,
/// together with where it was found.
#[derive(Debug, Clone)]
pub struct UnknownKey {
    /// Key text with surrounding whitespace and quote characters removed.
    pub key: String,

    /// Line number of the key: the scan's starting line number plus the
    /// zero-based index of the line within the scanned text.
    pub line: usize,

    /// Number of leading whitespace bytes in front of the key on its line.
    pub column: usize,
}
/// Splits `content` into YAML frontmatter and body, then parses the frontmatter.
///
/// The frontmatter must open on the very first line with a line that is
/// exactly `---` (trailing whitespace is tolerated) and runs until the next
/// line that is `---` once surrounding whitespace is trimmed.
///
/// # Returns
///
/// * `None` when the first line is not a `---` delimiter line (this includes
///   empty input).
/// * `Some(..)` with `parse_error` set to `"missing closing ---"`, `schema`
///   `None` and `body_is_empty` `true` when the frontmatter is never closed.
/// * `Some(..)` otherwise, carrying the deserialized schema (or the YAML
///   error message), the unknown top-level keys, and whether the body after
///   the closing delimiter contains only blank lines.
pub fn parse_frontmatter(content: &str) -> Option<ParsedOutputStyleFrontmatter> {
    let mut all_lines = content.lines();

    // The opening delimiter has to be a whole line. A bare prefix check would
    // also accept `----` (a Markdown thematic break) or `---foo`, and then
    // misreport an ordinary document as having unclosed frontmatter.
    if all_lines.next()?.trim_end() != "---" {
        return None;
    }

    // Every line after the opening delimiter; index 0 is line 2 of the file.
    let rest: Vec<&str> = all_lines.collect();

    let closing = match rest.iter().position(|line| line.trim() == "---") {
        Some(idx) => idx,
        None => {
            return Some(ParsedOutputStyleFrontmatter {
                schema: None,
                raw: rest.join("\n"),
                start_line: 1,
                // Total number of lines, counting the opening delimiter.
                end_line: rest.len() + 1,
                unknown_keys: Vec::new(),
                parse_error: Some("missing closing ---".to_string()),
                body_is_empty: true,
            });
        }
    };

    let raw = rest[..closing].join("\n");

    // `closing < rest.len()`, so this slice is always in bounds; it is simply
    // empty when the closing delimiter is the last line.
    let body_is_empty = rest[closing + 1..]
        .iter()
        .all(|line| line.trim().is_empty());

    // The first frontmatter line is line 2 of the file (1-based).
    let unknown_keys = find_unknown_keys(&raw, 2);
    let (schema, parse_error) = parse_schema(&raw);

    Some(ParsedOutputStyleFrontmatter {
        schema,
        raw,
        start_line: 1,
        // `closing` indexes `rest`, which starts at file line 2.
        end_line: closing + 2,
        unknown_keys,
        parse_error,
        body_is_empty,
    })
}
/// Deserializes the frontmatter text into an [`OutputStyleSchema`].
///
/// Returns a `(schema, error)` pair in which exactly one side is `Some`:
/// blank or whitespace-only input yields the default schema without invoking
/// the YAML parser; otherwise the result is either the parsed schema or the
/// parser's error message.
fn parse_schema(raw: &str) -> (Option<OutputStyleSchema>, Option<String>) {
    let is_blank = raw.trim().is_empty();

    if is_blank {
        (Some(OutputStyleSchema::default()), None)
    } else {
        match serde_yaml::from_str::<OutputStyleSchema>(raw) {
            Err(failure) => (None, Some(failure.to_string())),
            Ok(parsed) => (Some(parsed), None),
        }
    }
}
/// Scans frontmatter text line by line for top-level keys that are not in
/// `KNOWN_KEYS`.
///
/// This is a textual scan, not a YAML parse: a line is considered when it
/// does not start with a space or tab, is not a `#` comment, and contains a
/// `:`. The text before the first `:` — trimmed of whitespace and of leading
/// and trailing quote characters — is taken as the key.
///
/// `start_line` is the line number assigned to the first line of `yaml`;
/// each reported key's `line` is `start_line` plus its zero-based line index.
fn find_unknown_keys(yaml: &str, start_line: usize) -> Vec<UnknownKey> {
    let recognised: HashSet<&str> = KNOWN_KEYS.iter().copied().collect();

    yaml.lines()
        .enumerate()
        .filter(|(_, text)| {
            // Indented lines belong to nested structures, not top-level keys.
            let indented = text.starts_with(' ') || text.starts_with('\t');
            let comment = text.trim_start().starts_with('#');
            !indented && !comment
        })
        .filter_map(|(offset, text)| {
            let colon_at = text.find(':')?;
            let before_colon = &text[..colon_at];
            let name = before_colon
                .trim()
                .trim_matches(|ch: char| matches!(ch, '\'' | '"'));

            if name.is_empty() || recognised.contains(name) {
                return None;
            }

            Some(UnknownKey {
                key: name.to_string(),
                line: start_line + offset,
                column: before_colon.len() - before_colon.trim_start().len(),
            })
        })
        .collect()
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `content`, panicking when no frontmatter is detected.
    fn parsed(content: &str) -> ParsedOutputStyleFrontmatter {
        parse_frontmatter(content).unwrap()
    }

    #[test]
    fn test_parse_no_frontmatter() {
        let outcome = parse_frontmatter("# Just markdown without frontmatter");
        assert!(outcome.is_none());
    }

    #[test]
    fn test_parse_empty_frontmatter() {
        let outcome = parsed("---\n---\nbody");

        // Empty frontmatter is valid and yields an all-`None` schema.
        assert!(outcome.parse_error.is_none());
        assert!(!outcome.body_is_empty);

        let schema = outcome.schema.unwrap();
        assert!(schema.name.is_none());
        assert!(schema.description.is_none());
        assert!(schema.keep_coding_instructions.is_none());
    }

    #[test]
    fn test_parse_with_keep_coding_instructions_true() {
        let outcome = parsed(
            "---\nname: Concise\ndescription: short\nkeep-coding-instructions: true\n---\nBe brief.",
        );
        assert!(outcome.unknown_keys.is_empty());

        let schema = outcome.schema.unwrap();
        assert_eq!(schema.name.as_deref(), Some("Concise"));
        assert_eq!(schema.description.as_deref(), Some("short"));
        assert_eq!(
            schema.keep_coding_instructions,
            Some(serde_yaml::Value::Bool(true))
        );
    }

    #[test]
    fn test_parse_with_keep_coding_instructions_non_bool() {
        let outcome = parsed("---\nname: Concise\nkeep-coding-instructions: \"yes\"\n---\nBody");

        // A non-boolean value is kept verbatim rather than rejected.
        let value = outcome
            .schema
            .unwrap()
            .keep_coding_instructions
            .expect("present");
        assert!(value.as_bool().is_none(), "value must NOT be bool");
        assert_eq!(value.as_str(), Some("yes"));
    }

    #[test]
    fn test_detect_unknown_keys() {
        let outcome = parsed("---\nname: X\ndescription: y\nfoo: bar\nalwaysApply: true\n---\nbody");
        let reported = &outcome.unknown_keys;

        assert_eq!(reported.len(), 2);
        assert!(reported.iter().any(|k| k.key == "foo"));
        assert!(reported.iter().any(|k| k.key == "alwaysApply"));
    }

    #[test]
    fn test_detect_empty_body() {
        // A body made only of whitespace counts as empty.
        assert!(parsed("---\nname: X\n---\n \n\n").body_is_empty);
    }

    #[test]
    fn test_detect_non_empty_body() {
        assert!(!parsed("---\nname: X\n---\nReal instructions.").body_is_empty);
    }

    #[test]
    fn test_known_keys_not_flagged() {
        let outcome = parsed("---\nname: X\ndescription: y\nkeep-coding-instructions: false\n---\nbody");
        assert!(outcome.unknown_keys.is_empty());
    }

    #[test]
    fn test_unclosed_frontmatter_is_parse_error() {
        let outcome = parsed("---\nname: X");
        assert!(outcome.parse_error.is_some());
        assert_eq!(outcome.parse_error.as_deref(), Some("missing closing ---"));
    }
}