use serde::{Deserialize, Serialize};
use std::collections::HashSet;
const KNOWN_KEYS: &[&str] = &["applyTo", "description", "excludeAgent"];
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct CopilotScopedSchema {
#[serde(default)]
pub apply_to: Option<String>,
#[serde(default)]
pub description: Option<String>,
#[serde(default)]
pub exclude_agent: Option<String>,
}
#[derive(Debug, Clone)]
pub struct ParsedFrontmatter {
pub schema: Option<CopilotScopedSchema>,
pub raw: String,
pub start_line: usize,
pub end_line: usize,
pub body: String,
pub unknown_keys: Vec<UnknownKey>,
pub parse_error: Option<String>,
}
impl crate::rules::FrontmatterRanges for ParsedFrontmatter {
fn raw_content(&self) -> &str {
&self.raw
}
fn start_line(&self) -> usize {
self.start_line
}
}
#[derive(Debug, Clone)]
pub struct UnknownKey {
pub key: String,
pub line: usize,
pub column: usize,
}
#[derive(Debug, Clone)]
pub struct GlobValidation {
pub valid: bool,
#[allow(dead_code)] pub pattern: String,
pub error: Option<String>,
}
pub fn parse_frontmatter(content: &str) -> Option<ParsedFrontmatter> {
if !content.starts_with("---") {
return None;
}
let lines: Vec<&str> = content.lines().collect();
if lines.is_empty() {
return None;
}
let mut end_idx = None;
for (i, line) in lines.iter().enumerate().skip(1) {
if line.trim() == "---" {
end_idx = Some(i);
break;
}
}
if end_idx.is_none() {
let frontmatter_lines: Vec<&str> = lines[1..].to_vec();
let raw = frontmatter_lines.join(
"
",
);
return Some(ParsedFrontmatter {
schema: None,
raw,
start_line: 1,
end_line: lines.len(),
body: String::new(),
unknown_keys: Vec::new(),
parse_error: Some("missing closing ---".to_string()),
});
}
let end_idx = end_idx.unwrap();
let frontmatter_lines: Vec<&str> = lines[1..end_idx].to_vec();
let raw = frontmatter_lines.join("\n");
let body_lines: Vec<&str> = lines[end_idx + 1..].to_vec();
let body = body_lines.join("\n");
let (schema, parse_error) = match serde_yaml::from_str::<CopilotScopedSchema>(&raw) {
Ok(s) => (Some(s), None),
Err(e) => (None, Some(e.to_string())),
};
let unknown_keys = find_unknown_keys(&raw, 2);
Some(ParsedFrontmatter {
schema,
raw,
start_line: 1,
end_line: end_idx + 1,
body,
unknown_keys,
parse_error,
})
}
fn find_unknown_keys(yaml: &str, start_line: usize) -> Vec<UnknownKey> {
let known: HashSet<&str> = KNOWN_KEYS.iter().copied().collect();
let mut unknown = Vec::new();
for (i, line) in yaml.lines().enumerate() {
if line.starts_with(' ') || line.starts_with('\t') {
continue;
}
if let Some(colon_idx) = line.find(':') {
let key_raw = &line[..colon_idx];
let key = key_raw.trim().trim_matches(|c| c == '\'' || c == '\"');
if !key.is_empty() && !known.contains(key) {
unknown.push(UnknownKey {
key: key.to_string(),
line: start_line + i,
column: key_raw.len() - key_raw.trim_start().len(),
});
}
}
}
unknown
}
#[allow(dead_code)] pub fn split_comma_separated_globs(s: &str) -> Vec<&str> {
let mut result = Vec::new();
let mut brace_depth: usize = 0;
let mut bracket_depth: usize = 0;
let mut start = 0;
for (i, ch) in s.char_indices() {
match ch {
'{' => brace_depth += 1,
'}' => brace_depth = brace_depth.saturating_sub(1),
'[' => bracket_depth += 1,
']' => bracket_depth = bracket_depth.saturating_sub(1),
',' if brace_depth == 0 && bracket_depth == 0 => {
let segment = s[start..i].trim();
if !segment.is_empty() {
result.push(segment);
}
start = i + 1; }
_ => {}
}
}
let segment = s[start..].trim();
if !segment.is_empty() {
result.push(segment);
}
result
}
pub fn validate_glob_pattern(pattern: &str) -> GlobValidation {
match glob::Pattern::new(pattern) {
Ok(_) => GlobValidation {
valid: true,
pattern: pattern.to_string(),
error: None,
},
Err(e) => GlobValidation {
valid: false,
pattern: pattern.to_string(),
error: Some(e.to_string()),
},
}
}
pub fn is_body_empty(body: &str) -> bool {
body.trim().is_empty()
}
pub fn is_content_empty(content: &str) -> bool {
content.trim().is_empty()
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_parse_valid_frontmatter() {
let content = r#"---
applyTo: "**/*.ts"
---
# TypeScript Instructions
Use strict mode.
"#;
let result = parse_frontmatter(content).unwrap();
assert!(result.schema.is_some());
assert_eq!(
result.schema.as_ref().unwrap().apply_to,
Some("**/*.ts".to_string())
);
assert!(result.parse_error.is_none());
assert!(result.body.contains("TypeScript Instructions"));
}
#[test]
fn test_parse_frontmatter_no_apply_to() {
let content = r#"---
---
# Instructions
"#;
let result = parse_frontmatter(content).unwrap();
assert!(result.schema.is_some());
assert!(result.schema.as_ref().unwrap().apply_to.is_none());
}
#[test]
fn test_parse_no_frontmatter() {
let content = "# Just markdown without frontmatter";
let result = parse_frontmatter(content);
assert!(result.is_none());
}
#[test]
fn test_parse_unclosed_frontmatter() {
let content = r#"---
applyTo: "**/*.ts"
# Missing closing ---
"#;
let result = parse_frontmatter(content).unwrap();
assert!(result.parse_error.is_some());
assert_eq!(result.parse_error.as_ref().unwrap(), "missing closing ---");
}
#[test]
fn test_parse_invalid_yaml() {
let content = r#"---
applyTo: [unclosed
---
# Body
"#;
let result = parse_frontmatter(content).unwrap();
assert!(result.schema.is_none());
assert!(result.parse_error.is_some());
}
#[test]
fn test_detect_unknown_keys() {
let content = r#"---
applyTo: "**/*.ts"
unknownKey: value
anotherUnknown: 123
---
# Body
"#;
let result = parse_frontmatter(content).unwrap();
assert_eq!(result.unknown_keys.len(), 2);
assert!(result.unknown_keys.iter().any(|k| k.key == "unknownKey"));
assert!(
result
.unknown_keys
.iter()
.any(|k| k.key == "anotherUnknown")
);
}
#[test]
fn test_no_unknown_keys() {
let content = r#"---
applyTo: "**/*.rs"
---
# Body
"#;
let result = parse_frontmatter(content).unwrap();
assert!(result.unknown_keys.is_empty());
}
#[test]
fn test_valid_glob_patterns() {
let patterns = vec![
"**/*.ts",
"*.rs",
"src/**/*.js",
"tests/unit/*.test.ts",
"[abc].txt",
"file?.md",
];
for pattern in patterns {
let result = validate_glob_pattern(pattern);
assert!(result.valid, "Pattern '{}' should be valid", pattern);
}
}
#[test]
fn test_invalid_glob_pattern() {
let result = validate_glob_pattern("[unclosed");
assert!(!result.valid);
assert!(result.error.is_some());
}
#[test]
fn test_empty_body() {
assert!(is_body_empty(""));
assert!(is_body_empty(" "));
assert!(is_body_empty("\n\n\n"));
assert!(!is_body_empty("# Content"));
}
#[test]
fn test_empty_content() {
assert!(is_content_empty(""));
assert!(is_content_empty(" \n\t "));
assert!(!is_content_empty("# Instructions"));
}
#[test]
fn test_frontmatter_line_numbers() {
let content = r#"---
applyTo: "**/*.ts"
---
# Body
"#;
let result = parse_frontmatter(content).unwrap();
assert_eq!(result.start_line, 1);
assert_eq!(result.end_line, 3);
}
#[test]
fn test_unknown_key_line_numbers() {
let content = r#"---
applyTo: "**/*.ts"
unknownKey: value
---
# Body
"#;
let result = parse_frontmatter(content).unwrap();
assert_eq!(result.unknown_keys.len(), 1);
assert_eq!(result.unknown_keys[0].line, 3);
}
#[test]
fn test_parse_description() {
let content = r#"---
applyTo: "**/*.ts"
description: "TypeScript style rules"
---
# Body
"#;
let result = parse_frontmatter(content).unwrap();
assert!(result.parse_error.is_none());
let schema = result.schema.unwrap();
assert_eq!(schema.apply_to, Some("**/*.ts".to_string()));
assert_eq!(
schema.description,
Some("TypeScript style rules".to_string())
);
}
#[test]
fn test_description_not_unknown_key() {
let content = r#"---
applyTo: "**/*.ts"
description: "TypeScript style rules"
---
# Body
"#;
let result = parse_frontmatter(content).unwrap();
assert!(
result.parse_error.is_none(),
"frontmatter should parse cleanly, got error: {:?}",
result.parse_error
);
assert!(
result.unknown_keys.is_empty(),
"description should not be reported as unknown key (Copilot v0.43.0+), got: {:?}",
result.unknown_keys
);
}
#[test]
fn test_parse_exclude_agent() {
let content = r#"---
applyTo: "**/*.ts"
excludeAgent: "code-review"
---
# Body
"#;
let result = parse_frontmatter(content).unwrap();
assert!(result.schema.is_some());
let schema = result.schema.unwrap();
assert_eq!(schema.apply_to, Some("**/*.ts".to_string()));
assert_eq!(schema.exclude_agent, Some("code-review".to_string()));
assert!(result.parse_error.is_none());
}
#[test]
fn test_exclude_agent_not_unknown_key() {
let content = r#"---
applyTo: "**/*.ts"
excludeAgent: "coding-agent"
---
# Body
"#;
let result = parse_frontmatter(content).unwrap();
assert!(
result.unknown_keys.is_empty(),
"excludeAgent should not be reported as unknown key, got: {:?}",
result.unknown_keys
);
}
#[test]
fn test_exclude_agent_absent() {
let content = r#"---
applyTo: "**/*.ts"
---
# Body
"#;
let result = parse_frontmatter(content).unwrap();
assert!(result.schema.is_some());
assert!(result.schema.unwrap().exclude_agent.is_none());
}
#[test]
fn test_split_comma_single_pattern() {
assert_eq!(split_comma_separated_globs("**/*.ts"), vec!["**/*.ts"]);
}
#[test]
fn test_split_comma_two_patterns() {
assert_eq!(
split_comma_separated_globs("**/*.ts,**/*.tsx"),
vec!["**/*.ts", "**/*.tsx"]
);
}
#[test]
fn test_split_comma_with_spaces() {
assert_eq!(
split_comma_separated_globs("**/*.ts, **/*.tsx, **/*.js"),
vec!["**/*.ts", "**/*.tsx", "**/*.js"]
);
}
#[test]
fn test_split_comma_brace_expansion_preserved() {
assert_eq!(
split_comma_separated_globs("{src,lib}/**/*.ts"),
vec!["{src,lib}/**/*.ts"]
);
}
#[test]
fn test_split_comma_brace_plus_comma() {
assert_eq!(
split_comma_separated_globs("{src,lib}/**/*.ts,**/*.md"),
vec!["{src,lib}/**/*.ts", "**/*.md"]
);
}
#[test]
fn test_split_comma_nested_braces() {
assert_eq!(
split_comma_separated_globs("{{a,b},{c,d}}/*.ts"),
vec!["{{a,b},{c,d}}/*.ts"]
);
}
#[test]
fn test_split_comma_empty_string() {
let result: Vec<&str> = split_comma_separated_globs("");
assert!(result.is_empty());
}
#[test]
fn test_split_comma_trailing_comma() {
assert_eq!(split_comma_separated_globs("**/*.ts,"), vec!["**/*.ts"]);
}
#[test]
fn test_split_comma_leading_comma() {
assert_eq!(split_comma_separated_globs(",**/*.ts"), vec!["**/*.ts"]);
}
#[test]
fn test_split_comma_consecutive_commas() {
assert_eq!(
split_comma_separated_globs("**/*.ts,,**/*.tsx"),
vec!["**/*.ts", "**/*.tsx"]
);
}
#[test]
fn test_split_comma_unbalanced_braces() {
assert_eq!(
split_comma_separated_globs("{src,lib/**/*.ts,**/*.md"),
vec!["{src,lib/**/*.ts,**/*.md"]
);
assert_eq!(
split_comma_separated_globs("src}/**/*.ts,**/*.md"),
vec!["src}/**/*.ts", "**/*.md"]
);
}
#[test]
fn test_split_comma_bracket_character_class() {
assert_eq!(split_comma_separated_globs("**/*.[ts]"), vec!["**/*.[ts]"]);
assert_eq!(
split_comma_separated_globs("**/*.[ts],**/*.[js]"),
vec!["**/*.[ts]", "**/*.[js]"]
);
}
#[test]
fn test_split_comma_brace_and_bracket() {
assert_eq!(
split_comma_separated_globs("{src,lib}/**/*.[ts]"),
vec!["{src,lib}/**/*.[ts]"]
);
assert_eq!(
split_comma_separated_globs("{src,lib}/**/*.[ts],**/*.md"),
vec!["{src,lib}/**/*.[ts]", "**/*.md"]
);
}
#[test]
fn test_split_comma_unbalanced_brackets() {
assert_eq!(
split_comma_separated_globs("**/*.[ts,**/*.md"),
vec!["**/*.[ts,**/*.md"]
);
assert_eq!(
split_comma_separated_globs("**/*.ts],**/*.md"),
vec!["**/*.ts]", "**/*.md"]
);
}
}