use crate::types::{Eval, Frontmatter, ParseError, ParseResult, ParseWarning};
/// Largest input, in bytes, that `parse` will accept (1024 * 1024); anything
/// longer is rejected with `ParseError::FileTooLarge` before decoding.
const MAX_FILE_SIZE: usize = 1024 * 1024;
/// Parses the raw bytes of a prompt file into a `ParseResult`.
///
/// The input is size-checked, decoded as UTF-8, and split into an optional
/// YAML frontmatter section and a body. The frontmatter (if any) is validated,
/// the tier is derived from it, template variables are collected from the
/// body, and lint warnings are appended after the frontmatter warnings.
///
/// # Errors
///
/// * `ParseError::FileTooLarge` when `bytes` is longer than `MAX_FILE_SIZE`.
/// * `ParseError::InvalidUtf8` when `bytes` is not valid UTF-8.
/// * Whatever `parse_frontmatter` returns for an unacceptable frontmatter.
pub fn parse(bytes: &[u8]) -> Result<ParseResult, ParseError> {
    let size = bytes.len();
    if size > MAX_FILE_SIZE {
        return Err(ParseError::FileTooLarge { size });
    }

    let text = match std::str::from_utf8(bytes) {
        Ok(text) => text,
        Err(e) => return Err(ParseError::InvalidUtf8(e.to_string())),
    };

    let (raw_yaml, body) = split_frontmatter(text);
    let (frontmatter, mut warnings) = if let Some(yaml) = raw_yaml {
        parse_frontmatter(yaml)?
    } else {
        (None, Vec::new())
    };

    let tier = detect_tier(&frontmatter);
    let variables = extract_variables(body);
    // Lint warnings come after any warnings produced while validating the frontmatter.
    lint_warnings(tier, &frontmatter, &mut warnings);

    Ok(ParseResult {
        tier,
        frontmatter,
        body: body.to_owned(),
        variables,
        warnings,
    })
}
/// Splits `src` into an optional raw YAML frontmatter slice and the body.
///
/// Leading whitespace of `src` is always discarded. If the remaining text
/// does not begin with `---`, or no closing fence is located by
/// `find_closing_fence`, the result is `(None, trimmed_src)`. Otherwise the
/// text between the fences is returned as the YAML slice, and the body is the
/// text after the closing `---` with leading `\r` / `\n` characters removed.
fn split_frontmatter(src: &str) -> (Option<&str>, &str) {
    let trimmed = src.trim_start();
    let Some(after_fence) = trimmed.strip_prefix("---") else {
        return (None, trimmed);
    };

    // Drop the remainder of the opening fence line (and any blank lines) so
    // the candidate starts at the first YAML character.
    let candidate =
        after_fence.trim_start_matches(|c: char| matches!(c, ' ' | '\t' | '\r' | '\n'));

    match find_closing_fence(candidate) {
        Some(fence_at) => {
            let (yaml, tail) = candidate.split_at(fence_at);
            // `tail` starts with the three fence characters; skip them.
            let body = tail[3..].trim_start_matches(|c: char| c == '\r' || c == '\n');
            (Some(yaml), body)
        }
        None => (None, trimmed),
    }
}
/// Returns the byte offset of the closing `---` fence inside `haystack`.
///
/// A closing fence is a line that starts with `---` at column zero and
/// contains nothing else except trailing spaces, tabs or a line ending.
/// Lines that merely begin with three dashes (for example `----` or
/// `---text`) are ordinary content and are skipped, so they can no longer
/// cut the frontmatter short.
///
/// Returns `None` when no such line exists.
fn find_closing_fence(haystack: &str) -> Option<usize> {
    let mut offset = 0;
    // `split_inclusive` keeps the terminator, so summing line lengths yields
    // exact byte offsets into `haystack`.
    for line in haystack.split_inclusive('\n') {
        let is_fence = line
            .strip_prefix("---")
            .is_some_and(|tail| tail.chars().all(|c| matches!(c, ' ' | '\t' | '\r' | '\n')));
        if is_fence {
            return Some(offset);
        }
        offset += line.len();
    }
    None
}
/// Deserializes and validates the raw YAML frontmatter.
///
/// A blank `yaml` yields `(None, [])`. Otherwise the YAML is deserialized
/// into a `Frontmatter`, `version` and `id` are validated (in that order),
/// and an out-of-range `model.temperature` is reported as a warning rather
/// than an error.
///
/// # Errors
///
/// * `ParseError::InvalidFrontmatter` when the YAML cannot be deserialized.
/// * `ParseError::InvalidField` when `version` fails `semver_parse` or `id`
///   fails `is_valid_id`.
fn parse_frontmatter(yaml: &str) -> Result<(Option<Frontmatter>, Vec<ParseWarning>), ParseError> {
    if yaml.trim().is_empty() {
        return Ok((None, Vec::new()));
    }

    let frontmatter = match serde_yaml::from_str::<Frontmatter>(yaml) {
        Ok(parsed) => parsed,
        Err(e) => return Err(ParseError::InvalidFrontmatter(e.to_string())),
    };

    let bad_version = frontmatter
        .version
        .as_deref()
        .filter(|candidate| semver_parse(candidate).is_err());
    if let Some(version) = bad_version {
        return Err(ParseError::InvalidField {
            field: "version".to_string(),
            reason: format!("'{}' is not a valid semver string", version),
        });
    }

    let bad_id = frontmatter
        .id
        .as_deref()
        .filter(|candidate| !is_valid_id(candidate));
    if let Some(id) = bad_id {
        return Err(ParseError::InvalidField {
            field: "id".to_string(),
            reason: format!(
                "'{}' must be kebab-case, alphanumeric + hyphens, max 64 chars",
                id
            ),
        });
    }

    let mut warnings = Vec::new();
    // A temperature outside [0.0, 2.0] is suspicious but not fatal.
    let odd_temperature = frontmatter
        .model
        .as_ref()
        .and_then(|hints| hints.temperature)
        .filter(|value| !(0.0..=2.0).contains(value));
    if let Some(temp) = odd_temperature {
        warnings.push(ParseWarning {
            code: "temperature_out_of_range".to_string(),
            message: format!("temperature {} is outside [0.0, 2.0]", temp),
        });
    }

    Ok((Some(frontmatter), warnings))
}
/// Classifies a document into tier 1, 2 or 3 based on its frontmatter.
///
/// * Tier 1: no frontmatter, or frontmatter lacking `id` or `version`.
/// * Tier 3: `id` and `version` present and `evals` is a non-empty list.
/// * Tier 2: `id` and `version` present, without any evals.
fn detect_tier(frontmatter: &Option<Frontmatter>) -> u8 {
    match frontmatter {
        Some(fm) if fm.id.is_some() && fm.version.is_some() => {
            // A missing `evals` field is treated the same as an empty list.
            let evals: &[Eval] = fm.evals.as_deref().unwrap_or_default();
            if evals.is_empty() { 2 } else { 3 }
        }
        _ => 1,
    }
}
/// Collects the distinct variable names referenced as `{{name}}` in `body`.
///
/// The result is de-duplicated and sorted (it is built from a `BTreeSet`).
///
/// Rules applied to each tag:
///
/// * A tag must be closed by `}}`. An opener with no closing `}}` after it is
///   ignored, and when several `{{` precede one `}}` only the innermost
///   opener is paired with it.
/// * Whitespace padding after `{{` is allowed, so `{{ name }}` and `{{name}}`
///   are equivalent.
/// * Tags whose content starts with `#` or `/` (block open/close markers) do
///   not contribute a variable.
/// * Only the first whitespace-delimited token inside the tag is treated as
///   the name, and it must consist solely of alphanumeric characters, `_`
///   and `.`; anything else (including a leading `@`) is ignored.
fn extract_variables(body: &str) -> Vec<String> {
    let mut vars = std::collections::BTreeSet::new();
    let mut rest = body;

    while let Some(open) = rest.find("{{") {
        let after_open = &rest[open + 2..];
        // Without any `}}` in the remainder no further tag can be closed.
        let Some(close) = after_open.find("}}") else {
            break;
        };
        rest = &after_open[close + 2..];

        // If another `{{` appears before the closer, the earlier opener was
        // never terminated; pair the closer with the innermost opener.
        let inner = &after_open[..close];
        let inner = inner.rfind("{{").map_or(inner, |last| &inner[last + 2..]);
        let inner = inner.trim_start();

        if inner.starts_with(['#', '/']) {
            continue;
        }
        let Some(name) = inner.split_whitespace().next() else {
            continue;
        };
        if name
            .chars()
            .all(|c| c.is_alphanumeric() || c == '_' || c == '.')
        {
            vars.insert(name.to_string());
        }
    }

    vars.into_iter().collect()
}
/// Appends advisory warnings for tier 2+ documents to `warnings`.
///
/// Nothing is added for tier 1. For higher tiers a warning is pushed when the
/// frontmatter has no `description` and another when it has no `model`
/// section, in that order.
///
/// # Panics
///
/// Panics if `tier >= 2` while `frontmatter` is `None`; callers derive the
/// tier from the same frontmatter, so this indicates a broken invariant.
fn lint_warnings(tier: u8, frontmatter: &Option<Frontmatter>, warnings: &mut Vec<ParseWarning>) {
    if tier < 2 {
        return;
    }
    let fm = frontmatter.as_ref().expect("tier >= 2 implies frontmatter");

    // (field is absent, warning code, warning message)
    let checks = [
        (
            fm.description.is_none(),
            "missing_description",
            "No `description` field. Add one to improve commons discoverability.",
        ),
        (
            fm.model.is_none(),
            "missing_model_hints",
            "No `model` field. Specifying `model.preferred` improves reliability.",
        ),
    ];

    for (is_missing, code, message) in checks {
        if is_missing {
            warnings.push(ParseWarning {
                code: code.to_string(),
                message: message.to_string(),
            });
        }
    }
}
/// Checks that `s` is a valid Semantic Versioning 2.0.0 string.
///
/// Accepted form: `MAJOR.MINOR.PATCH[-PRERELEASE][+BUILD]` where
///
/// * the core has exactly three dot-separated numeric parts, each made of
///   ASCII digits only, without leading zeros, and fitting in a `u64`;
/// * `PRERELEASE` is one or more dot-separated identifiers of
///   `[0-9A-Za-z-]`, none empty, and purely numeric ones without leading
///   zeros;
/// * `BUILD` is one or more dot-separated non-empty identifiers of
///   `[0-9A-Za-z-]`.
///
/// Returns `Ok(())` for a valid version and `Err(())` otherwise.
fn semver_parse(s: &str) -> Result<(), ()> {
    /// Digits only, non-empty, and no leading zero unless the value is `0`.
    fn is_numeric_id(id: &str) -> bool {
        !id.is_empty()
            && id.bytes().all(|b| b.is_ascii_digit())
            && (id.len() == 1 || !id.starts_with('0'))
    }

    /// Non-empty run of ASCII alphanumerics and hyphens.
    fn is_identifier(id: &str) -> bool {
        !id.is_empty() && id.bytes().all(|b| b.is_ascii_alphanumeric() || b == b'-')
    }

    // Build metadata starts at the first `+`; it may itself contain hyphens,
    // so it has to be split off before looking for the pre-release hyphen.
    let (without_build, build) = match s.split_once('+') {
        Some((head, build)) => (head, Some(build)),
        None => (s, None),
    };
    let (core, pre) = match without_build.split_once('-') {
        Some((core, pre)) => (core, Some(pre)),
        None => (without_build, None),
    };

    let parts: Vec<&str> = core.split('.').collect();
    // `parse::<u64>` alone would accept a leading `+`, hence the digit check.
    let core_ok = parts.len() == 3
        && parts
            .iter()
            .all(|part| is_numeric_id(part) && part.parse::<u64>().is_ok());

    let pre_ok = match pre {
        None => true,
        Some(pre) => pre.split('.').all(|id| {
            let all_digits = id.bytes().all(|b| b.is_ascii_digit());
            is_identifier(id) && (!all_digits || is_numeric_id(id))
        }),
    };

    let build_ok = match build {
        None => true,
        Some(build) => build.split('.').all(is_identifier),
    };

    if core_ok && pre_ok && build_ok {
        Ok(())
    } else {
        Err(())
    }
}
/// Returns `true` when `id` is an acceptable identifier.
///
/// An identifier is 1 to 64 bytes long, consists only of ASCII letters,
/// ASCII digits, `-` and `_`, and neither starts nor ends with `-`.
///
/// NOTE(review): underscores and uppercase letters are accepted here, while
/// the error text emitted by `parse_frontmatter` describes ids as
/// "kebab-case, alphanumeric + hyphens" — confirm which one is intended.
fn is_valid_id(id: &str) -> bool {
    let bytes = id.as_bytes();

    if bytes.is_empty() || bytes.len() > 64 {
        return false;
    }
    if bytes.first() == Some(&b'-') || bytes.last() == Some(&b'-') {
        return false;
    }

    // Every allowed character is ASCII, so a byte-wise check is sufficient:
    // any non-ASCII character contributes bytes that fail this test.
    bytes
        .iter()
        .all(|&b| b.is_ascii_alphanumeric() || matches!(b, b'-' | b'_'))
}
// Unit tests driving the public `parse` entry point end to end.
// The multi-line byte-string fixtures below are whitespace-sensitive input
// data; do not re-indent them.
#[cfg(test)]
mod tests {
use super::*;
// Input without a frontmatter fence: expects tier 1, no frontmatter, and
// `input` among the extracted variables.
#[test]
fn tier1_plain_body() {
let src = b"Summarize the following: {{input}}";
let result = parse(src).unwrap();
assert_eq!(result.tier, 1);
assert!(result.frontmatter.is_none());
assert!(result.variables.contains(&"input".to_string()));
}
// Frontmatter carrying id, version, description and model hints: expects
// tier 2, the id/version values preserved, `name` among the variables, and
// no warnings at all.
#[test]
fn tier2_with_frontmatter() {
let src = b"---
id: greet
version: 1.0.0
description: Greet a user
model:
preferred: [claude-sonnet-4-6]
temperature: 0.3
input:
schema:
name: string
---
Hello, {{name}}!
";
let result = parse(src).unwrap();
assert_eq!(result.tier, 2);
let fm = result.frontmatter.unwrap();
assert_eq!(fm.id.as_deref(), Some("greet"));
assert_eq!(fm.version.as_deref(), Some("1.0.0"));
assert!(result.variables.contains(&"name".to_string()));
assert!(result.warnings.is_empty());
}
// Frontmatter that additionally lists an eval: expects tier 3 and exactly
// one parsed eval entry.
#[test]
fn tier3_with_evals() {
let src = b"---
id: summarize
version: 2.0.0
description: One-sentence summary
evals:
- description: Short text
input:
text: The sky is blue.
expect:
contains: blue
---
Summarize: {{text}}
";
let result = parse(src).unwrap();
assert_eq!(result.tier, 3);
assert_eq!(result.frontmatter.unwrap().evals.unwrap().len(), 1);
}
// A `version` that is not semver must be reported as
// `ParseError::InvalidField` naming the `version` field.
#[test]
fn invalid_version_is_error() {
let src = b"---
id: bad-ver
version: not-semver
---
body
";
let err = parse(src).unwrap_err();
assert!(matches!(err, ParseError::InvalidField { ref field, .. } if field == "version"));
}
// An `id` starting with a hyphen must be reported as
// `ParseError::InvalidField` naming the `id` field.
#[test]
fn invalid_id_is_error() {
let src = b"---
id: -bad-start
version: 1.0.0
---
body
";
let err = parse(src).unwrap_err();
assert!(matches!(err, ParseError::InvalidField { ref field, .. } if field == "id"));
}
// One byte more than MAX_FILE_SIZE is rejected with `FileTooLarge`.
#[test]
fn file_too_large() {
let huge = vec![b'x'; MAX_FILE_SIZE + 1];
let err = parse(&huge).unwrap_err();
assert!(matches!(err, ParseError::FileTooLarge { .. }));
}
// id + version but no description: a `missing_description` warning is
// expected among the returned warnings.
#[test]
fn missing_description_warns() {
let src = b"---
id: no-desc
version: 1.0.0
---
body
";
let result = parse(src).unwrap();
assert!(
result
.warnings
.iter()
.any(|w| w.code == "missing_description")
);
}
// Repeated placeholders are reported once each, in sorted order.
#[test]
fn variables_deduped_sorted() {
let src = b"{{b}} {{a}} {{b}} {{a}}";
let result = parse(src).unwrap();
assert_eq!(result.variables, vec!["a", "b"]);
}
}