Skip to main content

intr_parser/
parse.rs

1use crate::types::{Eval, Frontmatter, ParseError, ParseResult, ParseWarning};
2
/// Largest input, in bytes, that [`parse`] accepts (1 MiB).
const MAX_FILE_SIZE: usize = 1 << 20;
4
5/// Parse a `.prompt` file from raw bytes.
6///
7/// Determines the tier, splits frontmatter from body, extracts Handlebars
8/// variables, and validates required fields.
9///
10/// # Errors
11///
12/// Returns [`ParseError`] if the file exceeds 1 MB, is not valid UTF-8, or
13/// has malformed YAML frontmatter.
14pub fn parse(bytes: &[u8]) -> Result<ParseResult, ParseError> {
15    if bytes.len() > MAX_FILE_SIZE {
16        return Err(ParseError::FileTooLarge { size: bytes.len() });
17    }
18
19    let src = std::str::from_utf8(bytes).map_err(|e| ParseError::InvalidUtf8(e.to_string()))?;
20
21    let (frontmatter_raw, body) = split_frontmatter(src);
22
23    let (frontmatter, warnings) = match frontmatter_raw {
24        Some(yaml) => parse_frontmatter(yaml)?,
25        None => (None, vec![]),
26    };
27
28    let tier = detect_tier(&frontmatter);
29    let variables = extract_variables(body);
30
31    let mut all_warnings = warnings;
32    lint_warnings(tier, &frontmatter, &mut all_warnings);
33
34    Ok(ParseResult {
35        tier,
36        frontmatter,
37        body: body.to_string(),
38        variables,
39        warnings: all_warnings,
40    })
41}
42
43// ---------------------------------------------------------------------------
44// Frontmatter splitting
45// ---------------------------------------------------------------------------
46
/// Splits a `.prompt` source into its YAML frontmatter and its body.
///
/// Leading whitespace in `src` is ignored. Frontmatter is recognised only
/// when the first line is a `---` fence and a later line is a closing `---`
/// fence; a fence must be the only content of its line (trailing spaces,
/// tabs and a carriage return are tolerated), so `----` or `---title` are
/// ordinary text.
///
/// Returns `(Some(yaml), body)` where `yaml` is the text strictly between the
/// two fence lines, with its indentation left intact, and `body` is everything
/// after the closing fence line with leading blank lines removed. Returns
/// `(None, src)` (with leading whitespace trimmed) when either fence is
/// missing.
fn split_frontmatter(src: &str) -> (Option<&str>, &str) {
    let src = src.trim_start();

    // Only the opening fence line itself is consumed; the YAML that follows
    // keeps its indentation, which is significant.
    let (first_line, after_open) = src.split_once('\n').unwrap_or((src, ""));
    if !is_fence_line(first_line) {
        return (None, src);
    }

    let Some(close_pos) = find_closing_fence(after_open) else {
        // No closing fence - treat entire file as body (Tier 1 fallback).
        return (None, src);
    };

    let yaml = &after_open[..close_pos];

    // Skip the whole closing fence line, including any trailing whitespace on
    // it, then drop the blank lines that separate the fence from the body.
    let after_close = after_open[close_pos..]
        .split_once('\n')
        .map_or("", |(_, rest)| rest);
    let body = after_close.trim_start_matches(['\r', '\n']);

    (Some(yaml), body)
}

/// Reports whether `line` is a frontmatter fence: exactly `---`, optionally
/// followed by spaces, tabs or a line terminator.
fn is_fence_line(line: &str) -> bool {
    line.trim_end_matches([' ', '\t', '\r', '\n']) == "---"
}

/// Finds the closing `---` fence within `haystack`.
///
/// Returns the byte offset at which the first fence line starts, or `None`
/// when no line of `haystack` is a fence. The offset is always the start of a
/// line, so it can be used to slice `haystack`.
fn find_closing_fence(haystack: &str) -> Option<usize> {
    let mut offset = 0;
    for line in haystack.split_inclusive('\n') {
        if is_fence_line(line) {
            return Some(offset);
        }
        offset += line.len();
    }
    None
}
85
86// ---------------------------------------------------------------------------
87// Frontmatter parsing
88// ---------------------------------------------------------------------------
89
90fn parse_frontmatter(yaml: &str) -> Result<(Option<Frontmatter>, Vec<ParseWarning>), ParseError> {
91    if yaml.trim().is_empty() {
92        return Ok((None, vec![]));
93    }
94
95    let frontmatter: Frontmatter =
96        serde_yaml::from_str(yaml).map_err(|e| ParseError::InvalidFrontmatter(e.to_string()))?;
97
98    let mut warnings = vec![];
99
100    // Validate `version` is valid semver if present.
101    if let Some(ref ver) = frontmatter.version
102        && semver_parse(ver).is_err()
103    {
104        return Err(ParseError::InvalidField {
105            field: "version".to_string(),
106            reason: format!("'{}' is not a valid semver string", ver),
107        });
108    }
109
110    // Validate `id` is kebab-case if present.
111    if let Some(ref id) = frontmatter.id
112        && !is_valid_id(id)
113    {
114        return Err(ParseError::InvalidField {
115            field: "id".to_string(),
116            reason: format!(
117                "'{}' must be kebab-case, alphanumeric + hyphens, max 64 chars",
118                id
119            ),
120        });
121    }
122
123    // Warn if temperature is out of [0, 2].
124    if let Some(ref hints) = frontmatter.model
125        && let Some(temp) = hints.temperature
126        && !(0.0..=2.0).contains(&temp)
127    {
128        warnings.push(ParseWarning {
129            code: "temperature_out_of_range".to_string(),
130            message: format!("temperature {} is outside [0.0, 2.0]", temp),
131        });
132    }
133
134    Ok((Some(frontmatter), warnings))
135}
136
137// ---------------------------------------------------------------------------
138// Tier detection
139// ---------------------------------------------------------------------------
140
141fn detect_tier(frontmatter: &Option<Frontmatter>) -> u8 {
142    let Some(fm) = frontmatter else {
143        return 1;
144    };
145
146    // Tier 2 requires both `id` and `version`.
147    if fm.id.is_none() || fm.version.is_none() {
148        return 1;
149    }
150
151    // Tier 3 requires non-empty evals.
152    if fm.evals.as_ref().is_some_and(|e: &Vec<Eval>| !e.is_empty()) {
153        return 3;
154    }
155
156    2
157}
158
159// ---------------------------------------------------------------------------
160// Variable extraction
161// ---------------------------------------------------------------------------
162
/// Extracts variable names from the Handlebars tags in `body`.
///
/// A name is collected from:
///
/// * plain tags - `{{name}}`, `{{ name }}` and the unescaped form
///   `{{{name}}}` (for `{{name arg}}` only the first word is taken);
/// * block helpers - the first argument of `{{#if name}}`,
///   `{{#each items}}` and similar `#` tags.
///
/// Closing tags (`{{/if}}`), the `{{else}}` keyword, and anything that is not
/// made solely of alphanumerics, `_` and `.` (comments, partials, `@data`
/// variables, sub-expressions, string literals) are ignored. A tag ends at the
/// first `}`; an unterminated tag runs to the end of `body`.
///
/// Returns a sorted, de-duplicated list.
fn extract_variables(body: &str) -> Vec<String> {
    // A BTreeSet gives both de-duplication and sorted output.
    let mut vars = std::collections::BTreeSet::new();
    let mut rest = body;

    while let Some(open) = rest.find("{{") {
        let after_open = &rest[open + 2..];
        let end = after_open.find('}').unwrap_or(after_open.len());

        if let Some(name) = variable_in_tag(&after_open[..end]) {
            vars.insert(name.to_string());
        }

        // Resume after this tag's content; the closing braces cannot start a
        // new tag, so they are skipped by the next `find`.
        rest = &after_open[end..];
    }

    vars.into_iter().collect()
}

/// Returns the variable referenced by the text between `{{` and the first
/// `}` of a tag, or `None` when the tag does not name one.
fn variable_in_tag(tag: &str) -> Option<&str> {
    // `{{{name}}}` leaves one extra `{` in front of the name.
    let tag = tag.strip_prefix('{').unwrap_or(tag);

    let mut words = tag.split_whitespace();
    let first = words.next()?;

    let candidate = if first.starts_with('/') {
        // Closing tag of a block: names a helper, not a variable.
        return None;
    } else if first.starts_with('#') {
        // Block helper: the variable is its first argument.
        words.next()?
    } else {
        first
    };

    let is_path = candidate
        .chars()
        .all(|c| c.is_alphanumeric() || c == '_' || c == '.');

    // `else` is a Handlebars keyword, never a template input.
    (is_path && candidate != "else").then_some(candidate)
}
214
215// ---------------------------------------------------------------------------
216// Lint warnings
217// ---------------------------------------------------------------------------
218
219fn lint_warnings(tier: u8, frontmatter: &Option<Frontmatter>, warnings: &mut Vec<ParseWarning>) {
220    if tier >= 2 {
221        let fm = frontmatter.as_ref().expect("tier >= 2 implies frontmatter");
222
223        if fm.description.is_none() {
224            warnings.push(ParseWarning {
225                code: "missing_description".to_string(),
226                message: "No `description` field. Add one to improve commons discoverability."
227                    .to_string(),
228            });
229        }
230
231        if fm.model.is_none() {
232            warnings.push(ParseWarning {
233                code: "missing_model_hints".to_string(),
234                message: "No `model` field. Specifying `model.preferred` improves reliability."
235                    .to_string(),
236            });
237        }
238    }
239}
240
241// ---------------------------------------------------------------------------
242// Helpers
243// ---------------------------------------------------------------------------
244
/// Minimal semver validation: `MAJOR.MINOR.PATCH` with optional
/// `-pre.release` and `+build.metadata` suffixes.
///
/// The core must consist of exactly three dot-separated components, each made
/// only of ASCII digits and small enough to fit in a `u64`. Pre-release and
/// build metadata, when present, must be dot-separated, non-empty identifiers
/// drawn from `[0-9A-Za-z-]`. Leading zeros are tolerated; this check is
/// deliberately more lenient than the specification on that point.
///
/// Returns `Ok(())` when `s` has that shape and `Err(())` otherwise.
fn semver_parse(s: &str) -> Result<(), ()> {
    /// Digits only: `str::parse::<u64>` alone would also accept a leading `+`.
    fn is_numeric(part: &str) -> bool {
        !part.is_empty()
            && part.bytes().all(|b| b.is_ascii_digit())
            && part.parse::<u64>().is_ok()
    }

    /// Dot-separated, non-empty identifiers of ASCII alphanumerics and `-`.
    fn is_identifier_list(list: &str) -> bool {
        list.split('.').all(|id| {
            !id.is_empty() && id.bytes().all(|b| b.is_ascii_alphanumeric() || b == b'-')
        })
    }

    // Build metadata is split off first because it may itself contain `-`.
    let (version, build) = match s.split_once('+') {
        Some((version, build)) => (version, Some(build)),
        None => (s, None),
    };
    let (core, pre_release) = match version.split_once('-') {
        Some((core, pre_release)) => (core, Some(pre_release)),
        None => (version, None),
    };

    let core_ok = core.split('.').count() == 3 && core.split('.').all(is_numeric);
    let suffixes_ok = pre_release.into_iter().chain(build).all(is_identifier_list);

    if core_ok && suffixes_ok {
        Ok(())
    } else {
        Err(())
    }
}
258
/// Checks the shape of an `id` field.
///
/// An id is accepted when it is 1-64 bytes long, consists only of ASCII
/// letters, ASCII digits, `-` and `_`, and neither starts nor ends with `-`.
///
/// NOTE(review): underscores and uppercase letters pass this check even
/// though callers describe ids as "kebab-case, alphanumeric + hyphens" -
/// confirm which of the two is intended.
fn is_valid_id(id: &str) -> bool {
    match id.as_bytes() {
        // Empty, or a hyphen at either edge.
        [] | [b'-', ..] | [.., b'-'] => false,
        bytes => {
            bytes.len() <= 64
                && bytes
                    .iter()
                    .all(|&b| b.is_ascii_alphanumeric() || matches!(b, b'-' | b'_'))
        }
    }
}
269
270// ---------------------------------------------------------------------------
271// Tests
272// ---------------------------------------------------------------------------
273
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `text`, panicking when the parser rejects it.
    fn parse_ok(text: &str) -> ParseResult {
        parse(text.as_bytes()).unwrap()
    }

    /// Returns the offending field name when parsing `text` fails with
    /// `ParseError::InvalidField`, and `None` for any other outcome.
    fn invalid_field(text: &str) -> Option<String> {
        match parse(text.as_bytes()) {
            Err(ParseError::InvalidField { field, .. }) => Some(field),
            _ => None,
        }
    }

    #[test]
    fn tier1_plain_body() {
        let parsed = parse_ok("Summarize the following: {{input}}");

        assert_eq!(parsed.tier, 1);
        assert!(parsed.frontmatter.is_none());
        assert!(parsed.variables.iter().any(|var| var == "input"));
    }

    #[test]
    fn tier2_with_frontmatter() {
        let parsed = parse_ok(concat!(
            "---\n",
            "id: greet\n",
            "version: 1.0.0\n",
            "description: Greet a user\n",
            "model:\n",
            "  preferred: [claude-sonnet-4-6]\n",
            "  temperature: 0.3\n",
            "input:\n",
            "  schema:\n",
            "    name: string\n",
            "---\n",
            "Hello, {{name}}!\n",
        ));

        assert_eq!(parsed.tier, 2);
        let fm = parsed.frontmatter.unwrap();
        assert_eq!(fm.id.as_deref(), Some("greet"));
        assert_eq!(fm.version.as_deref(), Some("1.0.0"));
        assert!(parsed.variables.iter().any(|var| var == "name"));
        assert!(parsed.warnings.is_empty());
    }

    #[test]
    fn tier3_with_evals() {
        let parsed = parse_ok(concat!(
            "---\n",
            "id: summarize\n",
            "version: 2.0.0\n",
            "description: One-sentence summary\n",
            "evals:\n",
            "  - description: Short text\n",
            "    input:\n",
            "      text: The sky is blue.\n",
            "    expect:\n",
            "      contains: blue\n",
            "---\n",
            "Summarize: {{text}}\n",
        ));

        assert_eq!(parsed.tier, 3);
        let evals = parsed.frontmatter.unwrap().evals.unwrap();
        assert_eq!(evals.len(), 1);
    }

    #[test]
    fn invalid_version_is_error() {
        let field = invalid_field("---\nid: bad-ver\nversion: not-semver\n---\nbody\n");
        assert_eq!(field.as_deref(), Some("version"));
    }

    #[test]
    fn invalid_id_is_error() {
        let field = invalid_field("---\nid: -bad-start\nversion: 1.0.0\n---\nbody\n");
        assert_eq!(field.as_deref(), Some("id"));
    }

    #[test]
    fn file_too_large() {
        let oversized = [b'x'].repeat(MAX_FILE_SIZE + 1);
        let outcome = parse(&oversized);
        assert!(matches!(outcome, Err(ParseError::FileTooLarge { .. })));
    }

    #[test]
    fn missing_description_warns() {
        let parsed = parse_ok("---\nid: no-desc\nversion: 1.0.0\n---\nbody\n");
        let codes: Vec<&str> = parsed.warnings.iter().map(|w| w.code.as_str()).collect();
        assert!(codes.contains(&"missing_description"));
    }

    #[test]
    fn variables_deduped_sorted() {
        let parsed = parse_ok("{{b}} {{a}} {{b}} {{a}}");
        assert_eq!(parsed.variables, vec!["a", "b"]);
    }
}