Skip to main content

ito_core/
front_matter.rs

//! YAML front matter parsing, writing, and metadata utilities.
//!
//! Ito module and change markdown artifacts support an optional YAML front
//! matter header delimited by `---` lines at the beginning of the file.
//!
//! Front matter stores stable metadata (timestamps, identifiers, integrity
//! checksums) that is independent of filesystem attributes and survives
//! copies across hosts.
9
10use chrono::{DateTime, Utc};
11use serde::{Deserialize, Serialize};
12use sha2::{Digest, Sha256};
13use std::collections::BTreeMap;
14
15use crate::errors::CoreError;
16
17/// Parsed YAML front matter metadata for an Ito artifact.
18///
19/// Timestamps are stored as RFC 3339 strings to avoid requiring the `serde`
20/// feature on `chrono`. Use [`FrontMatter::created_at_dt`] and
21/// [`FrontMatter::updated_at_dt`] to parse them into `DateTime<Utc>`.
22#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
23pub struct FrontMatter {
24    /// Schema version for forward compatibility.
25    #[serde(default, skip_serializing_if = "Option::is_none")]
26    pub schema_version: Option<String>,
27
28    /// When the artifact was first created (RFC 3339 UTC string).
29    #[serde(default, skip_serializing_if = "Option::is_none")]
30    pub created_at: Option<String>,
31
32    /// When the artifact was last updated (RFC 3339 UTC string).
33    #[serde(default, skip_serializing_if = "Option::is_none")]
34    pub updated_at: Option<String>,
35
36    /// Identity of the creator (optional).
37    #[serde(default, skip_serializing_if = "Option::is_none")]
38    pub created_by: Option<String>,
39
40    /// Identity of the last updater (optional).
41    #[serde(default, skip_serializing_if = "Option::is_none")]
42    pub updated_by: Option<String>,
43
44    /// Change identifier for integrity validation.
45    #[serde(default, skip_serializing_if = "Option::is_none")]
46    pub change_id: Option<String>,
47
48    /// Module identifier for integrity validation.
49    #[serde(default, skip_serializing_if = "Option::is_none")]
50    pub module_id: Option<String>,
51
52    /// Integrity metadata for corruption detection.
53    #[serde(default, skip_serializing_if = "Option::is_none")]
54    pub integrity: Option<IntegrityMetadata>,
55
56    /// Additional fields not captured by the typed struct.
57    #[serde(flatten, default)]
58    pub extra: BTreeMap<String, serde_yaml::Value>,
59}
60
61impl FrontMatter {
62    /// Parse `created_at` into a `DateTime<Utc>`, if present and valid.
63    pub fn created_at_dt(&self) -> Option<DateTime<Utc>> {
64        self.created_at
65            .as_deref()
66            .and_then(|s| DateTime::parse_from_rfc3339(s).ok())
67            .map(|dt| dt.with_timezone(&Utc))
68    }
69
70    /// Parse `updated_at` into a `DateTime<Utc>`, if present and valid.
71    pub fn updated_at_dt(&self) -> Option<DateTime<Utc>> {
72        self.updated_at
73            .as_deref()
74            .and_then(|s| DateTime::parse_from_rfc3339(s).ok())
75            .map(|dt| dt.with_timezone(&Utc))
76    }
77}
78
79/// Integrity metadata for checksum-based corruption detection.
80#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
81pub struct IntegrityMetadata {
82    /// SHA-256 hex digest of the markdown body (content after front matter).
83    #[serde(default, skip_serializing_if = "Option::is_none")]
84    pub body_sha256: Option<String>,
85}
86
87/// Result of parsing front matter from a markdown document.
88#[derive(Debug, Clone, PartialEq)]
89pub struct ParsedDocument {
90    /// Parsed front matter, if present.
91    pub front_matter: Option<FrontMatter>,
92    /// Markdown body (everything after the closing `---`).
93    pub body: String,
94}
95
/// Marker line that opens and closes a YAML front matter block.
const DELIMITER: &str = "---";
98
99/// Parse a markdown document that may start with YAML front matter.
100///
101/// Front matter is delimited by `---` on a line by itself at the start
102/// of the document. The opening `---` must be the very first line.
103/// The closing `---` terminates the YAML block.
104///
105/// Returns the parsed metadata (if present) and the remaining markdown body.
106pub fn parse(content: &str) -> Result<ParsedDocument, CoreError> {
107    let Some(rest) = content.strip_prefix(DELIMITER) else {
108        return Ok(ParsedDocument {
109            front_matter: None,
110            body: content.to_string(),
111        });
112    };
113
114    // The delimiter must be followed by a newline (or be the entire first line)
115    let Some(rest) = rest
116        .strip_prefix('\n')
117        .or_else(|| rest.strip_prefix("\r\n"))
118    else {
119        // The line has content after `---`, so this is not front matter
120        return Ok(ParsedDocument {
121            front_matter: None,
122            body: content.to_string(),
123        });
124    };
125
126    // Find the closing delimiter
127    let Some(end_pos) = find_closing_delimiter(rest) else {
128        // No closing delimiter found — treat entire content as body
129        return Ok(ParsedDocument {
130            front_matter: None,
131            body: content.to_string(),
132        });
133    };
134
135    let yaml_block = &rest[..end_pos];
136    let body_start = end_pos + DELIMITER.len();
137    let remaining = &rest[body_start..];
138
139    // Strip exactly one leading newline from the body
140    let body = remaining
141        .strip_prefix('\n')
142        .or_else(|| remaining.strip_prefix("\r\n"))
143        .unwrap_or(remaining);
144
145    let front_matter: FrontMatter = serde_yaml::from_str(yaml_block)
146        .map_err(|e| CoreError::Parse(format!("invalid YAML front matter: {e}")))?;
147
148    Ok(ParsedDocument {
149        front_matter: Some(front_matter),
150        body: body.to_string(),
151    })
152}
153
154/// Find the position of the closing `---` delimiter in the remaining text.
155///
156/// The closing delimiter must appear on a line by itself (possibly with
157/// trailing whitespace).
158fn find_closing_delimiter(text: &str) -> Option<usize> {
159    let mut pos = 0;
160    for line in text.lines() {
161        if line.trim() == DELIMITER {
162            return Some(pos);
163        }
164        // Advance past this line plus its newline
165        pos += line.len();
166        // Account for the newline character(s)
167        if text[pos..].starts_with("\r\n") {
168            pos += 2;
169        } else if text[pos..].starts_with('\n') {
170            pos += 1;
171        }
172    }
173    None
174}
175
176/// Serialize front matter and body back into a markdown document with
177/// YAML front matter.
178///
179/// If `front_matter` is `None`, the body is returned as-is.
180pub fn write(front_matter: Option<&FrontMatter>, body: &str) -> Result<String, CoreError> {
181    let Some(fm) = front_matter else {
182        return Ok(body.to_string());
183    };
184
185    let yaml = serde_yaml::to_string(fm)
186        .map_err(|e| CoreError::Parse(format!("failed to serialize front matter: {e}")))?;
187
188    // serde_yaml adds a trailing newline; remove it so we control formatting
189    let yaml = yaml.trim_end();
190
191    Ok(format!("{DELIMITER}\n{yaml}\n{DELIMITER}\n{body}"))
192}
193
194/// Format a `DateTime<Utc>` as an RFC 3339 string for front matter.
195fn format_timestamp(dt: DateTime<Utc>) -> String {
196    dt.to_rfc3339_opts(chrono::SecondsFormat::Secs, true)
197}
198
199/// Update the `updated_at` timestamp in front matter to the current time.
200///
201/// If `front_matter` is `None`, creates a new `FrontMatter` with only
202/// `created_at` and `updated_at` set to `now`.
203pub fn touch(front_matter: Option<FrontMatter>, now: DateTime<Utc>) -> FrontMatter {
204    let ts = format_timestamp(now);
205    match front_matter {
206        Some(mut fm) => {
207            fm.updated_at = Some(ts);
208            fm
209        }
210        None => FrontMatter {
211            schema_version: Some("1".to_string()),
212            created_at: Some(ts.clone()),
213            updated_at: Some(ts),
214            created_by: None,
215            updated_by: None,
216            change_id: None,
217            module_id: None,
218            integrity: None,
219            extra: BTreeMap::new(),
220        },
221    }
222}
223
224/// Compute the SHA-256 hex digest of a body string.
225pub fn body_sha256(body: &str) -> String {
226    let mut hasher = Sha256::new();
227    hasher.update(body.as_bytes());
228    hex::encode(hasher.finalize())
229}
230
231/// Update the integrity checksum in front matter to match the given body.
232pub fn update_integrity(front_matter: &mut FrontMatter, body: &str) {
233    let checksum = body_sha256(body);
234    match &mut front_matter.integrity {
235        Some(integrity) => {
236            integrity.body_sha256 = Some(checksum);
237        }
238        None => {
239            front_matter.integrity = Some(IntegrityMetadata {
240                body_sha256: Some(checksum),
241            });
242        }
243    }
244}
245
246/// Validate that a front matter checksum matches the body content.
247///
248/// Returns `Ok(())` if there is no checksum or the checksum matches.
249/// Returns `Err` if the checksum is present but does not match.
250pub fn validate_integrity(front_matter: &FrontMatter, body: &str) -> Result<(), CoreError> {
251    let Some(integrity) = &front_matter.integrity else {
252        return Ok(());
253    };
254
255    let Some(expected) = &integrity.body_sha256 else {
256        return Ok(());
257    };
258
259    let actual = body_sha256(body);
260    if *expected != actual {
261        return Err(CoreError::Validation(format!(
262            "artifact body checksum mismatch: expected {expected}, got {actual}"
263        )));
264    }
265
266    Ok(())
267}
268
269/// Validate that a front matter identifier matches the expected value.
270///
271/// Returns `Ok(())` if the front matter field is `None` (absent).
272/// Returns `Err` if the field is present and does not match.
273pub fn validate_id(
274    field_name: &str,
275    front_matter_value: Option<&str>,
276    expected: &str,
277) -> Result<(), CoreError> {
278    let Some(actual) = front_matter_value else {
279        return Ok(());
280    };
281
282    if actual != expected {
283        return Err(CoreError::Validation(format!(
284            "{field_name} mismatch in front matter: expected '{expected}', found '{actual}'"
285        )));
286    }
287
288    Ok(())
289}
290
#[cfg(test)]
mod tests {
    use super::*;
    use chrono::{TimeZone, Timelike};

    /// Front matter with every field unset; tests override what they need.
    fn blank_front_matter() -> FrontMatter {
        FrontMatter {
            schema_version: None,
            created_at: None,
            updated_at: None,
            created_by: None,
            updated_by: None,
            change_id: None,
            module_id: None,
            integrity: None,
            extra: BTreeMap::new(),
        }
    }

    /// Build a fixed UTC instant from calendar components.
    fn utc(year: i32, month: u32, day: u32, hour: u32, min: u32, sec: u32) -> DateTime<Utc> {
        Utc.with_ymd_and_hms(year, month, day, hour, min, sec)
            .unwrap()
    }

    #[test]
    fn parse_no_front_matter() {
        let content = "# Hello\n\nSome content.";
        let doc = parse(content).unwrap();
        assert!(doc.front_matter.is_none());
        assert_eq!(doc.body, content);
    }

    #[test]
    fn parse_valid_front_matter() {
        let content =
            "---\nschema_version: \"1\"\ncreated_at: \"2026-01-15T10:00:00Z\"\n---\n# Hello\n";
        let doc = parse(content).unwrap();
        let fm = doc.front_matter.unwrap();
        assert_eq!(fm.schema_version.as_deref(), Some("1"));
        assert_eq!(fm.created_at.as_deref(), Some("2026-01-15T10:00:00Z"));
        assert_eq!(fm.created_at_dt().unwrap(), utc(2026, 1, 15, 10, 0, 0));
        assert_eq!(doc.body, "# Hello\n");
    }

    #[test]
    fn parse_empty_front_matter() {
        let doc = parse("---\n---\n# Body").unwrap();
        let fm = doc.front_matter.unwrap();
        assert!(fm.schema_version.is_none());
        assert_eq!(doc.body, "# Body");
    }

    #[test]
    fn parse_no_closing_delimiter() {
        let content = "---\nschema_version: 1\n# Not closed";
        let doc = parse(content).unwrap();
        // Without a closing delimiter the input is ordinary content.
        assert!(doc.front_matter.is_none());
        assert_eq!(doc.body, content);
    }

    #[test]
    fn parse_delimiter_with_extra_text_on_first_line() {
        let content = "--- extra stuff\nschema_version: 1\n---\nbody";
        let doc = parse(content).unwrap();
        // The first line is not a bare delimiter, so no front matter starts.
        assert!(doc.front_matter.is_none());
        assert_eq!(doc.body, content);
    }

    #[test]
    fn parse_invalid_yaml() {
        assert!(parse("---\n: : invalid:\n---\nbody").is_err());
    }

    #[test]
    fn parse_with_integrity() {
        let body = "# Content\n";
        let checksum = body_sha256(body);
        let content = format!("---\nintegrity:\n  body_sha256: {checksum}\n---\n{body}");
        let doc = parse(&content).unwrap();
        let fm = doc.front_matter.unwrap();
        let stored = fm.integrity.as_ref().unwrap().body_sha256.as_deref();
        assert_eq!(stored, Some(checksum.as_str()));
        assert_eq!(doc.body, body);
    }

    #[test]
    fn roundtrip_write_parse() {
        let now = utc(2026, 3, 1, 12, 0, 0);
        let fm = touch(None, now);
        let body = "# My proposal\n\nSome text.\n";

        let rendered = write(Some(&fm), body).unwrap();
        let reparsed = parse(&rendered).unwrap();

        let reparsed_fm = reparsed.front_matter.as_ref().unwrap();
        assert_eq!(reparsed_fm.created_at_dt(), Some(now));
        assert_eq!(reparsed.body, body);
    }

    #[test]
    fn write_no_front_matter_returns_body() {
        let body = "# Just body\n";
        assert_eq!(write(None, body).unwrap(), body);
    }

    #[test]
    fn touch_creates_new_front_matter() {
        let now = Utc::now();
        let fm = touch(None, now);
        assert!(fm.created_at.is_some());
        assert!(fm.updated_at.is_some());
        assert_eq!(fm.created_at, fm.updated_at);
        assert_eq!(fm.schema_version.as_deref(), Some("1"));
        // Stored timestamps have whole-second precision.
        assert_eq!(fm.created_at_dt(), Some(now.with_nanosecond(0).unwrap()));
    }

    #[test]
    fn touch_updates_existing() {
        let first = utc(2026, 1, 1, 0, 0, 0);
        let second = utc(2026, 3, 1, 0, 0, 0);
        let updated = touch(Some(touch(None, first)), second);
        // Creation time is preserved; only the update time moves.
        assert_eq!(updated.created_at_dt(), Some(first));
        assert_eq!(updated.updated_at_dt(), Some(second));
    }

    #[test]
    fn body_sha256_is_deterministic() {
        let body = "# Hello world\n";
        let first = body_sha256(body);
        let second = body_sha256(body);
        assert_eq!(first, second);
        assert_eq!(first.len(), 64);
    }

    #[test]
    fn update_integrity_sets_checksum() {
        let body = "Some content\n";
        let mut fm = touch(None, Utc::now());
        update_integrity(&mut fm, body);
        let expected = body_sha256(body);
        let stored = fm.integrity.as_ref().unwrap().body_sha256.as_deref();
        assert_eq!(stored, Some(expected.as_str()));
    }

    #[test]
    fn validate_integrity_passes_when_matching() {
        let body = "# Good content\n";
        let mut fm = touch(None, Utc::now());
        update_integrity(&mut fm, body);
        assert!(validate_integrity(&fm, body).is_ok());
    }

    #[test]
    fn validate_integrity_fails_on_mismatch() {
        let mut fm = touch(None, Utc::now());
        update_integrity(&mut fm, "# Good content\n");
        let outcome = validate_integrity(&fm, "# Tampered content\n");
        assert!(outcome.is_err());
        let message = outcome.unwrap_err().to_string();
        assert!(message.contains("checksum mismatch"));
    }

    #[test]
    fn validate_integrity_passes_when_no_checksum() {
        let fm = touch(None, Utc::now());
        assert!(validate_integrity(&fm, "anything").is_ok());
    }

    #[test]
    fn validate_id_passes_when_absent() {
        assert!(validate_id("change_id", None, "024-10").is_ok());
    }

    #[test]
    fn validate_id_passes_when_matching() {
        assert!(validate_id("change_id", Some("024-10"), "024-10").is_ok());
    }

    #[test]
    fn validate_id_fails_on_mismatch() {
        let outcome = validate_id("change_id", Some("999-99_bad"), "024-10");
        assert!(outcome.is_err());
        let message = outcome.unwrap_err().to_string();
        assert!(message.contains("change_id"));
        assert!(message.contains("mismatch"));
    }

    #[test]
    fn parse_preserves_extra_fields() {
        let doc = parse("---\ncustom_field: hello\n---\nbody").unwrap();
        let fm = doc.front_matter.unwrap();
        assert_eq!(
            fm.extra.get("custom_field"),
            Some(&serde_yaml::Value::String("hello".to_string()))
        );
    }

    #[test]
    fn format_timestamp_produces_rfc3339() {
        let ts = format_timestamp(utc(2026, 3, 1, 12, 30, 45));
        assert_eq!(ts, "2026-03-01T12:30:45Z");
    }

    #[test]
    fn created_at_dt_returns_none_when_absent() {
        let fm = blank_front_matter();
        assert!(fm.created_at_dt().is_none());
    }

    #[test]
    fn created_at_dt_returns_none_for_invalid_timestamp() {
        let fm = FrontMatter {
            created_at: Some("not-a-date".to_string()),
            ..blank_front_matter()
        };
        assert!(fm.created_at_dt().is_none());
    }
}