//! dm_meta/lib.rs
//!
//! YAML frontmatter extraction, parsing, and validation for Markdown documents.

1use std::path::{Path, PathBuf};
2
3use chrono::NaiveDate;
4use serde::{Deserialize, Serialize};
5
6// ---------------------------------------------------------------------------
7// Error
8// ---------------------------------------------------------------------------
9
10/// Errors that can occur during frontmatter parsing and document reading.
11#[derive(Debug, thiserror::Error)]
12pub enum MetaError {
13    #[error("YAML parse error: {0}")]
14    Yaml(#[from] serde_yaml::Error),
15    #[error("IO error: {0}")]
16    Io(#[from] std::io::Error),
17    #[error("missing frontmatter in {path}")]
18    MissingFrontmatter { path: String },
19}
20
21// ---------------------------------------------------------------------------
22// Raw frontmatter
23// ---------------------------------------------------------------------------
24
25/// Raw frontmatter deserialized from YAML. All fields optional to handle
26/// any document category (active, design, research, archive).
27#[derive(Debug, Clone, Default, Serialize, Deserialize)]
28#[serde(default)]
29pub struct RawFrontmatter {
30    #[serde(skip_serializing_if = "Option::is_none")]
31    pub title: Option<String>,
32    #[serde(skip_serializing_if = "Option::is_none")]
33    pub version: Option<f64>,
34    #[serde(skip_serializing_if = "Option::is_none")]
35    pub status: Option<String>,
36    #[serde(skip_serializing_if = "Option::is_none")]
37    pub created: Option<NaiveDate>,
38    #[serde(skip_serializing_if = "Option::is_none")]
39    pub last_updated: Option<NaiveDate>,
40    #[serde(skip_serializing_if = "Option::is_none")]
41    pub author: Option<String>,
42    #[serde(skip_serializing_if = "Option::is_none")]
43    pub owner: Option<String>,
44    #[serde(skip_serializing_if = "Option::is_none")]
45    pub reviewers: Option<Vec<String>>,
46    #[serde(skip_serializing_if = "Option::is_none")]
47    pub next_review: Option<NaiveDate>,
48    #[serde(skip_serializing_if = "Option::is_none")]
49    pub tags: Option<Vec<String>>,
50    #[serde(skip_serializing_if = "Option::is_none")]
51    pub related_docs: Option<Vec<String>>,
52    #[serde(skip_serializing_if = "Option::is_none")]
53    pub supersedes: Option<String>,
54    #[serde(skip_serializing_if = "Option::is_none")]
55    pub superseded_by: Option<String>,
56    // Design doc specific
57    #[serde(skip_serializing_if = "Option::is_none")]
58    pub doc_id: Option<u32>,
59    #[serde(skip_serializing_if = "Option::is_none")]
60    pub decision_date: Option<NaiveDate>,
61    #[serde(skip_serializing_if = "Option::is_none")]
62    pub implementation_pr: Option<u32>,
63    #[serde(skip_serializing_if = "Option::is_none")]
64    pub related_issues: Option<Vec<u32>>,
65    // Research specific
66    #[serde(rename = "type", skip_serializing_if = "Option::is_none")]
67    pub doc_type: Option<String>,
68    #[serde(skip_serializing_if = "Option::is_none")]
69    pub may_become_design_doc: Option<bool>,
70    // Archive specific
71    #[serde(skip_serializing_if = "Option::is_none")]
72    pub archived_date: Option<NaiveDate>,
73    #[serde(skip_serializing_if = "Option::is_none")]
74    pub archived_reason: Option<String>,
75    #[serde(skip_serializing_if = "Option::is_none")]
76    pub historical_value: Option<String>,
77}
78
79// ---------------------------------------------------------------------------
80// Category
81// ---------------------------------------------------------------------------
82
83/// Document category inferred from its file path.
84#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
85#[serde(rename_all = "lowercase")]
86pub enum Category {
87    Active,
88    Design,
89    Research,
90    Archive,
91}
92
93impl std::fmt::Display for Category {
94    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
95        match self {
96            Category::Active => write!(f, "active"),
97            Category::Design => write!(f, "design"),
98            Category::Research => write!(f, "research"),
99            Category::Archive => write!(f, "archive"),
100        }
101    }
102}
103
104// ---------------------------------------------------------------------------
105// Status enums
106// ---------------------------------------------------------------------------
107
108/// Status for active/living documents.
109#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
110#[serde(rename_all = "lowercase")]
111pub enum DocStatus {
112    Active,
113    Deprecated,
114    Draft,
115}
116
117/// Status for design documents.
118#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
119#[serde(rename_all = "lowercase")]
120pub enum DesignStatus {
121    Proposed,
122    Accepted,
123    Implemented,
124    Rejected,
125}
126
127/// Status for research documents.
128#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
129#[serde(rename_all = "lowercase")]
130pub enum ResearchStatus {
131    Draft,
132    Published,
133    Obsolete,
134}
135
136// ---------------------------------------------------------------------------
137// Document
138// ---------------------------------------------------------------------------
139
140/// A parsed document with its path, frontmatter, inferred category, and body.
141#[derive(Debug, Clone)]
142pub struct Document {
143    pub path: PathBuf,
144    pub frontmatter: RawFrontmatter,
145    pub category: Category,
146    pub body: String,
147}
148
149// ---------------------------------------------------------------------------
150// Validation
151// ---------------------------------------------------------------------------
152
/// How serious a validation finding is.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Severity {
    /// Most severe level; used e.g. for a missing title or an invalid status.
    Error,
    /// Used e.g. for a missing author or creation date.
    Warning,
    /// Informational level; not emitted by the validation in this file.
    Info,
}
160
161/// A single validation issue found in a document's frontmatter.
162#[derive(Debug, Clone)]
163pub struct ValidationIssue {
164    pub path: PathBuf,
165    pub severity: Severity,
166    pub message: String,
167}
168
169// ---------------------------------------------------------------------------
170// Frontmatter extraction
171// ---------------------------------------------------------------------------
172
/// Extract YAML frontmatter and body from markdown content.
///
/// The content must begin with a `---` line (LF or CRLF terminated). A
/// leading UTF-8 byte-order mark is tolerated: some editors prepend one, and
/// `std::fs::read_to_string` does not strip it, which would otherwise hide
/// the opening delimiter.
///
/// Returns `Some((yaml_str, body_str))` if both delimiters are found,
/// `None` otherwise. `yaml_str` is the text between the delimiters (including
/// its trailing newline); `body_str` starts after the single line break that
/// follows the closing `---`. Both are slices of `content`.
pub fn extract_frontmatter(content: &str) -> Option<(&str, &str)> {
    // Ignore a BOM so the opening delimiter is still recognised.
    let content = content.strip_prefix('\u{feff}').unwrap_or(content);

    // Must start with "---" followed by a newline.
    let rest = content
        .strip_prefix("---\n")
        .or_else(|| content.strip_prefix("---\r\n"))?;

    // Find the closing "---" on its own line.
    let close = find_closing_delimiter(rest)?;
    let (yaml, tail) = rest.split_at(close);
    let after = &tail[3..]; // skip the ASCII "---" itself

    // Drop exactly one line break after the closing delimiter, if present.
    let body = after
        .strip_prefix('\n')
        .or_else(|| after.strip_prefix("\r\n"))
        .unwrap_or(after);
    Some((yaml, body))
}

/// Find the byte offset of a closing `---` that sits on its own line.
///
/// A line qualifies when it starts with `---` and the delimiter is followed
/// immediately by `\n`, `\r`, or the end of input. Dashes in the middle of a
/// line, or longer runs such as `----`, do not count.
fn find_closing_delimiter(s: &str) -> Option<usize> {
    let mut offset = 0;
    // `split_inclusive` keeps each line's terminator, so summing the line
    // lengths yields exact byte offsets into `s`.
    for line in s.split_inclusive('\n') {
        if let Some(tail) = line.strip_prefix("---") {
            if tail.is_empty() || tail.starts_with('\n') || tail.starts_with('\r') {
                return Some(offset);
            }
        }
        offset += line.len();
    }
    None
}
213
214// ---------------------------------------------------------------------------
215// Parsing
216// ---------------------------------------------------------------------------
217
218/// Parse a YAML string into `RawFrontmatter`.
219pub fn parse_frontmatter(yaml_str: &str) -> Result<RawFrontmatter, MetaError> {
220    let fm: RawFrontmatter = serde_yaml::from_str(yaml_str)?;
221    Ok(fm)
222}
223
224/// Infer document category from its file path.
225pub fn infer_category(path: &Path) -> Category {
226    let s = path.to_string_lossy();
227    // Normalise backslashes for Windows compatibility.
228    let norm = s.replace('\\', "/");
229    if norm.contains("/active/") || norm.starts_with("active/") {
230        Category::Active
231    } else if norm.contains("/design/") || norm.starts_with("design/") {
232        Category::Design
233    } else if norm.contains("/research/") || norm.starts_with("research/") {
234        Category::Research
235    } else if norm.contains("/archive/") || norm.starts_with("archive/") {
236        Category::Archive
237    } else {
238        Category::Active
239    }
240}
241
242/// Return a normalised status string for the document given its category.
243pub fn resolve_status(raw: &RawFrontmatter, category: Category) -> String {
244    let status = raw.status.as_deref().unwrap_or("").to_lowercase();
245    match category {
246        Category::Active => {
247            match status.as_str() {
248                "active" | "deprecated" | "draft" => status,
249                _ => "active".to_string(),
250            }
251        }
252        Category::Design => {
253            match status.as_str() {
254                "proposed" | "accepted" | "implemented" | "rejected" => status,
255                _ => "proposed".to_string(),
256            }
257        }
258        Category::Research => {
259            match status.as_str() {
260                "draft" | "published" | "obsolete" => status,
261                _ => "draft".to_string(),
262            }
263        }
264        Category::Archive => "archived".to_string(),
265    }
266}
267
268/// Read a file, parse its frontmatter, and return a `Document`.
269pub fn parse_document(path: &Path) -> Result<Document, MetaError> {
270    let content = std::fs::read_to_string(path)?;
271    let category = infer_category(path);
272
273    let (frontmatter, body) = match extract_frontmatter(&content) {
274        Some((yaml, body)) => (parse_frontmatter(yaml)?, body.to_string()),
275        None => (RawFrontmatter::default(), content),
276    };
277
278    Ok(Document {
279        path: path.to_path_buf(),
280        frontmatter,
281        category,
282        body,
283    })
284}
285
286// ---------------------------------------------------------------------------
287// Validation
288// ---------------------------------------------------------------------------
289
/// Lowercase status values accepted for active documents.
const VALID_ACTIVE_STATUSES: &[&str] = &["active", "deprecated", "draft"];
/// Lowercase status values accepted for design documents.
const VALID_DESIGN_STATUSES: &[&str] = &[
    "proposed",
    "accepted",
    "implemented",
    "rejected",
];
/// Lowercase status values accepted for research documents.
const VALID_RESEARCH_STATUSES: &[&str] = &["draft", "published", "obsolete"];
293
294/// Validate a document's frontmatter and return any issues found.
295pub fn validate_frontmatter(doc: &Document) -> Vec<ValidationIssue> {
296    let mut issues = Vec::new();
297    let p = &doc.path;
298    let fm = &doc.frontmatter;
299
300    // No frontmatter at all (title is the simplest sentinel — a truly empty
301    // RawFrontmatter has every field as None).
302    let all_none = fm.title.is_none()
303        && fm.author.is_none()
304        && fm.status.is_none()
305        && fm.created.is_none()
306        && fm.tags.is_none();
307    if all_none && !doc.body.is_empty() {
308        issues.push(ValidationIssue {
309            path: p.clone(),
310            severity: Severity::Error,
311            message: "no frontmatter found".into(),
312        });
313        return issues;
314    }
315
316    // Missing title
317    if fm.title.is_none() {
318        issues.push(ValidationIssue {
319            path: p.clone(),
320            severity: Severity::Error,
321            message: "missing title".into(),
322        });
323    }
324
325    // Missing author
326    if fm.author.is_none() {
327        issues.push(ValidationIssue {
328            path: p.clone(),
329            severity: Severity::Warning,
330            message: "missing author".into(),
331        });
332    }
333
334    // Missing created date
335    if fm.created.is_none() {
336        issues.push(ValidationIssue {
337            path: p.clone(),
338            severity: Severity::Warning,
339            message: "missing created date".into(),
340        });
341    }
342
343    // Design docs must have doc_id
344    if doc.category == Category::Design && fm.doc_id.is_none() {
345        issues.push(ValidationIssue {
346            path: p.clone(),
347            severity: Severity::Error,
348            message: "design doc missing doc_id".into(),
349        });
350    }
351
352    // Active docs should have next_review
353    if doc.category == Category::Active && fm.next_review.is_none() {
354        issues.push(ValidationIssue {
355            path: p.clone(),
356            severity: Severity::Warning,
357            message: "active doc missing next_review".into(),
358        });
359    }
360
361    // Invalid status for category
362    if let Some(ref status) = fm.status {
363        let s = status.to_lowercase();
364        let valid = match doc.category {
365            Category::Active => VALID_ACTIVE_STATUSES.contains(&s.as_str()),
366            Category::Design => VALID_DESIGN_STATUSES.contains(&s.as_str()),
367            Category::Research => VALID_RESEARCH_STATUSES.contains(&s.as_str()),
368            Category::Archive => true, // any status is fine for archived docs
369        };
370        if !valid {
371            issues.push(ValidationIssue {
372                path: p.clone(),
373                severity: Severity::Error,
374                message: format!("invalid status '{}' for {} category", s, doc.category),
375            });
376        }
377    }
378
379    issues
380}
381
382// ---------------------------------------------------------------------------
383// Tests
384// ---------------------------------------------------------------------------
385
#[cfg(test)]
mod tests {
    use super::*;

    // -- helpers ------------------------------------------------------------

    /// Build a calendar date for fixtures; panics on an invalid date.
    fn date(year: i32, month: u32, day: u32) -> NaiveDate {
        NaiveDate::from_ymd_opt(year, month, day).expect("test dates are valid")
    }

    /// Frontmatter with only `status` set.
    fn with_status(status: &str) -> RawFrontmatter {
        RawFrontmatter {
            status: Some(status.into()),
            ..Default::default()
        }
    }

    /// In-memory document with a small non-empty body.
    fn doc(path: &str, category: Category, frontmatter: RawFrontmatter) -> Document {
        Document {
            path: PathBuf::from(path),
            frontmatter,
            category,
            body: "text".into(),
        }
    }

    /// True when some issue has the given severity and mentions `needle`.
    fn has_issue(issues: &[ValidationIssue], severity: Severity, needle: &str) -> bool {
        issues
            .iter()
            .any(|i| i.severity == severity && i.message.contains(needle))
    }

    // -- extract_frontmatter ------------------------------------------------

    #[test]
    fn extract_frontmatter_returns_yaml_and_body() {
        let content = "---\ntitle: Hello\n---\n\n# Body\n";
        let (yaml, body) = extract_frontmatter(content).unwrap();
        assert_eq!(yaml, "title: Hello\n");
        assert_eq!(body, "\n# Body\n");
    }

    #[test]
    fn extract_frontmatter_returns_none_without_delimiters() {
        let content = "# No frontmatter\nJust text.\n";
        assert!(extract_frontmatter(content).is_none());
    }

    #[test]
    fn extract_frontmatter_returns_none_when_unterminated() {
        assert!(extract_frontmatter("---\ntitle: Hi\n").is_none());
    }

    #[test]
    fn extract_frontmatter_handles_crlf() {
        let content = "---\r\ntitle: Hi\r\n---\r\nBody\r\n";
        let (yaml, body) = extract_frontmatter(content).unwrap();
        assert_eq!(yaml, "title: Hi\r\n");
        assert_eq!(body, "Body\r\n");
    }

    #[test]
    fn extract_frontmatter_ignores_inline_dashes() {
        let content = "---\ntitle: a --- b\nnote: ---x\n---\nBody\n";
        let (yaml, body) = extract_frontmatter(content).unwrap();
        assert_eq!(yaml, "title: a --- b\nnote: ---x\n");
        assert_eq!(body, "Body\n");
    }

    #[test]
    fn extract_frontmatter_accepts_closing_delimiter_at_eof() {
        let (yaml, body) = extract_frontmatter("---\ntitle: Hi\n---").unwrap();
        assert_eq!(yaml, "title: Hi\n");
        assert_eq!(body, "");
    }

    // -- parse_frontmatter --------------------------------------------------

    #[test]
    fn parse_frontmatter_deserializes_all_fields() {
        let yaml = r#"
title: "Test"
version: 1.5
status: active
created: 2025-06-01
last_updated: 2026-01-15
author: alice
owner: alice
reviewers: [bob, charlie]
next_review: 2026-04-15
tags: [arch, core]
related_docs:
  - some/path.md
doc_id: 42
decision_date: 2026-01-25
implementation_pr: 100
related_issues: [1, 2]
type: research
may_become_design_doc: true
archived_date: 2026-01-01
archived_reason: "old"
historical_value: high
supersedes: old.md
superseded_by: new.md
"#;
        let fm = parse_frontmatter(yaml).unwrap();
        assert_eq!(fm.title.as_deref(), Some("Test"));
        assert_eq!(fm.version, Some(1.5));
        assert_eq!(fm.status.as_deref(), Some("active"));
        assert_eq!(fm.created, Some(date(2025, 6, 1)));
        assert_eq!(fm.doc_id, Some(42));
        assert_eq!(fm.implementation_pr, Some(100));
        assert_eq!(fm.reviewers.as_ref().unwrap().len(), 2);
        assert_eq!(fm.tags, Some(vec!["arch".to_string(), "core".to_string()]));
        assert_eq!(fm.related_issues, Some(vec![1, 2]));
        assert_eq!(fm.doc_type.as_deref(), Some("research"));
        assert_eq!(fm.may_become_design_doc, Some(true));
        assert_eq!(fm.historical_value.as_deref(), Some("high"));
        assert_eq!(fm.supersedes.as_deref(), Some("old.md"));
        assert_eq!(fm.superseded_by.as_deref(), Some("new.md"));
    }

    #[test]
    fn parse_frontmatter_handles_optional_fields() {
        let fm = parse_frontmatter("title: Minimal\n").unwrap();
        assert_eq!(fm.title.as_deref(), Some("Minimal"));
        assert!(fm.version.is_none());
        assert!(fm.doc_id.is_none());
        assert!(fm.tags.is_none());
    }

    // -- infer_category -----------------------------------------------------

    #[test]
    fn infer_category_active() {
        assert_eq!(
            infer_category(Path::new("docs/active/architecture/FOO.md")),
            Category::Active
        );
        assert_eq!(infer_category(Path::new("active/FOO.md")), Category::Active);
    }

    #[test]
    fn infer_category_design() {
        assert_eq!(
            infer_category(Path::new("docs/design/2026/proposed/001.md")),
            Category::Design
        );
        assert_eq!(infer_category(Path::new("design/001.md")), Category::Design);
    }

    #[test]
    fn infer_category_research() {
        assert_eq!(
            infer_category(Path::new("docs/research/2026/survey.md")),
            Category::Research
        );
        assert_eq!(
            infer_category(Path::new("research/survey.md")),
            Category::Research
        );
    }

    #[test]
    fn infer_category_archive() {
        assert_eq!(
            infer_category(Path::new("docs/archive/2025/old.md")),
            Category::Archive
        );
        assert_eq!(infer_category(Path::new("archive/old.md")), Category::Archive);
    }

    #[test]
    fn infer_category_defaults_to_active() {
        assert_eq!(infer_category(Path::new("random/path.md")), Category::Active);
        assert_eq!(infer_category(Path::new("README.md")), Category::Active);
    }

    #[test]
    fn infer_category_ignores_file_names() {
        // Only directory segments count; a file merely named like a category
        // does not select it.
        assert_eq!(infer_category(Path::new("docs/design")), Category::Active);
    }

    // -- resolve_status -----------------------------------------------------

    #[test]
    fn resolve_status_per_category() {
        assert_eq!(resolve_status(&with_status("active"), Category::Active), "active");
        assert_eq!(resolve_status(&with_status("accepted"), Category::Design), "accepted");
        assert_eq!(
            resolve_status(&with_status("published"), Category::Research),
            "published"
        );
        assert_eq!(resolve_status(&with_status("anything"), Category::Archive), "archived");
    }

    #[test]
    fn resolve_status_is_case_insensitive() {
        assert_eq!(resolve_status(&with_status("Accepted"), Category::Design), "accepted");
    }

    #[test]
    fn resolve_status_falls_back_on_unknown_value() {
        assert_eq!(resolve_status(&with_status("bogus"), Category::Active), "active");
        assert_eq!(resolve_status(&with_status("bogus"), Category::Design), "proposed");
        assert_eq!(resolve_status(&with_status("bogus"), Category::Research), "draft");
    }

    #[test]
    fn resolve_status_defaults() {
        let fm = RawFrontmatter::default();
        assert_eq!(resolve_status(&fm, Category::Active), "active");
        assert_eq!(resolve_status(&fm, Category::Design), "proposed");
        assert_eq!(resolve_status(&fm, Category::Research), "draft");
        assert_eq!(resolve_status(&fm, Category::Archive), "archived");
    }

    // -- parse_document -----------------------------------------------------

    #[test]
    fn parse_document_reads_fixture() {
        let path = Path::new("tests/fixtures/docs/active/architecture/CORE_CONCEPTS.md");
        // Resolve relative to the current working directory.
        let abs = std::env::current_dir().unwrap().join(path);
        // The fixture is optional; say so instead of passing silently.
        if !abs.exists() {
            eprintln!("skipping: fixture {} not found", abs.display());
            return;
        }
        let doc = parse_document(&abs).unwrap();
        assert_eq!(doc.category, Category::Active);
        assert_eq!(doc.frontmatter.title.as_deref(), Some("Core Concepts"));
        assert!(doc.body.contains("# Core Concepts"));
    }

    #[test]
    fn parse_document_handles_no_frontmatter() {
        let dir = tempfile::tempdir().unwrap();
        let file = dir.path().join("random").join("bare.md");
        std::fs::create_dir_all(file.parent().unwrap()).unwrap();
        std::fs::write(&file, "# Just a heading\nSome text.\n").unwrap();

        let doc = parse_document(&file).unwrap();
        assert!(doc.frontmatter.title.is_none());
        assert!(doc.body.contains("# Just a heading"));
    }

    #[test]
    fn parse_document_splits_frontmatter_from_body() {
        let dir = tempfile::tempdir().unwrap();
        let file = dir.path().join("doc.md");
        std::fs::write(&file, "---\ntitle: T\ndoc_id: 7\n---\nBody\n").unwrap();

        let doc = parse_document(&file).unwrap();
        assert_eq!(doc.frontmatter.title.as_deref(), Some("T"));
        assert_eq!(doc.frontmatter.doc_id, Some(7));
        assert_eq!(doc.body, "Body\n");
    }

    #[test]
    fn parse_document_reports_missing_file() {
        let dir = tempfile::tempdir().unwrap();
        let result = parse_document(&dir.path().join("does-not-exist.md"));
        assert!(matches!(result, Err(MetaError::Io(_))));
    }

    // -- validate_frontmatter -----------------------------------------------

    #[test]
    fn validate_detects_missing_title() {
        let doc = doc(
            "docs/active/x.md",
            Category::Active,
            RawFrontmatter {
                author: Some("a".into()),
                status: Some("active".into()),
                created: Some(date(2025, 1, 1)),
                ..Default::default()
            },
        );
        let issues = validate_frontmatter(&doc);
        assert!(has_issue(&issues, Severity::Error, "title"));
    }

    #[test]
    fn validate_detects_design_missing_doc_id() {
        let doc = doc(
            "docs/design/x.md",
            Category::Design,
            RawFrontmatter {
                title: Some("D".into()),
                author: Some("a".into()),
                status: Some("proposed".into()),
                created: Some(date(2026, 1, 1)),
                ..Default::default()
            },
        );
        let issues = validate_frontmatter(&doc);
        assert!(has_issue(&issues, Severity::Error, "doc_id"));
    }

    #[test]
    fn validate_detects_active_missing_next_review() {
        let doc = doc(
            "docs/active/x.md",
            Category::Active,
            RawFrontmatter {
                title: Some("A".into()),
                author: Some("a".into()),
                status: Some("active".into()),
                created: Some(date(2025, 1, 1)),
                ..Default::default()
            },
        );
        let issues = validate_frontmatter(&doc);
        assert!(has_issue(&issues, Severity::Warning, "next_review"));
    }

    #[test]
    fn validate_detects_invalid_status() {
        let doc = doc(
            "docs/active/x.md",
            Category::Active,
            RawFrontmatter {
                title: Some("T".into()),
                author: Some("a".into()),
                status: Some("bogus".into()),
                created: Some(date(2025, 1, 1)),
                next_review: Some(date(2026, 6, 1)),
                ..Default::default()
            },
        );
        let issues = validate_frontmatter(&doc);
        assert!(has_issue(&issues, Severity::Error, "invalid status"));
    }

    #[test]
    fn validate_accepts_complete_active_doc() {
        let doc = doc(
            "docs/active/x.md",
            Category::Active,
            RawFrontmatter {
                title: Some("T".into()),
                author: Some("a".into()),
                status: Some("Active".into()),
                created: Some(date(2025, 1, 1)),
                next_review: Some(date(2026, 6, 1)),
                ..Default::default()
            },
        );
        assert!(validate_frontmatter(&doc).is_empty());
    }

    #[test]
    fn validate_no_frontmatter_error() {
        let doc = Document {
            path: PathBuf::from("docs/bare.md"),
            frontmatter: RawFrontmatter::default(),
            category: Category::Active,
            body: "# Heading\nSome text".into(),
        };
        let issues = validate_frontmatter(&doc);
        assert!(has_issue(&issues, Severity::Error, "no frontmatter"));
        assert_eq!(issues.len(), 1);
    }

    // -- serialization ------------------------------------------------------

    #[test]
    fn roundtrip_serialization() {
        let fm = RawFrontmatter {
            title: Some("Round Trip".into()),
            version: Some(1.0),
            status: Some("active".into()),
            tags: Some(vec!["a".into(), "b".into()]),
            ..Default::default()
        };
        let yaml = serde_yaml::to_string(&fm).unwrap();
        let parsed: RawFrontmatter = serde_yaml::from_str(&yaml).unwrap();
        assert_eq!(parsed.title, fm.title);
        assert_eq!(parsed.version, fm.version);
        assert_eq!(parsed.status, fm.status);
        assert_eq!(parsed.tags, fm.tags);
    }
}