sara_core/parser/
markdown.rs

1//! Markdown file parsing and document extraction.
2
3use std::path::Path;
4
5use serde::Deserialize;
6
7use crate::error::ParseError;
8use crate::model::{
9    DownstreamRefs, Item, ItemAttributes, ItemBuilder, ItemId, ItemType, SourceLocation,
10    UpstreamRefs,
11};
12use crate::parser::frontmatter::extract_frontmatter;
13
14/// Raw frontmatter structure for deserialization.
15///
16/// This represents the YAML frontmatter as it appears in Markdown files.
17/// All relationship fields accept both single values and arrays for flexibility.
18#[derive(Debug, Clone, Deserialize)]
19pub struct RawFrontmatter {
20    /// Unique identifier (required).
21    pub id: String,
22
23    /// Item type (required).
24    #[serde(rename = "type")]
25    pub item_type: ItemType,
26
27    /// Human-readable name (required).
28    pub name: String,
29
30    /// Optional description.
31    #[serde(default)]
32    pub description: Option<String>,
33
34    // Upstream references (toward Solution)
35    /// Items this item refines (for UseCase, Scenario).
36    #[serde(default)]
37    pub refines: Vec<String>,
38
39    /// Items this item derives from (for SystemRequirement, HW/SW Requirement).
40    #[serde(default)]
41    pub derives_from: Vec<String>,
42
43    /// Items this item satisfies (for SystemArchitecture, HW/SW DetailedDesign).
44    #[serde(default)]
45    pub satisfies: Vec<String>,
46
47    // Downstream references (toward Detailed Designs)
48    /// Items that refine this item (for Solution, UseCase).
49    #[serde(default)]
50    pub is_refined_by: Vec<String>,
51
52    /// Items derived from this item (for Scenario, SystemArchitecture).
53    #[serde(default)]
54    pub derives: Vec<String>,
55
56    /// Items that satisfy this item (for SystemRequirement, HW/SW Requirement).
57    #[serde(default)]
58    pub is_satisfied_by: Vec<String>,
59
60    // Type-specific attributes
61    /// Specification statement (required for requirement types).
62    #[serde(default)]
63    pub specification: Option<String>,
64
65    /// Peer dependencies (for requirement types).
66    #[serde(default)]
67    pub depends_on: Vec<String>,
68
69    /// Target platform (for SystemArchitecture).
70    #[serde(default)]
71    pub platform: Option<String>,
72
73    /// ADR links (reserved for future use).
74    #[serde(default)]
75    pub justified_by: Option<Vec<String>>,
76}
77
78impl RawFrontmatter {
79    /// Converts string IDs to ItemIds for upstream refs.
80    pub fn upstream_refs(&self) -> Result<UpstreamRefs, ParseError> {
81        Ok(UpstreamRefs {
82            refines: self.refines.iter().map(ItemId::new_unchecked).collect(),
83            derives_from: self
84                .derives_from
85                .iter()
86                .map(ItemId::new_unchecked)
87                .collect(),
88            satisfies: self.satisfies.iter().map(ItemId::new_unchecked).collect(),
89        })
90    }
91
92    /// Converts string IDs to ItemIds for downstream refs.
93    pub fn downstream_refs(&self) -> Result<DownstreamRefs, ParseError> {
94        Ok(DownstreamRefs {
95            is_refined_by: self
96                .is_refined_by
97                .iter()
98                .map(ItemId::new_unchecked)
99                .collect(),
100            derives: self.derives.iter().map(ItemId::new_unchecked).collect(),
101            is_satisfied_by: self
102                .is_satisfied_by
103                .iter()
104                .map(ItemId::new_unchecked)
105                .collect(),
106        })
107    }
108
109    /// Converts to ItemAttributes.
110    pub fn attributes(&self) -> ItemAttributes {
111        ItemAttributes {
112            specification: self.specification.clone(),
113            depends_on: self.depends_on.iter().map(ItemId::new_unchecked).collect(),
114            platform: self.platform.clone(),
115            justified_by: self
116                .justified_by
117                .as_ref()
118                .map(|ids| ids.iter().map(ItemId::new_unchecked).collect()),
119        }
120    }
121}
122
123/// Parses a Markdown file and extracts the item.
124///
125/// # Arguments
126/// * `content` - The raw file content.
127/// * `file_path` - Relative path within the repository.
128/// * `repository` - Absolute path to the repository root.
129///
130/// # Returns
131/// The parsed Item, or a ParseError if parsing fails.
132pub fn parse_markdown_file(
133    content: &str,
134    file_path: &Path,
135    repository: &Path,
136) -> Result<Item, ParseError> {
137    let extracted = extract_frontmatter(content, file_path)?;
138
139    let frontmatter: RawFrontmatter =
140        serde_yaml::from_str(&extracted.yaml).map_err(|e| ParseError::InvalidYaml {
141            file: file_path.to_path_buf(),
142            reason: e.to_string(),
143        })?;
144
145    // Validate item ID format
146    let item_id = ItemId::new(&frontmatter.id).map_err(|e| ParseError::InvalidFrontmatter {
147        file: file_path.to_path_buf(),
148        reason: format!("Invalid item ID: {}", e),
149    })?;
150
151    // Create source location
152    let source = SourceLocation::new(repository, file_path);
153
154    // Build the item
155    let mut builder = ItemBuilder::new()
156        .id(item_id)
157        .item_type(frontmatter.item_type)
158        .name(&frontmatter.name)
159        .source(source)
160        .upstream(frontmatter.upstream_refs()?)
161        .downstream(frontmatter.downstream_refs()?)
162        .attributes(frontmatter.attributes());
163
164    if let Some(desc) = &frontmatter.description {
165        builder = builder.description(desc);
166    }
167
168    builder.build().map_err(|e| ParseError::InvalidFrontmatter {
169        file: file_path.to_path_buf(),
170        reason: e.to_string(),
171    })
172}
173
174/// Represents a parsed document with its item and body content.
175#[derive(Debug)]
176pub struct ParsedDocument {
177    /// The extracted item.
178    pub item: Item,
179    /// The Markdown body content after frontmatter.
180    pub body: String,
181}
182
183/// Parses a Markdown file and returns the item and body.
184pub fn parse_document(
185    content: &str,
186    file_path: &Path,
187    repository: &Path,
188) -> Result<ParsedDocument, ParseError> {
189    let extracted = extract_frontmatter(content, file_path)?;
190    let item = parse_markdown_file(content, file_path, repository)?;
191
192    Ok(ParsedDocument {
193        item,
194        body: extracted.body,
195    })
196}
197
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    const SOLUTION_MD: &str = r#"---
id: "SOL-001"
type: solution
name: "Test Solution"
description: "A test solution"
is_refined_by:
  - "UC-001"
---
# Test Solution

This is the body content.
"#;

    const REQUIREMENT_MD: &str = r#"---
id: "SYSREQ-001"
type: system_requirement
name: "Performance Requirement"
specification: "The system SHALL respond within 100ms."
derives_from:
  - "SCEN-001"
is_satisfied_by:
  - "SYSARCH-001"
---
# Requirement
"#;

    /// Runs `parse_markdown_file` on `content` as if it lived at `/repo/<file_name>`.
    fn parse(content: &str, file_name: &str) -> Result<Item, ParseError> {
        parse_markdown_file(
            content,
            &PathBuf::from(file_name),
            &PathBuf::from("/repo"),
        )
    }

    #[test]
    fn test_parse_solution() {
        let item = parse(SOLUTION_MD, "SOL-001.md").unwrap();

        assert_eq!(item.id.as_str(), "SOL-001");
        assert_eq!(item.item_type, ItemType::Solution);
        assert_eq!(item.name, "Test Solution");
        assert_eq!(item.description, Some("A test solution".to_string()));
        assert_eq!(item.downstream.is_refined_by.len(), 1);
        assert_eq!(item.downstream.is_refined_by[0].as_str(), "UC-001");
    }

    #[test]
    fn test_parse_requirement() {
        let item = parse(REQUIREMENT_MD, "SYSREQ-001.md").unwrap();

        assert_eq!(item.id.as_str(), "SYSREQ-001");
        assert_eq!(item.item_type, ItemType::SystemRequirement);
        assert_eq!(
            item.attributes.specification,
            Some("The system SHALL respond within 100ms.".to_string())
        );
        assert_eq!(item.upstream.derives_from.len(), 1);
        assert_eq!(item.downstream.is_satisfied_by.len(), 1);
    }

    #[test]
    fn test_parse_document() {
        let doc = parse_document(
            SOLUTION_MD,
            &PathBuf::from("SOL-001.md"),
            &PathBuf::from("/repo"),
        )
        .unwrap();

        assert_eq!(doc.item.id.as_str(), "SOL-001");
        assert!(doc.body.contains("# Test Solution"));
    }

    #[test]
    fn test_parse_invalid_id() {
        let content = r#"---
id: "invalid id with spaces"
type: solution
name: "Test"
---
"#;
        let result = parse(content, "test.md");

        // The YAML itself is well-formed, so the failure must come from ID
        // validation rather than from an unrelated parsing step.
        assert!(
            matches!(result, Err(ParseError::InvalidFrontmatter { .. })),
            "expected InvalidFrontmatter, got {:?}",
            result
        );
    }

    #[test]
    fn test_parse_missing_type() {
        let content = r#"---
id: "SOL-001"
name: "Test"
---
"#;
        let result = parse(content, "test.md");

        // A missing required field is rejected while deserializing the YAML.
        assert!(
            matches!(result, Err(ParseError::InvalidYaml { .. })),
            "expected InvalidYaml, got {:?}",
            result
        );
    }
}