sara_core/parser/
markdown.rs

1//! Markdown file parsing and document extraction.
2
3use std::path::Path;
4
5use serde::Deserialize;
6
7use crate::error::ParseError;
8use crate::model::{
9    DownstreamRefs, Item, ItemAttributes, ItemBuilder, ItemId, ItemType, SourceLocation,
10    UpstreamRefs,
11};
12use crate::parser::frontmatter::extract_frontmatter;
13
14/// Raw frontmatter structure for deserialization.
15///
16/// This represents the YAML frontmatter as it appears in Markdown files.
17/// All relationship fields accept both single values and arrays for flexibility.
18#[derive(Debug, Clone, Deserialize)]
19pub struct RawFrontmatter {
20    /// Unique identifier (required).
21    pub id: String,
22
23    /// Item type (required).
24    #[serde(rename = "type")]
25    pub item_type: ItemType,
26
27    /// Human-readable name (required).
28    pub name: String,
29
30    /// Optional description.
31    #[serde(default)]
32    pub description: Option<String>,
33
34    // Upstream references (toward Solution)
35    /// Items this item refines (for UseCase, Scenario).
36    #[serde(default)]
37    pub refines: Vec<String>,
38
39    /// Items this item derives from (for SystemRequirement, HW/SW Requirement).
40    #[serde(default)]
41    pub derives_from: Vec<String>,
42
43    /// Items this item satisfies (for SystemArchitecture, HW/SW DetailedDesign).
44    #[serde(default)]
45    pub satisfies: Vec<String>,
46
47    // Downstream references (toward Detailed Designs)
48    /// Items that refine this item (for Solution, UseCase).
49    #[serde(default)]
50    pub is_refined_by: Vec<String>,
51
52    /// Items derived from this item (for Scenario, SystemArchitecture).
53    #[serde(default)]
54    pub derives: Vec<String>,
55
56    /// Items that satisfy this item (for SystemRequirement, HW/SW Requirement).
57    #[serde(default)]
58    pub is_satisfied_by: Vec<String>,
59
60    // Type-specific attributes
61    /// Specification statement (required for requirement types).
62    #[serde(default)]
63    pub specification: Option<String>,
64
65    /// Peer dependencies (for requirement types).
66    #[serde(default)]
67    pub depends_on: Vec<String>,
68
69    /// Target platform (for SystemArchitecture).
70    #[serde(default)]
71    pub platform: Option<String>,
72
73    /// ADR links (reserved for future use).
74    #[serde(default)]
75    pub justified_by: Option<Vec<String>>,
76}
77
78impl RawFrontmatter {
79    /// Converts string IDs to ItemIds for upstream refs.
80    pub fn upstream_refs(&self) -> Result<UpstreamRefs, ParseError> {
81        Ok(UpstreamRefs {
82            refines: self.refines.iter().map(ItemId::new_unchecked).collect(),
83            derives_from: self
84                .derives_from
85                .iter()
86                .map(ItemId::new_unchecked)
87                .collect(),
88            satisfies: self.satisfies.iter().map(ItemId::new_unchecked).collect(),
89        })
90    }
91
92    /// Converts string IDs to ItemIds for downstream refs.
93    pub fn downstream_refs(&self) -> Result<DownstreamRefs, ParseError> {
94        Ok(DownstreamRefs {
95            is_refined_by: self
96                .is_refined_by
97                .iter()
98                .map(ItemId::new_unchecked)
99                .collect(),
100            derives: self.derives.iter().map(ItemId::new_unchecked).collect(),
101            is_satisfied_by: self
102                .is_satisfied_by
103                .iter()
104                .map(ItemId::new_unchecked)
105                .collect(),
106        })
107    }
108
109    /// Converts to ItemAttributes.
110    pub fn attributes(&self) -> ItemAttributes {
111        ItemAttributes {
112            specification: self.specification.clone(),
113            depends_on: self.depends_on.iter().map(ItemId::new_unchecked).collect(),
114            platform: self.platform.clone(),
115            justified_by: self
116                .justified_by
117                .as_ref()
118                .map(|ids| ids.iter().map(ItemId::new_unchecked).collect()),
119        }
120    }
121}
122
123/// Parses a Markdown file and extracts the item.
124///
125/// # Arguments
126/// * `content` - The raw file content.
127/// * `file_path` - Relative path within the repository.
128/// * `repository` - Absolute path to the repository root.
129///
130/// # Returns
131/// The parsed Item, or a ParseError if parsing fails.
132pub fn parse_markdown_file(
133    content: &str,
134    file_path: &Path,
135    repository: &Path,
136) -> Result<Item, ParseError> {
137    let extracted = extract_frontmatter(content, file_path)?;
138
139    let frontmatter: RawFrontmatter =
140        serde_yaml::from_str(&extracted.yaml).map_err(|e| ParseError::InvalidYaml {
141            file: file_path.to_path_buf(),
142            reason: e.to_string(),
143        })?;
144
145    // Validate item ID format
146    let item_id = ItemId::new(&frontmatter.id).map_err(|e| ParseError::InvalidFrontmatter {
147        file: file_path.to_path_buf(),
148        line: extracted.start_line,
149        reason: format!("Invalid item ID: {}", e),
150    })?;
151
152    // Create source location
153    let source = SourceLocation::new(repository, file_path, extracted.start_line);
154
155    // Build the item
156    let mut builder = ItemBuilder::new()
157        .id(item_id)
158        .item_type(frontmatter.item_type)
159        .name(&frontmatter.name)
160        .source(source)
161        .upstream(frontmatter.upstream_refs()?)
162        .downstream(frontmatter.downstream_refs()?)
163        .attributes(frontmatter.attributes());
164
165    if let Some(desc) = &frontmatter.description {
166        builder = builder.description(desc);
167    }
168
169    builder.build().map_err(|e| ParseError::InvalidFrontmatter {
170        file: file_path.to_path_buf(),
171        line: extracted.start_line,
172        reason: e.to_string(),
173    })
174}
175
176/// Represents a parsed document with its item and body content.
177#[derive(Debug)]
178pub struct ParsedDocument {
179    /// The extracted item.
180    pub item: Item,
181    /// The Markdown body content after frontmatter.
182    pub body: String,
183}
184
185/// Parses a Markdown file and returns the item and body.
186pub fn parse_document(
187    content: &str,
188    file_path: &Path,
189    repository: &Path,
190) -> Result<ParsedDocument, ParseError> {
191    let extracted = extract_frontmatter(content, file_path)?;
192    let item = parse_markdown_file(content, file_path, repository)?;
193
194    Ok(ParsedDocument {
195        item,
196        body: extracted.body,
197    })
198}
199
#[cfg(test)]
mod tests {
    use super::*;

    /// Repository root used by every test; no file is read from it.
    const REPO_ROOT: &str = "/repo";

    const SOLUTION_MD: &str = r#"---
id: "SOL-001"
type: solution
name: "Test Solution"
description: "A test solution"
is_refined_by:
  - "UC-001"
---
# Test Solution

This is the body content.
"#;

    const REQUIREMENT_MD: &str = r#"---
id: "SYSREQ-001"
type: system_requirement
name: "Performance Requirement"
specification: "The system SHALL respond within 100ms."
derives_from:
  - "SCEN-001"
is_satisfied_by:
  - "SYSARCH-001"
---
# Requirement
"#;

    /// Runs `parse_markdown_file` on `content` as if it lived at `file_name`.
    fn parse_item(content: &str, file_name: &str) -> Result<Item, ParseError> {
        parse_markdown_file(content, Path::new(file_name), Path::new(REPO_ROOT))
    }

    #[test]
    fn test_parse_solution() {
        let item = parse_item(SOLUTION_MD, "SOL-001.md").unwrap();

        assert_eq!(item.id.as_str(), "SOL-001");
        assert_eq!(item.item_type, ItemType::Solution);
        assert_eq!(item.name, "Test Solution");
        assert_eq!(item.description.as_deref(), Some("A test solution"));

        let refined_by: Vec<&str> = item
            .downstream
            .is_refined_by
            .iter()
            .map(|id| id.as_str())
            .collect();
        assert_eq!(refined_by, ["UC-001"]);
    }

    #[test]
    fn test_parse_requirement() {
        let item = parse_item(REQUIREMENT_MD, "SYSREQ-001.md").unwrap();

        assert_eq!(item.id.as_str(), "SYSREQ-001");
        assert_eq!(item.item_type, ItemType::SystemRequirement);
        assert_eq!(
            item.attributes.specification.as_deref(),
            Some("The system SHALL respond within 100ms.")
        );
        assert_eq!(item.upstream.derives_from.len(), 1);
        assert_eq!(item.downstream.is_satisfied_by.len(), 1);
    }

    #[test]
    fn test_parse_document() {
        let doc = parse_document(
            SOLUTION_MD,
            Path::new("SOL-001.md"),
            Path::new(REPO_ROOT),
        )
        .unwrap();

        assert_eq!(doc.item.id.as_str(), "SOL-001");
        assert!(doc.body.contains("# Test Solution"));
    }

    #[test]
    fn test_parse_invalid_id() {
        let content = r#"---
id: "invalid id with spaces"
type: solution
name: "Test"
---
"#;
        assert!(parse_item(content, "test.md").is_err());
    }

    #[test]
    fn test_parse_missing_type() {
        let content = r#"---
id: "SOL-001"
name: "Test"
---
"#;
        assert!(parse_item(content, "test.md").is_err());
    }
}