1use crate::attrs::parse_attrs;
2use crate::error::{Diagnostic, Severity};
3use crate::types::{Attrs, Block, FrontMatter, Span, SurfDoc};
4
5#[derive(Debug, Clone)]
7pub struct ParseResult {
8 pub doc: SurfDoc,
10 pub diagnostics: Vec<Diagnostic>,
12}
13
14pub fn parse(input: &str) -> ParseResult {
19 let mut diagnostics = Vec::new();
20
21 let normalised = input.replace("\r\n", "\n");
23 let lines: Vec<&str> = normalised.split('\n').collect();
24
25 let (front_matter, body_start_line) = extract_front_matter(&lines, &normalised, &mut diagnostics);
29
30 let blocks = scan_blocks(&lines, body_start_line, &normalised, &mut diagnostics);
34
35 let blocks = blocks
39 .into_iter()
40 .map(|block| match block {
41 Block::Unknown { .. } => crate::blocks::resolve_block(block),
42 other => other,
43 })
44 .collect();
45
46 ParseResult {
47 doc: SurfDoc {
48 front_matter,
49 blocks,
50 source: normalised,
51 },
52 diagnostics,
53 }
54}
55
56fn extract_front_matter(
64 lines: &[&str],
65 source: &str,
66 diagnostics: &mut Vec<Diagnostic>,
67) -> (Option<FrontMatter>, usize) {
68 if lines.is_empty() || lines[0].trim() != "---" {
69 return (None, 0);
70 }
71
72 let mut end_idx = None;
74 for (i, line) in lines.iter().enumerate().skip(1) {
75 if line.trim() == "---" {
76 end_idx = Some(i);
77 break;
78 }
79 }
80
81 let end_idx = match end_idx {
82 Some(i) => i,
83 None => {
84 diagnostics.push(Diagnostic {
86 severity: Severity::Error,
87 message: "Front matter opened with `---` but never closed".into(),
88 span: Some(line_span(0, 0, source)),
89 code: Some("E001".into()),
90 });
91 return (None, 0);
92 }
93 };
94
95 let yaml_str: String = lines[1..end_idx].join("\n");
96 let fm_span = Span {
97 start_line: 1,
98 end_line: end_idx + 1,
99 start_offset: 0,
100 end_offset: byte_offset_end_of_line(end_idx, source),
101 };
102
103 match serde_yaml::from_str::<FrontMatter>(&yaml_str) {
104 Ok(fm) => (Some(fm), end_idx + 1),
105 Err(e) => {
106 diagnostics.push(Diagnostic {
107 severity: Severity::Error,
108 message: format!("Failed to parse front matter YAML: {e}"),
109 span: Some(fm_span),
110 code: Some("E002".into()),
111 });
112 (None, end_idx + 1)
113 }
114 }
115}
116
117struct OpenBlock {
123 name: String,
124 attrs: Attrs,
125 depth: usize, start_line: usize, start_offset: usize,
128 content_start_offset: usize, }
130
131fn scan_blocks(
137 lines: &[&str],
138 body_start: usize,
139 source: &str,
140 diagnostics: &mut Vec<Diagnostic>,
141) -> Vec<Block> {
142 let mut blocks: Vec<Block> = Vec::new();
143 let mut stack: Vec<OpenBlock> = Vec::new();
144
145 let mut md_start_line: Option<usize> = None; let mut md_start_offset: Option<usize> = None;
149
150 for (idx, &line) in lines.iter().enumerate().skip(body_start) {
151 let trimmed = line.trim();
152 let line_offset = byte_offset_start_of_line(idx, source);
153
154 if let Some(close_depth) = closing_directive_depth(trimmed) {
156 if let Some(pos) = stack.iter().rposition(|b| b.depth == close_depth) {
158 while stack.len() > pos + 1 {
160 let orphan = stack.pop().unwrap();
161 diagnostics.push(Diagnostic {
162 severity: Severity::Warning,
163 message: format!(
164 "Unclosed block directive '{}' opened at line {}",
165 orphan.name, orphan.start_line
166 ),
167 span: Some(Span {
168 start_line: orphan.start_line,
169 end_line: idx + 1,
170 start_offset: orphan.start_offset,
171 end_offset: line_offset + line.len(),
172 }),
173 code: Some("W001".into()),
174 });
175 }
176
177 let open = stack.pop().unwrap(); if stack.is_empty() {
181 let content = &source[open.content_start_offset..line_offset];
182 let content = content.strip_suffix('\n').unwrap_or(content);
183
184 blocks.push(Block::Unknown {
185 name: open.name,
186 attrs: open.attrs,
187 content: content.to_string(),
188 span: Span {
189 start_line: open.start_line,
190 end_line: idx + 1,
191 start_offset: open.start_offset,
192 end_offset: line_offset + line.len(),
193 },
194 });
195
196 md_start_line = None;
197 md_start_offset = None;
198 }
199 continue;
201 }
202 }
204
205 if let Some((depth, name, attrs_str)) = opening_directive(trimmed) {
207 if stack.is_empty() {
209 flush_markdown(
210 &mut blocks,
211 &mut md_start_line,
212 &mut md_start_offset,
213 idx,
214 source,
215 );
216
217 let attrs = match parse_attrs(&attrs_str) {
218 Ok(a) => a,
219 Err(e) => {
220 diagnostics.push(Diagnostic {
221 severity: Severity::Warning,
222 message: format!("Invalid attributes on '::{}': {}", name, e),
223 span: Some(line_span(idx, idx, source)),
224 code: Some("W002".into()),
225 });
226 Attrs::new()
227 }
228 };
229
230 let content_start = line_offset + line.len() + 1; let content_start = content_start.min(source.len());
232
233 stack.push(OpenBlock {
234 name,
235 attrs,
236 depth,
237 start_line: idx + 1,
238 start_offset: line_offset,
239 content_start_offset: content_start,
240 });
241 } else {
242 stack.push(OpenBlock {
246 name,
247 attrs: Attrs::new(),
248 depth,
249 start_line: idx + 1,
250 start_offset: line_offset,
251 content_start_offset: 0, });
253 }
254 continue;
255 }
256
257 if stack.is_empty() && md_start_line.is_none() {
259 md_start_line = Some(idx);
260 md_start_offset = Some(line_offset);
261 }
262 }
263
264 flush_markdown(
266 &mut blocks,
267 &mut md_start_line,
268 &mut md_start_offset,
269 lines.len(),
270 source,
271 );
272
273 while let Some(open) = stack.pop() {
276 let eof_offset = source.len();
277 let eof_line = lines.len();
278
279 diagnostics.push(Diagnostic {
280 severity: Severity::Warning,
281 message: format!(
282 "Unclosed block directive '{}' opened at line {}",
283 open.name, open.start_line
284 ),
285 span: Some(Span {
286 start_line: open.start_line,
287 end_line: eof_line,
288 start_offset: open.start_offset,
289 end_offset: eof_offset,
290 }),
291 code: Some("W001".into()),
292 });
293
294 if stack.is_empty() {
296 let content = if open.content_start_offset <= eof_offset {
297 &source[open.content_start_offset..eof_offset]
298 } else {
299 ""
300 };
301 let content = content.strip_suffix('\n').unwrap_or(content);
302
303 blocks.push(Block::Unknown {
304 name: open.name,
305 attrs: open.attrs,
306 content: content.to_string(),
307 span: Span {
308 start_line: open.start_line,
309 end_line: eof_line,
310 start_offset: open.start_offset,
311 end_offset: eof_offset,
312 },
313 });
314 }
315 }
316
317 blocks
318}
319
320fn flush_markdown(
322 blocks: &mut Vec<Block>,
323 md_start_line: &mut Option<usize>,
324 md_start_offset: &mut Option<usize>,
325 current_idx: usize,
326 source: &str,
327) {
328 if let (Some(start_idx), Some(start_off)) = (*md_start_line, *md_start_offset) {
329 let mut end_idx = current_idx.saturating_sub(1);
330
331 let source_lines: Vec<&str> = source.split('\n').collect();
333 while end_idx > start_idx && source_lines.get(end_idx).is_some_and(|l| l.trim().is_empty())
334 {
335 end_idx -= 1;
336 }
337
338 let end_offset = byte_offset_end_of_line(end_idx, source);
339 let content = &source[start_off..end_offset];
340
341 let trimmed = content.trim();
343 if !trimmed.is_empty() {
344 blocks.push(Block::Markdown {
345 content: content.to_string(),
346 span: Span {
347 start_line: start_idx + 1,
348 end_line: end_idx + 1,
349 start_offset: start_off,
350 end_offset,
351 },
352 });
353 }
354
355 *md_start_line = None;
356 *md_start_offset = None;
357 }
358}
359
/// Returns the colon count when `trimmed` is a closing marker, i.e. consists
/// solely of two or more `:` characters; otherwise returns `None`.
fn closing_directive_depth(trimmed: &str) -> Option<usize> {
    let colon_count = trimmed.len();
    // ':' is ASCII, so a byte-wise check matches a char-wise one exactly.
    let only_colons = trimmed.bytes().all(|b| b == b':');
    (colon_count >= 2 && only_colons).then_some(colon_count)
}
376
/// Recognises a directive opening line such as `::name[attrs]`.
///
/// On a match returns `(depth, name, attrs_str)`:
/// * `depth` — number of leading colons (at least two),
/// * `name` — the run of alphanumeric, `-` and `_` characters that follows;
///   its first character must be alphabetic,
/// * `attrs_str` — the bracketed attribute text including the brackets, or an
///   empty string when the name is not immediately followed by `[`. With no
///   closing `]`, everything from `[` to the end of the line is returned.
///
/// Text after the attribute list (or after the name) is ignored.
fn opening_directive(trimmed: &str) -> Option<(usize, String, String)> {
    let depth = trimmed.bytes().take_while(|&b| b == b':').count();
    if depth < 2 {
        return None;
    }

    let after_colons = &trimmed[depth..];
    // `?` rejects a bare run of colons, which has nothing after the prefix.
    if !after_colons.chars().next()?.is_alphabetic() {
        return None;
    }

    let is_name_char = |c: char| c.is_alphanumeric() || c == '-' || c == '_';
    let (name, tail) = match after_colons.find(|c: char| !is_name_char(c)) {
        Some(split) => after_colons.split_at(split),
        None => (after_colons, ""),
    };

    // NOTE(review): the first `]` ends the attribute list even if it sits
    // inside a quoted value — confirm this matches what `parse_attrs` expects.
    let attrs_str = if tail.starts_with('[') {
        let end = tail.find(']').map_or(tail.len(), |close| close + 1);
        &tail[..end]
    } else {
        ""
    };

    Some((depth, name.to_string(), attrs_str.to_string()))
}
422
/// Byte offset at which the zero-based line `idx` starts in `source`, with
/// lines separated by `\n`.
///
/// Returns `source.len()` when `source` has no line with that index.
fn byte_offset_start_of_line(idx: usize, source: &str) -> usize {
    if idx == 0 {
        return 0;
    }
    // Line `idx` begins right after the `idx`-th newline in the source.
    source
        .match_indices('\n')
        .nth(idx - 1)
        .map_or(source.len(), |(newline_at, _)| newline_at + 1)
}
438
/// Byte offset just past the last byte of the zero-based line `idx` in
/// `source` (that is, the position of its terminating `\n`, or the end of the
/// source for the final line).
///
/// Returns `source.len()` when `source` has no line with that index.
fn byte_offset_end_of_line(idx: usize, source: &str) -> usize {
    // Line `idx` is terminated by the newline with the same zero-based index.
    source
        .match_indices('\n')
        .nth(idx)
        .map_or(source.len(), |(newline_at, _)| newline_at)
}
451
452fn line_span(start_idx: usize, end_idx: usize, source: &str) -> Span {
454 Span {
455 start_line: start_idx + 1,
456 end_line: end_idx + 1,
457 start_offset: byte_offset_start_of_line(start_idx, source),
458 end_offset: byte_offset_end_of_line(end_idx, source),
459 }
460}
461
#[cfg(test)]
mod tests {
    use super::*;
    use pretty_assertions::assert_eq;

    /// Parses `input` and fails the test if any diagnostic was produced.
    fn parse_clean(input: &str) -> ParseResult {
        let result = parse(input);
        assert!(result.diagnostics.is_empty(), "diagnostics: {:?}", result.diagnostics);
        result
    }

    // Empty input yields an empty document and no diagnostics.
    #[test]
    fn parse_empty_input() {
        let result = parse("");
        assert!(result.doc.front_matter.is_none());
        assert!(result.doc.blocks.is_empty());
        assert!(result.diagnostics.is_empty());
    }

    // Text without directives becomes a single markdown block.
    #[test]
    fn parse_plain_markdown() {
        let result = parse("# Hello\n\nSome text here.\n");
        assert!(result.doc.front_matter.is_none());
        assert_eq!(result.doc.blocks.len(), 1);

        let Block::Markdown { content, .. } = &result.doc.blocks[0] else {
            panic!("Expected Markdown block");
        };
        assert!(content.contains("# Hello"));
        assert!(content.contains("Some text here."));
    }

    // Front matter is parsed and the body that follows is kept as markdown.
    #[test]
    fn parse_front_matter() {
        let result = parse_clean("---\ntitle: Test\n---\n# Hello\n");
        let fm = result.doc.front_matter.as_ref().unwrap();
        assert_eq!(fm.title.as_deref(), Some("Test"));
        assert_eq!(result.doc.blocks.len(), 1);

        let Block::Markdown { content, .. } = &result.doc.blocks[0] else {
            panic!("Expected Markdown block");
        };
        assert!(content.contains("# Hello"));
    }

    // A single closed directive resolves to its typed block with a full span.
    #[test]
    fn parse_single_block() {
        let result = parse_clean("::callout[type=warning]\nDanger!\n::\n");
        assert_eq!(result.doc.blocks.len(), 1);

        let other = &result.doc.blocks[0];
        let Block::Callout {
            callout_type,
            content,
            span,
            ..
        } = other
        else {
            panic!("Expected Callout block, got {other:?}");
        };
        assert_eq!(*callout_type, crate::types::CalloutType::Warning);
        assert_eq!(content, "Danger!");
        assert_eq!(span.start_line, 1);
        assert_eq!(span.end_line, 3);
    }

    // Two directives separated by markdown produce three blocks in order.
    #[test]
    fn parse_two_blocks() {
        let result = parse_clean(
            "::callout[type=info]\nFirst\n::\n\nSome markdown.\n\n::data[format=json]\n{}\n::\n",
        );
        let blocks = &result.doc.blocks;
        assert_eq!(blocks.len(), 3);

        assert!(matches!(&blocks[0], Block::Callout { .. }));
        assert!(matches!(&blocks[1], Block::Markdown { .. }));
        assert!(matches!(&blocks[2], Block::Data { .. }));
    }

    // Deeper-colon directives nest inside the outer block.
    #[test]
    fn parse_nested_blocks() {
        let result = parse_clean(
            "::columns\n:::column\nLeft text.\n:::\n:::column\nRight text.\n:::\n::\n",
        );
        assert_eq!(result.doc.blocks.len(), 1);

        let other = &result.doc.blocks[0];
        let Block::Columns { columns, .. } = other else {
            panic!("Expected Columns block, got {other:?}");
        };
        assert_eq!(columns.len(), 2);
        assert!(columns[0].content.contains("Left text."));
        assert!(columns[1].content.contains("Right text."));
    }

    // A directive left open at EOF is still emitted, with a diagnostic.
    #[test]
    fn parse_unclosed_block() {
        let result = parse("::callout[type=warning]\nNo closing marker");
        assert!(!result.diagnostics.is_empty(), "Expected a diagnostic for unclosed block");
        assert_eq!(result.doc.blocks.len(), 1);

        let other = &result.doc.blocks[0];
        let Block::Callout { content, .. } = other else {
            panic!("Expected Callout block, got {other:?}");
        };
        assert!(content.contains("No closing marker"));
    }

    // A directive with no closing marker in the middle of a document.
    #[test]
    fn parse_leaf_directive() {
        let result = parse("# Title\n\n::metric[label=\"MRR\" value=\"$2K\"]\n\n## More\n");
        assert_eq!(
            result.doc.blocks.len(),
            2,
            "blocks: {:#?}",
            result.doc.blocks
        );
        let has_metric = result
            .doc
            .blocks
            .iter()
            .any(|b| matches!(b, Block::Metric { .. }));
        assert!(has_metric, "Should contain a metric block");
    }

    // Spans carry one-based line numbers for markdown and directive blocks.
    #[test]
    fn parse_block_spans() {
        let result = parse_clean("# Title\n::callout\nInside\n::\n# After\n");
        let blocks = &result.doc.blocks;

        let Block::Markdown { span, .. } = &blocks[0] else {
            panic!("Expected Markdown");
        };
        assert_eq!(span.start_line, 1);
        assert_eq!(span.end_line, 1);

        let other = &blocks[1];
        let Block::Callout { span, .. } = other else {
            panic!("Expected Callout, got {other:?}");
        };
        assert_eq!(span.start_line, 2);
        assert_eq!(span.end_line, 4);

        let Block::Markdown { span, .. } = &blocks[2] else {
            panic!("Expected Markdown");
        };
        assert_eq!(span.start_line, 5);
        assert_eq!(span.end_line, 5);
    }

    // Every known front matter field deserializes to its typed value.
    #[test]
    fn parse_front_matter_all_fields() {
        let input = concat!(
            "---\n",
            "title: \"Full Document\"\n",
            "type: plan\n",
            "status: active\n",
            "scope: workspace\n",
            "tags: [rust, parser]\n",
            "created: \"2026-02-10\"\n",
            "updated: \"2026-02-10\"\n",
            "author: \"Brady Davis\"\n",
            "confidence: high\n",
            "version: 2\n",
            "workspace: cloudsurf\n",
            "contributors: [\"Claude\"]\n",
            "decision: \"Use Rust\"\n",
            "related:\n",
            "  - path: plans/example.md\n",
            "    relationship: references\n",
            "---\n",
            "Body.\n",
        );
        let result = parse_clean(input);
        let fm = result.doc.front_matter.as_ref().unwrap();

        assert_eq!(fm.title.as_deref(), Some("Full Document"));
        assert_eq!(fm.doc_type, Some(crate::types::DocType::Plan));
        assert_eq!(fm.status, Some(crate::types::DocStatus::Active));
        assert_eq!(fm.scope, Some(crate::types::Scope::Workspace));
        assert_eq!(fm.tags.as_deref(), Some(&["rust".to_string(), "parser".to_string()][..]));
        assert_eq!(fm.created.as_deref(), Some("2026-02-10"));
        assert_eq!(fm.updated.as_deref(), Some("2026-02-10"));
        assert_eq!(fm.author.as_deref(), Some("Brady Davis"));
        assert_eq!(fm.confidence, Some(crate::types::Confidence::High));
        assert_eq!(fm.version, Some(2));
        assert_eq!(fm.workspace.as_deref(), Some("cloudsurf"));
        assert_eq!(fm.decision.as_deref(), Some("Use Rust"));

        let related = fm.related.as_ref().unwrap();
        assert_eq!(related.len(), 1);
        assert_eq!(related[0].path, "plans/example.md");
    }

    // Unrecognised front matter keys are preserved in `extra`.
    #[test]
    fn parse_unknown_front_matter_fields() {
        let result = parse_clean("---\ntitle: Test\ncustom_field: hello\nanother: 42\n---\n");
        let fm = result.doc.front_matter.as_ref().unwrap();
        assert_eq!(fm.title.as_deref(), Some("Test"));
        assert!(fm.extra.contains_key("custom_field"), "extra should contain custom_field");
        assert!(fm.extra.contains_key("another"), "extra should contain another");
    }

    // End-to-end: callout directive through to the typed block.
    #[test]
    fn parse_callout_end_to_end() {
        let result = parse_clean("::callout[type=warning]\nWatch out for sharp edges.\n::\n");
        assert_eq!(result.doc.blocks.len(), 1);

        let other = &result.doc.blocks[0];
        let Block::Callout {
            callout_type,
            content,
            span,
            ..
        } = other
        else {
            panic!("Expected Callout block, got {other:?}");
        };
        assert_eq!(*callout_type, crate::types::CalloutType::Warning);
        assert_eq!(content, "Watch out for sharp edges.");
        assert_eq!(span.start_line, 1);
        assert_eq!(span.end_line, 3);
    }

    // End-to-end: metric directive with quoted attribute values.
    #[test]
    fn parse_metric_end_to_end() {
        let result = parse_clean("::metric[label=\"MRR\" value=\"$2K\"]\n::\n");
        assert_eq!(result.doc.blocks.len(), 1);

        let other = &result.doc.blocks[0];
        let Block::Metric {
            label,
            value,
            trend,
            ..
        } = other
        else {
            panic!("Expected Metric block, got {other:?}");
        };
        assert_eq!(label, "MRR");
        assert_eq!(value, "$2K");
        assert!(trend.is_none());
    }

    // Typed blocks and markdown interleave in document order.
    #[test]
    fn parse_mixed_typed_blocks() {
        let input = concat!(
            "::callout[type=info]\nFYI\n::\n",
            "\n# Some Markdown\n\n",
            "::data[format=csv]\nA, B\n1, 2\n::\n",
        );
        let result = parse_clean(input);
        let blocks = &result.doc.blocks;
        assert_eq!(result.doc.blocks.len(), 3, "blocks: {:#?}", result.doc.blocks);

        assert!(matches!(&blocks[0], Block::Callout { .. }));
        assert!(matches!(&blocks[1], Block::Markdown { .. }));

        let other = &blocks[2];
        let Block::Data {
            format,
            headers,
            rows,
            ..
        } = other
        else {
            panic!("Expected Data block, got {other:?}");
        };
        assert_eq!(*format, crate::types::DataFormat::Csv);
        assert_eq!(headers, &["A", "B"]);
        assert_eq!(rows.len(), 1);
    }
}