1use hmd_core::{
2 Diagnostic, DuplicateIdRecord, FenceInfo, HmdBlock, HmdDocument, HmdNode, IdRecord,
3 MarkdownNode, RefRecord, ReferenceIndex, SourceSpan, TomlValueObject,
4};
5use serde_json::{Map, Number, Value};
6use std::collections::BTreeMap;
7use toml_edit::DocumentMut;
8
/// Profile identifiers accepted without an "unknown profile" (`HMD007`) diagnostic.
///
/// Both the frontmatter `profile` value and every entry of `uses` are checked
/// against this list by `parse_frontmatter`.
const BUILTIN_PROFILES: &[&str] = &["general@0.1", "decision@0.1", "progress@0.1", "todo@0.1"];
10
11pub fn parse_document(source: &str) -> HmdDocument {
12 let lines = collect_lines(source);
13 let mut diagnostics = Vec::new();
14 let frontmatter = parse_frontmatter(source, &lines, &mut diagnostics);
15
16 let mut parser = BlockParser {
17 source,
18 lines: &lines,
19 diagnostics: &mut diagnostics,
20 };
21 let mut index = frontmatter.body_start_line_index;
22 let parsed = parser.parse_nodes(&mut index, None);
23
24 let references = build_reference_index(&parsed.children, &mut diagnostics);
25
26 let mut document = HmdDocument::new(frontmatter.hmd_version, frontmatter.profile);
27 document.uses = frontmatter.uses;
28 document.meta = frontmatter.meta;
29 document.children = parsed.children;
30 document.references = references;
31 document.diagnostics = diagnostics;
32 document
33}
34
/// Result of reading the document-level `+++` frontmatter.
#[derive(Debug)]
struct Frontmatter {
    /// String value of the `hmd` key, or empty when absent or not a string.
    hmd_version: String,
    /// String value of the `profile` key, or empty when absent or not a string.
    profile: String,
    /// String entries of the `uses` array; non-string entries are skipped.
    uses: Vec<String>,
    /// The whole frontmatter TOML table converted to a JSON-style object.
    meta: TomlValueObject,
    /// Index into the line list at which body parsing starts: `0` when the
    /// frontmatter is missing, the line count when it is unterminated, and the
    /// line after the closing delimiter otherwise.
    body_start_line_index: usize,
}
43
44fn parse_frontmatter(
45 source: &str,
46 lines: &[Line<'_>],
47 diagnostics: &mut Vec<Diagnostic>,
48) -> Frontmatter {
49 let mut frontmatter = Frontmatter {
50 hmd_version: String::new(),
51 profile: String::new(),
52 uses: Vec::new(),
53 meta: Map::new(),
54 body_start_line_index: 0,
55 };
56
57 let Some(first_line) = lines.first() else {
58 diagnostics.push(Diagnostic::error("HMD001", "missing document frontmatter"));
59 return frontmatter;
60 };
61
62 if !is_frontmatter_delimiter(first_line, true) {
63 diagnostics.push(
64 Diagnostic::error("HMD001", "missing document frontmatter")
65 .with_source(line_span(first_line)),
66 );
67 return frontmatter;
68 }
69
70 let closing_index = lines
71 .iter()
72 .enumerate()
73 .skip(1)
74 .find_map(|(index, line)| is_frontmatter_delimiter(line, false).then_some(index));
75
76 let Some(closing_index) = closing_index else {
77 diagnostics.push(
78 Diagnostic::error("HMD002", "unterminated document frontmatter")
79 .with_source(line_span(first_line)),
80 );
81 frontmatter.body_start_line_index = lines.len();
82 return frontmatter;
83 };
84
85 let toml_start = lines
86 .get(1)
87 .map(|line| line.start)
88 .unwrap_or(first_line.end);
89 let toml_end = lines[closing_index].start;
90 let toml_source = &source[toml_start..toml_end];
91
92 match parse_toml_object(toml_source) {
93 Ok(meta) => {
94 frontmatter.hmd_version = string_field(&meta, "hmd").unwrap_or_default();
95 frontmatter.profile = string_field(&meta, "profile").unwrap_or_default();
96 frontmatter.uses = string_array_field(&meta, "uses");
97 frontmatter.meta = meta;
98 }
99 Err(message) => diagnostics.push(
100 Diagnostic::error(
101 "HMD002",
102 format!("invalid document frontmatter TOML: {message}"),
103 )
104 .with_source(line_span(first_line)),
105 ),
106 }
107
108 if !frontmatter.profile.is_empty() && !BUILTIN_PROFILES.contains(&frontmatter.profile.as_str())
109 {
110 diagnostics.push(
111 Diagnostic::error(
112 "HMD007",
113 format!("unknown profile '{}'", frontmatter.profile),
114 )
115 .with_source(line_span(first_line)),
116 );
117 }
118
119 for profile in &frontmatter.uses {
120 if !BUILTIN_PROFILES.contains(&profile.as_str()) {
121 diagnostics.push(
122 Diagnostic::error("HMD007", format!("unknown profile '{profile}'"))
123 .with_source(line_span(first_line)),
124 );
125 }
126 }
127
128 frontmatter.body_start_line_index = closing_index + 1;
129 frontmatter
130}
131
/// Recursive parser that turns body lines into markdown and semantic-block nodes.
struct BlockParser<'a, 'd> {
    /// Full document text; block metadata TOML is sliced out of it by byte offset.
    source: &'a str,
    /// All lines of the document, indexed by the cursor passed to each method.
    lines: &'a [Line<'a>],
    /// Sink for diagnostics raised while parsing blocks.
    diagnostics: &'d mut Vec<Diagnostic>,
}
137
/// Outcome of parsing a run of sibling nodes.
#[derive(Debug)]
struct ParsedNodes {
    /// Nodes collected in document order.
    children: Vec<HmdNode>,
    /// Line number of the closing fence that ended the run, or `None` when the
    /// input ran out before a closing fence was seen.
    close_line: Option<usize>,
}
143
144impl BlockParser<'_, '_> {
145 fn parse_nodes(&mut self, index: &mut usize, close_fence_length: Option<usize>) -> ParsedNodes {
146 let mut children = Vec::new();
147 let mut markdown = String::new();
148
149 while *index < self.lines.len() {
150 let line = &self.lines[*index];
151
152 if let Some(close_length) = close_fence_length {
153 if is_closer(line, close_length) {
154 flush_markdown(&mut markdown, &mut children, true);
155 let close_line = line.number;
156 *index += 1;
157 return ParsedNodes {
158 children,
159 close_line: Some(close_line),
160 };
161 }
162 }
163
164 if let Some(opener) = parse_opener(line) {
165 flush_markdown(&mut markdown, &mut children, false);
166 let block = self.parse_block(index, opener);
167 children.push(HmdNode::HmdBlock(Box::new(block)));
168 continue;
169 }
170
171 markdown.push_str(line.raw);
172 *index += 1;
173 }
174
175 flush_markdown(&mut markdown, &mut children, false);
176 ParsedNodes {
177 children,
178 close_line: None,
179 }
180 }
181
182 fn parse_block(&mut self, index: &mut usize, opener: Opener) -> HmdBlock {
183 let opener_line = &self.lines[*index];
184 if !is_valid_block_type(&opener.block_type) {
185 self.diagnostics.push(
186 Diagnostic::error(
187 "HMD004",
188 format!("invalid block type '{}'", opener.block_type),
189 )
190 .with_source(line_span(opener_line)),
191 );
192 }
193
194 *index += 1;
195
196 let meta = if self
197 .lines
198 .get(*index)
199 .is_some_and(|line| is_frontmatter_delimiter(line, false))
200 {
201 self.parse_block_meta(index)
202 } else {
203 Map::new()
204 };
205
206 let parsed = self.parse_nodes(index, Some(opener.fence_length));
207 if parsed.close_line.is_none() {
208 self.diagnostics.push(
209 Diagnostic::error(
210 "HMD003",
211 format!("unclosed semantic block '{}'", opener.block_type),
212 )
213 .with_source(line_span(opener_line)),
214 );
215 }
216
217 let id = string_field(&meta, "id");
218 HmdBlock {
219 block_type: opener.block_type,
220 id,
221 meta,
222 children: parsed.children,
223 body: None,
224 source: None,
225 diagnostics: None,
226 fence: Some(FenceInfo {
227 fence_char: ':',
228 length: opener.fence_length,
229 open_line: opener_line.number,
230 close_line: parsed.close_line,
231 open_span: None,
232 close_span: None,
233 }),
234 profile: None,
235 validation: None,
236 }
237 }
238
239 fn parse_block_meta(&mut self, index: &mut usize) -> TomlValueObject {
240 let open_line = &self.lines[*index];
241 *index += 1;
242
243 let toml_start = self
244 .lines
245 .get(*index)
246 .map(|line| line.start)
247 .unwrap_or(open_line.end);
248
249 let closing_index =
250 self.lines
251 .iter()
252 .enumerate()
253 .skip(*index)
254 .find_map(|(candidate, line)| {
255 is_frontmatter_delimiter(line, false).then_some(candidate)
256 });
257
258 let Some(closing_index) = closing_index else {
259 self.diagnostics.push(
260 Diagnostic::error("HMD002", "unterminated block metadata")
261 .with_source(line_span(open_line)),
262 );
263 *index = self.lines.len();
264 return Map::new();
265 };
266
267 let toml_source = &self.source[toml_start..self.lines[closing_index].start];
268 *index = closing_index + 1;
269
270 match parse_toml_object(toml_source) {
271 Ok(meta) => meta,
272 Err(message) => {
273 self.diagnostics.push(
274 Diagnostic::error("HMD002", format!("invalid block metadata TOML: {message}"))
275 .with_source(line_span(open_line)),
276 );
277 Map::new()
278 }
279 }
280 }
281}
282
283fn flush_markdown(markdown: &mut String, children: &mut Vec<HmdNode>, trim_trailing_blank: bool) {
284 let text = trim_markdown_slice(markdown, trim_trailing_blank);
285 markdown.clear();
286
287 if text.trim().is_empty() {
288 return;
289 }
290
291 children.push(HmdNode::Markdown(MarkdownNode::source_slice(text)));
292}
293
294fn trim_markdown_slice(text: &str, trim_trailing_blank: bool) -> String {
295 let parts = split_preserving_newlines(text);
296 let mut start = 0;
297 let mut end = parts.len();
298
299 while start < end && is_blank_markdown_line(parts[start]) {
300 start += 1;
301 }
302
303 if trim_trailing_blank {
304 while end > start && is_blank_markdown_line(parts[end - 1]) {
305 end -= 1;
306 }
307 }
308
309 parts[start..end].concat()
310}
311
/// Splits `text` into lines, each keeping its trailing `\n` when it has one.
///
/// Empty input yields an empty vector; a final segment without a terminator is
/// returned as-is. This is exactly `str::split_inclusive('\n')`, so the
/// standard-library iterator is used instead of a hand-written byte scan.
fn split_preserving_newlines(text: &str) -> Vec<&str> {
    text.split_inclusive('\n').collect()
}
330
331fn is_blank_markdown_line(line: &str) -> bool {
332 let without_lf = line.strip_suffix('\n').unwrap_or(line);
333 let without_crlf = without_lf.strip_suffix('\r').unwrap_or(without_lf);
334 trim_horizontal(without_crlf).is_empty()
335}
336
/// A recognised block-opening fence line.
#[derive(Debug, Clone)]
struct Opener {
    /// Number of leading `:` characters on the opener line (at least 3).
    fence_length: usize,
    /// Text following the fence with surrounding spaces and tabs removed.
    block_type: String,
}
342
343fn parse_opener(line: &Line<'_>) -> Option<Opener> {
344 let fence_length = line
345 .content
346 .bytes()
347 .take_while(|byte| *byte == b':')
348 .count();
349 if fence_length < 3 {
350 return None;
351 }
352
353 let rest = &line.content[fence_length..];
354 let block_type = trim_horizontal(rest);
355 if block_type.is_empty() {
356 return None;
357 }
358
359 Some(Opener {
360 fence_length,
361 block_type: block_type.to_string(),
362 })
363}
364
365fn is_closer(line: &Line<'_>, opener_length: usize) -> bool {
366 let trimmed = trim_horizontal(line.content);
367 let colon_count = trimmed.bytes().take_while(|byte| *byte == b':').count();
368 colon_count >= opener_length && colon_count == trimmed.len()
369}
370
371fn is_valid_block_type(block_type: &str) -> bool {
372 let mut parts = block_type.split('.');
373 let Some(first) = parts.next() else {
374 return false;
375 };
376
377 if first.is_empty() || !is_valid_identifier_segment(first) {
378 return false;
379 }
380
381 parts.all(|part| !part.is_empty() && is_valid_identifier_segment(part))
382}
383
/// Returns `true` when `segment` starts with an ASCII letter and continues
/// with only ASCII letters, ASCII digits, `_` or `-`. The empty string is
/// rejected.
fn is_valid_identifier_segment(segment: &str) -> bool {
    let mut rest = segment.chars();
    match rest.next() {
        Some(head) if head.is_ascii_alphabetic() => {
            rest.all(|ch| matches!(ch, '_' | '-') || ch.is_ascii_alphanumeric())
        }
        _ => false,
    }
}
396
397fn build_reference_index(
398 children: &[HmdNode],
399 diagnostics: &mut Vec<Diagnostic>,
400) -> ReferenceIndex {
401 let mut records = Vec::new();
402 collect_block_records(children, None, &mut records);
403
404 let mut by_id: BTreeMap<String, Vec<BlockRecord>> = BTreeMap::new();
405 for record in records.iter().filter(|record| record.id.is_some()) {
406 by_id
407 .entry(record.id.clone().expect("filtered by id"))
408 .or_default()
409 .push(record.clone());
410 }
411
412 let mut references = ReferenceIndex::default();
413 for (id, matching_records) in by_id {
414 if matching_records.len() == 1 {
415 let record = &matching_records[0];
416 references.ids.insert(
417 id.clone(),
418 IdRecord {
419 id,
420 path: record.path.clone(),
421 block_type: Some(record.block_type.clone()),
422 source: None,
423 },
424 );
425 } else {
426 let paths = matching_records
427 .iter()
428 .map(|record| record.path.clone())
429 .collect::<Vec<_>>();
430 references.duplicates.push(DuplicateIdRecord {
431 id: id.clone(),
432 paths,
433 });
434 diagnostics.push(
435 Diagnostic::error("HMD006", format!("duplicate block id '{id}'"))
436 .with_path(format!("/blocks/{id}")),
437 );
438 }
439 }
440
441 for record in &records {
442 if record.block_type == "recommendation" {
443 if let Some(target) = string_field(&record.meta, "option") {
444 let target_record = references.ids.get(&target);
445 let resolved = target_record
446 .and_then(|id_record| id_record.block_type.as_deref())
447 .is_some_and(|block_type| block_type == "option");
448
449 references.refs.push(RefRecord {
450 from: record.path.clone(),
451 field: "option".to_string(),
452 target,
453 resolved: Some(resolved),
454 target_path: target_record
455 .filter(|_| resolved)
456 .map(|id_record| id_record.path.clone()),
457 source: None,
458 });
459 }
460 }
461 }
462
463 references
464}
465
/// Flattened view of one semantic block, used while building the reference index.
#[derive(Debug, Clone)]
struct BlockRecord {
    /// The block's type string as written on its opener.
    block_type: String,
    /// The block's id, when its metadata provided one.
    id: Option<String>,
    /// `/blocks/{id}` for blocks with an id, otherwise a positional
    /// `.../children/{index}` path.
    path: String,
    /// Copy of the block's metadata object.
    meta: TomlValueObject,
}
473
474fn collect_block_records(
475 children: &[HmdNode],
476 parent_path: Option<&str>,
477 records: &mut Vec<BlockRecord>,
478) {
479 for (index, node) in children.iter().enumerate() {
480 let Some(block) = node.as_block() else {
481 continue;
482 };
483
484 let child_path = match parent_path {
485 Some(parent_path) => format!("{parent_path}/children/{index}"),
486 None => format!("/children/{index}"),
487 };
488 let path = block
489 .id
490 .as_ref()
491 .map(|id| format!("/blocks/{id}"))
492 .unwrap_or(child_path);
493
494 records.push(BlockRecord {
495 block_type: block.block_type.clone(),
496 id: block.id.clone(),
497 path: path.clone(),
498 meta: block.meta.clone(),
499 });
500 collect_block_records(&block.children, Some(&path), records);
501 }
502}
503
504fn parse_toml_object(source: &str) -> Result<TomlValueObject, String> {
505 source
506 .parse::<DocumentMut>()
507 .map_err(|error| error.to_string())?;
508
509 let value = toml::from_str::<toml::Value>(source).map_err(|error| error.to_string())?;
510 match toml_value_to_json(value) {
511 Value::Object(object) => Ok(object),
512 _ => Ok(Map::new()),
513 }
514}
515
516fn toml_value_to_json(value: toml::Value) -> Value {
517 match value {
518 toml::Value::String(value) => Value::String(value),
519 toml::Value::Integer(value) => Value::Number(Number::from(value)),
520 toml::Value::Float(value) => Number::from_f64(value).map_or(Value::Null, Value::Number),
521 toml::Value::Boolean(value) => Value::Bool(value),
522 toml::Value::Datetime(value) => Value::String(value.to_string()),
523 toml::Value::Array(values) => {
524 Value::Array(values.into_iter().map(toml_value_to_json).collect())
525 }
526 toml::Value::Table(values) => {
527 let object = values
528 .into_iter()
529 .map(|(key, value)| (key, toml_value_to_json(value)))
530 .collect();
531 Value::Object(object)
532 }
533 }
534}
535
536fn string_field(meta: &TomlValueObject, field: &str) -> Option<String> {
537 meta.get(field)
538 .and_then(Value::as_str)
539 .map(ToString::to_string)
540}
541
542fn string_array_field(meta: &TomlValueObject, field: &str) -> Vec<String> {
543 meta.get(field)
544 .and_then(Value::as_array)
545 .map(|values| {
546 values
547 .iter()
548 .filter_map(Value::as_str)
549 .map(ToString::to_string)
550 .collect()
551 })
552 .unwrap_or_default()
553}
554
/// One line of the source text together with its position.
#[derive(Debug, Clone)]
struct Line<'a> {
    /// The line exactly as it appears in the source, terminator included.
    raw: &'a str,
    /// The line with its trailing `\n` / `\r\n` removed.
    content: &'a str,
    /// Byte offset of the line's first byte in the source.
    start: usize,
    /// Byte offset just past the line's last byte (terminator included).
    end: usize,
    /// Line number, starting at 1.
    number: usize,
}
563
564fn collect_lines(source: &str) -> Vec<Line<'_>> {
565 let mut lines = Vec::new();
566 let mut start = 0;
567 let mut number = 1;
568
569 for raw in source.split_inclusive('\n') {
570 let end = start + raw.len();
571 lines.push(Line {
572 raw,
573 content: strip_line_ending(raw),
574 start,
575 end,
576 number,
577 });
578 start = end;
579 number += 1;
580 }
581
582 if start < source.len() {
583 let raw = &source[start..];
584 lines.push(Line {
585 raw,
586 content: strip_line_ending(raw),
587 start,
588 end: source.len(),
589 number,
590 });
591 }
592
593 lines
594}
595
/// Removes one trailing line ending from `line`: `\r\n`, `\n`, or a lone
/// trailing `\r`. Text without any of these is returned unchanged.
fn strip_line_ending(line: &str) -> &str {
    line.strip_suffix("\r\n")
        .or_else(|| line.strip_suffix('\n'))
        .or_else(|| line.strip_suffix('\r'))
        .unwrap_or(line)
}
600
601fn is_frontmatter_delimiter(line: &Line<'_>, allow_bom: bool) -> bool {
602 let content = if allow_bom {
603 line.content
604 .strip_prefix('\u{feff}')
605 .unwrap_or(line.content)
606 } else {
607 line.content
608 };
609 trim_horizontal(content) == "+++"
610}
611
/// Trims spaces and tabs, and nothing else, from both ends of `value`.
fn trim_horizontal(value: &str) -> &str {
    const HORIZONTAL_WHITESPACE: &[char] = &[' ', '\t'];
    value.trim_matches(HORIZONTAL_WHITESPACE)
}
615
616fn line_span(line: &Line<'_>) -> SourceSpan {
617 SourceSpan {
618 start: line.start,
619 end: line.end,
620 start_line: line.number,
621 start_column: 1,
622 end_line: line.number,
623 end_column: line.content.chars().count() + 1,
624 }
625}
626
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use std::path::{Path, PathBuf};

    /// A valid todo document exposes its frontmatter values and raises none of
    /// the frontmatter/profile diagnostics.
    #[test]
    fn frontmatter_valid_todo_has_profile_metadata() {
        let document = parse_fixture("fixtures/valid/todo-basic.hmd");

        assert_eq!(document.hmd_version, "0.1");
        assert_eq!(document.profile, "todo@0.1");
        assert!(document.uses.is_empty());
        assert!(!has_diagnostic(&document, "HMD001"));
        assert!(!has_diagnostic(&document, "HMD002"));
        assert!(!has_diagnostic(&document, "HMD007"));
    }

    #[test]
    fn parse_todo_basic_matches_fixture() {
        assert_parse_matches_fixture(
            "fixtures/valid/todo-basic.hmd",
            "fixtures/ir/todo-basic.json",
        );
    }

    #[test]
    fn parse_decision_basic_matches_fixture() {
        assert_parse_matches_fixture(
            "fixtures/valid/decision-basic.hmd",
            "fixtures/ir/decision-basic.json",
        );
    }

    #[test]
    fn invalid_missing_frontmatter_reports_hmd001() {
        assert_fixture_reports("fixtures/invalid/missing-frontmatter.hmd", "HMD001");
    }

    #[test]
    fn invalid_toml_reports_hmd002() {
        assert_fixture_reports("fixtures/invalid/invalid-toml.hmd", "HMD002");
    }

    #[test]
    fn invalid_unclosed_block_reports_hmd003() {
        assert_fixture_reports("fixtures/invalid/unclosed-block.hmd", "HMD003");
    }

    #[test]
    fn invalid_block_type_reports_hmd004() {
        assert_fixture_reports("fixtures/invalid/invalid-block-type.hmd", "HMD004");
    }

    #[test]
    fn invalid_duplicate_id_reports_hmd006() {
        assert_fixture_reports("fixtures/invalid/duplicate-id.hmd", "HMD006");
    }

    #[test]
    fn invalid_unknown_profile_reports_hmd007() {
        assert_fixture_reports("fixtures/invalid/unknown-profile.hmd", "HMD007");
    }

    /// Parses `source_path` and checks that its serialized form equals the JSON
    /// stored at `expected_path`.
    fn assert_parse_matches_fixture(source_path: &str, expected_path: &str) {
        let parsed = parse_fixture(source_path);
        let actual = serde_json::to_value(parsed).expect("serializes parsed document");

        let expected_file = repo_path(expected_path);
        let expected_text = fs::read_to_string(expected_file).expect("reads expected fixture");
        let expected: Value =
            serde_json::from_str(&expected_text).expect("expected fixture is valid JSON");

        assert_eq!(actual, expected);
    }

    /// Parses the fixture at `path` and requires a diagnostic with `code`.
    fn assert_fixture_reports(path: &str, code: &str) {
        let document = parse_fixture(path);
        let reported = has_diagnostic(&document, code);
        assert!(
            reported,
            "expected diagnostic {code}, got {:?}",
            document.diagnostics
        );
    }

    /// Returns `true` when any diagnostic on the document carries `code`.
    fn has_diagnostic(document: &HmdDocument, code: &str) -> bool {
        for diagnostic in &document.diagnostics {
            if diagnostic.code == code {
                return true;
            }
        }
        false
    }

    /// Reads a fixture relative to the repository root and parses it.
    fn parse_fixture(path: &str) -> HmdDocument {
        let fixture_file = repo_path(path);
        let source = fs::read_to_string(fixture_file).expect("reads source fixture");
        parse_document(&source)
    }

    /// Resolves `path` against the directory two levels above this crate's manifest.
    fn repo_path(path: impl AsRef<Path>) -> PathBuf {
        let mut resolved = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
        resolved.push("../..");
        resolved.push(path);
        resolved
    }
}