use hmd_core::{
Diagnostic, DuplicateIdRecord, FenceInfo, HmdBlock, HmdDocument, HmdNode, IdRecord,
MarkdownNode, RefRecord, ReferenceIndex, SourceSpan, TomlValueObject,
};
use serde_json::{Map, Number, Value};
use std::collections::BTreeMap;
use toml_edit::DocumentMut;
/// Profile identifiers that frontmatter may name in `profile` or `uses`
/// without triggering an `HMD007` "unknown profile" diagnostic.
const BUILTIN_PROFILES: &[&str] = &["general@0.1", "decision@0.1", "progress@0.1", "todo@0.1"];
pub fn parse_document(source: &str) -> HmdDocument {
let lines = collect_lines(source);
let mut diagnostics = Vec::new();
let frontmatter = parse_frontmatter(source, &lines, &mut diagnostics);
let mut parser = BlockParser {
source,
lines: &lines,
diagnostics: &mut diagnostics,
};
let mut index = frontmatter.body_start_line_index;
let parsed = parser.parse_nodes(&mut index, None);
let references = build_reference_index(&parsed.children, &mut diagnostics);
let mut document = HmdDocument::new(frontmatter.hmd_version, frontmatter.profile);
document.uses = frontmatter.uses;
document.meta = frontmatter.meta;
document.children = parsed.children;
document.references = references;
document.diagnostics = diagnostics;
document
}
/// Result of reading the document-level frontmatter.
#[derive(Debug)]
struct Frontmatter {
// String value of the `hmd` key; empty when the key is absent or not a string.
hmd_version: String,
// String value of the `profile` key; empty when the key is absent or not a string.
profile: String,
// String entries of the `uses` array; non-string entries are dropped.
uses: Vec<String>,
// The whole frontmatter table converted to JSON values (empty if parsing failed).
meta: TomlValueObject,
// Index into the line list at which body parsing should start.
body_start_line_index: usize,
}
/// Reads the `+++`-delimited TOML frontmatter at the top of the document.
///
/// Diagnostics pushed here:
/// - `HMD001` when there are no lines or the first line is not a `+++`
///   delimiter (a leading BOM is tolerated on that line);
/// - `HMD002` when no closing delimiter exists or the TOML does not parse;
/// - `HMD007` for a non-empty `profile`, or any `uses` entry, that is not in
///   `BUILTIN_PROFILES`.
///
/// The returned `body_start_line_index` is `0` when the frontmatter is
/// missing, `lines.len()` when it is unterminated, and the line after the
/// closing delimiter otherwise.
fn parse_frontmatter(
    source: &str,
    lines: &[Line<'_>],
    diagnostics: &mut Vec<Diagnostic>,
) -> Frontmatter {
    let mut result = Frontmatter {
        hmd_version: String::new(),
        profile: String::new(),
        uses: Vec::new(),
        meta: Map::new(),
        body_start_line_index: 0,
    };

    let opening = match lines.first() {
        Some(line) => line,
        None => {
            diagnostics.push(Diagnostic::error("HMD001", "missing document frontmatter"));
            return result;
        }
    };
    // Every frontmatter diagnostic below is anchored to the opening line.
    let opening_span = || line_span(opening);

    if !is_frontmatter_delimiter(opening, true) {
        diagnostics.push(
            Diagnostic::error("HMD001", "missing document frontmatter")
                .with_source(opening_span()),
        );
        return result;
    }

    // Search for the closing delimiter among the lines after the opener.
    let closing_offset = lines[1..]
        .iter()
        .position(|line| is_frontmatter_delimiter(line, false));
    let closing_index = match closing_offset {
        Some(offset) => offset + 1,
        None => {
            diagnostics.push(
                Diagnostic::error("HMD002", "unterminated document frontmatter")
                    .with_source(opening_span()),
            );
            result.body_start_line_index = lines.len();
            return result;
        }
    };
    result.body_start_line_index = closing_index + 1;

    // The TOML payload is the byte range between the two delimiter lines.
    let toml_start = lines.get(1).map_or(opening.end, |line| line.start);
    let toml_end = lines[closing_index].start;
    match parse_toml_object(&source[toml_start..toml_end]) {
        Ok(table) => {
            result.hmd_version = string_field(&table, "hmd").unwrap_or_default();
            result.profile = string_field(&table, "profile").unwrap_or_default();
            result.uses = string_array_field(&table, "uses");
            result.meta = table;
        }
        Err(message) => diagnostics.push(
            Diagnostic::error(
                "HMD002",
                format!("invalid document frontmatter TOML: {message}"),
            )
            .with_source(opening_span()),
        ),
    }

    // The primary profile is only checked when present; `uses` entries are
    // always checked. Order of diagnostics: primary profile first, then `uses`.
    let primary = Some(&result.profile).filter(|profile| !profile.is_empty());
    for profile in primary.into_iter().chain(&result.uses) {
        if !BUILTIN_PROFILES.contains(&profile.as_str()) {
            diagnostics.push(
                Diagnostic::error("HMD007", format!("unknown profile '{profile}'"))
                    .with_source(opening_span()),
            );
        }
    }

    result
}
/// State shared by the recursive block parser.
struct BlockParser<'a, 'd> {
// Full document text; block metadata TOML is sliced out of it by byte offset.
source: &'a str,
// All lines of the document, as produced by `collect_lines`.
lines: &'a [Line<'a>],
// Sink for diagnostics raised while parsing blocks.
diagnostics: &'d mut Vec<Diagnostic>,
}
/// Outcome of parsing a run of sibling nodes.
#[derive(Debug)]
struct ParsedNodes {
// Nodes collected, in source order.
children: Vec<HmdNode>,
// Line number of the closing fence that ended the run, or `None` when the
// run ended because the input was exhausted.
close_line: Option<usize>,
}
impl BlockParser<'_, '_> {
/// Parses sibling nodes starting at `*index`, advancing `*index` as lines
/// are consumed.
///
/// When `close_fence_length` is `Some`, parsing stops at the first line that
/// `is_closer` accepts for that length; the closer is consumed and its line
/// number is returned in `close_line`. Otherwise parsing runs to the end of
/// the input and `close_line` is `None`.
fn parse_nodes(&mut self, index: &mut usize, close_fence_length: Option<usize>) -> ParsedNodes {
let mut children = Vec::new();
// Raw text of consecutive non-block lines, pending emission as one node.
let mut markdown = String::new();
while *index < self.lines.len() {
let line = &self.lines[*index];
// The closer check comes before the opener check.
if let Some(close_length) = close_fence_length {
if is_closer(line, close_length) {
// Only the flush before a closer trims trailing blank lines.
flush_markdown(&mut markdown, &mut children, true);
let close_line = line.number;
*index += 1;
return ParsedNodes {
children,
close_line: Some(close_line),
};
}
}
if let Some(opener) = parse_opener(line) {
flush_markdown(&mut markdown, &mut children, false);
// `parse_block` advances `index` past everything the block consumed.
let block = self.parse_block(index, opener);
children.push(HmdNode::HmdBlock(Box::new(block)));
continue;
}
markdown.push_str(line.raw);
*index += 1;
}
// Input exhausted without meeting a closer.
flush_markdown(&mut markdown, &mut children, false);
ParsedNodes {
children,
close_line: None,
}
}
/// Parses one semantic block whose opener is the line at `*index`.
///
/// Pushes `HMD004` when the block type is not valid (the block is still
/// parsed) and `HMD003` when no closing fence is found. The block's `id` is
/// taken from the string `id` key of its metadata, if any.
fn parse_block(&mut self, index: &mut usize, opener: Opener) -> HmdBlock {
let opener_line = &self.lines[*index];
if !is_valid_block_type(&opener.block_type) {
self.diagnostics.push(
Diagnostic::error(
"HMD004",
format!("invalid block type '{}'", opener.block_type),
)
.with_source(line_span(opener_line)),
);
}
*index += 1;
// Metadata is present only when the line right after the opener is `+++`.
let meta = if self
.lines
.get(*index)
.is_some_and(|line| is_frontmatter_delimiter(line, false))
{
self.parse_block_meta(index)
} else {
Map::new()
};
// Children are parsed recursively until a closer of sufficient length.
let parsed = self.parse_nodes(index, Some(opener.fence_length));
if parsed.close_line.is_none() {
self.diagnostics.push(
Diagnostic::error(
"HMD003",
format!("unclosed semantic block '{}'", opener.block_type),
)
.with_source(line_span(opener_line)),
);
}
let id = string_field(&meta, "id");
HmdBlock {
block_type: opener.block_type,
id,
meta,
children: parsed.children,
body: None,
source: None,
diagnostics: None,
fence: Some(FenceInfo {
fence_char: ':',
length: opener.fence_length,
open_line: opener_line.number,
close_line: parsed.close_line,
open_span: None,
close_span: None,
}),
profile: None,
validation: None,
}
}
/// Parses the `+++`-delimited TOML metadata whose opening delimiter is the
/// line at `*index`.
///
/// On success `*index` is left on the line after the closing delimiter. When
/// no closing delimiter exists, `HMD002` is pushed, `*index` is set to the
/// end of the input and an empty table is returned. Invalid TOML also pushes
/// `HMD002` and yields an empty table.
fn parse_block_meta(&mut self, index: &mut usize) -> TomlValueObject {
let open_line = &self.lines[*index];
*index += 1;
let toml_start = self
.lines
.get(*index)
.map(|line| line.start)
.unwrap_or(open_line.end);
// The search for the closing delimiter runs over all remaining lines.
let closing_index =
self.lines
.iter()
.enumerate()
.skip(*index)
.find_map(|(candidate, line)| {
is_frontmatter_delimiter(line, false).then_some(candidate)
});
let Some(closing_index) = closing_index else {
self.diagnostics.push(
Diagnostic::error("HMD002", "unterminated block metadata")
.with_source(line_span(open_line)),
);
*index = self.lines.len();
return Map::new();
};
// TOML text is the byte range between the two delimiter lines.
let toml_source = &self.source[toml_start..self.lines[closing_index].start];
*index = closing_index + 1;
match parse_toml_object(toml_source) {
Ok(meta) => meta,
Err(message) => {
self.diagnostics.push(
Diagnostic::error("HMD002", format!("invalid block metadata TOML: {message}"))
.with_source(line_span(open_line)),
);
Map::new()
}
}
}
}
/// Emits the buffered markdown text as a node and empties the buffer.
///
/// Leading blank lines are always trimmed; trailing blank lines are trimmed
/// only when `trim_trailing_blank` is set. Nothing is pushed when the trimmed
/// text is whitespace-only, but the buffer is cleared either way.
fn flush_markdown(markdown: &mut String, children: &mut Vec<HmdNode>, trim_trailing_blank: bool) {
    let trimmed = trim_markdown_slice(markdown.as_str(), trim_trailing_blank);
    markdown.clear();
    if !trimmed.trim().is_empty() {
        children.push(HmdNode::Markdown(MarkdownNode::source_slice(trimmed)));
    }
}
/// Returns `text` without its leading blank lines and, when
/// `trim_trailing_blank` is set, without its trailing blank lines.
///
/// Lines are kept verbatim, including their terminators; only whole blank
/// lines at the edges are dropped.
fn trim_markdown_slice(text: &str, trim_trailing_blank: bool) -> String {
    let segments = split_preserving_newlines(text);

    // Skip everything before the first non-blank line (or all of it).
    let first_kept = segments
        .iter()
        .position(|segment| !is_blank_markdown_line(segment))
        .unwrap_or(segments.len());
    let kept = &segments[first_kept..];

    let kept_len = if trim_trailing_blank {
        // Keep up to and including the last non-blank line.
        kept.iter()
            .rposition(|segment| !is_blank_markdown_line(segment))
            .map_or(0, |last| last + 1)
    } else {
        kept.len()
    };

    kept[..kept_len].concat()
}
/// Splits `text` into lines, keeping each line's `\n` terminator attached.
///
/// An empty input yields an empty vector, and a final segment without a
/// trailing `\n` is returned as-is. Concatenating the parts reproduces `text`.
fn split_preserving_newlines(text: &str) -> Vec<&str> {
    // `split_inclusive` has exactly these semantics, so no manual byte scan
    // is needed.
    text.split_inclusive('\n').collect()
}
fn is_blank_markdown_line(line: &str) -> bool {
let without_lf = line.strip_suffix('\n').unwrap_or(line);
let without_crlf = without_lf.strip_suffix('\r').unwrap_or(without_lf);
trim_horizontal(without_crlf).is_empty()
}
/// A recognised block-opening fence line.
#[derive(Debug, Clone)]
struct Opener {
// Number of leading `:` characters on the opener line (at least 3).
fence_length: usize,
// Text after the fence with surrounding spaces/tabs removed; never empty.
block_type: String,
}
/// Recognises a block opener: three or more `:` at the very start of the
/// line, followed by a non-empty block type.
///
/// Returns `None` for shorter fences and for fences with nothing but spaces
/// or tabs after them. Leading whitespace before the colons is not skipped.
fn parse_opener(line: &Line<'_>) -> Option<Opener> {
    let after_fence = line.content.trim_start_matches(':');
    // The colons are single bytes, so the length difference is the fence length.
    let fence_length = line.content.len() - after_fence.len();
    if fence_length < 3 {
        return None;
    }

    let block_type = trim_horizontal(after_fence);
    (!block_type.is_empty()).then(|| Opener {
        fence_length,
        block_type: block_type.to_string(),
    })
}
/// Reports whether `line` closes a block opened with `opener_length` colons:
/// after trimming spaces and tabs it must consist solely of `:` characters
/// and be at least `opener_length` long.
fn is_closer(line: &Line<'_>, opener_length: usize) -> bool {
    let fence = trim_horizontal(line.content);
    fence.len() >= opener_length && fence.bytes().all(|byte| byte == b':')
}
fn is_valid_block_type(block_type: &str) -> bool {
let mut parts = block_type.split('.');
let Some(first) = parts.next() else {
return false;
};
if first.is_empty() || !is_valid_identifier_segment(first) {
return false;
}
parts.all(|part| !part.is_empty() && is_valid_identifier_segment(part))
}
/// Reports whether `segment` starts with an ASCII letter and continues with
/// only ASCII letters, ASCII digits, `_` or `-`. The empty string is invalid.
fn is_valid_identifier_segment(segment: &str) -> bool {
    match segment.chars().next() {
        Some(lead) if lead.is_ascii_alphabetic() => {}
        _ => return false,
    }
    segment
        .chars()
        .skip(1)
        .all(|ch| matches!(ch, '_' | '-') || ch.is_ascii_alphanumeric())
}
fn build_reference_index(
children: &[HmdNode],
diagnostics: &mut Vec<Diagnostic>,
) -> ReferenceIndex {
let mut records = Vec::new();
collect_block_records(children, None, &mut records);
let mut by_id: BTreeMap<String, Vec<BlockRecord>> = BTreeMap::new();
for record in records.iter().filter(|record| record.id.is_some()) {
by_id
.entry(record.id.clone().expect("filtered by id"))
.or_default()
.push(record.clone());
}
let mut references = ReferenceIndex::default();
for (id, matching_records) in by_id {
if matching_records.len() == 1 {
let record = &matching_records[0];
references.ids.insert(
id.clone(),
IdRecord {
id,
path: record.path.clone(),
block_type: Some(record.block_type.clone()),
source: None,
},
);
} else {
let paths = matching_records
.iter()
.map(|record| record.path.clone())
.collect::<Vec<_>>();
references.duplicates.push(DuplicateIdRecord {
id: id.clone(),
paths,
});
diagnostics.push(
Diagnostic::error("HMD006", format!("duplicate block id '{id}'"))
.with_path(format!("/blocks/{id}")),
);
}
}
for record in &records {
if record.block_type == "recommendation" {
if let Some(target) = string_field(&record.meta, "option") {
let target_record = references.ids.get(&target);
let resolved = target_record
.and_then(|id_record| id_record.block_type.as_deref())
.is_some_and(|block_type| block_type == "option");
references.refs.push(RefRecord {
from: record.path.clone(),
field: "option".to_string(),
target,
resolved: Some(resolved),
target_path: target_record
.filter(|_| resolved)
.map(|id_record| id_record.path.clone()),
source: None,
});
}
}
}
references
}
/// Flattened view of one block, gathered for building the reference index.
#[derive(Debug, Clone)]
struct BlockRecord {
// The block's type string.
block_type: String,
// The block's id, if it has one.
id: Option<String>,
// `/blocks/{id}` for blocks with an id; otherwise a positional
// `.../children/{index}` path under the parent's path.
path: String,
// Copy of the block's metadata table.
meta: TomlValueObject,
}
/// Walks `children` depth-first and appends one [`BlockRecord`] per block,
/// parents before their descendants.
///
/// A block with an id gets the path `/blocks/{id}`. A block without one gets
/// `{parent_path}/children/{index}` (or `/children/{index}` at the top
/// level), where `index` is its position among all sibling nodes, non-block
/// nodes included.
fn collect_block_records(
    children: &[HmdNode],
    parent_path: Option<&str>,
    records: &mut Vec<BlockRecord>,
) {
    let blocks = children
        .iter()
        .enumerate()
        .filter_map(|(position, node)| node.as_block().map(|block| (position, block)));

    for (position, block) in blocks {
        let path = match (block.id.as_ref(), parent_path) {
            (Some(id), _) => format!("/blocks/{id}"),
            (None, Some(parent)) => format!("{parent}/children/{position}"),
            (None, None) => format!("/children/{position}"),
        };
        records.push(BlockRecord {
            block_type: block.block_type.clone(),
            id: block.id.clone(),
            path: path.clone(),
            meta: block.meta.clone(),
        });
        collect_block_records(&block.children, Some(&path), records);
    }
}
fn parse_toml_object(source: &str) -> Result<TomlValueObject, String> {
source
.parse::<DocumentMut>()
.map_err(|error| error.to_string())?;
let value = toml::from_str::<toml::Value>(source).map_err(|error| error.to_string())?;
match toml_value_to_json(value) {
Value::Object(object) => Ok(object),
_ => Ok(Map::new()),
}
}
fn toml_value_to_json(value: toml::Value) -> Value {
match value {
toml::Value::String(value) => Value::String(value),
toml::Value::Integer(value) => Value::Number(Number::from(value)),
toml::Value::Float(value) => Number::from_f64(value).map_or(Value::Null, Value::Number),
toml::Value::Boolean(value) => Value::Bool(value),
toml::Value::Datetime(value) => Value::String(value.to_string()),
toml::Value::Array(values) => {
Value::Array(values.into_iter().map(toml_value_to_json).collect())
}
toml::Value::Table(values) => {
let object = values
.into_iter()
.map(|(key, value)| (key, toml_value_to_json(value)))
.collect();
Value::Object(object)
}
}
}
/// Returns an owned copy of `meta[field]` when it exists and is a string.
fn string_field(meta: &TomlValueObject, field: &str) -> Option<String> {
    match meta.get(field) {
        Some(Value::String(text)) => Some(text.clone()),
        _ => None,
    }
}
/// Returns the string elements of the array at `meta[field]`.
///
/// Non-string elements are skipped. A missing key or a non-array value
/// yields an empty vector.
fn string_array_field(meta: &TomlValueObject, field: &str) -> Vec<String> {
    let Some(Value::Array(items)) = meta.get(field) else {
        return Vec::new();
    };
    items
        .iter()
        .filter_map(|item| item.as_str().map(str::to_owned))
        .collect()
}
/// One line of the source text together with its position.
#[derive(Debug, Clone)]
struct Line<'a> {
// The line exactly as it appears in the source, terminator included.
raw: &'a str,
// `raw` without its trailing `\n` / `\r\n`.
content: &'a str,
// Byte offset of the first byte of `raw` in the source.
start: usize,
// Byte offset one past the last byte of `raw` (terminator included).
end: usize,
// 1-based line number.
number: usize,
}
/// Splits `source` into [`Line`] records with byte offsets and 1-based line
/// numbers.
///
/// Lines are delimited by `\n`; each record's `raw` keeps its terminator and
/// `content` has it stripped. A final line without a terminator is included,
/// and an empty source yields no lines.
fn collect_lines(source: &str) -> Vec<Line<'_>> {
    let mut lines = Vec::new();
    let mut start = 0;
    // `split_inclusive` already yields a trailing unterminated segment, so
    // the segments cover the whole input and no tail handling is needed.
    for (index, raw) in source.split_inclusive('\n').enumerate() {
        let end = start + raw.len();
        lines.push(Line {
            raw,
            content: strip_line_ending(raw),
            start,
            end,
            number: index + 1,
        });
        start = end;
    }
    lines
}
/// Removes one trailing `\n` (if present) and then one trailing `\r` (if
/// present) from `line`.
fn strip_line_ending(line: &str) -> &str {
    let body = match line.strip_suffix('\n') {
        Some(rest) => rest,
        None => line,
    };
    match body.strip_suffix('\r') {
        Some(rest) => rest,
        None => body,
    }
}
/// Reports whether `line` is a `+++` delimiter, ignoring surrounding spaces
/// and tabs. With `allow_bom`, one leading U+FEFF is skipped first.
fn is_frontmatter_delimiter(line: &Line<'_>, allow_bom: bool) -> bool {
    let content = match line.content.strip_prefix('\u{feff}') {
        Some(rest) if allow_bom => rest,
        _ => line.content,
    };
    trim_horizontal(content) == "+++"
}
/// Trims spaces and tabs, and nothing else, from both ends of `value`.
fn trim_horizontal(value: &str) -> &str {
    value.trim_matches(|ch: char| matches!(ch, ' ' | '\t'))
}
/// Builds a [`SourceSpan`] covering the whole of `line`.
///
/// Byte offsets are taken from `line.start` / `line.end`. Columns are
/// 1-based, and the end column is one past the number of characters in
/// `line.content`.
fn line_span(line: &Line<'_>) -> SourceSpan {
    let line_number = line.number;
    let content_chars = line.content.chars().count();
    SourceSpan {
        start: line.start,
        end: line.end,
        start_line: line_number,
        start_column: 1,
        end_line: line_number,
        end_column: content_chars + 1,
    }
}
// Fixture-driven tests: each test parses a `.hmd` fixture and checks either
// the serialized document or the presence of a diagnostic code.
#[cfg(test)]
mod tests {
use super::*;
use std::fs;
use std::path::{Path, PathBuf};
// A valid todo document exposes its frontmatter fields and raises none of
// the frontmatter/profile diagnostics.
#[test]
fn frontmatter_valid_todo_has_profile_metadata() {
let document = parse_fixture("fixtures/valid/todo-basic.hmd");
assert_eq!(document.hmd_version, "0.1");
assert_eq!(document.profile, "todo@0.1");
assert!(document.uses.is_empty());
assert!(!has_diagnostic(&document, "HMD001"));
assert!(!has_diagnostic(&document, "HMD002"));
assert!(!has_diagnostic(&document, "HMD007"));
}
#[test]
fn parse_todo_basic_matches_fixture() {
assert_parse_matches_fixture(
"fixtures/valid/todo-basic.hmd",
"fixtures/ir/todo-basic.json",
);
}
#[test]
fn parse_decision_basic_matches_fixture() {
assert_parse_matches_fixture(
"fixtures/valid/decision-basic.hmd",
"fixtures/ir/decision-basic.json",
);
}
#[test]
fn invalid_missing_frontmatter_reports_hmd001() {
assert_fixture_reports("fixtures/invalid/missing-frontmatter.hmd", "HMD001");
}
#[test]
fn invalid_toml_reports_hmd002() {
assert_fixture_reports("fixtures/invalid/invalid-toml.hmd", "HMD002");
}
#[test]
fn invalid_unclosed_block_reports_hmd003() {
assert_fixture_reports("fixtures/invalid/unclosed-block.hmd", "HMD003");
}
#[test]
fn invalid_block_type_reports_hmd004() {
assert_fixture_reports("fixtures/invalid/invalid-block-type.hmd", "HMD004");
}
#[test]
fn invalid_duplicate_id_reports_hmd006() {
assert_fixture_reports("fixtures/invalid/duplicate-id.hmd", "HMD006");
}
#[test]
fn invalid_unknown_profile_reports_hmd007() {
assert_fixture_reports("fixtures/invalid/unknown-profile.hmd", "HMD007");
}
// Parses `source_path` and asserts that its JSON serialization equals the
// JSON stored at `expected_path`.
fn assert_parse_matches_fixture(source_path: &str, expected_path: &str) {
let document = parse_fixture(source_path);
let actual = serde_json::to_value(document).expect("serializes parsed document");
let expected_source =
fs::read_to_string(repo_path(expected_path)).expect("reads expected fixture");
let expected: Value =
serde_json::from_str(&expected_source).expect("expected fixture is valid JSON");
assert_eq!(actual, expected);
}
// Parses the fixture at `path` and asserts that at least one diagnostic
// carries `code`.
fn assert_fixture_reports(path: &str, code: &str) {
let document = parse_fixture(path);
assert!(
has_diagnostic(&document, code),
"expected diagnostic {code}, got {:?}",
document.diagnostics
);
}
// True when any diagnostic on `document` has the given code.
fn has_diagnostic(document: &HmdDocument, code: &str) -> bool {
document
.diagnostics
.iter()
.any(|diagnostic| diagnostic.code == code)
}
// Reads the fixture at the repository-relative `path` and parses it.
fn parse_fixture(path: &str) -> HmdDocument {
let source = fs::read_to_string(repo_path(path)).expect("reads source fixture");
parse_document(&source)
}
// Resolves `path` against the directory two levels above this crate's
// manifest directory.
fn repo_path(path: impl AsRef<Path>) -> PathBuf {
Path::new(env!("CARGO_MANIFEST_DIR"))
.join("../..")
.join(path)
}
}