use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd};
use tracey_core::{RuleId, parse_rule_id};
#[derive(Debug, Clone)]
pub struct Marker {
pub line_span: std::ops::Range<usize>,
pub prefix: String,
pub id: RuleId,
pub blockquote: bool,
}
pub fn find_markers(content: &str) -> Vec<Marker> {
let code_mask = code_mask(content);
let mut out = Vec::new();
let mut offset = 0;
for line in content.split_inclusive('\n') {
let line_start = offset;
offset += line.len();
if *code_mask.get(line_start).unwrap_or(&false) {
continue;
}
let trimmed = line.trim_end_matches(['\n', '\r']);
let (body, blockquote) = if let Some(rest) = trimmed.strip_prefix('>') {
(rest.strip_prefix(' ').unwrap_or(rest), true)
} else {
(trimmed, false)
};
let Some((prefix, inner)) = parse_leading_marker(body) else {
continue;
};
let id_part = inner
.split(|c: char| c.is_ascii_whitespace())
.next()
.unwrap_or(inner);
let Some(id) = parse_rule_id(id_part) else {
continue;
};
out.push(Marker {
line_span: line_start..offset,
prefix: prefix.to_owned(),
id,
blockquote,
});
}
out
}
fn parse_leading_marker(text: &str) -> Option<(&str, &str)> {
let prefix_len = text
.bytes()
.take_while(|b| b.is_ascii_lowercase() || b.is_ascii_digit())
.count();
if prefix_len == 0 || text.as_bytes().get(prefix_len) != Some(&b'[') {
return None;
}
let close = text.find(']')?;
if close <= prefix_len + 1 {
return None; }
if !text[close + 1..].trim().is_empty() {
return None;
}
Some((&text[..prefix_len], &text[prefix_len + 1..close]))
}
fn code_mask(content: &str) -> Vec<bool> {
let mut mask = vec![false; content.len()];
let mut depth = 0usize;
let parser = Parser::new_ext(content, Options::all()).into_offset_iter();
for (event, range) in parser {
match event {
Event::Start(Tag::CodeBlock(_) | Tag::HtmlBlock) => {
depth += 1;
fill(&mut mask, range);
}
Event::End(TagEnd::CodeBlock | TagEnd::HtmlBlock) => {
depth = depth.saturating_sub(1);
fill(&mut mask, range);
}
Event::Code(_) | Event::Html(_) | Event::InlineHtml(_) => fill(&mut mask, range),
_ if depth > 0 => fill(&mut mask, range),
_ => {}
}
}
mask
}
fn fill(mask: &mut [bool], range: std::ops::Range<usize>) {
if let Some(slice) = mask.get_mut(range) {
slice.fill(true);
}
}
#[cfg(test)]
mod tests {
use super::*;
use pretty_assertions::assert_eq;
#[test]
fn standalone_column_zero() {
let md = "r[obs.log.batch]\nLog lines are batched...\n";
let markers = find_markers(md);
assert_eq!(markers.len(), 1);
assert_eq!(markers[0].prefix, "r");
assert_eq!(markers[0].id.base, "obs.log.batch");
assert_eq!(markers[0].id.version, 1);
assert!(!markers[0].blockquote);
assert_eq!(&md[markers[0].line_span.clone()], "r[obs.log.batch]\n");
}
#[test]
fn indented_is_not_a_definition() {
assert!(find_markers(" r[obs.log.batch]\n").is_empty());
}
#[test]
fn blockquote_form() {
let markers = find_markers("> r[api.error-format]\n> API errors...\n");
assert_eq!(markers.len(), 1);
assert_eq!(markers[0].id.base, "api.error-format");
assert!(markers[0].blockquote);
}
#[test]
fn blockquote_no_space_after_gt() {
let markers = find_markers(">r[x.y]\n");
assert_eq!(markers.len(), 1);
assert!(markers[0].blockquote);
}
#[test]
fn inline_mention_is_ignored() {
assert!(find_markers("When implementing r[auth.login] you should...\n").is_empty());
}
#[test]
fn trailing_text_after_bracket_is_ignored() {
assert!(find_markers("r[foo.bar] extra\n").is_empty());
}
#[test]
fn inside_fenced_code_is_ignored() {
let md = "```markdown\nr[foo.bar]\n```\n";
assert!(find_markers(md).is_empty());
}
#[test]
fn inside_inline_code_is_ignored() {
assert!(find_markers("`r[foo.bar]`\n").is_empty());
}
#[test]
fn inside_html_block_is_ignored() {
assert!(find_markers("<pre>\nr[foo.bar]\n</pre>\n").is_empty());
assert!(find_markers("<div>\nr[foo.bar]\n</div>\n").is_empty());
assert!(find_markers("<!--\nr[foo.bar]\n-->\n").is_empty());
assert!(find_markers("<script>\nr[foo.bar]\n</script>\n").is_empty());
}
#[test]
fn version_suffix() {
let markers = find_markers("r[auth.login+3]\n");
assert_eq!(markers.len(), 1);
assert_eq!(markers[0].id.base, "auth.login");
assert_eq!(markers[0].id.version, 3);
}
#[test]
fn attributes_dont_break_id_parse() {
let markers = find_markers("r[foo.bar status=draft level=must]\n");
assert_eq!(markers.len(), 1);
assert_eq!(markers[0].id.base, "foo.bar");
}
#[test]
fn tab_separated_attributes() {
let markers = find_markers("r[foo.bar\tstatus=draft]\n");
assert_eq!(markers.len(), 1);
assert_eq!(markers[0].id.base, "foo.bar");
}
#[test]
fn alternate_prefix() {
let markers = find_markers("h2[stream.window]\n");
assert_eq!(markers.len(), 1);
assert_eq!(markers[0].prefix, "h2");
}
#[test]
fn malformed_version_left_alone() {
assert!(find_markers("r[foo.bar+]\n").is_empty());
assert!(find_markers("r[foo.bar+0]\n").is_empty());
assert!(find_markers("r[foo.bar+1+2]\n").is_empty());
}
#[test]
fn multiple_markers() {
let md = "\
# Heading\n\
\n\
r[first.req]\n\
Text.\n\
\n\
r[second.req]\n\
More text.\n\
";
let markers = find_markers(md);
assert_eq!(markers.len(), 2);
assert_eq!(markers[0].id.base, "first.req");
assert_eq!(markers[1].id.base, "second.req");
}
#[test]
fn final_line_without_newline() {
let markers = find_markers("r[end.of.file]");
assert_eq!(markers.len(), 1);
assert_eq!(markers[0].line_span, 0..14);
}
}