use std::ops::Range;
use crate::highlighter::HighlightSpan;
/// A marker word (e.g. "TODO") searched for inside comment bodies, together
/// with the capture names emitted for the word itself and for the comment
/// text trailing it.
#[derive(Clone, Debug)]
pub struct MarkerWord {
// The literal word, matched on ASCII-alphanumeric word boundaries.
pub word: &'static str,
// Capture name emitted for the marker word span itself.
pub label_capture: &'static str,
// Capture name emitted for the comment text following the marker.
pub tail_capture: &'static str,
}
/// The built-in marker table. FIX shares FIXME's captures and INFO shares
/// NOTE's, so the synonyms highlight identically.
pub fn default_markers() -> &'static [MarkerWord] {
    static DEFAULTS: [MarkerWord; 6] = [
        MarkerWord {
            word: "TODO",
            label_capture: "comment.marker.todo",
            tail_capture: "comment.marker.tail.todo",
        },
        MarkerWord {
            word: "FIXME",
            label_capture: "comment.marker.fixme",
            tail_capture: "comment.marker.tail.fixme",
        },
        MarkerWord {
            word: "FIX",
            label_capture: "comment.marker.fixme",
            tail_capture: "comment.marker.tail.fixme",
        },
        MarkerWord {
            word: "NOTE",
            label_capture: "comment.marker.note",
            tail_capture: "comment.marker.tail.note",
        },
        MarkerWord {
            word: "INFO",
            label_capture: "comment.marker.note",
            tail_capture: "comment.marker.tail.note",
        },
        MarkerWord {
            word: "WARN",
            label_capture: "comment.marker.warn",
            tail_capture: "comment.marker.tail.warn",
        },
    ];
    &DEFAULTS
}
/// Post-highlight pass that decorates comment spans containing marker words
/// (TODO, FIXME, ...) with extra label and tail captures.
#[derive(Clone, Debug)]
pub struct CommentMarkerPass {
// Marker table scanned against every comment body.
markers: Vec<MarkerWord>,
// When true, a marker carries over onto consecutive comment lines.
inheritance: bool,
}
impl CommentMarkerPass {
    /// Creates a pass over the default marker table with inheritance enabled.
    pub fn new() -> Self {
        Self {
            markers: default_markers().to_vec(),
            inheritance: true,
        }
    }

    /// Replaces the marker table entirely.
    pub fn with_markers(mut self, markers: Vec<MarkerWord>) -> Self {
        self.markers = markers;
        self
    }

    /// Toggles marker inheritance: when on, a marker found on one comment
    /// line also colors the following consecutive comment lines.
    pub fn with_inheritance(mut self, on: bool) -> Self {
        self.inheritance = on;
        self
    }

    /// Scans every comment span in `spans` for marker words and appends
    /// `comment.marker.*` label spans plus `comment.marker.tail.*` spans for
    /// the text that follows a marker. `bytes` is the source buffer the span
    /// ranges index into. Existing spans are never modified or removed.
    pub fn apply(&self, spans: &mut Vec<HighlightSpan>, bytes: &[u8]) {
        // Collect the byte ranges of every comment capture.
        let mut comments: Vec<Range<usize>> = spans
            .iter()
            .filter(|s| s.capture() == "comment" || s.capture().starts_with("comment."))
            .map(|s| s.byte_range.clone())
            .collect();
        comments.sort_by_key(|r| r.start);
        // dedup_by hands us (later, earlier); overlapping ranges are merged
        // into the earlier element, which is the one that survives.
        comments.dedup_by(|b, a| {
            if b.start < a.end {
                a.end = a.end.max(b.end);
                true
            } else {
                false
            }
        });
        if comments.is_empty() {
            return;
        }
        let first_start = comments[0].start;
        // If the highlighted window starts mid-file, inspect the lines above
        // it for a marker that should carry into the first visible comment.
        let mut active: Option<&MarkerWord> = if self.inheritance {
            self.seed_active(bytes, first_start)
        } else {
            None
        };
        let mut extra: Vec<HighlightSpan> = Vec::new();
        let mut prev_end: Option<usize> = None;
        for comment_range in &comments {
            // FIX: the first comment previously computed `consecutive = false`
            // and thereby always discarded the marker seeded above, making
            // seed_active dead code. The first comment now keeps the seed;
            // subsequent comments still require a whitespace-only gap with
            // exactly one newline.
            let consecutive = match prev_end {
                Some(pe) => self.inheritance && is_consecutive(bytes, pe, comment_range.start),
                None => true,
            };
            if !consecutive {
                active = None;
            }
            let body_start = delimiter_skip(bytes, comment_range.start);
            let body_end = comment_range.end;
            if body_start >= body_end {
                prev_end = Some(comment_range.end);
                continue;
            }
            let found = scan_markers(bytes, body_start, body_end, &self.markers);
            if found.is_empty() {
                // No marker in this comment: the whole body is a tail of the
                // inherited marker, if any.
                if let Some(mw) = active {
                    extra.push(HighlightSpan {
                        byte_range: body_start..body_end,
                        capture: mw.tail_capture.to_string(),
                    });
                }
                prev_end = Some(comment_range.end);
                continue;
            }
            let mut cursor = body_start;
            for m in &found {
                // The label span absorbs one byte before the word (usually
                // the leading space) so adjacent styling looks continuous.
                let label_start = m.word_start.saturating_sub(1).max(body_start);
                if let Some(mw) = active
                    && cursor < label_start
                {
                    extra.push(HighlightSpan {
                        byte_range: cursor..label_start,
                        capture: mw.tail_capture.to_string(),
                    });
                }
                extra.push(HighlightSpan {
                    byte_range: label_start..m.word_end,
                    capture: m.marker.label_capture.to_string(),
                });
                // One byte after the word (typically ':' or a space) is also
                // painted as label so "TODO:" reads as a single unit.
                let trail_end = if m.word_end < body_end {
                    m.word_end + 1
                } else {
                    m.word_end
                };
                if trail_end > m.word_end {
                    extra.push(HighlightSpan {
                        byte_range: m.word_end..trail_end,
                        capture: m.marker.label_capture.to_string(),
                    });
                }
                cursor = trail_end;
                active = Some(m.marker);
            }
            // Body text after the last marker is that marker's tail.
            if let Some(mw) = active
                && cursor < body_end
            {
                extra.push(HighlightSpan {
                    byte_range: cursor..body_end,
                    capture: mw.tail_capture.to_string(),
                });
            }
            prev_end = Some(comment_range.end);
        }
        spans.extend(extra);
    }

    /// Walks over (at most) the `CAP` lines preceding `first_comment_start`
    /// and returns the marker that would be "active" when the first visible
    /// comment begins: the last marker of an unbroken run of comment lines
    /// leading up to it. Blank lines and non-comment lines break the run.
    fn seed_active<'m>(
        &'m self,
        bytes: &[u8],
        first_comment_start: usize,
    ) -> Option<&'m MarkerWord> {
        // Bound the backwards scan so very large files stay cheap.
        const CAP: usize = 500;
        if first_comment_start == 0 {
            return None;
        }
        let prefix = &bytes[..first_comment_start];
        let newline_positions: Vec<usize> = prefix
            .iter()
            .enumerate()
            .filter(|&(_, b)| *b == b'\n')
            .map(|(i, _)| i)
            .collect();
        let start_nl = newline_positions.len().saturating_sub(CAP);
        let line_starts: Vec<usize> = {
            let mut v = vec![if start_nl == 0 {
                0usize
            } else {
                newline_positions[start_nl - 1] + 1
            }];
            for &nl in &newline_positions[start_nl..] {
                v.push(nl + 1);
            }
            v
        };
        let mut active: Option<&'m MarkerWord> = None;
        for &ls in &line_starts {
            let le = bytes[ls..]
                .iter()
                .position(|&b| b == b'\n')
                .map(|p| ls + p)
                .unwrap_or(first_comment_start);
            let le = le.min(first_comment_start);
            let line_bytes = &bytes[ls..le];
            if let Some(del_off) = find_comment_delimiter(line_bytes) {
                // FIX: was `ls + del_off + 2`, which over-skips one byte for
                // the single-byte `#` delimiter (e.g. `#TODO` was scanned as
                // `ODO`); delimiter_skip knows each delimiter's width.
                let body_start = delimiter_skip(bytes, ls + del_off);
                let body_end = le;
                if body_start < body_end {
                    let found = scan_markers(bytes, body_start, body_end, &self.markers);
                    if let Some(last) = found.last() {
                        active = Some(last.marker);
                    }
                    // A comment line without a marker keeps `active` alive.
                }
            } else if le == first_comment_start
                && line_bytes.iter().all(|b| b.is_ascii_whitespace())
            {
                // FIX: this is the truncated final "line" — just the
                // indentation (often empty) preceding the first comment
                // itself. It used to fall into the reset branch below and
                // erased the seed on every call; it must not break the run.
            } else {
                // Blank line or non-comment code: inheritance breaks here.
                active = None;
            }
        }
        active
    }
}
impl Default for CommentMarkerPass {
fn default() -> Self {
Self::new()
}
}
/// A single marker-word hit inside a comment body.
struct FoundMarker<'m> {
// Absolute byte offsets of the matched word within the source buffer.
word_start: usize, word_end: usize,
// The marker table entry that matched.
marker: &'m MarkerWord,
}
/// Finds every occurrence of any marker word within
/// `bytes[body_start..body_end]`, honouring ASCII-alphanumeric word
/// boundaries on both sides (so "XTODO" and "TODOlist" never match).
/// Offsets in the result are absolute into `bytes`, sorted by start.
fn scan_markers<'m>(
    bytes: &[u8],
    body_start: usize,
    body_end: usize,
    markers: &'m [MarkerWord],
) -> Vec<FoundMarker<'m>> {
    let end = body_end.min(bytes.len());
    if body_start >= end {
        return Vec::new();
    }
    let body = &bytes[body_start..end];
    let mut hits: Vec<FoundMarker<'m>> = Vec::new();
    for marker in markers {
        let needle = marker.word.as_bytes();
        let mut at = 0usize;
        while at + needle.len() <= body.len() {
            if !body[at..].starts_with(needle) {
                at += 1;
                continue;
            }
            // Word-boundary checks: the bytes adjacent to the match must not
            // be ASCII alphanumerics (slice edges count as boundaries).
            let left_ok = at == 0 || !body[at - 1].is_ascii_alphanumeric();
            let right_ok = body
                .get(at + needle.len())
                .map(|b| !b.is_ascii_alphanumeric())
                .unwrap_or(true);
            if left_ok && right_ok {
                hits.push(FoundMarker {
                    word_start: body_start + at,
                    word_end: body_start + at + needle.len(),
                    marker,
                });
                at += needle.len();
            } else {
                at += 1;
            }
        }
    }
    hits.sort_by_key(|m| m.word_start);
    hits
}
/// Returns the offset just past a comment delimiter starting at `pos`:
/// the two-byte delimiters `--`, `//`, `/*` skip two bytes, `#` skips one.
/// Returns `pos` unchanged when no known delimiter starts there.
fn delimiter_skip(bytes: &[u8], pos: usize) -> usize {
    match bytes.get(pos..pos + 2) {
        Some([b'-', b'-']) | Some([b'/', b'/']) | Some([b'/', b'*']) => pos + 2,
        _ if bytes.get(pos) == Some(&b'#') => pos + 1,
        _ => pos,
    }
}
/// Returns the byte offset of the first comment delimiter in a line.
/// Two-byte delimiters (`--`, `//`, `/*`) are searched first; a `#` is only
/// reported when no two-byte delimiter occurs anywhere in the line.
fn find_comment_delimiter(line_bytes: &[u8]) -> Option<usize> {
    let pair = line_bytes
        .windows(2)
        .position(|w| matches!(w, [b'-', b'-'] | [b'/', b'/'] | [b'/', b'*']));
    if pair.is_some() {
        return pair;
    }
    line_bytes.iter().position(|&b| b == b'#')
}
/// True when the gap between `prev_end` and `next_start` consists solely of
/// whitespace containing exactly one newline — i.e. the two comments sit on
/// adjacent lines with nothing but indentation between them.
fn is_consecutive(bytes: &[u8], prev_end: usize, next_start: usize) -> bool {
    // `get` rejects both an inverted range and one that runs past the buffer.
    let Some(gap) = bytes.get(prev_end..next_start) else {
        return false;
    };
    if !gap
        .iter()
        .all(|&b| matches!(b, b'\n' | b' ' | b'\t' | b'\r'))
    {
        return false;
    }
    gap.iter().filter(|&&b| b == b'\n').count() == 1
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::Highlighter;
    use crate::runtime::{Grammar, GrammarLoader, LangSpec, ManifestMeta, QuerySource};
    use std::sync::{Arc, OnceLock};

    const RUST_GIT: &str = "https://github.com/tree-sitter/tree-sitter-rust";
    const RUST_REV: &str = "e86119bdb4968b9799f6a014ca2401c178d54b5f";

    /// Clones and builds the Rust grammar once per test binary (network +
    /// C compiler required, hence the `#[ignore]` on tests that use it).
    fn rust_grammar() -> Arc<Grammar> {
        static G: OnceLock<Arc<Grammar>> = OnceLock::new();
        G.get_or_init(|| {
            let meta = ManifestMeta {
                helix_repo: "https://github.com/helix-editor/helix".into(),
                helix_rev: "87d5c05c4432a079d3b7aaa10cda1cfe1803c18c".into(),
                nvim_treesitter_repo: "https://github.com/nvim-treesitter/nvim-treesitter".into(),
                nvim_treesitter_rev: "cf12346a3414fa1b06af75c79faebe7f76df080a".into(),
            };
            let loader = GrammarLoader::user_default(&meta).expect("XDG paths");
            let spec = LangSpec {
                git_url: RUST_GIT.into(),
                git_rev: RUST_REV.into(),
                subpath: None,
                extensions: vec!["rs".into()],
                c_files: vec!["src/parser.c".into(), "src/scanner.c".into()],
                query_source: QuerySource::Helix,
                query_subdir: None,
                source: None,
            };
            Arc::new(Grammar::load("rust", &spec, &loader, &meta).expect("rust grammar"))
        })
        .clone()
    }

    /// Highlights `src` as Rust and returns every span over the whole input.
    fn rust_comment_spans(src: &[u8]) -> Vec<HighlightSpan> {
        let mut h = Highlighter::new(rust_grammar()).unwrap();
        h.parse_initial(src);
        h.highlight_range(src, 0..src.len())
    }

    /// Runs the default marker pass and keeps only `comment.marker*` spans.
    fn marker_spans(src: &[u8]) -> Vec<HighlightSpan> {
        let mut spans = rust_comment_spans(src);
        let pass = CommentMarkerPass::new();
        pass.apply(&mut spans, src);
        spans
            .into_iter()
            .filter(|s| s.capture().starts_with("comment.marker"))
            .collect()
    }

    #[test]
    #[ignore = "network + compiler: clones tree-sitter-rust then builds it"]
    fn single_line_todo_emits_label_and_tail() {
        let src = b"// TODO: refactor";
        let ms = marker_spans(src);
        assert!(
            ms.iter().any(|s| s.capture() == "comment.marker.todo"),
            "expected label span; got {ms:#?}"
        );
        assert!(
            ms.iter().any(|s| s.capture() == "comment.marker.tail.todo"),
            "expected tail span; got {ms:#?}"
        );
    }

    #[test]
    #[ignore = "network + compiler: clones tree-sitter-rust then builds it"]
    fn multi_line_block_todo_spans_full_body() {
        let src = b"/* TODO: long\nexplanation */";
        let ms = marker_spans(src);
        assert!(
            ms.iter().any(|s| s.capture() == "comment.marker.todo"),
            "expected label; got {ms:#?}"
        );
        assert!(
            ms.iter().any(|s| s.capture() == "comment.marker.tail.todo"),
            "expected tail; got {ms:#?}"
        );
    }

    #[test]
    #[ignore = "network + compiler: clones tree-sitter-rust then builds it"]
    fn consecutive_single_line_inheritance() {
        let src = b"// TODO foo\n// continuation";
        let ms = marker_spans(src);
        let has_inherited_tail = ms
            .iter()
            .any(|s| s.capture() == "comment.marker.tail.todo" && s.byte_range.start >= 12);
        assert!(
            has_inherited_tail,
            "expected inherited tail on second line; got {ms:#?}"
        );
    }

    #[test]
    #[ignore = "network + compiler: clones tree-sitter-rust then builds it"]
    fn inheritance_breaks_on_blank_line() {
        let src = b"// TODO foo\n\n// unrelated";
        let ms = marker_spans(src);
        let leaked = ms
            .iter()
            .any(|s| s.capture() == "comment.marker.tail.todo" && s.byte_range.start >= 13);
        assert!(!leaked, "blank line should break inheritance; got {ms:#?}");
    }

    #[test]
    #[ignore = "network + compiler: clones tree-sitter-rust then builds it"]
    fn inheritance_breaks_on_non_comment_line() {
        let src = b"// TODO\n let x = 1;\n// next";
        let ms = marker_spans(src);
        // FIX: `unwrap_or(0) - 1` could underflow-panic when no '/' exists;
        // saturate instead (same value for this input).
        let last_comment_byte = src
            .iter()
            .rposition(|&b| b == b'/')
            .map_or(0, |p| p.saturating_sub(1));
        let inherited = ms.iter().any(|s| {
            s.capture() == "comment.marker.tail.todo" && s.byte_range.start > last_comment_byte
        });
        assert!(
            !inherited,
            "expected no inherited tail on '// next'; got {ms:#?}"
        );
    }

    #[test]
    #[ignore = "network + compiler: clones tree-sitter-rust then builds it"]
    fn inheritance_off_does_not_carry() {
        let src = b"// TODO foo\n// continuation";
        let mut spans = rust_comment_spans(src);
        let pass = CommentMarkerPass::new().with_inheritance(false);
        pass.apply(&mut spans, src);
        let ms: Vec<_> = spans
            .into_iter()
            .filter(|s| s.capture().starts_with("comment.marker"))
            .collect();
        let has_second_line_marker = ms.iter().any(|s| s.byte_range.start >= 12);
        assert!(
            !has_second_line_marker,
            "expected no spans on second line with inheritance off; got {ms:#?}"
        );
    }

    #[test]
    #[ignore = "network + compiler: clones tree-sitter-rust then builds it"]
    fn marker_word_boundary_no_match() {
        let src = b"// TODOlist\n// XTODO";
        let ms = marker_spans(src);
        assert!(
            ms.is_empty(),
            "expected no marker spans for non-boundary words; got {ms:#?}"
        );
    }

    #[test]
    #[ignore = "network + compiler: clones tree-sitter-rust then builds it"]
    fn multiple_markers_one_comment() {
        let src = b"// TODO foo FIXME bar";
        let ms = marker_spans(src);
        let has_todo = ms.iter().any(|s| s.capture() == "comment.marker.todo");
        let has_fixme = ms.iter().any(|s| s.capture() == "comment.marker.fixme");
        assert!(has_todo, "expected todo label; got {ms:#?}");
        assert!(has_fixme, "expected fixme label; got {ms:#?}");
    }

    #[test]
    #[ignore = "network + compiler: clones tree-sitter-rust then builds it"]
    fn fixme_marker_emits_correct_capture() {
        let src = b"// FIXME: broken";
        let ms = marker_spans(src);
        assert!(
            ms.iter().any(|s| s.capture() == "comment.marker.fixme"),
            "expected fixme label; got {ms:#?}"
        );
        assert!(
            ms.iter()
                .any(|s| s.capture() == "comment.marker.tail.fixme"),
            "expected fixme tail; got {ms:#?}"
        );
    }

    #[test]
    #[ignore = "network + compiler: clones tree-sitter-rust then builds it"]
    fn fix_marker_uses_fixme_capture() {
        let src = b"// FIX: broken";
        let ms = marker_spans(src);
        assert!(
            ms.iter().any(|s| s.capture() == "comment.marker.fixme"),
            "FIX should map to comment.marker.fixme; got {ms:#?}"
        );
    }

    #[test]
    #[ignore = "network + compiler: clones tree-sitter-rust then builds it"]
    fn note_and_info_use_note_capture() {
        for word in [b"NOTE" as &[u8], b"INFO"] {
            let src = [b"// ".as_ref(), word, b": context"].concat();
            let ms = marker_spans(&src);
            assert!(
                ms.iter().any(|s| s.capture() == "comment.marker.note"),
                "{} should map to comment.marker.note; got {ms:#?}",
                std::str::from_utf8(word).unwrap()
            );
        }
    }

    #[test]
    #[ignore = "network + compiler: clones tree-sitter-rust then builds it"]
    fn warn_marker_emits_correct_capture() {
        let src = b"// WARN: danger";
        let ms = marker_spans(src);
        assert!(
            ms.iter().any(|s| s.capture() == "comment.marker.warn"),
            "expected warn label; got {ms:#?}"
        );
    }

    #[test]
    #[ignore = "network + compiler: clones tree-sitter-rust then builds it"]
    fn apply_is_idempotent_on_no_comments() {
        let src = b"fn main() {}";
        let mut spans = rust_comment_spans(src);
        let before = spans.len();
        let pass = CommentMarkerPass::new();
        pass.apply(&mut spans, src);
        let after = spans.len();
        assert_eq!(before, after, "no-comment source should not grow spans");
    }

    // FIX: the five tests below exercise only pure in-file helpers (no
    // grammar clone, no network, no C compiler), so the blanket `#[ignore]`
    // they previously carried was wrong — it silently excluded them from
    // every normal `cargo test` run.

    #[test]
    fn default_pass_is_same_as_new() {
        let a = CommentMarkerPass::new();
        let b = CommentMarkerPass::default();
        assert_eq!(a.inheritance, b.inheritance);
        assert_eq!(a.markers.len(), b.markers.len());
    }

    #[test]
    fn scan_markers_word_boundary_left() {
        let bytes = b"// XTODO";
        let markers = default_markers();
        let found = scan_markers(bytes, 3, bytes.len(), markers);
        assert!(
            found.is_empty(),
            "XTODO should not match; got {found:?}",
            found = found.iter().map(|m| m.marker.word).collect::<Vec<_>>()
        );
    }

    #[test]
    fn scan_markers_word_boundary_right() {
        let bytes = b"// TODOlist";
        let markers = default_markers();
        let found = scan_markers(bytes, 3, bytes.len(), markers);
        assert!(found.is_empty(), "TODOlist should not match");
    }

    #[test]
    fn is_consecutive_whitespace_only() {
        let bytes = b"// a\n// b";
        assert!(is_consecutive(bytes, 4, 5));
    }

    #[test]
    fn is_consecutive_non_whitespace_between() {
        let bytes = b"// a\nlet x=1;\n// b";
        assert!(!is_consecutive(bytes, 4, 14));
    }
}