use super::range::{Position, Range};
use super::text_content::TextContent;
use crate::lex::inlines::{InlineNode, ReferenceInline};
/// Callbacks fired while the inline nodes of a text run are walked in source order.
///
/// Each callback receives the [`Range`]s of the piece of raw text the walker has just
/// passed over. Every method has an empty default body, so an implementor overrides only
/// the events it cares about.
pub trait InlinePositionVisitor {
    /// A run of plain text. `_range` covers the raw source the run occupies and `_text`
    /// is the node's text. The walker only reports runs that consumed at least one byte.
    fn visit_plain(&mut self, _range: &Range, _text: &str) {}

    /// The opening marker of a strong span, reported before its children are walked.
    fn enter_strong(&mut self, _open_marker: &Range) {}

    /// The end of a strong span, reported after its children: `_content` spans everything
    /// between the two markers and `_close_marker` is the closing marker itself.
    fn leave_strong(&mut self, _content: &Range, _close_marker: &Range) {}

    /// The opening marker of an emphasis span, reported before its children are walked.
    fn enter_emphasis(&mut self, _open_marker: &Range) {}

    /// The end of an emphasis span, reported after its children: `_content` spans
    /// everything between the two markers and `_close_marker` is the closing marker.
    fn leave_emphasis(&mut self, _content: &Range, _close_marker: &Range) {}

    /// An inline code span, reported in one call: both markers, the content between
    /// them, and the node's literal text.
    fn visit_code(
        &mut self,
        _open_marker: &Range,
        _content: &Range,
        _close_marker: &Range,
        _text: &str,
    ) {
    }

    /// An inline math span, reported in one call: both markers, the content between
    /// them, and the node's literal text.
    fn visit_math(
        &mut self,
        _open_marker: &Range,
        _content: &Range,
        _close_marker: &Range,
        _text: &str,
    ) {
    }

    /// A reference, reported in one call: both markers, the content between them, and
    /// the parsed reference data.
    fn visit_reference(
        &mut self,
        _open_marker: &Range,
        _content: &Range,
        _close_marker: &Range,
        _data: &ReferenceInline,
    ) {
    }
}
/// Walks the inline nodes of `text` and reports the source range of each one to `visitor`.
///
/// Returns without calling the visitor when `text` carries no location or when its raw
/// string is empty. The nodes come from `text.inlines()` when that yields a list, and
/// from `text.inline_items()` otherwise.
pub fn walk_text_content_positions<V: InlinePositionVisitor>(text: &TextContent, visitor: &mut V) {
    // Without a location there is no origin to offset ranges from.
    let Some(origin) = text.location.as_ref() else {
        return;
    };

    let source = text.as_string();
    if source.is_empty() {
        return;
    }

    // Declared up front so the fallback list outlives the borrow taken in the match.
    let fallback;
    let nodes: &[InlineNode] = match text.inlines() {
        None => {
            fallback = text.inline_items();
            &fallback
        }
        Some(existing) => existing,
    };

    let mut position_walker = InlinePositionWalker {
        raw: source,
        base_range: origin,
        cursor: 0,
    };
    position_walker.walk_nodes(nodes, visitor);
}
/// Cursor state for a single pass over the inline nodes of one text run.
struct InlinePositionWalker<'src> {
    /// Raw source text of the run; every offset the walker tracks indexes into it.
    raw: &'src str,
    /// Location of `raw`; emitted spans and positions are offset from its start.
    base_range: &'src Range,
    /// Byte offset into `raw` of the next unconsumed character.
    cursor: usize,
}
impl<'a> InlinePositionWalker<'a> {
    /// Visits each node of `nodes` in order, advancing the cursor past the raw text
    /// each one accounts for.
    fn walk_nodes<V: InlinePositionVisitor>(&mut self, nodes: &[InlineNode], v: &mut V) {
        for node in nodes {
            self.walk_node(node, v);
        }
    }

    /// Dispatches a single node to the walker for its kind.
    fn walk_node<V: InlinePositionVisitor>(&mut self, node: &InlineNode, v: &mut V) {
        match node {
            InlineNode::Plain { text, .. } => {
                let start = self.cursor;
                self.advance_unescaped(text);
                let end = self.cursor;
                // A run that consumed nothing has no range worth reporting.
                if start < end {
                    let range = self.make_range(start, end);
                    v.visit_plain(&range, text);
                }
            }
            InlineNode::Strong { content, .. } => self.walk_strong(content, v),
            InlineNode::Emphasis { content, .. } => self.walk_emphasis(content, v),
            InlineNode::Code { text, .. } => self.walk_literal(text, '`', v, EmitLiteral::Code),
            InlineNode::Math { text, .. } => self.walk_literal(text, '#', v, EmitLiteral::Math),
            InlineNode::Reference { data, .. } => self.walk_reference(data, v),
        }
    }

    /// Walks a strong span: a `*` marker, the children, then a closing `*` marker.
    fn walk_strong<V: InlinePositionVisitor>(&mut self, children: &[InlineNode], v: &mut V) {
        let m = '*'.len_utf8();
        let open_start = self.cursor;
        self.cursor += m;
        let open = self.make_range(open_start, self.cursor);
        v.enter_strong(&open);
        let content_start = self.cursor;
        self.walk_nodes(children, v);
        let content_end = self.cursor;
        let close_start = self.cursor;
        self.cursor += m;
        let close = self.make_range(close_start, self.cursor);
        let content = self.make_range(content_start, content_end);
        v.leave_strong(&content, &close);
    }

    /// Walks an emphasis span: a `_` marker, the children, then a closing `_` marker.
    fn walk_emphasis<V: InlinePositionVisitor>(&mut self, children: &[InlineNode], v: &mut V) {
        let m = '_'.len_utf8();
        let open_start = self.cursor;
        self.cursor += m;
        let open = self.make_range(open_start, self.cursor);
        v.enter_emphasis(&open);
        let content_start = self.cursor;
        self.walk_nodes(children, v);
        let content_end = self.cursor;
        let close_start = self.cursor;
        self.cursor += m;
        let close = self.make_range(close_start, self.cursor);
        let content = self.make_range(content_start, content_end);
        v.leave_emphasis(&content, &close);
    }

    /// Walks a verbatim span (`marker`, `text`, `marker`) and reports it through the
    /// callback selected by `kind`. The content is taken to occupy exactly `text.len()`
    /// bytes of raw source; no escape processing is applied to it.
    fn walk_literal<V: InlinePositionVisitor>(
        &mut self,
        text: &str,
        marker: char,
        v: &mut V,
        kind: EmitLiteral,
    ) {
        let m = marker.len_utf8();
        let open_start = self.cursor;
        self.cursor += m;
        let open = self.make_range(open_start, self.cursor);
        let content_start = self.cursor;
        self.cursor += text.len();
        let content = self.make_range(content_start, self.cursor);
        let close_start = self.cursor;
        self.cursor += m;
        let close = self.make_range(close_start, self.cursor);
        match kind {
            EmitLiteral::Code => v.visit_code(&open, &content, &close, text),
            EmitLiteral::Math => v.visit_math(&open, &content, &close, text),
        }
    }

    /// Walks a reference: a one-byte opening marker, `data.raw.len()` bytes of content,
    /// and a one-byte closing marker.
    fn walk_reference<V: InlinePositionVisitor>(&mut self, data: &ReferenceInline, v: &mut V) {
        // NOTE(review): the markers are presumably `[` and `]` — confirm against the lexer.
        let open_start = self.cursor;
        self.cursor += 1;
        let open = self.make_range(open_start, self.cursor);
        let content_start = self.cursor;
        self.cursor += data.raw.len();
        let content = self.make_range(content_start, self.cursor);
        let close_start = self.cursor;
        self.cursor += 1;
        let close = self.make_range(close_start, self.cursor);
        v.visit_reference(&open, &content, &close, data);
    }

    /// Advances the cursor over the raw text that produced the plain `text`.
    ///
    /// One step is taken per character of `text`. A raw backslash followed by a
    /// non-alphanumeric character counts as a single step covering both raw characters;
    /// any other backslash, like any other character, is a step of its own. Stepping
    /// stops early when the cursor reaches the end of `raw`, or when it does not sit on
    /// a character boundary (the node list and `raw` disagree), instead of panicking.
    fn advance_unescaped(&mut self, text: &str) {
        for _ in text.chars() {
            let Some(rest) = self.raw.get(self.cursor..) else {
                break;
            };
            let mut upcoming = rest.chars();
            let Some(raw_ch) = upcoming.next() else {
                break;
            };
            self.cursor += raw_ch.len_utf8();
            if raw_ch == '\\' {
                // An escape pair: the escaped character belongs to the same step.
                if let Some(escaped) = upcoming.next().filter(|c| !c.is_alphanumeric()) {
                    self.cursor += escaped.len_utf8();
                }
            }
        }
    }

    /// Builds the [`Range`] for the raw byte offsets `start..end`.
    ///
    /// The span is offset by the start of the base span and the positions are computed
    /// from the base start position. Offsets that fall outside `raw` or inside a
    /// multi-byte character are clamped first (see `clamp_offset`), so a node list that
    /// overruns the raw text produces a best-effort range rather than a panic.
    fn make_range(&self, start: usize, end: usize) -> Range {
        let start = self.clamp_offset(start);
        let end = self.clamp_offset(end).max(start);
        let start_pos = self.position_at(start);
        // Continue from the start position instead of rescanning `raw` from byte 0.
        let end_pos = Self::advance_position(&start_pos, &self.raw[start..end]);
        let base = self.base_range.span.start;
        Range::new((base + start)..(base + end), start_pos, end_pos)
    }

    /// Returns the line/column position of the raw byte `offset`, after clamping it to
    /// a valid character boundary within `raw`.
    fn position_at(&self, offset: usize) -> Position {
        let offset = self.clamp_offset(offset);
        Self::advance_position(&self.base_range.start, &self.raw[..offset])
    }

    /// Returns the position reached by moving from `from` across `text`: a newline
    /// moves to column 0 of the next line, and any other character advances the column
    /// by its length in UTF-16 code units.
    fn advance_position(from: &Position, text: &str) -> Position {
        let mut line = from.line;
        let mut column = from.column;
        for ch in text.chars() {
            if ch == '\n' {
                line += 1;
                column = 0;
            } else {
                column += ch.len_utf16();
            }
        }
        Position::new(line, column)
    }

    /// Clamps `offset` to `raw.len()` and then moves it back to the nearest character
    /// boundary, so the result is always safe to slice `raw` with.
    fn clamp_offset(&self, offset: usize) -> usize {
        let mut clamped = offset.min(self.raw.len());
        // Offset 0 is always a boundary, so this terminates.
        while !self.raw.is_char_boundary(clamped) {
            clamped -= 1;
        }
        clamped
    }
}
/// Chooses which visitor callback `walk_literal` fires for a verbatim span.
enum EmitLiteral {
    /// Report the span through `visit_code`.
    Code,
    /// Report the span through `visit_math`.
    Math,
}
#[cfg(test)]
mod tests {
    use super::super::range::Position;
    use super::super::text_content::TextContent;
    use super::*;

    /// Records the three ranges reported for every inline code span.
    #[derive(Default)]
    struct CodeCapture {
        opens: Vec<Range>,
        contents: Vec<Range>,
        closes: Vec<Range>,
    }

    impl InlinePositionVisitor for CodeCapture {
        fn visit_code(&mut self, open: &Range, content: &Range, close: &Range, _text: &str) {
            self.opens.push(open.clone());
            self.contents.push(content.clone());
            self.closes.push(close.clone());
        }
    }

    /// Records the opening-marker range of every strong span.
    #[derive(Default)]
    struct StrongCapture {
        opens: Vec<Range>,
    }

    impl InlinePositionVisitor for StrongCapture {
        fn enter_strong(&mut self, open: &Range) {
            self.opens.push(open.clone());
        }
    }

    /// Wraps `raw` in a `TextContent` located at byte 0, line 0, column 0, whose end
    /// column is the length of `raw` in UTF-16 code units.
    fn make_text_content(raw: &str) -> TextContent {
        let utf16_len = raw.encode_utf16().count();
        let location = Range::new(
            0..raw.len(),
            Position::new(0, 0),
            Position::new(0, utf16_len),
        );
        TextContent::from_string(String::from(raw), Some(location))
    }

    /// Walks `raw` with a freshly defaulted visitor and hands the visitor back.
    fn walk_with<V: InlinePositionVisitor + Default>(raw: &str) -> V {
        let text = make_text_content(raw);
        let mut visitor = V::default();
        walk_text_content_positions(&text, &mut visitor);
        visitor
    }

    #[test]
    fn code_marker_columns_are_utf16_code_units_after_arrow() {
        // `→` is three UTF-8 bytes but a single UTF-16 code unit.
        let captured: CodeCapture = walk_with("Hello → `Setup`");

        let open = captured.opens.first().expect("captured open marker");
        assert_eq!(open.span, 10..11, "byte span of the open backtick");
        assert_eq!(
            open.start,
            Position::new(0, 8),
            "open-marker column must be UTF-16 unit (8) not UTF-8 byte (10) — \
             got {:?}",
            open.start
        );
        assert_eq!(open.end, Position::new(0, 9));

        let body = captured.contents.first().expect("captured content");
        assert_eq!(body.span, 11..16, "byte span of `Setup` content");
        assert_eq!(body.start, Position::new(0, 9));
        assert_eq!(body.end, Position::new(0, 14));

        let close = captured.closes.first().expect("captured close marker");
        assert_eq!(close.span, 16..17, "byte span of close backtick");
        assert_eq!(close.start, Position::new(0, 14));
        assert_eq!(close.end, Position::new(0, 15));
    }

    #[test]
    fn strong_marker_columns_are_utf16_code_units_after_arrow() {
        let captured: StrongCapture = walk_with("Hello → *bold*");

        let open = captured.opens.first().expect("captured open marker");
        assert_eq!(open.span, 10..11, "byte span of `*`");
        assert_eq!(open.start, Position::new(0, 8));
        assert_eq!(open.end, Position::new(0, 9));
    }

    #[test]
    fn columns_advance_by_utf16_units_for_supplementary_chars() {
        // `🦀` is four UTF-8 bytes and a surrogate pair (two units) in UTF-16.
        let captured: CodeCapture = walk_with("x🦀 `c`");

        let open = captured.opens.first().expect("captured open marker");
        assert_eq!(open.span, 6..7, "byte span of `\\``");
        assert_eq!(
            open.start,
            Position::new(0, 4),
            "🦀 contributes 2 UTF-16 units (got column {:?})",
            open.start.column
        );
    }
}