use std::ops::Range;
use serde::{Deserialize, Serialize};
use typst::syntax::{Source, Span};
use super::markup_util::is_typst_escapable;
/// Associates one block's byte range in the Typst text with the byte range of
/// the same block in the Markdown text.
///
/// `ContentIndex` locates a block by Typst offset and hands the matching
/// `md_byte_range` back to the caller.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BlockMapping {
/// Byte range on the Typst side. Lookups compare it against offsets from
/// which `content_offset` has already been subtracted.
pub typst_byte_range: Range<usize>,
/// Byte range of the same block on the Markdown side.
pub md_byte_range: Range<usize>,
}
/// Selects how offsets inside a [`TextSpan`] are translated between the
/// Markdown and Typst sides.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum SpanKind {
/// Offsets are translated character by character; the Typst side is taken to
/// hold one extra byte for every character that `is_typst_escapable` accepts.
Plain,
/// Offsets map byte-for-byte: the same local offset is used on both sides.
Code,
/// Treated as a unit: any Markdown overlap selects the whole Typst range, and
/// a Typst offset inside the span resolves to the span's Markdown start.
Math,
/// Skipped when mapping Markdown ranges to Typst; a Typst offset inside the
/// span resolves to the span's Markdown start.
Break,
/// Handled exactly like `Break` by the code in this file.
Opaque,
}
/// A run of text present on both sides, with its byte range in each and the
/// translation rule ([`SpanKind`]) that applies inside it.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TextSpan {
/// Byte range on the Typst side, measured before `content_offset` is added.
pub typst_range: Range<usize>,
/// Byte range on the Markdown side; for `Plain` spans it is used to slice the
/// Markdown source text.
pub md_range: Range<usize>,
/// How offsets inside this span are converted.
pub kind: SpanKind,
}
/// Offset index between Markdown text and the Typst text associated with it.
///
/// Both vectors are binary-searched by Typst offset, so they are assumed to be
/// sorted by Typst range and non-overlapping — nothing in this type checks it.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ContentIndex {
// Fine-grained spans used for exact offset translation.
text_spans: Vec<TextSpan>,
// Coarse block mappings; also the fallback when no text span matches.
block_spans: Vec<BlockMapping>,
}
impl ContentIndex {
    /// Creates an index from already-computed text spans and block mappings.
    ///
    /// The vectors are stored as given. The by-offset lookups binary-search
    /// them, so they should be sorted by Typst range and must not overlap.
    pub fn new(text_spans: Vec<TextSpan>, block_spans: Vec<BlockMapping>) -> Self {
        Self {
            text_spans,
            block_spans,
        }
    }

    /// All text spans, in stored order.
    pub fn text_spans(&self) -> &[TextSpan] {
        self.text_spans.as_slice()
    }

    /// All block mappings, in stored order.
    pub fn block_spans(&self) -> &[BlockMapping] {
        self.block_spans.as_slice()
    }

    /// Orders a half-open `range` relative to `offset` for binary search:
    /// `Greater` when the range lies after the offset, `Less` when it ends at
    /// or before it, `Equal` when it contains it.
    fn side_of(range: &Range<usize>, offset: usize) -> std::cmp::Ordering {
        use std::cmp::Ordering;

        // The "starts after" check takes precedence, as a degenerate range
        // could satisfy both conditions.
        match (offset < range.start, offset >= range.end) {
            (true, _) => Ordering::Greater,
            (false, true) => Ordering::Less,
            (false, false) => Ordering::Equal,
        }
    }

    /// Returns the text span whose Typst range contains `typst_offset`, if any.
    fn find_text_span_by_typst_offset(&self, typst_offset: usize) -> Option<&TextSpan> {
        self.text_spans
            .binary_search_by(|candidate| Self::side_of(&candidate.typst_range, typst_offset))
            .ok()
            .map(|hit| &self.text_spans[hit])
    }

    /// Returns the block whose Typst byte range contains `typst_offset`, if any.
    fn find_block_by_typst_offset(&self, typst_offset: usize) -> Option<&BlockMapping> {
        self.block_spans
            .binary_search_by(|candidate| {
                Self::side_of(&candidate.typst_byte_range, typst_offset)
            })
            .ok()
            .map(|hit| &self.block_spans[hit])
    }

    /// Translates Markdown byte ranges into byte ranges of the main Typst source.
    ///
    /// Every text span overlapping a requested range contributes one output
    /// range, shifted by `content_offset`:
    ///
    /// * `Plain` — the overlap is converted with `md_to_typst_local`, using
    ///   the span's slice of `md_source`;
    /// * `Code` — the overlap is carried over byte-for-byte;
    /// * `Math` — the whole Typst range of the span is selected;
    /// * `Break` / `Opaque` — nothing is emitted.
    ///
    /// The result is sorted by start, with overlapping or touching ranges merged.
    ///
    /// # Panics
    ///
    /// Panics if a `Plain` span's `md_range` is not a valid slice of `md_source`.
    pub fn md_to_main_ranges(
        &self,
        md_ranges: &[Range<usize>],
        md_source: &str,
        content_offset: usize,
    ) -> Vec<Range<usize>> {
        let mut mapped: Vec<Range<usize>> = Vec::new();

        for wanted in md_ranges {
            let overlapping = self.text_spans.iter().filter(|span| {
                span.md_range.start < wanted.end && span.md_range.end > wanted.start
            });

            for span in overlapping {
                // Overlap of `wanted` with this span, as offsets local to the
                // span's Markdown start. Only evaluated by the kinds that
                // translate partial ranges.
                let clip = || {
                    let from = wanted.start.max(span.md_range.start);
                    let to = wanted.end.min(span.md_range.end);
                    (from - span.md_range.start, to - span.md_range.start)
                };
                let origin = span.typst_range.start;

                let piece = match span.kind {
                    SpanKind::Break | SpanKind::Opaque => continue,
                    SpanKind::Math => {
                        (origin + content_offset)..(span.typst_range.end + content_offset)
                    }
                    SpanKind::Code => {
                        let (from, to) = clip();
                        (origin + from + content_offset)..(origin + to + content_offset)
                    }
                    SpanKind::Plain => {
                        let md_text = &md_source[span.md_range.clone()];
                        let (from, to) = clip();
                        let typst_from = md_to_typst_local(md_text, from);
                        let typst_to = md_to_typst_local(md_text, to);
                        (origin + typst_from + content_offset)
                            ..(origin + typst_to + content_offset)
                    }
                };
                mapped.push(piece);
            }
        }

        merge_ranges(&mut mapped);
        mapped
    }
}
/// A position in the Markdown source, as produced by `BoundIndex::resolve_span`.
///
/// Derives the common standard traits so values can be logged, copied and
/// compared, in line with the other public data types in this module.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MdPosition {
    /// Byte offset into the Markdown source.
    pub offset: usize,
    /// Markdown byte range of the block that contains the position.
    pub block_range: Range<usize>,
}
/// A [`ContentIndex`] bundled with the inputs needed to resolve Typst spans
/// to Markdown positions.
pub struct BoundIndex<'a> {
// Offset index consulted for block and text-span lookups.
index: &'a ContentIndex,
// Typst source used to turn a `Span` into a byte range.
source: &'a Source,
// Subtracted from byte offsets in `source` before consulting `index`;
// spans that start before it are not resolved.
content_offset: usize,
// Markdown text; sliced by the `md_range` of `Plain` spans.
md_source: &'a str,
}
impl<'a> BoundIndex<'a> {
    /// Binds `index` to the Typst `source`, the `content_offset` to subtract
    /// from offsets in that source, and the Markdown text `md_source`.
    pub fn new(
        index: &'a ContentIndex,
        source: &'a Source,
        content_offset: usize,
        md_source: &'a str,
    ) -> Self {
        Self {
            index,
            source,
            content_offset,
            md_source,
        }
    }

    /// The Markdown text this index is bound to.
    pub fn md_source(&self) -> &str {
        self.md_source
    }

    /// Resolves a Typst `span` to a position in the Markdown source.
    ///
    /// Only the start of the span is mapped. Returns `None` when the span is
    /// detached, when the source cannot produce a range for it, when it
    /// starts before `content_offset`, or when no block contains the
    /// resulting offset.
    ///
    /// When a text span contains the offset, the offset is translated
    /// according to the span's kind; otherwise the start of the enclosing
    /// block is reported.
    ///
    /// # Panics
    ///
    /// Panics if a matching `Plain` span's `md_range` is not a valid slice of
    /// the Markdown source.
    pub fn resolve_span(&self, span: Span) -> Option<MdPosition> {
        if span.is_detached() {
            return None;
        }

        let main_range = self.source.range(span)?;
        // `checked_sub` yields `None` for spans that start before the content.
        let typst_offset = main_range.start.checked_sub(self.content_offset)?;
        let block = self.index.find_block_by_typst_offset(typst_offset)?;

        let offset = self
            .index
            .find_text_span_by_typst_offset(typst_offset)
            .map_or(block.md_byte_range.start, |hit| {
                let within = typst_offset - hit.typst_range.start;
                let md_base = hit.md_range.start;
                match hit.kind {
                    SpanKind::Math | SpanKind::Break | SpanKind::Opaque => md_base,
                    SpanKind::Code => md_base + within,
                    SpanKind::Plain => {
                        let md_text = &self.md_source[hit.md_range.clone()];
                        md_base + typst_to_md_local(md_text, within)
                    }
                }
            });

        Some(MdPosition {
            offset,
            block_range: block.md_byte_range.clone(),
        })
    }
}
/// Converts a byte offset local to `md_text` into the matching local byte
/// offset on the Typst side.
///
/// Every character that starts before `md_offset` contributes its UTF-8
/// length, plus one extra byte when `is_typst_escapable` accepts it.
fn md_to_typst_local(md_text: &str, md_offset: usize) -> usize {
    md_text
        .char_indices()
        .take_while(|&(at, _)| at < md_offset)
        .map(|(_, ch)| ch.len_utf8() + usize::from(is_typst_escapable(ch)))
        .sum()
}
/// Converts a local byte offset on the Typst side back into a byte offset
/// local to `md_text`.
///
/// Walks `md_text`, advancing a Typst-side cursor by each character's UTF-8
/// length (plus one byte when `is_typst_escapable` accepts it), and returns
/// the Markdown index of the first character at which the cursor has reached
/// `typst_local`. Returns `md_text.len()` when the text ends first.
fn typst_to_md_local(md_text: &str, typst_local: usize) -> usize {
    let mut typst_cursor = 0usize;

    let landing = md_text.char_indices().find(|&(_, ch)| {
        let reached = typst_cursor >= typst_local;
        if !reached {
            typst_cursor += ch.len_utf8() + usize::from(is_typst_escapable(ch));
        }
        reached
    });

    landing.map_or(md_text.len(), |(md_at, _)| md_at)
}
/// Maps a byte offset in rendered text back to a byte offset in `source_text`.
///
/// A backslash followed by at least one more byte is treated as an escape: it
/// consumes the backslash and the following character in the source, but only
/// that character's bytes count towards the rendered offset. Any other
/// character counts its own length on both sides.
///
/// Returns the source index at which the rendered count first reaches
/// `rendered_offset`, or the index reached when the source runs out.
pub fn rendered_to_source_byte(source_text: &str, rendered_offset: usize) -> usize {
    let raw = source_text.as_bytes();
    let mut cursor = 0usize;
    let mut seen = 0usize;

    while cursor < raw.len() && seen < rendered_offset {
        let escaped = raw[cursor] == b'\\' && cursor + 1 < raw.len();
        // Where the character that is actually rendered begins.
        let payload_at = if escaped { cursor + 1 } else { cursor };
        let payload_len = char_len_at(raw, payload_at);

        seen += payload_len;
        cursor = payload_at + payload_len;
    }

    cursor
}

/// Byte length of the UTF-8 sequence starting at `pos`, judged from the
/// byte at `pos` alone.
///
/// Returns 1 when `pos` is past the end. The byte is not validated: values in
/// `0x80..0xE0` yield 2, so `pos` is expected to sit on a character start.
fn char_len_at(bytes: &[u8], pos: usize) -> usize {
    match bytes.get(pos).copied() {
        None | Some(0x00..=0x7F) => 1,
        Some(0x80..=0xDF) => 2,
        Some(0xE0..=0xEF) => 3,
        Some(_) => 4,
    }
}
/// Sorts `ranges` by start and merges, in place, every range that overlaps
/// or touches the one before it.
fn merge_ranges(ranges: &mut Vec<Range<usize>>) {
    ranges.sort_by_key(|range| range.start);

    // `dedup_by` passes the later element first and the most recently kept
    // element second; returning `true` drops the later one after it has been
    // folded into the kept range.
    ranges.dedup_by(|later, kept| {
        let touches = later.start <= kept.end;
        if touches {
            kept.end = kept.end.max(later.end);
        }
        touches
    });
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks `md_to_typst_local(text, md)` against each `(md, typst)` pair.
    fn check_md_to_typst(text: &str, cases: &[(usize, usize)]) {
        for &(md_off, typst_off) in cases {
            assert_eq!(md_to_typst_local(text, md_off), typst_off);
        }
    }

    /// Checks `typst_to_md_local(text, typst)` against each `(typst, md)` pair.
    fn check_typst_to_md(text: &str, cases: &[(usize, usize)]) {
        for &(typst_off, md_off) in cases {
            assert_eq!(typst_to_md_local(text, typst_off), md_off);
        }
    }

    /// Checks `rendered_to_source_byte(text, rendered)` against each
    /// `(rendered, source)` pair.
    fn check_rendered_to_source(text: &str, cases: &[(usize, usize)]) {
        for &(rendered_off, source_off) in cases {
            assert_eq!(rendered_to_source_byte(text, rendered_off), source_off);
        }
    }

    /// Builds an index holding exactly one text span and no blocks.
    fn single_span_index(
        typst_range: Range<usize>,
        md_range: Range<usize>,
        kind: SpanKind,
    ) -> ContentIndex {
        ContentIndex::new(
            vec![TextSpan {
                typst_range,
                md_range,
                kind,
            }],
            vec![],
        )
    }

    #[test]
    fn md_to_typst_local_no_escapes() {
        check_md_to_typst("hello", &[(0, 0), (3, 3), (5, 5)]);
    }

    #[test]
    fn md_to_typst_local_with_escapes() {
        check_md_to_typst("#hello", &[(0, 0), (1, 2), (3, 4)]);
    }

    #[test]
    fn md_to_typst_local_multibyte_utf8() {
        check_md_to_typst("日本語", &[(0, 0), (3, 3), (6, 6)]);
    }

    #[test]
    fn md_to_typst_local_mixed() {
        check_md_to_typst("$100", &[(0, 0), (1, 2), (4, 5)]);
    }

    #[test]
    fn typst_to_md_local_no_escapes() {
        check_typst_to_md("hello", &[(0, 0), (3, 3), (5, 5)]);
    }

    #[test]
    fn typst_to_md_local_with_escapes() {
        check_typst_to_md("#hello", &[(0, 0), (2, 1), (4, 3)]);
    }

    #[test]
    fn typst_to_md_local_mixed() {
        check_typst_to_md("$100", &[(0, 0), (2, 1), (5, 4)]);
    }

    #[test]
    fn typst_to_md_local_roundtrip() {
        let text = "#hello $world";
        let boundaries = (0..=text.len()).filter(|&off| text.is_char_boundary(off));
        for md_off in boundaries {
            let typst = md_to_typst_local(text, md_off);
            let recovered = typst_to_md_local(text, typst);
            assert_eq!(
                recovered, md_off,
                "roundtrip failed at md_off={md_off}: typst={typst}, recovered={recovered}"
            );
        }
    }

    #[test]
    fn rendered_to_source_no_escapes() {
        check_rendered_to_source("hello", &[(0, 0), (3, 3), (5, 5)]);
    }

    #[test]
    fn rendered_to_source_with_escape() {
        check_rendered_to_source("\\#hello", &[(0, 0), (1, 2), (3, 4)]);
    }

    #[test]
    fn rendered_to_source_double_backslash() {
        check_rendered_to_source("\\\\world", &[(0, 0), (1, 2)]);
    }

    #[test]
    fn rendered_to_source_combined() {
        check_rendered_to_source("\\#a\\\\b", &[(0, 0), (1, 2), (2, 3), (3, 5)]);
    }

    #[test]
    fn md_to_main_ranges_plain() {
        let ci = single_span_index(0..5, 0..5, SpanKind::Plain);
        let result = ci.md_to_main_ranges(&[1..3], "hello", 100);
        assert_eq!(result, vec![101..103]);
    }

    #[test]
    fn md_to_main_ranges_plain_with_escape() {
        let ci = single_span_index(0..4, 0..3, SpanKind::Plain);
        let result = ci.md_to_main_ranges(&[1..3], "#hi", 10);
        assert_eq!(result, vec![12..14]);
    }

    #[test]
    fn md_to_main_ranges_code() {
        let ci = single_span_index(10..15, 5..10, SpanKind::Code);
        let result = ci.md_to_main_ranges(&[6..9], "xxxxx12345", 0);
        assert_eq!(result, vec![11..14]);
    }

    #[test]
    fn md_to_main_ranges_math() {
        let ci = single_span_index(0..20, 0..10, SpanKind::Math);
        let result = ci.md_to_main_ranges(&[3..5], "0123456789", 50);
        assert_eq!(result, vec![50..70]);
    }

    #[test]
    fn md_to_main_ranges_opaque_skipped() {
        let ci = single_span_index(0..30, 0..10, SpanKind::Opaque);
        let result = ci.md_to_main_ranges(&[0..10], "0123456789", 0);
        assert!(result.is_empty());
    }

    #[test]
    fn md_to_main_ranges_merge_adjacent() {
        let halves = [(0..5, 0..5), (5..10, 5..10)];
        let spans = halves
            .into_iter()
            .map(|(typst_range, md_range)| TextSpan {
                typst_range,
                md_range,
                kind: SpanKind::Plain,
            })
            .collect();
        let ci = ContentIndex::new(spans, vec![]);
        let result = ci.md_to_main_ranges(&[3..8], "0123456789", 0);
        assert_eq!(result, vec![3..8]);
    }

    #[test]
    fn merge_ranges_empty() {
        let mut r: Vec<Range<usize>> = Vec::new();
        merge_ranges(&mut r);
        assert!(r.is_empty());
    }

    #[test]
    fn merge_ranges_overlapping() {
        let mut r = vec![1..5, 3..8, 10..15];
        merge_ranges(&mut r);
        assert_eq!(r, vec![1..8, 10..15]);
    }

    #[test]
    fn merge_ranges_adjacent() {
        let mut r = vec![1..5, 5..10];
        merge_ranges(&mut r);
        assert_eq!(r, vec![1..10]);
    }
}