use std::ops::Range;
use super::MathRegion;
use super::env::{EnvKind, KnownEnv};
use super::span::{AnyDelim, DisplayDelim, InlineDelim, MathBody, MathError, MathSpan};
/// Switches selecting which math syntaxes [`scan_math_regions`] recognises.
///
/// Every flag gates exactly one syntax and is independent of the others.
#[derive(Copy, Clone, Debug)]
#[allow(
    clippy::struct_excessive_bools,
    reason = "each flag independently toggles one delimiter syntax"
)]
pub struct MathConfig {
    /// Recognise `\[ … \]` as display math.
    pub backslash_bracket: bool,
    /// Recognise `\( … \)` as inline math.
    pub backslash_paren: bool,
    /// Recognise `$$ … $$` as display math.
    pub double_dollar: bool,
    /// Recognise `$ … $` as inline math.
    pub single_dollar: bool,
    /// Recognise `\begin{name} … \end{name}` environments.
    pub environments: bool,
}
impl Default for MathConfig {
    /// Default configuration: the backslash delimiters and environments are
    /// enabled, both dollar delimiters are disabled.
    fn default() -> Self {
        Self {
            // Dollar-based delimiters start out switched off.
            single_dollar: false,
            double_dollar: false,
            // Backslash delimiters and `\begin`/`\end` start out switched on.
            backslash_paren: true,
            backslash_bracket: true,
            environments: true,
        }
    }
}
/// Scans `source` for math regions and collects diagnostics for malformed ones.
///
/// The scan advances through `source` byte by byte. Positions that fall inside
/// `exclusions` or `transparent_runs` are jumped over. At every other position
/// a `\begin{name}` environment is tried first (when `cfg.environments` is
/// set), then the delimiters enabled in `cfg`.
///
/// # Parameters
/// - `source`: the text to scan.
/// - `exclusions`: byte ranges in which no math may start; a delimiter search
///   that runs into one is abandoned.
/// - `transparent_runs`: byte ranges that are skipped while scanning and handed
///   to the body of every region they overlap (the tests use them for
///   blockquote and list-item prefixes).
/// - `cfg`: which syntaxes are recognised.
///
/// Both range slices are searched with `partition_point` on `start`, so they
/// are assumed to be sorted by start and non-overlapping.
///
/// # Returns
/// The regions in source order, plus the errors collected on the way:
/// [`MathError::UnbalancedEnv`] for a `\begin` without a matching `\end`,
/// [`MathError::UnbalancedDelim`] for an opener without a closer, and
/// [`MathError::UnbalancedBraces`] for an accepted region whose body braces do
/// not balance.
#[tracing::instrument(
    level = "debug",
    skip_all,
    fields(len = source.len(), transparent = transparent_runs.len()),
)]
pub fn scan_math_regions(
    source: &str,
    exclusions: &[Range<usize>],
    transparent_runs: &[Range<usize>],
    cfg: MathConfig,
) -> (Vec<MathRegion>, Vec<MathError>) {
    let bytes = source.as_bytes();
    let mut regions: Vec<MathRegion> = Vec::new();
    let mut errors: Vec<MathError> = Vec::new();
    let mut i = 0usize;
    while i < bytes.len() {
        // Never start a match inside an excluded or transparent range.
        if let Some(end) = excluded_end(exclusions, i) {
            i = end;
            continue;
        }
        if let Some(end) = transparent_end(transparent_runs, i) {
            i = end;
            continue;
        }

        // Environments take precedence over plain delimiters.
        if cfg.environments
            && let Some((env_name, name_range, after_begin)) = match_begin(source, bytes, i)
        {
            match find_end_env(source, bytes, after_begin, env_name, exclusions, transparent_runs) {
                Some((end_start, end_after)) => {
                    // The region spans `\begin{..}` through `\end{..}`; the
                    // body is what lies between the two commands.
                    let region = i..end_after;
                    let body_range = after_begin..end_start;
                    let env = match KnownEnv::from_name(env_name) {
                        Some(k) => EnvKind::Known(k),
                        None => EnvKind::Custom(name_range),
                    };
                    let body = build_math_body(body_range.clone(), transparent_runs);
                    record_brace_errors(source, &region, &body, &mut errors);
                    let span = MathSpan::Environment { env, body };
                    regions.push(MathRegion::new(region.clone(), span));
                    tracing::debug!(
                        env = env_name,
                        range = ?region,
                        stripped = !body_runs_empty(&body_range, transparent_runs),
                        "env region",
                    );
                    i = end_after;
                    continue;
                }
                None => {
                    // Report the dangling `\begin{..}` and resume right after
                    // it so math inside the would-be body is still found.
                    errors.push(MathError::UnbalancedEnv {
                        name: env_name.to_string(),
                        range: i..after_begin,
                    });
                    i = after_begin;
                    continue;
                }
            }
        }

        let Some((delim, open_len)) = match_open(bytes, i, cfg) else {
            i = i.saturating_add(1);
            continue;
        };
        let content_start = i.saturating_add(open_len);
        match find_close(bytes, content_start, delim, exclusions, transparent_runs) {
            Some(close_start) => {
                // A body without any ASCII letter or digit is rejected; the
                // scan resumes one byte past the opener's first byte.
                // NOTE(review): this check is ASCII-only, so a body made only
                // of non-ASCII symbols is rejected too — confirm that is
                // intended.
                let body_slice = bytes.get(content_start..close_start).unwrap_or(&[]);
                if !body_slice.iter().any(u8::is_ascii_alphanumeric) {
                    i = i.saturating_add(1);
                    continue;
                }
                let close_len = delim.close().len();
                let region_end = close_start.saturating_add(close_len);
                let region = i..region_end;
                let body_range = content_start..close_start;
                let body = build_math_body(body_range.clone(), transparent_runs);
                record_brace_errors(source, &region, &body, &mut errors);
                let span = match delim {
                    AnyDelim::Paren => MathSpan::Inline {
                        delim: InlineDelim::Paren,
                        body,
                    },
                    AnyDelim::Dollar => MathSpan::Inline {
                        delim: InlineDelim::Dollar,
                        body,
                    },
                    AnyDelim::Bracket => MathSpan::Display {
                        delim: DisplayDelim::Bracket,
                        body,
                    },
                    AnyDelim::Dollar2 => MathSpan::Display {
                        delim: DisplayDelim::Dollar2,
                        body,
                    },
                };
                regions.push(MathRegion::new(region.clone(), span));
                tracing::debug!(
                    delim = delim.open(),
                    range = ?region,
                    stripped = !body_runs_empty(&body_range, transparent_runs),
                    "delim region",
                );
                i = region_end;
            }
            None => {
                // Report the dangling opener and resume right after it.
                errors.push(MathError::UnbalancedDelim {
                    delim,
                    range: i..content_start,
                });
                i = content_start;
            }
        }
    }
    (regions, errors)
}
/// Builds the [`MathBody`] for `body_range`, attaching a copy of every
/// transparent run that overlaps the range.
fn build_math_body(body_range: Range<usize>, transparent_runs: &[Range<usize>]) -> MathBody {
    let mut overlapping: Vec<Range<usize>> = Vec::new();
    for run in transparent_runs {
        // Half-open interval overlap: each range starts before the other ends.
        let starts_before_body_ends = run.start < body_range.end;
        let ends_after_body_starts = body_range.start < run.end;
        if starts_before_body_ends && ends_after_body_starts {
            overlapping.push(run.clone());
        }
    }
    MathBody::new(body_range, overlapping.into_boxed_slice())
}
/// Returns `true` when no transparent run overlaps `body_range`.
fn body_runs_empty(body_range: &Range<usize>, transparent_runs: &[Range<usize>]) -> bool {
    // A run is disjoint from the body when it starts at or after the body's
    // end, or ends at or before the body's start.
    transparent_runs
        .iter()
        .all(|run| run.start >= body_range.end || body_range.start >= run.end)
}
/// Pushes a [`MathError::UnbalancedBraces`] onto `errors` when the brace check
/// on the cleaned text of `body` fails; does nothing otherwise.
///
/// The offset carried by the failure is relative to the cleaned text and is
/// mapped back to a source offset before it is stored.
fn record_brace_errors(source: &str, region: &Range<usize>, body: &MathBody, errors: &mut Vec<MathError>) {
    let cleaned = body.as_str(source);
    let Err(clean_offset) = super::normalise::body_braces_balanced(cleaned.as_ref()) else {
        return;
    };
    let offset = body.clean_offset_to_source(clean_offset);
    errors.push(MathError::UnbalancedBraces {
        offset,
        region: region.clone(),
    });
}
/// Returns the end of the exclusion range containing `i`, or `None` when `i`
/// is not inside one.
///
/// Only the last range whose start is `<= i` is inspected, so `exclusions` is
/// assumed to be sorted by start and non-overlapping.
fn excluded_end(exclusions: &[Range<usize>], i: usize) -> Option<usize> {
    // Number of ranges that start at or before `i`.
    let started = exclusions.partition_point(|r| r.start <= i);
    let candidate = exclusions.get(started.checked_sub(1)?)?;
    (i < candidate.end).then_some(candidate.end)
}
/// Returns the end of the transparent run containing `i`, or `None` when `i`
/// is not inside one.
///
/// Only the last run whose start is `<= i` is inspected, so `transparent_runs`
/// is assumed to be sorted by start and non-overlapping.
fn transparent_end(transparent_runs: &[Range<usize>], i: usize) -> Option<usize> {
    let upper = transparent_runs.partition_point(|run| run.start <= i);
    upper
        .checked_sub(1)
        .and_then(|last| transparent_runs.get(last))
        .filter(|run| i < run.end)
        .map(|run| run.end)
}
/// Matches `\begin{name}` starting at byte `i`.
///
/// On success returns the environment name, the byte range of the name inside
/// `source`, and the offset just past the closing `}`.
fn match_begin<'a>(source: &'a str, bytes: &[u8], i: usize) -> Option<(&'a str, Range<usize>, usize)> {
    match_kw(bytes, i, b"begin").and_then(|brace_at| parse_env_name(source, brace_at))
}
/// Matches `\end{name}` starting at byte `j`.
///
/// On success returns the environment name, the byte range of the name inside
/// `source`, and the offset just past the closing `}`.
fn match_end<'a>(source: &'a str, bytes: &[u8], j: usize) -> Option<(&'a str, Range<usize>, usize)> {
    parse_env_name(source, match_kw(bytes, j, b"end")?)
}
/// Matches an unescaped backslash at `i` followed immediately by `keyword`.
///
/// Returns the offset just past the keyword, or `None` when `i` is not a
/// backslash, the backslash is itself escaped (odd number of backslashes
/// before it), or the bytes after it differ from `keyword`.
fn match_kw(bytes: &[u8], i: usize, keyword: &[u8]) -> Option<usize> {
    let starts_command = bytes.get(i).is_some_and(|&b| b == b'\\') && preceding_backslashes_even(bytes, i);
    if !starts_command {
        return None;
    }
    let word_from = i.saturating_add(1);
    let word_to = word_from.saturating_add(keyword.len());
    match bytes.get(word_from..word_to) {
        Some(found) if found == keyword => Some(word_to),
        _ => None,
    }
}
/// Parses a braced environment name `{name}` or `{name*}` at offset `after`.
///
/// `after` is the offset where the opening `{` is expected (just past a
/// `\begin` or `\end` keyword). A name is one or more ASCII letters optionally
/// followed by a single `*`.
///
/// Returns the name (including the `*`, if present), its byte range in
/// `source`, and the offset just past the closing `}`. Returns `None` when the
/// `{` is missing, the name has no letters, or the name is not terminated by
/// `}`.
fn parse_env_name(source: &str, after: usize) -> Option<(&str, Range<usize>, usize)> {
    let bytes = source.as_bytes();
    if bytes.get(after).copied() != Some(b'{') {
        return None;
    }
    let name_start = after.saturating_add(1);
    let letters = bytes
        .get(name_start..)?
        .iter()
        .take_while(|b| b.is_ascii_alphabetic())
        .count();
    // Require at least one letter *before* looking at the star, so that a bare
    // `{*}` is not mistaken for an environment called `*`.
    if letters == 0 {
        return None;
    }
    let mut name_end = name_start.saturating_add(letters);
    if bytes.get(name_end).copied() == Some(b'*') {
        name_end = name_end.saturating_add(1);
    }
    if bytes.get(name_end).copied() != Some(b'}') {
        return None;
    }
    let name = source.get(name_start..name_end)?;
    Some((name, name_start..name_end, name_end.saturating_add(1)))
}
/// Finds the `\end{name}` that closes an environment whose body starts at
/// `from`.
///
/// Nested `\begin{name}` … `\end{name}` pairs with the same name are counted so
/// that the matching outer `\end` is returned; commands for other environment
/// names are stepped over. Excluded ranges and transparent runs are skipped.
///
/// Returns `(end_start, end_after)`: the offset of the closing command's
/// backslash and the offset just past its `}`. Returns `None` when the end of
/// input is reached first.
fn find_end_env(
    source: &str,
    bytes: &[u8],
    from: usize,
    name: &str,
    exclusions: &[Range<usize>],
    transparent_runs: &[Range<usize>],
) -> Option<(usize, usize)> {
    // Number of still-open environments called `name`, the outer one included.
    let mut open_count: u32 = 1;
    let mut pos = from;
    while pos < bytes.len() {
        // Exclusions are consulted first, then transparent runs.
        let skip_to = excluded_end(exclusions, pos).or_else(|| transparent_end(transparent_runs, pos));
        if let Some(resume) = skip_to {
            pos = resume;
            continue;
        }

        if let Some((closing, _, resume)) = match_end(source, bytes, pos) {
            if closing == name {
                open_count = open_count.saturating_sub(1);
                if open_count == 0 {
                    return Some((pos, resume));
                }
            }
            pos = resume;
            continue;
        }

        pos = match match_begin(source, bytes, pos) {
            Some((opening, _, resume)) => {
                if opening == name {
                    open_count = open_count.saturating_add(1);
                }
                resume
            }
            None => pos.saturating_add(1),
        };
    }
    None
}
/// Recognises an opening math delimiter at byte `i`.
///
/// Returns the delimiter kind and the byte length of its opening token:
/// - `\[` (when `cfg.backslash_bracket`) and `\(` (when `cfg.backslash_paren`),
/// - `$$` (when `cfg.double_dollar`), otherwise `$` (when `cfg.single_dollar`).
///
/// Returns `None` when no enabled opener starts at `i`, or when the byte at
/// `i` is preceded by an odd number of backslashes. The latter covers both an
/// escaped backslash (`\\[`) and an escaped dollar sign (`\$`), which is a
/// literal dollar rather than a math opener.
fn match_open(bytes: &[u8], i: usize, cfg: MathConfig) -> Option<(AnyDelim, usize)> {
    let next = bytes.get(i.saturating_add(1)).copied();
    let opener = match bytes.get(i).copied()? {
        b'\\' => match next? {
            b'[' if cfg.backslash_bracket => (AnyDelim::Bracket, 2),
            b'(' if cfg.backslash_paren => (AnyDelim::Paren, 2),
            _ => return None,
        },
        // `$$` wins over `$` when both are enabled.
        b'$' if cfg.double_dollar && next == Some(b'$') => (AnyDelim::Dollar2, 2),
        b'$' if cfg.single_dollar => (AnyDelim::Dollar, 1),
        _ => return None,
    };
    // The same parity rule applies to every opener: an odd run of backslashes
    // directly before `i` escapes the byte at `i`.
    preceding_backslashes_even(bytes, i).then_some(opener)
}
/// Returns `true` when the run of consecutive backslashes ending directly
/// before index `i` has even length (zero included).
///
/// An index past the end of `bytes` has no run before it and yields `true`.
fn preceding_backslashes_even(bytes: &[u8], i: usize) -> bool {
    let run_len = bytes
        .get(..i)
        .map_or(0, |head| head.iter().rev().take_while(|&&b| b == b'\\').count());
    run_len.is_multiple_of(2)
}
/// Finds the first closing token for `delim` at or after `from`.
///
/// Returns the offset at which the closer starts. Returns `None` when the end
/// of input is reached, or as soon as the search runs into an excluded range
/// (a delimited region never extends across an exclusion). Transparent runs
/// are skipped.
///
/// A closer only counts when it is preceded by an even number of backslashes.
/// This applies to `\]` / `\)` and equally to `$` / `$$`, so an escaped `\$`
/// inside the body does not end the region.
fn find_close(
    bytes: &[u8],
    from: usize,
    delim: AnyDelim,
    exclusions: &[Range<usize>],
    transparent_runs: &[Range<usize>],
) -> Option<usize> {
    let mut j = from;
    while j < bytes.len() {
        if excluded_end(exclusions, j).is_some() {
            return None;
        }
        if let Some(end) = transparent_end(transparent_runs, j) {
            j = end;
            continue;
        }
        let here = bytes.get(j).copied();
        let next = bytes.get(j.saturating_add(1)).copied();
        let looks_like_close = match delim {
            AnyDelim::Bracket | AnyDelim::Paren => {
                here == Some(b'\\') && next == Some(close_target_byte(delim))
            }
            AnyDelim::Dollar2 => here == Some(b'$') && next == Some(b'$'),
            AnyDelim::Dollar => here == Some(b'$'),
        };
        if looks_like_close && preceding_backslashes_even(bytes, j) {
            return Some(j);
        }
        j = j.saturating_add(1);
    }
    None
}
/// Returns the byte that identifies the closing token of `delim`: the byte
/// after the backslash for `\]` / `\)`, and `$` for both dollar delimiters.
const fn close_target_byte(delim: AnyDelim) -> u8 {
    match delim {
        AnyDelim::Dollar | AnyDelim::Dollar2 => b'$',
        AnyDelim::Paren => b')',
        AnyDelim::Bracket => b']',
    }
}
#[cfg(test)]
#[allow(
    clippy::indexing_slicing,
    clippy::panic,
    reason = "tests index and panic deliberately so that failures are loud"
)]
mod tests {
    use std::borrow::Cow;

    use super::*;

    /// Scans with no exclusions, no transparent runs and the default config.
    fn scan(source: &str) -> (Vec<MathRegion>, Vec<MathError>) {
        scan_math_regions(source, &[], &[], MathConfig::default())
    }

    /// Scans with no exclusions but caller-supplied transparent runs and config.
    fn scan_with_runs(
        source: &str,
        transparent_runs: &[Range<usize>],
        cfg: MathConfig,
    ) -> (Vec<MathRegion>, Vec<MathError>) {
        scan_math_regions(source, &[], transparent_runs, cfg)
    }

    /// Like [`scan`], but keeps only the regions.
    fn regions(source: &str) -> Vec<MathRegion> {
        scan(source).0
    }

    // `\[ … \]` on one line is a single display region covering both delimiters.
    #[test]
    fn display_math_single_line() {
        let s = r"prefix \[ A \] suffix";
        let regs = regions(s);
        assert_eq!(regs.len(), 1);
        assert_eq!(&s[regs[0].range.clone()], r"\[ A \]");
        assert!(matches!(
            regs[0].span(),
            MathSpan::Display {
                delim: DisplayDelim::Bracket,
                ..
            }
        ));
    }

    // A display region may span several lines.
    #[test]
    fn display_math_multi_line() {
        let s = "before \\[\n A \\to B\n\\] after";
        let regs = regions(s);
        assert_eq!(regs.len(), 1);
        let span = &s[regs[0].range.clone()];
        assert!(span.starts_with(r"\["));
        assert!(span.ends_with(r"\]"));
    }

    // `\( … \)` is an inline region.
    #[test]
    fn inline_math_paren() {
        let s = r"x is \( a + b \) units";
        let regs = regions(s);
        assert_eq!(regs.len(), 1);
        assert_eq!(&s[regs[0].range.clone()], r"\( a + b \)");
        assert!(matches!(
            regs[0].span(),
            MathSpan::Inline {
                delim: InlineDelim::Paren,
                ..
            }
        ));
    }

    // Two regions in one source come back in order and do not overlap.
    #[test]
    fn two_separate_regions() {
        let s = r"see \[ A \] and \[ B \] both";
        let regs = regions(s);
        assert_eq!(regs.len(), 2);
        assert!(regs[0].range.end <= regs[1].range.start);
    }

    // An opener without a closer yields no region and one `UnbalancedDelim`.
    #[test]
    fn unbalanced_open_drops_region_and_emits_error() {
        let s = r"start \[ no close here";
        let (regs, errs) = scan(s);
        assert!(regs.is_empty());
        assert_eq!(errs.len(), 1);
        match &errs[0] {
            MathError::UnbalancedDelim { delim, .. } => {
                assert!(delim.is_display());
                assert_eq!(delim.open(), r"\[");
                assert_eq!(delim.close(), r"\]");
            }
            MathError::UnbalancedEnv { .. } | MathError::UnbalancedBraces { .. } => {
                panic!("expected delim error")
            }
        }
    }

    // Delimiters do not nest: the first closer ends the region.
    #[test]
    fn greedy_first_close() {
        let s = r"\[ a \[ b \] c \]";
        let regs = regions(s);
        assert_eq!(regs.len(), 1);
        assert_eq!(&s[regs[0].range.clone()], r"\[ a \[ b \]");
    }

    // `\\[` is an escaped backslash followed by `[`, not an opener.
    #[test]
    fn double_backslash_open_is_not_math() {
        let s = r"foo \\[ not math \] bar";
        assert!(regions(s).is_empty());
    }

    // `\\\[` is an escaped backslash followed by a real `\[` opener.
    #[test]
    fn triple_backslash_open_is_math() {
        let s = r"foo \\\[ A \] bar";
        assert_eq!(regions(s).len(), 1);
    }

    // Bytes 5..14 cover the whole code span, backticks included.
    #[test]
    #[allow(
        clippy::single_range_in_vec_init,
        reason = "test intentionally passes one exclusion range"
    )]
    fn region_inside_code_span_excluded() {
        let s = r"text `\[ x \]` more";
        let exclusions = [5..14];
        let (regs, _) = scan_math_regions(s, &exclusions, &[], MathConfig::default());
        assert!(regs.is_empty());
    }

    // The exclusion covers the entire fenced block.
    #[test]
    #[allow(
        clippy::single_range_in_vec_init,
        reason = "test intentionally passes one exclusion range"
    )]
    fn region_inside_code_block_excluded() {
        let s = "```\n\\[ x \\]\n```";
        let exclusions = [0..s.len()];
        let (regs, _) = scan_math_regions(s, &exclusions, &[], MathConfig::default());
        assert!(regs.is_empty());
    }

    // Bytes 4..26 cover the opening `<a …>` tag, which contains a `$`.
    #[test]
    #[allow(
        clippy::single_range_in_vec_init,
        reason = "test intentionally passes one exclusion range"
    )]
    fn region_inside_inline_html_excluded() {
        let s = r#"see <a href="/x?val=$foo">x</a> after"#;
        let exclusions = [4..26];
        let cfg = MathConfig {
            single_dollar: true,
            ..MathConfig::default()
        };
        let (regs, _) = scan_math_regions(s, &exclusions, &[], cfg);
        assert!(regs.is_empty());
    }

    // With the default config neither `$` nor `$$` opens math.
    #[test]
    fn dollar_variants_off_by_default() {
        let s = "value is $5 today, plus $$2 tomorrow";
        assert!(regions(s).is_empty());
    }

    // `$$ … $$` is display math once `double_dollar` is enabled.
    #[test]
    fn double_dollar_when_enabled() {
        let s = "see $$ x = 5 $$ above";
        let cfg = MathConfig {
            double_dollar: true,
            ..MathConfig::default()
        };
        let (regs, _) = scan_math_regions(s, &[], &[], cfg);
        assert_eq!(regs.len(), 1);
        assert_eq!(&s[regs[0].range.clone()], "$$ x = 5 $$");
        assert!(matches!(
            regs[0].span(),
            MathSpan::Display {
                delim: DisplayDelim::Dollar2,
                ..
            }
        ));
    }

    // `$ … $` is inline math once `single_dollar` is enabled.
    #[test]
    fn single_dollar_when_enabled() {
        let s = "x is $a + b$";
        let cfg = MathConfig {
            single_dollar: true,
            ..MathConfig::default()
        };
        let (regs, _) = scan_math_regions(s, &[], &[], cfg);
        assert_eq!(regs.len(), 1);
        assert_eq!(&s[regs[0].range.clone()], "$a + b$");
        assert!(matches!(
            regs[0].span(),
            MathSpan::Inline {
                delim: InlineDelim::Dollar,
                ..
            }
        ));
    }

    // Characters that are Markdown emphasis markers stay inside the region.
    #[test]
    fn region_with_subscripts_and_emphasis_chars() {
        let s = r"see \[ \pi_A:\Gamma.A\to \Gamma \] above";
        let regs = regions(s);
        assert_eq!(regs.len(), 1);
        let span = &s[regs[0].range.clone()];
        assert!(span.contains("_A"));
        assert!(span.contains(r"\Gamma"));
    }

    // Mixed delimiter kinds still come back ordered and disjoint.
    #[test]
    fn regions_dont_overlap_or_misorder() {
        let s = r"\[ a \] mid \( b \) end \[ c \]";
        let regs = regions(s);
        assert_eq!(regs.len(), 3);
        for w in regs.windows(2) {
            assert!(w[0].range.end <= w[1].range.start);
        }
    }

    // A known environment spans `\begin{..}` through `\end{..}`.
    #[test]
    fn environment_basic() {
        let s = "before \\begin{align} x &= y \\end{align} after";
        let regs = regions(s);
        assert_eq!(regs.len(), 1);
        let span = &s[regs[0].range.clone()];
        assert!(span.starts_with("\\begin{align}"));
        assert!(span.ends_with("\\end{align}"));
        match regs[0].span() {
            MathSpan::Environment { env, body } => {
                assert!(matches!(env, EnvKind::Known(KnownEnv::Align)));
                assert!(body.as_str(s).contains("x &= y"));
            }
            MathSpan::Inline { .. } | MathSpan::Display { .. } => {
                panic!("expected environment span")
            }
        }
    }

    // Same-name nesting is depth-counted, so the outer `\end` closes the region.
    #[test]
    fn environment_nested_same_name() {
        let s = "\\begin{matrix} a \\begin{matrix} b \\end{matrix} c \\end{matrix}";
        let regs = regions(s);
        assert_eq!(regs.len(), 1);
        assert_eq!(&s[regs[0].range.clone()], s);
    }

    // A trailing `*` is part of the environment name.
    #[test]
    fn environment_starred_name() {
        let s = "\\begin{align*} x \\end{align*}";
        let regs = regions(s);
        assert_eq!(regs.len(), 1);
        assert!(matches!(
            regs[0].span(),
            MathSpan::Environment {
                env: EnvKind::Known(KnownEnv::AlignStar),
                ..
            }
        ));
    }

    // An unknown environment name is kept as a source range.
    #[test]
    fn environment_custom_name_round_trips() {
        let s = "\\begin{widget} q \\end{widget}";
        let regs = regions(s);
        assert_eq!(regs.len(), 1);
        match regs[0].span() {
            MathSpan::Environment {
                env: EnvKind::Custom(name_range),
                ..
            } => {
                assert_eq!(&s[name_range.clone()], "widget");
            }
            MathSpan::Inline { .. }
            | MathSpan::Display { .. }
            | MathSpan::Environment {
                env: EnvKind::Known(_), ..
            } => {
                panic!("expected custom env")
            }
        }
    }

    // A `\begin` without its `\end` yields no region and one `UnbalancedEnv`.
    #[test]
    fn environment_unbalanced_emits_error() {
        let s = "\\begin{align} x = 1 \n";
        let (regs, errs) = scan(s);
        assert!(regs.is_empty());
        assert_eq!(errs.len(), 1);
        assert!(matches!(&errs[0], MathError::UnbalancedEnv { name, .. } if name == "align"));
    }

    // An environment inside `\[ … \]` belongs to the enclosing display region.
    #[test]
    fn environment_inside_display_is_one_region() {
        let s = "\\[ \\begin{aligned} a &= b \\end{aligned} \\]";
        let regs = regions(s);
        assert_eq!(regs.len(), 1);
        assert!(matches!(
            regs[0].span(),
            MathSpan::Display {
                delim: DisplayDelim::Bracket,
                ..
            }
        ));
    }

    // Unbalanced braces are reported, but the region is still returned.
    #[test]
    fn brace_imbalance_emits_error_but_region_still_scans() {
        let s = r"\[ \frac{a}{b \]";
        let (regs, errs) = scan(s);
        assert_eq!(regs.len(), 1);
        assert!(errs.iter().any(|e| matches!(e, MathError::UnbalancedBraces { .. })));
    }

    // `\{` and `\}` must not take part in brace balancing.
    #[test]
    fn brace_balance_with_escaped_braces() {
        let s = r"\[ \{ a \} \]";
        let (_, errs) = scan(s);
        assert!(
            errs.iter().all(|e| !matches!(e, MathError::UnbalancedBraces { .. })),
            "escaped braces should not count: {errs:?}"
        );
    }

    // Runs 5..7 and 13..15 are the `> ` prefixes of the second and third line.
    #[test]
    fn transparent_run_in_blockquote_strips_prefix() {
        let s = "> $$\n> x = 1\n> $$";
        let runs = vec![5..7, 13..15];
        let cfg = MathConfig {
            double_dollar: true,
            ..MathConfig::default()
        };
        let (regs, _) = scan_with_runs(s, &runs, cfg);
        assert_eq!(regs.len(), 1, "expected one region in {s:?}");
        let body = regs[0].span().body();
        let clean = body.as_str(s);
        assert!(
            matches!(&clean, Cow::Owned(_)),
            "expected owned body for container-nested math, got {clean:?}",
        );
        assert!(!clean.contains('>'), "container prefix leaked: {clean:?}");
        assert!(clean.contains("x = 1"), "body lost content: {clean:?}");
    }

    // Each continuation line is indented by exactly three spaces; the runs
    // 8..11, 14..17 and 23..26 are those three-byte indents, so the literal
    // and the offsets must stay in step.
    #[test]
    fn transparent_run_in_list_item_strips_indent() {
        let s = "1. item\n   $$\n   x = 1\n   $$";
        let runs = vec![8..11, 14..17, 23..26];
        let cfg = MathConfig {
            double_dollar: true,
            ..MathConfig::default()
        };
        let (regs, _) = scan_with_runs(s, &runs, cfg);
        assert_eq!(regs.len(), 1);
        let clean = regs[0].span().body().as_str(s);
        assert!(matches!(&clean, Cow::Owned(_)));
        // The three-space indent must be gone; single spaces inside `x = 1`
        // are body content and remain.
        assert!(!clean.contains("   "), "indent leaked: {clean:?}");
        assert!(clean.contains("x = 1"));
    }

    // Runs 7..11 and 13..17 are the `> > ` prefixes of the nested blockquote.
    #[test]
    fn nested_blockquote_combined_prefix() {
        let s = "> > $$\n> > x\n> > $$";
        let runs = vec![7..11, 13..17];
        let cfg = MathConfig {
            double_dollar: true,
            ..MathConfig::default()
        };
        let (regs, _) = scan_with_runs(s, &runs, cfg);
        assert_eq!(regs.len(), 1);
        let clean = regs[0].span().body().as_str(s);
        assert!(!clean.contains('>'), "prefix leaked: {clean:?}");
        assert!(clean.contains('x'));
    }

    // Without transparent runs the body text is borrowed straight from the source.
    #[test]
    fn top_level_math_borrows() {
        let s = "$$\nx\n$$";
        let cfg = MathConfig {
            double_dollar: true,
            ..MathConfig::default()
        };
        let (regs, _) = scan_with_runs(s, &[], cfg);
        assert_eq!(regs.len(), 1);
        let clean = regs[0].span().body().as_str(s);
        assert!(
            matches!(clean, Cow::Borrowed(_)),
            "expected borrowed body for top-level math",
        );
    }

    // Body source ranges alone are enough to feed the LaTeX-to-Unicode translator.
    #[test]
    fn body_source_ranges_can_drive_latex_translation_without_markdown_parsing() {
        let s = r"Inline \( \alpha_i \) and \[ x^{2} \].";
        let regs = regions(s);
        let ranges = regs
            .iter()
            .map(|region| region.span().body().source_range())
            .collect::<Vec<_>>();
        let translated = mdwright_latex::translate_latex_ranges_to_unicode(s, &ranges);
        assert_eq!(translated.text(), r"Inline \( αᵢ \) and \[ x² \].");
        assert_eq!(translated.edit_count(), 2);
        assert!(translated.is_lossless());
    }

    // Run 7..9 is the `> ` prefix right before the closing `$$` at 9..11.
    #[test]
    fn transparent_run_protects_delim_match() {
        let s = "> $$ x\n> $$";
        let run = 7..9;
        let runs = std::slice::from_ref(&run);
        let cfg = MathConfig {
            double_dollar: true,
            ..MathConfig::default()
        };
        let (regs, _) = scan_with_runs(s, runs, cfg);
        assert_eq!(regs.len(), 1, "expected one region in {s:?}");
        assert_eq!(regs[0].range.end, 11);
    }

    // Run 9..11 is the `> ` prefix; the lone `$` after it has no closer.
    #[test]
    fn transparent_run_blocks_spurious_delim() {
        let s = "not math\n> $\n";
        let run = 9..11;
        let runs = std::slice::from_ref(&run);
        let cfg = MathConfig {
            single_dollar: true,
            ..MathConfig::default()
        };
        let (regs, errs) = scan_with_runs(s, runs, cfg);
        assert!(regs.is_empty(), "no region should match in {s:?}");
        assert!(
            errs.iter().any(|e| matches!(e, MathError::UnbalancedDelim { .. })),
            "expected an UnbalancedDelim for the unclosed `$`: {errs:?}",
        );
    }
}