use std::borrow::Cow;
use std::ops::ControlFlow;
use super::fence::{CodeRegion, FenceRegion, InlineRegion, InlineTerminator, scan_code_regions};
/// Returns `true` when `ch` is an underscore or an alphanumeric character
/// (as defined by [`char::is_alphanumeric`], so non-ASCII letters qualify).
pub fn is_word_char(ch: char) -> bool {
    match ch {
        '_' => true,
        other => other.is_alphanumeric(),
    }
}
/// Reports whether the byte at `pos` is escaped, i.e. whether it is
/// immediately preceded by an odd number of consecutive backslashes.
///
/// `pos == 0` always yields `false`.
///
/// # Panics
///
/// Panics if `pos > bytes.len()`.
pub(crate) fn is_escaped(bytes: &[u8], pos: usize) -> bool {
    // Count the unbroken run of backslashes that ends right before `pos`.
    let preceding_backslashes = bytes[..pos]
        .iter()
        .rev()
        .take_while(|&&b| b == b'\\')
        .count();
    preceding_backslashes % 2 == 1
}
/// Returns `true` when `s` is empty or consists solely of ASCII whitespace
/// (space, tab, `\n`, `\r`) and the marker characters `_`, `~`, `*` and `` ` ``.
pub fn is_empty_or_markers(s: &str) -> bool {
    const IGNORABLE: &[u8] = b" \t\n\r_~*`";
    // Any byte outside the set (including every non-ASCII byte) disqualifies.
    !s.bytes().any(|byte| !IGNORABLE.contains(&byte))
}
/// Returns `true` when `s` is nothing but a bullet list marker: optional
/// leading spaces/tabs, one of `-`, `*` or `+`, and then at least one
/// space/tab with only spaces/tabs up to the end of the string.
pub fn is_list_marker_line(s: &str) -> bool {
    fn is_blank(b: u8) -> bool {
        matches!(b, b' ' | b'\t')
    }

    // Drop the indentation, then consume the marker and the mandatory blank.
    let mut rest = s.bytes().skip_while(|&b| is_blank(b));
    let has_bullet = matches!(rest.next(), Some(b'-' | b'*' | b'+'));
    has_bullet && matches!(rest.next(), Some(b' ' | b'\t')) && rest.all(is_blank)
}
/// Returns `true` when `position` lies inside a code region reported by
/// `scan_code_regions`.
///
/// A region contains `position` when the position is strictly after the
/// region's opening byte and below the bound computed by `fence_end` /
/// `inline_end`. The scan stops at the first region whose opener is at or
/// after `position`.
// NOTE(review): the early stop presumes regions arrive in increasing opener
// order — confirm against `scan_code_regions`.
pub fn is_inside_code_block(text: &str, position: usize) -> bool {
    let text_len = text.len();
    let mut found = false;
    scan_code_regions(text, |region| {
        let (opener, end_bound) = match region {
            CodeRegion::Fence(fence) => (fence.open_run_start, fence_end(&fence, text_len)),
            CodeRegion::Inline(span) => (span.open_pos, inline_end(&span, text_len)),
        };
        // An opener at or past the queried position cannot contain it.
        if opener >= position {
            return ControlFlow::Break(());
        }
        if position < end_bound {
            found = true;
            return ControlFlow::Break(());
        }
        ControlFlow::Continue(())
    });
    found
}
/// Test-only helper: returns `true` when `position` lies strictly between
/// the opening position and the closing position of an inline code region
/// whose terminator is `InlineTerminator::Closed`.
///
/// Fence regions and inline regions with any other terminator are skipped.
/// The scan stops at the first closed inline region that opens at or after
/// `position`.
#[cfg(test)]
pub(crate) fn is_within_complete_inline_code(text: &str, position: usize) -> bool {
    let mut found = false;
    scan_code_regions(text, |region| {
        let CodeRegion::Inline(span) = region else {
            return ControlFlow::Continue(());
        };
        let InlineTerminator::Closed(close_pos) = span.terminator else {
            return ControlFlow::Continue(());
        };
        if span.open_pos >= position {
            return ControlFlow::Break(());
        }
        if position < close_pos {
            found = true;
            return ControlFlow::Break(());
        }
        ControlFlow::Continue(())
    });
    found
}
/// Exclusive upper bound of the positions treated as inside `region`:
/// one past `close_run_start` for a closed fence, otherwise `len + 1`.
#[inline]
pub(crate) fn fence_end(region: &FenceRegion, len: usize) -> usize {
    let last_inside = if region.closed {
        region.close_run_start
    } else {
        len
    };
    last_inside + 1
}
/// Exclusive upper bound of the positions treated as inside an inline
/// region: one past the terminator position for `Closed` and `Newline`
/// terminators, or `len + 1` when the region runs to end of input (`Eof`).
#[inline]
pub(crate) fn inline_end(region: &InlineRegion, len: usize) -> usize {
    let last_inside = match region.terminator {
        InlineTerminator::Closed(pos) => pos,
        InlineTerminator::Newline(pos) => pos,
        InlineTerminator::Eof => len,
    };
    last_inside + 1
}
/// Returns `true` when the byte at `pos` is a backtick belonging to a run of
/// three or more consecutive backticks.
///
/// Returns `false` when `pos` is out of range or does not point at a backtick.
pub fn is_part_of_triple_backtick(text: &str, pos: usize) -> bool {
    let bytes = text.as_bytes();
    if bytes.get(pos) != Some(&b'`') {
        return false;
    }
    // Measure the run by counting backticks on either side of `pos`.
    let before = bytes[..pos]
        .iter()
        .rev()
        .take_while(|&&b| b == b'`')
        .count();
    let after = bytes[pos + 1..]
        .iter()
        .take_while(|&&b| b == b'`')
        .count();
    before + 1 + after >= 3
}
/// Counts the backticks in `text` that are not part of a run of three or
/// more consecutive backticks.
///
/// A backslash causes the following byte to be skipped, so an escaped
/// backtick is never counted itself. It still contributes to the length of
/// the run it sits in, so `` \``` `` is treated as a triple run.
///
/// Each run of backticks is measured once, keeping the scan linear in the
/// length of `text` even for very long runs.
pub fn count_single_backticks(text: &str) -> usize {
    let bytes = text.as_bytes();
    let mut count = 0;
    let mut i = 0;
    while i < bytes.len() {
        match bytes[i] {
            // Skip the backslash together with the byte it escapes.
            b'\\' => i += 2,
            b'`' => {
                // The run may begin before `i` when the preceding backtick
                // was skipped as an escaped byte, so look backward as well.
                let run_start = bytes[..i]
                    .iter()
                    .rposition(|&b| b != b'`')
                    .map_or(0, |p| p + 1);
                let run_end = bytes[i..]
                    .iter()
                    .position(|&b| b != b'`')
                    .map_or(bytes.len(), |p| i + p);
                if run_end - run_start < 3 {
                    count += run_end - i;
                }
                // Everything up to `run_end` is a backtick; jump past it.
                i = run_end;
            }
            _ => i += 1,
        }
    }
    count
}
/// Returns `true` when an inline (`$`) or block (`$$`) math span is still
/// open after scanning the bytes of `text` that precede `position`.
///
/// * `\$` is skipped as an escaped dollar sign.
/// * `$$` toggles block math and resets any open inline math.
/// * A lone `$` toggles inline math, but only while no block math is open.
pub fn is_within_math_block(text: &str, position: usize) -> bool {
    let bytes = text.as_bytes();
    let limit = position.min(bytes.len());
    let (mut inline_open, mut block_open) = (false, false);
    let mut cursor = 0;
    while cursor < limit {
        // The lookahead may read past `position`; only the cursor is bounded.
        let next = bytes.get(cursor + 1).copied();
        cursor += match (bytes[cursor], next) {
            (b'\\', Some(b'$')) => 2,
            (b'$', Some(b'$')) => {
                block_open = !block_open;
                inline_open = false;
                2
            }
            (b'$', _) => {
                if !block_open {
                    inline_open = !inline_open;
                }
                1
            }
            _ => 1,
        };
    }
    inline_open || block_open
}
/// Returns `true` when `position` sits inside the parenthesised URL part of
/// a `[...](...)`-style link or image on the current line.
///
/// The scan walks backward from the byte before `position` (or from byte 0
/// when `position` is 0). It fails on `)` or a newline and stops at the
/// first `(`. That `(` must directly follow `]`, and a `)` must appear at or
/// after `position` before any newline.
pub fn is_within_link_or_image_url(text: &str, position: usize) -> bool {
    let bytes = text.as_bytes();
    let scan_from = position.saturating_sub(1);
    if scan_from >= bytes.len() {
        return false;
    }
    for idx in (0..=scan_from).rev() {
        match bytes[idx] {
            b'(' => {
                let follows_bracket = idx > 0 && bytes[idx - 1] == b']';
                if !follows_bracket {
                    return false;
                }
                // Whichever of `)` / newline comes first decides the answer.
                let first_stop = bytes[position..]
                    .iter()
                    .find(|&&b| b == b')' || b == b'\n');
                return first_stop == Some(&b')');
            }
            b')' | b'\n' => return false,
            _ => {}
        }
    }
    false
}
/// Decides whether `remainder` (the bytes following a `<`) looks like the
/// start of an HTML tag.
///
/// Accepted shapes: an optional leading `/`, then an ASCII letter followed
/// by ASCII alphanumerics or `-`. The name may run to the end of the input,
/// be followed by a space or tab, or be followed by a `/` that is the very
/// last byte. A lone `/` is accepted; empty input is not. Any other byte
/// (including `>`) rejects the input.
pub(crate) fn is_plausible_tag_remainder(remainder: &[u8]) -> bool {
    let (name, has_slash) = match remainder.split_first() {
        Some((b'/', rest)) => (rest, true),
        _ => (remainder, false),
    };
    let (first, tail) = match name.split_first() {
        Some((&first, tail)) => (first, tail),
        // Nothing after the optional slash: only "/" on its own is plausible.
        None => return has_slash,
    };
    if !first.is_ascii_alphabetic() {
        return false;
    }
    for (offset, &byte) in tail.iter().enumerate() {
        match byte {
            b' ' | b'\t' => return true,
            // A slash is only acceptable as the final byte (self-closing tag).
            b'/' => return offset + 1 == tail.len(),
            b'-' => {}
            other if other.is_ascii_alphanumeric() => {}
            _ => return false,
        }
    }
    true
}
/// Test-only helper: returns `true` when `position` appears to sit inside
/// an HTML tag on the current line.
///
/// The scan walks backward from the byte before `position` (or from byte 0
/// when `position` is 0). It fails on `>` or a newline and stops at the
/// first `<`. That `<` must not directly follow an ASCII alphanumeric or
/// `_`, and the bytes after it must satisfy `is_plausible_tag_remainder`.
#[cfg(test)]
pub(crate) fn is_within_html_tag(text: &str, position: usize) -> bool {
    let bytes = text.as_bytes();
    let scan_from = position.saturating_sub(1);
    if scan_from >= bytes.len() {
        return false;
    }
    for idx in (0..=scan_from).rev() {
        match bytes[idx] {
            b'<' => {
                // `a<b` or `x_<y` reads as a comparison, not as a tag opener.
                let glued_to_word = idx > 0
                    && (bytes[idx - 1] == b'_' || bytes[idx - 1].is_ascii_alphanumeric());
                return !glued_to_word && is_plausible_tag_remainder(&bytes[idx + 1..]);
            }
            b'>' | b'\n' => return false,
            _ => {}
        }
    }
    false
}
/// Returns `true` when the line containing byte offset `marker_index`
/// consists only of `marker` bytes, spaces and tabs, with at least three
/// `marker` bytes.
///
/// The line runs from just after the previous `\n` (or the start of `text`)
/// up to the next `\n` at or after `marker_index` (or the end of `text`).
///
/// # Panics
///
/// Panics if `marker_index > text.len()`.
pub fn is_horizontal_rule(text: &str, marker_index: usize, marker: u8) -> bool {
    let bytes = text.as_bytes();
    let (before, from_marker) = bytes.split_at(marker_index);
    let line_start = before
        .iter()
        .rposition(|&b| b == b'\n')
        .map_or(0, |newline| newline + 1);
    let line_end = from_marker
        .iter()
        .position(|&b| b == b'\n')
        .map_or(bytes.len(), |newline| marker_index + newline);
    let line = &bytes[line_start..line_end];

    let only_markers_and_blanks = line
        .iter()
        .all(|&b| b == marker || b == b' ' || b == b'\t');
    only_markers_and_blanks && line.iter().filter(|&&b| b == marker).count() >= 3
}
/// Finds the last occurrence of `delimiter` in `text`, provided that no
/// byte equal to the delimiter's first byte appears after it.
///
/// Windows of `delimiter.len()` bytes are examined from the end of `text`
/// backward. The first matching window yields `Some((start, rest))`, where
/// `start` is the byte offset of the delimiter and `rest` is the text
/// following it. If a window does not match and its last byte equals
/// `delimiter[0]`, the search gives up with `None`.
///
/// Returns `None` for an empty delimiter or when `text` is shorter than it.
pub(crate) fn find_trailing_delimiter<'a>(
    text: &'a str,
    delimiter: &[u8],
) -> Option<(usize, &'a str)> {
    let bytes = text.as_bytes();
    let width = delimiter.len();
    let forbidden = *delimiter.first()?;
    if bytes.len() < width {
        return None;
    }
    // `end` is the exclusive end of the candidate window.
    for end in (width..=bytes.len()).rev() {
        let start = end - width;
        if bytes[start..end] == *delimiter {
            return Some((start, &text[end..]));
        }
        // A stray delimiter byte in the trailing content rules out any
        // earlier match, so stop looking.
        if bytes[end - 1] == forbidden {
            return None;
        }
    }
    None
}
/// Builds a new owned string consisting of `text` followed by `suffix` and
/// returns it as `Cow::Owned`.
pub(crate) fn cow_append<'a>(text: &str, suffix: &str) -> Cow<'a, str> {
    let joined: String = [text, suffix].concat();
    Cow::Owned(joined)
}
/// Returns `true` when `text` ends with an odd number of consecutive
/// backslashes.
pub(crate) fn ends_with_odd_backslashes(text: &str) -> bool {
    // A backslash is a single byte, so the length difference is the count.
    let without_trailing = text.trim_end_matches('\\');
    (text.len() - without_trailing.len()) % 2 == 1
}
// Unit tests for the helper predicates defined in the parent module.
#[cfg(test)]
mod tests {
use super::{
count_single_backticks, ends_with_odd_backslashes, find_trailing_delimiter,
is_empty_or_markers, is_horizontal_rule, is_inside_code_block, is_within_html_tag,
is_within_link_or_image_url, is_within_math_block, is_word_char,
};
// ASCII letters, digits, `_` and non-ASCII letters are word characters;
// a space and `*` are not.
#[test]
fn test_is_word_char() {
assert!(is_word_char('a'));
assert!(is_word_char('Z'));
assert!(is_word_char('0'));
assert!(is_word_char('_'));
assert!(!is_word_char(' '));
assert!(!is_word_char('*'));
assert!(is_word_char('é'));
assert!(is_word_char('中'));
}
// Empty, whitespace-only and marker-only inputs pass; any other character fails.
#[test]
fn test_is_empty_or_markers() {
assert!(is_empty_or_markers(""));
assert!(is_empty_or_markers(" "));
assert!(is_empty_or_markers("*_~`"));
assert!(!is_empty_or_markers("hello"));
assert!(!is_empty_or_markers("*a"));
}
// Parity of the trailing backslash run, including after a multi-byte character.
#[test]
fn test_ends_with_odd_backslashes() {
assert!(!ends_with_odd_backslashes(""));
assert!(!ends_with_odd_backslashes("a"));
assert!(ends_with_odd_backslashes("\\"));
assert!(!ends_with_odd_backslashes("\\\\"));
assert!(ends_with_odd_backslashes("\\\\\\"));
assert!(ends_with_odd_backslashes("a\\"));
assert!(!ends_with_odd_backslashes("a\\\\"));
assert!(ends_with_odd_backslashes("é\\"));
}
// Positions inside/outside fenced and inline code, closed and unclosed.
#[test]
fn test_is_inside_code_block() {
assert!(is_inside_code_block("```code", 5));
assert!(is_inside_code_block("```code```after", 12));
assert!(is_inside_code_block("`code", 3));
assert!(!is_inside_code_block("`code`after", 8));
assert!(is_inside_code_block("```\ncode", 8));
// A `~~~` line does not close a fence opened with backticks.
assert!(is_inside_code_block("```\ncode\n~~~\nmore", 14));
// A three-backtick line does not close a fence opened with four backticks.
assert!(is_inside_code_block("````\ncode\n```\nmore", 15));
// NOTE(review): the next two inputs differ only after the newline yet expect
// opposite results for the same position; presumably the leading indentation
// of the fence is meant to differ — confirm the literals' whitespace is intact.
assert!(is_inside_code_block(" ```\ninside", 10));
assert!(!is_inside_code_block(" ```\nnot-inside", 10));
// An unclosed inline backtick covers the rest of its line but not the next line.
assert!(is_inside_code_block("`unclosed", 5));
assert!(!is_inside_code_block("`unclosed\nnext", 14));
assert!(!is_inside_code_block("plain\n```\ncode", 5));
assert!(is_inside_code_block("plain\n```\ncode", 10));
}
// Open inline math, closed inline math, open block math, and an escaped dollar.
#[test]
fn test_is_within_math_block() {
assert!(is_within_math_block("$x+y", 2));
assert!(!is_within_math_block("$x+y$z", 6));
assert!(is_within_math_block("$$x+y", 3));
assert!(!is_within_math_block("\\$x", 2));
}
// Three or more markers (optionally space-separated) form a rule;
// fewer markers or any other character do not.
#[test]
fn test_is_horizontal_rule() {
assert!(is_horizontal_rule("---", 0, b'-'));
assert!(is_horizontal_rule("***", 0, b'*'));
assert!(is_horizontal_rule("- - -", 0, b'-'));
assert!(!is_horizontal_rule("--", 0, b'-'));
assert!(!is_horizontal_rule("--x", 0, b'-'));
}
// Triple-backtick runs and escaped backticks are not counted.
#[test]
fn test_count_single_backticks() {
assert_eq!(count_single_backticks("`hello`"), 2);
assert_eq!(count_single_backticks("```hello```"), 0);
assert_eq!(count_single_backticks("`hello"), 1);
assert_eq!(count_single_backticks("\\`hello"), 0);
}
// Inside the URL parentheses vs. inside the link text vs. plain text.
#[test]
fn test_is_within_link_or_image_url() {
assert!(is_within_link_or_image_url(
"[text](http://example.com)",
15
));
assert!(!is_within_link_or_image_url(
"[text](http://example.com)",
3
));
assert!(!is_within_link_or_image_url("just text", 3));
}
// Inside a tag, after a closed tag, plain text, and `<` that does not start a tag.
#[test]
fn test_is_within_html_tag() {
assert!(is_within_html_tag("<a href=\"test\">", 5));
assert!(!is_within_html_tag("<a href=\"test\">after", 16));
assert!(!is_within_html_tag("text", 2));
assert!(!is_within_html_tag("name@<example.com", 10));
assert!(!is_within_html_tag("a<b", 2));
}
// The last `__` wins; a lone `_` after the delimiter yields `None`.
#[test]
fn test_find_trailing_delimiter_double_underscore() {
assert_eq!(
find_trailing_delimiter("hello __world", b"__"),
Some((6, "world"))
);
assert_eq!(
find_trailing_delimiter("__bold__inner__text", b"__"),
Some((13, "text"))
);
assert_eq!(
find_trailing_delimiter("__a_b", b"__"),
None );
assert_eq!(find_trailing_delimiter("no delimiters", b"__"), None);
assert_eq!(find_trailing_delimiter("__", b"__"), Some((0, "")));
}
// Same contract with `~~` as the delimiter.
#[test]
fn test_find_trailing_delimiter_double_tilde() {
assert_eq!(
find_trailing_delimiter("hello ~~strike", b"~~"),
Some((6, "strike"))
);
assert_eq!(find_trailing_delimiter("no tildes", b"~~"), None);
assert_eq!(find_trailing_delimiter("~~a~b", b"~~"), None);
}
// A backtick run that is not at the start of its line does not put the
// following line inside a code block.
#[test]
fn is_inside_code_block_rejects_mid_line_backtick_run() {
assert!(!is_inside_code_block("hello ```\ncode", 12));
}
// Same as above for a tilde run.
#[test]
fn is_inside_code_block_rejects_mid_line_tilde_run() {
assert!(!is_inside_code_block("hello ~~~\ncode", 12));
}
// Same input and assertion as the second check in `test_is_inside_code_block`:
// the position after the mid-line backtick run is still reported as inside.
#[test]
fn is_inside_code_block_mid_line_fence_leaves_opener_unclosed() {
assert!(is_inside_code_block("```code```after", 12));
}
}