use regex::Regex;
use std::sync::LazyLock;
/// Reports whether `position` falls inside one of the half-open `[start, end)` spans.
///
/// The scan is decided by the first span that either starts after `position`
/// or ends after it, so `spans` is expected to be ordered by ascending start
/// offset; a span listed after one that already starts beyond `position` is
/// never inspected.
#[inline]
fn position_in_spans(position: usize, spans: &[(usize, usize)]) -> bool {
    spans
        .iter()
        // The first span that is not entirely before `position` settles the answer.
        .find(|&&(start, end)| position < start || position < end)
        // It contains `position` only when it does not begin after it.
        .is_some_and(|&(start, _)| position >= start)
}
/// Collects the `(start, end)` byte offsets of every match of `pattern` in `line`,
/// in the order the regex iterator yields them.
#[inline]
fn find_regex_spans(line: &str, pattern: &Regex) -> Vec<(usize, usize)> {
    let mut spans = Vec::new();
    for found in pattern.find_iter(line) {
        spans.push((found.start(), found.end()));
    }
    spans
}
/// Finds spans of the form `<delim>content<delim>` that use a *single* delimiter.
///
/// A span is recorded only when:
/// - the opening delimiter is not inside any of `double_spans` and is not
///   immediately followed by another `delim`,
/// - the content is non-empty and contains no whitespace,
/// - the closing delimiter is not immediately followed by another `delim`.
///
/// Scanning for a closing delimiter is abandoned as soon as it runs into a
/// position covered by `double_spans`. The returned `(start, end)` pairs are
/// byte offsets into `line` and include both delimiters.
fn find_single_delim_spans(line: &str, delim: char, double_spans: &[(usize, usize)]) -> Vec<(usize, usize)> {
    let closing_width = delim.len_utf8();
    let mut found = Vec::new();
    let mut cursor = line.char_indices().peekable();

    while let Some((open_at, opener)) = cursor.next() {
        // Only a delimiter outside the double-delimiter spans can open a span.
        if opener != delim || position_in_spans(open_at, double_spans) {
            continue;
        }

        // A doubled delimiter is not a single-delimiter opener: swallow the pair.
        if cursor.next_if(|&(_, next)| next == delim).is_some() {
            continue;
        }

        let mut saw_content = false;
        let mut saw_whitespace = false;

        while let Some((at, inner)) = cursor.next() {
            if position_in_spans(at, double_spans) {
                break;
            }

            if inner != delim {
                saw_content = true;
                saw_whitespace |= inner.is_whitespace();
                continue;
            }

            // Candidate closing delimiter. When it is doubled, the second
            // delimiter stays unconsumed and is re-examined by the outer loop.
            let doubled = matches!(cursor.peek(), Some(&(_, next)) if next == delim);
            if saw_content && !saw_whitespace && !doubled {
                found.push((open_at, at + closing_width));
            }
            break;
        }
    }

    found
}
/// Coalesces overlapping or touching `(start, end)` spans into maximal spans.
///
/// Each span is compared only against the most recently emitted one, so the
/// input is expected to be ordered by ascending start offset.
fn merge_spans(spans: &[(usize, usize)]) -> Vec<(usize, usize)> {
    let mut merged: Vec<(usize, usize)> = Vec::with_capacity(spans.len());

    for &(start, end) in spans {
        match merged.last_mut() {
            // Overlaps or touches the previous span: stretch it.
            Some(tail) if start <= tail.1 => tail.1 = tail.1.max(end),
            // First span, or a gap before this one: start a new span.
            _ => merged.push((start, end)),
        }
    }

    merged
}
/// Lazily compiled regex for an inline-hilite code span: a backtick, `#!`, a
/// language token (an ASCII letter followed by ASCII letters, digits, `_`, `+`
/// or `-`), at least one whitespace character, one or more non-backtick
/// characters, and a closing backtick. Capture group 1 is the language token.
///
/// The `unwrap` can only fail if this hard-coded pattern does not compile.
static INLINE_HILITE_PATTERN: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r"`#!([a-zA-Z][a-zA-Z0-9_+-]*)\s+[^`]+`").unwrap());
/// Lazily compiled regex that matches `#!` followed by a language token at the
/// very start of the input (the pattern is anchored with `^`). Capture group 1
/// is the language token.
///
/// The `unwrap` can only fail if this hard-coded pattern does not compile.
static INLINE_HILITE_SHEBANG: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^#!([a-zA-Z][a-zA-Z0-9_+-]*)").unwrap());
/// Returns `true` when `line` contains an inline-hilite code span such as
/// `` `#!python print('hello')` ``.
#[inline]
pub fn contains_inline_hilite(line: &str) -> bool {
    // Cheap substring probes first; the regex only runs when both markers exist.
    if !line.contains('`') || !line.contains("#!") {
        return false;
    }
    INLINE_HILITE_PATTERN.is_match(line)
}
#[inline]
pub fn is_inline_hilite_content(content: &str) -> bool {
INLINE_HILITE_SHEBANG.is_match(content)
}
/// Lazily compiled regex for keys markup such as `++ctrl+alt+delete++`: an
/// opening `++`, one or more key names (ASCII letters, digits, `_`, `-`)
/// separated by single `+` characters, and a closing `++`. Capture group 1 is
/// the `+`-separated key list without the surrounding `++`.
///
/// The `unwrap` can only fail if this hard-coded pattern does not compile.
static KEYS_PATTERN: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r"\+\+([a-zA-Z0-9_-]+(?:\+[a-zA-Z0-9_-]+)*)\+\+").unwrap());
/// Lowercase names of commonly used keyboard keys.
///
/// NOTE(review): nothing in the visible part of this file reads this table;
/// it is presumably consumed by callers elsewhere — confirm before changing it.
pub const COMMON_KEYS: &[&str] = &[
// Modifier keys.
"ctrl",
"alt",
"shift",
"cmd",
"meta",
"win",
"windows",
"option",
// Editing and whitespace keys.
"enter",
"return",
"tab",
"space",
"backspace",
"delete",
"del",
"insert",
"ins",
// Navigation keys.
"home",
"end",
"pageup",
"pagedown",
"up",
"down",
"left",
"right",
// Escape, lock and system keys.
"escape",
"esc",
"capslock",
"numlock",
"scrolllock",
"printscreen",
"pause",
"break",
// Function keys.
"f1",
"f2",
"f3",
"f4",
"f5",
"f6",
"f7",
"f8",
"f9",
"f10",
"f11",
"f12",
];
/// A single keys-markup occurrence (for example `++ctrl+c++`) found in a line.
///
/// All fields are totally comparable, so the type derives `Eq` in addition to
/// `PartialEq`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct KeyboardShortcut {
    /// The complete matched text, including the surrounding `++` fences.
    pub full_text: String,
    /// The individual key names, i.e. the text between the fences split on `+`.
    pub keys: Vec<String>,
    /// Byte offset in the line where the match starts.
    pub start: usize,
    /// Byte offset in the line just past the end of the match.
    pub end: usize,
}
/// Returns the byte spans of every keys-markup occurrence (`++key+key++`) in `line`.
pub fn find_keys_spans(line: &str) -> Vec<(usize, usize)> {
    // Without a literal `++` the regex cannot match, so skip running it.
    if line.contains("++") {
        find_regex_spans(line, &KEYS_PATTERN)
    } else {
        Vec::new()
    }
}
/// Returns `true` when `line` contains at least one keys-markup occurrence.
#[inline]
pub fn contains_keys(line: &str) -> bool {
    if !line.contains("++") {
        return false;
    }
    KEYS_PATTERN.is_match(line)
}
/// Extracts every keys-markup occurrence in `line` as a [`KeyboardShortcut`].
///
/// For each match the full text is kept, the key names are obtained by
/// stripping the two-character `++` fences and splitting on `+`, and the
/// match's byte offsets are recorded in `start` / `end`.
pub fn find_keyboard_shortcuts(line: &str) -> Vec<KeyboardShortcut> {
    let mut shortcuts = Vec::new();
    if !line.contains("++") {
        return shortcuts;
    }

    for found in KEYS_PATTERN.find_iter(line) {
        let text = found.as_str();
        // The pattern guarantees a leading and a trailing `++` (two bytes each).
        let between_fences = &text[2..text.len() - 2];
        shortcuts.push(KeyboardShortcut {
            full_text: text.to_owned(),
            keys: between_fences.split('+').map(str::to_owned).collect(),
            start: found.start(),
            end: found.end(),
        });
    }

    shortcuts
}
/// Returns `true` when byte offset `position` lies inside a keys-markup span of `line`.
pub fn is_in_keys(line: &str, position: usize) -> bool {
    let key_spans = find_keys_spans(line);
    position_in_spans(position, &key_spans)
}
/// Lazily compiled regex for insert markup (`^^text^^`): an opening `^^`, one
/// or more runs of non-caret characters separated by single carets, and a
/// closing `^^`.
///
/// The `unwrap` can only fail if this hard-coded pattern does not compile.
static INSERT_PATTERN: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\^\^[^\^]+(?:\^[^\^]+)*\^\^").unwrap());
/// Returns the byte spans of every insert markup occurrence (`^^text^^`) in `line`.
pub fn find_insert_spans(line: &str) -> Vec<(usize, usize)> {
    match line.contains("^^") {
        true => find_regex_spans(line, &INSERT_PATTERN),
        // No doubled caret at all: nothing can match.
        false => Vec::new(),
    }
}
/// Returns the byte spans of every superscript occurrence (`^text^`) in `line`.
///
/// Insert spans (`^^text^^`) are located first and passed along as the
/// double-delimiter spans, so carets inside them are not treated as
/// superscript delimiters.
pub fn find_superscript_spans(line: &str) -> Vec<(usize, usize)> {
    if line.contains('^') {
        let inserts = find_insert_spans(line);
        find_single_delim_spans(line, '^', &inserts)
    } else {
        Vec::new()
    }
}
/// Returns `true` when `line` contains at least one superscript span (`^text^`).
#[inline]
pub fn contains_superscript(line: &str) -> bool {
    let superscripts = find_superscript_spans(line);
    !superscripts.is_empty()
}
/// Returns `true` when `line` contains at least one insert span (`^^text^^`).
#[inline]
pub fn contains_insert(line: &str) -> bool {
    if !line.contains("^^") {
        return false;
    }
    INSERT_PATTERN.is_match(line)
}
/// Returns `true` when byte offset `position` lies inside caret-based markup
/// of `line`, i.e. inside an insert span (`^^text^^`) or a superscript span
/// (`^text^`).
pub fn is_in_caret_markup(line: &str, position: usize) -> bool {
    if !line.contains('^') {
        return false;
    }

    let inserts = find_insert_spans(line);
    // Superscript spans are only computed when no insert span already matched.
    position_in_spans(position, &inserts)
        || position_in_spans(position, &find_single_delim_spans(line, '^', &inserts))
}
/// Lazily compiled regex for strikethrough markup (`~~text~~`): an opening
/// `~~`, one or more runs of non-tilde characters separated by single tildes,
/// and a closing `~~`.
///
/// The `unwrap` can only fail if this hard-coded pattern does not compile.
static STRIKETHROUGH_PATTERN: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"~~[^~]+(?:~[^~]+)*~~").unwrap());
/// Returns the byte spans of every strikethrough occurrence (`~~text~~`) in `line`.
pub fn find_strikethrough_spans(line: &str) -> Vec<(usize, usize)> {
    let has_double_tilde = line.contains("~~");
    if has_double_tilde {
        find_regex_spans(line, &STRIKETHROUGH_PATTERN)
    } else {
        Vec::new()
    }
}
/// Returns the byte spans of every subscript occurrence (`~text~`) in `line`.
///
/// Strikethrough spans (`~~text~~`) are located first and passed along as the
/// double-delimiter spans, so tildes inside them are not treated as subscript
/// delimiters.
pub fn find_subscript_spans(line: &str) -> Vec<(usize, usize)> {
    if line.contains('~') {
        let strikes = find_strikethrough_spans(line);
        find_single_delim_spans(line, '~', &strikes)
    } else {
        Vec::new()
    }
}
/// Returns `true` when `line` contains at least one subscript span (`~text~`).
#[inline]
pub fn contains_subscript(line: &str) -> bool {
    let subscripts = find_subscript_spans(line);
    !subscripts.is_empty()
}
/// Returns `true` when `line` contains at least one strikethrough span (`~~text~~`).
#[inline]
pub fn contains_strikethrough(line: &str) -> bool {
    if !line.contains("~~") {
        return false;
    }
    STRIKETHROUGH_PATTERN.is_match(line)
}
/// Returns `true` when byte offset `position` lies inside tilde-based markup
/// of `line`, i.e. inside a strikethrough span (`~~text~~`) or a subscript
/// span (`~text~`).
pub fn is_in_tilde_markup(line: &str, position: usize) -> bool {
    if !line.contains('~') {
        return false;
    }

    let strikes = find_strikethrough_spans(line);
    // Subscript spans are only computed when no strikethrough span already matched.
    position_in_spans(position, &strikes)
        || position_in_spans(position, &find_single_delim_spans(line, '~', &strikes))
}
/// Lazily compiled regex for mark markup (`==text==`): an opening `==`, one or
/// more characters other than `=` (capture group 1), and a closing `==`.
///
/// The `unwrap` can only fail if this hard-coded pattern does not compile.
static MARK_PATTERN: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"==([^=]+)==").unwrap());
/// Returns the byte spans of every mark occurrence (`==text==`) in `line`.
pub fn find_mark_spans(line: &str) -> Vec<(usize, usize)> {
    // Without a literal `==` the regex cannot match, so skip running it.
    if line.contains("==") {
        find_regex_spans(line, &MARK_PATTERN)
    } else {
        Vec::new()
    }
}
/// Returns `true` when `line` contains at least one mark span (`==text==`).
#[inline]
pub fn contains_mark(line: &str) -> bool {
    if !line.contains("==") {
        return false;
    }
    MARK_PATTERN.is_match(line)
}
/// Returns `true` when byte offset `position` lies inside a mark span of `line`.
pub fn is_in_mark(line: &str, position: usize) -> bool {
    let mark_spans = find_mark_spans(line);
    position_in_spans(position, &mark_spans)
}
/// Lazily compiled regex matching any one of the literal "smart symbol"
/// sequences: `(c)`, `(C)`, `(r)`, `(R)`, `(tm)`, `(TM)`, `(p)`, `...`, two or
/// three hyphens, `<->`, `<-`, `->`, `<=>`, `<=`, `=>`, `1/4`, `1/2`, `3/4`,
/// `+-` and `!=`.
///
/// The longer arrows (`<->`, `<=>`) are listed before their shorter prefixes
/// (`<-`, `<=`); the `regex` crate prefers earlier alternatives, so the longer
/// form is matched as a whole.
///
/// The `unwrap` can only fail if this hard-coded pattern does not compile.
static SMART_SYMBOL_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(r"(?:\(c\)|\(C\)|\(r\)|\(R\)|\(tm\)|\(TM\)|\(p\)|\.\.\.|-{2,3}|<->|<-|->|<=>|<=|=>|1/4|1/2|3/4|\+-|!=)")
.unwrap()
});
/// Returns the byte spans of every smart-symbol sequence (such as `(c)`, `->`
/// or `1/2`) in `line`.
///
/// A handful of cheap substring probes run first; the regex is only consulted
/// when at least one of them is present in the line.
pub fn find_smart_symbol_spans(line: &str) -> Vec<(usize, usize)> {
    // Substrings of which at least one occurs in every non-parenthesised symbol.
    const HINTS: [&str; 10] = ["...", "--", "->", "<-", "=>", "<=", "1/", "3/", "+-", "!="];

    let may_match = line.contains('(') || HINTS.iter().any(|&hint| line.contains(hint));
    if may_match {
        find_regex_spans(line, &SMART_SYMBOL_PATTERN)
    } else {
        Vec::new()
    }
}
/// Returns `true` when `line` contains at least one smart-symbol sequence.
#[inline]
pub fn contains_smart_symbols(line: &str) -> bool {
    let symbols = find_smart_symbol_spans(line);
    !symbols.is_empty()
}
/// Returns `true` when byte offset `position` lies inside a smart-symbol
/// sequence of `line`.
pub fn is_in_smart_symbol(line: &str, position: usize) -> bool {
    let symbol_spans = find_smart_symbol_spans(line);
    position_in_spans(position, &symbol_spans)
}
pub fn is_in_pymdown_markup(line: &str, position: usize) -> bool {
is_in_keys(line, position)
|| is_in_caret_markup(line, position)
|| is_in_tilde_markup(line, position)
|| is_in_mark(line, position)
|| is_in_smart_symbol(line, position)
}
/// Returns a copy of `line` in which every keys, insert, superscript,
/// strikethrough, subscript and mark span is overwritten with spaces.
///
/// One space is written per masked byte, so the result has the same byte
/// length as `line` and byte offsets outside the masked spans are unchanged.
/// Smart-symbol sequences are not masked. A line without any such markup is
/// returned as an unmodified copy.
pub fn mask_pymdown_markup(line: &str) -> String {
    let mut markup = find_keys_spans(line);

    if line.contains('^') {
        let inserts = find_insert_spans(line);
        let superscripts = find_single_delim_spans(line, '^', &inserts);
        markup.extend(inserts);
        markup.extend(superscripts);
    }

    if line.contains('~') {
        let strikes = find_strikethrough_spans(line);
        let subscripts = find_single_delim_spans(line, '~', &strikes);
        markup.extend(strikes);
        markup.extend(subscripts);
    }

    markup.extend(find_mark_spans(line));

    if markup.is_empty() {
        return line.to_string();
    }

    // Merging needs the spans ordered by start offset.
    markup.sort_unstable_by_key(|span| span.0);

    let mut masked = String::with_capacity(line.len());
    let mut cursor = 0;
    for (start, end) in merge_spans(&markup) {
        // Copy the untouched text before the span, then blank the span itself.
        masked.push_str(&line[cursor..start]);
        masked.extend(std::iter::repeat(' ').take(end - start));
        cursor = end;
    }
    masked.push_str(&line[cursor..]);
    masked
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Returns the text of `line` covered by a `(start, end)` byte span.
    fn covered(line: &str, span: (usize, usize)) -> &str {
        &line[span.0..span.1]
    }

    /// Asserts that `check` returns `expected` for every line in `lines`.
    fn assert_all(check: fn(&str) -> bool, expected: bool, lines: &[&str]) {
        for &line in lines {
            assert_eq!(check(line), expected, "line {line:?}");
        }
    }

    /// Asserts the outcome of a position probe for each `(position, inside)` pair.
    fn assert_positions(probe: fn(&str, usize) -> bool, line: &str, cases: &[(usize, bool)]) {
        for &(position, inside) in cases {
            assert_eq!(probe(line, position), inside, "position {position} in {line:?}");
        }
    }

    // ---- span helpers ----

    #[test]
    fn test_position_in_spans_empty() {
        for position in [0, 100] {
            assert!(!position_in_spans(position, &[]), "position {position}");
        }
    }

    #[test]
    fn test_position_in_spans_early_exit() {
        let spans = [(10, 20), (30, 40)];
        // Before the first span, between the spans, and after the last span.
        for position in [5, 25, 50] {
            assert!(!position_in_spans(position, &spans), "position {position}");
        }
    }

    #[test]
    fn test_position_in_spans_inside() {
        let spans = [(10, 20), (30, 40)];
        for position in [10, 15, 19, 30] {
            assert!(position_in_spans(position, &spans), "position {position}");
        }
        // The end offset is exclusive.
        assert!(!position_in_spans(20, &spans));
    }

    #[test]
    fn test_merge_spans_empty() {
        assert!(merge_spans(&[]).is_empty());
    }

    #[test]
    fn test_merge_spans_no_overlap() {
        let merged = merge_spans(&[(0, 5), (10, 15), (20, 25)]);
        assert_eq!(merged, vec![(0, 5), (10, 15), (20, 25)]);
    }

    #[test]
    fn test_merge_spans_overlapping() {
        let merged = merge_spans(&[(0, 10), (5, 15), (20, 25)]);
        assert_eq!(merged, vec![(0, 15), (20, 25)]);
    }

    #[test]
    fn test_merge_spans_adjacent() {
        let merged = merge_spans(&[(0, 10), (10, 20)]);
        assert_eq!(merged, vec![(0, 20)]);
    }

    // ---- inline hilite ----

    #[test]
    fn test_contains_inline_hilite() {
        assert_all(
            contains_inline_hilite,
            true,
            &[
                "`#!python print('hello')`",
                "Use `#!js alert('hi')` for alerts",
                "`#!c++ cout << x;`",
            ],
        );
        assert_all(
            contains_inline_hilite,
            false,
            &["`regular code`", "#! not in backticks", "`#!` empty"],
        );
    }

    #[test]
    fn test_is_inline_hilite_content() {
        assert_all(is_inline_hilite_content, true, &["#!python print()", "#!js code"]);
        assert_all(
            is_inline_hilite_content,
            false,
            &["regular code", " #!python with space"],
        );
    }

    // ---- keys ----

    #[test]
    fn test_contains_keys() {
        assert_all(
            contains_keys,
            true,
            &[
                "Press ++ctrl++ to continue",
                "++ctrl+alt+delete++",
                "Use ++cmd+shift+p++ for command palette",
            ],
        );
        assert_all(
            contains_keys,
            false,
            &["Use + for addition", "a++ increment", "++incomplete"],
        );
    }

    #[test]
    fn test_find_keyboard_shortcuts() {
        let two = find_keyboard_shortcuts("Press ++ctrl+c++ then ++ctrl+v++");
        assert_eq!(two.len(), 2);
        assert_eq!(two[0].keys, vec!["ctrl", "c"]);
        assert_eq!(two[1].keys, vec!["ctrl", "v"]);

        let one = find_keyboard_shortcuts("++ctrl+alt+delete++");
        assert_eq!(one.len(), 1);
        assert_eq!(one[0].keys, vec!["ctrl", "alt", "delete"]);
    }

    #[test]
    fn test_is_in_keys() {
        assert_positions(
            is_in_keys,
            "Press ++ctrl++ here",
            &[(0, false), (5, false), (6, true), (10, true), (13, true), (14, false)],
        );
    }

    // ---- caret markup ----

    #[test]
    fn test_contains_superscript() {
        assert_all(contains_superscript, true, &["E=mc^2^", "x^n^ power"]);
        assert_all(contains_superscript, false, &["no caret here", "^^insert^^"]);
    }

    #[test]
    fn test_contains_insert() {
        assert_all(contains_insert, true, &["^^inserted text^^", "Some ^^new^^ text"]);
        assert_all(contains_insert, false, &["^superscript^", "no markup"]);
    }

    #[test]
    fn test_find_superscript_spans() {
        let line = "E=mc^2^";
        let spans = find_superscript_spans(line);
        assert_eq!(spans.len(), 1);
        assert_eq!(covered(line, spans[0]), "^2^");
    }

    #[test]
    fn test_superscript_not_inside_insert() {
        let spans = find_superscript_spans("^^some^x^text^^");
        assert!(spans.is_empty(), "Superscript inside insert should not be detected");
    }

    #[test]
    fn test_is_in_caret_markup() {
        assert_positions(
            is_in_caret_markup,
            "Text ^super^ here",
            &[(0, false), (5, true), (8, true), (13, false)],
        );
        assert_positions(is_in_caret_markup, "Text ^^insert^^ here", &[(5, true), (10, true)]);
    }

    // ---- tilde markup ----

    #[test]
    fn test_contains_subscript() {
        assert_all(contains_subscript, true, &["H~2~O", "x~n~ power"]);
        assert_all(contains_subscript, false, &["no tilde here", "~~strikethrough~~"]);
    }

    #[test]
    fn test_contains_strikethrough() {
        assert_all(
            contains_strikethrough,
            true,
            &["~~deleted text~~", "Some ~~old~~ text", "~~a~b~~"],
        );
        assert_all(contains_strikethrough, false, &["~subscript~", "no markup"]);
    }

    #[test]
    fn test_find_subscript_spans() {
        let line = "H~2~O";
        let spans = find_subscript_spans(line);
        assert_eq!(spans.len(), 1);
        assert_eq!(covered(line, spans[0]), "~2~");
    }

    #[test]
    fn test_subscript_not_inside_strikethrough() {
        let spans = find_subscript_spans("~~some~x~text~~");
        assert!(
            spans.is_empty(),
            "Subscript inside strikethrough should not be detected"
        );
    }

    #[test]
    fn test_multiple_subscripts() {
        let line = "~a~ and ~b~";
        let spans = find_subscript_spans(line);
        assert_eq!(spans.len(), 2);
        assert_eq!(covered(line, spans[0]), "~a~");
        assert_eq!(covered(line, spans[1]), "~b~");
    }

    #[test]
    fn test_subscript_no_whitespace() {
        let spans = find_subscript_spans("~no spaces allowed~");
        assert!(spans.is_empty(), "Subscript with whitespace should not match");
    }

    #[test]
    fn test_is_in_tilde_markup() {
        assert_positions(
            is_in_tilde_markup,
            "Text ~sub~ here",
            &[(0, false), (5, true), (7, true), (12, false)],
        );
        assert_positions(is_in_tilde_markup, "Text ~~strike~~ here", &[(5, true), (10, true)]);
    }

    #[test]
    fn test_subscript_vs_strikethrough_coexist() {
        let line = "H~2~O is ~~not~~ water";
        assert!(contains_subscript(line));
        assert!(contains_strikethrough(line));
    }

    #[test]
    fn test_strikethrough_with_internal_tilde() {
        let line = "~~a~b~~";
        assert!(contains_strikethrough(line));

        let strikes = find_strikethrough_spans(line);
        assert_eq!(strikes.len(), 1);
        assert_eq!(covered(line, strikes[0]), "~~a~b~~");

        // The inner single tilde belongs to the strikethrough, not to a subscript.
        assert!(!contains_subscript(line));
    }

    // ---- mark ----

    #[test]
    fn test_contains_mark() {
        assert_all(
            contains_mark,
            true,
            &["This is ==highlighted== text", "==important=="],
        );
        assert_all(contains_mark, false, &["no highlight", "a == b comparison"]);
    }

    #[test]
    fn test_is_in_mark() {
        assert_positions(
            is_in_mark,
            "Text ==highlight== more",
            &[(0, false), (5, true), (10, true), (19, false)],
        );
    }

    // ---- smart symbols ----

    #[test]
    fn test_contains_smart_symbols() {
        assert_all(
            contains_smart_symbols,
            true,
            &[
                "Copyright (c) 2024",
                "This is (tm) trademarked",
                "Left arrow <- here",
                "Right arrow -> there",
                "Em dash --- here",
                "Fraction 1/2",
            ],
        );
        assert_all(
            contains_smart_symbols,
            false,
            &["No symbols here", "(other) parentheses"],
        );
    }

    #[test]
    fn test_is_in_smart_symbol() {
        assert_positions(
            is_in_smart_symbol,
            "Copyright (c) text",
            &[(0, false), (10, true), (11, true), (12, true), (14, false)],
        );
    }

    // ---- combined helpers ----

    #[test]
    fn test_is_in_pymdown_markup() {
        let inside = [
            ("++ctrl++", 2),
            ("^super^", 1),
            ("~sub~", 1),
            ("~~strike~~", 2),
            ("==mark==", 2),
            ("(c)", 1),
        ];
        for (line, position) in inside {
            assert!(is_in_pymdown_markup(line, position), "{line:?} at {position}");
        }
        assert!(!is_in_pymdown_markup("plain text", 5));
    }

    #[test]
    fn test_mask_pymdown_markup() {
        let line = "Press ++ctrl++ and ^super^ with ==mark==";
        let masked = mask_pymdown_markup(line);
        for gone in ["++", "^super^", "==mark=="] {
            assert!(!masked.contains(gone), "{gone:?} should be masked");
        }
        for kept in ["Press", "and", "with"] {
            assert!(masked.contains(kept), "{kept:?} should survive");
        }
        assert_eq!(masked.len(), line.len());
    }

    #[test]
    fn test_mask_pymdown_markup_with_tilde() {
        let line = "H~2~O is ~~deleted~~ water";
        let masked = mask_pymdown_markup(line);
        for gone in ["~2~", "~~deleted~~"] {
            assert!(!masked.contains(gone), "{gone:?} should be masked");
        }
        for kept in ["H", "O is", "water"] {
            assert!(masked.contains(kept), "{kept:?} should survive");
        }
        assert_eq!(masked.len(), line.len());
    }

    #[test]
    fn test_mask_preserves_unmasked_text() {
        let line = "plain text without markup";
        assert_eq!(mask_pymdown_markup(line), line);
    }

    #[test]
    fn test_mask_complex_mixed_markup() {
        let line = "++ctrl++ ^2^ ~x~ ~~old~~ ==new==";
        let masked = mask_pymdown_markup(line);
        for gone in ["++", "^2^", "~x~", "~~old~~", "==new=="] {
            assert!(!masked.contains(gone), "{gone:?} should be masked");
        }
        assert_eq!(masked.len(), line.len());
    }

    // ---- edge cases ----

    #[test]
    fn test_empty_line() {
        assert!(!contains_keys(""));
        assert!(!contains_superscript(""));
        assert!(!contains_subscript(""));
        assert!(!contains_mark(""));
        assert_eq!(mask_pymdown_markup(""), "");
    }

    #[test]
    fn test_unclosed_delimiters() {
        assert!(!contains_superscript("^unclosed"));
        assert!(!contains_subscript("~unclosed"));
        assert!(!contains_mark("==unclosed"));
        assert!(!contains_keys("++unclosed"));
    }

    #[test]
    fn test_adjacent_markup() {
        // The closing caret of `^a^` is doubled, so only `^b^` qualifies.
        let line = "^a^^b^";
        let spans = find_superscript_spans(line);
        assert_eq!(spans.len(), 1);
        assert_eq!(covered(line, spans[0]), "^b^");
    }

    #[test]
    fn test_triple_tilde() {
        let line = "~~~a~~~";
        let strikes = find_strikethrough_spans(line);
        assert_eq!(strikes.len(), 1);
        assert_eq!(covered(line, strikes[0]), "~~a~~");
    }
}