use regex::Regex;
use std::sync::LazyLock;
static INLINE_CODE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"(`+)([^`]|[^`].*?[^`])(`+)").unwrap());
static INLINE_MATH: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\$\$[^$]*\$\$|\$[^$\n]*\$").unwrap());
static LIST_MARKER: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^\s*[*+-]\s+").unwrap());
static DOC_METADATA_PATTERN: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^\s*\*?\s*\*\*[^*]+\*\*\s*:").unwrap());
static BOLD_TEXT_PATTERN: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r"\*\*[^*\s][^*]*[^*\s]\*\*|\*\*[^*\s]\*\*").unwrap());
static QUICK_DOC_CHECK: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^\s*\*\s+\*").unwrap());
static QUICK_BOLD_CHECK: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\*\*[^*\s]").unwrap());
static TEMPLATE_SHORTCODE_PATTERN: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\{\*.*\*\}").unwrap());
#[derive(Debug, Clone, PartialEq)]
pub struct EmphasisMarker {
pub marker_type: u8, pub count: u8, pub start_pos: usize, }
impl EmphasisMarker {
#[inline]
pub fn end_pos(&self) -> usize {
self.start_pos + self.count as usize
}
#[inline]
pub fn as_char(&self) -> char {
self.marker_type as char
}
}
#[derive(Debug, Clone)]
pub struct EmphasisSpan {
pub opening: EmphasisMarker,
pub closing: EmphasisMarker,
pub content: String,
pub has_leading_space: bool,
pub has_trailing_space: bool,
}
#[inline]
pub fn replace_inline_code(line: &str) -> String {
if !line.contains('`') {
return line.to_string();
}
let mut result = line.to_string();
let mut offset = 0;
for cap in INLINE_CODE.captures_iter(line) {
if let (Some(full_match), Some(_opening), Some(_content), Some(_closing)) =
(cap.get(0), cap.get(1), cap.get(2), cap.get(3))
{
let match_start = full_match.start();
let match_end = full_match.end();
let placeholder = "X".repeat(match_end - match_start);
result.replace_range(match_start + offset..match_end + offset, &placeholder);
offset += placeholder.len() - (match_end - match_start);
}
}
result
}
pub fn replace_inline_math(line: &str) -> String {
if !line.contains('$') {
return line.to_string();
}
let mut result = line.to_string();
let mut offset: isize = 0;
for m in INLINE_MATH.find_iter(line) {
let match_start = m.start();
let match_end = m.end();
let placeholder = "M".repeat(match_end - match_start);
let adjusted_start = (match_start as isize + offset) as usize;
let adjusted_end = (match_end as isize + offset) as usize;
result.replace_range(adjusted_start..adjusted_end, &placeholder);
offset += placeholder.len() as isize - (match_end - match_start) as isize;
}
result
}
#[inline]
pub fn find_emphasis_markers(line: &str) -> Vec<EmphasisMarker> {
if !line.contains('*') && !line.contains('_') {
return Vec::new();
}
let mut markers = Vec::new();
let bytes = line.as_bytes();
let mut i = 0;
while i < bytes.len() {
let byte = bytes[i];
if byte == b'*' || byte == b'_' {
let start_pos = i;
let mut count = 1u8;
while i + (count as usize) < bytes.len() && bytes[i + (count as usize)] == byte && count < 3 {
count += 1;
}
if count == 1 || count == 2 {
markers.push(EmphasisMarker {
marker_type: byte,
count,
start_pos,
});
}
i += count as usize;
} else {
i += 1;
}
}
markers
}
pub fn find_single_emphasis_spans(line: &str, markers: Vec<EmphasisMarker>) -> Vec<EmphasisSpan> {
if markers.len() < 2 {
return Vec::new();
}
let mut spans = Vec::new();
let mut used_markers = vec![false; markers.len()];
for i in 0..markers.len() {
if used_markers[i] || markers[i].count != 1 {
continue;
}
let opening = &markers[i];
for j in (i + 1)..markers.len() {
if used_markers[j] {
continue;
}
let closing = &markers[j];
if closing.marker_type == opening.marker_type && closing.count == 1 {
let content_start = opening.end_pos();
let content_end = closing.start_pos;
if content_end > content_start {
let content = &line[content_start..content_end];
if is_valid_emphasis_content_fast(content) && is_valid_emphasis_span_fast(line, opening, closing) {
let crosses_markers = markers[i + 1..j]
.iter()
.any(|marker| marker.marker_type == opening.marker_type && marker.count == 1);
if !crosses_markers {
let has_leading_space = content.starts_with(' ') || content.starts_with('\t');
let has_trailing_space = content.ends_with(' ') || content.ends_with('\t');
spans.push(EmphasisSpan {
opening: opening.clone(),
closing: closing.clone(),
content: content.to_string(),
has_leading_space,
has_trailing_space,
});
used_markers[i] = true;
used_markers[j] = true;
break;
}
}
}
}
}
}
spans
}
pub fn find_emphasis_spans(line: &str, markers: Vec<EmphasisMarker>) -> Vec<EmphasisSpan> {
if markers.len() < 2 {
return Vec::new();
}
let mut spans = Vec::new();
let mut used_markers = vec![false; markers.len()];
for i in 0..markers.len() {
if used_markers[i] {
continue;
}
let opening = &markers[i];
for j in (i + 1)..markers.len() {
if used_markers[j] {
continue;
}
let closing = &markers[j];
if closing.marker_type == opening.marker_type && closing.count == opening.count {
let content_start = opening.end_pos();
let content_end = closing.start_pos;
if content_end > content_start {
let content = &line[content_start..content_end];
if is_valid_emphasis_content_fast(content) && is_valid_emphasis_span_fast(line, opening, closing) {
let crosses_markers = markers[i + 1..j]
.iter()
.any(|marker| marker.marker_type == opening.marker_type);
if !crosses_markers {
let has_leading_space = content.starts_with(' ') || content.starts_with('\t');
let has_trailing_space = content.ends_with(' ') || content.ends_with('\t');
spans.push(EmphasisSpan {
opening: opening.clone(),
closing: closing.clone(),
content: content.to_string(),
has_leading_space,
has_trailing_space,
});
used_markers[i] = true;
used_markers[j] = true;
break;
}
}
}
}
}
}
spans
}
#[inline]
pub fn is_valid_emphasis_span_fast(line: &str, opening: &EmphasisMarker, closing: &EmphasisMarker) -> bool {
let content_start = opening.end_pos();
let content_end = closing.start_pos;
if content_end <= content_start {
return false;
}
let content = &line[content_start..content_end];
if content.trim().is_empty() {
return false;
}
let bytes = line.as_bytes();
let valid_opening = opening.start_pos == 0
|| matches!(
bytes.get(opening.start_pos.saturating_sub(1)),
Some(&b' ')
| Some(&b'\t')
| Some(&b'(')
| Some(&b'[')
| Some(&b'{')
| Some(&b'"')
| Some(&b'\'')
| Some(&b'>')
);
let valid_closing = closing.end_pos() >= bytes.len()
|| matches!(
bytes.get(closing.end_pos()),
Some(&b' ')
| Some(&b'\t')
| Some(&b')')
| Some(&b']')
| Some(&b'}')
| Some(&b'"')
| Some(&b'\'')
| Some(&b'.')
| Some(&b',')
| Some(&b'!')
| Some(&b'?')
| Some(&b';')
| Some(&b':')
| Some(&b'<')
);
valid_opening && valid_closing && !content.contains('\n')
}
#[inline]
pub fn is_valid_emphasis_content_fast(content: &str) -> bool {
!content.trim().is_empty()
}
pub fn is_likely_list_line(line: &str) -> bool {
LIST_MARKER.is_match(line)
}
pub fn has_doc_patterns(line: &str) -> bool {
if line.contains("{*") && TEMPLATE_SHORTCODE_PATTERN.is_match(line) {
return true;
}
(QUICK_DOC_CHECK.is_match(line) || QUICK_BOLD_CHECK.is_match(line))
&& (DOC_METADATA_PATTERN.is_match(line) || BOLD_TEXT_PATTERN.is_match(line))
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_emphasis_marker_parsing() {
let markers = find_emphasis_markers("This has *single* and **double** emphasis");
assert_eq!(markers.len(), 4);
let markers = find_emphasis_markers("*start* and *end*");
assert_eq!(markers.len(), 4); }
#[test]
fn test_single_emphasis_span_detection() {
let markers = find_emphasis_markers("This has *valid* emphasis and **strong** too");
let spans = find_single_emphasis_spans("This has *valid* emphasis and **strong** too", markers);
assert_eq!(spans.len(), 1); assert_eq!(spans[0].content, "valid");
assert!(!spans[0].has_leading_space);
assert!(!spans[0].has_trailing_space);
}
#[test]
fn test_emphasis_with_spaces() {
let markers = find_emphasis_markers("This has * invalid * emphasis");
let spans = find_emphasis_spans("This has * invalid * emphasis", markers);
assert_eq!(spans.len(), 1);
assert_eq!(spans[0].content, " invalid ");
assert!(spans[0].has_leading_space);
assert!(spans[0].has_trailing_space);
}
#[test]
fn test_mixed_markers() {
let markers = find_emphasis_markers("This has *asterisk* and _underscore_ emphasis");
let spans = find_single_emphasis_spans("This has *asterisk* and _underscore_ emphasis", markers);
assert_eq!(spans.len(), 2);
assert_eq!(spans[0].opening.as_char(), '*');
assert_eq!(spans[1].opening.as_char(), '_');
}
#[test]
fn test_template_shortcode_detection() {
assert!(has_doc_patterns(
"{* ../../docs_src/cookie_param_models/tutorial001.py hl[9:12,16] *}"
));
assert!(has_doc_patterns(
"{* ../../docs_src/conditional_openapi/tutorial001.py hl[6,11] *}"
));
assert!(has_doc_patterns("{* file.py *}"));
assert!(has_doc_patterns("{* ../path/to/file.py ln[1-10] *}"));
assert!(!has_doc_patterns("This has *emphasis* text"));
assert!(!has_doc_patterns("This has * spaces * in emphasis"));
assert!(!has_doc_patterns("{* incomplete"));
}
}