use regex::Regex;
use std::sync::LazyLock;
// Matches a string that is, in its entirety, one inline link or image:
// an optional leading `!`, a `[...]` text part (backslash escapes allowed,
// unescaped `]` not allowed), and a `(...)` destination part that may contain
// parenthesised groups one level deep (e.g. `.../Rust_(programming_language)`).
// Compiled lazily on first use; the pattern is a fixed literal.
static INLINE_LINK_RE: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r"^!?\[(?:[^\]\\]|\\.)*\]\((?:[^()]*\([^()]*\))*[^()]*\)$").unwrap());
// Matches a string that is, in its entirety, one reference-style link or image:
// an optional leading `!`, a `[...]` text part (backslash escapes allowed),
// followed by a `[...]` label part that may be empty (`[text][]`).
// Compiled lazily on first use; the pattern is a fixed literal.
static REF_LINK_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^!?\[(?:[^\]\\]|\\.)*\]\[[^\]]*\]$").unwrap());
/// Returns `true` when `line` ends with a Markdown hard line break marker.
///
/// A hard break is written either as two or more trailing spaces or as a
/// trailing backslash. A single trailing space is ordinary trailing
/// whitespace and does not count. One trailing `\r` (from a CRLF line
/// ending) is ignored before the check.
pub(crate) fn has_hard_break(line: &str) -> bool {
    let line = line.strip_suffix('\r').unwrap_or(line);
    line.ends_with("  ") || line.ends_with('\\')
}
/// Trims trailing whitespace from `s` while keeping a hard line break marker.
///
/// * One trailing `\r` is dropped first.
/// * A string ending in a backslash is returned unchanged (backslash hard break).
/// * A string ending in two or more spaces is trimmed and then given back
///   exactly two trailing spaces (the canonical space-style hard break),
///   unless nothing but whitespace remains, in which case the result is empty.
/// * Anything else is simply right-trimmed; a single trailing space is not a
///   hard break and is removed.
pub(crate) fn trim_preserving_hard_break(s: &str) -> String {
    let s = s.strip_suffix('\r').unwrap_or(s);
    if s.ends_with('\\') {
        return s.to_string();
    }

    let content = s.trim_end();
    if s.ends_with("  ") && !content.is_empty() {
        // Normalise any run of 2+ trailing spaces to exactly two.
        format!("{content}  ")
    } else {
        content.to_string()
    }
}
/// Splits the lines of a paragraph into segments at hard line breaks.
///
/// Every line for which [`has_hard_break`] returns `true` closes the current
/// segment and is included as that segment's last line. Lines after the final
/// hard break (if any) form a last segment. An empty input yields no segments,
/// and no segment is ever empty.
pub(crate) fn split_into_segments(para_lines: &[String]) -> Vec<Vec<String>> {
    // `split_inclusive` keeps the matching line at the end of each chunk and
    // yields nothing for an empty slice, so each line is cloned exactly once.
    para_lines
        .split_inclusive(|line| has_hard_break(line))
        .map(|segment| segment.to_vec())
        .collect()
}
pub(crate) fn extract_list_marker_and_content(line: &str) -> (String, String) {
let indent_len = line.len() - line.trim_start().len();
let indent = &line[..indent_len];
let trimmed = &line[indent_len..];
for bullet in ["- ", "* ", "+ "] {
if let Some(rest) = trimmed.strip_prefix(bullet) {
let marker_prefix = &bullet[..bullet.len() - 1]; for checkbox in ["[ ] ", "[x] ", "[X] "] {
if let Some(content) = rest.strip_prefix(checkbox) {
return (
format!("{indent}{marker_prefix} {checkbox}"),
trim_preserving_hard_break(content),
);
}
}
return (format!("{indent}{bullet}"), trim_preserving_hard_break(rest));
}
}
let mut chars = trimmed.chars();
let mut marker_content = String::new();
while let Some(c) = chars.next() {
marker_content.push(c);
if c == '.' {
if let Some(next) = chars.next()
&& next == ' '
{
marker_content.push(next);
let rest = chars.as_str();
for checkbox in ["[ ] ", "[x] ", "[X] "] {
if let Some(content) = rest.strip_prefix(checkbox) {
return (
format!("{indent}{marker_content}{checkbox}"),
trim_preserving_hard_break(content),
);
}
}
let content = trim_preserving_hard_break(rest);
return (format!("{indent}{marker_content}"), content);
}
break;
}
}
(String::new(), line.to_string())
}
/// Returns whether `line` is a horizontal rule (thematic break).
///
/// This is a thin alias: the decision is delegated entirely to
/// `crate::utils::thematic_break::is_thematic_break`.
pub(crate) fn is_horizontal_rule(line: &str) -> bool {
crate::utils::thematic_break::is_thematic_break(line)
}
/// Returns `true` when `line` starts with an ordered list marker.
///
/// The marker is one or more ASCII digits, followed by a period and a space
/// (`"1. "`, `"42. "`). The line must start with the marker directly; leading
/// indentation is not skipped. Unicode numeric characters that are not ASCII
/// digits (for example `½` or Arabic-Indic digits) do not form a marker, and
/// a number followed by a bare period (`"2019."`) is not a list item.
pub(crate) fn is_numbered_list_item(line: &str) -> bool {
    // ASCII digits are single bytes, so the count is also a valid byte offset.
    let digits = line.bytes().take_while(u8::is_ascii_digit).count();
    digits > 0 && line[digits..].starts_with(". ")
}
/// Returns `true` when `line` begins with a list marker.
///
/// A bullet marker is `-`, `*` or `+` immediately followed by a space;
/// anything else is checked with [`is_numbered_list_item`]. The line is
/// inspected from its first character, so indentation is not skipped.
pub(crate) fn is_list_item(line: &str) -> bool {
    // The bullet characters and the space are all ASCII, so a byte-level
    // match on the first two bytes is equivalent to comparing characters.
    let starts_with_bullet = matches!(line.as_bytes(), [b'-' | b'*' | b'+', b' ', ..]);
    starts_with_bullet || is_numbered_list_item(line)
}
/// Returns `true` when `trimmed` starts with a GitHub-style alert marker.
///
/// The marker has the shape `[!NAME]`, where `NAME` is one or more ASCII
/// uppercase letters. Any text after the closing bracket is ignored; the
/// marker must be at the very start of the input.
pub(crate) fn is_github_alert_marker(trimmed: &str) -> bool {
    trimmed.strip_prefix("[!").is_some_and(|after_bang| {
        // Length of the uppercase name; ASCII letters are one byte each.
        let name_len = after_bang
            .bytes()
            .take_while(u8::is_ascii_uppercase)
            .count();
        name_len > 0 && after_bang.as_bytes().get(name_len) == Some(&b']')
    })
}
/// Returns `true` when the only content on `line` is a single link or image.
///
/// Leading whitespace and any number of blockquote markers (`>`) are skipped
/// first. If what remains is a list item, its marker (and task checkbox) is
/// removed as well. The remaining text is then checked with
/// `is_link_with_optional_emphasis`.
pub(crate) fn is_standalone_link_or_image_line(line: &str) -> bool {
    // Dropping every leading `>` or whitespace character is the same as
    // repeatedly trimming whitespace and peeling one `>` at a time.
    let body = line.trim_start_matches(|c: char| c == '>' || c.is_whitespace());

    if !is_list_item(body) {
        return is_link_with_optional_emphasis(body);
    }

    let (_marker, item_text) = extract_list_marker_and_content(body);
    is_link_with_optional_emphasis(&item_text)
}
/// Returns `true` when the only content on `line` is HTML markup.
///
/// Leading whitespace and any number of blockquote markers (`>`) are skipped
/// first. If what remains is a list item, its marker is removed too. The
/// remaining text is then judged by `is_html_only_content`.
pub(crate) fn is_html_only_line(line: &str) -> bool {
    // Equivalent to alternately trimming whitespace and stripping `>` markers.
    let unquoted = line.trim_start_matches(|c: char| c == '>' || c.is_whitespace());

    match is_list_item(unquoted) {
        true => {
            let (_marker, item_text) = extract_list_marker_and_content(unquoted);
            is_html_only_content(&item_text)
        }
        false => is_html_only_content(unquoted),
    }
}
/// Decides whether `s` (ignoring surrounding whitespace) is HTML-only content.
///
/// Two shapes qualify, and both must begin with `<`:
/// 1. content made up purely of tags (see `is_content_all_html_tags`), or
/// 2. content that ends with `>` and mentions a URL-bearing attribute
///    (`href=`, `src=`, `srcset=` or `poster=`), e.g. `<a href="...">text</a>`.
fn is_html_only_content(s: &str) -> bool {
    // Attribute prefixes that indicate the element carries a URL.
    const URL_ATTRIBUTES: [&str; 4] = ["href=", "src=", "srcset=", "poster="];

    let content = s.trim();
    // An empty string does not start with `<`, so this also rejects "".
    if !content.starts_with('<') {
        return false;
    }

    is_content_all_html_tags(content)
        || (content.ends_with('>') && URL_ATTRIBUTES.iter().any(|attr| content.contains(attr)))
}
/// Returns `true` when `s` consists solely of complete HTML tags, optionally
/// separated by whitespace.
///
/// The input is trimmed and must start with `<`. It is then scanned with a
/// small state machine:
/// * outside a tag, only `<` (opening a tag) and whitespace are allowed;
///   any other character means there is text content and the answer is `false`;
/// * inside a tag, `"` or `'` opens a quoted attribute value in which `>` does
///   not close the tag; an unquoted `>` closes the tag.
///
/// At least one tag must have been closed, and the scan must not end in the
/// middle of a tag or quoted value, so `"<br/><unclosed"` is rejected.
fn is_content_all_html_tags(s: &str) -> bool {
    let s = s.trim();
    if !s.starts_with('<') {
        return false;
    }

    let mut in_tag = false;
    let mut quote_char: Option<char> = None;
    let mut found_complete_tag = false;

    for c in s.chars() {
        match (quote_char, in_tag) {
            // Inside a quoted attribute value: only the matching quote matters.
            (Some(open_quote), _) => {
                if c == open_quote {
                    quote_char = None;
                }
            }
            // Inside a tag, outside quotes.
            (None, true) => match c {
                '"' | '\'' => quote_char = Some(c),
                '>' => {
                    in_tag = false;
                    found_complete_tag = true;
                }
                _ => {}
            },
            // Between tags: anything other than whitespace is text content.
            (None, false) => {
                if c == '<' {
                    in_tag = true;
                } else if !c.is_whitespace() {
                    return false;
                }
            }
        }
    }

    // A trailing unterminated tag (or unterminated quote, which implies
    // `in_tag`) means the content is not made of complete tags only.
    found_complete_tag && !in_tag
}
/// Returns `true` when `s` is exactly one Markdown link or image, optionally
/// wrapped in emphasis.
///
/// Surrounding whitespace is ignored. If the text starts with one to three
/// emphasis characters (`*` or `_`) and ends with the same number of them,
/// that wrapper is removed (`**[text](url)**` becomes `[text](url)`). The
/// remainder must match either the inline form (`[text](url)`,
/// `![alt](url)`) or the reference form (`[text][ref]`, `[text][]`) in full.
///
/// Input made up only of emphasis characters (such as `"**"`) is not a link
/// and yields `false`.
fn is_link_with_optional_emphasis(s: &str) -> bool {
    let is_emphasis = |c: &char| matches!(*c, '*' | '_');

    let mut s = s.trim();
    if s.is_empty() {
        return false;
    }

    let leading = s.chars().take_while(is_emphasis).count();
    if (1..=3).contains(&leading) {
        let trailing = s.chars().rev().take_while(is_emphasis).count();
        // Emphasis characters are single-byte ASCII, so the counts double as
        // byte lengths. The length guard covers strings consisting solely of
        // emphasis characters, where the leading and trailing runs overlap
        // and slicing would otherwise panic with start > end.
        if trailing == leading && leading + trailing <= s.len() {
            s = s[leading..s.len() - trailing].trim();
        }
    }

    if s.is_empty() {
        return false;
    }
    INLINE_LINK_RE.is_match(s) || REF_LINK_RE.is_match(s)
}
#[cfg(test)]
mod tests {
    use super::*;

    // ---- list marker detection -------------------------------------------

    #[test]
    fn test_numbered_list_item_requires_space_after_period() {
        assert!(is_numbered_list_item("1. Item"));
        assert!(is_numbered_list_item("10. Item"));
        assert!(is_numbered_list_item("99. Long number"));
        assert!(is_numbered_list_item("123. Triple digits"));
        // A number followed by a bare period is prose (e.g. a year), not a list.
        assert!(!is_numbered_list_item("2019."));
        assert!(!is_numbered_list_item("1999."));
        assert!(!is_numbered_list_item("2023."));
        assert!(!is_numbered_list_item("1."));
        assert!(!is_numbered_list_item("a. Item"));
        assert!(!is_numbered_list_item(". Item"));
        assert!(!is_numbered_list_item("Item"));
        assert!(!is_numbered_list_item("1 Item"));
        assert!(!is_numbered_list_item("123"));
    }

    #[test]
    fn test_extract_list_marker_task_checkboxes() {
        assert_eq!(
            extract_list_marker_and_content("- [ ] some content"),
            ("- [ ] ".to_string(), "some content".to_string())
        );
        assert_eq!(
            extract_list_marker_and_content("- [x] done item"),
            ("- [x] ".to_string(), "done item".to_string())
        );
        assert_eq!(
            extract_list_marker_and_content("- [X] also done"),
            ("- [X] ".to_string(), "also done".to_string())
        );
        assert_eq!(
            extract_list_marker_and_content("* [ ] star task"),
            ("* [ ] ".to_string(), "star task".to_string())
        );
        assert_eq!(
            extract_list_marker_and_content("+ [ ] plus task"),
            ("+ [ ] ".to_string(), "plus task".to_string())
        );
        // Indentation is kept as part of the marker.
        assert_eq!(
            extract_list_marker_and_content(" - [ ] indented task"),
            (" - [ ] ".to_string(), "indented task".to_string())
        );
        assert_eq!(
            extract_list_marker_and_content("- regular item"),
            ("- ".to_string(), "regular item".to_string())
        );
        assert_eq!(
            extract_list_marker_and_content("1. [ ] unchecked ordered"),
            ("1. [ ] ".to_string(), "unchecked ordered".to_string())
        );
        assert_eq!(
            extract_list_marker_and_content("1. [x] checked ordered"),
            ("1. [x] ".to_string(), "checked ordered".to_string())
        );
        assert_eq!(
            extract_list_marker_and_content("1. [X] checked upper ordered"),
            ("1. [X] ".to_string(), "checked upper ordered".to_string())
        );
        assert_eq!(
            extract_list_marker_and_content("99. [x] multi-digit ordered"),
            ("99. [x] ".to_string(), "multi-digit ordered".to_string())
        );
    }

    #[test]
    fn test_is_horizontal_rule_commonmark_indent() {
        // Up to three spaces of indentation are allowed.
        assert!(is_horizontal_rule("---"));
        assert!(is_horizontal_rule(" ---"));
        assert!(is_horizontal_rule("  ---"));
        assert!(is_horizontal_rule("   ---"));
        assert!(is_horizontal_rule("   ***"));
        assert!(is_horizontal_rule("   - - -"));
        // Four or more spaces make it an indented code block instead.
        assert!(!is_horizontal_rule("    ---"));
        assert!(!is_horizontal_rule("     ---"));
        assert!(!is_horizontal_rule("    ***"));
        assert!(!is_horizontal_rule("    - - -"));
        // Trailing whitespace is fine.
        assert!(is_horizontal_rule("---  "));
        assert!(is_horizontal_rule("  ---  "));
        assert!(is_horizontal_rule("----"));
        assert!(is_horizontal_rule("***"));
        assert!(is_horizontal_rule("___"));
        assert!(is_horizontal_rule("- - -"));
        assert!(!is_horizontal_rule("--"));
        assert!(!is_horizontal_rule("text"));
        assert!(!is_horizontal_rule(""));
    }

    #[test]
    fn test_is_list_item_bullet_and_numbered() {
        assert!(is_list_item("- Item"));
        assert!(is_list_item("* Item"));
        assert!(is_list_item("+ Item"));
        assert!(!is_list_item("-Item"));
        assert!(!is_list_item("*Item"));
        assert!(is_list_item("1. Item"));
        assert!(is_list_item("99. Item"));
        assert!(!is_list_item("2019."));
    }

    // ---- GitHub alert markers --------------------------------------------

    #[test]
    fn test_is_github_alert_marker() {
        assert!(is_github_alert_marker("[!NOTE]"));
        assert!(is_github_alert_marker("[!TIP]"));
        assert!(is_github_alert_marker("[!WARNING]"));
        assert!(is_github_alert_marker("[!CAUTION]"));
        assert!(is_github_alert_marker("[!IMPORTANT]"));
        assert!(is_github_alert_marker("[!NOTE] Some inline content here"));
        assert!(is_github_alert_marker("[!WARNING] Do not do this"));
        assert!(is_github_alert_marker("[!CUSTOM]"));
        assert!(!is_github_alert_marker("[!note]"));
        assert!(!is_github_alert_marker("[Note]"));
        assert!(!is_github_alert_marker("[!]"));
        assert!(!is_github_alert_marker("[!NOTE"));
        assert!(!is_github_alert_marker("NOTE"));
        assert!(!is_github_alert_marker("[link]: url"));
        assert!(!is_github_alert_marker("Some text [!NOTE]"));
    }

    // ---- standalone link / image lines -----------------------------------

    #[test]
    fn test_standalone_link_bare() {
        assert!(is_standalone_link_or_image_line("[text](https://example.com)"));
        assert!(is_standalone_link_or_image_line(
            "[long title here](https://example.com/path)"
        ));
        assert!(is_standalone_link_or_image_line(" [text](https://example.com)"));
        // One level of balanced parentheses inside the URL is supported.
        assert!(is_standalone_link_or_image_line(
            "[Rust](https://en.wikipedia.org/wiki/Rust_(programming_language))"
        ));
        assert!(is_standalone_link_or_image_line("[A](https://example.com/A_(B)_C)"));
    }

    #[test]
    fn test_standalone_image() {
        assert!(is_standalone_link_or_image_line(
            "![alt text](https://example.com/path/to/image.png)"
        ));
        assert!(is_standalone_link_or_image_line("  ![alt](https://example.com/image.png)"));
    }

    #[test]
    fn test_standalone_link_in_list() {
        assert!(is_standalone_link_or_image_line("- [text](url)"));
        assert!(is_standalone_link_or_image_line("* [text](url)"));
        assert!(is_standalone_link_or_image_line("+ [text](url)"));
        assert!(is_standalone_link_or_image_line("1. [text](url)"));
        assert!(is_standalone_link_or_image_line("99. [text](url)"));
        assert!(is_standalone_link_or_image_line(" - [text](url)"));
        assert!(is_standalone_link_or_image_line("- [ ] [text](url)"));
        assert!(is_standalone_link_or_image_line("- [x] [text](url)"));
        assert!(is_standalone_link_or_image_line("1. [x] [text](url)"));
        assert!(is_standalone_link_or_image_line("1. [ ] [text](url)"));
    }

    #[test]
    fn test_standalone_link_in_blockquote() {
        assert!(is_standalone_link_or_image_line("> [text](url)"));
        assert!(is_standalone_link_or_image_line(">> [text](url)"));
        assert!(is_standalone_link_or_image_line("> > [text](url)"));
    }

    #[test]
    fn test_standalone_link_with_emphasis() {
        assert!(is_standalone_link_or_image_line("**[text](url)**"));
        assert!(is_standalone_link_or_image_line("*[text](url)*"));
        assert!(is_standalone_link_or_image_line("__[text](url)__"));
        assert!(is_standalone_link_or_image_line("_[text](url)_"));
        assert!(is_standalone_link_or_image_line("***[text](url)***"));
        assert!(is_standalone_link_or_image_line("- **[text](url)**"));
    }

    #[test]
    fn test_standalone_link_reference_style() {
        assert!(is_standalone_link_or_image_line("[text][ref]"));
        assert!(is_standalone_link_or_image_line("![alt][ref]"));
        assert!(is_standalone_link_or_image_line("- [text][ref]"));
        assert!(is_standalone_link_or_image_line("> [text][ref]"));
        assert!(is_standalone_link_or_image_line("[text][]"));
        assert!(is_standalone_link_or_image_line("- [text][]"));
    }

    #[test]
    fn test_not_standalone_link() {
        assert!(!is_standalone_link_or_image_line("Some text [link](url)"));
        assert!(!is_standalone_link_or_image_line("See [link](url) for details"));
        assert!(!is_standalone_link_or_image_line("Just some long text"));
        assert!(!is_standalone_link_or_image_line(""));
        assert!(!is_standalone_link_or_image_line("   "));
        assert!(!is_standalone_link_or_image_line("[link1](url1) [link2](url2)"));
        assert!(!is_standalone_link_or_image_line("[link](url) extra text"));
    }

    // ---- HTML-only lines ---------------------------------------------------

    #[test]
    fn test_html_only_badge_line() {
        assert!(is_html_only_line(
            r#"<a href="https://dotfyle.com/plugins/chrisgrieser/nvim-rulebook"><img alt="badge" src="https://dotfyle.com/plugins/chrisgrieser/nvim-rulebook/shield"/></a>"#
        ));
    }

    #[test]
    fn test_html_only_self_closing_tags() {
        assert!(is_html_only_line(
            r#"<img src="https://example.com/image.png" alt="screenshot" width="800" height="600"/>"#
        ));
        assert!(is_html_only_line(r#"<br/>"#));
        assert!(is_html_only_line(r#"<hr />"#));
    }

    #[test]
    fn test_html_only_multiple_tags() {
        assert!(is_html_only_line(r#"<img src="a.png"/><img src="b.png"/>"#));
        assert!(is_html_only_line(r#"<br/><br/><br/>"#));
    }

    #[test]
    fn test_html_only_empty_element() {
        assert!(is_html_only_line(r#"<video src="long-url.mp4" controls></video>"#));
        assert!(is_html_only_line(r#"<div></div>"#));
    }

    #[test]
    fn test_html_only_with_whitespace_between_tags() {
        assert!(is_html_only_line(r#"<img src="a.png"/> <img src="b.png"/>"#));
    }

    #[test]
    fn test_html_only_quoted_angle_brackets() {
        // A `>` inside a quoted attribute value must not terminate the tag.
        assert!(is_html_only_line(r#"<img alt="a > b" src="test.png"/>"#));
        assert!(is_html_only_line(r#"<img alt='a > b' src="test.png"/>"#));
    }

    #[test]
    fn test_html_only_in_blockquote() {
        assert!(is_html_only_line(r#"> <img src="long-url.png" alt="screenshot"/>"#));
        assert!(is_html_only_line(r#">> <a href="url"><img src="img"/></a>"#));
    }

    #[test]
    fn test_html_only_in_list() {
        assert!(is_html_only_line(r#"- <img src="long-url.png" alt="screenshot"/>"#));
        assert!(is_html_only_line(r#"1. <a href="url"><img src="img"/></a>"#));
        assert!(is_html_only_line(r#" - <img src="long-url.png"/>"#));
    }

    #[test]
    fn test_html_only_link_with_text_and_url() {
        assert!(is_html_only_line(
            r#"<a href="https://example.com/very-long-path">Click here for details</a>"#
        ));
        assert!(is_html_only_line(
            r#"<a href="https://example.com/very-long-path" target="_blank">Click here for details</a>"#
        ));
        assert!(is_html_only_line(
            r#"<a href="https://example.com/path"><img src="https://example.com/badge.svg" alt="status"/></a>"#
        ));
    }

    #[test]
    fn test_not_html_only_text_before_tags() {
        assert!(!is_html_only_line(r#"Click here: <a href="url">link</a>"#));
        assert!(!is_html_only_line(r#"See <img src="url"/> for details"#));
    }

    #[test]
    fn test_not_html_only_text_after_tags() {
        assert!(!is_html_only_line(r#"<a href="url">link</a> - click above"#));
        assert!(!is_html_only_line(r#"<img src="url"/> is an image"#));
    }

    #[test]
    fn test_not_html_only_formatting_tags_without_urls() {
        assert!(!is_html_only_line(
            r#"<b>This is very long bold text that exceeds the line length limit</b>"#
        ));
        assert!(!is_html_only_line(
            r#"<p>This is a very long paragraph written in HTML tags for some reason</p>"#
        ));
        assert!(!is_html_only_line(
            r#"<span style="color:red">Some styled text that is quite long</span>"#
        ));
        assert!(!is_html_only_line(
            r#"<em>Emphasized text that goes on and on and on</em>"#
        ));
        assert!(!is_html_only_line(r#"<b>bold</b> and <i>italic</i>"#));
    }

    #[test]
    fn test_not_html_only_plain_text() {
        assert!(!is_html_only_line("Just some long text without any HTML"));
        assert!(!is_html_only_line(""));
        assert!(!is_html_only_line("   "));
    }

    #[test]
    fn test_not_html_only_incomplete_tag() {
        assert!(!is_html_only_line("<unclosed"));
        assert!(!is_html_only_line(r#"<a href="url">text"#));
    }

    #[test]
    fn test_html_only_comment() {
        assert!(is_html_only_line(
            "<!-- this is a long HTML comment that spans many characters -->"
        ));
    }

    #[test]
    fn test_html_only_media_elements() {
        assert!(is_html_only_line(
            r#"<video src="https://example.com/very-long-path/video.mp4" poster="https://example.com/thumb.jpg" controls></video>"#
        ));
        assert!(is_html_only_line(
            r#"<audio src="https://example.com/very-long-path/audio.mp3" controls></audio>"#
        ));
        assert!(is_html_only_line(
            r#"<source srcset="https://example.com/image-large.webp" media="(min-width: 800px)"/>"#
        ));
        assert!(is_html_only_line(
            r#"<picture><source srcset="large.webp"/><img src="fallback.png"/></picture>"#
        ));
    }

    #[test]
    fn test_html_only_in_list_with_url_text() {
        assert!(is_html_only_line(
            r#"- <a href="https://example.com/very-long-path">documentation link</a>"#
        ));
    }

    #[test]
    fn test_html_only_in_blockquote_with_url_text() {
        assert!(is_html_only_line(
            r#"> <a href="https://example.com/very-long-path">documentation link</a>"#
        ));
    }
}