use dom_query::Selection;
use crate::options::MarkdownOptions;
pub(crate) fn get_tag_name(sel: &Selection) -> String {
sel.nodes()
.first()
.and_then(dom_query::NodeRef::node_name)
.unwrap_or_default()
.to_lowercase()
}
/// Maximum number of consecutive blank lines kept by [`normalize_output`].
const MAX_BLANK_LINES: usize = 2;

/// Tidies generated text line by line.
///
/// * Trailing whitespace is removed from every line.
/// * Runs of blank (whitespace-only) lines are capped at
///   [`MAX_BLANK_LINES`].
/// * Leading and trailing whitespace of the whole document is trimmed and a
///   single `\n` terminator is appended.
///
/// Input that contains nothing but whitespace produces an empty string.
pub(crate) fn normalize_output(text: &str) -> String {
    let mut buf = String::with_capacity(text.len());
    let mut consecutive_blanks: usize = 0;

    for raw_line in text.lines() {
        let content = raw_line.trim_end();
        if content.is_empty() {
            // A whitespace-only line: keep it only while under the cap.
            consecutive_blanks += 1;
            if consecutive_blanks > MAX_BLANK_LINES {
                continue;
            }
        } else {
            consecutive_blanks = 0;
            buf.push_str(content);
        }
        buf.push('\n');
    }

    match buf.trim() {
        "" => String::new(),
        body => format!("{body}\n"),
    }
}
/// Resolves `url` against the base URL configured in `options`.
///
/// The input is returned unchanged when:
///
/// * no base URL is configured,
/// * it is a network-path reference (starts with `//`), or
/// * it already carries a URI scheme (`https:`, `mailto:`, `tel:`, `data:`,
///   ...), detected with the RFC 3986 scheme grammar after skipping leading
///   whitespace.
///
/// Detecting the scheme syntactically (rather than searching for `"://"`
/// anywhere in the string) keeps scheme-only links such as `mailto:` from
/// being glued onto the base, and lets relative references whose query
/// happens to contain `://` be resolved like any other relative reference.
///
/// When the `url` feature is enabled, resolution is attempted with
/// `url::Url::parse` + `join`; if either step fails, or the feature is
/// disabled, the string-based `resolve_url_simple` fallback is used.
pub(crate) fn resolve_url(url: &str, options: &MarkdownOptions) -> String {
    let base_url = match &options.base_url {
        Some(base) => base,
        None => return url.to_string(),
    };

    // Attribute values may carry leading whitespace; ignore it when deciding
    // whether the reference is already absolute.
    let candidate = url.trim_start();
    if candidate.starts_with("//") || has_uri_scheme(candidate) {
        return url.to_string();
    }

    #[cfg(feature = "url")]
    {
        if let Ok(base) = url::Url::parse(base_url) {
            if let Ok(resolved) = base.join(url) {
                return resolved.to_string();
            }
        }
    }

    resolve_url_simple(url, base_url)
}

/// Reports whether `reference` begins with a URI scheme as defined by
/// RFC 3986 section 3.1: `ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) ":"`.
fn has_uri_scheme(reference: &str) -> bool {
    match reference.split_once(':') {
        Some((scheme, _)) => {
            let mut chars = scheme.chars();
            chars.next().map_or(false, |c| c.is_ascii_alphabetic())
                && chars.all(|c| c.is_ascii_alphanumeric() || matches!(c, '+' | '-' | '.'))
        }
        None => false,
    }
}
/// Dependency-free resolution of a relative reference against `base_url`.
///
/// Handles the common reference shapes:
///
/// * empty reference -> the base document (without its fragment),
/// * `#fragment`     -> the base document with the fragment replaced,
/// * `?query`        -> the base document with query and fragment replaced,
/// * `/absolute`     -> the base origin (`scheme://authority`) plus the path,
/// * `relative/path` -> the base "directory" plus the path.
///
/// The query and fragment of the base never take part in path resolution,
/// and the slashes of the `://` separator are never mistaken for path
/// separators, so a base such as `https://example.com` resolves `logo.png`
/// to `https://example.com/logo.png`.
///
/// Dot segments (`./`, `../`) are not normalised; they are appended as-is.
fn resolve_url_simple(url: &str, base_url: &str) -> String {
    // `split` always yields at least one piece, so the fallbacks are never hit.
    let base_without_fragment = base_url.split('#').next().unwrap_or(base_url);
    if url.is_empty() {
        return base_without_fragment.to_string();
    }
    if url.starts_with('#') {
        return format!("{base_without_fragment}{url}");
    }

    let base = base_without_fragment
        .split('?')
        .next()
        .unwrap_or(base_without_fragment);
    if url.starts_with('?') {
        return format!("{base}{url}");
    }

    if url.starts_with('/') {
        // Root-relative: keep only `scheme://authority` from the base.
        let trimmed = base.trim_end_matches('/');
        if let Some(scheme_end) = trimmed.find("://") {
            let authority_start = scheme_end + 3;
            if let Some(path_start) = trimmed[authority_start..].find('/') {
                let origin = &trimmed[..authority_start + path_start];
                return format!("{origin}{url}");
            }
        }
        return format!("{trimmed}{url}");
    }

    if base.ends_with('/') {
        return format!("{base}{url}");
    }

    // Only look for the last path separator *after* the authority, so the
    // `//` in `scheme://` is not treated as the end of a directory.
    let search_from = base.find("://").map_or(0, |scheme_end| scheme_end + 3);
    match base[search_from..].rfind('/') {
        Some(offset) => format!("{}{url}", &base[..=search_from + offset]),
        // Nothing to resolve against.
        None if base.is_empty() => url.to_string(),
        // The base has no path at all: the reference becomes the first segment.
        None => format!("{base}/{url}"),
    }
}
/// Backslash-escapes characters in `text` that Markdown would otherwise
/// interpret as syntax.
///
/// `at_line_start` tells the function whether the first character of `text`
/// sits at the start of an output line; it is re-armed after every `\n`.
/// Spaces and tabs leave the line-start state untouched, so indented
/// markers are still recognised.
///
/// Escaping rules:
///
/// * always: `\`, `` ` ``, `*`, `_`, `[`, `]`, `<`
/// * at line start only: `#` and `>`
/// * at line start and followed by a space: `-` and `+` (bullet markers)
/// * after a line-leading run of digits and followed by a space: `.` and
///   `)` (both are ordered-list delimiters in CommonMark)
/// * `!` when immediately followed by `[` (image syntax)
///
/// Returns the escaped text as a new `String`.
pub(crate) fn escape_markdown_text(text: &str, at_line_start: bool) -> String {
    let mut result = String::with_capacity(text.len() + text.len() / 10);
    let bytes = text.as_bytes();
    let mut at_line_start = at_line_start;
    // True while everything seen on the current line (ignoring leading
    // blanks) has been ASCII digits, i.e. we may be inside "12." / "12)".
    let mut only_digits_on_line = false;

    for (i, c) in text.char_indices() {
        // Look-ahead is only consulted for ASCII `c`, where `i + 1` is the
        // byte offset of the next character.
        let followed_by = |byte: u8| bytes.get(i + 1) == Some(&byte);

        let needs_escape = match c {
            '\n' => {
                result.push('\n');
                at_line_start = true;
                only_digits_on_line = false;
                continue;
            }
            '0'..='9' if at_line_start || only_digits_on_line => {
                // Digits themselves are never escaped; remember that a list
                // number may be in progress and keep that state alive.
                only_digits_on_line = true;
                at_line_start = false;
                result.push(c);
                continue;
            }
            '\\' | '`' | '*' | '_' | '[' | ']' | '<' => true,
            '#' | '>' => at_line_start,
            '-' | '+' => at_line_start && followed_by(b' '),
            '.' | ')' => only_digits_on_line && followed_by(b' '),
            '!' => followed_by(b'['),
            _ => false,
        };

        if needs_escape {
            result.push('\\');
        }
        result.push(c);

        // Any visible character ends both the "line start" and the
        // "digits only" states; blanks leave them as they are.
        if c != ' ' && c != '\t' {
            at_line_start = false;
            only_digits_on_line = false;
        }
    }

    result
}
/// Percent-encodes the characters that would break a Markdown link
/// destination: `(` becomes `%28`, `)` becomes `%29` and a space becomes
/// `%20`. Every other character is copied through unchanged.
pub(crate) fn escape_url(url: &str) -> String {
    let mut escaped = String::with_capacity(url.len());
    for ch in url.chars() {
        match ch {
            '(' => escaped.push_str("%28"),
            ')' => escaped.push_str("%29"),
            ' ' => escaped.push_str("%20"),
            other => escaped.push(other),
        }
    }
    escaped
}
#[cfg(test)]
mod tests {
    use super::*;

    // --- normalize_output -------------------------------------------------

    /// Four blank lines between content collapse to the two-line maximum.
    #[test]
    fn test_normalize_output_collapses_blank_lines() {
        let input = "line1\n\n\n\n\nline2";
        let output = normalize_output(input);
        assert_eq!(output, "line1\n\n\nline2\n");
    }

    /// Trailing spaces and tabs are removed from each line.
    #[test]
    fn test_normalize_output_trims_trailing_whitespace() {
        let input = "line1 \nline2\t\t\n";
        let output = normalize_output(input);
        assert_eq!(output, "line1\nline2\n");
    }

    /// Empty input stays empty (no terminator is added).
    #[test]
    fn test_normalize_output_empty() {
        let input = "";
        let output = normalize_output(input);
        assert_eq!(output, "");
    }

    /// Whitespace-only input normalises to the empty string.
    #[test]
    fn test_normalize_output_whitespace_only() {
        let input = " \n\n\t\t\n ";
        let output = normalize_output(input);
        assert_eq!(output, "");
    }

    // --- resolve_url ------------------------------------------------------

    /// Without a configured base URL the input is returned unchanged.
    #[test]
    fn test_resolve_url_no_base() {
        let options = MarkdownOptions::new();
        assert_eq!(resolve_url("/path/to/file", &options), "/path/to/file");
    }

    /// URLs that are already absolute are not touched.
    #[test]
    fn test_resolve_url_absolute_passthrough() {
        let options = MarkdownOptions::new().base_url("https://example.com");
        assert_eq!(
            resolve_url("https://other.com/page", &options),
            "https://other.com/page"
        );
    }

    /// A root-relative path replaces the base path entirely. The exact
    /// result is asserted so a wrongly joined path cannot slip through.
    #[test]
    fn test_resolve_url_absolute_path() {
        let options = MarkdownOptions::new().base_url("https://example.com/some/page");
        assert_eq!(
            resolve_url("/images/logo.png", &options),
            "https://example.com/images/logo.png"
        );
    }

    /// A relative path is resolved against the directory of the base
    /// document, not appended to the document name.
    #[test]
    fn test_resolve_url_relative_path() {
        let options = MarkdownOptions::new().base_url("https://example.com/docs/page.html");
        assert_eq!(
            resolve_url("images/logo.png", &options),
            "https://example.com/docs/images/logo.png"
        );
    }

    /// `data:` URIs are passed through verbatim.
    #[test]
    fn test_resolve_url_data_uri_passthrough() {
        let options = MarkdownOptions::new().base_url("https://example.com");
        let data_uri = "data:image/png;base64,ABC123";
        assert_eq!(resolve_url(data_uri, &options), data_uri);
    }
}