use regex::Regex;
use super::nfd;
/// Divisor used by `estimate_tokens`: the byte length of the text is divided
/// by this value (rounding up) to obtain the token estimate.
pub const TOKEN_CHAR_RATIO: u8 = 4;
/// Length in bytes of a `\n` line break, as recorded and skipped by `split_lines`.
const LF_LENGTH: usize = 1;
/// Minimum byte length a trimmed value must have before `strip_outer_quotes`
/// checks it for a surrounding pair of quotes.
const MIN_QUOTED_LENGTH: usize = 2;
/// Regex source for a heading line: one to six `#` at the start, at least one
/// whitespace character, then the rest of the line as a capture group.
const HEADING_PATTERN: &str = r"(?u)^#{1,6}\s+(.*)$";
/// Regex source for a fence line: a run of three or more backticks or three or
/// more tildes at the start (captured), followed by anything up to the end.
const FENCE_PATTERN: &str = r"(?u)^(`{3,}|~{3,})\s*.*$";
/// One line of a larger text together with its position in that text.
///
/// The field descriptions below reflect how `split_lines` populates the struct.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LineSpan {
/// Byte length of the line break that terminated this line; `0` when the
/// line ran to the end of the input without a break.
pub break_length: usize,
/// Byte offset just past the last byte of the line's text (the break itself
/// is not included).
pub end: usize,
/// Line number, counted from 1.
pub line_number: u32,
/// Byte offset of the first byte of the line's text.
pub start: usize,
/// Owned copy of the line's text, without the line break.
pub text: String,
}
/// Splits `content` into [`LineSpan`]s, breaking on `\n` only.
///
/// Each span records the byte offsets of the line's text within `content`, a
/// 1-based line number, and the length of the break that followed it. A `\r`
/// preceding the `\n` is not treated specially and stays part of the line
/// text. Empty input yields no spans, and a trailing `\n` does not produce an
/// extra empty line after it.
#[must_use]
pub fn split_lines(content: &str) -> Vec<LineSpan> {
    let mut spans = Vec::new();
    let mut cursor = 0;
    let mut number: u32 = 1;
    for segment in content.split_inclusive('\n') {
        match segment.strip_suffix('\n') {
            // A terminated line: record it and step over the break.
            Some(body) => {
                let end = cursor + body.len();
                spans.push(LineSpan {
                    break_length: LF_LENGTH,
                    end,
                    line_number: number,
                    start: cursor,
                    text: body.to_string(),
                });
                cursor = end + LF_LENGTH;
                number += 1;
            }
            // Only the final segment can lack a break; it runs to the end of the input.
            None => spans.push(LineSpan {
                break_length: 0,
                end: content.len(),
                line_number: number,
                start: cursor,
                text: segment.to_string(),
            }),
        }
    }
    spans
}
/// Compiled regular expressions used to classify individual lines.
struct Patterns {
/// Compiled form of `FENCE_PATTERN`.
fence: Regex,
/// Compiled form of `HEADING_PATTERN`.
heading: Regex,
}
impl Patterns {
    /// Compiles the fence and heading patterns.
    ///
    /// Panics if either pattern constant is rejected by the regex compiler.
    fn new() -> Self {
        let fence = match Regex::new(FENCE_PATTERN) {
            Ok(compiled) => compiled,
            Err(_) => panic!("valid fence regex"),
        };
        let heading = match Regex::new(HEADING_PATTERN) {
            Ok(compiled) => compiled,
            Err(_) => panic!("valid heading regex"),
        };
        Self { fence, heading }
    }
}
// Thread-local set of compiled patterns. Each thread that touches `PATTERNS`
// builds its own copy via `Patterns::new()` the first time it accesses it.
thread_local! {
static PATTERNS: Patterns = Patterns::new();
}
/// Returns `true` when `line`, with surrounding whitespace removed, matches
/// the fence pattern (a leading run of three or more backticks or tildes).
#[must_use]
pub fn is_fence_line(line: &str) -> bool {
    let candidate = line.trim();
    PATTERNS.with(|patterns| patterns.fence.is_match(candidate))
}
/// Returns `true` when `line`, with surrounding whitespace removed, matches
/// the heading pattern (one to six `#` followed by whitespace).
#[must_use]
pub fn is_heading_line(line: &str) -> bool {
    let candidate = line.trim();
    PATTERNS.with(|patterns| patterns.heading.is_match(candidate))
}
/// Extracts the text of a heading line, dropping the `#` markers around it.
///
/// Surrounding whitespace is trimmed, up to six leading `#` are removed, and
/// an optional closing run of `#` is removed as well. The closing run is only
/// treated as a marker when it is separated from the text by whitespace (or
/// when nothing but `#` remains), so headings such as `# C#` keep their
/// trailing `#` instead of being truncated to `C`.
///
/// The function does not check that `line` is a heading; callers that need
/// that should test it with `is_heading_line` first.
#[must_use]
pub fn strip_heading_text(line: &str) -> String {
    let trimmed = line.trim();
    // `#` is a single byte, so the counted chars double as a byte offset.
    let hash_count = trimmed.chars().take_while(|&c| c == '#').count().min(6);
    let body = trimmed[hash_count..].trim_start();
    let without_closing = body.trim_end_matches('#');
    let is_closing_sequence =
        without_closing.is_empty() || without_closing.ends_with(char::is_whitespace);
    let text = if is_closing_sequence {
        without_closing
    } else {
        // The trailing hashes are glued to the text, so they belong to it.
        body
    };
    text.trim().to_string()
}
/// Estimates how many tokens `text` occupies.
///
/// The estimate is the byte length of `text` divided by [`TOKEN_CHAR_RATIO`],
/// rounded up, and never less than 1 (so empty text also counts as 1).
#[must_use]
pub fn estimate_tokens(text: &str) -> usize {
    let bytes_per_token = usize::from(TOKEN_CHAR_RATIO);
    // The floor of 1 also covers the empty string, whose quotient is 0.
    text.len().div_ceil(bytes_per_token).max(1)
}
/// Produces the canonical form of a keyword: surrounding whitespace is
/// trimmed, the result is passed through `nfd::normalize`, and the outcome is
/// lowercased.
#[must_use]
pub fn normalize_keyword(value: &str) -> String {
    let trimmed = value.trim();
    let normalized = nfd::normalize(trimmed);
    normalized.to_lowercase()
}
/// Produces the canonical form of a vault path: every backslash is replaced
/// with a forward slash and the result is passed through `nfd::normalize`.
#[must_use]
pub fn normalize_vault_path(value: &str) -> String {
    let forward_slashed = value.replace('\\', "/");
    nfd::normalize(&forward_slashed)
}
/// The components of a wikilink body, as produced by `parse_wikilink`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParsedWikiLink {
/// Text following the first `|`, if any.
pub alias: Option<String>,
/// Text following the first `#` of the target portion, if any.
pub heading: Option<String>,
/// The whole target portion (everything before the first `|`), trimmed,
/// still including any `#heading` suffix.
pub raw_target: String,
/// The target portion up to (not including) the first `#`, trimmed.
pub target: String,
}
/// Parses the inner text of a wikilink (`target#heading|alias`) into its parts.
///
/// The text is split at the first `|` into a target portion and an alias; the
/// target portion is then split at the first `#` into a target and a heading.
/// Every part is trimmed. A heading or alias that is empty after trimming is
/// reported as `None`, so `Note|  ` has no alias rather than an empty one.
/// `raw_target` keeps the trimmed target portion including any `#heading`.
#[must_use]
pub fn parse_wikilink(raw: &str) -> ParsedWikiLink {
    let (target_part, alias_part) = raw.split_once('|').unwrap_or((raw, ""));
    let raw_target = target_part.trim();
    let (target, heading) = match raw_target.split_once('#') {
        Some((name, anchor)) => (name.trim(), trimmed_non_empty(anchor)),
        None => (raw_target, None),
    };
    ParsedWikiLink {
        alias: trimmed_non_empty(alias_part),
        heading,
        raw_target: raw_target.to_string(),
        target: target.to_string(),
    }
}

/// Trims `text` and returns it as an owned string, or `None` if nothing remains.
fn trimmed_non_empty(text: &str) -> Option<String> {
    let trimmed = text.trim();
    if trimmed.is_empty() {
        None
    } else {
        Some(trimmed.to_string())
    }
}
/// Trims `value` and removes one matching pair of surrounding quotes.
///
/// A pair is removed only when the trimmed value both starts and ends with
/// the same quote character (`"` or `'`) and is at least
/// `MIN_QUOTED_LENGTH` bytes long. The text between the quotes is returned
/// as-is, without further trimming. Anything else is returned trimmed but
/// otherwise unchanged.
#[must_use]
pub fn strip_outer_quotes(value: &str) -> String {
    let candidate = value.trim();
    if candidate.len() >= MIN_QUOTED_LENGTH {
        for quote in ['"', '\''] {
            let unwrapped = candidate
                .strip_prefix(quote)
                .and_then(|rest| rest.strip_suffix(quote));
            if let Some(inner) = unwrapped {
                return inner.to_string();
            }
        }
    }
    candidate.to_string()
}
#[cfg(test)]
mod tests;