use regex::Regex;
use std::sync::OnceLock;
use super::state::{DetectedItemType, DetectedUrl};
// Process-wide cache for the compiled URL pattern; populated on first use by `url_regex`.
static URL_REGEX: OnceLock<Regex> = OnceLock::new();
// Process-wide cache for the compiled file-path pattern; populated on first use by
// `file_path_regex`.
static FILE_PATH_REGEX: OnceLock<Regex> = OnceLock::new();
/// Returns the shared, lazily compiled regex used to find URLs in a line of text.
///
/// The pattern (written in verbose `(?x)` mode, so whitespace and `#` comments inside it
/// are ignored) matches either:
/// - a URL with an explicit `http`/`https`, `ftp`/`ftps`, `file`, `git` or `ssh` scheme, or
/// - a bare host starting with `www.`,
///
/// followed by one or more characters that are not whitespace and not one of the
/// delimiters excluded by the character class. The surrounding `\b` assertions require
/// the match to begin and end at a word boundary.
///
/// # Panics
///
/// Panics on first use if the hard-coded pattern fails to compile.
fn url_regex() -> &'static Regex {
// Compile on the first call only; later calls return the cached instance.
URL_REGEX.get_or_init(|| {
Regex::new(
r"(?x)
\b(?:
# URLs with explicit schemes
(?:https?|ftps?|file|git|ssh)://[^\s<>{}|\\^`\[\]]+
|
# URLs starting with www.
www\.[^\s<>{}|\\^`\[\]]+
)\b
",
)
// The pattern is a compile-time constant, so a failure here is a programming error.
.expect("Failed to compile URL regex")
})
}
/// Returns the shared, lazily compiled regex used to find file paths in a line of text.
///
/// The pattern (verbose `(?x)` mode) accepts three path shapes:
/// - home-relative paths starting with `~/`,
/// - relative paths starting with `./` or `../`,
/// - absolute paths starting with `/` that contain at least two components and that
///   appear at the start of the text or directly after a whitespace character.
///
/// For the absolute-path form the leading whitespace character, when present, is part of
/// the match, so callers have to trim it themselves.
///
/// Path characters exclude whitespace, several punctuation and bracket characters,
/// control characters `\x00`-`\x1f`, and the code points `U+2500`-`U+257F`.
///
/// A path may be followed by an optional location suffix: `:line`, `:line:col`,
/// `[line]`, `[line, col]`, `(line)` or `(line, col)`.
///
/// # Panics
///
/// Panics on first use if the hard-coded pattern fails to compile.
fn file_path_regex() -> &'static Regex {
// Compile on the first call only; later calls return the cached instance.
FILE_PATH_REGEX.get_or_init(|| {
Regex::new(
r#"(?x)
(?:
# Home-relative paths (~/...)
~/[^\s:,;'"<>|)\]}\[\(\x00-\x1f\u{2500}-\u{257F}]+
|
# Relative paths starting with ./ or ../
\.\.?/[^\s:,;'"<>|)\]}\[\(\x00-\x1f\u{2500}-\u{257F}]+
|
# Absolute paths: must be at start of string or after whitespace
# Require at least two path components to reduce false positives
(?:^|\s)/[^\s:,;'"<>|)\]}\[\(\x00-\x1f\u{2500}-\u{257F}]+/[^\s:,;'"<>|)\]}\[\(\x00-\x1f\u{2500}-\u{257F}]+
)
# Optional line/column number in various formats
(?:
:\d+(?::\d+)? # :line or :line:col
| \[\d+(?:,\s?\d+)?\] # [line] or [line, col]
| \(\d+(?:,\s?\d+)?\) # (line) or (line, col)
)?
"#,
)
// The pattern is a compile-time constant, so a failure here is a programming error.
.expect("Failed to compile file path regex")
})
}
/// Removes any run of trailing `.`, `!` or `?` characters from `s`.
///
/// Returns the shortened slice together with the number of bytes that were removed, so
/// callers can adjust an end offset by the same amount.
fn strip_trailing_sentence_punctuation(s: &str) -> (&str, usize) {
    let kept = s.trim_end_matches(|c: char| matches!(c, '.' | '!' | '?'));
    (kept, s.len() - kept.len())
}
/// Finds every URL in `text` and reports it as a [`DetectedUrl`] on the given `row`.
///
/// Trailing sentence punctuation (`.`, `!`, `?`) is removed from each match. `start_col`
/// and `end_col` are the regex match offsets into `text`, with `end_col` reduced by the
/// number of punctuation bytes removed. Matches that become empty after trimming are
/// dropped.
pub fn detect_urls_in_line(text: &str, row: usize) -> Vec<DetectedUrl> {
    url_regex()
        .find_iter(text)
        .filter_map(|hit| {
            let (url, trailing) = strip_trailing_sentence_punctuation(hit.as_str());
            if url.is_empty() {
                return None;
            }
            Some(DetectedUrl {
                url: url.to_string(),
                start_col: hit.start(),
                // Exclude the punctuation that was trimmed off the end of the match.
                end_col: hit.end() - trailing,
                row,
                hyperlink_id: None,
                item_type: DetectedItemType::Url,
            })
        })
        .collect()
}
/// Finds every file path in `text` and reports it as a [`DetectedUrl`] on the given `row`.
///
/// Each regex match is cleaned up before being reported:
/// - leading whitespace captured by the absolute-path alternative is skipped and
///   `start_col` is advanced past it;
/// - trailing sentence punctuation (`.`, `!`, `?`) is removed and `end_col` is pulled
///   back by the same number of bytes;
/// - an optional line/column suffix is split off into `DetectedItemType::FilePath`.
///
/// Matches that become empty after trimming are dropped.
pub fn detect_file_paths_in_line(text: &str, row: usize) -> Vec<DetectedUrl> {
    file_path_regex()
        .find_iter(text)
        .filter_map(|hit| {
            let raw = hit.as_str();
            // `trim_start` is a no-op when the match has no leading whitespace.
            let without_lead = raw.trim_start();
            let lead_bytes = raw.len() - without_lead.len();

            let (candidate, trailing) = strip_trailing_sentence_punctuation(without_lead);
            if candidate.is_empty() {
                return None;
            }

            let (path, line, column) = parse_path_with_line_number(candidate);
            Some(DetectedUrl {
                url: path,
                start_col: hit.start() + lead_bytes,
                end_col: hit.end() - trailing,
                row,
                hyperlink_id: None,
                item_type: DetectedItemType::FilePath { line, column },
            })
        })
        .collect()
}
/// Splits a detected path string into the bare path and an optional line/column suffix.
///
/// Suffix forms, tried in this order:
/// - `path[line]` / `path[line, col]`
/// - `path(line)` / `path(line, col)`
/// - `path:line` / `path:line:col`
///
/// A bracketed or parenthesised suffix is accepted only when its line number parses as a
/// `usize`; otherwise the input falls through to the colon forms. When no suffix is
/// recognised, the whole input is returned as the path with `None` for line and column.
///
/// A colon inside the path itself (for example `C:/src/main.rs:42`) does not prevent a
/// trailing `:line` from being recognised.
fn parse_path_with_line_number(path_str: &str) -> (String, Option<usize>, Option<usize>) {
    /// Parses a trailing `<open>line[, col]<close>` suffix; `None` unless the line parses.
    fn delimited_suffix(
        text: &str,
        open: char,
        close: char,
    ) -> Option<(String, Option<usize>, Option<usize>)> {
        let (path, inner) = text.strip_suffix(close)?.rsplit_once(open)?;
        let (line, col) = parse_line_col_pair(inner);
        line.map(|line| (path.to_string(), Some(line), col))
    }

    let delimited = delimited_suffix(path_str, '[', ']')
        .or_else(|| delimited_suffix(path_str, '(', ')'));
    if let Some(parsed) = delimited {
        return parsed;
    }

    // Split from the right so only the last two `:`-separated segments are examined.
    let mut from_right = path_str.rsplitn(3, ':');
    let last = from_right.next().unwrap_or(path_str);
    let middle = from_right.next();
    let head = from_right.next();

    match (head, middle) {
        // Two or more colons: `head:middle:last`.
        (Some(head), Some(middle)) => {
            match (middle.parse::<usize>().ok(), last.parse::<usize>().ok()) {
                (Some(line), col) => (head.to_string(), Some(line), col),
                // `middle` is not a number, so it belongs to the path; the trailing
                // number is then a plain `:line` suffix.
                (None, Some(line)) => {
                    let path = &path_str[..path_str.len() - last.len() - 1];
                    (path.to_string(), Some(line), None)
                }
                (None, None) => (path_str.to_string(), None, None),
            }
        }
        // Exactly one colon: `path:last`.
        (None, Some(path)) => match last.parse::<usize>() {
            Ok(line) => (path.to_string(), Some(line), None),
            Err(_) => (path_str.to_string(), None, None),
        },
        // No colon at all.
        _ => (path_str.to_string(), None, None),
    }
}

/// Parses the inside of a `[line]` / `[line, col]` style suffix.
///
/// `s` is split on `,` and each field is trimmed. One field yields `(line, None)`, two
/// fields yield `(line, col)`, and any other count yields `(None, None)`. A field that
/// does not parse as `usize` becomes `None`.
fn parse_line_col_pair(s: &str) -> (Option<usize>, Option<usize>) {
    let mut fields = s.split(',').map(str::trim);
    match (fields.next(), fields.next(), fields.next()) {
        (Some(line), None, _) => (line.parse().ok(), None),
        (Some(line), Some(col), None) => (line.parse().ok(), col.parse().ok()),
        _ => (None, None),
    }
}
/// Collects the OSC 8 hyperlinks present in one row of cells.
///
/// Consecutive cells that carry the same `hyperlink_id` form one run. Each run whose id
/// has an entry in `hyperlink_urls` is reported as a [`DetectedUrl`] with:
/// - `start_col`: index of the first cell in the run,
/// - `end_col`: index one past the last cell in the run (`cells.len()` when the run
///   reaches the end of the row),
/// - `hyperlink_id`: the id shared by the run.
///
/// Cells whose id is missing from `hyperlink_urls` produce no link, but they still
/// terminate any run that precedes them.
pub fn detect_osc8_hyperlinks(
    cells: &[crate::cell_renderer::Cell],
    row: usize,
    hyperlink_urls: &std::collections::HashMap<u32, String>,
) -> Vec<DetectedUrl> {
    let link = |url: String, start_col: usize, end_col: usize, id: u32| DetectedUrl {
        url,
        start_col,
        end_col,
        row,
        hyperlink_id: Some(id),
        item_type: DetectedItemType::Url,
    };

    let mut urls = Vec::new();
    // (hyperlink id, start column, URL) of the run currently being extended.
    let mut current_hyperlink: Option<(u32, usize, String)> = None;

    for (col, cell) in cells.iter().enumerate() {
        let current_id = current_hyperlink.as_ref().map(|(id, _, _)| *id);
        // Same id as the open run, or no id and no open run: nothing changes.
        if cell.hyperlink_id == current_id {
            continue;
        }

        // The id changed, so the open run (if any) ends just before this cell.
        if let Some((prev_id, start_col, url)) = current_hyperlink.take() {
            urls.push(link(url, start_col, col, prev_id));
        }

        // Open a new run only when the id resolves to a known URL.
        if let Some(id) = cell.hyperlink_id {
            if let Some(url) = hyperlink_urls.get(&id) {
                current_hyperlink = Some((id, col, url.clone()));
            }
        }
    }

    // A run still open at the end of the row extends through the last cell.
    if let Some((id, start_col, url)) = current_hyperlink {
        urls.push(link(url, start_col, cells.len(), id));
    }
    urls
}