par_term/
url_detection.rs

//! URL detection and handling utilities.
2use regex::Regex;
3use std::sync::OnceLock;
4
5/// URL pattern that matches common URL schemes
6static URL_REGEX: OnceLock<Regex> = OnceLock::new();
7
8/// Get the compiled URL regex pattern
9fn url_regex() -> &'static Regex {
10    URL_REGEX.get_or_init(|| {
11        // Matches URLs with common schemes (http, https, ftp, etc.)
12        // Also matches URLs without schemes that start with www.
13        Regex::new(
14            r"(?x)
15            \b(?:
16                # URLs with explicit schemes
17                (?:https?|ftps?|file|git|ssh)://[^\s<>{}|\\^`\[\]]+
18                |
19                # URLs starting with www.
20                www\.[^\s<>{}|\\^`\[\]]+
21            )\b
22            ",
23        )
24        .expect("Failed to compile URL regex")
25    })
26}
27
/// A link found on one terminal row, together with the span it occupies.
///
/// Produced both by regex scanning of row text and by OSC 8 hyperlink
/// detection; `hyperlink_id` tells the two sources apart.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DetectedUrl {
    /// The URL text
    pub url: String,
    /// Start column position (inclusive)
    pub start_col: usize,
    /// End column position (exclusive)
    pub end_col: usize,
    /// Row the link was found on
    pub row: usize,
    /// OSC 8 hyperlink ID: `Some(id)` for an OSC 8 hyperlink, `None` for a
    /// regex-detected URL
    pub hyperlink_id: Option<u32>,
}
42
43/// Detect URLs in a line of text using regex patterns
44pub fn detect_urls_in_line(text: &str, row: usize) -> Vec<DetectedUrl> {
45    let regex = url_regex();
46    let mut urls = Vec::new();
47
48    for mat in regex.find_iter(text) {
49        let url = mat.as_str().to_string();
50        let start_col = mat.start();
51        let end_col = mat.end();
52
53        urls.push(DetectedUrl {
54            url,
55            start_col,
56            end_col,
57            row,
58            hyperlink_id: None, // Regex-detected URLs don't have OSC 8 IDs
59        });
60    }
61
62    urls
63}
64
65/// Detect OSC 8 hyperlinks from terminal cells
66///
67/// # Arguments
68/// * `cells` - Slice of cells from a single row
69/// * `row` - Row number
70/// * `hyperlink_urls` - Mapping from hyperlink_id to URL string
71///
72/// # Returns
73/// Vector of DetectedUrl objects for OSC 8 hyperlinks in this row
74pub fn detect_osc8_hyperlinks(
75    cells: &[crate::cell_renderer::Cell],
76    row: usize,
77    hyperlink_urls: &std::collections::HashMap<u32, String>,
78) -> Vec<DetectedUrl> {
79    let mut urls = Vec::new();
80    let mut current_hyperlink: Option<(u32, usize, String)> = None; // (id, start_col, url)
81
82    for (col, cell) in cells.iter().enumerate() {
83        match (cell.hyperlink_id, &current_hyperlink) {
84            // Cell has a hyperlink ID
85            (Some(id), Some((current_id, _start_col, _url))) if id == *current_id => {
86                // Continue existing hyperlink (same ID as previous cell)
87                continue;
88            }
89            (Some(id), _) => {
90                // Start of a new hyperlink or different hyperlink
91                // First, save the previous hyperlink if there was one
92                if let Some((prev_id, start_col, url)) = current_hyperlink.take() {
93                    urls.push(DetectedUrl {
94                        url,
95                        start_col,
96                        end_col: col, // Previous hyperlink ends at current position
97                        row,
98                        hyperlink_id: Some(prev_id),
99                    });
100                }
101
102                // Start new hyperlink if we have a URL for this ID
103                if let Some(url) = hyperlink_urls.get(&id) {
104                    current_hyperlink = Some((id, col, url.clone()));
105                }
106            }
107            (None, Some((prev_id, start_col, url))) => {
108                // End of current hyperlink
109                urls.push(DetectedUrl {
110                    url: url.clone(),
111                    start_col: *start_col,
112                    end_col: col, // Hyperlink ends at current position
113                    row,
114                    hyperlink_id: Some(*prev_id),
115                });
116                current_hyperlink = None;
117            }
118            (None, None) => {
119                // No hyperlink in this cell or previous cells
120                continue;
121            }
122        }
123    }
124
125    // Save last hyperlink if it extends to the end of the row
126    if let Some((id, start_col, url)) = current_hyperlink {
127        urls.push(DetectedUrl {
128            url,
129            start_col,
130            end_col: cells.len(), // Extends to end of row
131            row,
132            hyperlink_id: Some(id),
133        });
134    }
135
136    urls
137}
138
139/// Check if a specific position is within a URL
140pub fn find_url_at_position(urls: &[DetectedUrl], col: usize, row: usize) -> Option<&DetectedUrl> {
141    urls.iter()
142        .find(|url| url.row == row && col >= url.start_col && col < url.end_col)
143}
144
145/// Open a URL in the default browser
146pub fn open_url(url: &str) -> Result<(), String> {
147    // Add scheme if missing (e.g., www.example.com -> https://www.example.com)
148    let url_with_scheme = if !url.contains("://") {
149        format!("https://{}", url)
150    } else {
151        url.to_string()
152    };
153
154    open::that(&url_with_scheme).map_err(|e| format!("Failed to open URL: {}", e))
155}
156
#[cfg(test)]
mod tests {
    use super::*;

    /// An `https` link inside a sentence is reported with its exact span.
    #[test]
    fn test_detect_http_url() {
        let detected = detect_urls_in_line("Visit https://example.com for more info", 0);
        assert_eq!(detected.len(), 1);

        let link = &detected[0];
        assert_eq!(link.url, "https://example.com");
        assert_eq!(link.start_col, 6);
        assert_eq!(link.end_col, 25); // end is exclusive
    }

    /// A scheme-less host starting with `www.` is picked up.
    #[test]
    fn test_detect_www_url() {
        let detected = detect_urls_in_line("Check out www.example.com", 0);
        assert_eq!(detected.len(), 1);
        assert_eq!(detected[0].url, "www.example.com");
    }

    /// Two links on one line come back in left-to-right order.
    #[test]
    fn test_detect_multiple_urls() {
        let detected = detect_urls_in_line("See https://example.com and http://test.org", 0);
        let found: Vec<&str> = detected.iter().map(|hit| hit.url.as_str()).collect();
        assert_eq!(found, ["https://example.com", "http://test.org"]);
    }

    /// Position lookup honours both the column span and the row.
    #[test]
    fn test_find_url_at_position() {
        let detected = detect_urls_in_line("Visit https://example.com for more", 5);

        // Inside the link.
        assert!(find_url_at_position(&detected, 10, 5).is_some());

        // Before and after the link on the same row.
        assert!(find_url_at_position(&detected, 0, 5).is_none());
        assert!(find_url_at_position(&detected, 30, 5).is_none());

        // Right column, wrong row.
        assert!(find_url_at_position(&detected, 10, 6).is_none());
    }

    /// Plain prose yields nothing.
    #[test]
    fn test_no_urls() {
        let detected = detect_urls_in_line("This line has no URLs at all", 0);
        assert!(detected.is_empty());
    }

    /// Every non-HTTP scheme in the sample line is recognised.
    #[test]
    fn test_url_schemes() {
        let line = "ftp://files.com ssh://git.com file:///path git://repo.com";
        let detected = detect_urls_in_line(line, 0);
        assert_eq!(detected.len(), 4);
    }
}