Skip to main content

zeph_tui/
hyperlink.rs

1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4use std::io::Write;
5use std::sync::LazyLock;
6
7use crossterm::cursor::MoveTo;
8use crossterm::queue;
9use ratatui::buffer::Buffer;
10use ratatui::layout::Rect;
11use regex::Regex;
12
13use crate::widgets::chat::MdLink;
14
// Matches bare `http://` / `https://` URLs: the scheme followed by one or more
// characters that are not whitespace, `<`, `>`, `[`, `]`, `(`, `)`, `"` (spelled
// `\x22` in the pattern), `'` or a backtick.
//
// Compiled lazily on first use. The pattern is a constant literal, so the
// `unwrap` can only fail if the literal itself is edited into an invalid regex.
static URL_RE: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"https?://[^\s<>\[\]()\x22'`]+").unwrap());
17
/// A detected hyperlink span in the rendered terminal buffer.
///
/// Carries the URL and the exact terminal cell range where the link text
/// appears. Used by [`write_osc8`] to emit OSC 8 escape sequences so that
/// supporting terminals display clickable hyperlinks.
///
/// The type is plain data: it can be cloned and compared for equality, which
/// makes it convenient to diff span lists between frames or assert on them in
/// tests.
///
/// # Examples
///
/// ```rust
/// use zeph_tui::hyperlink::HyperlinkSpan;
///
/// let span = HyperlinkSpan {
///     url: "https://example.com".to_string(),
///     row: 3,
///     start_col: 0,
///     end_col: 19,
/// };
/// assert!(span.url.starts_with("https://"));
/// assert_eq!(span.clone(), span);
/// ```
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct HyperlinkSpan {
    /// The target URL (http/https only after sanitisation).
    pub url: String,
    /// Terminal row (0-based from the top of the viewport).
    pub row: u16,
    /// First terminal column of the link text (inclusive).
    pub start_col: u16,
    /// One past the last terminal column of the link text (exclusive).
    pub end_col: u16,
}
48
49/// Find all HTTP/HTTPS URLs in `text` and return their byte ranges and URLs.
50///
51/// The returned ranges are byte-based and valid UTF-8 substrings of `text`.
52///
53/// # Examples
54///
55/// ```rust
56/// use zeph_tui::hyperlink::detect_urls_in_text;
57///
58/// let hits = detect_urls_in_text("see https://example.com for details");
59/// assert_eq!(hits.len(), 1);
60/// assert_eq!(hits[0].1, "https://example.com");
61/// ```
62pub fn detect_urls_in_text(text: &str) -> Vec<(std::ops::Range<usize>, String)> {
63    URL_RE
64        .find_iter(text)
65        .map(|m| (m.start()..m.end(), m.as_str().to_string()))
66        .collect()
67}
68
69/// Scan the rendered terminal `buffer` within `area` for bare HTTP/HTTPS URLs.
70///
71/// Returns a [`HyperlinkSpan`] for each URL found, with accurate terminal
72/// column positions.
73///
74/// # Examples
75///
76/// ```rust
77/// use ratatui::buffer::Buffer;
78/// use ratatui::layout::Rect;
79/// use ratatui::style::Style;
80/// use zeph_tui::hyperlink::collect_from_buffer;
81///
82/// let area = Rect::new(0, 0, 40, 1);
83/// let mut buf = Buffer::empty(area);
84/// buf.set_string(0, 0, "visit https://example.com", Style::default());
85/// let spans = collect_from_buffer(&buf, area);
86/// assert_eq!(spans.len(), 1);
87/// assert_eq!(spans[0].url, "https://example.com");
88/// ```
89#[must_use]
90pub fn collect_from_buffer(buffer: &Buffer, area: Rect) -> Vec<HyperlinkSpan> {
91    let mut spans = Vec::new();
92    for row in area.y..area.y + area.height {
93        let mut row_text = String::new();
94        let mut col_offsets: Vec<u16> = Vec::new();
95        for col in area.x..area.x + area.width {
96            let sym = buffer[(col, row)].symbol();
97            for _ in sym.chars() {
98                col_offsets.push(col);
99            }
100            row_text.push_str(sym);
101        }
102        for (range, url) in detect_urls_in_text(&row_text) {
103            let Some(&start_col) = col_offsets.get(range.start) else {
104                continue;
105            };
106            let end_col = col_offsets
107                .get(range.end.saturating_sub(1))
108                .map_or(start_col + 1, |c| c + 1);
109            spans.push(HyperlinkSpan {
110                url,
111                row,
112                start_col,
113                end_col,
114            });
115        }
116    }
117    spans
118}
119
/// Returns `true` when `url` starts with the `http://` or `https://` scheme.
///
/// URI schemes are case-insensitive, so `HTTPS://…` and `Http://…` are accepted
/// as well. Every other scheme (`javascript:`, `file:`, …) and any string too
/// short to contain the prefix is rejected.
fn is_safe_url(url: &str) -> bool {
    // Compare on bytes: slicing the `str` at a fixed offset could land inside a
    // multi-byte character, whereas a byte-slice `get` simply returns `None`
    // when the input is too short.
    let bytes = url.as_bytes();
    ["https://", "http://"].iter().any(|scheme| {
        bytes
            .get(..scheme.len())
            .is_some_and(|prefix| prefix.eq_ignore_ascii_case(scheme.as_bytes()))
    })
}
123
124/// Collects hyperlink spans from the buffer in a single pass, merging regex-detected
125/// bare URLs with markdown links. Markdown links take precedence: if a markdown link's
126/// display text overlaps with a bare-URL span on the same row, the bare-URL span is
127/// replaced. Only http(s) URLs are emitted for markdown links.
128#[must_use]
129pub fn collect_from_buffer_with_md_links(
130    buffer: &Buffer,
131    area: Rect,
132    md_links: &[MdLink],
133) -> Vec<HyperlinkSpan> {
134    // Filter to safe-scheme, non-empty md_links up front.
135    let safe_links: Vec<&MdLink> = md_links
136        .iter()
137        .filter(|l| !l.text.is_empty() && is_safe_url(&l.url))
138        .collect();
139
140    let mut spans: Vec<HyperlinkSpan> = Vec::new();
141
142    for row in area.y..area.y + area.height {
143        // Build row_text and char→col mapping in one pass.
144        let mut row_chars: Vec<char> = Vec::new();
145        let mut col_offsets: Vec<u16> = Vec::new();
146        for col in area.x..area.x + area.width {
147            let sym = buffer[(col, row)].symbol();
148            for ch in sym.chars() {
149                col_offsets.push(col);
150                row_chars.push(ch);
151            }
152        }
153        let row_text: String = row_chars.iter().collect();
154
155        // Collect bare URL spans for this row.
156        let mut row_spans: Vec<HyperlinkSpan> = Vec::new();
157        for (range, url) in detect_urls_in_text(&row_text) {
158            // range is byte-based; convert to char index via col_offsets.
159            // Since URL_RE only matches ASCII characters, byte index == char index here,
160            // but we use col_offsets for correctness regardless.
161            let Some(&start_col) = col_offsets.get(range.start) else {
162                continue;
163            };
164            let end_col = col_offsets
165                .get(range.end.saturating_sub(1))
166                .map_or(start_col + 1, |c| c + 1);
167            row_spans.push(HyperlinkSpan {
168                url,
169                row,
170                start_col,
171                end_col,
172            });
173        }
174
175        // Search for each markdown link text using char indices.
176        for link in &safe_links {
177            let link_chars: Vec<char> = link.text.chars().collect();
178            let link_len = link_chars.len();
179            if link_len == 0 || link_len > row_chars.len() {
180                continue;
181            }
182            let mut search_from = 0;
183            while search_from + link_len <= row_chars.len() {
184                if row_chars[search_from..search_from + link_len] == link_chars[..] {
185                    let start_col = col_offsets[search_from];
186                    let end_col = col_offsets[search_from + link_len - 1] + 1;
187
188                    // Remove bare-URL spans that overlap this region on the same row.
189                    row_spans.retain(|s| s.end_col <= start_col || s.start_col >= end_col);
190
191                    row_spans.push(HyperlinkSpan {
192                        url: link.url.clone(),
193                        row,
194                        start_col,
195                        end_col,
196                    });
197
198                    search_from += link_len;
199                } else {
200                    search_from += 1;
201                }
202            }
203        }
204
205        spans.extend(row_spans);
206    }
207
208    spans
209}
210
211/// Write OSC 8 escape sequences directly to the terminal writer.
212/// Cursor is repositioned for each hyperlink; the visible text is untouched.
213///
214/// # Errors
215///
216/// Returns an error if writing to the terminal fails.
217pub fn write_osc8(writer: &mut impl Write, spans: &[HyperlinkSpan]) -> std::io::Result<()> {
218    for span in spans {
219        // Strip ASCII control characters to prevent OSC 8 escape sequence injection.
220        let safe_url: String = span.url.chars().filter(|c| !c.is_ascii_control()).collect();
221        queue!(writer, MoveTo(span.start_col, span.row))?;
222        write!(writer, "\x1b]8;;{safe_url}\x1b\\")?;
223        queue!(writer, MoveTo(span.end_col, span.row))?;
224        write!(writer, "\x1b]8;;\x1b\\")?;
225    }
226    Ok(())
227}
228
// Unit and property tests for URL detection, buffer scanning, markdown-link
// merging and OSC 8 output.
#[cfg(test)]
mod tests {
    use super::*;

    // A single URL embedded in prose is found and returned verbatim.
    #[test]
    fn detect_urls_basic() {
        let urls = detect_urls_in_text("visit https://example.com for info");
        assert_eq!(urls.len(), 1);
        assert_eq!(urls[0].1, "https://example.com");
    }

    // Both http and https URLs are found, in order, including path and query.
    #[test]
    fn detect_urls_multiple() {
        let text = "see http://a.com and https://b.org/path?q=1";
        let urls = detect_urls_in_text(text);
        assert_eq!(urls.len(), 2);
        assert_eq!(urls[0].1, "http://a.com");
        assert_eq!(urls[1].1, "https://b.org/path?q=1");
    }

    // Text without any URL yields no hits.
    #[test]
    fn detect_urls_none() {
        let urls = detect_urls_in_text("no links here");
        assert!(urls.is_empty());
    }

    // The closing `)` of markdown link syntax is not part of the match, because
    // parentheses are in the pattern's excluded character set.
    #[test]
    fn detect_urls_in_markdown_brackets() {
        let urls = detect_urls_in_text("[text](https://example.com)");
        assert_eq!(urls.len(), 1);
        assert_eq!(urls[0].1, "https://example.com");
    }

    // A URL on row 0 is reported with its row and an exclusive end column;
    // the URL-free row 1 contributes nothing.
    #[test]
    fn collect_from_buffer_finds_urls() {
        let area = Rect::new(0, 0, 40, 2);
        let mut buf = Buffer::empty(area);
        buf.set_string(
            0,
            0,
            "visit https://example.com now",
            ratatui::style::Style::default(),
        );
        buf.set_string(0, 1, "no links here", ratatui::style::Style::default());

        let spans = collect_from_buffer(&buf, area);
        assert_eq!(spans.len(), 1);
        assert_eq!(spans[0].url, "https://example.com");
        assert_eq!(spans[0].row, 0);
        assert_eq!(spans[0].start_col, 6);
        assert_eq!(spans[0].end_col, 25);
    }

    // Markdown link display text found in the buffer becomes a span that
    // covers exactly that text and carries the link's URL.
    #[test]
    fn collect_with_md_links_adds_link_span() {
        let area = Rect::new(0, 0, 40, 1);
        let mut buf = Buffer::empty(area);
        buf.set_string(
            0,
            0,
            "click here for info",
            ratatui::style::Style::default(),
        );

        let md_links = vec![MdLink {
            text: "click here".to_string(),
            url: "https://example.com".to_string(),
        }];
        let spans = collect_from_buffer_with_md_links(&buf, area, &md_links);
        assert_eq!(spans.len(), 1);
        assert_eq!(spans[0].url, "https://example.com");
        assert_eq!(spans[0].start_col, 0);
        assert_eq!(spans[0].end_col, 10);
    }

    // When the display text is itself a URL, the bare-URL span and the
    // markdown span cover the same cells; only one may survive.
    #[test]
    fn collect_with_md_links_replaces_bare_url_overlap() {
        let area = Rect::new(0, 0, 50, 1);
        let mut buf = Buffer::empty(area);
        // Display text is the URL itself — bare URL regex would also match.
        buf.set_string(
            0,
            0,
            "https://example.com",
            ratatui::style::Style::default(),
        );

        let md_links = vec![MdLink {
            text: "https://example.com".to_string(),
            url: "https://example.com".to_string(),
        }];
        let spans = collect_from_buffer_with_md_links(&buf, area, &md_links);
        // Deduplication: only one span should remain.
        assert_eq!(spans.len(), 1);
        assert_eq!(spans[0].url, "https://example.com");
    }

    // Exercises non-ASCII link text. Note that this test accepts either
    // outcome (one span or none) depending on how the buffer stores wide
    // characters, so it guards against panics rather than pinning a result.
    #[test]
    fn collect_with_md_links_non_ascii_text() {
        // Non-ASCII link text (CJK characters) must use char indices.
        // CJK chars are wide (2 columns each), so "日本語" occupies cols 0-5.
        let area = Rect::new(0, 0, 10, 1);
        let mut buf = Buffer::empty(area);
        buf.set_string(0, 0, "日本語", ratatui::style::Style::default());

        // Verify that the implementation can find CJK text in the buffer.
        // The row_chars built from the buffer symbols should contain the CJK chars.
        let mut row_chars: Vec<char> = Vec::new();
        for col in 0u16..10 {
            let sym = buf[(col, 0)].symbol();
            for ch in sym.chars() {
                row_chars.push(ch);
            }
        }
        // The buffer should contain the CJK chars in row_chars.
        let row_text: String = row_chars.iter().collect();
        // If CJK chars are present, the md_link test should find them.
        // If the buffer stores them differently (e.g. as placeholder spaces),
        // the test verifies the current actual behavior.
        let md_links = vec![MdLink {
            text: "日本語".to_string(),
            url: "https://example.com".to_string(),
        }];
        let spans = collect_from_buffer_with_md_links(&buf, area, &md_links);
        if row_text.contains("日本語") {
            // CJK chars stored as-is: link span should be found.
            assert_eq!(spans.len(), 1);
            assert_eq!(spans[0].url, "https://example.com");
        } else {
            // Buffer stores wide chars differently; no span produced (safe default).
            assert_eq!(spans.len(), 0);
        }
    }

    // A markdown link with a non-http(s) target produces no span even though
    // its display text is present in the buffer.
    #[test]
    fn collect_with_md_links_rejects_unsafe_scheme() {
        let area = Rect::new(0, 0, 30, 1);
        let mut buf = Buffer::empty(area);
        buf.set_string(0, 0, "click me", ratatui::style::Style::default());

        let md_links = vec![MdLink {
            text: "click me".to_string(),
            url: "javascript:alert(1)".to_string(),
        }];
        let spans = collect_from_buffer_with_md_links(&buf, area, &md_links);
        assert!(spans.is_empty());
    }

    // An ESC byte embedded in the URL is removed before the URL is written.
    #[test]
    fn write_osc8_strips_control_chars() {
        let spans = vec![HyperlinkSpan {
            url: "https://x.com/\x1b]evil".to_string(),
            row: 0,
            start_col: 0,
            end_col: 5,
        }];
        let mut buf = Vec::new();
        write_osc8(&mut buf, &spans).unwrap();
        let output = String::from_utf8(buf).unwrap();
        // The injected ESC must not appear inside the OSC 8 URL parameter.
        assert!(output.contains("https://x.com/]evil"));
        assert!(!output.contains("https://x.com/\x1b]evil"));
    }

    // Both the opening (with URL) and the closing (empty URL) OSC 8 sequences
    // appear in the output.
    #[test]
    fn write_osc8_produces_escape_sequences() {
        let spans = vec![HyperlinkSpan {
            url: "https://x.com".to_string(),
            row: 0,
            start_col: 0,
            end_col: 5,
        }];
        let mut buf = Vec::new();
        write_osc8(&mut buf, &spans).unwrap();
        let output = String::from_utf8(buf).unwrap();
        assert!(output.contains("\x1b]8;;https://x.com\x1b\\"));
        assert!(output.contains("\x1b]8;;\x1b\\"));
    }

    // Property-based tests; each property runs 200 generated cases.
    mod proptest_hyperlink {
        use super::*;
        use proptest::prelude::*;

        // Strategy: 1-60 ASCII letters, digits or spaces.
        fn ascii_text() -> impl Strategy<Value = String> {
            "[a-zA-Z0-9 ]{1,60}"
        }

        // Strategy: an https URL on example.com with a 1-40 character path.
        fn safe_url() -> impl Strategy<Value = String> {
            "[a-zA-Z0-9/._~-]{1,40}".prop_map(|s| format!("https://example.com/{s}"))
        }

        proptest! {
            #![proptest_config(ProptestConfig::with_cases(200))]

            // Collection must complete without panicking for arbitrary ASCII
            // text, including text longer than the buffer width.
            #[test]
            fn collect_never_panics(
                text in ascii_text(),
                url in safe_url(),
                width in 20u16..120,
            ) {
                let area = Rect::new(0, 0, width, 3);
                let mut buf = Buffer::empty(area);
                buf.set_string(0, 0, &text, ratatui::style::Style::default());
                let md_links = vec![MdLink {
                    text: text.clone(),
                    url,
                }];
                let _ = collect_from_buffer_with_md_links(&buf, area, &md_links);
            }

            // Every reported span is non-empty and lies inside the scanned area.
            #[test]
            fn spans_within_buffer_bounds(
                text in "[a-z]{3,20}",
                url in safe_url(),
                width in 30u16..100,
            ) {
                let area = Rect::new(0, 0, width, 1);
                let mut buf = Buffer::empty(area);
                buf.set_string(0, 0, &text, ratatui::style::Style::default());
                let md_links = vec![MdLink { text, url }];
                let spans = collect_from_buffer_with_md_links(&buf, area, &md_links);
                for span in &spans {
                    prop_assert!(span.start_col < span.end_col);
                    prop_assert!(span.end_col <= area.x + area.width);
                    prop_assert!(span.row < area.y + area.height);
                }
            }

            // With no markdown links, the merged collector must agree with the
            // plain bare-URL collector on URL and column range.
            #[test]
            fn empty_md_links_matches_collect_from_buffer(
                width in 30u16..80,
            ) {
                let area = Rect::new(0, 0, width, 1);
                let mut buf = Buffer::empty(area);
                buf.set_string(
                    0, 0,
                    "visit https://example.com now",
                    ratatui::style::Style::default(),
                );
                let baseline = collect_from_buffer(&buf, area);
                let with_empty = collect_from_buffer_with_md_links(&buf, area, &[]);
                prop_assert_eq!(baseline.len(), with_empty.len());
                for (a, b) in baseline.iter().zip(with_empty.iter()) {
                    prop_assert_eq!(&a.url, &b.url);
                    prop_assert_eq!(a.start_col, b.start_col);
                    prop_assert_eq!(a.end_col, b.end_col);
                }
            }
        }
    }
}