Skip to main content

beamterm_renderer/
url.rs

1use compact_str::CompactString;
2
3use crate::{SelectionMode, TerminalGrid, gl::CellQuery, position::CursorPosition, select};
4
5/// Result of URL detection containing the query and extracted URL text.
6pub struct UrlMatch {
7    /// A `CellQuery` configured with the URL's start and end positions.
8    pub query: CellQuery,
9    /// The extracted URL string.
10    pub url: CompactString,
11}
12
/// Reports whether `ch` may appear inside a URL.
///
/// Accepted characters are the ASCII letters and digits plus the punctuation
/// listed in `URL_PUNCTUATION`: the RFC 3986 unreserved marks (`-._~`), the
/// general delimiters (`:/?#[]@`), the sub-delimiters (`!$&'()*+,;=`) and `%`
/// as used by percent-encoding. Everything else, including whitespace and all
/// non-ASCII characters, is rejected.
fn is_url_char(ch: char) -> bool {
    const URL_PUNCTUATION: &str = "-._~:/?#[]@!$&'()*+,;=%";

    if ch.is_ascii_alphanumeric() {
        return true;
    }
    URL_PUNCTUATION.contains(ch)
}
42
/// Reports whether `ch` is sentence punctuation that should be stripped when it
/// ends a URL: one of `.`, `,`, `;`, `:`, `!` or `?`.
fn is_trailing_punctuation(ch: char) -> bool {
    const TRAILING: &str = ".,;:!?";
    TRAILING.contains(ch)
}
47
48/// Detects an HTTP/HTTPS URL at or around the given cursor position.
49///
50/// Scans left to find a URL scheme (`http://` or `https://`), then scans right
51/// to find the URL end. Handles trailing punctuation and unbalanced parentheses.
52///
53/// Returns `None` if no URL is found at the cursor position.
54pub(super) fn find_url_at_cursor(cursor: CursorPosition, grid: &TerminalGrid) -> Option<UrlMatch> {
55    let cols = grid.terminal_size().0;
56
57    // Find scheme start by scanning left
58    let scheme_start = find_scheme_start(cursor, grid, cols)?;
59
60    // Verify and get scheme length
61    let scheme_len = if matches_sequence(grid, scheme_start, "https://", cols) {
62        8
63    } else if matches_sequence(grid, scheme_start, "http://", cols) {
64        7
65    } else {
66        return None;
67    };
68
69    // Scan right from after scheme, tracking paren balance
70    let after_scheme = scheme_start.move_right(scheme_len, cols)?;
71    let (raw_end, paren_balance) = scan_url_extent(after_scheme, grid, cols);
72
73    // Trim trailing punctuation and unbalanced close parens
74    let url_end = trim_url_end(scheme_start, raw_end, paren_balance, grid);
75
76    // Verify cursor is within the URL bounds
77    if cursor.col < scheme_start.col || cursor.col > url_end.col {
78        return None;
79    }
80
81    // Now extract the text
82    let query = select(SelectionMode::Linear)
83        .start((scheme_start.col, scheme_start.row))
84        .end((url_end.col, url_end.row));
85
86    let url = grid.get_text(query);
87
88    Some(UrlMatch { query, url })
89}
90
91/// Scans left from the cursor to find the start of a URL scheme.
92fn find_scheme_start(
93    cursor: CursorPosition,
94    grid: &TerminalGrid,
95    cols: u16,
96) -> Option<CursorPosition> {
97    let mut pos = cursor;
98
99    loop {
100        // Check if this position starts a valid scheme
101        if grid.get_ascii_char_at(pos) == Some('h')
102            && (matches_sequence(grid, pos, "https://", cols)
103                || matches_sequence(grid, pos, "http://", cols))
104        {
105            return Some(pos);
106        }
107
108        // Move left, stop if we hit the start of the row
109        pos = pos.move_left(1)?;
110    }
111}
112
113/// Checks if a sequence of characters matches starting at the given position.
114fn matches_sequence(grid: &TerminalGrid, start: CursorPosition, seq: &str, cols: u16) -> bool {
115    let mut pos = start;
116    let char_count = seq.chars().count();
117
118    for (i, ch) in seq.chars().enumerate() {
119        if grid.get_ascii_char_at(pos) != Some(ch) {
120            return false;
121        }
122        // Move right for next character, but not after the last one
123        if i < char_count - 1 {
124            match pos.move_right(1, cols) {
125                Some(next) => pos = next,
126                None => return false, // Can't advance but more chars remain
127            }
128        }
129    }
130    true
131}
132
133/// Scans right from the starting position to find the extent of a URL.
134///
135/// Returns the end position and the parenthesis balance (positive means more '(' than ')').
136fn scan_url_extent(start: CursorPosition, grid: &TerminalGrid, cols: u16) -> (CursorPosition, i32) {
137    let mut pos = start;
138    let mut paren_balance: i32 = 0;
139    let mut last_valid = start;
140
141    loop {
142        match grid.get_ascii_char_at(pos) {
143            Some(ch) if is_url_char(ch) => {
144                if ch == '(' {
145                    paren_balance += 1;
146                } else if ch == ')' {
147                    paren_balance -= 1;
148                }
149                last_valid = pos;
150            },
151            _ => break,
152        }
153
154        match pos.move_right(1, cols) {
155            Some(next) => pos = next,
156            None => break,
157        }
158    }
159
160    (last_valid, paren_balance)
161}
162
163/// Trims trailing punctuation and unbalanced closing parentheses from the URL end.
164fn trim_url_end(
165    start: CursorPosition,
166    mut end: CursorPosition,
167    mut paren_balance: i32,
168    grid: &TerminalGrid,
169) -> CursorPosition {
170    // Work backwards, trimming trailing punctuation and unbalanced ')'
171    while end.col > start.col {
172        let ch = match grid.get_ascii_char_at(end) {
173            Some(c) => c,
174            None => break,
175        };
176
177        if is_trailing_punctuation(ch) {
178            // Trim trailing punctuation
179            end = end.move_left(1).unwrap_or(end);
180        } else if ch == ')' && paren_balance < 0 {
181            // Trim unbalanced closing paren
182            paren_balance += 1;
183            end = end.move_left(1).unwrap_or(end);
184        } else {
185            break;
186        }
187    }
188
189    end
190}
191
#[cfg(test)]
mod tests {
    use super::*;

    /// Spot-checks the URL character classifier on accepted and rejected samples.
    #[test]
    fn test_is_url_char() {
        // Valid URL characters
        for accepted in ['a', 'Z', '0', '-', '.', '/', '?', '=', '&', '(', ')'] {
            assert!(is_url_char(accepted), "{accepted:?} should be a URL character");
        }

        // Invalid URL characters
        for rejected in [' ', '\n', '<', '>', '"'] {
            assert!(!is_url_char(rejected), "{rejected:?} should not be a URL character");
        }
    }

    /// Spot-checks the trailing-punctuation classifier.
    #[test]
    fn test_is_trailing_punctuation() {
        for trimmed in ['.', ',', ';', ':', '!', '?'] {
            assert!(is_trailing_punctuation(trimmed), "{trimmed:?} should be trimmed");
        }

        for kept in ['/', '-', 'a'] {
            assert!(!is_trailing_punctuation(kept), "{kept:?} should be kept");
        }
    }
}