// beamterm_core/url.rs

use compact_str::CompactString;

use crate::{SelectionMode, TerminalGrid, gl::CellQuery, position::CursorPosition, select};

5/// Result of URL detection containing the query and extracted URL text.
6pub struct UrlMatch {
7    /// A `CellQuery` configured with the URL's start and end positions.
8    pub query: CellQuery,
9    /// The extracted URL string.
10    pub url: CompactString,
11}
12
/// Returns `true` if `ch` may appear inside a URL.
///
/// Accepted characters are ASCII letters and digits plus the RFC 3986 unreserved marks
/// (`-._~`), gen-delims (`:/?#[]@`) and sub-delims (`!$&'()*+,;=`). `%` is accepted as well
/// so that percent-encoded octets do not end a URL early. Everything else, including
/// whitespace and all non-ASCII characters, is rejected.
fn is_url_char(ch: char) -> bool {
    // Every non-alphanumeric character that is allowed, kept in one place.
    const URL_PUNCTUATION: &str = "-._~:/?#[]@!$&'()*+,;=%";

    ch.is_ascii_alphanumeric() || URL_PUNCTUATION.contains(ch)
}

/// Returns `true` for punctuation that is dropped when it ends a URL.
///
/// These characters are valid inside a URL, but at the very end they are far more likely to
/// belong to the surrounding sentence ("see https://example.com.") than to the link.
fn is_trailing_punctuation(ch: char) -> bool {
    matches!(ch, '.' | ',' | ';' | ':' | '!' | '?')
}

48/// Detects an HTTP/HTTPS URL at or around the given cursor position.
49///
50/// Scans left to find a URL scheme (`http://` or `https://`), then scans right
51/// to find the URL end. Handles trailing punctuation and unbalanced parentheses.
52///
53/// Returns `None` if no URL is found at the cursor position.
54#[must_use]
55pub fn find_url_at_cursor(cursor: CursorPosition, grid: &TerminalGrid) -> Option<UrlMatch> {
56    let cols = grid.terminal_size().cols;
57
58    // Find scheme start by scanning left
59    let scheme_start = find_scheme_start(cursor, grid, cols)?;
60
61    // Verify and get scheme length
62    let scheme_len = if matches_sequence(grid, scheme_start, "https://", cols) {
63        8
64    } else if matches_sequence(grid, scheme_start, "http://", cols) {
65        7
66    } else {
67        return None;
68    };
69
70    // Scan right from after scheme, tracking paren balance
71    let after_scheme = scheme_start.move_right(scheme_len, cols)?;
72    let (raw_end, paren_balance) = scan_url_extent(after_scheme, grid, cols);
73
74    // Trim trailing punctuation and unbalanced close parens
75    let url_end = trim_url_end(scheme_start, raw_end, paren_balance, grid);
76
77    // Verify cursor is within the URL bounds
78    if cursor.col < scheme_start.col || cursor.col > url_end.col {
79        return None;
80    }
81
82    // Now extract the text
83    let query = select(SelectionMode::Linear)
84        .start((scheme_start.col, scheme_start.row))
85        .end((url_end.col, url_end.row));
86
87    let url = grid.get_text(query);
88
89    Some(UrlMatch { query, url })
90}
91
92/// Scans left from the cursor to find the start of a URL scheme.
93fn find_scheme_start(
94    cursor: CursorPosition,
95    grid: &TerminalGrid,
96    cols: u16,
97) -> Option<CursorPosition> {
98    let mut pos = cursor;
99
100    loop {
101        // Check if this position starts a valid scheme
102        if grid.get_ascii_char_at(pos) == Some('h')
103            && (matches_sequence(grid, pos, "https://", cols)
104                || matches_sequence(grid, pos, "http://", cols))
105        {
106            return Some(pos);
107        }
108
109        // Move left, stop if we hit the start of the row
110        pos = pos.move_left(1)?;
111    }
112}
113
114/// Checks if a sequence of characters matches starting at the given position.
115fn matches_sequence(grid: &TerminalGrid, start: CursorPosition, seq: &str, cols: u16) -> bool {
116    let mut pos = start;
117    let char_count = seq.chars().count();
118
119    for (i, ch) in seq.chars().enumerate() {
120        if grid.get_ascii_char_at(pos) != Some(ch) {
121            return false;
122        }
123        // Move right for next character, but not after the last one
124        if i < char_count - 1 {
125            match pos.move_right(1, cols) {
126                Some(next) => pos = next,
127                None => return false, // Can't advance but more chars remain
128            }
129        }
130    }
131    true
132}
133
134/// Scans right from the starting position to find the extent of a URL.
135///
136/// Returns the end position and the parenthesis balance (positive means more '(' than ')').
137fn scan_url_extent(start: CursorPosition, grid: &TerminalGrid, cols: u16) -> (CursorPosition, i32) {
138    let mut pos = start;
139    let mut paren_balance: i32 = 0;
140    let mut last_valid = start;
141
142    loop {
143        match grid.get_ascii_char_at(pos) {
144            Some(ch) if is_url_char(ch) => {
145                if ch == '(' {
146                    paren_balance += 1;
147                } else if ch == ')' {
148                    paren_balance -= 1;
149                }
150                last_valid = pos;
151            },
152            _ => break,
153        }
154
155        match pos.move_right(1, cols) {
156            Some(next) => pos = next,
157            None => break,
158        }
159    }
160
161    (last_valid, paren_balance)
162}
163
164/// Trims trailing punctuation and unbalanced closing parentheses from the URL end.
165fn trim_url_end(
166    start: CursorPosition,
167    mut end: CursorPosition,
168    mut paren_balance: i32,
169    grid: &TerminalGrid,
170) -> CursorPosition {
171    // Work backwards, trimming trailing punctuation and unbalanced ')'
172    while end.col > start.col {
173        let Some(ch) = grid.get_ascii_char_at(end) else {
174            break;
175        };
176
177        if is_trailing_punctuation(ch) {
178            // Trim trailing punctuation
179            end = end.move_left(1).unwrap_or(end);
180        } else if ch == ')' && paren_balance < 0 {
181            // Trim unbalanced closing paren
182            paren_balance += 1;
183            end = end.move_left(1).unwrap_or(end);
184        } else {
185            break;
186        }
187    }
188
189    end
190}
191
#[cfg(test)]
mod tests {
    use super::*;

    /// Spot-checks the URL character set on both accepted and rejected characters.
    #[test]
    fn test_is_url_char() {
        // Valid URL characters
        assert!(is_url_char('a'));
        assert!(is_url_char('Z'));
        assert!(is_url_char('0'));
        assert!(is_url_char('-'));
        assert!(is_url_char('.'));
        assert!(is_url_char('/'));
        assert!(is_url_char('?'));
        assert!(is_url_char('='));
        assert!(is_url_char('&'));
        assert!(is_url_char('('));
        assert!(is_url_char(')'));
        assert!(is_url_char('~'));
        assert!(is_url_char('\''));
        // Percent sign, needed for percent-encoded octets
        assert!(is_url_char('%'));

        // Invalid URL characters
        assert!(!is_url_char(' '));
        assert!(!is_url_char('\n'));
        assert!(!is_url_char('<'));
        assert!(!is_url_char('>'));
        assert!(!is_url_char('"'));
        // Non-ASCII letters are not accepted
        assert!(!is_url_char('é'));
    }

    /// Checks which characters count as trimmable sentence punctuation.
    #[test]
    fn test_is_trailing_punctuation() {
        assert!(is_trailing_punctuation('.'));
        assert!(is_trailing_punctuation(','));
        assert!(is_trailing_punctuation(';'));
        assert!(is_trailing_punctuation(':'));
        assert!(is_trailing_punctuation('!'));
        assert!(is_trailing_punctuation('?'));

        assert!(!is_trailing_punctuation('/'));
        assert!(!is_trailing_punctuation('-'));
        assert!(!is_trailing_punctuation('a'));
        // Closing parens are handled by balance tracking, not as punctuation
        assert!(!is_trailing_punctuation(')'));
    }
}