/// One hyperlink located inside a larger piece of text.
///
/// `start` and `end` are byte offsets into the scanned text and form a
/// half-open range, so slicing the text with `start..end` gives back `url`.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct HyperlinkSpan {
    /// Byte offset of the first byte of the link.
    pub start: usize,
    /// Byte offset one past the last byte of the link.
    pub end: usize,
    /// Owned copy of the link text.
    pub url: String,
}
20
/// Punctuation that is removed from the tail of a URL candidate.
///
/// These characters commonly follow a link in running prose (sentence
/// punctuation, closing brackets, quotes) rather than belonging to it.
const URL_TERMINATORS: &[char] = &[
    '.', ',', '!', '?', ';', ':', ')', ']', '}', '\'', '"',
];
26
27pub fn find_hyperlinks(text: &str) -> Vec<HyperlinkSpan> {
35 let mut spans: Vec<HyperlinkSpan> = Vec::new();
36 let bytes = text.as_bytes();
37 let len = bytes.len();
38
39 let mut i = 0usize;
40 while i < len {
41 let prefix = try_match_prefix(text, i);
43 if let Some(prefix_end) = prefix {
44 let url_end = extend_url(text, i, prefix_end);
46 let url = &text[i..url_end];
47 let url = strip_trailing_punct(url);
49 let url_end = i + url.len();
50 if url_end > i {
51 spans.push(HyperlinkSpan {
52 start: i,
53 end: url_end,
54 url: url.to_owned(),
55 });
56 i = url_end;
57 continue;
58 }
59 }
60 i += char_len_at(bytes, i);
62 }
63
64 spans
65}
66
/// Checks whether a link prefix starts at byte offset `pos` of `text`.
///
/// The recognised prefixes are `https://`, `http://` and `www.`, compared
/// case-sensitively. Returns the byte offset just past the matched prefix, or
/// `None` when none of them starts at `pos`.
///
/// # Panics
///
/// Panics if `pos` is greater than `text.len()` or does not lie on a `char`
/// boundary.
fn try_match_prefix(text: &str, pos: usize) -> Option<usize> {
    const PREFIXES: [&str; 3] = ["https://", "http://", "www."];

    let tail = &text[pos..];
    PREFIXES
        .iter()
        .copied()
        .find(|candidate| tail.starts_with(*candidate))
        .map(|matched| pos + matched.len())
}
81
/// Returns the byte offset in `text` at which the URL candidate beginning at
/// `start` ends.
///
/// The candidate runs from `start` up to, but not including, the first
/// whitespace character, or to the end of `text` when no whitespace follows.
/// Any Unicode whitespace ends the candidate, not only ASCII whitespace, so a
/// no-break space or an ideographic space after a link is not absorbed into
/// it.
///
/// `_prefix_end` is kept for signature compatibility and is not consulted.
///
/// # Panics
///
/// Panics if `start` is greater than `text.len()` or does not lie on a `char`
/// boundary.
fn extend_url(text: &str, start: usize, _prefix_end: usize) -> usize {
    text[start..]
        .find(char::is_whitespace)
        .map_or(text.len(), |offset| start + offset)
}
93
94fn strip_trailing_punct(url: &str) -> &str {
96 let mut end = url.len();
97 while end > 0 {
98 let ch = url[..end].chars().next_back().unwrap_or('\0');
99 if URL_TERMINATORS.contains(&ch) {
100 end -= ch.len_utf8();
101 } else {
102 break;
103 }
104 }
105 &url[..end]
106}
107
/// Returns how many bytes to step over for the character whose first byte is
/// `bytes[pos]`, judged from that byte alone.
///
/// ASCII bytes and UTF-8 continuation bytes (`0x80..=0xBF`) both yield 1, so a
/// caller that lands in the middle of a character still moves forward one byte
/// at a time. Lead bytes yield 2, 3 or 4; every byte from `0xF0` upwards is
/// treated as a 4-byte lead. The following bytes are not inspected.
///
/// # Panics
///
/// Panics if `pos` is out of bounds for `bytes`.
fn char_len_at(bytes: &[u8], pos: usize) -> usize {
    match bytes[pos] {
        0x00..=0xBF => 1,
        0xC0..=0xDF => 2,
        0xE0..=0xEF => 3,
        0xF0..=0xFF => 4,
    }
}
118
#[cfg(test)]
mod tests {
    use super::*;

    /// Scans `input`, asserts that exactly one link was found, and returns it.
    fn only_span(input: &str) -> HyperlinkSpan {
        let mut found = find_hyperlinks(input);
        assert_eq!(found.len(), 1);
        found.remove(0)
    }

    #[test]
    fn hyperlink_finds_https() {
        let link = only_span("visit https://example.com today");
        assert!(link.url.starts_with("https://"));
    }

    #[test]
    fn hyperlink_finds_http() {
        let link = only_span("see http://example.com/path");
        assert!(link.url.starts_with("http://"));
    }

    #[test]
    fn hyperlink_finds_www() {
        let link = only_span("see www.example.com");
        assert!(link.url.starts_with("www."));
    }

    #[test]
    fn hyperlink_ignores_plain_text() {
        assert!(find_hyperlinks("hello world").is_empty());
    }

    #[test]
    fn hyperlink_multiple_urls() {
        let found = find_hyperlinks("a https://a.com b http://b.com c");
        assert_eq!(found.len(), 2);
    }

    #[test]
    fn hyperlink_strips_trailing_punctuation() {
        let link = only_span("visit https://example.com.");
        assert!(!link.url.ends_with('.'), "trailing dot must be stripped");
    }

    #[test]
    fn hyperlink_correct_byte_offsets() {
        // The reported offsets must slice the original text back to the URL.
        let text = "x https://example.com y";
        let link = only_span(text);
        assert_eq!(text.get(link.start..link.end), Some(link.url.as_str()));
    }

    #[test]
    fn hyperlink_empty_string() {
        assert!(find_hyperlinks("").is_empty());
    }
}