Skip to main content

php_lsp/editing/
document_link.rs

//! Document links: clickable paths in `require`/`include` expressions and `@link`/`@see` docblock tags.
2use php_ast::{ExprKind, NamespaceBody, Stmt, StmtKind};
3use tower_lsp::lsp_types::{DocumentLink, Position, Range, Url};
4
5use crate::ast::{ParsedDoc, SourceView};
6use crate::util::byte_to_utf16;
7
8pub fn document_links(uri: &Url, doc: &ParsedDoc, _source: &str) -> Vec<DocumentLink> {
9    let sv = doc.view();
10    let mut links = Vec::new();
11    collect_in_stmts(&doc.program().stmts, sv, uri, &mut links);
12    collect_docblock_links(sv.source(), &mut links);
13    links
14}
15
16fn collect_in_stmts(
17    stmts: &[Stmt<'_, '_>],
18    sv: SourceView<'_>,
19    uri: &Url,
20    out: &mut Vec<DocumentLink>,
21) {
22    for stmt in stmts {
23        collect_in_stmt(stmt, sv, uri, out);
24    }
25}
26
27fn collect_in_stmt(
28    stmt: &Stmt<'_, '_>,
29    sv: SourceView<'_>,
30    uri: &Url,
31    out: &mut Vec<DocumentLink>,
32) {
33    match &stmt.kind {
34        StmtKind::Expression(e) => collect_in_expr(e, sv, uri, out),
35        StmtKind::Return(Some(v)) => collect_in_expr(v, sv, uri, out),
36        StmtKind::Echo(exprs) => {
37            for expr in exprs.iter() {
38                collect_in_expr(expr, sv, uri, out);
39            }
40        }
41        StmtKind::Function(f) => collect_in_stmts(&f.body.stmts, sv, uri, out),
42        StmtKind::Class(c) => {
43            use php_ast::ClassMemberKind;
44            for member in c.body.members.iter() {
45                if let ClassMemberKind::Method(m) = &member.kind
46                    && let Some(body) = &m.body
47                {
48                    collect_in_stmts(&body.stmts, sv, uri, out);
49                }
50            }
51        }
52        StmtKind::Namespace(ns) => {
53            if let NamespaceBody::Braced(inner) = &ns.body {
54                collect_in_stmts(&inner.stmts, sv, uri, out);
55            }
56        }
57        _ => {}
58    }
59}
60
61fn collect_in_expr(
62    expr: &php_ast::Expr<'_, '_>,
63    sv: SourceView<'_>,
64    uri: &Url,
65    out: &mut Vec<DocumentLink>,
66) {
67    if let ExprKind::Include(_, path_expr) = &expr.kind
68        && let Some(link) = link_from_path_expr(path_expr, sv, uri)
69    {
70        out.push(link);
71    }
72}
73
74fn link_from_path_expr(
75    path_expr: &php_ast::Expr<'_, '_>,
76    sv: SourceView<'_>,
77    uri: &Url,
78) -> Option<DocumentLink> {
79    let ExprKind::String(s) = &path_expr.kind else {
80        return None;
81    };
82    let raw: &str = s;
83    if raw.is_empty() {
84        return None;
85    }
86    // span.start points to the opening quote; content starts one byte after
87    let quote_offset = path_expr.span.start;
88    let content_offset = quote_offset + 1;
89    let start = sv.position_of(content_offset);
90    let end = Position {
91        line: start.line,
92        character: start.character + raw.chars().map(|c| c.len_utf16() as u32).sum::<u32>(),
93    };
94    let range = Range { start, end };
95
96    let target = if std::path::Path::new(raw).is_absolute() {
97        Url::from_file_path(raw).ok()
98    } else {
99        // Resolve relative to the document URI. Url::join strips the last
100        // path segment (the filename) and appends `raw`, which is correct
101        // for both real and synthetic (no drive letter) file:// URIs.
102        uri.join(raw).ok()
103    };
104
105    Some(DocumentLink {
106        range,
107        target,
108        tooltip: None,
109        data: None,
110    })
111}
112
113/// Scan sv.source() text for `@link` and `@see` tags with HTTP(S) URLs in docblock/line comments.
114fn collect_docblock_links(source: &str, out: &mut Vec<DocumentLink>) {
115    for (line_idx, line) in source.lines().enumerate() {
116        let trimmed = line.trim();
117        if !trimmed.starts_with('*') && !trimmed.starts_with("/**") && !trimmed.starts_with("//") {
118            continue;
119        }
120        for tag in &["@link ", "@see "] {
121            if let Some(tag_start) = trimmed.find(tag) {
122                let after = trimmed[tag_start + tag.len()..].trim_start();
123                if !after.starts_with("http://") && !after.starts_with("https://") {
124                    continue;
125                }
126                let url_str = after.split_whitespace().next().unwrap_or("");
127                if url_str.is_empty() {
128                    continue;
129                }
130                if let Ok(target) = Url::parse(url_str)
131                    && let Some(col) = line.find(url_str)
132                {
133                    let start = Position {
134                        line: line_idx as u32,
135                        character: byte_to_utf16(line, col),
136                    };
137                    let end = Position {
138                        line: line_idx as u32,
139                        character: byte_to_utf16(line, col + url_str.len()),
140                    };
141                    out.push(DocumentLink {
142                        range: Range { start, end },
143                        target: Some(target),
144                        tooltip: None,
145                        data: None,
146                    });
147                }
148            }
149        }
150    }
151}
152
#[cfg(test)]
mod tests {
    use super::*;

    fn doc(src: &str) -> ParsedDoc {
        ParsedDoc::parse(src.to_string())
    }

    fn dummy_uri() -> Url {
        Url::parse("file:///project/src/Foo.php").unwrap()
    }

    /// Parses `src` and returns every document link reported for it.
    fn links_in(src: &str) -> Vec<DocumentLink> {
        let parsed = doc(src);
        document_links(&dummy_uri(), &parsed, src)
    }

    /// Returns the target URL of `link` as a string slice.
    fn target_of(link: &DocumentLink) -> &str {
        link.target.as_ref().unwrap().as_str()
    }

    #[test]
    fn docblock_at_link_produces_link() {
        let links = links_in("<?php\n/** @link https://php.net/array_map */\nfunction foo() {}");
        assert_eq!(links.len(), 1);
        assert_eq!(target_of(&links[0]), "https://php.net/array_map");
    }

    #[test]
    fn docblock_see_produces_link() {
        let links =
            links_in("<?php\n/**\n * @see https://example.com/docs\n */\nfunction bar() {}");
        assert_eq!(links.len(), 1);
        assert_eq!(target_of(&links[0]), "https://example.com/docs");
    }

    #[test]
    fn non_http_see_is_ignored() {
        let links = links_in("<?php\n/** @see SomeClass::method */\nfunction baz() {}");
        assert!(links.is_empty());
    }

    #[test]
    fn docblock_link_position_correct_after_multibyte_chars() {
        // "é" (U+00E9) takes 2 bytes in UTF-8 but a single UTF-16 unit, so for
        // everything after it the byte column is one greater than the UTF-16
        // column. The reported range must use UTF-16 columns.
        let line = "/** é @link https://example.com */";
        let url = "https://example.com";
        let src = format!("<?php\n{line}\nfunction f() {{}}");

        let links = links_in(&src);
        assert_eq!(links.len(), 1);
        let range = links[0].range;

        // The URL is pure ASCII, so its UTF-16 length equals its byte length.
        assert_eq!(
            range.end.character - range.start.character,
            url.len() as u32,
            "link range width must equal URL length in UTF-16 units"
        );

        // The start column must be the UTF-16 column, not the raw byte offset.
        let byte_col = line.find(url).unwrap();
        let utf16_col: u32 = line[..byte_col].chars().map(|c| c.len_utf16() as u32).sum();
        assert_eq!(range.start.character, utf16_col);
        assert_ne!(
            range.start.character, byte_col as u32,
            "must not use raw byte offset"
        );
    }
}