Skip to main content

lex_core/lex/ast/
links.rs

1//! Document link extraction for LSP support
2//!
3//! This module provides APIs for extracting clickable links from Lex documents,
4//! enabling the LSP "document links" feature that makes URLs and file references
5//! clickable in editors.
6//!
7//! ## Problem
8//!
9//! The LSP document links feature needs to find all clickable links:
10//! - URLs in text (`[https://example.com]`)
11//! - File references (`[./file.txt]`)
12//! - Verbatim block `src` parameters (images, includes)
13//!
14//! While `ReferenceType::Url` and `ReferenceType::File` exist, there's no API to
15//! extract all links from a document.
16//!
17//! ## Solution
18//!
19//! This module provides:
20//! - `DocumentLink` struct representing a link with its location and type
21//! - `find_all_links()` methods on Document and Session
22//! - `src_parameter()` method on Verbatim to access src parameters
23//!
24//! ## Link Types
25//!
26//! 1. **URL links**: `[https://example.com]` - HTTP/HTTPS URLs
27//! 2. **File links**: `[./file.txt]`, `[../path/to/file.md]` - File references
28//! 3. **Verbatim src**: `:: image src=./image.png ::` - External resource references
29
30use super::elements::Verbatim;
31use super::inline_positions::{walk_text_content_positions, InlinePositionVisitor};
32use super::range::Range;
33use super::text_content::TextContent;
34use super::{Document, Session};
35use crate::lex::inlines::{ReferenceInline, ReferenceType};
36use std::fmt;
37
38/// Represents a document link with its location and type
39#[derive(Debug, Clone, PartialEq)]
40pub struct DocumentLink {
41    pub range: Range,
42    pub target: String,
43    pub link_type: LinkType,
44}
45
46impl DocumentLink {
47    pub fn new(range: Range, target: String, link_type: LinkType) -> Self {
48        Self {
49            range,
50            target,
51            link_type,
52        }
53    }
54}
55
56impl fmt::Display for DocumentLink {
57    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
58        write!(
59            f,
60            "{:?} link: {} at {}",
61            self.link_type, self.target, self.range.start
62        )
63    }
64}
65
/// The kind of construct a [`DocumentLink`] was extracted from.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LinkType {
    /// An inline URL reference, e.g. `[https://example.com]`.
    Url,
    /// An inline file reference (relative or absolute path), e.g. `[./file.txt]`.
    File,
    /// The `src` parameter of a verbatim block, e.g. `:: image src=./image.png ::`.
    VerbatimSrc,
}
76
77impl Verbatim {
78    /// Get the src parameter value if present
79    ///
80    /// The src parameter is commonly used for:
81    /// - Image sources: `:: image src=./diagram.png ::`
82    /// - File includes: `:: include src=./code.rs ::`
83    /// - External resources: `:: data src=./data.csv ::`
84    ///
85    /// # Returns
86    /// The value of the `src` parameter, or None if not present
87    ///
88    /// # Example
89    /// ```rust,ignore
90    /// if let Some(src) = verbatim.src_parameter() {
91    ///     // Make src clickable in editor
92    ///     println!("Link to: {}", src);
93    /// }
94    /// ```
95    pub fn src_parameter(&self) -> Option<&str> {
96        self.closing_data
97            .parameters
98            .iter()
99            .find(|p| p.key == "src")
100            .map(|p| p.value.as_str())
101    }
102}
103
104impl Session {
105    /// Find all links at any depth in this session
106    ///
107    /// This searches recursively through all content to find:
108    /// - URL references: `[https://example.com]`
109    /// - File references: `[./path/to/file.txt]`
110    /// - Verbatim src parameters: `src=./image.png`
111    ///
112    /// # Returns
113    /// Vector of all links found in this session and its descendants
114    ///
115    /// # Example
116    /// ```rust,ignore
117    /// let links = session.find_all_links();
118    /// for link in links {
119    ///     println!("Found {} link: {}", link.link_type, link.target);
120    /// }
121    /// ```
122    pub fn find_all_links(&self) -> Vec<DocumentLink> {
123        use super::elements::content_item::ContentItem;
124        use super::traits::AstNode;
125
126        let mut links = Vec::new();
127
128        // Links in this session's title and every nested session's title.
129        //
130        // `Document::find_all_links` invokes us on the implicit root session
131        // (whose title is empty), so without the recursive sweep below we
132        // would silently drop every URL/File reference that appears in a
133        // section heading — `1. See [./handlers.lex] for details` and
134        // similar — even though paragraph-body refs were correctly found.
135        collect_text_content_links(&self.title, &mut links);
136        for nested in self.iter_sessions_recursive() {
137            collect_text_content_links(&nested.title, &mut links);
138        }
139
140        // Paragraphs (recursively into nested sessions).
141        for paragraph in self.iter_paragraphs_recursive() {
142            for line_item in &paragraph.lines {
143                if let ContentItem::TextLine(line) = line_item {
144                    collect_text_content_links(&line.content, &mut links);
145                }
146            }
147        }
148
149        // Verbatim `src` parameters — these aren't bracketed inline references,
150        // so the verbatim's range stays as-is.
151        for (item, _depth) in self.iter_all_nodes_with_depth() {
152            if let ContentItem::VerbatimBlock(verbatim) = item {
153                if let Some(src) = verbatim.src_parameter() {
154                    let link = DocumentLink::new(
155                        verbatim.range().clone(),
156                        src.to_string(),
157                        LinkType::VerbatimSrc,
158                    );
159                    links.push(link);
160                }
161            }
162        }
163
164        links
165    }
166}
167
168impl Document {
169    /// Find all links in the entire document
170    ///
171    /// This searches the entire document tree to find all clickable links:
172    /// - URL references in text
173    /// - File references in text
174    /// - Verbatim block src parameters
175    ///
176    /// # Returns
177    /// Vector of all links found in the document
178    ///
179    /// # Example
180    /// ```rust,ignore
181    /// let doc = parse_document(source)?;
182    /// let links = doc.find_all_links();
183    /// for link in links {
184    ///     // Make link clickable in LSP
185    ///     send_document_link(link.range, link.target);
186    /// }
187    /// ```
188    pub fn find_all_links(&self) -> Vec<DocumentLink> {
189        let mut links = Vec::new();
190        if let Some(title) = &self.title {
191            collect_text_content_links(&title.content, &mut links);
192        }
193        links.extend(self.root.find_all_links());
194        links
195    }
196}
197
198/// Walks `text`'s inline tree and pushes a [`DocumentLink`] for each URL and
199/// File reference, with a range covering exactly the `[bracketed]` reference.
200///
201/// LSP `textDocument/documentLink` ranges drive the clickable + visually
202/// underlined area in editors. Using the containing paragraph or title range
203/// would underline the whole element — which is exactly the bug this function
204/// is replacing.
205///
206/// The cursor work is delegated to the shared
207/// [`crate::lex::ast::inline_positions::walk_text_content_positions`] visitor;
208/// this function only contributes the link-shaping logic in `LinkCollector`.
209fn collect_text_content_links(text: &TextContent, out: &mut Vec<DocumentLink>) {
210    let mut collector = LinkCollector { out };
211    walk_text_content_positions(text, &mut collector);
212}
213
214/// Visitor that emits a [`DocumentLink`] per URL/File reference. All other
215/// inline node variants are intentionally ignored (footnote/citation/session/
216/// annotation/TK refs do not become document links).
217struct LinkCollector<'a> {
218    out: &'a mut Vec<DocumentLink>,
219}
220
221impl<'a> InlinePositionVisitor for LinkCollector<'a> {
222    fn visit_reference(
223        &mut self,
224        open_marker: &Range,
225        _content: &Range,
226        close_marker: &Range,
227        data: &ReferenceInline,
228    ) {
229        let (target, link_type) = match &data.reference_type {
230            ReferenceType::Url { target } => (target.clone(), LinkType::Url),
231            ReferenceType::File { target } => (target.clone(), LinkType::File),
232            _ => return,
233        };
234        // The link covers `[content]` inclusive of brackets — span from the
235        // open marker's start to the close marker's end.
236        let full = Range::new(
237            open_marker.span.start..close_marker.span.end,
238            open_marker.start,
239            close_marker.end,
240        );
241        self.out.push(DocumentLink::new(full, target, link_type));
242    }
243}
244
#[cfg(test)]
mod tests {
    use super::super::range::Position;
    use super::*;
    use crate::lex::parsing::parse_document;

    /// Parses `source` and returns every link the resulting document reports.
    fn links_in(source: &str) -> Vec<DocumentLink> {
        parse_document(source).unwrap().find_all_links()
    }

    /// Byte span of the first occurrence of `bracketed` within `source`.
    fn span_of(source: &str, bracketed: &str) -> std::ops::Range<usize> {
        let start = source.find(bracketed).expect("bracket present");
        start..start + bracketed.len()
    }

    #[test]
    fn test_url_link_extraction() {
        let links = links_in("Check out [https://example.com] for more info.\n\n");

        assert_eq!(links.len(), 1);
        let only = &links[0];
        assert_eq!(only.link_type, LinkType::Url);
        assert_eq!(only.target, "https://example.com");
    }

    #[test]
    fn test_file_link_extraction() {
        let links = links_in("See [./README.md] for details.\n\n");

        assert_eq!(links.len(), 1);
        let only = &links[0];
        assert_eq!(only.link_type, LinkType::File);
        assert_eq!(only.target, "./README.md");
    }

    #[test]
    fn test_multiple_links() {
        let links = links_in("Visit [https://example.com] and check [./docs.md].\n\n");

        assert_eq!(links.len(), 2);
        let has_kind = |kind: LinkType| links.iter().any(|link| link.link_type == kind);
        assert!(has_kind(LinkType::Url));
        assert!(has_kind(LinkType::File));
    }

    #[test]
    fn test_verbatim_src_parameter() {
        let links = links_in(
            "Sunset Photo:\n    As the sun sets over the ocean.\n:: image src=./diagram.png ::\n\n",
        );

        // Keep only the links that came from a verbatim `src` parameter.
        let src_links: Vec<&DocumentLink> = links
            .iter()
            .filter(|link| link.link_type == LinkType::VerbatimSrc)
            .collect();
        assert_eq!(
            src_links.len(),
            1,
            "Expected 1 verbatim src link, found {}. All links: {:?}",
            src_links.len(),
            links
        );
        assert_eq!(src_links[0].target, "./diagram.png");
    }

    #[test]
    fn test_verbatim_src_parameter_method() {
        use super::super::elements::{Data, Label, Parameter};

        // Verbatim block whose closing data carries a `src` parameter.
        let src_param = Parameter::new(String::from("src"), String::from("./test.png"));
        let with_src = Verbatim::with_subject(
            String::from("Test"),
            Data::new(Label::new(String::from("image")), vec![src_param]),
        );
        assert_eq!(with_src.src_parameter(), Some("./test.png"));

        // Verbatim block with no parameters at all.
        let without_src = Verbatim::with_subject(
            String::from("Test"),
            Data::new(Label::new(String::from("code")), vec![]),
        );
        assert_eq!(without_src.src_parameter(), None);
    }

    #[test]
    fn test_no_links() {
        let links = links_in("Just plain text with no links.\n\n");

        assert_eq!(links.len(), 0);
    }

    #[test]
    fn test_footnote_not_a_link() {
        let links = links_in("Text with footnote [42].\n\n");

        // Footnote references are not clickable links
        assert_eq!(links.len(), 0);
    }

    #[test]
    fn test_nested_session_links() {
        let links = links_in("Outer Session\n\n    Inner session with [https://example.com].\n\n");

        // Should find link in nested session
        assert_eq!(links.len(), 1);
        assert_eq!(links[0].target, "https://example.com");
    }

    // -----------------------------------------------------------------------
    // Range-precision tests
    //
    // The LSP `textDocument/documentLink` response uses each link's `range`
    // to decide what is clickable and what gets the link decoration in the
    // editor. Editors (notably VSCode) render the entire range as an
    // underlined link. So the range must cover *only* the `[bracketed]`
    // reference, not the surrounding paragraph or title line.
    // -----------------------------------------------------------------------

    #[test]
    fn test_url_link_range_is_bracket_bounded_in_paragraph() {
        // Byte map of source line:
        //   "Check out [https://example.com] for more info."
        //    0123456789^                   ^
        //              10                  30 (inclusive ']' position)
        let source = "Check out [https://example.com] for more info.\n\n";
        let links = links_in(source);

        assert_eq!(links.len(), 1);
        let link = &links[0];
        assert_eq!(link.target, "https://example.com");

        let captured = &source[link.range.span.clone()];
        assert_eq!(
            link.range.span,
            10..31,
            "DocumentLink range must cover only the [bracketed] reference, not the whole paragraph. \
             Captured text: {captured:?}"
        );
        assert_eq!(link.range.start, Position::new(0, 10));
        assert_eq!(link.range.end, Position::new(0, 31));
    }

    #[test]
    fn test_file_link_range_is_bracket_bounded_in_paragraph() {
        // Byte map of source line:
        //   "See [./README.md] for details."
        //    0123^         ^
        //        4         16 (inclusive ']')
        let source = "See [./README.md] for details.\n\n";
        let links = links_in(source);

        assert_eq!(links.len(), 1);
        let link = &links[0];
        assert_eq!(link.target, "./README.md");

        let captured = &source[link.range.span.clone()];
        assert_eq!(
            link.range.span,
            4..17,
            "DocumentLink range must cover only the [bracketed] reference, not the whole paragraph. \
             Captured text: {captured:?}"
        );
        assert_eq!(link.range.start, Position::new(0, 4));
        assert_eq!(link.range.end, Position::new(0, 17));
    }

    #[test]
    fn test_multiple_links_have_distinct_bracket_bounded_ranges() {
        // Byte map:
        //   "Visit [https://example.com] and check [./docs.md]."
        //    0     6                    27          38       49
        let source = "Visit [https://example.com] and check [./docs.md].\n\n";
        let links = links_in(source);

        assert_eq!(links.len(), 2);

        let first_of = |kind: LinkType| links.iter().find(|link| link.link_type == kind);
        let url = first_of(LinkType::Url).expect("url link");
        let file = first_of(LinkType::File).expect("file link");

        assert_eq!(
            url.range.span,
            6..27,
            "URL link captured: {:?}",
            &source[url.range.span.clone()]
        );
        assert_eq!(
            file.range.span,
            38..49,
            "File link captured: {:?}",
            &source[file.range.span.clone()]
        );
    }

    #[test]
    fn test_long_paragraph_with_single_file_ref_does_not_include_surrounding_text_in_range() {
        // Reproduces the dodot architecture.lex case: a long paragraph that
        // contains a single file reference. Before the fix, the link's range
        // covered the whole paragraph so VSCode underlined every word.
        let source = "\
This document describes how dodot is organized. It is the conceptual view. \
For concrete types, crate layout, and trait signatures, see [./types.lex].\n\n";
        let links = links_in(source);

        assert_eq!(links.len(), 1);
        let link = &links[0];
        assert_eq!(link.target, "./types.lex");

        let expected = span_of(source, "[./types.lex]");

        let captured = &source[link.range.span.clone()];
        assert_eq!(
            link.range.span, expected,
            "Link range must be bracket-bounded. Got captured text: {captured:?}"
        );
    }

    // -----------------------------------------------------------------------
    // Nested-session title coverage
    //
    // `Session::find_all_links` originally only inspected `self.title`, while
    // `Document::find_all_links` calls it on the implicit root session whose
    // title is empty. Paragraph traversal recurses into nested sessions, but
    // nested-session *titles* never get scanned. So URL/File refs that appear
    // in a section heading like
    //
    //     1. See [./handlers.lex] for the phase list
    //
    //         (body)
    //
    // were silently dropped from the LSP `documentLink` response, and editors
    // had no clickable surface on the heading.
    // -----------------------------------------------------------------------

    #[test]
    fn test_file_ref_in_nested_session_title_produces_link() {
        // "Doc title" + blank + indent → outer session whose title is
        // "Doc title". Then the indented "See [./other.lex] for details"
        // line, followed by a blank and a deeper indent, becomes a *nested*
        // session whose title contains a file reference.
        let source =
            "Doc title\n\n    See [./other.lex] for details\n\n        nested content here.\n\n";
        let links = links_in(source);

        assert_eq!(
            links.len(),
            1,
            "expected one link for the file ref in the nested-session title; got {links:?}"
        );
        let link = &links[0];
        assert_eq!(link.target, "./other.lex");
        assert_eq!(link.link_type, LinkType::File);

        let expected = span_of(source, "[./other.lex]");
        assert_eq!(
            link.range.span,
            expected,
            "Nested-session title link must be bracket-bounded. Got captured text: {:?}",
            &source[link.range.span.clone()]
        );
    }

    #[test]
    fn test_url_ref_in_nested_session_title_produces_link() {
        let source = "Doc title\n\n    Visit [https://example.com] today\n\n        body line.\n\n";
        let links = links_in(source);

        assert_eq!(links.len(), 1);
        let link = &links[0];
        assert_eq!(link.target, "https://example.com");
        assert_eq!(link.link_type, LinkType::Url);

        assert_eq!(link.range.span, span_of(source, "[https://example.com]"));
    }

    #[test]
    fn test_refs_in_both_outer_and_nested_session_titles_produce_links() {
        // The outer title also contains a file reference, so both the outer
        // and nested titles should each contribute one link, distinct from
        // any links found in paragraphs.
        let source = "\
Top [./top.lex] section

    Inner [./inner.lex] subsection

        See also [./body.lex] in the body.
";
        let links = links_in(source);

        assert_eq!(
            links.len(),
            3,
            "expected three links (outer-title, inner-title, body); got {links:?}"
        );
        let targets: Vec<&str> = links.iter().map(|link| link.target.as_str()).collect();
        for wanted in ["./top.lex", "./inner.lex", "./body.lex"] {
            assert!(targets.contains(&wanted));
        }
    }
}
577}