lex-core 0.14.0

Parser library for the lex format
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
//! Document link extraction for LSP support
//!
//! This module provides APIs for extracting clickable links from Lex documents,
//! enabling the LSP "document links" feature that makes URLs and file references
//! clickable in editors.
//!
//! ## Problem
//!
//! The LSP document links feature needs to find all clickable links:
//! - URLs in text (`[https://example.com]`)
//! - File references (`[./file.txt]`)
//! - Verbatim block `src` parameters (images, includes)
//!
//! While `ReferenceType::Url` and `ReferenceType::File` exist, there's no API to
//! extract all links from a document.
//!
//! ## Solution
//!
//! This module provides:
//! - `DocumentLink` struct representing a link with its location and type
//! - `find_all_links()` methods on Document and Session
//! - `src_parameter()` method on Verbatim to access src parameters
//!
//! ## Link Types
//!
//! 1. **URL links**: `[https://example.com]` - HTTP/HTTPS URLs
//! 2. **File links**: `[./file.txt]`, `[../path/to/file.md]` - File references
//! 3. **Verbatim src**: `:: image src=./image.png ::` - External resource references

use super::elements::Verbatim;
use super::inline_positions::{walk_text_content_positions, InlinePositionVisitor};
use super::range::Range;
use super::text_content::TextContent;
use super::{Document, Session};
use crate::lex::inlines::{ReferenceInline, ReferenceType};
use std::fmt;

/// One clickable link discovered in a document: where it sits in the source,
/// what it points at, and which kind of link it is.
#[derive(Clone, Debug, PartialEq)]
pub struct DocumentLink {
    /// Source range the link occupies. Inline URL/File references use the
    /// bracket-to-bracket range; verbatim `src` links use the verbatim
    /// block's own range.
    pub range: Range,
    /// The link destination as written in the source (URL, file path, or
    /// `src` parameter value).
    pub target: String,
    /// Category of the link.
    pub link_type: LinkType,
}

impl DocumentLink {
    /// Assembles a link from its three parts.
    ///
    /// The arguments are stored as given; nothing is validated or normalised.
    pub fn new(range: Range, target: String, link_type: LinkType) -> Self {
        DocumentLink {
            range,
            target,
            link_type,
        }
    }
}

impl fmt::Display for DocumentLink {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(
            f,
            "{:?} link: {} at {}",
            self.link_type, self.target, self.range.start
        )
    }
}

/// The kinds of link this module can report.
///
/// Only `Debug` is derived for formatting, so print values with `{:?}`.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum LinkType {
    /// An inline URL reference such as `[https://example.com]`.
    Url,
    /// An inline file reference such as `[./file.txt]`.
    File,
    /// The `src` parameter of a verbatim block, e.g. `src=./image.png`.
    VerbatimSrc,
}

impl Verbatim {
    /// Looks up the `src` parameter on this verbatim block's closing data.
    ///
    /// Typical uses of `src` include:
    /// - Image sources: `:: image src=./diagram.png ::`
    /// - File includes: `:: include src=./code.rs ::`
    /// - External resources: `:: data src=./data.csv ::`
    ///
    /// # Returns
    /// The value of the first parameter whose key is exactly `"src"`, borrowed
    /// from the block, or `None` when no such parameter exists.
    ///
    /// # Example
    /// ```rust,ignore
    /// if let Some(src) = verbatim.src_parameter() {
    ///     // Make src clickable in editor
    ///     println!("Link to: {}", src);
    /// }
    /// ```
    pub fn src_parameter(&self) -> Option<&str> {
        let params = &self.closing_data.parameters;
        // Stop at the first match; later duplicates of `src` are ignored.
        params
            .iter()
            .find_map(|param| (param.key == "src").then(|| param.value.as_str()))
    }
}

impl Session {
    /// Find all links at any depth in this session
    ///
    /// Three sources are scanned, and the returned links are grouped in this
    /// order (which is not necessarily document order):
    /// 1. URL/File references in this session's title and in the titles of
    ///    the sessions yielded by `iter_sessions_recursive()`:
    ///    `[https://example.com]`, `[./path/to/file.txt]`
    /// 2. URL/File references in the text lines of every paragraph yielded by
    ///    `iter_paragraphs_recursive()`
    /// 3. Verbatim `src` parameters: `src=./image.png`
    ///
    /// # Returns
    /// Vector of all links found in this session and its descendants
    ///
    /// # Example
    /// ```rust,ignore
    /// let links = session.find_all_links();
    /// for link in links {
    ///     // `LinkType` only derives `Debug`, so it is printed with `{:?}`.
    ///     println!("Found {:?} link: {}", link.link_type, link.target);
    /// }
    /// ```
    pub fn find_all_links(&self) -> Vec<DocumentLink> {
        use super::elements::content_item::ContentItem;
        use super::traits::AstNode;

        let mut links = Vec::new();

        // Links in this session's title and every nested session's title.
        //
        // `Document::find_all_links` invokes us on the implicit root session
        // (whose title is empty), so without the recursive sweep below we
        // would silently drop every URL/File reference that appears in a
        // section heading — `1. See [./handlers.lex] for details` and
        // similar — even though paragraph-body refs were correctly found.
        collect_text_content_links(&self.title, &mut links);
        for nested in self.iter_sessions_recursive() {
            collect_text_content_links(&nested.title, &mut links);
        }

        // Paragraphs (recursively into nested sessions). Only text lines carry
        // inline content; any other line item is skipped.
        for paragraph in self.iter_paragraphs_recursive() {
            for line_item in &paragraph.lines {
                if let ContentItem::TextLine(line) = line_item {
                    collect_text_content_links(&line.content, &mut links);
                }
            }
        }

        // Verbatim `src` parameters — these aren't bracketed inline references,
        // so the verbatim's range stays as-is.
        // NOTE(review): this makes the whole verbatim block the link range;
        // narrowing it to the `src=...` parameter would need a range for the
        // parameter itself, which is not visible from here — confirm.
        links.extend(
            self.iter_all_nodes_with_depth()
                .filter_map(|(item, _depth)| {
                    let verbatim = match item {
                        ContentItem::VerbatimBlock(verbatim) => verbatim,
                        _ => return None,
                    };
                    let src = verbatim.src_parameter()?;
                    Some(DocumentLink::new(
                        verbatim.range().clone(),
                        src.to_string(),
                        LinkType::VerbatimSrc,
                    ))
                }),
        );

        links
    }
}

impl Document {
    /// Collects every clickable link in the document.
    ///
    /// Links from the document title (when one is present) come first,
    /// followed by everything the root session's `find_all_links()` reports:
    /// - URL references in text
    /// - File references in text
    /// - Verbatim block src parameters
    ///
    /// # Returns
    /// Vector of all links found in the document
    ///
    /// # Example
    /// ```rust,ignore
    /// let doc = parse_document(source)?;
    /// let links = doc.find_all_links();
    /// for link in links {
    ///     // Make link clickable in LSP
    ///     send_document_link(link.range, link.target);
    /// }
    /// ```
    pub fn find_all_links(&self) -> Vec<DocumentLink> {
        let mut collected = Vec::new();

        // The document-level title is stored outside the root session, so it
        // has to be scanned separately.
        if let Some(doc_title) = self.title.as_ref() {
            collect_text_content_links(&doc_title.content, &mut collected);
        }

        let from_root = self.root.find_all_links();
        collected.extend(from_root);
        collected
    }
}

/// Appends to `out` one [`DocumentLink`] per URL or File reference found in
/// `text`, each with a range spanning just the `[bracketed]` reference.
///
/// The range matters because LSP `textDocument/documentLink` ranges decide
/// which characters an editor renders as clickable and underlined. Reporting
/// the enclosing paragraph or title range instead would underline the entire
/// element.
///
/// Position tracking is handled by the shared `walk_text_content_positions`
/// walker; the only logic added here is the link construction performed by
/// `LinkCollector`.
fn collect_text_content_links(text: &TextContent, out: &mut Vec<DocumentLink>) {
    walk_text_content_positions(text, &mut LinkCollector { out });
}

/// Inline-position visitor that records a [`DocumentLink`] for every URL or
/// File reference it is shown. Every other reference type is deliberately
/// skipped and never becomes a document link.
struct LinkCollector<'a> {
    /// Destination vector; links are appended in visit order.
    out: &'a mut Vec<DocumentLink>,
}

impl InlinePositionVisitor for LinkCollector<'_> {
    /// Handles one inline reference, pushing a link when it is a URL or File
    /// reference and doing nothing otherwise.
    fn visit_reference(
        &mut self,
        open_marker: &Range,
        _content: &Range,
        close_marker: &Range,
        data: &ReferenceInline,
    ) {
        let (link_type, target) = match &data.reference_type {
            ReferenceType::Url { target } => (LinkType::Url, target),
            ReferenceType::File { target } => (LinkType::File, target),
            _ => return,
        };

        // Brackets are part of the link: start where the open marker starts
        // and finish where the close marker finishes.
        let byte_span = open_marker.span.start..close_marker.span.end;
        let bracketed = Range::new(byte_span, open_marker.start, close_marker.end);

        self.out
            .push(DocumentLink::new(bracketed, target.clone(), link_type));
    }
}

#[cfg(test)]
mod tests {
    use super::super::range::Position;
    use super::*;
    use crate::lex::parsing::parse_document;

    /// A single URL reference in a paragraph yields one `Url` link.
    #[test]
    fn test_url_link_extraction() {
        let input = "Check out [https://example.com] for more info.\n\n";
        let links = parse_document(input).unwrap().find_all_links();

        assert_eq!(links.len(), 1);
        let only = &links[0];
        assert_eq!(only.link_type, LinkType::Url);
        assert_eq!(only.target, "https://example.com");
    }

    /// A single file reference in a paragraph yields one `File` link.
    #[test]
    fn test_file_link_extraction() {
        let input = "See [./README.md] for details.\n\n";
        let links = parse_document(input).unwrap().find_all_links();

        assert_eq!(links.len(), 1);
        let only = &links[0];
        assert_eq!(only.link_type, LinkType::File);
        assert_eq!(only.target, "./README.md");
    }

    /// Two references of different kinds in one paragraph are both reported.
    #[test]
    fn test_multiple_links() {
        let input = "Visit [https://example.com] and check [./docs.md].\n\n";
        let links = parse_document(input).unwrap().find_all_links();

        assert_eq!(links.len(), 2);
        for wanted in [LinkType::Url, LinkType::File] {
            assert!(links.iter().any(|l| l.link_type == wanted));
        }
    }

    /// A verbatim block with a `src` parameter produces a `VerbatimSrc` link.
    #[test]
    fn test_verbatim_src_parameter() {
        let input =
            "Sunset Photo:\n    As the sun sets over the ocean.\n:: image src=./diagram.png ::\n\n";
        let links = parse_document(input).unwrap().find_all_links();

        // Keep only the links that came from a verbatim `src` parameter.
        let src_links: Vec<_> = links
            .iter()
            .filter(|l| matches!(l.link_type, LinkType::VerbatimSrc))
            .collect();
        assert_eq!(
            src_links.len(),
            1,
            "Expected 1 verbatim src link, found {}. All links: {:?}",
            src_links.len(),
            links
        );
        assert_eq!(src_links[0].target, "./diagram.png");
    }

    /// `Verbatim::src_parameter` returns the value when present, else `None`.
    #[test]
    fn test_verbatim_src_parameter_method() {
        use super::super::elements::{Data, Label, Parameter};

        // Block carrying a `src` parameter.
        let src_param = Parameter::new(String::from("src"), String::from("./test.png"));
        let with_src = Verbatim::with_subject(
            String::from("Test"),
            Data::new(Label::new(String::from("image")), vec![src_param]),
        );
        assert_eq!(with_src.src_parameter(), Some("./test.png"));

        // Block with no parameters at all.
        let without_src = Verbatim::with_subject(
            String::from("Test"),
            Data::new(Label::new(String::from("code")), vec![]),
        );
        assert_eq!(without_src.src_parameter(), None);
    }

    /// Plain text without references produces no links.
    #[test]
    fn test_no_links() {
        let input = "Just plain text with no links.\n\n";
        let links = parse_document(input).unwrap().find_all_links();

        assert_eq!(links.len(), 0);
    }

    /// Footnote references are not clickable, so they must not be reported.
    #[test]
    fn test_footnote_not_a_link() {
        let input = "Text with footnote [42].\n\n";
        let links = parse_document(input).unwrap().find_all_links();

        assert_eq!(links.len(), 0);
    }

    /// A reference inside a nested session's body is still found.
    #[test]
    fn test_nested_session_links() {
        let input = "Outer Session\n\n    Inner session with [https://example.com].\n\n";
        let links = parse_document(input).unwrap().find_all_links();

        assert_eq!(links.len(), 1);
        assert_eq!(links[0].target, "https://example.com");
    }

    // -----------------------------------------------------------------------
    // Range precision
    //
    // Editors use each link's `range` from the LSP `textDocument/documentLink`
    // response both as the clickable area and as the decorated (underlined)
    // area — VSCode in particular underlines the whole range. The range
    // therefore has to be exactly the `[bracketed]` reference and nothing of
    // the surrounding paragraph or title line.
    // -----------------------------------------------------------------------

    #[test]
    fn test_url_link_range_is_bracket_bounded_in_paragraph() {
        // In "Check out [https://example.com] for more info." the '[' is at
        // byte 10 and the ']' at byte 30, giving the half-open span 10..31.
        let input = "Check out [https://example.com] for more info.\n\n";
        let links = parse_document(input).unwrap().find_all_links();

        assert_eq!(links.len(), 1);
        let hit = &links[0];
        assert_eq!(hit.target, "https://example.com");

        let captured = &input[hit.range.span.clone()];
        assert_eq!(
            hit.range.span,
            10..31,
            "DocumentLink range must cover only the [bracketed] reference, not the whole paragraph. \
             Captured text: {captured:?}"
        );
        assert_eq!(hit.range.start, Position::new(0, 10));
        assert_eq!(hit.range.end, Position::new(0, 31));
    }

    #[test]
    fn test_file_link_range_is_bracket_bounded_in_paragraph() {
        // In "See [./README.md] for details." the '[' is at byte 4 and the
        // ']' at byte 16, giving the half-open span 4..17.
        let input = "See [./README.md] for details.\n\n";
        let links = parse_document(input).unwrap().find_all_links();

        assert_eq!(links.len(), 1);
        let hit = &links[0];
        assert_eq!(hit.target, "./README.md");

        let captured = &input[hit.range.span.clone()];
        assert_eq!(
            hit.range.span,
            4..17,
            "DocumentLink range must cover only the [bracketed] reference, not the whole paragraph. \
             Captured text: {captured:?}"
        );
        assert_eq!(hit.range.start, Position::new(0, 4));
        assert_eq!(hit.range.end, Position::new(0, 17));
    }

    #[test]
    fn test_multiple_links_have_distinct_bracket_bounded_ranges() {
        // In "Visit [https://example.com] and check [./docs.md]." the URL
        // reference occupies bytes 6..27 and the file reference 38..49.
        let input = "Visit [https://example.com] and check [./docs.md].\n\n";
        let links = parse_document(input).unwrap().find_all_links();

        assert_eq!(links.len(), 2);

        let url = links
            .iter()
            .find(|candidate| candidate.link_type == LinkType::Url)
            .expect("url link");
        assert_eq!(
            url.range.span,
            6..27,
            "URL link captured: {:?}",
            &input[url.range.span.clone()]
        );

        let file = links
            .iter()
            .find(|candidate| candidate.link_type == LinkType::File)
            .expect("file link");
        assert_eq!(
            file.range.span,
            38..49,
            "File link captured: {:?}",
            &input[file.range.span.clone()]
        );
    }

    #[test]
    fn test_long_paragraph_with_single_file_ref_does_not_include_surrounding_text_in_range() {
        // Modelled on the dodot architecture.lex document: one long paragraph
        // ending in a single file reference. A paragraph-wide link range
        // would make VSCode underline every word of it.
        let input = concat!(
            "This document describes how dodot is organized. It is the conceptual view. ",
            "For concrete types, crate layout, and trait signatures, see [./types.lex].\n\n",
        );
        let links = parse_document(input).unwrap().find_all_links();

        assert_eq!(links.len(), 1);
        let hit = &links[0];
        assert_eq!(hit.target, "./types.lex");

        let needle = "[./types.lex]";
        let bracket_start = input.find(needle).expect("bracket present");
        let bracket_end = bracket_start + needle.len();

        let captured = &input[hit.range.span.clone()];
        assert_eq!(
            hit.range.span,
            bracket_start..bracket_end,
            "Link range must be bracket-bounded. Got captured text: {captured:?}"
        );
    }

    // -----------------------------------------------------------------------
    // Titles of nested sessions
    //
    // `Document::find_all_links` calls `Session::find_all_links` on the
    // implicit root session, whose own title is empty. Paragraphs are walked
    // recursively, but a version that only looked at `self.title` would never
    // scan the titles of nested sessions, so a URL/File reference in a
    // heading such as
    //
    //     1. See [./handlers.lex] for the phase list
    //
    //         (body)
    //
    // would be missing from the LSP `documentLink` response and the heading
    // would offer nothing to click on.
    // -----------------------------------------------------------------------

    #[test]
    fn test_file_ref_in_nested_session_title_produces_link() {
        // "Doc title", a blank line and an indent open the outer session.
        // The indented "See [./other.lex] for details" line, followed by a
        // blank line and a deeper indent, then becomes the title of a
        // *nested* session — and that title holds the file reference.
        let input =
            "Doc title\n\n    See [./other.lex] for details\n\n        nested content here.\n\n";
        let links = parse_document(input).unwrap().find_all_links();

        assert_eq!(
            links.len(),
            1,
            "expected one link for the file ref in the nested-session title; got {links:?}"
        );
        let hit = &links[0];
        assert_eq!(hit.target, "./other.lex");
        assert_eq!(hit.link_type, LinkType::File);

        let needle = "[./other.lex]";
        let bracket_start = input.find(needle).expect("bracket present");
        let bracket_end = bracket_start + needle.len();
        assert_eq!(
            hit.range.span,
            bracket_start..bracket_end,
            "Nested-session title link must be bracket-bounded. Got captured text: {:?}",
            &input[hit.range.span.clone()]
        );
    }

    #[test]
    fn test_url_ref_in_nested_session_title_produces_link() {
        let input = "Doc title\n\n    Visit [https://example.com] today\n\n        body line.\n\n";
        let links = parse_document(input).unwrap().find_all_links();

        assert_eq!(links.len(), 1);
        let hit = &links[0];
        assert_eq!(hit.target, "https://example.com");
        assert_eq!(hit.link_type, LinkType::Url);

        let needle = "[https://example.com]";
        let bracket_start = input.find(needle).expect("bracket present");
        let bracket_end = bracket_start + needle.len();
        assert_eq!(hit.range.span, bracket_start..bracket_end);
    }

    #[test]
    fn test_refs_in_both_outer_and_nested_session_titles_produce_links() {
        // Both the outer title and the nested title carry a file reference,
        // and the body paragraph carries a third; each must yield its own
        // link.
        let input = concat!(
            "Top [./top.lex] section\n",
            "\n",
            "    Inner [./inner.lex] subsection\n",
            "\n",
            "        See also [./body.lex] in the body.\n",
        );
        let links = parse_document(input).unwrap().find_all_links();

        assert_eq!(
            links.len(),
            3,
            "expected three links (outer-title, inner-title, body); got {links:?}"
        );
        let targets: Vec<&str> = links.iter().map(|l| l.target.as_str()).collect();
        for expected in ["./top.lex", "./inner.lex", "./body.lex"] {
            assert!(targets.contains(&expected));
        }
    }
}