use super::anchoring::{ReferenceAnchor, ReferenceLine};
use super::elements::Verbatim;
use super::inline_positions::{walk_text_content_positions, InlinePositionVisitor};
use super::range::{Position, Range};
use super::text_content::TextContent;
use super::{Document, Session};
use crate::lex::inlines::{AnchorDirection, ReferenceInline, ReferenceType, WordAnchor};
use std::fmt;
/// A link discovered in a document: where it sits in the source and what it points to.
#[derive(Debug, Clone, PartialEq)]
pub struct DocumentLink {
/// Source range attributed to the link (an anchored word, an anchored element's text,
/// the bracketed reference itself, or a verbatim block's range).
pub range: Range,
/// Destination of the link: a URL/file reference target or a verbatim `src` value.
pub target: String,
/// Category of the link.
pub link_type: LinkType,
}
impl DocumentLink {
pub fn new(range: Range, target: String, link_type: LinkType) -> Self {
Self {
range,
target,
link_type,
}
}
}
impl fmt::Display for DocumentLink {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(
f,
"{:?} link: {} at {}",
self.link_type, self.target, self.range.start
)
}
}
/// Category of a [`DocumentLink`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LinkType {
/// Produced from a reference whose type is `ReferenceType::Url`.
Url,
/// Produced from a reference whose type is `ReferenceType::File`.
File,
/// Produced from the `src` parameter of a verbatim block.
VerbatimSrc,
}
impl Verbatim {
    /// Returns the value of the `src` parameter in this block's closing data, if any.
    ///
    /// When more than one parameter is keyed `src`, the first one in order is returned.
    pub fn src_parameter(&self) -> Option<&str> {
        for param in self.closing_data.parameters.iter() {
            if param.key == "src" {
                return Some(param.value.as_str());
            }
        }
        None
    }
}
impl Session {
    /// Collects every link found in this session.
    ///
    /// Links are appended in this order:
    /// 1. inline references in this session's own title,
    /// 2. inline references in the titles yielded by `iter_sessions_recursive`,
    /// 3. inline references in the text lines of every paragraph yielded by
    ///    `iter_paragraphs_recursive`,
    /// 4. one [`LinkType::VerbatimSrc`] link per verbatim block (from
    ///    `iter_all_nodes_with_depth`) that carries a `src` parameter, using the
    ///    block's whole range.
    pub fn find_all_links(&self) -> Vec<DocumentLink> {
        use super::elements::content_item::ContentItem;
        use super::traits::AstNode;

        let mut links = Vec::new();

        // Session titles: this session first, then the nested ones.
        collect_text_content_links(&self.title, &mut links);
        for nested in self.iter_sessions_recursive() {
            collect_text_content_links(&nested.title, &mut links);
        }

        // Paragraph bodies: only `TextLine` items carry inline content to walk.
        for paragraph in self.iter_paragraphs_recursive() {
            for line_item in &paragraph.lines {
                if let ContentItem::TextLine(line) = line_item {
                    collect_text_content_links(&line.content, &mut links);
                }
            }
        }

        // Verbatim blocks: a `src=` parameter is reported as a link over the whole block.
        for (item, _depth) in self.iter_all_nodes_with_depth() {
            if let ContentItem::VerbatimBlock(verbatim) = item {
                if let Some(src) = verbatim.src_parameter() {
                    links.push(DocumentLink::new(
                        verbatim.range().clone(),
                        src.to_string(),
                        LinkType::VerbatimSrc,
                    ));
                }
            }
        }

        links
    }
}
impl Document {
    /// Collects every link in the document.
    ///
    /// Order of the result: links from the document title (when there is one),
    /// then everything reported by the root session, then links contributed by
    /// the document's reference lines.
    pub fn find_all_links(&self) -> Vec<DocumentLink> {
        let mut found = Vec::new();

        match self.title.as_ref() {
            Some(title) => collect_text_content_links(&title.content, &mut found),
            None => {}
        }

        let from_root = self.root.find_all_links();
        found.extend(from_root);

        for line in self.reference_lines() {
            collect_reference_line_link(line, &mut found);
        }

        found
    }
}
/// Appends the link described by a reference line to `out`, if it is a URL or file reference.
///
/// Any other reference type contributes nothing. The emitted range reuses the byte
/// span and start position of the anchor (the anchored element for
/// `WholeElement`, the reference's own range for `SelfLink`); its end position
/// is on the start line, at the start column plus the UTF-16 width of the anchored text.
fn collect_reference_line_link(ref_line: &ReferenceLine, out: &mut Vec<DocumentLink>) {
    let reference = &ref_line.reference;

    let (link_type, target) = match &reference.reference_type {
        ReferenceType::File { target } => (LinkType::File, target.clone()),
        ReferenceType::Url { target } => (LinkType::Url, target.clone()),
        _ => return,
    };

    let (base, width) = match &ref_line.anchor {
        ReferenceAnchor::SelfLink => {
            // Width of `[` + raw + `]`; each bracket is a single UTF-16 unit.
            let bracketed = utf16_width(&reference.raw) + 2;
            (&ref_line.reference_range, bracketed)
        }
        ReferenceAnchor::WholeElement {
            anchor_range,
            anchor_text,
            ..
        } => (anchor_range, utf16_width(anchor_text)),
    };

    let start = base.start;
    let end = Position::new(start.line, start.column + width);
    let range = Range::new(base.span.clone(), start, end);
    out.push(DocumentLink::new(range, target, link_type));
}
fn collect_text_content_links(text: &TextContent, out: &mut Vec<DocumentLink>) {
let mut collector = LinkCollector::new(out);
walk_text_content_positions(text, &mut collector);
collector.flush();
}
/// Inline-position visitor state that accumulates links into a caller-owned vector.
struct LinkCollector<'a> {
/// Destination vector; every resolved link is pushed here.
out: &'a mut Vec<DocumentLink>,
/// The most recently visited plain-text run, consulted when a reference is
/// anchored to the word preceding it.
last_plain: Option<PlainSpan>,
/// A reference anchored to the word following it, held until the next
/// plain-text run is visited or the collector is flushed.
pending_following: Option<PendingFollowing>,
}
/// An owned copy of one plain-text run: where it is and what it contains.
struct PlainSpan {
/// Source range of the run.
range: Range,
/// Text of the run as handed to the visitor.
text: String,
}
/// A link reference anchored to the word that follows it, kept until that
/// word's position can be looked up.
struct PendingFollowing {
/// The anchor word to search for in the next plain-text run.
word: String,
/// Link destination taken from the reference.
target: String,
/// Link category taken from the reference.
link_type: LinkType,
/// Range from the reference's opening marker to its closing marker; used as
/// the link range when the anchor word cannot be located.
bracket_range: Range,
}
impl<'a> LinkCollector<'a> {
fn new(out: &'a mut Vec<DocumentLink>) -> Self {
Self {
out,
last_plain: None,
pending_following: None,
}
}
fn bracket_range(open_marker: &Range, close_marker: &Range) -> Range {
Range::new(
open_marker.span.start..close_marker.span.end,
open_marker.start,
close_marker.end,
)
}
fn push(&mut self, range: Range, target: String, link_type: LinkType) {
self.out.push(DocumentLink::new(range, target, link_type));
}
fn flush(&mut self) {
if let Some(pending) = self.pending_following.take() {
self.push(pending.bracket_range, pending.target, pending.link_type);
}
}
}
impl<'a> InlinePositionVisitor for LinkCollector<'a> {
    /// Records a plain-text run.
    ///
    /// If a following-anchor reference is pending, it is resolved against this
    /// run first: the link range becomes the anchor word located in the run's
    /// first token, or the reference's bracket range when the word cannot be
    /// located. The run is then remembered as the latest plain text.
    fn visit_plain(&mut self, range: &Range, text: &str) {
        // Build the owned copy once; it serves both the pending lookup and `last_plain`.
        let plain = PlainSpan {
            range: range.clone(),
            text: text.to_string(),
        };
        if let Some(pending) = self.pending_following.take() {
            let resolved = locate_word_range(&plain, &pending.word, WordEnd::Start)
                .unwrap_or(pending.bracket_range);
            self.push(resolved, pending.target, pending.link_type);
        }
        self.last_plain = Some(plain);
    }

    /// Handles an inline reference.
    ///
    /// Only URL and file references produce links; every other reference type
    /// is ignored. The link range depends on the reference's word anchor:
    /// - preceding: the anchor word located in the last token of the latest
    ///   plain run, falling back to the bracket range;
    /// - following: deferred until the next plain run (any link already pending
    ///   is emitted first with its bracket range);
    /// - none: the bracket range.
    fn visit_reference(
        &mut self,
        open_marker: &Range,
        _content: &Range,
        close_marker: &Range,
        data: &ReferenceInline,
    ) {
        let (target, link_type) = match &data.reference_type {
            ReferenceType::Url { target } => (target.clone(), LinkType::Url),
            ReferenceType::File { target } => (target.clone(), LinkType::File),
            _ => return,
        };
        let bracket_range = Self::bracket_range(open_marker, close_marker);
        match &data.word_anchor {
            Some(WordAnchor {
                word,
                direction: AnchorDirection::Preceding,
            }) => {
                let range = self
                    .last_plain
                    .as_ref()
                    .and_then(|plain| locate_word_range(plain, word, WordEnd::End))
                    .unwrap_or(bracket_range);
                self.push(range, target, link_type);
            }
            Some(WordAnchor {
                word,
                direction: AnchorDirection::Following,
            }) => {
                // Only one following-anchor can be pending; emit the previous one
                // before replacing it so it is not lost.
                self.flush();
                self.pending_following = Some(PendingFollowing {
                    word: word.clone(),
                    target,
                    link_type,
                    bracket_range,
                });
            }
            None => {
                self.push(bracket_range, target, link_type);
            }
        }
    }
}
/// Selects which end of a plain-text run is searched for an anchored word.
#[derive(Clone, Copy)]
enum WordEnd {
/// Search the run's last whitespace-delimited token.
End,
/// Search the run's first whitespace-delimited token.
Start,
}
fn locate_word_range(plain: &PlainSpan, word: &str, end: WordEnd) -> Option<Range> {
let text = &plain.text;
if plain.range.span.len() != text.len() {
return None;
}
let token = match end {
WordEnd::End => last_token(text),
WordEnd::Start => first_token(text),
}?;
let word_in_token = token.text.find(word)?;
let word_start = token.offset + word_in_token;
let word_end = word_start + word.len();
let base = &plain.range;
let span = (base.span.start + word_start)..(base.span.start + word_end);
let start_col = base.start.column + utf16_width(&text[..word_start]);
let end_col = base.start.column + utf16_width(&text[..word_end]);
Some(Range::new(
span,
Position::new(base.start.line, start_col),
Position::new(base.start.line, end_col),
))
}
/// A whitespace-delimited token borrowed from a larger string.
struct Token<'a> {
    /// The token itself.
    text: &'a str,
    /// Byte offset of the token within the string it was taken from.
    offset: usize,
}

/// Returns the last whitespace-delimited token of `text` with its byte offset,
/// or `None` when `text` is empty or consists only of whitespace.
fn last_token(text: &str) -> Option<Token<'_>> {
    let body = text.trim_end();
    // After trimming, the piece after the final whitespace is the last token;
    // it is empty only when nothing but whitespace was there.
    let tok = body
        .rsplit(char::is_whitespace)
        .next()
        .filter(|piece| !piece.is_empty())?;
    Some(Token {
        text: tok,
        offset: body.len() - tok.len(),
    })
}

/// Returns the first whitespace-delimited token of `text` with its byte offset,
/// or `None` when `text` is empty or consists only of whitespace.
fn first_token(text: &str) -> Option<Token<'_>> {
    let body = text.trim_start();
    let tok = body
        .split(char::is_whitespace)
        .next()
        .filter(|piece| !piece.is_empty())?;
    Some(Token {
        text: tok,
        // Everything trimmed from the front is exactly the token's offset.
        offset: text.len() - body.len(),
    })
}
/// Number of UTF-16 code units needed to encode `s`.
fn utf16_width(s: &str) -> usize {
    s.encode_utf16().count()
}
// Tests for link extraction: each one parses a small source string with
// `parse_document` and checks the links returned by `Document::find_all_links`.
#[cfg(test)]
mod tests {
use super::*;
use crate::lex::parsing::parse_document;
// A bracketed URL in a paragraph yields exactly one `Url` link with that target.
#[test]
fn test_url_link_extraction() {
let source = "Check out [https://example.com] for more info.\n\n";
let doc = parse_document(source).unwrap();
let links = doc.find_all_links();
assert_eq!(links.len(), 1);
assert_eq!(links[0].link_type, LinkType::Url);
assert_eq!(links[0].target, "https://example.com");
}
// A bracketed relative path yields exactly one `File` link with that target.
#[test]
fn test_file_link_extraction() {
let source = "See [./README.md] for details.\n\n";
let doc = parse_document(source).unwrap();
let links = doc.find_all_links();
assert_eq!(links.len(), 1);
assert_eq!(links[0].link_type, LinkType::File);
assert_eq!(links[0].target, "./README.md");
}
// Two references in one paragraph yield one `Url` and one `File` link.
#[test]
fn test_multiple_links() {
let source = "Visit [https://example.com] and check [./docs.md].\n\n";
let doc = parse_document(source).unwrap();
let links = doc.find_all_links();
assert_eq!(links.len(), 2);
assert!(links.iter().any(|l| l.link_type == LinkType::Url));
assert!(links.iter().any(|l| l.link_type == LinkType::File));
}
// A verbatim block with a `src=` parameter yields one `VerbatimSrc` link.
#[test]
fn test_verbatim_src_parameter() {
let source =
"Sunset Photo:\n As the sun sets over the ocean.\n:: image src=./diagram.png ::\n\n";
let doc = parse_document(source).unwrap();
let links = doc.find_all_links();
let src_links: Vec<_> = links
.iter()
.filter(|l| l.link_type == LinkType::VerbatimSrc)
.collect();
assert_eq!(
src_links.len(),
1,
"Expected 1 verbatim src link, found {}. All links: {:?}",
src_links.len(),
links
);
assert_eq!(src_links[0].target, "./diagram.png");
}
// `Verbatim::src_parameter` returns the `src` value when present and `None` otherwise.
#[test]
fn test_verbatim_src_parameter_method() {
use super::super::elements::{Data, Label, Parameter};
let verbatim = Verbatim::with_subject(
"Test".to_string(),
Data::new(
Label::new("image".to_string()),
vec![Parameter::new("src".to_string(), "./test.png".to_string())],
),
);
assert_eq!(verbatim.src_parameter(), Some("./test.png"));
let verbatim_no_src = Verbatim::with_subject(
"Test".to_string(),
Data::new(Label::new("code".to_string()), vec![]),
);
assert_eq!(verbatim_no_src.src_parameter(), None);
}
// Plain text without references yields no links.
#[test]
fn test_no_links() {
let source = "Just plain text with no links.\n\n";
let doc = parse_document(source).unwrap();
let links = doc.find_all_links();
assert_eq!(links.len(), 0);
}
// A `[42]` reference is expected to yield no link.
#[test]
fn test_footnote_not_a_link() {
let source = "Text with footnote [42].\n\n";
let doc = parse_document(source).unwrap();
let links = doc.find_all_links();
assert_eq!(links.len(), 0);
}
// A reference in the body under a session title is still found.
#[test]
fn test_nested_session_links() {
let source = "Outer Session\n\n Inner session with [https://example.com].\n\n";
let doc = parse_document(source).unwrap();
let links = doc.find_all_links();
assert_eq!(links.len(), 1);
assert_eq!(links[0].target, "https://example.com");
}
// Explicit import of `Position` for the range assertions below.
use super::super::range::Position;
// The URL link's range is the word before the bracket ("out"), in bytes and positions.
#[test]
fn test_url_link_range_covers_preceding_word_in_paragraph() {
let source = "Check out [https://example.com] for more info.\n\n";
let doc = parse_document(source).unwrap();
let links = doc.find_all_links();
assert_eq!(links.len(), 1);
let link = &links[0];
assert_eq!(link.target, "https://example.com");
let captured = &source[link.range.span.clone()];
assert_eq!(
link.range.span,
6..9,
"inline link range must cover the anchored word 'out'. Captured: {captured:?}"
);
assert_eq!(captured, "out");
assert_eq!(link.range.start, Position::new(0, 6));
assert_eq!(link.range.end, Position::new(0, 9));
}
// The file link's range is the word before the bracket ("See"), in bytes and positions.
#[test]
fn test_file_link_range_covers_preceding_word_in_paragraph() {
let source = "See [./README.md] for details.\n\n";
let doc = parse_document(source).unwrap();
let links = doc.find_all_links();
assert_eq!(links.len(), 1);
let link = &links[0];
assert_eq!(link.target, "./README.md");
let captured = &source[link.range.span.clone()];
assert_eq!(
link.range.span,
0..3,
"inline link range must cover the anchored word 'See'. Captured: {captured:?}"
);
assert_eq!(captured, "See");
assert_eq!(link.range.start, Position::new(0, 0));
assert_eq!(link.range.end, Position::new(0, 3));
}
// A reference at the start of a line anchors to the word that follows it ("is").
#[test]
fn test_following_word_anchor_range() {
let source = "[https://lex.ing] is the home page.\n\n";
let doc = parse_document(source).unwrap();
let links = doc.find_all_links();
assert_eq!(links.len(), 1);
let link = &links[0];
assert_eq!(link.target, "https://lex.ing");
let captured = &source[link.range.span.clone()];
assert_eq!(captured, "is", "following-anchor link must cover 'is'");
let is_start = source.find("is").unwrap();
assert_eq!(link.range.span, is_start..is_start + 2);
}
// Punctuation trailing the anchored word ("website,") is not part of the link range.
#[test]
fn test_word_anchor_excludes_trailing_punctuation() {
let source = "the project website, [https://x.example] is fast.\n\n";
let doc = parse_document(source).unwrap();
let links = doc.find_all_links();
assert_eq!(links.len(), 1);
let captured = &source[links[0].range.span.clone()];
assert_eq!(captured, "website", "range must exclude the trailing comma");
}
// A reference directly abutting a word (no space) anchors to that word.
#[test]
fn test_abutting_word_anchor_range() {
let source = "Hello[./file.txt] World\n\n";
let doc = parse_document(source).unwrap();
let links = doc.find_all_links();
assert_eq!(links.len(), 1);
let captured = &source[links[0].range.span.clone()];
assert_eq!(captured, "Hello");
}
// Two references in one paragraph each anchor to their own preceding word.
#[test]
fn test_multiple_links_anchor_distinct_words() {
let source = "Visit [https://example.com] and check [./docs.md].\n\n";
let doc = parse_document(source).unwrap();
let links = doc.find_all_links();
assert_eq!(links.len(), 2);
let url = links
.iter()
.find(|l| l.link_type == LinkType::Url)
.expect("url link");
let file = links
.iter()
.find(|l| l.link_type == LinkType::File)
.expect("file link");
assert_eq!(&source[url.range.span.clone()], "Visit");
assert_eq!(&source[file.range.span.clone()], "check");
}
// In a long paragraph the link range is only the anchored word, not the paragraph.
#[test]
fn test_long_paragraph_with_single_file_ref_anchors_only_the_word() {
let source = "\
This document describes how dodot is organized. It is the conceptual view. \
For concrete types, crate layout, and trait signatures, see [./types.lex].\n\n";
let doc = parse_document(source).unwrap();
let links = doc.find_all_links();
assert_eq!(links.len(), 1);
let link = &links[0];
assert_eq!(link.target, "./types.lex");
let captured = &source[link.range.span.clone()];
assert_eq!(
captured, "see",
"inline link range must cover only the anchored word, not the paragraph"
);
}
// A file reference inside a nested-session title yields a link anchored to "See".
#[test]
fn test_file_ref_in_nested_session_title_produces_link() {
let source =
"Doc title\n\n See [./other.lex] for details\n\n nested content here.\n\n";
let doc = parse_document(source).unwrap();
let links = doc.find_all_links();
assert_eq!(
links.len(),
1,
"expected one link for the file ref in the nested-session title; got {links:?}"
);
let link = &links[0];
assert_eq!(link.target, "./other.lex");
assert_eq!(link.link_type, LinkType::File);
assert_eq!(
&source[link.range.span.clone()],
"See",
"nested-session title link anchors the preceding word"
);
}
// A URL reference inside a nested-session title yields a link anchored to "Visit".
#[test]
fn test_url_ref_in_nested_session_title_produces_link() {
let source = "Doc title\n\n Visit [https://example.com] today\n\n body line.\n\n";
let doc = parse_document(source).unwrap();
let links = doc.find_all_links();
assert_eq!(links.len(), 1);
let link = &links[0];
assert_eq!(link.target, "https://example.com");
assert_eq!(link.link_type, LinkType::Url);
assert_eq!(&source[link.range.span.clone()], "Visit");
}
// References in the outer title, the inner title and the body all yield links.
#[test]
fn test_refs_in_both_outer_and_nested_session_titles_produce_links() {
let source = "\
Top [./top.lex] section
Inner [./inner.lex] subsection
See also [./body.lex] in the body.
";
let doc = parse_document(source).unwrap();
let links = doc.find_all_links();
assert_eq!(
links.len(),
3,
"expected three links (outer-title, inner-title, body); got {links:?}"
);
let targets: Vec<&str> = links.iter().map(|l| l.target.as_str()).collect();
assert!(targets.contains(&"./top.lex"));
assert!(targets.contains(&"./inner.lex"));
assert!(targets.contains(&"./body.lex"));
}
// A reference line under a session title links the whole title text.
#[test]
fn test_reference_line_whole_element_anchors_session_title() {
let source = "Getting Started\n[./readme.txt]\n\n Welcome to the docs.\n\n";
let doc = parse_document(source).unwrap();
let links = doc.find_all_links();
assert_eq!(links.len(), 1, "one whole-element link; got {links:?}");
let link = &links[0];
assert_eq!(link.target, "./readme.txt");
assert_eq!(link.link_type, LinkType::File);
assert_eq!(
&source[link.range.span.clone()],
"Getting Started",
"whole-element link must cover the anchored head line"
);
assert_eq!(link.range.start, Position::new(0, 0));
assert_eq!(link.range.end, Position::new(0, "Getting Started".len()));
}
// A reference line under a list item links the item text without its `- ` marker.
#[test]
fn test_reference_line_whole_element_anchors_list_item() {
let source = "- Food\n- Water\n[https://water.example]\n- Bread\n\n";
let doc = parse_document(source).unwrap();
let links = doc.find_all_links();
assert_eq!(links.len(), 1);
let link = &links[0];
assert_eq!(link.target, "https://water.example");
assert_eq!(link.link_type, LinkType::Url);
assert_eq!(
&source[link.range.span.clone()],
"Water",
"list-item anchor excludes the `- ` marker"
);
}
// A reference line under a definition subject links the subject without its colon.
#[test]
fn test_reference_line_whole_element_anchors_definition_subject() {
let source =
"API Endpoint:\n[./endpoint.txt]\n A URL that provides access to a resource.\n\n";
let doc = parse_document(source).unwrap();
let links = doc.find_all_links();
assert_eq!(links.len(), 1);
let link = &links[0];
assert_eq!(link.target, "./endpoint.txt");
assert_eq!(
&source[link.range.span.clone()],
"API Endpoint",
"subject anchor excludes the trailing colon"
);
}
// A reference line separated by a blank line from the text above links its own bracketed text.
#[test]
fn test_reference_line_self_link_range_is_the_reference_text() {
let source = "See the upstream project:\n\n[https://github.com/lex-fmt/lex]\n\n";
let doc = parse_document(source).unwrap();
let links = doc.find_all_links();
assert_eq!(links.len(), 1, "one self-link; got {links:?}");
let link = &links[0];
assert_eq!(link.target, "https://github.com/lex-fmt/lex");
assert_eq!(
&source[link.range.span.clone()],
"[https://github.com/lex-fmt/lex]",
"self-link covers the reference's own bracketed text"
);
}
// A reference line that is the first line of the document links its own bracketed text.
#[test]
fn test_reference_line_self_link_at_start_of_document() {
let source = "[https://lex.ing]\n\n";
let doc = parse_document(source).unwrap();
let links = doc.find_all_links();
assert_eq!(links.len(), 1);
assert_eq!(
&source[links[0].range.span.clone()],
"[https://lex.ing]",
"self-link covers the reference's own bracketed text"
);
}
// A reference followed only by inline code is still emitted, with its bracket range.
#[test]
fn test_following_anchor_followed_by_only_non_plain_nodes_still_emits() {
let source = "[https://a.example]`code`\n\n";
let doc = parse_document(source).unwrap();
let links = doc.find_all_links();
assert_eq!(
links.len(),
1,
"following-anchor link followed only by non-plain nodes must still \
emit (bracket-range fallback); got {links:?}"
);
let link = &links[0];
assert_eq!(link.target, "https://a.example");
assert_eq!(&source[link.range.span.clone()], "[https://a.example]");
}
// Two back-to-back references both produce a link; neither is dropped.
#[test]
fn test_two_following_anchors_in_a_row_both_emit() {
let source = "[https://a.example][https://b.example] tail\n\n";
let doc = parse_document(source).unwrap();
let links = doc.find_all_links();
assert_eq!(
links.len(),
2,
"both back-to-back following-anchor refs must emit; got {links:?}"
);
let targets: Vec<&str> = links.iter().map(|l| l.target.as_str()).collect();
assert!(targets.contains(&"https://a.example"));
assert!(targets.contains(&"https://b.example"));
}
// Whole-element end column counts UTF-16 units ("Café Menu" is 9), not bytes.
#[test]
fn test_reference_line_whole_element_end_column_is_utf16() {
let source = "Café Menu\n[./menu.txt]\n\n Today's specials.\n\n";
let doc = parse_document(source).unwrap();
let links = doc.find_all_links();
assert_eq!(links.len(), 1, "got {links:?}");
let link = &links[0];
assert_eq!(link.target, "./menu.txt");
assert_eq!(
&source[link.range.span.clone()],
"Café Menu",
"byte span still covers the whole anchored title"
);
assert_eq!(link.range.start, Position::new(0, 0));
assert_eq!(
link.range.end,
Position::new(0, 9),
"end column must be the UTF-16 width of the anchor, not its byte length"
);
}
// Self-link end column counts UTF-16 units of the bracketed reference (22 here).
#[test]
fn test_reference_line_self_link_end_column_is_utf16() {
let source = "[https://café.example]\n\n";
let doc = parse_document(source).unwrap();
let links = doc.find_all_links();
assert_eq!(links.len(), 1, "got {links:?}");
let link = &links[0];
assert_eq!(link.target, "https://café.example");
assert_eq!(
&source[link.range.span.clone()],
"[https://café.example]",
"byte span still covers the bracketed reference"
);
assert_eq!(link.range.start, Position::new(0, 0));
assert_eq!(
link.range.end,
Position::new(0, 22),
"self-link end column must be the UTF-16 width of `[` + raw + `]`"
);
}
// When the preceding text contains an escape, the link uses the bracket range.
#[test]
fn test_escaped_char_in_anchored_word_falls_back_to_bracket_range() {
let source = "a\\*b [https://x.example] tail\n\n";
let doc = parse_document(source).unwrap();
let links = doc.find_all_links();
assert_eq!(links.len(), 1, "got {links:?}");
let link = &links[0];
assert_eq!(link.target, "https://x.example");
assert_eq!(
&source[link.range.span.clone()],
"[https://x.example]",
"escaped anchored run must fall back to the bracket range"
);
}
// A `[42]` reference line yields no document link.
#[test]
fn test_marker_reference_line_is_not_a_document_link() {
let source = "Some claim.\n[42]\n\n:: 42 :: A footnote.\n\n";
let doc = parse_document(source).unwrap();
let links = doc.find_all_links();
assert!(
links.is_empty(),
"marker-style references are not document links: {links:?}"
);
}
}