use super::elements::Verbatim;
use super::inline_positions::{walk_text_content_positions, InlinePositionVisitor};
use super::range::Range;
use super::text_content::TextContent;
use super::{Document, Session};
use crate::lex::inlines::{ReferenceInline, ReferenceType};
use std::fmt;
/// A single link discovered while scanning a document or session.
///
/// Instances are produced by the `find_all_links` methods in this file.
#[derive(Debug, Clone, PartialEq)]
pub struct DocumentLink {
/// Source range of the link. For inline references this runs from the start
/// of the opening marker to the end of the closing marker; for verbatim
/// `src` links it is the range of the verbatim block itself.
pub range: Range,
/// The link destination: the reference target for URL/file references, or
/// the value of the `src` parameter for verbatim blocks.
pub target: String,
/// Which kind of construct produced this link.
pub link_type: LinkType,
}
impl DocumentLink {
pub fn new(range: Range, target: String, link_type: LinkType) -> Self {
Self {
range,
target,
link_type,
}
}
}
impl fmt::Display for DocumentLink {
    /// Renders the link as `<LinkType debug name> link: <target> at <range start>`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self {
            range,
            target,
            link_type,
        } = self;
        write!(f, "{link_type:?} link: {target} at {}", range.start)
    }
}
/// The kind of construct a [`DocumentLink`] was extracted from.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LinkType {
/// An inline reference whose reference type is `ReferenceType::Url`.
Url,
/// An inline reference whose reference type is `ReferenceType::File`.
File,
/// The `src` parameter found on a verbatim block.
VerbatimSrc,
}
impl Verbatim {
    /// Returns the value of the first closing-data parameter whose key is
    /// `src`, or `None` when the block has no such parameter.
    pub fn src_parameter(&self) -> Option<&str> {
        let params = &self.closing_data.parameters;
        params
            .iter()
            .find_map(|param| (param.key == "src").then_some(param.value.as_str()))
    }
}
impl Session {
    /// Collects every link reachable from this session.
    ///
    /// Links are gathered in three passes and returned in that order:
    ///
    /// 1. URL/file references in this session's title and in the titles of
    ///    the sessions yielded by `iter_sessions_recursive`.
    /// 2. URL/file references in the text lines of every paragraph yielded by
    ///    `iter_paragraphs_recursive`.
    /// 3. One [`LinkType::VerbatimSrc`] link for each verbatim block (from
    ///    `iter_all_nodes_with_depth`) that carries a `src` parameter; its
    ///    range is the range of the whole verbatim block.
    ///
    /// Returns an empty vector when nothing is found.
    pub fn find_all_links(&self) -> Vec<DocumentLink> {
        use super::elements::content_item::ContentItem;
        // Brings `range()` into scope for the verbatim block below.
        use super::traits::AstNode;

        let mut links = Vec::new();

        // Pass 1: session titles (this one first, then nested sessions).
        collect_text_content_links(&self.title, &mut links);
        for nested in self.iter_sessions_recursive() {
            collect_text_content_links(&nested.title, &mut links);
        }

        // Pass 2: paragraph text lines. Non-text-line items are skipped.
        for paragraph in self.iter_paragraphs_recursive() {
            for line_item in &paragraph.lines {
                if let ContentItem::TextLine(line) = line_item {
                    collect_text_content_links(&line.content, &mut links);
                }
            }
        }

        // Pass 3: verbatim blocks that point at an external source.
        for (item, _depth) in self.iter_all_nodes_with_depth() {
            if let ContentItem::VerbatimBlock(verbatim) = item {
                if let Some(src) = verbatim.src_parameter() {
                    let link = DocumentLink::new(
                        verbatim.range().clone(),
                        src.to_string(),
                        LinkType::VerbatimSrc,
                    );
                    links.push(link);
                }
            }
        }

        links
    }
}
impl Document {
    /// Collects every link in the document: references in the document title
    /// (when a title is present) come first, followed by everything reported
    /// by the root session's `find_all_links`.
    pub fn find_all_links(&self) -> Vec<DocumentLink> {
        let mut found = Vec::new();

        if let Some(doc_title) = &self.title {
            collect_text_content_links(&doc_title.content, &mut found);
        }

        let mut from_root = self.root.find_all_links();
        found.append(&mut from_root);
        found
    }
}
/// Walks the inline positions of `text` and appends a [`DocumentLink`] to
/// `out` for every URL or file reference the walk reports.
fn collect_text_content_links(text: &TextContent, out: &mut Vec<DocumentLink>) {
    walk_text_content_positions(text, &mut LinkCollector { out });
}
/// Inline-position visitor that records URL and file references as
/// [`DocumentLink`]s.
struct LinkCollector<'a> {
/// Destination vector; each matching reference is pushed onto it.
out: &'a mut Vec<DocumentLink>,
}
impl InlinePositionVisitor for LinkCollector<'_> {
    /// Records a link for URL and file references; every other reference
    /// type is ignored.
    ///
    /// The recorded range spans from the start of `open_marker` to the end of
    /// `close_marker`, so it covers the whole bracketed reference.
    fn visit_reference(
        &mut self,
        open_marker: &Range,
        _content: &Range,
        close_marker: &Range,
        data: &ReferenceInline,
    ) {
        let (link_type, destination) = match &data.reference_type {
            ReferenceType::File { target } => (LinkType::File, target),
            ReferenceType::Url { target } => (LinkType::Url, target),
            _ => return,
        };

        let byte_span = open_marker.span.start..close_marker.span.end;
        let bracketed = Range::new(byte_span, open_marker.start, close_marker.end);

        let link = DocumentLink::new(bracketed, destination.clone(), link_type);
        self.out.push(link);
    }
}
// Unit tests for link extraction: link kinds and targets, verbatim `src`
// parameters, and the exact source ranges reported for inline references.
#[cfg(test)]
mod tests {
use super::*;
use crate::lex::parsing::parse_document;
/// A bracketed URL in a paragraph yields exactly one `Url` link with that target.
#[test]
fn test_url_link_extraction() {
let source = "Check out [https://example.com] for more info.\n\n";
let doc = parse_document(source).unwrap();
let links = doc.find_all_links();
assert_eq!(links.len(), 1);
assert_eq!(links[0].link_type, LinkType::Url);
assert_eq!(links[0].target, "https://example.com");
}
/// A bracketed relative path in a paragraph yields exactly one `File` link.
#[test]
fn test_file_link_extraction() {
let source = "See [./README.md] for details.\n\n";
let doc = parse_document(source).unwrap();
let links = doc.find_all_links();
assert_eq!(links.len(), 1);
assert_eq!(links[0].link_type, LinkType::File);
assert_eq!(links[0].target, "./README.md");
}
/// Two references in one paragraph yield two links, one of each kind.
#[test]
fn test_multiple_links() {
let source = "Visit [https://example.com] and check [./docs.md].\n\n";
let doc = parse_document(source).unwrap();
let links = doc.find_all_links();
assert_eq!(links.len(), 2);
assert!(links.iter().any(|l| l.link_type == LinkType::Url));
assert!(links.iter().any(|l| l.link_type == LinkType::File));
}
/// A verbatim block with a `src=` parameter yields one `VerbatimSrc` link
/// whose target is the parameter value.
#[test]
fn test_verbatim_src_parameter() {
let source =
"Sunset Photo:\n As the sun sets over the ocean.\n:: image src=./diagram.png ::\n\n";
let doc = parse_document(source).unwrap();
let links = doc.find_all_links();
// Only the verbatim-derived links matter for this test.
let src_links: Vec<_> = links
.iter()
.filter(|l| l.link_type == LinkType::VerbatimSrc)
.collect();
assert_eq!(
src_links.len(),
1,
"Expected 1 verbatim src link, found {}. All links: {:?}",
src_links.len(),
links
);
assert_eq!(src_links[0].target, "./diagram.png");
}
/// `Verbatim::src_parameter` returns the `src` value when present and
/// `None` when the parameter list is empty.
#[test]
fn test_verbatim_src_parameter_method() {
use super::super::elements::{Data, Label, Parameter};
let verbatim = Verbatim::with_subject(
"Test".to_string(),
Data::new(
Label::new("image".to_string()),
vec![Parameter::new("src".to_string(), "./test.png".to_string())],
),
);
assert_eq!(verbatim.src_parameter(), Some("./test.png"));
let verbatim_no_src = Verbatim::with_subject(
"Test".to_string(),
Data::new(Label::new("code".to_string()), vec![]),
);
assert_eq!(verbatim_no_src.src_parameter(), None);
}
/// Plain text without references yields no links.
#[test]
fn test_no_links() {
let source = "Just plain text with no links.\n\n";
let doc = parse_document(source).unwrap();
let links = doc.find_all_links();
assert_eq!(links.len(), 0);
}
/// A bracketed number (`[42]`) is not reported as a link.
#[test]
fn test_footnote_not_a_link() {
let source = "Text with footnote [42].\n\n";
let doc = parse_document(source).unwrap();
let links = doc.find_all_links();
assert_eq!(links.len(), 0);
}
/// A URL reference in text below a session title is still found.
#[test]
fn test_nested_session_links() {
let source = "Outer Session\n\n Inner session with [https://example.com].\n\n";
let doc = parse_document(source).unwrap();
let links = doc.find_all_links();
assert_eq!(links.len(), 1);
assert_eq!(links[0].target, "https://example.com");
}
// `Position` is needed by the range tests below to state expected start/end.
use super::super::range::Position;
/// The URL link's range covers exactly the bracketed reference (bytes
/// 10..31 on line 0), not the surrounding paragraph.
#[test]
fn test_url_link_range_is_bracket_bounded_in_paragraph() {
let source = "Check out [https://example.com] for more info.\n\n";
let doc = parse_document(source).unwrap();
let links = doc.find_all_links();
assert_eq!(links.len(), 1);
let link = &links[0];
assert_eq!(link.target, "https://example.com");
// Slice of the source covered by the reported span, shown on failure.
let captured = &source[link.range.span.clone()];
assert_eq!(
link.range.span,
10..31,
"DocumentLink range must cover only the [bracketed] reference, not the whole paragraph. \
Captured text: {captured:?}"
);
assert_eq!(link.range.start, Position::new(0, 10));
assert_eq!(link.range.end, Position::new(0, 31));
}
/// The file link's range covers exactly the bracketed reference (bytes
/// 4..17 on line 0), not the surrounding paragraph.
#[test]
fn test_file_link_range_is_bracket_bounded_in_paragraph() {
let source = "See [./README.md] for details.\n\n";
let doc = parse_document(source).unwrap();
let links = doc.find_all_links();
assert_eq!(links.len(), 1);
let link = &links[0];
assert_eq!(link.target, "./README.md");
// Slice of the source covered by the reported span, shown on failure.
let captured = &source[link.range.span.clone()];
assert_eq!(
link.range.span,
4..17,
"DocumentLink range must cover only the [bracketed] reference, not the whole paragraph. \
Captured text: {captured:?}"
);
assert_eq!(link.range.start, Position::new(0, 4));
assert_eq!(link.range.end, Position::new(0, 17));
}
/// Two references in one paragraph each get their own bracket-bounded span.
#[test]
fn test_multiple_links_have_distinct_bracket_bounded_ranges() {
let source = "Visit [https://example.com] and check [./docs.md].\n\n";
let doc = parse_document(source).unwrap();
let links = doc.find_all_links();
assert_eq!(links.len(), 2);
// Look links up by kind so the test does not depend on their order.
let url = links
.iter()
.find(|l| l.link_type == LinkType::Url)
.expect("url link");
let file = links
.iter()
.find(|l| l.link_type == LinkType::File)
.expect("file link");
assert_eq!(
url.range.span,
6..27,
"URL link captured: {:?}",
&source[url.range.span.clone()]
);
assert_eq!(
file.range.span,
38..49,
"File link captured: {:?}",
&source[file.range.span.clone()]
);
}
/// In a long single-line paragraph, the link span still matches only the
/// bracketed reference; the expected span is computed from the source text.
#[test]
fn test_long_paragraph_with_single_file_ref_does_not_include_surrounding_text_in_range() {
let source = "\
This document describes how dodot is organized. It is the conceptual view. \
For concrete types, crate layout, and trait signatures, see [./types.lex].\n\n";
let doc = parse_document(source).unwrap();
let links = doc.find_all_links();
assert_eq!(links.len(), 1);
let link = &links[0];
assert_eq!(link.target, "./types.lex");
// Derive the expected byte span from where the bracketed text sits in `source`.
let bracket_start = source.find("[./types.lex]").expect("bracket present");
let bracket_end = bracket_start + "[./types.lex]".len();
let captured = &source[link.range.span.clone()];
assert_eq!(
link.range.span,
bracket_start..bracket_end,
"Link range must be bracket-bounded. Got captured text: {captured:?}"
);
}
/// A file reference placed in what the test treats as a nested-session
/// title yields one bracket-bounded `File` link.
#[test]
fn test_file_ref_in_nested_session_title_produces_link() {
let source =
"Doc title\n\n See [./other.lex] for details\n\n nested content here.\n\n";
let doc = parse_document(source).unwrap();
let links = doc.find_all_links();
assert_eq!(
links.len(),
1,
"expected one link for the file ref in the nested-session title; got {links:?}"
);
let link = &links[0];
assert_eq!(link.target, "./other.lex");
assert_eq!(link.link_type, LinkType::File);
// Derive the expected byte span from where the bracketed text sits in `source`.
let bracket_start = source.find("[./other.lex]").expect("bracket present");
let bracket_end = bracket_start + "[./other.lex]".len();
assert_eq!(
link.range.span,
bracket_start..bracket_end,
"Nested-session title link must be bracket-bounded. Got captured text: {:?}",
&source[link.range.span.clone()]
);
}
/// A URL reference placed in what the test treats as a nested-session
/// title yields one bracket-bounded `Url` link.
#[test]
fn test_url_ref_in_nested_session_title_produces_link() {
let source = "Doc title\n\n Visit [https://example.com] today\n\n body line.\n\n";
let doc = parse_document(source).unwrap();
let links = doc.find_all_links();
assert_eq!(links.len(), 1);
let link = &links[0];
assert_eq!(link.target, "https://example.com");
assert_eq!(link.link_type, LinkType::Url);
// Derive the expected byte span from where the bracketed text sits in `source`.
let bracket_start = source
.find("[https://example.com]")
.expect("bracket present");
let bracket_end = bracket_start + "[https://example.com]".len();
assert_eq!(link.range.span, bracket_start..bracket_end);
}
/// References in an outer title, an inner title and body text are all
/// reported (three links in total, matched by target regardless of order).
#[test]
fn test_refs_in_both_outer_and_nested_session_titles_produce_links() {
// NOTE(review): as shown here the fixture has no blank lines or leading
// indentation between its three lines — confirm it matches the intended
// outer-title / inner-title / body layout.
let source = "\
Top [./top.lex] section
Inner [./inner.lex] subsection
See also [./body.lex] in the body.
";
let doc = parse_document(source).unwrap();
let links = doc.find_all_links();
assert_eq!(
links.len(),
3,
"expected three links (outer-title, inner-title, body); got {links:?}"
);
let targets: Vec<&str> = links.iter().map(|l| l.target.as_str()).collect();
assert!(targets.contains(&"./top.lex"));
assert!(targets.contains(&"./inner.lex"));
assert!(targets.contains(&"./body.lex"));
}
}