Skip to main content

lex_core/lex/ast/elements/
document.rs

1//! Document element
2//!
3//!     The document node serves two purposes:
4//!         - Contains the document tree.
5//!         - Contains document-level annotations, including non-content metadata (like file name,
6//!           parser version, etc).
7//!
8//!     Lex documents are plain text, utf-8 encoded files with the file extension .lex. Line width
9//!     is not limited, and is considered a presentation detail. Best practice dictates only
10//!     limiting line length when publishing, not while authoring.
11//!
12//!     The document node holds the document metadata and the content's root node, which is a
13//!     session node. The structure of the document then is a tree of sessions, which can be nested
14//!     arbitrarily. This creates powerful addressing capabilities as one can target any sub-session
15//!     from an index.
16//!
17//!     Document Title:
18//!     The document title is a first-class element, represented as a dedicated `DocumentTitle`
19//!     AST node owned directly by the `Document`. It is parsed from a single unindented line
20//!     at the start of the document, followed by blank lines, where no indented content follows
21//!     (distinguishing it from a session title). See specs/elements/document.lex.
22//!
23//!     Document Start:
24//!     A synthetic `DocumentStart` token is used to mark the boundary between document-level
25//!     annotations (metadata) and the actual document content. This allows the parser and
26//!     assembly logic to correctly identify where the body begins.
27//!
28//!     For more details on document structure and sessions, see the [ast](crate::lex::ast) module.
29//!
30//! Learn More:
31//! - Paragraphs: specs/v1/elements/paragraph.lex
32//! - Lists: specs/v1/elements/list.lex
33//! - Sessions: specs/v1/elements/session.lex
34//! - Annotations: specs/v1/elements/annotation.lex
35//! - Definitions: specs/v1/elements/definition.lex
36//! - Verbatim blocks: specs/v1/elements/verbatim.lex
37//!
38//! Examples:
39//! - Document-level metadata via annotations
40//! - All body content accessible via document.root.children
41
42use super::super::range::{Position, Range};
43use super::super::text_content::TextContent;
44use super::super::traits::{AstNode, Container, Visitor};
45use super::annotation::Annotation;
46use super::content_item::ContentItem;
47use super::session::Session;
48use super::typed_content;
49use std::fmt;
50
51/// A first-class document title element.
52///
53/// Represents the title of a Lex document — a single unindented line at the start
54/// of the document, followed by blank lines, with no indented content after.
55/// This is distinct from session titles.
56///
57/// An optional subtitle is supported: when the title line ends with a colon and a
58/// second non-blank, non-indented line follows before the blank separator, the
59/// second line is parsed as a subtitle. The trailing colon is structural (stripped
60/// from the title content).
61#[derive(Debug, Clone, PartialEq)]
62pub struct DocumentTitle {
63    pub content: TextContent,
64    pub subtitle: Option<TextContent>,
65    pub location: Range,
66}
67
68impl DocumentTitle {
69    pub fn new(content: TextContent, location: Range) -> Self {
70        Self {
71            content,
72            subtitle: None,
73            location,
74        }
75    }
76
77    pub fn with_subtitle(content: TextContent, subtitle: TextContent, location: Range) -> Self {
78        Self {
79            content,
80            subtitle: Some(subtitle),
81            location,
82        }
83    }
84
85    pub fn from_string(text: String, location: Range) -> Self {
86        Self {
87            content: TextContent::from_string(text, Some(location.clone())),
88            subtitle: None,
89            location,
90        }
91    }
92
93    pub fn as_str(&self) -> &str {
94        self.content.as_string()
95    }
96
97    pub fn subtitle_str(&self) -> Option<&str> {
98        self.subtitle.as_ref().map(|s| s.as_string())
99    }
100}
101
102impl AstNode for DocumentTitle {
103    fn node_type(&self) -> &'static str {
104        "DocumentTitle"
105    }
106
107    fn display_label(&self) -> String {
108        match &self.subtitle {
109            Some(sub) => format!(
110                "DocumentTitle(\"{}\", subtitle: \"{}\")",
111                self.as_str(),
112                sub.as_string()
113            ),
114            None => format!("DocumentTitle(\"{}\")", self.as_str()),
115        }
116    }
117
118    fn range(&self) -> &Range {
119        &self.location
120    }
121
122    fn accept(&self, _visitor: &mut dyn Visitor) {}
123}
124
125#[derive(Debug, Clone, PartialEq)]
126pub struct Document {
127    pub annotations: Vec<Annotation>,
128    pub title: Option<DocumentTitle>,
129    // all content is attached to the root node
130    pub root: Session,
131    /// Reference lines (whole-element anchors) extracted before structural
132    /// parsing and resolved against the original source. See
133    /// [`crate::lex::ast::anchoring`] and [`crate::lex::anchoring`].
134    ///
135    /// Empty for documents that contained no reference lines and for any
136    /// document built programmatically rather than parsed.
137    pub reference_lines: Vec<super::super::anchoring::ReferenceLine>,
138    /// Warnings produced while resolving reference-line anchors (overlap /
139    /// stacking, per §2.3.3). Surfaced through [`Document::diagnostics`].
140    pub reference_line_diagnostics: Vec<super::super::diagnostics::Diagnostic>,
141}
142
143impl Document {
144    pub fn new() -> Self {
145        Self {
146            annotations: Vec::new(),
147            title: None,
148            root: Session::with_title(String::new()),
149            reference_lines: Vec::new(),
150            reference_line_diagnostics: Vec::new(),
151        }
152    }
153
154    pub fn with_content(content: Vec<ContentItem>) -> Self {
155        let mut root = Session::with_title(String::new());
156        let session_content = typed_content::into_session_contents(content);
157        root.children = super::container::SessionContainer::from_typed(session_content);
158        Self {
159            annotations: Vec::new(),
160            title: None,
161            root,
162            reference_lines: Vec::new(),
163            reference_line_diagnostics: Vec::new(),
164        }
165    }
166
167    /// Construct a document from an existing root session.
168    pub fn from_root(root: Session) -> Self {
169        Self {
170            annotations: Vec::new(),
171            title: None,
172            root,
173            reference_lines: Vec::new(),
174            reference_line_diagnostics: Vec::new(),
175        }
176    }
177
178    /// Construct a document from a title and root session.
179    pub fn from_title_and_root(title: Option<DocumentTitle>, root: Session) -> Self {
180        Self {
181            annotations: Vec::new(),
182            title,
183            root,
184            reference_lines: Vec::new(),
185            reference_line_diagnostics: Vec::new(),
186        }
187    }
188
189    pub fn with_annotations_and_content(
190        annotations: Vec<Annotation>,
191        content: Vec<ContentItem>,
192    ) -> Self {
193        let mut root = Session::with_title(String::new());
194        let session_content = typed_content::into_session_contents(content);
195        root.children = super::container::SessionContainer::from_typed(session_content);
196        Self {
197            annotations,
198            title: None,
199            root,
200            reference_lines: Vec::new(),
201            reference_line_diagnostics: Vec::new(),
202        }
203    }
204
205    pub fn with_root_location(mut self, location: Range) -> Self {
206        self.root.location = location;
207        self
208    }
209
210    /// Reference lines (whole-element anchors) resolved for this document.
211    ///
212    /// Consumed by the babel serializers and the LSP `documentLink` provider so
213    /// they can wrap the anchored head line without re-deriving §2.3 adjacency.
214    pub fn reference_lines(&self) -> &[super::super::anchoring::ReferenceLine] {
215        &self.reference_lines
216    }
217
218    pub fn root_session(&self) -> &Session {
219        &self.root
220    }
221
222    pub fn root_session_mut(&mut self) -> &mut Session {
223        &mut self.root
224    }
225
226    pub fn into_root(self) -> Session {
227        self.root
228    }
229
230    /// Get the document title text.
231    ///
232    /// Returns the title string if a DocumentTitle is present, empty string otherwise.
233    pub fn title(&self) -> &str {
234        match &self.title {
235            Some(dt) => dt.as_str(),
236            None => "",
237        }
238    }
239
240    /// Set the document title.
241    pub fn set_title(&mut self, title: String) {
242        if title.is_empty() {
243            self.title = None;
244        } else {
245            let location = Range::default();
246            self.title = Some(DocumentTitle::from_string(title, location));
247        }
248    }
249
250    /// Returns the path of nodes at the given position, starting from the document
251    pub fn node_path_at_position(&self, pos: Position) -> Vec<&dyn AstNode> {
252        let path = self.root.node_path_at_position(pos);
253        if !path.is_empty() {
254            let mut nodes: Vec<&dyn AstNode> = Vec::with_capacity(path.len() + 1);
255            nodes.push(self);
256            nodes.extend(path);
257            nodes
258        } else {
259            Vec::new()
260        }
261    }
262
263    /// Returns the deepest (most nested) element that contains the position
264    pub fn element_at(&self, pos: Position) -> Option<&ContentItem> {
265        self.root.element_at(pos)
266    }
267
268    /// Returns the visual line element at the given position
269    pub fn visual_line_at(&self, pos: Position) -> Option<&ContentItem> {
270        self.root.visual_line_at(pos)
271    }
272
273    /// Returns the block element at the given position
274    pub fn block_element_at(&self, pos: Position) -> Option<&ContentItem> {
275        self.root.block_element_at(pos)
276    }
277
278    /// All annotations attached directly to the document (document-level metadata).
279    pub fn annotations(&self) -> &[Annotation] {
280        &self.annotations
281    }
282
283    /// Mutable access to document-level annotations.
284    pub fn annotations_mut(&mut self) -> &mut Vec<Annotation> {
285        &mut self.annotations
286    }
287
288    /// Iterate over document-level annotation blocks in source order.
289    pub fn iter_annotations(&self) -> std::slice::Iter<'_, Annotation> {
290        self.annotations.iter()
291    }
292
293    /// Iterate over all content items nested inside document-level annotations.
294    pub fn iter_annotation_contents(&self) -> impl Iterator<Item = &ContentItem> {
295        self.annotations
296            .iter()
297            .flat_map(|annotation| annotation.children())
298    }
299
300    // ========================================================================
301    // REFERENCE RESOLUTION APIs (Issue #291)
302    // Delegates to the root session
303    // ========================================================================
304
305    /// Find the first annotation with a matching label.
306    ///
307    /// This searches recursively through all annotations in the document,
308    /// including both document-level annotations and annotations in the content tree.
309    ///
310    /// # Arguments
311    /// * `label` - The label string to search for
312    ///
313    /// # Returns
314    /// The first annotation whose label matches exactly, or None if not found.
315    ///
316    /// # Example
317    /// ```rust,ignore
318    /// // Find annotation with label "42" for reference [42]
319    /// if let Some(annotation) = document.find_annotation_by_label("42") {
320    ///     // Jump to this annotation in go-to-definition
321    /// }
322    /// ```
323    pub fn find_annotation_by_label(&self, label: &str) -> Option<&Annotation> {
324        // First check document-level annotations
325        self.annotations
326            .iter()
327            .find(|ann| ann.data.label.value == label)
328            .or_else(|| self.root.find_annotation_by_label(label))
329    }
330
331    /// Find the first annotation with `label` whose `Range.origin_path`
332    /// matches `origin`.
333    ///
334    /// Used after include resolution so that footnote references like `[1]`
335    /// scope to the file they were authored in: a `[1]` in `chapter.lex`
336    /// finds `:: 1 ::` defined in `chapter.lex` and not in some other
337    /// included file that happens to also have a `:: 1 ::`. The reference's
338    /// origin is the `Range.origin_path` of the inline's containing node
339    /// (paragraph / session title / etc.).
340    ///
341    /// Unlike [`Document::find_annotation_by_label`], this walker checks
342    /// both *standalone* annotations (children of containers) **and**
343    /// *attached* annotations (those moved into a node's `.annotations`
344    /// field by `AttachAnnotations`). The bare `find_annotation_by_label`
345    /// only sees standalone ones, which is fine for an unresolved tree
346    /// (annotations live in children pre-attachment) but misses footnote
347    /// definitions in any tree that has been through the parser's
348    /// attachment phase. After include resolution every annotation has
349    /// either form, so the more thorough walker is the right one.
350    ///
351    /// `find_annotation_by_label` (without origin filter) remains the
352    /// right call when origin filtering is not needed and the caller
353    /// knows annotations have not been attached.
354    pub fn find_annotation_by_label_in_origin(
355        &self,
356        label: &str,
357        origin: Option<&std::path::Path>,
358    ) -> Option<&Annotation> {
359        fn matches(ann: &Annotation, label: &str, origin: Option<&std::path::Path>) -> bool {
360            ann.data.label.value == label && ann.location.origin() == origin
361        }
362
363        // Document-level annotations first.
364        if let Some(ann) = self.annotations.iter().find(|a| matches(a, label, origin)) {
365            return Some(ann);
366        }
367        // Then walk the body. The recursive helper visits both the
368        // children stream and each node's attached `.annotations` slots.
369        find_annotation_in_session_with_origin(&self.root, label, origin)
370    }
371
372    /// Find all annotations with a matching label.
373    ///
374    /// This searches recursively through all annotations in the document,
375    /// including both document-level annotations and annotations in the content tree.
376    ///
377    /// # Arguments
378    /// * `label` - The label string to search for
379    ///
380    /// # Returns
381    /// A vector of all annotations whose labels match exactly.
382    ///
383    /// # Example
384    /// ```rust,ignore
385    /// // Find all annotations labeled "note"
386    /// let notes = document.find_annotations_by_label("note");
387    /// for note in notes {
388    ///     // Process each note annotation
389    /// }
390    /// ```
391    pub fn find_annotations_by_label(&self, label: &str) -> Vec<&Annotation> {
392        let mut results: Vec<&Annotation> = self
393            .annotations
394            .iter()
395            .filter(|ann| ann.data.label.value == label)
396            .collect();
397
398        results.extend(self.root.find_annotations_by_label(label));
399        results
400    }
401
402    /// Iterate all inline references at any depth.
403    ///
404    /// This method recursively walks the document tree, parses inline content,
405    /// and yields all reference inline nodes (e.g., \[42\], \[@citation\], \[::note\]).
406    ///
407    /// # Returns
408    /// An iterator of references to ReferenceInline nodes
409    ///
410    /// # Example
411    /// ```rust,ignore
412    /// for reference in document.iter_all_references() {
413    ///     match &reference.reference_type {
414    ///         ReferenceType::FootnoteNumber { number } => {
415    ///             // Find annotation with this number
416    ///         }
417    ///         ReferenceType::Citation(data) => {
418    ///             // Process citation
419    ///         }
420    ///         _ => {}
421    ///     }
422    /// }
423    /// ```
424    pub fn iter_all_references(
425        &self,
426    ) -> Box<dyn Iterator<Item = crate::lex::inlines::ReferenceInline> + '_> {
427        let title_refs = self
428            .title
429            .iter()
430            .flat_map(|t| {
431                let title_inlines = t.content.inline_items();
432                let subtitle_inlines = t
433                    .subtitle
434                    .iter()
435                    .flat_map(|s| s.inline_items())
436                    .collect::<Vec<_>>();
437                title_inlines.into_iter().chain(subtitle_inlines)
438            })
439            .filter_map(|node| {
440                if let crate::lex::inlines::InlineNode::Reference { data, .. } = node {
441                    Some(data)
442                } else {
443                    None
444                }
445            });
446        Box::new(title_refs.chain(self.root.iter_all_references()))
447    }
448
449    /// Find all references to a specific target label.
450    ///
451    /// This method searches for inline references that point to the given target.
452    /// For example, find all `[42]` references when looking for footnote "42".
453    ///
454    /// # Arguments
455    /// * `target` - The target label to search for
456    ///
457    /// # Returns
458    /// A vector of references to ReferenceInline nodes that match the target
459    ///
460    /// # Example
461    /// ```rust,ignore
462    /// // Find all references to footnote "42"
463    /// let refs = document.find_references_to("42");
464    /// println!("Found {} references to footnote 42", refs.len());
465    /// ```
466    pub fn find_references_to(&self, target: &str) -> Vec<crate::lex::inlines::ReferenceInline> {
467        self.root.find_references_to(target)
468    }
469}
470
471impl AstNode for Document {
472    fn node_type(&self) -> &'static str {
473        "Document"
474    }
475
476    fn display_label(&self) -> String {
477        format!(
478            "Document ({} annotations, {} items)",
479            self.annotations.len(),
480            self.root.children.len()
481        )
482    }
483
484    fn range(&self) -> &Range {
485        &self.root.location
486    }
487
488    fn accept(&self, visitor: &mut dyn Visitor) {
489        for annotation in &self.annotations {
490            annotation.accept(visitor);
491        }
492        if let Some(title) = &self.title {
493            title.accept(visitor);
494        }
495        self.root.accept(visitor);
496    }
497}
498
499impl Default for Document {
500    fn default() -> Self {
501        Self::new()
502    }
503}
504
505/// Recursive walker for `find_annotation_by_label_in_origin`. Visits both
506/// standalone annotations (in containers' children) and attached
507/// annotations (in each node's `.annotations` slot).
508fn find_annotation_in_session_with_origin<'a>(
509    s: &'a Session,
510    label: &str,
511    origin: Option<&std::path::Path>,
512) -> Option<&'a Annotation> {
513    fn matches(ann: &Annotation, label: &str, origin: Option<&std::path::Path>) -> bool {
514        ann.data.label.value == label && ann.location.origin() == origin
515    }
516    for ann in &s.annotations {
517        if matches(ann, label, origin) {
518            return Some(ann);
519        }
520    }
521    find_annotation_in_items_with_origin(&s.children, label, origin)
522}
523
524fn find_annotation_in_items_with_origin<'a>(
525    items: &'a [ContentItem],
526    label: &str,
527    origin: Option<&std::path::Path>,
528) -> Option<&'a Annotation> {
529    fn matches(ann: &Annotation, label: &str, origin: Option<&std::path::Path>) -> bool {
530        ann.data.label.value == label && ann.location.origin() == origin
531    }
532    for item in items {
533        // Attached annotations live on every node that has a public
534        // `.annotations: Vec<Annotation>` field. Today that's:
535        // Session, Definition, ListItem, Paragraph, List, Table, and
536        // Verbatim (as VerbatimBlock). Missing any of these would let
537        // origin-aware lookups silently skip a valid match.
538        let attached: &[Annotation] = match item {
539            ContentItem::Session(s) => &s.annotations,
540            ContentItem::Definition(d) => &d.annotations,
541            ContentItem::ListItem(li) => &li.annotations,
542            ContentItem::Paragraph(p) => &p.annotations,
543            ContentItem::List(l) => &l.annotations,
544            ContentItem::Table(t) => &t.annotations,
545            ContentItem::VerbatimBlock(v) => &v.annotations,
546            _ => &[],
547        };
548        for ann in attached {
549            if matches(ann, label, origin) {
550                return Some(ann);
551            }
552        }
553        // Standalone annotation directly in the children stream:
554        if let ContentItem::Annotation(a) = item {
555            if matches(a, label, origin) {
556                return Some(a);
557            }
558            // Annotation body can host more annotations.
559            if let Some(found) = find_annotation_in_items_with_origin(&a.children, label, origin) {
560                return Some(found);
561            }
562        }
563        // Recurse into container-style nodes' children:
564        match item {
565            ContentItem::Session(s) => {
566                if let Some(found) = find_annotation_in_session_with_origin(s, label, origin) {
567                    return Some(found);
568                }
569            }
570            ContentItem::Definition(d) => {
571                if let Some(found) =
572                    find_annotation_in_items_with_origin(&d.children, label, origin)
573                {
574                    return Some(found);
575                }
576            }
577            ContentItem::ListItem(li) => {
578                if let Some(found) =
579                    find_annotation_in_items_with_origin(&li.children, label, origin)
580                {
581                    return Some(found);
582                }
583            }
584            ContentItem::List(l) => {
585                if let Some(found) = find_annotation_in_items_with_origin(&l.items, label, origin) {
586                    return Some(found);
587                }
588            }
589            _ => {}
590        }
591    }
592    None
593}
594
595impl fmt::Display for Document {
596    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
597        write!(
598            f,
599            "Document({} annotations, {} items)",
600            self.annotations.len(),
601            self.root.children.len()
602        )
603    }
604}
605
#[cfg(test)]
mod tests {
    use super::super::super::range::Position;
    use super::super::paragraph::{Paragraph, TextLine};
    use super::super::session::Session;
    use super::*;
    use crate::lex::ast::text_content::TextContent;
    use crate::lex::ast::traits::AstNode;

    /// `with_content` places every item under the root session and leaves the
    /// document-level annotation list empty.
    #[test]
    fn test_document_creation() {
        let doc = Document::with_content(vec![
            ContentItem::Paragraph(Paragraph::from_line("Para 1".to_string())),
            ContentItem::Session(Session::with_title("Section 1".to_string())),
        ]);
        assert_eq!(doc.annotations.len(), 0);
        assert_eq!(doc.root.children.len(), 2);
    }

    /// Position lookup through the `Document` API resolves to the text line
    /// inside the paragraph on that line.
    #[test]
    fn test_document_element_at() {
        let text_line1 = TextLine::new(TextContent::from_string("First".to_string(), None))
            .at(Range::new(0..0, Position::new(0, 0), Position::new(0, 5)));
        let para1 = Paragraph::new(vec![ContentItem::TextLine(text_line1)]).at(Range::new(
            0..0,
            Position::new(0, 0),
            Position::new(0, 5),
        ));

        let text_line2 = TextLine::new(TextContent::from_string("Second".to_string(), None))
            .at(Range::new(0..0, Position::new(1, 0), Position::new(1, 6)));
        let para2 = Paragraph::new(vec![ContentItem::TextLine(text_line2)]).at(Range::new(
            0..0,
            Position::new(1, 0),
            Position::new(1, 6),
        ));

        let doc = Document::with_content(vec![
            ContentItem::Paragraph(para1),
            ContentItem::Paragraph(para2),
        ]);

        // Go through `Document::element_at` (not `doc.root`) so the delegating
        // method this test is named after is actually exercised.
        let result = doc.element_at(Position::new(1, 3));
        assert!(result.is_some(), "Expected to find element at position");
        assert!(result.unwrap().is_text_line());
    }

    /// `AstNode` and `Display` summaries for a one-item document.
    #[test]
    fn test_document_traits() {
        let doc = Document::with_content(vec![ContentItem::Paragraph(Paragraph::from_line(
            "Line".to_string(),
        ))]);

        assert_eq!(doc.node_type(), "Document");
        assert_eq!(doc.display_label(), "Document (0 annotations, 1 items)");
        // `Display` uses the same counts but no space before the parenthesis.
        assert_eq!(doc.to_string(), "Document(0 annotations, 1 items)");
        assert_eq!(doc.root.children.len(), 1);
    }

    /// Shared, mutable and consuming accessors all reach the same root session.
    #[test]
    fn test_root_session_accessors() {
        let doc = Document::with_content(vec![ContentItem::Session(Session::with_title(
            "Section".to_string(),
        ))]);

        assert_eq!(doc.root_session().children.len(), 1);

        let mut doc = doc;
        doc.root_session_mut().title = TextContent::from_string("Updated".to_string(), None);
        assert_eq!(doc.root_session().title.as_string(), "Updated");

        let root = doc.into_root();
        assert_eq!(root.title.as_string(), "Updated");
    }

    /// `set_title` installs a title for non-empty text and clears it for "".
    #[test]
    fn test_document_title_field() {
        let mut doc = Document::new();
        assert!(doc.title.is_none());
        assert_eq!(doc.title(), "");

        doc.set_title("My Title".to_string());
        assert!(doc.title.is_some());
        assert_eq!(doc.title(), "My Title");

        doc.set_title(String::new());
        assert!(doc.title.is_none());
        assert_eq!(doc.title(), "");
    }

    #[test]
    fn test_from_title_and_root() {
        let title = DocumentTitle::from_string("Test Title".to_string(), Range::default());
        let root = Session::with_title(String::new());
        let doc = Document::from_title_and_root(Some(title), root);
        assert_eq!(doc.title(), "Test Title");
    }

    /// A title built with a subtitle exposes both strings and labels itself
    /// with both; a plain title reports no subtitle.
    #[test]
    fn test_document_title_with_subtitle() {
        let titled = DocumentTitle::with_subtitle(
            TextContent::from_string("Main".to_string(), None),
            TextContent::from_string("Sub".to_string(), None),
            Range::default(),
        );
        assert_eq!(titled.as_str(), "Main");
        assert_eq!(titled.subtitle_str(), Some("Sub"));
        assert_eq!(
            titled.display_label(),
            "DocumentTitle(\"Main\", subtitle: \"Sub\")"
        );

        let plain = DocumentTitle::from_string("Main".to_string(), Range::default());
        assert_eq!(plain.subtitle_str(), None);
        assert_eq!(plain.display_label(), "DocumentTitle(\"Main\")");
    }
}