Skip to main content

lex_core/lex/ast/elements/
document.rs

1//! Document element
2//!
3//!     The document node serves two purposes:
4//!         - Contains the document tree.
5//!         - Contains document-level annotations, including non-content metadata (like file name,
6//!           parser version, etc).
7//!
8//!     Lex documents are plain text, utf-8 encoded files with the file extension .lex. Line width
9//!     is not limited, and is considered a presentation detail. Best practice dictates only
10//!     limiting line length when publishing, not while authoring.
11//!
12//!     The document node holds the document metadata and the content's root node, which is a
13//!     session node. The structure of the document then is a tree of sessions, which can be nested
14//!     arbitrarily. This creates powerful addressing capabilities as one can target any sub-session
15//!     from an index.
16//!
17//!     Document Title:
18//!     The document title is a first-class element, represented as a dedicated `DocumentTitle`
19//!     AST node owned directly by the `Document`. It is parsed from a single unindented line
20//!     at the start of the document, followed by blank lines, where no indented content follows
21//!     (distinguishing it from a session title). See specs/elements/document.lex.
22//!
23//!     Document Start:
24//!     A synthetic `DocumentStart` token is used to mark the boundary between document-level
25//!     annotations (metadata) and the actual document content. This allows the parser and
26//!     assembly logic to correctly identify where the body begins.
27//!
28//!     For more details on document structure and sessions, see the [ast](crate::lex::ast) module.
29//!
30//! Learn More:
31//! - Paragraphs: specs/v1/elements/paragraph.lex
32//! - Lists: specs/v1/elements/list.lex
33//! - Sessions: specs/v1/elements/session.lex
34//! - Annotations: specs/v1/elements/annotation.lex
35//! - Definitions: specs/v1/elements/definition.lex
36//! - Verbatim blocks: specs/v1/elements/verbatim.lex
37//!
38//! Examples:
39//! - Document-level metadata via annotations
40//! - All body content accessible via document.root.children
41
42use super::super::range::{Position, Range};
43use super::super::text_content::TextContent;
44use super::super::traits::{AstNode, Container, Visitor};
45use super::annotation::Annotation;
46use super::content_item::ContentItem;
47use super::session::Session;
48use super::typed_content;
49use std::fmt;
50
51/// A first-class document title element.
52///
53/// Represents the title of a Lex document — a single unindented line at the start
54/// of the document, followed by blank lines, with no indented content after.
55/// This is distinct from session titles.
56///
57/// An optional subtitle is supported: when the title line ends with a colon and a
58/// second non-blank, non-indented line follows before the blank separator, the
59/// second line is parsed as a subtitle. The trailing colon is structural (stripped
60/// from the title content).
61#[derive(Debug, Clone, PartialEq)]
62pub struct DocumentTitle {
63    pub content: TextContent,
64    pub subtitle: Option<TextContent>,
65    pub location: Range,
66}
67
68impl DocumentTitle {
69    pub fn new(content: TextContent, location: Range) -> Self {
70        Self {
71            content,
72            subtitle: None,
73            location,
74        }
75    }
76
77    pub fn with_subtitle(content: TextContent, subtitle: TextContent, location: Range) -> Self {
78        Self {
79            content,
80            subtitle: Some(subtitle),
81            location,
82        }
83    }
84
85    pub fn from_string(text: String, location: Range) -> Self {
86        Self {
87            content: TextContent::from_string(text, Some(location.clone())),
88            subtitle: None,
89            location,
90        }
91    }
92
93    pub fn as_str(&self) -> &str {
94        self.content.as_string()
95    }
96
97    pub fn subtitle_str(&self) -> Option<&str> {
98        self.subtitle.as_ref().map(|s| s.as_string())
99    }
100}
101
102impl AstNode for DocumentTitle {
103    fn node_type(&self) -> &'static str {
104        "DocumentTitle"
105    }
106
107    fn display_label(&self) -> String {
108        match &self.subtitle {
109            Some(sub) => format!(
110                "DocumentTitle(\"{}\", subtitle: \"{}\")",
111                self.as_str(),
112                sub.as_string()
113            ),
114            None => format!("DocumentTitle(\"{}\")", self.as_str()),
115        }
116    }
117
118    fn range(&self) -> &Range {
119        &self.location
120    }
121
122    fn accept(&self, _visitor: &mut dyn Visitor) {}
123}
124
125#[derive(Debug, Clone, PartialEq)]
126pub struct Document {
127    pub annotations: Vec<Annotation>,
128    pub title: Option<DocumentTitle>,
129    // all content is attached to the root node
130    pub root: Session,
131}
132
133impl Document {
134    pub fn new() -> Self {
135        Self {
136            annotations: Vec::new(),
137            title: None,
138            root: Session::with_title(String::new()),
139        }
140    }
141
142    pub fn with_content(content: Vec<ContentItem>) -> Self {
143        let mut root = Session::with_title(String::new());
144        let session_content = typed_content::into_session_contents(content);
145        root.children = super::container::SessionContainer::from_typed(session_content);
146        Self {
147            annotations: Vec::new(),
148            title: None,
149            root,
150        }
151    }
152
153    /// Construct a document from an existing root session.
154    pub fn from_root(root: Session) -> Self {
155        Self {
156            annotations: Vec::new(),
157            title: None,
158            root,
159        }
160    }
161
162    /// Construct a document from a title and root session.
163    pub fn from_title_and_root(title: Option<DocumentTitle>, root: Session) -> Self {
164        Self {
165            annotations: Vec::new(),
166            title,
167            root,
168        }
169    }
170
171    pub fn with_annotations_and_content(
172        annotations: Vec<Annotation>,
173        content: Vec<ContentItem>,
174    ) -> Self {
175        let mut root = Session::with_title(String::new());
176        let session_content = typed_content::into_session_contents(content);
177        root.children = super::container::SessionContainer::from_typed(session_content);
178        Self {
179            annotations,
180            title: None,
181            root,
182        }
183    }
184
185    pub fn with_root_location(mut self, location: Range) -> Self {
186        self.root.location = location;
187        self
188    }
189
190    pub fn root_session(&self) -> &Session {
191        &self.root
192    }
193
194    pub fn root_session_mut(&mut self) -> &mut Session {
195        &mut self.root
196    }
197
198    pub fn into_root(self) -> Session {
199        self.root
200    }
201
202    /// Get the document title text.
203    ///
204    /// Returns the title string if a DocumentTitle is present, empty string otherwise.
205    pub fn title(&self) -> &str {
206        match &self.title {
207            Some(dt) => dt.as_str(),
208            None => "",
209        }
210    }
211
212    /// Set the document title.
213    pub fn set_title(&mut self, title: String) {
214        if title.is_empty() {
215            self.title = None;
216        } else {
217            let location = Range::default();
218            self.title = Some(DocumentTitle::from_string(title, location));
219        }
220    }
221
222    /// Returns the path of nodes at the given position, starting from the document
223    pub fn node_path_at_position(&self, pos: Position) -> Vec<&dyn AstNode> {
224        let path = self.root.node_path_at_position(pos);
225        if !path.is_empty() {
226            let mut nodes: Vec<&dyn AstNode> = Vec::with_capacity(path.len() + 1);
227            nodes.push(self);
228            nodes.extend(path);
229            nodes
230        } else {
231            Vec::new()
232        }
233    }
234
235    /// Returns the deepest (most nested) element that contains the position
236    pub fn element_at(&self, pos: Position) -> Option<&ContentItem> {
237        self.root.element_at(pos)
238    }
239
240    /// Returns the visual line element at the given position
241    pub fn visual_line_at(&self, pos: Position) -> Option<&ContentItem> {
242        self.root.visual_line_at(pos)
243    }
244
245    /// Returns the block element at the given position
246    pub fn block_element_at(&self, pos: Position) -> Option<&ContentItem> {
247        self.root.block_element_at(pos)
248    }
249
250    /// All annotations attached directly to the document (document-level metadata).
251    pub fn annotations(&self) -> &[Annotation] {
252        &self.annotations
253    }
254
255    /// Mutable access to document-level annotations.
256    pub fn annotations_mut(&mut self) -> &mut Vec<Annotation> {
257        &mut self.annotations
258    }
259
260    /// Iterate over document-level annotation blocks in source order.
261    pub fn iter_annotations(&self) -> std::slice::Iter<'_, Annotation> {
262        self.annotations.iter()
263    }
264
265    /// Iterate over all content items nested inside document-level annotations.
266    pub fn iter_annotation_contents(&self) -> impl Iterator<Item = &ContentItem> {
267        self.annotations
268            .iter()
269            .flat_map(|annotation| annotation.children())
270    }
271
272    // ========================================================================
273    // REFERENCE RESOLUTION APIs (Issue #291)
274    // Delegates to the root session
275    // ========================================================================
276
277    /// Find the first annotation with a matching label.
278    ///
279    /// This searches recursively through all annotations in the document,
280    /// including both document-level annotations and annotations in the content tree.
281    ///
282    /// # Arguments
283    /// * `label` - The label string to search for
284    ///
285    /// # Returns
286    /// The first annotation whose label matches exactly, or None if not found.
287    ///
288    /// # Example
289    /// ```rust,ignore
290    /// // Find annotation with label "42" for reference [42]
291    /// if let Some(annotation) = document.find_annotation_by_label("42") {
292    ///     // Jump to this annotation in go-to-definition
293    /// }
294    /// ```
295    pub fn find_annotation_by_label(&self, label: &str) -> Option<&Annotation> {
296        // First check document-level annotations
297        self.annotations
298            .iter()
299            .find(|ann| ann.data.label.value == label)
300            .or_else(|| self.root.find_annotation_by_label(label))
301    }
302
303    /// Find the first annotation with `label` whose `Range.origin_path`
304    /// matches `origin`.
305    ///
306    /// Used after include resolution so that footnote references like `[1]`
307    /// scope to the file they were authored in: a `[1]` in `chapter.lex`
308    /// finds `:: 1 ::` defined in `chapter.lex` and not in some other
309    /// included file that happens to also have a `:: 1 ::`. The reference's
310    /// origin is the `Range.origin_path` of the inline's containing node
311    /// (paragraph / session title / etc.).
312    ///
313    /// Unlike [`Document::find_annotation_by_label`], this walker checks
314    /// both *standalone* annotations (children of containers) **and**
315    /// *attached* annotations (those moved into a node's `.annotations`
316    /// field by `AttachAnnotations`). The bare `find_annotation_by_label`
317    /// only sees standalone ones, which is fine for an unresolved tree
318    /// (annotations live in children pre-attachment) but misses footnote
319    /// definitions in any tree that has been through the parser's
320    /// attachment phase. After include resolution every annotation has
321    /// either form, so the more thorough walker is the right one.
322    ///
323    /// `find_annotation_by_label` (without origin filter) remains the
324    /// right call when origin filtering is not needed and the caller
325    /// knows annotations have not been attached.
326    pub fn find_annotation_by_label_in_origin(
327        &self,
328        label: &str,
329        origin: Option<&std::path::Path>,
330    ) -> Option<&Annotation> {
331        fn matches(ann: &Annotation, label: &str, origin: Option<&std::path::Path>) -> bool {
332            ann.data.label.value == label && ann.location.origin() == origin
333        }
334
335        // Document-level annotations first.
336        if let Some(ann) = self.annotations.iter().find(|a| matches(a, label, origin)) {
337            return Some(ann);
338        }
339        // Then walk the body. The recursive helper visits both the
340        // children stream and each node's attached `.annotations` slots.
341        find_annotation_in_session_with_origin(&self.root, label, origin)
342    }
343
344    /// Find all annotations with a matching label.
345    ///
346    /// This searches recursively through all annotations in the document,
347    /// including both document-level annotations and annotations in the content tree.
348    ///
349    /// # Arguments
350    /// * `label` - The label string to search for
351    ///
352    /// # Returns
353    /// A vector of all annotations whose labels match exactly.
354    ///
355    /// # Example
356    /// ```rust,ignore
357    /// // Find all annotations labeled "note"
358    /// let notes = document.find_annotations_by_label("note");
359    /// for note in notes {
360    ///     // Process each note annotation
361    /// }
362    /// ```
363    pub fn find_annotations_by_label(&self, label: &str) -> Vec<&Annotation> {
364        let mut results: Vec<&Annotation> = self
365            .annotations
366            .iter()
367            .filter(|ann| ann.data.label.value == label)
368            .collect();
369
370        results.extend(self.root.find_annotations_by_label(label));
371        results
372    }
373
374    /// Iterate all inline references at any depth.
375    ///
376    /// This method recursively walks the document tree, parses inline content,
377    /// and yields all reference inline nodes (e.g., \[42\], \[@citation\], \[::note\]).
378    ///
379    /// # Returns
380    /// An iterator of references to ReferenceInline nodes
381    ///
382    /// # Example
383    /// ```rust,ignore
384    /// for reference in document.iter_all_references() {
385    ///     match &reference.reference_type {
386    ///         ReferenceType::FootnoteNumber { number } => {
387    ///             // Find annotation with this number
388    ///         }
389    ///         ReferenceType::Citation(data) => {
390    ///             // Process citation
391    ///         }
392    ///         _ => {}
393    ///     }
394    /// }
395    /// ```
396    pub fn iter_all_references(
397        &self,
398    ) -> Box<dyn Iterator<Item = crate::lex::inlines::ReferenceInline> + '_> {
399        let title_refs = self
400            .title
401            .iter()
402            .flat_map(|t| {
403                let title_inlines = t.content.inline_items();
404                let subtitle_inlines = t
405                    .subtitle
406                    .iter()
407                    .flat_map(|s| s.inline_items())
408                    .collect::<Vec<_>>();
409                title_inlines.into_iter().chain(subtitle_inlines)
410            })
411            .filter_map(|node| {
412                if let crate::lex::inlines::InlineNode::Reference { data, .. } = node {
413                    Some(data)
414                } else {
415                    None
416                }
417            });
418        Box::new(title_refs.chain(self.root.iter_all_references()))
419    }
420
421    /// Find all references to a specific target label.
422    ///
423    /// This method searches for inline references that point to the given target.
424    /// For example, find all `[42]` references when looking for footnote "42".
425    ///
426    /// # Arguments
427    /// * `target` - The target label to search for
428    ///
429    /// # Returns
430    /// A vector of references to ReferenceInline nodes that match the target
431    ///
432    /// # Example
433    /// ```rust,ignore
434    /// // Find all references to footnote "42"
435    /// let refs = document.find_references_to("42");
436    /// println!("Found {} references to footnote 42", refs.len());
437    /// ```
438    pub fn find_references_to(&self, target: &str) -> Vec<crate::lex::inlines::ReferenceInline> {
439        self.root.find_references_to(target)
440    }
441}
442
443impl AstNode for Document {
444    fn node_type(&self) -> &'static str {
445        "Document"
446    }
447
448    fn display_label(&self) -> String {
449        format!(
450            "Document ({} annotations, {} items)",
451            self.annotations.len(),
452            self.root.children.len()
453        )
454    }
455
456    fn range(&self) -> &Range {
457        &self.root.location
458    }
459
460    fn accept(&self, visitor: &mut dyn Visitor) {
461        for annotation in &self.annotations {
462            annotation.accept(visitor);
463        }
464        if let Some(title) = &self.title {
465            title.accept(visitor);
466        }
467        self.root.accept(visitor);
468    }
469}
470
471impl Default for Document {
472    fn default() -> Self {
473        Self::new()
474    }
475}
476
477/// Recursive walker for `find_annotation_by_label_in_origin`. Visits both
478/// standalone annotations (in containers' children) and attached
479/// annotations (in each node's `.annotations` slot).
480fn find_annotation_in_session_with_origin<'a>(
481    s: &'a Session,
482    label: &str,
483    origin: Option<&std::path::Path>,
484) -> Option<&'a Annotation> {
485    fn matches(ann: &Annotation, label: &str, origin: Option<&std::path::Path>) -> bool {
486        ann.data.label.value == label && ann.location.origin() == origin
487    }
488    for ann in &s.annotations {
489        if matches(ann, label, origin) {
490            return Some(ann);
491        }
492    }
493    find_annotation_in_items_with_origin(&s.children, label, origin)
494}
495
496fn find_annotation_in_items_with_origin<'a>(
497    items: &'a [ContentItem],
498    label: &str,
499    origin: Option<&std::path::Path>,
500) -> Option<&'a Annotation> {
501    fn matches(ann: &Annotation, label: &str, origin: Option<&std::path::Path>) -> bool {
502        ann.data.label.value == label && ann.location.origin() == origin
503    }
504    for item in items {
505        // Attached annotations live on every node that has a public
506        // `.annotations: Vec<Annotation>` field. Today that's:
507        // Session, Definition, ListItem, Paragraph, List, Table, and
508        // Verbatim (as VerbatimBlock). Missing any of these would let
509        // origin-aware lookups silently skip a valid match.
510        let attached: &[Annotation] = match item {
511            ContentItem::Session(s) => &s.annotations,
512            ContentItem::Definition(d) => &d.annotations,
513            ContentItem::ListItem(li) => &li.annotations,
514            ContentItem::Paragraph(p) => &p.annotations,
515            ContentItem::List(l) => &l.annotations,
516            ContentItem::Table(t) => &t.annotations,
517            ContentItem::VerbatimBlock(v) => &v.annotations,
518            _ => &[],
519        };
520        for ann in attached {
521            if matches(ann, label, origin) {
522                return Some(ann);
523            }
524        }
525        // Standalone annotation directly in the children stream:
526        if let ContentItem::Annotation(a) = item {
527            if matches(a, label, origin) {
528                return Some(a);
529            }
530            // Annotation body can host more annotations.
531            if let Some(found) = find_annotation_in_items_with_origin(&a.children, label, origin) {
532                return Some(found);
533            }
534        }
535        // Recurse into container-style nodes' children:
536        match item {
537            ContentItem::Session(s) => {
538                if let Some(found) = find_annotation_in_session_with_origin(s, label, origin) {
539                    return Some(found);
540                }
541            }
542            ContentItem::Definition(d) => {
543                if let Some(found) =
544                    find_annotation_in_items_with_origin(&d.children, label, origin)
545                {
546                    return Some(found);
547                }
548            }
549            ContentItem::ListItem(li) => {
550                if let Some(found) =
551                    find_annotation_in_items_with_origin(&li.children, label, origin)
552                {
553                    return Some(found);
554                }
555            }
556            ContentItem::List(l) => {
557                if let Some(found) = find_annotation_in_items_with_origin(&l.items, label, origin) {
558                    return Some(found);
559                }
560            }
561            _ => {}
562        }
563    }
564    None
565}
566
567impl fmt::Display for Document {
568    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
569        write!(
570            f,
571            "Document({} annotations, {} items)",
572            self.annotations.len(),
573            self.root.children.len()
574        )
575    }
576}
577
#[cfg(test)]
mod tests {
    use super::super::super::range::Position;
    use super::super::paragraph::{Paragraph, TextLine};
    use super::super::session::Session;
    use super::*;
    use crate::lex::ast::text_content::TextContent;
    use crate::lex::ast::traits::AstNode;

    /// Build a single-line paragraph whose text line and paragraph both span
    /// columns `0..width` of `line`.
    fn paragraph_at(text: &str, line: usize, width: usize) -> Paragraph {
        let range = Range::new(0..0, Position::new(line, 0), Position::new(line, width));
        let text_line =
            TextLine::new(TextContent::from_string(text.to_string(), None)).at(range.clone());
        Paragraph::new(vec![ContentItem::TextLine(text_line)]).at(range)
    }

    #[test]
    fn test_document_creation() {
        let doc = Document::with_content(vec![
            ContentItem::Paragraph(Paragraph::from_line("Para 1".to_string())),
            ContentItem::Session(Session::with_title("Section 1".to_string())),
        ]);
        assert_eq!(doc.annotations.len(), 0);
        assert_eq!(doc.root.children.len(), 2);
    }

    #[test]
    fn test_document_element_at() {
        let doc = Document::with_content(vec![
            ContentItem::Paragraph(paragraph_at("First", 0, 5)),
            ContentItem::Paragraph(paragraph_at("Second", 1, 6)),
        ]);

        // Go through the Document API (not `doc.root`) so the delegation
        // itself is covered by this test.
        let result = doc.element_at(Position::new(1, 3));
        assert!(result.is_some(), "Expected to find element at position");
        assert!(result.unwrap().is_text_line());
    }

    #[test]
    fn test_document_traits() {
        let doc = Document::with_content(vec![ContentItem::Paragraph(Paragraph::from_line(
            "Line".to_string(),
        ))]);

        assert_eq!(doc.node_type(), "Document");
        assert_eq!(doc.display_label(), "Document (0 annotations, 1 items)");
        assert_eq!(doc.root.children.len(), 1);
    }

    #[test]
    fn test_root_session_accessors() {
        let doc = Document::with_content(vec![ContentItem::Session(Session::with_title(
            "Section".to_string(),
        ))]);

        assert_eq!(doc.root_session().children.len(), 1);

        let mut doc = doc;
        doc.root_session_mut().title = TextContent::from_string("Updated".to_string(), None);
        assert_eq!(doc.root_session().title.as_string(), "Updated");

        let root = doc.into_root();
        assert_eq!(root.title.as_string(), "Updated");
    }

    #[test]
    fn test_document_title_field() {
        let mut doc = Document::new();
        assert!(doc.title.is_none());
        assert_eq!(doc.title(), "");

        doc.set_title("My Title".to_string());
        assert!(doc.title.is_some());
        assert_eq!(doc.title(), "My Title");

        // An empty string clears the title again.
        doc.set_title(String::new());
        assert!(doc.title.is_none());
        assert_eq!(doc.title(), "");
    }

    #[test]
    fn test_document_title_with_subtitle() {
        let plain = DocumentTitle::from_string("Main".to_string(), Range::default());
        assert_eq!(plain.subtitle_str(), None);
        assert_eq!(plain.display_label(), "DocumentTitle(\"Main\")");

        let titled = DocumentTitle::with_subtitle(
            TextContent::from_string("Main".to_string(), None),
            TextContent::from_string("Sub".to_string(), None),
            Range::default(),
        );
        assert_eq!(titled.as_str(), "Main");
        assert_eq!(titled.subtitle_str(), Some("Sub"));
        assert_eq!(
            titled.display_label(),
            "DocumentTitle(\"Main\", subtitle: \"Sub\")"
        );
    }

    #[test]
    fn test_from_title_and_root() {
        let title = DocumentTitle::from_string("Test Title".to_string(), Range::default());
        let root = Session::with_title(String::new());
        let doc = Document::from_title_and_root(Some(title), root);
        assert_eq!(doc.title(), "Test Title");
    }
}