// mos_parse/syntax.rs

use std::path::PathBuf;

use mos_core::{Diagnostic, Severity, SourceSpan};

5/// Concrete syntax tree for a single `.mos` source file.
6#[derive(Debug, Clone)]
7pub struct SyntaxTree {
8    pub file: PathBuf,
9    pub items: Vec<Item>,
10}
11
12/// Top-level construct in a `.mos` file.
13#[derive(Debug, Clone)]
14pub enum Item {
15    /// `= Title`, `== Subtitle`, `=== Subsubtitle`. A trailing
16    /// `<label>` token after the title attaches to this heading.
17    Heading {
18        level: u8,
19        inlines: Vec<Inline>,
20        label: Option<String>,
21        label_span: Option<SourceSpan>,
22        span: SourceSpan,
23    },
24    /// One or more consecutive non-blank lines that are not a heading
25    /// and not a `#set` block. A leading `<label>` token (possibly
26    /// preceded by ASCII whitespace) attaches to this paragraph.
27    Paragraph {
28        inlines: Vec<Inline>,
29        label: Option<String>,
30        label_span: Option<SourceSpan>,
31        span: SourceSpan,
32    },
33    /// `#set name(...)`, `#image(...)`, `#figure(...)`. The body is
34    /// lexed into typed `(key, value)` args; semantic validation
35    /// (known target/key, type coercion, sanity floors) happens in
36    /// the lowerer. `kind` distinguishes the `#set`-style configuration
37    /// directive from standalone calls like `#image` and `#figure`,
38    /// which the lowerer dispatches to dedicated paths.
39    Set {
40        kind: DirectiveKind,
41        name: String,
42        args: Vec<SetArg>,
43        span: SourceSpan,
44    },
45    /// Raw preformatted text or code block. Both forms preserve their
46    /// long-bracket body as text; the kind leaves room for later styling
47    /// or language-aware code rendering.
48    RawBlock {
49        kind: RawBlockKind,
50        args: Vec<SetArg>,
51        text: String,
52        label: Option<String>,
53        label_span: Option<SourceSpan>,
54        span: SourceSpan,
55    },
56    /// A bullet (`- `) or numbered (`\d+\. `) list. Sibling items at
57    /// the same indent are grouped under one list; deeper indents
58    /// become nested lists hanging off the most recent item. Numbered
59    /// lists always renumber from 1 in MVP — explicit `start: N` is
60    /// deferred.
61    List {
62        ordered: bool,
63        items: Vec<ListItem>,
64        span: SourceSpan,
65    },
66}
67
68/// One entry inside an [`Item::List`]. `inlines` is the item's own
69/// text (markers stripped, parsed with the same inline tokenizer as
70/// paragraphs); `children` carries nested blocks, currently restricted
71/// to further [`Item::List`]s per the MVP scope.
72#[derive(Debug, Clone)]
73pub struct ListItem {
74    pub inlines: Vec<Inline>,
75    pub children: Vec<Item>,
76    pub span: SourceSpan,
77}
78
/// Tag for the directive shapes [`Item::Set`] can represent — the
/// `#set <target>(...)` configuration directive vs the standalone
/// `#image(...)`, `#figure(...)`, and `#bibliography(...)` calls. The
/// lowerer dispatches on this rather than the [`Item::Set::name`] string
/// so `#set image(...)` can never collide with `#image(...)`.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub enum DirectiveKind {
    /// `#set <name>(...)` — sets defaults on a style target.
    Set,
    /// `#image("path", ...)` — raster image directive.
    Image,
    /// `#figure(image: ..., caption: ...)` — captioned image container.
    Figure,
    /// `#bibliography("refs.bib")` — declares a bibliography source
    /// database. The lowerer records the (source-relative) path so a
    /// later BibTeX-parsing slice can read it; citation resolution and
    /// rendering are not part of this directive.
    Bibliography,
}

/// Which flavour of raw block an [`Item::RawBlock`] holds.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub enum RawBlockKind {
    /// Raw preformatted text.
    Pre,
    /// Code block.
    Code,
}

105/// Borrowed view of an [`Item::RawBlock`] payload.
106#[derive(Debug, Clone, Copy)]
107pub struct RawBlockView<'a> {
108    pub kind: RawBlockKind,
109    pub args: &'a [SetArg],
110    pub text: &'a str,
111    pub label: Option<&'a str>,
112    pub label_span: Option<&'a SourceSpan>,
113    pub span: &'a SourceSpan,
114}
115
116/// One argument inside a directive body — either a `key: value`
117/// pair (the only form `#set` accepts) or a positional value (a
118/// leading string literal allowed on `#image(...)` / `#figure(...)`).
119///
120/// This used to be a struct with an empty-string `key` standing in
121/// for "positional," but that sentinel was a brittle public contract:
122/// any consumer that forgot the special-case would silently treat a
123/// positional path as a named arg called `""`. The enum form makes
124/// the two shapes explicit so the compiler can enforce exhaustive
125/// matches.
126#[derive(Debug, Clone)]
127pub enum SetArg {
128    /// A `key: value` argument. `key_span` covers the identifier
129    /// before the colon; `value_span` covers the literal.
130    Named {
131        key: String,
132        value: SetValue,
133        key_span: SourceSpan,
134        value_span: SourceSpan,
135    },
136    /// A leading positional value. The parser currently only accepts
137    /// string literals here (used for `#image("path.png")`); other
138    /// literal kinds in a positional slot would surface as a parse
139    /// error rather than land in this variant.
140    Positional {
141        value: SetValue,
142        value_span: SourceSpan,
143    },
144}
145
146impl SetArg {
147    /// Borrow the value carried by this argument, regardless of shape.
148    #[must_use]
149    pub fn value(&self) -> &SetValue {
150        match self {
151            Self::Named { value, .. } | Self::Positional { value, .. } => value,
152        }
153    }
154
155    /// The span covering the argument's value literal.
156    #[must_use]
157    pub fn value_span(&self) -> &SourceSpan {
158        match self {
159            Self::Named { value_span, .. } | Self::Positional { value_span, .. } => value_span,
160        }
161    }
162
163    /// The key identifier for [`Self::Named`]; `None` for
164    /// [`Self::Positional`].
165    #[must_use]
166    pub fn key(&self) -> Option<&str> {
167        match self {
168            Self::Named { key, .. } => Some(key.as_str()),
169            Self::Positional { .. } => None,
170        }
171    }
172
173    /// The span covering the key identifier, for [`Self::Named`].
174    /// `None` for [`Self::Positional`].
175    #[must_use]
176    pub fn key_span(&self) -> Option<&SourceSpan> {
177        match self {
178            Self::Named { key_span, .. } => Some(key_span),
179            Self::Positional { .. } => None,
180        }
181    }
182}
183
184/// Literal values recognised inside a `#set` body. Full expression
185/// evaluation (`#let`, function calls, `if`) is deferred to MVP 5; this
186/// covers what the manifest examples actually use.
187#[derive(Debug, Clone, PartialEq)]
188pub enum SetValue {
189    Str(String),
190    Int(i64),
191    Float(f64),
192    Length(f64, LengthUnit),
193    Ident(String),
194}
195
/// Unit attached to a [`SetValue::Length`] magnitude. How each unit is
/// spelled in source is decided by the lexer, which lives outside this
/// module.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub enum LengthUnit {
    /// Millimetres.
    Mm,
    /// Points.
    Pt,
    /// Ems.
    Em,
}

203/// Inline run produced by the markup tokenizer.
204#[derive(Debug, Clone)]
205pub struct Inline {
206    pub kind: InlineKind,
207    pub text: String,
208    pub span: SourceSpan,
209}
210
/// Classification of an [`Inline`] run. The markup that produces each
/// kind is decided by the tokenizer, which lives outside this module.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub enum InlineKind {
    /// Plain text run.
    Text,
    /// Emphasised run.
    Emphasis,
    /// Strong run.
    Strong,
    /// Run that is both strong and emphasised (going by the variant
    /// name; the tokenizer is the authority).
    BoldItalic,
    /// Inline code run.
    Code,
    /// `@label` — a cross-reference to a labelled block. The
    /// [`Inline::text`] payload is the bare label name (no leading
    /// `@`); the resolver rewrites it to the target's resolved text.
    Reference,
    /// `[@key]` — a citation to a bibliography entry. The
    /// [`Inline::text`] payload is the bare citation key (no leading
    /// `[@` or trailing `]`); bibliography loading and rendering are
    /// future work tracked under MVP 4. The key alphabet matches the
    /// label alphabet (`[A-Za-z0-9_:.-]`); a single key per
    /// `[@…]` group is the only form recognised in this slice — list
    /// forms like `[@a; @b]` and prefix/suffix bodies are deferred.
    Citation,
    /// `\\` — a forced line break inside a paragraph. The line
    /// breaks here without the extra leading a blank-line paragraph
    /// break would give. Carries no text payload. The shorthand for
    /// a soft hyphen `\-` lowers to a literal U+00AD inside a
    /// surrounding [`InlineKind::Text`] run, not to a separate variant.
    HardBreak,
}

238impl Item {
239    /// Borrow the heading payload if `self` is [`Item::Heading`].
240    #[must_use]
241    pub fn as_heading(&self) -> Option<(u8, &[Inline], &SourceSpan)> {
242        if let Self::Heading {
243            level,
244            inlines,
245            span,
246            ..
247        } = self
248        {
249            Some((*level, inlines, span))
250        } else {
251            None
252        }
253    }
254
255    /// Borrow the paragraph payload if `self` is [`Item::Paragraph`].
256    #[must_use]
257    pub fn as_paragraph(&self) -> Option<(&[Inline], &SourceSpan)> {
258        if let Self::Paragraph { inlines, span, .. } = self {
259            Some((inlines, span))
260        } else {
261            None
262        }
263    }
264
265    /// Borrow the directive payload if `self` is [`Item::Set`].
266    ///
267    /// The returned tuple is `(name, args, span)`; the caller can also
268    /// reach [`DirectiveKind`] via [`Self::directive_kind`]. The
269    /// accessor name is retained for back-compat — every existing
270    /// caller pre-dates the `#image`/`#figure` directives and only
271    /// looks at name/args/span.
272    #[must_use]
273    pub fn as_set(&self) -> Option<(&str, &[SetArg], &SourceSpan)> {
274        if let Self::Set {
275            name, args, span, ..
276        } = self
277        {
278            Some((name.as_str(), args.as_slice(), span))
279        } else {
280            None
281        }
282    }
283
284    /// Borrow the raw block payload if `self` is [`Item::RawBlock`].
285    #[must_use]
286    pub fn as_raw_block(&self) -> Option<RawBlockView<'_>> {
287        if let Self::RawBlock {
288            kind,
289            args,
290            text,
291            label,
292            label_span,
293            span,
294        } = self
295        {
296            Some(RawBlockView {
297                kind: *kind,
298                args: args.as_slice(),
299                text: text.as_str(),
300                label: label.as_deref(),
301                label_span: label_span.as_ref(),
302                span,
303            })
304        } else {
305            None
306        }
307    }
308
309    /// Borrow the [`DirectiveKind`] tag if `self` is [`Item::Set`].
310    #[must_use]
311    pub fn directive_kind(&self) -> Option<DirectiveKind> {
312        if let Self::Set { kind, .. } = self {
313            Some(*kind)
314        } else {
315            None
316        }
317    }
318
319    /// Borrow the list payload if `self` is [`Item::List`]. The
320    /// returned tuple is `(ordered, items, span)`.
321    #[must_use]
322    pub fn as_list(&self) -> Option<(bool, &[ListItem], &SourceSpan)> {
323        if let Self::List {
324            ordered,
325            items,
326            span,
327        } = self
328        {
329            Some((*ordered, items.as_slice(), span))
330        } else {
331            None
332        }
333    }
334
335    /// Borrow the explicit `<label>` attached to this block, if any.
336    /// Returns `None` for [`Item::Set`] and [`Item::List`] (label
337    /// syntax is not yet defined on those blocks).
338    #[must_use]
339    pub fn label(&self) -> Option<&str> {
340        match self {
341            Self::Heading { label, .. }
342            | Self::Paragraph { label, .. }
343            | Self::RawBlock { label, .. } => label.as_deref(),
344            Self::Set { .. } | Self::List { .. } => None,
345        }
346    }
347
348    /// Borrow the source span covering only the label token text, if any.
349    /// The delimiters (`<`, `>`, or directive string quotes) are excluded so a
350    /// structured suggestion can replace just the label bytes.
351    #[must_use]
352    pub fn label_span(&self) -> Option<&SourceSpan> {
353        match self {
354            Self::Heading { label_span, .. }
355            | Self::Paragraph { label_span, .. }
356            | Self::RawBlock { label_span, .. } => label_span.as_ref(),
357            Self::Set { .. } | Self::List { .. } => None,
358        }
359    }
360}
361
362/// Output of [`crate::parse`]. Diagnostics may include warnings even
363/// when the tree is structurally usable; callers decide what to do per
364/// [`ParseResult::has_errors`].
365#[derive(Debug)]
366pub struct ParseResult {
367    pub tree: SyntaxTree,
368    pub diagnostics: Vec<Diagnostic>,
369}
370
371impl ParseResult {
372    #[must_use]
373    pub fn has_errors(&self) -> bool {
374        self.diagnostics
375            .iter()
376            .any(|d| d.severity() == Severity::Error)
377    }
378}