mos_parse/syntax.rs
1use std::path::PathBuf;
2
3use mos_core::{Diagnostic, Severity, SourceSpan};
4
5/// Concrete syntax tree for a single `.mos` source file.
6#[derive(Debug, Clone)]
7pub struct SyntaxTree {
8 pub file: PathBuf,
9 pub items: Vec<Item>,
10}
11
12/// Top-level construct in a `.mos` file.
13#[derive(Debug, Clone)]
14pub enum Item {
15 /// `= Title`, `== Subtitle`, `=== Subsubtitle`. A trailing
16 /// `<label>` token after the title attaches to this heading.
17 Heading {
18 level: u8,
19 inlines: Vec<Inline>,
20 label: Option<String>,
21 label_span: Option<SourceSpan>,
22 span: SourceSpan,
23 },
24 /// One or more consecutive non-blank lines that are not a heading
25 /// and not a `#set` block. A leading `<label>` token (possibly
26 /// preceded by ASCII whitespace) attaches to this paragraph.
27 Paragraph {
28 inlines: Vec<Inline>,
29 label: Option<String>,
30 label_span: Option<SourceSpan>,
31 span: SourceSpan,
32 },
33 /// `#set name(...)`, `#image(...)`, `#figure(...)`. The body is
34 /// lexed into typed `(key, value)` args; semantic validation
35 /// (known target/key, type coercion, sanity floors) happens in
36 /// the lowerer. `kind` distinguishes the `#set`-style configuration
37 /// directive from standalone calls like `#image` and `#figure`,
38 /// which the lowerer dispatches to dedicated paths.
39 Set {
40 kind: DirectiveKind,
41 name: String,
42 args: Vec<SetArg>,
43 span: SourceSpan,
44 },
45 /// Raw preformatted text or code block. Both forms preserve their
46 /// long-bracket body as text; the kind leaves room for later styling
47 /// or language-aware code rendering.
48 RawBlock {
49 kind: RawBlockKind,
50 args: Vec<SetArg>,
51 text: String,
52 label: Option<String>,
53 label_span: Option<SourceSpan>,
54 span: SourceSpan,
55 },
56 /// A bullet (`- `) or numbered (`\d+\. `) list. Sibling items at
57 /// the same indent are grouped under one list; deeper indents
58 /// become nested lists hanging off the most recent item. Numbered
59 /// lists always renumber from 1 in MVP — explicit `start: N` is
60 /// deferred.
61 List {
62 ordered: bool,
63 items: Vec<ListItem>,
64 span: SourceSpan,
65 },
66}
67
68/// One entry inside an [`Item::List`]. `inlines` is the item's own
69/// text (markers stripped, parsed with the same inline tokenizer as
70/// paragraphs); `children` carries nested blocks, currently restricted
71/// to further [`Item::List`]s per the MVP scope.
72#[derive(Debug, Clone)]
73pub struct ListItem {
74 pub inlines: Vec<Inline>,
75 pub children: Vec<Item>,
76 pub span: SourceSpan,
77}
78
/// Discriminates the directive shapes that [`Item::Set`] can hold: the
/// `#set <target>(...)` configuration directive versus the standalone
/// `#image(...)`, `#figure(...)`, and `#bibliography(...)` calls.
///
/// The lowerer dispatches on this tag rather than on the
/// [`Item::Set::name`] string, so `#set image(...)` can never collide
/// with `#image(...)`.
///
/// `Hash` is derived alongside `Eq` so the tag can serve directly as a
/// `HashMap` key or `HashSet` member.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum DirectiveKind {
    /// `#set <name>(...)` — sets defaults on a style target.
    Set,
    /// `#image("path", ...)` — raster image directive.
    Image,
    /// `#figure(image: ..., caption: ...)` — captioned image container.
    Figure,
    /// `#bibliography("refs.bib")` — declares a bibliography source
    /// database. The lowerer records the (source-relative) path so a
    /// later BibTeX-parsing slice can read it; citation resolution and
    /// rendering are not part of this directive.
    Bibliography,
}
98
/// Distinguishes the two flavours of [`Item::RawBlock`].
///
/// Both flavours keep their body as verbatim text; the tag only
/// records which form was written so later stages can treat them
/// differently. `Hash` is derived alongside `Eq` so the tag can be
/// used as a map key or set member.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum RawBlockKind {
    /// Raw preformatted text.
    Pre,
    /// A code block.
    Code,
}
104
105/// Borrowed view of an [`Item::RawBlock`] payload.
106#[derive(Debug, Clone, Copy)]
107pub struct RawBlockView<'a> {
108 pub kind: RawBlockKind,
109 pub args: &'a [SetArg],
110 pub text: &'a str,
111 pub label: Option<&'a str>,
112 pub label_span: Option<&'a SourceSpan>,
113 pub span: &'a SourceSpan,
114}
115
116/// One argument inside a directive body — either a `key: value`
117/// pair (the only form `#set` accepts) or a positional value (a
118/// leading string literal allowed on `#image(...)` / `#figure(...)`).
119///
120/// This used to be a struct with an empty-string `key` standing in
121/// for "positional," but that sentinel was a brittle public contract:
122/// any consumer that forgot the special-case would silently treat a
123/// positional path as a named arg called `""`. The enum form makes
124/// the two shapes explicit so the compiler can enforce exhaustive
125/// matches.
126#[derive(Debug, Clone)]
127pub enum SetArg {
128 /// A `key: value` argument. `key_span` covers the identifier
129 /// before the colon; `value_span` covers the literal.
130 Named {
131 key: String,
132 value: SetValue,
133 key_span: SourceSpan,
134 value_span: SourceSpan,
135 },
136 /// A leading positional value. The parser currently only accepts
137 /// string literals here (used for `#image("path.png")`); other
138 /// literal kinds in a positional slot would surface as a parse
139 /// error rather than land in this variant.
140 Positional {
141 value: SetValue,
142 value_span: SourceSpan,
143 },
144}
145
146impl SetArg {
147 /// Borrow the value carried by this argument, regardless of shape.
148 #[must_use]
149 pub fn value(&self) -> &SetValue {
150 match self {
151 Self::Named { value, .. } | Self::Positional { value, .. } => value,
152 }
153 }
154
155 /// The span covering the argument's value literal.
156 #[must_use]
157 pub fn value_span(&self) -> &SourceSpan {
158 match self {
159 Self::Named { value_span, .. } | Self::Positional { value_span, .. } => value_span,
160 }
161 }
162
163 /// The key identifier for [`Self::Named`]; `None` for
164 /// [`Self::Positional`].
165 #[must_use]
166 pub fn key(&self) -> Option<&str> {
167 match self {
168 Self::Named { key, .. } => Some(key.as_str()),
169 Self::Positional { .. } => None,
170 }
171 }
172
173 /// The span covering the key identifier, for [`Self::Named`].
174 /// `None` for [`Self::Positional`].
175 #[must_use]
176 pub fn key_span(&self) -> Option<&SourceSpan> {
177 match self {
178 Self::Named { key_span, .. } => Some(key_span),
179 Self::Positional { .. } => None,
180 }
181 }
182}
183
184/// Literal values recognised inside a `#set` body. Full expression
185/// evaluation (`#let`, function calls, `if`) is deferred to MVP 5; this
186/// covers what the manifest examples actually use.
187#[derive(Debug, Clone, PartialEq)]
188pub enum SetValue {
189 Str(String),
190 Int(i64),
191 Float(f64),
192 Length(f64, LengthUnit),
193 Ident(String),
194}
195
/// Unit tag for the magnitude stored in a `SetValue::Length`.
///
/// `Hash` is derived alongside `Eq` so the unit can be used as a map
/// key or set member.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum LengthUnit {
    /// Millimetres.
    Mm,
    /// Points.
    Pt,
    /// Ems.
    Em,
}
202
203/// Inline run produced by the markup tokenizer.
204#[derive(Debug, Clone)]
205pub struct Inline {
206 pub kind: InlineKind,
207 pub text: String,
208 pub span: SourceSpan,
209}
210
/// Classifies an inline run produced by the markup tokenizer.
///
/// `Hash` is derived alongside `Eq` so the kind can be used as a map
/// key or set member.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum InlineKind {
    /// A plain text run.
    Text,
    /// An emphasised run.
    Emphasis,
    /// A strong run.
    Strong,
    /// A run that is both bold and italic.
    BoldItalic,
    /// An inline code run.
    Code,
    /// `@label` — a cross-reference to a labelled block. The
    /// [`Inline::text`] payload is the bare label name (no leading
    /// `@`); the resolver rewrites it to the target's resolved text.
    Reference,
    /// `[@key]` — a citation to a bibliography entry. The
    /// [`Inline::text`] payload is the bare citation key (no leading
    /// `[@` or trailing `]`); bibliography loading and rendering are
    /// future work tracked under MVP 4. The key alphabet matches the
    /// label alphabet (`[A-Za-z0-9_:.-]`); a single key per
    /// `[@…]` group is the only form recognised in this slice — list
    /// forms like `[@a; @b]` and prefix/suffix bodies are deferred.
    Citation,
    /// `\\` — a forced line break inside a paragraph. The line
    /// breaks here without the extra leading a blank-line paragraph
    /// break would give. Carries no text payload. The shorthand for
    /// a soft hyphen `\-` lowers to a literal U+00AD inside a
    /// surrounding [`InlineKind::Text`] run, not to a separate variant.
    HardBreak,
}
237
238impl Item {
239 /// Borrow the heading payload if `self` is [`Item::Heading`].
240 #[must_use]
241 pub fn as_heading(&self) -> Option<(u8, &[Inline], &SourceSpan)> {
242 if let Self::Heading {
243 level,
244 inlines,
245 span,
246 ..
247 } = self
248 {
249 Some((*level, inlines, span))
250 } else {
251 None
252 }
253 }
254
255 /// Borrow the paragraph payload if `self` is [`Item::Paragraph`].
256 #[must_use]
257 pub fn as_paragraph(&self) -> Option<(&[Inline], &SourceSpan)> {
258 if let Self::Paragraph { inlines, span, .. } = self {
259 Some((inlines, span))
260 } else {
261 None
262 }
263 }
264
265 /// Borrow the directive payload if `self` is [`Item::Set`].
266 ///
267 /// The returned tuple is `(name, args, span)`; the caller can also
268 /// reach [`DirectiveKind`] via [`Self::directive_kind`]. The
269 /// accessor name is retained for back-compat — every existing
270 /// caller pre-dates the `#image`/`#figure` directives and only
271 /// looks at name/args/span.
272 #[must_use]
273 pub fn as_set(&self) -> Option<(&str, &[SetArg], &SourceSpan)> {
274 if let Self::Set {
275 name, args, span, ..
276 } = self
277 {
278 Some((name.as_str(), args.as_slice(), span))
279 } else {
280 None
281 }
282 }
283
284 /// Borrow the raw block payload if `self` is [`Item::RawBlock`].
285 #[must_use]
286 pub fn as_raw_block(&self) -> Option<RawBlockView<'_>> {
287 if let Self::RawBlock {
288 kind,
289 args,
290 text,
291 label,
292 label_span,
293 span,
294 } = self
295 {
296 Some(RawBlockView {
297 kind: *kind,
298 args: args.as_slice(),
299 text: text.as_str(),
300 label: label.as_deref(),
301 label_span: label_span.as_ref(),
302 span,
303 })
304 } else {
305 None
306 }
307 }
308
309 /// Borrow the [`DirectiveKind`] tag if `self` is [`Item::Set`].
310 #[must_use]
311 pub fn directive_kind(&self) -> Option<DirectiveKind> {
312 if let Self::Set { kind, .. } = self {
313 Some(*kind)
314 } else {
315 None
316 }
317 }
318
319 /// Borrow the list payload if `self` is [`Item::List`]. The
320 /// returned tuple is `(ordered, items, span)`.
321 #[must_use]
322 pub fn as_list(&self) -> Option<(bool, &[ListItem], &SourceSpan)> {
323 if let Self::List {
324 ordered,
325 items,
326 span,
327 } = self
328 {
329 Some((*ordered, items.as_slice(), span))
330 } else {
331 None
332 }
333 }
334
335 /// Borrow the explicit `<label>` attached to this block, if any.
336 /// Returns `None` for [`Item::Set`] and [`Item::List`] (label
337 /// syntax is not yet defined on those blocks).
338 #[must_use]
339 pub fn label(&self) -> Option<&str> {
340 match self {
341 Self::Heading { label, .. }
342 | Self::Paragraph { label, .. }
343 | Self::RawBlock { label, .. } => label.as_deref(),
344 Self::Set { .. } | Self::List { .. } => None,
345 }
346 }
347
348 /// Borrow the source span covering only the label token text, if any.
349 /// The delimiters (`<`, `>`, or directive string quotes) are excluded so a
350 /// structured suggestion can replace just the label bytes.
351 #[must_use]
352 pub fn label_span(&self) -> Option<&SourceSpan> {
353 match self {
354 Self::Heading { label_span, .. }
355 | Self::Paragraph { label_span, .. }
356 | Self::RawBlock { label_span, .. } => label_span.as_ref(),
357 Self::Set { .. } | Self::List { .. } => None,
358 }
359 }
360}
361
362/// Output of [`crate::parse`]. Diagnostics may include warnings even
363/// when the tree is structurally usable; callers decide what to do per
364/// [`ParseResult::has_errors`].
365#[derive(Debug)]
366pub struct ParseResult {
367 pub tree: SyntaxTree,
368 pub diagnostics: Vec<Diagnostic>,
369}
370
371impl ParseResult {
372 #[must_use]
373 pub fn has_errors(&self) -> bool {
374 self.diagnostics
375 .iter()
376 .any(|d| d.severity() == Severity::Error)
377 }
378}