mos_parse/syntax.rs
1use std::path::PathBuf;
2
3use mos_core::{Diagnostic, Severity, SourceSpan};
4
5/// Concrete syntax tree for a single `.mos` source file.
6#[derive(Debug, Clone)]
7pub struct SyntaxTree {
8 pub file: PathBuf,
9 pub items: Vec<Item>,
10}
11
12/// Top-level construct in a `.mos` file.
13#[derive(Debug, Clone)]
14pub enum Item {
15 /// `= Title`, `== Subtitle`, `=== Subsubtitle`. A trailing
16 /// `<label>` token after the title attaches to this heading.
17 Heading {
18 level: u8,
19 inlines: Vec<Inline>,
20 label: Option<String>,
21 span: SourceSpan,
22 },
23 /// One or more consecutive non-blank lines that are not a heading
24 /// and not a `#set` block. A leading `<label>` token (possibly
25 /// preceded by ASCII whitespace) attaches to this paragraph.
26 Paragraph {
27 inlines: Vec<Inline>,
28 label: Option<String>,
29 span: SourceSpan,
30 },
31 /// `#set name(...)`, `#image(...)`, `#figure(...)`. The body is
32 /// lexed into typed `(key, value)` args; semantic validation
33 /// (known target/key, type coercion, sanity floors) happens in
34 /// the lowerer. `kind` distinguishes the `#set`-style configuration
35 /// directive from standalone calls like `#image` and `#figure`,
36 /// which the lowerer dispatches to dedicated paths.
37 Set {
38 kind: DirectiveKind,
39 name: String,
40 args: Vec<SetArg>,
41 span: SourceSpan,
42 },
43 /// Raw preformatted text or code block. Both forms preserve their
44 /// long-bracket body as text; the kind leaves room for later styling
45 /// or language-aware code rendering.
46 RawBlock {
47 kind: RawBlockKind,
48 args: Vec<SetArg>,
49 text: String,
50 label: Option<String>,
51 span: SourceSpan,
52 },
53 /// A bullet (`- `) or numbered (`\d+\. `) list. Sibling items at
54 /// the same indent are grouped under one list; deeper indents
55 /// become nested lists hanging off the most recent item. Numbered
56 /// lists always renumber from 1 in MVP — explicit `start: N` is
57 /// deferred.
58 List {
59 ordered: bool,
60 items: Vec<ListItem>,
61 span: SourceSpan,
62 },
63}
64
65/// One entry inside an [`Item::List`]. `inlines` is the item's own
66/// text (markers stripped, parsed with the same inline tokenizer as
67/// paragraphs); `children` carries nested blocks, currently restricted
68/// to further [`Item::List`]s per the MVP scope.
69#[derive(Debug, Clone)]
70pub struct ListItem {
71 pub inlines: Vec<Inline>,
72 pub children: Vec<Item>,
73 pub span: SourceSpan,
74}
75
/// Discriminates the three directive shapes an [`Item::Set`] can hold:
/// the `#set <target>(...)` configuration directive and the standalone
/// `#image(...)` / `#figure(...)` calls.
///
/// The lowerer dispatches on this tag instead of the
/// [`Item::Set::name`] string, so `#set image(...)` can never be
/// confused with `#image(...)`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DirectiveKind {
    /// `#set <name>(...)` — sets defaults on a style target.
    Set,
    /// `#image("path", ...)` — raster image directive.
    Image,
    /// `#figure(image: ..., caption: ...)` — captioned image container.
    Figure,
}
90
/// Tag stored in the `kind` field of an [`Item::RawBlock`] and of a
/// [`RawBlockView`], selecting between the two raw block forms.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RawBlockKind {
    Pre,
    Code,
}
96
97/// Borrowed view of an [`Item::RawBlock`] payload.
98#[derive(Debug, Clone, Copy)]
99pub struct RawBlockView<'a> {
100 pub kind: RawBlockKind,
101 pub args: &'a [SetArg],
102 pub text: &'a str,
103 pub label: Option<&'a str>,
104 pub span: &'a SourceSpan,
105}
106
107/// One argument inside a directive body — either a `key: value`
108/// pair (the only form `#set` accepts) or a positional value (a
109/// leading string literal allowed on `#image(...)` / `#figure(...)`).
110///
111/// This used to be a struct with an empty-string `key` standing in
112/// for "positional," but that sentinel was a brittle public contract:
113/// any consumer that forgot the special-case would silently treat a
114/// positional path as a named arg called `""`. The enum form makes
115/// the two shapes explicit so the compiler can enforce exhaustive
116/// matches.
117#[derive(Debug, Clone)]
118pub enum SetArg {
119 /// A `key: value` argument. `key_span` covers the identifier
120 /// before the colon; `value_span` covers the literal.
121 Named {
122 key: String,
123 value: SetValue,
124 key_span: SourceSpan,
125 value_span: SourceSpan,
126 },
127 /// A leading positional value. The parser currently only accepts
128 /// string literals here (used for `#image("path.png")`); other
129 /// literal kinds in a positional slot would surface as a parse
130 /// error rather than land in this variant.
131 Positional {
132 value: SetValue,
133 value_span: SourceSpan,
134 },
135}
136
137impl SetArg {
138 /// Borrow the value carried by this argument, regardless of shape.
139 #[must_use]
140 pub fn value(&self) -> &SetValue {
141 match self {
142 Self::Named { value, .. } | Self::Positional { value, .. } => value,
143 }
144 }
145
146 /// The span covering the argument's value literal.
147 #[must_use]
148 pub fn value_span(&self) -> &SourceSpan {
149 match self {
150 Self::Named { value_span, .. } | Self::Positional { value_span, .. } => value_span,
151 }
152 }
153
154 /// The key identifier for [`Self::Named`]; `None` for
155 /// [`Self::Positional`].
156 #[must_use]
157 pub fn key(&self) -> Option<&str> {
158 match self {
159 Self::Named { key, .. } => Some(key.as_str()),
160 Self::Positional { .. } => None,
161 }
162 }
163
164 /// The span covering the key identifier, for [`Self::Named`].
165 /// `None` for [`Self::Positional`].
166 #[must_use]
167 pub fn key_span(&self) -> Option<&SourceSpan> {
168 match self {
169 Self::Named { key_span, .. } => Some(key_span),
170 Self::Positional { .. } => None,
171 }
172 }
173}
174
175/// Literal values recognised inside a `#set` body. Full expression
176/// evaluation (`#let`, function calls, `if`) is deferred to MVP 5; this
177/// covers what the manifest examples actually use.
178#[derive(Debug, Clone, PartialEq)]
179pub enum SetValue {
180 Str(String),
181 Int(i64),
182 Float(f64),
183 Length(f64, LengthUnit),
184 Ident(String),
185}
186
/// Unit tag carried alongside the number in a [`SetValue::Length`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LengthUnit {
    Mm,
    Pt,
    Em,
}
193
194/// Inline run produced by the markup tokenizer.
195#[derive(Debug, Clone)]
196pub struct Inline {
197 pub kind: InlineKind,
198 pub text: String,
199 pub span: SourceSpan,
200}
201
/// Classifies an [`Inline`] run.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum InlineKind {
    Text,
    Emphasis,
    Strong,
    BoldItalic,
    Code,
    /// An `@label` cross-reference to a labelled block. For this kind
    /// the [`Inline::text`] payload is the bare label name, without the
    /// leading `@`; the resolver rewrites it to the target's resolved
    /// text.
    Reference,
}
214
215impl Item {
216 /// Borrow the heading payload if `self` is [`Item::Heading`].
217 #[must_use]
218 pub fn as_heading(&self) -> Option<(u8, &[Inline], &SourceSpan)> {
219 if let Self::Heading {
220 level,
221 inlines,
222 span,
223 ..
224 } = self
225 {
226 Some((*level, inlines, span))
227 } else {
228 None
229 }
230 }
231
232 /// Borrow the paragraph payload if `self` is [`Item::Paragraph`].
233 #[must_use]
234 pub fn as_paragraph(&self) -> Option<(&[Inline], &SourceSpan)> {
235 if let Self::Paragraph { inlines, span, .. } = self {
236 Some((inlines, span))
237 } else {
238 None
239 }
240 }
241
242 /// Borrow the directive payload if `self` is [`Item::Set`].
243 ///
244 /// The returned tuple is `(name, args, span)`; the caller can also
245 /// reach [`DirectiveKind`] via [`Self::directive_kind`]. The
246 /// accessor name is retained for back-compat — every existing
247 /// caller pre-dates the `#image`/`#figure` directives and only
248 /// looks at name/args/span.
249 #[must_use]
250 pub fn as_set(&self) -> Option<(&str, &[SetArg], &SourceSpan)> {
251 if let Self::Set {
252 name, args, span, ..
253 } = self
254 {
255 Some((name.as_str(), args.as_slice(), span))
256 } else {
257 None
258 }
259 }
260
261 /// Borrow the raw block payload if `self` is [`Item::RawBlock`].
262 #[must_use]
263 pub fn as_raw_block(&self) -> Option<RawBlockView<'_>> {
264 if let Self::RawBlock {
265 kind,
266 args,
267 text,
268 label,
269 span,
270 } = self
271 {
272 Some(RawBlockView {
273 kind: *kind,
274 args: args.as_slice(),
275 text: text.as_str(),
276 label: label.as_deref(),
277 span,
278 })
279 } else {
280 None
281 }
282 }
283
284 /// Borrow the [`DirectiveKind`] tag if `self` is [`Item::Set`].
285 #[must_use]
286 pub fn directive_kind(&self) -> Option<DirectiveKind> {
287 if let Self::Set { kind, .. } = self {
288 Some(*kind)
289 } else {
290 None
291 }
292 }
293
294 /// Borrow the list payload if `self` is [`Item::List`]. The
295 /// returned tuple is `(ordered, items, span)`.
296 #[must_use]
297 pub fn as_list(&self) -> Option<(bool, &[ListItem], &SourceSpan)> {
298 if let Self::List {
299 ordered,
300 items,
301 span,
302 } = self
303 {
304 Some((*ordered, items.as_slice(), span))
305 } else {
306 None
307 }
308 }
309
310 /// Borrow the explicit `<label>` attached to this block, if any.
311 /// Returns `None` for [`Item::Set`] and [`Item::List`] (label
312 /// syntax is not yet defined on those blocks).
313 #[must_use]
314 pub fn label(&self) -> Option<&str> {
315 match self {
316 Self::Heading { label, .. }
317 | Self::Paragraph { label, .. }
318 | Self::RawBlock { label, .. } => label.as_deref(),
319 Self::Set { .. } | Self::List { .. } => None,
320 }
321 }
322}
323
324/// Output of [`crate::parse`]. Diagnostics may include warnings even
325/// when the tree is structurally usable; callers decide what to do per
326/// [`ParseResult::has_errors`].
327#[derive(Debug)]
328pub struct ParseResult {
329 pub tree: SyntaxTree,
330 pub diagnostics: Vec<Diagnostic>,
331}
332
333impl ParseResult {
334 #[must_use]
335 pub fn has_errors(&self) -> bool {
336 self.diagnostics
337 .iter()
338 .any(|d| d.severity == Severity::Error)
339 }
340}