// lucid_lint/parser/document.rs
//! Document model produced by the parser.
//!
//! A [`Document`] contains an ordered list of [`Section`]s (derived from
//! headings in Markdown). Each section contains an ordered list of
//! [`Paragraph`]s. Each paragraph carries its sentences, computed once
//! at construction via [`super::tokenizer::split_sentences`]. Eight
//! rhythm/syntax/lexicon/structure rules consume them, so paying for the
//! split once and sharing the result across rules is strictly cheaper than
//! the previous "lazy per-rule" pattern (F103, samply 2026-04-25).

use crate::parser::tokenizer::split_sentences;
use crate::types::SourceFile;

14/// The parsed representation of a single input text.
15#[derive(Debug, Clone)]
16pub struct Document {
17 /// Origin of the document.
18 pub source: SourceFile,
19
20 /// Sections of the document. The first section may have no heading
21 /// (content before the first heading, or plain text input).
22 pub sections: Vec<Section>,
23
24 /// Inline-disable directives extracted from the source. Each directive
25 /// silences one rule on one target line. See [`Directive`].
26 pub directives: Vec<Directive>,
27
28 /// List items captured during parsing, with their nesting depth and line.
29 /// Empty for plain-text inputs (which have no list structure).
30 pub list_items: Vec<ListItem>,
31}
32
33impl Document {
34 /// Create a new document with no directives and no list items.
35 #[must_use]
36 pub const fn new(source: SourceFile, sections: Vec<Section>) -> Self {
37 Self {
38 source,
39 sections,
40 directives: Vec::new(),
41 list_items: Vec::new(),
42 }
43 }
44
45 /// Create a new document carrying parser-captured metadata (directives
46 /// and list items).
47 #[must_use]
48 pub const fn with_metadata(
49 source: SourceFile,
50 sections: Vec<Section>,
51 directives: Vec<Directive>,
52 list_items: Vec<ListItem>,
53 ) -> Self {
54 Self {
55 source,
56 sections,
57 directives,
58 list_items,
59 }
60 }
61
62 /// Iterate over all paragraphs across all sections, yielding each paragraph
63 /// with the title of the section it belongs to.
64 pub fn paragraphs_with_section(&self) -> impl Iterator<Item = (&Paragraph, Option<&str>)> {
65 self.sections.iter().flat_map(|section| {
66 let title = section.title.as_deref();
67 section.paragraphs.iter().map(move |p| (p, title))
68 })
69 }
70}
71
72/// A section of a document, rooted at a heading (or synthetic for pre-heading content).
73#[derive(Debug, Clone)]
74pub struct Section {
75 /// The heading text (without the leading `#` markers).
76 ///
77 /// `None` for the implicit section containing content before the first
78 /// heading, or for plain text input.
79 pub title: Option<String>,
80
81 /// Heading depth (1 for H1, 2 for H2, etc.). 0 for the synthetic pre-heading section.
82 pub depth: u32,
83
84 /// 1-based line of the heading in the source. `None` for the synthetic
85 /// pre-heading section.
86 pub heading_line: Option<u32>,
87
88 /// Paragraphs under this section.
89 pub paragraphs: Vec<Paragraph>,
90}
91
92impl Section {
93 /// Create a new section without a heading line (synthetic or plain text).
94 #[must_use]
95 pub const fn new(title: Option<String>, depth: u32, paragraphs: Vec<Paragraph>) -> Self {
96 Self {
97 title,
98 depth,
99 heading_line: None,
100 paragraphs,
101 }
102 }
103
104 /// Create a new section rooted at a heading on a specific line.
105 #[must_use]
106 pub const fn with_heading_line(
107 title: Option<String>,
108 depth: u32,
109 heading_line: u32,
110 paragraphs: Vec<Paragraph>,
111 ) -> Self {
112 Self {
113 title,
114 depth,
115 heading_line: Some(heading_line),
116 paragraphs,
117 }
118 }
119}
120
121/// A paragraph of prose.
122#[derive(Debug, Clone)]
123pub struct Paragraph {
124 /// The paragraph text with Markdown inline markup stripped.
125 pub text: String,
126
127 /// 1-based line number in the source where the paragraph starts.
128 pub start_line: u32,
129
130 /// Sentences extracted from `text` at construction time, with absolute
131 /// source positions seeded from `start_line`. Rules consume this slice
132 /// instead of re-running [`split_sentences`] per rule.
133 pub sentences: Vec<Sentence>,
134
135 /// Typed inline tree for this paragraph (F143 substrate).
136 ///
137 /// **Contract (lazy-build).** This field is **empty** when the
138 /// paragraph contained no emphasis (the common case in real
139 /// documents); rules that want to know "no spans worth modeling
140 /// here" simply check `inline.is_empty()`. When *non-empty*, the
141 /// tree faithfully mirrors [`Self::text`]: recursively flattening
142 /// it (concatenating `Text` payloads + descending into `Emphasis`
143 /// children) reproduces the visible-text string byte-for-byte.
144 /// Empty for plain-text input regardless.
145 ///
146 /// Today only [`Inline::Text`] and [`Inline::Emphasis`] are
147 /// produced — the enum is intentionally narrow until a second
148 /// rule demands more (Strong / Link / Code etc).
149 pub inline: Vec<Inline>,
150
151 /// `true` when the paragraph was extracted from a list-item span (tight
152 /// or loose). Rules that target body-prose width (e.g.
153 /// `structure.line-length-wide`) skip these because a rendered list item
154 /// wraps in a narrower column than running prose.
155 pub from_list_item: bool,
156}
157
158impl Paragraph {
159 /// Create a new body paragraph and split it into sentences.
160 #[must_use]
161 pub fn new(text: String, start_line: u32) -> Self {
162 Self::with_origin(text, start_line, false, Vec::new())
163 }
164
165 /// Create a new paragraph derived from a list-item span.
166 #[must_use]
167 pub fn from_list_item(text: String, start_line: u32) -> Self {
168 Self::with_origin(text, start_line, true, Vec::new())
169 }
170
171 /// Create a new body paragraph with a captured inline tree (F143).
172 #[must_use]
173 pub fn with_inline(text: String, start_line: u32, inline: Vec<Inline>) -> Self {
174 Self::with_origin(text, start_line, false, inline)
175 }
176
177 /// Create a new list-item paragraph with a captured inline tree (F143).
178 #[must_use]
179 pub fn from_list_item_with_inline(text: String, start_line: u32, inline: Vec<Inline>) -> Self {
180 Self::with_origin(text, start_line, true, inline)
181 }
182
183 fn with_origin(
184 text: String,
185 start_line: u32,
186 from_list_item: bool,
187 inline: Vec<Inline>,
188 ) -> Self {
189 let sentences = split_sentences(&text, start_line, 1);
190 Self {
191 text,
192 start_line,
193 sentences,
194 inline,
195 from_list_item,
196 }
197 }
198}
199
200/// A typed node in the paragraph-level inline tree (F143 substrate).
201///
202/// Captured at parse time from the Markdown event stream so rules that
203/// need *structural* inline information — emphasis-span boundaries,
204/// future strong / link / code spans — can walk a typed model instead
205/// of regex-scanning the flattened paragraph text.
206///
207/// **Variant set is intentionally narrow.** Today only [`Inline::Text`]
208/// and [`Inline::Emphasis`] are produced. Strong, Link, Code, footnotes,
209/// and task-list markers all flatten into [`Inline::Text`] until a rule
210/// actually needs them. Widen the enum *when a second rule demands it*,
211/// not preemptively.
212#[derive(Debug, Clone, PartialEq, Eq)]
213pub enum Inline {
214 /// Plain prose text. May contain spaces and authorial newlines
215 /// (from soft / hard breaks) the same way [`Paragraph::text`] does.
216 Text(String),
217 /// An italic / `*…*` / `_…_` span. Carries its own children so
218 /// nested emphasis (e.g. `*foo *bar* baz*`) round-trips faithfully.
219 Emphasis(EmphasisSpan),
220}
221
222/// An emphasis (italic) span captured during parsing (F143).
223///
224/// Position fields point at the *opening* `*` / `_` in the source, so
225/// rules emitting diagnostics on the span can land their squiggle on
226/// the visible delimiter rather than an arbitrary column inside the
227/// paragraph.
228#[derive(Debug, Clone, PartialEq, Eq)]
229pub struct EmphasisSpan {
230 /// Inline children. Recursive so nested emphasis is preserved.
231 pub children: Vec<Inline>,
232
233 /// 1-based source line of the opening delimiter.
234 pub start_line: u32,
235
236 /// 1-based source column of the opening delimiter (within `start_line`).
237 pub start_column: u32,
238}
239
/// A sentence extracted from a paragraph.
///
/// Produced by [`super::tokenizer::split_sentences`], which runs once per
/// paragraph at construction time; the results are stored in
/// [`Paragraph::sentences`] and shared by every rule.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Sentence {
    /// The sentence text.
    pub text: String,

    /// 1-based line where the sentence starts (approximate).
    pub line: u32,

    /// 1-based column where the sentence starts within its line (approximate).
    pub column: u32,
}

/// A disable directive extracted from the source.
///
/// Two forms are supported:
///
/// - **Line form** (v0.1): `<!-- lucid-lint disable-next-line <rule-id> -->`
///   silences `rule_id` on the next non-blank line. `start_line == end_line`.
/// - **Block form** (v0.2, F18):
///   `<!-- lucid-lint-disable <rule-id> -->` … `<!-- lucid-lint-enable -->`
///   silences `rule_id` on every line between the two comments (inclusive).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Directive {
    /// The rule id silenced by this directive.
    pub rule_id: String,

    /// 1-based first line the directive covers (inclusive).
    pub start_line: u32,

    /// 1-based last line the directive covers (inclusive). Equal to
    /// `start_line` for the line form.
    pub end_line: u32,
}

/// A list item position captured during parsing.
///
/// Emitted once per `<li>` (or ordered-list item), carrying the 1-based
/// nesting depth (outermost list is 1) and the 1-based source line where
/// the item starts.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ListItem {
    /// 1-based nesting depth: outermost list items are depth 1.
    pub depth: u32,

    /// 1-based line where the item starts in the source.
    pub line: u32,
}

290impl ListItem {
291 /// Create a new list item.
292 #[must_use]
293 pub const fn new(depth: u32, line: u32) -> Self {
294 Self { depth, line }
295 }
296}
297
298impl Directive {
299 /// Create a line-form directive covering a single line.
300 #[must_use]
301 pub fn new(rule_id: impl Into<String>, target_line: u32) -> Self {
302 Self {
303 rule_id: rule_id.into(),
304 start_line: target_line,
305 end_line: target_line,
306 }
307 }
308
309 /// Create a block-form directive covering an inclusive line range.
310 #[must_use]
311 pub fn block(rule_id: impl Into<String>, start_line: u32, end_line: u32) -> Self {
312 Self {
313 rule_id: rule_id.into(),
314 start_line,
315 end_line,
316 }
317 }
318
319 /// Whether `line` falls inside this directive's range (inclusive).
320 #[must_use]
321 pub const fn covers(&self, line: u32) -> bool {
322 line >= self.start_line && line <= self.end_line
323 }
324}
325
326impl Sentence {
327 /// Create a new sentence with explicit position.
328 #[must_use]
329 pub fn new(text: impl Into<String>, line: u32, column: u32) -> Self {
330 Self {
331 text: text.into(),
332 line,
333 column,
334 }
335 }
336}
337
#[cfg(test)]
mod tests {
    use super::*;

    /// A titled section hands its title to every paragraph it yields.
    #[test]
    fn paragraphs_with_section_yields_titles() {
        let section = Section::new(
            Some("Intro".to_string()),
            2,
            vec![Paragraph::new("Hello.".to_string(), 1)],
        );
        let doc = Document::new(SourceFile::Anonymous, vec![section]);
        let collected: Vec<_> = doc
            .paragraphs_with_section()
            .map(|(p, title)| (p.text.clone(), title.map(ToOwned::to_owned)))
            .collect();
        assert_eq!(
            collected,
            vec![("Hello.".to_string(), Some("Intro".to_string()))]
        );
    }

    /// An untitled (synthetic) section yields `None` as the title.
    #[test]
    fn paragraphs_with_section_yields_none_for_untitled_sections() {
        let section = Section::new(None, 0, vec![Paragraph::new("Body.".to_string(), 1)]);
        let doc = Document::new(SourceFile::Anonymous, vec![section]);
        let titles: Vec<_> = doc
            .paragraphs_with_section()
            .map(|(_, title)| title.map(ToOwned::to_owned))
            .collect();
        assert_eq!(titles, vec![None]);
    }

    /// `Document::new` must start with empty directive and list-item metadata.
    #[test]
    fn new_document_has_no_metadata() {
        let doc = Document::new(SourceFile::Anonymous, Vec::new());
        assert!(doc.sections.is_empty());
        assert!(doc.directives.is_empty());
        assert!(doc.list_items.is_empty());
    }

    /// Block-form directives cover both of their boundary lines.
    #[test]
    fn block_directive_covers_inclusive_range() {
        let directive = Directive::block("some.rule", 3, 5);
        assert!(!directive.covers(2));
        assert!(directive.covers(3));
        assert!(directive.covers(5));
        assert!(!directive.covers(6));
    }

    /// Line-form directives cover exactly their target line.
    #[test]
    fn line_directive_covers_only_its_target() {
        let directive = Directive::new("some.rule", 7);
        assert_eq!(directive.start_line, directive.end_line);
        assert!(!directive.covers(6));
        assert!(directive.covers(7));
        assert!(!directive.covers(8));
    }
}
370}