Skip to main content

acdc_parser/model/
location.rs

1use serde::{
2    Serialize,
3    ser::{SerializeSeq, Serializer},
4};
5
/// A half-open byte range over which a single leveloffset value is in effect.
///
/// Include directives that carry `leveloffset=+N` record one of these for the bytes
/// they contribute. The parser later looks the ranges up to work out the effective
/// leveloffset at a given position.
#[derive(Debug, Clone, Default, PartialEq)]
pub(crate) struct LeveloffsetRange {
    /// First byte offset covered by this range (inclusive).
    pub(crate) start_offset: usize,
    /// Byte offset just past the last covered byte (exclusive).
    pub(crate) end_offset: usize,
    /// The leveloffset value that applies inside the range.
    pub(crate) value: isize,
}

impl LeveloffsetRange {
    /// Build a range covering `start_offset..end_offset` that carries `value`.
    #[must_use]
    pub(crate) fn new(start_offset: usize, end_offset: usize, value: isize) -> Self {
        Self {
            start_offset,
            end_offset,
            value,
        }
    }

    /// Report whether `byte_offset` lies inside `start_offset..end_offset`.
    ///
    /// The start is inclusive and the end is exclusive, so an empty or inverted
    /// range contains nothing.
    #[must_use]
    pub(crate) fn contains(&self, byte_offset: usize) -> bool {
        (self.start_offset..self.end_offset).contains(&byte_offset)
    }
}
38
39/// Calculate the total leveloffset at a given byte offset.
40///
41/// Sums all leveloffset values from ranges that contain the given offset.
42/// Ranges can nest (include within include), so we sum all applicable values.
43#[must_use]
44pub(crate) fn calculate_leveloffset_at(ranges: &[LeveloffsetRange], byte_offset: usize) -> isize {
45    ranges
46        .iter()
47        .filter_map(|r| {
48            if r.contains(byte_offset) {
49                Some(r.value)
50            } else {
51                None
52            }
53        })
54        .sum()
55}
56
/// Ties a half-open byte range of the preprocessed output to the file it came from.
///
/// Once `include::` directives have been merged into a single string, these ranges
/// record which bytes originated in which file, together with the line the range
/// starts on. The parser consults them to report accurate file names and line numbers
/// in warnings and errors.
#[derive(Debug, Clone, PartialEq)]
pub(crate) struct SourceRange {
    /// First byte offset covered by this range (inclusive).
    pub(crate) start_offset: usize,
    /// Byte offset just past the last covered byte (exclusive).
    pub(crate) end_offset: usize,
    /// Path of the source file the bytes in this range were read from.
    pub(crate) file: std::path::PathBuf,
    /// 1-indexed line number, within `file`, of the first line in this range.
    pub(crate) start_line: usize,
}

impl SourceRange {
    /// Report whether `byte_offset` lies inside `start_offset..end_offset`.
    ///
    /// The start is inclusive and the end is exclusive.
    #[must_use]
    pub(crate) fn contains(&self, byte_offset: usize) -> bool {
        (self.start_offset..self.end_offset).contains(&byte_offset)
    }
}
81
82pub(crate) trait Locateable {
83    /// Get a reference to the location.
84    fn location(&self) -> &Location;
85}
86
87/// A `Location` represents a location in a document.
88#[derive(Debug, Default, Clone, Hash, Eq, PartialEq)]
89#[non_exhaustive]
90pub struct Location {
91    /// The absolute start position of the location.
92    pub absolute_start: usize,
93    /// The absolute end position of the location.
94    pub absolute_end: usize,
95
96    /// The start position of the location.
97    pub start: Position,
98    /// The end position of the location.
99    pub end: Position,
100}
101
102impl Location {
103    /// Validates that this location satisfies all invariants.
104    ///
105    /// Checks:
106    /// - `absolute_start <= absolute_end` (valid range)
107    /// - `absolute_end <= input.len()` (within bounds)
108    /// - Both offsets are on UTF-8 character boundaries
109    ///
110    /// # Errors
111    /// Returned as strings for easier debugging.
112    pub fn validate(&self, input: &str) -> Result<(), String> {
113        // Check range validity using the canonical byte offsets
114        if self.absolute_start > self.absolute_end {
115            return Err(format!(
116                "Invalid range: start {} > end {}",
117                self.absolute_start, self.absolute_end
118            ));
119        }
120
121        // Check bounds
122        if self.absolute_end > input.len() {
123            return Err(format!(
124                "End offset {} exceeds input length {}",
125                self.absolute_end,
126                input.len()
127            ));
128        }
129
130        // Check UTF-8 boundaries on the canonical offsets
131        if !input.is_char_boundary(self.absolute_start) {
132            return Err(format!(
133                "Start offset {} not on UTF-8 boundary",
134                self.absolute_start
135            ));
136        }
137
138        if !input.is_char_boundary(self.absolute_end) {
139            return Err(format!(
140                "End offset {} not on UTF-8 boundary",
141                self.absolute_end
142            ));
143        }
144
145        Ok(())
146    }
147
148    /// Shift the start and end positions of the location by the parent location.
149    ///
150    /// This is super useful to adjust the location of a block that is inside another
151    /// block, like anything inside a delimiter block.
152    pub fn shift(&mut self, parent: Option<&Location>) {
153        if let Some(parent) = parent {
154            if parent.start.line == 0 {
155                return;
156            }
157            self.absolute_start += parent.absolute_start;
158            self.absolute_end += parent.absolute_start;
159            self.start.line += parent.start.line;
160            self.end.line += parent.start.line;
161        }
162    }
163
164    /// Shifts the location inline. We subtract 1 from the line number of the start and
165    /// end to account for the fact that inlines are always in the same line as the
166    /// parent calling the parsing function.
167    pub fn shift_inline(&mut self, parent: Option<&Location>) {
168        if let Some(parent) = parent {
169            if parent.start.line != 0 || parent.start.column != 0 {
170                self.absolute_start += parent.absolute_start;
171                self.absolute_end += parent.absolute_start;
172            }
173            if parent.start.line != 0 {
174                self.start.line += parent.start.line - 1;
175                self.end.line += parent.start.line - 1;
176            }
177            if parent.start.column != 0 {
178                self.start.column += parent.start.column - 1;
179                self.end.column += parent.start.column - 1;
180            }
181        }
182    }
183
184    pub fn shift_line_column(&mut self, line: usize, column: usize) {
185        self.start.line += line - 1;
186        self.end.line += line - 1;
187        self.start.column += column - 1;
188        self.end.column += column - 1;
189    }
190}
191
192// We need to implement `Serialize` because I prefer our current `Location` struct to the
193// `asciidoc` `ASG` definition.
194//
195// We serialize `Location` into the ASG format, which is a sequence of two elements: the
196// start and end positions as an array.
197impl Serialize for Location {
198    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
199    where
200        S: Serializer,
201    {
202        let mut state = serializer.serialize_seq(Some(4))?;
203        state.serialize_element(&self.start)?;
204        state.serialize_element(&self.end)?;
205        state.end()
206    }
207}
208
209impl std::fmt::Display for Location {
210    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
211        write!(
212            f,
213            "location.start({}), location.end({})",
214            self.start, self.end
215        )
216    }
217}
218
219/// A `Position` represents a human-readable position in a document.
220///
221/// This is purely for display/error reporting purposes. For byte offsets,
222/// use `Location.absolute_start` and `Location.absolute_end`.
223#[derive(Debug, Default, Clone, Hash, Eq, PartialEq, Serialize)]
224#[non_exhaustive]
225pub struct Position {
226    /// The line number of the position (1-indexed).
227    pub line: usize,
228    /// The column number of the position (1-indexed, counted as Unicode scalar values).
229    #[serde(rename = "col")]
230    pub column: usize,
231}
232
233impl std::fmt::Display for Position {
234    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
235        write!(f, "line: {}, column: {}", self.line, self.column)
236    }
237}