// acdc_parser/model/location.rs

1use serde::{
2    Deserialize, Serialize,
3    de::{SeqAccess, Visitor},
4    ser::{SerializeSeq, Serializer},
5};
6
7pub(crate) trait Locateable {
8    /// Get a reference to the location.
9    fn location(&self) -> &Location;
10}
11
12/// A `Location` represents a location in a document.
13#[derive(Debug, Default, Clone, Hash, Eq, PartialEq)]
14#[non_exhaustive]
15pub struct Location {
16    /// The absolute start position of the location.
17    pub absolute_start: usize,
18    /// The absolute end position of the location.
19    pub absolute_end: usize,
20
21    /// The start position of the location.
22    pub start: Position,
23    /// The end position of the location.
24    pub end: Position,
25}
26
27impl Location {
28    /// Validates that this location satisfies all invariants.
29    ///
30    /// Checks:
31    /// - `absolute_start <= absolute_end` (valid range)
32    /// - `absolute_end <= input.len()` (within bounds)
33    /// - Both offsets are on UTF-8 character boundaries
34    ///
35    /// # Errors
36    /// Returned as strings for easier debugging.
37    pub fn validate(&self, input: &str) -> Result<(), String> {
38        // Check range validity using the canonical byte offsets
39        if self.absolute_start > self.absolute_end {
40            return Err(format!(
41                "Invalid range: start {} > end {}",
42                self.absolute_start, self.absolute_end
43            ));
44        }
45
46        // Check bounds
47        if self.absolute_end > input.len() {
48            return Err(format!(
49                "End offset {} exceeds input length {}",
50                self.absolute_end,
51                input.len()
52            ));
53        }
54
55        // Check UTF-8 boundaries on the canonical offsets
56        if !input.is_char_boundary(self.absolute_start) {
57            return Err(format!(
58                "Start offset {} not on UTF-8 boundary",
59                self.absolute_start
60            ));
61        }
62
63        if !input.is_char_boundary(self.absolute_end) {
64            return Err(format!(
65                "End offset {} not on UTF-8 boundary",
66                self.absolute_end
67            ));
68        }
69
70        Ok(())
71    }
72
73    /// Shift the start and end positions of the location by the parent location.
74    ///
75    /// This is super useful to adjust the location of a block that is inside another
76    /// block, like anything inside a delimiter block.
77    pub fn shift(&mut self, parent: Option<&Location>) {
78        if let Some(parent) = parent {
79            if parent.start.line == 0 {
80                return;
81            }
82            self.absolute_start += parent.absolute_start;
83            self.absolute_end += parent.absolute_start;
84            self.start.line += parent.start.line;
85            self.end.line += parent.start.line;
86        }
87    }
88
89    /// Shifts the location inline. We subtract 1 from the line number of the start and
90    /// end to account for the fact that inlines are always in the same line as the
91    /// parent calling the parsing function.
92    pub fn shift_inline(&mut self, parent: Option<&Location>) {
93        if let Some(parent) = parent {
94            if parent.start.line != 0 || parent.start.column != 0 {
95                self.absolute_start += parent.absolute_start;
96                self.absolute_end += parent.absolute_start;
97            }
98            if parent.start.line != 0 {
99                self.start.line += parent.start.line - 1;
100                self.end.line += parent.start.line - 1;
101            }
102            if parent.start.column != 0 {
103                self.start.column += parent.start.column - 1;
104                self.end.column += parent.start.column - 1;
105            }
106        }
107    }
108
109    pub fn shift_line_column(&mut self, line: usize, column: usize) {
110        self.start.line += line - 1;
111        self.end.line += line - 1;
112        self.start.column += column - 1;
113        self.end.column += column - 1;
114    }
115}
116
117// We need to implement `Serialize` because I prefer our current `Location` struct to the
118// `asciidoc` `ASG` definition.
119//
120// We serialize `Location` into the ASG format, which is a sequence of two elements: the
121// start and end positions as an array.
122impl Serialize for Location {
123    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
124    where
125        S: Serializer,
126    {
127        let mut state = serializer.serialize_seq(Some(4))?;
128        state.serialize_element(&self.start)?;
129        state.serialize_element(&self.end)?;
130        state.end()
131    }
132}
133
134impl<'de> Deserialize<'de> for Location {
135    fn deserialize<D>(deserializer: D) -> Result<Location, D::Error>
136    where
137        D: serde::Deserializer<'de>,
138    {
139        struct LocationVisitor;
140
141        impl<'de> Visitor<'de> for LocationVisitor {
142            type Value = Location;
143
144            fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
145                formatter.write_str("a sequence of two elements")
146            }
147
148            fn visit_seq<A>(self, mut seq: A) -> Result<Self::Value, A::Error>
149            where
150                A: SeqAccess<'de>,
151            {
152                let start = seq
153                    .next_element()?
154                    .ok_or_else(|| serde::de::Error::invalid_length(0, &self))?;
155                let end = seq
156                    .next_element()?
157                    .ok_or_else(|| serde::de::Error::invalid_length(1, &self))?;
158                Ok(Location {
159                    start,
160                    end,
161                    ..Location::default()
162                })
163            }
164        }
165        deserializer.deserialize_seq(LocationVisitor)
166    }
167}
168
169impl std::fmt::Display for Location {
170    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
171        write!(
172            f,
173            "location.start({}), location.end({})",
174            self.start, self.end
175        )
176    }
177}
178
179/// A `Position` represents a human-readable position in a document.
180///
181/// This is purely for display/error reporting purposes. For byte offsets,
182/// use `Location.absolute_start` and `Location.absolute_end`.
183#[derive(Debug, Default, Clone, Hash, Eq, PartialEq, Serialize, Deserialize)]
184#[non_exhaustive]
185pub struct Position {
186    /// The line number of the position (1-indexed).
187    pub line: usize,
188    /// The column number of the position (1-indexed, counted as Unicode scalar values).
189    #[serde(rename = "col")]
190    pub column: usize,
191}
192
193impl std::fmt::Display for Position {
194    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
195        write!(f, "line: {}, column: {}", self.line, self.column)
196    }
197}