asciidoc_parser/span/mod.rs
1// Adapted from nom-span, which comes with the following license:
2
3// Copyright 2023 Jules Guesnon
4//
5// Permission is hereby granted, free of charge, to any person obtaining a copy
6// of this software and associated documentation files (the “Software”), to deal
7// in the Software without restriction, including without limitation the rights
8// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9// copies of the Software, and to permit persons to whom the Software is
10// furnished to do so, subject to the following conditions:
11//
12// The above copyright notice and this permission notice shall be included in
13// all copies or substantial portions of the Software.
14//
15// THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21// SOFTWARE.
22
23use std::ops::Deref;
24
/// Represents a subset of the overall UTF-8 input stream.
///
/// Annotated with 1-based line and column numbers relative to the
/// beginning of the overall input stream.
///
/// Called `Span` because its [`data()`] member can be consumed
/// to yield another `Span` with annotations for the end of the
/// syntactic element in question.
///
/// ## How to use it?
///
/// Here is a basic example of how to create the input and how to retrieve all
/// the information you need.
///
/// ```
/// # use asciidoc_parser::Span;
/// #
/// let span = Span::new(r#"{"hello": "world 🙌"}"#);
///
/// assert_eq!(span.line(), 1);
/// assert_eq!(span.col(), 1);
/// assert_eq!(span.byte_offset(), 0);
/// ```
///
/// [`data()`]: Self::data
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct Span<'src> {
    /// The text covered by this span, borrowed from the source.
    data: &'src str,

    /// Line number of this span (starts at 1 for a new span).
    line: usize,

    /// Column number of this span (starts at 1 for a new span).
    col: usize,

    /// Byte offset of this span (starts at 0 for a new span).
    offset: usize,
}

impl<'src> Span<'src> {
    /// Create a new `Span` that describes an entire UTF-8 input stream.
    ///
    /// The resulting span is positioned at line 1, column 1, byte offset 0.
    pub const fn new(data: &'src str) -> Self {
        Self {
            data,
            line: 1,
            col: 1,
            offset: 0,
        }
    }

    // The accessors below are `const fn`, like `new`, so that a span built in
    // a const context can also be inspected there.

    /// Get the current line number.
    pub const fn line(&self) -> usize {
        self.line
    }

    /// Get the current column number.
    pub const fn col(&self) -> usize {
        self.col
    }

    /// Get the current byte offset.
    pub const fn byte_offset(&self) -> usize {
        self.offset
    }

    /// Get the current data in the span.
    ///
    /// The returned slice carries the `'src` lifetime of the source text, not
    /// the lifetime of the borrow of this `Span`.
    pub const fn data(&self) -> &'src str {
        self.data
    }
}
91
92impl AsRef<str> for Span<'_> {
93 fn as_ref(&self) -> &str {
94 self.data
95 }
96}
97
98const EMPTY_STR: &str = "";
99
100impl Default for Span<'_> {
101 fn default() -> Self {
102 Self {
103 data: EMPTY_STR,
104 line: 1,
105 col: 1,
106 offset: 0,
107 }
108 }
109}
110
111impl<'src> Deref for Span<'src> {
112 type Target = &'src str;
113
114 fn deref(&self) -> &Self::Target {
115 &self.data
116 }
117}
118
119// NOTE: The `Span` API is large. Only the public interface is implemented here.
120// The other modules referenced below implement additional APIs that are
121// available inside this crate only. (Exception: `Content` is defined here and
122// exported publicly.)
123
124mod discard;
125mod line;
126mod matched_item;
127mod primitives;
128mod r#slice;
129mod split;
130mod take;
131mod trim;
132
133pub(crate) use matched_item::MatchedItem;
134
/// Any syntactic element can describe its location
/// within the source material using this trait.
///
/// The `'src` lifetime parameter is the lifetime of the source text
/// that the returned [`Span`] borrows from.
pub trait HasSpan<'src> {
    /// Return a [`Span`] describing the syntactic element's
    /// location within the source string/file.
    fn span(&self) -> Span<'src>;
}
142
#[cfg(test)]
mod tests {
    #![allow(clippy::unwrap_used)]

    use pretty_assertions_sorted::assert_eq;

    /// A freshly created span starts at line 1, column 1, byte offset 0.
    #[test]
    fn simple_case() {
        let input = crate::Span::new(r#"{"hello": "world 🙌"}"#);

        assert_eq!(input.line(), 1);
        assert_eq!(input.col(), 1);
        assert_eq!(input.byte_offset(), 0);
    }

    /// The default span is empty and sits at the initial position.
    #[test]
    fn impl_default() {
        let empty = crate::Span::default();

        assert_eq!(empty.data(), "");
        assert_eq!(empty.line(), 1);
        assert_eq!(empty.col(), 1);
        assert_eq!(empty.byte_offset(), 0);
    }

    /// `AsRef<str>` exposes the span's text.
    #[test]
    fn impl_as_ref() {
        let input = crate::Span::new("abcdef");
        assert_eq!(input.as_ref(), "abcdef");
    }

    /// Splitting after one byte yields the first character as the item and
    /// the remainder, advanced by one column and one byte, as `after`.
    #[test]
    fn into_parse_result() {
        let source = crate::Span::new("abc");
        let matched = source.into_parse_result(1);
        let (item, after) = (&matched.item, &matched.after);

        assert_eq!(item.data(), "a");
        assert_eq!(item.line(), 1);
        assert_eq!(item.col(), 1);
        assert_eq!(item.byte_offset(), 0);

        assert_eq!(after.data(), "bc");
        assert_eq!(after.line(), 1);
        assert_eq!(after.col(), 2);
        assert_eq!(after.byte_offset(), 1);
    }

    mod split_at_match_non_empty {
        use pretty_assertions_sorted::assert_eq;

        /// Nothing to split in an empty span.
        #[test]
        fn empty_source() {
            let source = crate::Span::default();
            let outcome = source.split_at_match_non_empty(|ch| ch == ':');
            assert!(outcome.is_none());
        }

        /// A match at the very first character would leave an empty item,
        /// which is rejected.
        #[test]
        fn empty_subspan() {
            let source = crate::Span::new(":abc");
            let outcome = source.split_at_match_non_empty(|ch| ch == ':');
            assert!(outcome.is_none());
        }

        /// A match later in the span splits just before the matching
        /// character; the matching character begins `after`.
        #[test]
        fn match_after_first() {
            let source = crate::Span::new("ab:cd");
            let matched = source.split_at_match_non_empty(|ch| ch == ':').unwrap();
            let (item, after) = (&matched.item, &matched.after);

            assert_eq!(item.data(), "ab");
            assert_eq!(item.line(), 1);
            assert_eq!(item.col(), 1);
            assert_eq!(item.byte_offset(), 0);

            assert_eq!(after.data(), ":cd");
            assert_eq!(after.line(), 1);
            assert_eq!(after.col(), 3);
            assert_eq!(after.byte_offset(), 2);
        }
    }
}