1use lex_core::lex::ast::{Position, Range, TextContent};
7use lex_core::lex::inlines::{InlineNode, ReferenceInline, ReferenceType};
8
/// An inline reference paired with the source location of its content.
///
/// Values are produced by `extract_references`.
#[derive(Debug, Clone, PartialEq)]
pub struct PositionedReference {
    /// Location of the reference content. As built by `extract_references`,
    /// it covers the text between the one-byte delimiters, not the delimiters
    /// themselves.
    pub range: Range,
    /// Classification copied from the parsed reference node.
    pub reference_type: ReferenceType,
    /// Reference text copied from the parsed reference node's `raw` field.
    pub raw: String,
}
16
17pub fn extract_references(text: &TextContent) -> Vec<PositionedReference> {
23 let Some(base_range) = text.location.as_ref() else {
24 return Vec::new();
25 };
26 let raw = text.as_string();
27 if raw.is_empty() {
28 return Vec::new();
29 }
30 let nodes = text.inline_items();
31 let mut walker = ReferenceWalker {
32 raw,
33 base_range,
34 cursor: 0,
35 refs: Vec::new(),
36 };
37 walker.walk_nodes(&nodes);
38 walker.refs
39}
40
/// Walks a parsed inline tree while tracking the matching byte offset in the
/// raw text, recording a `PositionedReference` for each reference node.
struct ReferenceWalker<'a> {
    /// Raw text that `cursor` indexes into.
    raw: &'a str,
    /// Location of `raw` in the source; computed offsets and positions are
    /// expressed relative to its start.
    base_range: &'a Range,
    /// Byte offset into `raw` of the next unconsumed character.
    cursor: usize,
    /// References collected so far, in walk order.
    refs: Vec<PositionedReference>,
}
47
48impl<'a> ReferenceWalker<'a> {
49 fn walk_nodes(&mut self, nodes: &[InlineNode]) {
50 for node in nodes {
51 self.walk_node(node);
52 }
53 }
54
55 fn walk_node(&mut self, node: &InlineNode) {
56 match node {
57 InlineNode::Plain { text, .. } => self.skip_plain(text),
58 InlineNode::Strong { content, .. } => self.skip_container(content, '*'),
59 InlineNode::Emphasis { content, .. } => self.skip_container(content, '_'),
60 InlineNode::Code { text, .. } => self.skip_literal(text, '`'),
61 InlineNode::Math { text, .. } => self.skip_literal(text, '#'),
62 InlineNode::Reference { data, .. } => self.collect_reference(data),
63 }
64 }
65
66 fn skip_plain(&mut self, text: &str) {
67 self.advance_unescaped(text);
68 }
69
70 fn skip_container(&mut self, content: &[InlineNode], marker: char) {
71 self.cursor += marker.len_utf8(); self.walk_nodes(content);
73 self.cursor += marker.len_utf8(); }
75
76 fn skip_literal(&mut self, text: &str, marker: char) {
77 self.cursor += marker.len_utf8(); self.cursor += text.len(); self.cursor += marker.len_utf8(); }
81
82 fn collect_reference(&mut self, data: &ReferenceInline) {
83 self.cursor += 1; let content_start = self.cursor;
86 self.cursor += data.raw.len();
87 let content_end = self.cursor;
88
89 self.cursor += 1; if content_start < content_end {
92 self.refs.push(PositionedReference {
93 range: self.make_range(content_start, content_end),
94 reference_type: data.reference_type.clone(),
95 raw: data.raw.clone(),
96 });
97 }
98 }
99
100 fn advance_unescaped(&mut self, text: &str) {
102 for _expected in text.chars() {
103 if self.cursor >= self.raw.len() {
104 break;
105 }
106 let raw_ch = self.raw[self.cursor..].chars().next().unwrap();
107 if raw_ch == '\\' {
108 if self.cursor + 1 >= self.raw.len() {
109 self.cursor += 1;
111 } else {
112 let next_ch = self.raw[self.cursor + 1..].chars().next();
113 match next_ch {
114 Some(nc) if !nc.is_alphanumeric() => {
115 self.cursor += 1 + nc.len_utf8();
117 }
118 _ => {
119 self.cursor += 1;
121 }
122 }
123 }
124 } else {
125 self.cursor += raw_ch.len_utf8();
126 }
127 }
128 }
129
130 fn make_range(&self, start: usize, end: usize) -> Range {
131 let start_pos = self.position_at(start);
132 let end_pos = self.position_at(end);
133 let mut range = Range::new(
134 (self.base_range.span.start + start)..(self.base_range.span.start + end),
135 start_pos,
136 end_pos,
137 );
138 range.origin_path = self.base_range.origin_path.clone();
142 range
143 }
144
145 fn position_at(&self, offset: usize) -> Position {
146 let mut line = self.base_range.start.line;
152 let mut column = self.base_range.start.column;
153 for ch in self.raw[..offset].chars() {
154 if ch == '\n' {
155 line += 1;
156 column = 0;
157 } else {
158 column += ch.len_utf16();
159 }
160 }
161 Position::new(line, column)
162 }
163}
164
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a `TextContent` for `content` located at `line`/`column`.
    ///
    /// The byte span is `0..content.len()`. The end column is measured in
    /// UTF-16 code units, the same unit `position_at` uses, so the helper is
    /// also correct for non-ASCII content. Intended for single-line content:
    /// the end position stays on `line`.
    fn text_with_range(content: &str, line: usize, column: usize) -> TextContent {
        let utf16_len: usize = content.chars().map(char::len_utf16).sum();
        let start = Position::new(line, column);
        let end = Position::new(line, column + utf16_len);
        let range = Range::new(0..content.len(), start, end);
        TextContent::from_string(content.to_string(), Some(range))
    }

    /// Each reference flavour in the text is found and classified.
    #[test]
    fn extracts_references_with_classification() {
        let text = text_with_range("See [::note] and [@spec2024] plus [42]", 0, 0);
        let refs = extract_references(&text);
        assert_eq!(refs.len(), 3);
        assert!(refs
            .iter()
            .any(|r| matches!(r.reference_type, ReferenceType::AnnotationReference { .. })));
        assert!(refs
            .iter()
            .any(|r| matches!(r.reference_type, ReferenceType::Citation(_))));
        assert!(refs
            .iter()
            .any(|r| matches!(r.reference_type, ReferenceType::FootnoteNumber { .. })));
    }

    /// The reported span covers the reference content, not the brackets.
    #[test]
    fn reference_ranges_are_correct() {
        let text = text_with_range("Hello [world] end", 0, 0);
        let refs = extract_references(&text);
        assert_eq!(refs.len(), 1);
        assert_eq!(refs[0].raw, "world");
        assert_eq!(refs[0].range.span, 7..12);
    }

    /// References nested inside a formatting container are still found.
    #[test]
    fn references_inside_formatting() {
        let text = text_with_range("*bold [ref]* end", 0, 0);
        let refs = extract_references(&text);
        assert_eq!(refs.len(), 1);
        assert_eq!(refs[0].raw, "ref");
    }

    /// Backslash-escaped brackets do not form a reference.
    #[test]
    fn escaped_brackets_not_references() {
        let text = text_with_range("\\[not a ref\\]", 0, 0);
        let refs = extract_references(&text);
        assert!(refs.is_empty());
    }

    /// Empty content short-circuits to an empty result.
    #[test]
    fn empty_text_returns_nothing() {
        let text = text_with_range("", 0, 0);
        let refs = extract_references(&text);
        assert!(refs.is_empty());
    }

    /// Byte spans are UTF-8 offsets while columns are UTF-16 code units; the
    /// arrow is three bytes in UTF-8 but a single UTF-16 unit.
    #[test]
    fn reference_columns_are_utf16_units_after_arrow() {
        let raw = "see → [ref] end";
        let text = text_with_range(raw, 0, 0);

        let refs = extract_references(&text);
        assert_eq!(refs.len(), 1);
        let r = &refs[0];
        assert_eq!(r.raw, "ref");
        assert_eq!(r.range.span, 9..12, "byte span (UTF-8 bytes)");
        assert_eq!(
            r.range.start,
            Position::new(0, 7),
            "content start column should be UTF-16 (got {:?})",
            r.range.start
        );
        assert_eq!(r.range.end, Position::new(0, 10));
    }

    /// Text without a location yields no references.
    #[test]
    fn no_location_returns_nothing() {
        let text = TextContent::from_string("Hello [world]".to_string(), None);
        let refs = extract_references(&text);
        assert!(refs.is_empty());
    }

    /// Neither an escaped backslash nor a lone trailing backslash at the end
    /// of the text may cause a panic.
    #[test]
    fn trailing_backslash_does_not_panic() {
        // Two backslashes: an escaped backslash at the end of the text.
        let text = text_with_range("Hello\\\\", 0, 0);
        let refs = extract_references(&text);
        assert!(refs.is_empty());

        // One backslash: nothing follows it.
        let text2 = text_with_range("Hello\\", 0, 0);
        let refs2 = extract_references(&text2);
        assert!(refs2.is_empty());
    }
}