1use lex_core::lex::ast::{Position, Range, TextContent};
7use lex_core::lex::inlines::{InlineNode, ReferenceInline, ReferenceType};
8
/// An inline reference paired with the location of its content in the source.
#[derive(Debug, Clone, PartialEq)]
pub struct PositionedReference {
    /// Range covering the reference's content only; the one-byte delimiter on
    /// each side is excluded.
    pub range: Range,
    /// Classification of the reference, cloned from the parsed `ReferenceInline`.
    pub reference_type: ReferenceType,
    /// The reference content, cloned from `ReferenceInline::raw`.
    pub raw: String,
}
16
17pub fn extract_references(text: &TextContent) -> Vec<PositionedReference> {
23 let Some(base_range) = text.location.as_ref() else {
24 return Vec::new();
25 };
26 let raw = text.as_string();
27 if raw.is_empty() {
28 return Vec::new();
29 }
30 let nodes = text.inline_items();
31 let mut walker = ReferenceWalker {
32 raw,
33 base_range,
34 cursor: 0,
35 refs: Vec::new(),
36 };
37 walker.walk_nodes(&nodes);
38 walker.refs
39}
40
/// Cursor state kept while walking inline nodes to locate references in `raw`.
struct ReferenceWalker<'a> {
    /// The text being scanned; `cursor` is a byte offset into it.
    raw: &'a str,
    /// Range of the whole text; reference spans and positions are computed
    /// relative to its start.
    base_range: &'a Range,
    /// Byte offset into `raw` just past everything visited so far.
    cursor: usize,
    /// References collected so far, in visit order.
    refs: Vec<PositionedReference>,
}
47
48impl<'a> ReferenceWalker<'a> {
49 fn walk_nodes(&mut self, nodes: &[InlineNode]) {
50 for node in nodes {
51 self.walk_node(node);
52 }
53 }
54
55 fn walk_node(&mut self, node: &InlineNode) {
56 match node {
57 InlineNode::Plain { text, .. } => self.skip_plain(text),
58 InlineNode::Strong { content, .. } => self.skip_container(content, '*'),
59 InlineNode::Emphasis { content, .. } => self.skip_container(content, '_'),
60 InlineNode::Code { text, .. } => self.skip_literal(text, '`'),
61 InlineNode::Math { text, .. } => self.skip_literal(text, '#'),
62 InlineNode::Reference { data, .. } => self.collect_reference(data),
63 }
64 }
65
66 fn skip_plain(&mut self, text: &str) {
67 self.advance_unescaped(text);
68 }
69
70 fn skip_container(&mut self, content: &[InlineNode], marker: char) {
71 self.cursor += marker.len_utf8(); self.walk_nodes(content);
73 self.cursor += marker.len_utf8(); }
75
76 fn skip_literal(&mut self, text: &str, marker: char) {
77 self.cursor += marker.len_utf8(); self.cursor += text.len(); self.cursor += marker.len_utf8(); }
81
82 fn collect_reference(&mut self, data: &ReferenceInline) {
83 self.cursor += 1; let content_start = self.cursor;
86 self.cursor += data.raw.len();
87 let content_end = self.cursor;
88
89 self.cursor += 1; if content_start < content_end {
92 self.refs.push(PositionedReference {
93 range: self.make_range(content_start, content_end),
94 reference_type: data.reference_type.clone(),
95 raw: data.raw.clone(),
96 });
97 }
98 }
99
100 fn advance_unescaped(&mut self, text: &str) {
102 for _expected in text.chars() {
103 if self.cursor >= self.raw.len() {
104 break;
105 }
106 let raw_ch = self.raw[self.cursor..].chars().next().unwrap();
107 if raw_ch == '\\' {
108 if self.cursor + 1 >= self.raw.len() {
109 self.cursor += 1;
111 } else {
112 let next_ch = self.raw[self.cursor + 1..].chars().next();
113 match next_ch {
114 Some(nc) if !nc.is_alphanumeric() => {
115 self.cursor += 1 + nc.len_utf8();
117 }
118 _ => {
119 self.cursor += 1;
121 }
122 }
123 }
124 } else {
125 self.cursor += raw_ch.len_utf8();
126 }
127 }
128 }
129
130 fn make_range(&self, start: usize, end: usize) -> Range {
131 let start_pos = self.position_at(start);
132 let end_pos = self.position_at(end);
133 Range::new(
134 (self.base_range.span.start + start)..(self.base_range.span.start + end),
135 start_pos,
136 end_pos,
137 )
138 }
139
140 fn position_at(&self, offset: usize) -> Position {
141 let mut line = self.base_range.start.line;
142 let mut column = self.base_range.start.column;
143 for ch in self.raw[..offset].chars() {
144 if ch == '\n' {
145 line += 1;
146 column = 0;
147 } else {
148 column += ch.len_utf8();
149 }
150 }
151 Position::new(line, column)
152 }
153}
154
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a `TextContent` for `content` whose range starts at
    /// (`line`, `column`), ends `content.len()` columns later on the same
    /// line, and spans bytes `0..content.len()`.
    fn text_with_range(content: &str, line: usize, column: usize) -> TextContent {
        let byte_len = content.len();
        let range = Range::new(
            0..byte_len,
            Position::new(line, column),
            Position::new(line, column + byte_len),
        );
        TextContent::from_string(content.to_string(), Some(range))
    }

    /// Extracts references from `content` placed at line 0, column 0.
    fn refs_in(content: &str) -> Vec<PositionedReference> {
        extract_references(&text_with_range(content, 0, 0))
    }

    #[test]
    fn extracts_references_with_classification() {
        let found = refs_in("See [^note] and [@spec2024] plus [42]");
        assert_eq!(found.len(), 3);

        let has_labeled = found
            .iter()
            .any(|r| matches!(r.reference_type, ReferenceType::FootnoteLabeled { .. }));
        let has_citation = found
            .iter()
            .any(|r| matches!(r.reference_type, ReferenceType::Citation(_)));
        let has_numbered = found
            .iter()
            .any(|r| matches!(r.reference_type, ReferenceType::FootnoteNumber { .. }));

        assert!(has_labeled);
        assert!(has_citation);
        assert!(has_numbered);
    }

    #[test]
    fn reference_ranges_are_correct() {
        let found = refs_in("Hello [world] end");
        assert_eq!(found.len(), 1);

        let only = &found[0];
        assert_eq!(only.raw, "world");
        // The span covers the content between the brackets only.
        assert_eq!(only.range.span, 7..12);
    }

    #[test]
    fn references_inside_formatting() {
        let found = refs_in("*bold [ref]* end");
        assert_eq!(found.len(), 1);
        assert_eq!(found[0].raw, "ref");
    }

    #[test]
    fn escaped_brackets_not_references() {
        assert!(refs_in("\\[not a ref\\]").is_empty());
    }

    #[test]
    fn empty_text_returns_nothing() {
        assert!(refs_in("").is_empty());
    }

    #[test]
    fn no_location_returns_nothing() {
        let unlocated = TextContent::from_string("Hello [world]".to_string(), None);
        assert!(extract_references(&unlocated).is_empty());
    }

    #[test]
    fn trailing_backslash_does_not_panic() {
        // First input ends in two backslashes, second in a single one.
        for content in ["Hello\\\\", "Hello\\"] {
            assert!(refs_in(content).is_empty());
        }
    }
}