1use lex_core::lex::ast::{Position, Range, TextContent};
2use lex_core::lex::inlines::{parse_inlines, InlineNode, ReferenceType};
3
/// Classification of a region found inside inline text.
///
/// The content variants (`Strong`, `Emphasis`, `Code`, `Math`, `Reference`)
/// label the text *between* a pair of delimiters, while the `*MarkerStart` /
/// `*MarkerEnd` variants label the opening and closing delimiter characters
/// themselves.
#[derive(Debug, Clone, PartialEq)]
pub enum InlineSpanKind {
    /// Text enclosed by a pair of `*` markers.
    Strong,
    /// Text enclosed by a pair of `_` markers.
    Emphasis,
    /// Text enclosed by a pair of backtick markers.
    Code,
    /// Text enclosed by a pair of `#` markers.
    Math,
    /// Text enclosed by `[` and `]`, together with the reference type the
    /// inline parser assigned to it.
    Reference(ReferenceType),
    /// The opening `*` of a strong region.
    StrongMarkerStart,
    /// The closing `*` of a strong region.
    StrongMarkerEnd,
    /// The opening `_` of an emphasis region.
    EmphasisMarkerStart,
    /// The closing `_` of an emphasis region.
    EmphasisMarkerEnd,
    /// The opening backtick of a code region.
    CodeMarkerStart,
    /// The closing backtick of a code region.
    CodeMarkerEnd,
    /// The opening `#` of a math region.
    MathMarkerStart,
    /// The closing `#` of a math region.
    MathMarkerEnd,
    /// The opening `[` of a reference.
    RefMarkerStart,
    /// The closing `]` of a reference.
    RefMarkerEnd,
}
22
/// A single classified region of inline text.
#[derive(Debug, Clone, PartialEq)]
pub struct InlineSpan {
    /// What the region represents: delimited content or one of its markers.
    pub kind: InlineSpanKind,
    /// Location of the region, computed relative to the enclosing text's range.
    pub range: Range,
    /// The text covered by the region: the delimiter character for marker
    /// spans, the enclosed text for content spans.
    pub raw: String,
}
29
30pub fn extract_inline_spans(text: &TextContent) -> Vec<InlineSpan> {
32 let Some(base_range) = text.location.as_ref() else {
33 return Vec::new();
34 };
35
36 let content = text.as_string();
37 if content.is_empty() {
38 return Vec::new();
39 }
40
41 let mut spans = Vec::new();
42 spans.extend(spans_from_marker(
43 content,
44 base_range,
45 '*',
46 InlineSpanKind::Strong,
47 InlineSpanKind::StrongMarkerStart,
48 InlineSpanKind::StrongMarkerEnd,
49 ));
50 spans.extend(spans_from_marker(
51 content,
52 base_range,
53 '_',
54 InlineSpanKind::Emphasis,
55 InlineSpanKind::EmphasisMarkerStart,
56 InlineSpanKind::EmphasisMarkerEnd,
57 ));
58 spans.extend(spans_from_marker(
59 content,
60 base_range,
61 '`',
62 InlineSpanKind::Code,
63 InlineSpanKind::CodeMarkerStart,
64 InlineSpanKind::CodeMarkerEnd,
65 ));
66 spans.extend(spans_from_marker(
67 content,
68 base_range,
69 '#',
70 InlineSpanKind::Math,
71 InlineSpanKind::MathMarkerStart,
72 InlineSpanKind::MathMarkerEnd,
73 ));
74 spans.extend(reference_spans(content, base_range));
75 spans
76}
77
78fn spans_from_marker(
79 text: &str,
80 base_range: &Range,
81 marker: char,
82 content_kind: InlineSpanKind,
83 start_marker_kind: InlineSpanKind,
84 end_marker_kind: InlineSpanKind,
85) -> Vec<InlineSpan> {
86 let mut spans = Vec::new();
87 for (start, end) in scan_symmetric_pairs(text, marker) {
88 let marker_len = marker.len_utf8();
89 let inner_start = start + marker_len;
90 let inner_end = end.saturating_sub(marker_len);
91 if inner_end <= inner_start {
92 continue;
93 }
94
95 spans.push(InlineSpan {
97 kind: start_marker_kind.clone(),
98 range: sub_range(base_range, text, start, inner_start),
99 raw: marker.to_string(),
100 });
101
102 spans.push(InlineSpan {
104 kind: content_kind.clone(),
105 range: sub_range(base_range, text, inner_start, inner_end),
106 raw: text[inner_start..inner_end].to_string(),
107 });
108
109 spans.push(InlineSpan {
111 kind: end_marker_kind.clone(),
112 range: sub_range(base_range, text, inner_end, end),
113 raw: marker.to_string(),
114 });
115 }
116 spans
117}
118
119fn reference_spans(text: &str, base_range: &Range) -> Vec<InlineSpan> {
120 let mut spans = Vec::new();
121 for (start, end) in scan_bracket_pairs(text) {
122 let inner_start = start + '['.len_utf8();
123 let inner_end = end.saturating_sub(']'.len_utf8());
124 if inner_end <= inner_start {
125 continue;
126 }
127 let raw = text[inner_start..inner_end].to_string();
128 let reference_type = classify_reference(&raw);
129
130 spans.push(InlineSpan {
132 kind: InlineSpanKind::RefMarkerStart,
133 range: sub_range(base_range, text, start, inner_start),
134 raw: "[".to_string(),
135 });
136
137 spans.push(InlineSpan {
139 kind: InlineSpanKind::Reference(reference_type),
140 range: sub_range(base_range, text, inner_start, inner_end),
141 raw,
142 });
143
144 spans.push(InlineSpan {
146 kind: InlineSpanKind::RefMarkerEnd,
147 range: sub_range(base_range, text, inner_end, end),
148 raw: "]".to_string(),
149 });
150 }
151 spans
152}
153
154fn classify_reference(raw: &str) -> ReferenceType {
155 let wrapped = format!("[{raw}]");
156 for node in parse_inlines(&wrapped) {
157 if let InlineNode::Reference { data, .. } = node {
158 return data.reference_type;
159 }
160 }
161 ReferenceType::NotSure
162}
163
/// Finds regions of `text` delimited by pairs of `marker`.
///
/// Returns `(start, end)` byte offsets where `start` is the offset of the
/// opening marker and `end` is one past the closing marker. A backslash
/// escapes the character that follows it, so that character is never treated
/// as a marker (or as another backslash). Two adjacent markers with nothing
/// between them are consumed as a pair but not reported.
fn scan_symmetric_pairs(text: &str, marker: char) -> Vec<(usize, usize)> {
    let width = marker.len_utf8();
    let mut pairs = Vec::new();
    let mut pending: Option<usize> = None;

    let mut cursor = text.char_indices();
    while let Some((at, current)) = cursor.next() {
        if current == '\\' {
            // Swallow the escaped character, whatever it is.
            cursor.next();
        } else if current == marker {
            match pending.take() {
                // Closing marker: report only when something sits in between.
                Some(opened) if at > opened + width => pairs.push((opened, at + width)),
                // Closing marker directly after the opener: drop the empty pair.
                Some(_) => {}
                // No open marker yet: this one opens a region.
                None => pending = Some(at),
            }
        }
    }
    pairs
}
190
/// Finds `[`…`]` regions in `text`.
///
/// Returns `(start, end)` byte offsets where `start` is the offset of the
/// opening `[` and `end` is one past the closing `]`. A backslash escapes the
/// character that follows it. While a bracket is open, further `[` characters
/// are ignored, so the first `[` pairs with the next `]`. Pairs with nothing
/// between the brackets are not reported.
fn scan_bracket_pairs(text: &str) -> Vec<(usize, usize)> {
    let open_width = '['.len_utf8();
    let close_width = ']'.len_utf8();
    let mut pairs = Vec::new();
    let mut pending: Option<usize> = None;

    let mut cursor = text.char_indices();
    while let Some((at, current)) = cursor.next() {
        match current {
            '\\' => {
                // Swallow the escaped character, whatever it is.
                cursor.next();
            }
            // Keep the earliest unmatched opener.
            '[' => pending = pending.or(Some(at)),
            ']' => {
                let opener = pending.take().filter(|&opened| at > opened + open_width);
                if let Some(opened) = opener {
                    pairs.push((opened, at + close_width));
                }
            }
            _ => {}
        }
    }
    pairs
}
218
219fn sub_range(base: &Range, text: &str, start: usize, end: usize) -> Range {
220 let start_pos = position_for_offset(base, text, start);
221 let end_pos = position_for_offset(base, text, end);
222 Range::new(
223 (base.span.start + start)..(base.span.start + end),
224 start_pos,
225 end_pos,
226 )
227}
228
229fn position_for_offset(base: &Range, text: &str, offset: usize) -> Position {
230 let mut line = base.start.line;
231 let mut column = base.start.column;
232 for ch in text[..offset].chars() {
233 if ch == '\n' {
234 line += 1;
235 column = 0;
236 } else {
237 column += ch.len_utf8();
238 }
239 }
240 Position::new(line, column)
241}
242
#[cfg(test)]
mod tests {
    use super::*;
    use lex_core::lex::ast::Range;

    /// Builds a single-line `TextContent` whose range starts at
    /// (`line`, `column`) and whose byte span starts at 0.
    fn text_with_range(content: &str, line: usize, column: usize) -> TextContent {
        let span = 0..content.len();
        let begin = Position::new(line, column);
        let finish = Position::new(line, column + content.len());
        TextContent::from_string(content.to_string(), Some(Range::new(span, begin, finish)))
    }

    #[test]
    fn detects_basic_inline_spans() {
        let spans = extract_inline_spans(&text_with_range("*bold* _em_ `code` #math#", 2, 4));

        // Four constructs, each yielding opener + content + closer.
        assert_eq!(spans.len(), 12);

        let expected = [
            InlineSpanKind::Strong,
            InlineSpanKind::Emphasis,
            InlineSpanKind::Code,
            InlineSpanKind::Math,
            InlineSpanKind::StrongMarkerStart,
            InlineSpanKind::StrongMarkerEnd,
        ];
        for kind in &expected {
            assert!(spans.iter().any(|span| &span.kind == kind));
        }
    }

    #[test]
    fn detects_references_with_classification() {
        let spans =
            extract_inline_spans(&text_with_range("See [^note] and [@spec2024] plus [42]", 0, 0));

        let mut references = Vec::new();
        for span in &spans {
            if let InlineSpanKind::Reference(reference) = &span.kind {
                references.push(reference.clone());
            }
        }
        assert_eq!(references.len(), 3);

        let has_labeled_footnote = references
            .iter()
            .any(|kind| matches!(kind, ReferenceType::FootnoteLabeled { .. }));
        let has_citation = references
            .iter()
            .any(|kind| matches!(kind, ReferenceType::Citation(_)));
        let has_numbered_footnote = references
            .iter()
            .any(|kind| matches!(kind, ReferenceType::FootnoteNumber { .. }));

        assert!(has_labeled_footnote);
        assert!(has_citation);
        assert!(has_numbered_footnote);
    }
}