Skip to main content

lex_analysis/
inline.rs

1use lex_core::lex::ast::{Position, Range, TextContent};
2use lex_core::lex::inlines::{parse_inlines, InlineNode, ReferenceType};
3
4#[derive(Debug, Clone, PartialEq)]
5pub enum InlineSpanKind {
6    Strong,
7    Emphasis,
8    Code,
9    Math,
10    Reference(ReferenceType),
11    StrongMarkerStart,
12    StrongMarkerEnd,
13    EmphasisMarkerStart,
14    EmphasisMarkerEnd,
15    CodeMarkerStart,
16    CodeMarkerEnd,
17    MathMarkerStart,
18    MathMarkerEnd,
19    RefMarkerStart,
20    RefMarkerEnd,
21}
22
23#[derive(Debug, Clone, PartialEq)]
24pub struct InlineSpan {
25    pub kind: InlineSpanKind,
26    pub range: Range,
27    pub raw: String,
28}
29
30/// Extract inline spans (formatting + references) from a text node.
31pub fn extract_inline_spans(text: &TextContent) -> Vec<InlineSpan> {
32    let Some(base_range) = text.location.as_ref() else {
33        return Vec::new();
34    };
35
36    let content = text.as_string();
37    if content.is_empty() {
38        return Vec::new();
39    }
40
41    let mut spans = Vec::new();
42    spans.extend(spans_from_marker(
43        content,
44        base_range,
45        '*',
46        InlineSpanKind::Strong,
47        InlineSpanKind::StrongMarkerStart,
48        InlineSpanKind::StrongMarkerEnd,
49    ));
50    spans.extend(spans_from_marker(
51        content,
52        base_range,
53        '_',
54        InlineSpanKind::Emphasis,
55        InlineSpanKind::EmphasisMarkerStart,
56        InlineSpanKind::EmphasisMarkerEnd,
57    ));
58    spans.extend(spans_from_marker(
59        content,
60        base_range,
61        '`',
62        InlineSpanKind::Code,
63        InlineSpanKind::CodeMarkerStart,
64        InlineSpanKind::CodeMarkerEnd,
65    ));
66    spans.extend(spans_from_marker(
67        content,
68        base_range,
69        '#',
70        InlineSpanKind::Math,
71        InlineSpanKind::MathMarkerStart,
72        InlineSpanKind::MathMarkerEnd,
73    ));
74    spans.extend(reference_spans(content, base_range));
75    spans
76}
77
78fn spans_from_marker(
79    text: &str,
80    base_range: &Range,
81    marker: char,
82    content_kind: InlineSpanKind,
83    start_marker_kind: InlineSpanKind,
84    end_marker_kind: InlineSpanKind,
85) -> Vec<InlineSpan> {
86    let mut spans = Vec::new();
87    for (start, end) in scan_symmetric_pairs(text, marker) {
88        let marker_len = marker.len_utf8();
89        let inner_start = start + marker_len;
90        let inner_end = end.saturating_sub(marker_len);
91        if inner_end <= inner_start {
92            continue;
93        }
94
95        // Opening marker
96        spans.push(InlineSpan {
97            kind: start_marker_kind.clone(),
98            range: sub_range(base_range, text, start, inner_start),
99            raw: marker.to_string(),
100        });
101
102        // Content
103        spans.push(InlineSpan {
104            kind: content_kind.clone(),
105            range: sub_range(base_range, text, inner_start, inner_end),
106            raw: text[inner_start..inner_end].to_string(),
107        });
108
109        // Closing marker
110        spans.push(InlineSpan {
111            kind: end_marker_kind.clone(),
112            range: sub_range(base_range, text, inner_end, end),
113            raw: marker.to_string(),
114        });
115    }
116    spans
117}
118
119fn reference_spans(text: &str, base_range: &Range) -> Vec<InlineSpan> {
120    let mut spans = Vec::new();
121    for (start, end) in scan_bracket_pairs(text) {
122        let inner_start = start + '['.len_utf8();
123        let inner_end = end.saturating_sub(']'.len_utf8());
124        if inner_end <= inner_start {
125            continue;
126        }
127        let raw = text[inner_start..inner_end].to_string();
128        let reference_type = classify_reference(&raw);
129
130        // Opening bracket
131        spans.push(InlineSpan {
132            kind: InlineSpanKind::RefMarkerStart,
133            range: sub_range(base_range, text, start, inner_start),
134            raw: "[".to_string(),
135        });
136
137        // Reference content
138        spans.push(InlineSpan {
139            kind: InlineSpanKind::Reference(reference_type),
140            range: sub_range(base_range, text, inner_start, inner_end),
141            raw,
142        });
143
144        // Closing bracket
145        spans.push(InlineSpan {
146            kind: InlineSpanKind::RefMarkerEnd,
147            range: sub_range(base_range, text, inner_end, end),
148            raw: "]".to_string(),
149        });
150    }
151    spans
152}
153
154fn classify_reference(raw: &str) -> ReferenceType {
155    let wrapped = format!("[{raw}]");
156    for node in parse_inlines(&wrapped) {
157        if let InlineNode::Reference { data, .. } = node {
158            return data.reference_type;
159        }
160    }
161    ReferenceType::NotSure
162}
163
/// Find non-empty regions of `text` delimited by the same `marker` on both sides.
///
/// Returns `(start, end)` byte offsets where `start` is the offset of the
/// opening marker and `end` is one past the closing marker. A backslash
/// escapes the character that follows it, so that character is never treated
/// as a marker. Two adjacent markers (nothing between them) cancel each other
/// out without producing a pair. Markers do not nest: the first unmatched
/// marker opens, the next one closes.
fn scan_symmetric_pairs(text: &str, marker: char) -> Vec<(usize, usize)> {
    let marker_len = marker.len_utf8();
    let mut pairs = Vec::new();
    let mut pending: Option<usize> = None;

    let mut chars = text.char_indices();
    while let Some((pos, current)) = chars.next() {
        if current == '\\' {
            // Consume the escaped character so it cannot act as a marker.
            chars.next();
            continue;
        }
        if current != marker {
            continue;
        }
        match pending.take() {
            // Closing marker with at least one byte of content before it.
            Some(opened) if pos > opened + marker_len => {
                pairs.push((opened, pos + marker_len));
            }
            // Closing marker directly after the opening one: drop both.
            Some(_) => {}
            // No marker pending: this one opens a new region.
            None => pending = Some(pos),
        }
    }
    pairs
}
190
/// Find non-empty `[...]` regions in `text`.
///
/// Returns `(start, end)` byte offsets where `start` is the offset of `[` and
/// `end` is one past the matching `]`. A backslash escapes the character that
/// follows it. Brackets do not nest: while a `[` is pending, further `[`
/// characters are ignored and the first `]` closes the region. An empty `[]`
/// produces no pair, and a `]` with no pending `[` is ignored.
fn scan_bracket_pairs(text: &str) -> Vec<(usize, usize)> {
    let open_len = '['.len_utf8();
    let close_len = ']'.len_utf8();
    let mut pairs = Vec::new();
    let mut pending: Option<usize> = None;

    let mut chars = text.char_indices();
    while let Some((pos, current)) = chars.next() {
        match current {
            '\\' => {
                // Consume the escaped character so it cannot act as a bracket.
                chars.next();
            }
            // Keep the earliest unmatched opening bracket.
            '[' => pending = pending.or(Some(pos)),
            ']' => {
                if let Some(opened) = pending.take() {
                    if pos > opened + open_len {
                        pairs.push((opened, pos + close_len));
                    }
                }
            }
            _ => {}
        }
    }
    pairs
}
218
219fn sub_range(base: &Range, text: &str, start: usize, end: usize) -> Range {
220    let start_pos = position_for_offset(base, text, start);
221    let end_pos = position_for_offset(base, text, end);
222    Range::new(
223        (base.span.start + start)..(base.span.start + end),
224        start_pos,
225        end_pos,
226    )
227}
228
229fn position_for_offset(base: &Range, text: &str, offset: usize) -> Position {
230    let mut line = base.start.line;
231    let mut column = base.start.column;
232    for ch in text[..offset].chars() {
233        if ch == '\n' {
234            line += 1;
235            column = 0;
236        } else {
237            column += ch.len_utf8();
238        }
239    }
240    Position::new(line, column)
241}
242
#[cfg(test)]
mod tests {
    use super::*;
    use lex_core::lex::ast::Range;

    /// Build a single-line `TextContent` holding `content`, located at
    /// `line`/`column` and spanning bytes `0..content.len()`.
    fn text_with_range(content: &str, line: usize, column: usize) -> TextContent {
        let byte_len = content.len();
        let range = Range::new(
            0..byte_len,
            Position::new(line, column),
            Position::new(line, column + byte_len),
        );
        TextContent::from_string(content.to_string(), Some(range))
    }

    #[test]
    fn detects_basic_inline_spans() {
        let text = text_with_range("*bold* _em_ `code` #math#", 2, 4);
        let spans = extract_inline_spans(&text);

        // Four inlines, each yielding opening marker + content + closing marker.
        assert_eq!(spans.len(), 12);

        let expected_kinds = [
            InlineSpanKind::Strong,
            InlineSpanKind::Emphasis,
            InlineSpanKind::Code,
            InlineSpanKind::Math,
            InlineSpanKind::StrongMarkerStart,
            InlineSpanKind::StrongMarkerEnd,
        ];
        for expected in &expected_kinds {
            assert!(spans.iter().any(|span| &span.kind == expected));
        }
    }

    #[test]
    fn detects_references_with_classification() {
        let text = text_with_range("See [^note] and [@spec2024] plus [42]", 0, 0);

        // Keep only the classification carried by each reference content span.
        let mut kinds = Vec::new();
        for span in extract_inline_spans(&text) {
            if let InlineSpanKind::Reference(reference) = span.kind {
                kinds.push(reference);
            }
        }

        assert_eq!(kinds.len(), 3);

        let has_labeled_footnote = kinds
            .iter()
            .any(|kind| matches!(kind, ReferenceType::FootnoteLabeled { .. }));
        let has_citation = kinds
            .iter()
            .any(|kind| matches!(kind, ReferenceType::Citation(_)));
        let has_numbered_footnote = kinds
            .iter()
            .any(|kind| matches!(kind, ReferenceType::FootnoteNumber { .. }));
        assert!(has_labeled_footnote);
        assert!(has_citation);
        assert!(has_numbered_footnote);
    }
}